diff --git a/.coveragerc b/.coveragerc
index 1ed1a9704..e78e7a931 100644
--- a/.coveragerc
+++ b/.coveragerc
@@ -9,11 +9,6 @@ omit =
google/cloud/bigquery_v2/* # Legacy proto-based types.
exclude_lines =
# Re-enable the standard pragma
- pragma: NO COVER
+ pragma: (no cover|NO COVER)
# Ignore debug-only repr
def __repr__
- # Ignore pkg_resources exceptions.
- # This is added at the module level as a safeguard for if someone
- # generates the code and tries to run it without pip installing. This
- # makes it virtually impossible to test properly.
- except pkg_resources.DistributionNotFound
diff --git a/.flake8 b/.flake8
index 2e4387498..32986c792 100644
--- a/.flake8
+++ b/.flake8
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright 2020 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 6763f258c..c7478150e 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -4,8 +4,8 @@
# For syntax help see:
# https://help.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners#codeowners-syntax
-# The @googleapis/api-bigquery is the default owner for changes in this repo
-* @googleapis/api-bigquery @googleapis/yoshi-python
+# The @googleapis/python-core-client-libraries is the default owner for changes in this repo
+* @googleapis/python-core-client-libraries @googleapis/yoshi-python
# The python-samples-reviewers team is the default owner for samples changes
-/samples/ @googleapis/api-bigquery @googleapis/python-samples-owners @googleapis/yoshi-python
+/samples/ @googleapis/python-core-client-libraries @googleapis/python-samples-owners @googleapis/yoshi-python
diff --git a/.github/auto-approve.yml b/.github/auto-approve.yml
deleted file mode 100644
index 311ebbb85..000000000
--- a/.github/auto-approve.yml
+++ /dev/null
@@ -1,3 +0,0 @@
-# https://github.com/googleapis/repo-automation-bots/tree/main/packages/auto-approve
-processes:
- - "OwlBotTemplateChanges"
diff --git a/.github/auto-label.yaml b/.github/auto-label.yaml
index 41bff0b53..21786a4eb 100644
--- a/.github/auto-label.yaml
+++ b/.github/auto-label.yaml
@@ -1,4 +1,4 @@
-# Copyright 2022 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,3 +13,8 @@
# limitations under the License.
requestsize:
enabled: true
+
+path:
+ pullrequest: true
+ paths:
+ samples: "samples"
diff --git a/.github/blunderbuss.yml b/.github/blunderbuss.yml
new file mode 100644
index 000000000..ff168399d
--- /dev/null
+++ b/.github/blunderbuss.yml
@@ -0,0 +1,17 @@
+# Blunderbuss config
+#
+# This file controls who is assigned for pull requests and issues.
+# Note: This file is autogenerated. To make changes to the assignee
+# team, please update `codeowner_team` in `.repo-metadata.json`.
+assign_issues:
+ - googleapis/python-core-client-libraries
+
+assign_issues_by:
+ - labels:
+ - "samples"
+ to:
+ - googleapis/python-samples-reviewers
+ - googleapis/python-core-client-libraries
+
+assign_prs:
+ - googleapis/python-core-client-libraries
diff --git a/.github/release-please.yml b/.github/release-please.yml
deleted file mode 100644
index 5161ab347..000000000
--- a/.github/release-please.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-releaseType: python
-handleGHRelease: true
-# NOTE: this section is generated by synthtool.languages.python
-# See https://github.com/googleapis/synthtool/blob/master/synthtool/languages/python.py
-branches:
-- branch: v2
- handleGHRelease: true
- releaseType: python
-- branch: v1
- handleGHRelease: true
- releaseType: python
-- branch: v0
- handleGHRelease: true
- releaseType: python
diff --git a/.github/release-trigger.yml b/.github/release-trigger.yml
deleted file mode 100644
index d4ca94189..000000000
--- a/.github/release-trigger.yml
+++ /dev/null
@@ -1 +0,0 @@
-enabled: true
diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml
deleted file mode 100644
index 220c031b2..000000000
--- a/.github/sync-repo-settings.yaml
+++ /dev/null
@@ -1,28 +0,0 @@
-# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings
-mergeCommitAllowed: false
-# Rules for main branch protection
-branchProtectionRules:
-# Identifies the protection rule pattern. Name of the branch to be protected.
-# Defaults to `main`
-- pattern: main
- requiresLinearHistory: true
- requiresCodeOwnerReviews: true
- requiresStrictStatusChecks: true
- requiredStatusCheckContexts:
- - 'Kokoro'
- - 'Kokoro snippets-3.8'
- - 'cla/google'
- - 'Samples - Lint'
- - 'Samples - Python 3.7'
- - 'Samples - Python 3.8'
-- pattern: v2
- requiresLinearHistory: true
- requiresCodeOwnerReviews: true
- requiresStrictStatusChecks: true
- requiredStatusCheckContexts:
- - 'Kokoro'
- - 'Kokoro snippets-3.8'
- - 'cla/google'
- - 'Samples - Lint'
- - 'Samples - Python 3.7'
- - 'Samples - Python 3.8'
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
new file mode 100644
index 000000000..9372faac2
--- /dev/null
+++ b/.github/workflows/docs.yml
@@ -0,0 +1,39 @@
+on:
+ pull_request:
+ branches:
+ - main
+name: docs
+jobs:
+ docs:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+ - name: Install nox
+ run: |
+ python -m pip install --upgrade setuptools pip wheel
+ python -m pip install nox
+ - name: Run docs session
+ run: |
+ nox -s docs-3.10
+
+ docfx:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.10'
+ - name: Install nox
+ run: |
+ python -m pip install --upgrade setuptools pip wheel
+ python -m pip install nox
+ - name: Run docfx session
+ run: |
+ nox -s docfx-3.10
diff --git a/.github/workflows/unittest.yml b/.github/workflows/unittest.yml
new file mode 100644
index 000000000..550724076
--- /dev/null
+++ b/.github/workflows/unittest.yml
@@ -0,0 +1,88 @@
+on:
+ pull_request:
+ branches:
+ - main
+name: unittest
+jobs:
+ unit:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14']
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python }}
+ - name: Install nox
+ run: |
+ python -m pip install --upgrade setuptools pip wheel
+ python -m pip install nox
+ - name: Run unit tests
+ env:
+ COVERAGE_FILE: .coverage-${{ matrix.python }}
+ run: |
+ nox -s unit-${{ matrix.python }}
+ - name: Upload coverage results
+ uses: actions/upload-artifact@v4
+ with:
+ name: coverage-artifact-${{ matrix.python }}
+ path: .coverage-${{ matrix.python }}
+ include-hidden-files: true
+
+ unit_noextras:
+ # Use `ubuntu-latest` runner.
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python: ['3.9', '3.14']
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python }}
+ - name: Install nox
+ run: |
+ python -m pip install --upgrade setuptools pip wheel
+ python -m pip install nox
+ - name: Run unit_noextras tests
+ env:
+ COVERAGE_FILE: .coverage-unit-noextras-${{ matrix.python }}
+ run: |
+ nox -s unit_noextras-${{ matrix.python }}
+ - name: Upload coverage results
+ uses: actions/upload-artifact@v4
+ with:
+ name: coverage-artifact-unit-noextras-${{ matrix.python }}
+ path: .coverage-unit-noextras-${{ matrix.python }}
+ include-hidden-files: true
+
+ cover:
+ runs-on: ubuntu-latest
+ needs:
+ - unit
+ - unit_noextras
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Setup Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: "3.9"
+ - name: Install coverage
+ run: |
+ python -m pip install --upgrade setuptools pip wheel
+ python -m pip install coverage
+ - name: Download coverage results
+ uses: actions/download-artifact@v4
+ with:
+ path: .coverage-results/
+ - name: Report coverage results
+ run: |
+ find .coverage-results -type f -name '*.zip' -exec unzip {} \;
+ coverage combine .coverage-results/**/.coverage*
+ coverage report --show-missing --fail-under=100
diff --git a/.gitignore b/.gitignore
index 99c3a1444..168b201f6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -51,6 +51,7 @@ docs.metadata
# Virtual environment
env/
+venv/
# Test logs
coverage.xml
diff --git a/.kokoro/build.sh b/.kokoro/build.sh
index 4d6a1d0f6..d41b45aa1 100755
--- a/.kokoro/build.sh
+++ b/.kokoro/build.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2018 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -15,11 +15,13 @@
set -eo pipefail
+CURRENT_DIR=$(dirname "${BASH_SOURCE[0]}")
+
if [[ -z "${PROJECT_ROOT:-}" ]]; then
- PROJECT_ROOT="github/python-bigquery"
+ PROJECT_ROOT=$(realpath "${CURRENT_DIR}/..")
fi
-cd "${PROJECT_ROOT}"
+pushd "${PROJECT_ROOT}"
# Disable buffering, so that the logs stream through.
export PYTHONUNBUFFERED=1
@@ -28,17 +30,16 @@ export PYTHONUNBUFFERED=1
env | grep KOKORO
# Setup service account credentials.
-export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json
+if [[ -f "${KOKORO_GFILE_DIR}/service-account.json" ]]
+then
+ export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json
+fi
# Setup project id.
-export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json")
-
-# Remove old nox
-python3 -m pip uninstall --yes --quiet nox-automation
-
-# Install nox
-python3 -m pip install --upgrade --quiet nox
-python3 -m nox --version
+if [[ -f "${KOKORO_GFILE_DIR}/project-id.json" ]]
+then
+ export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json")
+fi
# If this is a continuous build, send the test log to the FlakyBot.
# See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot.
@@ -53,7 +54,7 @@ fi
# If NOX_SESSION is set, it only runs the specified session,
# otherwise run all the sessions.
if [[ -n "${NOX_SESSION:-}" ]]; then
- python3 -m nox -s ${NOX_SESSION:-}
+ python3 -m nox -s ${NOX_SESSION:-}
else
- python3 -m nox
+ python3 -m nox
fi
diff --git a/.kokoro/continuous/prerelease-deps-3.10.cfg b/.kokoro/continuous/prerelease-deps-3.13.cfg
similarity index 77%
rename from .kokoro/continuous/prerelease-deps-3.10.cfg
rename to .kokoro/continuous/prerelease-deps-3.13.cfg
index 339980bdd..99a1e7150 100644
--- a/.kokoro/continuous/prerelease-deps-3.10.cfg
+++ b/.kokoro/continuous/prerelease-deps-3.13.cfg
@@ -3,5 +3,5 @@
# Only run this nox session.
env_vars: {
key: "NOX_SESSION"
- value: "prerelease_deps-3.10"
+ value: "prerelease_deps-3.13"
}
diff --git a/.kokoro/continuous/prerelease-deps-3.8.cfg b/.kokoro/continuous/prerelease-deps-3.8.cfg
deleted file mode 100644
index fabe3e347..000000000
--- a/.kokoro/continuous/prerelease-deps-3.8.cfg
+++ /dev/null
@@ -1,7 +0,0 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
-# Only run this nox session.
-env_vars: {
- key: "NOX_SESSION"
- value: "prerelease_deps-3.8"
-}
diff --git a/.kokoro/continuous/prerelease-deps.cfg b/.kokoro/continuous/prerelease-deps.cfg
deleted file mode 100644
index 3595fb43f..000000000
--- a/.kokoro/continuous/prerelease-deps.cfg
+++ /dev/null
@@ -1,7 +0,0 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
-# Only run this nox session.
-env_vars: {
- key: "NOX_SESSION"
- value: "prerelease_deps"
-}
diff --git a/.kokoro/continuous/unit-tests-misc.cfg b/.kokoro/continuous/unit-tests-misc.cfg
new file mode 100644
index 000000000..6598baee7
--- /dev/null
+++ b/.kokoro/continuous/unit-tests-misc.cfg
@@ -0,0 +1,9 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Only run these nox sessions.
+# A subset based on Python versions that are neither our newest nor oldest
+# supported versions of Python
+env_vars: {
+ key: "NOX_SESSION"
+ value: "unit_noextras-3.9 unit_noextras-3.10 unit_noextras-3.11 unit-3.9 unit-3.10 unit-3.11"
+}
\ No newline at end of file
diff --git a/.kokoro/docker/docs/Dockerfile b/.kokoro/docker/docs/Dockerfile
deleted file mode 100644
index 238b87b9d..000000000
--- a/.kokoro/docker/docs/Dockerfile
+++ /dev/null
@@ -1,83 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from ubuntu:22.04
-
-ENV DEBIAN_FRONTEND noninteractive
-
-# Ensure local Python is preferred over distribution Python.
-ENV PATH /usr/local/bin:$PATH
-
-# Install dependencies.
-RUN apt-get update \
- && apt-get install -y --no-install-recommends \
- apt-transport-https \
- build-essential \
- ca-certificates \
- curl \
- dirmngr \
- git \
- gpg-agent \
- graphviz \
- libbz2-dev \
- libdb5.3-dev \
- libexpat1-dev \
- libffi-dev \
- liblzma-dev \
- libreadline-dev \
- libsnappy-dev \
- libssl-dev \
- libsqlite3-dev \
- portaudio19-dev \
- python3-distutils \
- redis-server \
- software-properties-common \
- ssh \
- sudo \
- tcl \
- tcl-dev \
- tk \
- tk-dev \
- uuid-dev \
- wget \
- zlib1g-dev \
- && add-apt-repository universe \
- && apt-get update \
- && apt-get -y install jq \
- && apt-get clean autoclean \
- && apt-get autoremove -y \
- && rm -rf /var/lib/apt/lists/* \
- && rm -f /var/cache/apt/archives/*.deb
-
-###################### Install python 3.8.11
-
-# Download python 3.8.11
-RUN wget https://www.python.org/ftp/python/3.8.11/Python-3.8.11.tgz
-
-# Extract files
-RUN tar -xvf Python-3.8.11.tgz
-
-# Install python 3.8.11
-RUN ./Python-3.8.11/configure --enable-optimizations
-RUN make altinstall
-
-###################### Install pip
-RUN wget -O /tmp/get-pip.py 'https://bootstrap.pypa.io/get-pip.py' \
- && python3 /tmp/get-pip.py \
- && rm /tmp/get-pip.py
-
-# Test pip
-RUN python3 -m pip
-
-CMD ["python3.8"]
diff --git a/.kokoro/docker/docs/fetch_gpg_keys.sh b/.kokoro/docker/docs/fetch_gpg_keys.sh
deleted file mode 100755
index d653dd868..000000000
--- a/.kokoro/docker/docs/fetch_gpg_keys.sh
+++ /dev/null
@@ -1,45 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# A script to fetch gpg keys with retry.
-# Avoid jinja parsing the file.
-#
-
-function retry {
- if [[ "${#}" -le 1 ]]; then
- echo "Usage: ${0} retry_count commands.."
- exit 1
- fi
- local retries=${1}
- local command="${@:2}"
- until [[ "${retries}" -le 0 ]]; do
- $command && return 0
- if [[ $? -ne 0 ]]; then
- echo "command failed, retrying"
- ((retries--))
- fi
- done
- return 1
-}
-
-# 3.6.9, 3.7.5 (Ned Deily)
-retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \
- 0D96DF4D4110E5C43FBFB17F2D347EA6AA65421D
-
-# 3.8.0 (Łukasz Langa)
-retry 3 gpg --keyserver ha.pool.sks-keyservers.net --recv-keys \
- E3FF2839C048B25C084DEBE9B26995E310250568
-
-#
diff --git a/.kokoro/docs/common.cfg b/.kokoro/docs/common.cfg
deleted file mode 100644
index 41b86fc29..000000000
--- a/.kokoro/docs/common.cfg
+++ /dev/null
@@ -1,66 +0,0 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
-# Build logs will be here
-action {
- define_artifacts {
- regex: "**/*sponge_log.xml"
- }
-}
-
-# Download trampoline resources.
-gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
-
-# Use the trampoline script to run in docker.
-build_file: "python-bigquery/.kokoro/trampoline_v2.sh"
-
-# Configure the docker image for kokoro-trampoline.
-env_vars: {
- key: "TRAMPOLINE_IMAGE"
- value: "gcr.io/cloud-devrel-kokoro-resources/python-lib-docs"
-}
-env_vars: {
- key: "TRAMPOLINE_BUILD_FILE"
- value: "github/python-bigquery/.kokoro/publish-docs.sh"
-}
-
-env_vars: {
- key: "STAGING_BUCKET"
- value: "docs-staging"
-}
-
-env_vars: {
- key: "V2_STAGING_BUCKET"
- # Push google cloud library docs to the Cloud RAD bucket `docs-staging-v2`
- value: "docs-staging-v2"
-}
-
-# It will upload the docker image after successful builds.
-env_vars: {
- key: "TRAMPOLINE_IMAGE_UPLOAD"
- value: "true"
-}
-
-# It will always build the docker image.
-env_vars: {
- key: "TRAMPOLINE_DOCKERFILE"
- value: ".kokoro/docker/docs/Dockerfile"
-}
-
-# Fetch the token needed for reporting release status to GitHub
-before_action {
- fetch_keystore {
- keystore_resource {
- keystore_config_id: 73713
- keyname: "yoshi-automation-github-key"
- }
- }
-}
-
-before_action {
- fetch_keystore {
- keystore_resource {
- keystore_config_id: 73713
- keyname: "docuploader_service_account"
- }
- }
-}
\ No newline at end of file
diff --git a/.kokoro/docs/docs-presubmit.cfg b/.kokoro/docs/docs-presubmit.cfg
deleted file mode 100644
index 08adb2e28..000000000
--- a/.kokoro/docs/docs-presubmit.cfg
+++ /dev/null
@@ -1,28 +0,0 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
-env_vars: {
- key: "STAGING_BUCKET"
- value: "gcloud-python-test"
-}
-
-env_vars: {
- key: "V2_STAGING_BUCKET"
- value: "gcloud-python-test"
-}
-
-# We only upload the image in the main `docs` build.
-env_vars: {
- key: "TRAMPOLINE_IMAGE_UPLOAD"
- value: "false"
-}
-
-env_vars: {
- key: "TRAMPOLINE_BUILD_FILE"
- value: "github/python-bigquery/.kokoro/build.sh"
-}
-
-# Only run this nox session.
-env_vars: {
- key: "NOX_SESSION"
- value: "docs docfx"
-}
diff --git a/.kokoro/docs/docs.cfg b/.kokoro/docs/docs.cfg
deleted file mode 100644
index 8f43917d9..000000000
--- a/.kokoro/docs/docs.cfg
+++ /dev/null
@@ -1 +0,0 @@
-# Format: //devtools/kokoro/config/proto/build.proto
\ No newline at end of file
diff --git a/.kokoro/populate-secrets.sh b/.kokoro/populate-secrets.sh
index f52514257..c435402f4 100755
--- a/.kokoro/populate-secrets.sh
+++ b/.kokoro/populate-secrets.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2020 Google LLC.
+# Copyright 2024 Google LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/.kokoro/presubmit/linting-typing.cfg b/.kokoro/presubmit/linting-typing.cfg
new file mode 100644
index 000000000..b1a7406c2
--- /dev/null
+++ b/.kokoro/presubmit/linting-typing.cfg
@@ -0,0 +1,7 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Only run these nox sessions.
+env_vars: {
+ key: "NOX_SESSION"
+ value: "lint lint_setup_py blacken mypy mypy_samples pytype"
+}
diff --git a/.kokoro/presubmit/prerelease-deps-3.10.cfg b/.kokoro/presubmit/prerelease-deps-3.10.cfg
deleted file mode 100644
index 339980bdd..000000000
--- a/.kokoro/presubmit/prerelease-deps-3.10.cfg
+++ /dev/null
@@ -1,7 +0,0 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
-# Only run this nox session.
-env_vars: {
- key: "NOX_SESSION"
- value: "prerelease_deps-3.10"
-}
diff --git a/.kokoro/presubmit/prerelease-deps-3.8.cfg b/.kokoro/presubmit/prerelease-deps-3.8.cfg
deleted file mode 100644
index fabe3e347..000000000
--- a/.kokoro/presubmit/prerelease-deps-3.8.cfg
+++ /dev/null
@@ -1,7 +0,0 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
-# Only run this nox session.
-env_vars: {
- key: "NOX_SESSION"
- value: "prerelease_deps-3.8"
-}
diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg
deleted file mode 100644
index 17d071cae..000000000
--- a/.kokoro/presubmit/presubmit.cfg
+++ /dev/null
@@ -1,11 +0,0 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
-# Disable system tests.
-env_vars: {
- key: "RUN_SYSTEM_TESTS"
- value: "false"
-}
-env_vars: {
- key: "RUN_SNIPPETS_TESTS"
- value: "false"
-}
diff --git a/.kokoro/presubmit/system-3.10.cfg b/.kokoro/presubmit/snippets-3.13.cfg
similarity index 81%
rename from .kokoro/presubmit/system-3.10.cfg
rename to .kokoro/presubmit/snippets-3.13.cfg
index 30956a3ab..0b89f0863 100644
--- a/.kokoro/presubmit/system-3.10.cfg
+++ b/.kokoro/presubmit/snippets-3.13.cfg
@@ -3,5 +3,5 @@
# Only run this nox session.
env_vars: {
key: "NOX_SESSION"
- value: "system-3.10"
+ value: "snippets-3.13"
}
diff --git a/.kokoro/presubmit/snippets-3.8.cfg b/.kokoro/presubmit/snippets-3.9.cfg
similarity index 82%
rename from .kokoro/presubmit/snippets-3.8.cfg
rename to .kokoro/presubmit/snippets-3.9.cfg
index 840d9e716..d1de209a2 100644
--- a/.kokoro/presubmit/snippets-3.8.cfg
+++ b/.kokoro/presubmit/snippets-3.9.cfg
@@ -3,5 +3,5 @@
# Only run this nox session.
env_vars: {
key: "NOX_SESSION"
- value: "snippets-3.8"
+ value: "snippets-3.9"
}
diff --git a/.kokoro/presubmit/snippets-3.10.cfg b/.kokoro/presubmit/system-3.13.cfg
similarity index 80%
rename from .kokoro/presubmit/snippets-3.10.cfg
rename to .kokoro/presubmit/system-3.13.cfg
index dde182fb9..a0e9a0108 100644
--- a/.kokoro/presubmit/snippets-3.10.cfg
+++ b/.kokoro/presubmit/system-3.13.cfg
@@ -3,5 +3,5 @@
# Only run this nox session.
env_vars: {
key: "NOX_SESSION"
- value: "snippets-3.10"
-}
+ value: "system-3.13"
+}
\ No newline at end of file
diff --git a/.kokoro/presubmit/system-3.8.cfg b/.kokoro/presubmit/system-3.9.cfg
similarity index 83%
rename from .kokoro/presubmit/system-3.8.cfg
rename to .kokoro/presubmit/system-3.9.cfg
index f4bcee3db..b8ae66b37 100644
--- a/.kokoro/presubmit/system-3.8.cfg
+++ b/.kokoro/presubmit/system-3.9.cfg
@@ -3,5 +3,5 @@
# Only run this nox session.
env_vars: {
key: "NOX_SESSION"
- value: "system-3.8"
+ value: "system-3.9"
}
\ No newline at end of file
diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh
deleted file mode 100755
index 1c4d62370..000000000
--- a/.kokoro/publish-docs.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -eo pipefail
-
-# Disable buffering, so that the logs stream through.
-export PYTHONUNBUFFERED=1
-
-export PATH="${HOME}/.local/bin:${PATH}"
-
-# Install nox
-python3 -m pip install --require-hashes -r .kokoro/requirements.txt
-python3 -m nox --version
-
-# build docs
-nox -s docs
-
-# create metadata
-python3 -m docuploader create-metadata \
- --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \
- --version=$(python3 setup.py --version) \
- --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \
- --distribution-name=$(python3 setup.py --name) \
- --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \
- --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \
- --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json)
-
-cat docs.metadata
-
-# upload docs
-python3 -m docuploader upload docs/_build/html --metadata-file docs.metadata --staging-bucket "${STAGING_BUCKET}"
-
-
-# docfx yaml files
-nox -s docfx
-
-# create metadata.
-python3 -m docuploader create-metadata \
- --name=$(jq --raw-output '.name // empty' .repo-metadata.json) \
- --version=$(python3 setup.py --version) \
- --language=$(jq --raw-output '.language // empty' .repo-metadata.json) \
- --distribution-name=$(python3 setup.py --name) \
- --product-page=$(jq --raw-output '.product_documentation // empty' .repo-metadata.json) \
- --github-repository=$(jq --raw-output '.repo // empty' .repo-metadata.json) \
- --issue-tracker=$(jq --raw-output '.issue_tracker // empty' .repo-metadata.json)
-
-cat docs.metadata
-
-# upload docs
-python3 -m docuploader upload docs/_build/html/docfx_yaml --metadata-file docs.metadata --destination-prefix docfx --staging-bucket "${V2_STAGING_BUCKET}"
diff --git a/.kokoro/release.sh b/.kokoro/release.sh
deleted file mode 100755
index 879f9ef84..000000000
--- a/.kokoro/release.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-#!/bin/bash
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# https://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -eo pipefail
-
-# Start the releasetool reporter
-python3 -m pip install --require-hashes -r .kokoro/requirements.txt
-python3 -m releasetool publish-reporter-script > /tmp/publisher-script; source /tmp/publisher-script
-
-# Disable buffering, so that the logs stream through.
-export PYTHONUNBUFFERED=1
-
-# Move into the package, build the distribution and upload.
-TWINE_PASSWORD=$(cat "${KOKORO_KEYSTORE_DIR}/73713_google-cloud-pypi-token-keystore-1")
-cd github/python-bigquery
-python3 setup.py sdist bdist_wheel
-twine upload --username __token__ --password "${TWINE_PASSWORD}" dist/*
diff --git a/.kokoro/release/common.cfg b/.kokoro/release/common.cfg
deleted file mode 100644
index 6ae81b743..000000000
--- a/.kokoro/release/common.cfg
+++ /dev/null
@@ -1,40 +0,0 @@
-# Format: //devtools/kokoro/config/proto/build.proto
-
-# Build logs will be here
-action {
- define_artifacts {
- regex: "**/*sponge_log.xml"
- }
-}
-
-# Download trampoline resources.
-gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
-
-# Use the trampoline script to run in docker.
-build_file: "python-bigquery/.kokoro/trampoline.sh"
-
-# Configure the docker image for kokoro-trampoline.
-env_vars: {
- key: "TRAMPOLINE_IMAGE"
- value: "gcr.io/cloud-devrel-kokoro-resources/python-multi"
-}
-env_vars: {
- key: "TRAMPOLINE_BUILD_FILE"
- value: "github/python-bigquery/.kokoro/release.sh"
-}
-
-# Fetch PyPI password
-before_action {
- fetch_keystore {
- keystore_resource {
- keystore_config_id: 73713
- keyname: "google-cloud-pypi-token-keystore-1"
- }
- }
-}
-
-# Tokens needed to report release status back to GitHub
-env_vars: {
- key: "SECRET_MANAGER_KEYS"
- value: "releasetool-publish-reporter-app,releasetool-publish-reporter-googleapis-installation,releasetool-publish-reporter-pem"
-}
diff --git a/.kokoro/release/release.cfg b/.kokoro/release/release.cfg
deleted file mode 100644
index 8f43917d9..000000000
--- a/.kokoro/release/release.cfg
+++ /dev/null
@@ -1 +0,0 @@
-# Format: //devtools/kokoro/config/proto/build.proto
\ No newline at end of file
diff --git a/.kokoro/requirements.in b/.kokoro/requirements.in
deleted file mode 100644
index 7718391a3..000000000
--- a/.kokoro/requirements.in
+++ /dev/null
@@ -1,8 +0,0 @@
-gcp-docuploader
-gcp-releasetool
-importlib-metadata
-typing-extensions
-twine
-wheel
-setuptools
-nox
\ No newline at end of file
diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt
deleted file mode 100644
index c4b824f24..000000000
--- a/.kokoro/requirements.txt
+++ /dev/null
@@ -1,464 +0,0 @@
-#
-# This file is autogenerated by pip-compile with python 3.10
-# To update, run:
-#
-# pip-compile --allow-unsafe --generate-hashes requirements.in
-#
-argcomplete==2.0.0 \
- --hash=sha256:6372ad78c89d662035101418ae253668445b391755cfe94ea52f1b9d22425b20 \
- --hash=sha256:cffa11ea77999bb0dd27bb25ff6dc142a6796142f68d45b1a26b11f58724561e
- # via nox
-attrs==22.1.0 \
- --hash=sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6 \
- --hash=sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c
- # via gcp-releasetool
-bleach==5.0.1 \
- --hash=sha256:085f7f33c15bd408dd9b17a4ad77c577db66d76203e5984b1bd59baeee948b2a \
- --hash=sha256:0d03255c47eb9bd2f26aa9bb7f2107732e7e8fe195ca2f64709fcf3b0a4a085c
- # via readme-renderer
-cachetools==5.2.0 \
- --hash=sha256:6a94c6402995a99c3970cc7e4884bb60b4a8639938157eeed436098bf9831757 \
- --hash=sha256:f9f17d2aec496a9aa6b76f53e3b614c965223c061982d434d160f930c698a9db
- # via google-auth
-certifi==2022.6.15 \
- --hash=sha256:84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d \
- --hash=sha256:fe86415d55e84719d75f8b69414f6438ac3547d2078ab91b67e779ef69378412
- # via requests
-cffi==1.15.1 \
- --hash=sha256:00a9ed42e88df81ffae7a8ab6d9356b371399b91dbdf0c3cb1e84c03a13aceb5 \
- --hash=sha256:03425bdae262c76aad70202debd780501fabeaca237cdfddc008987c0e0f59ef \
- --hash=sha256:04ed324bda3cda42b9b695d51bb7d54b680b9719cfab04227cdd1e04e5de3104 \
- --hash=sha256:0e2642fe3142e4cc4af0799748233ad6da94c62a8bec3a6648bf8ee68b1c7426 \
- --hash=sha256:173379135477dc8cac4bc58f45db08ab45d228b3363adb7af79436135d028405 \
- --hash=sha256:198caafb44239b60e252492445da556afafc7d1e3ab7a1fb3f0584ef6d742375 \
- --hash=sha256:1e74c6b51a9ed6589199c787bf5f9875612ca4a8a0785fb2d4a84429badaf22a \
- --hash=sha256:2012c72d854c2d03e45d06ae57f40d78e5770d252f195b93f581acf3ba44496e \
- --hash=sha256:21157295583fe8943475029ed5abdcf71eb3911894724e360acff1d61c1d54bc \
- --hash=sha256:2470043b93ff09bf8fb1d46d1cb756ce6132c54826661a32d4e4d132e1977adf \
- --hash=sha256:285d29981935eb726a4399badae8f0ffdff4f5050eaa6d0cfc3f64b857b77185 \
- --hash=sha256:30d78fbc8ebf9c92c9b7823ee18eb92f2e6ef79b45ac84db507f52fbe3ec4497 \
- --hash=sha256:320dab6e7cb2eacdf0e658569d2575c4dad258c0fcc794f46215e1e39f90f2c3 \
- --hash=sha256:33ab79603146aace82c2427da5ca6e58f2b3f2fb5da893ceac0c42218a40be35 \
- --hash=sha256:3548db281cd7d2561c9ad9984681c95f7b0e38881201e157833a2342c30d5e8c \
- --hash=sha256:3799aecf2e17cf585d977b780ce79ff0dc9b78d799fc694221ce814c2c19db83 \
- --hash=sha256:39d39875251ca8f612b6f33e6b1195af86d1b3e60086068be9cc053aa4376e21 \
- --hash=sha256:3b926aa83d1edb5aa5b427b4053dc420ec295a08e40911296b9eb1b6170f6cca \
- --hash=sha256:3bcde07039e586f91b45c88f8583ea7cf7a0770df3a1649627bf598332cb6984 \
- --hash=sha256:3d08afd128ddaa624a48cf2b859afef385b720bb4b43df214f85616922e6a5ac \
- --hash=sha256:3eb6971dcff08619f8d91607cfc726518b6fa2a9eba42856be181c6d0d9515fd \
- --hash=sha256:40f4774f5a9d4f5e344f31a32b5096977b5d48560c5592e2f3d2c4374bd543ee \
- --hash=sha256:4289fc34b2f5316fbb762d75362931e351941fa95fa18789191b33fc4cf9504a \
- --hash=sha256:470c103ae716238bbe698d67ad020e1db9d9dba34fa5a899b5e21577e6d52ed2 \
- --hash=sha256:4f2c9f67e9821cad2e5f480bc8d83b8742896f1242dba247911072d4fa94c192 \
- --hash=sha256:50a74364d85fd319352182ef59c5c790484a336f6db772c1a9231f1c3ed0cbd7 \
- --hash=sha256:54a2db7b78338edd780e7ef7f9f6c442500fb0d41a5a4ea24fff1c929d5af585 \
- --hash=sha256:5635bd9cb9731e6d4a1132a498dd34f764034a8ce60cef4f5319c0541159392f \
- --hash=sha256:59c0b02d0a6c384d453fece7566d1c7e6b7bae4fc5874ef2ef46d56776d61c9e \
- --hash=sha256:5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27 \
- --hash=sha256:5df2768244d19ab7f60546d0c7c63ce1581f7af8b5de3eb3004b9b6fc8a9f84b \
- --hash=sha256:5ef34d190326c3b1f822a5b7a45f6c4535e2f47ed06fec77d3d799c450b2651e \
- --hash=sha256:6975a3fac6bc83c4a65c9f9fcab9e47019a11d3d2cf7f3c0d03431bf145a941e \
- --hash=sha256:6c9a799e985904922a4d207a94eae35c78ebae90e128f0c4e521ce339396be9d \
- --hash=sha256:70df4e3b545a17496c9b3f41f5115e69a4f2e77e94e1d2a8e1070bc0c38c8a3c \
- --hash=sha256:7473e861101c9e72452f9bf8acb984947aa1661a7704553a9f6e4baa5ba64415 \
- --hash=sha256:8102eaf27e1e448db915d08afa8b41d6c7ca7a04b7d73af6514df10a3e74bd82 \
- --hash=sha256:87c450779d0914f2861b8526e035c5e6da0a3199d8f1add1a665e1cbc6fc6d02 \
- --hash=sha256:8b7ee99e510d7b66cdb6c593f21c043c248537a32e0bedf02e01e9553a172314 \
- --hash=sha256:91fc98adde3d7881af9b59ed0294046f3806221863722ba7d8d120c575314325 \
- --hash=sha256:94411f22c3985acaec6f83c6df553f2dbe17b698cc7f8ae751ff2237d96b9e3c \
- --hash=sha256:98d85c6a2bef81588d9227dde12db8a7f47f639f4a17c9ae08e773aa9c697bf3 \
- --hash=sha256:9ad5db27f9cabae298d151c85cf2bad1d359a1b9c686a275df03385758e2f914 \
- --hash=sha256:a0b71b1b8fbf2b96e41c4d990244165e2c9be83d54962a9a1d118fd8657d2045 \
- --hash=sha256:a0f100c8912c114ff53e1202d0078b425bee3649ae34d7b070e9697f93c5d52d \
- --hash=sha256:a591fe9e525846e4d154205572a029f653ada1a78b93697f3b5a8f1f2bc055b9 \
- --hash=sha256:a5c84c68147988265e60416b57fc83425a78058853509c1b0629c180094904a5 \
- --hash=sha256:a66d3508133af6e8548451b25058d5812812ec3798c886bf38ed24a98216fab2 \
- --hash=sha256:a8c4917bd7ad33e8eb21e9a5bbba979b49d9a97acb3a803092cbc1133e20343c \
- --hash=sha256:b3bbeb01c2b273cca1e1e0c5df57f12dce9a4dd331b4fa1635b8bec26350bde3 \
- --hash=sha256:cba9d6b9a7d64d4bd46167096fc9d2f835e25d7e4c121fb2ddfc6528fb0413b2 \
- --hash=sha256:cc4d65aeeaa04136a12677d3dd0b1c0c94dc43abac5860ab33cceb42b801c1e8 \
- --hash=sha256:ce4bcc037df4fc5e3d184794f27bdaab018943698f4ca31630bc7f84a7b69c6d \
- --hash=sha256:cec7d9412a9102bdc577382c3929b337320c4c4c4849f2c5cdd14d7368c5562d \
- --hash=sha256:d400bfb9a37b1351253cb402671cea7e89bdecc294e8016a707f6d1d8ac934f9 \
- --hash=sha256:d61f4695e6c866a23a21acab0509af1cdfd2c013cf256bbf5b6b5e2695827162 \
- --hash=sha256:db0fbb9c62743ce59a9ff687eb5f4afbe77e5e8403d6697f7446e5f609976f76 \
- --hash=sha256:dd86c085fae2efd48ac91dd7ccffcfc0571387fe1193d33b6394db7ef31fe2a4 \
- --hash=sha256:e00b098126fd45523dd056d2efba6c5a63b71ffe9f2bbe1a4fe1716e1d0c331e \
- --hash=sha256:e229a521186c75c8ad9490854fd8bbdd9a0c9aa3a524326b55be83b54d4e0ad9 \
- --hash=sha256:e263d77ee3dd201c3a142934a086a4450861778baaeeb45db4591ef65550b0a6 \
- --hash=sha256:ed9cb427ba5504c1dc15ede7d516b84757c3e3d7868ccc85121d9310d27eed0b \
- --hash=sha256:fa6693661a4c91757f4412306191b6dc88c1703f780c8234035eac011922bc01 \
- --hash=sha256:fcd131dd944808b5bdb38e6f5b53013c5aa4f334c5cad0c72742f6eba4b73db0
- # via cryptography
-charset-normalizer==2.1.1 \
- --hash=sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845 \
- --hash=sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f
- # via requests
-click==8.0.4 \
- --hash=sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1 \
- --hash=sha256:8458d7b1287c5fb128c90e23381cf99dcde74beaf6c7ff6384ce84d6fe090adb
- # via
- # gcp-docuploader
- # gcp-releasetool
-colorlog==6.6.0 \
- --hash=sha256:344f73204009e4c83c5b6beb00b3c45dc70fcdae3c80db919e0a4171d006fde8 \
- --hash=sha256:351c51e866c86c3217f08e4b067a7974a678be78f07f85fc2d55b8babde6d94e
- # via
- # gcp-docuploader
- # nox
-commonmark==0.9.1 \
- --hash=sha256:452f9dc859be7f06631ddcb328b6919c67984aca654e5fefb3914d54691aed60 \
- --hash=sha256:da2f38c92590f83de410ba1a3cbceafbc74fee9def35f9251ba9a971d6d66fd9
- # via rich
-cryptography==37.0.4 \
- --hash=sha256:190f82f3e87033821828f60787cfa42bff98404483577b591429ed99bed39d59 \
- --hash=sha256:2be53f9f5505673eeda5f2736bea736c40f051a739bfae2f92d18aed1eb54596 \
- --hash=sha256:30788e070800fec9bbcf9faa71ea6d8068f5136f60029759fd8c3efec3c9dcb3 \
- --hash=sha256:3d41b965b3380f10e4611dbae366f6dc3cefc7c9ac4e8842a806b9672ae9add5 \
- --hash=sha256:4c590ec31550a724ef893c50f9a97a0c14e9c851c85621c5650d699a7b88f7ab \
- --hash=sha256:549153378611c0cca1042f20fd9c5030d37a72f634c9326e225c9f666d472884 \
- --hash=sha256:63f9c17c0e2474ccbebc9302ce2f07b55b3b3fcb211ded18a42d5764f5c10a82 \
- --hash=sha256:6bc95ed67b6741b2607298f9ea4932ff157e570ef456ef7ff0ef4884a134cc4b \
- --hash=sha256:7099a8d55cd49b737ffc99c17de504f2257e3787e02abe6d1a6d136574873441 \
- --hash=sha256:75976c217f10d48a8b5a8de3d70c454c249e4b91851f6838a4e48b8f41eb71aa \
- --hash=sha256:7bc997818309f56c0038a33b8da5c0bfbb3f1f067f315f9abd6fc07ad359398d \
- --hash=sha256:80f49023dd13ba35f7c34072fa17f604d2f19bf0989f292cedf7ab5770b87a0b \
- --hash=sha256:91ce48d35f4e3d3f1d83e29ef4a9267246e6a3be51864a5b7d2247d5086fa99a \
- --hash=sha256:a958c52505c8adf0d3822703078580d2c0456dd1d27fabfb6f76fe63d2971cd6 \
- --hash=sha256:b62439d7cd1222f3da897e9a9fe53bbf5c104fff4d60893ad1355d4c14a24157 \
- --hash=sha256:b7f8dd0d4c1f21759695c05a5ec8536c12f31611541f8904083f3dc582604280 \
- --hash=sha256:d204833f3c8a33bbe11eda63a54b1aad7aa7456ed769a982f21ec599ba5fa282 \
- --hash=sha256:e007f052ed10cc316df59bc90fbb7ff7950d7e2919c9757fd42a2b8ecf8a5f67 \
- --hash=sha256:f2dcb0b3b63afb6df7fd94ec6fbddac81b5492513f7b0436210d390c14d46ee8 \
- --hash=sha256:f721d1885ecae9078c3f6bbe8a88bc0786b6e749bf32ccec1ef2b18929a05046 \
- --hash=sha256:f7a6de3e98771e183645181b3627e2563dcde3ce94a9e42a3f427d2255190327 \
- --hash=sha256:f8c0a6e9e1dd3eb0414ba320f85da6b0dcbd543126e30fcc546e7372a7fbf3b9
- # via
- # gcp-releasetool
- # secretstorage
-distlib==0.3.5 \
- --hash=sha256:a7f75737c70be3b25e2bee06288cec4e4c221de18455b2dd037fe2a795cab2fe \
- --hash=sha256:b710088c59f06338ca514800ad795a132da19fda270e3ce4affc74abf955a26c
- # via virtualenv
-docutils==0.19 \
- --hash=sha256:33995a6753c30b7f577febfc2c50411fec6aac7f7ffeb7c4cfe5991072dcf9e6 \
- --hash=sha256:5e1de4d849fee02c63b040a4a3fd567f4ab104defd8a5511fbbc24a8a017efbc
- # via readme-renderer
-filelock==3.8.0 \
- --hash=sha256:55447caa666f2198c5b6b13a26d2084d26fa5b115c00d065664b2124680c4edc \
- --hash=sha256:617eb4e5eedc82fc5f47b6d61e4d11cb837c56cb4544e39081099fa17ad109d4
- # via virtualenv
-gcp-docuploader==0.6.3 \
- --hash=sha256:ba8c9d76b3bbac54b0311c503a373b00edc2dc02d6d54ea9507045adb8e870f7 \
- --hash=sha256:c0f5aaa82ce1854a386197e4e359b120ad6d4e57ae2c812fce42219a3288026b
- # via -r requirements.in
-gcp-releasetool==1.8.6 \
- --hash=sha256:42e51ab8e2e789bc8e22a03c09352962cd3452951c801a2230d564816630304a \
- --hash=sha256:a3518b79d1b243c494eac392a01c7fd65187fd6d52602dcab9b529bc934d4da1
- # via -r requirements.in
-google-api-core==2.8.2 \
- --hash=sha256:06f7244c640322b508b125903bb5701bebabce8832f85aba9335ec00b3d02edc \
- --hash=sha256:93c6a91ccac79079ac6bbf8b74ee75db970cc899278b97d53bc012f35908cf50
- # via
- # google-cloud-core
- # google-cloud-storage
-google-auth==2.11.0 \
- --hash=sha256:be62acaae38d0049c21ca90f27a23847245c9f161ff54ede13af2cb6afecbac9 \
- --hash=sha256:ed65ecf9f681832298e29328e1ef0a3676e3732b2e56f41532d45f70a22de0fb
- # via
- # gcp-releasetool
- # google-api-core
- # google-cloud-core
- # google-cloud-storage
-google-cloud-core==2.3.2 \
- --hash=sha256:8417acf6466be2fa85123441696c4badda48db314c607cf1e5d543fa8bdc22fe \
- --hash=sha256:b9529ee7047fd8d4bf4a2182de619154240df17fbe60ead399078c1ae152af9a
- # via google-cloud-storage
-google-cloud-storage==2.5.0 \
- --hash=sha256:19a26c66c317ce542cea0830b7e787e8dac2588b6bfa4d3fd3b871ba16305ab0 \
- --hash=sha256:382f34b91de2212e3c2e7b40ec079d27ee2e3dbbae99b75b1bcd8c63063ce235
- # via gcp-docuploader
-google-crc32c==1.3.0 \
- --hash=sha256:04e7c220798a72fd0f08242bc8d7a05986b2a08a0573396187fd32c1dcdd58b3 \
- --hash=sha256:05340b60bf05b574159e9bd940152a47d38af3fb43803ffe71f11d704b7696a6 \
- --hash=sha256:12674a4c3b56b706153a358eaa1018c4137a5a04635b92b4652440d3d7386206 \
- --hash=sha256:127f9cc3ac41b6a859bd9dc4321097b1a4f6aa7fdf71b4f9227b9e3ebffb4422 \
- --hash=sha256:13af315c3a0eec8bb8b8d80b8b128cb3fcd17d7e4edafc39647846345a3f003a \
- --hash=sha256:1926fd8de0acb9d15ee757175ce7242e235482a783cd4ec711cc999fc103c24e \
- --hash=sha256:226f2f9b8e128a6ca6a9af9b9e8384f7b53a801907425c9a292553a3a7218ce0 \
- --hash=sha256:276de6273eb074a35bc598f8efbc00c7869c5cf2e29c90748fccc8c898c244df \
- --hash=sha256:318f73f5484b5671f0c7f5f63741ab020a599504ed81d209b5c7129ee4667407 \
- --hash=sha256:3bbce1be3687bbfebe29abdb7631b83e6b25da3f4e1856a1611eb21854b689ea \
- --hash=sha256:42ae4781333e331a1743445931b08ebdad73e188fd554259e772556fc4937c48 \
- --hash=sha256:58be56ae0529c664cc04a9c76e68bb92b091e0194d6e3c50bea7e0f266f73713 \
- --hash=sha256:5da2c81575cc3ccf05d9830f9e8d3c70954819ca9a63828210498c0774fda1a3 \
- --hash=sha256:6311853aa2bba4064d0c28ca54e7b50c4d48e3de04f6770f6c60ebda1e975267 \
- --hash=sha256:650e2917660e696041ab3dcd7abac160b4121cd9a484c08406f24c5964099829 \
- --hash=sha256:6a4db36f9721fdf391646685ecffa404eb986cbe007a3289499020daf72e88a2 \
- --hash=sha256:779cbf1ce375b96111db98fca913c1f5ec11b1d870e529b1dc7354b2681a8c3a \
- --hash=sha256:7f6fe42536d9dcd3e2ffb9d3053f5d05221ae3bbcefbe472bdf2c71c793e3183 \
- --hash=sha256:891f712ce54e0d631370e1f4997b3f182f3368179198efc30d477c75d1f44942 \
- --hash=sha256:95c68a4b9b7828ba0428f8f7e3109c5d476ca44996ed9a5f8aac6269296e2d59 \
- --hash=sha256:96a8918a78d5d64e07c8ea4ed2bc44354e3f93f46a4866a40e8db934e4c0d74b \
- --hash=sha256:9c3cf890c3c0ecfe1510a452a165431b5831e24160c5fcf2071f0f85ca5a47cd \
- --hash=sha256:9f58099ad7affc0754ae42e6d87443299f15d739b0ce03c76f515153a5cda06c \
- --hash=sha256:a0b9e622c3b2b8d0ce32f77eba617ab0d6768b82836391e4f8f9e2074582bf02 \
- --hash=sha256:a7f9cbea4245ee36190f85fe1814e2d7b1e5f2186381b082f5d59f99b7f11328 \
- --hash=sha256:bab4aebd525218bab4ee615786c4581952eadc16b1ff031813a2fd51f0cc7b08 \
- --hash=sha256:c124b8c8779bf2d35d9b721e52d4adb41c9bfbde45e6a3f25f0820caa9aba73f \
- --hash=sha256:c9da0a39b53d2fab3e5467329ed50e951eb91386e9d0d5b12daf593973c3b168 \
- --hash=sha256:ca60076c388728d3b6ac3846842474f4250c91efbfe5afa872d3ffd69dd4b318 \
- --hash=sha256:cb6994fff247987c66a8a4e550ef374671c2b82e3c0d2115e689d21e511a652d \
- --hash=sha256:d1c1d6236feab51200272d79b3d3e0f12cf2cbb12b208c835b175a21efdb0a73 \
- --hash=sha256:dd7760a88a8d3d705ff562aa93f8445ead54f58fd482e4f9e2bafb7e177375d4 \
- --hash=sha256:dda4d8a3bb0b50f540f6ff4b6033f3a74e8bf0bd5320b70fab2c03e512a62812 \
- --hash=sha256:e0f1ff55dde0ebcfbef027edc21f71c205845585fffe30d4ec4979416613e9b3 \
- --hash=sha256:e7a539b9be7b9c00f11ef16b55486141bc2cdb0c54762f84e3c6fc091917436d \
- --hash=sha256:eb0b14523758e37802f27b7f8cd973f5f3d33be7613952c0df904b68c4842f0e \
- --hash=sha256:ed447680ff21c14aaceb6a9f99a5f639f583ccfe4ce1a5e1d48eb41c3d6b3217 \
- --hash=sha256:f52a4ad2568314ee713715b1e2d79ab55fab11e8b304fd1462ff5cccf4264b3e \
- --hash=sha256:fbd60c6aaa07c31d7754edbc2334aef50601b7f1ada67a96eb1eb57c7c72378f \
- --hash=sha256:fc28e0db232c62ca0c3600884933178f0825c99be4474cdd645e378a10588125 \
- --hash=sha256:fe31de3002e7b08eb20823b3735b97c86c5926dd0581c7710a680b418a8709d4 \
- --hash=sha256:fec221a051150eeddfdfcff162e6db92c65ecf46cb0f7bb1bf812a1520ec026b \
- --hash=sha256:ff71073ebf0e42258a42a0b34f2c09ec384977e7f6808999102eedd5b49920e3
- # via google-resumable-media
-google-resumable-media==2.3.3 \
- --hash=sha256:27c52620bd364d1c8116eaac4ea2afcbfb81ae9139fb3199652fcac1724bfb6c \
- --hash=sha256:5b52774ea7a829a8cdaa8bd2d4c3d4bc660c91b30857ab2668d0eb830f4ea8c5
- # via google-cloud-storage
-googleapis-common-protos==1.56.4 \
- --hash=sha256:8eb2cbc91b69feaf23e32452a7ae60e791e09967d81d4fcc7fc388182d1bd394 \
- --hash=sha256:c25873c47279387cfdcbdafa36149887901d36202cb645a0e4f29686bf6e4417
- # via google-api-core
-idna==3.3 \
- --hash=sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff \
- --hash=sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d
- # via requests
-importlib-metadata==4.12.0 \
- --hash=sha256:637245b8bab2b6502fcbc752cc4b7a6f6243bb02b31c5c26156ad103d3d45670 \
- --hash=sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23
- # via
- # -r requirements.in
- # twine
-jeepney==0.8.0 \
- --hash=sha256:5efe48d255973902f6badc3ce55e2aa6c5c3b3bc642059ef3a91247bcfcc5806 \
- --hash=sha256:c0a454ad016ca575060802ee4d590dd912e35c122fa04e70306de3d076cce755
- # via
- # keyring
- # secretstorage
-jinja2==3.1.2 \
- --hash=sha256:31351a702a408a9e7595a8fc6150fc3f43bb6bf7e319770cbc0db9df9437e852 \
- --hash=sha256:6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61
- # via gcp-releasetool
-keyring==23.8.2 \
- --hash=sha256:0d9973f8891850f1ade5f26aafd06bb16865fbbae3fc56b0defb6a14a2624003 \
- --hash=sha256:10d2a8639663fe2090705a00b8c47c687cacdf97598ea9c11456679fa974473a
- # via
- # gcp-releasetool
- # twine
-markupsafe==2.1.1 \
- --hash=sha256:0212a68688482dc52b2d45013df70d169f542b7394fc744c02a57374a4207003 \
- --hash=sha256:089cf3dbf0cd6c100f02945abeb18484bd1ee57a079aefd52cffd17fba910b88 \
- --hash=sha256:10c1bfff05d95783da83491be968e8fe789263689c02724e0c691933c52994f5 \
- --hash=sha256:33b74d289bd2f5e527beadcaa3f401e0df0a89927c1559c8566c066fa4248ab7 \
- --hash=sha256:3799351e2336dc91ea70b034983ee71cf2f9533cdff7c14c90ea126bfd95d65a \
- --hash=sha256:3ce11ee3f23f79dbd06fb3d63e2f6af7b12db1d46932fe7bd8afa259a5996603 \
- --hash=sha256:421be9fbf0ffe9ffd7a378aafebbf6f4602d564d34be190fc19a193232fd12b1 \
- --hash=sha256:43093fb83d8343aac0b1baa75516da6092f58f41200907ef92448ecab8825135 \
- --hash=sha256:46d00d6cfecdde84d40e572d63735ef81423ad31184100411e6e3388d405e247 \
- --hash=sha256:4a33dea2b688b3190ee12bd7cfa29d39c9ed176bda40bfa11099a3ce5d3a7ac6 \
- --hash=sha256:4b9fe39a2ccc108a4accc2676e77da025ce383c108593d65cc909add5c3bd601 \
- --hash=sha256:56442863ed2b06d19c37f94d999035e15ee982988920e12a5b4ba29b62ad1f77 \
- --hash=sha256:671cd1187ed5e62818414afe79ed29da836dde67166a9fac6d435873c44fdd02 \
- --hash=sha256:694deca8d702d5db21ec83983ce0bb4b26a578e71fbdbd4fdcd387daa90e4d5e \
- --hash=sha256:6a074d34ee7a5ce3effbc526b7083ec9731bb3cbf921bbe1d3005d4d2bdb3a63 \
- --hash=sha256:6d0072fea50feec76a4c418096652f2c3238eaa014b2f94aeb1d56a66b41403f \
- --hash=sha256:6fbf47b5d3728c6aea2abb0589b5d30459e369baa772e0f37a0320185e87c980 \
- --hash=sha256:7f91197cc9e48f989d12e4e6fbc46495c446636dfc81b9ccf50bb0ec74b91d4b \
- --hash=sha256:86b1f75c4e7c2ac2ccdaec2b9022845dbb81880ca318bb7a0a01fbf7813e3812 \
- --hash=sha256:8dc1c72a69aa7e082593c4a203dcf94ddb74bb5c8a731e4e1eb68d031e8498ff \
- --hash=sha256:8e3dcf21f367459434c18e71b2a9532d96547aef8a871872a5bd69a715c15f96 \
- --hash=sha256:8e576a51ad59e4bfaac456023a78f6b5e6e7651dcd383bcc3e18d06f9b55d6d1 \
- --hash=sha256:96e37a3dc86e80bf81758c152fe66dbf60ed5eca3d26305edf01892257049925 \
- --hash=sha256:97a68e6ada378df82bc9f16b800ab77cbf4b2fada0081794318520138c088e4a \
- --hash=sha256:99a2a507ed3ac881b975a2976d59f38c19386d128e7a9a18b7df6fff1fd4c1d6 \
- --hash=sha256:a49907dd8420c5685cfa064a1335b6754b74541bbb3706c259c02ed65b644b3e \
- --hash=sha256:b09bf97215625a311f669476f44b8b318b075847b49316d3e28c08e41a7a573f \
- --hash=sha256:b7bd98b796e2b6553da7225aeb61f447f80a1ca64f41d83612e6139ca5213aa4 \
- --hash=sha256:b87db4360013327109564f0e591bd2a3b318547bcef31b468a92ee504d07ae4f \
- --hash=sha256:bcb3ed405ed3222f9904899563d6fc492ff75cce56cba05e32eff40e6acbeaa3 \
- --hash=sha256:d4306c36ca495956b6d568d276ac11fdd9c30a36f1b6eb928070dc5360b22e1c \
- --hash=sha256:d5ee4f386140395a2c818d149221149c54849dfcfcb9f1debfe07a8b8bd63f9a \
- --hash=sha256:dda30ba7e87fbbb7eab1ec9f58678558fd9a6b8b853530e176eabd064da81417 \
- --hash=sha256:e04e26803c9c3851c931eac40c695602c6295b8d432cbe78609649ad9bd2da8a \
- --hash=sha256:e1c0b87e09fa55a220f058d1d49d3fb8df88fbfab58558f1198e08c1e1de842a \
- --hash=sha256:e72591e9ecd94d7feb70c1cbd7be7b3ebea3f548870aa91e2732960fa4d57a37 \
- --hash=sha256:e8c843bbcda3a2f1e3c2ab25913c80a3c5376cd00c6e8c4a86a89a28c8dc5452 \
- --hash=sha256:efc1913fd2ca4f334418481c7e595c00aad186563bbc1ec76067848c7ca0a933 \
- --hash=sha256:f121a1420d4e173a5d96e47e9a0c0dcff965afdf1626d28de1460815f7c4ee7a \
- --hash=sha256:fc7b548b17d238737688817ab67deebb30e8073c95749d55538ed473130ec0c7
- # via jinja2
-nox==2022.8.7 \
- --hash=sha256:1b894940551dc5c389f9271d197ca5d655d40bdc6ccf93ed6880e4042760a34b \
- --hash=sha256:96cca88779e08282a699d672258ec01eb7c792d35bbbf538c723172bce23212c
- # via -r requirements.in
-packaging==21.3 \
- --hash=sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb \
- --hash=sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522
- # via
- # gcp-releasetool
- # nox
-pkginfo==1.8.3 \
- --hash=sha256:848865108ec99d4901b2f7e84058b6e7660aae8ae10164e015a6dcf5b242a594 \
- --hash=sha256:a84da4318dd86f870a9447a8c98340aa06216bfc6f2b7bdc4b8766984ae1867c
- # via twine
-platformdirs==2.5.2 \
- --hash=sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788 \
- --hash=sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19
- # via virtualenv
-protobuf==3.20.1 \
- --hash=sha256:06059eb6953ff01e56a25cd02cca1a9649a75a7e65397b5b9b4e929ed71d10cf \
- --hash=sha256:097c5d8a9808302fb0da7e20edf0b8d4703274d140fd25c5edabddcde43e081f \
- --hash=sha256:284f86a6207c897542d7e956eb243a36bb8f9564c1742b253462386e96c6b78f \
- --hash=sha256:32ca378605b41fd180dfe4e14d3226386d8d1b002ab31c969c366549e66a2bb7 \
- --hash=sha256:3cc797c9d15d7689ed507b165cd05913acb992d78b379f6014e013f9ecb20996 \
- --hash=sha256:62f1b5c4cd6c5402b4e2d63804ba49a327e0c386c99b1675c8a0fefda23b2067 \
- --hash=sha256:69ccfdf3657ba59569c64295b7d51325f91af586f8d5793b734260dfe2e94e2c \
- --hash=sha256:6f50601512a3d23625d8a85b1638d914a0970f17920ff39cec63aaef80a93fb7 \
- --hash=sha256:7403941f6d0992d40161aa8bb23e12575637008a5a02283a930addc0508982f9 \
- --hash=sha256:755f3aee41354ae395e104d62119cb223339a8f3276a0cd009ffabfcdd46bb0c \
- --hash=sha256:77053d28427a29987ca9caf7b72ccafee011257561259faba8dd308fda9a8739 \
- --hash=sha256:7e371f10abe57cee5021797126c93479f59fccc9693dafd6bd5633ab67808a91 \
- --hash=sha256:9016d01c91e8e625141d24ec1b20fed584703e527d28512aa8c8707f105a683c \
- --hash=sha256:9be73ad47579abc26c12024239d3540e6b765182a91dbc88e23658ab71767153 \
- --hash=sha256:adc31566d027f45efe3f44eeb5b1f329da43891634d61c75a5944e9be6dd42c9 \
- --hash=sha256:adfc6cf69c7f8c50fd24c793964eef18f0ac321315439d94945820612849c388 \
- --hash=sha256:af0ebadc74e281a517141daad9d0f2c5d93ab78e9d455113719a45a49da9db4e \
- --hash=sha256:cb29edb9eab15742d791e1025dd7b6a8f6fcb53802ad2f6e3adcb102051063ab \
- --hash=sha256:cd68be2559e2a3b84f517fb029ee611546f7812b1fdd0aa2ecc9bc6ec0e4fdde \
- --hash=sha256:cdee09140e1cd184ba9324ec1df410e7147242b94b5f8b0c64fc89e38a8ba531 \
- --hash=sha256:db977c4ca738dd9ce508557d4fce0f5aebd105e158c725beec86feb1f6bc20d8 \
- --hash=sha256:dd5789b2948ca702c17027c84c2accb552fc30f4622a98ab5c51fcfe8c50d3e7 \
- --hash=sha256:e250a42f15bf9d5b09fe1b293bdba2801cd520a9f5ea2d7fb7536d4441811d20 \
- --hash=sha256:ff8d8fa42675249bb456f5db06c00de6c2f4c27a065955917b28c4f15978b9c3
- # via
- # gcp-docuploader
- # gcp-releasetool
- # google-api-core
-py==1.11.0 \
- --hash=sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719 \
- --hash=sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378
- # via nox
-pyasn1==0.4.8 \
- --hash=sha256:39c7e2ec30515947ff4e87fb6f456dfc6e84857d34be479c9d4a4ba4bf46aa5d \
- --hash=sha256:aef77c9fb94a3ac588e87841208bdec464471d9871bd5050a287cc9a475cd0ba
- # via
- # pyasn1-modules
- # rsa
-pyasn1-modules==0.2.8 \
- --hash=sha256:905f84c712230b2c592c19470d3ca8d552de726050d1d1716282a1f6146be65e \
- --hash=sha256:a50b808ffeb97cb3601dd25981f6b016cbb3d31fbf57a8b8a87428e6158d0c74
- # via google-auth
-pycparser==2.21 \
- --hash=sha256:8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 \
- --hash=sha256:e644fdec12f7872f86c58ff790da456218b10f863970249516d60a5eaca77206
- # via cffi
-pygments==2.13.0 \
- --hash=sha256:56a8508ae95f98e2b9bdf93a6be5ae3f7d8af858b43e02c5a2ff083726be40c1 \
- --hash=sha256:f643f331ab57ba3c9d89212ee4a2dabc6e94f117cf4eefde99a0574720d14c42
- # via
- # readme-renderer
- # rich
-pyjwt==2.4.0 \
- --hash=sha256:72d1d253f32dbd4f5c88eaf1fdc62f3a19f676ccbadb9dbc5d07e951b2b26daf \
- --hash=sha256:d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba
- # via gcp-releasetool
-pyparsing==3.0.9 \
- --hash=sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb \
- --hash=sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc
- # via packaging
-pyperclip==1.8.2 \
- --hash=sha256:105254a8b04934f0bc84e9c24eb360a591aaf6535c9def5f29d92af107a9bf57
- # via gcp-releasetool
-python-dateutil==2.8.2 \
- --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \
- --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9
- # via gcp-releasetool
-readme-renderer==37.0 \
- --hash=sha256:07b7ea234e03e58f77cc222e206e6abb8f4c0435becce5104794ee591f9301c5 \
- --hash=sha256:9fa416704703e509eeb900696751c908ddeb2011319d93700d8f18baff887a69
- # via twine
-requests==2.28.1 \
- --hash=sha256:7c5599b102feddaa661c826c56ab4fee28bfd17f5abca1ebbe3e7f19d7c97983 \
- --hash=sha256:8fefa2a1a1365bf5520aac41836fbee479da67864514bdb821f31ce07ce65349
- # via
- # gcp-releasetool
- # google-api-core
- # google-cloud-storage
- # requests-toolbelt
- # twine
-requests-toolbelt==0.9.1 \
- --hash=sha256:380606e1d10dc85c3bd47bf5a6095f815ec007be7a8b69c878507068df059e6f \
- --hash=sha256:968089d4584ad4ad7c171454f0a5c6dac23971e9472521ea3b6d49d610aa6fc0
- # via twine
-rfc3986==2.0.0 \
- --hash=sha256:50b1502b60e289cb37883f3dfd34532b8873c7de9f49bb546641ce9cbd256ebd \
- --hash=sha256:97aacf9dbd4bfd829baad6e6309fa6573aaf1be3f6fa735c8ab05e46cecb261c
- # via twine
-rich==12.5.1 \
- --hash=sha256:2eb4e6894cde1e017976d2975ac210ef515d7548bc595ba20e195fb9628acdeb \
- --hash=sha256:63a5c5ce3673d3d5fbbf23cd87e11ab84b6b451436f1b7f19ec54b6bc36ed7ca
- # via twine
-rsa==4.9 \
- --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \
- --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21
- # via google-auth
-secretstorage==3.3.3 \
- --hash=sha256:2403533ef369eca6d2ba81718576c5e0f564d5cca1b58f73a8b23e7d4eeebd77 \
- --hash=sha256:f356e6628222568e3af06f2eba8df495efa13b3b63081dafd4f7d9a7b7bc9f99
- # via keyring
-six==1.16.0 \
- --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \
- --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
- # via
- # bleach
- # gcp-docuploader
- # google-auth
- # python-dateutil
-twine==4.0.1 \
- --hash=sha256:42026c18e394eac3e06693ee52010baa5313e4811d5a11050e7d48436cf41b9e \
- --hash=sha256:96b1cf12f7ae611a4a40b6ae8e9570215daff0611828f5fe1f37a16255ab24a0
- # via -r requirements.in
-typing-extensions==4.3.0 \
- --hash=sha256:25642c956049920a5aa49edcdd6ab1e06d7e5d467fc00e0506c44ac86fbfca02 \
- --hash=sha256:e6d2677a32f47fc7eb2795db1dd15c1f34eff616bcaf2cfb5e997f854fa1c4a6
- # via -r requirements.in
-urllib3==1.26.12 \
- --hash=sha256:3fa96cf423e6987997fc326ae8df396db2a8b7c667747d47ddd8ecba91f4a74e \
- --hash=sha256:b930dd878d5a8afb066a637fbb35144fe7901e3b209d1cd4f524bd0e9deee997
- # via
- # requests
- # twine
-virtualenv==20.16.3 \
- --hash=sha256:4193b7bc8a6cd23e4eb251ac64f29b4398ab2c233531e66e40b19a6b7b0d30c1 \
- --hash=sha256:d86ea0bb50e06252d79e6c241507cb904fcd66090c3271381372d6221a3970f9
- # via nox
-webencodings==0.5.1 \
- --hash=sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 \
- --hash=sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923
- # via bleach
-wheel==0.37.1 \
- --hash=sha256:4bdcd7d840138086126cd09254dc6195fb4fc6f01c050a1d7236f2630db1d22a \
- --hash=sha256:e9a504e793efbca1b8e0e9cb979a249cf4a0a7b5b8c9e8b65a5e39d49529c1c4
- # via -r requirements.in
-zipp==3.8.1 \
- --hash=sha256:05b45f1ee8f807d0cc928485ca40a07cb491cf092ff587c0df9cb1fd154848d2 \
- --hash=sha256:47c40d7fe183a6f21403a199b3e4192cca5774656965b0a4988ad2f8feb5f009
- # via importlib-metadata
-
-# The following packages are considered to be unsafe in a requirements file:
-setuptools==65.2.0 \
- --hash=sha256:7f4bc85450898a09f76ebf28b72fa25bc7111f6c7d665d514a60bba9c75ef2a9 \
- --hash=sha256:a3ca5857c89f82f5c9410e8508cb32f4872a3bafd4aa7ae122a24ca33bccc750
- # via -r requirements.in
diff --git a/.kokoro/samples/python3.7/common.cfg b/.kokoro/samples/python3.11/common.cfg
similarity index 93%
rename from .kokoro/samples/python3.7/common.cfg
rename to .kokoro/samples/python3.11/common.cfg
index d30dc6018..f5adc8703 100644
--- a/.kokoro/samples/python3.7/common.cfg
+++ b/.kokoro/samples/python3.11/common.cfg
@@ -10,13 +10,13 @@ action {
# Specify which tests to run
env_vars: {
key: "RUN_TESTS_SESSION"
- value: "py-3.7"
+ value: "py-3.11"
}
# Declare build specific Cloud project.
env_vars: {
key: "BUILD_SPECIFIC_GCLOUD_PROJECT"
- value: "python-docs-samples-tests-py37"
+ value: "python-docs-samples-tests-311"
}
env_vars: {
diff --git a/.kokoro/samples/python3.7/continuous.cfg b/.kokoro/samples/python3.11/continuous.cfg
similarity index 100%
rename from .kokoro/samples/python3.7/continuous.cfg
rename to .kokoro/samples/python3.11/continuous.cfg
diff --git a/.kokoro/samples/python3.7/periodic-head.cfg b/.kokoro/samples/python3.11/periodic-head.cfg
similarity index 100%
rename from .kokoro/samples/python3.7/periodic-head.cfg
rename to .kokoro/samples/python3.11/periodic-head.cfg
diff --git a/.kokoro/samples/python3.7/periodic.cfg b/.kokoro/samples/python3.11/periodic.cfg
similarity index 100%
rename from .kokoro/samples/python3.7/periodic.cfg
rename to .kokoro/samples/python3.11/periodic.cfg
diff --git a/.kokoro/samples/python3.7/presubmit.cfg b/.kokoro/samples/python3.11/presubmit.cfg
similarity index 100%
rename from .kokoro/samples/python3.7/presubmit.cfg
rename to .kokoro/samples/python3.11/presubmit.cfg
diff --git a/.kokoro/samples/python3.8/common.cfg b/.kokoro/samples/python3.12/common.cfg
similarity index 93%
rename from .kokoro/samples/python3.8/common.cfg
rename to .kokoro/samples/python3.12/common.cfg
index 46759c6d6..6eb699edd 100644
--- a/.kokoro/samples/python3.8/common.cfg
+++ b/.kokoro/samples/python3.12/common.cfg
@@ -10,13 +10,13 @@ action {
# Specify which tests to run
env_vars: {
key: "RUN_TESTS_SESSION"
- value: "py-3.8"
+ value: "py-3.12"
}
# Declare build specific Cloud project.
env_vars: {
key: "BUILD_SPECIFIC_GCLOUD_PROJECT"
- value: "python-docs-samples-tests-py38"
+ value: "python-docs-samples-tests-312"
}
env_vars: {
diff --git a/.kokoro/samples/python3.8/continuous.cfg b/.kokoro/samples/python3.12/continuous.cfg
similarity index 100%
rename from .kokoro/samples/python3.8/continuous.cfg
rename to .kokoro/samples/python3.12/continuous.cfg
diff --git a/.kokoro/samples/python3.8/periodic-head.cfg b/.kokoro/samples/python3.12/periodic-head.cfg
similarity index 100%
rename from .kokoro/samples/python3.8/periodic-head.cfg
rename to .kokoro/samples/python3.12/periodic-head.cfg
diff --git a/.kokoro/samples/python3.8/periodic.cfg b/.kokoro/samples/python3.12/periodic.cfg
similarity index 100%
rename from .kokoro/samples/python3.8/periodic.cfg
rename to .kokoro/samples/python3.12/periodic.cfg
diff --git a/.kokoro/samples/python3.8/presubmit.cfg b/.kokoro/samples/python3.12/presubmit.cfg
similarity index 100%
rename from .kokoro/samples/python3.8/presubmit.cfg
rename to .kokoro/samples/python3.12/presubmit.cfg
diff --git a/.kokoro/samples/python3.13/common.cfg b/.kokoro/samples/python3.13/common.cfg
new file mode 100644
index 000000000..ee9688995
--- /dev/null
+++ b/.kokoro/samples/python3.13/common.cfg
@@ -0,0 +1,40 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Build logs will be here
+action {
+ define_artifacts {
+ regex: "**/*sponge_log.xml"
+ }
+}
+
+# Specify which tests to run
+env_vars: {
+ key: "RUN_TESTS_SESSION"
+ value: "py-3.13"
+}
+
+# Declare build specific Cloud project.
+env_vars: {
+ key: "BUILD_SPECIFIC_GCLOUD_PROJECT"
+ value: "python-docs-samples-tests-313"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples.sh"
+}
+
+# Configure the docker image for kokoro-trampoline.
+env_vars: {
+ key: "TRAMPOLINE_IMAGE"
+ value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker"
+}
+
+# Download secrets for samples
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
+
+# Download trampoline resources.
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
+
+# Use the trampoline script to run in docker.
+build_file: "python-bigquery/.kokoro/trampoline_v2.sh"
diff --git a/.kokoro/samples/python3.13/continuous.cfg b/.kokoro/samples/python3.13/continuous.cfg
new file mode 100644
index 000000000..a1c8d9759
--- /dev/null
+++ b/.kokoro/samples/python3.13/continuous.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
\ No newline at end of file
diff --git a/.kokoro/samples/python3.13/periodic-head.cfg b/.kokoro/samples/python3.13/periodic-head.cfg
new file mode 100644
index 000000000..5aa01bab5
--- /dev/null
+++ b/.kokoro/samples/python3.13/periodic-head.cfg
@@ -0,0 +1,11 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples-against-head.sh"
+}
diff --git a/.kokoro/samples/python3.13/periodic.cfg b/.kokoro/samples/python3.13/periodic.cfg
new file mode 100644
index 000000000..71cd1e597
--- /dev/null
+++ b/.kokoro/samples/python3.13/periodic.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "False"
+}
diff --git a/.kokoro/samples/python3.13/presubmit.cfg b/.kokoro/samples/python3.13/presubmit.cfg
new file mode 100644
index 000000000..a1c8d9759
--- /dev/null
+++ b/.kokoro/samples/python3.13/presubmit.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
\ No newline at end of file
diff --git a/.kokoro/samples/python3.14/common.cfg b/.kokoro/samples/python3.14/common.cfg
new file mode 100644
index 000000000..d2fcee553
--- /dev/null
+++ b/.kokoro/samples/python3.14/common.cfg
@@ -0,0 +1,40 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+# Build logs will be here
+action {
+ define_artifacts {
+ regex: "**/*sponge_log.xml"
+ }
+}
+
+# Specify which tests to run
+env_vars: {
+ key: "RUN_TESTS_SESSION"
+ value: "py-3.14"
+}
+
+# Declare build specific Cloud project.
+env_vars: {
+ key: "BUILD_SPECIFIC_GCLOUD_PROJECT"
+ value: "python-docs-samples-tests-314"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples.sh"
+}
+
+# Configure the docker image for kokoro-trampoline.
+env_vars: {
+ key: "TRAMPOLINE_IMAGE"
+ value: "gcr.io/cloud-devrel-kokoro-resources/python-samples-testing-docker"
+}
+
+# Download secrets for samples
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/python-docs-samples"
+
+# Download trampoline resources.
+gfile_resources: "/bigstore/cloud-devrel-kokoro-resources/trampoline"
+
+# Use the trampoline script to run in docker.
+build_file: "python-bigquery/.kokoro/trampoline_v2.sh"
diff --git a/.kokoro/samples/python3.14/continuous.cfg b/.kokoro/samples/python3.14/continuous.cfg
new file mode 100644
index 000000000..a1c8d9759
--- /dev/null
+++ b/.kokoro/samples/python3.14/continuous.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
\ No newline at end of file
diff --git a/.kokoro/samples/python3.14/periodic-head.cfg b/.kokoro/samples/python3.14/periodic-head.cfg
new file mode 100644
index 000000000..5aa01bab5
--- /dev/null
+++ b/.kokoro/samples/python3.14/periodic-head.cfg
@@ -0,0 +1,11 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
+
+env_vars: {
+ key: "TRAMPOLINE_BUILD_FILE"
+ value: "github/python-bigquery/.kokoro/test-samples-against-head.sh"
+}
diff --git a/.kokoro/samples/python3.14/periodic.cfg b/.kokoro/samples/python3.14/periodic.cfg
new file mode 100644
index 000000000..71cd1e597
--- /dev/null
+++ b/.kokoro/samples/python3.14/periodic.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "False"
+}
diff --git a/.kokoro/samples/python3.14/presubmit.cfg b/.kokoro/samples/python3.14/presubmit.cfg
new file mode 100644
index 000000000..a1c8d9759
--- /dev/null
+++ b/.kokoro/samples/python3.14/presubmit.cfg
@@ -0,0 +1,6 @@
+# Format: //devtools/kokoro/config/proto/build.proto
+
+env_vars: {
+ key: "INSTALL_LIBRARY_FROM_SOURCE"
+ value: "True"
+}
\ No newline at end of file
diff --git a/.kokoro/test-samples-against-head.sh b/.kokoro/test-samples-against-head.sh
index ba3a707b0..e9d8bd79a 100755
--- a/.kokoro/test-samples-against-head.sh
+++ b/.kokoro/test-samples-against-head.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2020 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh
index 2c6500cae..40e248822 100755
--- a/.kokoro/test-samples-impl.sh
+++ b/.kokoro/test-samples-impl.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2021 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -33,7 +33,7 @@ export PYTHONUNBUFFERED=1
env | grep KOKORO
# Install nox
-python3.9 -m pip install --upgrade --quiet nox
+python3.9 -m pip install --upgrade --quiet nox virtualenv
# Use secrets acessor service account to get secrets
if [[ -f "${KOKORO_GFILE_DIR}/secrets_viewer_service_account.json" ]]; then
diff --git a/.kokoro/test-samples.sh b/.kokoro/test-samples.sh
index 11c042d34..7933d8201 100755
--- a/.kokoro/test-samples.sh
+++ b/.kokoro/test-samples.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2020 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/.kokoro/trampoline.sh b/.kokoro/trampoline.sh
index f39236e94..48f796997 100755
--- a/.kokoro/trampoline.sh
+++ b/.kokoro/trampoline.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright 2017 Google Inc.
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/.kokoro/trampoline_v2.sh b/.kokoro/trampoline_v2.sh
index 4af6cdc26..35fa52923 100755
--- a/.kokoro/trampoline_v2.sh
+++ b/.kokoro/trampoline_v2.sh
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-# Copyright 2020 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/.librarian/state.yaml b/.librarian/state.yaml
new file mode 100644
index 000000000..8d67105e3
--- /dev/null
+++ b/.librarian/state.yaml
@@ -0,0 +1,11 @@
+image: us-central1-docker.pkg.dev/cloud-sdk-librarian-prod/images-prod/python-librarian-generator@sha256:c8612d3fffb3f6a32353b2d1abd16b61e87811866f7ec9d65b59b02eb452a620
+libraries:
+ - id: google-cloud-bigquery
+ version: 3.39.0
+ last_generated_commit: ""
+ apis: []
+ source_roots:
+ - .
+ preserve_regex: []
+ remove_regex: []
+ tag_format: v{version}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 46d237160..1d74695f7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,4 +1,4 @@
-# Copyright 2021 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -22,10 +22,10 @@ repos:
- id: end-of-file-fixer
- id: check-yaml
- repo: https://github.com/psf/black
- rev: 22.3.0
+ rev: 23.7.0
hooks:
- id: black
-- repo: https://gitlab.com/pycqa/flake8
- rev: 3.9.2
+- repo: https://github.com/pycqa/flake8
+ rev: 6.1.0
hooks:
- id: flake8
diff --git a/.repo-metadata.json b/.repo-metadata.json
index d1be7ec4d..82a1684ca 100644
--- a/.repo-metadata.json
+++ b/.repo-metadata.json
@@ -12,7 +12,7 @@
"api_id": "bigquery.googleapis.com",
"requires_billing": false,
"default_version": "v2",
- "codeowner_team": "@googleapis/api-bigquery",
+ "codeowner_team": "@googleapis/python-core-client-libraries",
"api_shortname": "bigquery",
"api_description": "is a fully managed, NoOps, low cost data analytics service.\nData can be streamed into BigQuery at millions of rows per second to enable real-time analysis.\nWith BigQuery you can easily deploy Petabyte-scale Databases."
}
diff --git a/.trampolinerc b/.trampolinerc
index 0eee72ab6..008015237 100644
--- a/.trampolinerc
+++ b/.trampolinerc
@@ -1,4 +1,4 @@
-# Copyright 2020 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Template for .trampolinerc
-
# Add required env vars here.
required_envvars+=(
)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6ba373179..4cf177cc5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,784 @@
[1]: https://pypi.org/project/google-cloud-bigquery/#history
+## [3.39.0](https://github.com/googleapis/google-cloud-python/compare/google-cloud-bigquery-v3.38.0...google-cloud-bigquery-v3.39.0) (2025-12-12)
+
+
+### Features
+
+* Add support for Python runtime 3.14 (#2322) ([6065e14](https://github.com/googleapis/google-cloud-python/commit/6065e14c448cb430189982dd70025fa0575777ca))
+* Add ExternalRuntimeOptions to BigQuery routine (#2311) ([fa76e31](https://github.com/googleapis/google-cloud-python/commit/fa76e310a16ea6cba0071ff1d767ca1c71514da7))
+
+
+### Bug Fixes
+
+* Include `io.IOBase` in the `PathType` (#2323) ([b11e09c](https://github.com/googleapis/google-cloud-python/commit/b11e09cb6ee32e451b37eda66bece2220b9ceaba))
+* Honor custom `retry` in `job.result()` (#2302) ([e118b02](https://github.com/googleapis/google-cloud-python/commit/e118b029bbc89a5adbab83f39858c356c23665bf))
+* Remove ambiguous error codes from query retries (#2308) ([8bbd3d0](https://github.com/googleapis/google-cloud-python/commit/8bbd3d01026c493dfa5903b397d2b01c0e9bf43b))
+
+
+### Documentation
+
+* Remove experimental annotations from GA features (#2303) ([1f1f9d4](https://github.com/googleapis/google-cloud-python/commit/1f1f9d41e8a2c9016198d848ad3f1cbb88cf77b0))
+
+
+## [3.38.0](https://github.com/googleapis/python-bigquery/compare/v3.37.0...v3.38.0) (2025-09-15)
+
+
+### Features
+
+* Add additional query stats ([#2270](https://github.com/googleapis/python-bigquery/issues/2270)) ([7b1b718](https://github.com/googleapis/python-bigquery/commit/7b1b718123afd80c0f68212946e4179bcd6db67f))
+
+## [3.37.0](https://github.com/googleapis/python-bigquery/compare/v3.36.0...v3.37.0) (2025-09-08)
+
+
+### Features
+
+* Updates to fastpath query execution ([#2268](https://github.com/googleapis/python-bigquery/issues/2268)) ([ef2740a](https://github.com/googleapis/python-bigquery/commit/ef2740a158199633b5543a7b6eb19587580792cd))
+
+
+### Bug Fixes
+
+* Remove deepcopy while setting properties for _QueryResults ([#2280](https://github.com/googleapis/python-bigquery/issues/2280)) ([33ea296](https://github.com/googleapis/python-bigquery/commit/33ea29616c06a2e2a106a785d216e784737ae386))
+
+
+### Documentation
+
+* Clarify that the presence of `XyzJob.errors` doesn't necessarily mean that the job has not completed or was unsuccessful ([#2278](https://github.com/googleapis/python-bigquery/issues/2278)) ([6e88d7d](https://github.com/googleapis/python-bigquery/commit/6e88d7dbe42ebfc35986da665d656b49ac481db4))
+* Clarify the api_method arg for client.query() ([#2277](https://github.com/googleapis/python-bigquery/issues/2277)) ([8a13c12](https://github.com/googleapis/python-bigquery/commit/8a13c12905ffcb3dbb6086a61df37556f0c2cd31))
+
+## [3.36.0](https://github.com/googleapis/python-bigquery/compare/v3.35.1...v3.36.0) (2025-08-20)
+
+
+### Features
+
+* Add created/started/ended properties to RowIterator. ([#2260](https://github.com/googleapis/python-bigquery/issues/2260)) ([0a95b24](https://github.com/googleapis/python-bigquery/commit/0a95b24192395cc3ccf801aa9bc318999873a2bf))
+* Retry query jobs if `jobBackendError` or `jobInternalError` are encountered ([#2256](https://github.com/googleapis/python-bigquery/issues/2256)) ([3deff1d](https://github.com/googleapis/python-bigquery/commit/3deff1d963980800e8b79fa3aaf5b712d4fd5062))
+
+
+### Documentation
+
+* Add a TROUBLESHOOTING.md file with tips for logging ([#2262](https://github.com/googleapis/python-bigquery/issues/2262)) ([b684832](https://github.com/googleapis/python-bigquery/commit/b68483227693ea68f6b12eacca2be1803cffb1d1))
+* Update README to break infinite redirect loop ([#2254](https://github.com/googleapis/python-bigquery/issues/2254)) ([8f03166](https://github.com/googleapis/python-bigquery/commit/8f031666114a826da2ad965f8ecd4727466cb480))
+
+## [3.35.1](https://github.com/googleapis/python-bigquery/compare/v3.35.0...v3.35.1) (2025-07-21)
+
+
+### Documentation
+
+* Specify the inherited-members directive for job classes ([#2244](https://github.com/googleapis/python-bigquery/issues/2244)) ([d207f65](https://github.com/googleapis/python-bigquery/commit/d207f6539b7a4c248a5de5719d7f384abbe20abe))
+
+## [3.35.0](https://github.com/googleapis/python-bigquery/compare/v3.34.0...v3.35.0) (2025-07-15)
+
+
+### Features
+
+* Add null_markers property to LoadJobConfig and CSVOptions ([#2239](https://github.com/googleapis/python-bigquery/issues/2239)) ([289446d](https://github.com/googleapis/python-bigquery/commit/289446dd8c356d11a0b63b8e6275629b1ae5dc08))
+* Add total slot ms to RowIterator ([#2233](https://github.com/googleapis/python-bigquery/issues/2233)) ([d44bf02](https://github.com/googleapis/python-bigquery/commit/d44bf0231e6e96369e4e03667a3f96618fb664e2))
+* Add UpdateMode to update_dataset ([#2204](https://github.com/googleapis/python-bigquery/issues/2204)) ([eb9c2af](https://github.com/googleapis/python-bigquery/commit/eb9c2aff242c5107f968bbd8b6a9d30cecc877f6))
+* Adds dataset_view parameter to get_dataset method ([#2198](https://github.com/googleapis/python-bigquery/issues/2198)) ([28a5750](https://github.com/googleapis/python-bigquery/commit/28a5750d455f0381548df6f9b1f7661823837d81))
+* Adds date_format to load job and external config ([#2231](https://github.com/googleapis/python-bigquery/issues/2231)) ([7d31828](https://github.com/googleapis/python-bigquery/commit/7d3182802deccfceb0646b87fc8d12275d0a569b))
+* Adds datetime_format as an option ([#2236](https://github.com/googleapis/python-bigquery/issues/2236)) ([54d3dc6](https://github.com/googleapis/python-bigquery/commit/54d3dc66244d50a031e3c80d43d372d2743ecbc3))
+* Adds source_column_match and associated tests ([#2227](https://github.com/googleapis/python-bigquery/issues/2227)) ([6d5d236](https://github.com/googleapis/python-bigquery/commit/6d5d23685cd457d85955356705c1101e9ec3cdcd))
+* Adds time_format and timestamp_format and associated tests ([#2238](https://github.com/googleapis/python-bigquery/issues/2238)) ([371ad29](https://github.com/googleapis/python-bigquery/commit/371ad292df537278767dba71d81822ed57dd8e7d))
+* Adds time_zone to external config and load job ([#2229](https://github.com/googleapis/python-bigquery/issues/2229)) ([b2300d0](https://github.com/googleapis/python-bigquery/commit/b2300d032843512b7e4a5703377632fe60ef3f8d))
+
+
+### Bug Fixes
+
+* Adds magics.context.project to eliminate issues with unit tests …([#2228](https://github.com/googleapis/python-bigquery/issues/2228)) ([27ff3a8](https://github.com/googleapis/python-bigquery/commit/27ff3a89a5f97305fa3ff673aa9183baa7df200f))
+* Fix rows returned when both start_index and page_size are provided ([#2181](https://github.com/googleapis/python-bigquery/issues/2181)) ([45643a2](https://github.com/googleapis/python-bigquery/commit/45643a2e20ce5d503118522dd195aeca00dec3bc))
+* Make AccessEntry equality consistent with from_api_repr ([#2218](https://github.com/googleapis/python-bigquery/issues/2218)) ([4941de4](https://github.com/googleapis/python-bigquery/commit/4941de441cb32cabeb55ec0320f305fb62551155))
+* Update type hints for various BigQuery files ([#2206](https://github.com/googleapis/python-bigquery/issues/2206)) ([b863291](https://github.com/googleapis/python-bigquery/commit/b86329188ba35e61871db82ae1d95d2a576eed1b))
+
+
+### Documentation
+
+* Improve clarity of "Output Only" fields in Dataset class ([#2201](https://github.com/googleapis/python-bigquery/issues/2201)) ([bd5aba8](https://github.com/googleapis/python-bigquery/commit/bd5aba8ba40c2f35fb672a68eed11d6baedb304f))
+
+## [3.34.0](https://github.com/googleapis/python-bigquery/compare/v3.33.0...v3.34.0) (2025-05-27)
+
+
+### Features
+
+* Job creation mode GA ([#2190](https://github.com/googleapis/python-bigquery/issues/2190)) ([64cd39f](https://github.com/googleapis/python-bigquery/commit/64cd39fb395c4a03ef6d2ec8261e1709477b2186))
+
+
+### Bug Fixes
+
+* **deps:** Update all dependencies ([#2184](https://github.com/googleapis/python-bigquery/issues/2184)) ([12490f2](https://github.com/googleapis/python-bigquery/commit/12490f2f03681516465fc34217dcdf57000f6fdd))
+
+
+### Documentation
+
+* Update query.py ([#2192](https://github.com/googleapis/python-bigquery/issues/2192)) ([9b5ee78](https://github.com/googleapis/python-bigquery/commit/9b5ee78f046d9ca3f758eeca6244b8485fe35875))
+* Use query_and_wait in the array parameters sample ([#2202](https://github.com/googleapis/python-bigquery/issues/2202)) ([28a9994](https://github.com/googleapis/python-bigquery/commit/28a9994792ec90a6a4d16835faf2137c09c0fb02))
+
+## [3.33.0](https://github.com/googleapis/python-bigquery/compare/v3.32.0...v3.33.0) (2025-05-19)
+
+
+### Features
+
+* Add ability to set autodetect_schema query param in update_table ([#2171](https://github.com/googleapis/python-bigquery/issues/2171)) ([57f940d](https://github.com/googleapis/python-bigquery/commit/57f940d957613b4d80fb81ea40a1177b73856189))
+* Add dtype parameters to to_geodataframe functions ([#2176](https://github.com/googleapis/python-bigquery/issues/2176)) ([ebfd0a8](https://github.com/googleapis/python-bigquery/commit/ebfd0a83d43bcb96f65f5669437220aa6138b766))
+* Support job reservation ([#2186](https://github.com/googleapis/python-bigquery/issues/2186)) ([cb646ce](https://github.com/googleapis/python-bigquery/commit/cb646ceea172bf199f366ae0592546dff2d3bcb2))
+
+
+### Bug Fixes
+
+* Ensure AccessEntry equality and repr uses the correct `entity_type` ([#2182](https://github.com/googleapis/python-bigquery/issues/2182)) ([0217637](https://github.com/googleapis/python-bigquery/commit/02176377d5e2fc25b5cd4f46aa6ebfb1b6a960a6))
+* Ensure SchemaField.field_dtype returns a string ([#2188](https://github.com/googleapis/python-bigquery/issues/2188)) ([7ec2848](https://github.com/googleapis/python-bigquery/commit/7ec2848379d5743bbcb36700a1153540c451e0e0))
+
+## [3.32.0](https://github.com/googleapis/python-bigquery/compare/v3.31.0...v3.32.0) (2025-05-12)
+
+
+### Features
+
+* Add dataset access policy version attribute ([#2169](https://github.com/googleapis/python-bigquery/issues/2169)) ([b7656b9](https://github.com/googleapis/python-bigquery/commit/b7656b97c1bd6c204d0508b1851d114719686655))
+* Add preview support for incremental results ([#2145](https://github.com/googleapis/python-bigquery/issues/2145)) ([22b80bb](https://github.com/googleapis/python-bigquery/commit/22b80bba9d0bed319fd3102e567906c9b458dd02))
+* Add WRITE_TRUNCATE_DATA enum ([#2166](https://github.com/googleapis/python-bigquery/issues/2166)) ([4692747](https://github.com/googleapis/python-bigquery/commit/46927479085f13fd326e3f2388f60dfdd37f7f69))
+* Adds condition class and assoc. unit tests ([#2159](https://github.com/googleapis/python-bigquery/issues/2159)) ([a69d6b7](https://github.com/googleapis/python-bigquery/commit/a69d6b796d2edb6ba453980c9553bc9b206c5a6e))
+* Support BigLakeConfiguration (managed Iceberg tables) ([#2162](https://github.com/googleapis/python-bigquery/issues/2162)) ([a1c8e9a](https://github.com/googleapis/python-bigquery/commit/a1c8e9aaf60986924868d54a0ab0334e77002a39))
+* Update the AccessEntry class with a new condition attribute and unit tests ([#2163](https://github.com/googleapis/python-bigquery/issues/2163)) ([7301667](https://github.com/googleapis/python-bigquery/commit/7301667272dfbdd04b1a831418a9ad2d037171fb))
+
+
+### Bug Fixes
+
+* `query()` now warns when `job_id` is set and the default `job_retry` is ignored ([#2167](https://github.com/googleapis/python-bigquery/issues/2167)) ([ca1798a](https://github.com/googleapis/python-bigquery/commit/ca1798aaee2d5905fe688d3097f8ee5c989da333))
+* Empty record dtypes ([#2147](https://github.com/googleapis/python-bigquery/issues/2147)) ([77d7173](https://github.com/googleapis/python-bigquery/commit/77d71736fcc006d3ab8f8ba17955ad5f06e21876))
+* Table iterator should not use bqstorage when page_size is not None ([#2154](https://github.com/googleapis/python-bigquery/issues/2154)) ([e89a707](https://github.com/googleapis/python-bigquery/commit/e89a707b162182ededbf94cc9a0f7594bc2be475))
+
+## [3.31.0](https://github.com/googleapis/python-bigquery/compare/v3.30.0...v3.31.0) (2025-03-20)
+
+
+### Features
+
+* Add query text and total bytes processed to RowIterator ([#2140](https://github.com/googleapis/python-bigquery/issues/2140)) ([2d5f932](https://github.com/googleapis/python-bigquery/commit/2d5f9320d7103bc64c7ba496ba54bb0ef52b5605))
+* Add support for Python 3.13 ([0842aa1](https://github.com/googleapis/python-bigquery/commit/0842aa10967b1d8395cfb43e52c8ea091b381870))
+
+
+### Bug Fixes
+
+* Adding property setter for table constraints, [#1990](https://github.com/googleapis/python-bigquery/issues/1990) ([#2092](https://github.com/googleapis/python-bigquery/issues/2092)) ([f8572dd](https://github.com/googleapis/python-bigquery/commit/f8572dd86595361bae82c3232b2c0d159690a7b7))
+* Allow protobuf 6.x ([0842aa1](https://github.com/googleapis/python-bigquery/commit/0842aa10967b1d8395cfb43e52c8ea091b381870))
+* Avoid "Unable to determine type" warning with JSON columns in `to_dataframe` ([#1876](https://github.com/googleapis/python-bigquery/issues/1876)) ([968020d](https://github.com/googleapis/python-bigquery/commit/968020d5be9d2a30b90d046eaf52f91bb2c70911))
+* Remove setup.cfg configuration for creating universal wheels ([#2146](https://github.com/googleapis/python-bigquery/issues/2146)) ([d7f7685](https://github.com/googleapis/python-bigquery/commit/d7f76853d598c354bfd2e65f5dde28dae97da0ec))
+
+
+### Dependencies
+
+* Remove Python 3.7 and 3.8 as supported runtimes ([#2133](https://github.com/googleapis/python-bigquery/issues/2133)) ([fb7de39](https://github.com/googleapis/python-bigquery/commit/fb7de398cb2ad000b80a8a702d1f6539dc03d8e0))
+
+## [3.30.0](https://github.com/googleapis/python-bigquery/compare/v3.29.0...v3.30.0) (2025-02-26)
+
+
+### Features
+
+* Add roundingmode enum, wiring, and tests ([#2121](https://github.com/googleapis/python-bigquery/issues/2121)) ([3a48948](https://github.com/googleapis/python-bigquery/commit/3a4894827f6e73a4a88cb22933c2004697dabcc7))
+* Adds foreign_type_info attribute to table class and adds unit tests. ([#2126](https://github.com/googleapis/python-bigquery/issues/2126)) ([2c19681](https://github.com/googleapis/python-bigquery/commit/2c1968115bef8e1dc84e0125615f551b9b011a4b))
+* Support resource_tags for table ([#2093](https://github.com/googleapis/python-bigquery/issues/2093)) ([d4070ca](https://github.com/googleapis/python-bigquery/commit/d4070ca21b5797e900a9e87b966837ee1c278217))
+
+
+### Bug Fixes
+
+* Avoid blocking in download thread when using BQ Storage API ([#2034](https://github.com/googleapis/python-bigquery/issues/2034)) ([54c8d07](https://github.com/googleapis/python-bigquery/commit/54c8d07f06a8ae460c9e0fb1614e1fbc21efb5df))
+* Retry 404 errors in `Client.query(...)` ([#2135](https://github.com/googleapis/python-bigquery/issues/2135)) ([c6d5f8a](https://github.com/googleapis/python-bigquery/commit/c6d5f8aaec21ab8f17436407aded4bc2316323fd))
+
+
+### Dependencies
+
+* Updates required checks list in github ([#2136](https://github.com/googleapis/python-bigquery/issues/2136)) ([fea49ff](https://github.com/googleapis/python-bigquery/commit/fea49ffbf8aa1d53451864ceb7fd73189b6661cb))
+* Use pandas-gbq to determine schema in `load_table_from_dataframe` ([#2095](https://github.com/googleapis/python-bigquery/issues/2095)) ([7603bd7](https://github.com/googleapis/python-bigquery/commit/7603bd71d60592ef2a551d9eea09987b218edc73))
+
+
+### Documentation
+
+* Update magics.rst ([#2125](https://github.com/googleapis/python-bigquery/issues/2125)) ([b5bcfb3](https://github.com/googleapis/python-bigquery/commit/b5bcfb303d27015b747a3b0747ecd7f7ed0ed557))
+
+## [3.29.0](https://github.com/googleapis/python-bigquery/compare/v3.28.0...v3.29.0) (2025-01-21)
+
+
+### Features
+
+* Add ExternalCatalogTableOptions class and tests ([#2116](https://github.com/googleapis/python-bigquery/issues/2116)) ([cdc1a6e](https://github.com/googleapis/python-bigquery/commit/cdc1a6e1623b8305c6a6a1a481b3365e866a073d))
+
+
+### Bug Fixes
+
+* Add default value in SchemaField.from_api_repr() ([#2115](https://github.com/googleapis/python-bigquery/issues/2115)) ([7de6822](https://github.com/googleapis/python-bigquery/commit/7de6822e1c556a68cb8d50e90664c094697cca1d))
+
+## [3.28.0](https://github.com/googleapis/python-bigquery/compare/v3.27.0...v3.28.0) (2025-01-15)
+
+
+### Features
+
+* Add property for `allowNonIncrementalDefinition` for materialized view ([#2084](https://github.com/googleapis/python-bigquery/issues/2084)) ([3359ef3](https://github.com/googleapis/python-bigquery/commit/3359ef37b90243bea2d9e68bb996fe5d736f304c))
+* Add property for maxStaleness in table definitions ([#2087](https://github.com/googleapis/python-bigquery/issues/2087)) ([729322c](https://github.com/googleapis/python-bigquery/commit/729322c2288a30464f2f135ba18b9c4aa7d2f0da))
+* Add type hints to Client ([#2044](https://github.com/googleapis/python-bigquery/issues/2044)) ([40529de](https://github.com/googleapis/python-bigquery/commit/40529de923e25c41c6728c121b9c82a042967ada))
+* Adds ExternalCatalogDatasetOptions and tests ([#2111](https://github.com/googleapis/python-bigquery/issues/2111)) ([b929a90](https://github.com/googleapis/python-bigquery/commit/b929a900d49e2c15897134209ed9de5fc7f238cd))
+* Adds ForeignTypeInfo class and tests ([#2110](https://github.com/googleapis/python-bigquery/issues/2110)) ([55ca63c](https://github.com/googleapis/python-bigquery/commit/55ca63c23fcb56573e2de67e4f7899939628c4a1))
+* Adds new input validation function similar to isinstance. ([#2107](https://github.com/googleapis/python-bigquery/issues/2107)) ([a2bebb9](https://github.com/googleapis/python-bigquery/commit/a2bebb95c5ef32ac7c7cbe19c3e7a9412cbee60d))
+* Adds StorageDescriptor and tests ([#2109](https://github.com/googleapis/python-bigquery/issues/2109)) ([6be0272](https://github.com/googleapis/python-bigquery/commit/6be0272ff25dac97a38ae4ee5aa02016dc82a0d8))
+* Adds the SerDeInfo class and tests ([#2108](https://github.com/googleapis/python-bigquery/issues/2108)) ([62960f2](https://github.com/googleapis/python-bigquery/commit/62960f255d05b15940a8d2cdc595592175fada11))
+* Migrate to pyproject.toml ([#2041](https://github.com/googleapis/python-bigquery/issues/2041)) ([1061611](https://github.com/googleapis/python-bigquery/commit/106161180ead01aca1ead909cf06ca559f68666d))
+* Preserve unknown fields from the REST API representation in `SchemaField` ([#2097](https://github.com/googleapis/python-bigquery/issues/2097)) ([aaf1eb8](https://github.com/googleapis/python-bigquery/commit/aaf1eb85ada95ab866be0199812ea7f5c7f50766))
+* Resource tags in dataset ([#2090](https://github.com/googleapis/python-bigquery/issues/2090)) ([3e13016](https://github.com/googleapis/python-bigquery/commit/3e130166f43dcc06704fe90edf9068dfd44842a6))
+* Support setting max_stream_count when fetching query result ([#2051](https://github.com/googleapis/python-bigquery/issues/2051)) ([d461297](https://github.com/googleapis/python-bigquery/commit/d4612979b812d2a835e47200f27a87a66bcb856a))
+
+
+### Bug Fixes
+
+* Allow geopandas 1.x ([#2065](https://github.com/googleapis/python-bigquery/issues/2065)) ([f2ab8cb](https://github.com/googleapis/python-bigquery/commit/f2ab8cbfe00d442ad3b40683ecfec320e53b4688))
+
+
+### Documentation
+
+* Render fields correctly for update calls ([#2055](https://github.com/googleapis/python-bigquery/issues/2055)) ([a4d9534](https://github.com/googleapis/python-bigquery/commit/a4d9534a900f13ae7355904cda05097d781f27e3))
+
+## [3.27.0](https://github.com/googleapis/python-bigquery/compare/v3.26.0...v3.27.0) (2024-11-01)
+
+
+### Features
+
+* Updates to allow users to set max_stream_count ([#2039](https://github.com/googleapis/python-bigquery/issues/2039)) ([7372ad6](https://github.com/googleapis/python-bigquery/commit/7372ad659fd3316a602e90f224e9a3304d4c1419))
+
+## [3.26.0](https://github.com/googleapis/python-bigquery/compare/v3.25.0...v3.26.0) (2024-09-25)
+
+
+### Features
+
+* Include LegacyPandasError in init imports ([#2014](https://github.com/googleapis/python-bigquery/issues/2014)) ([3ab5e95](https://github.com/googleapis/python-bigquery/commit/3ab5e95984ad521027a4e1efd9f16767403e668d))
+* Use `bigquery-magics` package for the `%%bigquery` magic ([#1965](https://github.com/googleapis/python-bigquery/issues/1965)) ([60128a5](https://github.com/googleapis/python-bigquery/commit/60128a522375823422f238312521a2ce356d9177))
+
+
+### Bug Fixes
+
+* Add docfx to the presubmit configuration and delete docs-presubmit ([#1995](https://github.com/googleapis/python-bigquery/issues/1995)) ([bd83cfd](https://github.com/googleapis/python-bigquery/commit/bd83cfd2eb25cec58d59af8048f5188d748b083d))
+* Add warning when encountering unknown field types ([#1989](https://github.com/googleapis/python-bigquery/issues/1989)) ([8f5a41d](https://github.com/googleapis/python-bigquery/commit/8f5a41d283a965ca161019588d3a3b2947b04b5b))
+* Allow protobuf 5.x; require protobuf >=3.20.2; proto-plus >=1.22.3 ([#1976](https://github.com/googleapis/python-bigquery/issues/1976)) ([57bf873](https://github.com/googleapis/python-bigquery/commit/57bf873474382cc2cb34243b704bc928fa1b64c6))
+* Do not set job timeout extra property if None ([#1987](https://github.com/googleapis/python-bigquery/issues/1987)) ([edcb79c](https://github.com/googleapis/python-bigquery/commit/edcb79ca69dba30d8102abebb9d53bc76e4882ee))
+* Set pyarrow field nullable to False for a BigQuery field in REPEATED mode ([#1999](https://github.com/googleapis/python-bigquery/issues/1999)) ([5352870](https://github.com/googleapis/python-bigquery/commit/5352870283ca7d4652aefc73f12645bcf6e1363c))
+
+
+### Dependencies
+
+* Bump min version of google-api-core and google-cloud-core to 2.x ([#1972](https://github.com/googleapis/python-bigquery/issues/1972)) ([a958732](https://github.com/googleapis/python-bigquery/commit/a958732aed7d9bd51ffde3dc0e6cae9ad7455b54))
+
+
+### Documentation
+
+* Add short mode query sample & test ([#1978](https://github.com/googleapis/python-bigquery/issues/1978)) ([ba61a8a](https://github.com/googleapis/python-bigquery/commit/ba61a8ab0da541ba1940211875d7ea2e9e17dfa8))
+* Improve QueryJobConfig.destination docstring ([#2016](https://github.com/googleapis/python-bigquery/issues/2016)) ([1b4cca0](https://github.com/googleapis/python-bigquery/commit/1b4cca0a3cc788a4570705572d5f04172f6b4b24))
+
+## [3.25.0](https://github.com/googleapis/python-bigquery/compare/v3.24.0...v3.25.0) (2024-06-17)
+
+
+### Features
+
+* Add prefer_bqstorage_client option for Connection ([#1945](https://github.com/googleapis/python-bigquery/issues/1945)) ([bfdeb3f](https://github.com/googleapis/python-bigquery/commit/bfdeb3fdbc1d5b26fcd3d1433abfb0be49d12018))
+* Support load job option ColumnNameCharacterMap ([#1952](https://github.com/googleapis/python-bigquery/issues/1952)) ([7e522ee](https://github.com/googleapis/python-bigquery/commit/7e522eea776cd9a74f8078c4236f63d5ff11f20e))
+
+
+### Bug Fixes
+
+* Do not overwrite page_size with max_results when start_index is set ([#1956](https://github.com/googleapis/python-bigquery/issues/1956)) ([7d0fcee](https://github.com/googleapis/python-bigquery/commit/7d0fceefdf28278c1f2cdaab571de9b235320998))
+
+## [3.24.0](https://github.com/googleapis/python-bigquery/compare/v3.23.1...v3.24.0) (2024-06-04)
+
+
+### Features
+
+* Add default timeout for Client.get_job() ([#1935](https://github.com/googleapis/python-bigquery/issues/1935)) ([9fbad76](https://github.com/googleapis/python-bigquery/commit/9fbad767cc228e02040436742d0cb6743d370b90))
+* Add support for map target type in Parquet options ([#1919](https://github.com/googleapis/python-bigquery/issues/1919)) ([c3f7b23](https://github.com/googleapis/python-bigquery/commit/c3f7b237383d4705ed6e720544728c4db61f6c83))
+
+
+### Bug Fixes
+
+* Create query job in job.result() if doesn't exist ([#1944](https://github.com/googleapis/python-bigquery/issues/1944)) ([8f5b4b7](https://github.com/googleapis/python-bigquery/commit/8f5b4b70423c277ffd559d2034bc0b2b5fb93169))
+* Retry `is_job_done` on `ConnectionError` ([#1930](https://github.com/googleapis/python-bigquery/issues/1930)) ([4f72723](https://github.com/googleapis/python-bigquery/commit/4f72723f539d35977bc52c5950f6e00889b5c7be))
+
+
+### Performance Improvements
+
+* If `page_size` or `max_results` is set on `QueryJob.result()`, use it to download the first page of results ([#1942](https://github.com/googleapis/python-bigquery/issues/1942)) ([3e7a48d](https://github.com/googleapis/python-bigquery/commit/3e7a48d36e3c7bf6abe1b5550097178f6ca6e174))
+
+## [3.23.1](https://github.com/googleapis/python-bigquery/compare/v3.23.0...v3.23.1) (2024-05-21)
+
+
+### Performance Improvements
+
+* Decrease the threshold at which we use the BQ Storage Read API ([#1925](https://github.com/googleapis/python-bigquery/issues/1925)) ([eaa1a52](https://github.com/googleapis/python-bigquery/commit/eaa1a52b360646909c14ca7194b8c6b17fefdd79))
+
+## [3.23.0](https://github.com/googleapis/python-bigquery/compare/v3.22.0...v3.23.0) (2024-05-16)
+
+
+### Features
+
+* Adds timer decorator to facilitate debugging ([#1917](https://github.com/googleapis/python-bigquery/issues/1917)) ([ea750e0](https://github.com/googleapis/python-bigquery/commit/ea750e0248473b6207b8517aa7ea1cf4e19bccf2))
+* Support insertAll for range ([#1909](https://github.com/googleapis/python-bigquery/issues/1909)) ([74e75e8](https://github.com/googleapis/python-bigquery/commit/74e75e89ce3a5ac18112b2c1c33248445ff072e4))
+
+
+### Bug Fixes
+
+* Add pyarrow version check for range support ([#1914](https://github.com/googleapis/python-bigquery/issues/1914)) ([a86d7b9](https://github.com/googleapis/python-bigquery/commit/a86d7b96813f67fea28b46c5252416222edca9a6))
+* Edit presubmit to simplify configuration ([#1915](https://github.com/googleapis/python-bigquery/issues/1915)) ([b739596](https://github.com/googleapis/python-bigquery/commit/b739596f37b8c00b375cc811c316b618097d761a))
+
+## [3.22.0](https://github.com/googleapis/python-bigquery/compare/v3.21.0...v3.22.0) (2024-04-19)
+
+
+### Features
+
+* Support RANGE in queries Part 2: Arrow ([#1868](https://github.com/googleapis/python-bigquery/issues/1868)) ([5251b5d](https://github.com/googleapis/python-bigquery/commit/5251b5dbb254732ea730bab664ad319bd5be47e7))
+
+## [3.21.0](https://github.com/googleapis/python-bigquery/compare/v3.20.1...v3.21.0) (2024-04-18)
+
+
+### Features
+
+* Add compression option ZSTD. ([#1890](https://github.com/googleapis/python-bigquery/issues/1890)) ([5ed9cce](https://github.com/googleapis/python-bigquery/commit/5ed9ccee204b7cf8e96cb0e050f6830c05f3b4fd))
+* Adds billing to opentel ([#1889](https://github.com/googleapis/python-bigquery/issues/1889)) ([38697fb](https://github.com/googleapis/python-bigquery/commit/38697fb942516fc2f6f5e21e19a11811fbaeb1f4))
+* Support RANGE in queries Part 1: JSON ([#1884](https://github.com/googleapis/python-bigquery/issues/1884)) ([3634405](https://github.com/googleapis/python-bigquery/commit/3634405fa1b40ae5f69b06d7c7f8de4e3d246d92))
+
+
+### Bug Fixes
+
+* Add types to DatasetReference constructor ([#1601](https://github.com/googleapis/python-bigquery/issues/1601)) ([bf8861c](https://github.com/googleapis/python-bigquery/commit/bf8861c3473a1af978db7a06463ddc0bad86f326))
+* Creates linting-typing.cfg in presubmit ([#1881](https://github.com/googleapis/python-bigquery/issues/1881)) ([c852c15](https://github.com/googleapis/python-bigquery/commit/c852c153c55025ba1187d61e313ead2308616c55))
+* Remove duplicate key time_partitioning from Table._PROPERTY_TO_A…([#1898](https://github.com/googleapis/python-bigquery/issues/1898)) ([82ae908](https://github.com/googleapis/python-bigquery/commit/82ae908fbf3b2361343fff1859d3533383dc50ec))
+* Retry query jobs that fail even with ambiguous `jobs.getQueryResults` REST errors ([#1903](https://github.com/googleapis/python-bigquery/issues/1903), [#1900](https://github.com/googleapis/python-bigquery/issues/1900)) ([1367b58](https://github.com/googleapis/python-bigquery/commit/1367b584b68d917ec325ce4383a0e9a36205b894))
+
+
+### Performance Improvements
+
+* Avoid unnecessary API call in `QueryJob.result()` when job is already finished ([#1900](https://github.com/googleapis/python-bigquery/issues/1900)) ([1367b58](https://github.com/googleapis/python-bigquery/commit/1367b584b68d917ec325ce4383a0e9a36205b894))
+
+## [3.20.1](https://github.com/googleapis/python-bigquery/compare/v3.20.0...v3.20.1) (2024-04-01)
+
+
+### Bug Fixes
+
+* Make `pyarrow` an optional dependency post-3.20.0 yanked release ([#1879](https://github.com/googleapis/python-bigquery/issues/1879)) ([21714e1](https://github.com/googleapis/python-bigquery/commit/21714e18bad8d8d89ed5642dbdb61d14e97d5f33))
+
+## [3.20.0](https://github.com/googleapis/python-bigquery/compare/v3.19.0...v3.20.0) (2024-03-27)
+
+
+### Features
+
+* Add `fields` parameter to `set_iam_policy` for consistency with update methods ([#1872](https://github.com/googleapis/python-bigquery/issues/1872)) ([08b1e6f](https://github.com/googleapis/python-bigquery/commit/08b1e6f9c41121907c345daedbae40ece18e8b6a))
+
+
+### Bug Fixes
+
+* Correct type checking ([#1848](https://github.com/googleapis/python-bigquery/issues/1848)) ([2660dbd](https://github.com/googleapis/python-bigquery/commit/2660dbd4821a89a1e20e3e1541504a409f1979aa))
+* Update error logging when converting to pyarrow column fails ([#1836](https://github.com/googleapis/python-bigquery/issues/1836)) ([0ac6e9b](https://github.com/googleapis/python-bigquery/commit/0ac6e9bf186945832f5dcdf5a4d95667b4da223e))
+* Updates a number of optional dependencies ([#1864](https://github.com/googleapis/python-bigquery/issues/1864)) ([c2496a1](https://github.com/googleapis/python-bigquery/commit/c2496a1014a7d99e805b3d0a66e4517165bd7e01))
+* Use an allowlist instead of denylist to determine when `query_and_wait` uses `jobs.query` API ([#1869](https://github.com/googleapis/python-bigquery/issues/1869)) ([e265db6](https://github.com/googleapis/python-bigquery/commit/e265db6a6a37d13056dcaac240c2cf3975dfd644))
+
+## [3.19.0](https://github.com/googleapis/python-bigquery/compare/v3.18.0...v3.19.0) (2024-03-11)
+
+
+### Features
+
+* Support RANGE query parameters ([#1827](https://github.com/googleapis/python-bigquery/issues/1827)) ([b359a9a](https://github.com/googleapis/python-bigquery/commit/b359a9a55936a759a36aa69c5e5b014685e1fca6))
+* Support range sql ([#1807](https://github.com/googleapis/python-bigquery/issues/1807)) ([86a45c9](https://github.com/googleapis/python-bigquery/commit/86a45c989836b34dca456bac014352e55d6f86c0))
+
+
+### Bug Fixes
+
+* Add google-auth as a direct dependency ([713ce2c](https://github.com/googleapis/python-bigquery/commit/713ce2c2f6ce9931f67cbbcd63ad436ad336ad26))
+* Augment universe_domain handling ([#1837](https://github.com/googleapis/python-bigquery/issues/1837)) ([53c2cbf](https://github.com/googleapis/python-bigquery/commit/53c2cbf98d2961f553747514de273bcd5c117f0e))
+* **deps:** Require google-api-core>=1.34.1, >=2.11.0 ([713ce2c](https://github.com/googleapis/python-bigquery/commit/713ce2c2f6ce9931f67cbbcd63ad436ad336ad26))
+* Supplementary fix to env-based universe resolution ([#1844](https://github.com/googleapis/python-bigquery/issues/1844)) ([b818992](https://github.com/googleapis/python-bigquery/commit/b8189929b6008f7780214822062f8ed05d8d2a01))
+* Supplementary fix to env-based universe resolution ([#1847](https://github.com/googleapis/python-bigquery/issues/1847)) ([6dff50f](https://github.com/googleapis/python-bigquery/commit/6dff50f4fbc5aeb644383a4050dd5ffc05015ffe))
+
+## [3.18.0](https://github.com/googleapis/python-bigquery/compare/v3.17.2...v3.18.0) (2024-02-29)
+
+
+### Features
+
+* Support nullable boolean and Int64 dtypes in `insert_rows_from_dataframe` ([#1816](https://github.com/googleapis/python-bigquery/issues/1816)) ([ab0cf4c](https://github.com/googleapis/python-bigquery/commit/ab0cf4cc03292f62b56a8813cfb7681daa87f872))
+* Support slot_ms in QueryPlanEntry ([#1831](https://github.com/googleapis/python-bigquery/issues/1831)) ([d62cabb](https://github.com/googleapis/python-bigquery/commit/d62cabbf115637ecbaf8cc378f39329a5ae74c26))
+
+
+### Bug Fixes
+
+* Keyword rendering and docstring improvements ([#1829](https://github.com/googleapis/python-bigquery/issues/1829)) ([4dfb920](https://github.com/googleapis/python-bigquery/commit/4dfb920b106784e98f343b3e3fc8e8ff70c50560))
+
+
+### Documentation
+
+* **samples:** Updates to urllib3 constraint for Python 3.7 ([#1834](https://github.com/googleapis/python-bigquery/issues/1834)) ([b099c32](https://github.com/googleapis/python-bigquery/commit/b099c32a83946a347560f6a71d08c3f263e56cb6))
+* Update `client_query_w_named_params.py` to use `query_and_wait` API ([#1782](https://github.com/googleapis/python-bigquery/issues/1782)) ([89dfcb6](https://github.com/googleapis/python-bigquery/commit/89dfcb6469d22e78003a70371a0938a6856e033c))
+
+## [3.17.2](https://github.com/googleapis/python-bigquery/compare/v3.17.1...v3.17.2) (2024-01-30)
+
+
+### Bug Fixes
+
+* Change load_table_from_json autodetect logic ([#1804](https://github.com/googleapis/python-bigquery/issues/1804)) ([6249032](https://github.com/googleapis/python-bigquery/commit/62490325f64e5d66303d9218992e28ac5f21cb3f))
+
+
+### Documentation
+
+* Update to use API ([#1781](https://github.com/googleapis/python-bigquery/issues/1781)) ([81563b0](https://github.com/googleapis/python-bigquery/commit/81563b06298fe3a64be6a89b583c3d64758ca12a))
+* Update `client_query_destination_table.py` sample to use `query_and_wait` ([#1783](https://github.com/googleapis/python-bigquery/issues/1783)) ([68ebbe1](https://github.com/googleapis/python-bigquery/commit/68ebbe12d455ce8e9b1784fb11787c2fb842ef22))
+* Update query_external_sheets_permanent_table.py to use query_and_wait API ([#1778](https://github.com/googleapis/python-bigquery/issues/1778)) ([a7be88a](https://github.com/googleapis/python-bigquery/commit/a7be88adf8a480ee61aa79789cb53df1b79bb091))
+* Update sample for query_to_arrow to use query_and_wait API ([#1776](https://github.com/googleapis/python-bigquery/issues/1776)) ([dbf10de](https://github.com/googleapis/python-bigquery/commit/dbf10dee51a7635e9b98658f205ded2de087a06f))
+* Update the query destination table legacy file to use query_and_wait API ([#1775](https://github.com/googleapis/python-bigquery/issues/1775)) ([ef89f9e](https://github.com/googleapis/python-bigquery/commit/ef89f9e58c22b3af5a7757b69daa030116012350))
+* Update to use `query_and_wait` in `client_query_w_positional_params.py` ([#1786](https://github.com/googleapis/python-bigquery/issues/1786)) ([410f71e](https://github.com/googleapis/python-bigquery/commit/410f71e6b6e755928e363ed89c1044e14b0db9cc))
+* Update to use `query_and_wait` in `samples/client_query_w_timestamp_params.py` ([#1785](https://github.com/googleapis/python-bigquery/issues/1785)) ([ba36948](https://github.com/googleapis/python-bigquery/commit/ba3694852c13c8a29fe0f9d923353e82acfd4278))
+* Update to_geodataframe to use query_and_wait functionality ([#1800](https://github.com/googleapis/python-bigquery/issues/1800)) ([1298594](https://github.com/googleapis/python-bigquery/commit/12985942942b8f205ecd261fcdf620df9a640460))
+
+## [3.17.1](https://github.com/googleapis/python-bigquery/compare/v3.17.0...v3.17.1) (2024-01-24)
+
+
+### Bug Fixes
+
+* Add pyarrow.large_string to the _ARROW_SCALAR_IDS_TO_BQ map ([#1796](https://github.com/googleapis/python-bigquery/issues/1796)) ([b402a6d](https://github.com/googleapis/python-bigquery/commit/b402a6df92e656aee10dd2c11c48f6ed93c74fd7))
+* Retry 'job exceeded rate limits' for DDL queries ([#1794](https://github.com/googleapis/python-bigquery/issues/1794)) ([39f33b2](https://github.com/googleapis/python-bigquery/commit/39f33b210ecbe9c2fd390825d29393c2d80257f5))
+
+## [3.17.0](https://github.com/googleapis/python-bigquery/compare/v3.16.0...v3.17.0) (2024-01-24)
+
+
+### Features
+
+* Support universe resolution ([#1774](https://github.com/googleapis/python-bigquery/issues/1774)) ([0b5c1d5](https://github.com/googleapis/python-bigquery/commit/0b5c1d597cdec3a05a16fb935595f773c5840bd4))
+
+
+### Bug Fixes
+
+* `query_and_wait` now retains unknown query configuration `_properties` ([#1793](https://github.com/googleapis/python-bigquery/issues/1793)) ([4ba4342](https://github.com/googleapis/python-bigquery/commit/4ba434287a0a25f027e3b63a80f8881a9b16723e))
+* Raise `ValueError` in `query_and_wait` with wrong `job_config` type ([4ba4342](https://github.com/googleapis/python-bigquery/commit/4ba434287a0a25f027e3b63a80f8881a9b16723e))
+
+
+### Documentation
+
+* Remove unused query code sample ([#1769](https://github.com/googleapis/python-bigquery/issues/1769)) ([1f96439](https://github.com/googleapis/python-bigquery/commit/1f96439b3dbd27f11be5e2af84f290ec6094d0a4))
+* Update `snippets.py` to use `query_and_wait` ([#1773](https://github.com/googleapis/python-bigquery/issues/1773)) ([d90602d](https://github.com/googleapis/python-bigquery/commit/d90602de87e58b665cb974401a327a640805822f))
+* Update multiple samples to change query to query_and_wait ([#1784](https://github.com/googleapis/python-bigquery/issues/1784)) ([d1161dd](https://github.com/googleapis/python-bigquery/commit/d1161dddde41a7d35b30033ccbf6984a5de640bd))
+* Update the query with no cache sample to use query_and_wait API ([#1770](https://github.com/googleapis/python-bigquery/issues/1770)) ([955a4cd](https://github.com/googleapis/python-bigquery/commit/955a4cd99e21cbca1b2f9c1dc6aa3fd8070cd61f))
+* Updates `query` to `query_and_wait` in samples/desktopapp/user_credentials.py ([#1787](https://github.com/googleapis/python-bigquery/issues/1787)) ([89f1299](https://github.com/googleapis/python-bigquery/commit/89f1299b3164b51fb0f29bc600a34ded59c10682))
+
+## [3.16.0](https://github.com/googleapis/python-bigquery/compare/v3.15.0...v3.16.0) (2024-01-12)
+
+
+### Features
+
+* Add `table_constraints` field to Table model ([#1755](https://github.com/googleapis/python-bigquery/issues/1755)) ([a167f9a](https://github.com/googleapis/python-bigquery/commit/a167f9a95f0a8fbf0bdb4943d06f07c03768c132))
+* Support jsonExtension in LoadJobConfig ([#1751](https://github.com/googleapis/python-bigquery/issues/1751)) ([0fd7347](https://github.com/googleapis/python-bigquery/commit/0fd7347ddb4ae1993f02b3bc109f64297437b3e2))
+
+
+### Bug Fixes
+
+* Add detailed message in job error ([#1762](https://github.com/googleapis/python-bigquery/issues/1762)) ([08483fb](https://github.com/googleapis/python-bigquery/commit/08483fba675f3b87571787e1e4420134a8fc8177))
+
+## [3.15.0](https://github.com/googleapis/python-bigquery/compare/v3.14.1...v3.15.0) (2024-01-09)
+
+
+### Features
+
+* Support JSON type in `insert_rows` and as a scalar query parameter ([#1757](https://github.com/googleapis/python-bigquery/issues/1757)) ([02a7d12](https://github.com/googleapis/python-bigquery/commit/02a7d129776b7da7da844ffa9c5cdf21811cd3af))
+* Support RANGE in schema ([#1746](https://github.com/googleapis/python-bigquery/issues/1746)) ([8585747](https://github.com/googleapis/python-bigquery/commit/8585747058e6db49a8078ae44d8e10735cdc27f9))
+
+
+### Bug Fixes
+
+* Deserializing JSON subfields within structs fails ([#1742](https://github.com/googleapis/python-bigquery/issues/1742)) ([0d93073](https://github.com/googleapis/python-bigquery/commit/0d930739c78b557db6cd48b38fe16eba93719c40))
+* Update expected results due to an upstream change in the dataset ([#1761](https://github.com/googleapis/python-bigquery/issues/1761)) ([132c14b](https://github.com/googleapis/python-bigquery/commit/132c14bbddfb61ea8bc408bef5e958e21b5b819c))
+* Load_table_from_dataframe for higher scale decimal ([#1703](https://github.com/googleapis/python-bigquery/issues/1703)) ([b9c8be0](https://github.com/googleapis/python-bigquery/commit/b9c8be0982c76187444300c414e0dda8b0ad105b))
+* Updates types-protobuf version for mypy-samples nox session ([#1764](https://github.com/googleapis/python-bigquery/issues/1764)) ([c0de695](https://github.com/googleapis/python-bigquery/commit/c0de6958e5761ad6ff532dd933b0f4387e18f1b9))
+
+
+### Performance Improvements
+
+* DB-API uses more efficient `query_and_wait` when no job ID is provided ([#1747](https://github.com/googleapis/python-bigquery/issues/1747)) ([d225a94](https://github.com/googleapis/python-bigquery/commit/d225a94e718a85877c495fbd32eca607b8919ac6))
+
+## [3.14.1](https://github.com/googleapis/python-bigquery/compare/v3.14.0...v3.14.1) (2023-12-13)
+
+
+### Bug Fixes
+
+* Add missing handler for deserializing json value ([#1587](https://github.com/googleapis/python-bigquery/issues/1587)) ([09017a9](https://github.com/googleapis/python-bigquery/commit/09017a997010f78bb6e34238fab15247ed14ea7e))
+
+## [3.14.0](https://github.com/googleapis/python-bigquery/compare/v3.13.0...v3.14.0) (2023-12-08)
+
+
+### Features
+
+* Add `Client.query_and_wait` which directly returns a `RowIterator` of results ([#1722](https://github.com/googleapis/python-bigquery/issues/1722)) ([89a647e](https://github.com/googleapis/python-bigquery/commit/89a647e19fe5d7302c0a39bba77a155635c5c29d))
+* Add `job_id`, `location`, `project`, and `query_id` properties on `RowIterator` ([#1733](https://github.com/googleapis/python-bigquery/issues/1733)) ([494f275](https://github.com/googleapis/python-bigquery/commit/494f275ab2493dc7904f685c4d12e60bef51ab21))
+* Add `job_timeout_ms` to job configuration classes ([#1675](https://github.com/googleapis/python-bigquery/issues/1675)) ([84d64cd](https://github.com/googleapis/python-bigquery/commit/84d64cdd157afef4a7bf7807e557d59452133434))
+* Add support dataset.max_time_travel_hours ([#1683](https://github.com/googleapis/python-bigquery/issues/1683)) ([f22eff2](https://github.com/googleapis/python-bigquery/commit/f22eff25f116f1c4973ac2b8b03bc8a4ae1f3f42))
+* Add support for Dataset.isCaseInsensitive ([#1671](https://github.com/googleapis/python-bigquery/issues/1671)) ([386fa86](https://github.com/googleapis/python-bigquery/commit/386fa86c89b8cff69fc02213254a1c53c02fee42))
+* Add support for Python 3.12 ([#1736](https://github.com/googleapis/python-bigquery/issues/1736)) ([3c0976a](https://github.com/googleapis/python-bigquery/commit/3c0976aecb0f917477feef4e9ed865997c2bb106))
+* Removed pkg_resources from all test files and moved importlib into pandas extra ([#1726](https://github.com/googleapis/python-bigquery/issues/1726)) ([1f4ebb1](https://github.com/googleapis/python-bigquery/commit/1f4ebb1eca4f9380a31172fc8cb2fae125f8c5a2))
+* Support data_governance_type ([#1708](https://github.com/googleapis/python-bigquery/issues/1708)) ([eff365d](https://github.com/googleapis/python-bigquery/commit/eff365dc17755d0855338e2f273428ffe2056f67))
+
+
+### Bug Fixes
+
+* `load_table_from_dataframe` now assumes there may be local null values ([#1735](https://github.com/googleapis/python-bigquery/issues/1735)) ([f05dc69](https://github.com/googleapis/python-bigquery/commit/f05dc69a1f8c65ac32085bfcc6950c2c83f8a843))
+* Ensure query job retry has longer deadline than API request deadline ([#1734](https://github.com/googleapis/python-bigquery/issues/1734)) ([5573579](https://github.com/googleapis/python-bigquery/commit/55735791122f97b7f67cb962b489fd1f12210af5))
+* Keep `RowIterator.total_rows` populated after iteration ([#1748](https://github.com/googleapis/python-bigquery/issues/1748)) ([8482f47](https://github.com/googleapis/python-bigquery/commit/8482f4759ce3c4b00fa06a7f306a2ac4d4ee8eb7))
+* Move grpc, proto-plus and protobuf packages to extras ([#1721](https://github.com/googleapis/python-bigquery/issues/1721)) ([5ce4d13](https://github.com/googleapis/python-bigquery/commit/5ce4d136af97b91fbe1cc56bba1021e50a9c8476))
+
+
+### Performance Improvements
+
+* Use the first page a results when `query(api_method="QUERY")` ([#1723](https://github.com/googleapis/python-bigquery/issues/1723)) ([6290517](https://github.com/googleapis/python-bigquery/commit/6290517d6b153a31f20098f75aee580b7915aca9))
+
+## [3.13.0](https://github.com/googleapis/python-bigquery/compare/v3.12.0...v3.13.0) (2023-10-30)
+
+
+### Features
+
+* Add `Model.transform_columns` property ([#1661](https://github.com/googleapis/python-bigquery/issues/1661)) ([5ceed05](https://github.com/googleapis/python-bigquery/commit/5ceed056482f6d1f2fc45e7e6b84382de45c85ed))
+* Add support for dataset.default_rounding_mode ([#1688](https://github.com/googleapis/python-bigquery/issues/1688)) ([83bc768](https://github.com/googleapis/python-bigquery/commit/83bc768b90a852d258a4805603020a296e02d2f9))
+
+
+### Bug Fixes
+
+* AccessEntry API representation parsing ([#1682](https://github.com/googleapis/python-bigquery/issues/1682)) ([a40d7ae](https://github.com/googleapis/python-bigquery/commit/a40d7ae03149708fc34c962b43a6ac198780b6aa))
+
+
+### Documentation
+
+* Remove redundant `bigquery_update_table_expiration` code sample ([#1673](https://github.com/googleapis/python-bigquery/issues/1673)) ([2dded33](https://github.com/googleapis/python-bigquery/commit/2dded33626b3de6c4ab5e1229eb4c85786b2ff53))
+* Revised `create_partitioned_table` sample ([#1447](https://github.com/googleapis/python-bigquery/issues/1447)) ([40ba859](https://github.com/googleapis/python-bigquery/commit/40ba859059c3e463e17ea7781bc5a9aff8244c5d))
+* Revised relax column mode sample ([#1467](https://github.com/googleapis/python-bigquery/issues/1467)) ([b8c9276](https://github.com/googleapis/python-bigquery/commit/b8c9276be011d971b941b583fd3d4417d438067f))
+
+## [3.12.0](https://github.com/googleapis/python-bigquery/compare/v3.11.4...v3.12.0) (2023-10-02)
+
+
+### Features
+
+* Add `Dataset.storage_billing_model` setter, use `client.update_dataset(ds, fields=["storage_billing_model"])` to update ([#1643](https://github.com/googleapis/python-bigquery/issues/1643)) ([5deba50](https://github.com/googleapis/python-bigquery/commit/5deba50b8c2d91d08bd5f5fb68742268c494b4a9))
+* Search statistics ([#1616](https://github.com/googleapis/python-bigquery/issues/1616)) ([b930e46](https://github.com/googleapis/python-bigquery/commit/b930e4673b0d1cceb53f683e47578d87af9361f3))
+* Widen retry predicate to include ServiceUnavailable ([#1641](https://github.com/googleapis/python-bigquery/issues/1641)) ([3e021a4](https://github.com/googleapis/python-bigquery/commit/3e021a46d387a0e3cb69913a281062fc221bb926))
+
+
+### Bug Fixes
+
+* Allow `storage_billing_model` to be explicitly set to `None` to use project default value ([#1665](https://github.com/googleapis/python-bigquery/issues/1665)) ([514d3e1](https://github.com/googleapis/python-bigquery/commit/514d3e12e5131bd589dff08893fd89bf40338ba3))
+* Relax timeout expectations ([#1645](https://github.com/googleapis/python-bigquery/issues/1645)) ([1760e94](https://github.com/googleapis/python-bigquery/commit/1760e945d16163980027fecf21113cd77ddc35a1))
+* Use isinstance() per E721, unpin flake8 ([#1659](https://github.com/googleapis/python-bigquery/issues/1659)) ([54a7769](https://github.com/googleapis/python-bigquery/commit/54a77694afcd80be4ba469c6ebb7ca8be112b04e))
+
+
+### Documentation
+
+* Revise update_table_expiration sample ([#1457](https://github.com/googleapis/python-bigquery/issues/1457)) ([03194e0](https://github.com/googleapis/python-bigquery/commit/03194e0156ed9201cb36301967c5af117d7ef29c))
+
+## [3.11.4](https://github.com/googleapis/python-bigquery/compare/v3.11.3...v3.11.4) (2023-07-19)
+
+
+### Bug Fixes
+
+* Updates typing in function definitions ([#1613](https://github.com/googleapis/python-bigquery/issues/1613)) ([db755ce](https://github.com/googleapis/python-bigquery/commit/db755ce5d2ae21e458f33f02cf63d2e5fbc45cf5))
+
+## [3.11.3](https://github.com/googleapis/python-bigquery/compare/v3.11.2...v3.11.3) (2023-06-27)
+
+
+### Bug Fixes
+
+* Type annotations include Optional when None is accepted ([#1554](https://github.com/googleapis/python-bigquery/issues/1554)) ([6c1ab80](https://github.com/googleapis/python-bigquery/commit/6c1ab802b09124ba837d6d5358962e3fce2d4a2c))
+
+## [3.11.2](https://github.com/googleapis/python-bigquery/compare/v3.11.1...v3.11.2) (2023-06-21)
+
+
+### Bug Fixes
+
+* Updates tests based on revised hacker_news tables ([#1591](https://github.com/googleapis/python-bigquery/issues/1591)) ([d73cf49](https://github.com/googleapis/python-bigquery/commit/d73cf495b8dfa032a43dc1d58599d0691aaa0efb))
+
+## [3.11.1](https://github.com/googleapis/python-bigquery/compare/v3.11.0...v3.11.1) (2023-06-09)
+
+
+### Documentation
+
+* Add/reformat return types for cloud RAD docs ([#1582](https://github.com/googleapis/python-bigquery/issues/1582)) ([6efdce1](https://github.com/googleapis/python-bigquery/commit/6efdce13cc3b25d37d22a856f2308daed569e637))
+
+## [3.11.0](https://github.com/googleapis/python-bigquery/compare/v3.10.0...v3.11.0) (2023-06-01)
+
+
+### Features
+
+* Add remote function options to routines ([#1558](https://github.com/googleapis/python-bigquery/issues/1558)) ([84ad11d](https://github.com/googleapis/python-bigquery/commit/84ad11d00d99d279e4e6e0fa4ca60e59575b1dad))
+
+
+### Bug Fixes
+
+* Filter None values from OpenTelemetry attributes ([#1567](https://github.com/googleapis/python-bigquery/issues/1567)) ([9ea2e21](https://github.com/googleapis/python-bigquery/commit/9ea2e21c35783782993d1ad2d3b910bbe9981ce2))
+* Handle case when expirationMs is None ([#1553](https://github.com/googleapis/python-bigquery/issues/1553)) ([fa6e13d](https://github.com/googleapis/python-bigquery/commit/fa6e13d5006caadb36899b4e2a24ca82b7f11b17))
+* Raise most recent exception when not able to fetch query job after starting the job ([#1362](https://github.com/googleapis/python-bigquery/issues/1362)) ([09cc1df](https://github.com/googleapis/python-bigquery/commit/09cc1df6babaf90ea0b0a6fd926f8013822a31ed))
+
+## [3.10.0](https://github.com/googleapis/python-bigquery/compare/v3.9.0...v3.10.0) (2023-04-18)
+
+
+### Features
+
+* Add date, datetime, time, timestamp dtype to to_dataframe ([#1547](https://github.com/googleapis/python-bigquery/issues/1547)) ([64e913d](https://github.com/googleapis/python-bigquery/commit/64e913d73832f6363466cbea5ace2337c86fa58b))
+
+## [3.9.0](https://github.com/googleapis/python-bigquery/compare/v3.8.0...v3.9.0) (2023-03-28)
+
+
+### Features
+
+* Expose query job on dbapi cursor ([#1520](https://github.com/googleapis/python-bigquery/issues/1520)) ([339eb0e](https://github.com/googleapis/python-bigquery/commit/339eb0e86040a7c30d140800f34810ffc6a7c76b))
+
+
+### Bug Fixes
+
+* Keyerror when the load_table_from_dataframe accesses a unmapped dtype dataframe index ([#1535](https://github.com/googleapis/python-bigquery/issues/1535)) ([a69348a](https://github.com/googleapis/python-bigquery/commit/a69348a558f48cfc61d03d3e8bb7f9aee48bea86))
+
+## [3.8.0](https://github.com/googleapis/python-bigquery/compare/v3.7.0...v3.8.0) (2023-03-24)
+
+
+### Features
+
+* Add bool, int, float, string dtype to to_dataframe ([#1529](https://github.com/googleapis/python-bigquery/issues/1529)) ([5e4465d](https://github.com/googleapis/python-bigquery/commit/5e4465d0975f54e8da885006686d9431ff9c5653))
+* Add default LoadJobConfig to Client ([#1526](https://github.com/googleapis/python-bigquery/issues/1526)) ([a2520ca](https://github.com/googleapis/python-bigquery/commit/a2520cabf7ec6bcb923c21e338188f1c10dc4d5d))
+* Expose configuration property on CopyJob, ExtractJob, LoadJob, QueryJob ([#1521](https://github.com/googleapis/python-bigquery/issues/1521)) ([8270a10](https://github.com/googleapis/python-bigquery/commit/8270a10df8f40750a7ac541a1781a71d7e79ce67))
+
+
+### Bug Fixes
+
+* Loosen ipywidgets restrictions further to address ipython compatibility issues ([#1531](https://github.com/googleapis/python-bigquery/issues/1531)) ([50e5026](https://github.com/googleapis/python-bigquery/commit/50e502674807b9771d7e26c0e784539bed8f9da6))
+
+## [3.7.0](https://github.com/googleapis/python-bigquery/compare/v3.6.0...v3.7.0) (2023-03-06)
+
+
+### Features
+
+* Add `connection_properties` and `create_session` to `LoadJobConfig` ([#1509](https://github.com/googleapis/python-bigquery/issues/1509)) ([cd0aaa1](https://github.com/googleapis/python-bigquery/commit/cd0aaa15960e9ca7a0aaf411c8e4990f95421816))
+* Add default_query_job_config property and property setter to BQ client ([#1511](https://github.com/googleapis/python-bigquery/issues/1511)) ([a23092c](https://github.com/googleapis/python-bigquery/commit/a23092cad834c6a016f455d46fefa13bb6cdbf0f))
+
+
+### Documentation
+
+* Remove < 3.11 reference from README ([#1502](https://github.com/googleapis/python-bigquery/issues/1502)) ([c7417f4](https://github.com/googleapis/python-bigquery/commit/c7417f43563e20a3e6f1a57f46925fb274b28b07))
+
+## [3.6.0](https://github.com/googleapis/python-bigquery/compare/v3.5.0...v3.6.0) (2023-02-22)
+
+
+### Features
+
+* Adding preserveAsciiControlCharacter to CSVOptions ([#1491](https://github.com/googleapis/python-bigquery/issues/1491)) ([f832e7a](https://github.com/googleapis/python-bigquery/commit/f832e7a0b79f3567a0773ff11630e2f48bed60db))
+
+
+### Bug Fixes
+
+* Annotate optional integer parameters with optional type ([#1487](https://github.com/googleapis/python-bigquery/issues/1487)) ([a190aaa](https://github.com/googleapis/python-bigquery/commit/a190aaa09ae73e8b6a83b7b213247f95fde57615))
+* Loosen ipywidget dependency ([#1504](https://github.com/googleapis/python-bigquery/issues/1504)) ([20d3276](https://github.com/googleapis/python-bigquery/commit/20d3276cc29e9467eef9476d5fd572099d9a3f6f))
+* Removes scope to avoid unnecessary duplication ([#1503](https://github.com/googleapis/python-bigquery/issues/1503)) ([665d7ba](https://github.com/googleapis/python-bigquery/commit/665d7ba74a1b45de1ef51cc75b6860125afc5fe6))
+
+
+### Dependencies
+
+* Update minimum google-cloud-core to 1.6.0 ([a190aaa](https://github.com/googleapis/python-bigquery/commit/a190aaa09ae73e8b6a83b7b213247f95fde57615))
+
+## [3.5.0](https://github.com/googleapis/python-bigquery/compare/v3.4.2...v3.5.0) (2023-01-31)
+
+
+### Features
+
+* Add __str__ method to DatasetReference ([#1477](https://github.com/googleapis/python-bigquery/issues/1477)) ([f32df1f](https://github.com/googleapis/python-bigquery/commit/f32df1fb74e4aea24cd8a4099040ad2f7436e54d))
+* Add preserveAsciiControlCharacter to LoadJobConfig ([#1484](https://github.com/googleapis/python-bigquery/issues/1484)) ([bd1da9a](https://github.com/googleapis/python-bigquery/commit/bd1da9aa0a40b02b7d5409a0b094d8380e255c91))
+
+
+### Documentation
+
+* Adds snippet for creating table with external data config ([#1420](https://github.com/googleapis/python-bigquery/issues/1420)) ([f0ace2a](https://github.com/googleapis/python-bigquery/commit/f0ace2ac2307ef359511a235f80f5ce9e46264c1))
+* Revise delete label table code sample, add TODO to clean up sni… ([#1466](https://github.com/googleapis/python-bigquery/issues/1466)) ([0dab7d2](https://github.com/googleapis/python-bigquery/commit/0dab7d25ace4b63d2984485e7b0c5bb38f20476f))
+* **samples:** Table variable fix ([#1287](https://github.com/googleapis/python-bigquery/issues/1287)) ([a71888a](https://github.com/googleapis/python-bigquery/commit/a71888a60d1e5e5815ab459fe24368ad5b0d032a))
+
+## [3.4.2](https://github.com/googleapis/python-bigquery/compare/v3.4.1...v3.4.2) (2023-01-13)
+
+
+### Bug Fixes
+
+* Add support for python 3.11 ([#1463](https://github.com/googleapis/python-bigquery/issues/1463)) ([730a1de](https://github.com/googleapis/python-bigquery/commit/730a1dec8be49df26a3d805ebd4ad185ba72170d))
+* Require grpcio >= 1.49.1 for python 3.11 ([72b25c5](https://github.com/googleapis/python-bigquery/commit/72b25c52bc4b9a92c4cb187b6230b280d4af905c))
+
+
+### Dependencies
+
+* Remove upper bound on packaging dependency ([#1440](https://github.com/googleapis/python-bigquery/issues/1440)) ([6088129](https://github.com/googleapis/python-bigquery/commit/60881296a35067e7aa025d92b2425572f10fd4ec))
+
+
+### Documentation
+
+* Create sample to write schema file from table ([#1439](https://github.com/googleapis/python-bigquery/issues/1439)) ([093cc68](https://github.com/googleapis/python-bigquery/commit/093cc6852ada29898c4a4d047fd216544ef15bba))
+* Created samples for load table and create table from schema file ([#1436](https://github.com/googleapis/python-bigquery/issues/1436)) ([8ad2e5b](https://github.com/googleapis/python-bigquery/commit/8ad2e5bc1c04bf16fffe4c8773e722b68117c916))
+* Revise create table cmek sample ([#1452](https://github.com/googleapis/python-bigquery/issues/1452)) ([57740e4](https://github.com/googleapis/python-bigquery/commit/57740e49af7418449aec73a6fdd307fcb588c655))
+* Revise get table labels code sample, add TODO to clean up snipp… ([#1464](https://github.com/googleapis/python-bigquery/issues/1464)) ([b5ccbfe](https://github.com/googleapis/python-bigquery/commit/b5ccbfe4eee91d7f481d9708084cd29d0c85e666))
+* Revise label table code samples ([#1451](https://github.com/googleapis/python-bigquery/issues/1451)) ([14ae1f2](https://github.com/googleapis/python-bigquery/commit/14ae1f20538ea00829a1325f91f5e8524234bd0c))
+* Revise sample for nested schema ([#1446](https://github.com/googleapis/python-bigquery/issues/1446)) ([a097631](https://github.com/googleapis/python-bigquery/commit/a0976318fc5ad1620a68250c3e059e2a51d4946d))
+
+## [3.4.1](https://github.com/googleapis/python-bigquery/compare/v3.4.0...v3.4.1) (2022-12-09)
+
+
+### Documentation
+
+* Add info about streaming quota limits to `insert_rows*` methods ([#1409](https://github.com/googleapis/python-bigquery/issues/1409)) ([0f08e9a](https://github.com/googleapis/python-bigquery/commit/0f08e9a8ff638e78006d71acd974de2dff89b5d9))
+
+
+### Dependencies
+
+* make pyarrow and BQ Storage optional dependencies ([e1aa921](https://github.com/googleapis/python-bigquery/commit/e1aa9218ad22f85c9a6cab8b61d013779376a582))
+
+## [3.4.0](https://github.com/googleapis/python-bigquery/compare/v3.3.6...v3.4.0) (2022-11-17)
+
+
+### Features
+
+* Add `reference_file_schema_uri` to LoadJobConfig, ExternalConfig ([#1399](https://github.com/googleapis/python-bigquery/issues/1399)) ([931285f](https://github.com/googleapis/python-bigquery/commit/931285ff85842ab07a0ef2ff9db808181ea3c5e4))
+* Add default value expression ([#1408](https://github.com/googleapis/python-bigquery/issues/1408)) ([207aa50](https://github.com/googleapis/python-bigquery/commit/207aa506ab634bdb13256fa5bd8745ec9de23290))
+* Add More Specific Type Annotations for Row Dictionaries ([#1295](https://github.com/googleapis/python-bigquery/issues/1295)) ([eb49873](https://github.com/googleapis/python-bigquery/commit/eb49873176dee478617eb50472d44703abca53b5))
+
+## [3.3.6](https://github.com/googleapis/python-bigquery/compare/v3.3.4...v3.3.6) (2022-11-02)
+
+
+### Features
+
+* Reconfigure tqdm progress bar in %%bigquery magic ([#1355](https://github.com/googleapis/python-bigquery/issues/1355)) ([506f781](https://github.com/googleapis/python-bigquery/commit/506f781c2dd775193336ab9432f32148250ed81d))
+
+
+### Bug Fixes
+
+* Corrects test for non-existent attribute ([#1395](https://github.com/googleapis/python-bigquery/issues/1395)) ([a80f436](https://github.com/googleapis/python-bigquery/commit/a80f436f2e75a8fb680316f17a22eecb31a7101d))
+* **deps:** Allow protobuf 3.19.5 ([#1379](https://github.com/googleapis/python-bigquery/issues/1379)) ([3e4a074](https://github.com/googleapis/python-bigquery/commit/3e4a074a981eb2920c5f9a711c253565d4844858))
+* **deps:** Allow pyarrow < 11 ([#1393](https://github.com/googleapis/python-bigquery/issues/1393)) ([c898546](https://github.com/googleapis/python-bigquery/commit/c898546d3292f9ec1ba6120cd3f9e2805aa087bb))
+* **deps:** Require requests>=2.21.0 ([#1388](https://github.com/googleapis/python-bigquery/issues/1388)) ([e398336](https://github.com/googleapis/python-bigquery/commit/e39833673582e4a7a34103cfc45603932c9c33b3))
+* Refactor to adapt to changes to shapely dependency ([#1376](https://github.com/googleapis/python-bigquery/issues/1376)) ([2afd278](https://github.com/googleapis/python-bigquery/commit/2afd278febe1eb247adc6278ab59903962a5bb6c))
+
+
+### Documentation
+
+* Fix typos ([#1372](https://github.com/googleapis/python-bigquery/issues/1372)) ([21cc525](https://github.com/googleapis/python-bigquery/commit/21cc525a86a06acfe73e5c5a74ec5f0b61e410f2))
+
+
+### Miscellaneous Chores
+
+* release 3.3.6 ([4fce1d9](https://github.com/googleapis/python-bigquery/commit/4fce1d93b1763703b115a0480a2b97021786aff7))
+
+## [3.3.4](https://github.com/googleapis/python-bigquery/compare/v3.3.3...v3.3.4) (2022-09-29)
+
+
+### Bug Fixes
+
+* **deps:** Require protobuf >= 3.20.2 ([#1369](https://github.com/googleapis/python-bigquery/issues/1369)) ([f13383a](https://github.com/googleapis/python-bigquery/commit/f13383a22d7b1a0a714dc1b1210ad970146bd094))
+
+## [3.3.3](https://github.com/googleapis/python-bigquery/compare/v3.3.2...v3.3.3) (2022-09-28)
+
+
+### Bug Fixes
+
+* Refactors code to account for a tdqm code deprecation ([#1357](https://github.com/googleapis/python-bigquery/issues/1357)) ([1369a9d](https://github.com/googleapis/python-bigquery/commit/1369a9d937b85d6a2a6bf9a672c71620648b1e3e))
+* Validate opentelemetry span job attributes have values ([#1327](https://github.com/googleapis/python-bigquery/issues/1327)) ([8287af1](https://github.com/googleapis/python-bigquery/commit/8287af1299169546f847126f03ae04e48890139e))
+
+
+### Documentation
+
+* **samples:** uses function (create_job) more appropriate to the described sample intent ([5aeedaa](https://github.com/googleapis/python-bigquery/commit/5aeedaa2f4e6a0200d50521dfd90f39f9a24d0cc))
## [3.3.2](https://github.com/googleapis/python-bigquery/compare/v3.3.1...v3.3.2) (2022-08-16)
diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst
index d06598b31..3f8653f4b 100644
--- a/CONTRIBUTING.rst
+++ b/CONTRIBUTING.rst
@@ -22,7 +22,7 @@ In order to add a feature:
documentation.
- The feature must work fully on the following CPython versions:
- 3.7, 3.8, 3.9 and 3.10 on both UNIX and Windows.
+ 3.9, 3.10, 3.11, 3.12, 3.13 and 3.14 on both UNIX and Windows.
- The feature must not add unnecessary dependencies (where
"unnecessary" is of course subjective, but new dependencies should
@@ -72,7 +72,7 @@ We use `nox `__ to instrument our tests.
- To run a single unit test::
- $ nox -s unit-3.10 -- -k
+ $ nox -s unit-3.13 -- -k
.. note::
@@ -143,12 +143,12 @@ Running System Tests
$ nox -s system
# Run a single system test
- $ nox -s system-3.8 -- -k
+ $ nox -s system-3.13 -- -k
.. note::
- System tests are only configured to run under Python 3.8.
+ System tests are only configured to run under Python 3.9 and 3.13.
For expediency, we do not run them in older versions of Python 3.
This alone will not run the tests. You'll need to change some local
@@ -195,11 +195,11 @@ configure them just like the System Tests.
# Run all tests in a folder
$ cd samples/snippets
- $ nox -s py-3.8
+ $ nox -s py-3.9
# Run a single sample test
$ cd samples/snippets
- $ nox -s py-3.8 -- -k
+ $ nox -s py-3.9 -- -k
********************************************
Note About ``README`` as it pertains to PyPI
@@ -221,15 +221,19 @@ Supported Python Versions
We support:
-- `Python 3.7`_
-- `Python 3.8`_
- `Python 3.9`_
- `Python 3.10`_
+- `Python 3.11`_
+- `Python 3.12`_
+- `Python 3.13`_
+- `Python 3.14`_
-.. _Python 3.7: https://docs.python.org/3.7/
-.. _Python 3.8: https://docs.python.org/3.8/
.. _Python 3.9: https://docs.python.org/3.9/
.. _Python 3.10: https://docs.python.org/3.10/
+.. _Python 3.11: https://docs.python.org/3.11/
+.. _Python 3.12: https://docs.python.org/3.12/
+.. _Python 3.13: https://docs.python.org/3.13/
+.. _Python 3.14: https://docs.python.org/3.14/
Supported versions can be found in our ``noxfile.py`` `config`_.
@@ -237,7 +241,7 @@ Supported versions can be found in our ``noxfile.py`` `config`_.
.. _config: https://github.com/googleapis/python-bigquery/blob/main/noxfile.py
-We also explicitly decided to support Python 3 beginning with version 3.7.
+We also explicitly decided to support Python 3 beginning with version 3.9.
Reasons for this include:
- Encouraging use of newest versions of Python 3
diff --git a/MANIFEST.in b/MANIFEST.in
index e783f4c62..d6814cd60 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright 2020 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/README.rst b/README.rst
index 475d055a2..23ed9257d 100644
--- a/README.rst
+++ b/README.rst
@@ -18,7 +18,7 @@ processing power of Google's infrastructure.
.. |versions| image:: https://img.shields.io/pypi/pyversions/google-cloud-bigquery.svg
:target: https://pypi.org/project/google-cloud-bigquery/
.. _BigQuery: https://cloud.google.com/bigquery/what-is-bigquery
-.. _Client Library Documentation: https://googleapis.dev/python/bigquery/latest
+.. _Client Library Documentation: https://cloud.google.com/python/docs/reference/bigquery/latest/summary_overview
.. _Product Documentation: https://cloud.google.com/bigquery/docs/reference/v2/
Quick Start
@@ -52,11 +52,11 @@ dependencies.
Supported Python Versions
^^^^^^^^^^^^^^^^^^^^^^^^^
-Python >= 3.7, < 3.11
+Python >= 3.9
Unsupported Python Versions
^^^^^^^^^^^^^^^^^^^^^^^^^^^
-Python == 2.7, Python == 3.5, Python == 3.6.
+Python == 2.7, Python == 3.5, Python == 3.6, Python == 3.7, and Python == 3.8.
The last version of this library compatible with Python 2.7 and 3.5 is
`google-cloud-bigquery==1.28.0`.
@@ -117,7 +117,7 @@ the BigQuery client the following PyPI packages need to be installed:
.. code-block:: console
- pip install google-cloud-bigquery[opentelemetry] opentelemetry-exporter-google-cloud
+ pip install google-cloud-bigquery[opentelemetry] opentelemetry-exporter-gcp-trace
After installation, OpenTelemetry can be used in the BigQuery
client and in BigQuery jobs. First, however, an exporter must be
@@ -128,12 +128,11 @@ example of this can be found here:
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
- from opentelemetry.sdk.trace.export import BatchExportSpanProcessor
+ from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
+ tracer_provider = TracerProvider()
+ tracer_provider.add_span_processor(BatchSpanProcessor(CloudTraceSpanExporter()))
+ trace.set_tracer_provider(tracer_provider)
- trace.set_tracer_provider(TracerProvider())
- trace.get_tracer_provider().add_span_processor(
- BatchExportSpanProcessor(CloudTraceSpanExporter())
- )
In this example all tracing data will be published to the Google
`Cloud Trace`_ console. For more information on OpenTelemetry, please consult the `OpenTelemetry documentation`_.
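+
+As an illustrative sketch (not a required part of the setup above), any BigQuery
+client call made after the tracer provider is configured is traced, so a quick
+way to verify the exporter is working is to run a trivial query:
+
+.. code-block:: python
+
+    from google.cloud import bigquery
+
+    # Spans for the API calls below are exported to Cloud Trace by the
+    # exporter configured in the previous snippet.
+    client = bigquery.Client()
+    client.query_and_wait("SELECT 1")
+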
diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md
new file mode 100644
index 000000000..7da12c440
--- /dev/null
+++ b/TROUBLESHOOTING.md
@@ -0,0 +1,34 @@
+# Troubleshooting steps
+
+## Enable logging of BQ Storage Read API session creation
+
+Capturing the BQ Storage Read API session ID can help the BigQuery backend team
+debug cases of API instability. Session creation is logged by a module-specific
+logger. To enable these logs, refer to the following code sample:
+
+```python
+import logging
+import google.cloud.bigquery
+
+# Configure the basic logging to show DEBUG level messages
+log_formatter = logging.Formatter(
+ '%(asctime)s - %(levelname)s - %(message)s'
+)
+handler = logging.StreamHandler()
+handler.setFormatter(log_formatter)
+default_logger = logging.getLogger()
+default_logger.setLevel(logging.DEBUG)
+default_logger.addHandler(handler)
+to_dataframe_logger = logging.getLogger("google.cloud.bigquery._pandas_helpers")
+to_dataframe_logger.setLevel(logging.DEBUG)
+to_dataframe_logger.addHandler(handler)
+
+# Example code that touches the BQ Storage Read API.
+bqclient = google.cloud.bigquery.Client()
+results = bqclient.query_and_wait("SELECT * FROM `bigquery-public-data.usa_names.usa_1910_2013`")
+print(results.to_dataframe().head())
+```
+
+In particular, watch for the text "with BQ Storage API session" in the logs
+to get the streaming API session ID to share with your support person.
diff --git a/benchmark/README.md b/benchmark/README.md
index 435926acb..33065807e 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -1,8 +1,128 @@
# BigQuery Benchmark
-This directory contains benchmarks for BigQuery client.
+This directory contains benchmark scripts for the BigQuery client. It is intended
+primarily for project maintainers to measure library performance.
## Usage
-`python benchmark.py queries.json`
+`python benchmark.py`
-BigQuery service caches requests so the benchmark should be run
-at least twice, disregarding the first result.
+
+### Flags
+Run `python benchmark.py -h` for detailed information on available flags.
+
+`--reruns` can be used to override the default number of times a query is rerun. Must be a positive
+integer. Default value is 3.
+
+`--projectid` can be used to run benchmarks in a different project. If unset, the GOOGLE_CLOUD_PROJECT
+ environment variable is used.
+
+`--queryfile` can be used to override the default file which contains queries to be instrumented.
+
+`--table` can be used to specify a table to which benchmarking results should be streamed. The format
+for this string is in BigQuery standard SQL notation without escapes, e.g. `projectid.datasetid.tableid`
+
+`--create_table` can be used to have the benchmarking tool create the destination table prior to streaming.
+
+`--tag` allows arbitrary key:value pairs to be set. This flag can be specified multiple times.
+
+When the `--create_table` flag is set, you must also specify the name of the new table using `--table`.
+
+### Example invocations
+
+Setting all the flags
+```
+python benchmark.py \
+ --reruns 5 \
+ --projectid test_project_id \
+ --table logging_project_id.querybenchmarks.measurements \
+ --create_table \
+ --tag source:myhostname \
+ --tag somekeywithnovalue \
+ --tag experiment:special_environment_thing
+```
+
+Or, a more realistic invocation using shell substitutions:
+```
+python benchmark.py \
+ --reruns 5 \
+ --table $BENCHMARK_TABLE \
+ --tag origin:$(hostname) \
+ --tag branch:$(git branch --show-current) \
+ --tag latestcommit:$(git log --pretty=format:'%H' -n 1)
+```
+
+## Stream Results To A BigQuery Table
+
+When streaming benchmarking results to a BigQuery table, the table schema is as follows:
+```
+[
+ {
+ "name": "groupname",
+ "type": "STRING"
+ },
+ {
+ "name": "name",
+ "type": "STRING"
+ },
+ {
+ "name": "tags",
+ "type": "RECORD",
+ "mode": "REPEATED",
+ "fields": [
+ {
+ "name": "key",
+ "type": "STRING"
+ },
+ {
+ "name": "value",
+ "type": "STRING"
+ }
+ ]
+ },
+ {
+ "name": "SQL",
+ "type": "STRING"
+ },
+ {
+ "name": "runs",
+ "type": "RECORD",
+ "mode": "REPEATED",
+ "fields": [
+ {
+ "name": "errorstring",
+ "type": "STRING"
+ },
+ {
+ "name": "start_time",
+ "type": "TIMESTAMP"
+ },
+ {
+ "name": "query_end_time",
+ "type": "TIMESTAMP"
+ },
+ {
+ "name": "first_row_returned_time",
+ "type": "TIMESTAMP"
+ },
+ {
+ "name": "all_rows_returned_time",
+ "type": "TIMESTAMP"
+ },
+ {
+ "name": "total_rows",
+ "type": "INTEGER"
+ }
+ ]
+ },
+ {
+ "name": "event_time",
+ "type": "TIMESTAMP"
+ }
+]
+```
+
+The table schema is the same as the [benchmark in go](https://github.com/googleapis/google-cloud-go/tree/main/bigquery/benchmarks),
+so results from both languages can be streamed to the same table.
+
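+As a hedged sketch (the table name below is the placeholder used in the example
+invocation above), results streamed to such a table can be read back with this same
+client library, for example to list the most recent benchmark runs:
+
+```python
+from google.cloud import bigquery
+
+client = bigquery.Client()
+rows = client.query_and_wait(
+    """
+    SELECT groupname, name, event_time, ARRAY_LENGTH(runs) AS reruns
+    FROM `logging_project_id.querybenchmarks.measurements`
+    ORDER BY event_time DESC
+    LIMIT 10
+    """
+)
+for row in rows:
+    print(row.groupname, row.name, row.event_time, row.reruns)
+```
+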
+## BigQuery Benchmarks In Other Languages
+* Go: https://github.com/googleapis/google-cloud-go/tree/main/bigquery/benchmarks
+* Java: https://github.com/googleapis/java-bigquery/tree/main/benchmark
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
index 2917f169a..d7dc78678 100644
--- a/benchmark/benchmark.py
+++ b/benchmark/benchmark.py
@@ -1,4 +1,4 @@
-# Copyright 2017 Google LLC
+# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,35 +12,312 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from google.cloud import bigquery
+"""Scripts for benchmarking BigQuery queries performance."""
+
+import argparse
from datetime import datetime
import json
-import sys
-
-if len(sys.argv) < 2:
- raise Exception('need query file, usage: python {0} '.format(sys.argv[0]))
-
-with open(sys.argv[1], 'r') as f:
- queries = json.loads(f.read())
-
-client = bigquery.Client()
-
-for query in queries:
- start_time = datetime.now()
- job = client.query(query)
- rows = job.result()
-
- num_rows = 0
- num_cols = None
- first_byte_time = None
-
- for row in rows:
- if num_rows == 0:
- num_cols = len(row)
- first_byte_time = datetime.now() - start_time
- elif num_cols != len(row):
- raise Exception('found {0} columsn, expected {1}'.format(len(row), num_cols))
- num_rows += 1
- total_time = datetime.now() - start_time
- print("query {0}: {1} rows, {2} cols, first byte {3} sec, total {4} sec"
- .format(query, num_rows, num_cols, first_byte_time.total_seconds(), total_time.total_seconds()))
+import os
+
+from google.api_core import exceptions
+
+from google.cloud import bigquery
+
+_run_schema = [
+ bigquery.SchemaField("groupname", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("name", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField(
+ "tags",
+ "RECORD",
+ mode="REPEATED",
+ fields=[
+ bigquery.SchemaField("key", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("value", "STRING", mode="NULLABLE"),
+ ],
+ ),
+ bigquery.SchemaField("SQL", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField(
+ "runs",
+ "RECORD",
+ mode="REPEATED",
+ fields=[
+ bigquery.SchemaField("errorstring", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("start_time", "TIMESTAMP", mode="NULLABLE"),
+ bigquery.SchemaField("query_end_time", "TIMESTAMP", mode="NULLABLE"),
+ bigquery.SchemaField(
+ "first_row_returned_time", "TIMESTAMP", mode="NULLABLE"
+ ),
+ bigquery.SchemaField(
+ "all_rows_returned_time", "TIMESTAMP", mode="NULLABLE"
+ ),
+ bigquery.SchemaField("total_rows", "INTEGER", mode="NULLABLE"),
+ ],
+ ),
+ bigquery.SchemaField("event_time", "TIMESTAMP", mode="NULLABLE"),
+]
+
+
+def _check_pos_int(value):
+ """Verifies the value is a positive integer."""
+ ivalue = int(value)
+ if ivalue <= 0:
+ raise argparse.ArgumentTypeError(
+ f"Argument rerun should be positive int. Actual value: {value}"
+ )
+ return ivalue
+
+
+def _parse_tag(tag):
+ """Parses input tag into key value pair as a dict."""
+ tagstring = str(tag)
+ key, value = tagstring.split(":")
+ if not key or not value:
+ raise argparse.ArgumentTypeError(
+ "key and value in tag need to be non-empty. Actual value: "
+ + f"key={key}, value={value}"
+ )
+ return {"key": key, "value": value}
+
+
+def _parse_args() -> dict:
+ """Parses input flags."""
+ parser = argparse.ArgumentParser(description="Benchmark for BigQuery.")
+
+ parser.add_argument(
+ "--reruns",
+ action="store",
+ type=_check_pos_int,
+ default=3,
+ metavar="",
+ help="how many times each query is run. Must be a positive integer."
+ + "Default 3 times",
+ )
+
+ parser.add_argument(
+ "--projectid",
+ action="store",
+ type=str,
+ metavar="",
+ help="run benchmarks in a different project. If unset, the "
+ + "GOOGLE_CLOUD_PROJECT environment variable is used",
+ )
+
+ parser.add_argument(
+ "--queryfile",
+ action="store",
+ type=str,
+ metavar="",
+ default="queries.json",
+ help="override the default file which contains queries to be instrumented",
+ )
+
+ parser.add_argument(
+ "--table",
+ action="store",
+ type=str,
+ metavar="",
+ help="specify a table to which benchmarking results should be "
+ + "streamed. The format for this string is in BigQuery standard SQL "
+ + "notation without escapes, e.g. projectid.datasetid.tableid",
+ )
+
+ parser.add_argument(
+ "--create_table",
+ action="store_true",
+ help="let the benchmarking tool create the destination table prior to"
+ + " streaming; if set, also need to set --table to specify table name",
+ )
+
+ parser.add_argument(
+ "--tag",
+ action="append",
+ type=_parse_tag,
+ metavar="",
+ help="set arbitrary key:value pairs, can be set multiple times",
+ )
+
+ args = parser.parse_args()
+ args_dict = vars(args)
+
+ # Verifies that project id is set.
+ if not args_dict.get("projectid"):
+ if projectid_env := os.environ.get("GOOGLE_CLOUD_PROJECT"):
+ args_dict["projectid"] = projectid_env
+ else:
+ raise ValueError(
+ "Must provide --projectid or set "
+ "GOOGLE_CLOUD_PROJECT environment variable"
+ )
+
+ # Verifies that table name is specified when `create_table == True`.
+ if args_dict.get("create_table") and not args_dict.get("table"):
+ raise ValueError(
+ "When --create_table is present, must specify table name with --table"
+ )
+
+ return args_dict
+
+
+def _prepare_table(client, create_table: bool, table_name: str) -> str:
+ """Ensures a table exists, and optionally creates it if directed."""
+
+ # Verifies that table destination is of valid format.
+ parts = table_name.split(".")
+ if len(parts) != 3:
+ raise ValueError(f"Expected table in p.d.t format, got: {table_name}")
+
+ table = bigquery.Table(table_name, schema=_run_schema)
+
+ # Create table if create_table == True.
+ if create_table:
+ table = client.create_table(table)
+ print(f"Created table {table.project}.{table.dataset_id}." f"{table.table_id}")
+
+ # Verifies that table exists.
+ client.get_table(table_name)
+ return table_name
+
+
+def _run_query(client, query: str, rerun: int) -> list:
+ """Runs individual query for `rerun` times, and returns run results."""
+ runs = []
+
+ for _ in range(rerun):
+ print(".", end="", flush=True)
+ run = {}
+ num_rows = 0
+ num_cols = 0
+ start_time = datetime.now()
+ first_row_time = datetime.min
+ end_time = datetime.min
+
+ job = client.query(query)
+ query_end_time = datetime.now()
+
+ try:
+ rows = job.result()
+ for row in rows:
+ if num_rows == 0:
+ num_cols = len(row)
+ first_row_time = datetime.now()
+ elif num_cols != len(row):
+ raise RuntimeError(f"found {len(row)} columns, expected {num_cols}")
+ num_rows += 1
+ end_time = datetime.now()
+ except exceptions.BadRequest as exc:
+ run["errorstring"] = repr(exc)
+
+ run["start_time"] = start_time.isoformat()
+ run["query_end_time"] = query_end_time.isoformat()
+ run["first_row_returned_time"] = first_row_time.isoformat()
+ run["all_rows_returned_time"] = end_time.isoformat()
+ run["total_rows"] = num_rows
+ runs.append(run)
+
+ print("")
+ return runs
+
+
+def _get_delta(time_str_1: str, time_str_2: str) -> str:
+ """Calculates delta of two ISO format time string, and return as a string."""
+ time_1 = datetime.fromisoformat(time_str_1)
+ time_2 = datetime.fromisoformat(time_str_2)
+ delta = time_1 - time_2
+ return str(delta)
+
+
+def _is_datetime_min(time_str: str) -> bool:
+ return datetime.fromisoformat(time_str) == datetime.min
+
+
+def _summary(run: dict) -> str:
+ """Converts run dict to run summary string."""
+ no_val = "NODATA"
+ output = ["QUERYTIME "]
+
+ if not _is_datetime_min(run.get("query_end_time")):
+ output.append(f"{_get_delta(run.get('query_end_time'), run.get('start_time'))}")
+ else:
+ output.append(no_val)
+ output.append(" FIRSTROW ")
+
+ if not _is_datetime_min(run.get("first_row_returned_time")):
+ output.append(
+ f"{_get_delta(run.get('first_row_returned_time'), run.get('start_time'))}"
+ )
+ else:
+ output.append(no_val)
+ output.append(" ALLROWS ")
+
+ if not _is_datetime_min(run.get("all_rows_returned_time")):
+ output.append(
+ f"{_get_delta(run.get('all_rows_returned_time'), run.get('start_time'))}"
+ )
+ else:
+ output.append(no_val)
+
+ if run.get("total_rows"):
+ output.append(f" ROWS {run.get('total_rows')}")
+ if run.get("errorstring"):
+ output.append(f" ERRORED {run.get('errorstring')}")
+
+ return "".join(output)
+
+
+def _print_results(profiles: list):
+ for i, prof in enumerate(profiles):
+ print(f"{i+1}: ({prof['groupname']}:{prof['name']})")
+ print(f"SQL: {prof['SQL']}")
+ print("MEASUREMENTS")
+ for j, run in enumerate(prof["runs"]):
+ print(f"\t\t({j}) {_summary(run)}")
+
+
+def _run_benchmarks(args: dict) -> list:
+ client = bigquery.Client()
+
+ # If we're going to stream results, let's make sure we can do that
+ # before running all the tests.
+ table_id = ""
+ if args.get("create_table") or args.get("table"):
+ table_id = _prepare_table(client, args.get("create_table"), args.get("table"))
+
+ queries_file = args.get("queryfile")
+ with open(queries_file, "r") as f:
+ groups = json.loads(f.read())
+
+ measure_start = datetime.now()
+ profiles = []
+ for group_name, group in groups.items():
+ for name, query in group.items():
+ print(f"Measuring {group_name} : {name}", end="", flush=True)
+ event_time = datetime.now()
+ runs = _run_query(client, query, args.get("reruns"))
+
+ profile = {}
+ profile["groupname"] = group_name
+ profile["name"] = name
+ profile["tags"] = args.get("tag") or []
+ profile["SQL"] = query
+ profile["runs"] = runs
+ profile["event_time"] = event_time.isoformat()
+ profiles.append(profile)
+
+ measure_end = datetime.now()
+ print(f"Measurement time: {str(measure_end-measure_start)}")
+
+ # Stream benchmarking results to table, if required.
+ if table_id:
+ print(f"Streaming test results to table {table_id}...")
+ errors = client.insert_rows_json(table_id, profiles)
+ if errors:
+ raise RuntimeError(f"Cannot upload queries profiles: {errors}")
+ print("Streaming complete.")
+
+ return profiles
+
+
+if __name__ == "__main__":
+ args = _parse_args()
+ profiles = _run_benchmarks(args)
+ _print_results(profiles)
diff --git a/benchmark/queries.json b/benchmark/queries.json
index 13fed38b5..464395619 100644
--- a/benchmark/queries.json
+++ b/benchmark/queries.json
@@ -1,10 +1,16 @@
-[
- "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 10000",
- "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 100000",
- "SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 1000000",
- "SELECT title FROM `bigquery-public-data.samples.wikipedia` ORDER BY title LIMIT 1000",
- "SELECT title, id, timestamp, contributor_ip FROM `bigquery-public-data.samples.wikipedia` WHERE title like 'Blo%' ORDER BY id",
- "SELECT * FROM `bigquery-public-data.baseball.games_post_wide` ORDER BY gameId",
- "SELECT * FROM `bigquery-public-data.samples.github_nested` WHERE repository.has_downloads ORDER BY repository.created_at LIMIT 10000",
- "SELECT repo_name, path FROM `bigquery-public-data.github_repos.files` WHERE path LIKE '%.java' ORDER BY id LIMIT 1000000"
-]
+{
+ "simple-cacheable": {
+ "nycyellow-limit1k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 1000",
+ "nycyellow-limit10k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 10000",
+ "nycyellow-limit100k":"SELECT * FROM `nyc-tlc.yellow.trips` LIMIT 100000",
+ "wikisamples-ordered-limit1k":"SELECT title FROM `bigquery-public-data.samples.wikipedia` ORDER BY title LIMIT 1000"
+ },
+ "simple-nondeterministic": {
+ "current-timestamp":"SELECT CURRENT_TIMESTAMP() as ts",
+ "session-user": "SELECT SESSION_USER() as ts",
+ "literals": "SELECT 1 as i, 3.14 as pi"
+ },
+ "simple-invalid": {
+ "invalid-query": "invalid sql here"
+ }
+}
diff --git a/docs/bigquery/legacy_proto_types.rst b/docs/bigquery/legacy_proto_types.rst
index bc1e93715..36e9984b9 100644
--- a/docs/bigquery/legacy_proto_types.rst
+++ b/docs/bigquery/legacy_proto_types.rst
@@ -3,7 +3,7 @@ Legacy proto-based Types for Google Cloud Bigquery v2 API
.. warning::
These types are provided for backward compatibility only, and are not maintained
- anymore. They might also differ from the types uspported on the backend. It is
+ anymore. They might also differ from the types supported on the backend. It is
therefore strongly advised to migrate to the types found in :doc:`standard_sql`.
Also see the :doc:`3.0.0 Migration Guide<../UPGRADING>` for more information.
diff --git a/docs/conf.py b/docs/conf.py
index 5c83fd79e..df1c18b68 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
-# Copyright 2021 Google LLC
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -61,7 +61,7 @@
# autodoc/autosummary flags
autoclass_content = "both"
-autodoc_default_options = {"members": True, "inherited-members": True}
+autodoc_default_options = {"members": True}
autosummary_generate = True
@@ -109,7 +109,6 @@
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
exclude_patterns = [
- "google/cloud/bigquery_v2/**", # Legacy proto-based types.
"_build",
"**/.nox/**/*",
"samples/AUTHORING_GUIDE.md",
diff --git a/docs/design/query-retries.md b/docs/design/query-retries.md
index 1bac82f5c..08d75302b 100644
--- a/docs/design/query-retries.md
+++ b/docs/design/query-retries.md
@@ -73,7 +73,7 @@ value, the client library uses the jobs.insert REST API to start a query job.
Before it issues this request, it sets a job ID. This job ID remains constant
across API retries.
-If the job ID was randomly generated, and the jobs.insert request and all retries fail, the client library sends a request to the jobs.get API. This covers the case when a query request succeeded, but there was a transient issue that prevented the client from receiving a successful response.
+If the job ID was randomly generated, and the jobs.insert request and all retries fail, the client library sends a request to the jobs.get API. This covers the case when a query request succeeded, but there was a transient issue that prevented the client from receiving a successful response. Note: `jobs.get` requires the location of the query. It will fail with 404 if the location is not specified and the job is not in the US multi-region.
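+
+As an illustrative sketch (the dataset location `europe-west1` is only an assumed
+example), callers can avoid this failure mode by passing the job location explicitly
+when starting the query, so that any fallback `jobs.get` request can find the job:
+
+```python
+from google.cloud import bigquery
+
+client = bigquery.Client()
+# Supplying location= lets a fallback jobs.get call locate the job even
+# when it runs outside the US multi-region.
+job = client.query("SELECT 1", location="europe-west1")
+rows = job.result()
+```
+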
#### Retrying the jobs.query API via the retry parameter
diff --git a/docs/index.rst b/docs/index.rst
index 500c67a7f..6d6ed63f6 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -48,3 +48,8 @@ For a list of all ``google-cloud-bigquery`` releases:
:maxdepth: 2
changelog
+
+.. toctree::
+ :hidden:
+
+ summary_overview.md
diff --git a/docs/magics.rst b/docs/magics.rst
index aa14c6bfa..549d67f76 100644
--- a/docs/magics.rst
+++ b/docs/magics.rst
@@ -6,7 +6,7 @@ in a Jupyter notebook cell.
.. code::
- %load_ext google.cloud.bigquery
+ %load_ext bigquery_magics
This makes the ``%%bigquery`` magic available.
@@ -27,8 +27,9 @@ Running a parameterized query:
:start-after: [START bigquery_jupyter_query_params_scalars]
:end-before: [END bigquery_jupyter_query_params_scalars]
-API Reference
--------------
+BigQuery Magics Reference
+-------------------------
-.. automodule:: google.cloud.bigquery.magics.magics
- :members:
+- `BigQuery Magics Documentation`_
+
+.. _BigQuery Magics Documentation: https://googleapis.dev/python/bigquery-magics/latest
diff --git a/docs/reference.rst b/docs/reference.rst
index b886f1161..d24a73596 100644
--- a/docs/reference.rst
+++ b/docs/reference.rst
@@ -1,8 +1,6 @@
API Reference
~~~~~~~~~~~~~
-.. currentmodule:: google.cloud.bigquery
-
The main concepts with this API are:
- :class:`~google.cloud.bigquery.client.Client` manages connections to the
@@ -18,55 +16,13 @@ The main concepts with this API are:
Client
======
-.. autosummary::
- :toctree: generated
-
- client.Client
+.. automodule:: google.cloud.bigquery.client
Job
===
-Job Configuration
------------------
-
-.. autosummary::
- :toctree: generated
-
- job.QueryJobConfig
- job.CopyJobConfig
- job.LoadJobConfig
- job.ExtractJobConfig
-
-Job Classes
------------
-
-.. autosummary::
- :toctree: generated
-
- job.QueryJob
- job.CopyJob
- job.LoadJob
- job.ExtractJob
-
-Job-Related Types
------------------
-
-.. autosummary::
- :toctree: generated
-
- job.Compression
- job.CreateDisposition
- job.DestinationFormat
- job.DmlStats
- job.Encoding
- job.OperationType
- job.QueryPlanEntry
- job.QueryPlanEntryStep
- job.QueryPriority
- job.ReservationUsage
- job.SourceFormat
- job.WriteDisposition
- job.SchemaUpdateOption
+.. automodule:: google.cloud.bigquery.job
+ :inherited-members:
.. toctree::
:maxdepth: 2
@@ -77,63 +33,28 @@ Job-Related Types
Dataset
=======
-.. autosummary::
- :toctree: generated
-
- dataset.Dataset
- dataset.DatasetListItem
- dataset.DatasetReference
- dataset.AccessEntry
+.. automodule:: google.cloud.bigquery.dataset
Table
=====
-.. autosummary::
- :toctree: generated
-
- table.PartitionRange
- table.RangePartitioning
- table.Row
- table.RowIterator
- table.SnapshotDefinition
- table.CloneDefinition
- table.Table
- table.TableListItem
- table.TableReference
- table.TimePartitioning
- table.TimePartitioningType
+.. automodule:: google.cloud.bigquery.table
Model
=====
-.. autosummary::
- :toctree: generated
-
- model.Model
- model.ModelReference
+.. automodule:: google.cloud.bigquery.model
Routine
=======
-.. autosummary::
- :toctree: generated
-
- routine.DeterminismLevel
- routine.Routine
- routine.RoutineArgument
- routine.RoutineReference
- routine.RoutineType
+.. automodule:: google.cloud.bigquery.routine
Schema
======
-.. autosummary::
- :toctree: generated
-
- schema.SchemaField
- schema.PolicyTagList
-
+.. automodule:: google.cloud.bigquery.schema
Query
=====
@@ -147,25 +68,13 @@ Query
Retries
=======
-.. autosummary::
- :toctree: generated
-
- retry.DEFAULT_RETRY
+.. automodule:: google.cloud.bigquery.retry
External Configuration
======================
-.. autosummary::
- :toctree: generated
-
- external_config.ExternalSourceFormat
- external_config.ExternalConfig
- external_config.BigtableOptions
- external_config.BigtableColumnFamily
- external_config.BigtableColumn
- external_config.CSVOptions
- external_config.GoogleSheetsOptions
+.. automodule:: google.cloud.bigquery.external_config
.. toctree::
:maxdepth: 2
@@ -194,10 +103,7 @@ Enums
Encryption Configuration
========================
-.. autosummary::
- :toctree: generated
-
- encryption_configuration.EncryptionConfiguration
+.. automodule:: google.cloud.bigquery.encryption_configuration
Additional Types
diff --git a/docs/snippets.py b/docs/snippets.py
index 238fd52c3..b4e78e36f 100644
--- a/docs/snippets.py
+++ b/docs/snippets.py
@@ -31,6 +31,11 @@
except (ImportError, AttributeError):
pandas = None
+try:
+ import pyarrow
+except (ImportError, AttributeError):
+ pyarrow = None
+
from google.api_core.exceptions import InternalServerError
from google.api_core.exceptions import ServiceUnavailable
from google.api_core.exceptions import TooManyRequests
@@ -113,194 +118,6 @@ def test_create_client_default_credentials():
assert client is not None
-def test_create_table_nested_repeated_schema(client, to_delete):
- dataset_id = "create_table_nested_repeated_{}".format(_millis())
- project = client.project
- dataset_ref = bigquery.DatasetReference(project, dataset_id)
- dataset = bigquery.Dataset(dataset_ref)
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- # [START bigquery_nested_repeated_schema]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # project = client.project
- # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')
-
- schema = [
- bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
- bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
- bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
- bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
- bigquery.SchemaField(
- "addresses",
- "RECORD",
- mode="REPEATED",
- fields=[
- bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
- bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
- bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
- bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
- bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
- bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
- ],
- ),
- ]
- table_ref = dataset_ref.table("my_table")
- table = bigquery.Table(table_ref, schema=schema)
- table = client.create_table(table) # API request
-
- print("Created table {}".format(table.full_table_id))
- # [END bigquery_nested_repeated_schema]
-
-
-def test_create_table_cmek(client, to_delete):
- dataset_id = "create_table_cmek_{}".format(_millis())
- project = client.project
- dataset_ref = bigquery.DatasetReference(project, dataset_id)
- dataset = bigquery.Dataset(dataset_ref)
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- # [START bigquery_create_table_cmek]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # dataset_id = 'my_dataset'
-
- table_ref = dataset.table("my_table")
- table = bigquery.Table(table_ref)
-
- # Set the encryption key to use for the table.
- # TODO: Replace this key with a key you have created in Cloud KMS.
- kms_key_name = "projects/{}/locations/{}/keyRings/{}/cryptoKeys/{}".format(
- "cloud-samples-tests", "us", "test", "test"
- )
- table.encryption_configuration = bigquery.EncryptionConfiguration(
- kms_key_name=kms_key_name
- )
-
- table = client.create_table(table) # API request
-
- assert table.encryption_configuration.kms_key_name == kms_key_name
- # [END bigquery_create_table_cmek]
-
-
-def test_create_partitioned_table(client, to_delete):
- dataset_id = "create_table_partitioned_{}".format(_millis())
- project = client.project
- dataset_ref = bigquery.DatasetReference(project, dataset_id)
- dataset = client.create_dataset(dataset_ref)
- to_delete.append(dataset)
-
- # [START bigquery_create_table_partitioned]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # project = client.project
- # dataset_ref = bigquery.DatasetReference(project, 'my_dataset')
-
- table_ref = dataset_ref.table("my_partitioned_table")
- schema = [
- bigquery.SchemaField("name", "STRING"),
- bigquery.SchemaField("post_abbr", "STRING"),
- bigquery.SchemaField("date", "DATE"),
- ]
- table = bigquery.Table(table_ref, schema=schema)
- table.time_partitioning = bigquery.TimePartitioning(
- type_=bigquery.TimePartitioningType.DAY,
- field="date", # name of column to use for partitioning
- expiration_ms=7776000000,
- ) # 90 days
-
- table = client.create_table(table)
-
- print(
- "Created table {}, partitioned on column {}".format(
- table.table_id, table.time_partitioning.field
- )
- )
- # [END bigquery_create_table_partitioned]
-
- assert table.time_partitioning.type_ == "DAY"
- assert table.time_partitioning.field == "date"
- assert table.time_partitioning.expiration_ms == 7776000000
-
-
-@pytest.mark.skip(
- reason=(
- "update_table() is flaky "
- "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589"
- )
-)
-def test_manage_table_labels(client, to_delete):
- dataset_id = "label_table_dataset_{}".format(_millis())
- table_id = "label_table_{}".format(_millis())
- project = client.project
- dataset_ref = bigquery.DatasetReference(project, dataset_id)
- dataset = bigquery.Dataset(dataset_ref)
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- table = bigquery.Table(dataset.table(table_id), schema=SCHEMA)
- table = client.create_table(table)
-
- # [START bigquery_label_table]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # project = client.project
- # dataset_ref = bigquery.DatasetReference(project, dataset_id)
- # table_ref = dataset_ref.table('my_table')
- # table = client.get_table(table_ref) # API request
-
- assert table.labels == {}
- labels = {"color": "green"}
- table.labels = labels
-
- table = client.update_table(table, ["labels"]) # API request
-
- assert table.labels == labels
- # [END bigquery_label_table]
-
- # [START bigquery_get_table_labels]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # dataset_id = 'my_dataset'
- # table_id = 'my_table'
-
- project = client.project
- dataset_ref = bigquery.DatasetReference(project, dataset_id)
- table_ref = dataset_ref.table(table_id)
- table = client.get_table(table_ref) # API Request
-
- # View table labels
- print("Table ID: {}".format(table_id))
- print("Labels:")
- if table.labels:
- for label, value in table.labels.items():
- print("\t{}: {}".format(label, value))
- else:
- print("\tTable has no labels defined.")
- # [END bigquery_get_table_labels]
- assert table.labels == labels
-
- # [START bigquery_delete_label_table]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # project = client.project
- # dataset_ref = bigquery.DatasetReference(project, dataset_id)
- # table_ref = dataset_ref.table('my_table')
- # table = client.get_table(table_ref) # API request
-
- # This example table starts with one label
- assert table.labels == {"color": "green"}
- # To delete a label from a table, set its value to None
- table.labels["color"] = None
-
- table = client.update_table(table, ["labels"]) # API request
-
- assert table.labels == {}
- # [END bigquery_delete_label_table]
-
-
@pytest.mark.skip(
reason=(
"update_table() is flaky "
@@ -338,96 +155,6 @@ def test_update_table_description(client, to_delete):
# [END bigquery_update_table_description]
-@pytest.mark.skip(
- reason=(
- "update_table() is flaky "
- "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589"
- )
-)
-def test_update_table_expiration(client, to_delete):
- """Update a table's expiration time."""
- dataset_id = "update_table_expiration_dataset_{}".format(_millis())
- table_id = "update_table_expiration_table_{}".format(_millis())
- project = client.project
- dataset_ref = bigquery.DatasetReference(project, dataset_id)
- dataset = bigquery.Dataset(dataset_ref)
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- table = bigquery.Table(dataset.table(table_id), schema=SCHEMA)
- table = client.create_table(table)
-
- # [START bigquery_update_table_expiration]
- import datetime
-
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # project = client.project
- # dataset_ref = bigquery.DatasetReference(project, dataset_id)
- # table_ref = dataset_ref.table('my_table')
- # table = client.get_table(table_ref) # API request
-
- assert table.expires is None
-
- # set table to expire 5 days from now
- expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(
- days=5
- )
- table.expires = expiration
- table = client.update_table(table, ["expires"]) # API request
-
- # expiration is stored in milliseconds
- margin = datetime.timedelta(microseconds=1000)
- assert expiration - margin <= table.expires <= expiration + margin
- # [END bigquery_update_table_expiration]
-
-
-@pytest.mark.skip(
- reason=(
- "update_table() is flaky "
- "https://github.com/GoogleCloudPlatform/google-cloud-python/issues/5589"
- )
-)
-def test_relax_column(client, to_delete):
- """Updates a schema field from required to nullable."""
- dataset_id = "relax_column_dataset_{}".format(_millis())
- table_id = "relax_column_table_{}".format(_millis())
- project = client.project
- dataset_ref = bigquery.DatasetReference(project, dataset_id)
- dataset = bigquery.Dataset(dataset_ref)
- dataset = client.create_dataset(dataset)
- to_delete.append(dataset)
-
- # [START bigquery_relax_column]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # dataset_id = 'my_dataset'
- # table_id = 'my_table'
-
- original_schema = [
- bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
- bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
- ]
-
- dataset_ref = bigquery.DatasetReference(project, dataset_id)
- table_ref = dataset_ref.table(table_id)
- table = bigquery.Table(table_ref, schema=original_schema)
- table = client.create_table(table)
- assert all(field.mode == "REQUIRED" for field in table.schema)
-
- # SchemaField properties cannot be edited after initialization.
- # To make changes, construct new SchemaField objects.
- relaxed_schema = [
- bigquery.SchemaField("full_name", "STRING", mode="NULLABLE"),
- bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
- ]
- table.schema = relaxed_schema
- table = client.update_table(table, ["schema"])
-
- assert all(field.mode == "NULLABLE" for field in table.schema)
- # [END bigquery_relax_column]
-
-
@pytest.mark.skip(
reason=(
"update_table() is flaky "
@@ -738,13 +465,12 @@ def test_client_query_total_rows(client, capsys):
'WHERE state = "TX" '
"LIMIT 100"
)
- query_job = client.query(
+ results = client.query_and_wait(
query,
# Location must match that of the dataset(s) referenced in the query.
location="US",
- ) # API request - starts the query
+ ) # API request - starts the query and waits for results.
- results = query_job.result() # Wait for query to complete.
print("Got {} rows.".format(results.total_rows))
# [END bigquery_query_total_rows]
@@ -752,48 +478,6 @@ def test_client_query_total_rows(client, capsys):
assert "Got 100 rows." in out
-def test_query_external_gcs_permanent_table(client, to_delete):
- dataset_id = "query_external_gcs_{}".format(_millis())
- project = client.project
- dataset_ref = bigquery.DatasetReference(project, dataset_id)
- dataset = bigquery.Dataset(dataset_ref)
- client.create_dataset(dataset)
- to_delete.append(dataset)
-
- # [START bigquery_query_external_gcs_perm]
- # from google.cloud import bigquery
- # client = bigquery.Client()
- # dataset_id = 'my_dataset'
-
- # Configure the external data source
- dataset_ref = bigquery.DatasetReference(project, dataset_id)
- table_id = "us_states"
- schema = [
- bigquery.SchemaField("name", "STRING"),
- bigquery.SchemaField("post_abbr", "STRING"),
- ]
- table = bigquery.Table(dataset_ref.table(table_id), schema=schema)
- external_config = bigquery.ExternalConfig("CSV")
- external_config.source_uris = [
- "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
- ]
- external_config.options.skip_leading_rows = 1 # optionally skip header row
- table.external_data_configuration = external_config
-
- # Create a permanent table linked to the GCS file
- table = client.create_table(table) # API request
-
- # Example query to find states starting with 'W'
- sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id)
-
- query_job = client.query(sql) # API request
-
- w_states = list(query_job) # Waits for query to finish
- print("There are {} states with names starting with W.".format(len(w_states)))
- # [END bigquery_query_external_gcs_perm]
- assert len(w_states) == 4
-
-
def test_ddl_create_view(client, to_delete, capsys):
"""Create a view via a DDL query."""
project = client.project
@@ -866,7 +550,7 @@ def test_query_results_as_dataframe(client):
LIMIT 10
"""
- df = client.query(sql).to_dataframe()
+ df = client.query_and_wait(sql).to_dataframe()
# [END bigquery_query_results_dataframe]
assert isinstance(df, pandas.DataFrame)
assert len(list(df)) == 2 # verify the number of columns
diff --git a/docs/summary_overview.md b/docs/summary_overview.md
new file mode 100644
index 000000000..6dd228e13
--- /dev/null
+++ b/docs/summary_overview.md
@@ -0,0 +1,22 @@
+[
+This is a templated file. Adding content to this file may result in it being
+reverted. Instead, if you want to place additional content, create an
+"overview_content.md" file in the `docs/` directory. The Sphinx tool will
+pick up the content and merge it into the generated docs.
+]: #
+
+# Google Cloud BigQuery API
+
+Overview of the APIs available for Google Cloud BigQuery API.
+
+## All entries
+
+Classes, methods and properties & attributes for
+Google Cloud BigQuery API.
+
+[classes](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_class.html)
+
+[methods](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_method.html)
+
+[properties and
+attributes](https://cloud.google.com/python/docs/reference/bigquery/latest/summary_property.html)
diff --git a/docs/usage/encryption.rst b/docs/usage/encryption.rst
index 6652f0565..3e6d5aacc 100644
--- a/docs/usage/encryption.rst
+++ b/docs/usage/encryption.rst
@@ -10,7 +10,7 @@ in the BigQuery documentation for more details.
Create a new table, using a customer-managed encryption key from
Cloud KMS to encrypt it.
-.. literalinclude:: ../snippets.py
+.. literalinclude:: ../samples/snippets/create_table_cmek.py
:language: python
:dedent: 4
:start-after: [START bigquery_create_table_cmek]
diff --git a/docs/usage/queries.rst b/docs/usage/queries.rst
index fc57e54de..56be8497e 100644
--- a/docs/usage/queries.rst
+++ b/docs/usage/queries.rst
@@ -5,9 +5,9 @@ Querying data
^^^^^^^^^^^^^
Run a query and wait for it to finish with the
-:func:`~google.cloud.bigquery.client.Client.query` method:
+:func:`~google.cloud.bigquery.client.Client.query_and_wait` method:
-.. literalinclude:: ../samples/client_query.py
+.. literalinclude:: ../samples/snippets/client_query.py
:language: python
:dedent: 4
:start-after: [START bigquery_query]
diff --git a/docs/usage/tables.rst b/docs/usage/tables.rst
index d924fe214..a4f42b15c 100644
--- a/docs/usage/tables.rst
+++ b/docs/usage/tables.rst
@@ -58,6 +58,15 @@ Create an empty table with the
:start-after: [START bigquery_create_table]
:end-before: [END bigquery_create_table]
+Create a table using an external data source with the
+:func:`~google.cloud.bigquery.client.Client.create_table` method:
+
+.. literalinclude:: ../samples/snippets/create_table_external_data_configuration.py
+ :language: python
+ :dedent: 4
+ :start-after: [START bigquery_create_table_external_data_configuration]
+ :end-before: [END bigquery_create_table_external_data_configuration]
+
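The referenced sample file is not part of this diff. For orientation, a minimal sketch of the pattern it documents, modeled on the `bigquery_query_external_gcs_perm` snippet removed from docs/snippets.py above; the dataset and table IDs are placeholders and the dataset is assumed to already exist.

from google.cloud import bigquery

client = bigquery.Client()
dataset_ref = bigquery.DatasetReference(client.project, "my_dataset")  # placeholder IDs

schema = [
    bigquery.SchemaField("name", "STRING"),
    bigquery.SchemaField("post_abbr", "STRING"),
]
table = bigquery.Table(dataset_ref.table("us_states"), schema=schema)

# Back the table with a CSV file in Cloud Storage instead of managed storage.
external_config = bigquery.ExternalConfig("CSV")
external_config.source_uris = [
    "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
]
external_config.options.skip_leading_rows = 1  # skip the header row
table.external_data_configuration = external_config

table = client.create_table(table)  # API request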
Create a clustered table with the
:func:`~google.cloud.bigquery.client.Client.create_table` method:
@@ -304,4 +313,4 @@ Replace the table data with a Parquet file from Cloud Storage:
:language: python
:dedent: 4
:start-after: [START bigquery_load_table_gcs_parquet_truncate]
- :end-before: [END bigquery_load_table_gcs_parquet_truncate]
\ No newline at end of file
+ :end-before: [END bigquery_load_table_gcs_parquet_truncate]
diff --git a/google/cloud/bigquery/__init__.py b/google/cloud/bigquery/__init__.py
index 5a4520476..904bea3d4 100644
--- a/google/cloud/bigquery/__init__.py
+++ b/google/cloud/bigquery/__init__.py
@@ -27,6 +27,7 @@
- :class:`~google.cloud.bigquery.table.Table` represents a single "relation".
"""
+import warnings
from google.cloud.bigquery import version as bigquery_version
@@ -42,6 +43,9 @@
from google.cloud.bigquery.enums import KeyResultStatementKind
from google.cloud.bigquery.enums import SqlTypeNames
from google.cloud.bigquery.enums import StandardSqlTypeNames
+from google.cloud.bigquery.exceptions import LegacyBigQueryStorageError
+from google.cloud.bigquery.exceptions import LegacyPandasError
+from google.cloud.bigquery.exceptions import LegacyPyarrowError
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery.external_config import BigtableOptions
from google.cloud.bigquery.external_config import BigtableColumnFamily
@@ -81,6 +85,8 @@
from google.cloud.bigquery.query import ConnectionProperty
from google.cloud.bigquery.query import ScalarQueryParameter
from google.cloud.bigquery.query import ScalarQueryParameterType
+from google.cloud.bigquery.query import RangeQueryParameter
+from google.cloud.bigquery.query import RangeQueryParameterType
from google.cloud.bigquery.query import SqlParameterScalarTypes
from google.cloud.bigquery.query import StructQueryParameter
from google.cloud.bigquery.query import StructQueryParameterType
@@ -91,8 +97,11 @@
from google.cloud.bigquery.routine import RoutineArgument
from google.cloud.bigquery.routine import RoutineReference
from google.cloud.bigquery.routine import RoutineType
+from google.cloud.bigquery.routine import RemoteFunctionOptions
+from google.cloud.bigquery.routine import ExternalRuntimeOptions
from google.cloud.bigquery.schema import PolicyTagList
from google.cloud.bigquery.schema import SchemaField
+from google.cloud.bigquery.schema import FieldElementType
from google.cloud.bigquery.standard_sql import StandardSqlDataType
from google.cloud.bigquery.standard_sql import StandardSqlField
from google.cloud.bigquery.standard_sql import StandardSqlStructType
@@ -107,6 +116,24 @@
from google.cloud.bigquery.table import TimePartitioningType
from google.cloud.bigquery.table import TimePartitioning
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
+from google.cloud.bigquery import _versions_helpers
+
+try:
+ import bigquery_magics # type: ignore
+except ImportError:
+ bigquery_magics = None
+
+sys_major, sys_minor, sys_micro = _versions_helpers.extract_runtime_version()
+
+if sys_major == 3 and sys_minor in (7, 8):
+ warnings.warn(
+ "The python-bigquery library no longer supports Python 3.7 "
+ "and Python 3.8. "
+ f"Your Python version is {sys_major}.{sys_minor}.{sys_micro}. We "
+ "recommend that you update soon to ensure ongoing support. For "
+ "more details, see: [Google Cloud Client Libraries Supported Python Versions policy](https://cloud.google.com/python/docs/supported-python-versions)",
+ FutureWarning,
+ )
__all__ = [
"__version__",
@@ -118,10 +145,12 @@
"ArrayQueryParameter",
"ScalarQueryParameter",
"StructQueryParameter",
+ "RangeQueryParameter",
"ArrayQueryParameterType",
"ScalarQueryParameterType",
"SqlParameterScalarTypes",
"StructQueryParameterType",
+ "RangeQueryParameterType",
# Datasets
"Dataset",
"DatasetReference",
@@ -152,8 +181,11 @@
"Routine",
"RoutineArgument",
"RoutineReference",
+ "RemoteFunctionOptions",
+ "ExternalRuntimeOptions",
# Shared helpers
"SchemaField",
+ "FieldElementType",
"PolicyTagList",
"UDFResource",
"ExternalConfig",
@@ -195,13 +227,25 @@
"WriteDisposition",
# EncryptionConfiguration
"EncryptionConfiguration",
+ # Custom exceptions
+ "LegacyBigQueryStorageError",
+ "LegacyPyarrowError",
+ "LegacyPandasError",
]
def load_ipython_extension(ipython):
"""Called by IPython when this module is loaded as an IPython extension."""
- from google.cloud.bigquery.magics.magics import _cell_magic
-
- ipython.register_magic_function(
- _cell_magic, magic_kind="cell", magic_name="bigquery"
+ warnings.warn(
+ "%load_ext google.cloud.bigquery is deprecated. Install bigquery-magics package and use `%load_ext bigquery_magics`, instead.",
+ category=FutureWarning,
)
+
+ if bigquery_magics is not None:
+ bigquery_magics.load_ipython_extension(ipython)
+ else:
+ from google.cloud.bigquery.magics.magics import _cell_magic
+
+ ipython.register_magic_function(
+ _cell_magic, magic_kind="cell", magic_name="bigquery"
+ )
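For reference, a minimal sketch of the migration this encourages, run from an IPython or notebook session; it assumes a missing bigquery-magics package surfaces as ImportError when loading the extension.

from IPython import get_ipython

ip = get_ipython()  # None outside of IPython/Jupyter
if ip is not None:
    try:
        # Preferred going forward: the standalone bigquery-magics package.
        ip.run_line_magic("load_ext", "bigquery_magics")
    except ImportError:
        # Deprecated path: emits a FutureWarning and, when bigquery_magics is
        # absent, falls back to the in-package cell magic.
        ip.run_line_magic("load_ext", "google.cloud.bigquery")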
diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py
index b59bc86d3..c7d7705e0 100644
--- a/google/cloud/bigquery/_helpers.py
+++ b/google/cloud/bigquery/_helpers.py
@@ -17,10 +17,13 @@
import base64
import datetime
import decimal
+import json
import math
import re
import os
-from typing import Optional, Union
+import textwrap
+import warnings
+from typing import Any, Optional, Tuple, Type, Union
from dateutil import relativedelta
from google.cloud._helpers import UTC # type: ignore
@@ -29,8 +32,10 @@
from google.cloud._helpers import _RFC3339_MICROS
from google.cloud._helpers import _RFC3339_NO_FRACTION
from google.cloud._helpers import _to_bytes
+from google.auth import credentials as ga_credentials # type: ignore
+from google.api_core import client_options as client_options_lib
-import packaging.version
+TimeoutType = Union[float, None]
_RFC3339_MICROS_NO_ZULU = "%Y-%m-%dT%H:%M:%S.%f"
_TIMEONLY_WO_MICROS = "%H:%M:%S"
@@ -49,8 +54,7 @@
    r"(?P<days>-?\d+) "
    r"(?P<time_sign>-?)(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)\.?(?P<fraction>\d*)?$"
)
-
-_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
+_RANGE_PATTERN = re.compile(r"\[.*, .*\)")
BIGQUERY_EMULATOR_HOST = "BIGQUERY_EMULATOR_HOST"
"""Environment variable defining host for emulator."""
@@ -58,71 +62,71 @@
_DEFAULT_HOST = "https://bigquery.googleapis.com"
"""Default host for JSON API."""
+_DEFAULT_HOST_TEMPLATE = "https://bigquery.{UNIVERSE_DOMAIN}"
+"""Templatized endpoint format."""
-def _get_bigquery_host():
- return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST)
-
-
-class BQStorageVersions:
- """Version comparisons for google-cloud-bigqueyr-storage package."""
-
- def __init__(self):
- self._installed_version = None
-
- @property
- def installed_version(self) -> packaging.version.Version:
- """Return the parsed version of google-cloud-bigquery-storage."""
- if self._installed_version is None:
- from google.cloud import bigquery_storage
+_DEFAULT_UNIVERSE = "googleapis.com"
+"""Default universe for the JSON API."""
- self._installed_version = packaging.version.parse(
- # Use 0.0.0, since it is earlier than any released version.
- # Legacy versions also have the same property, but
- # creating a LegacyVersion has been deprecated.
- # https://github.com/pypa/packaging/issues/321
- getattr(bigquery_storage, "__version__", "0.0.0")
- )
-
- return self._installed_version
+_UNIVERSE_DOMAIN_ENV = "GOOGLE_CLOUD_UNIVERSE_DOMAIN"
+"""Environment variable for setting universe domain."""
- @property
- def is_read_session_optional(self) -> bool:
- """True if read_session is optional to rows().
+_SUPPORTED_RANGE_ELEMENTS = {"TIMESTAMP", "DATETIME", "DATE"}
- See: https://github.com/googleapis/python-bigquery-storage/pull/228
- """
- return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION
+def _get_client_universe(
+ client_options: Optional[Union[client_options_lib.ClientOptions, dict]]
+) -> str:
+ """Retrieves the specified universe setting.
-class PyarrowVersions:
- """Version comparisons for pyarrow package."""
+ Args:
+ client_options: specified client options.
+ Returns:
+ str: resolved universe setting.
- def __init__(self):
- self._installed_version = None
+ """
+ if isinstance(client_options, dict):
+ client_options = client_options_lib.from_dict(client_options)
+ universe = _DEFAULT_UNIVERSE
+ options_universe = getattr(client_options, "universe_domain", None)
+ if (
+ options_universe
+ and isinstance(options_universe, str)
+ and len(options_universe) > 0
+ ):
+ universe = options_universe
+ else:
+ env_universe = os.getenv(_UNIVERSE_DOMAIN_ENV)
+ if isinstance(env_universe, str) and len(env_universe) > 0:
+ universe = env_universe
+ return universe
- @property
- def installed_version(self) -> packaging.version.Version:
- """Return the parsed version of pyarrow."""
- if self._installed_version is None:
- import pyarrow # type: ignore
- self._installed_version = packaging.version.parse(
- # Use 0.0.0, since it is earlier than any released version.
- # Legacy versions also have the same property, but
- # creating a LegacyVersion has been deprecated.
- # https://github.com/pypa/packaging/issues/321
- getattr(pyarrow, "__version__", "0.0.0")
- )
+def _validate_universe(client_universe: str, credentials: ga_credentials.Credentials):
+ """Validates that client provided universe and universe embedded in credentials match.
- return self._installed_version
+ Args:
+ client_universe (str): The universe domain configured via the client options.
+ credentials (ga_credentials.Credentials): The credentials being used in the client.
- @property
- def use_compliant_nested_type(self) -> bool:
- return self.installed_version.major >= 4
+ Raises:
+ ValueError: when client_universe does not match the universe in credentials.
+ """
+ if hasattr(credentials, "universe_domain"):
+ cred_universe = getattr(credentials, "universe_domain")
+ if isinstance(cred_universe, str):
+ if client_universe != cred_universe:
+ raise ValueError(
+ "The configured universe domain "
+ f"({client_universe}) does not match the universe domain "
+ f"found in the credentials ({cred_universe}). "
+ "If you haven't configured the universe domain explicitly, "
+ f"`{_DEFAULT_UNIVERSE}` is the default."
+ )
-BQ_STORAGE_VERSIONS = BQStorageVersions()
-PYARROW_VERSIONS = PyarrowVersions()
+def _get_bigquery_host():
+ return os.environ.get(BIGQUERY_EMULATOR_HOST, _DEFAULT_HOST)
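A minimal sketch of the resolution order implemented above (explicit client option, then the GOOGLE_CLOUD_UNIVERSE_DOMAIN environment variable, then the default); it assumes a google-api-core version whose ClientOptions accepts universe_domain, and the domain value is a placeholder.

from google.api_core import client_options as client_options_lib
from google.cloud.bigquery import _helpers

# An explicit client option takes precedence.
options = client_options_lib.ClientOptions(universe_domain="example-universe.test")
assert _helpers._get_client_universe(options) == "example-universe.test"

# A plain dict is also accepted; it is converted via client_options_lib.from_dict().
assert (
    _helpers._get_client_universe({"universe_domain": "example-universe.test"})
    == "example-universe.test"
)

# With no option set and GOOGLE_CLOUD_UNIVERSE_DOMAIN unset, the default applies.
assert _helpers._get_client_universe(None) == "googleapis.com"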
def _not_null(value, field):
@@ -130,202 +134,320 @@ def _not_null(value, field):
return value is not None or (field is not None and field.mode != "NULLABLE")
-def _int_from_json(value, field):
- """Coerce 'value' to an int, if set or not nullable."""
- if _not_null(value, field):
- return int(value)
+class CellDataParser:
+ """Converter from BigQuery REST resource to Python value for RowIterator and similar classes.
+ See: "rows" field of
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/list and
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults.
+ """
-def _interval_from_json(
- value: Optional[str], field
-) -> Optional[relativedelta.relativedelta]:
- """Coerce 'value' to an interval, if set or not nullable."""
- if not _not_null(value, field):
- return None
- if value is None:
- raise TypeError(f"got {value} for REQUIRED field: {repr(field)}")
-
- parsed = _INTERVAL_PATTERN.match(value)
- if parsed is None:
- raise ValueError(f"got interval: '{value}' with unexpected format")
-
- calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1
- years = calendar_sign * int(parsed.group("years"))
- months = calendar_sign * int(parsed.group("months"))
- days = int(parsed.group("days"))
- time_sign = -1 if parsed.group("time_sign") == "-" else 1
- hours = time_sign * int(parsed.group("hours"))
- minutes = time_sign * int(parsed.group("minutes"))
- seconds = time_sign * int(parsed.group("seconds"))
- fraction = parsed.group("fraction")
- microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0
-
- return relativedelta.relativedelta(
- years=years,
- months=months,
- days=days,
- hours=hours,
- minutes=minutes,
- seconds=seconds,
- microseconds=microseconds,
- )
+ def to_py(self, resource, field):
+ def default_converter(value, field):
+ _warn_unknown_field_type(field)
+ return value
+ converter = getattr(
+ self, f"{field.field_type.lower()}_to_py", default_converter
+ )
+ if field.mode == "REPEATED":
+ return [converter(item["v"], field) for item in resource]
+ else:
+ return converter(resource, field)
+
+ def bool_to_py(self, value, field):
+ """Coerce 'value' to a bool, if set or not nullable."""
+ if _not_null(value, field):
+ # TODO(tswast): Why does _not_null care if the field is NULLABLE or
+ # REQUIRED? Do we actually need such client-side validation?
+ if value is None:
+ raise TypeError(f"got None for required boolean field {field}")
+ return value.lower() in ("t", "true", "1")
+
+ def boolean_to_py(self, value, field):
+ """Coerce 'value' to a bool, if set or not nullable."""
+ return self.bool_to_py(value, field)
+
+ def integer_to_py(self, value, field):
+ """Coerce 'value' to an int, if set or not nullable."""
+ if _not_null(value, field):
+ return int(value)
+
+ def int64_to_py(self, value, field):
+ """Coerce 'value' to an int, if set or not nullable."""
+ return self.integer_to_py(value, field)
+
+ def interval_to_py(
+ self, value: Optional[str], field
+ ) -> Optional[relativedelta.relativedelta]:
+ """Coerce 'value' to an interval, if set or not nullable."""
+ if not _not_null(value, field):
+ return None
+ if value is None:
+ raise TypeError(f"got {value} for REQUIRED field: {repr(field)}")
+
+ parsed = _INTERVAL_PATTERN.match(value)
+ if parsed is None:
+ raise ValueError(
+ textwrap.dedent(
+ f"""
+ Got interval: '{value}' with unexpected format.
+ Expected interval in canonical format of "[sign]Y-M [sign]D [sign]H:M:S[.F]".
+ See:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#interval_type
+ for more information.
+ """
+ ),
+ )
-def _float_from_json(value, field):
- """Coerce 'value' to a float, if set or not nullable."""
- if _not_null(value, field):
- return float(value)
+ calendar_sign = -1 if parsed.group("calendar_sign") == "-" else 1
+ years = calendar_sign * int(parsed.group("years"))
+ months = calendar_sign * int(parsed.group("months"))
+ days = int(parsed.group("days"))
+ time_sign = -1 if parsed.group("time_sign") == "-" else 1
+ hours = time_sign * int(parsed.group("hours"))
+ minutes = time_sign * int(parsed.group("minutes"))
+ seconds = time_sign * int(parsed.group("seconds"))
+ fraction = parsed.group("fraction")
+ microseconds = time_sign * int(fraction.ljust(6, "0")[:6]) if fraction else 0
+
+ return relativedelta.relativedelta(
+ years=years,
+ months=months,
+ days=days,
+ hours=hours,
+ minutes=minutes,
+ seconds=seconds,
+ microseconds=microseconds,
+ )
+ def float_to_py(self, value, field):
+ """Coerce 'value' to a float, if set or not nullable."""
+ if _not_null(value, field):
+ return float(value)
-def _decimal_from_json(value, field):
- """Coerce 'value' to a Decimal, if set or not nullable."""
- if _not_null(value, field):
- return decimal.Decimal(value)
+ def float64_to_py(self, value, field):
+ """Coerce 'value' to a float, if set or not nullable."""
+ return self.float_to_py(value, field)
+ def numeric_to_py(self, value, field):
+ """Coerce 'value' to a Decimal, if set or not nullable."""
+ if _not_null(value, field):
+ return decimal.Decimal(value)
-def _bool_from_json(value, field):
- """Coerce 'value' to a bool, if set or not nullable."""
- if _not_null(value, field):
- return value.lower() in ["t", "true", "1"]
+ def bignumeric_to_py(self, value, field):
+ """Coerce 'value' to a Decimal, if set or not nullable."""
+ return self.numeric_to_py(value, field)
+ def string_to_py(self, value, _):
+ """NOOP string -> string coercion"""
+ return value
-def _string_from_json(value, _):
- """NOOP string -> string coercion"""
- return value
+ def geography_to_py(self, value, _):
+ """NOOP string -> string coercion"""
+ return value
+ def bytes_to_py(self, value, field):
+ """Base64-decode value"""
+ if _not_null(value, field):
+ return base64.standard_b64decode(_to_bytes(value))
+
+ def timestamp_to_py(self, value, field):
+ """Coerce 'value' to a datetime, if set or not nullable."""
+ if _not_null(value, field):
+            # value will be an integer in seconds, to microsecond precision, in UTC.
+ return _datetime_from_microseconds(int(value))
+
+ def datetime_to_py(self, value, field):
+ """Coerce 'value' to a datetime, if set or not nullable.
+
+ Args:
+ value (str): The timestamp.
+ field (google.cloud.bigquery.schema.SchemaField):
+ The field corresponding to the value.
+
+ Returns:
+ Optional[datetime.datetime]:
+ The parsed datetime object from
+ ``value`` if the ``field`` is not null (otherwise it is
+ :data:`None`).
+ """
+ if _not_null(value, field):
+ if "." in value:
+ # YYYY-MM-DDTHH:MM:SS.ffffff
+ return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU)
+ else:
+ # YYYY-MM-DDTHH:MM:SS
+ return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION)
+ else:
+ return None
+
+ def date_to_py(self, value, field):
+ """Coerce 'value' to a datetime date, if set or not nullable"""
+ if _not_null(value, field):
+ # value will be a string, in YYYY-MM-DD form.
+ return _date_from_iso8601_date(value)
+
+ def time_to_py(self, value, field):
+        """Coerce 'value' to a datetime time, if set or not nullable."""
+ if _not_null(value, field):
+ if len(value) == 8: # HH:MM:SS
+ fmt = _TIMEONLY_WO_MICROS
+ elif len(value) == 15: # HH:MM:SS.micros
+ fmt = _TIMEONLY_W_MICROS
+ else:
+ raise ValueError(
+ textwrap.dedent(
+ f"""
+ Got {repr(value)} with unknown time format.
+ Expected HH:MM:SS or HH:MM:SS.micros. See
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type
+ for more information.
+ """
+ ),
+ )
+ return datetime.datetime.strptime(value, fmt).time()
+
+ def record_to_py(self, value, field):
+ """Coerce 'value' to a mapping, if set or not nullable."""
+ if _not_null(value, field):
+ record = {}
+ record_iter = zip(field.fields, value["f"])
+ for subfield, cell in record_iter:
+ record[subfield.name] = self.to_py(cell["v"], subfield)
+ return record
+
+ def struct_to_py(self, value, field):
+ """Coerce 'value' to a mapping, if set or not nullable."""
+ return self.record_to_py(value, field)
+
+ def json_to_py(self, value, field):
+ """Coerce 'value' to a Pythonic JSON representation."""
+ if _not_null(value, field):
+ return json.loads(value)
+ else:
+ return None
+
+ def _range_element_to_py(self, value, field_element_type):
+ """Coerce 'value' to a range element value."""
+ # Avoid circular imports by importing here.
+ from google.cloud.bigquery import schema
+
+ if value == "UNBOUNDED":
+ return None
+ if field_element_type.element_type in _SUPPORTED_RANGE_ELEMENTS:
+ return self.to_py(
+ value,
+ schema.SchemaField("placeholder", field_element_type.element_type),
+ )
+ else:
+ raise ValueError(
+ textwrap.dedent(
+ f"""
+ Got unsupported range element type: {field_element_type.element_type}.
+                    Expected one of {repr(_SUPPORTED_RANGE_ELEMENTS)}. See:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declare_a_range_type
+ for more information.
+ """
+ ),
+ )
-def _bytes_from_json(value, field):
- """Base64-decode value"""
- if _not_null(value, field):
- return base64.standard_b64decode(_to_bytes(value))
+ def range_to_py(self, value, field):
+ """Coerce 'value' to a range, if set or not nullable.
+ Args:
+ value (str): The literal representation of the range.
+ field (google.cloud.bigquery.schema.SchemaField):
+ The field corresponding to the value.
-def _timestamp_from_json(value, field):
- """Coerce 'value' to a datetime, if set or not nullable."""
- if _not_null(value, field):
- # value will be a integer in seconds, to microsecond precision, in UTC.
- return _datetime_from_microseconds(int(value))
+ Returns:
+ Optional[dict]:
+ The parsed range object from ``value`` if the ``field`` is not
+ null (otherwise it is :data:`None`).
+ """
+ if _not_null(value, field):
+ if _RANGE_PATTERN.match(value):
+ start, end = value[1:-1].split(", ")
+ start = self._range_element_to_py(start, field.range_element_type)
+ end = self._range_element_to_py(end, field.range_element_type)
+ return {"start": start, "end": end}
+ else:
+ raise ValueError(
+ textwrap.dedent(
+ f"""
+ Got unknown format for range value: {value}.
+ Expected format '[lower_bound, upper_bound)'. See:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_with_literal
+ for more information.
+ """
+ ),
+ )
-def _timestamp_query_param_from_json(value, field):
- """Coerce 'value' to a datetime, if set or not nullable.
+CELL_DATA_PARSER = CellDataParser()
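A minimal sketch of the dispatch above: to_py() looks up a <field_type>_to_py method by name and falls back to a pass-through (plus a FutureWarning) for unknown types. The cell values mimic the REST wire format; the RANGE example assumes SchemaField accepts a FieldElementType for range_element_type.

import datetime

from google.cloud.bigquery import schema
from google.cloud.bigquery._helpers import CELL_DATA_PARSER

# Scalar cells arrive as strings and are coerced based on the field type.
assert CELL_DATA_PARSER.to_py("42", schema.SchemaField("age", "INTEGER")) == 42
assert CELL_DATA_PARSER.to_py("true", schema.SchemaField("active", "BOOL")) is True

# REPEATED cells arrive as [{"v": ...}, ...] and are converted item by item.
repeated = schema.SchemaField("scores", "FLOAT", mode="REPEATED")
assert CELL_DATA_PARSER.to_py([{"v": "1.5"}, {"v": "2.5"}], repeated) == [1.5, 2.5]

# RANGE cells use the '[lower, upper)' literal form handled by range_to_py above.
range_field = schema.SchemaField(
    "window", "RANGE", range_element_type=schema.FieldElementType("DATE")
)
assert CELL_DATA_PARSER.to_py("[2020-01-01, UNBOUNDED)", range_field) == {
    "start": datetime.date(2020, 1, 1),
    "end": None,
}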
- Args:
- value (str): The timestamp.
- field (google.cloud.bigquery.schema.SchemaField):
- The field corresponding to the value.
+class DataFrameCellDataParser(CellDataParser):
+ """Override of CellDataParser to handle differences in expression of values in DataFrame-like outputs.
- Returns:
- Optional[datetime.datetime]:
- The parsed datetime object from
- ``value`` if the ``field`` is not null (otherwise it is
- :data:`None`).
+ This is used to turn the output of the REST API into a pyarrow Table,
+ emulating the serialized arrow from the BigQuery Storage Read API.
"""
- if _not_null(value, field):
- # Canonical formats for timestamps in BigQuery are flexible. See:
- # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type
- # The separator between the date and time can be 'T' or ' '.
- value = value.replace(" ", "T", 1)
- # The UTC timezone may be formatted as Z or +00:00.
- value = value.replace("Z", "")
- value = value.replace("+00:00", "")
-
- if "." in value:
- # YYYY-MM-DDTHH:MM:SS.ffffff
- return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU).replace(
- tzinfo=UTC
- )
- else:
- # YYYY-MM-DDTHH:MM:SS
- return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION).replace(
- tzinfo=UTC
- )
- else:
- return None
+ def json_to_py(self, value, _):
+ """No-op because DataFrame expects string for JSON output."""
+ return value
-def _datetime_from_json(value, field):
- """Coerce 'value' to a datetime, if set or not nullable.
- Args:
- value (str): The timestamp.
- field (google.cloud.bigquery.schema.SchemaField):
- The field corresponding to the value.
+DATA_FRAME_CELL_DATA_PARSER = DataFrameCellDataParser()
- Returns:
- Optional[datetime.datetime]:
- The parsed datetime object from
- ``value`` if the ``field`` is not null (otherwise it is
- :data:`None`).
+
+class ScalarQueryParamParser(CellDataParser):
+ """Override of CellDataParser to handle the differences in the response from query params.
+
+ See: "value" field of
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/QueryParameter#QueryParameterValue
"""
- if _not_null(value, field):
- if "." in value:
- # YYYY-MM-DDTHH:MM:SS.ffffff
- return datetime.datetime.strptime(value, _RFC3339_MICROS_NO_ZULU)
- else:
- # YYYY-MM-DDTHH:MM:SS
- return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION)
- else:
- return None
+ def timestamp_to_py(self, value, field):
+ """Coerce 'value' to a datetime, if set or not nullable.
-def _date_from_json(value, field):
- """Coerce 'value' to a datetime date, if set or not nullable"""
- if _not_null(value, field):
- # value will be a string, in YYYY-MM-DD form.
- return _date_from_iso8601_date(value)
+ Args:
+ value (str): The timestamp.
+ field (google.cloud.bigquery.schema.SchemaField):
+ The field corresponding to the value.
-def _time_from_json(value, field):
- """Coerce 'value' to a datetime date, if set or not nullable"""
- if _not_null(value, field):
- if len(value) == 8: # HH:MM:SS
- fmt = _TIMEONLY_WO_MICROS
- elif len(value) == 15: # HH:MM:SS.micros
- fmt = _TIMEONLY_W_MICROS
- else:
- raise ValueError("Unknown time format: {}".format(value))
- return datetime.datetime.strptime(value, fmt).time()
-
-
-def _record_from_json(value, field):
- """Coerce 'value' to a mapping, if set or not nullable."""
- if _not_null(value, field):
- record = {}
- record_iter = zip(field.fields, value["f"])
- for subfield, cell in record_iter:
- converter = _CELLDATA_FROM_JSON[subfield.field_type]
- if subfield.mode == "REPEATED":
- value = [converter(item["v"], subfield) for item in cell["v"]]
+ Returns:
+ Optional[datetime.datetime]:
+ The parsed datetime object from
+ ``value`` if the ``field`` is not null (otherwise it is
+ :data:`None`).
+ """
+ if _not_null(value, field):
+ # Canonical formats for timestamps in BigQuery are flexible. See:
+ # g.co/cloud/bigquery/docs/reference/standard-sql/data-types#timestamp-type
+ # The separator between the date and time can be 'T' or ' '.
+ value = value.replace(" ", "T", 1)
+ # The UTC timezone may be formatted as Z or +00:00.
+ value = value.replace("Z", "")
+ value = value.replace("+00:00", "")
+
+ if "." in value:
+ # YYYY-MM-DDTHH:MM:SS.ffffff
+ return datetime.datetime.strptime(
+ value, _RFC3339_MICROS_NO_ZULU
+ ).replace(tzinfo=UTC)
else:
- value = converter(cell["v"], subfield)
- record[subfield.name] = value
- return record
-
-
-_CELLDATA_FROM_JSON = {
- "INTEGER": _int_from_json,
- "INT64": _int_from_json,
- "INTERVAL": _interval_from_json,
- "FLOAT": _float_from_json,
- "FLOAT64": _float_from_json,
- "NUMERIC": _decimal_from_json,
- "BIGNUMERIC": _decimal_from_json,
- "BOOLEAN": _bool_from_json,
- "BOOL": _bool_from_json,
- "STRING": _string_from_json,
- "GEOGRAPHY": _string_from_json,
- "BYTES": _bytes_from_json,
- "TIMESTAMP": _timestamp_from_json,
- "DATETIME": _datetime_from_json,
- "DATE": _date_from_json,
- "TIME": _time_from_json,
- "RECORD": _record_from_json,
-}
+ # YYYY-MM-DDTHH:MM:SS
+ return datetime.datetime.strptime(value, _RFC3339_NO_FRACTION).replace(
+ tzinfo=UTC
+ )
+ else:
+ return None
+
-_QUERY_PARAMS_FROM_JSON = dict(_CELLDATA_FROM_JSON)
-_QUERY_PARAMS_FROM_JSON["TIMESTAMP"] = _timestamp_query_param_from_json
+SCALAR_QUERY_PARAM_PARSER = ScalarQueryParamParser()
def _field_to_index_mapping(schema):
@@ -333,14 +455,6 @@ def _field_to_index_mapping(schema):
return {f.name: i for i, f in enumerate(schema)}
-def _field_from_json(resource, field):
- converter = _CELLDATA_FROM_JSON.get(field.field_type, lambda value, _: value)
- if field.mode == "REPEATED":
- return [converter(item["v"], field) for item in resource]
- else:
- return converter(resource, field)
-
-
def _row_tuple_from_json(row, schema):
"""Convert JSON row data to row with appropriate types.
@@ -362,7 +476,7 @@ def _row_tuple_from_json(row, schema):
row_data = []
for field, cell in zip(schema, row["f"]):
- row_data.append(_field_from_json(cell["v"], field))
+ row_data.append(CELL_DATA_PARSER.to_py(cell["v"], field))
return tuple(row_data)
@@ -429,6 +543,18 @@ def _bytes_to_json(value):
return value
+def _json_to_json(value):
+ """Coerce 'value' to a BigQuery REST API representation."""
+ if value is None:
+ return None
+ return json.dumps(value)
+
+
+def _string_to_json(value):
+ """NOOP string -> string coercion"""
+ return value
+
+
def _timestamp_to_json_parameter(value):
"""Coerce 'value' to an JSON-compatible representation.
@@ -478,7 +604,54 @@ def _time_to_json(value):
return value
-# Converters used for scalar values marshalled as row data.
+def _range_element_to_json(value, element_type=None):
+    """Coerce 'value' to a JSON-compatible representation."""
+ if value is None:
+ return None
+ elif isinstance(value, str):
+ if value.upper() in ("UNBOUNDED", "NULL"):
+ return None
+ else:
+ # We do not enforce range element value to be valid to reduce
+ # redundancy with backend.
+ return value
+ elif (
+ element_type and element_type.element_type.upper() in _SUPPORTED_RANGE_ELEMENTS
+ ):
+ converter = _SCALAR_VALUE_TO_JSON_ROW.get(element_type.element_type.upper())
+ return converter(value)
+ else:
+ raise ValueError(
+ f"Unsupported RANGE element type {element_type}, or "
+ "element type is empty. Must be DATE, DATETIME, or "
+ "TIMESTAMP"
+ )
+
+
+def _range_field_to_json(range_element_type, value):
+    """Coerce 'value' to a JSON-compatible representation."""
+ if isinstance(value, str):
+ # string literal
+ if _RANGE_PATTERN.match(value):
+ start, end = value[1:-1].split(", ")
+ else:
+ raise ValueError(f"RANGE literal {value} has incorrect format")
+ elif isinstance(value, dict):
+ # dictionary
+ start = value.get("start")
+ end = value.get("end")
+ else:
+ raise ValueError(
+ f"Unsupported type of RANGE value {value}, must be " "string or dict"
+ )
+
+ start = _range_element_to_json(start, range_element_type)
+ end = _range_element_to_json(end, range_element_type)
+ return {"start": start, "end": end}
+
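Going the other direction, a minimal sketch of serializing a RANGE value with the helper above; it assumes FieldElementType can be constructed directly from an element type name, and the dates are illustrative.

import datetime

from google.cloud.bigquery import schema
from google.cloud.bigquery._helpers import _range_field_to_json

element_type = schema.FieldElementType("DATE")

# A '[lower, upper)' literal is split into its bounds.
assert _range_field_to_json(element_type, "[2020-01-01, 2020-12-31)") == {
    "start": "2020-01-01",
    "end": "2020-12-31",
}

# A dict is serialized element by element; a None bound stays None (unbounded).
assert _range_field_to_json(
    element_type, {"start": datetime.date(2020, 1, 1), "end": None}
) == {"start": "2020-01-01", "end": None}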
+
+# Converters used for scalar values marshalled to the BigQuery API, such as in
+# query parameters or the tabledata.insert API.
_SCALAR_VALUE_TO_JSON_ROW = {
"INTEGER": _int_to_json,
"INT64": _int_to_json,
@@ -493,6 +666,8 @@ def _time_to_json(value):
"DATETIME": _datetime_to_json,
"DATE": _date_to_json,
"TIME": _time_to_json,
+ "JSON": _json_to_json,
+ "STRING": _string_to_json,
# Make sure DECIMAL and BIGDECIMAL are handled, even though
# requests for them should be converted to NUMERIC. Better safe
# than sorry.
@@ -506,6 +681,15 @@ def _time_to_json(value):
_SCALAR_VALUE_TO_JSON_PARAM["TIMESTAMP"] = _timestamp_to_json_parameter
+def _warn_unknown_field_type(field):
+ warnings.warn(
+ "Unknown type '{}' for field '{}'. Behavior reading and writing this type is not officially supported and may change in the future.".format(
+ field.field_type, field.name
+ ),
+ FutureWarning,
+ )
+
+
def _scalar_field_to_json(field, row_value):
"""Maps a field and value to a JSON-safe value.
@@ -518,9 +702,12 @@ def _scalar_field_to_json(field, row_value):
Returns:
Any: A JSON-serializable object.
"""
- converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type)
- if converter is None: # STRING doesn't need converting
- return row_value
+
+ def default_converter(value):
+ _warn_unknown_field_type(field)
+ return value
+
+ converter = _SCALAR_VALUE_TO_JSON_ROW.get(field.field_type, default_converter)
return converter(row_value)
@@ -621,6 +808,8 @@ def _single_field_to_json(field, row_value):
if field.field_type == "RECORD":
return _record_field_to_json(field.fields, row_value)
+ if field.field_type == "RANGE":
+ return _range_field_to_json(field.range_element_type, row_value)
return _scalar_field_to_json(field, row_value)
@@ -855,11 +1044,11 @@ def _build_resource_from_properties(obj, filter_fields):
"""
partial = {}
for filter_field in filter_fields:
- api_field = obj._PROPERTY_TO_API_FIELD.get(filter_field)
+ api_field = _get_sub_prop(obj._PROPERTY_TO_API_FIELD, filter_field)
if api_field is None and filter_field not in obj._properties:
raise ValueError("No property %s" % filter_field)
elif api_field is not None:
- partial[api_field] = obj._properties.get(api_field)
+ _set_sub_prop(partial, api_field, _get_sub_prop(obj._properties, api_field))
else:
# allows properties that are not defined in the library
# and properties that have the same name as API resource key
@@ -881,3 +1070,33 @@ def _verify_job_config_type(job_config, expected_type, param_name="job_config"):
job_config=job_config,
)
)
+
+
+def _isinstance_or_raise(
+ value: Any,
+ dtype: Union[Type, Tuple[Type, ...]],
+ none_allowed: Optional[bool] = False,
+) -> Any:
+ """Determine whether a value type matches a given datatype or None.
+ Args:
+ value (Any): Value to be checked.
+ dtype (type): Expected data type or tuple of data types.
+        none_allowed (Optional[bool]): Whether the value is allowed to be
+            None. Defaults to False.
+ Returns:
+ Any: Returns the input value if the type check is successful.
+ Raises:
+ TypeError: If the input value's type does not match the expected data type(s).
+ """
+ if none_allowed and value is None:
+ return value
+
+ if isinstance(value, dtype):
+ return value
+
+ or_none = ""
+ if none_allowed:
+ or_none = " (or None)"
+
+ msg = f"Pass {value} as a '{dtype}'{or_none}. Got {type(value)}."
+ raise TypeError(msg)
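A minimal usage sketch of the helper above:

from google.cloud.bigquery._helpers import _isinstance_or_raise

# The value is returned unchanged when the type check passes.
assert _isinstance_or_raise("my_dataset", str) == "my_dataset"
assert _isinstance_or_raise(None, str, none_allowed=True) is None

# Otherwise a TypeError describes the expected and actual types.
try:
    _isinstance_or_raise(123, (str, bytes))
except TypeError as exc:
    print(exc)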
diff --git a/google/cloud/bigquery/_http.py b/google/cloud/bigquery/_http.py
index 789ef9243..7921900f8 100644
--- a/google/cloud/bigquery/_http.py
+++ b/google/cloud/bigquery/_http.py
@@ -14,22 +14,10 @@
"""Create / interact with Google BigQuery connections."""
-import os
-import pkg_resources
-
from google.cloud import _http # type: ignore # pytype: disable=import-error
from google.cloud.bigquery import __version__
-# TODO: Increase the minimum version of google-cloud-core to 1.6.0
-# and remove this logic. See:
-# https://github.com/googleapis/python-bigquery/issues/509
-if os.getenv("GOOGLE_API_USE_CLIENT_CERTIFICATE") == "true": # pragma: NO COVER
- release = pkg_resources.get_distribution("google-cloud-core").parsed_version
- if release < pkg_resources.parse_version("1.6.0"):
- raise ImportError("google-cloud-core >= 1.6.0 is required to use mTLS feature")
-
-
class Connection(_http.JSONConnection):
"""A connection to Google BigQuery via the JSON REST API.
diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py
index 33fc72261..27e90246f 100644
--- a/google/cloud/bigquery/_job_helpers.py
+++ b/google/cloud/bigquery/_job_helpers.py
@@ -12,16 +12,49 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-"""Helpers for interacting with the job REST APIs from the client."""
+"""Helpers for interacting with the job REST APIs from the client.
+
+For queries, there are three cases to consider:
+
+1. jobs.insert: This always returns a job resource.
+2. jobs.query, jobCreationMode=JOB_CREATION_REQUIRED:
+ This sometimes can return the results inline, but always includes a job ID.
+3. jobs.query, jobCreationMode=JOB_CREATION_OPTIONAL:
+ This sometimes doesn't create a job at all, instead returning the results.
+ For better debugging, an auto-generated query ID is included in the
+ response.
+
+Client.query() calls either (1) or (2), depending on what the user provides
+for the api_method parameter. query() always returns a QueryJob object, which
+can retry the query when the query job fails for a retriable reason.
+
+Client.query_and_wait() calls (3). This returns a RowIterator that may wrap
+local results from the response or may wrap a query job containing multiple
+pages of results. Even though query_and_wait() waits for the job to complete,
+we still need a separate job_retry object because there are different
+predicates where it is safe to generate a new query ID.
+"""
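For reference, a minimal sketch of the two client entry points described above; the SQL is a placeholder.

from google.cloud import bigquery

client = bigquery.Client()
sql = "SELECT 1 AS x"

# Cases (1) and (2): query() returns a QueryJob, which can be retried as a job.
job = client.query(sql)  # api_method="INSERT" by default; "QUERY" uses jobs.query
rows = job.result()      # waits for the job to complete

# Case (3): query_and_wait() returns a RowIterator directly; depending on the
# query and jobCreationMode, a job may or may not have been created.
for row in client.query_and_wait(sql):
    print(row["x"])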
+
+from __future__ import annotations
import copy
+import dataclasses
+import datetime
+import functools
import uuid
-from typing import Any, Dict, TYPE_CHECKING, Optional
+import textwrap
+from typing import Any, Callable, Dict, Optional, TYPE_CHECKING, Union
+import warnings
import google.api_core.exceptions as core_exceptions
from google.api_core import retry as retries
from google.cloud.bigquery import job
+import google.cloud.bigquery.job.query
+import google.cloud.bigquery.query
+from google.cloud.bigquery import table
+import google.cloud.bigquery.retry
+from google.cloud.bigquery.retry import POLLING_DEFAULT_VALUE
# Avoid circular imports
if TYPE_CHECKING: # pragma: NO COVER
@@ -29,7 +62,7 @@
# The purpose of _TIMEOUT_BUFFER_MILLIS is to allow the server-side timeout to
-# happen before the client-side timeout. This is not strictly neccessary, as the
+# happen before the client-side timeout. This is not strictly necessary, as the
# client retries client-side timeouts, but the hope by making the server-side
# timeout slightly shorter is that it can save the server from some unncessary
# processing time.
@@ -58,25 +91,51 @@ def make_job_id(job_id: Optional[str] = None, prefix: Optional[str] = None) -> s
return str(uuid.uuid4())
+def job_config_with_defaults(
+ job_config: Optional[job.QueryJobConfig],
+ default_job_config: Optional[job.QueryJobConfig],
+) -> Optional[job.QueryJobConfig]:
+ """Create a copy of `job_config`, replacing unset values with those from
+ `default_job_config`.
+ """
+ if job_config is None:
+ return default_job_config
+
+ if default_job_config is None:
+ return job_config
+
+ # Both job_config and default_job_config are not None, so make a copy of
+ # job_config merged with default_job_config. Anything already explicitly
+ # set on job_config should not be replaced.
+ return job_config._fill_from_default(default_job_config)
+
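A minimal sketch of the merge semantics, assuming (as the comment above states) that explicitly set options win over the client-level defaults.

from google.cloud.bigquery import QueryJobConfig
from google.cloud.bigquery._job_helpers import job_config_with_defaults

default_config = QueryJobConfig(maximum_bytes_billed=10**9, use_query_cache=False)
per_query_config = QueryJobConfig(use_query_cache=True)

merged = job_config_with_defaults(per_query_config, default_config)
assert merged.use_query_cache is True        # explicitly set value is kept
assert merged.maximum_bytes_billed == 10**9  # unset value filled from the default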
+
def query_jobs_insert(
client: "Client",
query: str,
job_config: Optional[job.QueryJobConfig],
job_id: Optional[str],
job_id_prefix: Optional[str],
- location: str,
+ location: Optional[str],
project: str,
- retry: retries.Retry,
+ retry: Optional[retries.Retry],
timeout: Optional[float],
- job_retry: retries.Retry,
+ job_retry: Optional[retries.Retry],
+ *,
+ callback: Callable = lambda _: None,
) -> job.QueryJob:
"""Initiate a query using jobs.insert.
See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert
+
+ Args:
+ callback (Callable):
+ A callback function used by bigframes to report query progress.
"""
job_id_given = job_id is not None
job_id_save = job_id
job_config_save = job_config
+ query_sent_factory = QuerySentEventFactory()
def do_query():
# Make a copy now, so that original doesn't get changed by the process
@@ -89,6 +148,16 @@ def do_query():
try:
query_job._begin(retry=retry, timeout=timeout)
+ if job_config is not None and not job_config.dry_run:
+ callback(
+ query_sent_factory(
+ query=query,
+ billing_project=query_job.project,
+ location=query_job.location,
+ job_id=query_job.job_id,
+ request_id=None,
+ )
+ )
except core_exceptions.Conflict as create_exc:
# The thought is if someone is providing their own job IDs and they get
# their job ID generation wrong, this could end up returning results for
@@ -97,21 +166,43 @@ def do_query():
raise create_exc
try:
+ # Sometimes we get a 404 after a Conflict. In this case, we
+ # have pretty high confidence that by retrying the 404, we'll
+ # (hopefully) eventually recover the job.
+ # https://github.com/googleapis/python-bigquery/issues/2134
+ #
+ # Allow users who want to completely disable retries to
+ # continue to do so by setting retry to None.
+ get_job_retry = retry
+ if retry is not None:
+ # TODO(tswast): Amend the user's retry object with allowing
+ # 404 to retry when there's a public way to do so.
+ # https://github.com/googleapis/python-api-core/issues/796
+ get_job_retry = (
+ google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY
+ )
+
query_job = client.get_job(
job_id,
project=project,
location=location,
- retry=retry,
- timeout=timeout,
+ retry=get_job_retry,
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
)
except core_exceptions.GoogleAPIError: # (includes RetryError)
- raise create_exc
+ raise
else:
return query_job
else:
return query_job
+ # Allow users who want to completely disable retries to
+ # continue to do so by setting job_retry to None.
+ if job_retry is not None:
+ do_query = google.cloud.bigquery.retry._DEFAULT_QUERY_JOB_INSERT_RETRY(do_query)
+
future = do_query()
+
# The future might be in a failed state now, but if it's
    # unrecoverable, we'll find out when we ask for its result, at which
# point, we may retry.
@@ -122,24 +213,74 @@ def do_query():
return future
-def _to_query_request(job_config: Optional[job.QueryJobConfig]) -> Dict[str, Any]:
+def _validate_job_config(request_body: Dict[str, Any], invalid_key: str):
+ """Catch common mistakes, such as passing in a *JobConfig object of the
+ wrong type.
+ """
+ if invalid_key in request_body:
+ raise ValueError(f"got unexpected key {repr(invalid_key)} in job_config")
+
+
+def validate_job_retry(job_id: Optional[str], job_retry: Optional[retries.Retry]):
+ """Catch common mistakes, such as setting a job_id and job_retry at the same
+ time.
+ """
+ if job_id is not None and job_retry is not None:
+ # TODO(tswast): To avoid breaking changes but still allow a default
+ # query job retry, we currently only raise if they explicitly set a
+ # job_retry other than the default. In a future version, we may want to
+ # avoid this check for DEFAULT_JOB_RETRY and always raise.
+ if job_retry is not google.cloud.bigquery.retry.DEFAULT_JOB_RETRY:
+ raise TypeError(
+ textwrap.dedent(
+ """
+ `job_retry` was provided, but the returned job is
+ not retryable, because a custom `job_id` was
+ provided. To customize the job ID and allow for job
+ retries, set job_id_prefix, instead.
+ """
+ ).strip()
+ )
+ else:
+ warnings.warn(
+ textwrap.dedent(
+ """
+ job_retry must be explicitly set to None if job_id is set.
+ BigQuery cannot retry a failed job by using the exact
+ same ID. Setting job_id without explicitly disabling
+ job_retry will raise an error in the future. To avoid this
+ warning, either use job_id_prefix instead (preferred) or
+ set job_retry=None.
+ """
+ ).strip(),
+ category=FutureWarning,
+ # user code -> client.query / client.query_and_wait -> validate_job_retry
+ stacklevel=3,
+ )
+
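A minimal sketch of the three outcomes above (allowed, warn, raise); the job ID is a placeholder.

from google.api_core import retry as retries
from google.cloud.bigquery import _job_helpers
from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY

# Explicitly disabling job_retry alongside a fixed job_id is allowed.
_job_helpers.validate_job_retry("my-job-id", None)

# Leaving the library default in place only warns (FutureWarning) for now.
_job_helpers.validate_job_retry("my-job-id", DEFAULT_JOB_RETRY)

# A custom job_retry cannot be honored with a fixed job_id, so it raises.
try:
    _job_helpers.validate_job_retry("my-job-id", retries.Retry())
except TypeError as exc:
    print(exc)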
+
+def _to_query_request(
+ job_config: Optional[job.QueryJobConfig] = None,
+ *,
+ query: str,
+ location: Optional[str] = None,
+ timeout: Optional[float] = None,
+) -> Dict[str, Any]:
"""Transform from Job resource to QueryRequest resource.
Most of the keys in job.configuration.query are in common with
QueryRequest. If any configuration property is set that is not available in
jobs.query, it will result in a server-side error.
"""
- request_body = {}
- job_config_resource = job_config.to_api_repr() if job_config else {}
- query_config_resource = job_config_resource.get("query", {})
+ request_body = copy.copy(job_config.to_api_repr()) if job_config else {}
- request_body.update(query_config_resource)
+ _validate_job_config(request_body, job.CopyJob._JOB_TYPE)
+ _validate_job_config(request_body, job.ExtractJob._JOB_TYPE)
+ _validate_job_config(request_body, job.LoadJob._JOB_TYPE)
- # These keys are top level in job resource and query resource.
- if "labels" in job_config_resource:
- request_body["labels"] = job_config_resource["labels"]
- if "dryRun" in job_config_resource:
- request_body["dryRun"] = job_config_resource["dryRun"]
+ # Move query.* properties to top-level.
+ query_config_resource = request_body.pop("query", {})
+ request_body.update(query_config_resource)
# Default to standard SQL.
request_body.setdefault("useLegacySql", False)
@@ -149,6 +290,15 @@ def _to_query_request(job_config: Optional[job.QueryJobConfig]) -> Dict[str, Any
request_body.setdefault("formatOptions", {})
request_body["formatOptions"]["useInt64Timestamp"] = True # type: ignore
+ if timeout is not None:
+ # Subtract a buffer for context switching, network latency, etc.
+ request_body["timeoutMs"] = max(0, int(1000 * timeout) - _TIMEOUT_BUFFER_MILLIS)
+
+ if location is not None:
+ request_body["location"] = location
+
+ request_body["query"] = query
+
return request_body
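A minimal sketch of the request body produced by the helper above; the exact keys depend on the job config, and the timeout buffer (_TIMEOUT_BUFFER_MILLIS) is subtracted client-side.

from google.cloud.bigquery import QueryJobConfig
from google.cloud.bigquery._job_helpers import _to_query_request

config = QueryJobConfig(use_query_cache=False, labels={"team": "data"})
request = _to_query_request(config, query="SELECT 1", location="US", timeout=10.0)

assert request["query"] == "SELECT 1"
assert request["location"] == "US"
assert request["useLegacySql"] is False       # defaulted to standard SQL
assert request["useQueryCache"] is False      # hoisted from the "query" sub-resource
assert request["labels"] == {"team": "data"}  # already top-level in the job resource
assert request["timeoutMs"] <= 10_000         # small buffer subtracted from the 10s timeout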
@@ -193,46 +343,42 @@ def _to_query_job(
errors = query_response["errors"]
query_job._properties["status"]["errors"] = errors
- # Transform job state so that QueryJob doesn't try to restart the query.
+ # Avoid an extra call to `getQueryResults` if the query has finished.
job_complete = query_response.get("jobComplete")
if job_complete:
- query_job._properties["status"]["state"] = "DONE"
- # TODO: https://github.com/googleapis/python-bigquery/issues/589
- # Set the first page of results if job is "complete" and there is
- # only 1 page of results. Otherwise, use the existing logic that
- # refreshes the job stats.
- #
- # This also requires updates to `to_dataframe` and the DB API connector
- # so that they don't try to read from a destination table if all the
- # results are present.
- else:
- query_job._properties["status"]["state"] = "PENDING"
+ query_job._query_results = google.cloud.bigquery.query._QueryResults(
+ query_response
+ )
+
+ # We want job.result() to refresh the job state, so the conversion is
+ # always "PENDING", even if the job is finished.
+ query_job._properties["status"]["state"] = "PENDING"
return query_job
+def _to_query_path(project: str) -> str:
+ return f"/projects/{project}/queries"
+
+
def query_jobs_query(
client: "Client",
query: str,
job_config: Optional[job.QueryJobConfig],
- location: str,
+ location: Optional[str],
project: str,
retry: retries.Retry,
timeout: Optional[float],
- job_retry: retries.Retry,
+ job_retry: Optional[retries.Retry],
) -> job.QueryJob:
- """Initiate a query using jobs.query.
+ """Initiate a query using jobs.query with jobCreationMode=JOB_CREATION_REQUIRED.
See: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
"""
- path = f"/projects/{project}/queries"
- request_body = _to_query_request(job_config)
-
- if timeout is not None:
- # Subtract a buffer for context switching, network latency, etc.
- request_body["timeoutMs"] = max(0, int(1000 * timeout) - _TIMEOUT_BUFFER_MILLIS)
- request_body["location"] = location
- request_body["query"] = query
+ path = _to_query_path(project)
+ request_body = _to_query_request(
+ query=query, job_config=job_config, location=location, timeout=timeout
+ )
def do_query():
request_body["requestId"] = make_job_id()
@@ -257,3 +403,387 @@ def do_query():
future._job_retry = job_retry
return future
+
+
+def query_and_wait(
+ client: "Client",
+ query: str,
+ *,
+ job_config: Optional[job.QueryJobConfig],
+ location: Optional[str],
+ project: str,
+ api_timeout: Optional[float] = None,
+ wait_timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE,
+ retry: Optional[retries.Retry],
+ job_retry: Optional[retries.Retry],
+ page_size: Optional[int] = None,
+ max_results: Optional[int] = None,
+ callback: Callable = lambda _: None,
+) -> table.RowIterator:
+ """Run the query, wait for it to finish, and return the results.
+
+
+ Args:
+ client:
+ BigQuery client to make API calls.
+ query (str):
+ SQL query to be executed. Defaults to the standard SQL
+ dialect. Use the ``job_config`` parameter to change dialects.
+ job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
+ Extra configuration options for the job.
+ To override any options that were previously set in
+ the ``default_query_job_config`` given to the
+ ``Client`` constructor, manually set those options to ``None``,
+ or whatever value is preferred.
+ location (Optional[str]):
+ Location where to run the job. Must match the location of the
+ table used in the query as well as the destination table.
+ project (str):
+ Project ID of the project of where to run the job.
+ api_timeout (Optional[float]):
+ The number of seconds to wait for the underlying HTTP transport
+ before using ``retry``.
+ wait_timeout (Optional[Union[float, object]]):
+ The number of seconds to wait for the query to finish. If the
+ query doesn't finish before this timeout, the client attempts
+ to cancel the query. If unset, the underlying Client.get_job() API
+            call has a timeout, but we still wait indefinitely for the job to
+ finish.
+ retry (Optional[google.api_core.retry.Retry]):
+ How to retry the RPC. This only applies to making RPC
+ calls. It isn't used to retry failed jobs. This has
+ a reasonable default that should only be overridden
+ with care.
+ job_retry (Optional[google.api_core.retry.Retry]):
+ How to retry failed jobs. The default retries
+ rate-limit-exceeded errors. Passing ``None`` disables
+ job retry. Not all jobs can be retried.
+ page_size (Optional[int]):
+ The maximum number of rows in each page of results from this
+ request. Non-positive values are ignored.
+ max_results (Optional[int]):
+ The maximum total number of rows from this request.
+ callback (Callable):
+ A callback function used by bigframes to report query progress.
+
+ Returns:
+ google.cloud.bigquery.table.RowIterator:
+ Iterator of row data
+ :class:`~google.cloud.bigquery.table.Row`-s. During each
+ page, the iterator will have the ``total_rows`` attribute
+ set, which counts the total number of rows **in the result
+ set** (this is distinct from the total number of rows in the
+ current page: ``iterator.page.num_items``).
+
+ If the query is a special query that produces no results, e.g.
+ a DDL query, an ``_EmptyRowIterator`` instance is returned.
+
+ Raises:
+ TypeError:
+ If ``job_config`` is not an instance of
+ :class:`~google.cloud.bigquery.job.QueryJobConfig`
+ class.
+ """
+ request_body = _to_query_request(
+ query=query, job_config=job_config, location=location, timeout=api_timeout
+ )
+
+ # Some API parameters aren't supported by the jobs.query API. In these
+ # cases, fallback to a jobs.insert call.
+ if not _supported_by_jobs_query(request_body):
+ return _wait_or_cancel(
+ query_jobs_insert(
+ client=client,
+ query=query,
+ job_id=None,
+ job_id_prefix=None,
+ job_config=job_config,
+ location=location,
+ project=project,
+ retry=retry,
+ timeout=api_timeout,
+ job_retry=job_retry,
+ callback=callback,
+ ),
+ api_timeout=api_timeout,
+ wait_timeout=wait_timeout,
+ retry=retry,
+ page_size=page_size,
+ max_results=max_results,
+ callback=callback,
+ )
+
+ path = _to_query_path(project)
+
+ if page_size is not None and max_results is not None:
+ request_body["maxResults"] = min(page_size, max_results)
+ elif page_size is not None or max_results is not None:
+ request_body["maxResults"] = page_size or max_results
+ if client.default_job_creation_mode:
+ request_body["jobCreationMode"] = client.default_job_creation_mode
+
+ query_sent_factory = QuerySentEventFactory()
+
+ def do_query():
+ request_id = make_job_id()
+ request_body["requestId"] = request_id
+ span_attributes = {"path": path}
+
+ if "dryRun" not in request_body:
+ callback(
+ query_sent_factory(
+ query=query,
+ billing_project=project,
+ location=location,
+ job_id=None,
+ request_id=request_id,
+ )
+ )
+
+ # For easier testing, handle the retries ourselves.
+ if retry is not None:
+ response = retry(client._call_api)(
+ retry=None, # We're calling the retry decorator ourselves.
+ span_name="BigQuery.query",
+ span_attributes=span_attributes,
+ method="POST",
+ path=path,
+ data=request_body,
+ timeout=api_timeout,
+ )
+ else:
+ response = client._call_api(
+ retry=None,
+ span_name="BigQuery.query",
+ span_attributes=span_attributes,
+ method="POST",
+ path=path,
+ data=request_body,
+ timeout=api_timeout,
+ )
+
+ # Even if we run with JOB_CREATION_OPTIONAL, if there are more pages
+ # to fetch, there will be a job ID for jobs.getQueryResults.
+ query_results = google.cloud.bigquery.query._QueryResults.from_api_repr(
+ response
+ )
+ page_token = query_results.page_token
+ more_pages = page_token is not None
+
+ if more_pages or not query_results.complete:
+ # TODO(swast): Avoid a call to jobs.get in some cases (few
+ # remaining pages) by waiting for the query to finish and calling
+ # client._list_rows_from_query_results directly. Need to update
+ # RowIterator to fetch destination table via the job ID if needed.
+ return _wait_or_cancel(
+ _to_query_job(client, query, job_config, response),
+ api_timeout=api_timeout,
+ wait_timeout=wait_timeout,
+ retry=retry,
+ page_size=page_size,
+ max_results=max_results,
+ callback=callback,
+ )
+
+ if "dryRun" not in request_body:
+ callback(
+ QueryFinishedEvent(
+ billing_project=project,
+ location=query_results.location,
+ query_id=query_results.query_id,
+ job_id=query_results.job_id,
+ total_rows=query_results.total_rows,
+ total_bytes_processed=query_results.total_bytes_processed,
+ slot_millis=query_results.slot_millis,
+ destination=None,
+ created=query_results.created,
+ started=query_results.started,
+ ended=query_results.ended,
+ )
+ )
+ return table.RowIterator(
+ client=client,
+ api_request=functools.partial(client._call_api, retry, timeout=api_timeout),
+ path=None,
+ schema=query_results.schema,
+ max_results=max_results,
+ page_size=page_size,
+ total_rows=query_results.total_rows,
+ first_page_response=response,
+ location=query_results.location,
+ job_id=query_results.job_id,
+ query_id=query_results.query_id,
+ project=query_results.project,
+ num_dml_affected_rows=query_results.num_dml_affected_rows,
+ query=query,
+ total_bytes_processed=query_results.total_bytes_processed,
+ slot_millis=query_results.slot_millis,
+ created=query_results.created,
+ started=query_results.started,
+ ended=query_results.ended,
+ )
+
+ if job_retry is not None:
+ return job_retry(do_query)()
+ else:
+ return do_query()
+
+
+def _supported_by_jobs_query(request_body: Dict[str, Any]) -> bool:
+ """True if jobs.query can be used. False if jobs.insert is needed."""
+ request_keys = frozenset(request_body.keys())
+
+ # Per issue: https://github.com/googleapis/python-bigquery/issues/1867
+ # use an allowlist here instead of a denylist because the backend API allows
+ # unsupported parameters without any warning or failure. Instead, keep this
+ # set in sync with those in QueryRequest:
+ # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#QueryRequest
+ keys_allowlist = {
+ "kind",
+ "query",
+ "maxResults",
+ "defaultDataset",
+ "timeoutMs",
+ "dryRun",
+ "preserveNulls",
+ "useQueryCache",
+ "useLegacySql",
+ "parameterMode",
+ "queryParameters",
+ "location",
+ "formatOptions",
+ "connectionProperties",
+ "labels",
+ "maximumBytesBilled",
+ "requestId",
+ "createSession",
+ "writeIncrementalResults",
+ "jobTimeoutMs",
+ "reservation",
+ "maxSlots",
+ }
+
+ unsupported_keys = request_keys - keys_allowlist
+ return len(unsupported_keys) == 0
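+
+
+# A minimal illustration of the allowlist check above (literal values are
+# made up, not taken from this codebase):
+#
+#   _supported_by_jobs_query({"query": "SELECT 1", "useLegacySql": False})
+#   # -> True: every key is allowlisted, so jobs.query can be used.
+#   _supported_by_jobs_query({"query": "SELECT 1", "destinationTable": {}})
+#   # -> False: "destinationTable" is not allowlisted, so jobs.insert is needed.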
+
+
+def _wait_or_cancel(
+ job: job.QueryJob,
+ api_timeout: Optional[float],
+ wait_timeout: Optional[Union[object, float]],
+ retry: Optional[retries.Retry],
+ page_size: Optional[int],
+ max_results: Optional[int],
+ *,
+ callback: Callable = lambda _: None,
+) -> table.RowIterator:
+ """Wait for a job to complete and return the results.
+
+ If we can't return the results within the ``wait_timeout``, try to cancel
+ the job.
+ """
+ try:
+ if not job.dry_run:
+ callback(
+ QueryReceivedEvent(
+ billing_project=job.project,
+ location=job.location,
+ job_id=job.job_id,
+ statement_type=job.statement_type,
+ state=job.state,
+ query_plan=job.query_plan,
+ created=job.created,
+ started=job.started,
+ ended=job.ended,
+ )
+ )
+ query_results = job.result(
+ page_size=page_size,
+ max_results=max_results,
+ retry=retry,
+ timeout=wait_timeout,
+ )
+ if not job.dry_run:
+ callback(
+ QueryFinishedEvent(
+ billing_project=job.project,
+ location=query_results.location,
+ query_id=query_results.query_id,
+ job_id=query_results.job_id,
+ total_rows=query_results.total_rows,
+ total_bytes_processed=query_results.total_bytes_processed,
+ slot_millis=query_results.slot_millis,
+ destination=job.destination,
+ created=job.created,
+ started=job.started,
+ ended=job.ended,
+ )
+ )
+ return query_results
+ except Exception:
+ # Attempt to cancel the job since we can't return the results.
+ try:
+ job.cancel(retry=retry, timeout=api_timeout)
+ except Exception:
+ # Don't eat the original exception if cancel fails.
+ pass
+ raise
+
+
+@dataclasses.dataclass(frozen=True)
+class QueryFinishedEvent:
+ """Query finished successfully."""
+
+ billing_project: Optional[str]
+ location: Optional[str]
+ query_id: Optional[str]
+ job_id: Optional[str]
+ destination: Optional[table.TableReference]
+ total_rows: Optional[int]
+ total_bytes_processed: Optional[int]
+ slot_millis: Optional[int]
+ created: Optional[datetime.datetime]
+ started: Optional[datetime.datetime]
+ ended: Optional[datetime.datetime]
+
+
+@dataclasses.dataclass(frozen=True)
+class QueryReceivedEvent:
+ """Query received and acknowledged by the BigQuery API."""
+
+ billing_project: Optional[str]
+ location: Optional[str]
+ job_id: Optional[str]
+ statement_type: Optional[str]
+ state: Optional[str]
+ query_plan: Optional[list[google.cloud.bigquery.job.query.QueryPlanEntry]]
+ created: Optional[datetime.datetime]
+ started: Optional[datetime.datetime]
+ ended: Optional[datetime.datetime]
+
+
+@dataclasses.dataclass(frozen=True)
+class QuerySentEvent:
+ """Query sent to BigQuery."""
+
+ query: str
+ billing_project: Optional[str]
+ location: Optional[str]
+ job_id: Optional[str]
+ request_id: Optional[str]
+
+
+class QueryRetryEvent(QuerySentEvent):
+ """Query sent another time because the previous attempt failed."""
+
+
+class QuerySentEventFactory:
+ """Creates a QuerySentEvent first, then QueryRetryEvent after that."""
+
+ def __init__(self):
+ self._event_constructor = QuerySentEvent
+
+ def __call__(self, **kwargs):
+ result = self._event_constructor(**kwargs)
+ self._event_constructor = QueryRetryEvent
+ return result
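+
+
+# Rough usage sketch (argument values below are made up):
+#
+#   factory = QuerySentEventFactory()
+#   factory(query="SELECT 1", billing_project=None, location=None,
+#           job_id=None, request_id="abc")   # -> QuerySentEvent
+#   factory(query="SELECT 1", billing_project=None, location=None,
+#           job_id=None, request_id="def")   # -> QueryRetryEvent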
diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py
index cc0ee75ff..2dab03a06 100644
--- a/google/cloud/bigquery/_pandas_helpers.py
+++ b/google/cloud/bigquery/_pandas_helpers.py
@@ -12,7 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-"""Shared helper functions for connecting BigQuery and pandas."""
+"""Shared helper functions for connecting BigQuery and pandas.
+
+NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package,
+instead. See: go/pandas-gbq-and-bigframes-redundancy and
+https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pandas_to_bigquery.py
+"""
import concurrent.futures
from datetime import datetime
@@ -20,52 +25,63 @@
from itertools import islice
import logging
import queue
+import threading
import warnings
+from typing import Any, Union, Optional, Callable, Generator, List
+
+
+from google.cloud.bigquery import _pyarrow_helpers
+from google.cloud.bigquery import _versions_helpers
+from google.cloud.bigquery import schema
+
try:
import pandas # type: ignore
pandas_import_exception = None
-except ImportError as exc: # pragma: NO COVER
+except ImportError as exc:
pandas = None
pandas_import_exception = exc
else:
import numpy
+
+try:
+ import pandas_gbq.schema.pandas_to_bigquery # type: ignore
+
+ pandas_gbq_import_exception = None
+except ImportError as exc:
+ pandas_gbq = None
+ pandas_gbq_import_exception = exc
+
+
try:
import db_dtypes # type: ignore
date_dtype_name = db_dtypes.DateDtype.name
time_dtype_name = db_dtypes.TimeDtype.name
db_dtypes_import_exception = None
-except ImportError as exc: # pragma: NO COVER
+except ImportError as exc:
db_dtypes = None
db_dtypes_import_exception = exc
date_dtype_name = time_dtype_name = "" # Use '' rather than None because pytype
-
-import pyarrow # type: ignore
-import pyarrow.parquet # type: ignore
+pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
try:
# _BaseGeometry is used to detect shapely objevys in `bq_to_arrow_array`
from shapely.geometry.base import BaseGeometry as _BaseGeometry # type: ignore
-except ImportError: # pragma: NO COVER
+except ImportError:
# No shapely, use NoneType for _BaseGeometry as a placeholder.
_BaseGeometry = type(None)
else:
+ # We don't have any unit test sessions that install shapely but not pandas.
if pandas is not None: # pragma: NO COVER
def _to_wkb():
- # Create a closure that:
- # - Adds a not-null check. This allows the returned function to
- # be used directly with apply, unlike `shapely.wkb.dumps`.
- # - Avoid extra work done by `shapely.wkb.dumps` that we don't need.
- # - Caches the WKBWriter (and write method lookup :) )
- # - Avoids adding WKBWriter, lgeos, and notnull to the module namespace.
- from shapely.geos import WKBWriter, lgeos # type: ignore
-
- write = WKBWriter(lgeos).write
+ from shapely import wkb # type: ignore
+
+ write = wkb.dumps
notnull = pandas.notnull
def _to_wkb(v):
@@ -76,17 +92,13 @@ def _to_wkb(v):
_to_wkb = _to_wkb()
try:
- from google.cloud.bigquery_storage import ArrowSerializationOptions
+ from google.cloud.bigquery_storage_v1.types import ArrowSerializationOptions
except ImportError:
_ARROW_COMPRESSION_SUPPORT = False
else:
# Having BQ Storage available implies that pyarrow >=1.0.0 is available, too.
_ARROW_COMPRESSION_SUPPORT = True
-from google.cloud.bigquery import _helpers
-from google.cloud.bigquery import schema
-
-
_LOGGER = logging.getLogger(__name__)
_PROGRESS_INTERVAL = 0.2 # Maximum time between download status checks, in seconds.
@@ -123,82 +135,30 @@ def __init__(self):
# be an atomic operation in the Python language definition (enforced by
# the global interpreter lock).
self.done = False
+ # To assist with testing and understanding the behavior of the
+ # download, use this object as shared state to track how many worker
+ # threads have started and have gracefully shutdown.
+ self._started_workers_lock = threading.Lock()
+ self.started_workers = 0
+ self._finished_workers_lock = threading.Lock()
+ self.finished_workers = 0
+ def start(self):
+ with self._started_workers_lock:
+ self.started_workers += 1
-def pyarrow_datetime():
- return pyarrow.timestamp("us", tz=None)
-
-
-def pyarrow_numeric():
- return pyarrow.decimal128(38, 9)
-
-
-def pyarrow_bignumeric():
- # 77th digit is partial.
- # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
- return pyarrow.decimal256(76, 38)
-
+ def finish(self):
+ with self._finished_workers_lock:
+ self.finished_workers += 1
-def pyarrow_time():
- return pyarrow.time64("us")
-
-def pyarrow_timestamp():
- return pyarrow.timestamp("us", tz="UTC")
-
-
-# This dictionary is duplicated in bigquery_storage/test/unite/test_reader.py
-# When modifying it be sure to update it there as well.
-BQ_TO_ARROW_SCALARS = {
- "BIGNUMERIC": pyarrow_bignumeric,
- "BOOL": pyarrow.bool_,
- "BOOLEAN": pyarrow.bool_,
- "BYTES": pyarrow.binary,
- "DATE": pyarrow.date32,
- "DATETIME": pyarrow_datetime,
- "FLOAT": pyarrow.float64,
- "FLOAT64": pyarrow.float64,
- "GEOGRAPHY": pyarrow.string,
- "INT64": pyarrow.int64,
- "INTEGER": pyarrow.int64,
- "NUMERIC": pyarrow_numeric,
- "STRING": pyarrow.string,
- "TIME": pyarrow_time,
- "TIMESTAMP": pyarrow_timestamp,
-}
-ARROW_SCALAR_IDS_TO_BQ = {
- # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes
- pyarrow.bool_().id: "BOOL",
- pyarrow.int8().id: "INT64",
- pyarrow.int16().id: "INT64",
- pyarrow.int32().id: "INT64",
- pyarrow.int64().id: "INT64",
- pyarrow.uint8().id: "INT64",
- pyarrow.uint16().id: "INT64",
- pyarrow.uint32().id: "INT64",
- pyarrow.uint64().id: "INT64",
- pyarrow.float16().id: "FLOAT64",
- pyarrow.float32().id: "FLOAT64",
- pyarrow.float64().id: "FLOAT64",
- pyarrow.time32("ms").id: "TIME",
- pyarrow.time64("ns").id: "TIME",
- pyarrow.timestamp("ns").id: "TIMESTAMP",
- pyarrow.date32().id: "DATE",
- pyarrow.date64().id: "DATETIME", # because millisecond resolution
- pyarrow.binary().id: "BYTES",
- pyarrow.string().id: "STRING", # also alias for pyarrow.utf8()
- # The exact scale and precision don't matter, see below.
- pyarrow.decimal128(38, scale=9).id: "NUMERIC",
- # The exact decimal's scale and precision are not important, as only
- # the type ID matters, and it's the same for all decimal256 instances.
- pyarrow.decimal256(76, scale=38).id: "BIGNUMERIC",
-}
BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = {
"GEOGRAPHY": {
b"ARROW:extension:name": b"google:sqlType:geography",
b"ARROW:extension:metadata": b'{"encoding": "WKT"}',
},
"DATETIME": {b"ARROW:extension:name": b"google:sqlType:datetime"},
+ "JSON": {b"ARROW:extension:name": b"google:sqlType:json"},
}
@@ -215,6 +175,17 @@ def bq_to_arrow_struct_data_type(field):
return pyarrow.struct(arrow_fields)
+def bq_to_arrow_range_data_type(field):
+ if field is None:
+ raise ValueError(
+ "Range element type cannot be None, must be one of "
+ "DATE, DATETIME, or TIMESTAMP"
+ )
+ element_type = field.element_type.upper()
+ arrow_element_type = _pyarrow_helpers.bq_to_arrow_scalars(element_type)()
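+    # For example (sketch), a RANGE<DATE> field yields
+    # pyarrow.struct([("start", pyarrow.date32()), ("end", pyarrow.date32())]).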
+ return pyarrow.struct([("start", arrow_element_type), ("end", arrow_element_type)])
+
+
def bq_to_arrow_data_type(field):
"""Return the Arrow data type, corresponding to a given BigQuery column.
@@ -233,7 +204,10 @@ def bq_to_arrow_data_type(field):
if field_type_upper in schema._STRUCT_TYPES:
return bq_to_arrow_struct_data_type(field)
- data_type_constructor = BQ_TO_ARROW_SCALARS.get(field_type_upper)
+ if field_type_upper == "RANGE":
+ return bq_to_arrow_range_data_type(field.range_element_type)
+
+ data_type_constructor = _pyarrow_helpers.bq_to_arrow_scalars(field_type_upper)
if data_type_constructor is None:
return None
return data_type_constructor()
@@ -249,15 +223,23 @@ def bq_to_arrow_field(bq_field, array_type=None):
if arrow_type is not None:
if array_type is not None:
arrow_type = array_type # For GEOGRAPHY, at least initially
- is_nullable = bq_field.mode.upper() == "NULLABLE"
metadata = BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA.get(
bq_field.field_type.upper() if bq_field.field_type else ""
)
return pyarrow.field(
- bq_field.name, arrow_type, nullable=is_nullable, metadata=metadata
+ bq_field.name,
+ arrow_type,
+        # Even if the remote schema is REQUIRED, there's a chance there are
+        # local NULL values. Arrow will gladly interpret these NULL values
+ # as non-NULL and give you an arbitrary value. See:
+ # https://github.com/googleapis/python-bigquery/issues/1692
+ nullable=False if bq_field.mode.upper() == "REPEATED" else True,
+ metadata=metadata,
)
- warnings.warn("Unable to determine type for field '{}'.".format(bq_field.name))
+ warnings.warn(
+ "Unable to determine Arrow type for field '{}'.".format(bq_field.name)
+ )
return None
@@ -277,7 +259,20 @@ def bq_to_arrow_schema(bq_schema):
return pyarrow.schema(arrow_fields)
-def default_types_mapper(date_as_object: bool = False):
+def default_types_mapper(
+ date_as_object: bool = False,
+ bool_dtype: Union[Any, None] = None,
+ int_dtype: Union[Any, None] = None,
+ float_dtype: Union[Any, None] = None,
+ string_dtype: Union[Any, None] = None,
+ date_dtype: Union[Any, None] = None,
+ datetime_dtype: Union[Any, None] = None,
+ time_dtype: Union[Any, None] = None,
+ timestamp_dtype: Union[Any, None] = None,
+ range_date_dtype: Union[Any, None] = None,
+ range_datetime_dtype: Union[Any, None] = None,
+ range_timestamp_dtype: Union[Any, None] = None,
+):
"""Create a mapping from pyarrow types to pandas types.
This overrides the pandas defaults to use null-safe extension types where
@@ -293,22 +288,59 @@ def default_types_mapper(date_as_object: bool = False):
"""
def types_mapper(arrow_data_type):
- if pyarrow.types.is_boolean(arrow_data_type):
- return pandas.BooleanDtype()
+ if bool_dtype is not None and pyarrow.types.is_boolean(arrow_data_type):
+ return bool_dtype
+
+ elif int_dtype is not None and pyarrow.types.is_integer(arrow_data_type):
+ return int_dtype
+
+ elif float_dtype is not None and pyarrow.types.is_floating(arrow_data_type):
+ return float_dtype
+
+ elif string_dtype is not None and pyarrow.types.is_string(arrow_data_type):
+ return string_dtype
elif (
# If date_as_object is True, we know some DATE columns are
# out-of-bounds of what is supported by pandas.
- not date_as_object
+ date_dtype is not None
+ and not date_as_object
and pyarrow.types.is_date(arrow_data_type)
):
- return db_dtypes.DateDtype()
+ return date_dtype
- elif pyarrow.types.is_integer(arrow_data_type):
- return pandas.Int64Dtype()
+ elif (
+ datetime_dtype is not None
+ and pyarrow.types.is_timestamp(arrow_data_type)
+ and arrow_data_type.tz is None
+ ):
+ return datetime_dtype
- elif pyarrow.types.is_time(arrow_data_type):
- return db_dtypes.TimeDtype()
+ elif (
+ timestamp_dtype is not None
+ and pyarrow.types.is_timestamp(arrow_data_type)
+ and arrow_data_type.tz is not None
+ ):
+ return timestamp_dtype
+
+ elif time_dtype is not None and pyarrow.types.is_time(arrow_data_type):
+ return time_dtype
+
+ elif pyarrow.types.is_struct(arrow_data_type):
+ if range_datetime_dtype is not None and arrow_data_type.equals(
+ range_datetime_dtype.pyarrow_dtype
+ ):
+ return range_datetime_dtype
+
+ elif range_date_dtype is not None and arrow_data_type.equals(
+ range_date_dtype.pyarrow_dtype
+ ):
+ return range_date_dtype
+
+ elif range_timestamp_dtype is not None and arrow_data_type.equals(
+ range_timestamp_dtype.pyarrow_dtype
+ ):
+ return range_timestamp_dtype
return types_mapper
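+
+
+# Illustrative wiring of the returned mapper (the dtype choices below are
+# assumptions, not defaults of this module):
+#
+#   mapper = default_types_mapper(
+#       bool_dtype=pandas.BooleanDtype(),
+#       int_dtype=pandas.Int64Dtype(),
+#   )
+#   arrow_table.to_pandas(types_mapper=mapper)  # arrow_table: a pyarrow.Table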
@@ -335,11 +367,16 @@ def bq_to_arrow_array(series, bq_field):
field_type_upper = bq_field.field_type.upper() if bq_field.field_type else ""
- if bq_field.mode.upper() == "REPEATED":
- return pyarrow.ListArray.from_pandas(series, type=arrow_type)
- if field_type_upper in schema._STRUCT_TYPES:
- return pyarrow.StructArray.from_pandas(series, type=arrow_type)
- return pyarrow.Array.from_pandas(series, type=arrow_type)
+ try:
+ if bq_field.mode.upper() == "REPEATED":
+ return pyarrow.ListArray.from_pandas(series, type=arrow_type)
+ if field_type_upper in schema._STRUCT_TYPES:
+ return pyarrow.StructArray.from_pandas(series, type=arrow_type)
+ return pyarrow.Array.from_pandas(series, type=arrow_type)
+ except pyarrow.ArrowTypeError:
+ msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
+ _LOGGER.error(msg)
+ raise pyarrow.ArrowTypeError(msg)
def get_column_or_index(dataframe, name):
@@ -424,6 +461,10 @@ def _first_array_valid(series):
def dataframe_to_bq_schema(dataframe, bq_schema):
"""Convert a pandas DataFrame schema to a BigQuery schema.
+ DEPRECATED: Use
+ pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(),
+ instead. See: go/pandas-gbq-and-bigframes-redundancy.
+
Args:
dataframe (pandas.DataFrame):
DataFrame for which the client determines the BigQuery schema.
@@ -439,6 +480,20 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
The automatically determined schema. Returns None if the type of
any column cannot be determined.
"""
+ if pandas_gbq is None:
+ warnings.warn(
+ "Loading pandas DataFrame into BigQuery will require pandas-gbq "
+ "package version 0.26.1 or greater in the future. "
+ f"Tried to import pandas-gbq and got: {pandas_gbq_import_exception}",
+ category=FutureWarning,
+ )
+ else:
+ return pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(
+ dataframe,
+ override_bigquery_fields=bq_schema,
+ index=True,
+ )
+
if bq_schema:
bq_schema = schema._to_schema_fields(bq_schema)
bq_schema_index = {field.name: field for field in bq_schema}
@@ -448,31 +503,37 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
bq_schema_unused = set()
bq_schema_out = []
- unknown_type_fields = []
-
+ unknown_type_columns = []
+ dataframe_reset_index = dataframe.reset_index()
for column, dtype in list_columns_and_indexes(dataframe):
- # Use provided type from schema, if present.
+ # Step 1: use provided type from schema, if present.
bq_field = bq_schema_index.get(column)
if bq_field:
bq_schema_out.append(bq_field)
bq_schema_unused.discard(bq_field.name)
continue
- # Otherwise, try to automatically determine the type based on the
+ # Step 2: try to automatically determine the type based on the
# pandas dtype.
bq_type = _PANDAS_DTYPE_TO_BQ.get(dtype.name)
if bq_type is None:
- sample_data = _first_valid(dataframe[column])
+ sample_data = _first_valid(dataframe_reset_index[column])
if (
isinstance(sample_data, _BaseGeometry)
and sample_data is not None # Paranoia
):
bq_type = "GEOGRAPHY"
- bq_field = schema.SchemaField(column, bq_type)
- bq_schema_out.append(bq_field)
+ if bq_type is not None:
+ bq_schema_out.append(schema.SchemaField(column, bq_type))
+ continue
- if bq_field.field_type is None:
- unknown_type_fields.append(bq_field)
+ # Step 3: try with pyarrow if available
+ bq_field = _get_schema_by_pyarrow(column, dataframe_reset_index[column])
+ if bq_field is not None:
+ bq_schema_out.append(bq_field)
+ continue
+
+ unknown_type_columns.append(column)
# Catch any schema mismatch. The developer explicitly asked to serialize a
# column, but it was not found.
@@ -483,88 +544,70 @@ def dataframe_to_bq_schema(dataframe, bq_schema):
)
)
- # If schema detection was not successful for all columns, also try with
- # pyarrow, if available.
- if unknown_type_fields:
- # The augment_schema() helper itself will also issue unknown type
- # warnings if detection still fails for any of the fields.
- bq_schema_out = augment_schema(dataframe, bq_schema_out)
+ if unknown_type_columns != []:
+ msg = "Could not determine the type of columns: {}".format(
+ ", ".join(unknown_type_columns)
+ )
+ warnings.warn(msg)
+ return None # We cannot detect the schema in full.
- return tuple(bq_schema_out) if bq_schema_out else None
+ return tuple(bq_schema_out)
-def augment_schema(dataframe, current_bq_schema):
- """Try to deduce the unknown field types and return an improved schema.
+def _get_schema_by_pyarrow(name, series):
+ """Attempt to detect the type of the given series by leveraging PyArrow's
+ type detection capabilities.
- This function requires ``pyarrow`` to run. If all the missing types still
- cannot be detected, ``None`` is returned. If all types are already known,
- a shallow copy of the given schema is returned.
+ This function requires the ``pyarrow`` library to be installed and
+ available. If the series type cannot be determined or ``pyarrow`` is not
+ available, ``None`` is returned.
Args:
- dataframe (pandas.DataFrame):
- DataFrame for which some of the field types are still unknown.
- current_bq_schema (Sequence[google.cloud.bigquery.schema.SchemaField]):
- A BigQuery schema for ``dataframe``. The types of some or all of
- the fields may be ``None``.
+ name (str):
+ the column name of the SchemaField.
+ series (pandas.Series):
+ The Series data for which to detect the data type.
Returns:
- Optional[Sequence[google.cloud.bigquery.schema.SchemaField]]
+ Optional[google.cloud.bigquery.schema.SchemaField]:
+ A tuple containing the BigQuery-compatible type string (e.g.,
+ "STRING", "INTEGER", "TIMESTAMP", "DATETIME", "NUMERIC", "BIGNUMERIC")
+ and the mode string ("NULLABLE", "REPEATED").
+ Returns ``None`` if the type cannot be determined or ``pyarrow``
+ is not imported.
"""
- # pytype: disable=attribute-error
- augmented_schema = []
- unknown_type_fields = []
- for field in current_bq_schema:
- if field.field_type is not None:
- augmented_schema.append(field)
- continue
-
- arrow_table = pyarrow.array(dataframe[field.name])
-
- if pyarrow.types.is_list(arrow_table.type):
- # `pyarrow.ListType`
- detected_mode = "REPEATED"
- detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.values.type.id)
-
- # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds
- # it to such datetimes, causing them to be recognized as TIMESTAMP type.
- # We thus additionally check the actual data to see if we need to overrule
- # that and choose DATETIME instead.
- # Note that this should only be needed for datetime values inside a list,
- # since scalar datetime values have a proper Pandas dtype that allows
- # distinguishing between timezone-naive and timezone-aware values before
- # even requiring the additional schema augment logic in this method.
- if detected_type == "TIMESTAMP":
- valid_item = _first_array_valid(dataframe[field.name])
- if isinstance(valid_item, datetime) and valid_item.tzinfo is None:
- detected_type = "DATETIME"
- else:
- detected_mode = field.mode
- detected_type = ARROW_SCALAR_IDS_TO_BQ.get(arrow_table.type.id)
-
- if detected_type is None:
- unknown_type_fields.append(field)
- continue
+ if not pyarrow:
+ return None
- new_field = schema.SchemaField(
- name=field.name,
- field_type=detected_type,
- mode=detected_mode,
- description=field.description,
- fields=field.fields,
- )
- augmented_schema.append(new_field)
+ arrow_table = pyarrow.array(series)
+ if pyarrow.types.is_list(arrow_table.type):
+ # `pyarrow.ListType`
+ mode = "REPEATED"
+ type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.values.type.id)
+
+ # For timezone-naive datetimes, pyarrow assumes the UTC timezone and adds
+ # it to such datetimes, causing them to be recognized as TIMESTAMP type.
+ # We thus additionally check the actual data to see if we need to overrule
+ # that and choose DATETIME instead.
+ # Note that this should only be needed for datetime values inside a list,
+ # since scalar datetime values have a proper Pandas dtype that allows
+ # distinguishing between timezone-naive and timezone-aware values before
+ # even requiring the additional schema augment logic in this method.
+ if type == "TIMESTAMP":
+ valid_item = _first_array_valid(series)
+ if isinstance(valid_item, datetime) and valid_item.tzinfo is None:
+ type = "DATETIME"
+ else:
+ mode = "NULLABLE" # default mode
+ type = _pyarrow_helpers.arrow_scalar_ids_to_bq(arrow_table.type.id)
+ if type == "NUMERIC" and arrow_table.type.scale > 9:
+ type = "BIGNUMERIC"
- if unknown_type_fields:
- warnings.warn(
- "Pyarrow could not determine the type of columns: {}.".format(
- ", ".join(field.name for field in unknown_type_fields)
- )
- )
+ if type is not None:
+ return schema.SchemaField(name, type, mode)
+ else:
return None
- return augmented_schema
- # pytype: enable=attribute-error
-
def dataframe_to_arrow(dataframe, bq_schema):
"""Convert pandas dataframe to Arrow table, using BigQuery schema.
@@ -660,11 +703,13 @@ def dataframe_to_parquet(
This argument is ignored for ``pyarrow`` versions earlier than ``4.0.0``.
"""
+ pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import(raise_if_error=True)
+
import pyarrow.parquet # type: ignore
kwargs = (
{"use_compliant_nested_type": parquet_use_compliant_nested_type}
- if _helpers.PYARROW_VERSIONS.use_compliant_nested_type
+ if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type
else {}
)
@@ -769,20 +814,35 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page):
def _download_table_bqstorage_stream(
download_state, bqstorage_client, session, stream, worker_queue, page_to_item
):
- reader = bqstorage_client.read_rows(stream.name)
-
- # Avoid deprecation warnings for passing in unnecessary read session.
- # https://github.com/googleapis/python-bigquery-storage/issues/229
- if _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional:
- rowstream = reader.rows()
- else:
- rowstream = reader.rows(session)
+ download_state.start()
+ try:
+ reader = bqstorage_client.read_rows(stream.name)
- for page in rowstream.pages:
- if download_state.done:
- return
- item = page_to_item(page)
- worker_queue.put(item)
+ # Avoid deprecation warnings for passing in unnecessary read session.
+ # https://github.com/googleapis/python-bigquery-storage/issues/229
+ if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional:
+ rowstream = reader.rows()
+ else:
+ rowstream = reader.rows(session)
+
+ for page in rowstream.pages:
+ item = page_to_item(page)
+
+ # Make sure we set a timeout on put() so that we give the worker
+ # thread opportunities to shutdown gracefully, for example if the
+ # parent thread shuts down or the parent generator object which
+ # collects rows from all workers goes out of scope. See:
+ # https://github.com/googleapis/python-bigquery/issues/2032
+ while True:
+ if download_state.done:
+ return
+ try:
+ worker_queue.put(item, timeout=_PROGRESS_INTERVAL)
+ break
+ except queue.Full:
+ continue
+ finally:
+ download_state.finish()
def _nowait(futures):
@@ -800,18 +860,58 @@ def _nowait(futures):
def _download_table_bqstorage(
- project_id,
- table,
- bqstorage_client,
- preserve_order=False,
- selected_fields=None,
- page_to_item=None,
- max_queue_size=_MAX_QUEUE_SIZE_DEFAULT,
-):
- """Use (faster, but billable) BQ Storage API to construct DataFrame."""
+ project_id: str,
+ table: Any,
+ bqstorage_client: Any,
+ preserve_order: bool = False,
+ selected_fields: Optional[List[Any]] = None,
+ page_to_item: Optional[Callable] = None,
+ max_queue_size: Any = _MAX_QUEUE_SIZE_DEFAULT,
+ max_stream_count: Optional[int] = None,
+ download_state: Optional[_DownloadState] = None,
+) -> Generator[Any, None, None]:
+ """Downloads a BigQuery table using the BigQuery Storage API.
+
+ This method uses the faster, but potentially more expensive, BigQuery
+ Storage API to download a table as a Pandas DataFrame. It supports
+ parallel downloads and optional data transformations.
+
+ Args:
+ project_id (str): The ID of the Google Cloud project containing
+ the table.
+ table (Any): The BigQuery table to download.
+ bqstorage_client (Any): An
+ authenticated BigQuery Storage API client.
+ preserve_order (bool, optional): Whether to preserve the order
+ of the rows as they are read from BigQuery. If True this limits
+ the number of streams to one and overrides `max_stream_count`.
+ Defaults to False.
+ selected_fields (Optional[List[SchemaField]]):
+ A list of BigQuery schema fields to select for download. If None,
+ all fields are downloaded. Defaults to None.
+        page_to_item (Optional[Callable]): An optional callable that takes a
+            page of data from the BigQuery Storage API and converts it into
+            the item (for example, an Arrow table or pandas DataFrame) that is
+            placed on the worker queue. Defaults to None.
+ max_stream_count (Optional[int]): The maximum number of
+ concurrent streams to use for downloading data. If `preserve_order`
+ is True, the requested streams are limited to 1 regardless of the
+ `max_stream_count` value. If 0 or None, then the number of
+ requested streams will be unbounded. Defaults to None.
+ download_state (Optional[_DownloadState]):
+ A threadsafe state object which can be used to observe the
+ behavior of the worker threads created by this method.
+
+ Yields:
+ pandas.DataFrame: Pandas DataFrames, one for each chunk of data
+ downloaded from BigQuery.
+
+ Raises:
+ ValueError: If attempting to read from a specific partition or snapshot.
+
+ Note:
+ This method requires the `google-cloud-bigquery-storage` library
+ to be installed.
+ """
- # Passing a BQ Storage client in implies that the BigQuery Storage library
- # is available and can be imported.
from google.cloud import bigquery_storage
if "$" in table.table_id:
@@ -821,10 +921,11 @@ def _download_table_bqstorage(
if "@" in table.table_id:
raise ValueError("Reading from a specific snapshot is not currently supported.")
- requested_streams = 1 if preserve_order else 0
+ requested_streams = determine_requested_streams(preserve_order, max_stream_count)
- requested_session = bigquery_storage.types.ReadSession(
- table=table.to_bqstorage(), data_format=bigquery_storage.types.DataFormat.ARROW
+ requested_session = bigquery_storage.types.stream.ReadSession(
+ table=table.to_bqstorage(),
+ data_format=bigquery_storage.types.stream.DataFormat.ARROW,
)
if selected_fields is not None:
for field in selected_fields:
@@ -832,7 +933,8 @@ def _download_table_bqstorage(
if _ARROW_COMPRESSION_SUPPORT:
requested_session.read_options.arrow_serialization_options.buffer_compression = (
- ArrowSerializationOptions.CompressionCodec.LZ4_FRAME
+ # CompressionCodec(1) -> LZ4_FRAME
+ ArrowSerializationOptions.CompressionCodec(1)
)
session = bqstorage_client.create_read_session(
@@ -855,7 +957,8 @@ def _download_table_bqstorage(
# Use _DownloadState to notify worker threads when to quit.
# See: https://stackoverflow.com/a/29237343/101923
- download_state = _DownloadState()
+ if download_state is None:
+ download_state = _DownloadState()
# Create a queue to collect frames as they are created in each thread.
#
@@ -868,7 +971,7 @@ def _download_table_bqstorage(
elif max_queue_size is None:
max_queue_size = 0 # unbounded
- worker_queue = queue.Queue(maxsize=max_queue_size)
+    worker_queue: queue.Queue[Any] = queue.Queue(maxsize=max_queue_size)
with concurrent.futures.ThreadPoolExecutor(max_workers=total_streams) as pool:
try:
@@ -894,7 +997,7 @@ def _download_table_bqstorage(
# we want to block on the queue's get method, instead. This
# prevents the queue from filling up, because the main thread
# has smaller gaps in time between calls to the queue's get
- # method. For a detailed explaination, see:
+ # method. For a detailed explanation, see:
# https://friendliness.dev/2019/06/18/python-nowait/
done, not_done = _nowait(not_done)
for future in done:
@@ -933,6 +1036,7 @@ def download_arrow_bqstorage(
preserve_order=False,
selected_fields=None,
max_queue_size=_MAX_QUEUE_SIZE_DEFAULT,
+ max_stream_count=None,
):
return _download_table_bqstorage(
project_id,
@@ -942,6 +1046,7 @@ def download_arrow_bqstorage(
selected_fields=selected_fields,
page_to_item=_bqstorage_page_to_arrow,
max_queue_size=max_queue_size,
+ max_stream_count=max_stream_count,
)
@@ -954,6 +1059,7 @@ def download_dataframe_bqstorage(
preserve_order=False,
selected_fields=None,
max_queue_size=_MAX_QUEUE_SIZE_DEFAULT,
+ max_stream_count=None,
):
page_to_item = functools.partial(_bqstorage_page_to_dataframe, column_names, dtypes)
return _download_table_bqstorage(
@@ -964,6 +1070,7 @@ def download_dataframe_bqstorage(
selected_fields=selected_fields,
page_to_item=page_to_item,
max_queue_size=max_queue_size,
+ max_stream_count=max_stream_count,
)
@@ -979,6 +1086,25 @@ def dataframe_to_json_generator(dataframe):
# considered a NaN, however.
if isinstance(is_nan, bool) and is_nan:
continue
+
+ # Convert numpy types to corresponding Python types.
+ # https://stackoverflow.com/a/60441783/101923
+ if isinstance(value, numpy.bool_):
+ value = bool(value)
+ elif isinstance(
+ value,
+ (
+ numpy.int64,
+ numpy.int32,
+ numpy.int16,
+ numpy.int8,
+ numpy.uint64,
+ numpy.uint32,
+ numpy.uint16,
+ numpy.uint8,
+ ),
+ ):
+ value = int(value)
output[column] = value
yield output
@@ -989,3 +1115,40 @@ def verify_pandas_imports():
raise ValueError(_NO_PANDAS_ERROR) from pandas_import_exception
if db_dtypes is None:
raise ValueError(_NO_DB_TYPES_ERROR) from db_dtypes_import_exception
+
+
+def determine_requested_streams(
+ preserve_order: bool,
+ max_stream_count: Union[int, None],
+) -> int:
+ """Determines the value of requested_streams based on the values of
+ `preserve_order` and `max_stream_count`.
+
+ Args:
+ preserve_order (bool): Whether to preserve the order of streams. If True,
+ this limits the number of streams to one. `preserve_order` takes
+ precedence over `max_stream_count`.
+        max_stream_count (Union[int, None]): The maximum number of streams
+ allowed. Must be a non-negative number or None, where None indicates
+ the value is unset. NOTE: if `preserve_order` is also set, it takes
+ precedence over `max_stream_count`, thus to ensure that `max_stream_count`
+            is used, ensure that `preserve_order` is False.
+
+ Returns:
+ (int) The appropriate value for requested_streams.
+ """
+
+ if preserve_order:
+ # If preserve order is set, it takes precedence.
+ # Limit the requested streams to 1, to ensure that order
+        # is preserved.
+ return 1
+
+ elif max_stream_count is not None:
+ # If preserve_order is not set, only then do we consider max_stream_count
+ if max_stream_count <= -1:
+ raise ValueError("max_stream_count must be non-negative OR None")
+ return max_stream_count
+
+ # Default to zero requested streams (unbounded).
+ return 0
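+
+
+# Quick illustration of the precedence rules above (argument values are arbitrary):
+#
+#   determine_requested_streams(preserve_order=True, max_stream_count=8)      # -> 1
+#   determine_requested_streams(preserve_order=False, max_stream_count=8)     # -> 8
+#   determine_requested_streams(preserve_order=False, max_stream_count=None)  # -> 0 (unbounded)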
diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py
new file mode 100644
index 000000000..03c70bf63
--- /dev/null
+++ b/google/cloud/bigquery/_pyarrow_helpers.py
@@ -0,0 +1,147 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Shared helper functions for connecting BigQuery and pyarrow.
+
+NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package,
+instead. See: go/pandas-gbq-and-bigframes-redundancy,
+https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/bigquery_to_pyarrow.py
+and
+https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py
+"""
+
+from typing import Any
+
+try:
+ import pyarrow # type: ignore
+except ImportError:
+ pyarrow = None
+
+try:
+ import db_dtypes # type: ignore
+
+ db_dtypes_import_exception = None
+except ImportError as exc:
+ db_dtypes = None
+ db_dtypes_import_exception = exc
+
+
+def pyarrow_datetime():
+ return pyarrow.timestamp("us", tz=None)
+
+
+def pyarrow_numeric():
+ return pyarrow.decimal128(38, 9)
+
+
+def pyarrow_bignumeric():
+ # 77th digit is partial.
+ # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#decimal_types
+ return pyarrow.decimal256(76, 38)
+
+
+def pyarrow_time():
+ return pyarrow.time64("us")
+
+
+def pyarrow_timestamp():
+ return pyarrow.timestamp("us", tz="UTC")
+
+
+_BQ_TO_ARROW_SCALARS = {}
+_ARROW_SCALAR_IDS_TO_BQ = {}
+
+if pyarrow:
+    # This dictionary is duplicated in bigquery_storage/test/unit/test_reader.py
+ # When modifying it be sure to update it there as well.
+    # NOTE: the "BIGNUMERIC" mapping is appended to this dict below, after the literal.
+ _BQ_TO_ARROW_SCALARS = {
+ "BOOL": pyarrow.bool_,
+ "BOOLEAN": pyarrow.bool_,
+ "BYTES": pyarrow.binary,
+ "DATE": pyarrow.date32,
+ "DATETIME": pyarrow_datetime,
+ "FLOAT": pyarrow.float64,
+ "FLOAT64": pyarrow.float64,
+ "GEOGRAPHY": pyarrow.string,
+ "INT64": pyarrow.int64,
+ "INTEGER": pyarrow.int64,
+ # Normally, we'd prefer JSON type built-in to pyarrow (added in 19.0.0),
+ # but we'd like this to map as closely to the BQ Storage API as
+        # possible, which uses the string() dtype, as JSON support in BigQuery
+        # predates the built-in Arrow JSON type by several years.
+ "JSON": pyarrow.string,
+ "NUMERIC": pyarrow_numeric,
+ "STRING": pyarrow.string,
+ "TIME": pyarrow_time,
+ "TIMESTAMP": pyarrow_timestamp,
+ }
+
+ # DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead.
+ _ARROW_SCALAR_IDS_TO_BQ = {
+ # https://arrow.apache.org/docs/python/api/datatypes.html#type-classes
+ pyarrow.bool_().id: "BOOL",
+ pyarrow.int8().id: "INT64",
+ pyarrow.int16().id: "INT64",
+ pyarrow.int32().id: "INT64",
+ pyarrow.int64().id: "INT64",
+ pyarrow.uint8().id: "INT64",
+ pyarrow.uint16().id: "INT64",
+ pyarrow.uint32().id: "INT64",
+ pyarrow.uint64().id: "INT64",
+ pyarrow.float16().id: "FLOAT64",
+ pyarrow.float32().id: "FLOAT64",
+ pyarrow.float64().id: "FLOAT64",
+ pyarrow.time32("ms").id: "TIME",
+ pyarrow.time64("ns").id: "TIME",
+ pyarrow.timestamp("ns").id: "TIMESTAMP",
+ pyarrow.date32().id: "DATE",
+ pyarrow.date64().id: "DATETIME", # because millisecond resolution
+ pyarrow.binary().id: "BYTES",
+ pyarrow.string().id: "STRING", # also alias for pyarrow.utf8()
+ pyarrow.large_string().id: "STRING",
+ # The exact scale and precision don't matter, see below.
+ pyarrow.decimal128(38, scale=9).id: "NUMERIC",
+ # NOTE: all extension types (e.g. json_, uuid, db_dtypes.JSONArrowType)
+ # have the same id (31 as of version 19.0.1), so these should not be
+ # matched by id.
+ }
+
+ _BQ_TO_ARROW_SCALARS["BIGNUMERIC"] = pyarrow_bignumeric
+ # The exact decimal's scale and precision are not important, as only
+ # the type ID matters, and it's the same for all decimal256 instances.
+ _ARROW_SCALAR_IDS_TO_BQ[pyarrow.decimal256(76, scale=38).id] = "BIGNUMERIC"
+
+
+def bq_to_arrow_scalars(bq_scalar: str):
+ """
+ DEPRECATED: update pandas_gbq.schema.bigquery_to_pyarrow, instead, which is
+ to be added in https://github.com/googleapis/python-bigquery-pandas/pull/893.
+
+ Returns:
+ The Arrow scalar type that the input BigQuery scalar type maps to.
+ If it cannot find the BigQuery scalar, return None.
+ """
+ return _BQ_TO_ARROW_SCALARS.get(bq_scalar)
+
+
+def arrow_scalar_ids_to_bq(arrow_scalar: Any):
+ """
+ DEPRECATED: update pandas_gbq.schema.pyarrow_to_bigquery, instead.
+
+ Returns:
+ The BigQuery scalar type that the input arrow scalar type maps to.
+ If it cannot find the arrow scalar, return None.
+ """
+ return _ARROW_SCALAR_IDS_TO_BQ.get(arrow_scalar)
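+
+
+# Rough lookup sketch (assumes pyarrow is installed):
+#
+#   bq_to_arrow_scalars("TIMESTAMP")            # -> pyarrow_timestamp
+#   arrow_scalar_ids_to_bq(pyarrow.int32().id)  # -> "INT64"
+#   bq_to_arrow_scalars("RANGE")                # -> None (RANGE is handled
+#                                               #    separately in _pandas_helpers)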
diff --git a/google/cloud/bigquery/_tqdm_helpers.py b/google/cloud/bigquery/_tqdm_helpers.py
index f2355ab3b..22ccee971 100644
--- a/google/cloud/bigquery/_tqdm_helpers.py
+++ b/google/cloud/bigquery/_tqdm_helpers.py
@@ -15,6 +15,7 @@
"""Shared helper functions for tqdm progress bar."""
import concurrent.futures
+import sys
import time
import typing
from typing import Optional
@@ -22,9 +23,14 @@
try:
import tqdm # type: ignore
-except ImportError: # pragma: NO COVER
+except ImportError:
tqdm = None
+try:
+ import tqdm.notebook as tqdm_notebook # type: ignore
+except ImportError:
+ tqdm_notebook = None
+
if typing.TYPE_CHECKING: # pragma: NO COVER
from google.cloud.bigquery import QueryJob
from google.cloud.bigquery.table import RowIterator
@@ -39,19 +45,32 @@
def get_progress_bar(progress_bar_type, description, total, unit):
"""Construct a tqdm progress bar object, if tqdm is installed."""
- if tqdm is None:
+    if tqdm is None or (tqdm_notebook is None and progress_bar_type == "tqdm_notebook"):
if progress_bar_type is not None:
warnings.warn(_NO_TQDM_ERROR, UserWarning, stacklevel=3)
return None
try:
if progress_bar_type == "tqdm":
- return tqdm.tqdm(desc=description, total=total, unit=unit)
+ return tqdm.tqdm(
+ bar_format="{l_bar}{bar}|",
+ colour="green",
+ desc=description,
+ file=sys.stdout,
+ total=total,
+ unit=unit,
+ )
elif progress_bar_type == "tqdm_notebook":
- return tqdm.tqdm_notebook(desc=description, total=total, unit=unit)
+ return tqdm_notebook.tqdm(
+ bar_format="{l_bar}{bar}|",
+ desc=description,
+ file=sys.stdout,
+ total=total,
+ unit=unit,
+ )
elif progress_bar_type == "tqdm_gui":
return tqdm.tqdm_gui(desc=description, total=total, unit=unit)
- except (KeyError, TypeError):
+ except (KeyError, TypeError): # pragma: NO COVER
# Protect ourselves from any tqdm errors. In case of
# unexpected tqdm behavior, just fall back to showing
# no progress bar.
@@ -79,7 +98,7 @@ def wait_for_query(
"""
default_total = 1
current_stage = None
- start_time = time.time()
+ start_time = time.perf_counter()
progress_bar = get_progress_bar(
progress_bar_type, "Query is running", default_total, "query"
@@ -94,11 +113,7 @@ def wait_for_query(
current_stage = query_job.query_plan[i]
progress_bar.total = len(query_job.query_plan)
progress_bar.set_description(
- "Query executing stage {} and status {} : {:0.2f}s".format(
- current_stage.name,
- current_stage.status,
- time.time() - start_time,
- ),
+ f"Query executing stage {current_stage.name} and status {current_stage.status} : {time.perf_counter() - start_time:.2f}s"
)
try:
query_result = query_job.result(
@@ -106,7 +121,7 @@ def wait_for_query(
)
progress_bar.update(default_total)
progress_bar.set_description(
- "Query complete after {:0.2f}s".format(time.time() - start_time),
+ f"Job ID {query_job.job_id} successfully executed",
)
break
except concurrent.futures.TimeoutError:
diff --git a/google/cloud/bigquery/_versions_helpers.py b/google/cloud/bigquery/_versions_helpers.py
new file mode 100644
index 000000000..cfbf70a8e
--- /dev/null
+++ b/google/cloud/bigquery/_versions_helpers.py
@@ -0,0 +1,264 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Shared helper functions for verifying versions of installed modules."""
+
+import sys
+from typing import Any
+
+import packaging.version
+
+from google.cloud.bigquery import exceptions
+
+
+_MIN_PYARROW_VERSION = packaging.version.Version("3.0.0")
+_MIN_BQ_STORAGE_VERSION = packaging.version.Version("2.0.0")
+_BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION = packaging.version.Version("2.6.0")
+_MIN_PANDAS_VERSION = packaging.version.Version("1.1.0")
+
+_MIN_PANDAS_VERSION_RANGE = packaging.version.Version("1.5.0")
+_MIN_PYARROW_VERSION_RANGE = packaging.version.Version("10.0.1")
+
+
+class PyarrowVersions:
+ """Version comparisons for pyarrow package."""
+
+ def __init__(self):
+ self._installed_version = None
+
+ @property
+ def installed_version(self) -> packaging.version.Version:
+ """Return the parsed version of pyarrow."""
+ if self._installed_version is None:
+ import pyarrow # type: ignore
+
+ self._installed_version = packaging.version.parse(
+ # Use 0.0.0, since it is earlier than any released version.
+ # Legacy versions also have the same property, but
+ # creating a LegacyVersion has been deprecated.
+ # https://github.com/pypa/packaging/issues/321
+ getattr(pyarrow, "__version__", "0.0.0")
+ )
+
+ return self._installed_version
+
+ @property
+ def use_compliant_nested_type(self) -> bool:
+ return self.installed_version.major >= 4
+
+ def try_import(self, raise_if_error: bool = False) -> Any:
+ """Verifies that a recent enough version of pyarrow extra is installed.
+
+ The function assumes that pyarrow extra is installed, and should thus
+ be used in places where this assumption holds.
+
+ Because `pip` can install an outdated version of this extra despite
+ the constraints in `setup.py`, the calling code can use this helper
+ to verify the version compatibility at runtime.
+
+ Returns:
+ The ``pyarrow`` module or ``None``.
+
+ Raises:
+ exceptions.LegacyPyarrowError:
+ If the pyarrow package is outdated and ``raise_if_error`` is
+ ``True``.
+ """
+ try:
+ import pyarrow
+ except ImportError as exc:
+ if raise_if_error:
+ raise exceptions.LegacyPyarrowError(
+ "pyarrow package not found. Install pyarrow version >="
+ f" {_MIN_PYARROW_VERSION}."
+ ) from exc
+ return None
+
+ if self.installed_version < _MIN_PYARROW_VERSION:
+ if raise_if_error:
+ msg = (
+ "Dependency pyarrow is outdated, please upgrade"
+ f" it to version >= {_MIN_PYARROW_VERSION}"
+ f" (version found: {self.installed_version})."
+ )
+ raise exceptions.LegacyPyarrowError(msg)
+ return None
+
+ return pyarrow
+
+
+PYARROW_VERSIONS = PyarrowVersions()
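+
+# Typical usage elsewhere in this package (sketch):
+#
+#   pyarrow = PYARROW_VERSIONS.try_import()
+#   if pyarrow is None:
+#       ...  # take a code path that does not require pyarrow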
+
+
+class BQStorageVersions:
+ """Version comparisons for google-cloud-bigqueyr-storage package."""
+
+ def __init__(self):
+ self._installed_version = None
+
+ @property
+ def installed_version(self) -> packaging.version.Version:
+ """Return the parsed version of google-cloud-bigquery-storage."""
+ if self._installed_version is None:
+ from google.cloud import bigquery_storage
+
+ self._installed_version = packaging.version.parse(
+ # Use 0.0.0, since it is earlier than any released version.
+ # Legacy versions also have the same property, but
+ # creating a LegacyVersion has been deprecated.
+ # https://github.com/pypa/packaging/issues/321
+ getattr(bigquery_storage, "__version__", "0.0.0")
+ )
+
+ return self._installed_version # type: ignore
+
+ @property
+ def is_read_session_optional(self) -> bool:
+ """True if read_session is optional to rows().
+
+ See: https://github.com/googleapis/python-bigquery-storage/pull/228
+ """
+ return self.installed_version >= _BQ_STORAGE_OPTIONAL_READ_SESSION_VERSION
+
+ def try_import(self, raise_if_error: bool = False) -> Any:
+ """Tries to import the bigquery_storage module, and returns results
+ accordingly. It also verifies the module version is recent enough.
+
+ If the import succeeds, returns the ``bigquery_storage`` module.
+
+ If the import fails,
+ returns ``None`` when ``raise_if_error == False``,
+ raises Error when ``raise_if_error == True``.
+
+ Returns:
+ The ``bigquery_storage`` module or ``None``.
+
+ Raises:
+ exceptions.BigQueryStorageNotFoundError:
+ If google-cloud-bigquery-storage is not installed
+ exceptions.LegacyBigQueryStorageError:
+ If google-cloud-bigquery-storage package is outdated
+ """
+ try:
+ from google.cloud import bigquery_storage # type: ignore
+ except ImportError:
+ if raise_if_error:
+ msg = (
+ "Package google-cloud-bigquery-storage not found. "
+ "Install google-cloud-bigquery-storage version >= "
+ f"{_MIN_BQ_STORAGE_VERSION}."
+ )
+ raise exceptions.BigQueryStorageNotFoundError(msg)
+ return None
+
+ if self.installed_version < _MIN_BQ_STORAGE_VERSION:
+ if raise_if_error:
+ msg = (
+ "Dependency google-cloud-bigquery-storage is outdated, "
+ f"please upgrade it to version >= {_MIN_BQ_STORAGE_VERSION} "
+ f"(version found: {self.installed_version})."
+ )
+ raise exceptions.LegacyBigQueryStorageError(msg)
+ return None
+
+ return bigquery_storage
+
+
+BQ_STORAGE_VERSIONS = BQStorageVersions()
+
+
+class PandasVersions:
+ """Version comparisons for pandas package."""
+
+ def __init__(self):
+ self._installed_version = None
+
+ @property
+ def installed_version(self) -> packaging.version.Version:
+ """Return the parsed version of pandas"""
+ if self._installed_version is None:
+ import pandas # type: ignore
+
+ self._installed_version = packaging.version.parse(
+ # Use 0.0.0, since it is earlier than any released version.
+ # Legacy versions also have the same property, but
+ # creating a LegacyVersion has been deprecated.
+ # https://github.com/pypa/packaging/issues/321
+ getattr(pandas, "__version__", "0.0.0")
+ )
+
+ return self._installed_version
+
+ def try_import(self, raise_if_error: bool = False) -> Any:
+ """Verify that a recent enough version of pandas extra is installed.
+ The function assumes that pandas extra is installed, and should thus
+ be used in places where this assumption holds.
+ Because `pip` can install an outdated version of this extra despite
+ the constraints in `setup.py`, the calling code can use this helper
+ to verify the version compatibility at runtime.
+ Returns:
+ The ``pandas`` module or ``None``.
+ Raises:
+ exceptions.LegacyPandasError:
+ If the pandas package is outdated and ``raise_if_error`` is
+ ``True``.
+ """
+ try:
+ import pandas
+ except ImportError as exc:
+ if raise_if_error:
+ raise exceptions.LegacyPandasError(
+ "pandas package not found. Install pandas version >="
+ f" {_MIN_PANDAS_VERSION}"
+ ) from exc
+ return None
+
+ if self.installed_version < _MIN_PANDAS_VERSION:
+ if raise_if_error:
+ msg = (
+ "Dependency pandas is outdated, please upgrade"
+ f" it to version >= {_MIN_PANDAS_VERSION}"
+ f" (version found: {self.installed_version})."
+ )
+ raise exceptions.LegacyPandasError(msg)
+ return None
+
+ return pandas
+
+
+PANDAS_VERSIONS = PandasVersions()
+
+# Since RANGE support in pandas requires specific versions
+# of both pyarrow and pandas, we make this a separate
+# constant instead of as a property of PANDAS_VERSIONS
+# or PYARROW_VERSIONS.
+SUPPORTS_RANGE_PYARROW = (
+ PANDAS_VERSIONS.try_import() is not None
+ and PANDAS_VERSIONS.installed_version >= _MIN_PANDAS_VERSION_RANGE
+ and PYARROW_VERSIONS.try_import() is not None
+ and PYARROW_VERSIONS.installed_version >= _MIN_PYARROW_VERSION_RANGE
+)
+
+
+def extract_runtime_version():
+ # Retrieve the version information
+ version_info = sys.version_info
+
+ # Extract the major, minor, and micro components
+ major = version_info.major
+ minor = version_info.minor
+ micro = version_info.micro
+
+    # Return the version components as a (major, minor, micro) tuple
+ return major, minor, micro
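+
+
+# For example (values depend on the running interpreter):
+#
+#   major, minor, micro = extract_runtime_version()
+#   # -> e.g. (3, 11, 4) under CPython 3.11.4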
diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py
index 1200d78f9..c50e7c2d7 100644
--- a/google/cloud/bigquery/client.py
+++ b/google/cloud/bigquery/client.py
@@ -15,6 +15,7 @@
"""Client for interacting with the Google BigQuery API."""
from __future__ import absolute_import
+from __future__ import annotations
from __future__ import division
from collections import abc as collections_abc
@@ -31,6 +32,7 @@
import typing
from typing import (
Any,
+ Callable,
Dict,
IO,
Iterable,
@@ -44,6 +46,8 @@
import uuid
import warnings
+import requests
+
from google import resumable_media # type: ignore
from google.resumable_media.requests import MultipartUpload # type: ignore
from google.resumable_media.requests import ResumableUpload
@@ -56,27 +60,41 @@
import google.cloud._helpers # type: ignore
from google.cloud import exceptions # pytype: disable=import-error
from google.cloud.client import ClientWithProject # type: ignore # pytype: disable=import-error
-from google.cloud.bigquery_storage_v1.services.big_query_read.client import (
- DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO,
-)
+try:
+ from google.cloud.bigquery_storage_v1.services.big_query_read.client import (
+ DEFAULT_CLIENT_INFO as DEFAULT_BQSTORAGE_CLIENT_INFO,
+ )
+except ImportError:
+ DEFAULT_BQSTORAGE_CLIENT_INFO = None # type: ignore
+
+
+from google.auth.credentials import Credentials
+from google.cloud.bigquery._http import Connection
from google.cloud.bigquery import _job_helpers
-from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id
+from google.cloud.bigquery import _pandas_helpers
+from google.cloud.bigquery import _versions_helpers
+from google.cloud.bigquery import enums
+from google.cloud.bigquery import exceptions as bq_exceptions
+from google.cloud.bigquery import job
from google.cloud.bigquery._helpers import _get_sub_prop
from google.cloud.bigquery._helpers import _record_field_to_json
from google.cloud.bigquery._helpers import _str_or_none
from google.cloud.bigquery._helpers import _verify_job_config_type
from google.cloud.bigquery._helpers import _get_bigquery_host
from google.cloud.bigquery._helpers import _DEFAULT_HOST
-from google.cloud.bigquery._http import Connection
-from google.cloud.bigquery import _pandas_helpers
+from google.cloud.bigquery._helpers import _DEFAULT_HOST_TEMPLATE
+from google.cloud.bigquery._helpers import _DEFAULT_UNIVERSE
+from google.cloud.bigquery._helpers import _validate_universe
+from google.cloud.bigquery._helpers import _get_client_universe
+from google.cloud.bigquery._helpers import TimeoutType
+from google.cloud.bigquery._job_helpers import make_job_id as _make_job_id
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.dataset import DatasetReference
-from google.cloud.bigquery import enums
-from google.cloud.bigquery.enums import AutoRowIDs
-from google.cloud.bigquery.opentelemetry_tracing import create_span
-from google.cloud.bigquery import job
+
+from google.cloud.bigquery.enums import AutoRowIDs, DatasetView, UpdateMode
+from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery.job import (
CopyJob,
CopyJobConfig,
@@ -90,11 +108,14 @@
from google.cloud.bigquery.model import Model
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.model import _model_arg_to_model_ref
+from google.cloud.bigquery.opentelemetry_tracing import create_span
from google.cloud.bigquery.query import _QueryResults
from google.cloud.bigquery.retry import (
DEFAULT_JOB_RETRY,
DEFAULT_RETRY,
DEFAULT_TIMEOUT,
+ DEFAULT_GET_JOB_TIMEOUT,
+ POLLING_DEFAULT_VALUE,
)
from google.cloud.bigquery.routine import Routine
from google.cloud.bigquery.routine import RoutineReference
@@ -105,20 +126,20 @@
from google.cloud.bigquery.table import TableListItem
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import RowIterator
-from google.cloud.bigquery.format_options import ParquetOptions
-from google.cloud.bigquery import _helpers
-TimeoutType = Union[float, None]
+pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
+pandas = (
+ _versions_helpers.PANDAS_VERSIONS.try_import()
+)  # mypy check fails because pandas is imported lazily rather than at module scope; the related "type: ignore" comments work around this.
+
+
ResumableTimeoutType = Union[
None, float, Tuple[float, float]
] # for resumable media methods
if typing.TYPE_CHECKING: # pragma: NO COVER
# os.PathLike is only subscriptable in Python 3.9+, thus shielding with a condition.
- PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes]]
- import pandas # type: ignore
- import requests # required by api-core
-
+ PathType = Union[str, bytes, os.PathLike[str], os.PathLike[bytes], io.IOBase]
_DEFAULT_CHUNKSIZE = 100 * 1024 * 1024 # 100 MB
_MAX_MULTIPART_SIZE = 5 * 1024 * 1024
_DEFAULT_NUM_RETRIES = 6
@@ -192,6 +213,9 @@ class Client(ClientWithProject):
default_query_job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
Default ``QueryJobConfig``.
Will be merged into job configs passed into the ``query`` method.
+ default_load_job_config (Optional[google.cloud.bigquery.job.LoadJobConfig]):
+ Default ``LoadJobConfig``.
+ Will be merged into job configs passed into the ``load_table_*`` methods.
client_info (Optional[google.api_core.client_info.ClientInfo]):
The client info used to send a user-agent string along with API
requests. If ``None``, then default info will be used. Generally,
@@ -200,6 +224,10 @@ class Client(ClientWithProject):
client_options (Optional[Union[google.api_core.client_options.ClientOptions, Dict]]):
Client options used to set user options on the client. API Endpoint
should be set through client_options.
+ default_job_creation_mode (Optional[str]):
+ Sets the default job creation mode used by query methods such as
+ query_and_wait(). For lightweight queries, JOB_CREATION_OPTIONAL is
+ generally recommended.
Raises:
google.auth.exceptions.DefaultCredentialsError:
@@ -207,22 +235,29 @@ class Client(ClientWithProject):
to acquire default credentials.
"""
- SCOPE = ( # type: ignore
- "https://www.googleapis.com/auth/bigquery",
- "https://www.googleapis.com/auth/cloud-platform",
- )
+ SCOPE = ("https://www.googleapis.com/auth/cloud-platform",) # type: ignore
"""The scopes required for authenticating as a BigQuery consumer."""
def __init__(
self,
- project=None,
- credentials=None,
- _http=None,
- location=None,
- default_query_job_config=None,
- client_info=None,
- client_options=None,
+ project: Optional[str] = None,
+ credentials: Optional[Credentials] = None,
+ _http: Optional[requests.Session] = None,
+ location: Optional[str] = None,
+ default_query_job_config: Optional[QueryJobConfig] = None,
+ default_load_job_config: Optional[LoadJobConfig] = None,
+ client_info: Optional[google.api_core.client_info.ClientInfo] = None,
+ client_options: Optional[
+ Union[google.api_core.client_options.ClientOptions, Dict[str, Any]]
+ ] = None,
+ default_job_creation_mode: Optional[str] = None,
) -> None:
+ if client_options is None:
+ client_options = {}
+ if isinstance(client_options, dict):
+ client_options = google.api_core.client_options.from_dict(client_options)
+        # client_options is guaranteed to be a ClientOptions instance at this point.
+
super(Client, self).__init__(
project=project,
credentials=credentials,
@@ -230,27 +265,73 @@ def __init__(
_http=_http,
)
- kw_args = {"client_info": client_info}
+ kw_args: Dict[str, Any] = {"client_info": client_info}
bq_host = _get_bigquery_host()
kw_args["api_endpoint"] = bq_host if bq_host != _DEFAULT_HOST else None
- if client_options:
- if type(client_options) == dict:
- client_options = google.api_core.client_options.from_dict(
- client_options
+ client_universe = None
+ if client_options.api_endpoint:
+ api_endpoint = client_options.api_endpoint
+ kw_args["api_endpoint"] = api_endpoint
+ else:
+ client_universe = _get_client_universe(client_options)
+ if client_universe != _DEFAULT_UNIVERSE:
+ kw_args["api_endpoint"] = _DEFAULT_HOST_TEMPLATE.replace(
+ "{UNIVERSE_DOMAIN}", client_universe
)
- if client_options.api_endpoint:
- api_endpoint = client_options.api_endpoint
- kw_args["api_endpoint"] = api_endpoint
+ # Ensure credentials and universe are not in conflict.
+ if hasattr(self, "_credentials") and client_universe is not None:
+ _validate_universe(client_universe, self._credentials)
self._connection = Connection(self, **kw_args)
self._location = location
- self._default_query_job_config = copy.deepcopy(default_query_job_config)
+ self._default_load_job_config = copy.deepcopy(default_load_job_config)
+ self.default_job_creation_mode = default_job_creation_mode
+
+ # Use property setter so validation can run.
+ self.default_query_job_config = default_query_job_config
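A rough sketch of exercising the endpoint-resolution logic above from user code; the regional endpoint URL is hypothetical, and only ``api_endpoint`` (plus the dict form accepted via ``from_dict``) is shown.

.. code-block:: python

    from google.api_core.client_options import ClientOptions
    from google.cloud import bigquery

    # Hypothetical regional endpoint; an explicit api_endpoint short-circuits
    # the universe-domain resolution performed in __init__ above.
    client = bigquery.Client(
        client_options=ClientOptions(
            api_endpoint="https://bigquery.example-region.rep.googleapis.com"
        )
    )

    # A plain dict is also accepted and converted with client_options.from_dict().
    client = bigquery.Client(
        client_options={"api_endpoint": "https://bigquery.googleapis.com"}
    )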
@property
def location(self):
"""Default location for jobs / datasets / tables."""
return self._location
+ @property
+ def default_job_creation_mode(self):
+ """Default job creation mode used for query execution."""
+ return self._default_job_creation_mode
+
+ @default_job_creation_mode.setter
+ def default_job_creation_mode(self, value: Optional[str]):
+ self._default_job_creation_mode = value
+
+ @property
+ def default_query_job_config(self) -> Optional[QueryJobConfig]:
+ """Default ``QueryJobConfig`` or ``None``.
+
+ Will be merged into job configs passed into the ``query`` or
+ ``query_and_wait`` methods.
+ """
+ return self._default_query_job_config
+
+ @default_query_job_config.setter
+ def default_query_job_config(self, value: Optional[QueryJobConfig]):
+ if value is not None:
+ _verify_job_config_type(
+ value, QueryJobConfig, param_name="default_query_job_config"
+ )
+ self._default_query_job_config = copy.deepcopy(value)
+
+ @property
+ def default_load_job_config(self):
+ """Default ``LoadJobConfig``.
+ Will be merged into job configs passed into the ``load_table_*`` methods.
+ """
+ return self._default_load_job_config
+
+ @default_load_job_config.setter
+ def default_load_job_config(self, value: LoadJobConfig):
+ self._default_load_job_config = copy.deepcopy(value)
+
def close(self):
"""Close the underlying transport objects, releasing system resources.
@@ -265,12 +346,21 @@ def close(self):
def get_service_account_email(
self,
- project: str = None,
+ project: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> str:
"""Get the email address of the project's BigQuery service account
+ Example:
+
+ .. code-block:: python
+
+ from google.cloud import bigquery
+ client = bigquery.Client()
+ client.get_service_account_email()
+ # returns an email similar to: my_service_account@my-project.iam.gserviceaccount.com
+
Note:
This is the service account that BigQuery uses to manage tables
encrypted by a key in KMS.
@@ -285,14 +375,8 @@ def get_service_account_email(
before using ``retry``.
Returns:
- str: service account email address
-
- Example:
-
- >>> from google.cloud import bigquery
- >>> client = bigquery.Client()
- >>> client.get_service_account_email()
- my_service_account@my-project.iam.gserviceaccount.com
+ str:
+ service account email address
"""
if project is None:
@@ -311,11 +395,11 @@ def get_service_account_email(
def list_projects(
self,
- max_results: int = None,
- page_token: str = None,
+ max_results: Optional[int] = None,
+ page_token: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
- page_size: int = None,
+ page_size: Optional[int] = None,
) -> page_iterator.Iterator:
"""List projects for the project associated with this client.
@@ -374,14 +458,14 @@ def api_request(*args, **kwargs):
def list_datasets(
self,
- project: str = None,
+ project: Optional[str] = None,
include_all: bool = False,
- filter: str = None,
- max_results: int = None,
- page_token: str = None,
+ filter: Optional[str] = None,
+ max_results: Optional[int] = None,
+ page_token: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
- page_size: int = None,
+ page_size: Optional[int] = None,
) -> page_iterator.Iterator:
"""List datasets for the project associated with this client.
@@ -433,7 +517,6 @@ def list_datasets(
span_attributes = {"path": path}
def api_request(*args, **kwargs):
-
return self._call_api(
retry,
span_name="BigQuery.listDatasets",
@@ -455,7 +538,9 @@ def api_request(*args, **kwargs):
page_size=page_size,
)
- def dataset(self, dataset_id: str, project: str = None) -> DatasetReference:
+ def dataset(
+ self, dataset_id: str, project: Optional[str] = None
+ ) -> DatasetReference:
"""Deprecated: Construct a reference to a dataset.
.. deprecated:: 1.24.0
@@ -510,18 +595,33 @@ def _ensure_bqstorage_client(
An existing BigQuery Storage client instance. If ``None``, a new
instance is created and returned.
client_options:
- Custom options used with a new BigQuery Storage client instance if one
- is created.
+ Custom options used with a new BigQuery Storage client instance
+ if one is created.
client_info:
- The client info used with a new BigQuery Storage client instance if one
- is created.
+ The client info used with a new BigQuery Storage client
+ instance if one is created.
Returns:
A BigQuery Storage API client.
"""
- from google.cloud import bigquery_storage
- if bqstorage_client is None:
+ try:
+ bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import(
+ raise_if_error=True
+ )
+ except bq_exceptions.BigQueryStorageNotFoundError:
+ warnings.warn(
+ "Cannot create BigQuery Storage client, the dependency "
+ "google-cloud-bigquery-storage is not installed."
+ )
+ return None
+ except bq_exceptions.LegacyBigQueryStorageError as exc:
+ warnings.warn(
+ "Dependency google-cloud-bigquery-storage is outdated: " + str(exc)
+ )
+ return None
+
+ if bqstorage_client is None: # pragma: NO COVER
bqstorage_client = bigquery_storage.BigQueryReadClient(
credentials=self._credentials,
client_options=client_options,
@@ -555,9 +655,19 @@ def create_dataset(
) -> Dataset:
"""API call: create the dataset via a POST request.
+
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/datasets/insert
+ Example:
+
+ .. code-block:: python
+
+ from google.cloud import bigquery
+ client = bigquery.Client()
+ dataset = bigquery.Dataset('my_project.my_dataset')
+ dataset = client.create_dataset(dataset)
+
Args:
dataset (Union[ \
google.cloud.bigquery.dataset.Dataset, \
@@ -584,14 +694,6 @@ def create_dataset(
Raises:
google.cloud.exceptions.Conflict:
If the dataset already exists.
-
- Example:
-
- >>> from google.cloud import bigquery
- >>> client = bigquery.Client()
- >>> dataset = bigquery.Dataset('my_project.my_dataset')
- >>> dataset = client.create_dataset(dataset)
-
"""
dataset = self._dataset_from_arg(dataset)
if isinstance(dataset, DatasetReference):
@@ -765,6 +867,7 @@ def get_dataset(
dataset_ref: Union[DatasetReference, str],
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
+ dataset_view: Optional[DatasetView] = None,
) -> Dataset:
"""Fetch the dataset referenced by ``dataset_ref``
@@ -782,7 +885,21 @@ def get_dataset(
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
-
+ dataset_view (Optional[google.cloud.bigquery.enums.DatasetView]):
+ Specifies the view that determines which dataset information is
+ returned. By default, dataset metadata (e.g. friendlyName, description,
+ labels, etc) and ACL information are returned. This argument can
+ take on the following possible enum values.
+
+ * :attr:`~google.cloud.bigquery.enums.DatasetView.ACL`:
+ Includes dataset metadata and the ACL.
+ * :attr:`~google.cloud.bigquery.enums.DatasetView.FULL`:
+ Includes all dataset metadata, including the ACL and table metadata.
+ This view is not supported by the `datasets.list` API method.
+ * :attr:`~google.cloud.bigquery.enums.DatasetView.METADATA`:
+ Includes basic dataset metadata, but not the ACL.
+ * :attr:`~google.cloud.bigquery.enums.DatasetView.DATASET_VIEW_UNSPECIFIED`:
+ The server will decide which view to use. Currently defaults to FULL.
Returns:
google.cloud.bigquery.dataset.Dataset:
A ``Dataset`` instance.
@@ -792,6 +909,12 @@ def get_dataset(
dataset_ref, default_project=self.project
)
path = dataset_ref.path
+
+ if dataset_view:
+ query_params = {"datasetView": dataset_view.value}
+ else:
+ query_params = {}
+
span_attributes = {"path": path}
api_response = self._call_api(
retry,
@@ -800,6 +923,7 @@ def get_dataset(
method="GET",
path=path,
timeout=timeout,
+ query_params=query_params,
)
return Dataset.from_api_repr(api_response)
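A short sketch of the new ``dataset_view`` argument, assuming the hypothetical dataset ``my-project.my_dataset`` exists.

.. code-block:: python

    from google.cloud import bigquery
    from google.cloud.bigquery.enums import DatasetView

    client = bigquery.Client()
    # METADATA returns basic dataset metadata and skips the ACL portion.
    dataset = client.get_dataset(
        "my-project.my_dataset", dataset_view=DatasetView.METADATA
    )
    print(dataset.friendly_name, dataset.description)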
@@ -810,6 +934,35 @@ def get_iam_policy(
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Policy:
+ """Return the access control policy for a table resource.
+
+ Args:
+ table (Union[ \
+ google.cloud.bigquery.table.Table, \
+ google.cloud.bigquery.table.TableReference, \
+ google.cloud.bigquery.table.TableListItem, \
+ str, \
+ ]):
+ The table to get the access control policy for.
+ If a string is passed in, this method attempts to create a
+ table reference from a string using
+ :func:`~google.cloud.bigquery.table.TableReference.from_string`.
+ requested_policy_version (int):
+ Optional. The maximum policy version that will be used to format the policy.
+
+ Only version ``1`` is currently supported.
+
+ See: https://cloud.google.com/bigquery/docs/reference/rest/v2/GetPolicyOptions
+ retry (Optional[google.api_core.retry.Retry]):
+ How to retry the RPC.
+ timeout (Optional[float]):
+ The number of seconds to wait for the underlying HTTP transport
+ before using ``retry``.
+
+ Returns:
+ google.api_core.iam.Policy:
+ The access control policy.
+ """
table = _table_arg_to_table_ref(table, default_project=self.project)
if requested_policy_version != 1:
@@ -835,10 +988,56 @@ def set_iam_policy(
self,
table: Union[Table, TableReference, TableListItem, str],
policy: Policy,
- updateMask: str = None,
+ updateMask: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
+ *,
+ fields: Sequence[str] = (),
) -> Policy:
+ """Return the access control policy for a table resource.
+
+ Args:
+ table (Union[ \
+ google.cloud.bigquery.table.Table, \
+ google.cloud.bigquery.table.TableReference, \
+ google.cloud.bigquery.table.TableListItem, \
+ str, \
+ ]):
+                The table to set the access control policy for.
+ If a string is passed in, this method attempts to create a
+ table reference from a string using
+ :func:`~google.cloud.bigquery.table.TableReference.from_string`.
+ policy (google.api_core.iam.Policy):
+ The access control policy to set.
+ updateMask (Optional[str]):
+ Mask as defined by
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask
+
+ Incompatible with ``fields``.
+ retry (Optional[google.api_core.retry.Retry]):
+ How to retry the RPC.
+ timeout (Optional[float]):
+ The number of seconds to wait for the underlying HTTP transport
+ before using ``retry``.
+ fields (Sequence[str]):
+ Which properties to set on the policy. See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables/setIamPolicy#body.request_body.FIELDS.update_mask
+
+ Incompatible with ``updateMask``.
+
+ Returns:
+ google.api_core.iam.Policy:
+ The updated access control policy.
+ """
+ if updateMask is not None and not fields:
+ update_mask = updateMask
+ elif updateMask is not None and fields:
+ raise ValueError("Cannot set both fields and updateMask")
+ elif fields:
+ update_mask = ",".join(fields)
+ else:
+ update_mask = None
+
table = _table_arg_to_table_ref(table, default_project=self.project)
if not isinstance(policy, (Policy)):
@@ -846,8 +1045,8 @@ def set_iam_policy(
body = {"policy": policy.to_api_repr()}
- if updateMask is not None:
- body["updateMask"] = updateMask
+ if update_mask is not None:
+ body["updateMask"] = update_mask
path = "{}:setIamPolicy".format(table.path)
span_attributes = {"path": path}
@@ -1024,6 +1223,7 @@ def update_dataset(
fields: Sequence[str],
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
+ update_mode: Optional[UpdateMode] = None,
) -> Dataset:
"""Change some fields of a dataset.
@@ -1031,6 +1231,19 @@ def update_dataset(
must be provided. If a field is listed in ``fields`` and is ``None`` in
``dataset``, it will be deleted.
+ For example, to update the default expiration times, specify
+ both properties in the ``fields`` argument:
+
+ .. code-block:: python
+
+ bigquery_client.update_dataset(
+ dataset,
+ [
+ "default_partition_expiration_ms",
+ "default_table_expiration_ms",
+ ]
+ )
+
If ``dataset.etag`` is not ``None``, the update will only
succeed if the dataset on the server has the same ETag. Thus
reading a dataset with ``get_dataset``, changing its fields,
@@ -1045,24 +1258,25 @@ def update_dataset(
The properties of ``dataset`` to change. These are strings
corresponding to the properties of
:class:`~google.cloud.bigquery.dataset.Dataset`.
-
- For example, to update the default expiration times, specify
- both properties in the ``fields`` argument:
-
- .. code-block:: python
-
- bigquery_client.update_dataset(
- dataset,
- [
- "default_partition_expiration_ms",
- "default_table_expiration_ms",
- ]
- )
retry (Optional[google.api_core.retry.Retry]):
How to retry the RPC.
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
+ update_mode (Optional[google.cloud.bigquery.enums.UpdateMode]):
+ Specifies the kind of information to update in a dataset.
+ By default, dataset metadata (e.g. friendlyName, description,
+ labels, etc) and ACL information are updated. This argument can
+ take on the following possible enum values.
+
+              * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_MODE_UNSPECIFIED`:
+ The default value. Behavior defaults to UPDATE_FULL.
+ * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_METADATA`:
+ Includes metadata information for the dataset, such as friendlyName, description, labels, etc.
+ * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_ACL`:
+ Includes ACL information for the dataset, which defines dataset access for one or more entities.
+ * :attr:`~google.cloud.bigquery.enums.UpdateMode.UPDATE_FULL`:
+ Includes both dataset metadata and ACL information.
Returns:
google.cloud.bigquery.dataset.Dataset:
@@ -1076,6 +1290,11 @@ def update_dataset(
path = dataset.path
span_attributes = {"path": path, "fields": fields}
+ if update_mode:
+ query_params = {"updateMode": update_mode.value}
+ else:
+ query_params = {}
+
api_response = self._call_api(
retry,
span_name="BigQuery.updateDataset",
@@ -1085,6 +1304,7 @@ def update_dataset(
data=partial,
headers=headers,
timeout=timeout,
+ query_params=query_params,
)
return Dataset.from_api_repr(api_response)
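A small sketch of the new ``update_mode`` argument, using a hypothetical dataset ID.

.. code-block:: python

    from google.cloud import bigquery
    from google.cloud.bigquery.enums import UpdateMode

    client = bigquery.Client()
    dataset = client.get_dataset("my-project.my_dataset")  # hypothetical ID
    dataset.description = "Nightly exports"

    # UPDATE_METADATA patches metadata fields only, leaving the ACL untouched.
    dataset = client.update_dataset(
        dataset, ["description"], update_mode=UpdateMode.UPDATE_METADATA
    )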
@@ -1101,6 +1321,15 @@ def update_model(
must be provided. If a field is listed in ``fields`` and is ``None``
in ``model``, the field value will be deleted.
+ For example, to update the descriptive properties of the model,
+ specify them in the ``fields`` argument:
+
+ .. code-block:: python
+
+ bigquery_client.update_model(
+ model, ["description", "friendly_name"]
+ )
+
If ``model.etag`` is not ``None``, the update will only succeed if
the model on the server has the same ETag. Thus reading a model with
``get_model``, changing its fields, and then passing it to
@@ -1113,15 +1342,6 @@ def update_model(
The properties of ``model`` to change. These are strings
corresponding to the properties of
:class:`~google.cloud.bigquery.model.Model`.
-
- For example, to update the descriptive properties of the model,
- specify them in the ``fields`` argument:
-
- .. code-block:: python
-
- bigquery_client.update_model(
- model, ["description", "friendly_name"]
- )
retry (Optional[google.api_core.retry.Retry]):
A description of how to retry the API call.
timeout (Optional[float]):
@@ -1165,6 +1385,15 @@ def update_routine(
must be provided. If a field is listed in ``fields`` and is ``None``
in ``routine``, the field value will be deleted.
+ For example, to update the description property of the routine,
+ specify it in the ``fields`` argument:
+
+ .. code-block:: python
+
+ bigquery_client.update_routine(
+ routine, ["description"]
+ )
+
.. warning::
During beta, partial updates are not supported. You must provide
all fields in the resource.
@@ -1183,15 +1412,6 @@ def update_routine(
fields (Sequence[str]):
The fields of ``routine`` to change, spelled as the
:class:`~google.cloud.bigquery.routine.Routine` properties.
-
- For example, to update the description property of the routine,
- specify it in the ``fields`` argument:
-
- .. code-block:: python
-
- bigquery_client.update_routine(
- routine, ["description"]
- )
retry (Optional[google.api_core.retry.Retry]):
A description of how to retry the API call.
timeout (Optional[float]):
@@ -1230,6 +1450,7 @@ def update_table(
self,
table: Table,
fields: Sequence[str],
+ autodetect_schema: bool = False,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Table:
@@ -1239,6 +1460,16 @@ def update_table(
must be provided. If a field is listed in ``fields`` and is ``None``
in ``table``, the field value will be deleted.
+ For example, to update the descriptive properties of the table,
+ specify them in the ``fields`` argument:
+
+ .. code-block:: python
+
+ bigquery_client.update_table(
+ table,
+ ["description", "friendly_name"]
+ )
+
If ``table.etag`` is not ``None``, the update will only succeed if
the table on the server has the same ETag. Thus reading a table with
``get_table``, changing its fields, and then passing it to
@@ -1250,16 +1481,10 @@ def update_table(
fields (Sequence[str]):
The fields of ``table`` to change, spelled as the
:class:`~google.cloud.bigquery.table.Table` properties.
-
- For example, to update the descriptive properties of the table,
- specify them in the ``fields`` argument:
-
- .. code-block:: python
-
- bigquery_client.update_table(
- table,
- ["description", "friendly_name"]
- )
+ autodetect_schema (bool):
+ Specifies if the schema of the table should be autodetected when
+ updating the table from the underlying source. Only applicable
+ for external tables.
retry (Optional[google.api_core.retry.Retry]):
A description of how to retry the API call.
timeout (Optional[float]):
@@ -1279,12 +1504,18 @@ def update_table(
path = table.path
span_attributes = {"path": path, "fields": fields}
+ if autodetect_schema:
+ query_params = {"autodetect_schema": True}
+ else:
+ query_params = {}
+
api_response = self._call_api(
retry,
span_name="BigQuery.updateTable",
span_attributes=span_attributes,
method="PATCH",
path=path,
+ query_params=query_params,
data=partial,
headers=headers,
timeout=timeout,
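A sketch of ``autodetect_schema`` against a hypothetical external table; the ``fields`` list passed here is illustrative, not prescriptive.

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client()
    table = client.get_table("my-project.my_dataset.my_external_table")  # hypothetical

    # Ask the backend to re-detect the schema from the underlying source files.
    table = client.update_table(table, ["schema"], autodetect_schema=True)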
@@ -1294,11 +1525,11 @@ def update_table(
def list_models(
self,
dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
- max_results: int = None,
- page_token: str = None,
+ max_results: Optional[int] = None,
+ page_token: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
- page_size: int = None,
+ page_size: Optional[int] = None,
) -> page_iterator.Iterator:
"""[Beta] List models in the dataset.
@@ -1371,11 +1602,11 @@ def api_request(*args, **kwargs):
def list_routines(
self,
dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
- max_results: int = None,
- page_token: str = None,
+ max_results: Optional[int] = None,
+ page_token: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
- page_size: int = None,
+ page_size: Optional[int] = None,
) -> page_iterator.Iterator:
"""[Beta] List routines in the dataset.
@@ -1448,11 +1679,11 @@ def api_request(*args, **kwargs):
def list_tables(
self,
dataset: Union[Dataset, DatasetReference, DatasetListItem, str],
- max_results: int = None,
- page_token: str = None,
+ max_results: Optional[int] = None,
+ page_token: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
- page_size: int = None,
+ page_size: Optional[int] = None,
) -> page_iterator.Iterator:
"""List tables in the dataset.
@@ -1648,20 +1879,24 @@ def delete_job_metadata(
:func:`~google.cloud.bigquery.client.Client.cancel_job` instead.
Args:
- job_id: Job or job identifier.
-
- Keyword Arguments:
- project:
+ job_id (Union[ \
+ str, \
+ LoadJob, \
+ CopyJob, \
+ ExtractJob, \
+ QueryJob \
+ ]): Job or job identifier.
+ project (Optional[str]):
ID of the project which owns the job (defaults to the client's project).
- location:
+ location (Optional[str]):
Location where the job was run. Ignored if ``job_id`` is a job
object.
- retry:
+ retry (Optional[google.api_core.retry.Retry]):
How to retry the RPC.
- timeout:
+ timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
- not_found_ok:
+ not_found_ok (Optional[bool]):
Defaults to ``False``. If ``True``, ignore "not found" errors
when deleting the job.
"""
@@ -1807,10 +2042,12 @@ def _get_query_results(
self,
job_id: str,
retry: retries.Retry,
- project: str = None,
- timeout_ms: int = None,
- location: str = None,
+ project: Optional[str] = None,
+ timeout_ms: Optional[int] = None,
+ location: Optional[str] = None,
timeout: TimeoutType = DEFAULT_TIMEOUT,
+ page_size: int = 0,
+ start_index: Optional[int] = None,
) -> _QueryResults:
"""Get the query results object for a query job.
@@ -1829,16 +2066,28 @@ def _get_query_results(
before using ``retry``. If set, this connection timeout may be
increased to a minimum value. This prevents retries on what
would otherwise be a successful response.
+ page_size (Optional[int]):
+ Maximum number of rows in a single response. See maxResults in
+ the jobs.getQueryResults REST API.
+ start_index (Optional[int]):
+ Zero-based index of the starting row. See startIndex in the
+ jobs.getQueryResults REST API.
Returns:
google.cloud.bigquery.query._QueryResults:
A new ``_QueryResults`` instance.
"""
- extra_params: Dict[str, Any] = {"maxResults": 0}
+ extra_params: Dict[str, Any] = {"maxResults": page_size}
if timeout is not None:
- timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)
+ if not isinstance(timeout, (int, float)):
+ timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT
+ else:
+ timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)
+
+ if page_size > 0:
+ extra_params["formatOptions.useInt64Timestamp"] = True
if project is None:
project = self.project
@@ -1852,6 +2101,9 @@ def _get_query_results(
if location is not None:
extra_params["location"] = location
+ if start_index is not None:
+ extra_params["startIndex"] = start_index
+
path = "/projects/{}/queries/{}".format(project, job_id)
# This call is typically made in a polling loop that checks whether the
@@ -1878,7 +2130,8 @@ def job_from_resource(
resource (Dict): one job resource from API response
Returns:
- The job instance, constructed via the resource.
+ Union[job.CopyJob, job.ExtractJob, job.LoadJob, job.QueryJob, job.UnknownJob]:
+ The job instance, constructed via the resource.
"""
config = resource.get("configuration", {})
if "load" in config:
@@ -1898,12 +2151,10 @@ def create_job(
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
"""Create a new job.
+
Args:
job_config (dict): configuration job representation returned from the API.
-
- Keyword Arguments:
- retry (Optional[google.api_core.retry.Retry]):
- How to retry the RPC.
+ retry (Optional[google.api_core.retry.Retry]): How to retry the RPC.
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
@@ -1938,15 +2189,8 @@ def create_job(
)
destination = _get_sub_prop(job_config, ["copy", "destinationTable"])
destination = TableReference.from_api_repr(destination)
- sources = []
- source_configs = _get_sub_prop(job_config, ["copy", "sourceTables"])
- if source_configs is None:
- source_configs = [_get_sub_prop(job_config, ["copy", "sourceTable"])]
- for source_config in source_configs:
- table_ref = TableReference.from_api_repr(source_config)
- sources.append(table_ref)
return self.copy_table(
- sources,
+ [], # Source table(s) already in job_config resource.
destination,
job_config=typing.cast(CopyJobConfig, copy_job_config),
retry=retry,
@@ -1990,10 +2234,10 @@ def create_job(
def get_job(
self,
job_id: Union[str, job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob],
- project: str = None,
- location: str = None,
+ project: Optional[str] = None,
+ location: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
- timeout: TimeoutType = DEFAULT_TIMEOUT,
+ timeout: TimeoutType = DEFAULT_GET_JOB_TIMEOUT,
) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]:
"""Fetch a job for the project associated with this client.
@@ -2001,10 +2245,14 @@ def get_job(
https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get
Args:
- job_id:
+ job_id (Union[ \
+ str, \
+ job.LoadJob, \
+ job.CopyJob, \
+ job.ExtractJob, \
+ job.QueryJob \
+ ]):
Job identifier.
-
- Keyword Arguments:
project (Optional[str]):
ID of the project which owns the job (defaults to the client's project).
location (Optional[str]):
@@ -2017,7 +2265,8 @@ def get_job(
before using ``retry``.
Returns:
- Job instance, based on the resource returned by the API.
+ Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob, job.UnknownJob]:
+ Job instance, based on the resource returned by the API.
"""
extra_params = {"projection": "full"}
@@ -2053,8 +2302,8 @@ def get_job(
def cancel_job(
self,
job_id: str,
- project: str = None,
- location: str = None,
+ project: Optional[str] = None,
+ location: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Union[job.LoadJob, job.CopyJob, job.ExtractJob, job.QueryJob]:
@@ -2071,8 +2320,6 @@ def cancel_job(
google.cloud.bigquery.job.ExtractJob, \
google.cloud.bigquery.job.QueryJob \
]): Job identifier.
-
- Keyword Arguments:
project (Optional[str]):
ID of the project which owns the job (defaults to the client's project).
location (Optional[str]):
@@ -2131,17 +2378,17 @@ def cancel_job(
def list_jobs(
self,
- project: str = None,
+ project: Optional[str] = None,
parent_job: Optional[Union[QueryJob, str]] = None,
- max_results: int = None,
- page_token: str = None,
- all_users: bool = None,
- state_filter: str = None,
+ max_results: Optional[int] = None,
+ page_token: Optional[str] = None,
+ all_users: Optional[bool] = None,
+ state_filter: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
- min_creation_time: datetime.datetime = None,
- max_creation_time: datetime.datetime = None,
- page_size: int = None,
+ min_creation_time: Optional[datetime.datetime] = None,
+ max_creation_time: Optional[datetime.datetime] = None,
+ page_size: Optional[int] = None,
) -> page_iterator.Iterator:
"""List jobs for the project associated with this client.
@@ -2247,11 +2494,11 @@ def load_table_from_uri(
self,
source_uris: Union[str, Sequence[str]],
destination: Union[Table, TableReference, TableListItem, str],
- job_id: str = None,
- job_id_prefix: str = None,
- location: str = None,
- project: str = None,
- job_config: LoadJobConfig = None,
+ job_id: Optional[str] = None,
+ job_id_prefix: Optional[str] = None,
+ location: Optional[str] = None,
+ project: Optional[str] = None,
+ job_config: Optional[LoadJobConfig] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> job.LoadJob:
@@ -2274,8 +2521,6 @@ def load_table_from_uri(
in, this method attempts to create a table reference from a
string using
:func:`google.cloud.bigquery.table.TableReference.from_string`.
-
- Keyword Arguments:
job_id (Optional[str]): Name of the job.
job_id_prefix (Optional[str]):
The user-provided prefix for a randomly generated job ID.
@@ -2299,8 +2544,8 @@ def load_table_from_uri(
Raises:
TypeError:
- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
- class.
+ If ``job_config`` is not an instance of
+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
"""
job_id = _make_job_id(job_id, job_id_prefix)
@@ -2317,11 +2562,14 @@ def load_table_from_uri(
destination = _table_arg_to_table_ref(destination, default_project=self.project)
- if job_config:
- job_config = copy.deepcopy(job_config)
- _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
+ if job_config is not None:
+ _verify_job_config_type(job_config, LoadJobConfig)
+ else:
+ job_config = job.LoadJobConfig()
- load_job = job.LoadJob(job_ref, source_uris, destination, self, job_config)
+ new_job_config = job_config._fill_from_default(self._default_load_job_config)
+
+ load_job = job.LoadJob(job_ref, source_uris, destination, self, new_job_config)
load_job._begin(retry=retry, timeout=timeout)
return load_job
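A sketch showing how a per-call ``LoadJobConfig`` is merged with the client-level default introduced in this change; bucket, project, and table names are hypothetical.

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client(
        default_load_job_config=bigquery.LoadJobConfig(
            write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
        )
    )

    # Options left unset on the per-call config fall back to the client default.
    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        skip_leading_rows=1,
    )
    load_job = client.load_table_from_uri(
        "gs://my-bucket/data.csv",
        "my-project.my_dataset.my_table",
        job_config=job_config,
    )
    load_job.result()  # Wait for the load job to complete.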
@@ -2331,13 +2579,13 @@ def load_table_from_file(
file_obj: IO[bytes],
destination: Union[Table, TableReference, TableListItem, str],
rewind: bool = False,
- size: int = None,
+ size: Optional[int] = None,
num_retries: int = _DEFAULT_NUM_RETRIES,
- job_id: str = None,
- job_id_prefix: str = None,
- location: str = None,
- project: str = None,
- job_config: LoadJobConfig = None,
+ job_id: Optional[str] = None,
+ job_id_prefix: Optional[str] = None,
+ location: Optional[str] = None,
+ project: Optional[str] = None,
+ job_config: Optional[LoadJobConfig] = None,
timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
) -> job.LoadJob:
"""Upload the contents of this table from a file-like object.
@@ -2346,39 +2594,42 @@ def load_table_from_file(
returns a :class:`~google.cloud.bigquery.job.LoadJob`.
Args:
- file_obj:
+ file_obj (IO[bytes]):
A file handle opened in binary mode for reading.
- destination:
+ destination (Union[Table, \
+ TableReference, \
+ TableListItem, \
+ str \
+ ]):
Table into which data is to be loaded. If a string is passed
in, this method attempts to create a table reference from a
string using
:func:`google.cloud.bigquery.table.TableReference.from_string`.
-
- Keyword Arguments:
- rewind:
+ rewind (Optional[bool]):
If True, seek to the beginning of the file handle before
- reading the file.
- size:
+ reading the file. Defaults to False.
+ size (Optional[int]):
The number of bytes to read from the file handle. If size is
``None`` or large, resumable upload will be used. Otherwise,
multipart upload will be used.
- num_retries: Number of upload retries. Defaults to 6.
- job_id: Name of the job.
- job_id_prefix:
+ num_retries (Optional[int]): Number of upload retries. Defaults to 6.
+ job_id (Optional[str]): Name of the job.
+ job_id_prefix (Optional[str]):
The user-provided prefix for a randomly generated job ID.
This parameter will be ignored if a ``job_id`` is also given.
- location:
+ location (Optional[str]):
Location where to run the job. Must match the location of the
destination table.
- project:
+ project (Optional[str]):
Project ID of the project of where to run the job. Defaults
to the client's project.
- job_config:
+ job_config (Optional[LoadJobConfig]):
Extra configuration options for the job.
- timeout:
+ timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``. Depending on the retry strategy, a request
may be repeated several times using the same timeout each time.
+ Defaults to None.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
@@ -2393,8 +2644,8 @@ def load_table_from_file(
mode.
TypeError:
- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
- class.
+ If ``job_config`` is not an instance of
+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
"""
job_id = _make_job_id(job_id, job_id_prefix)
@@ -2406,10 +2657,15 @@ def load_table_from_file(
destination = _table_arg_to_table_ref(destination, default_project=self.project)
job_ref = job._JobReference(job_id, project=project, location=location)
- if job_config:
- job_config = copy.deepcopy(job_config)
- _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
- load_job = job.LoadJob(job_ref, None, destination, self, job_config)
+
+ if job_config is not None:
+ _verify_job_config_type(job_config, LoadJobConfig)
+ else:
+ job_config = job.LoadJobConfig()
+
+ new_job_config = job_config._fill_from_default(self._default_load_job_config)
+
+ load_job = job.LoadJob(job_ref, None, destination, self, new_job_config)
job_resource = load_job.to_api_repr()
if rewind:
@@ -2433,14 +2689,14 @@ def load_table_from_file(
def load_table_from_dataframe(
self,
- dataframe: "pandas.DataFrame",
+ dataframe: "pandas.DataFrame", # type: ignore
destination: Union[Table, TableReference, str],
num_retries: int = _DEFAULT_NUM_RETRIES,
- job_id: str = None,
- job_id_prefix: str = None,
- location: str = None,
- project: str = None,
- job_config: LoadJobConfig = None,
+ job_id: Optional[str] = None,
+ job_id_prefix: Optional[str] = None,
+ location: Optional[str] = None,
+ project: Optional[str] = None,
+ job_config: Optional[LoadJobConfig] = None,
parquet_compression: str = "snappy",
timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
) -> job.LoadJob:
@@ -2461,9 +2717,13 @@ def load_table_from_dataframe(
https://github.com/googleapis/python-bigquery/issues/19
Args:
- dataframe:
+            dataframe (pandas.DataFrame):
A :class:`~pandas.DataFrame` containing the data to load.
- destination:
+ destination (Union[ \
+ Table, \
+ TableReference, \
+ str \
+ ]):
The destination table to use for loading the data. If it is an
existing table, the schema of the :class:`~pandas.DataFrame`
must match the schema of the destination table. If the table
@@ -2473,21 +2733,19 @@ def load_table_from_dataframe(
If a string is passed in, this method attempts to create a
table reference from a string using
:func:`google.cloud.bigquery.table.TableReference.from_string`.
-
- Keyword Arguments:
- num_retries: Number of upload retries.
- job_id: Name of the job.
- job_id_prefix:
+ num_retries (Optional[int]): Number of upload retries. Defaults to 6.
+ job_id (Optional[str]): Name of the job.
+ job_id_prefix (Optional[str]):
The user-provided prefix for a randomly generated
job ID. This parameter will be ignored if a ``job_id`` is
also given.
- location:
+ location (Optional[str]):
Location where to run the job. Must match the location of the
destination table.
- project:
+ project (Optional[str]):
Project ID of the project of where to run the job. Defaults
to the client's project.
- job_config:
+ job_config (Optional[LoadJobConfig]):
Extra configuration options for the job.
To override the default pandas data type conversions, supply
@@ -2504,9 +2762,10 @@ def load_table_from_dataframe(
:attr:`~google.cloud.bigquery.job.SourceFormat.CSV` and
:attr:`~google.cloud.bigquery.job.SourceFormat.PARQUET` are
supported.
- parquet_compression:
+ parquet_compression (Optional[str]):
[Beta] The compression method to use if intermittently
serializing ``dataframe`` to a parquet file.
+ Defaults to "snappy".
The argument is directly passed as the ``compression``
argument to the underlying ``pyarrow.parquet.write_table()``
@@ -2517,10 +2776,11 @@ def load_table_from_dataframe(
passed as the ``compression`` argument to the underlying
``DataFrame.to_parquet()`` method.
https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_parquet.html#pandas.DataFrame.to_parquet
- timeout:
+            timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``. Depending on the retry strategy, a request may
be repeated several times using the same timeout each time.
+ Defaults to None.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
@@ -2529,43 +2789,47 @@ def load_table_from_dataframe(
google.cloud.bigquery.job.LoadJob: A new load job.
Raises:
+ ValueError:
+ If a usable parquet engine cannot be found. This method
+ requires :mod:`pyarrow` to be installed.
TypeError:
- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
- class.
+ If ``job_config`` is not an instance of
+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
"""
job_id = _make_job_id(job_id, job_id_prefix)
- if job_config:
- _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
- # Make a copy so that the job config isn't modified in-place.
- job_config_properties = copy.deepcopy(job_config._properties)
- job_config = job.LoadJobConfig()
- job_config._properties = job_config_properties
-
+ if job_config is not None:
+ _verify_job_config_type(job_config, LoadJobConfig)
else:
job_config = job.LoadJobConfig()
+ new_job_config = job_config._fill_from_default(self._default_load_job_config)
+
supported_formats = {job.SourceFormat.CSV, job.SourceFormat.PARQUET}
- if job_config.source_format is None:
+ if new_job_config.source_format is None:
# default value
- job_config.source_format = job.SourceFormat.PARQUET
+ new_job_config.source_format = job.SourceFormat.PARQUET
if (
- job_config.source_format == job.SourceFormat.PARQUET
- and job_config.parquet_options is None
+ new_job_config.source_format == job.SourceFormat.PARQUET
+ and new_job_config.parquet_options is None
):
parquet_options = ParquetOptions()
# default value
parquet_options.enable_list_inference = True
- job_config.parquet_options = parquet_options
+ new_job_config.parquet_options = parquet_options
- if job_config.source_format not in supported_formats:
+ if new_job_config.source_format not in supported_formats:
raise ValueError(
"Got unexpected source_format: '{}'. Currently, only PARQUET and CSV are supported".format(
- job_config.source_format
+ new_job_config.source_format
)
)
+ if pyarrow is None and new_job_config.source_format == job.SourceFormat.PARQUET:
+ # pyarrow is now the only supported parquet engine.
+ raise ValueError("This method requires pyarrow to be installed")
+
if location is None:
location = self.location
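A minimal sketch, assuming ``pandas`` and ``pyarrow`` are installed and the destination table ID is hypothetical.

.. code-block:: python

    import pandas
    from google.cloud import bigquery

    client = bigquery.Client()
    dataframe = pandas.DataFrame({"name": ["alpha", "beta"], "value": [1, 2]})

    # The frame is serialized through Parquet by default, so pyarrow must be
    # installed; otherwise the ValueError documented above is raised.
    load_job = client.load_table_from_dataframe(
        dataframe, "my-project.my_dataset.my_table"
    )
    load_job.result()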
@@ -2573,8 +2837,8 @@ def load_table_from_dataframe(
# schema, and check if dataframe schema is compatible with it - except
# for WRITE_TRUNCATE jobs, the existing schema does not matter then.
if (
- not job_config.schema
- and job_config.write_disposition != job.WriteDisposition.WRITE_TRUNCATE
+ not new_job_config.schema
+ and new_job_config.write_disposition != job.WriteDisposition.WRITE_TRUNCATE
):
try:
table = self.get_table(destination)
@@ -2585,7 +2849,7 @@ def load_table_from_dataframe(
name
for name, _ in _pandas_helpers.list_columns_and_indexes(dataframe)
)
- job_config.schema = [
+ new_job_config.schema = [
# Field description and policy tags are not needed to
# serialize a data frame.
SchemaField(
@@ -2599,11 +2863,11 @@ def load_table_from_dataframe(
if field.name in columns_and_indexes
]
- job_config.schema = _pandas_helpers.dataframe_to_bq_schema(
- dataframe, job_config.schema
+ new_job_config.schema = _pandas_helpers.dataframe_to_bq_schema(
+ dataframe, new_job_config.schema
)
- if not job_config.schema:
+ if not new_job_config.schema:
# the schema could not be fully detected
warnings.warn(
"Schema could not be detected for all columns. Loading from a "
@@ -2614,20 +2878,19 @@ def load_table_from_dataframe(
)
tmpfd, tmppath = tempfile.mkstemp(
- suffix="_job_{}.{}".format(job_id[:8], job_config.source_format.lower())
+ suffix="_job_{}.{}".format(job_id[:8], new_job_config.source_format.lower())
)
os.close(tmpfd)
try:
-
- if job_config.source_format == job.SourceFormat.PARQUET:
- if job_config.schema:
+ if new_job_config.source_format == job.SourceFormat.PARQUET:
+ if new_job_config.schema:
if parquet_compression == "snappy": # adjust the default value
parquet_compression = parquet_compression.upper()
_pandas_helpers.dataframe_to_parquet(
dataframe,
- job_config.schema,
+ new_job_config.schema,
tmppath,
parquet_compression=parquet_compression,
parquet_use_compliant_nested_type=True,
@@ -2639,13 +2902,12 @@ def load_table_from_dataframe(
compression=parquet_compression,
**(
{"use_compliant_nested_type": True}
- if _helpers.PYARROW_VERSIONS.use_compliant_nested_type
+ if _versions_helpers.PYARROW_VERSIONS.use_compliant_nested_type
else {}
),
)
else:
-
dataframe.to_csv(
tmppath,
index=False,
@@ -2667,7 +2929,7 @@ def load_table_from_dataframe(
job_id_prefix=job_id_prefix,
location=location,
project=project,
- job_config=job_config,
+ job_config=new_job_config,
timeout=timeout,
)
@@ -2679,11 +2941,11 @@ def load_table_from_json(
json_rows: Iterable[Dict[str, Any]],
destination: Union[Table, TableReference, TableListItem, str],
num_retries: int = _DEFAULT_NUM_RETRIES,
- job_id: str = None,
- job_id_prefix: str = None,
- location: str = None,
- project: str = None,
- job_config: LoadJobConfig = None,
+ job_id: Optional[str] = None,
+ job_id_prefix: Optional[str] = None,
+ location: Optional[str] = None,
+ project: Optional[str] = None,
+ job_config: Optional[LoadJobConfig] = None,
timeout: ResumableTimeoutType = DEFAULT_TIMEOUT,
) -> job.LoadJob:
"""Upload the contents of a table from a JSON string or dict.
@@ -2708,32 +2970,36 @@ def load_table_from_json(
client = bigquery.Client()
client.load_table_from_file(data_as_file, ...)
- destination:
+ destination (Union[ \
+ Table, \
+ TableReference, \
+ TableListItem, \
+ str \
+ ]):
Table into which data is to be loaded. If a string is passed
in, this method attempts to create a table reference from a
string using
:func:`google.cloud.bigquery.table.TableReference.from_string`.
-
- Keyword Arguments:
- num_retries: Number of upload retries.
- job_id: Name of the job.
- job_id_prefix:
+ num_retries (Optional[int]): Number of upload retries. Defaults to 6.
+ job_id (Optional[str]): Name of the job.
+ job_id_prefix (Optional[str]):
The user-provided prefix for a randomly generated job ID.
This parameter will be ignored if a ``job_id`` is also given.
- location:
+ location (Optional[str]):
Location where to run the job. Must match the location of the
destination table.
- project:
+ project (Optional[str]):
Project ID of the project of where to run the job. Defaults
to the client's project.
- job_config:
+ job_config (Optional[LoadJobConfig]):
Extra configuration options for the job. The ``source_format``
setting is always set to
:attr:`~google.cloud.bigquery.job.SourceFormat.NEWLINE_DELIMITED_JSON`.
- timeout:
+ timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``. Depending on the retry strategy, a request may
be repeated several times using the same timeout each time.
+ Defaults to None.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
@@ -2743,22 +3009,36 @@ def load_table_from_json(
Raises:
TypeError:
- If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.LoadJobConfig`
- class.
+ If ``job_config`` is not an instance of
+ :class:`~google.cloud.bigquery.job.LoadJobConfig` class.
"""
job_id = _make_job_id(job_id, job_id_prefix)
- if job_config:
- _verify_job_config_type(job_config, google.cloud.bigquery.job.LoadJobConfig)
- # Make a copy so that the job config isn't modified in-place.
- job_config = copy.deepcopy(job_config)
+ if job_config is not None:
+ _verify_job_config_type(job_config, LoadJobConfig)
else:
job_config = job.LoadJobConfig()
- job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON
+ new_job_config = job_config._fill_from_default(self._default_load_job_config)
- if job_config.schema is None:
- job_config.autodetect = True
+ new_job_config.source_format = job.SourceFormat.NEWLINE_DELIMITED_JSON
+
+        # In specific conditions, we check if the table already exists, and/or
+        # set the autodetect value for the user. For the exact conditions, see the table at
+ # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
+ if new_job_config.schema is None and new_job_config.autodetect is None:
+ if new_job_config.write_disposition in (
+ job.WriteDisposition.WRITE_TRUNCATE,
+ job.WriteDisposition.WRITE_EMPTY,
+ ):
+ new_job_config.autodetect = True
+ else:
+ try:
+ self.get_table(destination)
+ except core_exceptions.NotFound:
+ new_job_config.autodetect = True
+ else:
+ new_job_config.autodetect = False
if project is None:
project = self.project
@@ -2780,7 +3060,7 @@ def load_table_from_json(
job_id_prefix=job_id_prefix,
location=location,
project=project,
- job_config=job_config,
+ job_config=new_job_config,
timeout=timeout,
)
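A sketch of the schema/autodetect behavior described in the comment above; the destination table ID is hypothetical.

.. code-block:: python

    from google.cloud import bigquery

    client = bigquery.Client()
    rows = [{"name": "alpha", "value": 1}, {"name": "beta", "value": 2}]

    # With no schema and no explicit autodetect set, the client checks whether
    # the destination table exists and chooses autodetect accordingly.
    load_job = client.load_table_from_json(rows, "my-project.my_dataset.my_table")
    load_job.result()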
@@ -2795,23 +3075,19 @@ def _do_resumable_upload(
"""Perform a resumable upload.
Args:
- stream: A bytes IO object open for reading.
-
- metadata: The metadata associated with the upload.
-
- num_retries:
+ stream (IO[bytes]): A bytes IO object open for reading.
+ metadata (Mapping[str, str]): The metadata associated with the upload.
+ num_retries (int):
Number of upload retries. (Deprecated: This
argument will be removed in a future release.)
-
- timeout:
+ timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``. Depending on the retry strategy, a request may
be repeated several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
-
- project:
+ project (Optional[str]):
Project ID of the project of where to run the upload. Defaults
to the client's project.
@@ -2839,23 +3115,19 @@ def _initiate_resumable_upload(
"""Initiate a resumable upload.
Args:
- stream: A bytes IO object open for reading.
-
- metadata: The metadata associated with the upload.
-
- num_retries:
+ stream (IO[bytes]): A bytes IO object open for reading.
+ metadata (Mapping[str, str]): The metadata associated with the upload.
+ num_retries (int):
Number of upload retries. (Deprecated: This
argument will be removed in a future release.)
-
- timeout:
+ timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``. Depending on the retry strategy, a request may
be repeated several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
-
- project:
+ project (Optional[str]):
Project ID of the project of where to run the upload. Defaults
to the client's project.
@@ -2915,28 +3187,23 @@ def _do_multipart_upload(
"""Perform a multipart upload.
Args:
- stream: A bytes IO object open for reading.
-
- metadata: The metadata associated with the upload.
-
- size:
+ stream (IO[bytes]): A bytes IO object open for reading.
+ metadata (Mapping[str, str]): The metadata associated with the upload.
+ size (int):
The number of bytes to be uploaded (which will be read
from ``stream``). If not provided, the upload will be
concluded once ``stream`` is exhausted (or :data:`None`).
-
- num_retries:
+ num_retries (int):
Number of upload retries. (Deprecated: This
argument will be removed in a future release.)
-
- timeout:
+ timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``. Depending on the retry strategy, a request may
be repeated several times using the same timeout each time.
Can also be passed as a tuple (connect_timeout, read_timeout).
See :meth:`requests.Session.request` documentation for details.
-
- project:
+ project (Optional[str]):
Project ID of the project of where to run the upload. Defaults
to the client's project.
@@ -2992,11 +3259,11 @@ def copy_table(
Sequence[Union[Table, TableReference, TableListItem, str]],
],
destination: Union[Table, TableReference, TableListItem, str],
- job_id: str = None,
- job_id_prefix: str = None,
- location: str = None,
- project: str = None,
- job_config: CopyJobConfig = None,
+ job_id: Optional[str] = None,
+ job_id_prefix: Optional[str] = None,
+ location: Optional[str] = None,
+ project: Optional[str] = None,
+ job_config: Optional[CopyJobConfig] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> job.CopyJob:
@@ -3028,8 +3295,6 @@ def copy_table(
str, \
]):
Table into which data is to be copied.
-
- Keyword Arguments:
job_id (Optional[str]): The ID of the job.
job_id_prefix (Optional[str]):
The user-provided prefix for a randomly generated job ID.
@@ -3098,11 +3363,11 @@ def extract_table(
self,
source: Union[Table, TableReference, TableListItem, Model, ModelReference, str],
destination_uris: Union[str, Sequence[str]],
- job_id: str = None,
- job_id_prefix: str = None,
- location: str = None,
- project: str = None,
- job_config: ExtractJobConfig = None,
+ job_id: Optional[str] = None,
+ job_id_prefix: Optional[str] = None,
+ location: Optional[str] = None,
+ project: Optional[str] = None,
+ job_config: Optional[ExtractJobConfig] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
source_type: str = "Table",
@@ -3126,8 +3391,6 @@ def extract_table(
URIs of Cloud Storage file(s) into which table data is to be
extracted; in format
``gs:///``.
-
- Keyword Arguments:
job_id (Optional[str]): The ID of the job.
job_id_prefix (Optional[str]):
The user-provided prefix for a randomly generated job ID.
@@ -3197,14 +3460,14 @@ def extract_table(
def query(
self,
query: str,
- job_config: QueryJobConfig = None,
- job_id: str = None,
- job_id_prefix: str = None,
- location: str = None,
- project: str = None,
+ job_config: Optional[QueryJobConfig] = None,
+ job_id: Optional[str] = None,
+ job_id_prefix: Optional[str] = None,
+ location: Optional[str] = None,
+ project: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
- job_retry: retries.Retry = DEFAULT_JOB_RETRY,
+ job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY,
api_method: Union[str, enums.QueryApiMethod] = enums.QueryApiMethod.INSERT,
) -> job.QueryJob:
"""Run a SQL query.
@@ -3216,8 +3479,6 @@ def query(
query (str):
SQL query to be executed. Defaults to the standard SQL
dialect. Use the ``job_config`` parameter to change dialects.
-
- Keyword Arguments:
job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
Extra configuration options for the job.
To override any options that were previously set in
@@ -3258,7 +3519,8 @@ def query(
specified here becomes the default ``job_retry`` for
``result()``, where it can also be specified.
api_method (Union[str, enums.QueryApiMethod]):
- Method with which to start the query job.
+ Method with which to start the query job. By default,
+ the jobs.insert API is used for starting a query.
See :class:`google.cloud.bigquery.enums.QueryApiMethod` for
details on the difference between the query start methods.
@@ -3273,18 +3535,9 @@ def query(
class, or if both ``job_id`` and non-``None`` non-default
``job_retry`` are provided.
"""
- job_id_given = job_id is not None
- if (
- job_id_given
- and job_retry is not None
- and job_retry is not DEFAULT_JOB_RETRY
- ):
- raise TypeError(
- "`job_retry` was provided, but the returned job is"
- " not retryable, because a custom `job_id` was"
- " provided."
- )
+ _job_helpers.validate_job_retry(job_id, job_retry)
+ job_id_given = job_id is not None
if job_id_given and api_method == enums.QueryApiMethod.QUERY:
raise TypeError(
"`job_id` was provided, but the 'QUERY' `api_method` was requested."
@@ -3296,26 +3549,12 @@ def query(
if location is None:
location = self.location
- if self._default_query_job_config:
- if job_config:
- _verify_job_config_type(
- job_config, google.cloud.bigquery.job.QueryJobConfig
- )
- # anything that's not defined on the incoming
- # that is in the default,
- # should be filled in with the default
- # the incoming therefore has precedence
- #
- # Note that _fill_from_default doesn't mutate the receiver
- job_config = job_config._fill_from_default(
- self._default_query_job_config
- )
- else:
- _verify_job_config_type(
- self._default_query_job_config,
- google.cloud.bigquery.job.QueryJobConfig,
- )
- job_config = self._default_query_job_config
+ if job_config is not None:
+ _verify_job_config_type(job_config, QueryJobConfig)
+
+ job_config = _job_helpers.job_config_with_defaults(
+ job_config, self._default_query_job_config
+ )
# Note that we haven't modified the original job_config (or
# _default_query_job_config) up to this point.
@@ -3346,18 +3585,161 @@ def query(
else:
raise ValueError(f"Got unexpected value for api_method: {repr(api_method)}")
+ def query_and_wait(
+ self,
+ query,
+ *,
+ job_config: Optional[QueryJobConfig] = None,
+ location: Optional[str] = None,
+ project: Optional[str] = None,
+ api_timeout: TimeoutType = DEFAULT_TIMEOUT,
+ wait_timeout: Union[Optional[float], object] = POLLING_DEFAULT_VALUE,
+ retry: retries.Retry = DEFAULT_RETRY,
+ job_retry: retries.Retry = DEFAULT_JOB_RETRY,
+ page_size: Optional[int] = None,
+ max_results: Optional[int] = None,
+ ) -> RowIterator:
+ """Run the query, wait for it to finish, and return the results.
+
+ Args:
+ query (str):
+ SQL query to be executed. Defaults to the standard SQL
+ dialect. Use the ``job_config`` parameter to change dialects.
+ job_config (Optional[google.cloud.bigquery.job.QueryJobConfig]):
+ Extra configuration options for the job.
+ To override any options that were previously set in
+ the ``default_query_job_config`` given to the
+ ``Client`` constructor, manually set those options to ``None``,
+ or whatever value is preferred.
+ location (Optional[str]):
+ Location where to run the job. Must match the location of the
+ table used in the query as well as the destination table.
+ project (Optional[str]):
+ Project ID of the project of where to run the job. Defaults
+ to the client's project.
+ api_timeout (Optional[float]):
+ The number of seconds to wait for the underlying HTTP transport
+ before using ``retry``.
+ wait_timeout (Optional[Union[float, object]]):
+ The number of seconds to wait for the query to finish. If the
+ query doesn't finish before this timeout, the client attempts
+ to cancel the query. If unset, the underlying REST API calls
+ have timeouts, but we still wait indefinitely for the job to
+ finish.
+ retry (Optional[google.api_core.retry.Retry]):
+ How to retry the RPC. This only applies to making RPC
+ calls. It isn't used to retry failed jobs. This has
+ a reasonable default that should only be overridden
+ with care.
+ job_retry (Optional[google.api_core.retry.Retry]):
+ How to retry failed jobs. The default retries
+ rate-limit-exceeded errors. Passing ``None`` disables
+ job retry. Not all jobs can be retried.
+ page_size (Optional[int]):
+ The maximum number of rows in each page of results from the
+ initial jobs.query request. Non-positive values are ignored.
+ max_results (Optional[int]):
+ The maximum total number of rows from this request.
+
+ Returns:
+ google.cloud.bigquery.table.RowIterator:
+ Iterator of row data
+ :class:`~google.cloud.bigquery.table.Row`-s. During each
+ page, the iterator will have the ``total_rows`` attribute
+ set, which counts the total number of rows **in the result
+ set** (this is distinct from the total number of rows in the
+ current page: ``iterator.page.num_items``).
+
+ If the query is a special query that produces no results, e.g.
+ a DDL query, an ``_EmptyRowIterator`` instance is returned.
+
+ Raises:
+ TypeError:
+ If ``job_config`` is not an instance of
+ :class:`~google.cloud.bigquery.job.QueryJobConfig`
+ class.
+ """
+ return self._query_and_wait_bigframes(
+ query,
+ job_config=job_config,
+ location=location,
+ project=project,
+ api_timeout=api_timeout,
+ wait_timeout=wait_timeout,
+ retry=retry,
+ job_retry=job_retry,
+ page_size=page_size,
+ max_results=max_results,
+ )
+
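For illustration, a minimal sketch of calling the new ``query_and_wait`` method (the public dataset table is only an example)::

    from google.cloud import bigquery

    client = bigquery.Client()  # assumes application default credentials

    # Runs the query and blocks until results are available; no explicit
    # QueryJob object is needed for the common case.
    rows = client.query_and_wait(
        """
        SELECT name, SUM(number) AS total
        FROM `bigquery-public-data.usa_names.usa_1910_2013`
        GROUP BY name
        ORDER BY total DESC
        """,
        max_results=10,
    )
    for row in rows:
        print(f"{row['name']}: {row['total']}")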
+ def _query_and_wait_bigframes(
+ self,
+ query,
+ *,
+ job_config: Optional[QueryJobConfig] = None,
+ location: Optional[str] = None,
+ project: Optional[str] = None,
+ api_timeout: TimeoutType = DEFAULT_TIMEOUT,
+ wait_timeout: Union[Optional[float], object] = POLLING_DEFAULT_VALUE,
+ retry: retries.Retry = DEFAULT_RETRY,
+ job_retry: retries.Retry = DEFAULT_JOB_RETRY,
+ page_size: Optional[int] = None,
+ max_results: Optional[int] = None,
+ callback: Callable = lambda _: None,
+ ) -> RowIterator:
+ """See query_and_wait.
+
+ This method has an extra callback parameter, which is used by bigframes
+ to create better progress bars.
+ """
+ if project is None:
+ project = self.project
+
+ if location is None:
+ location = self.location
+
+ if job_config is not None:
+ _verify_job_config_type(job_config, QueryJobConfig)
+
+ job_config = _job_helpers.job_config_with_defaults(
+ job_config, self._default_query_job_config
+ )
+
+ return _job_helpers.query_and_wait(
+ self,
+ query,
+ job_config=job_config,
+ location=location,
+ project=project,
+ api_timeout=api_timeout,
+ wait_timeout=wait_timeout,
+ retry=retry,
+ job_retry=job_retry,
+ page_size=page_size,
+ max_results=max_results,
+ callback=callback,
+ )
+
def insert_rows(
self,
table: Union[Table, TableReference, str],
- rows: Union[Iterable[Tuple], Iterable[Dict]],
- selected_fields: Sequence[SchemaField] = None,
+ rows: Union[Iterable[Tuple], Iterable[Mapping[str, Any]]],
+ selected_fields: Optional[Sequence[SchemaField]] = None,
**kwargs,
- ) -> Sequence[dict]:
+ ) -> Sequence[Dict[str, Any]]:
"""Insert rows into a table via the streaming API.
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
+ BigQuery will reject insertAll payloads that exceed a defined limit (10MB).
+ Additionally, if a payload vastly exceeds this limit, the request is rejected
+    by the intermediate infrastructure, which returns a 413 (Payload Too Large) status code.
+
+ See
+ https://cloud.google.com/bigquery/quotas#streaming_inserts
+
Args:
table (Union[ \
google.cloud.bigquery.table.Table, \
@@ -3418,12 +3800,19 @@ def insert_rows_from_dataframe(
self,
table: Union[Table, TableReference, str],
dataframe,
- selected_fields: Sequence[SchemaField] = None,
+ selected_fields: Optional[Sequence[SchemaField]] = None,
chunk_size: int = 500,
**kwargs: Dict,
) -> Sequence[Sequence[dict]]:
"""Insert rows into a table from a dataframe via the streaming API.
+ BigQuery will reject insertAll payloads that exceed a defined limit (10MB).
+ Additionally, if a payload vastly exceeds this limit, the request is rejected
+    by the intermediate infrastructure, which returns a 413 (Payload Too Large) status code.
+
+ See
+ https://cloud.google.com/bigquery/quotas#streaming_inserts
+
Args:
table (Union[ \
google.cloud.bigquery.table.Table, \
@@ -3470,13 +3859,13 @@ def insert_rows_from_dataframe(
def insert_rows_json(
self,
table: Union[Table, TableReference, TableListItem, str],
- json_rows: Sequence[Dict],
+ json_rows: Sequence[Mapping[str, Any]],
row_ids: Union[
Iterable[Optional[str]], AutoRowIDs, None
] = AutoRowIDs.GENERATE_UUID,
- skip_invalid_rows: bool = None,
- ignore_unknown_values: bool = None,
- template_suffix: str = None,
+ skip_invalid_rows: Optional[bool] = None,
+ ignore_unknown_values: Optional[bool] = None,
+ template_suffix: Optional[str] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> Sequence[dict]:
@@ -3485,6 +3874,13 @@ def insert_rows_json(
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/tabledata/insertAll
+ BigQuery will reject insertAll payloads that exceed a defined limit (10MB).
+ Additionally, if a payload vastly exceeds this limit, the request is rejected
+        by the intermediate infrastructure, which returns a 413 (Payload Too Large) status code.
+
+ See
+ https://cloud.google.com/bigquery/quotas#streaming_inserts
+
Args:
table (Union[ \
google.cloud.bigquery.table.Table \
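A short sketch of streaming inserts with ``insert_rows_json`` (the table ID and field names are hypothetical)::

    from google.cloud import bigquery

    client = bigquery.Client()
    table_id = "my-project.my_dataset.my_table"  # hypothetical table

    rows_to_insert = [
        {"full_name": "Ada Lovelace", "age": 36},
        {"full_name": "Alan Turing", "age": 41},
    ]

    # Keep each request well under the documented insertAll payload limit;
    # split very large batches into several calls.
    errors = client.insert_rows_json(table_id, rows_to_insert)
    if errors:
        print("Encountered errors while inserting rows:", errors)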
@@ -3659,11 +4055,11 @@ def list_partitions(
def list_rows(
self,
table: Union[Table, TableListItem, TableReference, str],
- selected_fields: Sequence[SchemaField] = None,
- max_results: int = None,
- page_token: str = None,
- start_index: int = None,
- page_size: int = None,
+ selected_fields: Optional[Sequence[SchemaField]] = None,
+ max_results: Optional[int] = None,
+ page_token: Optional[str] = None,
+ start_index: Optional[int] = None,
+ page_size: Optional[int] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
) -> RowIterator:
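For reference, a small sketch of reading table data directly with ``list_rows`` (the public dataset table is only an example)::

    from google.cloud import bigquery

    client = bigquery.Client()

    # Read a handful of rows for two columns without issuing a query.
    rows = client.list_rows(
        "bigquery-public-data.usa_names.usa_1910_2013",
        selected_fields=[
            bigquery.SchemaField("name", "STRING"),
            bigquery.SchemaField("number", "INTEGER"),
        ],
        max_results=5,
    )
    for row in rows:
        print(row["name"], row["number"])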
@@ -3762,6 +4158,8 @@ def list_rows(
# tables can be fetched without a column filter.
selected_fields=selected_fields,
total_rows=getattr(table, "num_rows", None),
+ project=table.project,
+ location=table.location,
)
return row_iterator
@@ -3770,14 +4168,23 @@ def _list_rows_from_query_results(
job_id: str,
location: str,
project: str,
- schema: SchemaField,
- total_rows: int = None,
- destination: Union[Table, TableReference, TableListItem, str] = None,
- max_results: int = None,
- start_index: int = None,
- page_size: int = None,
+ schema: Sequence[SchemaField],
+ total_rows: Optional[int] = None,
+ destination: Optional[Union[Table, TableReference, TableListItem, str]] = None,
+ max_results: Optional[int] = None,
+ start_index: Optional[int] = None,
+ page_size: Optional[int] = None,
retry: retries.Retry = DEFAULT_RETRY,
timeout: TimeoutType = DEFAULT_TIMEOUT,
+ query_id: Optional[str] = None,
+ first_page_response: Optional[Dict[str, Any]] = None,
+ num_dml_affected_rows: Optional[int] = None,
+ query: Optional[str] = None,
+ total_bytes_processed: Optional[int] = None,
+ slot_millis: Optional[int] = None,
+ created: Optional[datetime.datetime] = None,
+ started: Optional[datetime.datetime] = None,
+ ended: Optional[datetime.datetime] = None,
) -> RowIterator:
"""List the rows of a completed query.
See
@@ -3817,6 +4224,27 @@ def _list_rows_from_query_results(
would otherwise be a successful response.
If multiple requests are made under the hood, ``timeout``
applies to each individual request.
+ query_id (Optional[str]):
+ [Preview] ID of a completed query. This ID is auto-generated
+ and not guaranteed to be populated.
+ first_page_response (Optional[dict]):
+ API response for the first page of results (if available).
+ num_dml_affected_rows (Optional[int]):
+ If this RowIterator is the result of a DML query, the number of
+ rows that were affected.
+ query (Optional[str]):
+ The query text used.
+ total_bytes_processed (Optional[int]):
+                Total bytes processed from job statistics, if present.
+            slot_millis (Optional[int]):
+                Number of slot-milliseconds the user is actually billed for.
+ created (Optional[datetime.datetime]):
+ Datetime at which the job was created.
+ started (Optional[datetime.datetime]):
+ Datetime at which the job was started.
+ ended (Optional[datetime.datetime]):
+ Datetime at which the job finished.
+
Returns:
google.cloud.bigquery.table.RowIterator:
Iterator of row data
@@ -3828,7 +4256,10 @@ def _list_rows_from_query_results(
}
if timeout is not None:
- timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)
+ if not isinstance(timeout, (int, float)):
+ timeout = _MIN_GET_QUERY_RESULTS_TIMEOUT
+ else:
+ timeout = max(timeout, _MIN_GET_QUERY_RESULTS_TIMEOUT)
if start_index is not None:
params["startIndex"] = start_index
@@ -3844,6 +4275,18 @@ def _list_rows_from_query_results(
table=destination,
extra_params=params,
total_rows=total_rows,
+ project=project,
+ location=location,
+ job_id=job_id,
+ query_id=query_id,
+ first_page_response=first_page_response,
+ num_dml_affected_rows=num_dml_affected_rows,
+ query=query,
+ total_bytes_processed=total_bytes_processed,
+ slot_millis=slot_millis,
+ created=created,
+ started=started,
+ ended=ended,
)
return row_iterator
@@ -3863,12 +4306,13 @@ def _schema_to_json_file_object(self, schema_list, file_obj):
"""
json.dump(schema_list, file_obj, indent=2, sort_keys=True)
- def schema_from_json(self, file_or_path: "PathType"):
+ def schema_from_json(self, file_or_path: "PathType") -> List[SchemaField]:
"""Takes a file object or file path that contains json that describes
a table schema.
Returns:
- List of schema field objects.
+ List[SchemaField]:
+ List of :class:`~google.cloud.bigquery.schema.SchemaField` objects.
"""
if isinstance(file_or_path, io.IOBase):
return self._schema_from_json_file_object(file_or_path)
diff --git a/google/cloud/bigquery/dataset.py b/google/cloud/bigquery/dataset.py
index c30204067..878b77d41 100644
--- a/google/cloud/bigquery/dataset.py
+++ b/google/cloud/bigquery/dataset.py
@@ -17,8 +17,10 @@
from __future__ import absolute_import
import copy
+import json
import typing
+from typing import Optional, List, Dict, Any, Union
import google.cloud._helpers # type: ignore
@@ -27,8 +29,7 @@
from google.cloud.bigquery.routine import Routine, RoutineReference
from google.cloud.bigquery.table import Table, TableReference
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
-
-from typing import Optional, List, Dict, Any, Union
+from google.cloud.bigquery import external_config
def _get_table_reference(self, table_id: str) -> TableReference:
@@ -92,7 +93,7 @@ class DatasetReference(object):
ValueError: If either argument is not of type ``str``.
"""
- def __init__(self, project, dataset_id):
+ def __init__(self, project: str, dataset_id: str):
if not isinstance(project, str):
raise ValueError("Pass a string for project")
if not isinstance(dataset_id, str):
@@ -139,7 +140,7 @@ def from_api_repr(cls, resource: dict) -> "DatasetReference":
@classmethod
def from_string(
- cls, dataset_id: str, default_project: str = None
+ cls, dataset_id: str, default_project: Optional[str] = None
) -> "DatasetReference":
"""Construct a dataset reference from dataset ID string.
@@ -166,22 +167,24 @@ def from_string(
standard SQL format.
"""
output_dataset_id = dataset_id
- output_project_id = default_project
parts = _helpers._split_id(dataset_id)
- if len(parts) == 1 and not default_project:
- raise ValueError(
- "When default_project is not set, dataset_id must be a "
- "fully-qualified dataset ID in standard SQL format, "
- 'e.g., "project.dataset_id" got {}'.format(dataset_id)
- )
+ if len(parts) == 1:
+ if default_project is not None:
+ output_project_id = default_project
+ else:
+ raise ValueError(
+ "When default_project is not set, dataset_id must be a "
+ "fully-qualified dataset ID in standard SQL format, "
+ 'e.g., "project.dataset_id" got {}'.format(dataset_id)
+ )
elif len(parts) == 2:
output_project_id, output_dataset_id = parts
- elif len(parts) > 2:
+ else:
raise ValueError(
"Too many parts in dataset_id. Expected a fully-qualified "
- "dataset ID in standard SQL format. e.g. "
- '"project.dataset_id", got {}'.format(dataset_id)
+ "dataset ID in standard SQL format, "
+ 'e.g. "project.dataset_id", got {}'.format(dataset_id)
)
return cls(output_project_id, output_dataset_id)
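The two accepted forms, as a quick sketch (project and dataset names are illustrative)::

    from google.cloud.bigquery import DatasetReference

    # Fully-qualified dataset ID: no default project required.
    ref = DatasetReference.from_string("my-project.my_dataset")

    # Bare dataset ID: a default project must be provided.
    ref2 = DatasetReference.from_string("my_dataset", default_project="my-project")

    assert ref == ref2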
@@ -215,6 +218,9 @@ def __ne__(self, other):
def __hash__(self):
return hash(self._key())
+ def __str__(self):
+ return f"{self.project}.{self._dataset_id}"
+
def __repr__(self):
return "DatasetReference{}".format(self._key())
@@ -293,12 +299,15 @@ def __init__(
role: Optional[str] = None,
entity_type: Optional[str] = None,
entity_id: Optional[Union[Dict[str, Any], str]] = None,
+ **kwargs,
):
- self._properties = {}
+ self._properties: Dict[str, Any] = {}
if entity_type is not None:
self._properties[entity_type] = entity_id
self._properties["role"] = role
- self._entity_type = entity_type
+ self._entity_type: Optional[str] = entity_type
+ for prop, val in kwargs.items():
+ setattr(self, prop, val)
@property
def role(self) -> Optional[str]:
@@ -325,6 +334,9 @@ def dataset(self, value):
if isinstance(value, str):
value = DatasetReference.from_string(value).to_api_repr()
+ if isinstance(value, DatasetReference):
+ value = value.to_api_repr()
+
if isinstance(value, (Dataset, DatasetListItem)):
value = value.reference.to_api_repr()
@@ -432,27 +444,89 @@ def special_group(self) -> Optional[str]:
def special_group(self, value):
self._properties["specialGroup"] = value
+ @property
+ def condition(self) -> Optional["Condition"]:
+ """Optional[Condition]: The IAM condition associated with this entry."""
+ value = typing.cast(Dict[str, Any], self._properties.get("condition"))
+ return Condition.from_api_repr(value) if value else None
+
+ @condition.setter
+ def condition(self, value: Union["Condition", dict, None]):
+ """Set the IAM condition for this entry."""
+ if value is None:
+ self._properties["condition"] = None
+ elif isinstance(value, Condition):
+ self._properties["condition"] = value.to_api_repr()
+ elif isinstance(value, dict):
+ self._properties["condition"] = value
+ else:
+ raise TypeError("condition must be a Condition object, dict, or None")
+
@property
def entity_type(self) -> Optional[str]:
"""The entity_type of the entry."""
+
+ # The api_repr for an AccessEntry object is expected to be a dict with
+ # only a few keys. Two keys that may be present are role and condition.
+ # Any additional key is going to have one of ~eight different names:
+ # userByEmail, groupByEmail, domain, dataset, specialGroup, view,
+ # routine, iamMember
+
+ # if self._entity_type is None, see if it needs setting
+ # i.e. is there a key: value pair that should be associated with
+ # entity_type and entity_id?
+ if self._entity_type is None:
+ resource = self._properties.copy()
+            # We are emptying the dict to get to the last ``key: value`` pair,
+            # so we don't keep these first entries.
+ _ = resource.pop("role", None)
+ _ = resource.pop("condition", None)
+
+ try:
+ # we only need entity_type, because entity_id gets set elsewhere.
+ entity_type, _ = resource.popitem()
+ except KeyError:
+ entity_type = None
+
+ self._entity_type = entity_type
+
return self._entity_type
@property
def entity_id(self) -> Optional[Union[Dict[str, Any], str]]:
"""The entity_id of the entry."""
- return self._properties.get(self._entity_type) if self._entity_type else None
+ if self.entity_type:
+ entity_type = self.entity_type
+ else:
+ return None
+ return typing.cast(
+ Optional[Union[Dict[str, Any], str]],
+ self._properties.get(entity_type, None),
+ )
def __eq__(self, other):
if not isinstance(other, AccessEntry):
return NotImplemented
- return self._key() == other._key()
+ return (
+ self.role == other.role
+ and self.entity_type == other.entity_type
+ and self._normalize_entity_id(self.entity_id)
+ == self._normalize_entity_id(other.entity_id)
+ and self.condition == other.condition
+ )
+
+ @staticmethod
+ def _normalize_entity_id(value):
+ """Ensure consistent equality for dicts like 'view'."""
+ if isinstance(value, dict):
+ return json.dumps(value, sort_keys=True)
+ return value
def __ne__(self, other):
return not self == other
def __repr__(self):
-
- return f""
+ return f""
def _key(self):
"""A tuple key that uniquely describes this field.
@@ -460,9 +534,18 @@ def _key(self):
Returns:
Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.AccessEntry`.
"""
+
properties = self._properties.copy()
+
+ # Dicts are not hashable.
+ # Convert condition to a hashable datatype(s)
+ condition = properties.get("condition")
+ if isinstance(condition, dict):
+ condition_key = tuple(sorted(condition.items()))
+ properties["condition"] = condition_key
+
prop_tup = tuple(sorted(properties.items()))
- return (self.role, self._entity_type, self.entity_id, prop_tup)
+ return (self.role, self.entity_type, self.entity_id, prop_tup)
def __hash__(self):
return hash(self._key())
@@ -487,21 +570,10 @@ def from_api_repr(cls, resource: dict) -> "AccessEntry":
Returns:
google.cloud.bigquery.dataset.AccessEntry:
Access entry parsed from ``resource``.
-
- Raises:
- ValueError:
- If the resource has more keys than ``role`` and one additional
- key.
"""
- entry = resource.copy()
- role = entry.pop("role", None)
- entity_type, entity_id = entry.popitem()
- if len(entry) != 0:
- raise ValueError("Entry has unexpected keys remaining.", entry)
-
- config = cls(role, entity_type, entity_id)
- config._properties = copy.deepcopy(resource)
- return config
+ access_entry = cls()
+ access_entry._properties = resource.copy()
+ return access_entry
class Dataset(object):
@@ -515,6 +587,10 @@ class Dataset(object):
A pointer to a dataset. If ``dataset_ref`` is a string, it must
include both the project ID and the dataset ID, separated by
``.``.
+
+ Note:
+ Fields marked as "Output Only" are populated by the server and will only be
+ available after calling :meth:`google.cloud.bigquery.client.Client.get_dataset`.
"""
_PROPERTY_TO_API_FIELD = {
@@ -524,6 +600,13 @@ class Dataset(object):
"default_table_expiration_ms": "defaultTableExpirationMs",
"friendly_name": "friendlyName",
"default_encryption_configuration": "defaultEncryptionConfiguration",
+ "is_case_insensitive": "isCaseInsensitive",
+ "storage_billing_model": "storageBillingModel",
+ "max_time_travel_hours": "maxTimeTravelHours",
+ "default_rounding_mode": "defaultRoundingMode",
+ "resource_tags": "resourceTags",
+ "external_catalog_dataset_options": "externalCatalogDatasetOptions",
+ "access_policy_version": "accessPolicyVersion",
}
def __init__(self, dataset_ref) -> None:
@@ -531,6 +614,65 @@ def __init__(self, dataset_ref) -> None:
dataset_ref = DatasetReference.from_string(dataset_ref)
self._properties = {"datasetReference": dataset_ref.to_api_repr(), "labels": {}}
+ @property
+ def max_time_travel_hours(self):
+ """
+ Optional[int]: Defines the time travel window in hours. The value can
+        be from 48 to 168 hours (2 to 7 days), in multiples of 24 hours
+ (48, 72, 96, 120, 144, 168).
+ The default value is 168 hours if this is not set.
+ """
+ return self._properties.get("maxTimeTravelHours")
+
+ @max_time_travel_hours.setter
+ def max_time_travel_hours(self, hours):
+ if not isinstance(hours, int):
+ raise ValueError(f"max_time_travel_hours must be an integer. Got {hours}")
+ if hours < 2 * 24 or hours > 7 * 24:
+ raise ValueError(
+ "Time Travel Window should be from 48 to 168 hours (2 to 7 days)"
+ )
+ if hours % 24 != 0:
+ raise ValueError("Time Travel Window should be multiple of 24")
+ self._properties["maxTimeTravelHours"] = hours
+
+ @property
+ def default_rounding_mode(self):
+ """Union[str, None]: defaultRoundingMode of the dataset as set by the user
+ (defaults to :data:`None`).
+
+ Set the value to one of ``'ROUND_HALF_AWAY_FROM_ZERO'``, ``'ROUND_HALF_EVEN'``, or
+ ``'ROUNDING_MODE_UNSPECIFIED'``.
+
+ See `default rounding mode
+ `_
+        in REST API docs and `updating the default rounding mode
+ `_
+ guide.
+
+ Raises:
+ ValueError: for invalid value types.
+ """
+ return self._properties.get("defaultRoundingMode")
+
+ @default_rounding_mode.setter
+ def default_rounding_mode(self, value):
+ possible_values = [
+ "ROUNDING_MODE_UNSPECIFIED",
+ "ROUND_HALF_AWAY_FROM_ZERO",
+ "ROUND_HALF_EVEN",
+ ]
+ if not isinstance(value, str) and value is not None:
+ raise ValueError("Pass a string, or None")
+ if value is None:
+ self._properties["defaultRoundingMode"] = "ROUNDING_MODE_UNSPECIFIED"
+ if value not in possible_values and value is not None:
+ raise ValueError(
+ f'rounding mode needs to be one of {",".join(possible_values)}'
+ )
+ if value:
+ self._properties["defaultRoundingMode"] = value
+
@property
def project(self):
"""str: Project ID of the project bound to the dataset."""
@@ -567,7 +709,7 @@ def access_entries(self, value):
@property
def created(self):
- """Union[datetime.datetime, None]: Datetime at which the dataset was
+ """Union[datetime.datetime, None]: Output only. Datetime at which the dataset was
created (:data:`None` until set from the server).
"""
creation_time = self._properties.get("creationTime")
@@ -584,8 +726,8 @@ def dataset_id(self):
@property
def full_dataset_id(self):
- """Union[str, None]: ID for the dataset resource (:data:`None` until
- set from the server)
+ """Union[str, None]: Output only. ID for the dataset resource
+ (:data:`None` until set from the server).
In the format ``project_id:dataset_id``.
"""
@@ -600,14 +742,14 @@ def reference(self):
@property
def etag(self):
- """Union[str, None]: ETag for the dataset resource (:data:`None` until
- set from the server).
+ """Union[str, None]: Output only. ETag for the dataset resource
+ (:data:`None` until set from the server).
"""
return self._properties.get("etag")
@property
def modified(self):
- """Union[datetime.datetime, None]: Datetime at which the dataset was
+ """Union[datetime.datetime, None]: Output only. Datetime at which the dataset was
last modified (:data:`None` until set from the server).
"""
modified_time = self._properties.get("lastModifiedTime")
@@ -619,8 +761,8 @@ def modified(self):
@property
def self_link(self):
- """Union[str, None]: URL for the dataset resource (:data:`None` until
- set from the server).
+ """Union[str, None]: Output only. URL for the dataset resource
+ (:data:`None` until set from the server).
"""
return self._properties.get("selfLink")
@@ -736,6 +878,28 @@ def labels(self, value):
raise ValueError("Pass a dict")
self._properties["labels"] = value
+ @property
+ def resource_tags(self):
+ """Dict[str, str]: Resource tags of the dataset.
+
+ Optional. The tags attached to this dataset. Tag keys are globally
+ unique. Tag key is expected to be in the namespaced format, for
+ example "123456789012/environment" where 123456789012 is
+ the ID of the parent organization or project resource for this tag
+ key. Tag value is expected to be the short name, for example
+ "Production".
+
+ Raises:
+ ValueError: for invalid value types.
+ """
+ return self._properties.setdefault("resourceTags", {})
+
+ @resource_tags.setter
+ def resource_tags(self, value):
+ if not isinstance(value, dict) and value is not None:
+ raise ValueError("Pass a dict")
+ self._properties["resourceTags"] = value
+
@property
def default_encryption_configuration(self):
"""google.cloud.bigquery.encryption_configuration.EncryptionConfiguration: Custom
@@ -760,6 +924,89 @@ def default_encryption_configuration(self, value):
api_repr = value.to_api_repr()
self._properties["defaultEncryptionConfiguration"] = api_repr
+ @property
+ def is_case_insensitive(self):
+ """Optional[bool]: True if the dataset and its table names are case-insensitive, otherwise False.
+ By default, this is False, which means the dataset and its table names are case-sensitive.
+ This field does not affect routine references.
+
+ Raises:
+ ValueError: for invalid value types.
+ """
+ return self._properties.get("isCaseInsensitive") or False
+
+ @is_case_insensitive.setter
+ def is_case_insensitive(self, value):
+ if not isinstance(value, bool) and value is not None:
+ raise ValueError("Pass a boolean value, or None")
+ if value is None:
+ value = False
+ self._properties["isCaseInsensitive"] = value
+
+ @property
+ def storage_billing_model(self):
+ """Union[str, None]: StorageBillingModel of the dataset as set by the user
+ (defaults to :data:`None`).
+
+ Set the value to one of ``'LOGICAL'``, ``'PHYSICAL'``, or
+ ``'STORAGE_BILLING_MODEL_UNSPECIFIED'``. This change takes 24 hours to
+ take effect and you must wait 14 days before you can change the storage
+ billing model again.
+
+ See `storage billing model
+ `_
+ in REST API docs and `updating the storage billing model
+ `_
+ guide.
+
+ Raises:
+ ValueError: for invalid value types.
+ """
+ return self._properties.get("storageBillingModel")
+
+ @storage_billing_model.setter
+ def storage_billing_model(self, value):
+ if not isinstance(value, str) and value is not None:
+ raise ValueError(
+ "storage_billing_model must be a string (e.g. 'LOGICAL',"
+ " 'PHYSICAL', 'STORAGE_BILLING_MODEL_UNSPECIFIED'), or None."
+ f" Got {repr(value)}."
+ )
+ self._properties["storageBillingModel"] = value
+
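A sketch of updating several of the new dataset properties (project and dataset names are hypothetical)::

    from google.cloud import bigquery

    client = bigquery.Client()
    dataset = client.get_dataset("my-project.my_dataset")  # hypothetical dataset

    dataset.is_case_insensitive = True
    dataset.storage_billing_model = "PHYSICAL"
    dataset.max_time_travel_hours = 72
    dataset.resource_tags = {"123456789012/environment": "production"}

    # Only the listed properties are included in the PATCH request.
    dataset = client.update_dataset(
        dataset,
        [
            "is_case_insensitive",
            "storage_billing_model",
            "max_time_travel_hours",
            "resource_tags",
        ],
    )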
+ @property
+ def external_catalog_dataset_options(self):
+ """Options defining open source compatible datasets living in the
+ BigQuery catalog. Contains metadata of open source database, schema
+ or namespace represented by the current dataset."""
+
+ prop = _helpers._get_sub_prop(
+ self._properties, ["externalCatalogDatasetOptions"]
+ )
+
+ if prop is not None:
+ prop = external_config.ExternalCatalogDatasetOptions.from_api_repr(prop)
+ return prop
+
+ @external_catalog_dataset_options.setter
+ def external_catalog_dataset_options(self, value):
+ value = _helpers._isinstance_or_raise(
+ value, external_config.ExternalCatalogDatasetOptions, none_allowed=True
+ )
+ self._properties[
+ self._PROPERTY_TO_API_FIELD["external_catalog_dataset_options"]
+ ] = (value.to_api_repr() if value is not None else None)
+
+ @property
+ def access_policy_version(self):
+ return self._properties.get("accessPolicyVersion")
+
+ @access_policy_version.setter
+ def access_policy_version(self, value):
+ if not isinstance(value, int) and value is not None:
+ raise ValueError("Pass an integer, or None")
+ self._properties["accessPolicyVersion"] = value
+
@classmethod
def from_string(cls, full_dataset_id: str) -> "Dataset":
"""Construct a dataset from fully-qualified dataset ID.
@@ -911,3 +1158,130 @@ def reference(self):
model = _get_model_reference
routine = _get_routine_reference
+
+
+class Condition(object):
+ """Represents a textual expression in the Common Expression Language (CEL) syntax.
+
+ Typically used for filtering or policy rules, such as in IAM Conditions
+ or BigQuery row/column access policies.
+
+ See:
+ https://cloud.google.com/iam/docs/reference/rest/Shared.Types/Expr
+ https://github.com/google/cel-spec
+
+ Args:
+ expression (str):
+ The condition expression string using CEL syntax. This is required.
+ Example: ``resource.type == "compute.googleapis.com/Instance"``
+ title (Optional[str]):
+ An optional title for the condition, providing a short summary.
+ Example: ``"Request is for a GCE instance"``
+ description (Optional[str]):
+ An optional description of the condition, providing a detailed explanation.
+ Example: ``"This condition checks whether the resource is a GCE instance."``
+ """
+
+ def __init__(
+ self,
+ expression: str,
+ title: Optional[str] = None,
+ description: Optional[str] = None,
+ ):
+ self._properties: Dict[str, Any] = {}
+ # Use setters to initialize properties, which also handle validation
+ self.expression = expression
+ self.title = title
+ self.description = description
+
+ @property
+ def title(self) -> Optional[str]:
+ """Optional[str]: The title for the condition."""
+ return self._properties.get("title")
+
+ @title.setter
+ def title(self, value: Optional[str]):
+ if value is not None and not isinstance(value, str):
+ raise ValueError("Pass a string for title, or None")
+ self._properties["title"] = value
+
+ @property
+ def description(self) -> Optional[str]:
+ """Optional[str]: The description for the condition."""
+ return self._properties.get("description")
+
+ @description.setter
+ def description(self, value: Optional[str]):
+ if value is not None and not isinstance(value, str):
+ raise ValueError("Pass a string for description, or None")
+ self._properties["description"] = value
+
+ @property
+ def expression(self) -> str:
+ """str: The expression string for the condition."""
+
+ # Cast assumes expression is always set due to __init__ validation
+ return typing.cast(str, self._properties.get("expression"))
+
+ @expression.setter
+ def expression(self, value: str):
+ if not isinstance(value, str):
+ raise ValueError("Pass a non-empty string for expression")
+ if not value:
+ raise ValueError("expression cannot be an empty string")
+ self._properties["expression"] = value
+
+ def to_api_repr(self) -> Dict[str, Any]:
+ """Construct the API resource representation of this Condition."""
+ return self._properties
+
+ @classmethod
+ def from_api_repr(cls, resource: Dict[str, Any]) -> "Condition":
+ """Factory: construct a Condition instance given its API representation."""
+
+ # Ensure required fields are present in the resource if necessary
+ if "expression" not in resource:
+ raise ValueError("API representation missing required 'expression' field.")
+
+ return cls(
+ expression=resource["expression"],
+ title=resource.get("title"),
+ description=resource.get("description"),
+ )
+
+ def __eq__(self, other: object) -> bool:
+ """Check for equality based on expression, title, and description."""
+ if not isinstance(other, Condition):
+ return NotImplemented
+ return self._key() == other._key()
+
+ def _key(self):
+ """A tuple key that uniquely describes this field.
+ Used to compute this instance's hashcode and evaluate equality.
+ Returns:
+            Tuple: The contents of this :class:`~google.cloud.bigquery.dataset.Condition`.
+ """
+
+ properties = self._properties.copy()
+
+ # Dicts are not hashable.
+ # Convert object to a hashable datatype(s)
+ prop_tup = tuple(sorted(properties.items()))
+ return prop_tup
+
+ def __ne__(self, other: object) -> bool:
+ """Check for inequality."""
+ return not self == other
+
+ def __hash__(self) -> int:
+ """Generate a hash based on expression, title, and description."""
+ return hash(self._key())
+
+ def __repr__(self) -> str:
+ """Return a string representation of the Condition object."""
+ parts = [f"expression={self.expression!r}"]
+ if self.title is not None:
+ parts.append(f"title={self.title!r}")
+ if self.description is not None:
+ parts.append(f"description={self.description!r}")
+ return f"Condition({', '.join(parts)})"
diff --git a/google/cloud/bigquery/dbapi/_helpers.py b/google/cloud/bigquery/dbapi/_helpers.py
index 117fa8ae7..a4ab05ce8 100644
--- a/google/cloud/bigquery/dbapi/_helpers.py
+++ b/google/cloud/bigquery/dbapi/_helpers.py
@@ -277,12 +277,14 @@ def complex_query_parameter(
param = query.ArrayQueryParameter(
name,
sub_type,
- value
- if isinstance(sub_type, query.ScalarQueryParameterType)
- else [
- complex_query_parameter(None, v, sub_type._complex__src, base)
- for v in value
- ],
+ (
+ value
+ if isinstance(sub_type, query.ScalarQueryParameterType)
+ else [
+ complex_query_parameter(None, v, sub_type._complex__src, base)
+ for v in value
+ ]
+ ),
)
elif type_type == STRUCT:
if not isinstance(value, collections_abc.Mapping):
diff --git a/google/cloud/bigquery/dbapi/connection.py b/google/cloud/bigquery/dbapi/connection.py
index 66dee7dfb..a1a69b8fe 100644
--- a/google/cloud/bigquery/dbapi/connection.py
+++ b/google/cloud/bigquery/dbapi/connection.py
@@ -35,12 +35,18 @@ class Connection(object):
A client that uses the faster BigQuery Storage API to fetch rows from
BigQuery. If not passed, it is created using the same credentials
as ``client`` (provided that BigQuery Storage dependencies are installed).
-
- If both clients are available, ``bqstorage_client`` is used for
- fetching query results.
+ prefer_bqstorage_client (Optional[bool]):
+        Prefer the BigQuery Storage client over the REST client. If the Storage
+ client isn't available, fall back to the REST client. Defaults to
+ ``True``.
"""
- def __init__(self, client=None, bqstorage_client=None):
+ def __init__(
+ self,
+ client=None,
+ bqstorage_client=None,
+ prefer_bqstorage_client=True,
+ ):
if client is None:
client = bigquery.Client()
self._owns_client = True
@@ -49,7 +55,10 @@ def __init__(self, client=None, bqstorage_client=None):
     # A warning is already raised by the BQ Storage client factory if
# instantiation fails, or if the given BQ Storage client instance is outdated.
- if bqstorage_client is None:
+ if not prefer_bqstorage_client:
+ bqstorage_client = None
+ self._owns_bqstorage_client = False
+ elif bqstorage_client is None:
bqstorage_client = client._ensure_bqstorage_client()
self._owns_bqstorage_client = bqstorage_client is not None
else:
@@ -95,7 +104,7 @@ def cursor(self):
return new_cursor
-def connect(client=None, bqstorage_client=None):
+def connect(client=None, bqstorage_client=None, prefer_bqstorage_client=True):
"""Construct a DB-API connection to Google BigQuery.
Args:
@@ -108,11 +117,12 @@ def connect(client=None, bqstorage_client=None):
A client that uses the faster BigQuery Storage API to fetch rows from
BigQuery. If not passed, it is created using the same credentials
as ``client`` (provided that BigQuery Storage dependencies are installed).
-
- If both clients are available, ``bqstorage_client`` is used for
- fetching query results.
+ prefer_bqstorage_client (Optional[bool]):
+        Prefer the BigQuery Storage client over the REST client. If the Storage
+ client isn't available, fall back to the REST client. Defaults to
+ ``True``.
Returns:
google.cloud.bigquery.dbapi.Connection: A new DB-API connection to BigQuery.
"""
- return Connection(client, bqstorage_client)
+ return Connection(client, bqstorage_client, prefer_bqstorage_client)
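A sketch of the new flag in use::

    from google.cloud.bigquery import dbapi

    # Fetch results over the REST API even if the BigQuery Storage client
    # dependencies are installed.
    connection = dbapi.connect(prefer_bqstorage_client=False)
    cursor = connection.cursor()
    cursor.execute("SELECT 1 AS x, 'a' AS y")
    print(cursor.fetchall())
    connection.close()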
diff --git a/google/cloud/bigquery/dbapi/cursor.py b/google/cloud/bigquery/dbapi/cursor.py
index 03f3b72ca..014a6825e 100644
--- a/google/cloud/bigquery/dbapi/cursor.py
+++ b/google/cloud/bigquery/dbapi/cursor.py
@@ -14,11 +14,12 @@
"""Cursor for the Google BigQuery DB-API."""
+from __future__ import annotations
+
import collections
from collections import abc as collections_abc
-import copy
-import logging
import re
+from typing import Optional
try:
from google.cloud.bigquery_storage import ArrowSerializationOptions
@@ -34,8 +35,6 @@
import google.cloud.exceptions # type: ignore
-_LOGGER = logging.getLogger(__name__)
-
# Per PEP 249: A 7-item sequence containing information describing one result
# column. The first two items (name and type_code) are mandatory, the other
# five are optional and are set to None if no meaningful values can be
@@ -76,9 +75,32 @@ def __init__(self, connection):
# most appropriate size.
self.arraysize = None
self._query_data = None
- self._query_job = None
+ self._query_rows = None
self._closed = False
+ @property
+ def query_job(self) -> Optional[job.QueryJob]:
+ """google.cloud.bigquery.job.query.QueryJob | None: The query job
+ created by the last ``execute*()`` call, if a query job was created.
+
+ .. note::
+ If the last ``execute*()`` call was ``executemany()``, this is the
+ last job created by ``executemany()``."""
+ rows = self._query_rows
+
+ if rows is None:
+ return None
+
+ job_id = rows.job_id
+ project = rows.project
+ location = rows.location
+ client = self.connection._client
+
+ if job_id is None:
+ return None
+
+ return client.get_job(job_id, location=location, project=project)
+
def close(self):
"""Mark the cursor as closed, preventing its further use."""
self._closed = True
@@ -107,8 +129,8 @@ def _set_description(self, schema):
for field in schema
)
- def _set_rowcount(self, query_results):
- """Set the rowcount from query results.
+ def _set_rowcount(self, rows):
+ """Set the rowcount from a RowIterator.
Normally, this sets rowcount to the number of rows returned by the
query, but if it was a DML statement, it sets rowcount to the number
@@ -119,10 +141,10 @@ def _set_rowcount(self, query_results):
Results of a query.
"""
total_rows = 0
- num_dml_affected_rows = query_results.num_dml_affected_rows
+ num_dml_affected_rows = rows.num_dml_affected_rows
- if query_results.total_rows is not None and query_results.total_rows > 0:
- total_rows = query_results.total_rows
+ if rows.total_rows is not None and rows.total_rows > 0:
+ total_rows = rows.total_rows
if num_dml_affected_rows is not None and num_dml_affected_rows > 0:
total_rows = num_dml_affected_rows
self.rowcount = total_rows
@@ -155,9 +177,10 @@ def execute(self, operation, parameters=None, job_id=None, job_config=None):
parameters (Union[Mapping[str, Any], Sequence[Any]]):
(Optional) dictionary or sequence of parameter values.
- job_id (str):
- (Optional) The job_id to use. If not set, a job ID
- is generated at random.
+ job_id (str | None):
+ (Optional and discouraged) The job ID to use when creating
+ the query job. For best performance and reliability, manually
+ setting a job ID is discouraged.
job_config (google.cloud.bigquery.job.QueryJobConfig):
(Optional) Extra configuration options for the query job.
@@ -171,7 +194,7 @@ def _execute(
self, formatted_operation, parameters, job_id, job_config, parameter_types
):
self._query_data = None
- self._query_job = None
+        self._query_rows = None
client = self.connection._client
# The DB-API uses the pyformat formatting, since the way BigQuery does
@@ -180,33 +203,35 @@ def _execute(
# libraries.
query_parameters = _helpers.to_query_parameters(parameters, parameter_types)
- if client._default_query_job_config:
- if job_config:
- config = job_config._fill_from_default(client._default_query_job_config)
- else:
- config = copy.deepcopy(client._default_query_job_config)
- else:
- config = job_config or job.QueryJobConfig(use_legacy_sql=False)
-
+ config = job_config or job.QueryJobConfig()
config.query_parameters = query_parameters
- self._query_job = client.query(
- formatted_operation, job_config=config, job_id=job_id
- )
-
- if self._query_job.dry_run:
- self._set_description(schema=None)
- self.rowcount = 0
- return
- # Wait for the query to finish.
+ # Start the query and wait for the query to finish.
try:
- self._query_job.result()
+ if job_id is not None:
+ rows = client.query(
+ formatted_operation,
+                    job_config=config,
+ job_id=job_id,
+ ).result(
+ page_size=self.arraysize,
+ )
+ else:
+ rows = client.query_and_wait(
+ formatted_operation,
+ job_config=config,
+ page_size=self.arraysize,
+ )
except google.cloud.exceptions.GoogleCloudError as exc:
raise exceptions.DatabaseError(exc)
- query_results = self._query_job._query_results
- self._set_rowcount(query_results)
- self._set_description(query_results.schema)
+ self._query_rows = rows
+ self._set_description(rows.schema)
+
+ if config.dry_run:
+ self.rowcount = 0
+ else:
+ self._set_rowcount(rows)
def executemany(self, operation, seq_of_parameters):
"""Prepare and execute a database operation multiple times.
@@ -240,25 +265,26 @@ def _try_fetch(self, size=None):
Mutates self to indicate that iteration has started.
"""
- if self._query_job is None:
+ if self._query_data is not None:
+ # Already started fetching the data.
+ return
+
+ rows = self._query_rows
+ if rows is None:
raise exceptions.InterfaceError(
"No query results: execute() must be called before fetch."
)
- if self._query_job.dry_run:
- self._query_data = iter([])
+ bqstorage_client = self.connection._bqstorage_client
+ if rows._should_use_bqstorage(
+ bqstorage_client,
+ create_bqstorage_client=False,
+ ):
+ rows_iterable = self._bqstorage_fetch(bqstorage_client)
+ self._query_data = _helpers.to_bq_table_rows(rows_iterable)
return
- if self._query_data is None:
- bqstorage_client = self.connection._bqstorage_client
-
- if bqstorage_client is not None:
- rows_iterable = self._bqstorage_fetch(bqstorage_client)
- self._query_data = _helpers.to_bq_table_rows(rows_iterable)
- return
-
- rows_iter = self._query_job.result(page_size=self.arraysize)
- self._query_data = iter(rows_iter)
+ self._query_data = iter(rows)
def _bqstorage_fetch(self, bqstorage_client):
"""Start fetching data with the BigQuery Storage API.
@@ -280,7 +306,7 @@ def _bqstorage_fetch(self, bqstorage_client):
# bigquery_storage can indeed be imported here without errors.
from google.cloud import bigquery_storage
- table_reference = self._query_job.destination
+ table_reference = self._query_rows._table
requested_session = bigquery_storage.types.ReadSession(
table=table_reference.to_bqstorage(),
diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py
index 45d43a2a7..dc67f9674 100644
--- a/google/cloud/bigquery/enums.py
+++ b/google/cloud/bigquery/enums.py
@@ -22,7 +22,7 @@ class AutoRowIDs(enum.Enum):
GENERATE_UUID = enum.auto()
-class Compression(object):
+class Compression(str, enum.Enum):
"""The compression type to use for exported files. The default value is
:attr:`NONE`.
@@ -39,6 +39,9 @@ class Compression(object):
SNAPPY = "SNAPPY"
"""Specifies SNAPPY format."""
+ ZSTD = "ZSTD"
+ """Specifies ZSTD format."""
+
NONE = "NONE"
"""Specifies no compression."""
@@ -77,6 +80,53 @@ class CreateDisposition(object):
returned in the job result."""
+class DatasetView(enum.Enum):
+ """DatasetView specifies which dataset information is returned."""
+
+ DATASET_VIEW_UNSPECIFIED = "DATASET_VIEW_UNSPECIFIED"
+ """The default value. Currently maps to the FULL view."""
+
+ METADATA = "METADATA"
+ """View metadata information for the dataset, such as friendlyName,
+ description, labels, etc."""
+
+ ACL = "ACL"
+ """View ACL information for the dataset, which defines dataset access
+ for one or more entities."""
+
+ FULL = "FULL"
+ """View both dataset metadata and ACL information."""
+
+
+class DefaultPandasDTypes(enum.Enum):
+ """Default Pandas DataFrem DTypes to convert BigQuery data. These
+ Sentinel values are used instead of None to maintain backward compatibility,
+ and allow Pandas package is not available. For more information:
+ https://stackoverflow.com/a/60605919/101923
+ """
+
+ BOOL_DTYPE = object()
+ """Specifies default bool dtype"""
+
+ INT_DTYPE = object()
+ """Specifies default integer dtype"""
+
+ DATE_DTYPE = object()
+ """Specifies default date dtype"""
+
+ TIME_DTYPE = object()
+ """Specifies default time dtype"""
+
+ RANGE_DATE_DTYPE = object()
+ """Specifies default range date dtype"""
+
+ RANGE_DATETIME_DTYPE = object()
+ """Specifies default range datetime dtype"""
+
+ RANGE_TIMESTAMP_DTYPE = object()
+ """Specifies default range timestamp dtype"""
+
+
class DestinationFormat(object):
"""The exported file format. The default value is :attr:`CSV`.
@@ -214,6 +264,11 @@ class KeyResultStatementKind:
class StandardSqlTypeNames(str, enum.Enum):
+ """Enum of allowed SQL type names in schema.SchemaField.
+
+    Data type names used in GoogleSQL.
+ """
+
def _generate_next_value_(name, start, count, last_values):
return name
@@ -234,6 +289,10 @@ def _generate_next_value_(name, start, count, last_values):
JSON = enum.auto()
ARRAY = enum.auto()
STRUCT = enum.auto()
+ RANGE = enum.auto()
+ # NOTE: FOREIGN acts as a wrapper for data types
+ # not natively understood by BigQuery unless translated
+ FOREIGN = enum.auto()
class EntityTypes(str, enum.Enum):
@@ -252,7 +311,10 @@ class EntityTypes(str, enum.Enum):
# See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
# and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
class SqlTypeNames(str, enum.Enum):
- """Enum of allowed SQL type names in schema.SchemaField."""
+ """Enum of allowed SQL type names in schema.SchemaField.
+
+    Data type names used in Legacy SQL.
+ """
STRING = "STRING"
BYTES = "BYTES"
@@ -272,6 +334,10 @@ class SqlTypeNames(str, enum.Enum):
TIME = "TIME"
DATETIME = "DATETIME"
INTERVAL = "INTERVAL" # NOTE: not available in legacy types
+ RANGE = "RANGE" # NOTE: not available in legacy types
+ # NOTE: FOREIGN acts as a wrapper for data types
+ # not natively understood by BigQuery unless translated
+ FOREIGN = "FOREIGN"
class WriteDisposition(object):
@@ -290,6 +356,10 @@ class WriteDisposition(object):
WRITE_TRUNCATE = "WRITE_TRUNCATE"
"""If the table already exists, BigQuery overwrites the table data."""
+ WRITE_TRUNCATE_DATA = "WRITE_TRUNCATE_DATA"
+ """For existing tables, truncate data but preserve existing schema
+ and constraints."""
+
WRITE_EMPTY = "WRITE_EMPTY"
"""If the table already exists and contains data, a 'duplicate' error is
returned in the job result."""
@@ -310,3 +380,118 @@ class DeterminismLevel:
NOT_DETERMINISTIC = "NOT_DETERMINISTIC"
"""The UDF is not deterministic."""
+
+
+class RoundingMode(str, enum.Enum):
+ """Rounding mode options that can be used when storing NUMERIC or BIGNUMERIC
+ values.
+
+ ROUNDING_MODE_UNSPECIFIED: will default to using ROUND_HALF_AWAY_FROM_ZERO.
+
+ ROUND_HALF_AWAY_FROM_ZERO: rounds half values away from zero when applying
+ precision and scale upon writing of NUMERIC and BIGNUMERIC values.
+ For Scale: 0
+ * 1.1, 1.2, 1.3, 1.4 => 1
+ * 1.5, 1.6, 1.7, 1.8, 1.9 => 2
+
+ ROUND_HALF_EVEN: rounds half values to the nearest even value when applying
+ precision and scale upon writing of NUMERIC and BIGNUMERIC values.
+ For Scale: 0
+ * 1.1, 1.2, 1.3, 1.4 => 1
+ * 1.5 => 2
+ * 1.6, 1.7, 1.8, 1.9 => 2
+ * 2.5 => 2
+ """
+
+ def _generate_next_value_(name, start, count, last_values):
+ return name
+
+ ROUNDING_MODE_UNSPECIFIED = enum.auto()
+ ROUND_HALF_AWAY_FROM_ZERO = enum.auto()
+ ROUND_HALF_EVEN = enum.auto()
+
+
+class BigLakeFileFormat(object):
+ FILE_FORMAT_UNSPECIFIED = "FILE_FORMAT_UNSPECIFIED"
+ """The default unspecified value."""
+
+ PARQUET = "PARQUET"
+ """Apache Parquet format."""
+
+
+class BigLakeTableFormat(object):
+ TABLE_FORMAT_UNSPECIFIED = "TABLE_FORMAT_UNSPECIFIED"
+ """The default unspecified value."""
+
+ ICEBERG = "ICEBERG"
+ """Apache Iceberg format."""
+
+
+class UpdateMode(enum.Enum):
+ """Specifies the kind of information to update in a dataset."""
+
+ UPDATE_MODE_UNSPECIFIED = "UPDATE_MODE_UNSPECIFIED"
+ """The default value. Behavior defaults to UPDATE_FULL."""
+
+ UPDATE_METADATA = "UPDATE_METADATA"
+ """Includes metadata information for the dataset, such as friendlyName,
+ description, labels, etc."""
+
+ UPDATE_ACL = "UPDATE_ACL"
+ """Includes ACL information for the dataset, which defines dataset access
+ for one or more entities."""
+
+ UPDATE_FULL = "UPDATE_FULL"
+ """Includes both dataset metadata and ACL information."""
+
+
+class JobCreationMode(object):
+ """Documented values for Job Creation Mode."""
+
+ JOB_CREATION_MODE_UNSPECIFIED = "JOB_CREATION_MODE_UNSPECIFIED"
+ """Job creation mode is unspecified."""
+
+ JOB_CREATION_REQUIRED = "JOB_CREATION_REQUIRED"
+ """Job creation is always required."""
+
+ JOB_CREATION_OPTIONAL = "JOB_CREATION_OPTIONAL"
+ """Job creation is optional.
+
+ Returning immediate results is prioritized.
+ BigQuery will automatically determine if a Job needs to be created.
+ The conditions under which BigQuery can decide to not create a Job are
+ subject to change.
+ """
+
+
+class SourceColumnMatch(str, enum.Enum):
+ """Uses sensible defaults based on how the schema is provided.
+ If autodetect is used, then columns are matched by name. Otherwise, columns
+ are matched by position. This is done to keep the behavior backward-compatible.
+ """
+
+ SOURCE_COLUMN_MATCH_UNSPECIFIED = "SOURCE_COLUMN_MATCH_UNSPECIFIED"
+ """Unspecified column name match option."""
+
+ POSITION = "POSITION"
+ """Matches by position. This assumes that the columns are ordered the same
+ way as the schema."""
+
+ NAME = "NAME"
+ """Matches by name. This reads the header row as column names and reorders
+ columns to match the field names in the schema."""
+
+
+class TimestampPrecision(enum.Enum):
+ """Precision (maximum number of total digits in base 10) for seconds of
+ TIMESTAMP type."""
+
+ MICROSECOND = None
+ """
+ Default, for TIMESTAMP type with microsecond precision.
+ """
+
+ PICOSECOND = 12
+ """
+ For TIMESTAMP type with picosecond precision.
+ """
diff --git a/google/cloud/bigquery/exceptions.py b/google/cloud/bigquery/exceptions.py
new file mode 100644
index 000000000..62e0d540c
--- /dev/null
+++ b/google/cloud/bigquery/exceptions.py
@@ -0,0 +1,35 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+class BigQueryError(Exception):
+ """Base class for all custom exceptions defined by the BigQuery client."""
+
+
+class LegacyBigQueryStorageError(BigQueryError):
+ """Raised when too old a version of BigQuery Storage extra is detected at runtime."""
+
+
+class LegacyPyarrowError(BigQueryError):
+ """Raised when too old a version of pyarrow package is detected at runtime."""
+
+
+class BigQueryStorageNotFoundError(BigQueryError):
+ """Raised when BigQuery Storage extra is not installed when trying to
+ import it.
+ """
+
+
+class LegacyPandasError(BigQueryError):
+ """Raised when too old a version of pandas package is detected at runtime."""
diff --git a/google/cloud/bigquery/external_config.py b/google/cloud/bigquery/external_config.py
index 640b2d16b..7e76f93b5 100644
--- a/google/cloud/bigquery/external_config.py
+++ b/google/cloud/bigquery/external_config.py
@@ -18,17 +18,21 @@
Job.configuration.query.tableDefinitions.
"""
-from __future__ import absolute_import
+from __future__ import absolute_import, annotations
import base64
import copy
+import typing
from typing import Any, Dict, FrozenSet, Iterable, Optional, Union
from google.cloud.bigquery._helpers import _to_bytes
from google.cloud.bigquery._helpers import _bytes_to_json
from google.cloud.bigquery._helpers import _int_or_none
from google.cloud.bigquery._helpers import _str_or_none
+from google.cloud.bigquery import _helpers
+from google.cloud.bigquery.enums import SourceColumnMatch
from google.cloud.bigquery.format_options import AvroOptions, ParquetOptions
+from google.cloud.bigquery import schema
from google.cloud.bigquery.schema import SchemaField
@@ -418,6 +422,20 @@ def encoding(self):
def encoding(self, value):
self._properties["encoding"] = value
+ @property
+ def preserve_ascii_control_characters(self):
+ """bool: Indicates if the embedded ASCII control characters
+ (the first 32 characters in the ASCII-table, from '\x00' to '\x1F') are preserved.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.preserve_ascii_control_characters
+ """
+ return self._properties.get("preserveAsciiControlCharacters")
+
+ @preserve_ascii_control_characters.setter
+ def preserve_ascii_control_characters(self, value):
+ self._properties["preserveAsciiControlCharacters"] = value
+
@property
def field_delimiter(self):
"""str: The separator for fields in a CSV file. Defaults to comma (',').
@@ -457,6 +475,60 @@ def skip_leading_rows(self):
def skip_leading_rows(self, value):
self._properties["skipLeadingRows"] = str(value)
+ @property
+ def source_column_match(self) -> Optional[SourceColumnMatch]:
+ """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the
+ strategy used to match loaded columns to the schema. If not set, a sensible
+ default is chosen based on how the schema is provided. If autodetect is
+ used, then columns are matched by name. Otherwise, columns are matched by
+ position. This is done to keep the behavior backward-compatible.
+
+ Acceptable values are:
+
+ SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option.
+ POSITION: matches by position. This assumes that the columns are ordered
+ the same way as the schema.
+ NAME: matches by name. This reads the header row as column names and
+ reorders columns to match the field names in the schema.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.source_column_match
+ """
+
+ value = self._properties.get("sourceColumnMatch")
+ return SourceColumnMatch(value) if value is not None else None
+
+ @source_column_match.setter
+ def source_column_match(self, value: Union[SourceColumnMatch, str, None]):
+ if value is not None and not isinstance(value, (SourceColumnMatch, str)):
+ raise TypeError(
+ "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None"
+ )
+ if isinstance(value, SourceColumnMatch):
+ value = value.value
+ self._properties["sourceColumnMatch"] = value if value else None
+
+ @property
+ def null_markers(self) -> Optional[Iterable[str]]:
+ """Optional[Iterable[str]]: A list of strings represented as SQL NULL values in a CSV file.
+
+ .. note::
+            ``null_marker`` and ``null_markers`` can't be set at the same time;
+            if both are set, the request fails with a user error.
+            Any string listed in ``null_markers``, including the empty string,
+            is interpreted as SQL NULL.
+ This applies to all column types.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#CsvOptions.FIELDS.null_markers
+ """
+ return self._properties.get("nullMarkers")
+
+ @null_markers.setter
+ def null_markers(self, value: Optional[Iterable[str]]):
+ self._properties["nullMarkers"] = value
+
def to_api_repr(self) -> dict:
"""Build an API representation of this object.
@@ -565,11 +637,7 @@ def from_api_repr(cls, resource: dict) -> "GoogleSheetsOptions":
class HivePartitioningOptions(object):
- """[Beta] Options that configure hive partitioning.
-
- .. note::
- **Experimental**. This feature is experimental and might change or
- have limited support.
+ """Options that configure hive partitioning.
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#HivePartitioningOptions
@@ -736,13 +804,9 @@ def decimal_target_types(self, value: Optional[Iterable[str]]):
@property
def hive_partitioning(self):
- """Optional[:class:`~.external_config.HivePartitioningOptions`]: [Beta] When set, \
+ """Optional[:class:`~.external_config.HivePartitioningOptions`]: When set, \
it configures hive partitioning support.
- .. note::
- **Experimental**. This feature is experimental and might change or
- have limited support.
-
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.hive_partitioning_options
"""
@@ -756,6 +820,20 @@ def hive_partitioning(self, value):
prop = value.to_api_repr() if value is not None else None
self._properties["hivePartitioningOptions"] = prop
+ @property
+ def reference_file_schema_uri(self):
+ """Optional[str]:
+ When creating an external table, the user can provide a reference file with the
+ table schema. This is enabled for the following formats:
+
+ AVRO, PARQUET, ORC
+ """
+ return self._properties.get("referenceFileSchemaUri")
+
+ @reference_file_schema_uri.setter
+ def reference_file_schema_uri(self, value):
+ self._properties["referenceFileSchemaUri"] = value
+
@property
def ignore_unknown_values(self):
"""bool: If :data:`True`, extra values that are not represented in the
@@ -805,7 +883,9 @@ def schema(self):
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.schema
"""
- prop = self._properties.get("schema", {})
+ prop: Dict[str, Any] = typing.cast(
+ Dict[str, Any], self._properties.get("schema", {})
+ )
return [SchemaField.from_api_repr(field) for field in prop.get("fields", [])]
@schema.setter
@@ -816,15 +896,83 @@ def schema(self, value):
self._properties["schema"] = prop
@property
- def connection_id(self):
- """Optional[str]: [Experimental] ID of a BigQuery Connection API
- resource.
+ def date_format(self) -> Optional[str]:
+ """Optional[str]: Format used to parse DATE values. Supports C-style and SQL-style values.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.date_format
+ """
+ result = self._properties.get("dateFormat")
+ return typing.cast(str, result)
- .. WARNING::
+ @date_format.setter
+ def date_format(self, value: Optional[str]):
+ self._properties["dateFormat"] = value
- This feature is experimental. Pre-GA features may have limited
- support, and changes to pre-GA features may not be compatible with
- other pre-GA versions.
+ @property
+ def datetime_format(self) -> Optional[str]:
+ """Optional[str]: Format used to parse DATETIME values. Supports C-style
+ and SQL-style values.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.datetime_format
+ """
+ result = self._properties.get("datetimeFormat")
+ return typing.cast(str, result)
+
+ @datetime_format.setter
+ def datetime_format(self, value: Optional[str]):
+ self._properties["datetimeFormat"] = value
+
+ @property
+ def time_zone(self) -> Optional[str]:
+ """Optional[str]: Time zone used when parsing timestamp values that do not
+ have specific time zone information (e.g. 2024-04-20 12:34:56). The expected
+ format is an IANA timezone string (e.g. America/Los_Angeles).
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_zone
+ """
+
+ result = self._properties.get("timeZone")
+ return typing.cast(str, result)
+
+ @time_zone.setter
+ def time_zone(self, value: Optional[str]):
+ self._properties["timeZone"] = value
+
+ @property
+ def time_format(self) -> Optional[str]:
+ """Optional[str]: Format used to parse TIME values. Supports C-style and SQL-style values.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.time_format
+ """
+ result = self._properties.get("timeFormat")
+ return typing.cast(str, result)
+
+ @time_format.setter
+ def time_format(self, value: Optional[str]):
+ self._properties["timeFormat"] = value
+
+ @property
+ def timestamp_format(self) -> Optional[str]:
+ """Optional[str]: Format used to parse TIMESTAMP values. Supports C-style and SQL-style values.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#ExternalDataConfiguration.FIELDS.timestamp_format
+ """
+ result = self._properties.get("timestampFormat")
+ return typing.cast(str, result)
+
+ @timestamp_format.setter
+ def timestamp_format(self, value: Optional[str]):
+ self._properties["timestampFormat"] = value
+
+ @property
+ def connection_id(self):
+ """Optional[str]: ID of a BigQuery Connection API
+ resource.
"""
return self._properties.get("connectionId")
@@ -975,3 +1123,182 @@ def from_api_repr(cls, resource: dict) -> "ExternalConfig":
config = cls(resource["sourceFormat"])
config._properties = copy.deepcopy(resource)
return config
+
+
+class ExternalCatalogDatasetOptions:
+ """Options defining open source compatible datasets living in the BigQuery catalog.
+    Contains metadata of an open source database, schema, or namespace
+    represented by the current dataset.
+
+ Args:
+ default_storage_location_uri (Optional[str]): The storage location URI for all
+ tables in the dataset. Equivalent to hive metastore's database
+            locationUri. Maximum length of 1024 characters.
+ parameters (Optional[dict[str, Any]]): A map of key value pairs defining the parameters
+            and properties of the open source schema. Maximum size of 2 MiB.
+ """
+
+ def __init__(
+ self,
+ default_storage_location_uri: Optional[str] = None,
+ parameters: Optional[Dict[str, Any]] = None,
+ ):
+ self._properties: Dict[str, Any] = {}
+ self.default_storage_location_uri = default_storage_location_uri
+ self.parameters = parameters
+
+ @property
+ def default_storage_location_uri(self) -> Optional[str]:
+ """Optional. The storage location URI for all tables in the dataset.
+ Equivalent to hive metastore's database locationUri. Maximum length of
+ 1024 characters."""
+
+ return self._properties.get("defaultStorageLocationUri")
+
+ @default_storage_location_uri.setter
+ def default_storage_location_uri(self, value: Optional[str]):
+ value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
+ self._properties["defaultStorageLocationUri"] = value
+
+ @property
+ def parameters(self) -> Optional[Dict[str, Any]]:
+ """Optional. A map of key value pairs defining the parameters and
+        properties of the open source schema. Maximum size of 2 MiB."""
+
+ return self._properties.get("parameters")
+
+ @parameters.setter
+ def parameters(self, value: Optional[Dict[str, Any]]):
+ value = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
+ self._properties["parameters"] = value
+
+ def to_api_repr(self) -> dict:
+ """Build an API representation of this object.
+
+ Returns:
+ Dict[str, Any]:
+ A dictionary in the format used by the BigQuery API.
+ """
+ return self._properties
+
+ @classmethod
+ def from_api_repr(cls, api_repr: dict) -> ExternalCatalogDatasetOptions:
+ """Factory: constructs an instance of the class (cls)
+ given its API representation.
+
+ Args:
+ api_repr (Dict[str, Any]):
+ API representation of the object to be instantiated.
+
+ Returns:
+            An instance of the class initialized with data from 'api_repr'.
+ """
+ config = cls()
+ config._properties = api_repr
+ return config
+
+
+class ExternalCatalogTableOptions:
+    """Metadata about an open source compatible table. The fields contained in
+    these options correspond to hive metastore's table-level properties.
+
+ Args:
+ connection_id (Optional[str]): The connection specifying the credentials to be
+ used to read external storage, such as Azure Blob, Cloud Storage, or
+ S3. The connection is needed to read the open source table from
+            BigQuery Engine. The connection_id can have the form
+            ``<project_id>.<location_id>.<connection_id>`` or
+            ``projects/<project_id>/locations/<location_id>/connections/<connection_id>``.
+ parameters (Union[Dict[str, Any], None]): A map of key value pairs defining the parameters
+            and properties of the open source table. Corresponds with hive
+            metastore table parameters. Maximum size of 4 MiB.
+ storage_descriptor (Optional[StorageDescriptor]): A storage descriptor containing information
+ about the physical storage of this table.
+ """
+
+ def __init__(
+ self,
+ connection_id: Optional[str] = None,
+ parameters: Union[Dict[str, Any], None] = None,
+ storage_descriptor: Optional[schema.StorageDescriptor] = None,
+ ):
+ self._properties: Dict[str, Any] = {}
+ self.connection_id = connection_id
+ self.parameters = parameters
+ self.storage_descriptor = storage_descriptor
+
+ @property
+ def connection_id(self) -> Optional[str]:
+ """Optional. The connection specifying the credentials to be
+ used to read external storage, such as Azure Blob, Cloud Storage, or
+ S3. The connection is needed to read the open source table from
+        BigQuery Engine. The connection_id can have the form
+        ``<project_id>.<location_id>.<connection_id>`` or
+        ``projects/<project_id>/locations/<location_id>/connections/<connection_id>``.
+ """
+
+ return self._properties.get("connectionId")
+
+ @connection_id.setter
+ def connection_id(self, value: Optional[str]):
+ value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
+ self._properties["connectionId"] = value
+
+ @property
+ def parameters(self) -> Union[Dict[str, Any], None]:
+ """Optional. A map of key value pairs defining the parameters and
+        properties of the open source table. Corresponds with hive metastore
+        table parameters. Maximum size of 4 MiB.
+ """
+
+ return self._properties.get("parameters")
+
+ @parameters.setter
+ def parameters(self, value: Union[Dict[str, Any], None]):
+ value = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
+ self._properties["parameters"] = value
+
+ @property
+ def storage_descriptor(self) -> Any:
+ """Optional. A storage descriptor containing information about the
+ physical storage of this table."""
+
+ prop = _helpers._get_sub_prop(self._properties, ["storageDescriptor"])
+
+ if prop is not None:
+ return schema.StorageDescriptor.from_api_repr(prop)
+ return None
+
+ @storage_descriptor.setter
+ def storage_descriptor(self, value: Union[schema.StorageDescriptor, dict, None]):
+ value = _helpers._isinstance_or_raise(
+ value, (schema.StorageDescriptor, dict), none_allowed=True
+ )
+ if isinstance(value, schema.StorageDescriptor):
+ self._properties["storageDescriptor"] = value.to_api_repr()
+ else:
+ self._properties["storageDescriptor"] = value
+
+ def to_api_repr(self) -> dict:
+ """Build an API representation of this object.
+
+ Returns:
+ Dict[str, Any]:
+ A dictionary in the format used by the BigQuery API.
+ """
+
+ return self._properties
+
+ @classmethod
+ def from_api_repr(cls, api_repr: dict) -> ExternalCatalogTableOptions:
+ """Factory: constructs an instance of the class (cls)
+ given its API representation.
+
+ Args:
+ api_repr (Dict[str, Any]):
+ API representation of the object to be instantiated.
+
+ Returns:
+ An instance of the class initialized with data from 'api_repr'.
+ """
+ config = cls()
+ config._properties = api_repr
+ return config
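A minimal usage sketch for the two catalog option classes added above, assuming this patch is applied; the URI, connection, and parameter values are placeholders::

    from google.cloud.bigquery.external_config import (
        ExternalCatalogDatasetOptions,
        ExternalCatalogTableOptions,
    )

    # The options round-trip through the plain dict representation used by the API.
    dataset_opts = ExternalCatalogDatasetOptions(
        default_storage_location_uri="gs://example-bucket/datasets/my_db",
        parameters={"owner": "data-eng"},
    )
    assert dataset_opts.to_api_repr() == {
        "defaultStorageLocationUri": "gs://example-bucket/datasets/my_db",
        "parameters": {"owner": "data-eng"},
    }

    table_opts = ExternalCatalogTableOptions.from_api_repr(
        {"connectionId": "us.my-connection", "parameters": {"format": "PARQUET"}}
    )
    assert table_opts.connection_id == "us.my-connection"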
diff --git a/google/cloud/bigquery/format_options.py b/google/cloud/bigquery/format_options.py
index 1208565a9..e26b7a74f 100644
--- a/google/cloud/bigquery/format_options.py
+++ b/google/cloud/bigquery/format_options.py
@@ -13,7 +13,7 @@
# limitations under the License.
import copy
-from typing import Dict, Optional
+from typing import Dict, Optional, Union
class AvroOptions:
@@ -105,6 +105,21 @@ def enable_list_inference(self) -> bool:
def enable_list_inference(self, value: bool) -> None:
self._properties["enableListInference"] = value
+ @property
+ def map_target_type(self) -> Optional[Union[bool, str]]:
+ """Indicates whether to simplify the representation of parquet maps to only show keys and values."""
+
+ return self._properties.get("mapTargetType")
+
+ @map_target_type.setter
+ def map_target_type(self, value: str) -> None:
+ """Sets the map target type.
+
+ Args:
+            value: The map target type (e.g. ARRAY_OF_STRUCT).
+ """
+ self._properties["mapTargetType"] = value
+
@classmethod
def from_api_repr(cls, resource: Dict[str, bool]) -> "ParquetOptions":
"""Factory: construct an instance from a resource dict.
diff --git a/google/cloud/bigquery/job/__init__.py b/google/cloud/bigquery/job/__init__.py
index f51311b0b..4cda65965 100644
--- a/google/cloud/bigquery/job/__init__.py
+++ b/google/cloud/bigquery/job/__init__.py
@@ -39,6 +39,7 @@
from google.cloud.bigquery.job.query import QueryPlanEntryStep
from google.cloud.bigquery.job.query import ScriptOptions
from google.cloud.bigquery.job.query import TimelineEntry
+from google.cloud.bigquery.job.query import IncrementalResultStats
from google.cloud.bigquery.enums import Compression
from google.cloud.bigquery.enums import CreateDisposition
from google.cloud.bigquery.enums import DestinationFormat
@@ -84,4 +85,5 @@
"SourceFormat",
"TransactionInfo",
"WriteDisposition",
+ "IncrementalResultStats",
]
diff --git a/google/cloud/bigquery/job/base.py b/google/cloud/bigquery/job/base.py
index 86701e295..7576fc9aa 100644
--- a/google/cloud/bigquery/job/base.py
+++ b/google/cloud/bigquery/job/base.py
@@ -19,16 +19,18 @@
import http
import threading
import typing
-from typing import Dict, Optional, Sequence
+from typing import ClassVar, Dict, Optional, Sequence
+from google.api_core import retry as retries
from google.api_core import exceptions
import google.api_core.future.polling
from google.cloud.bigquery import _helpers
-from google.cloud.bigquery.retry import DEFAULT_RETRY
-
-if typing.TYPE_CHECKING: # pragma: NO COVER
- from google.api_core import retry as retries
+from google.cloud.bigquery._helpers import _int_or_none
+from google.cloud.bigquery.retry import (
+ DEFAULT_GET_JOB_TIMEOUT,
+ DEFAULT_RETRY,
+)
_DONE_STATE = "DONE"
@@ -47,7 +49,7 @@
"notImplemented": http.client.NOT_IMPLEMENTED,
"policyViolation": http.client.FORBIDDEN,
"quotaExceeded": http.client.FORBIDDEN,
- "rateLimitExceeded": http.client.FORBIDDEN,
+ "rateLimitExceeded": http.client.TOO_MANY_REQUESTS,
"resourceInUse": http.client.BAD_REQUEST,
"resourcesExceeded": http.client.BAD_REQUEST,
"responseTooLarge": http.client.FORBIDDEN,
@@ -56,7 +58,7 @@
}
-def _error_result_to_exception(error_result):
+def _error_result_to_exception(error_result, errors=None):
"""Maps BigQuery error reasons to an exception.
The reasons and their matching HTTP status codes are documented on
@@ -67,6 +69,7 @@ def _error_result_to_exception(error_result):
Args:
error_result (Mapping[str, str]): The error result from BigQuery.
+ errors (Union[Iterable[str], None]): The detailed error messages.
Returns:
google.cloud.exceptions.GoogleAPICallError: The mapped exception.
@@ -75,8 +78,24 @@ def _error_result_to_exception(error_result):
status_code = _ERROR_REASON_TO_EXCEPTION.get(
reason, http.client.INTERNAL_SERVER_ERROR
)
+ # Manually create error message to preserve both error_result and errors.
+ # Can be removed once b/310544564 and b/318889899 are resolved.
+ concatenated_errors = ""
+ if errors:
+ concatenated_errors = "; "
+ for err in errors:
+ concatenated_errors += ", ".join(
+ [f"{key}: {value}" for key, value in err.items()]
+ )
+ concatenated_errors += "; "
+
+ # strips off the last unneeded semicolon and space
+ concatenated_errors = concatenated_errors[:-2]
+
+ error_message = error_result.get("message", "") + concatenated_errors
+
return exceptions.from_http_status(
- status_code, error_result.get("message", ""), errors=[error_result]
+ status_code, error_message, errors=[error_result]
)
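For reference, a worked sketch (values illustrative) of the message produced by the concatenation above::

    error_result = {"reason": "invalidQuery", "message": "Syntax error at [1:8]"}
    errors = [
        {"reason": "invalidQuery", "message": "Syntax error at [1:8]"},
        {"reason": "invalid", "message": "Job failed"},
    ]
    # _error_result_to_exception(error_result, errors) returns the mapped client
    # exception; its message is the single string:
    #   "Syntax error at [1:8]; reason: invalidQuery, message: Syntax error at [1:8];
    #    reason: invalid, message: Job failed"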
@@ -150,6 +169,271 @@ def _from_api_repr(cls, resource):
return job_ref
+class _JobConfig(object):
+ """Abstract base class for job configuration objects.
+
+ Args:
+ job_type (str): The key to use for the job configuration.
+ """
+
+ def __init__(self, job_type, **kwargs):
+ self._job_type = job_type
+ self._properties = {job_type: {}}
+ for prop, val in kwargs.items():
+ setattr(self, prop, val)
+
+ def __setattr__(self, name, value):
+ """Override to be able to raise error if an unknown property is being set"""
+ if not name.startswith("_") and not hasattr(type(self), name):
+ raise AttributeError(
+ "Property {} is unknown for {}.".format(name, type(self))
+ )
+ super(_JobConfig, self).__setattr__(name, value)
+
+ @property
+ def job_timeout_ms(self):
+        """Optional parameter. Job timeout in milliseconds. If this time limit
+        is exceeded, BigQuery might attempt to stop the job.
+
+        See:
+        https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfiguration.FIELDS.job_timeout_ms
+
+        For example::
+
+            job_config = bigquery.QueryJobConfig(job_timeout_ms=5000)
+            # or
+            job_config.job_timeout_ms = 5000
+
+ Raises:
+ ValueError: If ``value`` type is invalid.
+ """
+
+ # None as this is an optional parameter.
+ if self._properties.get("jobTimeoutMs"):
+ return self._properties["jobTimeoutMs"]
+ return None
+
+ @job_timeout_ms.setter
+ def job_timeout_ms(self, value):
+ try:
+ value = _int_or_none(value)
+ except ValueError as err:
+ raise ValueError("Pass an int for jobTimeoutMs, e.g. 5000").with_traceback(
+ err.__traceback__
+ )
+
+ if value is not None:
+ # docs indicate a string is expected by the API
+ self._properties["jobTimeoutMs"] = str(value)
+ else:
+ self._properties.pop("jobTimeoutMs", None)
+
+ @property
+ def max_slots(self) -> Optional[int]:
+ """The maximum rate of slot consumption to allow for this job.
+
+ If set, the number of slots used to execute the job will be throttled
+        to try to keep its slot consumption below the requested rate.
+ This feature is not generally available.
+ """
+
+ max_slots = self._properties.get("maxSlots")
+ if max_slots is not None:
+ if isinstance(max_slots, str):
+ return int(max_slots)
+ if isinstance(max_slots, int):
+ return max_slots
+ return None
+
+ @max_slots.setter
+ def max_slots(self, value):
+ try:
+ value = _int_or_none(value)
+ except ValueError as err:
+ raise ValueError("Pass an int for max slots, e.g. 100").with_traceback(
+ err.__traceback__
+ )
+
+ if value is not None:
+ self._properties["maxSlots"] = str(value)
+ else:
+ self._properties.pop("maxSlots", None)
+
+ @property
+ def reservation(self):
+        """Optional[str]: The reservation that the job will use.
+
+        The user can specify a reservation to execute the job. If a reservation
+        is not set, it is determined based on the rules defined by the
+        reservation assignments. The expected format is
+        ``projects/{project}/locations/{location}/reservations/{reservation}``.
+
+ Raises:
+ ValueError: If ``value`` type is not None or of string type.
+ """
+ return self._properties.setdefault("reservation", None)
+
+ @reservation.setter
+ def reservation(self, value):
+ if value and not isinstance(value, str):
+ raise ValueError("Reservation must be None or a string.")
+ self._properties["reservation"] = value
+
+ @property
+ def labels(self):
+ """Dict[str, str]: Labels for the job.
+
+ This method always returns a dict. Once a job has been created on the
+ server, its labels cannot be modified anymore.
+
+ Raises:
+ ValueError: If ``value`` type is invalid.
+ """
+ return self._properties.setdefault("labels", {})
+
+ @labels.setter
+ def labels(self, value):
+ if not isinstance(value, dict):
+ raise ValueError("Pass a dict")
+ self._properties["labels"] = value
+
+ def _get_sub_prop(self, key, default=None):
+ """Get a value in the ``self._properties[self._job_type]`` dictionary.
+
+ Most job properties are inside the dictionary related to the job type
+ (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access
+ those properties::
+
+ self._get_sub_prop('destinationTable')
+
+ This is equivalent to using the ``_helpers._get_sub_prop`` function::
+
+ _helpers._get_sub_prop(
+ self._properties, ['query', 'destinationTable'])
+
+ Args:
+ key (str):
+ Key for the value to get in the
+ ``self._properties[self._job_type]`` dictionary.
+ default (Optional[object]):
+ Default value to return if the key is not found.
+ Defaults to :data:`None`.
+
+ Returns:
+ object: The value if present or the default.
+ """
+ return _helpers._get_sub_prop(
+ self._properties, [self._job_type, key], default=default
+ )
+
+ def _set_sub_prop(self, key, value):
+ """Set a value in the ``self._properties[self._job_type]`` dictionary.
+
+ Most job properties are inside the dictionary related to the job type
+ (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set
+ those properties::
+
+ self._set_sub_prop('useLegacySql', False)
+
+        This is equivalent to using the ``_helpers._set_sub_prop`` function::
+
+            _helpers._set_sub_prop(
+                self._properties, ['query', 'useLegacySql'], False)
+
+ Args:
+ key (str):
+ Key to set in the ``self._properties[self._job_type]``
+ dictionary.
+ value (object): Value to set.
+ """
+ _helpers._set_sub_prop(self._properties, [self._job_type, key], value)
+
+ def _del_sub_prop(self, key):
+ """Remove ``key`` from the ``self._properties[self._job_type]`` dict.
+
+ Most job properties are inside the dictionary related to the job type
+ (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear
+ those properties::
+
+ self._del_sub_prop('useLegacySql')
+
+        This is equivalent to using the ``_helpers._del_sub_prop`` function::
+
+            _helpers._del_sub_prop(
+                self._properties, ['query', 'useLegacySql'])
+
+ Args:
+ key (str):
+ Key to remove in the ``self._properties[self._job_type]``
+ dictionary.
+ """
+ _helpers._del_sub_prop(self._properties, [self._job_type, key])
+
+ def to_api_repr(self) -> dict:
+ """Build an API representation of the job config.
+
+ Returns:
+ Dict: A dictionary in the format used by the BigQuery API.
+ """
+ return copy.deepcopy(self._properties)
+
+ def _fill_from_default(self, default_job_config=None):
+ """Merge this job config with a default job config.
+
+ The keys in this object take precedence over the keys in the default
+ config. The merge is done at the top-level as well as for keys one
+ level below the job type.
+
+ Args:
+ default_job_config (google.cloud.bigquery.job._JobConfig):
+ The default job config that will be used to fill in self.
+
+ Returns:
+ google.cloud.bigquery.job._JobConfig: A new (merged) job config.
+ """
+ if not default_job_config:
+ new_job_config = copy.deepcopy(self)
+ return new_job_config
+
+ if self._job_type != default_job_config._job_type:
+ raise TypeError(
+ "attempted to merge two incompatible job types: "
+ + repr(self._job_type)
+ + ", "
+ + repr(default_job_config._job_type)
+ )
+
+ # cls is one of the job config subclasses that provides the job_type argument to
+ # this base class on instantiation, thus missing-parameter warning is a false
+ # positive here.
+ new_job_config = self.__class__() # pytype: disable=missing-parameter
+
+ default_job_properties = copy.deepcopy(default_job_config._properties)
+ for key in self._properties:
+ if key != self._job_type:
+ default_job_properties[key] = self._properties[key]
+
+ default_job_properties[self._job_type].update(self._properties[self._job_type])
+ new_job_config._properties = default_job_properties
+
+ return new_job_config
+
+ @classmethod
+ def from_api_repr(cls, resource: dict) -> "_JobConfig":
+ """Factory: construct a job configuration given its API representation
+
+ Args:
+ resource (Dict):
+ A job configuration in the same representation as is returned
+ from the API.
+
+ Returns:
+ google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``.
+ """
+ # cls is one of the job config subclasses that provides the job_type argument to
+ # this base class on instantiation, thus missing-parameter warning is a false
+ # positive here.
+ job_config = cls() # type: ignore # pytype: disable=missing-parameter
+ job_config._properties = resource
+ return job_config
+
+
class _AsyncJob(google.api_core.future.polling.PollingFuture):
"""Base class for asynchronous jobs.
@@ -161,6 +445,9 @@ class _AsyncJob(google.api_core.future.polling.PollingFuture):
Client which holds credentials and project configuration.
"""
+ _JOB_TYPE = "unknown"
+ _CONFIG_CLASS: ClassVar
+
def __init__(self, job_id, client):
super(_AsyncJob, self).__init__()
@@ -176,6 +463,13 @@ def __init__(self, job_id, client):
self._result_set = False
self._completion_lock = threading.Lock()
+ @property
+ def configuration(self) -> _JobConfig:
+        """Job-type specific configuration."""
+ configuration: _JobConfig = self._CONFIG_CLASS() # pytype: disable=not-callable
+ configuration._properties = self._properties.setdefault("configuration", {})
+ return configuration
+
@property
def job_id(self):
"""str: ID of the job."""
@@ -243,6 +537,18 @@ def location(self):
"""str: Location where the job runs."""
return _helpers._get_sub_prop(self._properties, ["jobReference", "location"])
+ @property
+ def reservation_id(self):
+ """str: Name of the primary reservation assigned to this job.
+
+        Note that this could be different from the reservations reported in
+ the reservation field if parent reservations were used to execute
+ this job.
+ """
+ return _helpers._get_sub_prop(
+ self._properties, ["statistics", "reservation_id"]
+ )
+
def _require_client(self, client):
"""Check client or verify over-ride.
@@ -387,7 +693,12 @@ def transaction_info(self) -> Optional[TransactionInfo]:
@property
def error_result(self):
- """Error information about the job as a whole.
+ """Output only. Final error result of the job.
+
+ If present, indicates that the job has completed and was unsuccessful.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.error_result
Returns:
Optional[Mapping]: the error information (None until set from the server).
@@ -398,7 +709,13 @@ def error_result(self):
@property
def errors(self):
- """Information about individual errors generated by the job.
+ """Output only. The first errors encountered during the running of the job.
+
+ The final message includes the number of errors that caused the process to stop.
+ Errors here do not necessarily mean that the job has not completed or was unsuccessful.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.errors
Returns:
Optional[List[Mapping]]:
@@ -410,7 +727,12 @@ def errors(self):
@property
def state(self):
- """Status of the job.
+ """Output only. Running state of the job.
+
+ Valid states include 'PENDING', 'RUNNING', and 'DONE'.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.state
Returns:
Optional[str]:
@@ -426,8 +748,7 @@ def _set_properties(self, api_response):
api_response (Dict): response returned from an API call.
"""
cleaned = api_response.copy()
-
- statistics = cleaned.get("statistics", {})
+ statistics = cleaned.setdefault("statistics", {})
if "creationTime" in statistics:
statistics["creationTime"] = float(statistics["creationTime"])
if "startTime" in statistics:
@@ -435,13 +756,7 @@ def _set_properties(self, api_response):
if "endTime" in statistics:
statistics["endTime"] = float(statistics["endTime"])
- # Save configuration to keep reference same in self._configuration.
- cleaned_config = cleaned.pop("configuration", {})
- configuration = self._properties.pop("configuration", {})
- self._properties.clear()
- self._properties.update(cleaned)
- self._properties["configuration"] = configuration
- self._properties["configuration"].update(cleaned_config)
+ self._properties = cleaned
# For Future interface
self._set_future_result()
@@ -520,7 +835,10 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None):
self._set_properties(api_response)
def exists(
- self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
+ self,
+ client=None,
+ retry: "retries.Retry" = DEFAULT_RETRY,
+ timeout: Optional[float] = None,
) -> bool:
"""API call: test for the existence of the job via a GET request
@@ -565,7 +883,10 @@ def exists(
return True
def reload(
- self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
+ self,
+ client=None,
+ retry: "retries.Retry" = DEFAULT_RETRY,
+ timeout: Optional[float] = DEFAULT_GET_JOB_TIMEOUT,
):
"""API call: refresh job properties via a GET request.
@@ -584,25 +905,20 @@ def reload(
"""
client = self._require_client(client)
- extra_params = {}
- if self.location:
- extra_params["location"] = self.location
- span_attributes = {"path": self.path}
-
- api_response = client._call_api(
- retry,
- span_name="BigQuery.job.reload",
- span_attributes=span_attributes,
- job_ref=self,
- method="GET",
- path=self.path,
- query_params=extra_params,
+ got_job = client.get_job(
+ self,
+ project=self.project,
+ location=self.location,
+ retry=retry,
timeout=timeout,
)
- self._set_properties(api_response)
+ self._set_properties(got_job._properties)
def cancel(
- self, client=None, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
+ self,
+ client=None,
+ retry: Optional[retries.Retry] = DEFAULT_RETRY,
+ timeout: Optional[float] = None,
) -> bool:
"""API call: cancel job via a POST request
@@ -664,7 +980,9 @@ def _set_future_result(self):
return
if self.error_result is not None:
- exception = _error_result_to_exception(self.error_result)
+ exception = _error_result_to_exception(
+ self.error_result, self.errors or ()
+ )
self.set_exception(exception)
else:
self.set_result(self)
@@ -672,7 +990,7 @@ def _set_future_result(self):
def done(
self,
retry: "retries.Retry" = DEFAULT_RETRY,
- timeout: float = None,
+ timeout: Optional[float] = DEFAULT_GET_JOB_TIMEOUT,
reload: bool = True,
) -> bool:
"""Checks if the job is complete.
@@ -697,8 +1015,10 @@ def done(
self.reload(retry=retry, timeout=timeout)
return self.state == _DONE_STATE
- def result( # type: ignore # (signature complaint)
- self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
+ def result( # type: ignore # (incompatible with supertype)
+ self,
+ retry: Optional[retries.Retry] = DEFAULT_RETRY,
+ timeout: Optional[float] = None,
) -> "_AsyncJob":
"""Start the job and wait for it to complete and get the result.
@@ -724,8 +1044,7 @@ def result( # type: ignore # (signature complaint)
if self.state is None:
self._begin(retry=retry, timeout=timeout)
- kwargs = {} if retry is DEFAULT_RETRY else {"retry": retry}
- return super(_AsyncJob, self).result(timeout=timeout, **kwargs)
+ return super(_AsyncJob, self).result(timeout=timeout, retry=retry)
def cancelled(self):
"""Check if the job has been cancelled.
@@ -751,182 +1070,6 @@ def __repr__(self):
return result
-class _JobConfig(object):
- """Abstract base class for job configuration objects.
-
- Args:
- job_type (str): The key to use for the job configuration.
- """
-
- def __init__(self, job_type, **kwargs):
- self._job_type = job_type
- self._properties = {job_type: {}}
- for prop, val in kwargs.items():
- setattr(self, prop, val)
-
- def __setattr__(self, name, value):
- """Override to be able to raise error if an unknown property is being set"""
- if not name.startswith("_") and not hasattr(type(self), name):
- raise AttributeError(
- "Property {} is unknown for {}.".format(name, type(self))
- )
- super(_JobConfig, self).__setattr__(name, value)
-
- @property
- def labels(self):
- """Dict[str, str]: Labels for the job.
-
- This method always returns a dict. Once a job has been created on the
- server, its labels cannot be modified anymore.
-
- Raises:
- ValueError: If ``value`` type is invalid.
- """
- return self._properties.setdefault("labels", {})
-
- @labels.setter
- def labels(self, value):
- if not isinstance(value, dict):
- raise ValueError("Pass a dict")
- self._properties["labels"] = value
-
- def _get_sub_prop(self, key, default=None):
- """Get a value in the ``self._properties[self._job_type]`` dictionary.
-
- Most job properties are inside the dictionary related to the job type
- (e.g. 'copy', 'extract', 'load', 'query'). Use this method to access
- those properties::
-
- self._get_sub_prop('destinationTable')
-
- This is equivalent to using the ``_helpers._get_sub_prop`` function::
-
- _helpers._get_sub_prop(
- self._properties, ['query', 'destinationTable'])
-
- Args:
- key (str):
- Key for the value to get in the
- ``self._properties[self._job_type]`` dictionary.
- default (Optional[object]):
- Default value to return if the key is not found.
- Defaults to :data:`None`.
-
- Returns:
- object: The value if present or the default.
- """
- return _helpers._get_sub_prop(
- self._properties, [self._job_type, key], default=default
- )
-
- def _set_sub_prop(self, key, value):
- """Set a value in the ``self._properties[self._job_type]`` dictionary.
-
- Most job properties are inside the dictionary related to the job type
- (e.g. 'copy', 'extract', 'load', 'query'). Use this method to set
- those properties::
-
- self._set_sub_prop('useLegacySql', False)
-
- This is equivalent to using the ``_helper._set_sub_prop`` function::
-
- _helper._set_sub_prop(
- self._properties, ['query', 'useLegacySql'], False)
-
- Args:
- key (str):
- Key to set in the ``self._properties[self._job_type]``
- dictionary.
- value (object): Value to set.
- """
- _helpers._set_sub_prop(self._properties, [self._job_type, key], value)
-
- def _del_sub_prop(self, key):
- """Remove ``key`` from the ``self._properties[self._job_type]`` dict.
-
- Most job properties are inside the dictionary related to the job type
- (e.g. 'copy', 'extract', 'load', 'query'). Use this method to clear
- those properties::
-
- self._del_sub_prop('useLegacySql')
-
- This is equivalent to using the ``_helper._del_sub_prop`` function::
-
- _helper._del_sub_prop(
- self._properties, ['query', 'useLegacySql'])
-
- Args:
- key (str):
- Key to remove in the ``self._properties[self._job_type]``
- dictionary.
- """
- _helpers._del_sub_prop(self._properties, [self._job_type, key])
-
- def to_api_repr(self) -> dict:
- """Build an API representation of the job config.
-
- Returns:
- Dict: A dictionary in the format used by the BigQuery API.
- """
- return copy.deepcopy(self._properties)
-
- def _fill_from_default(self, default_job_config):
- """Merge this job config with a default job config.
-
- The keys in this object take precedence over the keys in the default
- config. The merge is done at the top-level as well as for keys one
- level below the job type.
-
- Args:
- default_job_config (google.cloud.bigquery.job._JobConfig):
- The default job config that will be used to fill in self.
-
- Returns:
- google.cloud.bigquery.job._JobConfig: A new (merged) job config.
- """
- if self._job_type != default_job_config._job_type:
- raise TypeError(
- "attempted to merge two incompatible job types: "
- + repr(self._job_type)
- + ", "
- + repr(default_job_config._job_type)
- )
-
- # cls is one of the job config subclasses that provides the job_type argument to
- # this base class on instantiation, thus missing-parameter warning is a false
- # positive here.
- new_job_config = self.__class__() # pytype: disable=missing-parameter
-
- default_job_properties = copy.deepcopy(default_job_config._properties)
- for key in self._properties:
- if key != self._job_type:
- default_job_properties[key] = self._properties[key]
-
- default_job_properties[self._job_type].update(self._properties[self._job_type])
- new_job_config._properties = default_job_properties
-
- return new_job_config
-
- @classmethod
- def from_api_repr(cls, resource: dict) -> "_JobConfig":
- """Factory: construct a job configuration given its API representation
-
- Args:
- resource (Dict):
- A job configuration in the same representation as is returned
- from the API.
-
- Returns:
- google.cloud.bigquery.job._JobConfig: Configuration parsed from ``resource``.
- """
- # cls is one of the job config subclasses that provides the job_type argument to
- # this base class on instantiation, thus missing-parameter warning is a false
- # positive here.
- job_config = cls() # type: ignore # pytype: disable=missing-parameter
- job_config._properties = resource
- return job_config
-
-
class ScriptStackFrame(object):
"""Stack frame showing the line/column/procedure name where the current
evaluation happened.
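A short sketch of how the relocated ``_JobConfig`` stores its new top-level fields, assuming this patch is applied (``max_slots`` is noted above as not generally available); the reservation path is a placeholder::

    from google.cloud import bigquery

    config = bigquery.QueryJobConfig()
    config.job_timeout_ms = 5000   # serialized as the string "5000"
    config.max_slots = 100         # likewise serialized as a string
    config.reservation = "projects/my-project/locations/us/reservations/my-reservation"

    # Query-specific keys are nested under "query"; the fields above stay at the
    # top level of the job configuration resource.
    api_repr = config.to_api_repr()
    assert api_repr["jobTimeoutMs"] == "5000"
    assert api_repr["maxSlots"] == "100"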
diff --git a/google/cloud/bigquery/job/copy_.py b/google/cloud/bigquery/job/copy_.py
index 9d7548ec5..5c52aeed6 100644
--- a/google/cloud/bigquery/job/copy_.py
+++ b/google/cloud/bigquery/job/copy_.py
@@ -14,6 +14,7 @@
"""Classes for copy jobs."""
+import typing
from typing import Optional
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
@@ -160,15 +161,13 @@ class CopyJob(_AsyncJob):
"""
_JOB_TYPE = "copy"
+ _CONFIG_CLASS = CopyJobConfig
def __init__(self, job_id, sources, destination, client, job_config=None):
super(CopyJob, self).__init__(job_id, client)
- if not job_config:
- job_config = CopyJobConfig()
-
- self._configuration = job_config
- self._properties["configuration"] = job_config._properties
+ if job_config is not None:
+ self._properties["configuration"] = job_config._properties
if destination:
_helpers._set_sub_prop(
@@ -185,6 +184,11 @@ def __init__(self, job_id, sources, destination, client, job_config=None):
source_resources,
)
+ @property
+ def configuration(self) -> CopyJobConfig:
+ """The configuration for this copy job."""
+ return typing.cast(CopyJobConfig, super().configuration)
+
@property
def destination(self):
"""google.cloud.bigquery.table.TableReference: Table into which data
@@ -223,14 +227,14 @@ def create_disposition(self):
"""See
:attr:`google.cloud.bigquery.job.CopyJobConfig.create_disposition`.
"""
- return self._configuration.create_disposition
+ return self.configuration.create_disposition
@property
def write_disposition(self):
"""See
:attr:`google.cloud.bigquery.job.CopyJobConfig.write_disposition`.
"""
- return self._configuration.write_disposition
+ return self.configuration.write_disposition
@property
def destination_encryption_configuration(self):
@@ -243,7 +247,7 @@ def destination_encryption_configuration(self):
See
:attr:`google.cloud.bigquery.job.CopyJobConfig.destination_encryption_configuration`.
"""
- return self._configuration.destination_encryption_configuration
+ return self.configuration.destination_encryption_configuration
def to_api_repr(self):
"""Generate a resource for :meth:`_begin`."""
diff --git a/google/cloud/bigquery/job/extract.py b/google/cloud/bigquery/job/extract.py
index 52aa036c9..64ec39b76 100644
--- a/google/cloud/bigquery/job/extract.py
+++ b/google/cloud/bigquery/job/extract.py
@@ -14,6 +14,8 @@
"""Classes for extract (export) jobs."""
+import typing
+
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.model import ModelReference
from google.cloud.bigquery.table import Table
@@ -125,15 +127,13 @@ class ExtractJob(_AsyncJob):
"""
_JOB_TYPE = "extract"
+ _CONFIG_CLASS = ExtractJobConfig
def __init__(self, job_id, source, destination_uris, client, job_config=None):
super(ExtractJob, self).__init__(job_id, client)
- if job_config is None:
- job_config = ExtractJobConfig()
-
- self._properties["configuration"] = job_config._properties
- self._configuration = job_config
+ if job_config is not None:
+ self._properties["configuration"] = job_config._properties
if source:
source_ref = {"projectId": source.project, "datasetId": source.dataset_id}
@@ -156,6 +156,11 @@ def __init__(self, job_id, source, destination_uris, client, job_config=None):
destination_uris,
)
+ @property
+ def configuration(self) -> ExtractJobConfig:
+ """The configuration for this extract job."""
+ return typing.cast(ExtractJobConfig, super().configuration)
+
@property
def source(self):
"""Union[ \
@@ -189,28 +194,28 @@ def compression(self):
"""See
:attr:`google.cloud.bigquery.job.ExtractJobConfig.compression`.
"""
- return self._configuration.compression
+ return self.configuration.compression
@property
def destination_format(self):
"""See
:attr:`google.cloud.bigquery.job.ExtractJobConfig.destination_format`.
"""
- return self._configuration.destination_format
+ return self.configuration.destination_format
@property
def field_delimiter(self):
"""See
:attr:`google.cloud.bigquery.job.ExtractJobConfig.field_delimiter`.
"""
- return self._configuration.field_delimiter
+ return self.configuration.field_delimiter
@property
def print_header(self):
"""See
:attr:`google.cloud.bigquery.job.ExtractJobConfig.print_header`.
"""
- return self._configuration.print_header
+ return self.configuration.print_header
@property
def destination_uri_file_counts(self):
diff --git a/google/cloud/bigquery/job/load.py b/google/cloud/bigquery/job/load.py
index e4b44395e..8cdb779ac 100644
--- a/google/cloud/bigquery/job/load.py
+++ b/google/cloud/bigquery/job/load.py
@@ -14,9 +14,11 @@
"""Classes for load jobs."""
-from typing import FrozenSet, List, Iterable, Optional
+import typing
+from typing import FrozenSet, List, Iterable, Optional, Union
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
+from google.cloud.bigquery.enums import SourceColumnMatch
from google.cloud.bigquery.external_config import HivePartitioningOptions
from google.cloud.bigquery.format_options import ParquetOptions
from google.cloud.bigquery import _helpers
@@ -28,6 +30,27 @@
from google.cloud.bigquery.job.base import _AsyncJob
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference
+from google.cloud.bigquery.query import ConnectionProperty
+
+
+class ColumnNameCharacterMap:
+ """Indicates the character map used for column names.
+
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#columnnamecharactermap
+ """
+
+ COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED = "COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED"
+ """Unspecified column name character map."""
+
+ STRICT = "STRICT"
+ """Support flexible column name and reject invalid column names."""
+
+ V1 = "V1"
+    """Support alphanumeric + underscore characters and names must start with
+ a letter or underscore. Invalid column names will be normalized."""
+
+ V2 = "V2"
+ """Support flexible column name. Invalid column names will be normalized."""
class LoadJobConfig(_JobConfig):
@@ -120,6 +143,25 @@ def clustering_fields(self, value):
else:
self._del_sub_prop("clustering")
+ @property
+ def connection_properties(self) -> List[ConnectionProperty]:
+ """Connection properties.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.connection_properties
+
+ .. versionadded:: 3.7.0
+ """
+ resource = self._get_sub_prop("connectionProperties", [])
+ return [ConnectionProperty.from_api_repr(prop) for prop in resource]
+
+ @connection_properties.setter
+ def connection_properties(self, value: Iterable[ConnectionProperty]):
+ self._set_sub_prop(
+ "connectionProperties",
+ [prop.to_api_repr() for prop in value],
+ )
+
@property
def create_disposition(self):
"""Optional[google.cloud.bigquery.job.CreateDisposition]: Specifies behavior
@@ -134,6 +176,27 @@ def create_disposition(self):
def create_disposition(self, value):
self._set_sub_prop("createDisposition", value)
+ @property
+ def create_session(self) -> Optional[bool]:
+ """[Preview] If :data:`True`, creates a new session, where
+ :attr:`~google.cloud.bigquery.job.LoadJob.session_info` will contain a
+ random server generated session id.
+
+ If :data:`False`, runs load job with an existing ``session_id`` passed in
+ :attr:`~google.cloud.bigquery.job.LoadJobConfig.connection_properties`,
+ otherwise runs load job in non-session mode.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.create_session
+
+ .. versionadded:: 3.7.0
+ """
+ return self._get_sub_prop("createSession")
+
+ @create_session.setter
+ def create_session(self, value: Optional[bool]):
+ self._set_sub_prop("createSession", value)
+
@property
def decimal_target_types(self) -> Optional[FrozenSet[str]]:
"""Possible SQL data types to which the source decimal values are converted.
@@ -285,6 +348,19 @@ def ignore_unknown_values(self):
def ignore_unknown_values(self, value):
self._set_sub_prop("ignoreUnknownValues", value)
+ @property
+ def json_extension(self):
+ """Optional[str]: The extension to use for writing JSON data to BigQuery. Only supports GeoJSON currently.
+
+ See: https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.json_extension
+
+ """
+ return self._get_sub_prop("jsonExtension")
+
+ @json_extension.setter
+ def json_extension(self, value):
+ self._set_sub_prop("jsonExtension", value)
+
@property
def max_bad_records(self):
"""Optional[int]: Number of invalid rows to ignore.
@@ -311,6 +387,40 @@ def null_marker(self):
def null_marker(self, value):
self._set_sub_prop("nullMarker", value)
+ @property
+ def null_markers(self) -> Optional[List[str]]:
+ """Optional[List[str]]: A list of strings represented as SQL NULL values in a CSV file.
+
+ .. note::
+            null_marker and null_markers can't be set at the same time:
+            if null_marker is set, null_markers must not be set, and vice versa.
+            Setting both at the same time results in a user error.
+            Any strings listed in null_markers, including the empty string, are
+            interpreted as SQL NULL.
+ This applies to all column types.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.null_markers
+ """
+ return self._get_sub_prop("nullMarkers")
+
+ @null_markers.setter
+ def null_markers(self, value: Optional[List[str]]):
+ self._set_sub_prop("nullMarkers", value)
+
+ @property
+ def preserve_ascii_control_characters(self):
+ """Optional[bool]: Preserves the embedded ASCII control characters when sourceFormat is set to CSV.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.preserve_ascii_control_characters
+ """
+ return self._get_sub_prop("preserveAsciiControlCharacters")
+
+ @preserve_ascii_control_characters.setter
+ def preserve_ascii_control_characters(self, value):
+ self._set_sub_prop("preserveAsciiControlCharacters", bool(value))
+
@property
def projection_fields(self) -> Optional[List[str]]:
"""Optional[List[str]]: If
@@ -379,6 +489,20 @@ def range_partitioning(self, value):
)
self._set_sub_prop("rangePartitioning", resource)
+ @property
+ def reference_file_schema_uri(self):
+ """Optional[str]:
+ When creating an external table, the user can provide a reference file with the
+ table schema. This is enabled for the following formats:
+
+ AVRO, PARQUET, ORC
+ """
+ return self._get_sub_prop("referenceFileSchemaUri")
+
+ @reference_file_schema_uri.setter
+ def reference_file_schema_uri(self, value):
+ return self._set_sub_prop("referenceFileSchemaUri", value)
+
@property
def schema(self):
"""Optional[Sequence[Union[ \
@@ -446,6 +570,105 @@ def source_format(self):
def source_format(self, value):
self._set_sub_prop("sourceFormat", value)
+ @property
+ def source_column_match(self) -> Optional[SourceColumnMatch]:
+ """Optional[google.cloud.bigquery.enums.SourceColumnMatch]: Controls the
+ strategy used to match loaded columns to the schema. If not set, a sensible
+ default is chosen based on how the schema is provided. If autodetect is
+ used, then columns are matched by name. Otherwise, columns are matched by
+ position. This is done to keep the behavior backward-compatible.
+
+ Acceptable values are:
+
+ SOURCE_COLUMN_MATCH_UNSPECIFIED: Unspecified column name match option.
+ POSITION: matches by position. This assumes that the columns are ordered
+ the same way as the schema.
+ NAME: matches by name. This reads the header row as column names and
+ reorders columns to match the field names in the schema.
+
+ See:
+
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.source_column_match
+ """
+ value = self._get_sub_prop("sourceColumnMatch")
+ return SourceColumnMatch(value) if value is not None else None
+
+ @source_column_match.setter
+ def source_column_match(self, value: Union[SourceColumnMatch, str, None]):
+ if value is not None and not isinstance(value, (SourceColumnMatch, str)):
+ raise TypeError(
+ "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None"
+ )
+ if isinstance(value, SourceColumnMatch):
+ value = value.value
+ self._set_sub_prop("sourceColumnMatch", value if value else None)
+
+ @property
+ def date_format(self) -> Optional[str]:
+ """Optional[str]: Date format used for parsing DATE values.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.date_format
+ """
+ return self._get_sub_prop("dateFormat")
+
+ @date_format.setter
+ def date_format(self, value: Optional[str]):
+ self._set_sub_prop("dateFormat", value)
+
+ @property
+ def datetime_format(self) -> Optional[str]:
+        """Optional[str]: Format used for parsing DATETIME values.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.datetime_format
+ """
+ return self._get_sub_prop("datetimeFormat")
+
+ @datetime_format.setter
+ def datetime_format(self, value: Optional[str]):
+ self._set_sub_prop("datetimeFormat", value)
+
+ @property
+ def time_zone(self) -> Optional[str]:
+ """Optional[str]: Default time zone that will apply when parsing timestamp
+ values that have no specific time zone.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_zone
+ """
+ return self._get_sub_prop("timeZone")
+
+ @time_zone.setter
+ def time_zone(self, value: Optional[str]):
+ self._set_sub_prop("timeZone", value)
+
+ @property
+ def time_format(self) -> Optional[str]:
+        """Optional[str]: Format used for parsing TIME values.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.time_format
+ """
+ return self._get_sub_prop("timeFormat")
+
+ @time_format.setter
+ def time_format(self, value: Optional[str]):
+ self._set_sub_prop("timeFormat", value)
+
+ @property
+ def timestamp_format(self) -> Optional[str]:
+        """Optional[str]: Format used for parsing TIMESTAMP values.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.timestamp_format
+ """
+ return self._get_sub_prop("timestampFormat")
+
+ @timestamp_format.setter
+ def timestamp_format(self, value: Optional[str]):
+ self._set_sub_prop("timestampFormat", value)
+
@property
def time_partitioning(self):
"""Optional[google.cloud.bigquery.table.TimePartitioning]: Specifies time-based
@@ -515,6 +738,27 @@ def parquet_options(self, value):
else:
self._del_sub_prop("parquetOptions")
+ @property
+ def column_name_character_map(self) -> str:
+ """Optional[google.cloud.bigquery.job.ColumnNameCharacterMap]:
+ Character map supported for column names in CSV/Parquet loads. Defaults
+ to STRICT and can be overridden by Project Config Service. Using this
+ option with unsupported load formats will result in an error.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationLoad.FIELDS.column_name_character_map
+ """
+ return self._get_sub_prop(
+ "columnNameCharacterMap",
+ ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
+ )
+
+ @column_name_character_map.setter
+ def column_name_character_map(self, value: Optional[str]):
+ if value is None:
+ value = ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED
+ self._set_sub_prop("columnNameCharacterMap", value)
+
class LoadJob(_AsyncJob):
"""Asynchronous job for loading data into a table.
@@ -537,15 +781,13 @@ class LoadJob(_AsyncJob):
"""
_JOB_TYPE = "load"
+ _CONFIG_CLASS = LoadJobConfig
def __init__(self, job_id, source_uris, destination, client, job_config=None):
super(LoadJob, self).__init__(job_id, client)
- if not job_config:
- job_config = LoadJobConfig()
-
- self._configuration = job_config
- self._properties["configuration"] = job_config._properties
+ if job_config is not None:
+ self._properties["configuration"] = job_config._properties
if source_uris is not None:
_helpers._set_sub_prop(
@@ -559,6 +801,11 @@ def __init__(self, job_id, source_uris, destination, client, job_config=None):
destination.to_api_repr(),
)
+ @property
+ def configuration(self) -> LoadJobConfig:
+ """The configuration for this load job."""
+ return typing.cast(LoadJobConfig, super().configuration)
+
@property
def destination(self):
"""google.cloud.bigquery.table.TableReference: table where loaded rows are written
@@ -586,98 +833,130 @@ def allow_jagged_rows(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.allow_jagged_rows`.
"""
- return self._configuration.allow_jagged_rows
+ return self.configuration.allow_jagged_rows
@property
def allow_quoted_newlines(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.allow_quoted_newlines`.
"""
- return self._configuration.allow_quoted_newlines
+ return self.configuration.allow_quoted_newlines
@property
def autodetect(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.autodetect`.
"""
- return self._configuration.autodetect
+ return self.configuration.autodetect
+
+ @property
+ def connection_properties(self) -> List[ConnectionProperty]:
+ """See
+ :attr:`google.cloud.bigquery.job.LoadJobConfig.connection_properties`.
+
+ .. versionadded:: 3.7.0
+ """
+ return self.configuration.connection_properties
@property
def create_disposition(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.create_disposition`.
"""
- return self._configuration.create_disposition
+ return self.configuration.create_disposition
+
+ @property
+ def create_session(self) -> Optional[bool]:
+ """See
+ :attr:`google.cloud.bigquery.job.LoadJobConfig.create_session`.
+
+ .. versionadded:: 3.7.0
+ """
+ return self.configuration.create_session
@property
def encoding(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.encoding`.
"""
- return self._configuration.encoding
+ return self.configuration.encoding
@property
def field_delimiter(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.field_delimiter`.
"""
- return self._configuration.field_delimiter
+ return self.configuration.field_delimiter
@property
def ignore_unknown_values(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.ignore_unknown_values`.
"""
- return self._configuration.ignore_unknown_values
+ return self.configuration.ignore_unknown_values
@property
def max_bad_records(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.max_bad_records`.
"""
- return self._configuration.max_bad_records
+ return self.configuration.max_bad_records
@property
def null_marker(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.null_marker`.
"""
- return self._configuration.null_marker
+ return self.configuration.null_marker
+
+ @property
+ def null_markers(self):
+ """See
+ :attr:`google.cloud.bigquery.job.LoadJobConfig.null_markers`.
+ """
+ return self.configuration.null_markers
@property
def quote_character(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.quote_character`.
"""
- return self._configuration.quote_character
+ return self.configuration.quote_character
+
+ @property
+ def reference_file_schema_uri(self):
+        """See:
+        :attr:`google.cloud.bigquery.job.LoadJobConfig.reference_file_schema_uri`.
+ """
+ return self.configuration.reference_file_schema_uri
@property
def skip_leading_rows(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.skip_leading_rows`.
"""
- return self._configuration.skip_leading_rows
+ return self.configuration.skip_leading_rows
@property
def source_format(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.source_format`.
"""
- return self._configuration.source_format
+ return self.configuration.source_format
@property
def write_disposition(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.write_disposition`.
"""
- return self._configuration.write_disposition
+ return self.configuration.write_disposition
@property
def schema(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.schema`.
"""
- return self._configuration.schema
+ return self.configuration.schema
@property
def destination_encryption_configuration(self):
@@ -690,7 +969,7 @@ def destination_encryption_configuration(self):
See
:attr:`google.cloud.bigquery.job.LoadJobConfig.destination_encryption_configuration`.
"""
- return self._configuration.destination_encryption_configuration
+ return self.configuration.destination_encryption_configuration
@property
def destination_table_description(self):
@@ -699,7 +978,7 @@ def destination_table_description(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.description
"""
- return self._configuration.destination_table_description
+ return self.configuration.destination_table_description
@property
def destination_table_friendly_name(self):
@@ -708,42 +987,84 @@ def destination_table_friendly_name(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#DestinationTableProperties.FIELDS.friendly_name
"""
- return self._configuration.destination_table_friendly_name
+ return self.configuration.destination_table_friendly_name
@property
def range_partitioning(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.range_partitioning`.
"""
- return self._configuration.range_partitioning
+ return self.configuration.range_partitioning
@property
def time_partitioning(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.time_partitioning`.
"""
- return self._configuration.time_partitioning
+ return self.configuration.time_partitioning
@property
def use_avro_logical_types(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.use_avro_logical_types`.
"""
- return self._configuration.use_avro_logical_types
+ return self.configuration.use_avro_logical_types
@property
def clustering_fields(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.clustering_fields`.
"""
- return self._configuration.clustering_fields
+ return self.configuration.clustering_fields
+
+ @property
+ def source_column_match(self) -> Optional[SourceColumnMatch]:
+ """See
+ :attr:`google.cloud.bigquery.job.LoadJobConfig.source_column_match`.
+ """
+ return self.configuration.source_column_match
+
+ @property
+ def date_format(self):
+ """See
+ :attr:`google.cloud.bigquery.job.LoadJobConfig.date_format`.
+ """
+ return self.configuration.date_format
+
+ @property
+ def datetime_format(self):
+ """See
+ :attr:`google.cloud.bigquery.job.LoadJobConfig.datetime_format`.
+ """
+ return self.configuration.datetime_format
+
+ @property
+ def time_zone(self):
+ """See
+ :attr:`google.cloud.bigquery.job.LoadJobConfig.time_zone`.
+ """
+ return self.configuration.time_zone
+
+ @property
+ def time_format(self):
+ """See
+ :attr:`google.cloud.bigquery.job.LoadJobConfig.time_format`.
+ """
+ return self.configuration.time_format
+
+ @property
+ def timestamp_format(self):
+ """See
+ :attr:`google.cloud.bigquery.job.LoadJobConfig.timestamp_format`.
+ """
+ return self.configuration.timestamp_format
@property
def schema_update_options(self):
"""See
:attr:`google.cloud.bigquery.job.LoadJobConfig.schema_update_options`.
"""
- return self._configuration.schema_update_options
+ return self.configuration.schema_update_options
@property
def input_file_bytes(self):
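A condensed sketch exercising several of the new ``LoadJobConfig`` options added above, assuming this patch is applied; the format strings, bucket, and table name are placeholders, and ``SourceColumnMatch.NAME`` is assumed to be a member of the enum referenced in the property docs::

    from google.cloud import bigquery
    from google.cloud.bigquery.enums import SourceColumnMatch
    from google.cloud.bigquery.job.load import ColumnNameCharacterMap

    job_config = bigquery.LoadJobConfig(
        source_format=bigquery.SourceFormat.CSV,
        autodetect=True,
    )
    job_config.null_markers = ["", "NULL", r"\N"]
    job_config.preserve_ascii_control_characters = True
    job_config.source_column_match = SourceColumnMatch.NAME
    job_config.date_format = "YYYY-MM-DD"          # illustrative SQL-style format
    job_config.time_zone = "America/Los_Angeles"
    job_config.column_name_character_map = ColumnNameCharacterMap.V2

    # client.load_table_from_uri(
    #     "gs://example-bucket/data/*.csv",
    #     "my_project.my_dataset.my_table",
    #     job_config=job_config,
    # )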
diff --git a/google/cloud/bigquery/job/query.py b/google/cloud/bigquery/job/query.py
index c2d304e30..38b8a7148 100644
--- a/google/cloud/bigquery/job/query.py
+++ b/google/cloud/bigquery/job/query.py
@@ -17,18 +17,19 @@
import concurrent.futures
import copy
import re
+import time
import typing
from typing import Any, Dict, Iterable, List, Optional, Union
from google.api_core import exceptions
-from google.api_core.future import polling as polling_future
+from google.api_core import retry as retries
import requests
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetListItem
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
-from google.cloud.bigquery.enums import KeyResultStatementKind
+from google.cloud.bigquery.enums import KeyResultStatementKind, DefaultPandasDTypes
from google.cloud.bigquery.external_config import ExternalConfig
from google.cloud.bigquery import _helpers
from google.cloud.bigquery.query import (
@@ -39,7 +40,11 @@
StructQueryParameter,
UDFResource,
)
-from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_JOB_RETRY
+from google.cloud.bigquery.retry import (
+ DEFAULT_RETRY,
+ DEFAULT_JOB_RETRY,
+ POLLING_DEFAULT_VALUE,
+)
from google.cloud.bigquery.routine import RoutineReference
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import _EmptyRowIterator
@@ -53,13 +58,17 @@
from google.cloud.bigquery.job.base import _JobConfig
from google.cloud.bigquery.job.base import _JobReference
+try:
+ import pandas # type: ignore
+except ImportError:
+ pandas = None
+
if typing.TYPE_CHECKING: # pragma: NO COVER
# Assumption: type checks are only used by library developers and CI environments
# that have all optional dependencies installed, thus no conditional imports.
import pandas # type: ignore
import geopandas # type: ignore
import pyarrow # type: ignore
- from google.api_core import retry as retries
from google.cloud import bigquery_storage
from google.cloud.bigquery.client import Client
from google.cloud.bigquery.table import RowIterator
@@ -188,6 +197,119 @@ def from_api_repr(cls, stats: Dict[str, str]) -> "DmlStats":
return cls(*args)
+class IncrementalResultStats:
+ """IncrementalResultStats provides information about incremental query execution."""
+
+ def __init__(self):
+ self._properties = {}
+
+ @classmethod
+ def from_api_repr(cls, resource) -> "IncrementalResultStats":
+ """Factory: construct instance from the JSON repr.
+
+ Args:
+ resource (Dict[str, object]):
+ IncrementalResultStats representation returned from API.
+
+ Returns:
+ google.cloud.bigquery.job.IncrementalResultStats:
+ stats parsed from ``resource``.
+ """
+ entry = cls()
+ entry._properties = resource
+ return entry
+
+ @property
+ def disabled_reason(self):
+ """Optional[string]: Reason why incremental results were not
+ written by the query.
+ """
+ return _helpers._str_or_none(self._properties.get("disabledReason"))
+
+ @property
+ def result_set_last_replace_time(self):
+ """Optional[datetime]: The time at which the result table's contents
+ were completely replaced. May be absent if no results have been written
+ or the query has completed."""
+ from google.cloud._helpers import _rfc3339_nanos_to_datetime
+
+ value = self._properties.get("resultSetLastReplaceTime")
+ if value:
+ try:
+ return _rfc3339_nanos_to_datetime(value)
+ except ValueError:
+ pass
+ return None
+
+ @property
+ def result_set_last_modify_time(self):
+ """Optional[datetime]: The time at which the result table's contents
+ were modified. May be absent if no results have been written or the
+ query has completed."""
+ from google.cloud._helpers import _rfc3339_nanos_to_datetime
+
+ value = self._properties.get("resultSetLastModifyTime")
+ if value:
+ try:
+ return _rfc3339_nanos_to_datetime(value)
+ except ValueError:
+ pass
+ return None
+
+
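A rough usage sketch for the ``IncrementalResultStats`` factory above, using an illustrative payload shaped like ``statistics.query.incrementalResultStats`` and assuming the class is re-exported from ``google.cloud.bigquery.job``:

.. code-block:: python

    from google.cloud.bigquery.job import IncrementalResultStats

    resource = {
        "disabledReason": "OTHER",  # illustrative value
        "resultSetLastReplaceTime": "2024-05-01T12:34:56.123456Z",
    }

    stats = IncrementalResultStats.from_api_repr(resource)
    print(stats.disabled_reason)               # "OTHER"
    print(stats.result_set_last_replace_time)  # parsed datetime
    print(stats.result_set_last_modify_time)   # None, the key is absent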
+class IndexUnusedReason(typing.NamedTuple):
+ """Reason about why no search index was used in the search query (or sub-query).
+
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#indexunusedreason
+ """
+
+ code: Optional[str] = None
+ """Specifies the high-level reason for the scenario when no search index was used.
+ """
+
+ message: Optional[str] = None
+ """Free form human-readable reason for the scenario when no search index was used.
+ """
+
+ baseTable: Optional[TableReference] = None
+ """Specifies the base table involved in the reason that no search index was used.
+ """
+
+ indexName: Optional[str] = None
+ """Specifies the name of the unused search index, if available."""
+
+ @classmethod
+ def from_api_repr(cls, reason):
+ code = reason.get("code")
+ message = reason.get("message")
+ baseTable = reason.get("baseTable")
+ indexName = reason.get("indexName")
+
+ return cls(code, message, baseTable, indexName)
+
+
+class SearchStats(typing.NamedTuple):
+ """Statistics related to Search Queries. Populated as part of JobStatistics2.
+
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#searchstatistics
+ """
+
+ mode: Optional[str] = None
+ """Indicates the type of search index usage in the entire search query."""
+
+ reason: List[IndexUnusedReason] = []
+ """Reason about why no search index was used in the search query (or sub-query)"""
+
+ @classmethod
+ def from_api_repr(cls, stats: Dict[str, Any]):
+ mode = stats.get("indexUsageMode", None)
+ reason = [
+ IndexUnusedReason.from_api_repr(r)
+ for r in stats.get("indexUnusedReasons", [])
+ ]
+ return cls(mode, reason)
+
+
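A minimal sketch of parsing ``searchStatistics`` with the two classes above; the payload values are invented for illustration:

.. code-block:: python

    from google.cloud.bigquery.job.query import IndexUnusedReason, SearchStats

    payload = {
        "indexUsageMode": "PARTIALLY_USED",
        "indexUnusedReasons": [
            {"code": "INDEX_CONFIG_NOT_AVAILABLE", "message": "No index on table."}
        ],
    }

    stats = SearchStats.from_api_repr(payload)
    assert stats.mode == "PARTIALLY_USED"
    assert isinstance(stats.reason[0], IndexUnusedReason)
    assert stats.reason[0].code == "INDEX_CONFIG_NOT_AVAILABLE"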
class ScriptOptions:
"""Options controlling the execution of scripts.
@@ -414,6 +536,11 @@ def destination(self):
ID, each separated by ``.``. For example:
``your-project.your_dataset.your_table``.
+ .. note::
+
+ Only table ID is passed to the backend, so any configuration
+ in :class:`~google.cloud.bigquery.table.Table` is discarded.
+
See
https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobConfigurationQuery.FIELDS.destination_table
"""
@@ -607,6 +734,21 @@ def write_disposition(self):
def write_disposition(self, value):
self._set_sub_prop("writeDisposition", value)
+ @property
+ def write_incremental_results(self) -> Optional[bool]:
+ """This is only supported for a SELECT query using a temporary table.
+
+ If set, the query is allowed to write results incrementally to the temporary result
+ table. This may incur a performance penalty. This option cannot be used with Legacy SQL.
+
+ This feature is not generally available.
+ """
+ return self._get_sub_prop("writeIncrementalResults")
+
+ @write_incremental_results.setter
+ def write_incremental_results(self, value):
+ self._set_sub_prop("writeIncrementalResults", value)
+
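A hedged sketch of setting the new option on a ``QueryJobConfig``; because the feature is not generally available, the backend may reject or ignore the field for projects that are not enrolled:

.. code-block:: python

    from google.cloud import bigquery

    job_config = bigquery.QueryJobConfig()
    job_config.write_incremental_results = True  # GoogleSQL SELECT queries only

    # The option is carried under configuration.query in the job resource and
    # can be passed to client.query(..., job_config=job_config).
    print(job_config.to_api_repr()["query"]["writeIncrementalResults"])  # True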
@property
def table_definitions(self):
"""Dict[str, google.cloud.bigquery.external_config.ExternalConfig]:
@@ -714,10 +856,9 @@ def to_api_repr(self) -> dict:
Dict: A dictionary in the format used by the BigQuery API.
"""
resource = copy.deepcopy(self._properties)
-
# Query parameters have an additional property associated with them
# to indicate if the query is using named or positional parameters.
- query_parameters = resource["query"].get("queryParameters")
+ query_parameters = resource.get("query", {}).get("queryParameters")
if query_parameters:
if query_parameters[0].get("name") is None:
resource["query"]["parameterMode"] = "POSITIONAL"
@@ -745,23 +886,20 @@ class QueryJob(_AsyncJob):
_JOB_TYPE = "query"
_UDF_KEY = "userDefinedFunctionResources"
+ _CONFIG_CLASS = QueryJobConfig
def __init__(self, job_id, query, client, job_config=None):
super(QueryJob, self).__init__(job_id, client)
- if job_config is None:
- job_config = QueryJobConfig()
- if job_config.use_legacy_sql is None:
- job_config.use_legacy_sql = False
-
- self._properties["configuration"] = job_config._properties
- self._configuration = job_config
+ if job_config is not None:
+ self._properties["configuration"] = job_config._properties
+ if self.configuration.use_legacy_sql is None:
+ self.configuration.use_legacy_sql = False
if query:
_helpers._set_sub_prop(
self._properties, ["configuration", "query", "query"], query
)
-
self._query_results = None
self._done_timeout = None
self._transport_timeout = None
@@ -771,7 +909,12 @@ def allow_large_results(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.allow_large_results`.
"""
- return self._configuration.allow_large_results
+ return self.configuration.allow_large_results
+
+ @property
+ def configuration(self) -> QueryJobConfig:
+ """The configuration for this query job."""
+ return typing.cast(QueryJobConfig, super().configuration)
@property
def connection_properties(self) -> List[ConnectionProperty]:
@@ -780,14 +923,14 @@ def connection_properties(self) -> List[ConnectionProperty]:
.. versionadded:: 2.29.0
"""
- return self._configuration.connection_properties
+ return self.configuration.connection_properties
@property
def create_disposition(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.create_disposition`.
"""
- return self._configuration.create_disposition
+ return self.configuration.create_disposition
@property
def create_session(self) -> Optional[bool]:
@@ -796,21 +939,21 @@ def create_session(self) -> Optional[bool]:
.. versionadded:: 2.29.0
"""
- return self._configuration.create_session
+ return self.configuration.create_session
@property
def default_dataset(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.default_dataset`.
"""
- return self._configuration.default_dataset
+ return self.configuration.default_dataset
@property
def destination(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.destination`.
"""
- return self._configuration.destination
+ return self.configuration.destination
@property
def destination_encryption_configuration(self):
@@ -823,28 +966,37 @@ def destination_encryption_configuration(self):
See
:attr:`google.cloud.bigquery.job.QueryJobConfig.destination_encryption_configuration`.
"""
- return self._configuration.destination_encryption_configuration
+ return self.configuration.destination_encryption_configuration
@property
def dry_run(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.dry_run`.
"""
- return self._configuration.dry_run
+ return self.configuration.dry_run
@property
def flatten_results(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.flatten_results`.
"""
- return self._configuration.flatten_results
+ return self.configuration.flatten_results
@property
def priority(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.priority`.
"""
- return self._configuration.priority
+ return self.configuration.priority
+
+ @property
+ def search_stats(self) -> Optional[SearchStats]:
+ """Returns a SearchStats object."""
+
+ stats = self._job_statistics().get("searchStatistics")
+ if stats is not None:
+ return SearchStats.from_api_repr(stats)
+ return None
@property
def query(self):
@@ -857,95 +1009,104 @@ def query(self):
self._properties, ["configuration", "query", "query"]
)
+ @property
+ def query_id(self) -> Optional[str]:
+ """[Preview] ID of a completed query.
+
+ This ID is auto-generated and not guaranteed to be populated.
+ """
+ query_results = self._query_results
+ return query_results.query_id if query_results is not None else None
+
@property
def query_parameters(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.query_parameters`.
"""
- return self._configuration.query_parameters
+ return self.configuration.query_parameters
@property
def udf_resources(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.udf_resources`.
"""
- return self._configuration.udf_resources
+ return self.configuration.udf_resources
@property
def use_legacy_sql(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.use_legacy_sql`.
"""
- return self._configuration.use_legacy_sql
+ return self.configuration.use_legacy_sql
@property
def use_query_cache(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.use_query_cache`.
"""
- return self._configuration.use_query_cache
+ return self.configuration.use_query_cache
@property
def write_disposition(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.write_disposition`.
"""
- return self._configuration.write_disposition
+ return self.configuration.write_disposition
@property
def maximum_billing_tier(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_billing_tier`.
"""
- return self._configuration.maximum_billing_tier
+ return self.configuration.maximum_billing_tier
@property
def maximum_bytes_billed(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.maximum_bytes_billed`.
"""
- return self._configuration.maximum_bytes_billed
+ return self.configuration.maximum_bytes_billed
@property
def range_partitioning(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.range_partitioning`.
"""
- return self._configuration.range_partitioning
+ return self.configuration.range_partitioning
@property
def table_definitions(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.table_definitions`.
"""
- return self._configuration.table_definitions
+ return self.configuration.table_definitions
@property
def time_partitioning(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.time_partitioning`.
"""
- return self._configuration.time_partitioning
+ return self.configuration.time_partitioning
@property
def clustering_fields(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.clustering_fields`.
"""
- return self._configuration.clustering_fields
+ return self.configuration.clustering_fields
@property
def schema_update_options(self):
"""See
:attr:`google.cloud.bigquery.job.QueryJobConfig.schema_update_options`.
"""
- return self._configuration.schema_update_options
+ return self.configuration.schema_update_options
def to_api_repr(self):
"""Generate a resource for :meth:`_begin`."""
# Use to_api_repr to allow for some configuration properties to be set
# automatically.
- configuration = self._configuration.to_api_repr()
+ configuration = self.configuration.to_api_repr()
return {
"jobReference": self._properties["jobReference"],
"configuration": configuration,
@@ -1158,7 +1319,6 @@ def referenced_tables(self):
datasets_by_project_name = {}
for table in self._job_statistics().get("referencedTables", ()):
-
t_project = table["projectId"]
ds_id = table["datasetId"]
@@ -1239,6 +1399,13 @@ def bi_engine_stats(self) -> Optional[BiEngineStats]:
else:
return BiEngineStats.from_api_repr(stats)
+ @property
+ def incremental_result_stats(self) -> Optional[IncrementalResultStats]:
+ stats = self._job_statistics().get("incrementalResultStats")
+ if stats is None:
+ return None
+ return IncrementalResultStats.from_api_repr(stats)
+
def _blocking_poll(self, timeout=None, **kwargs):
self._done_timeout = timeout
self._transport_timeout = timeout
@@ -1257,7 +1424,7 @@ def _format_for_exception(message: str, query: str):
"""
template = "{message}\n\n{header}\n\n{ruler}\n{body}\n{ruler}"
- lines = query.splitlines()
+ lines = query.splitlines() if query is not None else [""]
max_line_len = max(len(line) for line in lines)
header = "-----Query Job SQL Follows-----"
@@ -1305,9 +1472,13 @@ def _begin(self, client=None, retry=DEFAULT_RETRY, timeout=None):
raise
def _reload_query_results(
- self, retry: "retries.Retry" = DEFAULT_RETRY, timeout: float = None
+ self,
+ retry: "retries.Retry" = DEFAULT_RETRY,
+ timeout: Optional[float] = None,
+ page_size: int = 0,
+ start_index: Optional[int] = None,
):
- """Refresh the cached query results.
+ """Refresh the cached query results unless already cached and complete.
Args:
retry (Optional[google.api_core.retry.Retry]):
@@ -1315,7 +1486,15 @@ def _reload_query_results(
timeout (Optional[float]):
The number of seconds to wait for the underlying HTTP transport
before using ``retry``.
+ page_size (int):
+ Maximum number of rows in a single response. See maxResults in
+ the jobs.getQueryResults REST API.
+ start_index (Optional[int]):
+ Zero-based index of the starting row. See startIndex in the
+ jobs.getQueryResults REST API.
"""
+ # Optimization: avoid a call to jobs.getQueryResults if it's already
+ # been fetched, e.g. from jobs.query first page of results.
if self._query_results and self._query_results.complete:
return
@@ -1324,7 +1503,16 @@ def _reload_query_results(
# the timeout from the futures API is respected. See:
# https://github.com/GoogleCloudPlatform/google-cloud-python/issues/4135
timeout_ms = None
- if self._done_timeout is not None:
+
+ # python-api-core, as part of a major rewrite of its deadline, timeout,
+ # and retry handling, may set the timeout value to a sentinel object().
+ # This code does not natively handle that and instead expects either
+ # None or a numeric value, so convert a bare object() to None.
+ if type(self._done_timeout) is object: # pragma: NO COVER
+ self._done_timeout = None
+
+ if self._done_timeout is not None: # pragma: NO COVER
# Subtract a buffer for context switching, network latency, etc.
api_timeout = self._done_timeout - _TIMEOUT_BUFFER_SECS
api_timeout = max(min(api_timeout, 10), 0)
@@ -1334,7 +1522,14 @@ def _reload_query_results(
# If an explicit timeout is not given, fall back to the transport timeout
# stored in _blocking_poll() in the process of polling for job completion.
- transport_timeout = timeout if timeout is not None else self._transport_timeout
+ if timeout is not None:
+ transport_timeout = timeout
+ else:
+ transport_timeout = self._transport_timeout
+
+ # Handle PollingJob._DEFAULT_VALUE.
+ if not isinstance(transport_timeout, (float, int)):
+ transport_timeout = None
self._query_results = self._client._get_query_results(
self.job_id,
@@ -1343,50 +1538,18 @@ def _reload_query_results(
timeout_ms=timeout_ms,
location=self.location,
timeout=transport_timeout,
+ page_size=page_size,
+ start_index=start_index,
)
- def _done_or_raise(self, retry=DEFAULT_RETRY, timeout=None):
- """Check if the query has finished running and raise if it's not.
-
- If the query has finished, also reload the job itself.
- """
- # If an explicit timeout is not given, fall back to the transport timeout
- # stored in _blocking_poll() in the process of polling for job completion.
- transport_timeout = timeout if timeout is not None else self._transport_timeout
-
- try:
- self._reload_query_results(retry=retry, timeout=transport_timeout)
- except exceptions.GoogleAPIError as exc:
- # Reloading also updates error details on self, thus no need for an
- # explicit self.set_exception() call if reloading succeeds.
- try:
- self.reload(retry=retry, timeout=transport_timeout)
- except exceptions.GoogleAPIError:
- # Use the query results reload exception, as it generally contains
- # much more useful error information.
- self.set_exception(exc)
- finally:
- return
-
- # Only reload the job once we know the query is complete.
- # This will ensure that fields such as the destination table are
- # correctly populated.
- if not self._query_results.complete:
- raise polling_future._OperationNotComplete()
- else:
- try:
- self.reload(retry=retry, timeout=transport_timeout)
- except exceptions.GoogleAPIError as exc:
- self.set_exception(exc)
-
- def result( # type: ignore # (complaints about the overloaded signature)
+ def result( # type: ignore # (incompatible with supertype)
self,
- page_size: int = None,
- max_results: int = None,
- retry: "retries.Retry" = DEFAULT_RETRY,
- timeout: float = None,
- start_index: int = None,
- job_retry: "retries.Retry" = DEFAULT_JOB_RETRY,
+ page_size: Optional[int] = None,
+ max_results: Optional[int] = None,
+ retry: Optional[retries.Retry] = DEFAULT_RETRY,
+ timeout: Optional[Union[float, object]] = POLLING_DEFAULT_VALUE,
+ start_index: Optional[int] = None,
+ job_retry: Optional[retries.Retry] = DEFAULT_JOB_RETRY,
) -> Union["RowIterator", _EmptyRowIterator]:
"""Start the job and wait for it to complete and get the result.
@@ -1404,11 +1567,14 @@ def result( # type: ignore # (complaints about the overloaded signature)
is ``DONE``, retrying is aborted early even if the
results are not available, as this will not change
anymore.
- timeout (Optional[float]):
+ timeout (Optional[Union[float, \
+ google.api_core.future.polling.PollingFuture._DEFAULT_VALUE, \
+ ]]):
The number of seconds to wait for the underlying HTTP transport
- before using ``retry``.
- If multiple requests are made under the hood, ``timeout``
- applies to each individual request.
+ before using ``retry``. If ``None``, wait indefinitely
+ unless an error is returned. If unset, only the
+ underlying API calls have their default timeouts, but we still
+ wait indefinitely for the job to finish.
start_index (Optional[int]):
The zero-based index of the starting row to read.
job_retry (Optional[google.api_core.retry.Retry]):
@@ -1435,7 +1601,7 @@ def result( # type: ignore # (complaints about the overloaded signature)
a DDL query, an ``_EmptyRowIterator`` instance is returned.
Raises:
- google.cloud.exceptions.GoogleAPICallError:
+ google.api_core.exceptions.GoogleAPICallError:
If the job failed and retries aren't successful.
concurrent.futures.TimeoutError:
If the job did not complete in the given timeout.
@@ -1443,8 +1609,44 @@ def result( # type: ignore # (complaints about the overloaded signature)
If Non-``None`` and non-default ``job_retry`` is
provided and the job is not retryable.
"""
+ # Note: Since waiting for a query job to finish is more complex than
+ # refreshing the job state in a loop, we avoid calling the superclass
+ # in this method.
+
if self.dry_run:
- return _EmptyRowIterator()
+ return _EmptyRowIterator(
+ project=self.project,
+ location=self.location,
+ schema=self.schema,
+ total_bytes_processed=self.total_bytes_processed,
+ # Intentionally omit job_id and query_id since this doesn't
+ # actually correspond to a finished query job.
+ )
+
+ # Setting max_results should be equivalent to setting page_size with
+ # regards to allowing the user to tune how many results to download
+ # while we wait for the query to finish. See internal issue:
+ # 344008814. But if start_index is set, user is trying to access a
+ # specific page, so we don't need to set page_size. See issue #1950.
+ if page_size is None and max_results is not None and start_index is None:
+ page_size = max_results
+
+ # When timeout has default sentinel value ``object()``, do not pass
+ # anything to invoke default timeouts in subsequent calls.
+ done_kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {}
+ reload_query_results_kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {}
+ list_rows_kwargs: Dict[str, Union[_helpers.TimeoutType, object]] = {}
+ if type(timeout) is not object:
+ done_kwargs["timeout"] = timeout
+ list_rows_kwargs["timeout"] = timeout
+ reload_query_results_kwargs["timeout"] = timeout
+
+ if page_size is not None:
+ reload_query_results_kwargs["page_size"] = page_size
+
+ if start_index is not None:
+ reload_query_results_kwargs["start_index"] = start_index
+
try:
retry_do_query = getattr(self, "_retry_do_query", None)
if retry_do_query is not None:
@@ -1458,45 +1660,131 @@ def result( # type: ignore # (complaints about the overloaded signature)
" provided to the query that created this job."
)
- first = True
+ restart_query_job = False
- def do_get_result():
- nonlocal first
+ def is_job_done():
+ nonlocal restart_query_job
- if first:
- first = False
- else:
+ if restart_query_job:
+ restart_query_job = False
+
+ # The original job has failed. Create a new one.
+ #
# Note that we won't get here if retry_do_query is
# None, because we won't use a retry.
-
- # The orinal job is failed. Create a new one.
job = retry_do_query()
- # If it's already failed, we might as well stop:
- if job.done() and job.exception() is not None:
- raise job.exception()
-
# Become the new job:
self.__dict__.clear()
self.__dict__.update(job.__dict__)
- # This shouldn't be necessary, because once we have a good
- # job, it should stay good,and we shouldn't have to retry.
- # But let's be paranoid. :)
+ # It's possible the job fails again and we'll have to
+ # retry that too.
self._retry_do_query = retry_do_query
self._job_retry = job_retry
- super(QueryJob, self).result(retry=retry, timeout=timeout)
-
- # Since the job could already be "done" (e.g. got a finished job
- # via client.get_job), the superclass call to done() might not
- # set the self._query_results cache.
- self._reload_query_results(retry=retry, timeout=timeout)
+ # If the job hasn't been created, create it now. Related:
+ # https://github.com/googleapis/python-bigquery/issues/1940
+ if self.state is None:
+ self._begin(retry=retry, **done_kwargs)
+
+ # Refresh the job status with jobs.get because some of the
+ # exceptions thrown by jobs.getQueryResults like timeout and
+ # rateLimitExceeded errors are ambiguous. We want to know if
+ # the query job failed and not just the call to
+ # jobs.getQueryResults.
+ if self.done(retry=retry, **done_kwargs):
+ # If it's already failed, we might as well stop.
+ job_failed_exception = self.exception()
+ if job_failed_exception is not None:
+ # Only try to restart the query job if the job failed for
+ # a retriable reason. For example, don't restart the query
+ # if the call to reload the job metadata within self.done()
+ # timed out.
+ #
+ # The `restart_query_job` flag must only be set after a
+ # successful call to the `jobs.get` REST API, once we have
+ # determined that the job has failed.
+ #
+ # The `jobs.get` REST API
+ # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get)
+ # is called via `self.done()` which calls
+ # `self.reload()`.
+ #
+ # To determine if the job failed, the `self.exception()`
+ # is set from `self.reload()` via
+ # `self._set_properties()`, which translates the
+ # `Job.status.errorResult` field
+ # (https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatus.FIELDS.error_result)
+ # into an exception that can be processed by the
+ # `job_retry` predicate.
+ restart_query_job = True
+ raise job_failed_exception
+ else:
+ # Make sure that the _query_results are cached so we
+ # can return a complete RowIterator.
+ #
+ # Note: As an optimization, _reload_query_results
+ # doesn't make any API calls if the query results are
+ # already cached and have jobComplete=True in the
+ # response from the REST API. This ensures we aren't
+ # making any extra API calls if the previous loop
+ # iteration fetched the finished job.
+ self._reload_query_results(
+ retry=retry, **reload_query_results_kwargs
+ )
+ return True
+
+ # Call jobs.getQueryResults with max results set to 0 just to
+ # wait for the query to finish. Unlike most methods,
+ # jobs.getQueryResults hangs as long as it can to ensure we
+ # know when the query has finished as soon as possible.
+ self._reload_query_results(retry=retry, **reload_query_results_kwargs)
+
+ # Even if the query is finished now according to
+ # jobs.getQueryResults, we'll want to reload the job status if
+ # it's not already DONE.
+ return False
if retry_do_query is not None and job_retry is not None:
- do_get_result = job_retry(do_get_result)
+ is_job_done = job_retry(is_job_done)
+
+ # timeout can be a number of seconds, `None`, or a
+ # `google.api_core.future.polling.PollingFuture._DEFAULT_VALUE`
+ # sentinel object indicating a default timeout if we choose to add
+ # one some day. This value can come from our PollingFuture
+ # superclass and was introduced in
+ # https://github.com/googleapis/python-api-core/pull/462.
+ if isinstance(timeout, (float, int)):
+ remaining_timeout = timeout
+ else:
+ # Note: we may need to handle _DEFAULT_VALUE as a separate
+ # case someday, but even then the best we can do for queries
+ # is 72+ hours for hyperparameter tuning jobs:
+ # https://cloud.google.com/bigquery/quotas#query_jobs
+ #
+ # The timeout for a multi-statement query is 24+ hours. See:
+ # https://cloud.google.com/bigquery/quotas#multi_statement_query_limits
+ remaining_timeout = None
+
+ if remaining_timeout is None:
+ # Since is_job_done() calls jobs.getQueryResults, which is a
+ # long-running API, don't delay the next request at all.
+ while not is_job_done():
+ pass
+ else:
+ # Use a monotonic clock since we don't actually care about
+ # daylight savings or similar, just the elapsed time.
+ previous_time = time.monotonic()
- do_get_result()
+ while not is_job_done():
+ current_time = time.monotonic()
+ elapsed_time = current_time - previous_time
+ remaining_timeout = remaining_timeout - elapsed_time
+ previous_time = current_time
+
+ if remaining_timeout < 0:
+ raise concurrent.futures.TimeoutError()
except exceptions.GoogleAPICallError as exc:
exc.message = _EXCEPTION_FOOTER_TEMPLATE.format(
@@ -1513,7 +1801,27 @@ def do_get_result():
# indicate success and avoid calling tabledata.list on a table which
# can't be read (such as a view table).
if self._query_results.total_rows is None:
- return _EmptyRowIterator()
+ return _EmptyRowIterator(
+ location=self.location,
+ project=self.project,
+ job_id=self.job_id,
+ query_id=self.query_id,
+ schema=self.schema,
+ num_dml_affected_rows=self._query_results.num_dml_affected_rows,
+ query=self.query,
+ total_bytes_processed=self.total_bytes_processed,
+ slot_millis=self.slot_millis,
+ )
+
+ # We know that there's at least 1 row, so only treat the response from
+ # jobs.getQueryResults / jobs.query as the first page of the
+ # RowIterator response if there are any rows in it. This prevents us
+ # from stopping the iteration early in the cases where we set
+ # maxResults=0. In that case, we're missing rows and there's no next
+ # page token.
+ first_page_response = self._query_results._properties
+ if "rows" not in first_page_response:
+ first_page_response = None
rows = self._client._list_rows_from_query_results(
self.job_id,
@@ -1526,7 +1834,16 @@ def do_get_result():
max_results=max_results,
start_index=start_index,
retry=retry,
- timeout=timeout,
+ query_id=self.query_id,
+ first_page_response=first_page_response,
+ num_dml_affected_rows=self._query_results.num_dml_affected_rows,
+ query=self.query,
+ total_bytes_processed=self.total_bytes_processed,
+ slot_millis=self.slot_millis,
+ created=self.created,
+ started=self.started,
+ ended=self.ended,
+ **list_rows_kwargs,
)
rows._preserve_order = _contains_order_by(self.query)
return rows
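A hedged sketch of how a caller might combine the reworked ``timeout`` and ``job_retry`` arguments; the deadlines shown are arbitrary:

.. code-block:: python

    from google.cloud import bigquery
    from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY

    client = bigquery.Client()
    job = client.query("SELECT 1 AS x")

    # Bound the overall wait to ten minutes and allow the whole query job to
    # be restarted on retryable failures, with a five-minute retry deadline.
    rows = job.result(timeout=600, job_retry=DEFAULT_JOB_RETRY.with_deadline(300))
    for row in rows:
        print(row.x)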
@@ -1536,7 +1853,7 @@ def do_get_result():
# that should only exist here in the QueryJob method.
def to_arrow(
self,
- progress_bar_type: str = None,
+ progress_bar_type: Optional[str] = None,
bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
create_bqstorage_client: bool = True,
max_results: Optional[int] = None,
@@ -1556,9 +1873,9 @@ def to_arrow(
No progress bar.
``'tqdm'``
Use the :func:`tqdm.tqdm` function to print a progress bar
- to :data:`sys.stderr`.
+ to :data:`sys.stdout`.
``'tqdm_notebook'``
- Use the :func:`tqdm.tqdm_notebook` function to display a
+ Use the :func:`tqdm.notebook.tqdm` function to display a
progress bar as a Jupyter notebook widget.
``'tqdm_gui'``
Use the :func:`tqdm.tqdm_gui` function to display a
@@ -1593,6 +1910,10 @@ def to_arrow(
headers from the query results. The column headers are derived
from the destination table's schema.
+ Raises:
+ ValueError:
+ If the :mod:`pyarrow` library cannot be imported.
+
.. versionadded:: 1.17.0
"""
query_result = wait_for_query(self, progress_bar_type, max_results=max_results)
@@ -1608,11 +1929,26 @@ def to_arrow(
def to_dataframe(
self,
bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
- dtypes: Dict[str, Any] = None,
- progress_bar_type: str = None,
+ dtypes: Optional[Dict[str, Any]] = None,
+ progress_bar_type: Optional[str] = None,
create_bqstorage_client: bool = True,
max_results: Optional[int] = None,
geography_as_object: bool = False,
+ bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
+ int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
+ float_dtype: Union[Any, None] = None,
+ string_dtype: Union[Any, None] = None,
+ date_dtype: Union[Any, None] = DefaultPandasDTypes.DATE_DTYPE,
+ datetime_dtype: Union[Any, None] = None,
+ time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE,
+ timestamp_dtype: Union[Any, None] = None,
+ range_date_dtype: Union[Any, None] = DefaultPandasDTypes.RANGE_DATE_DTYPE,
+ range_datetime_dtype: Union[
+ Any, None
+ ] = DefaultPandasDTypes.RANGE_DATETIME_DTYPE,
+ range_timestamp_dtype: Union[
+ Any, None
+ ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE,
) -> "pandas.DataFrame":
"""Return a pandas DataFrame from a QueryJob
@@ -1665,6 +2001,146 @@ def to_dataframe(
.. versionadded:: 2.24.0
+ bool_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)
+ to convert BigQuery Boolean type, instead of relying on the default
+ ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean
+ type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
+
+ .. versionadded:: 3.8.0
+
+ int_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
+ to convert BigQuery Integer types, instead of relying on the default
+ ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("int64")``. A list of BigQuery
+ Integer types can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
+
+ .. versionadded:: 3.8.0
+
+ float_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
+ to convert BigQuery Float type, instead of relying on the default
+ ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("float64")``. BigQuery Float
+ type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
+
+ .. versionadded:: 3.8.0
+
+ string_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
+ convert BigQuery String type, instead of relying on the default
+ ``numpy.dtype("object")``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("object")``. BigQuery String
+ type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
+
+ .. versionadded:: 3.8.0
+
+ date_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g.
+ ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date
+ type, instead of relying on the default ``db_dtypes.DateDtype()``.
+ If you explicitly set the value to ``None``, then the data type will be
+ ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery
+ Date type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type
+
+ .. versionadded:: 3.10.0
+
+ datetime_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g.
+ ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime
+ type, instead of relying on the default ``numpy.dtype("datetime64[ns]")``.
+ If you explicitly set the value to ``None``, then the data type will be
+ ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery
+ Datetime type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type
+
+ .. versionadded:: 3.10.0
+
+ time_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g.
+ ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time
+ type, instead of relying on the default ``db_dtypes.TimeDtype()``.
+ If you explicitly set the value to ``None``, then the data type will be
+ ``numpy.dtype("object")``. BigQuery Time type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type
+
+ .. versionadded:: 3.10.0
+
+ timestamp_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g.
+ ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp
+ type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``.
+ If you explicitly set the value to ``None``, then the data type will be
+ ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery
+ Timestamp type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type
+
+ .. versionadded:: 3.10.0
+
+ range_date_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype, such as:
+
+ .. code-block:: python
+
+ pandas.ArrowDtype(pyarrow.struct(
+ [("start", pyarrow.date32()), ("end", pyarrow.date32())]
+ ))
+
+ to convert BigQuery RANGE type, instead of relying on
+ the default ``object``. If you explicitly set the value to
+ ``None``, the data type will be ``object``. BigQuery Range type
+ can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type
+
+ .. versionadded:: 3.21.0
+
+ range_datetime_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype, such as:
+
+ .. code-block:: python
+
+ pandas.ArrowDtype(pyarrow.struct(
+ [
+ ("start", pyarrow.timestamp("us")),
+ ("end", pyarrow.timestamp("us")),
+ ]
+ ))
+
+ to convert BigQuery RANGE type, instead of relying on
+ the default ``object``. If you explicitly set the value to
+ ``None``, the data type will be ``object``. BigQuery Range type
+ can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type
+
+ .. versionadded:: 3.21.0
+
+ range_timestamp_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype, such as:
+
+ .. code-block:: python
+
+ pandas.ArrowDtype(pyarrow.struct(
+ [
+ ("start", pyarrow.timestamp("us", tz="UTC")),
+ ("end", pyarrow.timestamp("us", tz="UTC")),
+ ]
+ ))
+
+ to convert BigQuery RANGE type, instead of relying
+ on the default ``object``. If you explicitly set the value to
+ ``None``, the data type will be ``object``. BigQuery Range type
+ can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type
+
+ .. versionadded:: 3.21.0
+
Returns:
pandas.DataFrame:
A :class:`~pandas.DataFrame` populated with row data
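A sketch of the new dtype overrides in use; it assumes the optional ``pandas``, ``pyarrow``, and ``db-dtypes`` dependencies are installed, and the query is illustrative:

.. code-block:: python

    import pandas
    import pyarrow
    from google.cloud import bigquery

    client = bigquery.Client()
    job = client.query("SELECT DATE '2024-01-01' AS d, 3.14 AS x")

    df = job.to_dataframe(
        # Keep DATE values Arrow-backed instead of the default db_dtypes.DateDtype().
        date_dtype=pandas.ArrowDtype(pyarrow.date32()),
        # Use the nullable Float64 extension type instead of numpy.dtype("float64").
        float_dtype=pandas.Float64Dtype(),
    )
    print(df.dtypes)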
@@ -1687,6 +2163,17 @@ def to_dataframe(
progress_bar_type=progress_bar_type,
create_bqstorage_client=create_bqstorage_client,
geography_as_object=geography_as_object,
+ bool_dtype=bool_dtype,
+ int_dtype=int_dtype,
+ float_dtype=float_dtype,
+ string_dtype=string_dtype,
+ date_dtype=date_dtype,
+ datetime_dtype=datetime_dtype,
+ time_dtype=time_dtype,
+ timestamp_dtype=timestamp_dtype,
+ range_date_dtype=range_date_dtype,
+ range_datetime_dtype=range_datetime_dtype,
+ range_timestamp_dtype=range_timestamp_dtype,
)
# If changing the signature of this method, make sure to apply the same
@@ -1694,12 +2181,16 @@ def to_dataframe(
# that should only exist here in the QueryJob method.
def to_geodataframe(
self,
- bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
- dtypes: Dict[str, Any] = None,
- progress_bar_type: str = None,
+ bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
+ dtypes: Optional[Dict[str, Any]] = None,
+ progress_bar_type: Optional[str] = None,
create_bqstorage_client: bool = True,
max_results: Optional[int] = None,
geography_column: Optional[str] = None,
+ bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
+ int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
+ float_dtype: Union[Any, None] = None,
+ string_dtype: Union[Any, None] = None,
) -> "geopandas.GeoDataFrame":
"""Return a GeoPandas GeoDataFrame from a QueryJob
@@ -1750,6 +2241,34 @@ def to_geodataframe(
identifies which one to use to construct a GeoPandas
GeoDataFrame. This option can be omitted if there's
only one GEOGRAPHY column.
+ bool_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)
+ to convert BigQuery Boolean type, instead of relying on the default
+ ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean
+ type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
+ int_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
+ to convert BigQuery Integer types, instead of relying on the default
+ ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("int64")``. A list of BigQuery
+ Integer types can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
+ float_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
+ to convert BigQuery Float type, instead of relying on the default
+ ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("float64")``. BigQuery Float
+ type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
+ string_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
+ convert BigQuery String type, instead of relying on the default
+ ``numpy.dtype("object")``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("object")``. BigQuery String
+ type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
Returns:
geopandas.GeoDataFrame:
@@ -1773,6 +2292,10 @@ def to_geodataframe(
progress_bar_type=progress_bar_type,
create_bqstorage_client=create_bqstorage_client,
geography_column=geography_column,
+ bool_dtype=bool_dtype,
+ int_dtype=int_dtype,
+ float_dtype=float_dtype,
+ string_dtype=string_dtype,
)
def __iter__(self):
@@ -2046,6 +2569,11 @@ def steps(self):
for step in self._properties.get("steps", [])
]
+ @property
+ def slot_ms(self):
+ """Optional[int]: Slot-milliseconds used by the stage."""
+ return _helpers._int_or_none(self._properties.get("slotMs"))
+
class TimelineEntry(object):
"""TimelineEntry represents progress of a query job at a particular
diff --git a/google/cloud/bigquery/magics/magics.py b/google/cloud/bigquery/magics/magics.py
index 14819aa59..1f892b595 100644
--- a/google/cloud/bigquery/magics/magics.py
+++ b/google/cloud/bigquery/magics/magics.py
@@ -14,70 +14,11 @@
"""IPython Magics
-.. function:: %%bigquery
-
- IPython cell magic to run a query and display the result as a DataFrame
-
- .. code-block:: python
-
- %%bigquery [<destination_var>] [--project <project>] [--use_legacy_sql]
- [--verbose] [--params <params>]
-
-
- Parameters:
-
- * ``<destination_var>`` (Optional[line argument]):
- variable to store the query results. The results are not displayed if
- this parameter is used. If an error occurs during the query execution,
- the corresponding ``QueryJob`` instance (if available) is stored in
- the variable instead.
- * ``--destination_table`` (Optional[line argument]):
- A dataset and table to store the query results. If table does not exists,
- it will be created. If table already exists, its data will be overwritten.
- Variable should be in a format <dataset_id>.<table_name>.
- * ``--no_query_cache`` (Optional[line argument]):
- Do not use cached query results.
- * ``--project <project>`` (Optional[line argument]):
- Project to use for running the query. Defaults to the context
- :attr:`~google.cloud.bigquery.magics.Context.project`.
- * ``--use_bqstorage_api`` (Optional[line argument]):
- [Deprecated] Not used anymore, as BigQuery Storage API is used by default.
- * ``--use_rest_api`` (Optional[line argument]):
- Use the BigQuery REST API instead of the Storage API.
- * ``--use_legacy_sql`` (Optional[line argument]):
- Runs the query using Legacy SQL syntax. Defaults to Standard SQL if
- this argument not used.
- * ``--verbose`` (Optional[line argument]):
- If this flag is used, information including the query job ID and the
- amount of time for the query to complete will not be cleared after the
- query is finished. By default, this information will be displayed but
- will be cleared after the query is finished.
- * ``--params <params>`` (Optional[line argument]):
- If present, the argument following the ``--params`` flag must be
- either:
-
- * :class:`str` - A JSON string representation of a dictionary in the
- format ``{"param_name": "param_value"}`` (ex. ``{"num": 17}``). Use
- of the parameter in the query should be indicated with
- ``@param_name``. See ``In[5]`` in the Examples section below.
-
- * :class:`dict` reference - A reference to a ``dict`` in the format
- ``{"param_name": "param_value"}``, where the value types must be JSON
- serializable. The variable reference is indicated by a ``$`` before
- the variable name (ex. ``$my_dict_var``). See ``In[6]`` and ``In[7]``
- in the Examples section below.
-
- * ``<query>`` (required, cell argument):
- SQL query to run. If the query does not contain any whitespace (aside
- from leading and trailing whitespace), it is assumed to represent a
- fully-qualified table ID, and the latter's data will be fetched.
+Install ``bigquery-magics`` and call ``%load_ext bigquery_magics`` to use the
+``%%bigquery`` cell magic.
- Returns:
- A :class:`pandas.DataFrame` with the query results.
-
- .. note::
- All queries run using this magic will run using the context
- :attr:`~google.cloud.bigquery.magics.Context.credentials`.
+See the `BigQuery Magics reference documentation
+<https://googleapis.dev/python/bigquery-magics/latest/>`_.
"""
from __future__ import print_function
@@ -95,7 +36,7 @@
import IPython # type: ignore
from IPython import display # type: ignore
from IPython.core import magic_arguments # type: ignore
-except ImportError: # pragma: NO COVER
+except ImportError:
raise ImportError("This module can only be loaded in IPython.")
from google.api_core import client_info
@@ -104,11 +45,17 @@
import google.auth # type: ignore
from google.cloud import bigquery
import google.cloud.bigquery.dataset
+from google.cloud.bigquery import _versions_helpers
+from google.cloud.bigquery import exceptions
from google.cloud.bigquery.dbapi import _helpers
from google.cloud.bigquery.magics import line_arg_parser as lap
+try:
+ import bigquery_magics # type: ignore
+except ImportError:
+ bigquery_magics = None
-IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__)
+IPYTHON_USER_AGENT = "ipython-{}".format(IPython.__version__) # type: ignore
class Context(object):
@@ -125,7 +72,7 @@ def __init__(self):
self._default_query_job_config = bigquery.QueryJobConfig()
self._bigquery_client_options = client_options.ClientOptions()
self._bqstorage_client_options = client_options.ClientOptions()
- self._progress_bar_type = "tqdm"
+ self._progress_bar_type = "tqdm_notebook"
@property
def credentials(self):
@@ -269,7 +216,7 @@ def progress_bar_type(self):
Manually setting the progress_bar_type:
>>> from google.cloud.bigquery import magics
- >>> magics.context.progress_bar_type = "tqdm"
+ >>> magics.context.progress_bar_type = "tqdm_notebook"
"""
return self._progress_bar_type
@@ -278,7 +225,14 @@ def progress_bar_type(self, value):
self._progress_bar_type = value
-context = Context()
+# If bigquery_magics is available, we load that extension rather than this one.
+# Ensure google.cloud.bigquery.magics.context setters are on the correct magics
+# implementation in case the user has installed the package but hasn't updated
+# their code.
+if bigquery_magics is not None:
+ context = bigquery_magics.context
+else:
+ context = Context()
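Because the ``context`` object is shared with ``bigquery-magics`` when that package is installed, settings applied through this module affect whichever implementation provides the ``%%bigquery`` magic; a small sketch:

.. code-block:: python

    from google.cloud.bigquery import magics

    # Takes effect whether the magic was loaded via
    # `%load_ext bigquery_magics` or `%load_ext google.cloud.bigquery`.
    magics.context.progress_bar_type = "tqdm_notebook"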
def _handle_error(error, destination_var=None):
@@ -286,7 +240,7 @@ def _handle_error(error, destination_var=None):
Args:
error (Exception):
- An exception that ocurred during the query exectution.
+ An exception that occurred during the query execution.
destination_var (Optional[str]):
The name of the IPython session variable to store the query job.
"""
@@ -329,22 +283,25 @@ def _run_query(client, query, job_config=None):
Query complete after 2.07s
'bf633912-af2c-4780-b568-5d868058632b'
"""
- start_time = time.time()
+ start_time = time.perf_counter()
query_job = client.query(query, job_config=job_config)
if job_config and job_config.dry_run:
return query_job
- print("Executing query with job ID: {}".format(query_job.job_id))
+ print(f"Executing query with job ID: {query_job.job_id}")
while True:
- print("\rQuery executing: {:0.2f}s".format(time.time() - start_time), end="")
+ print(
+ f"\rQuery executing: {time.perf_counter() - start_time:.2f}s".format(),
+ end="",
+ )
try:
query_job.result(timeout=0.5)
break
except futures.TimeoutError:
continue
- print("\nQuery complete after {:0.2f}s".format(time.time() - start_time))
+ print(f"\nJob ID {query_job.job_id} successfully executed")
return query_job
@@ -365,7 +322,7 @@ def _create_dataset_if_necessary(client, dataset_id):
pass
dataset = bigquery.Dataset(dataset_reference)
dataset.location = client.location
- print("Creating dataset: {}".format(dataset_id))
+ print(f"Creating dataset: {dataset_id}")
dataset = client.create_dataset(dataset)
@@ -500,7 +457,16 @@ def _create_dataset_if_necessary(client, dataset_id):
default=None,
help=(
"Sets progress bar type to display a progress bar while executing the query."
- "Defaults to use tqdm. Install the ``tqdm`` package to use this feature."
+ "Defaults to use tqdm_notebook. Install the ``tqdm`` package to use this feature."
+ ),
+)
+@magic_arguments.argument(
+ "--location",
+ type=str,
+ default=None,
+ help=(
+ "Set the location to execute query."
+ "Defaults to location set in query setting in console."
),
)
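A sketch of the new ``--location`` flag in a notebook cell, assuming the magic is already loaded; the region and variable name are illustrative:

.. code-block:: python

    # In a notebook cell (IPython cell-magic syntax):
    #
    #   %%bigquery --location=asia-northeast1 results_df
    #   SELECT 17 AS answer
    #
    # results_df then holds a pandas.DataFrame with the query results.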
def _cell_magic(line, query):
@@ -546,6 +512,7 @@ def _cell_magic(line, query):
category=DeprecationWarning,
)
use_bqstorage_api = not args.use_rest_api
+ location = args.location
params = []
if params_option_value:
@@ -574,6 +541,7 @@ def _cell_magic(line, query):
default_query_job_config=context.default_query_job_config,
client_info=client_info.ClientInfo(user_agent=IPYTHON_USER_AGENT),
client_options=bigquery_client_options,
+ location=location,
)
if context._connection:
client._connection = context._connection
@@ -741,9 +709,41 @@ def _split_args_line(line):
def _make_bqstorage_client(client, use_bqstorage_api, client_options):
+ """Creates a BigQuery Storage client.
+
+ Args:
+ client (:class:`~google.cloud.bigquery.client.Client`): BigQuery client.
+ use_bqstorage_api (bool): whether to use the BigQuery Storage API.
+ client_options (:class:`google.api_core.client_options.ClientOptions`):
+ Custom options used with a new BigQuery Storage client instance
+ if one is created.
+
+ Raises:
+ ImportError: if the google-cloud-bigquery-storage package or the
+ grpcio package is not installed.
+
+ Returns:
+ A BigQuery Storage client if one can be created, or ``None`` if
+ ``use_bqstorage_api == False`` or the installed
+ google-cloud-bigquery-storage package is outdated.
+ """
if not use_bqstorage_api:
return None
+ try:
+ _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True)
+ except exceptions.BigQueryStorageNotFoundError as err:
+ customized_error = ImportError(
+ "The default BigQuery Storage API client cannot be used, install "
+ "the missing google-cloud-bigquery-storage and pyarrow packages "
+ "to use it. Alternatively, use the classic REST API by specifying "
+ "the --use_rest_api magic option."
+ )
+ raise customized_error from err
+ except exceptions.LegacyBigQueryStorageError:
+ pass
+
try:
from google.api_core.gapic_v1 import client_info as gapic_client_info
except ImportError as err:
diff --git a/google/cloud/bigquery/model.py b/google/cloud/bigquery/model.py
index 4d2bc346c..16581be5a 100644
--- a/google/cloud/bigquery/model.py
+++ b/google/cloud/bigquery/model.py
@@ -16,6 +16,8 @@
"""Define resources for the BigQuery ML Models API."""
+from __future__ import annotations # type: ignore
+
import copy
import datetime
import typing
@@ -56,7 +58,7 @@ def __init__(self, model_ref: Union["ModelReference", str, None]):
# semantics. The BigQuery API makes a distinction between an unset
# value, a null value, and a default value (0 or ""), but the protocol
# buffer classes do not.
- self._properties = {}
+ self._properties: Dict[str, Any] = {}
if isinstance(model_ref, str):
model_ref = ModelReference.from_string(model_ref)
@@ -184,6 +186,21 @@ def feature_columns(self) -> Sequence[standard_sql.StandardSqlField]:
standard_sql.StandardSqlField.from_api_repr(column) for column in resource
]
+ @property
+ def transform_columns(self) -> Sequence[TransformColumn]:
+ """The input feature columns that were used to train this model.
+ The output transform columns used to train this model.
+
+ See REST API:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/models#transformcolumn
+
+ Read-only.
+ """
+ resources: Sequence[Dict[str, Any]] = typing.cast(
+ Sequence[Dict[str, Any]], self._properties.get("transformColumns", [])
+ )
+ return [TransformColumn(resource) for resource in resources]
+
@property
def label_columns(self) -> Sequence[standard_sql.StandardSqlField]:
"""Label columns that were used to train this model.
@@ -434,6 +451,60 @@ def __repr__(self):
)
+class TransformColumn:
+ """TransformColumn represents a transform column feature.
+
+ See
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/models#transformcolumn
+
+ Args:
+ resource:
+ A dictionary representing a transform column feature.
+ """
+
+ def __init__(self, resource: Dict[str, Any]):
+ self._properties = resource
+
+ @property
+ def name(self) -> Optional[str]:
+ """Name of the column."""
+ return self._properties.get("name")
+
+ @property
+ def type_(self) -> Optional[standard_sql.StandardSqlDataType]:
+ """Data type of the column after the transform.
+
+ Returns:
+ Optional[google.cloud.bigquery.standard_sql.StandardSqlDataType]:
+ Data type of the column.
+ """
+ type_json = self._properties.get("type")
+ if type_json is None:
+ return None
+ return standard_sql.StandardSqlDataType.from_api_repr(type_json)
+
+ @property
+ def transform_sql(self) -> Optional[str]:
+ """The SQL expression used in the column transform."""
+ return self._properties.get("transformSql")
+
+ @classmethod
+ def from_api_repr(cls, resource: Dict[str, Any]) -> "TransformColumn":
+ """Constructs a transform column feature given its API representation
+
+ Args:
+ resource:
+ Transform column feature representation from the API
+
+ Returns:
+ Transform column feature parsed from ``resource``.
+ """
+ this = cls({})
+ resource = copy.deepcopy(resource)
+ this._properties = resource
+ return this
+
+
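A small sketch of reading the new transform column metadata; the resource dict mirrors the REST representation and its values are invented:

.. code-block:: python

    from google.cloud.bigquery.model import TransformColumn

    column = TransformColumn.from_api_repr(
        {
            "name": "scaled_feature",
            "type": {"typeKind": "FLOAT64"},
            "transformSql": "ML.STANDARD_SCALER(feature) OVER()",
        }
    )
    print(column.name)             # "scaled_feature"
    print(column.type_.type_kind)  # StandardSqlTypeNames.FLOAT64
    print(column.transform_sql)    # the transform expression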
def _model_arg_to_model_ref(value, default_project=None):
"""Helper to convert a string or Model to ModelReference.
diff --git a/google/cloud/bigquery/opentelemetry_tracing.py b/google/cloud/bigquery/opentelemetry_tracing.py
index adecea121..b5f6bf991 100644
--- a/google/cloud/bigquery/opentelemetry_tracing.py
+++ b/google/cloud/bigquery/opentelemetry_tracing.py
@@ -18,9 +18,9 @@
logger = logging.getLogger(__name__)
try:
- from opentelemetry import trace
- from opentelemetry.instrumentation.utils import http_status_to_status_code
- from opentelemetry.trace.status import Status
+ from opentelemetry import trace # type: ignore
+ from opentelemetry.instrumentation.utils import http_status_to_status_code # type: ignore
+ from opentelemetry.trace.status import Status # type: ignore
HAS_OPENTELEMETRY = True
_warned_telemetry = True
@@ -87,16 +87,38 @@ def create_span(name, attributes=None, client=None, job_ref=None):
def _get_final_span_attributes(attributes=None, client=None, job_ref=None):
- final_attributes = {}
- final_attributes.update(_default_attributes.copy())
+ """Compiles attributes from: client, job_ref, user-provided attributes.
+
+ Attributes from all of these sources are merged together. Note that
+ attributes are added in order of increasing precedence:
+ attributes added later may overwrite attributes added earlier.
+
+ Args:
+ attributes (Optional[dict]):
+ Additional attributes that pertain to
+ the specific API call (i.e. not a default attribute)
+
+ client (Optional[google.cloud.bigquery.client.Client]):
+ Pass in a Client object to extract any attributes that may be
+ relevant to it and add them to the final_attributes
+
+ job_ref (Optional[google.cloud.bigquery.job._AsyncJob]):
+ Pass in an _AsyncJob object to extract any attributes that may be
+ relevant to it and add them to the final_attributes.
+
+ Returns: dict
+ """
+
+ collected_attributes = _default_attributes.copy()
+
if client:
- client_attributes = _set_client_attributes(client)
- final_attributes.update(client_attributes)
+ collected_attributes.update(_set_client_attributes(client))
if job_ref:
- job_attributes = _set_job_attributes(job_ref)
- final_attributes.update(job_attributes)
+ collected_attributes.update(_set_job_attributes(job_ref))
if attributes:
- final_attributes.update(attributes)
+ collected_attributes.update(attributes)
+
+ final_attributes = {k: v for k, v in collected_attributes.items() if v is not None}
return final_attributes
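The merge order can be pictured with plain dictionaries; the names and values below are illustrative rather than the library's actual defaults:

.. code-block:: python

    default_attributes = {"db.system": "BigQuery"}
    client_attributes = {"db.name": "my-project", "location": None}
    user_attributes = {"path": "/bigquery/v2/jobs"}

    # Later updates win, then None-valued entries are dropped.
    collected = {**default_attributes, **client_attributes, **user_attributes}
    final = {k: v for k, v in collected.items() if v is not None}
    # {'db.system': 'BigQuery', 'db.name': 'my-project', 'path': '/bigquery/v2/jobs'}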
@@ -107,10 +129,7 @@ def _set_client_attributes(client):
def _set_job_attributes(job_ref):
job_attributes = {
"db.name": job_ref.project,
- "location": job_ref.location,
- "num_child_jobs": job_ref.num_child_jobs,
"job_id": job_ref.job_id,
- "parent_job_id": job_ref.parent_job_id,
"state": job_ref.state,
}
@@ -125,4 +144,21 @@ def _set_job_attributes(job_ref):
if job_ref.ended is not None:
job_attributes["timeEnded"] = job_ref.ended.isoformat()
+ if job_ref.location is not None:
+ job_attributes["location"] = job_ref.location
+
+ if job_ref.parent_job_id is not None:
+ job_attributes["parent_job_id"] = job_ref.parent_job_id
+
+ if job_ref.num_child_jobs is not None:
+ job_attributes["num_child_jobs"] = job_ref.num_child_jobs
+
+ total_bytes_billed = getattr(job_ref, "total_bytes_billed", None)
+ if total_bytes_billed is not None:
+ job_attributes["total_bytes_billed"] = total_bytes_billed
+
+ total_bytes_processed = getattr(job_ref, "total_bytes_processed", None)
+ if total_bytes_processed is not None:
+ job_attributes["total_bytes_processed"] = total_bytes_processed
+
return job_attributes
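
The docstring above describes a last-writer-wins merge followed by a filter that drops ``None`` values. A minimal, self-contained sketch of that behavior, using made-up attribute names rather than the library's internals:

```python
# Illustrative only: defaults < client attrs < job attrs < per-call attrs,
# with None-valued entries removed before a span would be created.
def merge_attributes(defaults, client_attrs=None, job_attrs=None, extra=None):
    collected = dict(defaults)
    for source in (client_attrs, job_attrs, extra):
        if source:
            collected.update(source)
    return {key: value for key, value in collected.items() if value is not None}


print(merge_attributes({"db.system": "BigQuery"}, {"db.name": "my-project"}, {"state": None}))
# {'db.system': 'BigQuery', 'db.name': 'my-project'}
```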
diff --git a/google/cloud/bigquery/query.py b/google/cloud/bigquery/query.py
index 0469cb271..170ed2976 100644
--- a/google/cloud/bigquery/query.py
+++ b/google/cloud/bigquery/query.py
@@ -18,12 +18,13 @@
import copy
import datetime
import decimal
-from typing import Any, Optional, Dict, Union
+from typing import Any, cast, Optional, Dict, Union
from google.cloud.bigquery.table import _parse_schema_resource
+from google.cloud.bigquery import _helpers
from google.cloud.bigquery._helpers import _rows_from_json
-from google.cloud.bigquery._helpers import _QUERY_PARAMS_FROM_JSON
from google.cloud.bigquery._helpers import _SCALAR_VALUE_TO_JSON_PARAM
+from google.cloud.bigquery._helpers import _SUPPORTED_RANGE_ELEMENTS
_SCALAR_VALUE_TYPE = Optional[
@@ -362,6 +363,129 @@ def __repr__(self):
return f"{self.__class__.__name__}({items}{name}{description})"
+class RangeQueryParameterType(_AbstractQueryParameterType):
+ """Type representation for range query parameters.
+
+ Args:
+ type_ (Union[ScalarQueryParameterType, str]):
+ Type of range element, must be one of 'TIMESTAMP', 'DATETIME', or
+ 'DATE'.
+ name (Optional[str]):
+ The name of the query parameter. Primarily used if the type is
+ one of the subfields in ``StructQueryParameterType`` instance.
+ description (Optional[str]):
+ The query parameter description. Primarily used if the type is
+ one of the subfields in ``StructQueryParameterType`` instance.
+ """
+
+ @classmethod
+ def _parse_range_element_type(cls, type_):
+ """Helper method that parses the input range element type, which may
+ be a string, or a ScalarQueryParameterType object.
+
+ Returns:
+ google.cloud.bigquery.query.ScalarQueryParameterType: Instance
+ """
+ if isinstance(type_, str):
+ if type_ not in _SUPPORTED_RANGE_ELEMENTS:
+ raise ValueError(
+ "If given as a string, range element type must be one of "
+ "'TIMESTAMP', 'DATE', or 'DATETIME'."
+ )
+ return ScalarQueryParameterType(type_)
+ elif isinstance(type_, ScalarQueryParameterType):
+ if type_._type not in _SUPPORTED_RANGE_ELEMENTS:
+ raise ValueError(
+ "If given as a ScalarQueryParameter object, range element "
+ "type must be one of 'TIMESTAMP', 'DATE', or 'DATETIME' "
+ "type."
+ )
+ return type_
+ else:
+ raise ValueError(
+ "range_type must be a string or ScalarQueryParameter object, "
+ "of 'TIMESTAMP', 'DATE', or 'DATETIME' type."
+ )
+
+ def __init__(self, type_, *, name=None, description=None):
+ self.type_ = self._parse_range_element_type(type_)
+ self.name = name
+ self.description = description
+
+ @classmethod
+ def from_api_repr(cls, resource):
+ """Factory: construct parameter type from JSON resource.
+
+ Args:
+ resource (Dict): JSON mapping of parameter
+
+ Returns:
+ google.cloud.bigquery.query.RangeQueryParameterType: Instance
+ """
+ type_ = resource["rangeElementType"]["type"]
+ name = resource.get("name")
+ description = resource.get("description")
+
+ return cls(type_, name=name, description=description)
+
+ def to_api_repr(self):
+ """Construct JSON API representation for the parameter type.
+
+ Returns:
+ Dict: JSON mapping
+ """
+ # Name and description are only used if the type is a field inside a struct
+ # type, but it's StructQueryParameterType's responsibility to use these two
+ # attributes in the API representation when needed. Here we omit them.
+ return {
+ "type": "RANGE",
+ "rangeElementType": self.type_.to_api_repr(),
+ }
+
+ def with_name(self, new_name: Union[str, None]):
+ """Return a copy of the instance with ``name`` set to ``new_name``.
+
+ Args:
+ name (Union[str, None]):
+ The new name of the range query parameter type. If ``None``,
+ the existing name is cleared.
+
+ Returns:
+ google.cloud.bigquery.query.RangeQueryParameterType:
+ A new instance with updated name.
+ """
+ return type(self)(self.type_, name=new_name, description=self.description)
+
+ def __repr__(self):
+ name = f", name={self.name!r}" if self.name is not None else ""
+ description = (
+ f", description={self.description!r}"
+ if self.description is not None
+ else ""
+ )
+ return f"{self.__class__.__name__}({self.type_!r}{name}{description})"
+
+ def _key(self):
+ """A tuple key that uniquely describes this field.
+
+ Used to compute this instance's hashcode and evaluate equality.
+
+ Returns:
+ Tuple: The contents of this
+ :class:`~google.cloud.bigquery.query.RangeQueryParameterType`.
+ """
+ type_ = self.type_.to_api_repr()
+ return (self.name, type_, self.description)
+
+ def __eq__(self, other):
+ if not isinstance(other, RangeQueryParameterType):
+ return NotImplemented
+ return self._key() == other._key()
+
+ def __ne__(self, other):
+ return not self == other
+
+
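
A short usage sketch of the ``RangeQueryParameterType`` added above; per the code in this diff, the element type may be given as a string or a ``ScalarQueryParameterType``, and the serialized form nests it under ``rangeElementType``:

```python
from google.cloud.bigquery.query import RangeQueryParameterType

range_type = RangeQueryParameterType("DATE", name="booking_window")
print(range_type.to_api_repr())
# {'type': 'RANGE', 'rangeElementType': {'type': 'DATE'}}

# Unsupported element types are rejected:
# RangeQueryParameterType("STRING")  # raises ValueError
```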
class _AbstractQueryParameter(object):
"""Base class for named / positional query parameters."""
@@ -447,6 +571,9 @@ def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter":
Returns:
google.cloud.bigquery.query.ScalarQueryParameter: Instance
"""
+ # Import here to avoid circular imports.
+ from google.cloud.bigquery import schema
+
name = resource.get("name")
type_ = resource["parameterType"]["type"]
@@ -454,7 +581,9 @@ def from_api_repr(cls, resource: dict) -> "ScalarQueryParameter":
# from the back-end - the latter omits it for None values.
value = resource.get("parameterValue", {}).get("value")
if value is not None:
- converted = _QUERY_PARAMS_FROM_JSON[type_](value, None)
+ converted = _helpers.SCALAR_QUERY_PARAM_PARSER.to_py(
+ value, schema.SchemaField(cast(str, name), type_)
+ )
else:
converted = None
@@ -467,9 +596,8 @@ def to_api_repr(self) -> dict:
Dict: JSON mapping
"""
value = self.value
- converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_)
- if converter is not None:
- value = converter(value)
+ converter = _SCALAR_VALUE_TO_JSON_PARAM.get(self.type_, lambda value: value)
+ value = converter(value) # type: ignore
resource: Dict[str, Any] = {
"parameterType": {"type": self.type_},
"parameterValue": {"value": value},
@@ -570,13 +698,20 @@ def _from_api_repr_struct(cls, resource):
@classmethod
def _from_api_repr_scalar(cls, resource):
+ """Converts REST resource into a list of scalar values."""
+ # Import here to avoid circular imports.
+ from google.cloud.bigquery import schema
+
name = resource.get("name")
array_type = resource["parameterType"]["arrayType"]["type"]
parameter_value = resource.get("parameterValue", {})
array_values = parameter_value.get("arrayValues", ())
values = [value["value"] for value in array_values]
converted = [
- _QUERY_PARAMS_FROM_JSON[array_type](value, None) for value in values
+ _helpers.SCALAR_QUERY_PARAM_PARSER.to_py(
+ value, schema.SchemaField(name, array_type)
+ )
+ for value in values
]
return cls(name, array_type, converted)
@@ -624,9 +759,10 @@ def to_api_repr(self) -> dict:
else:
a_type = self.array_type.to_api_repr()
- converter = _SCALAR_VALUE_TO_JSON_PARAM.get(a_type["type"])
- if converter is not None:
- values = [converter(value) for value in values]
+ converter = _SCALAR_VALUE_TO_JSON_PARAM.get(
+ a_type["type"], lambda value: value
+ )
+ values = [converter(value) for value in values] # type: ignore
a_values = [{"value": value} for value in values]
resource = {
@@ -668,7 +804,7 @@ def __repr__(self):
class StructQueryParameter(_AbstractQueryParameter):
- """Named / positional query parameters for struct values.
+ """Name / positional query parameters for struct values.
Args:
name (Optional[str]):
@@ -726,6 +862,9 @@ def from_api_repr(cls, resource: dict) -> "StructQueryParameter":
Returns:
google.cloud.bigquery.query.StructQueryParameter: Instance
"""
+ # Import here to avoid circular imports.
+ from google.cloud.bigquery import schema
+
name = resource.get("name")
instance = cls(name)
type_resources = {}
@@ -753,7 +892,9 @@ def from_api_repr(cls, resource: dict) -> "StructQueryParameter":
converted = ArrayQueryParameter.from_api_repr(struct_resource)
else:
value = value["value"]
- converted = _QUERY_PARAMS_FROM_JSON[type_](value, None)
+ converted = _helpers.SCALAR_QUERY_PARAM_PARSER.to_py(
+ value, schema.SchemaField(cast(str, name), type_)
+ )
instance.struct_values[key] = converted
return instance
@@ -773,10 +914,8 @@ def to_api_repr(self) -> dict:
values[name] = repr_["parameterValue"]
else:
s_types[name] = {"name": name, "type": {"type": type_}}
- converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_)
- if converter is not None:
- value = converter(value)
- values[name] = {"value": value}
+ converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_, lambda value: value)
+ values[name] = {"value": converter(value)}
resource = {
"parameterType": {
@@ -795,7 +934,7 @@ def _key(self):
Used to compute this instance's hashcode and evaluate equality.
Returns:
- Tuple: The contents of this :class:`~google.cloud.biquery.ArrayQueryParameter`.
+ Tuple: The contents of this :class:`~google.cloud.bigquery.ArrayQueryParameter`.
"""
return (self.name, self.struct_types, self.struct_values)
@@ -811,6 +950,178 @@ def __repr__(self):
return "StructQueryParameter{}".format(self._key())
+class RangeQueryParameter(_AbstractQueryParameter):
+ """Named / positional query parameters for range values.
+
+ Args:
+ range_element_type (Union[str, RangeQueryParameterType]):
+ The type of range elements. It must be one of 'TIMESTAMP',
+ 'DATE', or 'DATETIME'.
+
+ start (Optional[Union[ScalarQueryParameter, str]]):
+ The start of the range value. Must be the same type as
+ range_element_type. If not provided, it's interpreted as UNBOUNDED.
+
+ end (Optional[Union[ScalarQueryParameter, str]]):
+ The end of the range value. Must be the same type as
+ range_element_type. If not provided, it's interpreted as UNBOUNDED.
+
+ name (Optional[str]):
+ Parameter name, used via ``@foo`` syntax. If None, the
+ parameter can only be addressed via position (``?``).
+ """
+
+ @classmethod
+ def _parse_range_element_type(cls, range_element_type):
+ if isinstance(range_element_type, str):
+ if range_element_type not in _SUPPORTED_RANGE_ELEMENTS:
+ raise ValueError(
+ "If given as a string, range_element_type must be one of "
+ f"'TIMESTAMP', 'DATE', or 'DATETIME'. Got {range_element_type}."
+ )
+ return RangeQueryParameterType(range_element_type)
+ elif isinstance(range_element_type, RangeQueryParameterType):
+ if range_element_type.type_._type not in _SUPPORTED_RANGE_ELEMENTS:
+ raise ValueError(
+ "If given as a RangeQueryParameterType object, "
+ "range_element_type must be one of 'TIMESTAMP', 'DATE', "
+ "or 'DATETIME' type."
+ )
+ return range_element_type
+ else:
+ raise ValueError(
+ "range_element_type must be a string or "
+ "RangeQueryParameterType object, of 'TIMESTAMP', 'DATE', "
+ "or 'DATETIME' type. Got "
+ f"{type(range_element_type)}:{range_element_type}"
+ )
+
+ @classmethod
+ def _serialize_range_element_value(cls, value, type_):
+ if value is None or isinstance(value, str):
+ return value
+ else:
+ converter = _SCALAR_VALUE_TO_JSON_PARAM.get(type_)
+ if converter is not None:
+ return converter(value) # type: ignore
+ else:
+ raise ValueError(
+ f"Cannot convert range element value from type {type_}, "
+ "must be one of the strings 'TIMESTAMP', 'DATE' "
+ "'DATETIME' or a RangeQueryParameterType object."
+ )
+
+ def __init__(
+ self,
+ range_element_type,
+ start=None,
+ end=None,
+ name=None,
+ ):
+ self.name = name
+ self.range_element_type = self._parse_range_element_type(range_element_type)
+ self.start = start
+ self.end = end
+
+ @classmethod
+ def positional(
+ cls, range_element_type, start=None, end=None
+ ) -> "RangeQueryParameter":
+ """Factory for positional parameters.
+
+ Args:
+ range_element_type (Union[str, RangeQueryParameterType]):
+ The type of range elements. It must be one of `'TIMESTAMP'`,
+ `'DATE'`, or `'DATETIME'`.
+
+ start (Optional[Union[ScalarQueryParameter, str]]):
+ The start of the range value. Must be the same type as
+ range_element_type. If not provided, it's interpreted as
+ UNBOUNDED.
+
+ end (Optional[Union[ScalarQueryParameter, str]]):
+ The end of the range value. Must be the same type as
+ range_element_type. If not provided, it's interpreted as
+ UNBOUNDED.
+
+ Returns:
+ google.cloud.bigquery.query.RangeQueryParameter: Instance without
+ name.
+ """
+ return cls(range_element_type, start, end)
+
+ @classmethod
+ def from_api_repr(cls, resource: dict) -> "RangeQueryParameter":
+ """Factory: construct parameter from JSON resource.
+
+ Args:
+ resource (Dict): JSON mapping of parameter
+
+ Returns:
+ google.cloud.bigquery.query.RangeQueryParameter: Instance
+ """
+ name = resource.get("name")
+ range_element_type = (
+ resource.get("parameterType", {}).get("rangeElementType", {}).get("type")
+ )
+ range_value = resource.get("parameterValue", {}).get("rangeValue", {})
+ start = range_value.get("start", {}).get("value")
+ end = range_value.get("end", {}).get("value")
+
+ return cls(range_element_type, start=start, end=end, name=name)
+
+ def to_api_repr(self) -> dict:
+ """Construct JSON API representation for the parameter.
+
+ Returns:
+ Dict: JSON mapping
+ """
+ range_element_type = self.range_element_type.to_api_repr()
+ type_ = self.range_element_type.type_._type
+ start = self._serialize_range_element_value(self.start, type_)
+ end = self._serialize_range_element_value(self.end, type_)
+ resource = {
+ "parameterType": range_element_type,
+ "parameterValue": {
+ "rangeValue": {
+ "start": {"value": start},
+ "end": {"value": end},
+ },
+ },
+ }
+
+ # distinguish between name not provided vs. name being empty string
+ if self.name is not None:
+ resource["name"] = self.name
+
+ return resource
+
+ def _key(self):
+ """A tuple key that uniquely describes this field.
+
+ Used to compute this instance's hashcode and evaluate equality.
+
+ Returns:
+ Tuple: The contents of this
+ :class:`~google.cloud.bigquery.query.RangeQueryParameter`.
+ """
+
+ range_element_type = self.range_element_type.to_api_repr()
+ return (self.name, range_element_type, self.start, self.end)
+
+ def __eq__(self, other):
+ if not isinstance(other, RangeQueryParameter):
+ return NotImplemented
+ return self._key() == other._key()
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __repr__(self):
+ return "RangeQueryParameter{}".format(self._key())
+
+
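
A sketch of the ``RangeQueryParameter`` added above. String bounds pass through unchanged, and an omitted bound is serialized with a ``None`` value, which the service interprets as UNBOUNDED:

```python
from google.cloud.bigquery.query import RangeQueryParameter

param = RangeQueryParameter("DATE", start="2023-01-01", name="window")
print(param.to_api_repr())
# {'parameterType': {'type': 'RANGE', 'rangeElementType': {'type': 'DATE'}},
#  'parameterValue': {'rangeValue': {'start': {'value': '2023-01-01'},
#                                    'end': {'value': None}}},
#  'name': 'window'}
```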
class SqlParameterScalarTypes:
"""Supported scalar SQL query parameter types as type objects."""
@@ -911,6 +1222,33 @@ def job_id(self):
"""
return self._properties.get("jobReference", {}).get("jobId")
+ @property
+ def location(self):
+ """Location of the query job these results are from.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.job_reference
+ or https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.location
+
+ Returns:
+ str: Location of the query job.
+ """
+ location = self._properties.get("jobReference", {}).get("location")
+
+ # Sometimes there's no job, but we still want to get the location
+ # information. Prefer the value from the job reference for backwards compatibility.
+ if not location:
+ location = self._properties.get("location")
+ return location
+
+ @property
+ def query_id(self) -> Optional[str]:
+ """[Preview] ID of a completed query.
+
+ This ID is auto-generated and not guaranteed to be populated.
+ """
+ return self._properties.get("queryId")
+
@property
def page_token(self):
"""Token for fetching next bach of results.
@@ -951,6 +1289,20 @@ def total_bytes_processed(self):
if total_bytes_processed is not None:
return int(total_bytes_processed)
+ @property
+ def slot_millis(self):
+ """Total number of slot ms the user is actually billed for.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.total_slot_ms
+
+ Returns:
+ Optional[int]: Count generated on the server (None until set by the server).
+ """
+ slot_millis = self._properties.get("totalSlotMs")
+ if slot_millis is not None:
+ return int(slot_millis)
+
@property
def num_dml_affected_rows(self):
"""Total number of rows affected by a DML query.
@@ -965,6 +1317,56 @@ def num_dml_affected_rows(self):
if num_dml_affected_rows is not None:
return int(num_dml_affected_rows)
+ @property
+ def created(self):
+ """Creation time of this query.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.creation_time
+
+ Returns:
+ Optional[datetime.datetime]:
+ the creation time (None until set from the server).
+ """
+ millis = self._properties.get("creationTime")
+ if millis is not None:
+ return _helpers._datetime_from_microseconds(int(millis) * 1000.0)
+
+ @property
+ def started(self):
+ """Start time of this query.
+
+ This field will be present when the query transitions from the
+ PENDING state to either RUNNING or DONE.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.start_time
+
+ Returns:
+ Optional[datetime.datetime]:
+ the start time (None until set from the server).
+ """
+ millis = self._properties.get("startTime")
+ if millis is not None:
+ return _helpers._datetime_from_microseconds(int(millis) * 1000.0)
+
+ @property
+ def ended(self):
+ """End time of this query.
+
+ This field will be present whenever a query is in the DONE state.
+
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.end_time
+
+ Returns:
+ Optional[datetime.datetime]:
+ the end time (None until set from the server).
+ """
+ millis = self._properties.get("endTime")
+ if millis is not None:
+ return _helpers._datetime_from_microseconds(int(millis) * 1000.0)
+
@property
def rows(self):
"""Query results.
@@ -997,16 +1399,8 @@ def _set_properties(self, api_response):
Args:
api_response (Dict): Response returned from an API call
"""
- job_id_present = (
- "jobReference" in api_response
- and "jobId" in api_response["jobReference"]
- and "projectId" in api_response["jobReference"]
- )
- if not job_id_present:
- raise ValueError("QueryResult requires a job reference")
-
self._properties.clear()
- self._properties.update(copy.deepcopy(api_response))
+ self._properties.update(api_response)
def _query_param_from_api_repr(resource):
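
The new ``created``/``started``/``ended`` properties receive epoch milliseconds as strings and scale them to microseconds before conversion. A standalone sketch of the same arithmetic, without the private ``_helpers`` module:

```python
import datetime

def datetime_from_millis(millis: str) -> datetime.datetime:
    # creationTime/startTime/endTime arrive as millisecond epoch strings.
    epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
    return epoch + datetime.timedelta(microseconds=int(millis) * 1000)

print(datetime_from_millis("1700000000000"))  # 2023-11-14 22:13:20+00:00
```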
diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py
index 254b26608..19012efd6 100644
--- a/google/cloud/bigquery/retry.py
+++ b/google/cloud/bigquery/retry.py
@@ -14,6 +14,7 @@
from google.api_core import exceptions
from google.api_core import retry
+import google.api_core.future.polling
from google.auth import exceptions as auth_exceptions # type: ignore
import requests.exceptions
@@ -27,13 +28,34 @@
exceptions.TooManyRequests,
exceptions.InternalServerError,
exceptions.BadGateway,
+ exceptions.ServiceUnavailable,
requests.exceptions.ChunkedEncodingError,
requests.exceptions.ConnectionError,
requests.exceptions.Timeout,
auth_exceptions.TransportError,
)
-_DEFAULT_JOB_DEADLINE = 60.0 * 10.0 # seconds
+_DEFAULT_RETRY_DEADLINE = 10.0 * 60.0 # 10 minutes
+
+# Ambiguous errors (e.g. internalError, backendError, rateLimitExceeded) retry
+# until the full `_DEFAULT_RETRY_DEADLINE`. This is because the
+# `jobs.getQueryResults` REST API translates a job failure into an HTTP error.
+#
+# TODO(https://github.com/googleapis/python-bigquery/issues/1903): Investigate
+# if we can fail early for ambiguous errors in `QueryJob.result()`'s call to
+# the `jobs.getQueryResult` API.
+#
+# We need `_DEFAULT_JOB_DEADLINE` to be some multiple of
+# `_DEFAULT_RETRY_DEADLINE` to allow for a few retries after the retry
+# timeout is reached.
+#
+# Note: This multiple should actually be a multiple of
+# (2 * _DEFAULT_RETRY_DEADLINE). After an ambiguous exception, the first
+# call from `job_retry()` refreshes the job state without actually restarting
+# the query. The second `job_retry()` actually restarts the query. For a more
+# detailed explanation, see the comments where we set `restart_query_job = True`
+# in `QueryJob.result()`'s inner `is_job_done()` function.
+_DEFAULT_JOB_DEADLINE = 2.0 * (2.0 * _DEFAULT_RETRY_DEADLINE)
def _should_retry(exc):
@@ -50,7 +72,7 @@ def _should_retry(exc):
return reason in _RETRYABLE_REASONS
-DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=600.0)
+DEFAULT_RETRY = retry.Retry(predicate=_should_retry, deadline=_DEFAULT_RETRY_DEADLINE)
"""The default retry object.
Any method with a ``retry`` parameter will be retried automatically,
@@ -60,6 +82,37 @@ def _should_retry(exc):
pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``.
"""
+
+def _should_retry_get_job_conflict(exc):
+ """Predicate for determining when to retry a jobs.get call after a conflict error.
+
+ Sometimes we get a 404 after a Conflict. In this case, we
+ have pretty high confidence that by retrying the 404, we'll
+ (hopefully) eventually recover the job.
+ https://github.com/googleapis/python-bigquery/issues/2134
+
+ Note: we may be able to extend this to user-specified predicates
+ after https://github.com/googleapis/python-api-core/issues/796
+ to tweak existing Retry object predicates.
+ """
+ return isinstance(exc, exceptions.NotFound) or _should_retry(exc)
+
+
+# Pick a deadline smaller than our other deadlines since we want to timeout
+# before those expire.
+_DEFAULT_GET_JOB_CONFLICT_DEADLINE = _DEFAULT_RETRY_DEADLINE / 3.0
+_DEFAULT_GET_JOB_CONFLICT_RETRY = retry.Retry(
+ predicate=_should_retry_get_job_conflict,
+ deadline=_DEFAULT_GET_JOB_CONFLICT_DEADLINE,
+)
+"""Private, may be removed in future."""
+
+
+# Note: Take care when updating DEFAULT_TIMEOUT to anything but None. We
+# briefly had a default timeout, but even setting it at more than twice the
+# theoretical server-side default timeout of 2 minutes was not enough for
+# complex queries. See:
+# https://github.com/googleapis/python-bigquery/issues/970#issuecomment-921934647
DEFAULT_TIMEOUT = None
"""The default API timeout.
@@ -67,10 +120,39 @@ def _should_retry(exc):
deadline on the retry object.
"""
-job_retry_reasons = "rateLimitExceeded", "backendError"
+job_retry_reasons = (
+ "jobBackendError",
+ "jobInternalError",
+ "jobRateLimitExceeded",
+)
def _job_should_retry(exc):
+ # Sometimes we have ambiguous errors, such as 'backendError' which could
+ # be due to an API problem or a job problem. For these, make sure we retry
+ # our is_job_done() function.
+ #
+ # Note: This won't restart the job unless we know for sure it's because of
+ # the job status and set restart_query_job = True in that loop. This means
+ # that we might end up calling this predicate twice for the same job
+ # but from different paths: (1) from jobs.getQueryResults RetryError and
+ # (2) from translating the job error from the body of a jobs.get response.
+ #
+ # Note: If we start retrying job types other than queries where we don't
+ # call the problematic getQueryResults API to check the status, we need
+ # to provide a different predicate, as there shouldn't be ambiguous
+ # errors in those cases.
+ if isinstance(exc, exceptions.RetryError):
+ exc = exc.cause
+
+ # Per https://github.com/googleapis/python-bigquery/issues/1929, sometimes
+ # retriable errors make their way here. Because of the separate
+ # `restart_query_job` logic to make sure we aren't restarting non-failed
+ # jobs, it should be safe to continue and not totally fail our attempt at
+ # waiting for the query to complete.
+ if _should_retry(exc):
+ return True
+
if not hasattr(exc, "errors") or len(exc.errors) == 0:
return False
@@ -84,3 +166,41 @@ def _job_should_retry(exc):
"""
The default job retry object.
"""
+
+
+def _query_job_insert_should_retry(exc):
+ # Per https://github.com/googleapis/python-bigquery/issues/2134, sometimes
+ # we get a 404 error. In this case, if we get this far, assume that the job
+ # doesn't actually exist and try again. We can't add 404 to the default
+ # job_retry because that happens for errors like "this table does not
+ # exist", which probably won't resolve with a retry.
+ if isinstance(exc, exceptions.RetryError):
+ exc = exc.cause
+
+ if isinstance(exc, exceptions.NotFound):
+ message = exc.message
+ # Don't try to retry table/dataset not found, just job not found.
+ # The URL contains jobs, so use whitespace to disambiguate.
+ return message is not None and " job" in message.lower()
+
+ return _job_should_retry(exc)
+
+
+_DEFAULT_QUERY_JOB_INSERT_RETRY = retry.Retry(
+ predicate=_query_job_insert_should_retry,
+ # jobs.insert doesn't wait for the job to complete, so we don't need the
+ # long _DEFAULT_JOB_DEADLINE for this part.
+ deadline=_DEFAULT_RETRY_DEADLINE,
+)
+"""Private, may be removed in future."""
+
+
+DEFAULT_GET_JOB_TIMEOUT = 128
+"""
+Default timeout for Client.get_job().
+"""
+
+POLLING_DEFAULT_VALUE = google.api_core.future.polling.PollingFuture._DEFAULT_VALUE
+"""
+Default value defined in google.api_core.future.polling.PollingFuture.
+"""
diff --git a/google/cloud/bigquery/routine/__init__.py b/google/cloud/bigquery/routine/__init__.py
index 7353073c8..025103957 100644
--- a/google/cloud/bigquery/routine/__init__.py
+++ b/google/cloud/bigquery/routine/__init__.py
@@ -20,6 +20,8 @@
from google.cloud.bigquery.routine.routine import RoutineArgument
from google.cloud.bigquery.routine.routine import RoutineReference
from google.cloud.bigquery.routine.routine import RoutineType
+from google.cloud.bigquery.routine.routine import RemoteFunctionOptions
+from google.cloud.bigquery.routine.routine import ExternalRuntimeOptions
__all__ = (
@@ -28,4 +30,6 @@
"RoutineArgument",
"RoutineReference",
"RoutineType",
+ "RemoteFunctionOptions",
+ "ExternalRuntimeOptions",
)
diff --git a/google/cloud/bigquery/routine/routine.py b/google/cloud/bigquery/routine/routine.py
index 3c0919003..c5aa8750e 100644
--- a/google/cloud/bigquery/routine/routine.py
+++ b/google/cloud/bigquery/routine/routine.py
@@ -15,8 +15,8 @@
# limitations under the License.
"""Define resources for the BigQuery Routines API."""
-
-from typing import Any, Dict, Optional
+import typing
+from typing import Any, Dict, Optional, Union
import google.cloud._helpers # type: ignore
from google.cloud.bigquery import _helpers
@@ -67,6 +67,9 @@ class Routine(object):
"type_": "routineType",
"description": "description",
"determinism_level": "determinismLevel",
+ "remote_function_options": "remoteFunctionOptions",
+ "data_governance_type": "dataGovernanceType",
+ "external_runtime_options": "externalRuntimeOptions",
}
def __init__(self, routine_ref, **kwargs) -> None:
@@ -214,7 +217,7 @@ def return_type(self, value: StandardSqlDataType):
self._properties[self._PROPERTY_TO_API_FIELD["return_type"]] = resource
@property
- def return_table_type(self) -> Optional[StandardSqlTableType]:
+ def return_table_type(self) -> Union[StandardSqlTableType, Any, None]:
"""The return type of a Table Valued Function (TVF) routine.
.. versionadded:: 2.22.0
@@ -297,6 +300,87 @@ def determinism_level(self):
def determinism_level(self, value):
self._properties[self._PROPERTY_TO_API_FIELD["determinism_level"]] = value
+ @property
+ def remote_function_options(self):
+ """Optional[google.cloud.bigquery.routine.RemoteFunctionOptions]:
+ Configures remote function options for a routine.
+
+ Raises:
+ ValueError:
+ If the value is not
+ :class:`~google.cloud.bigquery.routine.RemoteFunctionOptions` or
+ :data:`None`.
+ """
+ prop = self._properties.get(
+ self._PROPERTY_TO_API_FIELD["remote_function_options"]
+ )
+ if prop is not None:
+ return RemoteFunctionOptions.from_api_repr(prop)
+
+ @remote_function_options.setter
+ def remote_function_options(self, value):
+ api_repr = value
+ if isinstance(value, RemoteFunctionOptions):
+ api_repr = value.to_api_repr()
+ elif value is not None:
+ raise ValueError(
+ "value must be google.cloud.bigquery.routine.RemoteFunctionOptions "
+ "or None"
+ )
+ self._properties[
+ self._PROPERTY_TO_API_FIELD["remote_function_options"]
+ ] = api_repr
+
+ @property
+ def data_governance_type(self):
+ """Optional[str]: If set to ``DATA_MASKING``, the function is validated
+ and made available as a masking function.
+
+ Raises:
+ ValueError:
+ If the value is not a string or :data:`None`.
+ """
+ return self._properties.get(self._PROPERTY_TO_API_FIELD["data_governance_type"])
+
+ @data_governance_type.setter
+ def data_governance_type(self, value):
+ if value is not None and not isinstance(value, str):
+ raise ValueError(
+ "invalid data_governance_type, must be a string or `None`."
+ )
+ self._properties[self._PROPERTY_TO_API_FIELD["data_governance_type"]] = value
+
+ @property
+ def external_runtime_options(self):
+ """Optional[google.cloud.bigquery.routine.ExternalRuntimeOptions]:
+ Configures the external runtime options for a routine.
+
+ Raises:
+ ValueError:
+ If the value is not
+ :class:`~google.cloud.bigquery.routine.ExternalRuntimeOptions` or
+ :data:`None`.
+ """
+ prop = self._properties.get(
+ self._PROPERTY_TO_API_FIELD["external_runtime_options"]
+ )
+ if prop is not None:
+ return ExternalRuntimeOptions.from_api_repr(prop)
+
+ @external_runtime_options.setter
+ def external_runtime_options(self, value):
+ api_repr = value
+ if isinstance(value, ExternalRuntimeOptions):
+ api_repr = value.to_api_repr()
+ elif value is not None:
+ raise ValueError(
+ "value must be google.cloud.bigquery.routine.ExternalRuntimeOptions "
+ "or None"
+ )
+ self._properties[
+ self._PROPERTY_TO_API_FIELD["external_runtime_options"]
+ ] = api_repr
+
@classmethod
def from_api_repr(cls, resource: dict) -> "Routine":
"""Factory: construct a routine given its API representation.
@@ -466,17 +550,17 @@ def __init__(self):
@property
def project(self):
"""str: ID of the project containing the routine."""
- return self._properties["projectId"] # pytype: disable=key-error
+ return self._properties.get("projectId", "")
@property
def dataset_id(self):
"""str: ID of dataset containing the routine."""
- return self._properties["datasetId"] # pytype: disable=key-error
+ return self._properties.get("datasetId", "")
@property
def routine_id(self):
"""str: The routine ID."""
- return self._properties["routineId"] # pytype: disable=key-error
+ return self._properties.get("routineId", "")
@property
def path(self):
@@ -505,7 +589,7 @@ def from_api_repr(cls, resource: dict) -> "RoutineReference":
@classmethod
def from_string(
- cls, routine_id: str, default_project: str = None
+ cls, routine_id: str, default_project: Optional[str] = None
) -> "RoutineReference":
"""Factory: construct a routine reference from routine ID string.
@@ -563,3 +647,275 @@ def __str__(self):
This is a fully-qualified ID, including the project ID and dataset ID.
"""
return "{}.{}.{}".format(self.project, self.dataset_id, self.routine_id)
+
+
+class RemoteFunctionOptions(object):
+ """Configuration options for controlling remote BigQuery functions."""
+
+ _PROPERTY_TO_API_FIELD = {
+ "endpoint": "endpoint",
+ "connection": "connection",
+ "max_batching_rows": "maxBatchingRows",
+ "user_defined_context": "userDefinedContext",
+ }
+
+ def __init__(
+ self,
+ endpoint=None,
+ connection=None,
+ max_batching_rows=None,
+ user_defined_context=None,
+ _properties=None,
+ ) -> None:
+ if _properties is None:
+ _properties = {}
+ self._properties = _properties
+
+ if endpoint is not None:
+ self.endpoint = endpoint
+ if connection is not None:
+ self.connection = connection
+ if max_batching_rows is not None:
+ self.max_batching_rows = max_batching_rows
+ if user_defined_context is not None:
+ self.user_defined_context = user_defined_context
+
+ @property
+ def connection(self):
+ """string: Fully qualified name of the user-provided connection object which holds the authentication information to send requests to the remote service.
+
+ Format is "projects/{projectId}/locations/{locationId}/connections/{connectionId}"
+ """
+ return _helpers._str_or_none(self._properties.get("connection"))
+
+ @connection.setter
+ def connection(self, value):
+ self._properties["connection"] = _helpers._str_or_none(value)
+
+ @property
+ def endpoint(self):
+ """string: Endpoint of the user-provided remote service
+
+ Example: "https://us-east1-my_gcf_project.cloudfunctions.net/remote_add"
+ """
+ return _helpers._str_or_none(self._properties.get("endpoint"))
+
+ @endpoint.setter
+ def endpoint(self, value):
+ self._properties["endpoint"] = _helpers._str_or_none(value)
+
+ @property
+ def max_batching_rows(self):
+ """int64: Max number of rows in each batch sent to the remote service.
+
+ If absent or if 0, BigQuery dynamically decides the number of rows in a batch.
+ """
+ return _helpers._int_or_none(self._properties.get("maxBatchingRows"))
+
+ @max_batching_rows.setter
+ def max_batching_rows(self, value):
+ self._properties["maxBatchingRows"] = _helpers._str_or_none(value)
+
+ @property
+ def user_defined_context(self):
+ """Dict[str, str]: User-defined context as a set of key/value pairs,
+ which will be sent as function invocation context together with
+ batched arguments in the requests to the remote service. The total
+ number of bytes of keys and values must be less than 8KB.
+ """
+ return self._properties.get("userDefinedContext")
+
+ @user_defined_context.setter
+ def user_defined_context(self, value):
+ if not isinstance(value, dict):
+ raise ValueError("value must be dictionary")
+ self._properties["userDefinedContext"] = value
+
+ @classmethod
+ def from_api_repr(cls, resource: dict) -> "RemoteFunctionOptions":
+ """Factory: construct remote function options given its API representation.
+
+ Args:
+ resource (Dict[str, object]): Resource, as returned from the API.
+
+ Returns:
+ google.cloud.bigquery.routine.RemoteFunctionOptions:
+ Python object, as parsed from ``resource``.
+ """
+ ref = cls()
+ ref._properties = resource
+ return ref
+
+ def to_api_repr(self) -> dict:
+ """Construct the API resource representation of this RemoteFunctionOptions.
+
+ Returns:
+ Dict[str, object]: Remote function options represented as an API resource.
+ """
+ return self._properties
+
+ def __eq__(self, other):
+ if not isinstance(other, RemoteFunctionOptions):
+ return NotImplemented
+ return self._properties == other._properties
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __repr__(self):
+ all_properties = [
+ "{}={}".format(property_name, repr(getattr(self, property_name)))
+ for property_name in sorted(self._PROPERTY_TO_API_FIELD)
+ ]
+ return "RemoteFunctionOptions({})".format(", ".join(all_properties))
+
+
+class ExternalRuntimeOptions(object):
+ """Options for the runtime of the external system.
+
+ Args:
+ container_memory (str):
+ Optional. Amount of memory provisioned for a Python UDF container
+ instance. Format: {number}{unit} where unit is one of "M", "G", "Mi"
+ and "Gi" (e.g. 1G, 512Mi). If not specified, the default value is
+ 512Mi. For more information, see `Configure container limits for
+ Python UDFs `_
+ container_cpu (int):
+ Optional. Amount of CPU provisioned for a Python UDF container
+ instance. For more information, see `Configure container limits
+ for Python UDFs `_
+ runtime_connection (str):
+ Optional. Fully qualified name of the connection whose service account
+ will be used to execute the code in the container. Format:
+ "projects/{projectId}/locations/{locationId}/connections/{connectionId}"
+ max_batching_rows (int):
+ Optional. Maximum number of rows in each batch sent to the external
+ runtime. If absent or if 0, BigQuery dynamically decides the number of
+ rows in a batch.
+ runtime_version (str):
+ Optional. Language runtime version. Example: python-3.11.
+ """
+
+ _PROPERTY_TO_API_FIELD = {
+ "container_memory": "containerMemory",
+ "container_cpu": "containerCpu",
+ "runtime_connection": "runtimeConnection",
+ "max_batching_rows": "maxBatchingRows",
+ "runtime_version": "runtimeVersion",
+ }
+
+ def __init__(
+ self,
+ container_memory: Optional[str] = None,
+ container_cpu: Optional[int] = None,
+ runtime_connection: Optional[str] = None,
+ max_batching_rows: Optional[int] = None,
+ runtime_version: Optional[str] = None,
+ _properties: Optional[Dict] = None,
+ ) -> None:
+ if _properties is None:
+ _properties = {}
+ self._properties = _properties
+
+ if container_memory is not None:
+ self.container_memory = container_memory
+ if container_cpu is not None:
+ self.container_cpu = container_cpu
+ if runtime_connection is not None:
+ self.runtime_connection = runtime_connection
+ if max_batching_rows is not None:
+ self.max_batching_rows = max_batching_rows
+ if runtime_version is not None:
+ self.runtime_version = runtime_version
+
+ @property
+ def container_memory(self) -> Optional[str]:
+ """Optional. Amount of memory provisioned for a Python UDF container instance."""
+ return _helpers._str_or_none(self._properties.get("containerMemory"))
+
+ @container_memory.setter
+ def container_memory(self, value: Optional[str]):
+ if value is not None and not isinstance(value, str):
+ raise ValueError("container_memory must be a string or None.")
+ self._properties["containerMemory"] = value
+
+ @property
+ def container_cpu(self) -> Optional[int]:
+ """Optional. Amount of CPU provisioned for a Python UDF container instance."""
+ return _helpers._int_or_none(self._properties.get("containerCpu"))
+
+ @container_cpu.setter
+ def container_cpu(self, value: Optional[int]):
+ if value is not None and not isinstance(value, int):
+ raise ValueError("container_cpu must be an integer or None.")
+ self._properties["containerCpu"] = value
+
+ @property
+ def runtime_connection(self) -> Optional[str]:
+ """Optional. Fully qualified name of the connection."""
+ return _helpers._str_or_none(self._properties.get("runtimeConnection"))
+
+ @runtime_connection.setter
+ def runtime_connection(self, value: Optional[str]):
+ if value is not None and not isinstance(value, str):
+ raise ValueError("runtime_connection must be a string or None.")
+ self._properties["runtimeConnection"] = value
+
+ @property
+ def max_batching_rows(self) -> Optional[int]:
+ """Optional. Maximum number of rows in each batch sent to the external runtime."""
+ return typing.cast(
+ int, _helpers._int_or_none(self._properties.get("maxBatchingRows"))
+ )
+
+ @max_batching_rows.setter
+ def max_batching_rows(self, value: Optional[int]):
+ if value is not None and not isinstance(value, int):
+ raise ValueError("max_batching_rows must be an integer or None.")
+ self._properties["maxBatchingRows"] = _helpers._str_or_none(value)
+
+ @property
+ def runtime_version(self) -> Optional[str]:
+ """Optional. Language runtime version."""
+ return _helpers._str_or_none(self._properties.get("runtimeVersion"))
+
+ @runtime_version.setter
+ def runtime_version(self, value: Optional[str]):
+ if value is not None and not isinstance(value, str):
+ raise ValueError("runtime_version must be a string or None.")
+ self._properties["runtimeVersion"] = value
+
+ @classmethod
+ def from_api_repr(cls, resource: dict) -> "ExternalRuntimeOptions":
+ """Factory: construct external runtime options given its API representation.
+ Args:
+ resource (Dict[str, object]): Resource, as returned from the API.
+ Returns:
+ google.cloud.bigquery.routine.ExternalRuntimeOptions:
+ Python object, as parsed from ``resource``.
+ """
+ ref = cls()
+ ref._properties = resource
+ return ref
+
+ def to_api_repr(self) -> dict:
+ """Construct the API resource representation of this ExternalRuntimeOptions.
+ Returns:
+ Dict[str, object]: External runtime options represented as an API resource.
+ """
+ return self._properties
+
+ def __eq__(self, other):
+ if not isinstance(other, ExternalRuntimeOptions):
+ return NotImplemented
+ return self._properties == other._properties
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __repr__(self):
+ all_properties = [
+ "{}={}".format(property_name, repr(getattr(self, property_name)))
+ for property_name in sorted(self._PROPERTY_TO_API_FIELD)
+ ]
+ return "ExternalRuntimeOptions({})".format(", ".join(all_properties))
diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py
index 5580a2ae9..1809df21f 100644
--- a/google/cloud/bigquery/schema.py
+++ b/google/cloud/bigquery/schema.py
@@ -14,19 +14,22 @@
"""Schemas for BigQuery tables / queries."""
-import collections
+from __future__ import annotations
import enum
-from typing import Any, Dict, Iterable, Union
+import typing
+from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence
+from google.cloud.bigquery import _helpers
from google.cloud.bigquery import standard_sql
+from google.cloud.bigquery import enums
from google.cloud.bigquery.enums import StandardSqlTypeNames
_STRUCT_TYPES = ("RECORD", "STRUCT")
# SQL types reference:
-# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
-# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
+# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types
+# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types
LEGACY_TO_STANDARD_TYPES = {
"STRING": StandardSqlTypeNames.STRING,
"BYTES": StandardSqlTypeNames.BYTES,
@@ -45,6 +48,7 @@
"DATE": StandardSqlTypeNames.DATE,
"TIME": StandardSqlTypeNames.TIME,
"DATETIME": StandardSqlTypeNames.DATETIME,
+ "FOREIGN": StandardSqlTypeNames.FOREIGN,
# no direct conversion from ARRAY, the latter is represented by mode="REPEATED"
}
"""String names of the legacy SQL types to integer codes of Standard SQL standard_sql."""
@@ -66,6 +70,46 @@ class _DefaultSentinel(enum.Enum):
_DEFAULT_VALUE = _DefaultSentinel.DEFAULT_VALUE
+class FieldElementType(object):
+ """Represents the type of a field element.
+
+ Args:
+ element_type (str): The type of a field element.
+ """
+
+ def __init__(self, element_type: str):
+ self._properties = {}
+ self._properties["type"] = element_type.upper()
+
+ @property
+ def element_type(self):
+ return self._properties.get("type")
+
+ @classmethod
+ def from_api_repr(cls, api_repr: Optional[dict]) -> Optional["FieldElementType"]:
+ """Factory: construct a FieldElementType given its API representation.
+
+ Args:
+ api_repr (Dict[str, str]): field element type as returned from
+ the API.
+
+ Returns:
+ google.cloud.bigquery.FieldElementType:
+ Python object, as parsed from ``api_repr``.
+ """
+ if not api_repr:
+ return None
+ return cls(api_repr["type"].upper())
+
+ def to_api_repr(self) -> dict:
+ """Construct the API resource representation of this field element type.
+
+ Returns:
+ Dict[str, str]: Field element type represented as an API resource.
+ """
+ return self._properties
+
+
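
A brief sketch of the ``FieldElementType`` added above together with the new ``range_element_type`` argument on ``SchemaField`` (introduced later in this diff); the field name is illustrative:

```python
from google.cloud.bigquery.schema import FieldElementType, SchemaField

# The element type may be given as a string or as a FieldElementType.
field = SchemaField("booking_window", "RANGE", range_element_type="DATE")
print(field.range_element_type.element_type)   # 'DATE'
print(FieldElementType("date").to_api_repr())  # {'type': 'DATE'}
```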
class SchemaField(object):
"""Describe a single field within a table schema.
@@ -93,6 +137,73 @@ class SchemaField(object):
Scale (digits after decimal) of fields with NUMERIC or BIGNUMERIC type.
max_length: Maximum length of fields with STRING or BYTES type.
+
+ default_value_expression: str, Optional
+ Used to specify the default value of a field using a SQL expression. It can only be set for
+ top level fields (columns).
+
+ You can use a struct or array expression to specify default value for the entire struct or
+ array. The valid SQL expressions are:
+
+ - Literals for all data types, including STRUCT and ARRAY.
+
+ - The following functions:
+
+ `CURRENT_TIMESTAMP`
+ `CURRENT_TIME`
+ `CURRENT_DATE`
+ `CURRENT_DATETIME`
+ `GENERATE_UUID`
+ `RAND`
+ `SESSION_USER`
+ `ST_GEOPOINT`
+
+ - Struct or array composed with the above allowed functions, for example:
+
+ "[CURRENT_DATE(), DATE '2020-01-01'"]
+
+ range_element_type: FieldElementType, str, Optional
+ The subtype of the RANGE, if the type of this field is RANGE. If
+ the type is RANGE, this field is required. Possible values for the
+ field element type of a RANGE include `DATE`, `DATETIME` and
+ `TIMESTAMP`.
+
+ rounding_mode: Union[enums.RoundingMode, str, None]
+ Specifies the rounding mode to be used when storing values of
+ NUMERIC and BIGNUMERIC type.
+
+ Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO.
+ ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero
+ when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
+ values.
+
+ For Scale: 0
+ 1.1, 1.2, 1.3, 1.4 => 1
+ 1.5, 1.6, 1.7, 1.8, 1.9 => 2
+
+ ROUND_HALF_EVEN rounds half values to the nearest even value
+ when applying precision and scale upon writing of NUMERIC and BIGNUMERIC
+ values.
+
+ For Scale: 0
+ 1.1, 1.2, 1.3, 1.4 => 1
+ 1.5 => 2
+ 1.6, 1.7, 1.8, 1.9 => 2
+ 2.5 => 2
+
+ foreign_type_definition: Optional[str]
+ Definition of the foreign data type.
+
+ Only valid for top-level schema fields (not nested fields).
+ If the type is FOREIGN, this field is required.
+
+ timestamp_precision: Optional[enums.TimestampPrecision]
+ Precision (maximum number of total digits in base 10) for seconds
+ of TIMESTAMP type.
+
+ Defaults to `enums.TimestampPrecision.MICROSECOND` (`None`) for
+ microsecond precision. Use `enums.TimestampPrecision.PICOSECOND`
+ (`12`) for picosecond precision.
"""
def __init__(
@@ -100,21 +211,29 @@ def __init__(
name: str,
field_type: str,
mode: str = "NULLABLE",
+ default_value_expression: Optional[str] = None,
description: Union[str, _DefaultSentinel] = _DEFAULT_VALUE,
fields: Iterable["SchemaField"] = (),
policy_tags: Union["PolicyTagList", None, _DefaultSentinel] = _DEFAULT_VALUE,
precision: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE,
+ range_element_type: Union[FieldElementType, str, None] = None,
+ rounding_mode: Union[enums.RoundingMode, str, None] = None,
+ foreign_type_definition: Optional[str] = None,
+ timestamp_precision: Optional[enums.TimestampPrecision] = None,
):
self._properties: Dict[str, Any] = {
"name": name,
"type": field_type,
}
+ self._properties["name"] = name
if mode is not None:
self._properties["mode"] = mode.upper()
if description is not _DEFAULT_VALUE:
self._properties["description"] = description
+ if default_value_expression is not None:
+ self._properties["defaultValueExpression"] = default_value_expression
if precision is not _DEFAULT_VALUE:
self._properties["precision"] = precision
if scale is not _DEFAULT_VALUE:
@@ -123,65 +242,77 @@ def __init__(
self._properties["maxLength"] = max_length
if policy_tags is not _DEFAULT_VALUE:
self._properties["policyTags"] = (
- policy_tags.to_api_repr() if policy_tags is not None else None
+ policy_tags.to_api_repr()
+ if isinstance(policy_tags, PolicyTagList)
+ else None
)
- self._fields = tuple(fields)
-
- @staticmethod
- def __get_int(api_repr, name):
- v = api_repr.get(name, _DEFAULT_VALUE)
- if v is not _DEFAULT_VALUE:
- v = int(v)
- return v
+ if isinstance(timestamp_precision, enums.TimestampPrecision):
+ self._properties["timestampPrecision"] = timestamp_precision.value
+ elif timestamp_precision is not None:
+ raise ValueError(
+ "timestamp_precision must be class enums.TimestampPrecision "
+ f"or None, got {type(timestamp_precision)} instead."
+ )
+ if isinstance(range_element_type, str):
+ self._properties["rangeElementType"] = {"type": range_element_type}
+ if isinstance(range_element_type, FieldElementType):
+ self._properties["rangeElementType"] = range_element_type.to_api_repr()
+ if rounding_mode is not None:
+ self._properties["roundingMode"] = rounding_mode
+ if foreign_type_definition is not None:
+ self._properties["foreignTypeDefinition"] = foreign_type_definition
+
+ if fields: # Don't set the property if it's not set.
+ self._properties["fields"] = [field.to_api_repr() for field in fields]
@classmethod
def from_api_repr(cls, api_repr: dict) -> "SchemaField":
"""Return a ``SchemaField`` object deserialized from a dictionary.
Args:
- api_repr (Mapping[str, str]): The serialized representation
- of the SchemaField, such as what is output by
- :meth:`to_api_repr`.
+ api_repr (dict): The serialized representation of the SchemaField,
+ such as what is output by :meth:`to_api_repr`.
Returns:
- google.cloud.biquery.schema.SchemaField: The ``SchemaField`` object.
+ google.cloud.bigquery.schema.SchemaField: The ``SchemaField`` object.
"""
- field_type = api_repr["type"].upper()
-
- # Handle optional properties with default values
- mode = api_repr.get("mode", "NULLABLE")
- description = api_repr.get("description", _DEFAULT_VALUE)
- fields = api_repr.get("fields", ())
- policy_tags = api_repr.get("policyTags", _DEFAULT_VALUE)
-
- if policy_tags is not None and policy_tags is not _DEFAULT_VALUE:
- policy_tags = PolicyTagList.from_api_repr(policy_tags)
-
- return cls(
- field_type=field_type,
- fields=[cls.from_api_repr(f) for f in fields],
- mode=mode.upper(),
- description=description,
- name=api_repr["name"],
- policy_tags=policy_tags,
- precision=cls.__get_int(api_repr, "precision"),
- scale=cls.__get_int(api_repr, "scale"),
- max_length=cls.__get_int(api_repr, "maxLength"),
- )
+ placeholder = cls("this_will_be_replaced", "PLACEHOLDER")
+
+ # The API returns a string even though we send an integer. Convert it
+ # back to an integer so that a received schema can be resent to the
+ # API without modification.
+ try:
+ api_repr["timestampPrecision"] = int(api_repr["timestampPrecision"])
+ except (TypeError, KeyError):
+ pass
+
+ # Note: we don't make a copy of api_repr because this can cause
+ # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
+ # fields. See https://github.com/googleapis/python-bigquery/issues/6
+ placeholder._properties = api_repr
+
+ # Add the field `mode` with default value if it does not exist. Fixes
+ # an incompatibility issue with pandas-gbq:
+ # https://github.com/googleapis/python-bigquery-pandas/issues/854
+ if "mode" not in placeholder._properties:
+ placeholder._properties["mode"] = "NULLABLE"
+
+ return placeholder
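
With the placeholder approach above, ``from_api_repr`` keeps the passed mapping as its backing store, so a parsed field serializes back to an equivalent dict. A quick round-trip sketch:

```python
from google.cloud.bigquery.schema import SchemaField

api_repr = {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}
field = SchemaField.from_api_repr(api_repr)
print(field.name, field.field_type, field.mode)  # age INTEGER REQUIRED
print(field.to_api_repr() == api_repr)           # True
```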
@property
def name(self):
"""str: The name of the field."""
- return self._properties["name"]
+ return self._properties.get("name", "")
@property
- def field_type(self):
+ def field_type(self) -> str:
"""str: The type of the field.
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.type
"""
- return self._properties["type"]
+ type_ = self._properties.get("type")
+ return cast(str, type_).upper()
@property
def mode(self):
@@ -190,13 +321,18 @@ def mode(self):
See:
https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#TableFieldSchema.FIELDS.mode
"""
- return self._properties.get("mode")
+ return cast(str, self._properties.get("mode", "NULLABLE")).upper()
@property
def is_nullable(self):
"""bool: whether 'mode' is 'nullable'."""
return self.mode == "NULLABLE"
+ @property
+ def default_value_expression(self):
+ """Optional[str] default value of a field, using an SQL expression"""
+ return self._properties.get("defaultValueExpression")
+
@property
def description(self):
"""Optional[str]: description for the field."""
@@ -205,17 +341,45 @@ def description(self):
@property
def precision(self):
"""Optional[int]: Precision (number of digits) for the NUMERIC field."""
- return self._properties.get("precision")
+ return _helpers._int_or_none(self._properties.get("precision"))
@property
def scale(self):
"""Optional[int]: Scale (digits after decimal) for the NUMERIC field."""
- return self._properties.get("scale")
+ return _helpers._int_or_none(self._properties.get("scale"))
@property
def max_length(self):
"""Optional[int]: Maximum length for the STRING or BYTES field."""
- return self._properties.get("maxLength")
+ return _helpers._int_or_none(self._properties.get("maxLength"))
+
+ @property
+ def range_element_type(self):
+ """Optional[FieldElementType]: The subtype of the RANGE, if the
+ type of this field is RANGE.
+
+ Must be set when ``type`` is `"RANGE"`. Must be one of `"DATE"`,
+ `"DATETIME"` or `"TIMESTAMP"`.
+ """
+ if self._properties.get("rangeElementType"):
+ ret = self._properties.get("rangeElementType")
+ return FieldElementType.from_api_repr(ret)
+
+ @property
+ def rounding_mode(self):
+ """Enum that specifies the rounding mode to be used when storing values of
+ NUMERIC and BIGNUMERIC type.
+ """
+ return self._properties.get("roundingMode")
+
+ @property
+ def foreign_type_definition(self):
+ """Definition of the foreign data type.
+
+ Only valid for top-level schema fields (not nested fields).
+ If the type is FOREIGN, this field is required.
+ """
+ return self._properties.get("foreignTypeDefinition")
@property
def fields(self):
@@ -223,7 +387,7 @@ def fields(self):
Must be empty if ``field_type`` is not 'RECORD'.
"""
- return self._fields
+ return tuple(_to_schema_fields(self._properties.get("fields", [])))
@property
def policy_tags(self):
@@ -233,21 +397,26 @@ def policy_tags(self):
resource = self._properties.get("policyTags")
return PolicyTagList.from_api_repr(resource) if resource is not None else None
+ @property
+ def timestamp_precision(self) -> enums.TimestampPrecision:
+ """Precision (maximum number of total digits in base 10) for seconds of
+ TIMESTAMP type.
+
+ Returns:
+ enums.TimestampPrecision: value of TimestampPrecision.
+ """
+ return enums.TimestampPrecision(self._properties.get("timestampPrecision"))
+
def to_api_repr(self) -> dict:
"""Return a dictionary representing this schema field.
Returns:
Dict: A dictionary representing the SchemaField in a serialized form.
"""
- answer = self._properties.copy()
-
- # If this is a RECORD type, then sub-fields are also included,
- # add this to the serialized representation.
- if self.field_type.upper() in _STRUCT_TYPES:
- answer["fields"] = [f.to_api_repr() for f in self.fields]
-
- # Done; return the serialized dictionary.
- return answer
+ # Note: we don't make a copy of _properties because this can cause
+ # unnecessary slowdowns, especially on deeply nested STRUCT / RECORD
+ # fields. See https://github.com/googleapis/python-bigquery/issues/6
+ return self._properties
def _key(self):
"""A tuple key that uniquely describes this field.
@@ -257,33 +426,33 @@ def _key(self):
Returns:
Tuple: The contents of this :class:`~google.cloud.bigquery.schema.SchemaField`.
"""
- field_type = self.field_type.upper() if self.field_type is not None else None
-
- # Type can temporarily be set to None if the code needs a SchemaField instance,
- # but has npt determined the exact type of the field yet.
- if field_type is not None:
- if field_type == "STRING" or field_type == "BYTES":
- if self.max_length is not None:
- field_type = f"{field_type}({self.max_length})"
- elif field_type.endswith("NUMERIC"):
- if self.precision is not None:
- if self.scale is not None:
- field_type = f"{field_type}({self.precision}, {self.scale})"
- else:
- field_type = f"{field_type}({self.precision})"
+ field_type = self.field_type
+ if field_type == "STRING" or field_type == "BYTES":
+ if self.max_length is not None:
+ field_type = f"{field_type}({self.max_length})"
+ elif field_type.endswith("NUMERIC"):
+ if self.precision is not None:
+ if self.scale is not None:
+ field_type = f"{field_type}({self.precision}, {self.scale})"
+ else:
+ field_type = f"{field_type}({self.precision})"
policy_tags = (
None if self.policy_tags is None else tuple(sorted(self.policy_tags.names))
)
+ timestamp_precision = self._properties.get("timestampPrecision")
+
return (
self.name,
field_type,
# Mode is always str, if not given it defaults to a str value
self.mode.upper(), # pytype: disable=attribute-error
+ self.default_value_expression,
self.description,
- self._fields,
+ self.fields,
policy_tags,
+ timestamp_precision,
)
def to_standard_sql(self) -> standard_sql.StandardSqlField:
@@ -334,10 +503,9 @@ def __hash__(self):
return hash(self._key())
def __repr__(self):
- key = self._key()
- policy_tags = key[-1]
+ *initial_tags, policy_tags, timestamp_precision_tag = self._key()
policy_tags_inst = None if policy_tags is None else PolicyTagList(policy_tags)
- adjusted_key = key[:-1] + (policy_tags_inst,)
+ adjusted_key = (*initial_tags, policy_tags_inst, timestamp_precision_tag)
return f"{self.__class__.__name__}{adjusted_key}"
@@ -351,6 +519,8 @@ def _parse_schema_resource(info):
Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]:
A list of parsed fields, or ``None`` if no "fields" key found.
"""
+ if isinstance(info, list):
+ return [SchemaField.from_api_repr(f) for f in info]
return [SchemaField.from_api_repr(f) for f in info.get("fields", ())]
@@ -363,40 +533,48 @@ def _build_schema_resource(fields):
Returns:
Sequence[Dict]: Mappings describing the schema of the supplied fields.
"""
- return [field.to_api_repr() for field in fields]
+ if isinstance(fields, Sequence):
+        # Input is a Sequence (e.g. a list): serialize each field to its API representation.
+ return [field.to_api_repr() for field in fields]
+
+ else:
+ raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
def _to_schema_fields(schema):
- """Coerce `schema` to a list of schema field instances.
+ """Coerces schema to a list of SchemaField instances while
+ preserving the original structure as much as possible.
Args:
- schema(Sequence[Union[ \
- :class:`~google.cloud.bigquery.schema.SchemaField`, \
- Mapping[str, Any] \
- ]]):
- Table schema to convert. If some items are passed as mappings,
- their content must be compatible with
- :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
+        schema (Sequence[Union[ \
+            :class:`~google.cloud.bigquery.schema.SchemaField`, \
+            Mapping[str, Any] \
+        ]]):
+ Table schema to convert. Can be a list of SchemaField
+ objects or mappings.
Returns:
- Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`]
+ A list of SchemaField objects.
Raises:
- Exception: If ``schema`` is not a sequence, or if any item in the
- sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField`
- instance or a compatible mapping representation of the field.
+ TypeError: If schema is not a Sequence.
"""
- for field in schema:
- if not isinstance(field, (SchemaField, collections.abc.Mapping)):
- raise ValueError(
- "Schema items must either be fields or compatible "
- "mapping representations."
+
+ if isinstance(schema, Sequence):
+ # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields
+ return [
+ (
+ field
+ if isinstance(field, SchemaField)
+ else SchemaField.from_api_repr(field)
)
+ for field in schema
+ ]
- return [
- field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field)
- for field in schema
- ]
+ else:
+ raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
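
As a rough illustration of the coercion above (``_to_schema_fields`` is a private helper, so this is a sketch rather than supported API), a mixed list of ``SchemaField`` objects and API-style mappings comes back as a list of ``SchemaField`` instances, while a non-sequence raises ``TypeError``:

    from google.cloud.bigquery import SchemaField
    from google.cloud.bigquery import schema as bq_schema

    mixed = [
        SchemaField("id", "INTEGER"),
        {"name": "full_name", "type": "STRING", "mode": "NULLABLE"},
    ]
    fields = bq_schema._to_schema_fields(mixed)
    assert all(isinstance(f, SchemaField) for f in fields)
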
class PolicyTagList(object):
@@ -481,3 +659,267 @@ def to_api_repr(self) -> dict:
"""
answer = {"names": list(self.names)}
return answer
+
+
+class ForeignTypeInfo:
+ """Metadata about the foreign data type definition such as the system in which the
+ type is defined.
+
+ Args:
+ type_system (str): Required. Specifies the system which defines the
+ foreign data type.
+
+ TypeSystem enum currently includes:
+ * "TYPE_SYSTEM_UNSPECIFIED"
+ * "HIVE"
+ """
+
+ def __init__(self, type_system: Optional[str] = None):
+ self._properties: Dict[str, Any] = {}
+ self.type_system = type_system
+
+ @property
+ def type_system(self) -> Optional[str]:
+ """Required. Specifies the system which defines the foreign data
+ type."""
+
+ return self._properties.get("typeSystem")
+
+ @type_system.setter
+ def type_system(self, value: Optional[str]):
+ value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
+ self._properties["typeSystem"] = value
+
+ def to_api_repr(self) -> dict:
+ """Build an API representation of this object.
+
+ Returns:
+ Dict[str, Any]:
+ A dictionary in the format used by the BigQuery API.
+ """
+
+ return self._properties
+
+ @classmethod
+ def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignTypeInfo":
+ """Factory: constructs an instance of the class (cls)
+ given its API representation.
+
+ Args:
+ api_repr (Dict[str, Any]):
+ API representation of the object to be instantiated.
+
+ Returns:
+ An instance of the class initialized with data from 'api_repr'.
+ """
+
+ config = cls()
+ config._properties = api_repr
+ return config
+
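
A short, self-contained sketch of the round trip this class supports:

    from google.cloud.bigquery.schema import ForeignTypeInfo

    info = ForeignTypeInfo(type_system="HIVE")
    assert info.to_api_repr() == {"typeSystem": "HIVE"}

    restored = ForeignTypeInfo.from_api_repr({"typeSystem": "HIVE"})
    assert restored.type_system == "HIVE"
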
+
+class SerDeInfo:
+ """Serializer and deserializer information.
+
+ Args:
+ serialization_library (str): Required. Specifies a fully-qualified class
+ name of the serialization library that is responsible for the
+ translation of data between table representation and the underlying
+ low-level input and output format structures. The maximum length is
+ 256 characters.
+ name (Optional[str]): Name of the SerDe. The maximum length is 256
+ characters.
+        parameters (Optional[dict[str, str]]): Key-value pairs that define the initialization
+            parameters for the serialization library. Maximum size 10 KiB.
+ """
+
+ def __init__(
+ self,
+ serialization_library: str,
+ name: Optional[str] = None,
+ parameters: Optional[dict[str, str]] = None,
+ ):
+ self._properties: Dict[str, Any] = {}
+ self.serialization_library = serialization_library
+ self.name = name
+ self.parameters = parameters
+
+ @property
+ def serialization_library(self) -> str:
+ """Required. Specifies a fully-qualified class name of the serialization
+ library that is responsible for the translation of data between table
+ representation and the underlying low-level input and output format
+ structures. The maximum length is 256 characters."""
+
+ return typing.cast(str, self._properties.get("serializationLibrary"))
+
+ @serialization_library.setter
+ def serialization_library(self, value: str):
+ value = _helpers._isinstance_or_raise(value, str, none_allowed=False)
+ self._properties["serializationLibrary"] = value
+
+ @property
+ def name(self) -> Optional[str]:
+ """Optional. Name of the SerDe. The maximum length is 256 characters."""
+
+ return self._properties.get("name")
+
+ @name.setter
+ def name(self, value: Optional[str] = None):
+ value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
+ self._properties["name"] = value
+
+ @property
+ def parameters(self) -> Optional[dict[str, str]]:
+ """Optional. Key-value pairs that define the initialization parameters
+        for the serialization library. Maximum size 10 KiB."""
+
+ return self._properties.get("parameters")
+
+ @parameters.setter
+ def parameters(self, value: Optional[dict[str, str]] = None):
+ value = _helpers._isinstance_or_raise(value, dict, none_allowed=True)
+ self._properties["parameters"] = value
+
+ def to_api_repr(self) -> dict:
+ """Build an API representation of this object.
+
+ Returns:
+ Dict[str, Any]:
+ A dictionary in the format used by the BigQuery API.
+ """
+ return self._properties
+
+ @classmethod
+ def from_api_repr(cls, api_repr: dict) -> SerDeInfo:
+ """Factory: constructs an instance of the class (cls)
+ given its API representation.
+
+ Args:
+ api_repr (Dict[str, Any]):
+ API representation of the object to be instantiated.
+
+ Returns:
+ An instance of the class initialized with data from 'api_repr'.
+ """
+ config = cls("PLACEHOLDER")
+ config._properties = api_repr
+ return config
+
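
For example (the serialization library name below is just an illustrative Hive SerDe, not a value required by this class):

    from google.cloud.bigquery.schema import SerDeInfo

    serde = SerDeInfo(
        serialization_library="org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
        name="csv_serde",
        parameters={"field.delim": ","},
    )
    assert serde.to_api_repr()["serializationLibrary"].endswith("LazySimpleSerDe")
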
+
+class StorageDescriptor:
+ """Contains information about how a table's data is stored and accessed by open
+ source query engines.
+
+ Args:
+ input_format (Optional[str]): Specifies the fully qualified class name of
+ the InputFormat (e.g.
+ "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum
+ length is 128 characters.
+ location_uri (Optional[str]): The physical location of the table (e.g.
+ 'gs://spark-dataproc-data/pangea-data/case_sensitive/' or
+ 'gs://spark-dataproc-data/pangea-data/'). The maximum length is
+ 2056 bytes.
+ output_format (Optional[str]): Specifies the fully qualified class name
+ of the OutputFormat (e.g.
+ "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat"). The maximum
+ length is 128 characters.
+ serde_info (Union[SerDeInfo, dict, None]): Serializer and deserializer information.
+ """
+
+ def __init__(
+ self,
+ input_format: Optional[str] = None,
+ location_uri: Optional[str] = None,
+ output_format: Optional[str] = None,
+ serde_info: Union[SerDeInfo, dict, None] = None,
+ ):
+ self._properties: Dict[str, Any] = {}
+ self.input_format = input_format
+ self.location_uri = location_uri
+ self.output_format = output_format
+        # Using typing.cast() because mypy cannot work out that the setter
+        # accepts Union[SerDeInfo, dict, None] while the getter only ever
+        # returns Optional[SerDeInfo].
+ self.serde_info = typing.cast(Optional[SerDeInfo], serde_info)
+
+ @property
+ def input_format(self) -> Optional[str]:
+ """Optional. Specifies the fully qualified class name of the InputFormat
+ (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"). The maximum
+ length is 128 characters."""
+
+ return self._properties.get("inputFormat")
+
+ @input_format.setter
+ def input_format(self, value: Optional[str]):
+ value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
+ self._properties["inputFormat"] = value
+
+ @property
+ def location_uri(self) -> Optional[str]:
+ """Optional. The physical location of the table (e.g. 'gs://spark-
+ dataproc-data/pangea-data/case_sensitive/' or 'gs://spark-dataproc-
+ data/pangea-data/'). The maximum length is 2056 bytes."""
+
+ return self._properties.get("locationUri")
+
+ @location_uri.setter
+ def location_uri(self, value: Optional[str]):
+ value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
+ self._properties["locationUri"] = value
+
+ @property
+ def output_format(self) -> Optional[str]:
+ """Optional. Specifies the fully qualified class name of the
+ OutputFormat (e.g. "org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat").
+ The maximum length is 128 characters."""
+
+ return self._properties.get("outputFormat")
+
+ @output_format.setter
+ def output_format(self, value: Optional[str]):
+ value = _helpers._isinstance_or_raise(value, str, none_allowed=True)
+ self._properties["outputFormat"] = value
+
+ @property
+ def serde_info(self) -> Optional[SerDeInfo]:
+ """Optional. Serializer and deserializer information."""
+
+ prop = _helpers._get_sub_prop(self._properties, ["serDeInfo"])
+ if prop is not None:
+ return typing.cast(SerDeInfo, SerDeInfo.from_api_repr(prop))
+ return None
+
+ @serde_info.setter
+ def serde_info(self, value: Union[SerDeInfo, dict, None]):
+ value = _helpers._isinstance_or_raise(
+ value, (SerDeInfo, dict), none_allowed=True
+ )
+
+ if isinstance(value, SerDeInfo):
+ self._properties["serDeInfo"] = value.to_api_repr()
+ else:
+ self._properties["serDeInfo"] = value
+
+ def to_api_repr(self) -> dict:
+ """Build an API representation of this object.
+ Returns:
+ Dict[str, Any]:
+ A dictionary in the format used by the BigQuery API.
+ """
+ return self._properties
+
+ @classmethod
+ def from_api_repr(cls, resource: dict) -> StorageDescriptor:
+ """Factory: constructs an instance of the class (cls)
+ given its API representation.
+ Args:
+ resource (Dict[str, Any]):
+ API representation of the object to be instantiated.
+ Returns:
+ An instance of the class initialized with data from 'resource'.
+ """
+ config = cls()
+ config._properties = resource
+ return config
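
Putting the pieces together, a ``StorageDescriptor`` can nest a ``SerDeInfo`` either as an object or as its dict representation; the bucket path and class names below are hypothetical:

    from google.cloud.bigquery.schema import SerDeInfo, StorageDescriptor

    descriptor = StorageDescriptor(
        input_format="org.apache.hadoop.hive.ql.io.orc.OrcInputFormat",
        output_format="org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat",
        location_uri="gs://example-bucket/warehouse/my_table/",
        serde_info=SerDeInfo("org.apache.hadoop.hive.ql.io.orc.OrcSerde"),
    )
    api_repr = descriptor.to_api_repr()
    assert isinstance(api_repr["serDeInfo"], dict)  # nested object stored in API form
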
diff --git a/google/cloud/bigquery/standard_sql.py b/google/cloud/bigquery/standard_sql.py
index e0f22b2de..68332eb80 100644
--- a/google/cloud/bigquery/standard_sql.py
+++ b/google/cloud/bigquery/standard_sql.py
@@ -43,6 +43,7 @@ class StandardSqlDataType:
]
}
}
+ RANGE: {type_kind="RANGE", range_element_type="DATETIME"}
Args:
type_kind:
@@ -52,6 +53,8 @@ class StandardSqlDataType:
The type of the array's elements, if type_kind is ARRAY.
struct_type:
The fields of this struct, in order, if type_kind is STRUCT.
+ range_element_type:
+ The type of the range's elements, if type_kind is RANGE.
"""
def __init__(
@@ -61,12 +64,14 @@ def __init__(
] = StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
array_element_type: Optional["StandardSqlDataType"] = None,
struct_type: Optional["StandardSqlStructType"] = None,
+ range_element_type: Optional["StandardSqlDataType"] = None,
):
self._properties: Dict[str, Any] = {}
self.type_kind = type_kind
self.array_element_type = array_element_type
self.struct_type = struct_type
+ self.range_element_type = range_element_type
@property
def type_kind(self) -> Optional[StandardSqlTypeNames]:
@@ -127,6 +132,28 @@ def struct_type(self, value: Optional["StandardSqlStructType"]):
else:
self._properties["structType"] = struct_type
+ @property
+ def range_element_type(self) -> Optional["StandardSqlDataType"]:
+ """The type of the range's elements, if type_kind = "RANGE". Must be
+ one of DATETIME, DATE, or TIMESTAMP."""
+ range_element_info = self._properties.get("rangeElementType")
+
+ if range_element_info is None:
+ return None
+
+ result = StandardSqlDataType()
+ result._properties = range_element_info # We do not use a copy on purpose.
+ return result
+
+ @range_element_type.setter
+ def range_element_type(self, value: Optional["StandardSqlDataType"]):
+ range_element_type = None if value is None else value.to_api_repr()
+
+ if range_element_type is None:
+ self._properties.pop("rangeElementType", None)
+ else:
+ self._properties["rangeElementType"] = range_element_type
+
def to_api_repr(self) -> Dict[str, Any]:
"""Construct the API resource representation of this SQL data type."""
return copy.deepcopy(self._properties)
@@ -155,7 +182,13 @@ def from_api_repr(cls, resource: Dict[str, Any]):
if struct_info:
struct_type = StandardSqlStructType.from_api_repr(struct_info)
- return cls(type_kind, array_element_type, struct_type)
+ range_element_type = None
+ if type_kind == StandardSqlTypeNames.RANGE:
+ range_element_info = resource.get("rangeElementType")
+ if range_element_info:
+ range_element_type = cls.from_api_repr(range_element_info)
+
+ return cls(type_kind, array_element_type, struct_type, range_element_type)
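
A sketch of how a RANGE type round-trips through the API representation (field names follow the REST resource shown above):

    from google.cloud.bigquery.enums import StandardSqlTypeNames
    from google.cloud.bigquery.standard_sql import StandardSqlDataType

    range_type = StandardSqlDataType.from_api_repr(
        {"typeKind": "RANGE", "rangeElementType": {"typeKind": "DATE"}}
    )
    assert range_type.type_kind == StandardSqlTypeNames.RANGE
    assert range_type.range_element_type.type_kind == StandardSqlTypeNames.DATE
    # to_api_repr() reproduces the nested {"typeKind": "DATE"} element type.
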
def __eq__(self, other):
if not isinstance(other, StandardSqlDataType):
@@ -165,6 +198,7 @@ def __eq__(self, other):
self.type_kind == other.type_kind
and self.array_element_type == other.array_element_type
and self.struct_type == other.struct_type
+ and self.range_element_type == other.range_element_type
)
def __str__(self):
diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py
index 72eb1baf6..5efcb1958 100644
--- a/google/cloud/bigquery/table.py
+++ b/google/cloud/bigquery/table.py
@@ -21,29 +21,39 @@
import functools
import operator
import typing
-from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence
+
import warnings
try:
import pandas # type: ignore
-except ImportError: # pragma: NO COVER
+except ImportError:
pandas = None
-import pyarrow # type: ignore
+try:
+ import pyarrow # type: ignore
+except ImportError:
+ pyarrow = None
+
+try:
+ import db_dtypes # type: ignore
+except ImportError:
+ db_dtypes = None
try:
import geopandas # type: ignore
except ImportError:
geopandas = None
-else:
+finally:
_COORDINATE_REFERENCE_SYSTEM = "EPSG:4326"
try:
- import shapely.geos # type: ignore
+ import shapely # type: ignore
+ from shapely import wkt # type: ignore
except ImportError:
shapely = None
else:
- _read_wkt = shapely.geos.WKTReader(shapely.geos.lgeos).read
+ _read_wkt = wkt.loads
import google.api_core.exceptions
from google.api_core.page_iterator import HTTPIterator
@@ -51,19 +61,25 @@
import google.cloud._helpers # type: ignore
from google.cloud.bigquery import _helpers
from google.cloud.bigquery import _pandas_helpers
+from google.cloud.bigquery import _versions_helpers
+from google.cloud.bigquery import exceptions as bq_exceptions
+from google.cloud.bigquery._tqdm_helpers import get_progress_bar
+from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
+from google.cloud.bigquery.enums import DefaultPandasDTypes
+from google.cloud.bigquery.external_config import ExternalConfig
+from google.cloud.bigquery import schema as _schema
from google.cloud.bigquery.schema import _build_schema_resource
from google.cloud.bigquery.schema import _parse_schema_resource
from google.cloud.bigquery.schema import _to_schema_fields
-from google.cloud.bigquery._tqdm_helpers import get_progress_bar
-from google.cloud.bigquery.external_config import ExternalConfig
-from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration
+from google.cloud.bigquery import external_config
if typing.TYPE_CHECKING: # pragma: NO COVER
# Unconditionally import optional dependencies again to tell pytype that
# they are not None, avoiding false "no attribute" errors.
import pandas
- import geopandas
- from google.cloud import bigquery_storage
+ import pyarrow
+ import geopandas # type: ignore
+ from google.cloud import bigquery_storage # type: ignore
from google.cloud.bigquery.dataset import DatasetReference
@@ -71,6 +87,10 @@
"The geopandas library is not installed, please install "
"geopandas to use the to_geodataframe() function."
)
+_NO_PYARROW_ERROR = (
+ "The pyarrow library is not installed, please install "
+ "pyarrow to use the to_arrow() function."
+)
_NO_SHAPELY_ERROR = (
"The shapely library is not installed, please install "
"shapely to use the geography_as_object option."
@@ -78,6 +98,31 @@
_TABLE_HAS_NO_SCHEMA = 'Table has no schema: call "client.get_table()"'
+_NO_SUPPORTED_DTYPE = (
+ "The dtype cannot to be converted to a pandas ExtensionArray "
+ "because the necessary `__from_arrow__` attribute is missing."
+)
+
+_RANGE_PYARROW_WARNING = (
+ "Unable to represent RANGE schema as struct using pandas ArrowDtype. Using "
+ "`object` instead. To use ArrowDtype, use pandas >= 1.5 and "
+ "pyarrow >= 10.0.1."
+)
+
+# How many of the total rows need to be downloaded already for us to skip
+# calling the BQ Storage API?
+#
+# In microbenchmarks on 2024-05-21, I (tswast@) measure that when more than
+# about 2 MB of results remain, it's faster to use the BQ Storage Read API to
+# download them than to finish with jobs.getQueryResults. Since we don't have
+# a good way to know the remaining bytes, we estimate by the remaining number
+# of rows.
+#
+# Except when rows themselves are larger, I observe that a single page of
+# results is around 10 MB. Therefore, the proportion of rows already
+# downloaded should be at least 10 (first page) / 12 (all results) for it to
+# be worth finishing the download with jobs.getQueryResults.
+ALMOST_COMPLETELY_CACHED_RATIO = 0.833333
+
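
A hypothetical back-of-the-envelope check of that heuristic (the row counts are made up; the ratio is the module constant defined above):

    ALMOST_COMPLETELY_CACHED_RATIO = 0.833333  # 10 / 12, as derived above

    total_rows = 12_000
    cached_rows = 10_000  # roughly one ~10 MB first page already downloaded

    finish_with_rest = cached_rows >= total_rows * ALMOST_COMPLETELY_CACHED_RATIO
    # True: only ~2 MB remain, so jobs.getQueryResults is used instead of
    # opening a BigQuery Storage read session.
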
def _reference_getter(table):
"""A :class:`~google.cloud.bigquery.table.TableReference` pointing to
@@ -92,7 +137,9 @@ def _reference_getter(table):
return TableReference(dataset_ref, table.table_id)
-def _view_use_legacy_sql_getter(table):
+def _view_use_legacy_sql_getter(
+ table: Union["Table", "TableListItem"]
+) -> Optional[bool]:
"""bool: Specifies whether to execute the view with Legacy or Standard SQL.
This boolean specifies whether to execute the view with Legacy SQL
@@ -103,15 +150,17 @@ def _view_use_legacy_sql_getter(table):
Raises:
ValueError: For invalid value types.
"""
- view = table._properties.get("view")
+
+ view: Optional[Dict[str, Any]] = table._properties.get("view")
if view is not None:
# The server-side default for useLegacySql is True.
- return view.get("useLegacySql", True)
+ return view.get("useLegacySql", True) if view is not None else True
# In some cases, such as in a table list no view object is present, but the
# resource still represents a view. Use the type as a fallback.
if table.table_type == "VIEW":
# The server-side default for useLegacySql is True.
return True
+ return None # explicit return statement to appease mypy
class _TableBase:
@@ -207,7 +256,7 @@ def __init__(self, dataset_ref: "DatasetReference", table_id: str):
@classmethod
def from_string(
- cls, table_id: str, default_project: str = None
+ cls, table_id: str, default_project: Optional[str] = None
) -> "TableReference":
"""Construct a table reference from table ID string.
@@ -330,8 +379,9 @@ class Table(_TableBase):
:meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`.
"""
- _PROPERTY_TO_API_FIELD = {
+ _PROPERTY_TO_API_FIELD: Dict[str, Any] = {
**_TableBase._PROPERTY_TO_API_FIELD,
+ "biglake_configuration": "biglakeConfiguration",
"clustering_fields": "clustering",
"created": "creationTime",
"description": "description",
@@ -348,33 +398,64 @@ class Table(_TableBase):
"mview_last_refresh_time": ["materializedView", "lastRefreshTime"],
"mview_query": "materializedView",
"mview_refresh_interval": "materializedView",
+ "mview_allow_non_incremental_definition": "materializedView",
"num_bytes": "numBytes",
"num_rows": "numRows",
"partition_expiration": "timePartitioning",
"partitioning_type": "timePartitioning",
"range_partitioning": "rangePartitioning",
"time_partitioning": "timePartitioning",
- "schema": "schema",
+ "schema": ["schema", "fields"],
"snapshot_definition": "snapshotDefinition",
"clone_definition": "cloneDefinition",
"streaming_buffer": "streamingBuffer",
"self_link": "selfLink",
- "time_partitioning": "timePartitioning",
"type": "type",
"view_use_legacy_sql": "view",
"view_query": "view",
"require_partition_filter": "requirePartitionFilter",
+ "table_constraints": "tableConstraints",
+ "max_staleness": "maxStaleness",
+ "resource_tags": "resourceTags",
+ "external_catalog_table_options": "externalCatalogTableOptions",
+ "foreign_type_info": ["schema", "foreignTypeInfo"],
}
def __init__(self, table_ref, schema=None) -> None:
table_ref = _table_arg_to_table_ref(table_ref)
- self._properties = {"tableReference": table_ref.to_api_repr(), "labels": {}}
+ self._properties: Dict[str, Any] = {
+ "tableReference": table_ref.to_api_repr(),
+ "labels": {},
+ }
# Let the @property do validation.
if schema is not None:
self.schema = schema
reference = property(_reference_getter)
+ @property
+ def biglake_configuration(self):
+ """google.cloud.bigquery.table.BigLakeConfiguration: Configuration
+ for managed tables for Apache Iceberg.
+
+ See https://cloud.google.com/bigquery/docs/iceberg-tables for more information.
+ """
+ prop = self._properties.get(
+ self._PROPERTY_TO_API_FIELD["biglake_configuration"]
+ )
+ if prop is not None:
+ prop = BigLakeConfiguration.from_api_repr(prop)
+ return prop
+
+ @biglake_configuration.setter
+ def biglake_configuration(self, value):
+ api_repr = value
+ if value is not None:
+ api_repr = value.to_api_repr()
+ self._properties[
+ self._PROPERTY_TO_API_FIELD["biglake_configuration"]
+ ] = api_repr
+
@property
def require_partition_filter(self):
"""bool: If set to true, queries over the partitioned table require a
@@ -404,8 +485,20 @@ def schema(self):
If ``schema`` is not a sequence, or if any item in the sequence
is not a :class:`~google.cloud.bigquery.schema.SchemaField`
instance or a compatible mapping representation of the field.
+
+ .. Note::
+ If you are referencing a schema for an external catalog table such
+ as a Hive table, it will also be necessary to populate the foreign_type_info
+ attribute. This is not necessary if defining the schema for a BigQuery table.
+
+ For details, see:
+ https://cloud.google.com/bigquery/docs/external-tables
+ https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
+
"""
- prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"])
+ prop = _helpers._get_sub_prop(
+ self._properties, self._PROPERTY_TO_API_FIELD["schema"]
+ )
if not prop:
return []
else:
@@ -416,10 +509,21 @@ def schema(self, value):
api_field = self._PROPERTY_TO_API_FIELD["schema"]
if value is None:
- self._properties[api_field] = None
- else:
+ _helpers._set_sub_prop(
+ self._properties,
+ api_field,
+ None,
+ )
+ elif isinstance(value, Sequence):
value = _to_schema_fields(value)
- self._properties[api_field] = {"fields": _build_schema_resource(value)}
+ value = _build_schema_resource(value)
+ _helpers._set_sub_prop(
+ self._properties,
+ api_field,
+ value,
+ )
+ else:
+ raise TypeError("Schema must be a Sequence (e.g. a list) or None.")
@property
def labels(self):
@@ -666,7 +770,11 @@ def partition_expiration(self, value):
if self.time_partitioning is None:
self._properties[api_field] = {"type": TimePartitioningType.DAY}
- self._properties[api_field]["expirationMs"] = str(value)
+
+ if value is None:
+ self._properties[api_field]["expirationMs"] = None
+ else:
+ self._properties[api_field]["expirationMs"] = str(value)
@property
def clustering_fields(self):
@@ -882,6 +990,28 @@ def mview_refresh_interval(self, value):
refresh_interval_ms,
)
+ @property
+ def mview_allow_non_incremental_definition(self):
+ """Optional[bool]: This option declares the intention to construct a
+ materialized view that isn't refreshed incrementally.
+ The default value is :data:`False`.
+ """
+ api_field = self._PROPERTY_TO_API_FIELD[
+ "mview_allow_non_incremental_definition"
+ ]
+ return _helpers._get_sub_prop(
+ self._properties, [api_field, "allowNonIncrementalDefinition"]
+ )
+
+ @mview_allow_non_incremental_definition.setter
+ def mview_allow_non_incremental_definition(self, value):
+ api_field = self._PROPERTY_TO_API_FIELD[
+ "mview_allow_non_incremental_definition"
+ ]
+ _helpers._set_sub_prop(
+ self._properties, [api_field, "allowNonIncrementalDefinition"], value
+ )
+
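
For instance, combined with ``mview_query`` (the project, dataset, and table IDs below are hypothetical, and actually creating the view requires credentials):

    from google.cloud import bigquery

    view = bigquery.Table("my-project.my_dataset.my_materialized_view")
    view.mview_query = (
        "SELECT product, COUNT(*) AS n "
        "FROM `my-project.my_dataset.sales` GROUP BY product"
    )
    view.mview_allow_non_incremental_definition = True
    # view = client.create_table(view)  # needs a real project and dataset
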
@property
def streaming_buffer(self):
"""google.cloud.bigquery.StreamingBuffer: Information about a table's
@@ -943,6 +1073,111 @@ def clone_definition(self) -> Optional["CloneDefinition"]:
clone_info = CloneDefinition(clone_info)
return clone_info
+ @property
+ def table_constraints(self) -> Optional["TableConstraints"]:
+ """Tables Primary Key and Foreign Key information."""
+ table_constraints = self._properties.get(
+ self._PROPERTY_TO_API_FIELD["table_constraints"]
+ )
+ if table_constraints is not None:
+ table_constraints = TableConstraints.from_api_repr(table_constraints)
+ return table_constraints
+
+ @table_constraints.setter
+ def table_constraints(self, value):
+ """Tables Primary Key and Foreign Key information."""
+ api_repr = value
+ if not isinstance(value, TableConstraints) and value is not None:
+ raise ValueError(
+ "value must be google.cloud.bigquery.table.TableConstraints or None"
+ )
+ api_repr = value.to_api_repr() if value else None
+ self._properties[self._PROPERTY_TO_API_FIELD["table_constraints"]] = api_repr
+
+ @property
+ def resource_tags(self):
+ """Dict[str, str]: Resource tags for the table.
+
+ See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.resource_tags
+ """
+ return self._properties.setdefault(
+ self._PROPERTY_TO_API_FIELD["resource_tags"], {}
+ )
+
+ @resource_tags.setter
+ def resource_tags(self, value):
+ if not isinstance(value, dict) and value is not None:
+ raise ValueError("resource_tags must be a dict or None")
+ self._properties[self._PROPERTY_TO_API_FIELD["resource_tags"]] = value
+
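
For example (the tag key uses a namespaced ``<project or org>/<key>`` form; the specific names are hypothetical):

    from google.cloud import bigquery

    table = bigquery.Table("my-project.my_dataset.my_table")
    table.resource_tags = {"my-project/environment": "production"}
    # client.update_table(table, ["resource_tags"])  # needs credentials
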
+ @property
+ def external_catalog_table_options(
+ self,
+ ) -> Optional[external_config.ExternalCatalogTableOptions]:
+ """Options defining open source compatible datasets living in the
+ BigQuery catalog. Contains metadata of open source database, schema
+ or namespace represented by the current dataset."""
+
+ prop = self._properties.get(
+ self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
+ )
+ if prop is not None:
+ return external_config.ExternalCatalogTableOptions.from_api_repr(prop)
+ return None
+
+ @external_catalog_table_options.setter
+ def external_catalog_table_options(
+ self, value: Union[external_config.ExternalCatalogTableOptions, dict, None]
+ ):
+ value = _helpers._isinstance_or_raise(
+ value,
+ (external_config.ExternalCatalogTableOptions, dict),
+ none_allowed=True,
+ )
+ if isinstance(value, external_config.ExternalCatalogTableOptions):
+ self._properties[
+ self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
+ ] = value.to_api_repr()
+ else:
+ self._properties[
+ self._PROPERTY_TO_API_FIELD["external_catalog_table_options"]
+ ] = value
+
+ @property
+ def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]:
+ """Optional. Specifies metadata of the foreign data type definition in
+ field schema (TableFieldSchema.foreign_type_definition).
+ Returns:
+ Optional[schema.ForeignTypeInfo]:
+ Foreign type information, or :data:`None` if not set.
+ .. Note::
+ foreign_type_info is only required if you are referencing an
+ external catalog such as a Hive table.
+ For details, see:
+ https://cloud.google.com/bigquery/docs/external-tables
+ https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets
+ """
+
+ prop = _helpers._get_sub_prop(
+ self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"]
+ )
+ if prop is not None:
+ return _schema.ForeignTypeInfo.from_api_repr(prop)
+ return None
+
+ @foreign_type_info.setter
+ def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]):
+ value = _helpers._isinstance_or_raise(
+ value,
+ (_schema.ForeignTypeInfo, dict),
+ none_allowed=True,
+ )
+ if isinstance(value, _schema.ForeignTypeInfo):
+ value = value.to_api_repr()
+ _helpers._set_sub_prop(
+ self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"], value
+ )
+
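
A sketch of defining a schema for an external catalog (e.g. Hive) table, where ``foreign_type_info`` accompanies the field list (identifiers are hypothetical):

    from google.cloud import bigquery
    from google.cloud.bigquery import schema as bq_schema

    table = bigquery.Table("my-project.my_dataset.hive_backed_table")
    table.schema = [bigquery.SchemaField("id", "STRING")]
    table.foreign_type_info = bq_schema.ForeignTypeInfo(type_system="HIVE")
    # The setter also accepts the raw mapping: {"typeSystem": "HIVE"}
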
@classmethod
def from_string(cls, full_table_id: str) -> "Table":
"""Construct a table from fully-qualified table ID.
@@ -1036,6 +1271,40 @@ def __repr__(self):
def __str__(self):
return f"{self.project}.{self.dataset_id}.{self.table_id}"
+ @property
+ def max_staleness(self):
+ """Union[str, None]: The maximum staleness of data that could be returned when the table is queried.
+
+        Staleness is encoded as a string representation of the SQL IntervalValue type.
+ This property is optional and defaults to None.
+
+ According to the BigQuery API documentation, maxStaleness specifies the maximum time
+ interval for which stale data can be returned when querying the table.
+ It helps control data freshness in scenarios like metadata-cached external tables.
+
+ Returns:
+ Optional[str]: A string representing the maximum staleness interval
+ (e.g., '1h', '30m', '15s' for hours, minutes, seconds respectively).
+ """
+ return self._properties.get(self._PROPERTY_TO_API_FIELD["max_staleness"])
+
+ @max_staleness.setter
+ def max_staleness(self, value):
+ """Set the maximum staleness for the table.
+
+ Args:
+ value (Optional[str]): A string representing the maximum staleness interval.
+ Must be a valid time interval string.
+ Examples include '1h' (1 hour), '30m' (30 minutes), '15s' (15 seconds).
+
+ Raises:
+ ValueError: If the value is not None and not a string.
+ """
+ if value is not None and not isinstance(value, str):
+ raise ValueError("max_staleness must be a string or None")
+
+ self._properties[self._PROPERTY_TO_API_FIELD["max_staleness"]] = value
+
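
For example (the interval below is a hypothetical 30-minute bound written in the SQL INTERVAL encoding mentioned above; client-side, the setter only checks that the value is a string or ``None``):

    from google.cloud import bigquery

    table = bigquery.Table("my-project.my_dataset.my_table")
    table.max_staleness = "0-0 0 0:30:0"  # allow up to 30 minutes of staleness
    table.max_staleness = None            # clear the bound again
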
class TableListItem(_TableBase):
"""A read-only table resource from a list operation.
@@ -1420,7 +1689,7 @@ def get(self, key: str, default: Any = None) -> Any:
>>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z')
None
- The default value can be overrided with the ``default`` parameter.
+ The default value can be overridden with the ``default`` parameter.
>>> Row(('a', 'b'), {'x': 0, 'y': 1}).get('z', '')
''
@@ -1516,6 +1785,18 @@ class RowIterator(HTTPIterator):
first_page_response (Optional[dict]):
API response for the first page of results. These are returned when
the first page is requested.
+ query (Optional[str]):
+ The query text used.
+ total_bytes_processed (Optional[int]):
+ If representing query results, the total bytes processed by the associated query.
+ slot_millis (Optional[int]):
+            If representing query results, the number of slot milliseconds billed for the associated query.
+ created (Optional[datetime.datetime]):
+ If representing query results, the creation time of the associated query.
+ started (Optional[datetime.datetime]):
+ If representing query results, the start time of the associated query.
+ ended (Optional[datetime.datetime]):
+ If representing query results, the end time of the associated query.
"""
def __init__(
@@ -1532,6 +1813,17 @@ def __init__(
selected_fields=None,
total_rows=None,
first_page_response=None,
+ location: Optional[str] = None,
+ job_id: Optional[str] = None,
+ query_id: Optional[str] = None,
+ project: Optional[str] = None,
+ num_dml_affected_rows: Optional[int] = None,
+ query: Optional[str] = None,
+ total_bytes_processed: Optional[int] = None,
+ slot_millis: Optional[int] = None,
+ created: Optional[datetime.datetime] = None,
+ started: Optional[datetime.datetime] = None,
+ ended: Optional[datetime.datetime] = None,
):
super(RowIterator, self).__init__(
client,
@@ -1545,30 +1837,134 @@ def __init__(
page_start=_rows_page_start,
next_token="pageToken",
)
- schema = _to_schema_fields(schema)
+ schema = _to_schema_fields(schema) if schema else ()
self._field_to_index = _helpers._field_to_index_mapping(schema)
self._page_size = page_size
self._preserve_order = False
- self._project = client.project if client is not None else None
self._schema = schema
self._selected_fields = selected_fields
self._table = table
self._total_rows = total_rows
self._first_page_response = first_page_response
+ self._location = location
+ self._job_id = job_id
+ self._query_id = query_id
+ self._project = project
+ self._num_dml_affected_rows = num_dml_affected_rows
+ self._query = query
+ self._total_bytes_processed = total_bytes_processed
+ self._slot_millis = slot_millis
+ self._job_created = created
+ self._job_started = started
+ self._job_ended = ended
+
+ @property
+ def _billing_project(self) -> Optional[str]:
+ """GCP Project ID where BQ API will bill to (if applicable)."""
+ client = self.client
+ return client.project if client is not None else None
+
+ @property
+ def job_id(self) -> Optional[str]:
+ """ID of the query job (if applicable).
+
+ To get the job metadata, call
+ ``job = client.get_job(rows.job_id, location=rows.location)``.
+ """
+ return self._job_id
+
+ @property
+ def location(self) -> Optional[str]:
+ """Location where the query executed (if applicable).
+
+ See: https://cloud.google.com/bigquery/docs/locations
+ """
+ return self._location
+
+ @property
+ def num_dml_affected_rows(self) -> Optional[int]:
+ """If this RowIterator is the result of a DML query, the number of
+ rows that were affected.
- def _is_completely_cached(self):
+ See:
+ https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query#body.QueryResponse.FIELDS.num_dml_affected_rows
+ """
+ return self._num_dml_affected_rows
+
+ @property
+ def project(self) -> Optional[str]:
+ """GCP Project ID where these rows are read from."""
+ return self._project
+
+ @property
+ def query_id(self) -> Optional[str]:
+ """[Preview] ID of a completed query.
+
+ This ID is auto-generated and not guaranteed to be populated.
+ """
+ return self._query_id
+
+ @property
+ def query(self) -> Optional[str]:
+ """The query text used."""
+ return self._query
+
+ @property
+ def total_bytes_processed(self) -> Optional[int]:
+ """total bytes processed from job statistics, if present."""
+ return self._total_bytes_processed
+
+ @property
+ def slot_millis(self) -> Optional[int]:
+ """Number of slot ms the user is actually billed for."""
+ return self._slot_millis
+
+ @property
+ def created(self) -> Optional[datetime.datetime]:
+ """If representing query results, the creation time of the associated query."""
+ return self._job_created
+
+ @property
+ def started(self) -> Optional[datetime.datetime]:
+ """If representing query results, the start time of the associated query."""
+ return self._job_started
+
+ @property
+ def ended(self) -> Optional[datetime.datetime]:
+ """If representing query results, the end time of the associated query."""
+ return self._job_ended
+
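
These properties expose the originating query's metadata directly on the iterator; a hedged usage sketch (requires application default credentials and a billing project):

    from google.cloud import bigquery

    client = bigquery.Client()
    rows = client.query_and_wait("SELECT 1 AS x")

    print(rows.job_id, rows.location, rows.query_id)
    print(rows.query, rows.total_bytes_processed, rows.slot_millis)
    print(rows.created, rows.started, rows.ended)
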
+ def _is_almost_completely_cached(self):
"""Check if all results are completely cached.
This is useful to know, because we can avoid alternative download
mechanisms.
"""
- if self._first_page_response is None or self.next_page_token:
+ if (
+ not hasattr(self, "_first_page_response")
+ or self._first_page_response is None
+ ):
return False
- return self._first_page_response.get(self._next_token) is None
+ total_cached_rows = len(self._first_page_response.get(self._items_key, []))
+ if self.max_results is not None and total_cached_rows >= self.max_results:
+ return True
+
+ if (
+ self.next_page_token is None
+ and self._first_page_response.get(self._next_token) is None
+ ):
+ return True
+
+ if self._total_rows is not None:
+ almost_completely = self._total_rows * ALMOST_COMPLETELY_CACHED_RATIO
+ if total_cached_rows >= almost_completely:
+ return True
- def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client):
- """Returns if the BigQuery Storage API can be used.
+ return False
+
+ def _should_use_bqstorage(self, bqstorage_client, create_bqstorage_client):
+ """Returns True if the BigQuery Storage API can be used.
Returns:
bool
@@ -1578,12 +1974,32 @@ def _validate_bqstorage(self, bqstorage_client, create_bqstorage_client):
if not using_bqstorage_api:
return False
- if self._is_completely_cached():
+ if self._table is None:
+ return False
+
+ # The developer has already started paging through results if
+ # next_page_token is set.
+ if hasattr(self, "next_page_token") and self.next_page_token is not None:
+ return False
+
+ if self._is_almost_completely_cached():
return False
if self.max_results is not None:
return False
+ try:
+ _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True)
+ except bq_exceptions.BigQueryStorageNotFoundError:
+ warnings.warn(
+ "BigQuery Storage module not found, fetch data with the REST "
+ "endpoint instead."
+ )
+ return False
+ except bq_exceptions.LegacyBigQueryStorageError as exc:
+ warnings.warn(str(exc))
+ return False
+
return True
def _get_next_page_response(self):
@@ -1594,17 +2010,32 @@ def _get_next_page_response(self):
The parsed JSON response of the next page's contents.
"""
if self._first_page_response:
- response = self._first_page_response
+ rows = self._first_page_response.get(self._items_key, [])[
+ : self.max_results
+ ]
+ response = {
+ self._items_key: rows,
+ }
+ if self._next_token in self._first_page_response:
+ response[self._next_token] = self._first_page_response[self._next_token]
+
self._first_page_response = None
return response
params = self._get_query_params()
+
+ # If the user has provided page_size and start_index, we need to pass
+ # start_index for the first page, but for all subsequent pages, we
+ # should not pass start_index. We make a shallow copy of params and do
+ # not alter the original, so if the user iterates the results again,
+ # start_index is preserved.
+ params_copy = copy.copy(params)
if self._page_size is not None:
if self.page_number and "startIndex" in params:
- del params["startIndex"]
- params["maxResults"] = self._page_size
+ del params_copy["startIndex"]
+
return self.api_request(
- method=self._HTTP_METHOD, path=self.path, query_params=params
+ method=self._HTTP_METHOD, path=self.path, query_params=params_copy
)
@property
@@ -1615,7 +2046,7 @@ def schema(self):
@property
def total_rows(self):
- """int: The total number of rows in the table."""
+ """int: The total number of rows in the table or query results."""
return self._total_rows
def _maybe_warn_max_results(
@@ -1641,7 +2072,7 @@ def _maybe_warn_max_results(
def _to_page_iterable(
self, bqstorage_download, tabledata_list_download, bqstorage_client=None
):
- if not self._validate_bqstorage(bqstorage_client, False):
+ if not self._should_use_bqstorage(bqstorage_client, False):
bqstorage_client = None
result_pages = (
@@ -1653,8 +2084,9 @@ def _to_page_iterable(
def to_arrow_iterable(
self,
- bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
+ bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore
+ max_stream_count: Optional[int] = None,
) -> Iterator["pyarrow.RecordBatch"]:
"""[Beta] Create an iterable of class:`pyarrow.RecordBatch`, to process the table as a stream.
@@ -1679,6 +2111,22 @@ def to_arrow_iterable(
created by the server. If ``max_queue_size`` is :data:`None`, the queue
size is infinite.
+ max_stream_count (Optional[int]):
+                The maximum number of parallel download streams when
+                using the BigQuery Storage API. Ignored if the
+                BigQuery Storage API is not used.
+
+                This setting also has no effect if the query result
+                is deterministically ordered with ORDER BY, in which
+                case the number of download streams is always 1.
+
+                If set to 0 or None (the default), the number of download
+                streams is determined by the BigQuery server. However, this
+                behaviour can require a lot of memory to store temporary
+                download results, especially with very large queries. In that
+                case, setting this parameter to a value > 0 can help reduce
+                system resource consumption.
+
Returns:
pyarrow.RecordBatch:
A generator of :class:`~pyarrow.RecordBatch`.
@@ -1689,12 +2137,13 @@ def to_arrow_iterable(
bqstorage_download = functools.partial(
_pandas_helpers.download_arrow_bqstorage,
- self._project,
+ self._billing_project,
self._table,
bqstorage_client,
preserve_order=self._preserve_order,
selected_fields=self._selected_fields,
max_queue_size=max_queue_size,
+ max_stream_count=max_stream_count,
)
tabledata_list_download = functools.partial(
_pandas_helpers.download_arrow_row_iterator, iter(self.pages), self.schema
@@ -1709,7 +2158,7 @@ def to_arrow_iterable(
# changes to job.QueryJob.to_arrow()
def to_arrow(
self,
- progress_bar_type: str = None,
+ progress_bar_type: Optional[str] = None,
bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
create_bqstorage_client: bool = True,
) -> "pyarrow.Table":
@@ -1728,9 +2177,9 @@ def to_arrow(
No progress bar.
``'tqdm'``
Use the :func:`tqdm.tqdm` function to print a progress bar
- to :data:`sys.stderr`.
+ to :data:`sys.stdout`.
``'tqdm_notebook'``
- Use the :func:`tqdm.tqdm_notebook` function to display a
+ Use the :func:`tqdm.notebook.tqdm` function to display a
progress bar as a Jupyter notebook widget.
``'tqdm_gui'``
Use the :func:`tqdm.tqdm_gui` function to display a
@@ -1760,11 +2209,18 @@ def to_arrow(
headers from the query results. The column headers are derived
from the destination table's schema.
+ Raises:
+ ValueError: If the :mod:`pyarrow` library cannot be imported.
+
+
.. versionadded:: 1.17.0
"""
+ if pyarrow is None:
+ raise ValueError(_NO_PYARROW_ERROR)
+
self._maybe_warn_max_results(bqstorage_client)
- if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
+ if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client):
create_bqstorage_client = False
bqstorage_client = None
@@ -1812,8 +2268,9 @@ def to_arrow(
def to_dataframe_iterable(
self,
bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
- dtypes: Dict[str, Any] = None,
+ dtypes: Optional[Dict[str, Any]] = None,
max_queue_size: int = _pandas_helpers._MAX_QUEUE_SIZE_DEFAULT, # type: ignore
+ max_stream_count: Optional[int] = None,
) -> "pandas.DataFrame":
"""Create an iterable of pandas DataFrames, to process the table as a stream.
@@ -1844,6 +2301,22 @@ def to_dataframe_iterable(
.. versionadded:: 2.14.0
+ max_stream_count (Optional[int]):
+                The maximum number of parallel download streams when
+                using the BigQuery Storage API. Ignored if the
+                BigQuery Storage API is not used.
+
+                This setting also has no effect if the query result
+                is deterministically ordered with ORDER BY, in which
+                case the number of download streams is always 1.
+
+                If set to 0 or None (the default), the number of download
+                streams is determined by the BigQuery server. However, this
+                behaviour can require a lot of memory to store temporary
+                download results, especially with very large queries. In that
+                case, setting this parameter to a value > 0 can help reduce
+                system resource consumption.
+
Returns:
pandas.DataFrame:
A generator of :class:`~pandas.DataFrame`.
@@ -1862,7 +2335,7 @@ def to_dataframe_iterable(
column_names = [field.name for field in self._schema]
bqstorage_download = functools.partial(
_pandas_helpers.download_dataframe_bqstorage,
- self._project,
+ self._billing_project,
self._table,
bqstorage_client,
column_names,
@@ -1870,6 +2343,7 @@ def to_dataframe_iterable(
preserve_order=self._preserve_order,
selected_fields=self._selected_fields,
max_queue_size=max_queue_size,
+ max_stream_count=max_stream_count,
)
tabledata_list_download = functools.partial(
_pandas_helpers.download_dataframe_row_iterator,
@@ -1888,10 +2362,25 @@ def to_dataframe_iterable(
def to_dataframe(
self,
bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
- dtypes: Dict[str, Any] = None,
- progress_bar_type: str = None,
+ dtypes: Optional[Dict[str, Any]] = None,
+ progress_bar_type: Optional[str] = None,
create_bqstorage_client: bool = True,
geography_as_object: bool = False,
+ bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
+ int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
+ float_dtype: Union[Any, None] = None,
+ string_dtype: Union[Any, None] = None,
+ date_dtype: Union[Any, None] = DefaultPandasDTypes.DATE_DTYPE,
+ datetime_dtype: Union[Any, None] = None,
+ time_dtype: Union[Any, None] = DefaultPandasDTypes.TIME_DTYPE,
+ timestamp_dtype: Union[Any, None] = None,
+ range_date_dtype: Union[Any, None] = DefaultPandasDTypes.RANGE_DATE_DTYPE,
+ range_datetime_dtype: Union[
+ Any, None
+ ] = DefaultPandasDTypes.RANGE_DATETIME_DTYPE,
+ range_timestamp_dtype: Union[
+ Any, None
+ ] = DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE,
) -> "pandas.DataFrame":
"""Create a pandas DataFrame by loading all pages of a query.
@@ -1921,15 +2410,16 @@ def to_dataframe(
No progress bar.
``'tqdm'``
Use the :func:`tqdm.tqdm` function to print a progress bar
- to :data:`sys.stderr`.
+ to :data:`sys.stdout`.
``'tqdm_notebook'``
- Use the :func:`tqdm.tqdm_notebook` function to display a
+ Use the :func:`tqdm.notebook.tqdm` function to display a
progress bar as a Jupyter notebook widget.
``'tqdm_gui'``
Use the :func:`tqdm.tqdm_gui` function to display a
progress bar as a graphical dialog box.
.. versionadded:: 1.11.0
+
create_bqstorage_client (Optional[bool]):
If ``True`` (default), create a BigQuery Storage API client
using the default API settings. The BigQuery Storage API
@@ -1947,6 +2437,146 @@ def to_dataframe(
.. versionadded:: 2.24.0
+ bool_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)
+ to convert BigQuery Boolean type, instead of relying on the default
+ ``pandas.BooleanDtype()``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("bool")``. BigQuery Boolean
+ type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
+
+ .. versionadded:: 3.8.0
+
+ int_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
+ to convert BigQuery Integer types, instead of relying on the default
+ ``pandas.Int64Dtype()``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("int64")``. A list of BigQuery
+ Integer types can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
+
+ .. versionadded:: 3.8.0
+
+ float_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
+ to convert BigQuery Float type, instead of relying on the default
+ ``numpy.dtype("float64")``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("float64")``. BigQuery Float
+ type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
+
+ .. versionadded:: 3.8.0
+
+ string_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g. ``pandas.StringDtype()``) to
+ convert BigQuery String type, instead of relying on the default
+ ``numpy.dtype("object")``. If you explicitly set the value to ``None``,
+ then the data type will be ``numpy.dtype("object")``. BigQuery String
+ type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
+
+ .. versionadded:: 3.8.0
+
+ date_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g.
+ ``pandas.ArrowDtype(pyarrow.date32())``) to convert BigQuery Date
+ type, instead of relying on the default ``db_dtypes.DateDtype()``.
+ If you explicitly set the value to ``None``, then the data type will be
+ ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery
+ Date type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type
+
+ .. versionadded:: 3.10.0
+
+ datetime_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g.
+ ``pandas.ArrowDtype(pyarrow.timestamp("us"))``) to convert BigQuery Datetime
+                type, instead of relying on the default ``numpy.dtype("datetime64[ns]")``.
+ If you explicitly set the value to ``None``, then the data type will be
+ ``numpy.dtype("datetime64[ns]")`` or ``object`` if out of bound. BigQuery
+ Datetime type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type
+
+ .. versionadded:: 3.10.0
+
+ time_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g.
+ ``pandas.ArrowDtype(pyarrow.time64("us"))``) to convert BigQuery Time
+ type, instead of relying on the default ``db_dtypes.TimeDtype()``.
+ If you explicitly set the value to ``None``, then the data type will be
+ ``numpy.dtype("object")``. BigQuery Time type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type
+
+ .. versionadded:: 3.10.0
+
+ timestamp_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype (e.g.
+ ``pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))``) to convert BigQuery Timestamp
+ type, instead of relying on the default ``numpy.dtype("datetime64[ns, UTC]")``.
+ If you explicitly set the value to ``None``, then the data type will be
+ ``numpy.dtype("datetime64[ns, UTC]")`` or ``object`` if out of bound. BigQuery
+                Timestamp type can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#timestamp_type
+
+ .. versionadded:: 3.10.0
+
+ range_date_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype, such as:
+
+ .. code-block:: python
+
+ pandas.ArrowDtype(pyarrow.struct(
+ [("start", pyarrow.date32()), ("end", pyarrow.date32())]
+ ))
+
+ to convert BigQuery RANGE type, instead of relying on
+ the default ``object``. If you explicitly set the value to
+ ``None``, the data type will be ``object``. BigQuery Range type
+ can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type
+
+ .. versionadded:: 3.21.0
+
+ range_datetime_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype, such as:
+
+ .. code-block:: python
+
+ pandas.ArrowDtype(pyarrow.struct(
+ [
+ ("start", pyarrow.timestamp("us")),
+ ("end", pyarrow.timestamp("us")),
+ ]
+ ))
+
+ to convert BigQuery RANGE type, instead of relying on
+ the default ``object``. If you explicitly set the value to
+ ``None``, the data type will be ``object``. BigQuery Range type
+ can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type
+
+ .. versionadded:: 3.21.0
+
+ range_timestamp_dtype (Optional[pandas.Series.dtype, None]):
+ If set, indicate a pandas ExtensionDtype, such as:
+
+ .. code-block:: python
+
+ pandas.ArrowDtype(pyarrow.struct(
+ [
+ ("start", pyarrow.timestamp("us", tz="UTC")),
+ ("end", pyarrow.timestamp("us", tz="UTC")),
+ ]
+ ))
+
+ to convert BigQuery RANGE type, instead of relying
+ on the default ``object``. If you explicitly set the value to
+ ``None``, the data type will be ``object``. BigQuery Range type
+ can be found at:
+ https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#range_type
+
+ .. versionadded:: 3.21.0
+
Returns:
pandas.DataFrame:
A :class:`~pandas.DataFrame` populated with row data and column
@@ -1959,7 +2589,9 @@ def to_dataframe(
the :mod:`google.cloud.bigquery_storage_v1` module is
required but cannot be imported. Also if
`geography_as_object` is `True`, but the
- :mod:`shapely` library cannot be imported.
+ :mod:`shapely` library cannot be imported. Also if
+                `bool_dtype`, `int_dtype`, or another dtype parameter
+                is not a supported dtype.
"""
_pandas_helpers.verify_pandas_imports()
@@ -1967,12 +2599,90 @@ def to_dataframe(
if geography_as_object and shapely is None:
raise ValueError(_NO_SHAPELY_ERROR)
+ if bool_dtype is DefaultPandasDTypes.BOOL_DTYPE:
+ bool_dtype = pandas.BooleanDtype()
+
+ if int_dtype is DefaultPandasDTypes.INT_DTYPE:
+ int_dtype = pandas.Int64Dtype()
+
+ if time_dtype is DefaultPandasDTypes.TIME_DTYPE:
+ time_dtype = db_dtypes.TimeDtype()
+
+ if range_date_dtype is DefaultPandasDTypes.RANGE_DATE_DTYPE:
+ if _versions_helpers.SUPPORTS_RANGE_PYARROW:
+ range_date_dtype = pandas.ArrowDtype(
+ pyarrow.struct(
+ [("start", pyarrow.date32()), ("end", pyarrow.date32())]
+ )
+ )
+ else:
+ warnings.warn(_RANGE_PYARROW_WARNING)
+ range_date_dtype = None
+
+ if range_datetime_dtype is DefaultPandasDTypes.RANGE_DATETIME_DTYPE:
+ if _versions_helpers.SUPPORTS_RANGE_PYARROW:
+ range_datetime_dtype = pandas.ArrowDtype(
+ pyarrow.struct(
+ [
+ ("start", pyarrow.timestamp("us")),
+ ("end", pyarrow.timestamp("us")),
+ ]
+ )
+ )
+ else:
+ warnings.warn(_RANGE_PYARROW_WARNING)
+ range_datetime_dtype = None
+
+ if range_timestamp_dtype is DefaultPandasDTypes.RANGE_TIMESTAMP_DTYPE:
+ if _versions_helpers.SUPPORTS_RANGE_PYARROW:
+ range_timestamp_dtype = pandas.ArrowDtype(
+ pyarrow.struct(
+ [
+ ("start", pyarrow.timestamp("us", tz="UTC")),
+ ("end", pyarrow.timestamp("us", tz="UTC")),
+ ]
+ )
+ )
+ else:
+ warnings.warn(_RANGE_PYARROW_WARNING)
+ range_timestamp_dtype = None
+
+ if bool_dtype is not None and not hasattr(bool_dtype, "__from_arrow__"):
+ raise ValueError("bool_dtype", _NO_SUPPORTED_DTYPE)
+
+ if int_dtype is not None and not hasattr(int_dtype, "__from_arrow__"):
+ raise ValueError("int_dtype", _NO_SUPPORTED_DTYPE)
+
+ if float_dtype is not None and not hasattr(float_dtype, "__from_arrow__"):
+ raise ValueError("float_dtype", _NO_SUPPORTED_DTYPE)
+
+ if string_dtype is not None and not hasattr(string_dtype, "__from_arrow__"):
+ raise ValueError("string_dtype", _NO_SUPPORTED_DTYPE)
+
+ if (
+ date_dtype is not None
+ and date_dtype is not DefaultPandasDTypes.DATE_DTYPE
+ and not hasattr(date_dtype, "__from_arrow__")
+ ):
+ raise ValueError("date_dtype", _NO_SUPPORTED_DTYPE)
+
+ if datetime_dtype is not None and not hasattr(datetime_dtype, "__from_arrow__"):
+ raise ValueError("datetime_dtype", _NO_SUPPORTED_DTYPE)
+
+ if time_dtype is not None and not hasattr(time_dtype, "__from_arrow__"):
+ raise ValueError("time_dtype", _NO_SUPPORTED_DTYPE)
+
+ if timestamp_dtype is not None and not hasattr(
+ timestamp_dtype, "__from_arrow__"
+ ):
+ raise ValueError("timestamp_dtype", _NO_SUPPORTED_DTYPE)
+
if dtypes is None:
dtypes = {}
self._maybe_warn_max_results(bqstorage_client)
- if not self._validate_bqstorage(bqstorage_client, create_bqstorage_client):
+ if not self._should_use_bqstorage(bqstorage_client, create_bqstorage_client):
create_bqstorage_client = False
bqstorage_client = None
@@ -1982,40 +2692,49 @@ def to_dataframe(
create_bqstorage_client=create_bqstorage_client,
)
- # When converting date or timestamp values to nanosecond precision, the result
- # can be out of pyarrow bounds. To avoid the error when converting to
- # Pandas, we set the date_as_object or timestamp_as_object parameter to True,
- # if necessary.
- date_as_object = not all(
- self.__can_cast_timestamp_ns(col)
- for col in record_batch
- # Type can be date32 or date64 (plus units).
- # See: https://arrow.apache.org/docs/python/api/datatypes.html
- if str(col.type).startswith("date")
- )
+        # The default date dtype is db_dtypes.DateDtype(), which can cause an
+        # out-of-bounds error when pyarrow converts date values to nanosecond
+        # precision. To avoid the error, we set the date_as_object parameter
+        # to True, if necessary.
+ date_as_object = False
+ if date_dtype is DefaultPandasDTypes.DATE_DTYPE:
+ date_dtype = db_dtypes.DateDtype()
+ date_as_object = not all(
+ self.__can_cast_timestamp_ns(col)
+ for col in record_batch
+ # Type can be date32 or date64 (plus units).
+ # See: https://arrow.apache.org/docs/python/api/datatypes.html
+ if pyarrow.types.is_date(col.type)
+ )
- timestamp_as_object = not all(
- self.__can_cast_timestamp_ns(col)
- for col in record_batch
- # Type can be timestamp (plus units and time zone).
- # See: https://arrow.apache.org/docs/python/api/datatypes.html
- if str(col.type).startswith("timestamp")
- )
+ timestamp_as_object = False
+ if datetime_dtype is None and timestamp_dtype is None:
+ timestamp_as_object = not all(
+ self.__can_cast_timestamp_ns(col)
+ for col in record_batch
+ # Type can be datetime and timestamp (plus units and time zone).
+ # See: https://arrow.apache.org/docs/python/api/datatypes.html
+ if pyarrow.types.is_timestamp(col.type)
+ )
- if len(record_batch) > 0:
- df = record_batch.to_pandas(
+ df = record_batch.to_pandas(
+ date_as_object=date_as_object,
+ timestamp_as_object=timestamp_as_object,
+ integer_object_nulls=True,
+ types_mapper=_pandas_helpers.default_types_mapper(
date_as_object=date_as_object,
- timestamp_as_object=timestamp_as_object,
- integer_object_nulls=True,
- types_mapper=_pandas_helpers.default_types_mapper(
- date_as_object=date_as_object
- ),
- )
- else:
- # Avoid "ValueError: need at least one array to concatenate" on
- # older versions of pandas when converting empty RecordBatch to
- # DataFrame. See: https://github.com/pandas-dev/pandas/issues/41241
- df = pandas.DataFrame([], columns=record_batch.schema.names)
+ bool_dtype=bool_dtype,
+ int_dtype=int_dtype,
+ float_dtype=float_dtype,
+ string_dtype=string_dtype,
+ date_dtype=date_dtype,
+ datetime_dtype=datetime_dtype,
+ time_dtype=time_dtype,
+ timestamp_dtype=timestamp_dtype,
+ range_date_dtype=range_date_dtype,
+ range_datetime_dtype=range_datetime_dtype,
+ range_timestamp_dtype=range_timestamp_dtype,
+ ),
+ )
for column in dtypes:
df[column] = pandas.Series(df[column], dtype=dtypes[column], copy=False)
@@ -2040,11 +2759,15 @@ def __can_cast_timestamp_ns(column):
# changes to job.QueryJob.to_geodataframe()
def to_geodataframe(
self,
- bqstorage_client: "bigquery_storage.BigQueryReadClient" = None,
- dtypes: Dict[str, Any] = None,
- progress_bar_type: str = None,
+ bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
+ dtypes: Optional[Dict[str, Any]] = None,
+ progress_bar_type: Optional[str] = None,
create_bqstorage_client: bool = True,
geography_column: Optional[str] = None,
+ bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
+ int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
+ float_dtype: Union[Any, None] = None,
+ string_dtype: Union[Any, None] = None,
) -> "geopandas.GeoDataFrame":
"""Create a GeoPandas GeoDataFrame by loading all pages of a query.
@@ -2075,9 +2798,9 @@ def to_geodataframe(
No progress bar.
``'tqdm'``
Use the :func:`tqdm.tqdm` function to print a progress bar
- to :data:`sys.stderr`.
+ to :data:`sys.stdout`.
``'tqdm_notebook'``
- Use the :func:`tqdm.tqdm_notebook` function to display a
+ Use the :func:`tqdm.notebook.tqdm` function to display a
progress bar as a Jupyter notebook widget.
``'tqdm_gui'``
Use the :func:`tqdm.tqdm_gui` function to display a
@@ -2096,6 +2819,34 @@ def to_geodataframe(
identifies which one to use to construct a geopandas
                GeoDataFrame. This option can be omitted if there's
only one GEOGRAPHY column.
+            bool_dtype (Optional[pandas.Series.dtype, None]):
+                If set, specifies the pandas ExtensionDtype (e.g. ``pandas.BooleanDtype()``)
+                used to convert the BigQuery Boolean type, instead of relying on the
+                default ``pandas.BooleanDtype()``. If you explicitly set the value to
+                ``None``, then the data type will be ``numpy.dtype("bool")``. The
+                BigQuery Boolean type is documented at:
+                https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#boolean_type
+            int_dtype (Optional[pandas.Series.dtype, None]):
+                If set, specifies the pandas ExtensionDtype (e.g. ``pandas.Int64Dtype()``)
+                used to convert BigQuery Integer types, instead of relying on the
+                default ``pandas.Int64Dtype()``. If you explicitly set the value to
+                ``None``, then the data type will be ``numpy.dtype("int64")``. A list
+                of BigQuery Integer types can be found at:
+                https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#integer_types
+            float_dtype (Optional[pandas.Series.dtype, None]):
+                If set, specifies the pandas ExtensionDtype (e.g. ``pandas.Float32Dtype()``)
+                used to convert the BigQuery Float type, instead of relying on the
+                default ``numpy.dtype("float64")``. If you explicitly set the value to
+                ``None``, then the data type will be ``numpy.dtype("float64")``. The
+                BigQuery Float type is documented at:
+                https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#floating_point_types
+            string_dtype (Optional[pandas.Series.dtype, None]):
+                If set, specifies the pandas ExtensionDtype (e.g. ``pandas.StringDtype()``)
+                used to convert the BigQuery String type, instead of relying on the
+                default ``numpy.dtype("object")``. If you explicitly set the value to
+                ``None``, then the data type will be ``numpy.dtype("object")``. The
+                BigQuery String type is documented at:
+                https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#string_type
Returns:
geopandas.GeoDataFrame:
@@ -2147,6 +2898,10 @@ def to_geodataframe(
progress_bar_type,
create_bqstorage_client,
geography_as_object=True,
+ bool_dtype=bool_dtype,
+ int_dtype=int_dtype,
+ float_dtype=float_dtype,
+ string_dtype=string_dtype,
)
return geopandas.GeoDataFrame(
@@ -2162,7 +2917,6 @@ class _EmptyRowIterator(RowIterator):
statements.
"""
- schema = ()
pages = ()
total_rows = 0
@@ -2194,6 +2948,8 @@ def to_arrow(
Returns:
pyarrow.Table: An empty :class:`pyarrow.Table`.
"""
+ if pyarrow is None:
+ raise ValueError(_NO_PYARROW_ERROR)
return pyarrow.Table.from_arrays(())
def to_dataframe(
@@ -2203,6 +2959,17 @@ def to_dataframe(
progress_bar_type=None,
create_bqstorage_client=True,
geography_as_object=False,
+ bool_dtype=None,
+ int_dtype=None,
+ float_dtype=None,
+ string_dtype=None,
+ date_dtype=None,
+ datetime_dtype=None,
+ time_dtype=None,
+ timestamp_dtype=None,
+ range_date_dtype=None,
+ range_datetime_dtype=None,
+ range_timestamp_dtype=None,
) -> "pandas.DataFrame":
"""Create an empty dataframe.
@@ -2211,6 +2978,18 @@ def to_dataframe(
dtypes (Any): Ignored. Added for compatibility with RowIterator.
progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
+ geography_as_object (bool): Ignored. Added for compatibility with RowIterator.
+ bool_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ int_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ float_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ string_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ date_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ datetime_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ time_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ range_date_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ range_datetime_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ range_timestamp_dtype (Any): Ignored. Added for compatibility with RowIterator.
Returns:
pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
@@ -2225,6 +3004,10 @@ def to_geodataframe(
progress_bar_type=None,
create_bqstorage_client=True,
geography_column: Optional[str] = None,
+ bool_dtype: Union[Any, None] = DefaultPandasDTypes.BOOL_DTYPE,
+ int_dtype: Union[Any, None] = DefaultPandasDTypes.INT_DTYPE,
+ float_dtype: Union[Any, None] = None,
+ string_dtype: Union[Any, None] = None,
) -> "pandas.DataFrame":
"""Create an empty dataframe.
@@ -2233,6 +3016,11 @@ def to_geodataframe(
dtypes (Any): Ignored. Added for compatibility with RowIterator.
progress_bar_type (Any): Ignored. Added for compatibility with RowIterator.
create_bqstorage_client (bool): Ignored. Added for compatibility with RowIterator.
+ geography_column (str): Ignored. Added for compatibility with RowIterator.
+ bool_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ int_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ float_dtype (Any): Ignored. Added for compatibility with RowIterator.
+ string_dtype (Any): Ignored. Added for compatibility with RowIterator.
Returns:
pandas.DataFrame: An empty :class:`~pandas.DataFrame`.
@@ -2249,6 +3037,7 @@ def to_dataframe_iterable(
bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
dtypes: Optional[Dict[str, Any]] = None,
max_queue_size: Optional[int] = None,
+ max_stream_count: Optional[int] = None,
) -> Iterator["pandas.DataFrame"]:
"""Create an iterable of pandas DataFrames, to process the table as a stream.
@@ -2264,6 +3053,9 @@ def to_dataframe_iterable(
max_queue_size:
Ignored. Added for compatibility with RowIterator.
+ max_stream_count:
+ Ignored. Added for compatibility with RowIterator.
+
Returns:
An iterator yielding a single empty :class:`~pandas.DataFrame`.
@@ -2278,6 +3070,7 @@ def to_arrow_iterable(
self,
bqstorage_client: Optional["bigquery_storage.BigQueryReadClient"] = None,
max_queue_size: Optional[int] = None,
+ max_stream_count: Optional[int] = None,
) -> Iterator["pyarrow.RecordBatch"]:
"""Create an iterable of pandas DataFrames, to process the table as a stream.
@@ -2290,6 +3083,9 @@ def to_arrow_iterable(
max_queue_size:
Ignored. Added for compatibility with RowIterator.
+ max_stream_count:
+ Ignored. Added for compatibility with RowIterator.
+
Returns:
An iterator yielding a single empty :class:`~pyarrow.RecordBatch`.
"""
@@ -2660,6 +3456,281 @@ def __repr__(self):
return "TimePartitioning({})".format(",".join(key_vals))
+class PrimaryKey:
+ """Represents the primary key constraint on a table's columns.
+
+ Args:
+        columns: The columns that compose the primary key constraint.
+ """
+
+ def __init__(self, columns: List[str]):
+ self.columns = columns
+
+ def __eq__(self, other):
+ if not isinstance(other, PrimaryKey):
+ raise TypeError("The value provided is not a BigQuery PrimaryKey.")
+ return self.columns == other.columns
+
+
+class ColumnReference:
+ """The pair of the foreign key column and primary key column.
+
+ Args:
+ referencing_column: The column that composes the foreign key.
+        referenced_column: The column in the primary key that is referenced by the referencing column.
+ """
+
+ def __init__(self, referencing_column: str, referenced_column: str):
+ self.referencing_column = referencing_column
+ self.referenced_column = referenced_column
+
+ def __eq__(self, other):
+ if not isinstance(other, ColumnReference):
+ raise TypeError("The value provided is not a BigQuery ColumnReference.")
+ return (
+ self.referencing_column == other.referencing_column
+ and self.referenced_column == other.referenced_column
+ )
+
+
+class ForeignKey:
+ """Represents a foreign key constraint on a table's columns.
+
+ Args:
+ name: Set only if the foreign key constraint is named.
+ referenced_table: The table that holds the primary key and is referenced by this foreign key.
+ column_references: The columns that compose the foreign key.
+ """
+
+ def __init__(
+ self,
+ name: str,
+ referenced_table: TableReference,
+ column_references: List[ColumnReference],
+ ):
+ self.name = name
+ self.referenced_table = referenced_table
+ self.column_references = column_references
+
+ def __eq__(self, other):
+ if not isinstance(other, ForeignKey):
+ raise TypeError("The value provided is not a BigQuery ForeignKey.")
+ return (
+ self.name == other.name
+ and self.referenced_table == other.referenced_table
+ and self.column_references == other.column_references
+ )
+
+ @classmethod
+ def from_api_repr(cls, api_repr: Dict[str, Any]) -> "ForeignKey":
+ """Create an instance from API representation."""
+ return cls(
+ name=api_repr["name"],
+ referenced_table=TableReference.from_api_repr(api_repr["referencedTable"]),
+ column_references=[
+ ColumnReference(
+ column_reference_resource["referencingColumn"],
+ column_reference_resource["referencedColumn"],
+ )
+ for column_reference_resource in api_repr["columnReferences"]
+ ],
+ )
+
+ def to_api_repr(self) -> Dict[str, Any]:
+ """Return a dictionary representing this object."""
+ return {
+ "name": self.name,
+ "referencedTable": self.referenced_table.to_api_repr(),
+ "columnReferences": [
+ {
+ "referencingColumn": column_reference.referencing_column,
+ "referencedColumn": column_reference.referenced_column,
+ }
+ for column_reference in self.column_references
+ ],
+ }
+
+
+class TableConstraints:
+    """TableConstraints defines the primary key and foreign key constraints on a table.
+
+ Args:
+ primary_key:
+ Represents a primary key constraint on a table's columns. Present only if the table
+ has a primary key. The primary key is not enforced.
+ foreign_keys:
+            The foreign key constraints on the table's columns. Present only if the
+            table has foreign keys. The foreign keys are not enforced.
+
+ """
+
+ def __init__(
+ self,
+ primary_key: Optional[PrimaryKey],
+ foreign_keys: Optional[List[ForeignKey]],
+ ):
+ self.primary_key = primary_key
+ self.foreign_keys = foreign_keys
+
+ def __eq__(self, other):
+        if other is None:
+            return False
+        if not isinstance(other, TableConstraints):
+            raise TypeError("The value provided is not a BigQuery TableConstraints.")
+        return (
+            self.primary_key == other.primary_key
+            and self.foreign_keys == other.foreign_keys
+        )
+
+ @classmethod
+ def from_api_repr(cls, resource: Dict[str, Any]) -> "TableConstraints":
+ """Create an instance from API representation."""
+ primary_key = None
+ if "primaryKey" in resource:
+ primary_key = PrimaryKey(resource["primaryKey"]["columns"])
+
+ foreign_keys = None
+ if "foreignKeys" in resource:
+ foreign_keys = [
+ ForeignKey.from_api_repr(foreign_key_resource)
+ for foreign_key_resource in resource["foreignKeys"]
+ ]
+ return cls(primary_key, foreign_keys)
+
+ def to_api_repr(self) -> Dict[str, Any]:
+ """Return a dictionary representing this object."""
+ resource: Dict[str, Any] = {}
+ if self.primary_key:
+ resource["primaryKey"] = {"columns": self.primary_key.columns}
+ if self.foreign_keys:
+ resource["foreignKeys"] = [
+ foreign_key.to_api_repr() for foreign_key in self.foreign_keys
+ ]
+ return resource
+
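+# Illustrative sketch only (project, dataset, table, and column names below are
+# hypothetical): a table constraints resource returned by the BigQuery API can be
+# parsed and re-serialized with the classes above, e.g.::
+#
+#     constraints = TableConstraints.from_api_repr(
+#         {
+#             "primaryKey": {"columns": ["id"]},
+#             "foreignKeys": [
+#                 {
+#                     "name": "fk_order_customer",
+#                     "referencedTable": {
+#                         "projectId": "my-project",
+#                         "datasetId": "my_dataset",
+#                         "tableId": "customers",
+#                     },
+#                     "columnReferences": [
+#                         {"referencingColumn": "customer_id", "referencedColumn": "id"}
+#                     ],
+#                 }
+#             ],
+#         }
+#     )
+#     assert constraints.primary_key == PrimaryKey(columns=["id"])
+#     assert constraints.to_api_repr()["primaryKey"] == {"columns": ["id"]}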
+
+class BigLakeConfiguration(object):
+ """Configuration for managed tables for Apache Iceberg, formerly
+ known as BigLake.
+
+ Args:
+ connection_id (Optional[str]):
+ The connection specifying the credentials to be used to read and write to external
+ storage, such as Cloud Storage. The connection_id can have the form
+ ``{project}.{location}.{connection_id}`` or
+ ``projects/{project}/locations/{location}/connections/{connection_id}``.
+ storage_uri (Optional[str]):
+ The fully qualified location prefix of the external folder where table data is
+ stored. The '*' wildcard character is not allowed. The URI should be in the
+ format ``gs://bucket/path_to_table/``.
+ file_format (Optional[str]):
+ The file format the table data is stored in. See BigLakeFileFormat for available
+ values.
+ table_format (Optional[str]):
+            The table format that metadata-only snapshots are stored in. See BigLakeTableFormat
+ for available values.
+ _properties (Optional[dict]):
+ Private. Used to construct object from API resource.
+ """
+
+ def __init__(
+ self,
+ connection_id: Optional[str] = None,
+ storage_uri: Optional[str] = None,
+ file_format: Optional[str] = None,
+ table_format: Optional[str] = None,
+ _properties: Optional[dict] = None,
+ ) -> None:
+ if _properties is None:
+ _properties = {}
+ self._properties = _properties
+ if connection_id is not None:
+ self.connection_id = connection_id
+ if storage_uri is not None:
+ self.storage_uri = storage_uri
+ if file_format is not None:
+ self.file_format = file_format
+ if table_format is not None:
+ self.table_format = table_format
+
+ @property
+ def connection_id(self) -> Optional[str]:
+ """str: The connection specifying the credentials to be used to read and write to external
+ storage, such as Cloud Storage."""
+ return self._properties.get("connectionId")
+
+ @connection_id.setter
+ def connection_id(self, value: Optional[str]):
+ self._properties["connectionId"] = value
+
+ @property
+ def storage_uri(self) -> Optional[str]:
+ """str: The fully qualified location prefix of the external folder where table data is
+ stored."""
+ return self._properties.get("storageUri")
+
+ @storage_uri.setter
+ def storage_uri(self, value: Optional[str]):
+ self._properties["storageUri"] = value
+
+ @property
+ def file_format(self) -> Optional[str]:
+ """str: The file format the table data is stored in. See BigLakeFileFormat for available
+ values."""
+ return self._properties.get("fileFormat")
+
+ @file_format.setter
+ def file_format(self, value: Optional[str]):
+ self._properties["fileFormat"] = value
+
+ @property
+ def table_format(self) -> Optional[str]:
+        """str: The table format that metadata-only snapshots are stored in. See BigLakeTableFormat
+ for available values."""
+ return self._properties.get("tableFormat")
+
+ @table_format.setter
+ def table_format(self, value: Optional[str]):
+ self._properties["tableFormat"] = value
+
+ def _key(self):
+ return tuple(sorted(self._properties.items()))
+
+ def __eq__(self, other):
+ if not isinstance(other, BigLakeConfiguration):
+ return NotImplemented
+ return self._key() == other._key()
+
+ def __ne__(self, other):
+ return not self == other
+
+ def __hash__(self):
+ return hash(self._key())
+
+ def __repr__(self):
+ key_vals = ["{}={}".format(key, val) for key, val in self._key()]
+ return "BigLakeConfiguration({})".format(",".join(key_vals))
+
+ @classmethod
+ def from_api_repr(cls, resource: Dict[str, Any]) -> "BigLakeConfiguration":
+ """Factory: construct a BigLakeConfiguration given its API representation.
+
+ Args:
+ resource:
+ BigLakeConfiguration representation returned from the API
+
+ Returns:
+ BigLakeConfiguration parsed from ``resource``.
+ """
+ ref = cls()
+ ref._properties = resource
+ return ref
+
+ def to_api_repr(self) -> Dict[str, Any]:
+ """Construct the API resource representation of this BigLakeConfiguration.
+
+ Returns:
+ BigLakeConfiguration represented as an API resource.
+ """
+ return copy.deepcopy(self._properties)
+
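+# Illustrative sketch only (bucket, connection, and format values are hypothetical):
+#
+#     biglake_config = BigLakeConfiguration(
+#         connection_id="my-project.us.my-connection",
+#         storage_uri="gs://my-bucket/my_table/",
+#         file_format="PARQUET",
+#         table_format="ICEBERG",
+#     )
+#     assert biglake_config.to_api_repr()["fileFormat"] == "PARQUET"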
+
def _item_to_row(iterator, resource):
"""Convert a JSON row to the native object.
@@ -2693,7 +3764,9 @@ def _row_iterator_page_columns(schema, response):
def get_column_data(field_index, field):
for row in rows:
- yield _helpers._field_from_json(row["f"][field_index]["v"], field)
+ yield _helpers.DATA_FRAME_CELL_DATA_PARSER.to_py(
+ row["f"][field_index]["v"], field
+ )
for field_index, field in enumerate(schema):
columns.append(get_column_data(field_index, field))
@@ -2715,9 +3788,9 @@ def _rows_page_start(iterator, page, response):
page._columns = _row_iterator_page_columns(iterator._schema, response)
total_rows = response.get("totalRows")
+ # Don't reset total_rows if it's not present in the next API response.
if total_rows is not None:
- total_rows = int(total_rows)
- iterator._total_rows = total_rows
+ iterator._total_rows = int(total_rows)
# pylint: enable=unused-argument
diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py
index eb307e8d6..1d5e35889 100644
--- a/google/cloud/bigquery/version.py
+++ b/google/cloud/bigquery/version.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-__version__ = "3.3.2"
+__version__ = "3.39.0"
diff --git a/mypy.ini b/mypy.ini
index 4505b4854..beaa679a8 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -1,3 +1,3 @@
[mypy]
-python_version = 3.6
+python_version = 3.8
namespace_packages = True
diff --git a/noxfile.py b/noxfile.py
index c6f7c76b1..194e7ce8f 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -14,24 +14,55 @@
from __future__ import absolute_import
+from functools import wraps
import pathlib
import os
import re
import shutil
-
import nox
+import time
-MYPY_VERSION = "mypy==0.910"
-PYTYPE_VERSION = "pytype==2021.4.9"
-BLACK_VERSION = "black==22.3.0"
-BLACK_PATHS = ("docs", "google", "samples", "tests", "noxfile.py", "setup.py")
-
-DEFAULT_PYTHON_VERSION = "3.8"
-SYSTEM_TEST_PYTHON_VERSIONS = ["3.8", "3.10"]
-UNIT_TEST_PYTHON_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
+MYPY_VERSION = "mypy==1.6.1"
+PYTYPE_VERSION = "pytype==2024.9.13"
+BLACK_VERSION = "black==23.7.0"
+BLACK_PATHS = (
+ "benchmark",
+ "docs",
+ "google",
+ "samples",
+ "samples/tests",
+ "tests",
+ "noxfile.py",
+ "setup.py",
+)
+
+DEFAULT_PYTHON_VERSION = "3.9"
+SYSTEM_TEST_PYTHON_VERSIONS = ["3.9", "3.11", "3.12", "3.13"]
+UNIT_TEST_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
CURRENT_DIRECTORY = pathlib.Path(__file__).parent.absolute()
+
+def _calculate_duration(func):
+ """This decorator prints the execution time for the decorated function."""
+
+ @wraps(func)
+ def wrapper(*args, **kwargs):
+ start = time.monotonic()
+ result = func(*args, **kwargs)
+ end = time.monotonic()
+ total_seconds = round(end - start)
+ hours = total_seconds // 3600 # Integer division to get hours
+ remaining_seconds = total_seconds % 3600 # Modulo to find remaining seconds
+ minutes = remaining_seconds // 60
+ seconds = remaining_seconds % 60
+        human_time = f"{hours}:{minutes:0>2}:{seconds:0>2}"
+ print(f"Session ran in {total_seconds} seconds ({human_time})")
+ return result
+
+ return wrapper
+
+
# 'docfx' is excluded since it only needs to run in 'docs-presubmit'
nox.options.sessions = [
"unit_noextras",
@@ -57,33 +88,50 @@ def default(session, install_extras=True):
Python corresponding to the ``nox`` binary the ``PATH`` can
run the tests.
"""
+
constraints_path = str(
CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
)
# Install all test dependencies, then install local packages in-place.
session.install(
- "mock",
- "pytest",
+ # TODO(https://github.com/pytest-dev/pytest-xdist/issues/1273): Remove once this bug is fixed
+ "pytest<9",
"google-cloud-testutils",
"pytest-cov",
+ "pytest-xdist",
"freezegun",
"-c",
constraints_path,
)
-
- if install_extras and session.python == "3.10":
- install_target = ".[bqstorage,pandas,tqdm,opentelemetry]"
- elif install_extras:
+    # magics.py contains logic that checks whether 'bigquery_magics' can be
+    # imported. If it can, we use a context object from that library; if not,
+    # we fall back to our own context object in magics.py. To exercise that
+    # logic (and the associated tests) we avoid installing the [ipython] extra,
+    # which has the downstream effect of not installing bigquery_magics.
+ if install_extras and session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
+ install_target = ".[bqstorage,pandas,ipywidgets,geopandas,matplotlib,tqdm,opentelemetry,bigquery_v2]"
+ elif install_extras: # run against all other UNIT_TEST_PYTHON_VERSIONS
install_target = ".[all]"
else:
install_target = "."
session.install("-e", install_target, "-c", constraints_path)
+ # Test with some broken "extras" in case the user didn't install the extra
+ # directly. For example, pandas-gbq is recommended for pandas features, but
+    # we want to test that we fall back to the previous behavior. For context,
+ # see internal document go/pandas-gbq-and-bigframes-redundancy.
+ if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
+ session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y")
+
+ session.run("python", "-m", "pip", "freeze")
+
# Run py.test against the unit tests.
session.run(
"py.test",
+ "-n=8",
"--quiet",
+ "-W default::PendingDeprecationWarning",
"--cov=google/cloud/bigquery",
"--cov=tests/unit",
"--cov-append",
@@ -96,27 +144,35 @@ def default(session, install_extras=True):
@nox.session(python=UNIT_TEST_PYTHON_VERSIONS)
+@_calculate_duration
def unit(session):
"""Run the unit test suite."""
+
default(session)
@nox.session(python=[UNIT_TEST_PYTHON_VERSIONS[0], UNIT_TEST_PYTHON_VERSIONS[-1]])
+@_calculate_duration
def unit_noextras(session):
"""Run the unit test suite."""
- # Install optional dependencies that are out-of-date.
+ # Install optional dependencies that are out-of-date to see that
+ # we fail gracefully.
# https://github.com/googleapis/python-bigquery/issues/933
- # There is no pyarrow 1.0.0 package for Python 3.9.
+ #
+ # We only install this extra package on one of the two Python versions
+ # so that it continues to be an optional dependency.
+ # https://github.com/googleapis/python-bigquery/issues/1877
if session.python == UNIT_TEST_PYTHON_VERSIONS[0]:
- session.install("pyarrow==1.0.0")
-
+ session.install("pyarrow==4.0.0", "numpy==1.20.2")
default(session, install_extras=False)
@nox.session(python=DEFAULT_PYTHON_VERSION)
+@_calculate_duration
def mypy(session):
"""Run type checks with mypy."""
+
session.install("-e", ".[all]")
session.install(MYPY_VERSION)
@@ -128,22 +184,28 @@ def mypy(session):
"types-requests",
"types-setuptools",
)
- session.run("mypy", "google/cloud")
+ session.run("python", "-m", "pip", "freeze")
+ session.run("mypy", "-p", "google", "--show-traceback")
@nox.session(python=DEFAULT_PYTHON_VERSION)
+@_calculate_duration
def pytype(session):
"""Run type checks with pytype."""
    # An indirect dependency attrs==21.1.0 breaks the check, and installing a less
# recent version avoids the error until a possibly better fix is found.
# https://github.com/googleapis/python-bigquery/issues/655
+
session.install("attrs==20.3.0")
session.install("-e", ".[all]")
session.install(PYTYPE_VERSION)
- session.run("pytype")
+ session.run("python", "-m", "pip", "freeze")
+ # See https://github.com/google/pytype/issues/464
+ session.run("pytype", "-P", ".", "google/cloud/bigquery")
@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS)
+@_calculate_duration
def system(session):
"""Run the system test suite."""
@@ -151,20 +213,23 @@ def system(session):
CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
)
- # Check the value of `RUN_SYSTEM_TESTS` env var. It defaults to true.
- if os.environ.get("RUN_SYSTEM_TESTS", "true") == "false":
- session.skip("RUN_SYSTEM_TESTS is set to false, skipping")
-
# Sanity check: Only run system tests if the environment variable is set.
if not os.environ.get("GOOGLE_APPLICATION_CREDENTIALS", ""):
session.skip("Credentials must be set via environment variable.")
# Use pre-release gRPC for system tests.
- session.install("--pre", "grpcio", "-c", constraints_path)
+ # Exclude version 1.49.0rc1 which has a known issue.
+ # See https://github.com/grpc/grpc/pull/30642
+ session.install("--pre", "grpcio!=1.49.0rc1", "-c", constraints_path)
# Install all test dependencies, then install local packages in place.
session.install(
- "mock", "pytest", "psutil", "google-cloud-testutils", "-c", constraints_path
+ "pytest",
+ "psutil",
+ "pytest-xdist",
+ "google-cloud-testutils",
+ "-c",
+ constraints_path,
)
if os.environ.get("GOOGLE_API_USE_CLIENT_CERTIFICATE", "") == "true":
# mTLS test requires pyopenssl and latest google-cloud-storage
@@ -175,28 +240,62 @@ def system(session):
# Data Catalog needed for the column ACL test with a real Policy Tag.
session.install("google-cloud-datacatalog", "-c", constraints_path)
- if session.python == "3.10":
- extras = "[bqstorage,pandas,tqdm,opentelemetry]"
+ # Resource Manager needed for test with a real Resource Tag.
+ session.install("google-cloud-resource-manager", "-c", constraints_path)
+
+ if session.python in ["3.11", "3.12"]:
+ extras = "[bqstorage,ipywidgets,pandas,tqdm,opentelemetry]"
else:
extras = "[all]"
session.install("-e", f".{extras}", "-c", constraints_path)
+ # Test with some broken "extras" in case the user didn't install the extra
+ # directly. For example, pandas-gbq is recommended for pandas features, but
+    # we want to test that we fall back to the previous behavior. For context,
+ # see internal document go/pandas-gbq-and-bigframes-redundancy.
+ if session.python == SYSTEM_TEST_PYTHON_VERSIONS[0]:
+ session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y")
+
+ # print versions of all dependencies
+ session.run("python", "-m", "pip", "freeze")
+
# Run py.test against the system tests.
- session.run("py.test", "--quiet", os.path.join("tests", "system"), *session.posargs)
+ session.run(
+ "py.test",
+ "-n=auto",
+ "--quiet",
+ "-W default::PendingDeprecationWarning",
+ os.path.join("tests", "system"),
+ *session.posargs,
+ )
@nox.session(python=DEFAULT_PYTHON_VERSION)
+@_calculate_duration
def mypy_samples(session):
"""Run type checks with mypy."""
- session.install("-e", ".[all]")
- session.install("ipython", "pytest")
+ session.install("pytest")
+ for requirements_path in CURRENT_DIRECTORY.glob("samples/*/requirements.txt"):
+ session.install("-r", str(requirements_path))
session.install(MYPY_VERSION)
+ # requirements.txt might include this package. Install from source so that
+ # we can author samples with unreleased features.
+ session.install("-e", ".[all]")
+
# Just install the dependencies' type info directly, since "mypy --install-types"
# might require an additional pass.
- session.install("types-mock", "types-pytz")
- session.install("typing-extensions") # for TypedDict in pre-3.8 Python versions
+ session.install(
+ "types-mock",
+ "types-pytz",
+ "types-protobuf!=4.24.0.20240106", # This version causes an error: 'Module "google.oauth2" has no attribute "service_account"'
+ "types-python-dateutil",
+ "types-requests",
+ "types-setuptools",
+ )
+
+ session.run("python", "-m", "pip", "freeze")
session.run(
"mypy",
@@ -208,136 +307,172 @@ def mypy_samples(session):
@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS)
+@_calculate_duration
def snippets(session):
"""Run the snippets test suite."""
- # Check the value of `RUN_SNIPPETS_TESTS` env var. It defaults to true.
- if os.environ.get("RUN_SNIPPETS_TESTS", "true") == "false":
- session.skip("RUN_SNIPPETS_TESTS is set to false, skipping")
-
constraints_path = str(
CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt"
)
# Install all test dependencies, then install local packages in place.
- session.install("mock", "pytest", "google-cloud-testutils", "-c", constraints_path)
+ session.install(
+ "pytest", "pytest-xdist", "google-cloud-testutils", "-c", constraints_path
+ )
session.install("google-cloud-storage", "-c", constraints_path)
session.install("grpcio", "-c", constraints_path)
- if session.python == "3.10":
- extras = "[bqstorage,pandas,tqdm,opentelemetry]"
+ if session.python in ["3.11", "3.12"]:
+ extras = (
+ "[bqstorage,pandas,ipywidgets,geopandas,tqdm,opentelemetry,bigquery_v2]"
+ )
else:
extras = "[all]"
session.install("-e", f".{extras}", "-c", constraints_path)
+ session.run("python", "-m", "pip", "freeze")
# Run py.test against the snippets tests.
# Skip tests in samples/snippets, as those are run in a different session
# using the nox config from that directory.
- session.run("py.test", os.path.join("docs", "snippets.py"), *session.posargs)
+ session.run(
+ "py.test", "-n=auto", os.path.join("docs", "snippets.py"), *session.posargs
+ )
session.run(
"py.test",
+ "-n=auto",
"samples",
+ "-W default::PendingDeprecationWarning",
+ "--ignore=samples/desktopapp",
"--ignore=samples/magics",
"--ignore=samples/geography",
+ "--ignore=samples/notebooks",
"--ignore=samples/snippets",
*session.posargs,
)
@nox.session(python=DEFAULT_PYTHON_VERSION)
+@_calculate_duration
def cover(session):
"""Run the final coverage report.
This outputs the coverage report aggregating coverage from the unit
test runs (not system test runs), and then erases coverage data.
"""
+
session.install("coverage", "pytest-cov")
+ session.run("python", "-m", "pip", "freeze")
session.run("coverage", "report", "--show-missing", "--fail-under=100")
session.run("coverage", "erase")
@nox.session(python=SYSTEM_TEST_PYTHON_VERSIONS)
+@_calculate_duration
def prerelease_deps(session):
"""Run all tests with prerelease versions of dependencies installed.
https://github.com/googleapis/python-bigquery/issues/95
"""
+ # Because we test minimum dependency versions on the minimum Python
+    # version, the first version we test with in the unit test sessions has a
+ # constraints file containing all dependencies and extras.
+ with open(
+ CURRENT_DIRECTORY
+ / "testing"
+ / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt",
+ encoding="utf-8",
+ ) as constraints_file:
+ constraints_text = constraints_file.read()
+
+ # Ignore leading whitespace and comment lines.
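+    # For example, a pinned constraint line like "pandas==1.3.0" yields "pandas".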
+ deps = [
+ match.group(1)
+ for match in re.finditer(
+ r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE
+ )
+ ]
+
+ session.install(*deps)
+
+ session.install(
+ "--pre",
+ "--upgrade",
+ "freezegun",
+ "google-cloud-datacatalog",
+ "google-cloud-resource-manager",
+ "google-cloud-storage",
+ "google-cloud-testutils",
+ "psutil",
+ "pytest",
+ "pytest-xdist",
+ "pytest-cov",
+ )
+
# PyArrow prerelease packages are published to an alternative PyPI host.
- # https://arrow.apache.org/docs/python/install.html#installing-nightly-packages
+ # https://arrow.apache.org/docs/developers/python.html#installing-nightly-packages
session.install(
"--extra-index-url",
- "https://pypi.fury.io/arrow-nightlies/",
+ "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple",
"--prefer-binary",
"--pre",
"--upgrade",
"pyarrow",
)
session.install(
- "--extra-index-url",
- "https://pypi.anaconda.org/scipy-wheels-nightly/simple",
- "--prefer-binary",
"--pre",
"--upgrade",
+ "IPython",
+ "ipykernel",
+ "ipywidgets",
+ "tqdm",
+ "git+https://github.com/pypa/packaging.git",
"pandas",
)
session.install(
"--pre",
"--upgrade",
+ "--no-deps",
"google-api-core",
"google-cloud-bigquery-storage",
"google-cloud-core",
"google-resumable-media",
+ "db-dtypes",
"grpcio",
+ "protobuf",
)
- session.install(
- "freezegun",
- "google-cloud-datacatalog",
- "google-cloud-storage",
- "google-cloud-testutils",
- "IPython",
- "mock",
- "psutil",
- "pytest",
- "pytest-cov",
- )
-
- # Because we test minimum dependency versions on the minimum Python
- # version, the first version we test with in the unit tests sessions has a
- # constraints file containing all dependencies and extras.
- with open(
- CURRENT_DIRECTORY
- / "testing"
- / f"constraints-{UNIT_TEST_PYTHON_VERSIONS[0]}.txt",
- encoding="utf-8",
- ) as constraints_file:
- constraints_text = constraints_file.read()
-
- # Ignore leading whitespace and comment lines.
- deps = [
- match.group(1)
- for match in re.finditer(
- r"^\s*(\S+)(?===\S+)", constraints_text, flags=re.MULTILINE
- )
- ]
- # We use --no-deps to ensure that pre-release versions aren't overwritten
- # by the version ranges in setup.py.
- session.install(*deps)
- session.install("--no-deps", "-e", ".[all]")
+ # Ensure that this library is installed from source
+ session.install("-e", ".", "--no-deps")
# Print out prerelease package versions.
- session.run("python", "-c", "import grpc; print(grpc.__version__)")
- session.run("python", "-c", "import pandas; print(pandas.__version__)")
- session.run("python", "-c", "import pyarrow; print(pyarrow.__version__)")
+ session.run("python", "-m", "pip", "freeze")
# Run all tests, except a few samples tests which require extra dependencies.
- session.run("py.test", "tests/unit")
- session.run("py.test", "tests/system")
- session.run("py.test", "samples/tests")
+ session.run(
+ "py.test",
+ "-n=auto",
+ "tests/unit",
+ "-W default::PendingDeprecationWarning",
+ )
+
+ session.run(
+ "py.test",
+ "-n=auto",
+ "tests/system",
+ "-W default::PendingDeprecationWarning",
+ )
+
+ session.run(
+ "py.test",
+ "-n=auto",
+ "samples/tests",
+ "-W default::PendingDeprecationWarning",
+ )
@nox.session(python=DEFAULT_PYTHON_VERSION)
+@_calculate_duration
def lint(session):
"""Run linters.
@@ -347,40 +482,61 @@ def lint(session):
session.install("flake8", BLACK_VERSION)
session.install("-e", ".")
+ session.run("python", "-m", "pip", "freeze")
session.run("flake8", os.path.join("google", "cloud", "bigquery"))
session.run("flake8", "tests")
session.run("flake8", os.path.join("docs", "samples"))
session.run("flake8", os.path.join("docs", "snippets.py"))
+ session.run("flake8", "benchmark")
session.run("black", "--check", *BLACK_PATHS)
@nox.session(python=DEFAULT_PYTHON_VERSION)
+@_calculate_duration
def lint_setup_py(session):
"""Verify that setup.py is valid (including RST check)."""
session.install("docutils", "Pygments")
+ session.run("python", "-m", "pip", "freeze")
session.run("python", "setup.py", "check", "--restructuredtext", "--strict")
@nox.session(python=DEFAULT_PYTHON_VERSION)
+@_calculate_duration
def blacken(session):
"""Run black.
Format code to uniform standard.
"""
session.install(BLACK_VERSION)
+ session.run("python", "-m", "pip", "freeze")
session.run("black", *BLACK_PATHS)
-@nox.session(python=DEFAULT_PYTHON_VERSION)
+@nox.session(python="3.10")
+@_calculate_duration
def docs(session):
"""Build the docs."""
- session.install("recommonmark", "sphinx==4.0.1", "sphinx_rtd_theme")
+ session.install(
+ # We need to pin to specific versions of the `sphinxcontrib-*` packages
+ # which still support sphinx 4.x.
+ # See https://github.com/googleapis/sphinx-docfx-yaml/issues/344
+ # and https://github.com/googleapis/sphinx-docfx-yaml/issues/345.
+ "sphinxcontrib-applehelp==1.0.4",
+ "sphinxcontrib-devhelp==1.0.2",
+ "sphinxcontrib-htmlhelp==2.0.1",
+ "sphinxcontrib-qthelp==1.0.3",
+ "sphinxcontrib-serializinghtml==1.1.5",
+ "sphinx==4.5.0",
+ "alabaster",
+ "recommonmark",
+ )
session.install("google-cloud-storage")
session.install("-e", ".[all]")
shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
+ session.run("python", "-m", "pip", "freeze")
session.run(
"sphinx-build",
"-W", # warnings as errors
@@ -395,16 +551,29 @@ def docs(session):
)
-@nox.session(python=DEFAULT_PYTHON_VERSION)
+@nox.session(python="3.10")
+@_calculate_duration
def docfx(session):
"""Build the docfx yaml files for this library."""
session.install("-e", ".")
session.install(
- "sphinx==4.0.1", "alabaster", "recommonmark", "gcp-sphinx-docfx-yaml"
+ # We need to pin to specific versions of the `sphinxcontrib-*` packages
+ # which still support sphinx 4.x.
+ # See https://github.com/googleapis/sphinx-docfx-yaml/issues/344
+ # and https://github.com/googleapis/sphinx-docfx-yaml/issues/345.
+ "sphinxcontrib-applehelp==1.0.4",
+ "sphinxcontrib-devhelp==1.0.2",
+ "sphinxcontrib-htmlhelp==2.0.1",
+ "sphinxcontrib-qthelp==1.0.3",
+ "sphinxcontrib-serializinghtml==1.1.5",
+ "gcp-sphinx-docfx-yaml",
+ "alabaster",
+ "recommonmark",
)
shutil.rmtree(os.path.join("docs", "_build"), ignore_errors=True)
+ session.run("python", "-m", "pip", "freeze")
session.run(
"sphinx-build",
"-T", # show full traceback on exception
diff --git a/owlbot.py b/owlbot.py
deleted file mode 100644
index 4d287ac46..000000000
--- a/owlbot.py
+++ /dev/null
@@ -1,123 +0,0 @@
-# Copyright 2018 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""This script is used to synthesize generated parts of this library."""
-import textwrap
-
-import synthtool as s
-from synthtool import gcp
-from synthtool.languages import python
-
-default_version = "v2"
-
-for library in s.get_staging_dirs(default_version):
- # Avoid breaking change due to change in field renames.
- # https://github.com/googleapis/python-bigquery/issues/319
- s.replace(
- library / f"google/cloud/bigquery_{library.name}/types/standard_sql.py",
- r"type_ ",
- "type ",
- )
- # Patch docs issue
- s.replace(
- library / f"google/cloud/bigquery_{library.name}/types/model.py",
- r"""\"predicted_\"""",
- """`predicted_`""",
- )
- s.move(library / f"google/cloud/bigquery_{library.name}/types")
-s.remove_staging_dirs()
-
-common = gcp.CommonTemplates()
-
-# ----------------------------------------------------------------------------
-# Add templated files
-# ----------------------------------------------------------------------------
-templated_files = common.py_library(
- cov_level=100,
- samples=True,
- microgenerator=True,
- split_system_tests=True,
- intersphinx_dependencies={
- "dateutil": "https://dateutil.readthedocs.io/en/latest/",
- "geopandas": "https://geopandas.org/",
- "pandas": "https://pandas.pydata.org/pandas-docs/stable/",
- },
-)
-
-# BigQuery has a custom multiprocessing note
-s.move(
- templated_files,
- excludes=[
- "noxfile.py",
- "docs/multiprocessing.rst",
- "docs/index.rst",
- ".coveragerc",
- ".github/CODEOWNERS",
- # Include custom SNIPPETS_TESTS job for performance.
- # https://github.com/googleapis/python-bigquery/issues/191
- ".kokoro/presubmit/presubmit.cfg",
- ".github/workflows", # exclude gh actions as credentials are needed for tests
- "README.rst",
- ],
-)
-
-python.configure_previous_major_version_branches()
-# ----------------------------------------------------------------------------
-# Samples templates
-# ----------------------------------------------------------------------------
-
-python.py_samples()
-
-s.replace(
- "docs/conf.py",
- r'\{"members": True\}',
- '{"members": True, "inherited-members": True}',
-)
-s.replace(
- "docs/conf.py",
- r"exclude_patterns = \[",
- '\\g<0>\n "google/cloud/bigquery_v2/**", # Legacy proto-based types.',
-)
-
-# ----------------------------------------------------------------------------
-# pytype-related changes
-# ----------------------------------------------------------------------------
-
-# Add .pytype to .gitignore
-s.replace(".gitignore", r"\.pytest_cache", "\\g<0>\n.pytype")
-
-# Add pytype config to setup.cfg
-s.replace(
- "setup.cfg",
- r"universal = 1",
- textwrap.dedent(
- """ \\g<0>
-
- [pytype]
- python_version = 3.8
- inputs =
- google/cloud/
- exclude =
- tests/
- google/cloud/bigquery_v2/ # Legacy proto-based types.
- output = .pytype/
- disable =
- # There's some issue with finding some pyi files, thus disabling.
- # The issue https://github.com/google/pytype/issues/150 is closed, but the
- # error still occurs for some reason.
- pyi-error"""
- ),
-)
-
-s.shell.run(["nox", "-s", "blacken"], hide_output=False)
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 000000000..a0e356b34
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,112 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[build-system]
+requires = ["setuptools"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "google-cloud-bigquery"
+authors = [{ name = "Google LLC", email = "googleapis-packages@google.com" }]
+license = "Apache-2.0"
+license-files = ["LICENSE"]
+requires-python = ">=3.9"
+description = "Google BigQuery API client library"
+readme = "README.rst"
+classifiers = [
+ # Should be one of:
+ # "Development Status :: 3 - Alpha"
+ # "Development Status :: 4 - Beta"
+ # "Development Status :: 5 - Production/Stable"
+ "Development Status :: 5 - Production/Stable",
+ "Intended Audience :: Developers",
+ "Programming Language :: Python",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Programming Language :: Python :: 3.13",
+ "Programming Language :: Python :: 3.14",
+ "Operating System :: OS Independent",
+ "Topic :: Internet",
+]
+dependencies = [
+ "google-api-core[grpc] >= 2.11.1, < 3.0.0",
+ "google-auth >= 2.14.1, < 3.0.0",
+ "google-cloud-core >= 2.4.1, < 3.0.0",
+ "google-resumable-media >= 2.0.0, < 3.0.0",
+ "packaging >= 24.2.0",
+ "python-dateutil >= 2.8.2, < 3.0.0",
+ "requests >= 2.21.0, < 3.0.0",
+]
+dynamic = ["version"]
+
+[project.urls]
+Repository = "https://github.com/googleapis/python-bigquery"
+
+[project.optional-dependencies]
+# bqstorage had a period where it was a required dependency, and has been
+# moved back to optional due to bloat. See
+# https://github.com/googleapis/python-bigquery/issues/1196 for more background.
+bqstorage = [
+ "google-cloud-bigquery-storage >= 2.18.0, < 3.0.0",
+ # Due to an issue in pip's dependency resolver, the `grpc` extra is not
+ # installed, even though `google-cloud-bigquery-storage` specifies it
+ # as `google-api-core[grpc]`. We thus need to explicitly specify it here.
+    # See: https://github.com/googleapis/python-bigquery/issues/83
+    # The grpc.Channel.close() method isn't added until 1.32.0. See:
+ # https://github.com/grpc/grpc/pull/15254
+ "grpcio >= 1.47.0, < 2.0.0",
+ "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'",
+ "grpcio >= 1.75.1, < 2.0.0; python_version >= '3.14'",
+ "pyarrow >= 4.0.0",
+]
+pandas = [
+ "pandas >= 1.3.0",
+ "pandas-gbq >= 0.26.1",
+ "grpcio >= 1.47.0, < 2.0.0",
+ "grpcio >= 1.49.1, < 2.0.0; python_version >= '3.11'",
+ "grpcio >= 1.75.1, < 2.0.0; python_version >= '3.14'",
+ "pyarrow >= 3.0.0",
+ "db-dtypes >= 1.0.4, < 2.0.0",
+]
+ipywidgets = ["ipywidgets >= 7.7.1", "ipykernel >= 6.2.0"]
+geopandas = ["geopandas >= 0.9.0, < 2.0.0", "Shapely >= 1.8.4, < 3.0.0"]
+ipython = ["ipython >= 7.23.1", "bigquery-magics >= 0.6.0"]
+matplotlib = [
+ "matplotlib >= 3.7.1, <= 3.9.2; python_version == '3.9'",
+ "matplotlib >= 3.10.3; python_version >= '3.10'",
+]
+tqdm = ["tqdm >= 4.23.4, < 5.0.0"]
+opentelemetry = [
+ "opentelemetry-api >= 1.1.0",
+ "opentelemetry-sdk >= 1.1.0",
+ "opentelemetry-instrumentation >= 0.20b0",
+]
+bigquery_v2 = [
+ "proto-plus >= 1.22.3, < 2.0.0",
+ "protobuf >= 3.20.2, < 7.0.0, != 4.21.0, != 4.21.1, != 4.21.2, != 4.21.3, != 4.21.4, != 4.21.5", # For the legacy proto-based types.
+]
+all = [
+ "google-cloud-bigquery[bqstorage,pandas,ipywidgets,geopandas,ipython,matplotlib,tqdm,opentelemetry,bigquery_v2]",
+]
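+# Illustrative: users opt into extras at install time, e.g.
+#   pip install "google-cloud-bigquery[pandas,bqstorage]"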
+
+[tool.setuptools.dynamic]
+version = { attr = "google.cloud.bigquery.version.__version__" }
+
+[tool.setuptools.packages.find]
+# Only include packages under the 'google' namespace. Do not include tests,
+# benchmarks, etc.
+include = ["google*"]
diff --git a/renovate.json b/renovate.json
index 566a70f3c..3ea143d4c 100644
--- a/renovate.json
+++ b/renovate.json
@@ -5,8 +5,15 @@
":preserveSemverRanges",
":disableDependencyDashboard"
],
- "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt"],
+ "ignorePaths": [".pre-commit-config.yaml", ".kokoro/requirements.txt", "setup.py", ".github/workflows/unittest.yml", ".github/workflows/docs.yml"],
"pip_requirements": {
"fileMatch": ["requirements-test.txt", "samples/[\\S/]*constraints.txt", "samples/[\\S/]*constraints-test.txt"]
- }
+ },
+ "packageRules": [
+ {
+ "matchFileNames": ["pyproject.toml"],
+ "matchStrings": ["matplotlib (.*); python_version == '3.9'"],
+ "allowedVersions": ">= 3.7.1, <= 3.9.2"
+ }
+ ]
}
diff --git a/samples/add_empty_column.py b/samples/add_empty_column.py
index 6d449d6e2..5541a6738 100644
--- a/samples/add_empty_column.py
+++ b/samples/add_empty_column.py
@@ -14,7 +14,6 @@
def add_empty_column(table_id: str) -> None:
-
# [START bigquery_add_empty_column]
from google.cloud import bigquery
diff --git a/samples/browse_table_data.py b/samples/browse_table_data.py
index 6a56253bf..2fba65aeb 100644
--- a/samples/browse_table_data.py
+++ b/samples/browse_table_data.py
@@ -14,7 +14,6 @@
def browse_table_data(table_id: str) -> None:
-
# [START bigquery_browse_table]
from google.cloud import bigquery
@@ -47,7 +46,7 @@ def browse_table_data(table_id: str) -> None:
print("Downloaded {} rows from table {}".format(len(rows), table_id))
# Print row data in tabular format.
- rows_iter = client.list_rows(table, max_results=10)
+ rows_iter = client.list_rows(table_id, max_results=10)
format_string = "{!s:<16} " * len(rows_iter.schema)
field_names = [field.name for field in rows_iter.schema]
print(format_string.format(*field_names)) # Prints column headers.
diff --git a/samples/client_list_jobs.py b/samples/client_list_jobs.py
index 7f1e39cb8..335d2ecec 100644
--- a/samples/client_list_jobs.py
+++ b/samples/client_list_jobs.py
@@ -14,7 +14,6 @@
def client_list_jobs() -> None:
-
# [START bigquery_list_jobs]
from google.cloud import bigquery
diff --git a/samples/client_load_partitioned_table.py b/samples/client_load_partitioned_table.py
index 9956f3f00..cfdf24819 100644
--- a/samples/client_load_partitioned_table.py
+++ b/samples/client_load_partitioned_table.py
@@ -14,7 +14,6 @@
def client_load_partitioned_table(table_id: str) -> None:
-
# [START bigquery_load_table_partitioned]
from google.cloud import bigquery
diff --git a/samples/client_query_add_column.py b/samples/client_query_add_column.py
index 2da200bc5..6aae5fce4 100644
--- a/samples/client_query_add_column.py
+++ b/samples/client_query_add_column.py
@@ -14,7 +14,6 @@
def client_query_add_column(table_id: str) -> None:
-
# [START bigquery_add_column_query_append]
from google.cloud import bigquery
@@ -37,14 +36,13 @@ def client_query_add_column(table_id: str) -> None:
)
# Start the query, passing in the extra configuration.
- query_job = client.query(
+ client.query_and_wait(
# In this example, the existing table contains only the 'full_name' and
# 'age' columns, while the results of this query will contain an
# additional 'favorite_color' column.
'SELECT "Timmy" as full_name, 85 as age, "Blue" as favorite_color;',
job_config=job_config,
- ) # Make an API request.
- query_job.result() # Wait for the job to complete.
+ ) # Make an API request and wait for job to complete.
# Checks the updated length of the schema.
table = client.get_table(table_id) # Make an API request.
diff --git a/samples/client_query_batch.py b/samples/client_query_batch.py
index df164d1be..5c55e278e 100644
--- a/samples/client_query_batch.py
+++ b/samples/client_query_batch.py
@@ -19,7 +19,6 @@
def client_query_batch() -> "bigquery.QueryJob":
-
# [START bigquery_query_batch]
from google.cloud import bigquery
diff --git a/samples/client_query_destination_table.py b/samples/client_query_destination_table.py
index b200f1cc6..f6a622229 100644
--- a/samples/client_query_destination_table.py
+++ b/samples/client_query_destination_table.py
@@ -14,7 +14,6 @@
def client_query_destination_table(table_id: str) -> None:
-
# [START bigquery_query_destination_table]
from google.cloud import bigquery
@@ -33,8 +32,9 @@ def client_query_destination_table(table_id: str) -> None:
"""
# Start the query, passing in the extra configuration.
- query_job = client.query(sql, job_config=job_config) # Make an API request.
- query_job.result() # Wait for the job to complete.
+ client.query_and_wait(
+ sql, job_config=job_config
+ ) # Make an API request and wait for the query to finish.
print("Query results loaded to the table {}".format(table_id))
# [END bigquery_query_destination_table]
diff --git a/samples/client_query_destination_table_clustered.py b/samples/client_query_destination_table_clustered.py
index c4ab305f5..19330500a 100644
--- a/samples/client_query_destination_table_clustered.py
+++ b/samples/client_query_destination_table_clustered.py
@@ -14,7 +14,6 @@
def client_query_destination_table_clustered(table_id: str) -> None:
-
# [START bigquery_query_clustered_table]
from google.cloud import bigquery
@@ -32,8 +31,9 @@ def client_query_destination_table_clustered(table_id: str) -> None:
)
# Start the query, passing in the extra configuration.
- query_job = client.query(sql, job_config=job_config) # Make an API request.
- query_job.result() # Wait for the job to complete.
+ client.query_and_wait(
+ sql, job_config=job_config
+ ) # Make an API request and wait for job to complete.
table = client.get_table(table_id) # Make an API request.
if table.clustering_fields == cluster_fields:
diff --git a/samples/client_query_destination_table_cmek.py b/samples/client_query_destination_table_cmek.py
index 0fd44d189..040c96e22 100644
--- a/samples/client_query_destination_table_cmek.py
+++ b/samples/client_query_destination_table_cmek.py
@@ -14,7 +14,6 @@
def client_query_destination_table_cmek(table_id: str, kms_key_name: str) -> None:
-
# [START bigquery_query_destination_table_cmek]
from google.cloud import bigquery
diff --git a/samples/client_query_destination_table_legacy.py b/samples/client_query_destination_table_legacy.py
index ee45d9a01..26c3945ca 100644
--- a/samples/client_query_destination_table_legacy.py
+++ b/samples/client_query_destination_table_legacy.py
@@ -14,7 +14,6 @@
def client_query_destination_table_legacy(table_id: str) -> None:
-
# [START bigquery_query_legacy_large_results]
from google.cloud import bigquery
@@ -37,8 +36,9 @@ def client_query_destination_table_legacy(table_id: str) -> None:
"""
# Start the query, passing in the extra configuration.
- query_job = client.query(sql, job_config=job_config) # Make an API request.
- query_job.result() # Wait for the job to complete.
+ client.query_and_wait(
+ sql, job_config=job_config
+ ) # Make an API request and wait for the query to finish.
print("Query results loaded to the table {}".format(table_id))
# [END bigquery_query_legacy_large_results]
diff --git a/samples/client_query_dry_run.py b/samples/client_query_dry_run.py
index 418b43cb5..bb4893c2a 100644
--- a/samples/client_query_dry_run.py
+++ b/samples/client_query_dry_run.py
@@ -19,7 +19,6 @@
def client_query_dry_run() -> "bigquery.QueryJob":
-
# [START bigquery_query_dry_run]
from google.cloud import bigquery
diff --git a/samples/client_query_job_optional.py b/samples/client_query_job_optional.py
new file mode 100644
index 000000000..6321aea35
--- /dev/null
+++ b/samples/client_query_job_optional.py
@@ -0,0 +1,57 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def client_query_job_optional() -> None:
+ # [START bigquery_query_job_optional]
+ # This example demonstrates executing a query without requiring an associated
+ # job.
+ from google.cloud import bigquery
+ from google.cloud.bigquery.enums import JobCreationMode
+
+ # Construct a BigQuery client object, specifying that the library should
+ # avoid creating jobs when possible.
+ client = bigquery.Client(
+ default_job_creation_mode=JobCreationMode.JOB_CREATION_OPTIONAL
+ )
+
+ query = """
+ SELECT
+ name,
+ gender,
+ SUM(number) AS total
+ FROM
+ bigquery-public-data.usa_names.usa_1910_2013
+ GROUP BY
+ name, gender
+ ORDER BY
+ total DESC
+ LIMIT 10
+ """
+ # Run the query. The returned `rows` iterator can return information about
+ # how the query was executed as well as the result data.
+ rows = client.query_and_wait(query)
+
+ if rows.job_id is not None:
+ print("Query was run with job state. Job ID: {}".format(rows.job_id))
+ else:
+ print(
+ "Query was run without creating a job. Query ID: {}".format(rows.query_id)
+ )
+
+ print("The query data:")
+ for row in rows:
+ # Row values can be accessed by field name or index.
+ print("name={}, gender={}, total={}".format(row[0], row[1], row["total"]))
+ # [END bigquery_query_job_optional]
diff --git a/samples/client_query_legacy_sql.py b/samples/client_query_legacy_sql.py
index c054e1f28..1fb5b797a 100644
--- a/samples/client_query_legacy_sql.py
+++ b/samples/client_query_legacy_sql.py
@@ -14,7 +14,6 @@
def client_query_legacy_sql() -> None:
-
# [START bigquery_query_legacy]
from google.cloud import bigquery
@@ -30,10 +29,12 @@ def client_query_legacy_sql() -> None:
# Set use_legacy_sql to True to use legacy SQL syntax.
job_config = bigquery.QueryJobConfig(use_legacy_sql=True)
- # Start the query, passing in the extra configuration.
- query_job = client.query(query, job_config=job_config) # Make an API request.
+    # Start the query and wait for the query job to complete, passing in the extra configuration.
+ results = client.query_and_wait(
+ query, job_config=job_config
+ ) # Make an API request.
print("The query data:")
- for row in query_job:
+ for row in results:
print(row)
# [END bigquery_query_legacy]
diff --git a/samples/client_query_relax_column.py b/samples/client_query_relax_column.py
index c96a1e7aa..26dce888f 100644
--- a/samples/client_query_relax_column.py
+++ b/samples/client_query_relax_column.py
@@ -14,7 +14,6 @@
def client_query_relax_column(table_id: str) -> None:
-
# [START bigquery_relax_column_query_append]
from google.cloud import bigquery
@@ -40,13 +39,12 @@ def client_query_relax_column(table_id: str) -> None:
)
# Start the query, passing in the extra configuration.
- query_job = client.query(
+ client.query_and_wait(
# In this example, the existing table contains 'full_name' and 'age' as
# required columns, but the query results will omit the second column.
'SELECT "Beyonce" as full_name;',
job_config=job_config,
- ) # Make an API request.
- query_job.result() # Wait for the job to complete.
+ ) # Make an API request and wait for the job to complete.
# Checks the updated number of required fields.
table = client.get_table(table_id) # Make an API request.
diff --git a/samples/client_query_w_array_params.py b/samples/client_query_w_array_params.py
index 669713182..e9d759f61 100644
--- a/samples/client_query_w_array_params.py
+++ b/samples/client_query_w_array_params.py
@@ -14,7 +14,6 @@
def client_query_w_array_params() -> None:
-
# [START bigquery_query_params_arrays]
from google.cloud import bigquery
@@ -36,8 +35,8 @@ def client_query_w_array_params() -> None:
bigquery.ArrayQueryParameter("states", "STRING", ["WA", "WI", "WV", "WY"]),
]
)
- query_job = client.query(query, job_config=job_config) # Make an API request.
+ rows = client.query_and_wait(query, job_config=job_config) # Make an API request.
- for row in query_job:
+ for row in rows:
print("{}: \t{}".format(row.name, row.count))
# [END bigquery_query_params_arrays]
diff --git a/samples/client_query_w_named_params.py b/samples/client_query_w_named_params.py
index f42be1dc8..b7e59a81a 100644
--- a/samples/client_query_w_named_params.py
+++ b/samples/client_query_w_named_params.py
@@ -14,7 +14,6 @@
def client_query_w_named_params() -> None:
-
# [START bigquery_query_params_named]
from google.cloud import bigquery
@@ -34,8 +33,10 @@ def client_query_w_named_params() -> None:
bigquery.ScalarQueryParameter("min_word_count", "INT64", 250),
]
)
- query_job = client.query(query, job_config=job_config) # Make an API request.
+ results = client.query_and_wait(
+ query, job_config=job_config
+ ) # Make an API request.
- for row in query_job:
+ for row in results:
print("{}: \t{}".format(row.word, row.word_count))
# [END bigquery_query_params_named]
diff --git a/samples/client_query_w_positional_params.py b/samples/client_query_w_positional_params.py
index b088b305e..24f3e0dbe 100644
--- a/samples/client_query_w_positional_params.py
+++ b/samples/client_query_w_positional_params.py
@@ -14,7 +14,6 @@
def client_query_w_positional_params() -> None:
-
# [START bigquery_query_params_positional]
from google.cloud import bigquery
@@ -36,8 +35,10 @@ def client_query_w_positional_params() -> None:
bigquery.ScalarQueryParameter(None, "INT64", 250),
]
)
- query_job = client.query(query, job_config=job_config) # Make an API request.
+ results = client.query_and_wait(
+ query, job_config=job_config
+ ) # Make an API request.
- for row in query_job:
+ for row in results:
print("{}: \t{}".format(row.word, row.word_count))
# [END bigquery_query_params_positional]
diff --git a/samples/client_query_w_struct_params.py b/samples/client_query_w_struct_params.py
index 6c5b78113..cda2fcb43 100644
--- a/samples/client_query_w_struct_params.py
+++ b/samples/client_query_w_struct_params.py
@@ -14,7 +14,6 @@
def client_query_w_struct_params() -> None:
-
# [START bigquery_query_params_structs]
from google.cloud import bigquery
@@ -31,8 +30,10 @@ def client_query_w_struct_params() -> None:
)
]
)
- query_job = client.query(query, job_config=job_config) # Make an API request.
+ results = client.query_and_wait(
+ query, job_config=job_config
+ ) # Make an API request and wait for results.
- for row in query_job:
+ for row in results:
print(row.s)
# [END bigquery_query_params_structs]
diff --git a/samples/client_query_w_timestamp_params.py b/samples/client_query_w_timestamp_params.py
index 07d64cc94..1dbb1e9b6 100644
--- a/samples/client_query_w_timestamp_params.py
+++ b/samples/client_query_w_timestamp_params.py
@@ -14,7 +14,6 @@
def client_query_w_timestamp_params() -> None:
-
# [START bigquery_query_params_timestamps]
import datetime
@@ -33,8 +32,10 @@ def client_query_w_timestamp_params() -> None:
)
]
)
- query_job = client.query(query, job_config=job_config) # Make an API request.
+ results = client.query_and_wait(
+ query, job_config=job_config
+ ) # Make an API request.
- for row in query_job:
+ for row in results:
print(row)
# [END bigquery_query_params_timestamps]
diff --git a/samples/copy_table.py b/samples/copy_table.py
index 8c6153fef..3ae6e9ebe 100644
--- a/samples/copy_table.py
+++ b/samples/copy_table.py
@@ -14,7 +14,6 @@
def copy_table(source_table_id: str, destination_table_id: str) -> None:
-
# [START bigquery_copy_table]
from google.cloud import bigquery
diff --git a/samples/copy_table_cmek.py b/samples/copy_table_cmek.py
index f2e8a90f9..f03053fab 100644
--- a/samples/copy_table_cmek.py
+++ b/samples/copy_table_cmek.py
@@ -14,7 +14,6 @@
def copy_table_cmek(dest_table_id: str, orig_table_id: str, kms_key_name: str) -> None:
-
# [START bigquery_copy_table_cmek]
from google.cloud import bigquery
diff --git a/samples/copy_table_multiple_source.py b/samples/copy_table_multiple_source.py
index 1163b1664..509b8951b 100644
--- a/samples/copy_table_multiple_source.py
+++ b/samples/copy_table_multiple_source.py
@@ -16,7 +16,6 @@
def copy_table_multiple_source(dest_table_id: str, table_ids: Sequence[str]) -> None:
-
# [START bigquery_copy_table_multiple_source]
from google.cloud import bigquery
diff --git a/samples/create_dataset.py b/samples/create_dataset.py
index dea91798d..7f645933a 100644
--- a/samples/create_dataset.py
+++ b/samples/create_dataset.py
@@ -14,7 +14,6 @@
def create_dataset(dataset_id: str) -> None:
-
# [START bigquery_create_dataset]
from google.cloud import bigquery
diff --git a/samples/create_job.py b/samples/create_job.py
index 39922f7ae..f335e2f7a 100644
--- a/samples/create_job.py
+++ b/samples/create_job.py
@@ -13,33 +13,54 @@
# limitations under the License.
import typing
+from typing import Union
if typing.TYPE_CHECKING:
- from google.cloud import bigquery
-
+ from google.cloud.bigquery import LoadJob, CopyJob, ExtractJob, QueryJob
-def create_job() -> "bigquery.QueryJob":
+def create_job() -> "Union[LoadJob, CopyJob, ExtractJob, QueryJob]":
# [START bigquery_create_job]
from google.cloud import bigquery
# Construct a BigQuery client object.
client = bigquery.Client()
- query_job = client.query(
- "SELECT country_name from `bigquery-public-data.utility_us.country_code_iso`",
- # Explicitly force job execution to be routed to a specific processing
- # location.
- location="US",
- # Specify a job configuration to set optional job resource properties.
- job_config=bigquery.QueryJobConfig(
- labels={"example-label": "example-value"}, maximum_bytes_billed=1000000
- ),
- # The client libraries automatically generate a job ID. Override the
- # generated ID with either the job_id_prefix or job_id parameters.
- job_id_prefix="code_sample_",
+ query_job = client.create_job(
+ # Specify a job configuration, providing a query
+ # and/or optional job resource properties, as needed.
+ # The job instance can be a LoadJob, CopyJob, ExtractJob, or QueryJob.
+ # Here, we demonstrate a "query" job.
+ # References:
+ # https://googleapis.dev/python/bigquery/latest/generated/google.cloud.bigquery.client.Client.html#google.cloud.bigquery.client.Client.create_job
+ # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job
+ #
+ # Example use cases for .create_job() include:
+ # * to retry failed jobs
+ # * to generate jobs with an experimental API property that hasn't
+ # been added to one of the manually written job configuration
+ # classes yet
+ #
+ # NOTE: unless it is necessary to create a job in this way, the
+ # preferred approach is to use one of the dedicated API calls:
+ # client.query()
+ # client.extract_table()
+ # client.copy_table()
+ # client.load_table_file(), client.load_table_from_dataframe(), etc.
+ job_config={
+ "query": {
+ "query": """
+ SELECT country_name
+ FROM `bigquery-public-data.utility_us.country_code_iso`
+ LIMIT 5
+ """,
+ },
+ "labels": {"example-label": "example-value"},
+ "maximum_bytes_billed": 10000000,
+ }
) # Make an API request.
- print("Started job: {}".format(query_job.job_id))
+ print(f"Started job: {query_job.job_id}")
# [END bigquery_create_job]
+
return query_job
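As the comments above note, create_job() with a raw job_config dict is the escape hatch; for a plain query the dedicated call is preferred. A sketch of the equivalent request through client.query(), reusing the configuration the previous version of this sample set (values are illustrative):

from google.cloud import bigquery

client = bigquery.Client()
query_job = client.query(
    "SELECT country_name FROM `bigquery-public-data.utility_us.country_code_iso` LIMIT 5",
    # Optional job resource properties go through the typed config class.
    job_config=bigquery.QueryJobConfig(
        labels={"example-label": "example-value"},
        maximum_bytes_billed=10000000,
    ),
    # Override the auto-generated job ID with a recognizable prefix.
    job_id_prefix="code_sample_",
)
print(f"Started job: {query_job.job_id}")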
diff --git a/samples/create_routine.py b/samples/create_routine.py
index 96dc24210..8be1b6a99 100644
--- a/samples/create_routine.py
+++ b/samples/create_routine.py
@@ -19,7 +19,6 @@
def create_routine(routine_id: str) -> "bigquery.Routine":
-
# [START bigquery_create_routine]
from google.cloud import bigquery
diff --git a/samples/create_routine_ddl.py b/samples/create_routine_ddl.py
index 56c7cfe24..231d5a142 100644
--- a/samples/create_routine_ddl.py
+++ b/samples/create_routine_ddl.py
@@ -14,7 +14,6 @@
def create_routine_ddl(routine_id: str) -> None:
-
# [START bigquery_create_routine_ddl]
from google.cloud import bigquery
diff --git a/samples/create_table.py b/samples/create_table.py
index eaac54696..7fda370ce 100644
--- a/samples/create_table.py
+++ b/samples/create_table.py
@@ -14,7 +14,6 @@
def create_table(table_id: str) -> None:
-
# [START bigquery_create_table]
from google.cloud import bigquery
diff --git a/samples/create_table_clustered.py b/samples/create_table_clustered.py
index 1686c519a..a9ad43e59 100644
--- a/samples/create_table_clustered.py
+++ b/samples/create_table_clustered.py
@@ -19,7 +19,6 @@
def create_table_clustered(table_id: str) -> "bigquery.Table":
-
# [START bigquery_create_table_clustered]
from google.cloud import bigquery
diff --git a/samples/create_table_range_partitioned.py b/samples/create_table_range_partitioned.py
index 4dc45ed58..128ab87d9 100644
--- a/samples/create_table_range_partitioned.py
+++ b/samples/create_table_range_partitioned.py
@@ -19,7 +19,6 @@
def create_table_range_partitioned(table_id: str) -> "bigquery.Table":
-
# [START bigquery_create_table_range_partitioned]
from google.cloud import bigquery
diff --git a/samples/dataset_exists.py b/samples/dataset_exists.py
index 221899a65..784f86524 100644
--- a/samples/dataset_exists.py
+++ b/samples/dataset_exists.py
@@ -14,7 +14,6 @@
def dataset_exists(dataset_id: str) -> None:
-
# [START bigquery_dataset_exists]
from google.cloud import bigquery
from google.cloud.exceptions import NotFound
diff --git a/samples/delete_dataset.py b/samples/delete_dataset.py
index b340ed57a..9c7644db0 100644
--- a/samples/delete_dataset.py
+++ b/samples/delete_dataset.py
@@ -14,7 +14,6 @@
def delete_dataset(dataset_id: str) -> None:
-
# [START bigquery_delete_dataset]
from google.cloud import bigquery
diff --git a/samples/delete_dataset_labels.py b/samples/delete_dataset_labels.py
index ec5df09c1..d5efdf4ea 100644
--- a/samples/delete_dataset_labels.py
+++ b/samples/delete_dataset_labels.py
@@ -19,7 +19,6 @@
def delete_dataset_labels(dataset_id: str) -> "bigquery.Dataset":
-
# [START bigquery_delete_label_dataset]
from google.cloud import bigquery
diff --git a/samples/delete_routine.py b/samples/delete_routine.py
index 7362a5fea..604e7f730 100644
--- a/samples/delete_routine.py
+++ b/samples/delete_routine.py
@@ -14,7 +14,6 @@
def delete_routine(routine_id: str) -> None:
-
# [START bigquery_delete_routine]
from google.cloud import bigquery
diff --git a/samples/delete_table.py b/samples/delete_table.py
index 9e7ee170a..a8ac4617a 100644
--- a/samples/delete_table.py
+++ b/samples/delete_table.py
@@ -14,7 +14,6 @@
def delete_table(table_id: str) -> None:
-
# [START bigquery_delete_table]
from google.cloud import bigquery
diff --git a/google/__init__.py b/samples/desktopapp/__init__.py
similarity index 70%
rename from google/__init__.py
rename to samples/desktopapp/__init__.py
index 8fcc60e2b..4fbd93bb2 100644
--- a/google/__init__.py
+++ b/samples/desktopapp/__init__.py
@@ -1,6 +1,4 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright 2019 Google LLC
+# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,12 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
-try:
- import pkg_resources
-
- pkg_resources.declare_namespace(__name__)
-except ImportError:
- import pkgutil
-
- __path__ = pkgutil.extend_path(__path__, __name__)
diff --git a/.github/.OwlBot.yaml b/samples/desktopapp/conftest.py
similarity index 63%
rename from .github/.OwlBot.yaml
rename to samples/desktopapp/conftest.py
index e54051157..fdc85a852 100644
--- a/.github/.OwlBot.yaml
+++ b/samples/desktopapp/conftest.py
@@ -1,4 +1,4 @@
-# Copyright 2021 Google LLC
+# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,15 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-docker:
- image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
+from google.cloud import bigquery
+import pytest
-deep-remove-regex:
- - /owl-bot-staging
-
-deep-copy-regex:
- - source: /google/cloud/bigquery/(v.*)/.*-py/(.*)
- dest: /owl-bot-staging/$1/$2
-
-begin-after-commit-hash: f2de93abafa306b2ebadf1d10d947db8bcf2bf15
+@pytest.fixture
+def bigquery_client_patch(
+ monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client
+) -> None:
+ monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client)
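The bigquery_client_patch fixture lets a test inject a shared client wherever sample code constructs bigquery.Client() directly. A sketch of how a test might opt in; the bigquery_client fixture itself is assumed to be provided by a shared conftest and is not part of this diff:

from google.cloud import bigquery


def test_sample_uses_shared_client(
    bigquery_client_patch: None, bigquery_client: bigquery.Client
) -> None:
    # With the patch applied, bigquery.Client() returns the shared client.
    assert bigquery.Client() is bigquery_client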
diff --git a/samples/desktopapp/mypy.ini b/samples/desktopapp/mypy.ini
new file mode 100644
index 000000000..d27b6b599
--- /dev/null
+++ b/samples/desktopapp/mypy.ini
@@ -0,0 +1,8 @@
+[mypy]
+; We require type annotations in all samples.
+strict = True
+exclude = noxfile\.py
+warn_unused_configs = True
+
+[mypy-google.auth,google.oauth2,geojson,google_auth_oauthlib,IPython.*]
+ignore_missing_imports = True
diff --git a/samples/desktopapp/noxfile.py b/samples/desktopapp/noxfile.py
new file mode 100644
index 000000000..db2333e5a
--- /dev/null
+++ b/samples/desktopapp/noxfile.py
@@ -0,0 +1,293 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import glob
+import os
+from pathlib import Path
+import sys
+from typing import Callable, Dict, Optional
+
+import nox
+
+
+# WARNING - WARNING - WARNING - WARNING - WARNING
+# WARNING - WARNING - WARNING - WARNING - WARNING
+# DO NOT EDIT THIS FILE EVER!
+# WARNING - WARNING - WARNING - WARNING - WARNING
+# WARNING - WARNING - WARNING - WARNING - WARNING
+
+BLACK_VERSION = "black==23.7.0"
+ISORT_VERSION = "isort==5.10.1"
+
+# Copy `noxfile_config.py` to your directory and modify it instead.
+
+# `TEST_CONFIG` dict is a configuration hook that allows users to
+# modify the test configurations. The values here should be in sync
+# with `noxfile_config.py`. Users will copy `noxfile_config.py` into
+# their directory and modify it.
+
+TEST_CONFIG = {
+ # You can opt out from the test for specific Python versions.
+ "ignored_versions": [],
+ # Old samples are opted out of enforcing Python type hints
+ # All new samples should feature them
+ "enforce_type_hints": False,
+ # An envvar key for determining the project id to use. Change it
+ # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+ # build specific Cloud project. You can also use your own string
+ # to use your own Cloud project.
+ "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
+ # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+ # If you need to use a specific version of pip,
+ # change pip_version_override to the string representation
+ # of the version number, for example, "20.2.4"
+ "pip_version_override": None,
+ # A dictionary you want to inject into your test. Don't put any
+ # secrets here. These values will override predefined values.
+ "envs": {},
+}
+
+
+try:
+ # Ensure we can import noxfile_config in the project's directory.
+ sys.path.append(".")
+ from noxfile_config import TEST_CONFIG_OVERRIDE
+except ImportError as e:
+ print("No user noxfile_config found: detail: {}".format(e))
+ TEST_CONFIG_OVERRIDE = {}
+
+# Update the TEST_CONFIG with the user supplied values.
+TEST_CONFIG.update(TEST_CONFIG_OVERRIDE)
+
+
+def get_pytest_env_vars() -> Dict[str, str]:
+ """Returns a dict for pytest invocation."""
+ ret = {}
+
+ # Override the GCLOUD_PROJECT and the alias.
+ env_key = TEST_CONFIG["gcloud_project_env"]
+ # This should error out if not set.
+ ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key]
+
+ # Apply user supplied envs.
+ ret.update(TEST_CONFIG["envs"])
+ return ret
+
+
+# DO NOT EDIT - automatically generated.
+# All versions used to test samples.
+ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
+
+# Any default versions that should be ignored.
+IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
+
+TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])
+
+INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in (
+ "True",
+ "true",
+)
+
+# Error if a python version is missing
+nox.options.error_on_missing_interpreters = True
+
+#
+# Style Checks
+#
+
+
+# Linting with flake8.
+#
+# We ignore the following rules:
+# E203: whitespace before ':'
+# E266: too many leading '#' for block comment
+# E501: line too long
+# I202: Additional newline in a section of imports
+#
+# We also need to specify the rules which are ignored by default:
+# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121']
+FLAKE8_COMMON_ARGS = [
+ "--show-source",
+ "--builtin=gettext",
+ "--max-complexity=20",
+ "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py",
+ "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202",
+ "--max-line-length=88",
+]
+
+
+@nox.session
+def lint(session: nox.sessions.Session) -> None:
+ if not TEST_CONFIG["enforce_type_hints"]:
+ session.install("flake8")
+ else:
+ session.install("flake8", "flake8-annotations")
+
+ args = FLAKE8_COMMON_ARGS + [
+ ".",
+ ]
+ session.run("flake8", *args)
+
+
+#
+# Black
+#
+
+
+@nox.session
+def blacken(session: nox.sessions.Session) -> None:
+ """Run black. Format code to uniform standard."""
+ session.install(BLACK_VERSION)
+ python_files = [path for path in os.listdir(".") if path.endswith(".py")]
+
+ session.run("black", *python_files)
+
+
+#
+# format = isort + black
+#
+
+
+@nox.session
+def format(session: nox.sessions.Session) -> None:
+ """
+ Run isort to sort imports. Then run black
+ to format code to uniform standard.
+ """
+ session.install(BLACK_VERSION, ISORT_VERSION)
+ python_files = [path for path in os.listdir(".") if path.endswith(".py")]
+
+ # Use the --fss option to sort imports using strict alphabetical order.
+ # See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections
+ session.run("isort", "--fss", *python_files)
+ session.run("black", *python_files)
+
+
+#
+# Sample Tests
+#
+
+
+PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"]
+
+
+def _session_tests(
+ session: nox.sessions.Session, post_install: Callable = None
+) -> None:
+ # check for presence of tests
+ test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob(
+ "**/test_*.py", recursive=True
+ )
+ test_list.extend(glob.glob("**/tests", recursive=True))
+
+ if len(test_list) == 0:
+ print("No tests found, skipping directory.")
+ return
+
+ if TEST_CONFIG["pip_version_override"]:
+ pip_version = TEST_CONFIG["pip_version_override"]
+ session.install(f"pip=={pip_version}")
+ """Runs py.test for a particular project."""
+ concurrent_args = []
+ if os.path.exists("requirements.txt"):
+ if os.path.exists("constraints.txt"):
+ session.install("-r", "requirements.txt", "-c", "constraints.txt")
+ else:
+ session.install("-r", "requirements.txt")
+ with open("requirements.txt") as rfile:
+ packages = rfile.read()
+
+ if os.path.exists("requirements-test.txt"):
+ if os.path.exists("constraints-test.txt"):
+ session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt")
+ else:
+ session.install("-r", "requirements-test.txt")
+ with open("requirements-test.txt") as rtfile:
+ packages += rtfile.read()
+
+ if INSTALL_LIBRARY_FROM_SOURCE:
+ session.install("-e", _get_repo_root())
+
+ if post_install:
+ post_install(session)
+
+ if "pytest-parallel" in packages:
+ concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"])
+ elif "pytest-xdist" in packages:
+ concurrent_args.extend(["-n", "auto"])
+
+ session.run(
+ "pytest",
+ *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args),
+ # Pytest will return 5 when no tests are collected. This can happen
+ # on travis where slow and flaky tests are excluded.
+ # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html
+ success_codes=[0, 5],
+ env=get_pytest_env_vars(),
+ )
+
+
+@nox.session(python=ALL_VERSIONS)
+def py(session: nox.sessions.Session) -> None:
+ """Runs py.test for a sample using the specified version of Python."""
+ if session.python in TESTED_VERSIONS:
+ _session_tests(session)
+ else:
+ session.skip(
+ "SKIPPED: {} tests are disabled for this sample.".format(session.python)
+ )
+
+
+#
+# Readmegen
+#
+
+
+def _get_repo_root() -> Optional[str]:
+ """Returns the root folder of the project."""
+ # Get root of this repository. Assume we don't have directories nested deeper than 10 items.
+ p = Path(os.getcwd())
+ for i in range(10):
+ if p is None:
+ break
+ if Path(p / ".git").exists():
+ return str(p)
+ # .git is not available in repos cloned via Cloud Build
+ # setup.py is always in the library's root, so use that instead
+ # https://github.com/googleapis/synthtool/issues/792
+ if Path(p / "setup.py").exists():
+ return str(p)
+ p = p.parent
+ raise Exception("Unable to detect repository root.")
+
+
+GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")])
+
+
+@nox.session
+@nox.parametrize("path", GENERATED_READMES)
+def readmegen(session: nox.sessions.Session, path: str) -> None:
+ """(Re-)generates the readme for a sample."""
+ session.install("jinja2", "pyyaml")
+ dir_ = os.path.dirname(path)
+
+ if os.path.exists(os.path.join(dir_, "requirements.txt")):
+ session.install("-r", os.path.join(dir_, "requirements.txt"))
+
+ in_file = os.path.join(dir_, "README.rst.in")
+ session.run(
+ "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file
+ )
diff --git a/samples/desktopapp/noxfile_config.py b/samples/desktopapp/noxfile_config.py
new file mode 100644
index 000000000..315bd5be8
--- /dev/null
+++ b/samples/desktopapp/noxfile_config.py
@@ -0,0 +1,40 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Default TEST_CONFIG_OVERRIDE for python repos.
+
+# You can copy this file into your directory, then it will be imported from
+# the noxfile.py.
+
+# The source of truth:
+# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py
+
+TEST_CONFIG_OVERRIDE = {
+ # You can opt out from the test for specific Python versions.
+ "ignored_versions": [
+ "2.7",
+ # TODO: Enable 3.10 once there is a geopandas/fiona release.
+ # https://github.com/Toblerity/Fiona/issues/1043
+ "3.10",
+ ],
+ # An envvar key for determining the project id to use. Change it
+ # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+ # build specific Cloud project. You can also use your own string
+ # to use your own Cloud project.
+ "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
+ # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT",
+ # A dictionary you want to inject into your test. Don't put any
+ # secrets here. These values will override predefined values.
+ "envs": {},
+}
diff --git a/samples/desktopapp/requirements-test.txt b/samples/desktopapp/requirements-test.txt
new file mode 100644
index 000000000..31b836790
--- /dev/null
+++ b/samples/desktopapp/requirements-test.txt
@@ -0,0 +1,4 @@
+google-cloud-testutils==1.6.4
+pytest==8.4.2
+mock==5.2.0
+pytest-xdist==3.8.0
diff --git a/samples/desktopapp/requirements.txt b/samples/desktopapp/requirements.txt
new file mode 100644
index 000000000..56696f868
--- /dev/null
+++ b/samples/desktopapp/requirements.txt
@@ -0,0 +1,2 @@
+google-cloud-bigquery==3.38.0
+google-auth-oauthlib==1.2.2
diff --git a/samples/snippets/user_credentials.py b/samples/desktopapp/user_credentials.py
similarity index 96%
rename from samples/snippets/user_credentials.py
rename to samples/desktopapp/user_credentials.py
index 487a56c5f..68236d126 100644
--- a/samples/snippets/user_credentials.py
+++ b/samples/desktopapp/user_credentials.py
@@ -61,10 +61,10 @@ def main(project: str) -> None:
WHERE name = 'William'
GROUP BY name;
"""
- query_job = client.query(query_string)
+ results = client.query_and_wait(query_string)
# Print the results.
- for row in query_job.result(): # Wait for the job to complete.
+ for row in results: # query_and_wait() has already finished the query.
print("{}: {}".format(row["name"], row["total"]))
# [END bigquery_auth_user_query]
diff --git a/samples/snippets/user_credentials_test.py b/samples/desktopapp/user_credentials_test.py
similarity index 91%
rename from samples/snippets/user_credentials_test.py
rename to samples/desktopapp/user_credentials_test.py
index e2794e83b..d14798d9b 100644
--- a/samples/snippets/user_credentials_test.py
+++ b/samples/desktopapp/user_credentials_test.py
@@ -14,17 +14,16 @@
import os
from typing import Iterator, Union
+from unittest import mock
import google.auth
-import mock
import pytest
-from user_credentials import main
-
+from .user_credentials import main # type: ignore
PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"]
-MockType = Union[mock.mock.MagicMock, mock.mock.AsyncMock]
+MockType = Union[mock.MagicMock, mock.AsyncMock]
@pytest.fixture
diff --git a/samples/download_public_data.py b/samples/download_public_data.py
index a488bbbb5..cb2ebd1fd 100644
--- a/samples/download_public_data.py
+++ b/samples/download_public_data.py
@@ -14,7 +14,6 @@
def download_public_data() -> None:
-
# [START bigquery_pandas_public_data]
from google.cloud import bigquery
diff --git a/samples/download_public_data_sandbox.py b/samples/download_public_data_sandbox.py
index ce5200b4e..909a7da05 100644
--- a/samples/download_public_data_sandbox.py
+++ b/samples/download_public_data_sandbox.py
@@ -14,7 +14,6 @@
def download_public_data_sandbox() -> None:
-
# [START bigquery_pandas_public_data_sandbox]
from google.cloud import bigquery
@@ -28,7 +27,9 @@ def download_public_data_sandbox() -> None:
query_string = "SELECT * FROM `bigquery-public-data.usa_names.usa_1910_current`"
# Use the BigQuery Storage API to speed-up downloads of large tables.
- dataframe = client.query(query_string).to_dataframe(create_bqstorage_client=True)
+ dataframe = client.query_and_wait(query_string).to_dataframe(
+ create_bqstorage_client=True
+ )
print(dataframe.info())
# [END bigquery_pandas_public_data_sandbox]
diff --git a/samples/geography/insert_geojson.py b/samples/geography/insert_geojson.py
index 2db407b55..9a6f6c413 100644
--- a/samples/geography/insert_geojson.py
+++ b/samples/geography/insert_geojson.py
@@ -18,7 +18,6 @@
def insert_geojson(
override_values: Optional[Mapping[str, str]] = None
) -> Sequence[Dict[str, object]]:
-
if override_values is None:
override_values = {}
diff --git a/samples/geography/insert_wkt.py b/samples/geography/insert_wkt.py
index 25c7ee727..2923d2596 100644
--- a/samples/geography/insert_wkt.py
+++ b/samples/geography/insert_wkt.py
@@ -18,7 +18,6 @@
def insert_wkt(
override_values: Optional[Mapping[str, str]] = None
) -> Sequence[Dict[str, object]]:
-
if override_values is None:
override_values = {}
diff --git a/samples/geography/noxfile.py b/samples/geography/noxfile.py
index 29b5bc852..db2333e5a 100644
--- a/samples/geography/noxfile.py
+++ b/samples/geography/noxfile.py
@@ -18,7 +18,7 @@
import os
from pathlib import Path
import sys
-from typing import Callable, Dict, List, Optional
+from typing import Callable, Dict, Optional
import nox
@@ -29,7 +29,7 @@
# WARNING - WARNING - WARNING - WARNING - WARNING
# WARNING - WARNING - WARNING - WARNING - WARNING
-BLACK_VERSION = "black==22.3.0"
+BLACK_VERSION = "black==23.7.0"
ISORT_VERSION = "isort==5.10.1"
# Copy `noxfile_config.py` to your directory and modify it instead.
@@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
# DO NOT EDIT - automatically generated.
# All versions used to test samples.
-ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
+ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
# Any default versions that should be ignored.
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
@@ -109,22 +109,6 @@ def get_pytest_env_vars() -> Dict[str, str]:
#
-def _determine_local_import_names(start_dir: str) -> List[str]:
- """Determines all import names that should be considered "local".
-
- This is used when running the linter to insure that import order is
- properly checked.
- """
- file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)]
- return [
- basename
- for basename, extension in file_ext_pairs
- if extension == ".py"
- or os.path.isdir(os.path.join(start_dir, basename))
- and basename not in ("__pycache__")
- ]
-
-
# Linting with flake8.
#
# We ignore the following rules:
@@ -139,7 +123,6 @@ def _determine_local_import_names(start_dir: str) -> List[str]:
"--show-source",
"--builtin=gettext",
"--max-complexity=20",
- "--import-order-style=google",
"--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py",
"--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202",
"--max-line-length=88",
@@ -149,14 +132,11 @@ def _determine_local_import_names(start_dir: str) -> List[str]:
@nox.session
def lint(session: nox.sessions.Session) -> None:
if not TEST_CONFIG["enforce_type_hints"]:
- session.install("flake8", "flake8-import-order")
+ session.install("flake8")
else:
- session.install("flake8", "flake8-import-order", "flake8-annotations")
+ session.install("flake8", "flake8-annotations")
- local_names = _determine_local_import_names(".")
args = FLAKE8_COMMON_ARGS + [
- "--application-import-names",
- ",".join(local_names),
".",
]
session.run("flake8", *args)
@@ -208,8 +188,10 @@ def _session_tests(
session: nox.sessions.Session, post_install: Callable = None
) -> None:
# check for presence of tests
- test_list = glob.glob("*_test.py") + glob.glob("test_*.py")
- test_list.extend(glob.glob("tests"))
+ test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob(
+ "**/test_*.py", recursive=True
+ )
+ test_list.extend(glob.glob("**/tests", recursive=True))
if len(test_list) == 0:
print("No tests found, skipping directory.")
diff --git a/samples/geography/requirements-test.txt b/samples/geography/requirements-test.txt
index fb466e509..6fb9ba310 100644
--- a/samples/geography/requirements-test.txt
+++ b/samples/geography/requirements-test.txt
@@ -1,2 +1,3 @@
-pytest==7.1.2
-mock==4.0.3
+pytest==8.4.2
+mock==5.2.0
+pytest-xdist==3.8.0
diff --git a/samples/geography/requirements.txt b/samples/geography/requirements.txt
index feca08cca..ec5c7f2af 100644
--- a/samples/geography/requirements.txt
+++ b/samples/geography/requirements.txt
@@ -1,45 +1,44 @@
-attrs==22.1.0
-certifi==2022.6.15
-cffi==1.15.1
-charset-normalizer==2.1.1
-click==8.1.3
-click-plugins==1.1.1
+attrs==25.4.0
+certifi==2025.10.5
+cffi==2.0.0
+charset-normalizer==3.4.3
+click===8.1.8; python_version == '3.9'
+click==8.3.0; python_version >= '3.10'
+click-plugins==1.1.1.2
cligj==0.7.2
-dataclasses==0.8; python_version < '3.7'
-db-dtypes==1.0.3
-Fiona==1.8.21
-geojson==2.5.0
-geopandas===0.10.2; python_version == '3.7'
-geopandas==0.11.1; python_version >= '3.8'
-google-api-core==2.8.2
-google-auth==2.11.0
-google-cloud-bigquery==3.3.2
-google-cloud-bigquery-storage==2.14.2
-google-cloud-core==2.3.2
-google-crc32c==1.3.0
-google-resumable-media==2.3.3
-googleapis-common-protos==1.56.4
-grpcio==1.47.0
-idna==3.3
-libcst==0.4.7
-munch==2.5.0
-mypy-extensions==0.4.3
-packaging==21.3
-pandas===1.3.5; python_version == '3.7'
-pandas==1.4.3; python_version >= '3.8'
-proto-plus==1.22.0
-pyarrow==9.0.0
-pyasn1==0.4.8
-pyasn1-modules==0.2.8
-pycparser==2.21
-pyparsing==3.0.9
-python-dateutil==2.8.2
-pytz==2022.2.1
-PyYAML==6.0
-requests==2.28.1
-rsa==4.9
-Shapely==1.8.4
-six==1.16.0
-typing-extensions==4.3.0
-typing-inspect==0.8.0
-urllib3==1.26.12
+db-dtypes==1.4.3
+Fiona==1.10.1
+geojson==3.2.0
+geopandas===1.0.1; python_version <= '3.9'
+geopandas==1.1.1; python_version >= '3.10'
+google-api-core==2.25.2
+google-auth==2.41.1
+google-cloud-bigquery==3.38.0
+google-cloud-bigquery-storage==2.33.1
+google-cloud-core==2.4.3
+google-crc32c==1.7.1
+google-resumable-media==2.7.2
+googleapis-common-protos==1.70.0
+grpcio==1.75.1
+idna==3.10
+munch==4.0.0
+mypy-extensions==1.1.0
+packaging==25.0
+pandas==2.3.3
+proto-plus==1.26.1
+pyarrow==21.0.0
+pyasn1==0.6.1
+pyasn1-modules==0.4.2
+pycparser==2.23
+pyparsing==3.2.5
+python-dateutil==2.9.0.post0
+pytz==2025.2
+PyYAML==6.0.3
+requests==2.32.5
+rsa==4.9.1
+Shapely===2.0.7; python_version == '3.9'
+Shapely==2.1.2; python_version >= '3.10'
+six==1.17.0
+typing-extensions==4.15.0
+typing-inspect==0.9.0
+urllib3==2.6.0
diff --git a/samples/geography/to_geodataframe.py b/samples/geography/to_geodataframe.py
index e36331f27..630d8d0bf 100644
--- a/samples/geography/to_geodataframe.py
+++ b/samples/geography/to_geodataframe.py
@@ -33,6 +33,6 @@ def get_austin_service_requests_as_geography() -> "pandas.DataFrame":
LIMIT 10
"""
- df = client.query(sql).to_geodataframe()
+ df = client.query_and_wait(sql).to_geodataframe()
# [END bigquery_query_results_geodataframe]
return df
diff --git a/samples/get_dataset.py b/samples/get_dataset.py
index 5654cbdce..1e4ad2904 100644
--- a/samples/get_dataset.py
+++ b/samples/get_dataset.py
@@ -14,7 +14,6 @@
def get_dataset(dataset_id: str) -> None:
-
# [START bigquery_get_dataset]
from google.cloud import bigquery
diff --git a/samples/get_dataset_labels.py b/samples/get_dataset_labels.py
index d97ee3c01..8dc8b9430 100644
--- a/samples/get_dataset_labels.py
+++ b/samples/get_dataset_labels.py
@@ -14,7 +14,6 @@
def get_dataset_labels(dataset_id: str) -> None:
-
# [START bigquery_get_dataset_labels]
from google.cloud import bigquery
diff --git a/samples/get_routine.py b/samples/get_routine.py
index 031d9a127..96e85acc9 100644
--- a/samples/get_routine.py
+++ b/samples/get_routine.py
@@ -19,7 +19,6 @@
def get_routine(routine_id: str) -> "bigquery.Routine":
-
# [START bigquery_get_routine]
from google.cloud import bigquery
diff --git a/samples/get_table.py b/samples/get_table.py
index 6195aaf9a..f71db7cee 100644
--- a/samples/get_table.py
+++ b/samples/get_table.py
@@ -14,7 +14,6 @@
def get_table(table_id: str) -> None:
-
# [START bigquery_get_table]
from google.cloud import bigquery
diff --git a/samples/label_dataset.py b/samples/label_dataset.py
index a59743e5d..4fcc3dcd8 100644
--- a/samples/label_dataset.py
+++ b/samples/label_dataset.py
@@ -14,7 +14,6 @@
def label_dataset(dataset_id: str) -> None:
-
# [START bigquery_label_dataset]
from google.cloud import bigquery
diff --git a/samples/list_datasets.py b/samples/list_datasets.py
index c1b6639a9..d9401e9ae 100644
--- a/samples/list_datasets.py
+++ b/samples/list_datasets.py
@@ -14,7 +14,6 @@
def list_datasets() -> None:
-
# [START bigquery_list_datasets]
from google.cloud import bigquery
diff --git a/samples/list_datasets_by_label.py b/samples/list_datasets_by_label.py
index d1f264872..3a2bef632 100644
--- a/samples/list_datasets_by_label.py
+++ b/samples/list_datasets_by_label.py
@@ -14,7 +14,6 @@
def list_datasets_by_label() -> None:
-
# [START bigquery_list_datasets_by_label]
from google.cloud import bigquery
diff --git a/samples/list_routines.py b/samples/list_routines.py
index bee7c23be..95ddd962e 100644
--- a/samples/list_routines.py
+++ b/samples/list_routines.py
@@ -14,7 +14,6 @@
def list_routines(dataset_id: str) -> None:
-
# [START bigquery_list_routines]
from google.cloud import bigquery
diff --git a/samples/list_tables.py b/samples/list_tables.py
index df846961d..17c06370d 100644
--- a/samples/list_tables.py
+++ b/samples/list_tables.py
@@ -14,7 +14,6 @@
def list_tables(dataset_id: str) -> None:
-
# [START bigquery_list_tables]
from google.cloud import bigquery
diff --git a/samples/load_table_clustered.py b/samples/load_table_clustered.py
index 87b6c76ce..749746882 100644
--- a/samples/load_table_clustered.py
+++ b/samples/load_table_clustered.py
@@ -19,7 +19,6 @@
def load_table_clustered(table_id: str) -> "bigquery.Table":
-
# [START bigquery_load_table_clustered]
from google.cloud import bigquery
diff --git a/samples/load_table_dataframe.py b/samples/load_table_dataframe.py
index db4c131f2..2c668d183 100644
--- a/samples/load_table_dataframe.py
+++ b/samples/load_table_dataframe.py
@@ -19,7 +19,6 @@
def load_table_dataframe(table_id: str) -> "bigquery.Table":
-
# [START bigquery_load_table_dataframe]
import datetime
diff --git a/samples/load_table_file.py b/samples/load_table_file.py
index 00226eb3c..838c3b105 100644
--- a/samples/load_table_file.py
+++ b/samples/load_table_file.py
@@ -19,7 +19,6 @@
def load_table_file(file_path: str, table_id: str) -> "bigquery.Table":
-
# [START bigquery_load_from_file]
from google.cloud import bigquery
diff --git a/samples/load_table_uri_autodetect_csv.py b/samples/load_table_uri_autodetect_csv.py
index c412c63f1..ca4590581 100644
--- a/samples/load_table_uri_autodetect_csv.py
+++ b/samples/load_table_uri_autodetect_csv.py
@@ -14,7 +14,6 @@
def load_table_uri_autodetect_csv(table_id: str) -> None:
-
# [START bigquery_load_table_gcs_csv_autodetect]
from google.cloud import bigquery
diff --git a/samples/load_table_uri_autodetect_json.py b/samples/load_table_uri_autodetect_json.py
index 9d0bc3f22..00e8dc1b2 100644
--- a/samples/load_table_uri_autodetect_json.py
+++ b/samples/load_table_uri_autodetect_json.py
@@ -14,7 +14,6 @@
def load_table_uri_autodetect_json(table_id: str) -> None:
-
# [START bigquery_load_table_gcs_json_autodetect]
from google.cloud import bigquery
diff --git a/samples/load_table_uri_avro.py b/samples/load_table_uri_avro.py
index e9f7c39ed..a0e8c86a6 100644
--- a/samples/load_table_uri_avro.py
+++ b/samples/load_table_uri_avro.py
@@ -14,7 +14,6 @@
def load_table_uri_avro(table_id: str) -> None:
-
# [START bigquery_load_table_gcs_avro]
from google.cloud import bigquery
diff --git a/samples/load_table_uri_cmek.py b/samples/load_table_uri_cmek.py
index 4dfc0d3b4..d54422028 100644
--- a/samples/load_table_uri_cmek.py
+++ b/samples/load_table_uri_cmek.py
@@ -14,7 +14,6 @@
def load_table_uri_cmek(table_id: str, kms_key_name: str) -> None:
-
# [START bigquery_load_table_gcs_json_cmek]
from google.cloud import bigquery
diff --git a/samples/load_table_uri_csv.py b/samples/load_table_uri_csv.py
index 9cb8c6f20..d660a2195 100644
--- a/samples/load_table_uri_csv.py
+++ b/samples/load_table_uri_csv.py
@@ -14,7 +14,6 @@
def load_table_uri_csv(table_id: str) -> None:
-
# [START bigquery_load_table_gcs_csv]
from google.cloud import bigquery
diff --git a/samples/load_table_uri_orc.py b/samples/load_table_uri_orc.py
index 7babd2630..c09129216 100644
--- a/samples/load_table_uri_orc.py
+++ b/samples/load_table_uri_orc.py
@@ -14,7 +14,6 @@
def load_table_uri_orc(table_id: str) -> None:
-
# [START bigquery_load_table_gcs_orc]
from google.cloud import bigquery
diff --git a/samples/load_table_uri_truncate_avro.py b/samples/load_table_uri_truncate_avro.py
index 51c6636fa..307a4e4de 100644
--- a/samples/load_table_uri_truncate_avro.py
+++ b/samples/load_table_uri_truncate_avro.py
@@ -14,7 +14,6 @@
def load_table_uri_truncate_avro(table_id: str) -> None:
-
# [START bigquery_load_table_gcs_avro_truncate]
import io
diff --git a/samples/load_table_uri_truncate_csv.py b/samples/load_table_uri_truncate_csv.py
index ee8b34043..4bfd306cd 100644
--- a/samples/load_table_uri_truncate_csv.py
+++ b/samples/load_table_uri_truncate_csv.py
@@ -14,7 +14,6 @@
def load_table_uri_truncate_csv(table_id: str) -> None:
-
# [START bigquery_load_table_gcs_csv_truncate]
import io
diff --git a/samples/load_table_uri_truncate_json.py b/samples/load_table_uri_truncate_json.py
index e85e0808e..a05a3eda0 100644
--- a/samples/load_table_uri_truncate_json.py
+++ b/samples/load_table_uri_truncate_json.py
@@ -14,7 +14,6 @@
def load_table_uri_truncate_json(table_id: str) -> None:
-
# [START bigquery_load_table_gcs_json_truncate]
import io
diff --git a/samples/load_table_uri_truncate_orc.py b/samples/load_table_uri_truncate_orc.py
index c730099d1..1c704b745 100644
--- a/samples/load_table_uri_truncate_orc.py
+++ b/samples/load_table_uri_truncate_orc.py
@@ -14,7 +14,6 @@
def load_table_uri_truncate_orc(table_id: str) -> None:
-
# [START bigquery_load_table_gcs_orc_truncate]
import io
diff --git a/samples/load_table_uri_truncate_parquet.py b/samples/load_table_uri_truncate_parquet.py
index 3a0a55c8a..d74f79910 100644
--- a/samples/load_table_uri_truncate_parquet.py
+++ b/samples/load_table_uri_truncate_parquet.py
@@ -14,7 +14,6 @@
def load_table_uri_truncate_parquet(table_id: str) -> None:
-
# [START bigquery_load_table_gcs_parquet_truncate]
import io
diff --git a/samples/magics/conftest.py b/samples/magics/conftest.py
index 55ea30f90..0943c535a 100644
--- a/samples/magics/conftest.py
+++ b/samples/magics/conftest.py
@@ -18,7 +18,7 @@
import pytest
if typing.TYPE_CHECKING:
- from IPython.core.interactiveshell import TerminalInteractiveShell
+ from IPython.terminal.interactiveshell import TerminalInteractiveShell
interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell")
tools = pytest.importorskip("IPython.testing.tools")
@@ -40,5 +40,7 @@ def ipython_interactive(
for the duration of the test scope.
"""
- with ipython.builtin_trap:
+
+ trap = typing.cast(typing.ContextManager, ipython.builtin_trap)
+ with trap:
yield ipython
diff --git a/samples/magics/noxfile.py b/samples/magics/noxfile.py
index 29b5bc852..db2333e5a 100644
--- a/samples/magics/noxfile.py
+++ b/samples/magics/noxfile.py
@@ -18,7 +18,7 @@
import os
from pathlib import Path
import sys
-from typing import Callable, Dict, List, Optional
+from typing import Callable, Dict, Optional
import nox
@@ -29,7 +29,7 @@
# WARNING - WARNING - WARNING - WARNING - WARNING
# WARNING - WARNING - WARNING - WARNING - WARNING
-BLACK_VERSION = "black==22.3.0"
+BLACK_VERSION = "black==23.7.0"
ISORT_VERSION = "isort==5.10.1"
# Copy `noxfile_config.py` to your directory and modify it instead.
@@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
# DO NOT EDIT - automatically generated.
# All versions used to test samples.
-ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
+ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
# Any default versions that should be ignored.
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
@@ -109,22 +109,6 @@ def get_pytest_env_vars() -> Dict[str, str]:
#
-def _determine_local_import_names(start_dir: str) -> List[str]:
- """Determines all import names that should be considered "local".
-
- This is used when running the linter to insure that import order is
- properly checked.
- """
- file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)]
- return [
- basename
- for basename, extension in file_ext_pairs
- if extension == ".py"
- or os.path.isdir(os.path.join(start_dir, basename))
- and basename not in ("__pycache__")
- ]
-
-
# Linting with flake8.
#
# We ignore the following rules:
@@ -139,7 +123,6 @@ def _determine_local_import_names(start_dir: str) -> List[str]:
"--show-source",
"--builtin=gettext",
"--max-complexity=20",
- "--import-order-style=google",
"--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py",
"--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202",
"--max-line-length=88",
@@ -149,14 +132,11 @@ def _determine_local_import_names(start_dir: str) -> List[str]:
@nox.session
def lint(session: nox.sessions.Session) -> None:
if not TEST_CONFIG["enforce_type_hints"]:
- session.install("flake8", "flake8-import-order")
+ session.install("flake8")
else:
- session.install("flake8", "flake8-import-order", "flake8-annotations")
+ session.install("flake8", "flake8-annotations")
- local_names = _determine_local_import_names(".")
args = FLAKE8_COMMON_ARGS + [
- "--application-import-names",
- ",".join(local_names),
".",
]
session.run("flake8", *args)
@@ -208,8 +188,10 @@ def _session_tests(
session: nox.sessions.Session, post_install: Callable = None
) -> None:
# check for presence of tests
- test_list = glob.glob("*_test.py") + glob.glob("test_*.py")
- test_list.extend(glob.glob("tests"))
+ test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob(
+ "**/test_*.py", recursive=True
+ )
+ test_list.extend(glob.glob("**/tests", recursive=True))
if len(test_list) == 0:
print("No tests found, skipping directory.")
diff --git a/samples/magics/noxfile_config.py b/samples/magics/noxfile_config.py
new file mode 100644
index 000000000..982751b8b
--- /dev/null
+++ b/samples/magics/noxfile_config.py
@@ -0,0 +1,37 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Default TEST_CONFIG_OVERRIDE for python repos.
+
+# You can copy this file into your directory, then it will be imported from
+# the noxfile.py.
+
+# The source of truth:
+# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py
+
+TEST_CONFIG_OVERRIDE = {
+ # You can opt out from the test for specific Python versions.
+ "ignored_versions": [
+ "2.7",
+ ],
+ # An envvar key for determining the project id to use. Change it
+ # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+ # build specific Cloud project. You can also use your own string
+ # to use your own Cloud project.
+ "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
+ # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT",
+ # A dictionary you want to inject into your test. Don't put any
+ # secrets here. These values will override predefined values.
+ "envs": {},
+}
diff --git a/samples/magics/query.py b/samples/magics/query.py
index 4d3b4418b..0ac947db0 100644
--- a/samples/magics/query.py
+++ b/samples/magics/query.py
@@ -24,7 +24,7 @@
def query() -> "pandas.DataFrame":
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ ip.extension_manager.load_extension("bigquery_magics")
sample = """
# [START bigquery_jupyter_query]
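The magics samples now load the extension from the separate bigquery_magics package instead of google.cloud.bigquery. A sketch of the equivalent in an interactive IPython/Jupyter session, assuming bigquery_magics is installed:

import IPython

ip = IPython.get_ipython()  # Only available inside an IPython session.
ip.extension_manager.load_extension("bigquery_magics")
# The %%bigquery cell magic then works as before, for example:
ip.run_cell("%%bigquery\nSELECT 1 AS value")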
diff --git a/samples/magics/query_params_scalars.py b/samples/magics/query_params_scalars.py
index e833ef93b..74f665acb 100644
--- a/samples/magics/query_params_scalars.py
+++ b/samples/magics/query_params_scalars.py
@@ -24,7 +24,7 @@
def query_with_parameters() -> "pandas.DataFrame":
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ ip.extension_manager.load_extension("bigquery_magics")
sample = """
# [START bigquery_jupyter_query_params_scalars]
diff --git a/samples/magics/requirements-test.txt b/samples/magics/requirements-test.txt
index 856751fc1..31b836790 100644
--- a/samples/magics/requirements-test.txt
+++ b/samples/magics/requirements-test.txt
@@ -1,3 +1,4 @@
-google-cloud-testutils==1.3.3
-pytest==7.1.2
-mock==4.0.3
+google-cloud-testutils==1.6.4
+pytest==8.4.2
+mock==5.2.0
+pytest-xdist==3.8.0
diff --git a/samples/magics/requirements.txt b/samples/magics/requirements.txt
index 4c0a67a18..331e910e2 100644
--- a/samples/magics/requirements.txt
+++ b/samples/magics/requirements.txt
@@ -1,13 +1,6 @@
-db-dtypes==1.0.3
-google-cloud-bigquery-storage==2.14.2
-google-auth-oauthlib==0.5.2
-grpcio==1.47.0
-ipython===7.31.1; python_version == '3.7'
-ipython===8.0.1; python_version == '3.8'
-ipython==8.4.0; python_version >= '3.9'
-matplotlib==3.5.3
-pandas===1.3.5; python_version == '3.7'
-pandas==1.4.3; python_version >= '3.8'
-pyarrow==9.0.0
-pytz==2022.2.1
-typing-extensions==4.3.0
+bigquery_magics==0.10.3
+db-dtypes==1.4.3
+google-cloud-bigquery==3.38.0
+google-cloud-bigquery-storage==2.33.1
+ipython===8.18.1
+pandas==2.3.3
diff --git a/samples/mypy.ini b/samples/mypy.ini
index 29757e47d..3f7eb6647 100644
--- a/samples/mypy.ini
+++ b/samples/mypy.ini
@@ -2,7 +2,6 @@
# Should match DEFAULT_PYTHON_VERSION from root noxfile.py
python_version = 3.8
exclude = noxfile\.py
-strict = True
warn_unused_configs = True
[mypy-google.auth,google.oauth2,geojson,google_auth_oauthlib,IPython.*]
diff --git a/google/cloud/__init__.py b/samples/notebooks/__init__.py
similarity index 69%
rename from google/cloud/__init__.py
rename to samples/notebooks/__init__.py
index 8e60d8439..4fbd93bb2 100644
--- a/google/cloud/__init__.py
+++ b/samples/notebooks/__init__.py
@@ -1,6 +1,4 @@
-# -*- coding: utf-8 -*-
-#
-# Copyright 2019 Google LLC
+# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -13,12 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
-try:
- import pkg_resources
-
- pkg_resources.declare_namespace(__name__)
-except ImportError:
- import pkgutil
-
- __path__ = pkgutil.extend_path(__path__, __name__) # type: ignore
diff --git a/.github/.OwlBot.lock.yaml b/samples/notebooks/conftest.py
similarity index 60%
rename from .github/.OwlBot.lock.yaml
rename to samples/notebooks/conftest.py
index c6acdf3f9..fdc85a852 100644
--- a/.github/.OwlBot.lock.yaml
+++ b/samples/notebooks/conftest.py
@@ -1,17 +1,23 @@
-# Copyright 2022 Google LLC
+# Copyright 2020 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
-# http://www.apache.org/licenses/LICENSE-2.0
+# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-docker:
- image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest
- digest: sha256:562802bfac02e012a6ac34eda282f81d06e77326b82a32d7bbb1369ff552b387
-# created: 2022-08-24T17:07:22.006876712Z
+
+from google.cloud import bigquery
+import pytest
+
+
+@pytest.fixture
+def bigquery_client_patch(
+ monkeypatch: pytest.MonkeyPatch, bigquery_client: bigquery.Client
+) -> None:
+ monkeypatch.setattr(bigquery, "Client", lambda: bigquery_client)
diff --git a/samples/snippets/jupyter_tutorial_test.py b/samples/notebooks/jupyter_tutorial_test.py
similarity index 97%
rename from samples/snippets/jupyter_tutorial_test.py
rename to samples/notebooks/jupyter_tutorial_test.py
index 9d42a4eda..1861a822f 100644
--- a/samples/snippets/jupyter_tutorial_test.py
+++ b/samples/notebooks/jupyter_tutorial_test.py
@@ -45,7 +45,9 @@ def ipython_interactive(
for the duration of the test scope.
"""
- with ipython.builtin_trap:
+
+ trap = typing.cast(typing.ContextManager, ipython.builtin_trap)
+ with trap:
yield ipython
@@ -60,7 +62,7 @@ def _strip_region_tags(sample_text: str) -> str:
def test_jupyter_tutorial(ipython: "TerminalInteractiveShell") -> None:
matplotlib.use("agg")
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ ip.extension_manager.load_extension("bigquery_magics")
sample = """
# [START bigquery_jupyter_magic_gender_by_year]
diff --git a/samples/notebooks/mypy.ini b/samples/notebooks/mypy.ini
new file mode 100644
index 000000000..dea60237b
--- /dev/null
+++ b/samples/notebooks/mypy.ini
@@ -0,0 +1,8 @@
+[mypy]
+; We require type annotations in all samples.
+strict = True
+exclude = noxfile\.py
+warn_unused_configs = True
+
+[mypy-IPython.*,nox,noxfile_config,pandas]
+ignore_missing_imports = True
\ No newline at end of file
diff --git a/samples/notebooks/noxfile.py b/samples/notebooks/noxfile.py
new file mode 100644
index 000000000..db2333e5a
--- /dev/null
+++ b/samples/notebooks/noxfile.py
@@ -0,0 +1,293 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+
+import glob
+import os
+from pathlib import Path
+import sys
+from typing import Callable, Dict, Optional
+
+import nox
+
+
+# WARNING - WARNING - WARNING - WARNING - WARNING
+# WARNING - WARNING - WARNING - WARNING - WARNING
+# DO NOT EDIT THIS FILE EVER!
+# WARNING - WARNING - WARNING - WARNING - WARNING
+# WARNING - WARNING - WARNING - WARNING - WARNING
+
+BLACK_VERSION = "black==23.7.0"
+ISORT_VERSION = "isort==5.10.1"
+
+# Copy `noxfile_config.py` to your directory and modify it instead.
+
+# `TEST_CONFIG` dict is a configuration hook that allows users to
+# modify the test configurations. The values here should be in sync
+# with `noxfile_config.py`. Users will copy `noxfile_config.py` into
+# their directory and modify it.
+
+TEST_CONFIG = {
+ # You can opt out from the test for specific Python versions.
+ "ignored_versions": [],
+ # Old samples are opted out of enforcing Python type hints
+ # All new samples should feature them
+ "enforce_type_hints": False,
+ # An envvar key for determining the project id to use. Change it
+ # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+ # build specific Cloud project. You can also use your own string
+ # to use your own Cloud project.
+ "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
+ # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+ # If you need to use a specific version of pip,
+ # change pip_version_override to the string representation
+ # of the version number, for example, "20.2.4"
+ "pip_version_override": None,
+ # A dictionary you want to inject into your test. Don't put any
+ # secrets here. These values will override predefined values.
+ "envs": {},
+}
+
+
+try:
+ # Ensure we can import noxfile_config in the project's directory.
+ sys.path.append(".")
+ from noxfile_config import TEST_CONFIG_OVERRIDE
+except ImportError as e:
+ print("No user noxfile_config found: detail: {}".format(e))
+ TEST_CONFIG_OVERRIDE = {}
+
+# Update the TEST_CONFIG with the user supplied values.
+TEST_CONFIG.update(TEST_CONFIG_OVERRIDE)
+
+
+def get_pytest_env_vars() -> Dict[str, str]:
+ """Returns a dict for pytest invocation."""
+ ret = {}
+
+ # Override the GCLOUD_PROJECT and the alias.
+ env_key = TEST_CONFIG["gcloud_project_env"]
+ # This should error out if not set.
+ ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key]
+
+ # Apply user supplied envs.
+ ret.update(TEST_CONFIG["envs"])
+ return ret
+
+
+# DO NOT EDIT - automatically generated.
+# All versions used to test samples.
+ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
+
+# Any default versions that should be ignored.
+IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
+
+TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS])
+
+INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in (
+ "True",
+ "true",
+)
+
+# Error if a python version is missing
+nox.options.error_on_missing_interpreters = True
+
+#
+# Style Checks
+#
+
+
+# Linting with flake8.
+#
+# We ignore the following rules:
+# E203: whitespace before ':'
+# E266: too many leading '#' for block comment
+# E501: line too long
+# I202: Additional newline in a section of imports
+#
+# We also need to specify the rules which are ignored by default:
+# ['E226', 'W504', 'E126', 'E123', 'W503', 'E24', 'E704', 'E121']
+FLAKE8_COMMON_ARGS = [
+ "--show-source",
+ "--builtin=gettext",
+ "--max-complexity=20",
+ "--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py",
+ "--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202",
+ "--max-line-length=88",
+]
+
+
+@nox.session
+def lint(session: nox.sessions.Session) -> None:
+ if not TEST_CONFIG["enforce_type_hints"]:
+ session.install("flake8")
+ else:
+ session.install("flake8", "flake8-annotations")
+
+ args = FLAKE8_COMMON_ARGS + [
+ ".",
+ ]
+ session.run("flake8", *args)
+
+
+#
+# Black
+#
+
+
+@nox.session
+def blacken(session: nox.sessions.Session) -> None:
+ """Run black. Format code to uniform standard."""
+ session.install(BLACK_VERSION)
+ python_files = [path for path in os.listdir(".") if path.endswith(".py")]
+
+ session.run("black", *python_files)
+
+
+#
+# format = isort + black
+#
+
+
+@nox.session
+def format(session: nox.sessions.Session) -> None:
+ """
+ Run isort to sort imports. Then run black
+ to format code to uniform standard.
+ """
+ session.install(BLACK_VERSION, ISORT_VERSION)
+ python_files = [path for path in os.listdir(".") if path.endswith(".py")]
+
+ # Use the --fss option to sort imports using strict alphabetical order.
+ # See https://pycqa.github.io/isort/docs/configuration/options.html#force-sort-within-sections
+ session.run("isort", "--fss", *python_files)
+ session.run("black", *python_files)
+
+
+#
+# Sample Tests
+#
+
+
+PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"]
+
+
+def _session_tests(
+ session: nox.sessions.Session, post_install: Callable = None
+) -> None:
+    """Runs py.test for a particular project."""
+ # check for presence of tests
+ test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob(
+ "**/test_*.py", recursive=True
+ )
+ test_list.extend(glob.glob("**/tests", recursive=True))
+
+ if len(test_list) == 0:
+ print("No tests found, skipping directory.")
+ return
+
+ if TEST_CONFIG["pip_version_override"]:
+ pip_version = TEST_CONFIG["pip_version_override"]
+ session.install(f"pip=={pip_version}")
+ concurrent_args = []
+ if os.path.exists("requirements.txt"):
+ if os.path.exists("constraints.txt"):
+ session.install("-r", "requirements.txt", "-c", "constraints.txt")
+ else:
+ session.install("-r", "requirements.txt")
+ with open("requirements.txt") as rfile:
+ packages = rfile.read()
+
+ if os.path.exists("requirements-test.txt"):
+ if os.path.exists("constraints-test.txt"):
+ session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt")
+ else:
+ session.install("-r", "requirements-test.txt")
+ with open("requirements-test.txt") as rtfile:
+ packages += rtfile.read()
+
+ if INSTALL_LIBRARY_FROM_SOURCE:
+ session.install("-e", _get_repo_root())
+
+ if post_install:
+ post_install(session)
+
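+    # Run tests in parallel when a supported plugin is listed in the
+    # requirements files read above.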
+ if "pytest-parallel" in packages:
+ concurrent_args.extend(["--workers", "auto", "--tests-per-worker", "auto"])
+ elif "pytest-xdist" in packages:
+ concurrent_args.extend(["-n", "auto"])
+
+ session.run(
+ "pytest",
+ *(PYTEST_COMMON_ARGS + session.posargs + concurrent_args),
+ # Pytest will return 5 when no tests are collected. This can happen
+ # on travis where slow and flaky tests are excluded.
+ # See http://doc.pytest.org/en/latest/_modules/_pytest/main.html
+ success_codes=[0, 5],
+ env=get_pytest_env_vars(),
+ )
+
+
+@nox.session(python=ALL_VERSIONS)
+def py(session: nox.sessions.Session) -> None:
+ """Runs py.test for a sample using the specified version of Python."""
+ if session.python in TESTED_VERSIONS:
+ _session_tests(session)
+ else:
+ session.skip(
+ "SKIPPED: {} tests are disabled for this sample.".format(session.python)
+ )
+
+
+#
+# Readmegen
+#
+
+
+def _get_repo_root() -> Optional[str]:
+ """Returns the root folder of the project."""
+ # Get root of this repository. Assume we don't have directories nested deeper than 10 items.
+ p = Path(os.getcwd())
+ for i in range(10):
+ if p is None:
+ break
+ if Path(p / ".git").exists():
+ return str(p)
+ # .git is not available in repos cloned via Cloud Build
+ # setup.py is always in the library's root, so use that instead
+ # https://github.com/googleapis/synthtool/issues/792
+ if Path(p / "setup.py").exists():
+ return str(p)
+ p = p.parent
+ raise Exception("Unable to detect repository root.")
+
+
+GENERATED_READMES = sorted([x for x in Path(".").rglob("*.rst.in")])
+
+
+@nox.session
+@nox.parametrize("path", GENERATED_READMES)
+def readmegen(session: nox.sessions.Session, path: str) -> None:
+ """(Re-)generates the readme for a sample."""
+ session.install("jinja2", "pyyaml")
+ dir_ = os.path.dirname(path)
+
+ if os.path.exists(os.path.join(dir_, "requirements.txt")):
+ session.install("-r", os.path.join(dir_, "requirements.txt"))
+
+ in_file = os.path.join(dir_, "README.rst.in")
+ session.run(
+ "python", _get_repo_root() + "/scripts/readme-gen/readme_gen.py", in_file
+ )
diff --git a/samples/notebooks/noxfile_config.py b/samples/notebooks/noxfile_config.py
new file mode 100644
index 000000000..315bd5be8
--- /dev/null
+++ b/samples/notebooks/noxfile_config.py
@@ -0,0 +1,40 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Default TEST_CONFIG_OVERRIDE for python repos.
+
+# You can copy this file into your directory, then it will be imported from
+# the noxfile.py.
+
+# The source of truth:
+# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py
+
+TEST_CONFIG_OVERRIDE = {
+ # You can opt out from the test for specific Python versions.
+ "ignored_versions": [
+ "2.7",
+ # TODO: Enable 3.10 once there is a geopandas/fiona release.
+ # https://github.com/Toblerity/Fiona/issues/1043
+ "3.10",
+ ],
+ # An envvar key for determining the project id to use. Change it
+ # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+ # build specific Cloud project. You can also use your own string
+ # to use your own Cloud project.
+ "gcloud_project_env": "GOOGLE_CLOUD_PROJECT",
+ # "gcloud_project_env": "BUILD_SPECIFIC_GCLOUD_PROJECT",
+ # A dictionary you want to inject into your test. Don't put any
+ # secrets here. These values will override predefined values.
+ "envs": {},
+}
diff --git a/samples/notebooks/requirements-test.txt b/samples/notebooks/requirements-test.txt
new file mode 100644
index 000000000..31b836790
--- /dev/null
+++ b/samples/notebooks/requirements-test.txt
@@ -0,0 +1,4 @@
+google-cloud-testutils==1.6.4
+pytest==8.4.2
+mock==5.2.0
+pytest-xdist==3.8.0
diff --git a/samples/notebooks/requirements.txt b/samples/notebooks/requirements.txt
new file mode 100644
index 000000000..ef509734a
--- /dev/null
+++ b/samples/notebooks/requirements.txt
@@ -0,0 +1,9 @@
+bigquery-magics==0.10.3
+db-dtypes==1.4.3
+google-cloud-bigquery==3.38.0
+google-cloud-bigquery-storage==2.33.1
+ipython===8.18.1; python_version == '3.9'
+ipython==9.6.0; python_version >= '3.10'
+matplotlib===3.9.2; python_version == '3.9'
+matplotlib==3.10.6; python_version >= '3.10'
+pandas==2.3.3
diff --git a/samples/query_external_gcs_temporary_table.py b/samples/query_external_gcs_temporary_table.py
index 9bcb86aab..e0bc8438f 100644
--- a/samples/query_external_gcs_temporary_table.py
+++ b/samples/query_external_gcs_temporary_table.py
@@ -14,7 +14,6 @@
def query_external_gcs_temporary_table() -> None:
-
# [START bigquery_query_external_gcs_temp]
from google.cloud import bigquery
@@ -39,8 +38,8 @@ def query_external_gcs_temporary_table() -> None:
# Example query to find states starting with 'W'.
sql = 'SELECT * FROM `{}` WHERE name LIKE "W%"'.format(table_id)
- query_job = client.query(sql, job_config=job_config) # Make an API request.
+ results = client.query_and_wait(sql, job_config=job_config) # Make an API request.
- w_states = list(query_job) # Wait for the job to complete.
+ w_states = list(results) # Wait for the job to complete.
print("There are {} states with names starting with W.".format(len(w_states)))
# [END bigquery_query_external_gcs_temp]
diff --git a/samples/query_external_sheets_permanent_table.py b/samples/query_external_sheets_permanent_table.py
index a5855e66a..0f8ddbae1 100644
--- a/samples/query_external_sheets_permanent_table.py
+++ b/samples/query_external_sheets_permanent_table.py
@@ -14,7 +14,6 @@
def query_external_sheets_permanent_table(dataset_id: str) -> None:
-
# [START bigquery_query_external_sheets_perm]
from google.cloud import bigquery
import google.auth
@@ -70,10 +69,10 @@ def query_external_sheets_permanent_table(dataset_id: str) -> None:
# Example query to find states starting with "W".
sql = 'SELECT * FROM `{}.{}` WHERE name LIKE "W%"'.format(dataset_id, table_id)
- query_job = client.query(sql) # Make an API request.
+ results = client.query_and_wait(sql) # Make an API request.
# Wait for the query to complete.
- w_states = list(query_job)
+ w_states = list(results)
print(
"There are {} states with names starting with W in the selected range.".format(
len(w_states)
diff --git a/samples/query_external_sheets_temporary_table.py b/samples/query_external_sheets_temporary_table.py
index 944d3b826..876e4cc1a 100644
--- a/samples/query_external_sheets_temporary_table.py
+++ b/samples/query_external_sheets_temporary_table.py
@@ -14,7 +14,6 @@
def query_external_sheets_temporary_table() -> None:
-
# [START bigquery_query_external_sheets_temp]
# [START bigquery_auth_drive_scope]
from google.cloud import bigquery
diff --git a/samples/query_no_cache.py b/samples/query_no_cache.py
index f39c01dbc..b942e5010 100644
--- a/samples/query_no_cache.py
+++ b/samples/query_no_cache.py
@@ -14,7 +14,6 @@
def query_no_cache() -> None:
-
# [START bigquery_query_no_cache]
from google.cloud import bigquery
@@ -27,8 +26,8 @@ def query_no_cache() -> None:
FROM `bigquery-public-data.samples.shakespeare`
GROUP BY corpus;
"""
- query_job = client.query(sql, job_config=job_config) # Make an API request.
+ results = client.query_and_wait(sql, job_config=job_config) # Make an API request.
- for row in query_job:
+ for row in results:
print(row)
# [END bigquery_query_no_cache]
diff --git a/samples/query_pagination.py b/samples/query_pagination.py
index 2e1654050..7ccaecff7 100644
--- a/samples/query_pagination.py
+++ b/samples/query_pagination.py
@@ -14,7 +14,6 @@
def query_pagination() -> None:
-
# [START bigquery_query_pagination]
from google.cloud import bigquery
diff --git a/samples/query_to_arrow.py b/samples/query_to_arrow.py
index 157a93638..d359bb79a 100644
--- a/samples/query_to_arrow.py
+++ b/samples/query_to_arrow.py
@@ -19,7 +19,6 @@
def query_to_arrow() -> "pyarrow.Table":
-
# [START bigquery_query_to_arrow]
from google.cloud import bigquery
@@ -45,8 +44,8 @@ def query_to_arrow() -> "pyarrow.Table":
FROM races r
CROSS JOIN UNNEST(r.participants) as participant;
"""
- query_job = client.query(sql)
- arrow_table = query_job.to_arrow() # Make an API request.
+ results = client.query_and_wait(sql)
+ arrow_table = results.to_arrow() # Make an API request.
print(
"Downloaded {} rows, {} columns.".format(
diff --git a/samples/snippets/authenticate_service_account_test.py b/samples/snippets/authenticate_service_account_test.py
index 4b5711f80..fbdd2d064 100644
--- a/samples/snippets/authenticate_service_account_test.py
+++ b/samples/snippets/authenticate_service_account_test.py
@@ -17,7 +17,7 @@
import google.auth
-import authenticate_service_account
+import authenticate_service_account # type: ignore
if typing.TYPE_CHECKING:
import pytest
diff --git a/samples/snippets/authorized_view_tutorial.py b/samples/snippets/authorized_view_tutorial.py
index bfb61bc38..f52170bc6 100644
--- a/samples/snippets/authorized_view_tutorial.py
+++ b/samples/snippets/authorized_view_tutorial.py
@@ -62,15 +62,13 @@ def run_authorized_view_tutorial(
FROM `bigquery-public-data.github_repos.commits`
LIMIT 1000
"""
- query_job = client.query(
+ client.query_and_wait(
sql,
# Location must match that of the dataset(s) referenced in the query
# and of the destination table.
location="US",
job_config=job_config,
- ) # API request - starts the query
-
- query_job.result() # Waits for the query to finish
+ ) # API request - starts the query and waits for query to finish
# [END bigquery_avt_create_source_table]
# Create a separate dataset to store your view
diff --git a/samples/snippets/authorized_view_tutorial_test.py b/samples/snippets/authorized_view_tutorial_test.py
index cae870486..04f6312d3 100644
--- a/samples/snippets/authorized_view_tutorial_test.py
+++ b/samples/snippets/authorized_view_tutorial_test.py
@@ -13,12 +13,12 @@
# limitations under the License.
from typing import Iterator, List
-import uuid
from google.cloud import bigquery
import pytest
-import authorized_view_tutorial
+import authorized_view_tutorial # type: ignore
+from conftest import prefixer # type: ignore
@pytest.fixture(scope="module")
@@ -38,12 +38,8 @@ def test_authorized_view_tutorial(
client: bigquery.Client, datasets_to_delete: List[str]
) -> None:
override_values = {
- "source_dataset_id": "github_source_data_{}".format(
- str(uuid.uuid4()).replace("-", "_")
- ),
- "shared_dataset_id": "shared_views_{}".format(
- str(uuid.uuid4()).replace("-", "_")
- ),
+ "source_dataset_id": f"{prefixer.create_prefix()}_authorized_view_tutorial",
+ "shared_dataset_id": f"{prefixer.create_prefix()}_authorized_view_tutorial_shared_views",
}
source_dataset_ref = "{}.{}".format(
client.project, override_values["source_dataset_id"]
diff --git a/samples/client_query.py b/samples/snippets/client_query.py
similarity index 93%
rename from samples/client_query.py
rename to samples/snippets/client_query.py
index 091d3f98b..ccae2e8bd 100644
--- a/samples/client_query.py
+++ b/samples/snippets/client_query.py
@@ -14,9 +14,7 @@
def client_query() -> None:
-
# [START bigquery_query]
-
from google.cloud import bigquery
# Construct a BigQuery client object.
@@ -30,10 +28,10 @@ def client_query() -> None:
ORDER BY total_people DESC
LIMIT 20
"""
- query_job = client.query(query) # Make an API request.
+ rows = client.query_and_wait(query) # Make an API request.
print("The query data:")
- for row in query_job:
+ for row in rows:
# Row values can be accessed by field name or index.
print("name={}, count={}".format(row[0], row["total_people"]))
# [END bigquery_query]
diff --git a/samples/snippets/client_query_test.py b/samples/snippets/client_query_test.py
new file mode 100644
index 000000000..1bc83a230
--- /dev/null
+++ b/samples/snippets/client_query_test.py
@@ -0,0 +1,38 @@
+# Copyright 2019 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import client_query # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None:
+ client_query.client_query()
+ out, _ = capsys.readouterr()
+ assert "The query data:" in out
+ assert "name=James, count=272793" in out
+
+
+def test_client_query_job_optional(
+ capsys: "pytest.CaptureFixture[str]", monkeypatch: "pytest.MonkeyPatch"
+) -> None:
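+    # QUERY_PREVIEW_ENABLED opts the client into the job-optional query path,
+    # so this test covers the same sample without a dedicated query job.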
+ monkeypatch.setenv("QUERY_PREVIEW_ENABLED", "true")
+
+ client_query.client_query()
+ out, _ = capsys.readouterr()
+ assert "The query data:" in out
+ assert "name=James, count=272793" in out
diff --git a/samples/snippets/create_iam_policy_test.py b/samples/snippets/create_iam_policy_test.py
new file mode 100644
index 000000000..c41ced2cd
--- /dev/null
+++ b/samples/snippets/create_iam_policy_test.py
@@ -0,0 +1,44 @@
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def test_create_iam_policy(table_id: str):
+ your_table_id = table_id
+
+ # [START bigquery_create_iam_policy]
+ from google.cloud import bigquery
+
+ bqclient = bigquery.Client()
+
+ policy = bqclient.get_iam_policy(
+ your_table_id, # e.g. "project.dataset.table"
+ )
+
+ analyst_email = "example-analyst-group@google.com"
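+    # A binding pairs a role with the members it is granted to; appending it to
+    # policy.bindings grants the analyst group read access to the table.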
+ binding = {
+ "role": "roles/bigquery.dataViewer",
+ "members": {f"group:{analyst_email}"},
+ }
+ policy.bindings.append(binding)
+
+ updated_policy = bqclient.set_iam_policy(
+ your_table_id, # e.g. "project.dataset.table"
+ policy,
+ )
+
+ for binding in updated_policy.bindings:
+ print(repr(binding))
+ # [END bigquery_create_iam_policy]
+
+ assert binding in updated_policy.bindings
diff --git a/samples/snippets/create_partitioned_table.py b/samples/snippets/create_partitioned_table.py
new file mode 100644
index 000000000..0277d7d0f
--- /dev/null
+++ b/samples/snippets/create_partitioned_table.py
@@ -0,0 +1,45 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_partitioned_table(table_id):
+ your_fully_qualified_table_id = table_id
+
+ # [START bigquery_create_table_partitioned]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ # Use format "your-project.your_dataset.your_table_name" for table_id
+ table_id = your_fully_qualified_table_id
+ schema = [
+ bigquery.SchemaField("name", "STRING"),
+ bigquery.SchemaField("post_abbr", "STRING"),
+ bigquery.SchemaField("date", "DATE"),
+ ]
+ table = bigquery.Table(table_id, schema=schema)
+ table.time_partitioning = bigquery.TimePartitioning(
+ type_=bigquery.TimePartitioningType.DAY,
+ field="date", # name of column to use for partitioning
+ expiration_ms=1000 * 60 * 60 * 24 * 90,
+ ) # 90 days
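+    # Partitions older than the expiration window are deleted automatically.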
+
+ table = client.create_table(table)
+
+ print(
+ f"Created table {table.project}.{table.dataset_id}.{table.table_id}, "
+ f"partitioned on column {table.time_partitioning.field}."
+ )
+ # [END bigquery_create_table_partitioned]
+ return table
diff --git a/samples/snippets/create_partitioned_table_test.py b/samples/snippets/create_partitioned_table_test.py
new file mode 100644
index 000000000..e4d7ec20e
--- /dev/null
+++ b/samples/snippets/create_partitioned_table_test.py
@@ -0,0 +1,34 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import create_partitioned_table # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_create_partitioned_table(
+ capsys: "pytest.CaptureFixture[str]",
+ random_table_id: str,
+) -> None:
+ table = create_partitioned_table.create_partitioned_table(random_table_id)
+
+ out, _ = capsys.readouterr()
+ assert "Created" in out
+ assert random_table_id in out
+
+ assert table.time_partitioning.type_ == "DAY"
+ assert table.time_partitioning.field == "date"
diff --git a/samples/snippets/create_table_cmek.py b/samples/snippets/create_table_cmek.py
new file mode 100644
index 000000000..011c56d4e
--- /dev/null
+++ b/samples/snippets/create_table_cmek.py
@@ -0,0 +1,46 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_table_cmek(table_id: str, kms_key_name: str) -> None:
+ orig_table_id = table_id
+ orig_key_name = kms_key_name
+ # [START bigquery_create_table_cmek]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ # TODO(dev): Change table_id to the full name of the table you want to create.
+ table_id = "your-project.your_dataset.your_table_name"
+
+ # Set the encryption key to use for the table.
+ # TODO: Replace this key with a key you have created in Cloud KMS.
+ kms_key_name = "projects/your-project/locations/us/keyRings/test/cryptoKeys/test"
+
+ # [END bigquery_create_table_cmek]
+
+ table_id = orig_table_id
+ kms_key_name = orig_key_name
+
+ # [START bigquery_create_table_cmek]
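+    # Associate the Cloud KMS key with the table through its encryption
+    # configuration; BigQuery then uses that key to encrypt the table.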
+ table = bigquery.Table(table_id)
+ table.encryption_configuration = bigquery.EncryptionConfiguration(
+ kms_key_name=kms_key_name
+ )
+ table = client.create_table(table) # API request
+
+ print(f"Created {table_id}.")
+ print(f"Key: {table.encryption_configuration.kms_key_name}.")
+
+ # [END bigquery_create_table_cmek]
diff --git a/samples/snippets/create_table_cmek_test.py b/samples/snippets/create_table_cmek_test.py
new file mode 100644
index 000000000..e8626b84c
--- /dev/null
+++ b/samples/snippets/create_table_cmek_test.py
@@ -0,0 +1,36 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import create_table_cmek # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_create_table(
+ capsys: "pytest.CaptureFixture[str]",
+ random_table_id: str,
+) -> None:
+ kms_key_name = (
+ "projects/cloud-samples-tests/locations/us/keyRings/test/cryptoKeys/test"
+ )
+
+ create_table_cmek.create_table_cmek(random_table_id, kms_key_name)
+
+ out, _ = capsys.readouterr()
+ assert "Created" in out
+ assert random_table_id in out
+ assert kms_key_name in out
diff --git a/samples/snippets/create_table_external_data_configuration.py b/samples/snippets/create_table_external_data_configuration.py
new file mode 100644
index 000000000..cbb15d40a
--- /dev/null
+++ b/samples/snippets/create_table_external_data_configuration.py
@@ -0,0 +1,70 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def create_table_external_data_configuration(
+ table_id: str,
+) -> None:
+ """Create a table using an external data source"""
+ orig_table_id = table_id
+ # [START bigquery_query_external_gcs_perm]
+ # [START bigquery_create_table_external_data_configuration]
+ # [START bigquery_create_external_table_definition]
+ from google.cloud import bigquery
+
+ # Construct a BigQuery client object.
+ client = bigquery.Client()
+
+ # TODO(developer): Set table_id to the ID of the table to create.
+ table_id = "your-project.your_dataset.your_table_name"
+ # [END bigquery_create_table_external_data_configuration]
+ # [END bigquery_query_external_gcs_perm]
+ table_id = orig_table_id
+ # [START bigquery_query_external_gcs_perm]
+ # [START bigquery_create_table_external_data_configuration]
+
+ # TODO(developer): Set the external source format of your table.
+ # Note that the set of allowed values for external data sources is
+ # different than the set used for loading data (see :class:`~google.cloud.bigquery.job.SourceFormat`).
+ external_source_format = "AVRO"
+
+ # TODO(developer): Set the source_uris to point to your data in Google Cloud
+ source_uris = [
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro",
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro",
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro",
+ ]
+
+ # Create ExternalConfig object with external source format
+ external_config = bigquery.ExternalConfig(external_source_format)
+ # Set source_uris that point to your data in Google Cloud
+ external_config.source_uris = source_uris
+
+ # TODO(developer) You have the option to set a reference_file_schema_uri, which points to
+ # a reference file for the table schema
+ reference_file_schema_uri = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro"
+
+ external_config.reference_file_schema_uri = reference_file_schema_uri
+ # [END bigquery_create_external_table_definition]
+
+ table = bigquery.Table(table_id)
+ # Set the external data configuration of the table
+ table.external_data_configuration = external_config
+ table = client.create_table(table) # Make an API request.
+
+ print(
+ f"Created table with external source format {table.external_data_configuration.source_format}"
+ )
+ # [END bigquery_create_table_external_data_configuration]
+ # [END bigquery_query_external_gcs_perm]
diff --git a/samples/snippets/create_table_external_data_configuration_test.py b/samples/snippets/create_table_external_data_configuration_test.py
new file mode 100644
index 000000000..bf81a75f9
--- /dev/null
+++ b/samples/snippets/create_table_external_data_configuration_test.py
@@ -0,0 +1,31 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import create_table_external_data_configuration # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_create_table_external_data_configuration(
+ capsys: "pytest.CaptureFixture[str]",
+ random_table_id: str,
+) -> None:
+ create_table_external_data_configuration.create_table_external_data_configuration(
+ random_table_id
+ )
+ out, _ = capsys.readouterr()
+ assert "Created table with external source format AVRO" in out
diff --git a/samples/snippets/create_table_external_hive_partitioned_test.py b/samples/snippets/create_table_external_hive_partitioned_test.py
index 37deb8b12..5b8cbe1c3 100644
--- a/samples/snippets/create_table_external_hive_partitioned_test.py
+++ b/samples/snippets/create_table_external_hive_partitioned_test.py
@@ -14,7 +14,7 @@
import typing
-import create_table_external_hive_partitioned
+import create_table_external_hive_partitioned # type: ignore
if typing.TYPE_CHECKING:
import pytest
diff --git a/samples/snippets/create_table_schema_from_json.py b/samples/snippets/create_table_schema_from_json.py
new file mode 100644
index 000000000..b866e2ebe
--- /dev/null
+++ b/samples/snippets/create_table_schema_from_json.py
@@ -0,0 +1,42 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pathlib
+
+
+def create_table(table_id: str) -> None:
+ orig_table_id = table_id
+ current_directory = pathlib.Path(__file__).parent
+ orig_schema_path = str(current_directory / "schema.json")
+ # [START bigquery_schema_file_create]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ # TODO(dev): Change table_id to the full name of the table you want to create.
+ table_id = "your-project.your_dataset.your_table_name"
+ # TODO(dev): Change schema_path variable to the path of your schema file.
+ schema_path = "path/to/schema.json"
+ # [END bigquery_schema_file_create]
+ table_id = orig_table_id
+ schema_path = orig_schema_path
+
+ # [START bigquery_schema_file_create]
+ # To load a schema file use the schema_from_json method.
+ schema = client.schema_from_json(schema_path)
+
+ table = bigquery.Table(table_id, schema=schema)
+ table = client.create_table(table) # API request
+ print(f"Created table {table_id}.")
+ # [END bigquery_schema_file_create]
diff --git a/samples/snippets/create_table_schema_from_json_test.py b/samples/snippets/create_table_schema_from_json_test.py
new file mode 100644
index 000000000..e725d3ccf
--- /dev/null
+++ b/samples/snippets/create_table_schema_from_json_test.py
@@ -0,0 +1,31 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import create_table_schema_from_json # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_create_table(
+ capsys: "pytest.CaptureFixture[str]",
+ random_table_id: str,
+) -> None:
+ create_table_schema_from_json.create_table(random_table_id)
+
+ out, _ = capsys.readouterr()
+ assert "Created" in out
+ assert random_table_id in out
diff --git a/samples/snippets/create_table_snapshot_test.py b/samples/snippets/create_table_snapshot_test.py
index f1d8d0f7b..17ef24d26 100644
--- a/samples/snippets/create_table_snapshot_test.py
+++ b/samples/snippets/create_table_snapshot_test.py
@@ -14,7 +14,7 @@
import typing
-import create_table_snapshot
+import create_table_snapshot # type: ignore
if typing.TYPE_CHECKING:
import pytest
@@ -25,7 +25,6 @@ def test_create_table_snapshot(
table_id: str,
random_table_id: str,
) -> None:
-
create_table_snapshot.create_table_snapshot(table_id, random_table_id)
out, _ = capsys.readouterr()
diff --git a/samples/snippets/dataset_access_test.py b/samples/snippets/dataset_access_test.py
index 4d1a70eb1..e3a53b084 100644
--- a/samples/snippets/dataset_access_test.py
+++ b/samples/snippets/dataset_access_test.py
@@ -14,12 +14,12 @@
import typing
-import revoke_dataset_access
-import update_dataset_access
+import revoke_dataset_access # type: ignore
+import update_dataset_access # type: ignore
if typing.TYPE_CHECKING:
- import pytest
from google.cloud import bigquery
+ import pytest
def test_dataset_access_permissions(
diff --git a/samples/snippets/delete_job.py b/samples/snippets/delete_job.py
index 7c8640baf..2aeb53849 100644
--- a/samples/snippets/delete_job.py
+++ b/samples/snippets/delete_job.py
@@ -17,8 +17,8 @@ def delete_job_metadata(job_id: str, location: str) -> None:
orig_job_id = job_id
orig_location = location
# [START bigquery_delete_job]
- from google.cloud import bigquery
from google.api_core import exceptions
+ from google.cloud import bigquery
# TODO(developer): Set the job ID to the ID of the job whose metadata you
# wish to delete.
diff --git a/samples/snippets/delete_job_test.py b/samples/snippets/delete_job_test.py
index ac9d52dcf..88eeae1ed 100644
--- a/samples/snippets/delete_job_test.py
+++ b/samples/snippets/delete_job_test.py
@@ -16,7 +16,7 @@
from google.cloud import bigquery
-import delete_job
+import delete_job # type: ignore
if typing.TYPE_CHECKING:
import pytest
diff --git a/samples/snippets/delete_label_table.py b/samples/snippets/delete_label_table.py
new file mode 100644
index 000000000..0e9eaaf8f
--- /dev/null
+++ b/samples/snippets/delete_label_table.py
@@ -0,0 +1,43 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.cloud import bigquery
+
+
+def delete_label_table(table_id: str, label_key: str) -> bigquery.Table:
+ orig_table_id = table_id
+ orig_label_key = label_key
+ # [START bigquery_delete_label_table]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+    # TODO(dev): Change table_id to the full name of the table you wish to delete a label from.
+ table_id = "your-project.your_dataset.your_table_name"
+ # TODO(dev): Change label_key to the name of the label you want to remove.
+ label_key = "color"
+ # [END bigquery_delete_label_table]
+ table_id = orig_table_id
+ label_key = orig_label_key
+ # [START bigquery_delete_label_table]
+ table = client.get_table(table_id) # API request
+
+ # To delete a label from a table, set its value to None
+ table.labels[label_key] = None
+
+ table = client.update_table(table, ["labels"]) # API request
+
+ print(f"Deleted label '{label_key}' from {table_id}.")
+ # [END bigquery_delete_label_table]
+ return table
diff --git a/samples/snippets/delete_label_table_test.py b/samples/snippets/delete_label_table_test.py
new file mode 100644
index 000000000..01e538ae3
--- /dev/null
+++ b/samples/snippets/delete_label_table_test.py
@@ -0,0 +1,33 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import delete_label_table # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_delete_label_table(
+ capsys: "pytest.CaptureFixture[str]",
+ table_id: str,
+) -> None:
+ table = delete_label_table.delete_label_table(table_id, "color")
+
+ out, _ = capsys.readouterr()
+ assert "Deleted" in out
+ assert "color" in out
+ assert table_id in out
+ assert table.labels is None or "color" not in table.labels
diff --git a/samples/snippets/get_table_labels.py b/samples/snippets/get_table_labels.py
new file mode 100644
index 000000000..8cfbd4ee2
--- /dev/null
+++ b/samples/snippets/get_table_labels.py
@@ -0,0 +1,39 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def get_table_labels(table_id: str) -> None:
+ orig_table_id = table_id
+ # [START bigquery_get_table_labels]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+    # TODO(dev): Change table_id to the full name of the table whose labels you want to view.
+ table_id = "your-project.your_dataset.your_table_name"
+
+ # [END bigquery_get_table_labels]
+ table_id = orig_table_id
+
+ # [START bigquery_get_table_labels]
+ table = client.get_table(table_id) # API Request
+
+ # View table labels
+ print(f"Table ID: {table_id}.")
+ if table.labels:
+ for label, value in table.labels.items():
+ print(f"\t{label}: {value}")
+ else:
+ print("\tTable has no labels defined.")
+ # [END bigquery_get_table_labels]
diff --git a/samples/snippets/get_table_labels_test.py b/samples/snippets/get_table_labels_test.py
new file mode 100644
index 000000000..e910d6a65
--- /dev/null
+++ b/samples/snippets/get_table_labels_test.py
@@ -0,0 +1,49 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+from google.cloud import bigquery
+
+import get_table_labels # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_get_table_labels(
+ capsys: "pytest.CaptureFixture[str]",
+ table_id: str,
+ bigquery_client: bigquery.Client,
+) -> None:
+ table = bigquery_client.get_table(table_id)
+ table.labels = {"color": "green"}
+ bigquery_client.update_table(table, ["labels"])
+
+ get_table_labels.get_table_labels(table_id)
+
+ out, _ = capsys.readouterr()
+ assert table_id in out
+ assert "color" in out
+
+
+def test_get_table_labels_no_label(
+ capsys: "pytest.CaptureFixture[str]",
+ table_id: str,
+) -> None:
+ get_table_labels.get_table_labels(table_id)
+
+ out, _ = capsys.readouterr()
+ assert "no labels defined" in out
+ assert table_id in out
diff --git a/samples/snippets/get_table_make_schema.py b/samples/snippets/get_table_make_schema.py
new file mode 100644
index 000000000..f870b42de
--- /dev/null
+++ b/samples/snippets/get_table_make_schema.py
@@ -0,0 +1,47 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def get_table_make_schema(table_id: str, schema_path: str) -> None:
+ orig_table_id = table_id
+ orig_schema_path = schema_path
+ # [START bigquery_schema_file_get]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ # TODO(dev): Change the table_id variable to the full name of the
+ # table you want to get schema from.
+ table_id = "your-project.your_dataset.your_table_name"
+
+ # TODO(dev): Change schema_path variable to the path
+ # of your schema file.
+ schema_path = "path/to/schema.json"
+ # [END bigquery_schema_file_get]
+ table_id = orig_table_id
+ schema_path = orig_schema_path
+ # [START bigquery_schema_file_get]
+ table = client.get_table(table_id) # Make an API request.
+
+ # Write a schema file to schema_path with the schema_to_json method.
+ client.schema_to_json(table.schema, schema_path)
+
+ with open(schema_path, "r", encoding="utf-8") as schema_file:
+ schema_contents = schema_file.read()
+
+ # View table properties
+ print(f"Got table '{table.project}.{table.dataset_id}.{table.table_id}'.")
+ print(f"Table schema: {schema_contents}")
+
+ # [END bigquery_schema_file_get]
diff --git a/samples/snippets/get_table_make_schema_test.py b/samples/snippets/get_table_make_schema_test.py
new file mode 100644
index 000000000..b1a1623bb
--- /dev/null
+++ b/samples/snippets/get_table_make_schema_test.py
@@ -0,0 +1,36 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import get_table_make_schema # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pathlib
+
+ import pytest
+
+
+def test_get_table_make_schema(
+ capsys: "pytest.CaptureFixture[str]",
+ table_id: str,
+ tmp_path: "pathlib.Path",
+) -> None:
+ schema_path = str(tmp_path / "test_schema.json")
+
+ get_table_make_schema.get_table_make_schema(table_id, schema_path)
+
+ out, _ = capsys.readouterr()
+ assert "Got table" in out
+ assert table_id in out
diff --git a/samples/snippets/label_job.py b/samples/snippets/label_job.py
new file mode 100644
index 000000000..cfd06d189
--- /dev/null
+++ b/samples/snippets/label_job.py
@@ -0,0 +1,36 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def label_job() -> None:
+ # [START bigquery_label_job]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ sql = """
+ SELECT corpus
+ FROM `bigquery-public-data.samples.shakespeare`
+ GROUP BY corpus;
+ """
+ labels = {"color": "green"}
+
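+    # Attach the labels through the job configuration; they are stored in the
+    # job's metadata and can be used to filter and organize jobs.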
+ config = bigquery.QueryJobConfig()
+ config.labels = labels
+ location = "us"
+ job = client.query(sql, location=location, job_config=config)
+ job_id = job.job_id
+
+ print(f"Added {job.labels} to {job_id}.")
+ # [END bigquery_label_job]
diff --git a/samples/snippets/label_job_test.py b/samples/snippets/label_job_test.py
new file mode 100644
index 000000000..0780db61a
--- /dev/null
+++ b/samples/snippets/label_job_test.py
@@ -0,0 +1,31 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import label_job # type: ignore
+
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_label_job(
+ capsys: "pytest.CaptureFixture[str]",
+) -> None:
+ label_job.label_job()
+
+ out, _ = capsys.readouterr()
+ assert "color" in out
+ assert "green" in out
diff --git a/samples/snippets/label_table.py b/samples/snippets/label_table.py
new file mode 100644
index 000000000..5fce08d62
--- /dev/null
+++ b/samples/snippets/label_table.py
@@ -0,0 +1,37 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def label_table(table_id: str) -> None:
+ orig_table_id = table_id
+ # [START bigquery_label_table]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+    # TODO(dev): Change table_id to the full name of the table you want to add a label to.
+ table_id = "your-project.your_dataset.your_table_name"
+
+ # [END bigquery_label_table]
+ table_id = orig_table_id
+ # [START bigquery_label_table]
+ table = client.get_table(table_id) # API request
+
+ labels = {"color": "green"}
+ table.labels = labels
+
+ table = client.update_table(table, ["labels"]) # API request
+
+ print(f"Added {table.labels} to {table_id}.")
+ # [END bigquery_label_table]
diff --git a/samples/snippets/label_table_test.py b/samples/snippets/label_table_test.py
new file mode 100644
index 000000000..49f5406ab
--- /dev/null
+++ b/samples/snippets/label_table_test.py
@@ -0,0 +1,31 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import label_table # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_label_table(
+ capsys: "pytest.CaptureFixture[str]",
+ table_id: str,
+) -> None:
+ label_table.label_table(table_id)
+
+ out, _ = capsys.readouterr()
+ assert "color" in out
+ assert table_id in out
diff --git a/samples/snippets/load_table_schema_from_json.py b/samples/snippets/load_table_schema_from_json.py
new file mode 100644
index 000000000..3f1f85430
--- /dev/null
+++ b/samples/snippets/load_table_schema_from_json.py
@@ -0,0 +1,60 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pathlib
+
+
+def load_table(table_id: str) -> None:
+ orig_uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"
+ orig_table_id = table_id
+ current_directory = pathlib.Path(__file__).parent
+ orig_schema_path = str(current_directory / "schema_us_states.json")
+ # [START bigquery_schema_file_load]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ # TODO(dev): Change uri variable to the path of your data file.
+ uri = "gs://your-bucket/path/to/your-file.csv"
+ # TODO(dev): Change table_id to the full name of the table you want to create.
+ table_id = "your-project.your_dataset.your_table"
+ # TODO(dev): Change schema_path variable to the path of your schema file.
+ schema_path = "path/to/schema.json"
+ # [END bigquery_schema_file_load]
+ uri = orig_uri
+ table_id = orig_table_id
+ schema_path = orig_schema_path
+ # [START bigquery_schema_file_load]
+ # To load a schema file use the schema_from_json method.
+ schema = client.schema_from_json(schema_path)
+
+ job_config = bigquery.LoadJobConfig(
+ # To use the schema you loaded pass it into the
+ # LoadJobConfig constructor.
+ schema=schema,
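+        # Skip the CSV header row.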
+ skip_leading_rows=1,
+ )
+
+ # Pass the job_config object to the load_table_from_file,
+ # load_table_from_json, or load_table_from_uri method
+ # to use the schema on a new table.
+ load_job = client.load_table_from_uri(
+ uri, table_id, job_config=job_config
+ ) # Make an API request.
+
+ load_job.result() # Waits for the job to complete.
+
+ destination_table = client.get_table(table_id) # Make an API request.
+ print(f"Loaded {destination_table.num_rows} rows to {table_id}.")
+ # [END bigquery_schema_file_load]
diff --git a/samples/snippets/load_table_schema_from_json_test.py b/samples/snippets/load_table_schema_from_json_test.py
new file mode 100644
index 000000000..745793cd7
--- /dev/null
+++ b/samples/snippets/load_table_schema_from_json_test.py
@@ -0,0 +1,31 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import load_table_schema_from_json # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_load_table(
+ capsys: "pytest.CaptureFixture[str]",
+ random_table_id: str,
+) -> None:
+ load_table_schema_from_json.load_table(random_table_id)
+
+ out, _ = capsys.readouterr()
+ assert "Loaded" in out
+ assert random_table_id in out
diff --git a/samples/snippets/load_table_uri_firestore_test.py b/samples/snippets/load_table_uri_firestore_test.py
index 552fa2e35..e19378a04 100644
--- a/samples/snippets/load_table_uri_firestore_test.py
+++ b/samples/snippets/load_table_uri_firestore_test.py
@@ -14,7 +14,7 @@
import typing
-import load_table_uri_firestore
+import load_table_uri_firestore # type: ignore
if typing.TYPE_CHECKING:
import pytest
diff --git a/samples/snippets/manage_job_test.py b/samples/snippets/manage_job_test.py
index 630be365b..2ef4be2e0 100644
--- a/samples/snippets/manage_job_test.py
+++ b/samples/snippets/manage_job_test.py
@@ -15,8 +15,8 @@
from google.cloud import bigquery
import pytest
-import manage_job_cancel
-import manage_job_get
+import manage_job_cancel # type: ignore
+import manage_job_get # type: ignore
def test_manage_job(capsys: pytest.CaptureFixture[str]) -> None:
diff --git a/samples/snippets/materialized_view.py b/samples/snippets/materialized_view.py
index adb3688a4..a47ee5b81 100644
--- a/samples/snippets/materialized_view.py
+++ b/samples/snippets/materialized_view.py
@@ -60,6 +60,7 @@ def update_materialized_view(
# [START bigquery_update_materialized_view]
import datetime
+
from google.cloud import bigquery
bigquery_client = bigquery.Client()
diff --git a/samples/snippets/materialized_view_test.py b/samples/snippets/materialized_view_test.py
index 70869346f..1b464af6f 100644
--- a/samples/snippets/materialized_view_test.py
+++ b/samples/snippets/materialized_view_test.py
@@ -20,7 +20,8 @@
from google.cloud import bigquery
import pytest
-import materialized_view
+import materialized_view # type: ignore
+from conftest import prefixer # type: ignore
def temp_suffix() -> str:
@@ -37,7 +38,7 @@ def bigquery_client_patch(
@pytest.fixture(scope="module")
def dataset_id(bigquery_client: bigquery.Client) -> Iterator[str]:
- dataset_id = f"mvdataset_{temp_suffix()}"
+ dataset_id = f"{prefixer.create_prefix()}_materialized_view"
bigquery_client.create_dataset(dataset_id)
yield dataset_id
bigquery_client.delete_dataset(dataset_id, delete_contents=True)
diff --git a/samples/snippets/natality_tutorial.py b/samples/snippets/natality_tutorial.py
index b330a3c21..df9fc15be 100644
--- a/samples/snippets/natality_tutorial.py
+++ b/samples/snippets/natality_tutorial.py
@@ -83,8 +83,7 @@ def run_natality_tutorial(override_values: Optional[Dict[str, str]] = None) -> N
"""
# Run the query.
- query_job = client.query(query, job_config=job_config)
- query_job.result() # Waits for the query to finish
+ client.query_and_wait(query, job_config=job_config) # Waits for the query to finish
# [END bigquery_query_natality_tutorial]
diff --git a/samples/snippets/natality_tutorial_test.py b/samples/snippets/natality_tutorial_test.py
index f56738528..603d142f2 100644
--- a/samples/snippets/natality_tutorial_test.py
+++ b/samples/snippets/natality_tutorial_test.py
@@ -13,12 +13,12 @@
# limitations under the License.
from typing import Iterator, List
-import uuid
from google.cloud import bigquery
import pytest
-import natality_tutorial
+import natality_tutorial # type: ignore
+from conftest import prefixer # type: ignore
@pytest.fixture(scope="module")
@@ -37,11 +37,7 @@ def datasets_to_delete(client: bigquery.Client) -> Iterator[List[str]]:
def test_natality_tutorial(
client: bigquery.Client, datasets_to_delete: List[str]
) -> None:
- override_values = {
- "dataset_id": "natality_regression_{}".format(
- str(uuid.uuid4()).replace("-", "_")
- ),
- }
+ override_values = {"dataset_id": f"{prefixer.create_prefix()}_natality_tutorial"}
datasets_to_delete.append(override_values["dataset_id"])
natality_tutorial.run_natality_tutorial(override_values)
diff --git a/samples/snippets/nested_repeated_schema.py b/samples/snippets/nested_repeated_schema.py
new file mode 100644
index 000000000..5d55860cc
--- /dev/null
+++ b/samples/snippets/nested_repeated_schema.py
@@ -0,0 +1,54 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def nested_schema(table_id: str) -> None:
+ orig_table_id = table_id
+ # [START bigquery_nested_repeated_schema]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ # TODO(dev): Change table_id to the full name of the table you want to create.
+ table_id = "your-project.your_dataset.your_table_name"
+
+ schema = [
+ bigquery.SchemaField("id", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("first_name", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("last_name", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("dob", "DATE", mode="NULLABLE"),
+ bigquery.SchemaField(
+ "addresses",
+ "RECORD",
+ mode="REPEATED",
+ fields=[
+ bigquery.SchemaField("status", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("address", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("city", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("state", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("zip", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("numberOfYears", "STRING", mode="NULLABLE"),
+ ],
+ ),
+ ]
+ # [END bigquery_nested_repeated_schema]
+
+ table_id = orig_table_id
+
+ # [START bigquery_nested_repeated_schema]
+ table = bigquery.Table(table_id, schema=schema)
+ table = client.create_table(table) # API request
+
+ print(f"Created table {table.project}.{table.dataset_id}.{table.table_id}.")
+ # [END bigquery_nested_repeated_schema]
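
As a companion to the new nested/repeated schema sample, a hedged sketch of streaming a row that exercises the REPEATED RECORD field; the table name is a placeholder and the row values are invented:

from google.cloud import bigquery

client = bigquery.Client()
table_id = "your-project.your_dataset.your_table_name"  # placeholder

# A REPEATED RECORD value is a list of dicts whose keys match the nested sub-fields.
rows_to_insert = [
    {
        "id": "1",
        "first_name": "Ada",
        "last_name": "Lovelace",
        "dob": "1815-12-10",
        "addresses": [
            {
                "status": "current",
                "address": "123 First Ave",
                "city": "Seattle",
                "state": "WA",
                "zip": "11111",
                "numberOfYears": "1",
            }
        ],
    }
]

errors = client.insert_rows_json(table_id, rows_to_insert)
print(errors or "Inserted 1 row.")
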
diff --git a/samples/snippets/nested_repeated_schema_test.py b/samples/snippets/nested_repeated_schema_test.py
new file mode 100644
index 000000000..67815dcf6
--- /dev/null
+++ b/samples/snippets/nested_repeated_schema_test.py
@@ -0,0 +1,31 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import nested_repeated_schema # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_create_table(
+ capsys: "pytest.CaptureFixture[str]",
+ random_table_id: str,
+) -> None:
+ nested_repeated_schema.nested_schema(random_table_id)
+
+ out, _ = capsys.readouterr()
+ assert "Created" in out
+ assert random_table_id in out
diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py
index 29b5bc852..db2333e5a 100644
--- a/samples/snippets/noxfile.py
+++ b/samples/snippets/noxfile.py
@@ -18,7 +18,7 @@
import os
from pathlib import Path
import sys
-from typing import Callable, Dict, List, Optional
+from typing import Callable, Dict, Optional
import nox
@@ -29,7 +29,7 @@
# WARNING - WARNING - WARNING - WARNING - WARNING
# WARNING - WARNING - WARNING - WARNING - WARNING
-BLACK_VERSION = "black==22.3.0"
+BLACK_VERSION = "black==23.7.0"
ISORT_VERSION = "isort==5.10.1"
# Copy `noxfile_config.py` to your directory and modify it instead.
@@ -89,7 +89,7 @@ def get_pytest_env_vars() -> Dict[str, str]:
# DO NOT EDIT - automatically generated.
# All versions used to test samples.
-ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10"]
+ALL_VERSIONS = ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14"]
# Any default versions that should be ignored.
IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"]
@@ -109,22 +109,6 @@ def get_pytest_env_vars() -> Dict[str, str]:
#
-def _determine_local_import_names(start_dir: str) -> List[str]:
- """Determines all import names that should be considered "local".
-
- This is used when running the linter to insure that import order is
- properly checked.
- """
- file_ext_pairs = [os.path.splitext(path) for path in os.listdir(start_dir)]
- return [
- basename
- for basename, extension in file_ext_pairs
- if extension == ".py"
- or os.path.isdir(os.path.join(start_dir, basename))
- and basename not in ("__pycache__")
- ]
-
-
# Linting with flake8.
#
# We ignore the following rules:
@@ -139,7 +123,6 @@ def _determine_local_import_names(start_dir: str) -> List[str]:
"--show-source",
"--builtin=gettext",
"--max-complexity=20",
- "--import-order-style=google",
"--exclude=.nox,.cache,env,lib,generated_pb2,*_pb2.py,*_pb2_grpc.py",
"--ignore=E121,E123,E126,E203,E226,E24,E266,E501,E704,W503,W504,I202",
"--max-line-length=88",
@@ -149,14 +132,11 @@ def _determine_local_import_names(start_dir: str) -> List[str]:
@nox.session
def lint(session: nox.sessions.Session) -> None:
if not TEST_CONFIG["enforce_type_hints"]:
- session.install("flake8", "flake8-import-order")
+ session.install("flake8")
else:
- session.install("flake8", "flake8-import-order", "flake8-annotations")
+ session.install("flake8", "flake8-annotations")
- local_names = _determine_local_import_names(".")
args = FLAKE8_COMMON_ARGS + [
- "--application-import-names",
- ",".join(local_names),
".",
]
session.run("flake8", *args)
@@ -208,8 +188,10 @@ def _session_tests(
session: nox.sessions.Session, post_install: Callable = None
) -> None:
# check for presence of tests
- test_list = glob.glob("*_test.py") + glob.glob("test_*.py")
- test_list.extend(glob.glob("tests"))
+ test_list = glob.glob("**/*_test.py", recursive=True) + glob.glob(
+ "**/test_*.py", recursive=True
+ )
+ test_list.extend(glob.glob("**/tests", recursive=True))
if len(test_list) == 0:
print("No tests found, skipping directory.")
diff --git a/samples/snippets/quickstart.py b/samples/snippets/quickstart.py
index f9628da7d..8f7f05c73 100644
--- a/samples/snippets/quickstart.py
+++ b/samples/snippets/quickstart.py
@@ -18,7 +18,6 @@
def run_quickstart(override_values: Optional[Dict[str, str]] = None) -> None:
-
if override_values is None:
override_values = {}
diff --git a/samples/snippets/quickstart_test.py b/samples/snippets/quickstart_test.py
index b0bad5ee5..74a02a83a 100644
--- a/samples/snippets/quickstart_test.py
+++ b/samples/snippets/quickstart_test.py
@@ -13,13 +13,12 @@
# limitations under the License.
from typing import Iterator, List
-import uuid
from google.cloud import bigquery
import pytest
-import quickstart
-
+import quickstart # type: ignore
+from conftest import prefixer # type: ignore
# Must match the dataset listed in quickstart.py (there's no easy way to
# extract this).
@@ -44,10 +43,7 @@ def test_quickstart(
client: bigquery.Client,
datasets_to_delete: List[str],
) -> None:
-
- override_values = {
- "dataset_id": "my_new_dataset_{}".format(str(uuid.uuid4()).replace("-", "_")),
- }
+ override_values = {"dataset_id": f"{prefixer.create_prefix()}_quickstart"}
datasets_to_delete.append(override_values["dataset_id"])
quickstart.run_quickstart(override_values)
diff --git a/samples/snippets/relax_column.py b/samples/snippets/relax_column.py
new file mode 100644
index 000000000..bcd79cee8
--- /dev/null
+++ b/samples/snippets/relax_column.py
@@ -0,0 +1,52 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.cloud import bigquery
+
+
+def relax_column(table_id: str) -> bigquery.Table:
+ orig_table_id = table_id
+
+ # [START bigquery_relax_column]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+    # TODO(dev): Change table_id to the full name of the table you want to update.
+ table_id = "your-project.your_dataset.your_table"
+
+ # [END bigquery_relax_column]
+ table_id = orig_table_id
+
+ # [START bigquery_relax_column]
+ table = client.get_table(table_id)
+ new_schema = []
+ for field in table.schema:
+ if field.mode != "REQUIRED":
+ new_schema.append(field)
+ else:
+ # SchemaField properties cannot be edited after initialization.
+ # To make changes, construct new SchemaField objects.
+ new_field = field.to_api_repr()
+ new_field["mode"] = "NULLABLE"
+ relaxed_field = bigquery.SchemaField.from_api_repr(new_field)
+ new_schema.append(relaxed_field)
+
+ table.schema = new_schema
+ table = client.update_table(table, ["schema"])
+
+ print(f"Updated {table_id} schema: {table.schema}.")
+
+ # [END bigquery_relax_column]
+ return table
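
The relax_column sample rebuilds each field through its API representation because SchemaField objects are immutable once constructed. The round trip in isolation, as a minimal sketch:

from google.cloud import bigquery

field = bigquery.SchemaField("string_col2", "STRING", mode="REQUIRED")

# SchemaField exposes no setters, so copy the field via its API representation
# and rebuild it with the relaxed mode instead of mutating it in place.
api_repr = field.to_api_repr()
api_repr["mode"] = "NULLABLE"
relaxed = bigquery.SchemaField.from_api_repr(api_repr)

print(field.mode, "->", relaxed.mode)  # REQUIRED -> NULLABLE
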
diff --git a/samples/snippets/relax_column_test.py b/samples/snippets/relax_column_test.py
new file mode 100644
index 000000000..ede1c3ab7
--- /dev/null
+++ b/samples/snippets/relax_column_test.py
@@ -0,0 +1,46 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+from google.cloud import bigquery
+
+import relax_column # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pytest
+
+
+def test_relax_column(
+ capsys: "pytest.CaptureFixture[str]",
+ bigquery_client: bigquery.Client,
+ random_table_id: str,
+) -> None:
+ table = bigquery.Table(
+ random_table_id,
+ schema=[
+ bigquery.SchemaField("string_col", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("string_col2", "STRING", mode="REQUIRED"),
+ ],
+ )
+
+ bigquery_client.create_table(table)
+ table = relax_column.relax_column(random_table_id)
+
+ out, _ = capsys.readouterr()
+
+ assert all(field.mode == "NULLABLE" for field in table.schema)
+ assert "REQUIRED" not in out
+ assert "NULLABLE" in out
+ assert random_table_id in out
diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt
index 856751fc1..901f1df1a 100644
--- a/samples/snippets/requirements-test.txt
+++ b/samples/snippets/requirements-test.txt
@@ -1,3 +1,5 @@
-google-cloud-testutils==1.3.3
-pytest==7.1.2
-mock==4.0.3
+# samples/snippets should be runnable with no "extras"
+google-cloud-testutils==1.6.4
+pytest==8.4.2
+mock==5.2.0
+pytest-xdist==3.8.0
diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt
index bbef52e66..441385536 100644
--- a/samples/snippets/requirements.txt
+++ b/samples/snippets/requirements.txt
@@ -1,14 +1,2 @@
-db-dtypes==1.0.3
-google-cloud-bigquery==3.3.2
-google-cloud-bigquery-storage==2.14.2
-google-auth-oauthlib==0.5.2
-grpcio==1.47.0
-ipython===7.31.1; python_version == '3.7'
-ipython===8.0.1; python_version == '3.8'
-ipython==8.4.0; python_version >= '3.9'
-matplotlib==3.5.3
-pandas===1.3.5; python_version == '3.7'
-pandas==1.4.3; python_version >= '3.8'
-pyarrow==9.0.0
-pytz==2022.2.1
-typing-extensions==4.3.0
+# samples/snippets should be runnable with no "extras"
+google-cloud-bigquery==3.38.0
diff --git a/samples/snippets/schema.json b/samples/snippets/schema.json
new file mode 100644
index 000000000..bd2164dad
--- /dev/null
+++ b/samples/snippets/schema.json
@@ -0,0 +1,20 @@
+[
+ {
+ "name": "qtr",
+ "type": "STRING",
+ "mode": "REQUIRED",
+ "description": "quarter"
+ },
+ {
+ "name": "rep",
+ "type": "STRING",
+ "mode": "NULLABLE",
+ "description": "sales representative"
+ },
+ {
+ "name": "sales",
+ "type": "FLOAT",
+ "mode": "NULLABLE",
+ "defaultValueExpression": "2.55"
+ }
+]
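
The new schema.json can be turned into SchemaField objects with the client's JSON schema helper. A hedged sketch (the table name is a placeholder, and how defaultValueExpression is surfaced depends on the installed client version):

from google.cloud import bigquery

client = bigquery.Client()

# Parse the JSON schema file into a list of SchemaField objects.
schema = client.schema_from_json("samples/snippets/schema.json")

table = bigquery.Table("your-project.your_dataset.sales", schema=schema)  # placeholder
table = client.create_table(table)
print(f"Created {table.full_table_id} with {len(table.schema)} columns.")
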
diff --git a/samples/snippets/schema_us_states.json b/samples/snippets/schema_us_states.json
new file mode 100644
index 000000000..7f2ccc277
--- /dev/null
+++ b/samples/snippets/schema_us_states.json
@@ -0,0 +1,12 @@
+[
+ {
+ "name": "name",
+ "type": "STRING",
+ "mode": "NULLABLE"
+ },
+ {
+ "name": "post_abbr",
+ "type": "STRING",
+ "mode": "NULLABLE"
+ }
+]
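
schema_us_states.json pairs naturally with the public us-states sample data used elsewhere in these snippets. A hedged load sketch, assuming the usual cloud-samples-data CSV and a placeholder destination table:

from google.cloud import bigquery

client = bigquery.Client()
table_id = "your-project.your_dataset.us_states"  # placeholder

job_config = bigquery.LoadJobConfig(
    schema=client.schema_from_json("samples/snippets/schema_us_states.json"),
    source_format=bigquery.SourceFormat.CSV,
    skip_leading_rows=1,
)
uri = "gs://cloud-samples-data/bigquery/us-states/us-states.csv"  # assumed public sample file

load_job = client.load_table_from_uri(uri, table_id, job_config=job_config)
load_job.result()  # Wait for the load job to complete.
print(f"Loaded {client.get_table(table_id).num_rows} rows.")
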
diff --git a/samples/snippets/simple_app.py b/samples/snippets/simple_app.py
index 3d856d4bb..8281e1877 100644
--- a/samples/snippets/simple_app.py
+++ b/samples/snippets/simple_app.py
@@ -27,7 +27,7 @@ def query_stackoverflow() -> None:
client = bigquery.Client()
# [END bigquery_simple_app_client]
# [START bigquery_simple_app_query]
- query_job = client.query(
+ results = client.query_and_wait(
"""
SELECT
CONCAT(
@@ -38,9 +38,7 @@ def query_stackoverflow() -> None:
WHERE tags like '%google-bigquery%'
ORDER BY view_count DESC
LIMIT 10"""
- )
-
- results = query_job.result() # Waits for job to complete.
+ ) # Waits for job to complete.
# [END bigquery_simple_app_query]
# [START bigquery_simple_app_print]
diff --git a/samples/snippets/simple_app_test.py b/samples/snippets/simple_app_test.py
index de4e1ce34..4bf0bb49c 100644
--- a/samples/snippets/simple_app_test.py
+++ b/samples/snippets/simple_app_test.py
@@ -14,7 +14,7 @@
import typing
-import simple_app
+import simple_app # type: ignore
if typing.TYPE_CHECKING:
import pytest
diff --git a/samples/snippets/test_update_with_dml.py b/samples/snippets/test_update_with_dml.py
index ef5ec196a..d03114a36 100644
--- a/samples/snippets/test_update_with_dml.py
+++ b/samples/snippets/test_update_with_dml.py
@@ -17,8 +17,8 @@
from google.cloud import bigquery
import pytest
-from conftest import prefixer
-import update_with_dml
+from conftest import prefixer # type: ignore
+import update_with_dml # type: ignore
@pytest.fixture
diff --git a/samples/snippets/update_table_expiration.py b/samples/snippets/update_table_expiration.py
new file mode 100644
index 000000000..bf944800f
--- /dev/null
+++ b/samples/snippets/update_table_expiration.py
@@ -0,0 +1,45 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+
+
+def update_table_expiration(table_id: str, expiration: datetime.datetime) -> None:
+ orig_table_id = table_id
+ orig_expiration = expiration
+
+ # [START bigquery_update_table_expiration]
+ from google.cloud import bigquery
+
+ client = bigquery.Client()
+
+ # TODO(dev): Change table_id to the full name of the table you want to update.
+ table_id = "your-project.your_dataset.your_table_name"
+
+    # TODO(dev): Set the table to expire the desired number of days from now.
+ expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(
+ days=5
+ )
+ # [END bigquery_update_table_expiration]
+
+ table_id = orig_table_id
+ expiration = orig_expiration
+
+ # [START bigquery_update_table_expiration]
+ table = client.get_table(table_id) # Make an API request.
+ table.expires = expiration
+ table = client.update_table(table, ["expires"]) # API request
+
+ print(f"Updated {table_id}, expires {table.expires}.")
+ # [END bigquery_update_table_expiration]
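
For completeness, the inverse of the new update_table_expiration sample: a sketch of clearing an expiration by assigning None (placeholder table name; this assumes the API clears expirationTime when it is sent as null):

from google.cloud import bigquery

client = bigquery.Client()
table_id = "your-project.your_dataset.your_table_name"  # placeholder

table = client.get_table(table_id)
table.expires = None  # Assumed to clear the expiration rather than leave it unchanged.
table = client.update_table(table, ["expires"])
print(f"{table_id} expiration is now {table.expires}.")
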
diff --git a/samples/snippets/update_table_expiration_test.py b/samples/snippets/update_table_expiration_test.py
new file mode 100644
index 000000000..ed68a8c2c
--- /dev/null
+++ b/samples/snippets/update_table_expiration_test.py
@@ -0,0 +1,43 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+import typing
+
+import update_table_expiration # type: ignore
+
+if typing.TYPE_CHECKING:
+ import pathlib
+
+ import pytest
+
+
+def test_update_table_expiration(
+ capsys: "pytest.CaptureFixture[str]",
+ table_id: str,
+ tmp_path: "pathlib.Path",
+) -> None:
+    # Compute an explicit expiration here so the test can assert on its parts below.
+ expiration = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta(
+ days=5
+ )
+
+ update_table_expiration.update_table_expiration(table_id, expiration)
+
+ out, _ = capsys.readouterr()
+ assert "Updated" in out
+ assert table_id in out
+ assert str(expiration.day) in out
+ assert str(expiration.month) in out
+ assert str(expiration.year) in out
diff --git a/samples/snippets/view.py b/samples/snippets/view.py
index 5e976f68a..30e719c79 100644
--- a/samples/snippets/view.py
+++ b/samples/snippets/view.py
@@ -127,7 +127,6 @@ def update_view(override_values: Optional[Dict[str, str]] = None) -> "bigquery.T
def grant_access(
override_values: Optional[OverridesDict] = None,
) -> Tuple["bigquery.Dataset", "bigquery.Dataset"]:
-
if override_values is None:
override_values = {}
@@ -148,7 +147,7 @@ def grant_access(
# Make an API request to get the view dataset ACLs.
view_dataset = client.get_dataset(view_dataset_id)
- analyst_group_email = "data_analysts@example.com"
+ analyst_group_email = "example-analyst-group@google.com"
# [END bigquery_grant_view_access]
# To facilitate testing, we replace values with alternatives
# provided by the testing harness.
diff --git a/samples/snippets/view_test.py b/samples/snippets/view_test.py
index 4d0d43b77..d46595695 100644
--- a/samples/snippets/view_test.py
+++ b/samples/snippets/view_test.py
@@ -19,7 +19,8 @@
from google.cloud import bigquery
import pytest
-import view
+import view # type: ignore
+from conftest import prefixer # type: ignore
def temp_suffix() -> str:
@@ -53,7 +54,7 @@ def view_id(bigquery_client: bigquery.Client, view_dataset_id: str) -> Iterator[
def source_dataset_id(
bigquery_client: bigquery.Client, project_id: str
) -> Iterator[str]:
- dataset_id = f"{project_id}.view_{temp_suffix()}"
+ dataset_id = f"{prefixer.create_prefix()}_view"
bigquery_client.create_dataset(dataset_id)
yield dataset_id
bigquery_client.delete_dataset(dataset_id, delete_contents=True)
@@ -113,7 +114,6 @@ def test_view(
project_id, dataset_id, table_id = view_id.split(".")
overrides: view.OverridesDict = {
- "analyst_group_email": "cloud-dpes-bigquery@google.com",
"view_dataset_id": view_dataset_id,
"source_dataset_id": source_dataset_id,
"view_reference": {
@@ -126,5 +126,5 @@ def test_view(
assert len(view_dataset.access_entries) != 0
assert len(source_dataset.access_entries) != 0
out, _ = capsys.readouterr()
- assert "cloud-dpes-bigquery@google.com" in out
+ assert "example-analyst-group@google.com" in out
assert table_id in out
diff --git a/samples/table_exists.py b/samples/table_exists.py
index 6edba9239..c19d419ae 100644
--- a/samples/table_exists.py
+++ b/samples/table_exists.py
@@ -14,7 +14,6 @@
def table_exists(table_id: str) -> None:
-
# [START bigquery_table_exists]
from google.cloud import bigquery
from google.cloud.exceptions import NotFound
diff --git a/samples/table_insert_rows.py b/samples/table_insert_rows.py
index 8aa723fe0..d680b4c1e 100644
--- a/samples/table_insert_rows.py
+++ b/samples/table_insert_rows.py
@@ -14,7 +14,6 @@
def table_insert_rows(table_id: str) -> None:
-
# [START bigquery_table_insert_rows]
from google.cloud import bigquery
diff --git a/samples/table_insert_rows_explicit_none_insert_ids.py b/samples/table_insert_rows_explicit_none_insert_ids.py
index b2bd06372..bbde034f7 100644
--- a/samples/table_insert_rows_explicit_none_insert_ids.py
+++ b/samples/table_insert_rows_explicit_none_insert_ids.py
@@ -14,7 +14,6 @@
def table_insert_rows_explicit_none_insert_ids(table_id: str) -> None:
-
# [START bigquery_table_insert_rows_explicit_none_insert_ids]
from google.cloud import bigquery
diff --git a/samples/tests/conftest.py b/samples/tests/conftest.py
index b7a2ad587..cdf52b388 100644
--- a/samples/tests/conftest.py
+++ b/samples/tests/conftest.py
@@ -13,11 +13,11 @@
# limitations under the License.
import datetime
-from typing import Iterator
+from typing import Iterator, List
+from unittest import mock
import uuid
import google.auth
-import mock
import pytest
from google.cloud import bigquery
@@ -47,6 +47,22 @@ def random_table_id(dataset_id: str) -> str:
return "{}.{}".format(dataset_id, random_table_id)
+@pytest.fixture
+def avro_source_uris() -> List[str]:
+ avro_source_uris = [
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro",
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro",
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro",
+ ]
+ return avro_source_uris
+
+
+@pytest.fixture
+def reference_file_schema_uri() -> str:
+ reference_file_schema_uri = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro"
+ return reference_file_schema_uri
+
+
@pytest.fixture
def random_dataset_id(client: bigquery.Client) -> Iterator[str]:
now = datetime.datetime.now()
@@ -146,7 +162,7 @@ def model_id(client: bigquery.Client, dataset_id: str) -> str:
CREATE MODEL `{}`
OPTIONS (
model_type='linear_reg',
- max_iteration=1,
+ max_iterations=1,
learn_rate=0.4,
learn_rate_strategy='constant'
) AS (
@@ -158,7 +174,7 @@ def model_id(client: bigquery.Client, dataset_id: str) -> str:
model_id
)
- client.query(sql).result()
+ client.query_and_wait(sql)
return model_id
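
The new avro_source_uris and reference_file_schema_uri fixtures support the reference-file-schema load samples. A hedged sketch of the load pattern they exercise (placeholder destination table; reference_file_schema_uri on LoadJobConfig assumes a recent google-cloud-bigquery release):

from google.cloud import bigquery

client = bigquery.Client()
table_id = "your-project.your_dataset.twitter_avro"  # placeholder

prefix = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema"
job_config = bigquery.LoadJobConfig(
    source_format=bigquery.SourceFormat.AVRO,
    # Take the table schema from one representative file instead of letting
    # BigQuery reconcile it across every source file.
    reference_file_schema_uri=f"{prefix}/b-twitter.avro",
)

load_job = client.load_table_from_uri(
    [f"{prefix}/a-twitter.avro", f"{prefix}/b-twitter.avro", f"{prefix}/c-twitter.avro"],
    table_id,
    job_config=job_config,
)
load_job.result()
print(f"Loaded {client.get_table(table_id).num_rows} rows.")
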
diff --git a/samples/tests/test_add_empty_column.py b/samples/tests/test_add_empty_column.py
index 5c7184766..95d554621 100644
--- a/samples/tests/test_add_empty_column.py
+++ b/samples/tests/test_add_empty_column.py
@@ -21,7 +21,6 @@
def test_add_empty_column(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None:
-
add_empty_column.add_empty_column(table_id)
out, err = capsys.readouterr()
assert "A new column has been added." in out
diff --git a/samples/tests/test_browse_table_data.py b/samples/tests/test_browse_table_data.py
index 368e5cad6..670eb7ccf 100644
--- a/samples/tests/test_browse_table_data.py
+++ b/samples/tests/test_browse_table_data.py
@@ -23,7 +23,6 @@
def test_browse_table_data(
capsys: "pytest.CaptureFixture[str]", table_with_data_id: str
) -> None:
-
browse_table_data.browse_table_data(table_with_data_id)
out, err = capsys.readouterr()
assert "Downloaded 164656 rows from table {}".format(table_with_data_id) in out
diff --git a/samples/tests/test_client_list_jobs.py b/samples/tests/test_client_list_jobs.py
index a2845b7ad..6bb1bbd19 100644
--- a/samples/tests/test_client_list_jobs.py
+++ b/samples/tests/test_client_list_jobs.py
@@ -25,7 +25,6 @@
def test_client_list_jobs(
capsys: "pytest.CaptureFixture[str]", client: "bigquery.Client"
) -> None:
-
job = create_job.create_job()
client.cancel_job(job.job_id)
job.cancel()
diff --git a/samples/tests/test_client_load_partitioned_table.py b/samples/tests/test_client_load_partitioned_table.py
index 24f86c700..2f6564afa 100644
--- a/samples/tests/test_client_load_partitioned_table.py
+++ b/samples/tests/test_client_load_partitioned_table.py
@@ -23,7 +23,6 @@
def test_client_load_partitioned_table(
capsys: "pytest.CaptureFixture[str]", random_table_id: str
) -> None:
-
client_load_partitioned_table.client_load_partitioned_table(random_table_id)
out, err = capsys.readouterr()
assert "Loaded 50 rows to table {}".format(random_table_id) in out
diff --git a/samples/tests/test_client_query_add_column.py b/samples/tests/test_client_query_add_column.py
index 1eb5a1ed6..c80f195a5 100644
--- a/samples/tests/test_client_query_add_column.py
+++ b/samples/tests/test_client_query_add_column.py
@@ -25,7 +25,6 @@
def test_client_query_add_column(
capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client
) -> None:
-
schema = [
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
diff --git a/samples/tests/test_client_query_batch.py b/samples/tests/test_client_query_batch.py
index 548fe3ac3..b1e0e2647 100644
--- a/samples/tests/test_client_query_batch.py
+++ b/samples/tests/test_client_query_batch.py
@@ -21,7 +21,6 @@
def test_client_query_batch(capsys: "pytest.CaptureFixture[str]") -> None:
-
job = client_query_batch.client_query_batch()
out, err = capsys.readouterr()
assert "Job {} is currently in state {}".format(job.job_id, job.state) in out
diff --git a/samples/tests/test_client_query_destination_table.py b/samples/tests/test_client_query_destination_table.py
index 067bc16ec..1487f6e65 100644
--- a/samples/tests/test_client_query_destination_table.py
+++ b/samples/tests/test_client_query_destination_table.py
@@ -23,7 +23,6 @@
def test_client_query_destination_table(
capsys: "pytest.CaptureFixture[str]", table_id: str
) -> None:
-
client_query_destination_table.client_query_destination_table(table_id)
out, err = capsys.readouterr()
assert "Query results loaded to the table {}".format(table_id) in out
diff --git a/samples/tests/test_client_query_destination_table_clustered.py b/samples/tests/test_client_query_destination_table_clustered.py
index 02b131531..8a1e5bcd4 100644
--- a/samples/tests/test_client_query_destination_table_clustered.py
+++ b/samples/tests/test_client_query_destination_table_clustered.py
@@ -23,7 +23,6 @@
def test_client_query_destination_table_clustered(
capsys: "pytest.CaptureFixture[str]", random_table_id: str
) -> None:
-
client_query_destination_table_clustered.client_query_destination_table_clustered(
random_table_id
)
diff --git a/samples/tests/test_client_query_destination_table_cmek.py b/samples/tests/test_client_query_destination_table_cmek.py
index f2fe3bc39..4cb76be8e 100644
--- a/samples/tests/test_client_query_destination_table_cmek.py
+++ b/samples/tests/test_client_query_destination_table_cmek.py
@@ -23,7 +23,6 @@
def test_client_query_destination_table_cmek(
capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str
) -> None:
-
client_query_destination_table_cmek.client_query_destination_table_cmek(
random_table_id, kms_key_name
)
diff --git a/samples/tests/test_client_query_destination_table_legacy.py b/samples/tests/test_client_query_destination_table_legacy.py
index 0071ee4a4..78a199bea 100644
--- a/samples/tests/test_client_query_destination_table_legacy.py
+++ b/samples/tests/test_client_query_destination_table_legacy.py
@@ -23,7 +23,6 @@
def test_client_query_destination_table_legacy(
capsys: "pytest.CaptureFixture[str]", random_table_id: str
) -> None:
-
client_query_destination_table_legacy.client_query_destination_table_legacy(
random_table_id
)
diff --git a/samples/tests/test_client_query_dry_run.py b/samples/tests/test_client_query_dry_run.py
index cffb152ef..cfc8100a1 100644
--- a/samples/tests/test_client_query_dry_run.py
+++ b/samples/tests/test_client_query_dry_run.py
@@ -21,7 +21,6 @@
def test_client_query_dry_run(capsys: "pytest.CaptureFixture[str]") -> None:
-
query_job = client_query_dry_run.client_query_dry_run()
out, err = capsys.readouterr()
assert "This query will process" in out
diff --git a/samples/tests/test_client_query.py b/samples/tests/test_client_query_job_optional.py
similarity index 72%
rename from samples/tests/test_client_query.py
rename to samples/tests/test_client_query_job_optional.py
index a8e3c343e..0e0b2cf19 100644
--- a/samples/tests/test_client_query.py
+++ b/samples/tests/test_client_query_job_optional.py
@@ -1,4 +1,4 @@
-# Copyright 2019 Google LLC
+# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,15 +14,13 @@
import typing
-from .. import client_query
+from .. import client_query_job_optional
if typing.TYPE_CHECKING:
import pytest
-def test_client_query(capsys: "pytest.CaptureFixture[str]") -> None:
-
- client_query.client_query()
+def test_client_query_shortmode(capsys: "pytest.CaptureFixture[str]") -> None:
+ client_query_job_optional.client_query_job_optional()
out, err = capsys.readouterr()
- assert "The query data:" in out
- assert "name=James, count=272793" in out
+ assert "Query was run" in out
diff --git a/samples/tests/test_client_query_legacy_sql.py b/samples/tests/test_client_query_legacy_sql.py
index b12b5a934..98303cde9 100644
--- a/samples/tests/test_client_query_legacy_sql.py
+++ b/samples/tests/test_client_query_legacy_sql.py
@@ -22,7 +22,6 @@
def test_client_query_legacy_sql(capsys: "pytest.CaptureFixture[str]") -> None:
-
client_query_legacy_sql.client_query_legacy_sql()
out, err = capsys.readouterr()
assert re.search(r"(Row[\w(){}:', ]+)$", out)
diff --git a/samples/tests/test_client_query_relax_column.py b/samples/tests/test_client_query_relax_column.py
index 93fa0f3cf..0df8463be 100644
--- a/samples/tests/test_client_query_relax_column.py
+++ b/samples/tests/test_client_query_relax_column.py
@@ -27,7 +27,6 @@ def test_client_query_relax_column(
random_table_id: str,
client: bigquery.Client,
) -> None:
-
schema = [
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
diff --git a/samples/tests/test_client_query_w_array_params.py b/samples/tests/test_client_query_w_array_params.py
index fcd3f6972..c302712fe 100644
--- a/samples/tests/test_client_query_w_array_params.py
+++ b/samples/tests/test_client_query_w_array_params.py
@@ -21,7 +21,6 @@
def test_client_query_w_array_params(capsys: "pytest.CaptureFixture[str]") -> None:
-
client_query_w_array_params.client_query_w_array_params()
out, err = capsys.readouterr()
assert "James" in out
diff --git a/samples/tests/test_client_query_w_named_params.py b/samples/tests/test_client_query_w_named_params.py
index 85ef1dc4a..e4d66be41 100644
--- a/samples/tests/test_client_query_w_named_params.py
+++ b/samples/tests/test_client_query_w_named_params.py
@@ -21,7 +21,6 @@
def test_client_query_w_named_params(capsys: "pytest.CaptureFixture[str]") -> None:
-
client_query_w_named_params.client_query_w_named_params()
out, err = capsys.readouterr()
assert "the" in out
diff --git a/samples/tests/test_client_query_w_positional_params.py b/samples/tests/test_client_query_w_positional_params.py
index 8ade676ab..61df76aaa 100644
--- a/samples/tests/test_client_query_w_positional_params.py
+++ b/samples/tests/test_client_query_w_positional_params.py
@@ -21,7 +21,6 @@
def test_client_query_w_positional_params(capsys: "pytest.CaptureFixture[str]") -> None:
-
client_query_w_positional_params.client_query_w_positional_params()
out, err = capsys.readouterr()
assert "the" in out
diff --git a/samples/tests/test_client_query_w_struct_params.py b/samples/tests/test_client_query_w_struct_params.py
index 3198dbad5..5eea993ce 100644
--- a/samples/tests/test_client_query_w_struct_params.py
+++ b/samples/tests/test_client_query_w_struct_params.py
@@ -21,7 +21,6 @@
def test_client_query_w_struct_params(capsys: "pytest.CaptureFixture[str]") -> None:
-
client_query_w_struct_params.client_query_w_struct_params()
out, err = capsys.readouterr()
assert "1" in out
diff --git a/samples/tests/test_client_query_w_timestamp_params.py b/samples/tests/test_client_query_w_timestamp_params.py
index a3bbccdd4..8147d4a96 100644
--- a/samples/tests/test_client_query_w_timestamp_params.py
+++ b/samples/tests/test_client_query_w_timestamp_params.py
@@ -21,7 +21,6 @@
def test_client_query_w_timestamp_params(capsys: "pytest.CaptureFixture[str]") -> None:
-
client_query_w_timestamp_params.client_query_w_timestamp_params()
out, err = capsys.readouterr()
assert "2016, 12, 7, 9, 0" in out
diff --git a/samples/tests/test_copy_table.py b/samples/tests/test_copy_table.py
index d5a6c121e..3953e3162 100644
--- a/samples/tests/test_copy_table.py
+++ b/samples/tests/test_copy_table.py
@@ -28,8 +28,6 @@ def test_copy_table(
random_table_id: str,
client: "bigquery.Client",
) -> None:
- pytest.skip("b/210907595: copy fails for shakespeare table")
-
copy_table.copy_table(table_with_data_id, random_table_id)
out, err = capsys.readouterr()
assert "A copy of the table created." in out
diff --git a/samples/tests/test_copy_table_cmek.py b/samples/tests/test_copy_table_cmek.py
index 1bdec2f35..7cac15723 100644
--- a/samples/tests/test_copy_table_cmek.py
+++ b/samples/tests/test_copy_table_cmek.py
@@ -23,8 +23,6 @@ def test_copy_table_cmek(
table_with_data_id: str,
kms_key_name: str,
) -> None:
- pytest.skip("b/210907595: copy fails for shakespeare table")
-
copy_table_cmek.copy_table_cmek(random_table_id, table_with_data_id, kms_key_name)
out, err = capsys.readouterr()
assert "A copy of the table created" in out
diff --git a/samples/tests/test_copy_table_multiple_source.py b/samples/tests/test_copy_table_multiple_source.py
index e8b27d2a9..5d7991c91 100644
--- a/samples/tests/test_copy_table_multiple_source.py
+++ b/samples/tests/test_copy_table_multiple_source.py
@@ -29,7 +29,6 @@ def test_copy_table_multiple_source(
random_dataset_id: str,
client: bigquery.Client,
) -> None:
-
dataset = bigquery.Dataset(random_dataset_id)
dataset.location = "US"
dataset = client.create_dataset(dataset)
diff --git a/samples/tests/test_create_dataset.py b/samples/tests/test_create_dataset.py
index e7a897f8f..ecf5ef129 100644
--- a/samples/tests/test_create_dataset.py
+++ b/samples/tests/test_create_dataset.py
@@ -23,7 +23,6 @@
def test_create_dataset(
capsys: "pytest.CaptureFixture[str]", random_dataset_id: str
) -> None:
-
create_dataset.create_dataset(random_dataset_id)
out, err = capsys.readouterr()
assert "Created dataset {}".format(random_dataset_id) in out
diff --git a/samples/tests/test_dataset_exists.py b/samples/tests/test_dataset_exists.py
index bfef4368f..744122e37 100644
--- a/samples/tests/test_dataset_exists.py
+++ b/samples/tests/test_dataset_exists.py
@@ -27,7 +27,6 @@ def test_dataset_exists(
random_dataset_id: str,
client: bigquery.Client,
) -> None:
-
dataset_exists.dataset_exists(random_dataset_id)
out, err = capsys.readouterr()
assert "Dataset {} is not found".format(random_dataset_id) in out
diff --git a/samples/tests/test_dataset_label_samples.py b/samples/tests/test_dataset_label_samples.py
index 75a024856..ec9ff9228 100644
--- a/samples/tests/test_dataset_label_samples.py
+++ b/samples/tests/test_dataset_label_samples.py
@@ -25,7 +25,6 @@
def test_dataset_label_samples(
capsys: "pytest.CaptureFixture[str]", dataset_id: str
) -> None:
-
label_dataset.label_dataset(dataset_id)
out, err = capsys.readouterr()
assert "Labels added to {}".format(dataset_id) in out
diff --git a/samples/tests/test_delete_dataset.py b/samples/tests/test_delete_dataset.py
index 9347bf185..c2a77c475 100644
--- a/samples/tests/test_delete_dataset.py
+++ b/samples/tests/test_delete_dataset.py
@@ -21,7 +21,6 @@
def test_delete_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None:
-
delete_dataset.delete_dataset(dataset_id)
out, err = capsys.readouterr()
assert "Deleted dataset '{}'.".format(dataset_id) in out
diff --git a/samples/tests/test_delete_table.py b/samples/tests/test_delete_table.py
index aca2df62f..5ba5622e8 100644
--- a/samples/tests/test_delete_table.py
+++ b/samples/tests/test_delete_table.py
@@ -21,7 +21,6 @@
def test_delete_table(capsys: "pytest.CaptureFixture[str]", table_id: str) -> None:
-
delete_table.delete_table(table_id)
out, err = capsys.readouterr()
assert "Deleted table '{}'.".format(table_id) in out
diff --git a/samples/tests/test_download_public_data.py b/samples/tests/test_download_public_data.py
index 02c2c6f9c..4f6c02452 100644
--- a/samples/tests/test_download_public_data.py
+++ b/samples/tests/test_download_public_data.py
@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
-
import pytest
from .. import download_public_data
@@ -21,20 +19,9 @@
pytest.importorskip("google.cloud.bigquery_storage_v1")
-def test_download_public_data(
- caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str]
-) -> None:
- # Enable debug-level logging to verify the BigQuery Storage API is used.
- caplog.set_level(logging.DEBUG)
-
+def test_download_public_data(capsys: pytest.CaptureFixture[str]) -> None:
download_public_data.download_public_data()
out, _ = capsys.readouterr()
assert "year" in out
assert "gender" in out
assert "name" in out
-
- assert any(
- "Started reading table 'bigquery-public-data.usa_names.usa_1910_current' with BQ Storage API session"
- in message
- for message in caplog.messages
- )
diff --git a/samples/tests/test_download_public_data_sandbox.py b/samples/tests/test_download_public_data_sandbox.py
index e86f604ad..d3dd31a38 100644
--- a/samples/tests/test_download_public_data_sandbox.py
+++ b/samples/tests/test_download_public_data_sandbox.py
@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import logging
-
import pytest
from .. import download_public_data_sandbox
@@ -21,20 +19,9 @@
pytest.importorskip("google.cloud.bigquery_storage_v1")
-def test_download_public_data_sandbox(
- caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str]
-) -> None:
- # Enable debug-level logging to verify the BigQuery Storage API is used.
- caplog.set_level(logging.DEBUG)
-
+def test_download_public_data_sandbox(capsys: pytest.CaptureFixture[str]) -> None:
download_public_data_sandbox.download_public_data_sandbox()
- out, err = capsys.readouterr()
+ out, _ = capsys.readouterr()
assert "year" in out
assert "gender" in out
assert "name" in out
-
- assert any(
- # An anonymous table is used because this sample reads from query results.
- ("Started reading table" in message and "BQ Storage API session" in message)
- for message in caplog.messages
- )
diff --git a/samples/tests/test_get_dataset.py b/samples/tests/test_get_dataset.py
index 97b30541b..07c7a28b7 100644
--- a/samples/tests/test_get_dataset.py
+++ b/samples/tests/test_get_dataset.py
@@ -21,7 +21,6 @@
def test_get_dataset(capsys: "pytest.CaptureFixture[str]", dataset_id: str) -> None:
-
get_dataset.get_dataset(dataset_id)
out, err = capsys.readouterr()
assert dataset_id in out
diff --git a/samples/tests/test_get_table.py b/samples/tests/test_get_table.py
index e6383010f..edf09762d 100644
--- a/samples/tests/test_get_table.py
+++ b/samples/tests/test_get_table.py
@@ -25,7 +25,6 @@
def test_get_table(
capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client
) -> None:
-
schema = [
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
diff --git a/samples/tests/test_list_tables.py b/samples/tests/test_list_tables.py
index 7c726accc..c8a66b656 100644
--- a/samples/tests/test_list_tables.py
+++ b/samples/tests/test_list_tables.py
@@ -23,7 +23,6 @@
def test_list_tables(
capsys: "pytest.CaptureFixture[str]", dataset_id: str, table_id: str
) -> None:
-
list_tables.list_tables(dataset_id)
out, err = capsys.readouterr()
assert "Tables contained in '{}':".format(dataset_id) in out
diff --git a/samples/tests/test_load_table_clustered.py b/samples/tests/test_load_table_clustered.py
index bbf3c671f..89059271a 100644
--- a/samples/tests/test_load_table_clustered.py
+++ b/samples/tests/test_load_table_clustered.py
@@ -26,7 +26,6 @@ def test_load_table_clustered(
random_table_id: str,
client: "bigquery.Client",
) -> None:
-
table = load_table_clustered.load_table_clustered(random_table_id)
out, _ = capsys.readouterr()
diff --git a/samples/tests/test_load_table_dataframe.py b/samples/tests/test_load_table_dataframe.py
index 9a975493c..4aa872fa4 100644
--- a/samples/tests/test_load_table_dataframe.py
+++ b/samples/tests/test_load_table_dataframe.py
@@ -31,7 +31,6 @@ def test_load_table_dataframe(
client: "bigquery.Client",
random_table_id: str,
) -> None:
-
table = load_table_dataframe.load_table_dataframe(random_table_id)
out, _ = capsys.readouterr()
expected_column_names = [
diff --git a/samples/tests/test_load_table_uri_autodetect_csv.py b/samples/tests/test_load_table_uri_autodetect_csv.py
index c9b410850..46b593713 100644
--- a/samples/tests/test_load_table_uri_autodetect_csv.py
+++ b/samples/tests/test_load_table_uri_autodetect_csv.py
@@ -23,7 +23,6 @@
def test_load_table_uri_autodetect_csv(
capsys: "pytest.CaptureFixture[str]", random_table_id: str
) -> None:
-
load_table_uri_autodetect_csv.load_table_uri_autodetect_csv(random_table_id)
out, err = capsys.readouterr()
assert "Loaded 50 rows." in out
diff --git a/samples/tests/test_load_table_uri_autodetect_json.py b/samples/tests/test_load_table_uri_autodetect_json.py
index 2c68a13db..43bf4e1b3 100644
--- a/samples/tests/test_load_table_uri_autodetect_json.py
+++ b/samples/tests/test_load_table_uri_autodetect_json.py
@@ -23,7 +23,6 @@
def test_load_table_uri_autodetect_csv(
capsys: "pytest.CaptureFixture[str]", random_table_id: str
) -> None:
-
load_table_uri_autodetect_json.load_table_uri_autodetect_json(random_table_id)
out, err = capsys.readouterr()
assert "Loaded 50 rows." in out
diff --git a/samples/tests/test_load_table_uri_cmek.py b/samples/tests/test_load_table_uri_cmek.py
index 1eb873843..1ae8689f9 100644
--- a/samples/tests/test_load_table_uri_cmek.py
+++ b/samples/tests/test_load_table_uri_cmek.py
@@ -23,7 +23,6 @@
def test_load_table_uri_cmek(
capsys: "pytest.CaptureFixture[str]", random_table_id: str, kms_key_name: str
) -> None:
-
load_table_uri_cmek.load_table_uri_cmek(random_table_id, kms_key_name)
out, _ = capsys.readouterr()
assert "A table loaded with encryption configuration key" in out
diff --git a/samples/tests/test_load_table_uri_csv.py b/samples/tests/test_load_table_uri_csv.py
index a57224c84..8b4c733e8 100644
--- a/samples/tests/test_load_table_uri_csv.py
+++ b/samples/tests/test_load_table_uri_csv.py
@@ -23,7 +23,6 @@
def test_load_table_uri_csv(
capsys: "pytest.CaptureFixture[str]", random_table_id: str
) -> None:
-
load_table_uri_csv.load_table_uri_csv(random_table_id)
out, _ = capsys.readouterr()
assert "Loaded 50 rows." in out
diff --git a/samples/tests/test_load_table_uri_json.py b/samples/tests/test_load_table_uri_json.py
index 3ad0ce29b..751c3867a 100644
--- a/samples/tests/test_load_table_uri_json.py
+++ b/samples/tests/test_load_table_uri_json.py
@@ -23,7 +23,6 @@
def test_load_table_uri_json(
capsys: "pytest.CaptureFixture[str]", random_table_id: str
) -> None:
-
load_table_uri_json.load_table_uri_json(random_table_id)
out, _ = capsys.readouterr()
assert "Loaded 50 rows." in out
diff --git a/samples/tests/test_load_table_uri_orc.py b/samples/tests/test_load_table_uri_orc.py
index f31e8cabb..23d8288b7 100644
--- a/samples/tests/test_load_table_uri_orc.py
+++ b/samples/tests/test_load_table_uri_orc.py
@@ -23,7 +23,6 @@
def test_load_table_uri_orc(
capsys: "pytest.CaptureFixture[str]", random_table_id: str
) -> None:
-
load_table_uri_orc.load_table_uri_orc(random_table_id)
out, _ = capsys.readouterr()
assert "Loaded 50 rows." in out
diff --git a/samples/tests/test_load_table_uri_parquet.py b/samples/tests/test_load_table_uri_parquet.py
index 5404e8584..ee7682388 100644
--- a/samples/tests/test_load_table_uri_parquet.py
+++ b/samples/tests/test_load_table_uri_parquet.py
@@ -23,7 +23,6 @@
def test_load_table_uri_json(
capsys: "pytest.CaptureFixture[str]", random_table_id: str
) -> None:
-
load_table_uri_parquet.load_table_uri_parquet(random_table_id)
out, _ = capsys.readouterr()
assert "Loaded 50 rows." in out
diff --git a/samples/tests/test_query_external_gcs_temporary_table.py b/samples/tests/test_query_external_gcs_temporary_table.py
index 9590f3d7a..75b3ce6d8 100644
--- a/samples/tests/test_query_external_gcs_temporary_table.py
+++ b/samples/tests/test_query_external_gcs_temporary_table.py
@@ -23,7 +23,6 @@
def test_query_external_gcs_temporary_table(
capsys: "pytest.CaptureFixture[str]",
) -> None:
-
query_external_gcs_temporary_table.query_external_gcs_temporary_table()
out, err = capsys.readouterr()
assert "There are 4 states with names starting with W." in out
diff --git a/samples/tests/test_query_external_sheets_permanent_table.py b/samples/tests/test_query_external_sheets_permanent_table.py
index 851839054..1a4c21330 100644
--- a/samples/tests/test_query_external_sheets_permanent_table.py
+++ b/samples/tests/test_query_external_sheets_permanent_table.py
@@ -23,7 +23,6 @@
def test_query_external_sheets_permanent_table(
capsys: "pytest.CaptureFixture[str]", dataset_id: str
) -> None:
-
query_external_sheets_permanent_table.query_external_sheets_permanent_table(
dataset_id
)
diff --git a/samples/tests/test_query_external_sheets_temporary_table.py b/samples/tests/test_query_external_sheets_temporary_table.py
index 58e0cb394..2ada20566 100644
--- a/samples/tests/test_query_external_sheets_temporary_table.py
+++ b/samples/tests/test_query_external_sheets_temporary_table.py
@@ -23,7 +23,6 @@
def test_query_external_sheets_temporary_table(
capsys: "pytest.CaptureFixture[str]",
) -> None:
-
query_external_sheets_temporary_table.query_external_sheets_temporary_table()
out, err = capsys.readouterr()
assert "There are 2 states with names starting with W in the selected range." in out
diff --git a/samples/tests/test_query_no_cache.py b/samples/tests/test_query_no_cache.py
index f3fb039c9..fffa5dac7 100644
--- a/samples/tests/test_query_no_cache.py
+++ b/samples/tests/test_query_no_cache.py
@@ -22,7 +22,6 @@
def test_query_no_cache(capsys: "pytest.CaptureFixture[str]") -> None:
-
query_no_cache.query_no_cache()
out, err = capsys.readouterr()
assert re.search(r"(Row[\w(){}:', ]+)$", out)
diff --git a/samples/tests/test_query_pagination.py b/samples/tests/test_query_pagination.py
index daf711e49..adc946399 100644
--- a/samples/tests/test_query_pagination.py
+++ b/samples/tests/test_query_pagination.py
@@ -21,7 +21,6 @@
def test_query_pagination(capsys: "pytest.CaptureFixture[str]") -> None:
-
query_pagination.query_pagination()
out, _ = capsys.readouterr()
assert "The query data:" in out
diff --git a/samples/tests/test_query_script.py b/samples/tests/test_query_script.py
index 98dd1253b..50c973024 100644
--- a/samples/tests/test_query_script.py
+++ b/samples/tests/test_query_script.py
@@ -21,7 +21,6 @@
def test_query_script(capsys: "pytest.CaptureFixture[str]") -> None:
-
query_script.query_script()
out, _ = capsys.readouterr()
assert "Script created 2 child jobs." in out
diff --git a/samples/tests/test_query_to_arrow.py b/samples/tests/test_query_to_arrow.py
index d9b1aeb73..9fc8571e9 100644
--- a/samples/tests/test_query_to_arrow.py
+++ b/samples/tests/test_query_to_arrow.py
@@ -20,7 +20,6 @@
def test_query_to_arrow(capsys: "pytest.CaptureFixture[str]") -> None:
-
arrow_table = query_to_arrow.query_to_arrow()
out, err = capsys.readouterr()
assert "Downloaded 8 rows, 2 columns." in out
diff --git a/samples/tests/test_table_exists.py b/samples/tests/test_table_exists.py
index 7317ba747..35cf61cc8 100644
--- a/samples/tests/test_table_exists.py
+++ b/samples/tests/test_table_exists.py
@@ -25,7 +25,6 @@
def test_table_exists(
capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client
) -> None:
-
table_exists.table_exists(random_table_id)
out, err = capsys.readouterr()
assert "Table {} is not found.".format(random_table_id) in out
diff --git a/samples/tests/test_table_insert_rows.py b/samples/tests/test_table_insert_rows.py
index 59024fa95..13400d69c 100644
--- a/samples/tests/test_table_insert_rows.py
+++ b/samples/tests/test_table_insert_rows.py
@@ -27,7 +27,6 @@ def test_table_insert_rows(
random_table_id: str,
client: bigquery.Client,
) -> None:
-
schema = [
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
diff --git a/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py b/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py
index 00456ce84..c6bfbf392 100644
--- a/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py
+++ b/samples/tests/test_table_insert_rows_explicit_none_insert_ids.py
@@ -25,7 +25,6 @@
def test_table_insert_rows_explicit_none_insert_ids(
capsys: "pytest.CaptureFixture[str]", random_table_id: str, client: bigquery.Client
) -> None:
-
schema = [
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
diff --git a/samples/tests/test_update_dataset_access.py b/samples/tests/test_update_dataset_access.py
index 186a3b575..f17634fb0 100644
--- a/samples/tests/test_update_dataset_access.py
+++ b/samples/tests/test_update_dataset_access.py
@@ -23,7 +23,6 @@
def test_update_dataset_access(
capsys: "pytest.CaptureFixture[str]", dataset_id: str
) -> None:
-
update_dataset_access.update_dataset_access(dataset_id)
out, err = capsys.readouterr()
assert (
diff --git a/samples/tests/test_update_dataset_default_partition_expiration.py b/samples/tests/test_update_dataset_default_partition_expiration.py
index b7787dde3..4dd0d9296 100644
--- a/samples/tests/test_update_dataset_default_partition_expiration.py
+++ b/samples/tests/test_update_dataset_default_partition_expiration.py
@@ -23,7 +23,6 @@
def test_update_dataset_default_partition_expiration(
capsys: "pytest.CaptureFixture[str]", dataset_id: str
) -> None:
-
ninety_days_ms = 90 * 24 * 60 * 60 * 1000 # in milliseconds
update_dataset_default_partition_expiration.update_dataset_default_partition_expiration(
diff --git a/samples/tests/test_update_dataset_default_table_expiration.py b/samples/tests/test_update_dataset_default_table_expiration.py
index f780827f2..24df5446d 100644
--- a/samples/tests/test_update_dataset_default_table_expiration.py
+++ b/samples/tests/test_update_dataset_default_table_expiration.py
@@ -23,7 +23,6 @@
def test_update_dataset_default_table_expiration(
capsys: "pytest.CaptureFixture[str]", dataset_id: str
) -> None:
-
one_day_ms = 24 * 60 * 60 * 1000 # in milliseconds
update_dataset_default_table_expiration.update_dataset_default_table_expiration(
diff --git a/samples/tests/test_update_dataset_description.py b/samples/tests/test_update_dataset_description.py
index 5d1209e22..6d76337dc 100644
--- a/samples/tests/test_update_dataset_description.py
+++ b/samples/tests/test_update_dataset_description.py
@@ -23,7 +23,6 @@
def test_update_dataset_description(
capsys: "pytest.CaptureFixture[str]", dataset_id: str
) -> None:
-
update_dataset_description.update_dataset_description(dataset_id)
out, err = capsys.readouterr()
assert "Updated description." in out
diff --git a/samples/tests/test_update_table_require_partition_filter.py b/samples/tests/test_update_table_require_partition_filter.py
index 68e1c1e2b..c86a22769 100644
--- a/samples/tests/test_update_table_require_partition_filter.py
+++ b/samples/tests/test_update_table_require_partition_filter.py
@@ -27,7 +27,6 @@ def test_update_table_require_partition_filter(
random_table_id: str,
client: bigquery.Client,
) -> None:
-
# Make a partitioned table.
schema = [bigquery.SchemaField("transaction_timestamp", "TIMESTAMP")]
table = bigquery.Table(random_table_id, schema=schema)
diff --git a/samples/update_dataset_access.py b/samples/update_dataset_access.py
index fda784da5..2fb21aff2 100644
--- a/samples/update_dataset_access.py
+++ b/samples/update_dataset_access.py
@@ -14,7 +14,6 @@
def update_dataset_access(dataset_id: str) -> None:
-
# [START bigquery_update_dataset_access]
from google.cloud import bigquery
diff --git a/samples/update_dataset_default_partition_expiration.py b/samples/update_dataset_default_partition_expiration.py
index 37456f3a0..7a3ccaca3 100644
--- a/samples/update_dataset_default_partition_expiration.py
+++ b/samples/update_dataset_default_partition_expiration.py
@@ -14,7 +14,6 @@
def update_dataset_default_partition_expiration(dataset_id: str) -> None:
-
# [START bigquery_update_dataset_partition_expiration]
from google.cloud import bigquery
diff --git a/samples/update_dataset_default_table_expiration.py b/samples/update_dataset_default_table_expiration.py
index cf6f50d9f..ccd0d979e 100644
--- a/samples/update_dataset_default_table_expiration.py
+++ b/samples/update_dataset_default_table_expiration.py
@@ -14,7 +14,6 @@
def update_dataset_default_table_expiration(dataset_id: str) -> None:
-
# [START bigquery_update_dataset_expiration]
from google.cloud import bigquery
diff --git a/samples/update_dataset_description.py b/samples/update_dataset_description.py
index 98c5fed43..b12baa999 100644
--- a/samples/update_dataset_description.py
+++ b/samples/update_dataset_description.py
@@ -14,7 +14,6 @@
def update_dataset_description(dataset_id: str) -> None:
-
# [START bigquery_update_dataset_description]
from google.cloud import bigquery
diff --git a/samples/update_routine.py b/samples/update_routine.py
index 1a975a253..1a8908295 100644
--- a/samples/update_routine.py
+++ b/samples/update_routine.py
@@ -19,7 +19,6 @@
def update_routine(routine_id: str) -> "bigquery.Routine":
-
# [START bigquery_update_routine]
from google.cloud import bigquery
diff --git a/samples/update_table_require_partition_filter.py b/samples/update_table_require_partition_filter.py
index 8221238a7..40b739b76 100644
--- a/samples/update_table_require_partition_filter.py
+++ b/samples/update_table_require_partition_filter.py
@@ -14,7 +14,6 @@
def update_table_require_partition_filter(table_id: str) -> None:
-
# [START bigquery_update_table_require_partition_filter]
from google.cloud import bigquery
diff --git a/scripts/decrypt-secrets.sh b/scripts/decrypt-secrets.sh
index 21f6d2a26..120b0ddc4 100755
--- a/scripts/decrypt-secrets.sh
+++ b/scripts/decrypt-secrets.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# Copyright 2015 Google Inc. All rights reserved.
+# Copyright 2024 Google LLC All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
diff --git a/scripts/readme-gen/readme_gen.py b/scripts/readme-gen/readme_gen.py
index 91b59676b..8f5e248a0 100644
--- a/scripts/readme-gen/readme_gen.py
+++ b/scripts/readme-gen/readme_gen.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
-# Copyright 2016 Google Inc
+# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -33,17 +33,17 @@
autoescape=True,
)
-README_TMPL = jinja_env.get_template('README.tmpl.rst')
+README_TMPL = jinja_env.get_template("README.tmpl.rst")
def get_help(file):
- return subprocess.check_output(['python', file, '--help']).decode()
+ return subprocess.check_output(["python", file, "--help"]).decode()
def main():
parser = argparse.ArgumentParser()
- parser.add_argument('source')
- parser.add_argument('--destination', default='README.rst')
+ parser.add_argument("source")
+ parser.add_argument("--destination", default="README.rst")
args = parser.parse_args()
@@ -51,9 +51,9 @@ def main():
root = os.path.dirname(source)
destination = os.path.join(root, args.destination)
- jinja_env.globals['get_help'] = get_help
+ jinja_env.globals["get_help"] = get_help
- with io.open(source, 'r') as f:
+ with io.open(source, "r") as f:
config = yaml.load(f)
# This allows get_help to execute in the right directory.
@@ -61,9 +61,9 @@ def main():
output = README_TMPL.render(config)
- with io.open(destination, 'w') as f:
+ with io.open(destination, "w") as f:
f.write(output)
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/scripts/readme-gen/templates/install_deps.tmpl.rst b/scripts/readme-gen/templates/install_deps.tmpl.rst
index 6f069c6c8..f21db80c4 100644
--- a/scripts/readme-gen/templates/install_deps.tmpl.rst
+++ b/scripts/readme-gen/templates/install_deps.tmpl.rst
@@ -12,7 +12,7 @@ Install Dependencies
.. _Python Development Environment Setup Guide:
https://cloud.google.com/python/setup
-#. Create a virtualenv. Samples are compatible with Python 3.7+.
+#. Create a virtualenv. Samples are compatible with Python 3.9+.
.. code-block:: bash
diff --git a/setup.cfg b/setup.cfg
index 25892161f..d5e734f0f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
-# Copyright 2020 Google LLC
+# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,10 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# Generated by synthtool. DO NOT EDIT!
-[bdist_wheel]
-universal = 1
-
[pytype]
python_version = 3.8
inputs =
diff --git a/setup.py b/setup.py
index d8f2bb226..2ad29ecbf 100644
--- a/setup.py
+++ b/setup.py
@@ -12,118 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import io
-import os
+import setuptools # type: ignore
-import setuptools
-
-# Package metadata.
-
-name = "google-cloud-bigquery"
-description = "Google BigQuery API client library"
-
-# Should be one of:
-# 'Development Status :: 3 - Alpha'
-# 'Development Status :: 4 - Beta'
-# 'Development Status :: 5 - Production/Stable'
-release_status = "Development Status :: 5 - Production/Stable"
-dependencies = [
- "grpcio >= 1.47.0, < 2.0dev", # https://github.com/googleapis/python-bigquery/issues/1262
- # NOTE: Maintainers, please do not require google-api-core>=2.x.x
- # Until this issue is closed
- # https://github.com/googleapis/google-cloud-python/issues/10566
- "google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0",
- "google-cloud-bigquery-storage >= 2.0.0, <3.0.0dev",
- "proto-plus >= 1.22.0, <2.0.0dev",
- # NOTE: Maintainers, please do not require google-cloud-core>=2.x.x
- # Until this issue is closed
- # https://github.com/googleapis/google-cloud-python/issues/10566
- "google-cloud-core >= 1.4.1, <3.0.0dev",
- "google-resumable-media >= 0.6.0, < 3.0dev",
- "packaging >= 14.3, <22.0.0dev",
- "protobuf >= 3.19.0, <5.0.0dev", # For the legacy proto-based types.
- "python-dateutil >= 2.7.2, <3.0dev",
- "pyarrow >= 3.0.0, < 10.0dev",
- "requests >= 2.18.0, < 3.0.0dev",
-]
-extras = {
- # Keep the no-op bqstorage extra for backward compatibility.
- # See: https://github.com/googleapis/python-bigquery/issues/757
- "bqstorage": [],
- "pandas": ["pandas>=1.0.0", "db-dtypes>=0.3.0,<2.0.0dev"],
- "geopandas": ["geopandas>=0.9.0, <1.0dev", "Shapely>=1.6.0, <2.0dev"],
- "ipython": ["ipython>=7.0.1,!=8.1.0"],
- "tqdm": ["tqdm >= 4.7.4, <5.0.0dev"],
- "opentelemetry": [
- "opentelemetry-api >= 1.1.0",
- "opentelemetry-sdk >= 1.1.0",
- "opentelemetry-instrumentation >= 0.20b0",
- ],
-}
-
-all_extras = []
-
-for extra in extras:
- all_extras.extend(extras[extra])
-
-extras["all"] = all_extras
-
-# Setup boilerplate below this line.
-
-package_root = os.path.abspath(os.path.dirname(__file__))
-
-readme_filename = os.path.join(package_root, "README.rst")
-with io.open(readme_filename, encoding="utf-8") as readme_file:
- readme = readme_file.read()
-
-version = {}
-with open(os.path.join(package_root, "google/cloud/bigquery/version.py")) as fp:
- exec(fp.read(), version)
-version = version["__version__"]
-
-# Only include packages under the 'google' namespace. Do not include tests,
-# benchmarks, etc.
-packages = [
- package
- for package in setuptools.PEP420PackageFinder.find()
- if package.startswith("google")
-]
-
-# Determine which namespaces are needed.
-namespaces = ["google"]
-if "google.cloud" in packages:
- namespaces.append("google.cloud")
-
-
-setuptools.setup(
- name=name,
- version=version,
- description=description,
- long_description=readme,
- author="Google LLC",
- author_email="googleapis-packages@google.com",
- license="Apache 2.0",
- url="https://github.com/googleapis/python-bigquery",
- classifiers=[
- release_status,
- "Intended Audience :: Developers",
- "License :: OSI Approved :: Apache Software License",
- "Programming Language :: Python",
- "Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3.8",
- "Programming Language :: Python :: 3.9",
- "Programming Language :: Python :: 3.10",
- "Operating System :: OS Independent",
- "Topic :: Internet",
- ],
- platforms="Posix; MacOS X; Windows",
- packages=packages,
- namespace_packages=namespaces,
- install_requires=dependencies,
- extras_require=extras,
- python_requires=">=3.7, <3.11",
- include_package_data=True,
- zip_safe=False,
-)
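+# With no arguments, setuptools reads all package metadata from declarative
+# configuration (setup.cfg and/or pyproject.toml) instead of keyword arguments here.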
+setuptools.setup()
diff --git a/testing/constraints-3.12.txt b/testing/constraints-3.12.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/testing/constraints-3.13.txt b/testing/constraints-3.13.txt
new file mode 100644
index 000000000..e69de29bb
diff --git a/testing/constraints-3.14.txt b/testing/constraints-3.14.txt
new file mode 100644
index 000000000..6bd20f5fb
--- /dev/null
+++ b/testing/constraints-3.14.txt
@@ -0,0 +1,2 @@
+# Constraints for Python 3.14
+grpcio >= 1.75.1
diff --git a/testing/constraints-3.7.txt b/testing/constraints-3.7.txt
deleted file mode 100644
index 67313f6b8..000000000
--- a/testing/constraints-3.7.txt
+++ /dev/null
@@ -1,27 +0,0 @@
-# This constraints file is used to check that lower bounds
-# are correct in setup.py
-# List *all* library dependencies and extras in this file.
-# Pin the version to the lower bound.
-#
-# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
-# Then this file should have foo==1.14.0
-db-dtypes==0.3.0
-geopandas==0.9.0
-google-api-core==1.31.5
-google-cloud-bigquery-storage==2.0.0
-google-cloud-core==1.4.1
-google-resumable-media==0.6.0
-grpcio==1.47.0
-ipython==7.0.1
-opentelemetry-api==1.1.0
-opentelemetry-instrumentation==0.20b0
-opentelemetry-sdk==1.1.0
-pandas==1.1.0
-proto-plus==1.22.0
-protobuf==3.19.0
-pyarrow==3.0.0
-python-dateutil==2.7.3
-requests==2.18.0
-Shapely==1.6.4.post2
-six==1.13.0
-tqdm==4.7.4
diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt
deleted file mode 100644
index e5e73c5c7..000000000
--- a/testing/constraints-3.8.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-grpcio==1.47.0
-pandas==1.2.0
diff --git a/testing/constraints-3.9.txt b/testing/constraints-3.9.txt
index d4c302867..f61c0cf09 100644
--- a/testing/constraints-3.9.txt
+++ b/testing/constraints-3.9.txt
@@ -4,5 +4,30 @@
#
# NOTE: Not comprehensive yet, will eventually be maintained semi-automatically by
# the renovate bot.
+bigquery-magics==0.6.0
+db-dtypes==1.0.4
+geopandas==0.9.0
+google-api-core==2.11.1
+google-auth==2.14.1
+google-cloud-bigquery-storage==2.18.0
+google-cloud-core==2.4.1
+google-resumable-media==2.0.0
grpcio==1.47.0
-pyarrow>=4.0.0
+grpcio==1.49.1; python_version >= '3.11'
+ipywidgets==7.7.1
+ipython==7.23.1
+ipykernel==6.2.0
+opentelemetry-api==1.1.0
+opentelemetry-instrumentation==0.20b0
+opentelemetry-sdk==1.1.0
+numpy==1.20.2
+packaging==24.2.0
+pandas==1.3.0
+pandas-gbq==0.26.1
+proto-plus==1.22.3
+protobuf==3.20.2
+pyarrow==4.0.0
+python-dateutil==2.8.2
+requests==2.21.0
+Shapely==1.8.4
+matplotlib==3.7.1
diff --git a/tests/data/scalars.csv b/tests/data/scalars.csv
new file mode 100644
index 000000000..7af97583f
--- /dev/null
+++ b/tests/data/scalars.csv
@@ -0,0 +1,2 @@
+"[2020-01-01, 2020-02-01)"
+
diff --git a/tests/data/scalars_schema_csv.json b/tests/data/scalars_schema_csv.json
new file mode 100644
index 000000000..82b878d95
--- /dev/null
+++ b/tests/data/scalars_schema_csv.json
@@ -0,0 +1,10 @@
+[
+ {
+ "mode" : "NULLABLE",
+ "name" : "range_date",
+ "type" : "RANGE",
+ "rangeElementType": {
+ "type": "DATE"
+ }
+ }
+ ]
\ No newline at end of file
diff --git a/tests/data/schema.json b/tests/data/schema.json
index 6a36e55e5..29542e82d 100644
--- a/tests/data/schema.json
+++ b/tests/data/schema.json
@@ -83,6 +83,14 @@
"mode" : "NULLABLE",
"name" : "FavoriteNumber",
"type" : "NUMERIC"
+ },
+ {
+ "mode" : "NULLABLE",
+ "name" : "TimeRange",
+ "type" : "RANGE",
+ "rangeElementType": {
+ "type": "DATETIME"
+ }
}
]
}
diff --git a/tests/system/conftest.py b/tests/system/conftest.py
index 784a1dd5c..8efa042af 100644
--- a/tests/system/conftest.py
+++ b/tests/system/conftest.py
@@ -96,12 +96,14 @@ def load_scalars_table(
project_id: str,
dataset_id: str,
data_path: str = "scalars.jsonl",
+ source_format=enums.SourceFormat.NEWLINE_DELIMITED_JSON,
+ schema_source="scalars_schema.json",
) -> str:
- schema = bigquery_client.schema_from_json(DATA_DIR / "scalars_schema.json")
+ schema = bigquery_client.schema_from_json(DATA_DIR / schema_source)
table_id = data_path.replace(".", "_") + hex(random.randrange(1000000))
job_config = bigquery.LoadJobConfig()
job_config.schema = schema
- job_config.source_format = enums.SourceFormat.NEWLINE_DELIMITED_JSON
+ job_config.source_format = source_format
full_table_id = f"{project_id}.{dataset_id}.{table_id}"
with open(DATA_DIR / data_path, "rb") as data_file:
job = bigquery_client.load_table_from_file(
@@ -151,6 +153,22 @@ def scalars_table_multi_location(
return request.param, full_table_id
+@pytest.fixture(scope="session")
+def scalars_table_csv(
+ bigquery_client: bigquery.Client, project_id: str, dataset_id: str
+):
+ full_table_id = load_scalars_table(
+ bigquery_client,
+ project_id,
+ dataset_id,
+ data_path="scalars.csv",
+ source_format=enums.SourceFormat.CSV,
+ schema_source="scalars_schema_csv.json",
+ )
+ yield full_table_id
+ bigquery_client.delete_table(full_table_id, not_found_ok=True)
+
+
@pytest.fixture
def test_table_name(request, replace_non_anum=re.compile(r"[^a-zA-Z0-9_]").sub):
return replace_non_anum("_", request.node.name)
diff --git a/tests/system/helpers.py b/tests/system/helpers.py
index 721f55040..7fd344eeb 100644
--- a/tests/system/helpers.py
+++ b/tests/system/helpers.py
@@ -25,6 +25,7 @@
_naive = datetime.datetime(2016, 12, 5, 12, 41, 9)
_naive_microseconds = datetime.datetime(2016, 12, 5, 12, 41, 9, 250000)
_stamp = "%s %s" % (_naive.date().isoformat(), _naive.time().isoformat())
+_date = _naive.date().isoformat()
_stamp_microseconds = _stamp + ".250000"
_zoned = _naive.replace(tzinfo=UTC)
_zoned_microseconds = _naive_microseconds.replace(tzinfo=UTC)
@@ -78,6 +79,10 @@
),
("SELECT ARRAY(SELECT STRUCT([1, 2]))", [{"_field_1": [1, 2]}]),
("SELECT ST_GeogPoint(1, 2)", "POINT(1 2)"),
+ (
+ "SELECT RANGE '[UNBOUNDED, %s)'" % _date,
+ {"start": None, "end": _naive.date()},
+ ),
]
diff --git a/tests/system/test_arrow.py b/tests/system/test_arrow.py
index 8b88b6844..f2aed656c 100644
--- a/tests/system/test_arrow.py
+++ b/tests/system/test_arrow.py
@@ -167,3 +167,59 @@ def test_arrow_extension_types_same_for_storage_and_REST_APIs_894(
b"ARROW:extension:name": b"google:sqlType:geography",
b"ARROW:extension:metadata": b'{"encoding": "WKT"}',
}
+
+
+def test_list_rows_range_csv(
+ bigquery_client: bigquery.Client,
+ scalars_table_csv: str,
+):
+ table_id = scalars_table_csv
+
+ schema = [
+ bigquery.SchemaField(
+ "range_date", enums.SqlTypeNames.RANGE, range_element_type="DATE"
+ ),
+ ]
+
+ arrow_table = bigquery_client.list_rows(
+ table_id,
+ selected_fields=schema,
+ ).to_arrow()
+
+ schema = arrow_table.schema
+
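+ # A RANGE<DATE> column is surfaced in Arrow as a struct with date32 "start" and "end" fields.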
+ expected_type = pyarrow.struct(
+ [("start", pyarrow.date32()), ("end", pyarrow.date32())]
+ )
+
+ range_type = schema.field("range_date").type
+ assert range_type == expected_type
+
+
+def test_to_arrow_query_with_empty_results(bigquery_client):
+ """
+ JSON regression test for https://github.com/googleapis/python-bigquery/issues/1580.
+ """
+ job = bigquery_client.query(
+ """
+ select
+ 123 as int_col,
+ '' as string_col,
+ to_json('{}') as json_col,
+ struct(to_json('[]') as json_field, -1 as int_field) as struct_col,
+ [to_json('null')] as json_array_col,
+ from unnest([])
+ """
+ )
+ table = job.to_arrow()
+ assert list(table.column_names) == [
+ "int_col",
+ "string_col",
+ "json_col",
+ "struct_col",
+ "json_array_col",
+ ]
+ assert table.shape == (0, 5)
+ struct_type = table.field("struct_col").type
+ assert struct_type.get_field_index("json_field") == 0
+ assert struct_type.get_field_index("int_field") == 1
diff --git a/tests/system/test_client.py b/tests/system/test_client.py
index c99ee1c72..3d32a3634 100644
--- a/tests/system/test_client.py
+++ b/tests/system/test_client.py
@@ -13,6 +13,7 @@
# limitations under the License.
import base64
+import copy
import csv
import datetime
import decimal
@@ -24,6 +25,8 @@
import time
import unittest
import uuid
+import random
+import string
from typing import Optional
from google.api_core.exceptions import PreconditionFailed
@@ -35,21 +38,19 @@
from google.api_core.exceptions import InternalServerError
from google.api_core.exceptions import ServiceUnavailable
from google.api_core.exceptions import TooManyRequests
-from google.api_core.iam import Policy
from google.cloud import bigquery
from google.cloud.bigquery.dataset import Dataset
from google.cloud.bigquery.dataset import DatasetReference
from google.cloud.bigquery.table import Table
from google.cloud._helpers import UTC
from google.cloud.bigquery import dbapi, enums
-from google.cloud import bigquery_storage
from google.cloud import storage
from google.cloud.datacatalog_v1 import types as datacatalog_types
from google.cloud.datacatalog_v1 import PolicyTagManagerClient
+from google.cloud.resourcemanager_v3 import types as resourcemanager_types
+from google.cloud.resourcemanager_v3 import TagKeysClient, TagValuesClient
import psutil
import pytest
-import pyarrow
-import pyarrow.types
from test_utils.retry import RetryErrors
from test_utils.retry import RetryInstanceState
from test_utils.retry import RetryResult
@@ -73,6 +74,16 @@
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
]
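+# Schema with a TIMESTAMP column that requests picosecond precision.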
+SCHEMA_PICOSECOND = [
+ bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
+ bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
+ bigquery.SchemaField(
+ "time_pico",
+ "TIMESTAMP",
+ mode="REQUIRED",
+ timestamp_precision=enums.TimestampPrecision.PICOSECOND,
+ ),
+]
CLUSTERING_SCHEMA = [
bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
@@ -96,6 +107,24 @@
],
),
]
+TABLE_CONSTRAINTS_SCHEMA = [
+ bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"),
+ bigquery.SchemaField("fk_id", "STRING", mode="REQUIRED"),
+]
+
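+# The a-, b-, and c-twitter source files have progressively narrower schemas
+# (four, three, and two fields); the reference-file-schema tests below rely on
+# the a-twitter file carrying the full four-field schema.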
+SOURCE_URIS_AVRO = [
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro",
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.avro",
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.avro",
+]
+SOURCE_URIS_PARQUET = [
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet",
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/b-twitter.parquet",
+ "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/c-twitter.parquet",
+]
+REFERENCE_FILE_SCHEMA_URI_AVRO = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.avro"
+REFERENCE_FILE_SCHEMA_URI_PARQUET = "gs://cloud-samples-data/bigquery/federated-formats-reference-file-schema/a-twitter.parquet"
+
# The VPC-SC team maintains a mirror of the GCS bucket used for code
# samples. The public bucket crosses the configured security boundary.
@@ -145,9 +174,12 @@ def setUpModule():
class TestBigQuery(unittest.TestCase):
def setUp(self):
self.to_delete = []
+ self.to_delete_tag_keys_values = []
def tearDown(self):
policy_tag_client = PolicyTagManagerClient()
+ tag_keys_client = TagKeysClient()
+ tag_values_client = TagValuesClient()
def _still_in_use(bad_request):
return any(
@@ -170,6 +202,18 @@ def _still_in_use(bad_request):
else:
doomed.delete()
+ # The TagKey cannot be deleted if it has any child TagValues.
+ for key_values in self.to_delete_tag_keys_values:
+ tag_key = key_values.pop()
+
+ # Delete tag values first
+ for tag_value in key_values:
+ tag_values_client.delete_tag_value(name=tag_value.name).result()
+
+ tag_keys_client.delete_tag_key(name=tag_key.name).result()
+
def test_get_service_account_email(self):
client = Config.CLIENT
@@ -216,6 +260,22 @@ def test_create_dataset(self):
self.assertTrue(_dataset_exists(dataset))
self.assertEqual(dataset.dataset_id, DATASET_ID)
self.assertEqual(dataset.project, Config.CLIENT.project)
+ self.assertIs(dataset.is_case_insensitive, False)
+
+ def test_create_dataset_case_sensitive(self):
+ DATASET_ID = _make_dataset_id("create_cs_dataset")
+ dataset = self.temp_dataset(DATASET_ID, is_case_insensitive=False)
+ self.assertIs(dataset.is_case_insensitive, False)
+
+ def test_create_dataset_case_insensitive(self):
+ DATASET_ID = _make_dataset_id("create_ci_dataset")
+ dataset = self.temp_dataset(DATASET_ID, is_case_insensitive=True)
+ self.assertIs(dataset.is_case_insensitive, True)
+
+ def test_create_dataset_max_time_travel_hours(self):
+ DATASET_ID = _make_dataset_id("create_ci_dataset")
+ dataset = self.temp_dataset(DATASET_ID, max_time_travel_hours=24 * 2)
+ self.assertEqual(int(dataset.max_time_travel_hours), 24 * 2)
def test_get_dataset(self):
dataset_id = _make_dataset_id("get_dataset")
@@ -244,30 +304,107 @@ def test_get_dataset(self):
self.assertEqual(got.friendly_name, "Friendly")
self.assertEqual(got.description, "Description")
+ def test_create_dataset_with_default_rounding_mode(self):
+ DATASET_ID = _make_dataset_id("create_dataset_rounding_mode")
+ dataset = self.temp_dataset(DATASET_ID, default_rounding_mode="ROUND_HALF_EVEN")
+
+ self.assertTrue(_dataset_exists(dataset))
+ self.assertEqual(dataset.default_rounding_mode, "ROUND_HALF_EVEN")
+
+ def _create_resource_tag_key_and_values(self, key, values):
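+ # Create a Resource Manager tag key with the given tag values in the test
+ # project and register both for cleanup in tearDown.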
+ tag_key_client = TagKeysClient()
+ tag_value_client = TagValuesClient()
+
+ tag_key_parent = f"projects/{Config.CLIENT.project}"
+ new_tag_key = resourcemanager_types.TagKey(
+ short_name=key, parent=tag_key_parent
+ )
+ tag_key = tag_key_client.create_tag_key(tag_key=new_tag_key).result()
+ self.to_delete_tag_keys_values.insert(0, [tag_key])
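+ # The values created below are inserted ahead of the key, so tearDown can
+ # pop the key off the end and delete it last.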
+
+ for value in values:
+ new_tag_value = resourcemanager_types.TagValue(
+ short_name=value, parent=tag_key.name
+ )
+ tag_value = tag_value_client.create_tag_value(
+ tag_value=new_tag_value
+ ).result()
+ self.to_delete_tag_keys_values[0].insert(0, tag_value)
+
def test_update_dataset(self):
dataset = self.temp_dataset(_make_dataset_id("update_dataset"))
self.assertTrue(_dataset_exists(dataset))
self.assertIsNone(dataset.friendly_name)
self.assertIsNone(dataset.description)
self.assertEqual(dataset.labels, {})
+ self.assertEqual(dataset.resource_tags, {})
+ self.assertIs(dataset.is_case_insensitive, False)
+
+ # Create unique tag keys for each test run so that runs on different Python versions do not collide.
+ tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4))
+ tag_1 = f"env_{tag_postfix}"
+ tag_2 = f"component_{tag_postfix}"
+ tag_3 = f"project_{tag_postfix}"
+
+ # Tags need to be created before they can be used in a dataset.
+ self._create_resource_tag_key_and_values(tag_1, ["prod", "dev"])
+ self._create_resource_tag_key_and_values(tag_2, ["batch"])
+ self._create_resource_tag_key_and_values(tag_3, ["atlas"])
dataset.friendly_name = "Friendly"
dataset.description = "Description"
dataset.labels = {"priority": "high", "color": "blue"}
+ dataset.resource_tags = {
+ f"{Config.CLIENT.project}/{tag_1}": "prod",
+ f"{Config.CLIENT.project}/{tag_2}": "batch",
+ }
+ dataset.is_case_insensitive = True
ds2 = Config.CLIENT.update_dataset(
- dataset, ("friendly_name", "description", "labels")
+ dataset,
+ (
+ "friendly_name",
+ "description",
+ "labels",
+ "resource_tags",
+ "is_case_insensitive",
+ ),
)
self.assertEqual(ds2.friendly_name, "Friendly")
self.assertEqual(ds2.description, "Description")
self.assertEqual(ds2.labels, {"priority": "high", "color": "blue"})
+ self.assertEqual(
+ ds2.resource_tags,
+ {
+ f"{Config.CLIENT.project}/{tag_1}": "prod",
+ f"{Config.CLIENT.project}/{tag_2}": "batch",
+ },
+ )
+ self.assertIs(ds2.is_case_insensitive, True)
ds2.labels = {
"color": "green", # change
"shape": "circle", # add
"priority": None, # delete
}
- ds3 = Config.CLIENT.update_dataset(ds2, ["labels"])
+ ds2.resource_tags = {
+ f"{Config.CLIENT.project}/{tag_1}": "dev", # change
+ f"{Config.CLIENT.project}/{tag_3}": "atlas", # add
+ f"{Config.CLIENT.project}/{tag_2}": None, # delete
+ }
+ ds3 = Config.CLIENT.update_dataset(ds2, ["labels", "resource_tags"])
self.assertEqual(ds3.labels, {"color": "green", "shape": "circle"})
+ self.assertEqual(
+ ds3.resource_tags,
+ {
+ f"{Config.CLIENT.project}/{tag_1}": "dev",
+ f"{Config.CLIENT.project}/{tag_3}": "atlas",
+ },
+ )
+
+ # Remove all tags
+ ds3.resource_tags = None
+ ds4 = Config.CLIENT.update_dataset(ds3, ["resource_tags"])
+ self.assertEqual(ds4.resource_tags, {})
# If we try to update using d2 again, it will fail because the
# previous update changed the ETag.
@@ -314,56 +451,47 @@ def test_create_table(self):
self.assertTrue(_table_exists(table))
self.assertEqual(table.table_id, table_id)
- def test_create_table_with_policy(self):
- from google.cloud.bigquery.schema import PolicyTagList
-
- dataset = self.temp_dataset(_make_dataset_id("create_table_with_policy"))
- table_id = "test_table"
- policy_1 = PolicyTagList(
- names=[
- "projects/{}/locations/us/taxonomies/1/policyTags/2".format(
- Config.CLIENT.project
- ),
- ]
+ def test_create_tables_in_case_insensitive_dataset(self):
+ ci_dataset = self.temp_dataset(
+ _make_dataset_id("create_table"), is_case_insensitive=True
)
- policy_2 = PolicyTagList(
- names=[
- "projects/{}/locations/us/taxonomies/3/policyTags/4".format(
- Config.CLIENT.project
- ),
- ]
- )
-
- schema = [
- bigquery.SchemaField("full_name", "STRING", mode="REQUIRED"),
- bigquery.SchemaField(
- "secret_int", "INTEGER", mode="REQUIRED", policy_tags=policy_1
- ),
- ]
- table_arg = Table(dataset.table(table_id), schema=schema)
- self.assertFalse(_table_exists(table_arg))
+ table_arg = Table(ci_dataset.table("test_table2"), schema=SCHEMA)
+ tablemc_arg = Table(ci_dataset.table("Test_taBLe2")) # same name, in Mixed Case
table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
self.to_delete.insert(0, table)
- self.assertTrue(_table_exists(table))
- self.assertEqual(policy_1, table.schema[1].policy_tags)
+ self.assertTrue(_table_exists(table_arg))
+ self.assertTrue(_table_exists(tablemc_arg))
+ self.assertIs(ci_dataset.is_case_insensitive, True)
- # Amend the schema to replace the policy tags
- new_schema = table.schema[:]
- old_field = table.schema[1]
- new_schema[1] = bigquery.SchemaField(
- name=old_field.name,
- field_type=old_field.field_type,
- mode=old_field.mode,
- description=old_field.description,
- fields=old_field.fields,
- policy_tags=policy_2,
+ def test_create_tables_in_case_sensitive_dataset(self):
+ ci_dataset = self.temp_dataset(
+ _make_dataset_id("create_table"), is_case_insensitive=False
)
+ table_arg = Table(ci_dataset.table("test_table3"), schema=SCHEMA)
+ tablemc_arg = Table(ci_dataset.table("Test_taBLe3")) # same name, in Mixed Case
- table.schema = new_schema
- table2 = Config.CLIENT.update_table(table, ["schema"])
- self.assertEqual(policy_2, table2.schema[1].policy_tags)
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ self.assertTrue(_table_exists(table_arg))
+ self.assertFalse(_table_exists(tablemc_arg))
+ self.assertIs(ci_dataset.is_case_insensitive, False)
+
+ def test_create_tables_in_default_sensitivity_dataset(self):
+ dataset = self.temp_dataset(_make_dataset_id("create_table"))
+ table_arg = Table(dataset.table("test_table4"), schema=SCHEMA)
+ tablemc_arg = Table(
+ dataset.table("Test_taBLe4")
+ ) # same name, in Mixed Case
+
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ self.assertTrue(_table_exists(table_arg))
+ self.assertFalse(_table_exists(tablemc_arg))
+ self.assertIs(dataset.is_case_insensitive, False)
def test_create_table_with_real_custom_policy(self):
from google.cloud.bigquery.schema import PolicyTagList
@@ -427,6 +555,68 @@ def test_create_table_with_real_custom_policy(self):
list(table.schema[1].policy_tags.names), [child_policy_tag.name]
)
+ def test_create_table_with_default_value_expression(self):
+ dataset = self.temp_dataset(
+ _make_dataset_id("create_table_with_default_value_expression")
+ )
+
+ table_id = "test_table"
+ timestamp_field_name = "timestamp_field_with_default_value_expression"
+
+ string_default_val_expression = "'FOO'"
+ timestamp_default_val_expression = "CURRENT_TIMESTAMP"
+
+ schema = [
+ bigquery.SchemaField(
+ "username",
+ "STRING",
+ default_value_expression=string_default_val_expression,
+ ),
+ bigquery.SchemaField(
+ timestamp_field_name,
+ "TIMESTAMP",
+ default_value_expression=timestamp_default_val_expression,
+ ),
+ ]
+ table_arg = Table(dataset.table(table_id), schema=schema)
+ self.assertFalse(_table_exists(table_arg))
+
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ self.assertTrue(_table_exists(table))
+
+ # Fetch the created table and its metadata to verify that the default
+ # value expression is assigned to fields
+ remote_table = Config.CLIENT.get_table(table)
+ remote_schema = remote_table.schema
+ self.assertEqual(remote_schema, schema)
+
+ for field in remote_schema:
+ if field.name == "username":
+ self.assertEqual(string_default_val_expression, field.default_value_expression)
+ if field.name == timestamp_field_name:
+ self.assertEqual(timestamp_default_val_expression, field.default_value_expression)
+
+ # Insert rows into the created table to verify default values are populated
+ # when value is not provided
+ NOW_SECONDS = 1448911495.484366
+ NOW = datetime.datetime.utcfromtimestamp(NOW_SECONDS).replace(tzinfo=UTC)
+
+ # Rows to insert. Row #1 omits the timestamp, so it receives the default `TIMESTAMP` expression CURRENT_TIMESTAMP.
+ # Row #2 omits the username, so it receives the default `STRING` expression 'FOO'.
+ ROWS = [{"username": "john_doe"}, {timestamp_field_name: NOW}]
+
+ errors = Config.CLIENT.insert_rows(table, ROWS)
+ self.assertEqual(len(errors), 0)
+
+ # Get list of inserted rows
+ row_1, row_2 = [row for row in list(Config.CLIENT.list_rows(table))]
+
+ # Assert that row values are populated with default value expression
+ self.assertIsInstance(row_1.get(timestamp_field_name), datetime.datetime)
+ self.assertEqual("FOO", row_2.get("username"))
+
def test_create_table_w_time_partitioning_w_clustering_fields(self):
from google.cloud.bigquery.table import TimePartitioning
from google.cloud.bigquery.table import TimePartitioningType
@@ -451,6 +641,19 @@ def test_create_table_w_time_partitioning_w_clustering_fields(self):
self.assertEqual(time_partitioning.field, "transaction_time")
self.assertEqual(table.clustering_fields, ["user_email", "store_code"])
+ def test_create_table_w_picosecond_timestamp(self):
+ dataset = self.temp_dataset(_make_dataset_id("create_table"))
+ table_id = "test_table"
+ table_arg = Table(dataset.table(table_id), schema=SCHEMA_PICOSECOND)
+ self.assertFalse(_table_exists(table_arg))
+
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+
+ self.assertTrue(_table_exists(table))
+ self.assertEqual(table.table_id, table_id)
+ self.assertEqual(table.schema, SCHEMA_PICOSECOND)
+
def test_delete_dataset_with_string(self):
dataset_id = _make_dataset_id("delete_table_true_with_string")
project = Config.CLIENT.project
@@ -556,6 +759,16 @@ def test_list_tables(self):
def test_update_table(self):
dataset = self.temp_dataset(_make_dataset_id("update_table"))
+ # Create unique tag keys for each test run so that runs on different Python versions do not collide.
+ tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4))
+ tag_1 = f"owner_{tag_postfix}"
+ tag_2 = f"classification_{tag_postfix}"
+ tag_3 = f"env_{tag_postfix}"
+
+ self._create_resource_tag_key_and_values(tag_1, ["Alice", "Bob"])
+ self._create_resource_tag_key_and_values(tag_2, ["public"])
+ self._create_resource_tag_key_and_values(tag_3, ["dev"])
+
TABLE_NAME = "test_table"
table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA)
self.assertFalse(_table_exists(table_arg))
@@ -568,14 +781,25 @@ def test_update_table(self):
table.friendly_name = "Friendly"
table.description = "Description"
table.labels = {"priority": "high", "color": "blue"}
+ table.resource_tags = {
+ f"{Config.CLIENT.project}/{tag_1}": "Alice",
+ f"{Config.CLIENT.project}/{tag_3}": "dev",
+ }
table2 = Config.CLIENT.update_table(
- table, ["friendly_name", "description", "labels"]
+ table, ["friendly_name", "description", "labels", "resource_tags"]
)
self.assertEqual(table2.friendly_name, "Friendly")
self.assertEqual(table2.description, "Description")
self.assertEqual(table2.labels, {"priority": "high", "color": "blue"})
+ self.assertEqual(
+ table2.resource_tags,
+ {
+ f"{Config.CLIENT.project}/{tag_1}": "Alice",
+ f"{Config.CLIENT.project}/{tag_3}": "dev",
+ },
+ )
table2.description = None
table2.labels = {
@@ -583,9 +807,28 @@ def test_update_table(self):
"shape": "circle", # add
"priority": None, # delete
}
- table3 = Config.CLIENT.update_table(table2, ["description", "labels"])
+ table2.resource_tags = {
+ f"{Config.CLIENT.project}/{tag_1}": "Bob", # change
+ f"{Config.CLIENT.project}/{tag_2}": "public", # add
+ f"{Config.CLIENT.project}/{tag_3}": None, # delete
+ }
+ table3 = Config.CLIENT.update_table(
+ table2, ["description", "labels", "resource_tags"]
+ )
self.assertIsNone(table3.description)
self.assertEqual(table3.labels, {"color": "green", "shape": "circle"})
+ self.assertEqual(
+ table3.resource_tags,
+ {
+ f"{Config.CLIENT.project}/{tag_1}": "Bob",
+ f"{Config.CLIENT.project}/{tag_2}": "public",
+ },
+ )
+
+ # Delete resource tag bindings.
+ table3.resource_tags = None
+ table4 = Config.CLIENT.update_table(table3, ["resource_tags"])
+ self.assertEqual(table4.resource_tags, {})
# If we try to update using table2 again, it will fail because the
# previous update changed the ETag.
@@ -685,6 +928,126 @@ def test_update_table_clustering_configuration(self):
table3 = Config.CLIENT.update_table(table2, ["clustering_fields"])
self.assertIsNone(table3.clustering_fields, None)
+ def test_update_table_constraints(self):
+ from google.cloud.bigquery.table import TableConstraints
+ from google.cloud.bigquery.table import (
+ PrimaryKey,
+ ForeignKey,
+ TableReference,
+ ColumnReference,
+ )
+
+ dataset = self.temp_dataset(_make_dataset_id("update_table"))
+
+ TABLE_NAME = "test_table"
+ table_arg = Table(dataset.table(TABLE_NAME), schema=TABLE_CONSTRAINTS_SCHEMA)
+ self.assertFalse(_table_exists(table_arg))
+
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+ self.assertTrue(_table_exists(table))
+
+ REFERENCE_TABLE_NAME = "test_table2"
+ reference_table_arg = Table(
+ dataset.table(REFERENCE_TABLE_NAME),
+ schema=[
+ bigquery.SchemaField("id", "INTEGER", mode="REQUIRED"),
+ ],
+ )
+ reference_table = helpers.retry_403(Config.CLIENT.create_table)(
+ reference_table_arg
+ )
+ self.to_delete.insert(0, reference_table)
+ self.assertTrue(_table_exists(reference_table))
+
+ reference_table.table_constraints = TableConstraints(
+ primary_key=PrimaryKey(columns=["id"]), foreign_keys=None
+ )
+ reference_table2 = Config.CLIENT.update_table(
+ reference_table, ["table_constraints"]
+ )
+ self.assertEqual(
+ reference_table2.table_constraints.primary_key,
+ reference_table.table_constraints.primary_key,
+ )
+
+ table_constraints = TableConstraints(
+ primary_key=PrimaryKey(columns=["id"]),
+ foreign_keys=[
+ ForeignKey(
+ name="fk_id",
+ referenced_table=TableReference(dataset, "test_table2"),
+ column_references=[
+ ColumnReference(referencing_column="id", referenced_column="id")
+ ],
+ ),
+ ],
+ )
+
+ table.table_constraints = table_constraints
+ table2 = Config.CLIENT.update_table(table, ["table_constraints"])
+ self.assertEqual(
+ table2.table_constraints,
+ table_constraints,
+ )
+
+ table2.table_constraints = None
+ table3 = Config.CLIENT.update_table(table2, ["table_constraints"])
+ self.assertIsNone(table3.table_constraints, None)
+
+ reference_table2.table_constraints = None
+ reference_table3 = Config.CLIENT.update_table(
+ reference_table2, ["table_constraints"]
+ )
+ self.assertIsNone(reference_table3.table_constraints, None)
+
+ def test_update_table_autodetect_schema(self):
+ dataset = self.temp_dataset(_make_dataset_id("bq_update_table_test"))
+
+ # Create an external table, restrict schema to one field
+ TABLE_NAME = "test_table"
+ set_schema = [bigquery.SchemaField("username", "STRING", mode="NULLABLE")]
+ table_arg = Table(dataset.table(TABLE_NAME))
+
+ # Create an external_config and include it in the table arguments
+ external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO)
+ external_config.source_uris = SOURCE_URIS_AVRO
+ external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO
+ external_config.schema = set_schema
+ table_arg.external_data_configuration = external_config
+
+ self.assertFalse(_table_exists(table_arg))
+
+ table = helpers.retry_403(Config.CLIENT.create_table)(table_arg)
+ self.to_delete.insert(0, table)
+ self.assertTrue(_table_exists(table))
+
+ self.assertEqual(table.schema, set_schema)
+
+ # Update table with schema autodetection
+ updated_table_arg = Table(dataset.table(TABLE_NAME))
+
+ # Update the external_config and include it in the table arguments
+ updated_external_config = copy.deepcopy(external_config)
+ updated_external_config.autodetect = True
+ updated_external_config.schema = None
+ updated_table_arg.external_data_configuration = updated_external_config
+
+ # PATCH call with autodetect_schema=True to trigger schema inference
+ updated_table = Config.CLIENT.update_table(
+ updated_table_arg, ["external_data_configuration"], autodetect_schema=True
+ )
+
+ # The updated table should have a schema inferred from the reference
+ # file, which has all four fields.
+ expected_schema = [
+ bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"),
+ ]
+ self.assertEqual(updated_table.schema, expected_schema)
+
@staticmethod
def _fetch_single_page(table, selected_fields=None):
iterator = Config.CLIENT.list_rows(table, selected_fields=selected_fields)
@@ -893,6 +1256,45 @@ def test_load_table_from_json_schema_autodetect(self):
self.assertEqual(tuple(table.schema), table_schema)
self.assertEqual(table.num_rows, 2)
+ # Autodetect makes a best effort to infer the schema, but the detected schema
+ # can be wrong and fail to match the existing table's schema. The client therefore
+ # sets autodetect = False when the table already exists and simply uses the
+ # existing schema. This test uses a case where the backend has no way to
+ # distinguish an int from a string on its own.
+ def test_load_table_from_json_schema_autodetect_table_exists(self):
+ json_rows = [
+ {"name": "123", "age": 18, "birthday": "2001-10-15", "is_awesome": False},
+ {"name": "456", "age": 79, "birthday": "1940-03-10", "is_awesome": True},
+ ]
+
+ dataset_id = _make_dataset_id("bq_system_test")
+ self.temp_dataset(dataset_id)
+ table_id = "{}.{}.load_table_from_json_basic_use".format(
+ Config.CLIENT.project, dataset_id
+ )
+
+ # Use schema with NULLABLE fields, because schema autodetection
+ # defaults to field mode NULLABLE.
+ table_schema = (
+ bigquery.SchemaField("name", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("age", "INTEGER", mode="NULLABLE"),
+ bigquery.SchemaField("birthday", "DATE", mode="NULLABLE"),
+ bigquery.SchemaField("is_awesome", "BOOLEAN", mode="NULLABLE"),
+ )
+ # create the table before loading so that the column order is predictable
+ table = helpers.retry_403(Config.CLIENT.create_table)(
+ Table(table_id, schema=table_schema)
+ )
+ self.to_delete.insert(0, table)
+
+ # Do not pass an explicit job config, so the client's automatic schema handling is exercised.
+ load_job = Config.CLIENT.load_table_from_json(json_rows, table_id)
+ load_job.result()
+
+ table = Config.CLIENT.get_table(table)
+ self.assertEqual(tuple(table.schema), table_schema)
+ self.assertEqual(table.num_rows, 2)
+
def test_load_avro_from_uri_then_dump_table(self):
from google.cloud.bigquery.job import CreateDisposition
from google.cloud.bigquery.job import SourceFormat
@@ -1052,6 +1454,195 @@ def test_load_table_from_file_w_explicit_location(self):
table_ref, "gs://{}/letters-us.csv".format(bucket_name), location="US"
).result()
+ def test_create_external_table_with_reference_file_schema_uri_avro(self):
+ client = Config.CLIENT
+ dataset_id = _make_dataset_id("external_reference_file_avro")
+ self.temp_dataset(dataset_id)
+ dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
+ table_id = "test_ref_file_avro"
+ table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id)
+
+ expected_schema = [
+ bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"),
+ ]
+
+ # By default, the table would get the c-twitter schema because it is lexicographically last
+ # in the `SOURCE_URIS_AVRO` list:
+ # a-twitter schema: (username, tweet, timestamp, likes)
+ # b-twitter schema: (username, tweet, timestamp)
+ # c-twitter schema: (username, tweet)
+
+ # Because `referenceFileSchemaUri` points at the a-twitter file, the table gets the a-twitter schema.
+
+ # Create external data configuration
+ external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.AVRO)
+ external_config.source_uris = SOURCE_URIS_AVRO
+ external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO
+
+ table = bigquery.Table(table_ref)
+ table.external_data_configuration = external_config
+
+ table = client.create_table(table)
+
+ # Get table created by the create_table API call
+ generated_table = client.get_table(table_ref)
+
+ self.assertEqual(generated_table.schema, expected_schema)
+ self.assertEqual(
+ generated_table.external_data_configuration._properties[
+ "referenceFileSchemaUri"
+ ],
+ REFERENCE_FILE_SCHEMA_URI_AVRO,
+ )
+
+ # Clean up test
+ self.to_delete.insert(0, generated_table)
+
+ def test_load_table_from_uri_with_reference_file_schema_uri_avro(self):
+ dataset_id = _make_dataset_id("test_reference_file_avro")
+ self.temp_dataset(dataset_id)
+ client = Config.CLIENT
+ dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
+ table_id = "test_ref_file_avro"
+ table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id)
+
+ expected_schema = [
+ bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"),
+ ]
+
+ # By default, the table would get the c-twitter schema because it is lexicographically last
+ # in the `SOURCE_URIS_AVRO` list:
+ # a-twitter schema: (username, tweet, timestamp, likes)
+ # b-twitter schema: (username, tweet, timestamp)
+ # c-twitter schema: (username, tweet)
+
+ # Because `referenceFileSchemaUri` points at the a-twitter file, the table gets the a-twitter schema.
+
+ # Create load job configuration
+ load_job_config = bigquery.LoadJobConfig(
+ source_format=bigquery.SourceFormat.AVRO
+ )
+ load_job_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_AVRO
+
+ load_job = client.load_table_from_uri(
+ source_uris=SOURCE_URIS_AVRO,
+ destination=table_ref,
+ job_config=load_job_config,
+ )
+ # Wait for load job to complete
+ result = load_job.result()
+
+ # Get table created by the load job
+ generated_table = client.get_table(table_ref)
+ self.assertEqual(generated_table.schema, expected_schema)
+ self.assertEqual(
+ result._properties["configuration"]["load"]["referenceFileSchemaUri"],
+ REFERENCE_FILE_SCHEMA_URI_AVRO,
+ )
+
+ # Clean up test
+ self.to_delete.insert(0, generated_table)
+
+ def test_create_external_table_with_reference_file_schema_uri_parquet(self):
+ client = Config.CLIENT
+ dataset_id = _make_dataset_id("external_table_ref_file_parquet")
+ self.temp_dataset(dataset_id)
+ dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
+ table_id = "test_ref_file_parquet"
+ table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id)
+
+ expected_schema = [
+ bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"),
+ ]
+
+ # By default, the table would get the c-twitter schema because it is lexicographically last
+ # in the `SOURCE_URIS_PARQUET` list:
+ # a-twitter schema: (username, tweet, timestamp, likes)
+ # b-twitter schema: (username, tweet, timestamp)
+ # c-twitter schema: (username, tweet)
+
+ # Because `referenceFileSchemaUri` points at the a-twitter file, the table gets the a-twitter schema.
+
+ # Create external data configuration
+ external_config = bigquery.ExternalConfig(bigquery.ExternalSourceFormat.PARQUET)
+ external_config.source_uris = SOURCE_URIS_PARQUET
+ external_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_PARQUET
+
+ table = bigquery.Table(table_ref)
+ table.external_data_configuration = external_config
+
+ table = client.create_table(table)
+
+ # Get table created by the create_table API call
+ generated_table = client.get_table(table_ref)
+ self.assertEqual(generated_table.schema, expected_schema)
+ self.assertEqual(
+ generated_table.external_data_configuration._properties[
+ "referenceFileSchemaUri"
+ ],
+ REFERENCE_FILE_SCHEMA_URI_PARQUET,
+ )
+
+ # Clean up test
+ self.to_delete.insert(0, generated_table)
+
+ def test_load_table_from_uri_with_reference_file_schema_uri_parquet(self):
+ dataset_id = _make_dataset_id("test_reference_file_parquet")
+ self.temp_dataset(dataset_id)
+ client = Config.CLIENT
+ dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
+ table_id = "test_ref_file_parquet"
+ table_ref = bigquery.TableReference(dataset_ref=dataset_ref, table_id=table_id)
+
+ expected_schema = [
+ bigquery.SchemaField("username", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("tweet", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("timestamp", "STRING", mode="NULLABLE"),
+ bigquery.SchemaField("likes", "INTEGER", mode="NULLABLE"),
+ ]
+
+ # By default, the table would get the c-twitter schema because it is lexicographically last
+ # in the `SOURCE_URIS_PARQUET` list:
+ # a-twitter schema: (username, tweet, timestamp, likes)
+ # b-twitter schema: (username, tweet, timestamp)
+ # c-twitter schema: (username, tweet)
+
+ # Because `referenceFileSchemaUri` points at the a-twitter file, the table gets the a-twitter schema.
+
+ # Create load job configuration
+ load_job_config = bigquery.LoadJobConfig(
+ source_format=bigquery.SourceFormat.PARQUET
+ )
+ load_job_config.reference_file_schema_uri = REFERENCE_FILE_SCHEMA_URI_PARQUET
+
+ load_job = client.load_table_from_uri(
+ source_uris=SOURCE_URIS_PARQUET,
+ destination=table_ref,
+ job_config=load_job_config,
+ )
+ # Wait for load job to complete
+ result = load_job.result()
+
+ # Get table created by the load job
+ generated_table = client.get_table(table_ref)
+ self.assertEqual(generated_table.schema, expected_schema)
+ self.assertEqual(
+ result._properties["configuration"]["load"]["referenceFileSchemaUri"],
+ REFERENCE_FILE_SCHEMA_URI_PARQUET,
+ )
+
+ # Clean up test
+ self.to_delete.insert(0, generated_table)
+
def _write_csv_to_storage(self, bucket_name, blob_name, header_row, data_rows):
from google.cloud._testing import _NamedTemporaryFile
@@ -1137,8 +1728,6 @@ def test_extract_table(self):
self.assertIn("Bharney Rhubble", got)
def test_copy_table(self):
- pytest.skip("b/210907595: copy fails for shakespeare table")
-
# If we create a new table to copy from, the test won't work
# because the new rows will be stored in the streaming buffer,
# and copy jobs don't read the streaming buffer.
@@ -1158,33 +1747,6 @@ def test_copy_table(self):
got_rows = self._fetch_single_page(dest_table)
self.assertTrue(len(got_rows) > 0)
- def test_get_set_iam_policy(self):
- from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE
-
- dataset = self.temp_dataset(_make_dataset_id("create_table"))
- table_id = "test_table"
- table_ref = Table(dataset.table(table_id))
- self.assertFalse(_table_exists(table_ref))
-
- table = helpers.retry_403(Config.CLIENT.create_table)(table_ref)
- self.to_delete.insert(0, table)
-
- self.assertTrue(_table_exists(table))
-
- member = "serviceAccount:{}".format(Config.CLIENT.get_service_account_email())
- BINDING = {
- "role": BIGQUERY_DATA_VIEWER_ROLE,
- "members": {member},
- }
-
- policy = Config.CLIENT.get_iam_policy(table)
- self.assertIsInstance(policy, Policy)
- self.assertEqual(policy.bindings, [])
-
- policy.bindings.append(BINDING)
- returned_policy = Config.CLIENT.set_iam_policy(table, policy)
- self.assertEqual(returned_policy.bindings, policy.bindings)
-
def test_test_iam_permissions(self):
dataset = self.temp_dataset(_make_dataset_id("create_table"))
table_id = "test_table"
@@ -1474,6 +2036,9 @@ def test_dbapi_fetchall_from_script(self):
self.assertEqual(row_tuples, [(5, "foo"), (6, "bar"), (7, "baz")])
def test_dbapi_fetch_w_bqstorage_client_large_result_set(self):
+ bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")
+ pytest.importorskip("pyarrow")
+
bqstorage_client = bigquery_storage.BigQueryReadClient(
credentials=Config.CLIENT._credentials
)
@@ -1481,38 +2046,53 @@ def test_dbapi_fetch_w_bqstorage_client_large_result_set(self):
cursor.execute(
"""
- SELECT id, `by`, time_ts
- FROM `bigquery-public-data.hacker_news.comments`
+ SELECT id, `by`, timestamp
+ FROM `bigquery-public-data.hacker_news.full`
ORDER BY `id` ASC
LIMIT 100000
"""
)
result_rows = [cursor.fetchone(), cursor.fetchone(), cursor.fetchone()]
-
field_name = operator.itemgetter(0)
fetched_data = [sorted(row.items(), key=field_name) for row in result_rows]
-
# Since DB API is not thread safe, only a single result stream should be
# requested by the BQ storage client, meaning that results should arrive
# in the sorted order.
+
expected_data = [
[
- ("by", "sama"),
- ("id", 15),
- ("time_ts", datetime.datetime(2006, 10, 9, 19, 51, 1, tzinfo=UTC)),
+ ("by", "pg"),
+ ("id", 1),
+ (
+ "timestamp",
+ datetime.datetime(
+ 2006, 10, 9, 18, 21, 51, tzinfo=datetime.timezone.utc
+ ),
+ ),
],
[
- ("by", "pg"),
- ("id", 17),
- ("time_ts", datetime.datetime(2006, 10, 9, 19, 52, 45, tzinfo=UTC)),
+ ("by", "phyllis"),
+ ("id", 2),
+ (
+ "timestamp",
+ datetime.datetime(
+ 2006, 10, 9, 18, 30, 28, tzinfo=datetime.timezone.utc
+ ),
+ ),
],
[
- ("by", "pg"),
- ("id", 22),
- ("time_ts", datetime.datetime(2006, 10, 10, 2, 18, 22, tzinfo=UTC)),
+ ("by", "phyllis"),
+ ("id", 3),
+ (
+ "timestamp",
+ datetime.datetime(
+ 2006, 10, 9, 18, 40, 33, tzinfo=datetime.timezone.utc
+ ),
+ ),
],
]
+
self.assertEqual(fetched_data, expected_data)
def test_dbapi_dry_run_query(self):
@@ -1532,6 +2112,7 @@ def test_dbapi_dry_run_query(self):
self.assertEqual(list(rows), [])
def test_dbapi_connection_does_not_leak_sockets(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
current_process = psutil.Process()
conn_count_start = len(current_process.connections())
@@ -1541,8 +2122,8 @@ def test_dbapi_connection_does_not_leak_sockets(self):
cursor.execute(
"""
- SELECT id, `by`, time_ts
- FROM `bigquery-public-data.hacker_news.comments`
+ SELECT id, `by`, timestamp
+ FROM `bigquery-public-data.hacker_news.full`
ORDER BY `id` ASC
LIMIT 100000
"""
@@ -1753,13 +2334,18 @@ def test_insert_rows_nested_nested(self):
),
],
),
+ SF("json_col", "JSON"),
]
record = {
"nested_string": "another string value",
"nested_repeated": [0, 1, 2],
"nested_record": {"nested_nested_string": "some deep insight"},
}
- to_insert = [("Some value", record)]
+ json_record = {
+ "json_array": [1, 2, 3],
+ "json_object": {"alpha": "abc", "num": 123},
+ }
+ to_insert = [("Some value", record, json_record)]
table_id = "test_table"
dataset = self.temp_dataset(_make_dataset_id("issue_2951"))
table_arg = Table(dataset.table(table_id), schema=schema)
@@ -1854,6 +2440,44 @@ def test_create_routine(self):
assert len(rows) == 1
assert rows[0].max_value == 100.0
+ def test_create_routine_with_range(self):
+ routine_name = "routine_range"
+ dataset = self.temp_dataset(_make_dataset_id("routine_range"))
+
+ routine = bigquery.Routine(
+ dataset.routine(routine_name),
+ type_="SCALAR_FUNCTION",
+ language="SQL",
+ body="RANGE_START(x)",
+ arguments=[
+ bigquery.RoutineArgument(
+ name="x",
+ data_type=bigquery.StandardSqlDataType(
+ type_kind=bigquery.StandardSqlTypeNames.RANGE,
+ range_element_type=bigquery.StandardSqlDataType(
+ type_kind=bigquery.StandardSqlTypeNames.DATE
+ ),
+ ),
+ )
+ ],
+ return_type=bigquery.StandardSqlDataType(
+ type_kind=bigquery.StandardSqlTypeNames.DATE
+ ),
+ )
+
+ query_string = (
+ "SELECT `{}`(RANGE '[2016-08-12, UNBOUNDED)') as range_start;".format(
+ str(routine.reference)
+ )
+ )
+
+ routine = helpers.retry_403(Config.CLIENT.create_routine)(routine)
+ query_job = helpers.retry_403(Config.CLIENT.query)(query_string)
+ rows = list(query_job.result())
+
+ assert len(rows) == 1
+ assert rows[0].range_start == datetime.date(2016, 8, 12)
+
def test_create_tvf_routine(self):
from google.cloud.bigquery import (
Routine,
@@ -1941,6 +2565,41 @@ def test_create_tvf_routine(self):
]
assert result_rows == expected
+ def test_create_routine_w_data_governance(self):
+ routine_name = "routine_with_data_governance"
+ dataset = self.temp_dataset(_make_dataset_id("create_routine"))
+
+ routine = bigquery.Routine(
+ dataset.routine(routine_name),
+ type_="SCALAR_FUNCTION",
+ language="SQL",
+ body="x",
+ arguments=[
+ bigquery.RoutineArgument(
+ name="x",
+ data_type=bigquery.StandardSqlDataType(
+ type_kind=bigquery.StandardSqlTypeNames.INT64
+ ),
+ )
+ ],
+ data_governance_type="DATA_MASKING",
+ return_type=bigquery.StandardSqlDataType(
+ type_kind=bigquery.StandardSqlTypeNames.INT64
+ ),
+ )
+ routine_original = copy.deepcopy(routine)
+
+ client = Config.CLIENT
+ routine_new = client.create_routine(routine)
+
+ assert routine_new.reference == routine_original.reference
+ assert routine_new.type_ == routine_original.type_
+ assert routine_new.language == routine_original.language
+ assert routine_new.body == routine_original.body
+ assert routine_new.arguments == routine_original.arguments
+ assert routine_new.return_type == routine_original.return_type
+ assert routine_new.data_governance_type == routine_original.data_governance_type
+
def test_create_table_rows_fetch_nested_schema(self):
table_name = "test_table"
dataset = self.temp_dataset(_make_dataset_id("create_table_nested_schema"))
@@ -1999,6 +2658,9 @@ def test_create_table_rows_fetch_nested_schema(self):
self.assertEqual(found[8], decimal.Decimal(expected["FavoriteNumber"]))
def test_nested_table_to_arrow(self):
+ bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")
+ pyarrow = pytest.importorskip("pyarrow")
+ pyarrow.types = pytest.importorskip("pyarrow.types")
from google.cloud.bigquery.job import SourceFormat
from google.cloud.bigquery.job import WriteDisposition
@@ -2055,12 +2717,18 @@ def test_nested_table_to_arrow(self):
self.assertTrue(pyarrow.types.is_list(record_col[1].type))
self.assertTrue(pyarrow.types.is_int64(record_col[1].type.value_type))
- def temp_dataset(self, dataset_id, location=None):
+ def temp_dataset(self, dataset_id, *args, **kwargs):
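+ # Optional dataset properties (location, max_time_travel_hours,
+ # default_rounding_mode, is_case_insensitive) are applied only when a test
+ # passes them explicitly.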
project = Config.CLIENT.project
dataset_ref = bigquery.DatasetReference(project, dataset_id)
dataset = Dataset(dataset_ref)
- if location:
- dataset.location = location
+ if kwargs.get("location"):
+ dataset.location = kwargs.get("location")
+ if kwargs.get("max_time_travel_hours"):
+ dataset.max_time_travel_hours = kwargs.get("max_time_travel_hours")
+ if kwargs.get("default_rounding_mode"):
+ dataset.default_rounding_mode = kwargs.get("default_rounding_mode")
+ if kwargs.get("is_case_insensitive"):
+ dataset.is_case_insensitive = kwargs.get("is_case_insensitive")
dataset = helpers.retry_403(Config.CLIENT.create_dataset)(dataset)
self.to_delete.append(dataset)
return dataset
@@ -2087,8 +2755,7 @@ def _table_exists(t):
return False
-def test_dbapi_create_view(dataset_id):
-
+def test_dbapi_create_view(dataset_id: str):
query = f"""
CREATE VIEW {dataset_id}.dbapi_create_view
AS SELECT name, SUM(number) AS total
@@ -2100,7 +2767,7 @@ def test_dbapi_create_view(dataset_id):
assert Config.CURSOR.rowcount == 0, "expected 0 rows"
-def test_parameterized_types_round_trip(dataset_id):
+def test_parameterized_types_round_trip(dataset_id: str):
client = Config.CLIENT
table_id = f"{dataset_id}.test_parameterized_types_round_trip"
fields = (
@@ -2126,7 +2793,7 @@ def test_parameterized_types_round_trip(dataset_id):
assert tuple(s._key()[:2] for s in table2.schema) == fields
-def test_table_snapshots(dataset_id):
+def test_table_snapshots(dataset_id: str):
from google.cloud.bigquery import CopyJobConfig
from google.cloud.bigquery import OperationType
@@ -2197,7 +2864,7 @@ def test_table_snapshots(dataset_id):
assert rows == [(1, "one"), (2, "two")]
-def test_table_clones(dataset_id):
+def test_table_clones(dataset_id: str):
from google.cloud.bigquery import CopyJobConfig
from google.cloud.bigquery import OperationType
@@ -2223,7 +2890,7 @@ def test_table_clones(dataset_id):
# Now create a clone before modifying the original table data.
copy_config = CopyJobConfig()
copy_config.operation_type = OperationType.CLONE
- copy_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE
+ copy_config.write_disposition = bigquery.WriteDisposition.WRITE_EMPTY
copy_job = client.copy_table(
sources=table_path_source,
diff --git a/tests/system/test_list_rows.py b/tests/system/test_list_rows.py
index 4c08958c3..108b842ce 100644
--- a/tests/system/test_list_rows.py
+++ b/tests/system/test_list_rows.py
@@ -118,3 +118,17 @@ def test_list_rows_scalars_extreme(
assert value == 4
else:
assert value is None
+
+
+def test_list_rows_range(bigquery_client: bigquery.Client, scalars_table_csv: str):
+ rows = bigquery_client.list_rows(scalars_table_csv)
+ rows = list(rows)
+ row = rows[0]
+ expected_range = {
+ "start": datetime.date(2020, 1, 1),
+ "end": datetime.date(2020, 2, 1),
+ }
+ assert row["range_date"] == expected_range
+
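+ # The second line of scalars.csv is empty, so that row's RANGE column is NULL.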
+ row_null = rows[1]
+ assert row_null["range_date"] is None
diff --git a/tests/system/test_magics.py b/tests/system/test_magics.py
index 78c15cb50..72d358a74 100644
--- a/tests/system/test_magics.py
+++ b/tests/system/test_magics.py
@@ -50,7 +50,10 @@ def test_bigquery_magic(ipython_interactive):
current_process = psutil.Process()
conn_count_start = len(current_process.connections())
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ # Deprecated, but should still work in google-cloud-bigquery 3.x.
+ with pytest.warns(FutureWarning, match="bigquery_magics"):
+ ip.extension_manager.load_extension("google.cloud.bigquery")
+
sql = """
SELECT
CONCAT(
@@ -71,8 +74,7 @@ def test_bigquery_magic(ipython_interactive):
# Removes blanks & terminal code (result of display clearing)
updates = list(filter(lambda x: bool(x) and x != "\x1b[2K", lines))
assert re.match("Executing query with job ID: .*", updates[0])
 assert all(re.match("Query executing: .*s", line) for line in updates[1:-1])
- assert re.match("Query complete after .*s", updates[-1])
assert isinstance(result, pandas.DataFrame)
assert len(result) == 10 # verify row count
assert list(result) == ["url", "view_count"] # verify column names
diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py
index 34e4243c4..1fe7ff2cd 100644
--- a/tests/system/test_pandas.py
+++ b/tests/system/test_pandas.py
@@ -23,22 +23,32 @@
import warnings
import google.api_core.retry
-import pkg_resources
import pytest
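+# importlib.metadata is in the standard library on Python 3.8+; older
+# environments fall back to the importlib_metadata backport.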
+try:
+ import importlib.metadata as metadata
+except ImportError:
+ import importlib_metadata as metadata
+
from google.cloud import bigquery
-from google.cloud import bigquery_storage
+
from google.cloud.bigquery import enums
from . import helpers
pandas = pytest.importorskip("pandas", minversion="0.23.0")
+pyarrow = pytest.importorskip("pyarrow")
numpy = pytest.importorskip("numpy")
+bigquery_storage = pytest.importorskip(
+ "google.cloud.bigquery_storage", minversion="2.0.0"
+)
-PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
-PANDAS_INT64_VERSION = pkg_resources.parse_version("1.0.0")
+if pandas is not None:
+ PANDAS_INSTALLED_VERSION = metadata.version("pandas")
+else:
+ PANDAS_INSTALLED_VERSION = "0.0.0"
class MissingDataError(Exception):
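With pkg_resources gone, PANDAS_INSTALLED_VERSION is now a plain version string, which is why the skip conditions below compare string prefixes. A sketch of an equivalent, prefix-free check using packaging (an assumption; packaging is not imported by this module):

try:
    import importlib.metadata as metadata
except ImportError:
    import importlib_metadata as metadata

from packaging.version import Version  # assumption: packaging is installed

PANDAS_INSTALLED_VERSION = metadata.version("pandas")


def pandas_at_least(minimum: str) -> bool:
    # Parse both sides so that e.g. "10.0.0" compares greater than "2.0.0".
    return Version(PANDAS_INSTALLED_VERSION) >= Version(minimum)

# Usage: @pytest.mark.skipif(not pandas_at_least("1.0.0"), reason="Requires pandas>=1.0.0")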
@@ -302,10 +312,7 @@ def test_load_table_from_dataframe_w_automatic_schema(bigquery_client, dataset_i
]
-@pytest.mark.skipif(
- PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION,
- reason="Only `pandas version >=1.0.0` is supported",
-)
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_load_table_from_dataframe_w_nullable_int64_datatype(
bigquery_client, dataset_id
):
@@ -334,7 +341,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(
@pytest.mark.skipif(
- PANDAS_INSTALLED_VERSION < PANDAS_INT64_VERSION,
+ PANDAS_INSTALLED_VERSION[0:2].startswith("0."),
reason="Only `pandas version >=1.0.0` is supported",
)
def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(
@@ -373,10 +380,10 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
bigquery.SchemaField("geo_col", "GEOGRAPHY"),
bigquery.SchemaField("int_col", "INTEGER"),
bigquery.SchemaField("num_col", "NUMERIC"),
- bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
bigquery.SchemaField("str_col", "STRING"),
bigquery.SchemaField("time_col", "TIME"),
bigquery.SchemaField("ts_col", "TIMESTAMP"),
+ bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
)
num_rows = 100
@@ -390,10 +397,10 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
("geo_col", nulls),
("int_col", nulls),
("num_col", nulls),
- ("bignum_col", nulls),
("str_col", nulls),
("time_col", nulls),
("ts_col", nulls),
+ ("bignum_col", nulls),
]
df_data = collections.OrderedDict(df_data)
dataframe = pandas.DataFrame(df_data, columns=df_data.keys())
@@ -420,8 +427,7 @@ def test_load_table_from_dataframe_w_nulls(bigquery_client, dataset_id):
def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id):
- """Test that a DataFrame with required columns can be uploaded if a
- BigQuery schema is specified.
+ """Test that a DataFrame can be uploaded to a table with required columns.
See: https://github.com/googleapis/google-cloud-python/issues/8093
"""
@@ -432,7 +438,6 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id):
records = [{"name": "Chip", "age": 2}, {"name": "Dale", "age": 3}]
dataframe = pandas.DataFrame(records, columns=["name", "age"])
- job_config = bigquery.LoadJobConfig(schema=table_schema)
table_id = "{}.{}.load_table_from_dataframe_w_required".format(
bigquery_client.project, dataset_id
)
@@ -443,15 +448,50 @@ def test_load_table_from_dataframe_w_required(bigquery_client, dataset_id):
bigquery.Table(table_id, schema=table_schema)
)
- job_config = bigquery.LoadJobConfig(schema=table_schema)
- load_job = bigquery_client.load_table_from_dataframe(
- dataframe, table_id, job_config=job_config
- )
+ load_job = bigquery_client.load_table_from_dataframe(dataframe, table_id)
load_job.result()
table = bigquery_client.get_table(table)
assert tuple(table.schema) == table_schema
assert table.num_rows == 2
+ for field in table.schema:
+ assert field.mode == "REQUIRED"
+
+
+def test_load_table_from_dataframe_w_required_but_local_nulls_fails(
+ bigquery_client, dataset_id
+):
+ """Test that a DataFrame with nulls can't be uploaded to a table with
+ required columns.
+
+ See: https://github.com/googleapis/python-bigquery/issues/1692
+ """
+ table_schema = (
+ bigquery.SchemaField("name", "STRING", mode="REQUIRED"),
+ bigquery.SchemaField("age", "INTEGER", mode="REQUIRED"),
+ )
+
+ records = [
+ {"name": "Chip", "age": 2},
+ {"name": "Dale", "age": 3},
+ {"name": None, "age": None},
+ {"name": "Alvin", "age": 4},
+ ]
+ dataframe = pandas.DataFrame(records, columns=["name", "age"])
+ table_id = (
+ "{}.{}.load_table_from_dataframe_w_required_but_local_nulls_fails".format(
+ bigquery_client.project, dataset_id
+ )
+ )
+
+ # Create the table before loading so that schema mismatch errors are
+ # identified.
+ helpers.retry_403(bigquery_client.create_table)(
+ bigquery.Table(table_id, schema=table_schema)
+ )
+
+ with pytest.raises(google.api_core.exceptions.BadRequest, match="null"):
+ bigquery_client.load_table_from_dataframe(dataframe, table_id).result()
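The new negative test leans on the backend returning a BadRequest when a REQUIRED column receives NULLs. A purely illustrative client-side pre-check (the test intentionally exercises the server-side error instead):

import pandas


def assert_no_nulls_in_required(dataframe: pandas.DataFrame, required_columns):
    # Fail fast, before submitting a load job, if any REQUIRED column has NULLs.
    nulls = dataframe[list(required_columns)].isna()
    if nulls.any().any():
        bad = [col for col in required_columns if nulls[col].any()]
        raise ValueError(f"Null values found in REQUIRED columns: {bad}")

# assert_no_nulls_in_required(dataframe, ["name", "age"]) would raise for the
# records used above, since one row has name=None and age=None.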
def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id):
@@ -469,10 +509,10 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
bigquery.SchemaField("geo_col", "GEOGRAPHY"),
bigquery.SchemaField("int_col", "INTEGER"),
bigquery.SchemaField("num_col", "NUMERIC"),
- bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
bigquery.SchemaField("str_col", "STRING"),
bigquery.SchemaField("time_col", "TIME"),
bigquery.SchemaField("ts_col", "TIMESTAMP"),
+ bigquery.SchemaField("bignum_col", "BIGNUMERIC"),
)
df_data = [
@@ -502,14 +542,6 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
decimal.Decimal("99999999999999999999999999999.999999999"),
],
),
- (
- "bignum_col",
- [
- decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
- None,
- decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
- ],
- ),
("str_col", ["abc", None, "def"]),
(
"time_col",
@@ -525,6 +557,14 @@ def test_load_table_from_dataframe_w_explicit_schema(bigquery_client, dataset_id
),
],
),
+ (
+ "bignum_col",
+ [
+ decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
+ None,
+ decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
+ ],
+ ),
]
df_data = collections.OrderedDict(df_data)
dataframe = pandas.DataFrame(df_data, dtype="object", columns=df_data.keys())
@@ -736,8 +776,8 @@ def test_load_table_from_dataframe_w_explicit_schema_source_format_csv_floats(
def test_query_results_to_dataframe(bigquery_client):
QUERY = """
- SELECT id, author, time_ts, dead
- FROM `bigquery-public-data.hacker_news.comments`
+ SELECT id, `by`, timestamp, dead
+ FROM `bigquery-public-data.hacker_news.full`
LIMIT 10
"""
@@ -745,12 +785,12 @@ def test_query_results_to_dataframe(bigquery_client):
assert isinstance(df, pandas.DataFrame)
assert len(df) == 10 # verify the number of rows
- column_names = ["id", "author", "time_ts", "dead"]
+ column_names = ["id", "by", "timestamp", "dead"]
assert list(df) == column_names # verify the column names
exp_datatypes = {
"id": int,
- "author": str,
- "time_ts": pandas.Timestamp,
+ "by": str,
+ "timestamp": pandas.Timestamp,
"dead": bool,
}
for _, row in df.iterrows():
@@ -762,8 +802,8 @@ def test_query_results_to_dataframe(bigquery_client):
def test_query_results_to_dataframe_w_bqstorage(bigquery_client):
query = """
- SELECT id, author, time_ts, dead
- FROM `bigquery-public-data.hacker_news.comments`
+ SELECT id, `by`, timestamp, dead
+ FROM `bigquery-public-data.hacker_news.full`
LIMIT 10
"""
@@ -775,12 +815,12 @@ def test_query_results_to_dataframe_w_bqstorage(bigquery_client):
assert isinstance(df, pandas.DataFrame)
assert len(df) == 10 # verify the number of rows
- column_names = ["id", "author", "time_ts", "dead"]
+ column_names = ["id", "by", "timestamp", "dead"]
assert list(df) == column_names
exp_datatypes = {
"id": int,
- "author": str,
- "time_ts": pandas.Timestamp,
+ "by": str,
+ "timestamp": pandas.Timestamp,
"dead": bool,
}
for index, row in df.iterrows():
@@ -795,7 +835,9 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id):
schema = [
SF("float_col", "FLOAT", mode="REQUIRED"),
SF("int_col", "INTEGER", mode="REQUIRED"),
+ SF("int64_col", "INTEGER", mode="NULLABLE"),
SF("bool_col", "BOOLEAN", mode="REQUIRED"),
+ SF("boolean_col", "BOOLEAN", mode="NULLABLE"),
SF("string_col", "STRING", mode="NULLABLE"),
SF("date_col", "DATE", mode="NULLABLE"),
SF("time_col", "TIME", mode="NULLABLE"),
@@ -858,6 +900,15 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id):
dataframe["date_col"] = dataframe["date_col"].astype("dbdate")
dataframe["time_col"] = dataframe["time_col"].astype("dbtime")
+ # Support nullable integer and boolean dtypes.
+ # https://github.com/googleapis/python-bigquery/issues/1815
+ dataframe["int64_col"] = pandas.Series(
+ [-11, -22, pandas.NA, -44, -55, -66], dtype="Int64"
+ )
+ dataframe["boolean_col"] = pandas.Series(
+ [True, False, True, pandas.NA, True, False], dtype="boolean"
+ )
+
table_id = f"{bigquery_client.project}.{dataset_id}.test_insert_rows_from_dataframe"
table_arg = bigquery.Table(table_id, schema=schema)
table = helpers.retry_403(bigquery_client.create_table)(table_arg)
@@ -870,7 +921,7 @@ def test_insert_rows_from_dataframe(bigquery_client, dataset_id):
expected = [
# Pandas often represents NULL values as NaN. Convert to None for
# easier comparison.
- tuple(None if col != col else col for col in data_row)
+ tuple(None if pandas.isna(col) else col for col in data_row)
for data_row in dataframe.itertuples(index=False)
]
@@ -1002,6 +1053,7 @@ def test_list_rows_max_results_w_bqstorage(bigquery_client):
assert len(dataframe.index) == 100
+@pytest.mark.skipif(PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="")
@pytest.mark.parametrize(
("max_results",),
(
@@ -1106,11 +1158,108 @@ def test_list_rows_nullable_scalars_extreme_dtypes(
assert df.dtypes["string_col"].name == "object"
+@pytest.mark.parametrize(
+    ("max_results",),
+    (
+        (None,),  # Use BQ Storage API.
+        (10,),  # Use REST API.
+    ),
+)
+def test_list_rows_nullable_scalars_extreme_dtypes_w_custom_dtype(
+ bigquery_client, scalars_extreme_table, max_results
+):
+ # TODO(GH#836): Avoid INTERVAL columns until they are supported by the
+ # BigQuery Storage API and pyarrow.
+ schema = [
+ bigquery.SchemaField("bool_col", enums.SqlTypeNames.BOOLEAN),
+ bigquery.SchemaField("bignumeric_col", enums.SqlTypeNames.BIGNUMERIC),
+ bigquery.SchemaField("bytes_col", enums.SqlTypeNames.BYTES),
+ bigquery.SchemaField("date_col", enums.SqlTypeNames.DATE),
+ bigquery.SchemaField("datetime_col", enums.SqlTypeNames.DATETIME),
+ bigquery.SchemaField("float64_col", enums.SqlTypeNames.FLOAT64),
+ bigquery.SchemaField("geography_col", enums.SqlTypeNames.GEOGRAPHY),
+ bigquery.SchemaField("int64_col", enums.SqlTypeNames.INT64),
+ bigquery.SchemaField("numeric_col", enums.SqlTypeNames.NUMERIC),
+ bigquery.SchemaField("string_col", enums.SqlTypeNames.STRING),
+ bigquery.SchemaField("time_col", enums.SqlTypeNames.TIME),
+ bigquery.SchemaField("timestamp_col", enums.SqlTypeNames.TIMESTAMP),
+ ]
+
+ df = bigquery_client.list_rows(
+ scalars_extreme_table,
+ max_results=max_results,
+ selected_fields=schema,
+ ).to_dataframe(
+ bool_dtype=pandas.BooleanDtype(),
+ int_dtype=pandas.Int64Dtype(),
+ float_dtype=(
+ pandas.Float64Dtype()
+ if hasattr(pandas, "Float64Dtype")
+ else pandas.StringDtype()
+ ),
+ string_dtype=pandas.StringDtype(),
+ date_dtype=(
+ pandas.ArrowDtype(pyarrow.date32())
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ datetime_dtype=(
+ pandas.ArrowDtype(pyarrow.timestamp("us"))
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ time_dtype=(
+ pandas.ArrowDtype(pyarrow.time64("us"))
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ timestamp_dtype=(
+ pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ )
+
+ # These pandas dtypes are handled by the custom dtypes.
+ assert df.dtypes["bool_col"].name == "boolean"
+ assert df.dtypes["float64_col"].name == "Float64"
+ assert df.dtypes["int64_col"].name == "Int64"
+ assert df.dtypes["string_col"].name == "string"
+
+    # Without pandas.ArrowDtype (older pandas), these columns fall back to
+    # "datetime64[ns]" / "object" dtypes, so only assert the pyarrow-backed
+    # names when ArrowDtype is available.
+    if hasattr(pandas, "ArrowDtype"):
+        assert df.dtypes["date_col"].name == "date32[day][pyarrow]"
+        assert df.dtypes["datetime_col"].name == "timestamp[us][pyarrow]"
+        assert df.dtypes["timestamp_col"].name == "timestamp[us, tz=UTC][pyarrow]"
+        assert df.dtypes["time_col"].name == "time64[us][pyarrow]"
+
+ # decimal.Decimal is used to avoid loss of precision.
+ assert df.dtypes["numeric_col"].name == "object"
+ assert df.dtypes["bignumeric_col"].name == "object"
+
+ # pandas uses Python bytes objects.
+ assert df.dtypes["bytes_col"].name == "object"
+
+
def test_upload_time_and_datetime_56(bigquery_client, dataset_id):
df = pandas.DataFrame(
dict(
dt=[
- datetime.datetime(2020, 1, 8, 8, 0, 0),
+ datetime.datetime(2020, 1, 8, 8, 0, 0, tzinfo=datetime.timezone.utc),
datetime.datetime(
2020,
1,
@@ -1150,6 +1299,32 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id):
]
+def test_to_dataframe_query_with_empty_results(bigquery_client):
+ """
+ JSON regression test for https://github.com/googleapis/python-bigquery/issues/1580.
+ """
+ job = bigquery_client.query(
+ """
+ select
+ 123 as int_col,
+ '' as string_col,
+ to_json('{}') as json_col,
+ struct(to_json('[]') as json_field, -1 as int_field) as struct_col,
+ [to_json('null')] as json_array_col,
+ from unnest([])
+ """
+ )
+ df = job.to_dataframe()
+ assert list(df.columns) == [
+ "int_col",
+ "string_col",
+ "json_col",
+ "struct_col",
+ "json_array_col",
+ ]
+ assert len(df.index) == 0
+
+
def test_to_dataframe_geography_as_objects(bigquery_client, dataset_id):
wkt = pytest.importorskip("shapely.wkt")
bigquery_client.query(
diff --git a/tests/system/test_query.py b/tests/system/test_query.py
index 723f927d7..d94a117e3 100644
--- a/tests/system/test_query.py
+++ b/tests/system/test_query.py
@@ -26,6 +26,7 @@
from google.cloud.bigquery.query import ScalarQueryParameterType
from google.cloud.bigquery.query import StructQueryParameter
from google.cloud.bigquery.query import StructQueryParameterType
+from google.cloud.bigquery.query import RangeQueryParameter
@pytest.fixture(params=["INSERT", "QUERY"])
@@ -256,6 +257,18 @@ def test_query_statistics(bigquery_client, query_api_method):
)
],
),
+ pytest.param(
+ "SELECT @json",
+ {"alpha": "abc", "num": [1, 2, 3]},
+ [
+ ScalarQueryParameter(
+ name="json",
+ type_="JSON",
+ value={"alpha": "abc", "num": [1, 2, 3]},
+ )
+ ],
+ id="scalar-json",
+ ),
(
"SELECT @naive_time",
datetime.time(12, 41, 9, 62500),
@@ -410,6 +423,38 @@ def test_query_statistics(bigquery_client, query_api_method):
)
],
),
+ (
+ "SELECT @range_date",
+ {"end": None, "start": datetime.date(2016, 12, 5)},
+ [
+ RangeQueryParameter(
+ name="range_date",
+ range_element_type="DATE",
+ start=datetime.date(2016, 12, 5),
+ )
+ ],
+ ),
+ (
+ "SELECT @range_datetime",
+ {"end": None, "start": datetime.datetime(2016, 12, 5, 0, 0)},
+ [
+ RangeQueryParameter(
+ name="range_datetime",
+ range_element_type="DATETIME",
+ start=datetime.datetime(2016, 12, 5),
+ )
+ ],
+ ),
+ (
+ "SELECT @range_unbounded",
+ {"end": None, "start": None},
+ [
+ RangeQueryParameter(
+ name="range_unbounded",
+ range_element_type="DATETIME",
+ )
+ ],
+ ),
),
)
def test_query_parameters(
@@ -465,7 +510,7 @@ def test_query_error_w_api_method_default(bigquery_client: bigquery.Client):
"""Test that an exception is not thrown until fetching the results.
For backwards compatibility, jobs.insert is the default API method. With
- jobs.insert, a failed query job is "sucessfully" created. An exception is
+ jobs.insert, a failed query job is "successfully" created. An exception is
thrown when fetching the results.
"""
diff --git a/tests/unit/_helpers/test_cell_data_parser.py b/tests/unit/_helpers/test_cell_data_parser.py
new file mode 100644
index 000000000..14721a26c
--- /dev/null
+++ b/tests/unit/_helpers/test_cell_data_parser.py
@@ -0,0 +1,467 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+import datetime
+import decimal
+import json
+
+from dateutil.relativedelta import relativedelta
+import pytest
+
+import google.cloud.bigquery.schema
+
+
+def create_field(mode="NULLABLE", type_="IGNORED", name="test_field", **kwargs):
+ return google.cloud.bigquery.schema.SchemaField(name, type_, mode=mode, **kwargs)
+
+
+@pytest.fixture
+def mut():
+ from google.cloud.bigquery import _helpers
+
+ return _helpers
+
+
+@pytest.fixture
+def object_under_test(mut):
+ return mut.CELL_DATA_PARSER
+
+
+ALL_TYPES = {
+ "BOOL",
+ "BOOLEAN",
+ "BYTES",
+ "INTEGER",
+ "INT64",
+ "INTERVAL",
+ "FLOAT",
+ "FLOAT64",
+ "NUMERIC",
+ "BIGNUMERIC",
+ "STRING",
+ "GEOGRAPHY",
+ "TIMESTAMP",
+ "DATETIME",
+ "DATE",
+ "TIME",
+ "RECORD",
+ "STRUCT",
+ "JSON",
+ "RANGE",
+}
+
+TYPES_WITH_CLIENT_SIDE_NULL_VALIDATION = ALL_TYPES - {
+ "STRING",
+ "GEOGRAPHY",
+}
+
+
+@pytest.mark.parametrize(
+ "type_",
+ list(sorted(ALL_TYPES)),
+)
+def test_to_py_w_none_nullable(object_under_test, type_):
+ assert object_under_test.to_py(None, create_field("NULLABLE", type_)) is None
+
+
+@pytest.mark.parametrize("type_", list(sorted(TYPES_WITH_CLIENT_SIDE_NULL_VALIDATION)))
+def test_to_py_w_none_required(object_under_test, type_):
+ with pytest.raises(TypeError):
+ object_under_test.to_py(None, create_field("REQUIRED", type_))
+
+
+def test_interval_to_py_w_invalid_format(object_under_test):
+ with pytest.raises(ValueError, match="NOT_AN_INTERVAL"):
+ object_under_test.interval_to_py("NOT_AN_INTERVAL", create_field())
+
+
+@pytest.mark.parametrize(
+ ("value", "expected"),
+ (
+ ("0-0 0 0:0:0", relativedelta()),
+ # SELECT INTERVAL X YEAR
+ ("-10000-0 0 0:0:0", relativedelta(years=-10000)),
+ ("-1-0 0 0:0:0", relativedelta(years=-1)),
+ ("1-0 0 0:0:0", relativedelta(years=1)),
+ ("10000-0 0 0:0:0", relativedelta(years=10000)),
+ # SELECT INTERVAL X MONTH
+ ("-0-11 0 0:0:0", relativedelta(months=-11)),
+ ("-0-1 0 0:0:0", relativedelta(months=-1)),
+ ("0-1 0 0:0:0", relativedelta(months=1)),
+ ("0-11 0 0:0:0", relativedelta(months=11)),
+ # SELECT INTERVAL X DAY
+ ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)),
+ ("0-0 -1 0:0:0", relativedelta(days=-1)),
+ ("0-0 1 0:0:0", relativedelta(days=1)),
+ ("0-0 3660000 0:0:0", relativedelta(days=3660000)),
+ # SELECT INTERVAL X HOUR
+ ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)),
+ ("0-0 0 -1:0:0", relativedelta(hours=-1)),
+ ("0-0 0 1:0:0", relativedelta(hours=1)),
+ ("0-0 0 87840000:0:0", relativedelta(hours=87840000)),
+ # SELECT INTERVAL X MINUTE
+ ("0-0 0 -0:59:0", relativedelta(minutes=-59)),
+ ("0-0 0 -0:1:0", relativedelta(minutes=-1)),
+ ("0-0 0 0:1:0", relativedelta(minutes=1)),
+ ("0-0 0 0:59:0", relativedelta(minutes=59)),
+ # SELECT INTERVAL X SECOND
+ ("0-0 0 -0:0:59", relativedelta(seconds=-59)),
+ ("0-0 0 -0:0:1", relativedelta(seconds=-1)),
+ ("0-0 0 0:0:1", relativedelta(seconds=1)),
+ ("0-0 0 0:0:59", relativedelta(seconds=59)),
+ # SELECT (INTERVAL -1 SECOND) / 1000000
+ ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)),
+ ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)),
+ ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)),
+ ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)),
+ ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)),
+ # Test with multiple digits in each section.
+ (
+ "32-11 45 67:16:23.987654",
+ relativedelta(
+ years=32,
+ months=11,
+ days=45,
+ hours=67,
+ minutes=16,
+ seconds=23,
+ microseconds=987654,
+ ),
+ ),
+ (
+ "-32-11 -45 -67:16:23.987654",
+ relativedelta(
+ years=-32,
+ months=-11,
+ days=-45,
+ hours=-67,
+ minutes=-16,
+ seconds=-23,
+ microseconds=-987654,
+ ),
+ ),
+ # Test with mixed +/- sections.
+ (
+ "9999-9 -999999 9999999:59:59.999999",
+ relativedelta(
+ years=9999,
+ months=9,
+ days=-999999,
+ hours=9999999,
+ minutes=59,
+ seconds=59,
+ microseconds=999999,
+ ),
+ ),
+ # Test with fraction that is not microseconds.
+ ("0-0 0 0:0:42.", relativedelta(seconds=42)),
+ ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)),
+ ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)),
+ ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)),
+ ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)),
+ # Fractional seconds can cause rounding problems if cast to float. See:
+ # https://github.com/googleapis/python-db-dtypes-pandas/issues/18
+ ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)),
+ (
+ "0-0 0 01:01:01.010101",
+ relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101),
+ ),
+ (
+ "0-0 0 09:09:09.090909",
+ relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909),
+ ),
+ (
+ "0-0 0 11:11:11.111111",
+ relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111),
+ ),
+ (
+ "0-0 0 19:16:23.987654",
+ relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654),
+ ),
+ # Nanoseconds are not expected, but should not cause error.
+ ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)),
+ ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)),
+ ),
+)
+def test_interval_to_py_w_string_values(object_under_test, value, expected):
+ got = object_under_test.interval_to_py(value, create_field())
+ assert got == expected
+
+
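The parametrization above pins down the "Y-M D H:M:S.F" wire format for INTERVAL values, including sign handling and sub-microsecond truncation. A stand-alone parsing sketch consistent with these cases (the library's own parser may differ in detail):

import re

from dateutil.relativedelta import relativedelta

_INTERVAL_RE = re.compile(
    r"(?P<calendar_sign>-?)(?P<years>\d+)-(?P<months>\d+) "
    r"(?P<days>-?\d+) "
    r"(?P<time_sign>-?)(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d+)"
    r"(?:\.(?P<fraction>\d*))?$"
)


def parse_interval(value: str) -> relativedelta:
    match = _INTERVAL_RE.match(value)
    if match is None:
        raise ValueError(f"Unexpected INTERVAL format: {value!r}")
    calendar_sign = -1 if match.group("calendar_sign") == "-" else 1
    time_sign = -1 if match.group("time_sign") == "-" else 1
    # Pad or truncate the fraction to microseconds (nanoseconds are dropped).
    fraction = (match.group("fraction") or "").ljust(6, "0")[:6]
    return relativedelta(
        years=calendar_sign * int(match.group("years")),
        months=calendar_sign * int(match.group("months")),
        days=int(match.group("days")),
        hours=time_sign * int(match.group("hours")),
        minutes=time_sign * int(match.group("minutes")),
        seconds=time_sign * int(match.group("seconds")),
        microseconds=time_sign * int(fraction),
    )

assert parse_interval("-0-11 0 0:0:0") == relativedelta(months=-11)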
+def test_integer_to_py_w_string_value(object_under_test):
+ coerced = object_under_test.integer_to_py("42", object())
+ assert coerced == 42
+
+
+def test_integer_to_py_w_float_value(object_under_test):
+ coerced = object_under_test.integer_to_py(42.0, object())
+ assert coerced == 42
+
+
+def test_json_to_py_w_json_field(object_under_test):
+    data_field = create_field("REQUIRED", "JSON", name="data")
+
+ value = json.dumps(
+ {"v": {"key": "value"}},
+ )
+
+ expected_output = {"v": {"key": "value"}}
+ coerced_output = object_under_test.json_to_py(value, data_field)
+ assert coerced_output == expected_output
+
+
+def test_json_to_py_w_string_value(object_under_test):
+ coerced = object_under_test.json_to_py('"foo"', create_field())
+ assert coerced == "foo"
+
+
+def test_float_to_py_w_string_value(object_under_test):
+ coerced = object_under_test.float_to_py("3.1415", object())
+ assert coerced == 3.1415
+
+
+def test_float_to_py_w_float_value(object_under_test):
+ coerced = object_under_test.float_to_py(3.1415, object())
+ assert coerced == 3.1415
+
+
+def test_numeric_to_py_w_string_value(object_under_test):
+ coerced = object_under_test.numeric_to_py("3.1415", object())
+ assert coerced == decimal.Decimal("3.1415")
+
+
+def test_numeric_to_py_w_float_value(object_under_test):
+ coerced = object_under_test.numeric_to_py(3.1415, object())
+ # There is no exact float representation of 3.1415.
+ assert coerced == decimal.Decimal(3.1415)
+
+
+def test_bool_to_py_w_value_t(object_under_test):
+ coerced = object_under_test.bool_to_py("T", object())
+ assert coerced is True
+
+
+def test_bool_to_py_w_value_true(object_under_test):
+ coerced = object_under_test.bool_to_py("True", object())
+ assert coerced is True
+
+
+def test_bool_to_py_w_value_1(object_under_test):
+ coerced = object_under_test.bool_to_py("1", object())
+ assert coerced is True
+
+
+def test_bool_to_py_w_value_other(object_under_test):
+ coerced = object_under_test.bool_to_py("f", object())
+ assert coerced is False
+
+
+def test_string_to_py_w_string_value(object_under_test):
+ coerced = object_under_test.string_to_py("Wonderful!", object())
+ assert coerced == "Wonderful!"
+
+
+def test_bytes_to_py_w_base64_encoded_bytes(object_under_test):
+ expected = b"Wonderful!"
+ encoded = base64.standard_b64encode(expected)
+ coerced = object_under_test.bytes_to_py(encoded, object())
+ assert coerced == expected
+
+
+def test_bytes_to_py_w_base64_encoded_text(object_under_test):
+ expected = b"Wonderful!"
+ encoded = base64.standard_b64encode(expected).decode("ascii")
+ coerced = object_under_test.bytes_to_py(encoded, object())
+ assert coerced == expected
+
+
+def test_timestamp_to_py_w_string_int_value(object_under_test):
+ from google.cloud._helpers import _EPOCH
+
+ coerced = object_under_test.timestamp_to_py("1234567", object())
+ assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
+
+
+def test_timestamp_to_py_w_int_value(object_under_test):
+ from google.cloud._helpers import _EPOCH
+
+ coerced = object_under_test.timestamp_to_py(1234567, object())
+ assert coerced == _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
+
+
+def test_datetime_to_py_w_string_value(object_under_test):
+ coerced = object_under_test.datetime_to_py("2016-12-02T18:51:33", object())
+ assert coerced == datetime.datetime(2016, 12, 2, 18, 51, 33)
+
+
+def test_datetime_to_py_w_microseconds(object_under_test):
+ coerced = object_under_test.datetime_to_py("2015-05-22T10:11:12.987654", object())
+ assert coerced == datetime.datetime(2015, 5, 22, 10, 11, 12, 987654)
+
+
+def test_date_to_py_w_string_value(object_under_test):
+ coerced = object_under_test.date_to_py("1987-09-22", object())
+ assert coerced == datetime.date(1987, 9, 22)
+
+
+def test_time_to_py_w_string_value(object_under_test):
+ coerced = object_under_test.time_to_py("12:12:27", object())
+ assert coerced == datetime.time(12, 12, 27)
+
+
+def test_time_to_py_w_subsecond_string_value(object_under_test):
+ coerced = object_under_test.time_to_py("12:12:27.123456", object())
+ assert coerced == datetime.time(12, 12, 27, 123456)
+
+
+def test_time_to_py_w_bogus_string_value(object_under_test):
+ with pytest.raises(ValueError):
+ object_under_test.time_to_py("12:12:27.123", object())
+
+
+def test_range_to_py_w_wrong_format(object_under_test):
+ range_field = create_field(
+ "NULLABLE",
+ "RANGE",
+ range_element_type="DATE",
+ )
+ with pytest.raises(ValueError):
+ object_under_test.range_to_py("[2009-06-172019-06-17)", range_field)
+
+
+def test_range_to_py_w_wrong_element_type(object_under_test):
+ range_field = create_field(
+ "NULLABLE",
+ "RANGE",
+ range_element_type=google.cloud.bigquery.schema.FieldElementType(
+ element_type="TIME"
+ ),
+ )
+ with pytest.raises(ValueError):
+ object_under_test.range_to_py("[15:31:38, 15:50:38)", range_field)
+
+
+def test_range_to_py_w_unbounded_value(object_under_test):
+ range_field = create_field(
+ "NULLABLE",
+ "RANGE",
+ range_element_type="DATE",
+ )
+ coerced = object_under_test.range_to_py("[UNBOUNDED, 2019-06-17)", range_field)
+ assert coerced == {"start": None, "end": datetime.date(2019, 6, 17)}
+
+
+def test_range_to_py_w_date_value(object_under_test):
+ range_field = create_field(
+ "NULLABLE",
+ "RANGE",
+ range_element_type="DATE",
+ )
+ coerced = object_under_test.range_to_py("[2009-06-17, 2019-06-17)", range_field)
+ assert coerced == {
+ "start": datetime.date(2009, 6, 17),
+ "end": datetime.date(2019, 6, 17),
+ }
+
+
+def test_range_to_py_w_datetime_value(object_under_test):
+ range_field = create_field(
+ "NULLABLE",
+ "RANGE",
+ range_element_type=google.cloud.bigquery.schema.FieldElementType(
+ element_type="DATETIME"
+ ),
+ )
+ coerced = object_under_test.range_to_py(
+ "[2009-06-17T13:45:30, 2019-06-17T13:45:30)", range_field
+ )
+ assert coerced == {
+ "start": datetime.datetime(2009, 6, 17, 13, 45, 30),
+ "end": datetime.datetime(2019, 6, 17, 13, 45, 30),
+ }
+
+
+def test_range_to_py_w_timestamp_value(object_under_test):
+ from google.cloud._helpers import _EPOCH
+
+ range_field = create_field(
+ "NULLABLE",
+ "RANGE",
+ range_element_type=google.cloud.bigquery.schema.FieldElementType(
+ element_type="TIMESTAMP"
+ ),
+ )
+ coerced = object_under_test.range_to_py("[1234567, 1234789)", range_field)
+ assert coerced == {
+ "start": _EPOCH + datetime.timedelta(seconds=1, microseconds=234567),
+ "end": _EPOCH + datetime.timedelta(seconds=1, microseconds=234789),
+ }
+
+
+def test_record_to_py_w_nullable_subfield_none(object_under_test):
+ subfield = create_field("NULLABLE", "INTEGER", name="age")
+ field = create_field("REQUIRED", fields=[subfield])
+ value = {"f": [{"v": None}]}
+ coerced = object_under_test.record_to_py(value, field)
+ assert coerced == {"age": None}
+
+
+def test_record_to_py_w_scalar_subfield(object_under_test):
+ subfield = create_field("REQUIRED", "INTEGER", name="age")
+ field = create_field("REQUIRED", fields=[subfield])
+ value = {"f": [{"v": 42}]}
+ coerced = object_under_test.record_to_py(value, field)
+ assert coerced == {"age": 42}
+
+
+def test_record_to_py_w_scalar_subfield_geography(object_under_test):
+ subfield = create_field("REQUIRED", "GEOGRAPHY", name="geo")
+ field = create_field("REQUIRED", fields=[subfield])
+ value = {"f": [{"v": "POINT(1, 2)"}]}
+ coerced = object_under_test.record_to_py(value, field)
+ assert coerced == {"geo": "POINT(1, 2)"}
+
+
+def test_record_to_py_w_repeated_subfield(object_under_test):
+ subfield = create_field("REPEATED", "STRING", name="color")
+ field = create_field("REQUIRED", fields=[subfield])
+ value = {"f": [{"v": [{"v": "red"}, {"v": "yellow"}, {"v": "blue"}]}]}
+ coerced = object_under_test.record_to_py(value, field)
+ assert coerced == {"color": ["red", "yellow", "blue"]}
+
+
+def test_record_to_py_w_record_subfield(object_under_test):
+ full_name = create_field("REQUIRED", "STRING", name="full_name")
+ area_code = create_field("REQUIRED", "STRING", name="area_code")
+ local_number = create_field("REQUIRED", "STRING", name="local_number")
+ rank = create_field("REQUIRED", "INTEGER", name="rank")
+ phone = create_field(
+ "NULLABLE", "RECORD", name="phone", fields=[area_code, local_number, rank]
+ )
+ person = create_field(
+ "REQUIRED", "RECORD", name="person", fields=[full_name, phone]
+ )
+ value = {
+ "f": [
+ {"v": "Phred Phlyntstone"},
+ {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}},
+ ]
+ }
+ expected = {
+ "full_name": "Phred Phlyntstone",
+ "phone": {"area_code": "800", "local_number": "555-1212", "rank": 1},
+ }
+ coerced = object_under_test.record_to_py(value, person)
+ assert coerced == expected
diff --git a/tests/unit/_helpers/test_data_frame_cell_data_parser.py b/tests/unit/_helpers/test_data_frame_cell_data_parser.py
new file mode 100644
index 000000000..c3332dc89
--- /dev/null
+++ b/tests/unit/_helpers/test_data_frame_cell_data_parser.py
@@ -0,0 +1,71 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+import google.cloud.bigquery.schema
+
+
+def create_field(mode="NULLABLE", type_="IGNORED", name="test_field", **kwargs):
+ return google.cloud.bigquery.schema.SchemaField(name, type_, mode=mode, **kwargs)
+
+
+@pytest.fixture
+def mut():
+ from google.cloud.bigquery import _helpers
+
+ return _helpers
+
+
+@pytest.fixture
+def object_under_test(mut):
+ return mut.DATA_FRAME_CELL_DATA_PARSER
+
+
+def test_json_to_py_doesnt_parse_json(object_under_test):
+ coerced = object_under_test.json_to_py('{"key":"value"}', create_field())
+ assert coerced == '{"key":"value"}'
+
+
+def test_json_to_py_repeated_doesnt_parse_json(object_under_test):
+ coerced = object_under_test.json_to_py('{"key":"value"}', create_field("REPEATED"))
+ assert coerced == '{"key":"value"}'
+
+
+def test_record_to_py_doesnt_parse_json(object_under_test):
+ subfield = create_field(type_="JSON", name="json")
+ field = create_field(fields=[subfield])
+ value = {"f": [{"v": '{"key":"value"}'}]}
+ coerced = object_under_test.record_to_py(value, field)
+ assert coerced == {"json": '{"key":"value"}'}
+
+
+def test_record_to_py_doesnt_parse_repeated_json(object_under_test):
+ subfield = create_field("REPEATED", "JSON", name="json")
+ field = create_field("REQUIRED", fields=[subfield])
+ value = {
+ "f": [
+ {
+ "v": [
+ {"v": '{"key":"value0"}'},
+ {"v": '{"key":"value1"}'},
+ {"v": '{"key":"value2"}'},
+ ]
+ }
+ ]
+ }
+ coerced = object_under_test.record_to_py(value, field)
+ assert coerced == {
+ "json": ['{"key":"value0"}', '{"key":"value1"}', '{"key":"value2"}']
+ }
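These tests pin down that the DataFrame-oriented parser leaves JSON cells as raw strings rather than decoding them. If parsed values are needed downstream, decoding can be done after the fact; a small sketch:

import json

import pandas

df = pandas.DataFrame({"json": ['{"key":"value0"}', None, '{"key":"value2"}']})
# Decode non-null JSON strings into Python objects after fetching.
df["json_parsed"] = df["json"].map(lambda v: json.loads(v) if v is not None else None)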
diff --git a/tests/unit/_helpers/test_from_json.py b/tests/unit/_helpers/test_from_json.py
deleted file mode 100644
index 65b054f44..000000000
--- a/tests/unit/_helpers/test_from_json.py
+++ /dev/null
@@ -1,157 +0,0 @@
-# Copyright 2021 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from dateutil.relativedelta import relativedelta
-import pytest
-
-from google.cloud.bigquery.schema import SchemaField
-
-
-def create_field(mode="NULLABLE", type_="IGNORED"):
- return SchemaField("test_field", type_, mode=mode)
-
-
-@pytest.fixture
-def mut():
- from google.cloud.bigquery import _helpers
-
- return _helpers
-
-
-def test_interval_from_json_w_none_nullable(mut):
- got = mut._interval_from_json(None, create_field())
- assert got is None
-
-
-def test_interval_from_json_w_none_required(mut):
- with pytest.raises(TypeError):
- mut._interval_from_json(None, create_field(mode="REQUIRED"))
-
-
-def test_interval_from_json_w_invalid_format(mut):
- with pytest.raises(ValueError, match="NOT_AN_INTERVAL"):
- mut._interval_from_json("NOT_AN_INTERVAL", create_field())
-
-
-@pytest.mark.parametrize(
- ("value", "expected"),
- (
- ("0-0 0 0:0:0", relativedelta()),
- # SELECT INTERVAL X YEAR
- ("-10000-0 0 0:0:0", relativedelta(years=-10000)),
- ("-1-0 0 0:0:0", relativedelta(years=-1)),
- ("1-0 0 0:0:0", relativedelta(years=1)),
- ("10000-0 0 0:0:0", relativedelta(years=10000)),
- # SELECT INTERVAL X MONTH
- ("-0-11 0 0:0:0", relativedelta(months=-11)),
- ("-0-1 0 0:0:0", relativedelta(months=-1)),
- ("0-1 0 0:0:0", relativedelta(months=1)),
- ("0-11 0 0:0:0", relativedelta(months=11)),
- # SELECT INTERVAL X DAY
- ("0-0 -3660000 0:0:0", relativedelta(days=-3660000)),
- ("0-0 -1 0:0:0", relativedelta(days=-1)),
- ("0-0 1 0:0:0", relativedelta(days=1)),
- ("0-0 3660000 0:0:0", relativedelta(days=3660000)),
- # SELECT INTERVAL X HOUR
- ("0-0 0 -87840000:0:0", relativedelta(hours=-87840000)),
- ("0-0 0 -1:0:0", relativedelta(hours=-1)),
- ("0-0 0 1:0:0", relativedelta(hours=1)),
- ("0-0 0 87840000:0:0", relativedelta(hours=87840000)),
- # SELECT INTERVAL X MINUTE
- ("0-0 0 -0:59:0", relativedelta(minutes=-59)),
- ("0-0 0 -0:1:0", relativedelta(minutes=-1)),
- ("0-0 0 0:1:0", relativedelta(minutes=1)),
- ("0-0 0 0:59:0", relativedelta(minutes=59)),
- # SELECT INTERVAL X SECOND
- ("0-0 0 -0:0:59", relativedelta(seconds=-59)),
- ("0-0 0 -0:0:1", relativedelta(seconds=-1)),
- ("0-0 0 0:0:1", relativedelta(seconds=1)),
- ("0-0 0 0:0:59", relativedelta(seconds=59)),
- # SELECT (INTERVAL -1 SECOND) / 1000000
- ("0-0 0 -0:0:0.000001", relativedelta(microseconds=-1)),
- ("0-0 0 -0:0:59.999999", relativedelta(seconds=-59, microseconds=-999999)),
- ("0-0 0 -0:0:59.999", relativedelta(seconds=-59, microseconds=-999000)),
- ("0-0 0 0:0:59.999", relativedelta(seconds=59, microseconds=999000)),
- ("0-0 0 0:0:59.999999", relativedelta(seconds=59, microseconds=999999)),
- # Test with multiple digits in each section.
- (
- "32-11 45 67:16:23.987654",
- relativedelta(
- years=32,
- months=11,
- days=45,
- hours=67,
- minutes=16,
- seconds=23,
- microseconds=987654,
- ),
- ),
- (
- "-32-11 -45 -67:16:23.987654",
- relativedelta(
- years=-32,
- months=-11,
- days=-45,
- hours=-67,
- minutes=-16,
- seconds=-23,
- microseconds=-987654,
- ),
- ),
- # Test with mixed +/- sections.
- (
- "9999-9 -999999 9999999:59:59.999999",
- relativedelta(
- years=9999,
- months=9,
- days=-999999,
- hours=9999999,
- minutes=59,
- seconds=59,
- microseconds=999999,
- ),
- ),
- # Test with fraction that is not microseconds.
- ("0-0 0 0:0:42.", relativedelta(seconds=42)),
- ("0-0 0 0:0:59.1", relativedelta(seconds=59, microseconds=100000)),
- ("0-0 0 0:0:0.12", relativedelta(microseconds=120000)),
- ("0-0 0 0:0:0.123", relativedelta(microseconds=123000)),
- ("0-0 0 0:0:0.1234", relativedelta(microseconds=123400)),
- # Fractional seconds can cause rounding problems if cast to float. See:
- # https://github.com/googleapis/python-db-dtypes-pandas/issues/18
- ("0-0 0 0:0:59.876543", relativedelta(seconds=59, microseconds=876543)),
- (
- "0-0 0 01:01:01.010101",
- relativedelta(hours=1, minutes=1, seconds=1, microseconds=10101),
- ),
- (
- "0-0 0 09:09:09.090909",
- relativedelta(hours=9, minutes=9, seconds=9, microseconds=90909),
- ),
- (
- "0-0 0 11:11:11.111111",
- relativedelta(hours=11, minutes=11, seconds=11, microseconds=111111),
- ),
- (
- "0-0 0 19:16:23.987654",
- relativedelta(hours=19, minutes=16, seconds=23, microseconds=987654),
- ),
- # Nanoseconds are not expected, but should not cause error.
- ("0-0 0 0:0:00.123456789", relativedelta(microseconds=123456)),
- ("0-0 0 0:0:59.87654321", relativedelta(seconds=59, microseconds=876543)),
- ),
-)
-def test_w_string_values(mut, value, expected):
- got = mut._interval_from_json(value, create_field())
- assert got == expected
diff --git a/tests/unit/_helpers/test_scalar_query_param_parser.py b/tests/unit/_helpers/test_scalar_query_param_parser.py
new file mode 100644
index 000000000..8e0d2a34e
--- /dev/null
+++ b/tests/unit/_helpers/test_scalar_query_param_parser.py
@@ -0,0 +1,93 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import datetime
+
+import pytest
+
+import google.cloud.bigquery.schema
+
+
+def create_field(mode="NULLABLE", type_="IGNORED"):
+ return google.cloud.bigquery.schema.SchemaField("test_field", type_, mode=mode)
+
+
+@pytest.fixture
+def mut():
+ from google.cloud.bigquery import _helpers
+
+ return _helpers
+
+
+@pytest.fixture
+def object_under_test(mut):
+ return mut.SCALAR_QUERY_PARAM_PARSER
+
+
+def test_timestamp_to_py_w_none_nullable(object_under_test):
+ assert object_under_test.timestamp_to_py(None, create_field()) is None
+
+
+@pytest.mark.parametrize(
+ ("value", "expected"),
+ [
+ (
+ "2016-12-20 15:58:27.339328+00:00",
+ datetime.datetime(
+ 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc
+ ),
+ ),
+ (
+ "2016-12-20 15:58:27+00:00",
+ datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc),
+ ),
+ (
+ "2016-12-20T15:58:27.339328+00:00",
+ datetime.datetime(
+ 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc
+ ),
+ ),
+ (
+ "2016-12-20T15:58:27+00:00",
+ datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc),
+ ),
+ (
+ "2016-12-20 15:58:27.339328Z",
+ datetime.datetime(
+ 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc
+ ),
+ ),
+ (
+ "2016-12-20 15:58:27Z",
+ datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc),
+ ),
+ (
+ "2016-12-20T15:58:27.339328Z",
+ datetime.datetime(
+ 2016, 12, 20, 15, 58, 27, 339328, tzinfo=datetime.timezone.utc
+ ),
+ ),
+ (
+ "2016-12-20T15:58:27Z",
+ datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc),
+ ),
+ ],
+)
+def test_timestamp_to_py_w_timestamp_valid(object_under_test, value, expected):
+ assert object_under_test.timestamp_to_py(value, create_field()) == expected
+
+
+def test_timestamp_to_py_w_timestamp_invalid(object_under_test):
+ with pytest.raises(ValueError):
+ object_under_test.timestamp_to_py("definitely-not-a-timestamp", create_field())
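The timestamp cases above cover both "T" and space separators and both "+00:00" and "Z" offsets. A minimal sketch of that normalization (the library's parser may be implemented differently):

import datetime


def parse_timestamp_param(value: str) -> datetime.datetime:
    # Accept both "T" and " " separators and a trailing "Z" for UTC, since
    # datetime.fromisoformat() on older Pythons rejects "Z".
    normalized = value.replace("Z", "+00:00").replace(" ", "T")
    return datetime.datetime.fromisoformat(normalized)


assert parse_timestamp_param("2016-12-20 15:58:27Z") == datetime.datetime(
    2016, 12, 20, 15, 58, 27, tzinfo=datetime.timezone.utc
)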
diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py
index c2ae78eaa..5070a199b 100644
--- a/tests/unit/conftest.py
+++ b/tests/unit/conftest.py
@@ -12,7 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import mock
+from unittest import mock
+import threading
+
import pytest
from .helpers import make_client
@@ -23,6 +25,18 @@ def client():
yield make_client()
+time_lock = threading.Lock()
+
+
+@pytest.fixture
+def global_time_lock():
+ """Fixture to run tests serially that depend on the global time state,
+ such as tests of retry behavior.
+ """
+ with time_lock:
+ yield
+
+
@pytest.fixture
def PROJECT():
yield "PROJECT"
diff --git a/tests/unit/helpers.py b/tests/unit/helpers.py
index 67aeaca35..c5414138e 100644
--- a/tests/unit/helpers.py
+++ b/tests/unit/helpers.py
@@ -12,15 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from unittest import mock
+
+import pytest
+
import google.cloud.bigquery.client
import google.cloud.bigquery.dataset
-import mock
-import pytest
def make_connection(*responses):
import google.cloud.bigquery._http
- import mock
from google.cloud.exceptions import NotFound
mock_conn = mock.create_autospec(google.cloud.bigquery._http.Connection)
@@ -43,6 +44,20 @@ def make_client(project="PROJECT", **kw):
return google.cloud.bigquery.client.Client(project, credentials, **kw)
+def make_creds(creds_universe: "str | None"):
+ from google.auth import credentials
+
+ class TestingCreds(credentials.Credentials):
+ def refresh(self, request): # pragma: NO COVER
+ raise NotImplementedError
+
+ @property
+ def universe_domain(self):
+ return creds_universe
+
+ return TestingCreds()
+
+
def make_dataset_reference_string(project, ds_id):
return f"{project}.{ds_id}"
diff --git a/tests/unit/job/helpers.py b/tests/unit/job/helpers.py
index 3642c7229..24ba2fa99 100644
--- a/tests/unit/job/helpers.py
+++ b/tests/unit/job/helpers.py
@@ -106,7 +106,9 @@ def _setUpConstants(self):
from google.cloud._helpers import UTC
self.WHEN_TS = 1437767599.006
- self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC)
+ self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace(
+ tzinfo=UTC
+ )
self.ETAG = "ETAG"
self.FULL_JOB_ID = "%s:%s" % (self.PROJECT, self.JOB_ID)
self.RESOURCE_URL = "{}/bigquery/v2/projects/{}/jobs/{}".format(
diff --git a/tests/unit/job/test_async_job_retry.py b/tests/unit/job/test_async_job_retry.py
new file mode 100644
index 000000000..35041aa1b
--- /dev/null
+++ b/tests/unit/job/test_async_job_retry.py
@@ -0,0 +1,139 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+
+import google.api_core.retry
+from google.api_core import exceptions
+
+from . import helpers
+import google.cloud.bigquery.job
+
+
+PROJECT = "test-project"
+JOB_ID = "test-job-id"
+
+
+def test_cancel_w_custom_retry(global_time_lock):
+ from google.cloud.bigquery.retry import DEFAULT_RETRY
+
+ api_path = "/projects/{}/jobs/{}/cancel".format(PROJECT, JOB_ID)
+ resource = {
+ "jobReference": {
+ "jobId": JOB_ID,
+ "projectId": PROJECT,
+ "location": None,
+ },
+ "configuration": {"test": True},
+ }
+ expected = resource.copy()
+ expected["statistics"] = {}
+ response = {"job": resource}
+ conn = helpers.make_connection(
+ ValueError,
+ response,
+ )
+ client = helpers._make_client(project=PROJECT, connection=conn)
+ job = google.cloud.bigquery.job._AsyncJob(
+ google.cloud.bigquery.job._JobReference(JOB_ID, PROJECT, "EU"), client
+ )
+
+ retry = DEFAULT_RETRY.with_deadline(1).with_predicate(
+ lambda exc: isinstance(exc, ValueError)
+ )
+
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ result = job.cancel(retry=retry, timeout=7.5)
+
+ final_attributes.assert_called()
+
+ assert result is True
+ assert job._properties == expected
+ conn.api_request.assert_has_calls(
+ [
+ mock.call(
+ method="POST",
+ path=api_path,
+ query_params={"location": "EU"},
+ timeout=7.5,
+ ),
+ mock.call(
+ method="POST",
+ path=api_path,
+ query_params={"location": "EU"},
+ timeout=7.5,
+ ), # was retried once
+ ],
+ )
+
+
+def test_result_w_retry_wo_state(global_time_lock):
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
+
+ begun_job_resource = helpers._make_job_resource(
+ job_id=JOB_ID, project_id=PROJECT, location="EU", started=True
+ )
+ done_job_resource = helpers._make_job_resource(
+ job_id=JOB_ID,
+ project_id=PROJECT,
+ location="EU",
+ started=True,
+ ended=True,
+ )
+ conn = helpers.make_connection(
+ exceptions.NotFound("not normally retriable"),
+ begun_job_resource,
+ exceptions.NotFound("not normally retriable"),
+ done_job_resource,
+ )
+ client = helpers._make_client(project=PROJECT, connection=conn)
+ job = google.cloud.bigquery.job._AsyncJob(
+ google.cloud.bigquery.job._JobReference(JOB_ID, PROJECT, "EU"), client
+ )
+ custom_predicate = mock.Mock()
+ custom_predicate.return_value = True
+ custom_retry = google.api_core.retry.Retry(
+ predicate=custom_predicate,
+ initial=0.001,
+ maximum=0.001,
+ deadline=0.1,
+ )
+ assert job.result(retry=custom_retry) is job
+
+ begin_call = mock.call(
+ method="POST",
+ path=f"/projects/{PROJECT}/jobs",
+ data={
+ "jobReference": {
+ "jobId": JOB_ID,
+ "projectId": PROJECT,
+ "location": "EU",
+ }
+ },
+ timeout=None,
+ )
+ reload_call = mock.call(
+ method="GET",
+ path=f"/projects/{PROJECT}/jobs/{JOB_ID}",
+ query_params={
+ "projection": "full",
+ "location": "EU",
+ },
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
+ )
+ conn.api_request.assert_has_calls(
+ [begin_call, begin_call, reload_call, reload_call]
+ )
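The custom retry objects above follow the same pattern: a predicate decides which exceptions are retried, with backoff and deadline bounds keeping the test fast. A small stand-alone illustration:

from google.api_core import retry as retries

# Retry only ValueError, with tight bounds so a non-matching error fails fast.
only_value_errors = retries.Retry(
    predicate=lambda exc: isinstance(exc, ValueError),
    initial=0.001,
    maximum=0.001,
    deadline=0.1,
)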
diff --git a/tests/unit/job/test_base.py b/tests/unit/job/test_base.py
index f0525c22a..420904820 100644
--- a/tests/unit/job/test_base.py
+++ b/tests/unit/job/test_base.py
@@ -15,12 +15,13 @@
import copy
import http
import unittest
+from unittest import mock
-from google.api_core import exceptions
-import google.api_core.retry
-import mock
+from google.api_core.future import polling
import pytest
+from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
+
from ..helpers import make_connection
from .helpers import _make_client
@@ -46,6 +47,27 @@ def test_missing_reason(self):
exception = self._call_fut(error_result)
self.assertEqual(exception.code, http.client.INTERNAL_SERVER_ERROR)
+    def test_concatenate_errors(self):
+ # Added test for b/310544564 and b/318889899.
+ # Ensures that error messages from both error_result and errors are
+ # present in the exception raised.
+
+ error_result = {
+ "reason": "invalid1",
+ "message": "error message 1",
+ }
+ errors = [
+ {"reason": "invalid2", "message": "error message 2"},
+ {"reason": "invalid3", "message": "error message 3"},
+ ]
+
+ exception = self._call_fut(error_result, errors)
+ self.assertEqual(
+ exception.message,
+ "error message 1; reason: invalid2, message: error message 2; "
+ "reason: invalid3, message: error message 3",
+ )
+
class Test_JobReference(unittest.TestCase):
JOB_ID = "job-id"
@@ -307,7 +329,7 @@ def _datetime_and_millis():
import datetime
from google.cloud._helpers import _millis
- now = datetime.datetime.utcnow().replace(
+ now = datetime.datetime.now(datetime.timezone.utc).replace(
microsecond=123000,
tzinfo=datetime.timezone.utc, # stats timestamps have ms precision
)
@@ -419,6 +441,16 @@ def test_state(self):
status["state"] = state
self.assertEqual(job.state, state)
+ def test_reservation_id(self):
+ reservation_id = "RESERVATION-ID"
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, client)
+ self.assertIsNone(job.reservation_id)
+ stats = job._properties["statistics"] = {}
+ self.assertIsNone(job.reservation_id)
+ stats["reservation_id"] = reservation_id
+ self.assertEqual(job.reservation_id, reservation_id)
+
def _set_properties_job(self):
client = _make_client(project=self.PROJECT)
job = self._make_one(self.JOB_ID, client)
@@ -432,11 +464,19 @@ def _set_properties_job(self):
def test__set_properties_no_stats(self):
config = {"test": True}
resource = {"configuration": config}
+ expected = resource.copy()
+ expected["statistics"] = {}
job = self._set_properties_job()
+ original_resource = job._properties
job._set_properties(resource)
- self.assertEqual(job._properties, resource)
+ self.assertEqual(job._properties, expected)
+
+ # Make sure we don't mutate the object used in the request, as that
+ # makes debugging more difficult and leads to false positives in unit
+ # tests.
+ self.assertIsNot(job._properties, original_resource)
def test__set_properties_w_creation_time(self):
now, millis = self._datetime_and_millis()
@@ -546,6 +586,8 @@ def test__begin_defaults(self):
},
"configuration": {"test": True},
}
+ expected = resource.copy()
+ expected["statistics"] = {}
job = self._set_properties_job()
builder = job.to_api_repr = mock.Mock()
builder.return_value = resource
@@ -564,7 +606,7 @@ def test__begin_defaults(self):
data=resource,
timeout=None,
)
- self.assertEqual(job._properties, resource)
+ self.assertEqual(job._properties, expected)
def test__begin_explicit(self):
from google.cloud.bigquery.retry import DEFAULT_RETRY
@@ -578,6 +620,8 @@ def test__begin_explicit(self):
},
"configuration": {"test": True},
}
+ expected = resource.copy()
+ expected["statistics"] = {}
job = self._set_properties_job()
builder = job.to_api_repr = mock.Mock()
builder.return_value = resource
@@ -598,7 +642,7 @@ def test__begin_explicit(self):
data=resource,
timeout=7.5,
)
- self.assertEqual(job._properties, resource)
+ self.assertEqual(job._properties, expected)
def test_exists_defaults_miss(self):
from google.cloud.exceptions import NotFound
@@ -675,7 +719,7 @@ def test_exists_w_timeout(self):
)
def test_reload_defaults(self):
- from google.cloud.bigquery.retry import DEFAULT_RETRY
+ from google.cloud.bigquery.retry import DEFAULT_RETRY, DEFAULT_GET_JOB_TIMEOUT
resource = {
"jobReference": {
@@ -685,6 +729,8 @@ def test_reload_defaults(self):
},
"configuration": {"test": True},
}
+ expected = resource.copy()
+ expected["statistics"] = {}
job = self._set_properties_job()
job._properties["jobReference"]["location"] = self.LOCATION
call_api = job._client._call_api = mock.Mock()
@@ -693,17 +739,21 @@ def test_reload_defaults(self):
call_api.assert_called_once_with(
DEFAULT_RETRY,
- span_name="BigQuery.job.reload",
+ span_name="BigQuery.getJob",
span_attributes={
- "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)
+ "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID),
+ "job_id": "job-id",
+ "location": "us-central",
},
- job_ref=job,
method="GET",
path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID),
- query_params={"location": self.LOCATION},
- timeout=None,
+ query_params={
+ "projection": "full",
+ "location": "us-central",
+ },
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
- self.assertEqual(job._properties, resource)
+ self.assertEqual(job._properties, expected)
def test_reload_explicit(self):
from google.cloud.bigquery.retry import DEFAULT_RETRY
@@ -717,6 +767,8 @@ def test_reload_explicit(self):
},
"configuration": {"test": True},
}
+ expected = resource.copy()
+ expected["statistics"] = {}
job = self._set_properties_job()
client = _make_client(project=other_project)
call_api = client._call_api = mock.Mock()
@@ -726,17 +778,42 @@ def test_reload_explicit(self):
call_api.assert_called_once_with(
retry,
- span_name="BigQuery.job.reload",
+ span_name="BigQuery.getJob",
span_attributes={
- "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID)
+ "path": "/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID),
+ "job_id": "job-id",
+ "location": None,
},
- job_ref=job,
method="GET",
path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID),
- query_params={},
+ query_params={"projection": "full"},
timeout=4.2,
)
- self.assertEqual(job._properties, resource)
+ self.assertEqual(job._properties, expected)
+
+ def test_reload_none_timeout(self):
+ from google.cloud.bigquery.retry import DEFAULT_RETRY
+
+ resource = {
+ "jobReference": {
+ "jobId": self.JOB_ID,
+ "projectId": self.PROJECT,
+ "location": None,
+ },
+ "configuration": {"test": True},
+ }
+ client = _make_client(project=self.PROJECT)
+ conn = client._connection = make_connection(resource)
+ job = self._set_properties_job()
+ retry = DEFAULT_RETRY.with_deadline(1)
+ job.reload(client=client, retry=retry, timeout=None)
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/projects/{}/jobs/{}".format(self.PROJECT, self.JOB_ID),
+ query_params={"projection": "full"},
+ timeout=None,
+ )
def test_cancel_defaults(self):
resource = {
@@ -747,6 +824,8 @@ def test_cancel_defaults(self):
},
"configuration": {"test": True},
}
+ expected = resource.copy()
+ expected["statistics"] = {}
response = {"job": resource}
job = self._set_properties_job()
job._properties["jobReference"]["location"] = self.LOCATION
@@ -764,7 +843,7 @@ def test_cancel_defaults(self):
query_params={"location": self.LOCATION},
timeout=None,
)
- self.assertEqual(job._properties, resource)
+ self.assertEqual(job._properties, expected)
def test_cancel_explicit(self):
other_project = "other-project-234"
@@ -776,6 +855,8 @@ def test_cancel_explicit(self):
},
"configuration": {"test": True},
}
+ expected = resource.copy()
+ expected["statistics"] = {}
response = {"job": resource}
job = self._set_properties_job()
client = _make_client(project=other_project)
@@ -797,49 +878,7 @@ def test_cancel_explicit(self):
query_params={},
timeout=7.5,
)
- self.assertEqual(job._properties, resource)
-
- def test_cancel_w_custom_retry(self):
- from google.cloud.bigquery.retry import DEFAULT_RETRY
-
- api_path = "/projects/{}/jobs/{}/cancel".format(self.PROJECT, self.JOB_ID)
- resource = {
- "jobReference": {
- "jobId": self.JOB_ID,
- "projectId": self.PROJECT,
- "location": None,
- },
- "configuration": {"test": True},
- }
- response = {"job": resource}
- job = self._set_properties_job()
-
- api_request_patcher = mock.patch.object(
- job._client._connection, "api_request", side_effect=[ValueError, response]
- )
- retry = DEFAULT_RETRY.with_deadline(1).with_predicate(
- lambda exc: isinstance(exc, ValueError)
- )
-
- with api_request_patcher as fake_api_request:
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- result = job.cancel(retry=retry, timeout=7.5)
-
- final_attributes.assert_called()
-
- self.assertTrue(result)
- self.assertEqual(job._properties, resource)
- self.assertEqual(
- fake_api_request.call_args_list,
- [
- mock.call(method="POST", path=api_path, query_params={}, timeout=7.5),
- mock.call(
- method="POST", path=api_path, query_params={}, timeout=7.5
- ), # was retried once
- ],
- )
+ self.assertEqual(job._properties, expected)
def test__set_future_result_wo_done(self):
client = _make_client(project=self.PROJECT)
@@ -908,7 +947,10 @@ def test_done_defaults_wo_state(self):
self.assertFalse(job.done())
- reload_.assert_called_once_with(retry=DEFAULT_RETRY, timeout=None)
+ reload_.assert_called_once_with(
+ retry=DEFAULT_RETRY,
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
+ )
def test_done_explicit_wo_state(self):
from google.cloud.bigquery.retry import DEFAULT_RETRY
@@ -922,6 +964,18 @@ def test_done_explicit_wo_state(self):
reload_.assert_called_once_with(retry=retry, timeout=7.5)
+ def test_done_with_none_timeout(self):
+ from google.cloud.bigquery.retry import DEFAULT_RETRY
+
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, client)
+ reload_ = job.reload = mock.Mock()
+ retry = DEFAULT_RETRY.with_deadline(1)
+
+ self.assertFalse(job.done(retry=retry, timeout=None))
+
+ reload_.assert_called_once_with(retry=retry, timeout=None)
+
def test_done_already(self):
client = _make_client(project=self.PROJECT)
job = self._make_one(self.JOB_ID, client)
@@ -930,6 +984,8 @@ def test_done_already(self):
self.assertTrue(job.done())
def test_result_default_wo_state(self):
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
+
begun_job_resource = _make_job_resource(
job_id=self.JOB_ID, project_id=self.PROJECT, location="US", started=True
)
@@ -943,13 +999,12 @@ def test_result_default_wo_state(self):
conn = make_connection(
_make_retriable_exception(),
begun_job_resource,
- _make_retriable_exception(),
done_job_resource,
)
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(self.JOB_ID, client)
- self.assertIs(job.result(), job)
+ self.assertIs(job.result(retry=polling.DEFAULT_RETRY), job)
begin_call = mock.call(
method="POST",
@@ -960,65 +1015,13 @@ def test_result_default_wo_state(self):
reload_call = mock.call(
method="GET",
path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}",
- query_params={"location": "US"},
- timeout=None,
- )
- conn.api_request.assert_has_calls(
- [begin_call, begin_call, reload_call, reload_call]
- )
-
- def test_result_w_retry_wo_state(self):
- begun_job_resource = _make_job_resource(
- job_id=self.JOB_ID, project_id=self.PROJECT, location="EU", started=True
- )
- done_job_resource = _make_job_resource(
- job_id=self.JOB_ID,
- project_id=self.PROJECT,
- location="EU",
- started=True,
- ended=True,
- )
- conn = make_connection(
- exceptions.NotFound("not normally retriable"),
- begun_job_resource,
- exceptions.NotFound("not normally retriable"),
- done_job_resource,
- )
- client = _make_client(project=self.PROJECT, connection=conn)
- job = self._make_one(
- self._job_reference(self.JOB_ID, self.PROJECT, "EU"), client
- )
- custom_predicate = mock.Mock()
- custom_predicate.return_value = True
- custom_retry = google.api_core.retry.Retry(
- predicate=custom_predicate,
- initial=0.001,
- maximum=0.001,
- deadline=0.1,
- )
- self.assertIs(job.result(retry=custom_retry), job)
-
- begin_call = mock.call(
- method="POST",
- path=f"/projects/{self.PROJECT}/jobs",
- data={
- "jobReference": {
- "jobId": self.JOB_ID,
- "projectId": self.PROJECT,
- "location": "EU",
- }
+ query_params={
+ "projection": "full",
+ "location": "US",
},
- timeout=None,
- )
- reload_call = mock.call(
- method="GET",
- path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}",
- query_params={"location": "EU"},
- timeout=None,
- )
- conn.api_request.assert_has_calls(
- [begin_call, begin_call, reload_call, reload_call]
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
+ conn.api_request.assert_has_calls([begin_call, begin_call, reload_call])
def test_result_explicit_w_state(self):
conn = make_connection()
@@ -1085,21 +1088,43 @@ def test_ctor_with_unknown_property_raises_error(self):
config = self._make_one()
config.wrong_name = None
- def test_fill_from_default(self):
+ def test_fill_query_job_config_from_default(self):
from google.cloud.bigquery import QueryJobConfig
job_config = QueryJobConfig()
job_config.dry_run = True
job_config.maximum_bytes_billed = 1000
+ job_config.reservation = "reservation_1"
default_job_config = QueryJobConfig()
default_job_config.use_query_cache = True
default_job_config.maximum_bytes_billed = 2000
+ default_job_config.reservation = "reservation_2"
final_job_config = job_config._fill_from_default(default_job_config)
self.assertTrue(final_job_config.dry_run)
self.assertTrue(final_job_config.use_query_cache)
self.assertEqual(final_job_config.maximum_bytes_billed, 1000)
+ self.assertEqual(final_job_config.reservation, "reservation_1")
+
+ def test_fill_load_job_from_default(self):
+ from google.cloud.bigquery import LoadJobConfig
+
+ job_config = LoadJobConfig()
+ job_config.create_session = True
+ job_config.encoding = "UTF-8"
+ job_config.reservation = "reservation_1"
+
+ default_job_config = LoadJobConfig()
+ default_job_config.ignore_unknown_values = True
+ default_job_config.encoding = "ISO-8859-1"
+ default_job_config.reservation = "reservation_2"
+
+ final_job_config = job_config._fill_from_default(default_job_config)
+ self.assertTrue(final_job_config.create_session)
+ self.assertTrue(final_job_config.ignore_unknown_values)
+ self.assertEqual(final_job_config.encoding, "UTF-8")
+ self.assertEqual(final_job_config.reservation, "reservation_1")
def test_fill_from_default_conflict(self):
from google.cloud.bigquery import QueryJobConfig
@@ -1113,6 +1138,19 @@ def test_fill_from_default_conflict(self):
with self.assertRaises(TypeError):
basic_job_config._fill_from_default(conflicting_job_config)
+ def test_fill_from_empty_default_conflict(self):
+ from google.cloud.bigquery import QueryJobConfig
+
+ job_config = QueryJobConfig()
+ job_config.dry_run = True
+ job_config.maximum_bytes_billed = 1000
+ job_config.reservation = "reservation_1"
+
+ final_job_config = job_config._fill_from_default(default_job_config=None)
+ self.assertTrue(final_job_config.dry_run)
+ self.assertEqual(final_job_config.maximum_bytes_billed, 1000)
+ self.assertEqual(final_job_config.reservation, "reservation_1")
+
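# Minimal sketch of the merge semantics the _fill_from_default tests above
# exercise, using a flat dict instead of the real job-config classes
# (illustration only, under that assumption): explicitly set values win,
# missing ones come from the default, and a None default yields a plain copy.
import copy
from typing import Optional


def fill_from_default(config_props: dict, default_props: Optional[dict]) -> dict:
    merged = copy.deepcopy(default_props) if default_props else {}
    merged.update(copy.deepcopy(config_props))  # explicit settings take precedence
    return merged


merged = fill_from_default(
    {"maximumBytesBilled": 1000, "reservation": "reservation_1"},
    {"maximumBytesBilled": 2000, "useQueryCache": True, "reservation": "reservation_2"},
)
assert merged == {
    "maximumBytesBilled": 1000,
    "useQueryCache": True,
    "reservation": "reservation_1",
}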
@mock.patch("google.cloud.bigquery._helpers._get_sub_prop")
def test__get_sub_prop_wo_default(self, _get_sub_prop):
job_config = self._make_one()
@@ -1181,3 +1219,101 @@ def test_labels_setter(self):
job_config = self._make_one()
job_config.labels = labels
self.assertEqual(job_config._properties["labels"], labels)
+
+ def test_job_timeout_ms_raises_valueerror(self):
+ # Confirm that attempting to set a non-integer value raises an error.
+ with pytest.raises(ValueError):
+ job_config = self._make_one()
+ job_config.job_timeout_ms = "WillRaiseError"
+
+ def test_job_timeout_ms(self):
+ # Confirm that default status is None.
+ job_config = self._make_one()
+ assert job_config.job_timeout_ms is None
+
+ # Confirm that integers get converted to strings.
+ job_config.job_timeout_ms = 5000
+ assert job_config.job_timeout_ms == "5000" # int is converted to string
+
+ def test_job_timeout_is_none_when_set_none(self):
+ job_config = self._make_one()
+ job_config.job_timeout_ms = None
+ # Confirm value is None and not literal string 'None'
+ assert job_config.job_timeout_ms is None
+
+ def test_job_timeout_properties(self):
+ # Make sure any value stored in _properties is erased
+ # when setting job_timeout_ms to None.
+ job_config = self._make_one()
+ job_config.job_timeout_ms = 4200
+ assert job_config.job_timeout_ms == "4200"
+ assert job_config._properties.get("jobTimeoutMs") == "4200"
+
+ job_config.job_timeout_ms = None
+ assert job_config.job_timeout_ms is None
+ assert "jobTimeoutMs" not in job_config._properties
+
+ def test_reservation_miss(self):
+ job_config = self._make_one()
+ self.assertEqual(job_config.reservation, None)
+
+ def test_reservation_hit(self):
+ job_config = self._make_one()
+ job_config._properties["reservation"] = "foo"
+ self.assertEqual(job_config.reservation, "foo")
+
+ def test_reservation_update_in_place(self):
+ job_config = self._make_one()
+ job_config.reservation = "bar" # update in place
+ self.assertEqual(job_config.reservation, "bar")
+
+ def test_reservation_setter_invalid(self):
+ job_config = self._make_one()
+ with self.assertRaises(ValueError):
+ job_config.reservation = object()
+
+ def test_reservation_setter(self):
+ job_config = self._make_one()
+ job_config.reservation = "foo"
+ self.assertEqual(job_config._properties["reservation"], "foo")
+
+ def test_max_slots_miss(self):
+ job_config = self._make_one()
+ self.assertEqual(job_config.max_slots, None)
+
+ def test_max_slots_set_and_clear(self):
+ job_config = self._make_one()
+ job_config.max_slots = 14
+ self.assertEqual(job_config.max_slots, 14)
+ job_config.max_slots = None
+ self.assertEqual(job_config.max_slots, None)
+
+ def test_max_slots_hit_str(self):
+ job_config = self._make_one()
+ job_config._properties["maxSlots"] = "4"
+ self.assertEqual(job_config.max_slots, 4)
+
+ def test_max_slots_hit_int(self):
+ job_config = self._make_one()
+ job_config._properties["maxSlots"] = int(3)
+ self.assertEqual(job_config.max_slots, 3)
+
+ def test_max_slots_hit_invalid(self):
+ job_config = self._make_one()
+ job_config._properties["maxSlots"] = object()
+ self.assertEqual(job_config.max_slots, None)
+
+ def test_max_slots_update_in_place(self):
+ job_config = self._make_one()
+ job_config.max_slots = 45 # update in place
+ self.assertEqual(job_config.max_slots, 45)
+
+ def test_max_slots_setter_invalid(self):
+ job_config = self._make_one()
+ with self.assertRaises(ValueError):
+ job_config.max_slots = "foo"
+
+ def test_max_slots_setter(self):
+ job_config = self._make_one()
+ job_config.max_slots = 123
+ self.assertEqual(job_config._properties["maxSlots"], "123")
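# Hypothetical sketch (not the library's code) of the int-backed-by-string
# property pattern the job_timeout_ms and max_slots tests check: the REST
# resource stores these fields as strings, so the setter validates and
# stringifies integers, assigning None removes the key, and the max_slots
# getter parses the stored string back into an int.
from typing import Optional


class ConfigSketch:
    def __init__(self) -> None:
        self._properties: dict = {}

    @property
    def job_timeout_ms(self) -> Optional[str]:
        return self._properties.get("jobTimeoutMs")

    @job_timeout_ms.setter
    def job_timeout_ms(self, value: Optional[int]) -> None:
        if value is None:
            self._properties.pop("jobTimeoutMs", None)
            return
        try:
            self._properties["jobTimeoutMs"] = str(int(value))
        except (TypeError, ValueError):
            raise ValueError("job_timeout_ms must be an integer or None")

    @property
    def max_slots(self) -> Optional[int]:
        value = self._properties.get("maxSlots")
        try:
            return int(value)
        except (TypeError, ValueError):
            return None  # missing or unparsable values surface as None


config = ConfigSketch()
config.job_timeout_ms = 4200
assert config._properties["jobTimeoutMs"] == "4200"
config.job_timeout_ms = None
assert "jobTimeoutMs" not in config._properties
config._properties["maxSlots"] = "4"
assert config.max_slots == 4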
diff --git a/tests/unit/job/test_copy.py b/tests/unit/job/test_copy.py
index a3b5c70e3..8e2845316 100644
--- a/tests/unit/job/test_copy.py
+++ b/tests/unit/job/test_copy.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import mock
+from unittest import mock
from ..helpers import make_connection
@@ -147,7 +147,6 @@ def _verifyResourceProperties(self, job, resource):
self._verifyReadonlyResourceProperties(job, resource)
config = resource.get("configuration", {}).get("copy")
-
table_ref = config["destinationTable"]
self.assertEqual(job.destination.project, table_ref["projectId"])
self.assertEqual(job.destination.dataset_id, table_ref["datasetId"])
@@ -477,6 +476,8 @@ def test_exists_hit_w_alternate_client(self):
)
def test_reload_w_bound_client(self):
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
+
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
conn = make_connection(RESOURCE)
@@ -489,14 +490,27 @@ def test_reload_w_bound_client(self):
) as final_attributes:
job.reload()
- final_attributes.assert_called_with({"path": PATH}, client, job)
+ final_attributes.assert_called_with(
+ {
+ "path": PATH,
+ "job_id": self.JOB_ID,
+ "location": None,
+ },
+ client,
+ None,
+ )
conn.api_request.assert_called_once_with(
- method="GET", path=PATH, query_params={}, timeout=None
+ method="GET",
+ path=PATH,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
self._verifyResourceProperties(job, RESOURCE)
def test_reload_w_alternate_client(self):
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
+
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
conn1 = make_connection()
@@ -511,10 +525,21 @@ def test_reload_w_alternate_client(self):
) as final_attributes:
job.reload(client=client2)
- final_attributes.assert_called_with({"path": PATH}, client2, job)
+ final_attributes.assert_called_with(
+ {
+ "path": PATH,
+ "job_id": self.JOB_ID,
+ "location": None,
+ },
+ client2,
+ None,
+ )
conn1.api_request.assert_not_called()
conn2.api_request.assert_called_once_with(
- method="GET", path=PATH, query_params={}, timeout=None
+ method="GET",
+ path=PATH,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
self._verifyResourceProperties(job, RESOURCE)
diff --git a/tests/unit/job/test_extract.py b/tests/unit/job/test_extract.py
index 8bada51af..ebf9f09e6 100644
--- a/tests/unit/job/test_extract.py
+++ b/tests/unit/job/test_extract.py
@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import mock
+import json
+from unittest import mock
from ..helpers import make_connection
@@ -45,9 +46,8 @@ def test_to_api_repr(self):
config.print_header = False
config._properties["extract"]["someNewField"] = "some-value"
config.use_avro_logical_types = True
- resource = config.to_api_repr()
- self.assertEqual(
- resource,
+ resource = json.dumps(config.to_api_repr(), sort_keys=True)
+ expected = json.dumps(
{
"extract": {
"compression": "SNAPPY",
@@ -58,6 +58,12 @@ def test_to_api_repr(self):
"useAvroLogicalTypes": True,
}
},
+ sort_keys=True,
+ )
+
+ self.assertEqual(
+ resource,
+ expected,
)
def test_from_api_repr(self):
@@ -393,6 +399,7 @@ def test_exists_hit_w_alternate_client(self):
def test_reload_w_bound_client(self):
from google.cloud.bigquery.dataset import DatasetReference
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
@@ -406,14 +413,26 @@ def test_reload_w_bound_client(self):
) as final_attributes:
job.reload()
- final_attributes.assert_called_with({"path": PATH}, client, job)
+ final_attributes.assert_called_with(
+ {
+ "path": PATH,
+ "job_id": self.JOB_ID,
+ "location": None,
+ },
+ client,
+ None,
+ )
conn.api_request.assert_called_once_with(
- method="GET", path=PATH, query_params={}, timeout=None
+ method="GET",
+ path=PATH,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
self._verifyResourceProperties(job, RESOURCE)
def test_reload_w_alternate_client(self):
from google.cloud.bigquery.dataset import DatasetReference
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
@@ -429,10 +448,21 @@ def test_reload_w_alternate_client(self):
) as final_attributes:
job.reload(client=client2)
- final_attributes.assert_called_with({"path": PATH}, client2, job)
+ final_attributes.assert_called_with(
+ {
+ "path": PATH,
+ "job_id": self.JOB_ID,
+ "location": None,
+ },
+ client2,
+ None,
+ )
conn1.api_request.assert_not_called()
conn2.api_request.assert_called_once_with(
- method="GET", path=PATH, query_params={}, timeout=None
+ method="GET",
+ path=PATH,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
self._verifyResourceProperties(job, RESOURCE)
diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py
index cf2096b8b..b551d52dd 100644
--- a/tests/unit/job/test_load.py
+++ b/tests/unit/job/test_load.py
@@ -13,13 +13,13 @@
# limitations under the License.
import copy
-
-import mock
+from unittest import mock
from ..helpers import make_connection
from .helpers import _Base
from .helpers import _make_client
+from google.cloud.bigquery.enums import SourceColumnMatch
class TestLoadJob(_Base):
@@ -37,16 +37,33 @@ def _setUpConstants(self):
self.INPUT_BYTES = 12345
self.OUTPUT_BYTES = 23456
self.OUTPUT_ROWS = 345
+ self.REFERENCE_FILE_SCHEMA_URI = "gs://path/to/reference"
+ self.SOURCE_COLUMN_MATCH = "NAME"
+ self.DATE_FORMAT = "%Y-%m-%d"
+ self.DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
+ self.TIME_ZONE = "UTC"
+ self.TIME_FORMAT = "%H:%M:%S"
+ self.TIMESTAMP_FORMAT = "YYYY-MM-DD HH:MM:SS.SSSSSSZ"
+ self.NULL_MARKERS = ["", "NA"]
def _make_resource(self, started=False, ended=False):
resource = super(TestLoadJob, self)._make_resource(started, ended)
config = resource["configuration"]["load"]
config["sourceUris"] = [self.SOURCE1]
+ config["sourceColumnMatch"] = self.SOURCE_COLUMN_MATCH
+ config["dateFormat"] = self.DATE_FORMAT
+ config["datetimeFormat"] = self.DATETIME_FORMAT
+ config["timeZone"] = self.TIME_ZONE
+ config["timeFormat"] = self.TIME_FORMAT
+ config["timestampFormat"] = self.TIMESTAMP_FORMAT
+ config["nullMarkers"] = self.NULL_MARKERS
+
config["destinationTable"] = {
"projectId": self.PROJECT,
"datasetId": self.DS_ID,
"tableId": self.TABLE_ID,
}
+ config["referenceFileSchemaUri"] = self.REFERENCE_FILE_SCHEMA_URI
if ended:
resource["status"] = {"state": "DONE"}
@@ -128,6 +145,10 @@ def _verifyResourceProperties(self, job, resource):
self.assertEqual(job.null_marker, config["nullMarker"])
else:
self.assertIsNone(job.null_marker)
+ if "nullMarkers" in config:
+ self.assertEqual(job.null_markers, config["nullMarkers"])
+ else:
+ self.assertIsNone(job.null_markers)
if "quote" in config:
self.assertEqual(job.quote_character, config["quote"])
else:
@@ -136,7 +157,12 @@ def _verifyResourceProperties(self, job, resource):
self.assertEqual(str(job.skip_leading_rows), config["skipLeadingRows"])
else:
self.assertIsNone(job.skip_leading_rows)
-
+ if "referenceFileSchemaUri" in config:
+ self.assertEqual(
+ job.reference_file_schema_uri, config["referenceFileSchemaUri"]
+ )
+ else:
+ self.assertIsNone(job.reference_file_schema_uri)
if "destinationEncryptionConfiguration" in config:
self.assertIsNotNone(job.destination_encryption_configuration)
self.assertEqual(
@@ -145,6 +171,35 @@ def _verifyResourceProperties(self, job, resource):
)
else:
self.assertIsNone(job.destination_encryption_configuration)
+ if "dateFormat" in config:
+ self.assertEqual(job.date_format, config["dateFormat"])
+ else:
+ self.assertIsNone(job.date_format)
+ if "datetimeFormat" in config:
+ self.assertEqual(job.datetime_format, config["datetimeFormat"])
+ else:
+ self.assertIsNone(job.datetime_format)
+ if "timeZone" in config:
+ self.assertEqual(job.time_zone, config["timeZone"])
+ else:
+ self.assertIsNone(job.time_zone)
+ if "timeFormat" in config:
+ self.assertEqual(job.time_format, config["timeFormat"])
+ else:
+ self.assertIsNone(job.time_format)
+ if "timestampFormat" in config:
+ self.assertEqual(job.timestamp_format, config["timestampFormat"])
+ else:
+ self.assertIsNone(job.timestamp_format)
+
+ if "sourceColumnMatch" in config:
+ # job.source_column_match will be an Enum, config[...] is a string
+ self.assertEqual(
+ job.source_column_match.value,
+ config["sourceColumnMatch"],
+ )
+ else:
+ self.assertIsNone(job.source_column_match)
def test_ctor(self):
client = _make_client(project=self.PROJECT)
@@ -174,6 +229,7 @@ def test_ctor(self):
self.assertIsNone(job.ignore_unknown_values)
self.assertIsNone(job.max_bad_records)
self.assertIsNone(job.null_marker)
+ self.assertIsNone(job.null_markers)
self.assertIsNone(job.quote_character)
self.assertIsNone(job.skip_leading_rows)
self.assertIsNone(job.source_format)
@@ -186,6 +242,13 @@ def test_ctor(self):
self.assertIsNone(job.use_avro_logical_types)
self.assertIsNone(job.clustering_fields)
self.assertIsNone(job.schema_update_options)
+ self.assertIsNone(job.reference_file_schema_uri)
+ self.assertIsNone(job.source_column_match)
+ self.assertIsNone(job.date_format)
+ self.assertIsNone(job.datetime_format)
+ self.assertIsNone(job.time_zone)
+ self.assertIsNone(job.time_format)
+ self.assertIsNone(job.timestamp_format)
def test_ctor_w_config(self):
from google.cloud.bigquery.schema import SchemaField
@@ -264,7 +327,7 @@ def test_schema_setter_invalid_field(self):
config = LoadJobConfig()
full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
- with self.assertRaises(ValueError):
+ with self.assertRaises(TypeError):
config.schema = [full_name, object()]
def test_schema_setter(self):
@@ -383,6 +446,8 @@ def test_from_api_repr_bare(self):
job = klass.from_api_repr(RESOURCE, client=client)
self.assertIs(job._client, client)
self._verifyResourceProperties(job, RESOURCE)
+ self.assertEqual(len(job.connection_properties), 0)
+ self.assertIsNone(job.create_session)
def test_from_api_with_encryption(self):
self._setUpConstants()
@@ -421,6 +486,24 @@ def test_from_api_repr_w_properties(self):
self.assertIs(job._client, client)
self._verifyResourceProperties(job, RESOURCE)
+ def test_to_api_repr(self):
+ self._setUpConstants()
+ client = _make_client(project=self.PROJECT)
+ RESOURCE = self._make_resource(ended=False)
+
+ klass = self._get_target_class()
+ job = klass.from_api_repr(RESOURCE, client)
+ api_repr = job.to_api_repr()
+
+ # As documented in load.py -> LoadJob.to_api_repr(), the return
+ # value from to_api_repr() should not include statistics.
+ expected = {
+ "jobReference": RESOURCE["jobReference"],
+ "configuration": RESOURCE["configuration"],
+ }
+
+ self.assertEqual(api_repr, expected)
+
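# Hedged sketch of what the to_api_repr() assertion above implies
# (illustration only, not the library's implementation): the request
# representation keeps just the fields a caller may send back to the API and
# drops server-populated ones such as "statistics" and "status".
def job_to_api_repr(resource: dict) -> dict:
    return {
        "jobReference": resource["jobReference"],
        "configuration": resource["configuration"],
    }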
def test_begin_w_already_running(self):
conn = make_connection()
client = _make_client(project=self.PROJECT, connection=conn)
@@ -440,6 +523,7 @@ def test_begin_w_bound_client(self):
conn = make_connection(RESOURCE)
client = _make_client(project=self.PROJECT, connection=conn)
job = self._make_one(self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client)
+ job.configuration.reference_file_schema_uri = self.REFERENCE_FILE_SCHEMA_URI
path = "/projects/{}/jobs".format(self.PROJECT)
with mock.patch(
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
@@ -461,6 +545,7 @@ def test_begin_w_bound_client(self):
"datasetId": self.DS_ID,
"tableId": self.TABLE_ID,
},
+ "referenceFileSchemaUri": self.REFERENCE_FILE_SCHEMA_URI,
}
},
},
@@ -486,6 +571,7 @@ def test_begin_w_autodetect(self):
job = self._make_one(
self.JOB_ID, [self.SOURCE1], self.TABLE_REF, client, config
)
+ job.configuration.reference_file_schema_uri = self.REFERENCE_FILE_SCHEMA_URI
with mock.patch(
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
@@ -503,6 +589,7 @@ def test_begin_w_autodetect(self):
"datasetId": self.DS_ID,
"tableId": self.TABLE_ID,
},
+ "referenceFileSchemaUri": self.REFERENCE_FILE_SCHEMA_URI,
"autodetect": True,
}
},
@@ -541,24 +628,30 @@ def test_begin_w_alternate_client(self):
"sourceFormat": "CSV",
"useAvroLogicalTypes": True,
"writeDisposition": WriteDisposition.WRITE_TRUNCATE,
+ "referenceFileSchemaUri": "gs://path/to/reference",
"schema": {
"fields": [
{
"name": "full_name",
"type": "STRING",
"mode": "REQUIRED",
- "description": None,
},
{
"name": "age",
"type": "INTEGER",
"mode": "REQUIRED",
- "description": None,
},
]
},
"schemaUpdateOptions": [SchemaUpdateOption.ALLOW_FIELD_ADDITION],
+ "sourceColumnMatch": self.SOURCE_COLUMN_MATCH,
+ "dateFormat": self.DATE_FORMAT,
+ "datetimeFormat": self.DATETIME_FORMAT,
+ "timeZone": self.TIME_ZONE,
+ "timeFormat": self.TIME_FORMAT,
+ "timestampFormat": self.TIMESTAMP_FORMAT,
}
+
RESOURCE["configuration"]["load"] = LOAD_CONFIGURATION
conn1 = make_connection()
client1 = _make_client(project=self.PROJECT, connection=conn1)
@@ -585,6 +678,14 @@ def test_begin_w_alternate_client(self):
config.use_avro_logical_types = True
config.write_disposition = WriteDisposition.WRITE_TRUNCATE
config.schema_update_options = [SchemaUpdateOption.ALLOW_FIELD_ADDITION]
+ config.reference_file_schema_uri = "gs://path/to/reference"
+ config.source_column_match = SourceColumnMatch(self.SOURCE_COLUMN_MATCH)
+ config.date_format = self.DATE_FORMAT
+ config.datetime_format = self.DATETIME_FORMAT
+ config.time_zone = self.TIME_ZONE
+ config.time_format = self.TIME_FORMAT
+ config.timestamp_format = self.TIMESTAMP_FORMAT
+
with mock.patch(
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
@@ -700,6 +801,8 @@ def test_exists_miss_w_job_reference(self):
)
def test_reload_w_bound_client(self):
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
+
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
conn = make_connection(RESOURCE)
@@ -710,14 +813,27 @@ def test_reload_w_bound_client(self):
) as final_attributes:
job.reload()
- final_attributes.assert_called_with({"path": PATH}, client, job)
+ final_attributes.assert_called_with(
+ {
+ "path": PATH,
+ "job_id": self.JOB_ID,
+ "location": None,
+ },
+ client,
+ None,
+ )
conn.api_request.assert_called_once_with(
- method="GET", path=PATH, query_params={}, timeout=None
+ method="GET",
+ path=PATH,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
self._verifyResourceProperties(job, RESOURCE)
def test_reload_w_alternate_client(self):
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
+
PATH = "/projects/%s/jobs/%s" % (self.PROJECT, self.JOB_ID)
RESOURCE = self._make_resource()
conn1 = make_connection()
@@ -730,16 +846,28 @@ def test_reload_w_alternate_client(self):
) as final_attributes:
job.reload(client=client2)
- final_attributes.assert_called_with({"path": PATH}, client2, job)
+ final_attributes.assert_called_with(
+ {
+ "path": PATH,
+ "job_id": self.JOB_ID,
+ "location": None,
+ },
+ client2,
+ None,
+ )
conn1.api_request.assert_not_called()
conn2.api_request.assert_called_once_with(
- method="GET", path=PATH, query_params={}, timeout=None
+ method="GET",
+ path=PATH,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
self._verifyResourceProperties(job, RESOURCE)
def test_reload_w_job_reference(self):
from google.cloud.bigquery import job
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
resource = self._make_resource(ended=True)
resource["jobReference"]["projectId"] = "alternative-project"
@@ -754,16 +882,20 @@ def test_reload_w_job_reference(self):
load_job.reload()
final_attributes.assert_called_with(
- {"path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID)},
+ {
+ "path": "/projects/alternative-project/jobs/{}".format(self.JOB_ID),
+ "job_id": self.JOB_ID,
+ "location": "US",
+ },
client,
- load_job,
+ None,
)
conn.api_request.assert_called_once_with(
method="GET",
path="/projects/alternative-project/jobs/{}".format(self.JOB_ID),
- query_params={"location": "US"},
- timeout=None,
+ query_params={"projection": "full", "location": "US"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
def test_cancel_w_bound_client(self):
diff --git a/tests/unit/job/test_load_config.py b/tests/unit/job/test_load_config.py
index 5a0c5a83f..27d3cead1 100644
--- a/tests/unit/job/test_load_config.py
+++ b/tests/unit/job/test_load_config.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import copy
import warnings
import pytest
@@ -122,6 +123,27 @@ def test_create_disposition_setter(self):
config.create_disposition = disposition
self.assertEqual(config._properties["load"]["createDisposition"], disposition)
+ def test_connection_properties(self):
+ from google.cloud.bigquery.query import ConnectionProperty
+
+ config = self._get_target_class()()
+ self.assertEqual(len(config.connection_properties), 0)
+
+ session_id = ConnectionProperty("session_id", "abcd")
+ time_zone = ConnectionProperty("time_zone", "America/Chicago")
+ config.connection_properties = [session_id, time_zone]
+ self.assertEqual(len(config.connection_properties), 2)
+ self.assertEqual(config.connection_properties[0].key, "session_id")
+ self.assertEqual(config.connection_properties[0].value, "abcd")
+ self.assertEqual(config.connection_properties[1].key, "time_zone")
+ self.assertEqual(config.connection_properties[1].value, "America/Chicago")
+
+ def test_create_session(self):
+ config = self._get_target_class()()
+ self.assertIsNone(config.create_session)
+ config.create_session = True
+ self.assertTrue(config.create_session)
+
def test_decimal_target_types_miss(self):
config = self._get_target_class()()
self.assertIsNone(config.decimal_target_types)
@@ -392,6 +414,29 @@ def test_ignore_unknown_values_setter(self):
config.ignore_unknown_values = True
self.assertTrue(config._properties["load"]["ignoreUnknownValues"])
+ def test_json_extension_missing(self):
+ config = self._get_target_class()()
+ self.assertIsNone(config.json_extension)
+
+ def test_json_extension_hit(self):
+ config = self._get_target_class()()
+ config._properties["load"]["jsonExtension"] = "GEOJSON"
+ self.assertEqual(config.json_extension, "GEOJSON")
+
+ def test_json_extension_setter(self):
+ config = self._get_target_class()()
+ self.assertFalse(config.json_extension)
+ config.json_extension = "GEOJSON"
+ self.assertTrue(config.json_extension)
+ self.assertEqual(config._properties["load"]["jsonExtension"], "GEOJSON")
+
+ def test_to_api_repr_includes_json_extension(self):
+ config = self._get_target_class()()
+ config._properties["load"]["jsonExtension"] = "GEOJSON"
+ api_repr = config.to_api_repr()
+ self.assertIn("jsonExtension", api_repr["load"])
+ self.assertEqual(api_repr["load"]["jsonExtension"], "GEOJSON")
+
def test_max_bad_records_missing(self):
config = self._get_target_class()()
self.assertIsNone(config.max_bad_records)
@@ -424,6 +469,36 @@ def test_null_marker_setter(self):
config.null_marker = null_marker
self.assertEqual(config._properties["load"]["nullMarker"], null_marker)
+ def test_null_markers_missing(self):
+ config = self._get_target_class()()
+ self.assertIsNone(config.null_markers)
+
+ def test_null_markers_hit(self):
+ null_markers = ["", "NA"]
+ config = self._get_target_class()()
+ config._properties["load"]["nullMarkers"] = null_markers
+ self.assertEqual(config.null_markers, null_markers)
+
+ def test_null_markers_setter(self):
+ null_markers = ["", "NA"]
+ config = self._get_target_class()()
+ config.null_markers = null_markers
+ self.assertEqual(config._properties["load"]["nullMarkers"], null_markers)
+
+ def test_preserve_ascii_control_characters_missing(self):
+ config = self._get_target_class()()
+ self.assertIsNone(config.preserve_ascii_control_characters)
+
+ def test_preserve_ascii_control_characters_hit(self):
+ config = self._get_target_class()()
+ config._properties["load"]["preserveAsciiControlCharacters"] = True
+ self.assertTrue(config.preserve_ascii_control_characters)
+
+ def test_preserve_ascii_control_characters_setter(self):
+ config = self._get_target_class()()
+ config.preserve_ascii_control_characters = True
+ self.assertTrue(config._properties["load"]["preserveAsciiControlCharacters"])
+
def test_projection_fields_miss(self):
config = self._get_target_class()()
self.assertIsNone(config.projection_fields)
@@ -513,16 +588,34 @@ def test_schema_setter_valid_mappings_list(self):
config._properties["load"]["schema"], {"fields": [full_name_repr, age_repr]}
)
- def test_schema_setter_invalid_mappings_list(self):
+ def test_schema_setter_allows_unknown_properties(self):
config = self._get_target_class()()
schema = [
- {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
- {"name": "age", "typeoo": "INTEGER", "mode": "REQUIRED"},
+ {
+ "name": "full_name",
+ "type": "STRING",
+ "mode": "REQUIRED",
+ "someNewProperty": "test-value",
+ },
+ {
+ "name": "age",
+ # Note: the misspelled "typo" key should be passed through,
+ # too. Avoid client-side validation, as it could prevent
+ # backwards-compatible evolution of the server-side behavior.
+ "typo": "INTEGER",
+ "mode": "REQUIRED",
+ "anotherNewProperty": "another-test",
+ },
]
- with self.assertRaises(Exception):
- config.schema = schema
+ # Make sure the setter doesn't mutate schema.
+ expected_schema = copy.deepcopy(schema)
+
+ config.schema = schema
+
+ # _properties should include all fields, including unknown ones.
+ assert config._properties["load"]["schema"]["fields"] == expected_schema
def test_schema_setter_unsetting_schema(self):
from google.cloud.bigquery.schema import SchemaField
@@ -751,6 +844,120 @@ def test_write_disposition_setter(self):
config._properties["load"]["writeDisposition"], write_disposition
)
+ def test_source_column_match_missing(self):
+ config = self._get_target_class()()
+ self.assertIsNone(config.source_column_match)
+
+ def test_source_column_match_hit(self):
+ from google.cloud.bigquery.enums import SourceColumnMatch
+
+ option_enum = SourceColumnMatch.NAME
+ config = self._get_target_class()()
+ # Assume API stores the string value of the enum
+ config._properties["load"]["sourceColumnMatch"] = option_enum.value
+ self.assertEqual(config.source_column_match, option_enum)
+
+ def test_source_column_match_setter(self):
+ from google.cloud.bigquery.enums import SourceColumnMatch
+
+ option_enum = SourceColumnMatch.POSITION
+ config = self._get_target_class()()
+ config.source_column_match = option_enum
+ # Assert that the string value of the enum is stored
+ self.assertEqual(
+ config._properties["load"]["sourceColumnMatch"], option_enum.value
+ )
+ option_str = "NAME"
+ config.source_column_match = option_str
+ self.assertEqual(config._properties["load"]["sourceColumnMatch"], option_str)
+
+ def test_source_column_match_setter_invalid_type(self):
+ config = self._get_target_class()()
+ with self.assertRaises(TypeError):
+ config.source_column_match = 3.14
+
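# Standalone sketch of the enum-or-string behaviour the sourceColumnMatch
# tests expect (hypothetical code with a stand-in enum, not the real classes):
# both enum members and plain strings are stored as the string value, anything
# else raises TypeError, and the getter rebuilds the enum from the string.
import enum
from typing import Optional, Union


class SourceColumnMatchSketch(str, enum.Enum):
    POSITION = "POSITION"
    NAME = "NAME"


class LoadConfigSketch:
    def __init__(self) -> None:
        self._properties: dict = {"load": {}}

    @property
    def source_column_match(self) -> Optional[SourceColumnMatchSketch]:
        value = self._properties["load"].get("sourceColumnMatch")
        return None if value is None else SourceColumnMatchSketch(value)

    @source_column_match.setter
    def source_column_match(
        self, value: Union[str, SourceColumnMatchSketch]
    ) -> None:
        if isinstance(value, SourceColumnMatchSketch):
            value = value.value
        if not isinstance(value, str):
            raise TypeError("source_column_match must be a str or enum member")
        self._properties["load"]["sourceColumnMatch"] = value


config = LoadConfigSketch()
config.source_column_match = SourceColumnMatchSketch.NAME
assert config._properties["load"]["sourceColumnMatch"] == "NAME"
assert config.source_column_match is SourceColumnMatchSketch.NAME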
+ def test_date_format_missing(self):
+ config = self._get_target_class()()
+ self.assertIsNone(config.date_format)
+
+ def test_date_format_hit(self):
+ date_format = "%Y-%m-%d"
+ config = self._get_target_class()()
+ config._properties["load"]["dateFormat"] = date_format
+ self.assertEqual(config.date_format, date_format)
+
+ def test_date_format_setter(self):
+ date_format = "YYYY/MM/DD"
+ config = self._get_target_class()()
+ config.date_format = date_format
+ self.assertEqual(config._properties["load"]["dateFormat"], date_format)
+
+ def test_datetime_format_missing(self):
+ config = self._get_target_class()()
+ self.assertIsNone(config.datetime_format)
+
+ def test_datetime_format_hit(self):
+ datetime_format = "%Y-%m-%dT%H:%M:%S"
+ config = self._get_target_class()()
+ config._properties["load"]["datetimeFormat"] = datetime_format
+ self.assertEqual(config.datetime_format, datetime_format)
+
+ def test_datetime_format_setter(self):
+ datetime_format = "YYYY/MM/DD HH24:MI:SS"
+ config = self._get_target_class()()
+ config.datetime_format = datetime_format
+ self.assertEqual(config._properties["load"]["datetimeFormat"], datetime_format)
+
+ def test_time_zone_missing(self):
+ config = self._get_target_class()()
+ self.assertIsNone(config.time_zone)
+
+ def test_time_zone_hit(self):
+ time_zone = "UTC"
+ config = self._get_target_class()()
+ config._properties["load"]["timeZone"] = time_zone
+ self.assertEqual(config.time_zone, time_zone)
+
+ def test_time_zone_setter(self):
+ time_zone = "America/New_York"
+ config = self._get_target_class()()
+ config.time_zone = time_zone
+ self.assertEqual(config._properties["load"]["timeZone"], time_zone)
+
+ def test_time_format_missing(self):
+ config = self._get_target_class()()
+ self.assertIsNone(config.time_format)
+
+ def test_time_format_hit(self):
+ time_format = "%H:%M:%S"
+ config = self._get_target_class()()
+ config._properties["load"]["timeFormat"] = time_format
+ self.assertEqual(config.time_format, time_format)
+
+ def test_time_format_setter(self):
+ time_format = "HH24:MI:SS"
+ config = self._get_target_class()()
+ config.time_format = time_format
+ self.assertEqual(config._properties["load"]["timeFormat"], time_format)
+
+ def test_timestamp_format_missing(self):
+ config = self._get_target_class()()
+ self.assertIsNone(config.timestamp_format)
+
+ def test_timestamp_format_hit(self):
+ timestamp_format = "%Y-%m-%dT%H:%M:%S.%fZ"
+ config = self._get_target_class()()
+ config._properties["load"]["timestampFormat"] = timestamp_format
+ self.assertEqual(config.timestamp_format, timestamp_format)
+
+ def test_timestamp_format_setter(self):
+ timestamp_format = "YYYY/MM/DD HH24:MI:SS.FF6 TZR"
+ config = self._get_target_class()()
+ config.timestamp_format = timestamp_format
+ self.assertEqual(
+ config._properties["load"]["timestampFormat"], timestamp_format
+ )
+
def test_parquet_options_missing(self):
config = self._get_target_class()()
self.assertIsNone(config.parquet_options)
@@ -785,3 +992,156 @@ def test_parquet_options_setter_clearing(self):
config.parquet_options = None
self.assertNotIn("parquetOptions", config._properties["load"])
+
+ def test_column_name_character_map_missing(self):
+ from google.cloud.bigquery.job.load import ColumnNameCharacterMap
+
+ config = self._get_target_class()()
+ self.assertEqual(
+ config.column_name_character_map,
+ ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
+ )
+
+ def test_column_name_character_map_hit(self):
+ from google.cloud.bigquery.job.load import ColumnNameCharacterMap
+
+ config = self._get_target_class()()
+ config._properties["load"]["columnNameCharacterMap"] = "STRICT"
+ self.assertEqual(
+ config.column_name_character_map,
+ ColumnNameCharacterMap.STRICT,
+ )
+
+ def test_column_name_character_map_setter(self):
+ from google.cloud.bigquery.job.load import ColumnNameCharacterMap
+
+ config = self._get_target_class()()
+ config.column_name_character_map = "V1"
+ self.assertEqual(
+ config._properties["load"]["columnNameCharacterMap"],
+ ColumnNameCharacterMap.V1,
+ )
+
+ def test_column_name_character_map_none(self):
+ from google.cloud.bigquery.job.load import ColumnNameCharacterMap
+
+ config = self._get_target_class()()
+ config.column_name_character_map = None
+ self.assertEqual(
+ config._properties["load"]["columnNameCharacterMap"],
+ ColumnNameCharacterMap.COLUMN_NAME_CHARACTER_MAP_UNSPECIFIED,
+ )
+
+ RESOURCE = {
+ "load": {
+ "allowJaggedRows": True,
+ "createDisposition": "CREATE_NEVER",
+ "encoding": "UTF-8",
+ "fieldDelimiter": ",",
+ "ignoreUnknownValues": True,
+ "maxBadRecords": 10,
+ "nullMarker": "\\N",
+ "quote": '"',
+ "schema": {
+ "fields": [
+ {"name": "name", "type": "STRING", "mode": "NULLABLE"},
+ {"name": "age", "type": "INTEGER", "mode": "NULLABLE"},
+ ]
+ },
+ "skipLeadingRows": "1",
+ "sourceFormat": "CSV",
+ "timePartitioning": {
+ "type": "DAY",
+ "field": "transaction_date",
+ },
+ "useAvroLogicalTypes": True,
+ "writeDisposition": "WRITE_TRUNCATE",
+ "dateFormat": "%Y-%m-%d",
+ "timeZone": "America/New_York",
+ "parquetOptions": {"enableListInference": True},
+ "columnNameCharacterMap": "V2",
+ "someNewField": "some-value",
+ }
+ }
+
+ def test_from_api_repr(self):
+ from google.cloud.bigquery.job import (
+ CreateDisposition,
+ LoadJobConfig,
+ SourceFormat,
+ WriteDisposition,
+ )
+ from google.cloud.bigquery.schema import SchemaField
+ from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType
+
+ from google.cloud.bigquery.job.load import ColumnNameCharacterMap
+
+ config = LoadJobConfig.from_api_repr(self.RESOURCE)
+
+ self.assertTrue(config.allow_jagged_rows)
+ self.assertEqual(config.create_disposition, CreateDisposition.CREATE_NEVER)
+ self.assertEqual(config.encoding, "UTF-8")
+ self.assertEqual(config.field_delimiter, ",")
+ self.assertTrue(config.ignore_unknown_values)
+ self.assertEqual(config.max_bad_records, 10)
+ self.assertEqual(config.null_marker, "\\N")
+ self.assertEqual(config.quote_character, '"')
+ self.assertEqual(
+ config.schema,
+ [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")],
+ )
+ self.assertEqual(config.skip_leading_rows, 1)
+ self.assertEqual(config.source_format, SourceFormat.CSV)
+ self.assertEqual(
+ config.time_partitioning,
+ TimePartitioning(type_=TimePartitioningType.DAY, field="transaction_date"),
+ )
+ self.assertTrue(config.use_avro_logical_types)
+ self.assertEqual(config.write_disposition, WriteDisposition.WRITE_TRUNCATE)
+ self.assertEqual(config.date_format, "%Y-%m-%d")
+ self.assertEqual(config.time_zone, "America/New_York")
+ self.assertTrue(config.parquet_options.enable_list_inference)
+ self.assertEqual(config.column_name_character_map, ColumnNameCharacterMap.V2)
+ self.assertEqual(config._properties["load"]["someNewField"], "some-value")
+
+ def test_to_api_repr(self):
+ from google.cloud.bigquery.job import (
+ CreateDisposition,
+ LoadJobConfig,
+ SourceFormat,
+ WriteDisposition,
+ )
+ from google.cloud.bigquery.schema import SchemaField
+ from google.cloud.bigquery.table import TimePartitioning, TimePartitioningType
+ from google.cloud.bigquery.format_options import ParquetOptions
+ from google.cloud.bigquery.job.load import ColumnNameCharacterMap
+
+ config = LoadJobConfig()
+ config.allow_jagged_rows = True
+ config.create_disposition = CreateDisposition.CREATE_NEVER
+ config.encoding = "UTF-8"
+ config.field_delimiter = ","
+ config.ignore_unknown_values = True
+ config.max_bad_records = 10
+ config.null_marker = r"\N"
+ config.quote_character = '"'
+ config.schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")]
+ config.skip_leading_rows = 1
+ config.source_format = SourceFormat.CSV
+ config.time_partitioning = TimePartitioning(
+ type_=TimePartitioningType.DAY, field="transaction_date"
+ )
+ config.use_avro_logical_types = True
+ config.write_disposition = WriteDisposition.WRITE_TRUNCATE
+ config.date_format = "%Y-%m-%d"
+ config.time_zone = "America/New_York"
+ parquet_options = ParquetOptions()
+ parquet_options.enable_list_inference = True
+ config.parquet_options = parquet_options
+ config.column_name_character_map = ColumnNameCharacterMap.V2
+ config._properties["load"]["someNewField"] = "some-value"
+
+ api_repr = config.to_api_repr()
+
+ expected = self.RESOURCE
+ self.assertEqual(api_repr, expected)
diff --git a/tests/unit/job/test_query.py b/tests/unit/job/test_query.py
index 33a52cfec..4a6771c46 100644
--- a/tests/unit/job/test_query.py
+++ b/tests/unit/job/test_query.py
@@ -13,19 +13,19 @@
# limitations under the License.
import concurrent
+import concurrent.futures
import copy
import http
import textwrap
import types
+from unittest import mock
-import freezegun
-from google.api_core import exceptions
-import google.api_core.retry
-import mock
import requests
from google.cloud.bigquery.client import _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS
+import google.cloud.bigquery._job_helpers
import google.cloud.bigquery.query
+from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
from google.cloud.bigquery.table import _EmptyRowIterator
from ..helpers import make_connection
@@ -54,7 +54,6 @@ def _make_resource(self, started=False, ended=False, location="US"):
return resource
def _verifyBooleanResourceProperties(self, job, config):
-
if "allowLargeResults" in config:
self.assertEqual(job.allow_large_results, config["allowLargeResults"])
else:
@@ -371,100 +370,6 @@ def test_cancelled(self):
self.assertTrue(job.cancelled())
- def test__done_or_raise_w_timeout(self):
- client = _make_client(project=self.PROJECT)
- resource = self._make_resource(ended=False)
- job = self._get_target_class().from_api_repr(resource, client)
-
- with mock.patch.object(
- client, "_get_query_results"
- ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload:
- job._done_or_raise(timeout=42)
-
- fake_get_results.assert_called_once()
- call_args = fake_get_results.call_args
- self.assertEqual(call_args.kwargs.get("timeout"), 42)
-
- call_args = fake_reload.call_args
- self.assertEqual(call_args.kwargs.get("timeout"), 42)
-
- def test__done_or_raise_w_timeout_and_longer_internal_api_timeout(self):
- client = _make_client(project=self.PROJECT)
- resource = self._make_resource(ended=False)
- job = self._get_target_class().from_api_repr(resource, client)
- job._done_timeout = 8.8
-
- with mock.patch.object(
- client, "_get_query_results"
- ) as fake_get_results, mock.patch.object(job, "reload") as fake_reload:
- job._done_or_raise(timeout=5.5)
-
- # The expected timeout used is simply the given timeout, as the latter
- # is shorter than the job's internal done timeout.
- expected_timeout = 5.5
-
- fake_get_results.assert_called_once()
- call_args = fake_get_results.call_args
- self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout)
-
- call_args = fake_reload.call_args
- self.assertAlmostEqual(call_args.kwargs.get("timeout"), expected_timeout)
-
- def test__done_or_raise_w_query_results_error_reload_ok(self):
- client = _make_client(project=self.PROJECT)
- bad_request_error = exceptions.BadRequest("Error in query")
- client._get_query_results = mock.Mock(side_effect=bad_request_error)
-
- resource = self._make_resource(ended=False)
- job = self._get_target_class().from_api_repr(resource, client)
- job._exception = None
-
- def fake_reload(self, *args, **kwargs):
- self._properties["status"]["state"] = "DONE"
- self.set_exception(copy.copy(bad_request_error))
-
- fake_reload_method = types.MethodType(fake_reload, job)
-
- with mock.patch.object(job, "reload", new=fake_reload_method):
- job._done_or_raise()
-
- assert isinstance(job._exception, exceptions.BadRequest)
-
- def test__done_or_raise_w_query_results_error_reload_error(self):
- client = _make_client(project=self.PROJECT)
- bad_request_error = exceptions.BadRequest("Error in query")
- client._get_query_results = mock.Mock(side_effect=bad_request_error)
-
- resource = self._make_resource(ended=False)
- job = self._get_target_class().from_api_repr(resource, client)
- reload_error = exceptions.DataLoss("Oops, sorry!")
- job.reload = mock.Mock(side_effect=reload_error)
- job._exception = None
-
- job._done_or_raise()
-
- assert job._exception is bad_request_error
-
- def test__done_or_raise_w_job_query_results_ok_reload_error(self):
- client = _make_client(project=self.PROJECT)
- query_results = google.cloud.bigquery.query._QueryResults(
- properties={
- "jobComplete": True,
- "jobReference": {"projectId": self.PROJECT, "jobId": "12345"},
- }
- )
- client._get_query_results = mock.Mock(return_value=query_results)
-
- resource = self._make_resource(ended=False)
- job = self._get_target_class().from_api_repr(resource, client)
- retry_error = exceptions.RetryError("Too many retries", cause=TimeoutError)
- job.reload = mock.Mock(side_effect=retry_error)
- job._exception = None
-
- job._done_or_raise()
-
- assert job._exception is retry_error
-
def test_query_plan(self):
from google.cloud._helpers import _RFC3339_MICROS
from google.cloud.bigquery.job import QueryPlanEntry
@@ -911,7 +816,67 @@ def test_dml_stats(self):
assert isinstance(job.dml_stats, DmlStats)
assert job.dml_stats.inserted_row_count == 35
- def test_result(self):
+ def test_search_stats(self):
+ from google.cloud.bigquery.job.query import SearchStats
+
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ assert job.search_stats is None
+
+ statistics = job._properties["statistics"] = {}
+ assert job.search_stats is None
+
+ query_stats = statistics["query"] = {}
+ assert job.search_stats is None
+
+ query_stats["searchStatistics"] = {
+ "indexUsageMode": "INDEX_USAGE_MODE_UNSPECIFIED",
+ "indexUnusedReasons": [],
+ }
+ # job.search_stats is resolved via a chain of lookups:
+ # job.search_stats << job._job_statistics << job._properties
+ assert isinstance(job.search_stats, SearchStats)
+ assert job.search_stats.mode == "INDEX_USAGE_MODE_UNSPECIFIED"
+
+ def test_incremental_result_stats(self):
+ from google.cloud.bigquery.job.query import IncrementalResultStats
+
+ client = _make_client(project=self.PROJECT)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ assert job.incremental_result_stats is None
+
+ statistics = job._properties["statistics"] = {}
+ assert job.incremental_result_stats is None
+
+ query_stats = statistics["query"] = {}
+ assert job.incremental_result_stats is None
+
+ query_stats["incrementalResultStats"] = {"disabledReason": "BAZ"}
+ assert isinstance(job.incremental_result_stats, IncrementalResultStats)
+ assert job.incremental_result_stats.disabled_reason == "BAZ"
+
+ def test_reload_query_results_uses_transport_timeout(self):
+ conn = make_connection({})
+ client = _make_client(self.PROJECT, connection=conn)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ job._transport_timeout = 123
+
+ job._reload_query_results()
+
+ query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}"
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path=query_results_path,
+ query_params={"maxResults": 0},
+ timeout=123,
+ )
+
+ def test_result_reloads_job_state_until_done(self):
+ """Verify that result() doesn't return until state == 'DONE'.
+
+ This test verifies correctness for a possible sequence of API responses
+ that might cause internal customer issue b/332850329.
+ """
from google.cloud.bigquery.table import RowIterator
query_resource = {
@@ -931,9 +896,15 @@ def test_result(self):
},
"schema": {"fields": [{"name": "col1", "type": "STRING"}]},
"totalRows": "2",
+ "queryId": "abc-def",
}
job_resource = self._make_resource(started=True, location="EU")
job_resource_done = self._make_resource(started=True, ended=True, location="EU")
+ job_resource_done["statistics"]["query"]["totalBytesProcessed"] = str(1234)
+ job_resource_done["statistics"]["query"]["totalSlotMs"] = str(5678)
+ job_resource_done["statistics"]["creationTime"] = str(11)
+ job_resource_done["statistics"]["startTime"] = str(22)
+ job_resource_done["statistics"]["endTime"] = str(33)
job_resource_done["configuration"]["query"]["destinationTable"] = {
"projectId": "dest-project",
"datasetId": "dest_dataset",
@@ -947,7 +918,54 @@ def test_result(self):
"rows": [{"f": [{"v": "abc"}]}],
}
conn = make_connection(
- query_resource, query_resource_done, job_resource_done, query_page_resource
+ # QueryJob.result() makes a pair of jobs.get & jobs.getQueryResults
+ # REST API calls each iteration to determine if the job has finished
+ # or not.
+ #
+ # jobs.get (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/get)
+ # is necessary to make sure the job has really finished via
+ # `Job.status.state == "DONE"` and to get necessary properties for
+ # `RowIterator` like the destination table.
+ #
+ # jobs.getQueryResults
+ # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults)
+ # with maxResults == 0 is technically optional,
+ # but it hangs up to 10 seconds until the job has finished. This
+ # makes sure we can know when the query has finished as close as
+ # possible to when the query finishes. It also gets properties
+ # necessary for `RowIterator` that isn't available on the job
+ # resource such as the schema
+ # (https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults#body.GetQueryResultsResponse.FIELDS.schema)
+ # of the results.
+ job_resource,
+ query_resource,
+ # The query wasn't finished in the last call to jobs.get, so try
+ # again with a call to both jobs.get & jobs.getQueryResults.
+ job_resource,
+ query_resource_done,
+ # Even though the previous jobs.getQueryResults response says
+ # the job is complete, we haven't downloaded the full job status
+ # yet.
+ #
+ # Important: per internal issue 332850329, this response has
+ # `Job.status.state = "RUNNING"`. This ensures we are protected
+ # against possible eventual consistency issues where
+ # `jobs.getQueryResults` says jobComplete == True, but our next
+ # call to `jobs.get` still doesn't have
+ # `Job.status.state == "DONE"`.
+ job_resource,
+ # Try again until `Job.status.state == "DONE"`.
+ #
+ # Note: the call to `jobs.getQueryResults` is missing here as
+ # an optimization. We already received a "completed" response, so
+ # we won't learn anything new by calling that API again.
+ job_resource,
+ job_resource_done,
+ # When we iterate over the `RowIterator` we return from
+ # `QueryJob.result()`, we make additional calls to
+ # `jobs.getQueryResults` but this time allowing the actual rows
+ # to be returned as well.
+ query_page_resource,
)
client = _make_client(self.PROJECT, connection=conn)
job = self._get_target_class().from_api_repr(job_resource, client)
@@ -959,9 +977,19 @@ def test_result(self):
rows = list(result)
self.assertEqual(len(rows), 1)
self.assertEqual(rows[0].col1, "abc")
+ self.assertEqual(result.job_id, self.JOB_ID)
+ self.assertEqual(result.location, "EU")
+ self.assertEqual(result.project, self.PROJECT)
+ self.assertEqual(result.query_id, "abc-def")
# Test that the total_rows property has changed during iteration, based
# on the response from tabledata.list.
self.assertEqual(result.total_rows, 1)
+ self.assertEqual(result.query, job.query)
+ self.assertEqual(result.total_bytes_processed, 1234)
+ self.assertEqual(result.slot_millis, 5678)
+ self.assertEqual(result.created.timestamp() * 1000, 11)
+ self.assertEqual(result.started.timestamp() * 1000, 22)
+ self.assertEqual(result.ended.timestamp() * 1000, 33)
query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}"
query_results_call = mock.call(
@@ -973,8 +1001,8 @@ def test_result(self):
reload_call = mock.call(
method="GET",
path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}",
- query_params={"location": "EU"},
- timeout=None,
+ query_params={"projection": "full", "location": "EU"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
query_page_call = mock.call(
method="GET",
@@ -986,8 +1014,32 @@ def test_result(self):
},
timeout=None,
)
+ # Ensure that we actually made the expected API calls in the sequence
+ # laid out at the make_connection() call above.
+ #
+ # Note: The responses from jobs.get and jobs.getQueryResults can be
+ # deceptively similar, so this check ensures we actually made the
+ # requests we expected.
conn.api_request.assert_has_calls(
- [query_results_call, query_results_call, reload_call, query_page_call]
+ [
+ # jobs.get & jobs.getQueryResults because the job just started.
+ reload_call,
+ query_results_call,
+ # jobs.get & jobs.getQueryResults because the query is still
+ # running.
+ reload_call,
+ query_results_call,
+ # We got a jobComplete response from the most recent call to
+ # jobs.getQueryResults, so now call jobs.get until we get
+ # `Jobs.status.state == "DONE"`. This tests a fix for internal
+ # issue b/332850329.
+ reload_call,
+ reload_call,
+ reload_call,
+ # jobs.getQueryResults without `maxResults` set to download
+ # the rows as we iterate over the `RowIterator`.
+ query_page_call,
+ ]
)
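# Rough sketch of the polling pattern described in the comments above, with
# hypothetical jobs_get() / jobs_get_query_results() callables that return
# parsed REST responses (this simplification omits retries and timeouts and is
# not the library's implementation): pair the two calls until
# jobs.getQueryResults reports completion, then keep calling jobs.get alone
# until Job.status.state == "DONE".
def wait_for_query_done(jobs_get, jobs_get_query_results) -> dict:
    job = jobs_get()
    while not jobs_get_query_results(max_results=0).get("jobComplete"):
        job = jobs_get()
    while job.get("status", {}).get("state") != "DONE":
        job = jobs_get()
    return job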
def test_result_dry_run(self):
@@ -1002,6 +1054,92 @@ def test_result_dry_run(self):
calls = conn.api_request.mock_calls
self.assertIsInstance(result, _EmptyRowIterator)
self.assertEqual(calls, [])
+ self.assertEqual(result.location, "EU")
+ self.assertEqual(result.project, self.PROJECT)
+ # Intentionally omit job_id and query_id since this doesn't
+ # actually correspond to a finished query job.
+ self.assertIsNone(result.job_id)
+ self.assertIsNone(result.query_id)
+
+ # If the job doesn't exist, create the job first. Issue:
+ # https://github.com/googleapis/python-bigquery/issues/1940
+ def test_result_begin_job_if_not_exist(self):
+ begun_resource = self._make_resource()
+ query_running_resource = {
+ "jobComplete": True,
+ "jobReference": {
+ "projectId": self.PROJECT,
+ "jobId": self.JOB_ID,
+ "location": "US",
+ },
+ "schema": {"fields": [{"name": "col1", "type": "STRING"}]},
+ "status": {"state": "RUNNING"},
+ }
+ query_done_resource = {
+ "jobComplete": True,
+ "jobReference": {
+ "projectId": self.PROJECT,
+ "jobId": self.JOB_ID,
+ "location": "US",
+ },
+ "schema": {"fields": [{"name": "col1", "type": "STRING"}]},
+ "status": {"state": "DONE"},
+ }
+ done_resource = copy.deepcopy(begun_resource)
+ done_resource["status"] = {"state": "DONE"}
+ connection = make_connection(
+ begun_resource,
+ query_running_resource,
+ query_done_resource,
+ done_resource,
+ )
+ client = _make_client(project=self.PROJECT, connection=connection)
+ job = self._make_one(self.JOB_ID, self.QUERY, client)
+ job._properties["jobReference"]["location"] = "US"
+
+ job.result()
+
+ create_job_call = mock.call(
+ method="POST",
+ path=f"/projects/{self.PROJECT}/jobs",
+ data={
+ "jobReference": {
+ "jobId": self.JOB_ID,
+ "projectId": self.PROJECT,
+ "location": "US",
+ },
+ "configuration": {
+ "query": {"useLegacySql": False, "query": self.QUERY},
+ },
+ },
+ timeout=None,
+ )
+ reload_call = mock.call(
+ method="GET",
+ path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}",
+ query_params={"projection": "full", "location": "US"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
+ )
+ get_query_results_call = mock.call(
+ method="GET",
+ path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}",
+ query_params={
+ "maxResults": 0,
+ "location": "US",
+ },
+ timeout=None,
+ )
+
+ connection.api_request.assert_has_calls(
+ [
+ # Make sure we start a job that hasn't started yet. See:
+ # https://github.com/googleapis/python-bigquery/issues/1940
+ create_job_call,
+ reload_call,
+ get_query_results_call,
+ reload_call,
+ ]
+ )
def test_result_with_done_job_calls_get_query_results(self):
query_resource_done = {
@@ -1049,106 +1187,165 @@ def test_result_with_done_job_calls_get_query_results(self):
timeout=None,
)
conn.api_request.assert_has_calls([query_results_call, query_results_page_call])
+ assert conn.api_request.call_count == 2
- def test_result_with_max_results(self):
- from google.cloud.bigquery.table import RowIterator
+ def test_result_with_done_jobs_query_response_doesnt_call_get_query_results(self):
+ """With a done result from jobs.query, we don't need to call
+ jobs.getQueryResults to wait for the query to finish.
- query_resource = {
+ jobs.get is still called because there is an assumption that after
+ QueryJob.result(), all job metadata is available locally.
+ """
+ job_resource = self._make_resource(started=True, ended=True, location="EU")
+ conn = make_connection(job_resource)
+ client = _make_client(self.PROJECT, connection=conn)
+ query_resource_done = {
"jobComplete": True,
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
"schema": {"fields": [{"name": "col1", "type": "STRING"}]},
- "totalRows": "5",
- }
- query_page_resource = {
- "totalRows": "5",
- "pageToken": None,
- "rows": [
- {"f": [{"v": "abc"}]},
- {"f": [{"v": "def"}]},
- {"f": [{"v": "ghi"}]},
- ],
+ "rows": [{"f": [{"v": "abc"}]}],
+ "totalRows": "1",
}
- connection = make_connection(query_resource, query_page_resource)
- client = _make_client(self.PROJECT, connection=connection)
- resource = self._make_resource(ended=True)
- job = self._get_target_class().from_api_repr(resource, client)
-
- max_results = 3
+ job = google.cloud.bigquery._job_helpers._to_query_job(
+ client,
+ "SELECT 'abc' AS col1",
+ request_config=None,
+ query_response=query_resource_done,
+ )
- result = job.result(max_results=max_results)
+ # We want job.result() to refresh the job state, so the conversion is
+ # always "PENDING", even if the job is finished.
+ assert job.state == "PENDING"
- self.assertIsInstance(result, RowIterator)
- self.assertEqual(result.total_rows, 5)
+ result = job.result()
rows = list(result)
+ self.assertEqual(len(rows), 1)
+ self.assertEqual(rows[0].col1, "abc")
+ job_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}"
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path=job_path,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
+ )
- self.assertEqual(len(rows), 3)
- self.assertEqual(len(connection.api_request.call_args_list), 2)
- query_page_request = connection.api_request.call_args_list[1]
- self.assertEqual(
- query_page_request[1]["query_params"]["maxResults"], max_results
+ def test_result_with_none_timeout(self):
+ # Verifies that with an explicit None timeout, jobs.get uses None
+ # instead of the default timeout.
+ job_resource = self._make_resource(started=True, ended=True, location="EU")
+ conn = make_connection(job_resource)
+ client = _make_client(self.PROJECT, connection=conn)
+ query_resource_done = {
+ "jobComplete": True,
+ "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
+ "schema": {"fields": [{"name": "col1", "type": "STRING"}]},
+ "rows": [{"f": [{"v": "abc"}]}],
+ "totalRows": "1",
+ }
+ job = google.cloud.bigquery._job_helpers._to_query_job(
+ client,
+ "SELECT 'abc' AS col1",
+ request_config=None,
+ query_response=query_resource_done,
+ )
+
+ job.result(timeout=None)
+
+ job_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}"
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path=job_path,
+ query_params={"projection": "full"},
+ timeout=None,
)
- def test_result_w_retry(self):
+ def test_result_with_max_results(self):
from google.cloud.bigquery.table import RowIterator
query_resource = {
- "jobComplete": False,
- "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
- }
- query_resource_done = {
"jobComplete": True,
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
"schema": {"fields": [{"name": "col1", "type": "STRING"}]},
- "totalRows": "2",
+ "totalRows": "10",
+ "pageToken": "first-page-token",
+ "rows": [
+ {"f": [{"v": "abc"}]},
+ {"f": [{"v": "def"}]},
+ {"f": [{"v": "ghi"}]},
+ {"f": [{"v": "jkl"}]},
+ {"f": [{"v": "mno"}]},
+ {"f": [{"v": "pqr"}]},
+ # Pretend these are very large rows, so the API doesn't return
+ # all of the rows we asked for in the first response.
+ ],
}
- job_resource = self._make_resource(started=True, location="asia-northeast1")
- job_resource_done = self._make_resource(
- started=True, ended=True, location="asia-northeast1"
- )
- job_resource_done["configuration"]["query"]["destinationTable"] = {
- "projectId": "dest-project",
- "datasetId": "dest_dataset",
- "tableId": "dest_table",
+ query_page_resource = {
+ "totalRows": "10",
+ "pageToken": None,
+ "rows": [
+ {"f": [{"v": "stu"}]},
+ {"f": [{"v": "vwx"}]},
+ {"f": [{"v": "yz0"}]},
+ ],
}
-
- connection = make_connection(
- exceptions.NotFound("not normally retriable"),
- query_resource,
- exceptions.NotFound("not normally retriable"),
- query_resource_done,
- exceptions.NotFound("not normally retriable"),
- job_resource_done,
+ job_resource_running = self._make_resource(
+ started=True, ended=False, location="US"
)
- client = _make_client(self.PROJECT, connection=connection)
- job = self._get_target_class().from_api_repr(job_resource, client)
+ job_resource_done = self._make_resource(started=True, ended=True, location="US")
+ conn = make_connection(job_resource_done, query_resource, query_page_resource)
+ client = _make_client(self.PROJECT, connection=conn)
+ job = self._get_target_class().from_api_repr(job_resource_running, client)
- custom_predicate = mock.Mock()
- custom_predicate.return_value = True
- custom_retry = google.api_core.retry.Retry(
- initial=0.001,
- maximum=0.001,
- multiplier=1.0,
- deadline=0.1,
- predicate=custom_predicate,
- )
+ max_results = 9
+ result = job.result(max_results=max_results)
- self.assertIsInstance(job.result(retry=custom_retry), RowIterator)
- query_results_call = mock.call(
+ self.assertIsInstance(result, RowIterator)
+ self.assertEqual(result.total_rows, 10)
+
+ rows = list(result)
+
+ self.assertEqual(len(rows), 9)
+ jobs_get_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}"
+ jobs_get_call = mock.call(
method="GET",
- path=f"/projects/{self.PROJECT}/queries/{self.JOB_ID}",
- query_params={"maxResults": 0, "location": "asia-northeast1"},
- timeout=None,
+ path=jobs_get_path,
+ query_params={"projection": "full", "location": "US"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
- reload_call = mock.call(
+ query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}"
+ query_page_waiting_call = mock.call(
method="GET",
- path=f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}",
- query_params={"location": "asia-northeast1"},
+ path=query_results_path,
+ query_params={
+ # Waiting for the results should set maxResults and cache the
+ # first page if max_results is set. This allows customers to
+ # more finely tune when we fall back to the BQ Storage API.
+ # See internal issue: 344008814.
+ "maxResults": max_results,
+ "formatOptions.useInt64Timestamp": True,
+ "location": "US",
+ },
timeout=None,
)
-
- connection.api_request.assert_has_calls(
- [query_results_call, query_results_call, reload_call]
+ query_page_2_call = mock.call(
+ timeout=None,
+ method="GET",
+ path=query_results_path,
+ query_params={
+ "pageToken": "first-page-token",
+ "maxResults": 3,
+ "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
+ "location": "US",
+ "formatOptions.useInt64Timestamp": True,
+ },
+ )
+ # Waiting for the results should set maxResults and cache the
+ # first page if max_results is set. This allows customers to
+ # more finely tune when we fall back to the BQ Storage API.
+ # See internal issue: 344008814.
+ conn.api_request.assert_has_calls(
+ [jobs_get_call, query_page_waiting_call, query_page_2_call]
)
def test_result_w_empty_schema(self):
@@ -1159,82 +1356,21 @@ def test_result_w_empty_schema(self):
"jobComplete": True,
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
"schema": {"fields": []},
+ "queryId": "xyz-abc",
}
connection = make_connection(query_resource, query_resource)
client = _make_client(self.PROJECT, connection=connection)
- resource = self._make_resource(ended=True)
+ resource = self._make_resource(ended=True, location="asia-northeast1")
job = self._get_target_class().from_api_repr(resource, client)
result = job.result()
self.assertIsInstance(result, _EmptyRowIterator)
self.assertEqual(list(result), [])
-
- def test_result_invokes_begins(self):
- begun_resource = self._make_resource()
- incomplete_resource = {
- "jobComplete": False,
- "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
- "schema": {"fields": [{"name": "col1", "type": "STRING"}]},
- }
- query_resource = copy.deepcopy(incomplete_resource)
- query_resource["jobComplete"] = True
- done_resource = copy.deepcopy(begun_resource)
- done_resource["status"] = {"state": "DONE"}
- connection = make_connection(
- begun_resource,
- incomplete_resource,
- query_resource,
- done_resource,
- query_resource,
- )
- client = _make_client(project=self.PROJECT, connection=connection)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
-
- job.result()
-
- self.assertEqual(len(connection.api_request.call_args_list), 4)
- begin_request = connection.api_request.call_args_list[0]
- query_request = connection.api_request.call_args_list[2]
- reload_request = connection.api_request.call_args_list[3]
- self.assertEqual(begin_request[1]["method"], "POST")
- self.assertEqual(query_request[1]["method"], "GET")
- self.assertEqual(reload_request[1]["method"], "GET")
-
- def test_result_w_timeout(self):
- import google.cloud.bigquery.client
-
- begun_resource = self._make_resource()
- query_resource = {
- "jobComplete": True,
- "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
- "schema": {"fields": [{"name": "col1", "type": "STRING"}]},
- }
- done_resource = copy.deepcopy(begun_resource)
- done_resource["status"] = {"state": "DONE"}
- connection = make_connection(begun_resource, query_resource, done_resource)
- client = _make_client(project=self.PROJECT, connection=connection)
- job = self._make_one(self.JOB_ID, self.QUERY, client)
-
- with freezegun.freeze_time("1970-01-01 00:00:00", tick=False):
- job.result(timeout=1.0)
-
- self.assertEqual(len(connection.api_request.call_args_list), 3)
- begin_request = connection.api_request.call_args_list[0]
- query_request = connection.api_request.call_args_list[1]
- reload_request = connection.api_request.call_args_list[2]
- self.assertEqual(begin_request[1]["method"], "POST")
- self.assertEqual(query_request[1]["method"], "GET")
- self.assertEqual(
- query_request[1]["path"],
- "/projects/{}/queries/{}".format(self.PROJECT, self.JOB_ID),
- )
- self.assertEqual(query_request[1]["query_params"]["timeoutMs"], 900)
- self.assertEqual(
- query_request[1]["timeout"],
- google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT,
- )
- self.assertEqual(reload_request[1]["method"], "GET")
+ self.assertEqual(result.project, self.PROJECT)
+ self.assertEqual(result.job_id, self.JOB_ID)
+ self.assertEqual(result.location, "asia-northeast1")
+ self.assertEqual(result.query_id, "xyz-abc")
def test_result_w_page_size(self):
# Arrange
@@ -1242,63 +1378,85 @@ def test_result_w_page_size(self):
"jobComplete": True,
"jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
"schema": {"fields": [{"name": "col1", "type": "STRING"}]},
- "totalRows": "4",
- }
- job_resource = self._make_resource(started=True, ended=True, location="US")
- q_config = job_resource["configuration"]["query"]
- q_config["destinationTable"] = {
- "projectId": self.PROJECT,
- "datasetId": self.DS_ID,
- "tableId": self.TABLE_ID,
- }
- query_page_resource = {
- "totalRows": 4,
- "pageToken": "some-page-token",
+ "totalRows": "10",
"rows": [
{"f": [{"v": "row1"}]},
{"f": [{"v": "row2"}]},
{"f": [{"v": "row3"}]},
+ {"f": [{"v": "row4"}]},
+ {"f": [{"v": "row5"}]},
+ {"f": [{"v": "row6"}]},
+ {"f": [{"v": "row7"}]},
+ {"f": [{"v": "row8"}]},
+ {"f": [{"v": "row9"}]},
],
+ "pageToken": "first-page-token",
+ }
+ job_resource_running = self._make_resource(
+ started=True, ended=False, location="US"
+ )
+ job_resource_done = self._make_resource(started=True, ended=True, location="US")
+ destination_table = {
+ "projectId": self.PROJECT,
+ "datasetId": self.DS_ID,
+ "tableId": self.TABLE_ID,
}
- query_page_resource_2 = {"totalRows": 4, "rows": [{"f": [{"v": "row4"}]}]}
+ q_config = job_resource_done["configuration"]["query"]
+ q_config["destinationTable"] = destination_table
+ query_page_resource_2 = {"totalRows": 10, "rows": [{"f": [{"v": "row10"}]}]}
conn = make_connection(
- query_results_resource, query_page_resource, query_page_resource_2
+ job_resource_running,
+ query_results_resource,
+ job_resource_done,
+ query_page_resource_2,
)
client = _make_client(self.PROJECT, connection=conn)
- job = self._get_target_class().from_api_repr(job_resource, client)
+ job = self._get_target_class().from_api_repr(job_resource_running, client)
# Act
- result = job.result(page_size=3)
+ result = job.result(page_size=9)
# Assert
actual_rows = list(result)
- self.assertEqual(len(actual_rows), 4)
+ self.assertEqual(len(actual_rows), 10)
+ jobs_get_path = f"/projects/{self.PROJECT}/jobs/{self.JOB_ID}"
+ jobs_get_call = mock.call(
+ method="GET",
+ path=jobs_get_path,
+ query_params={"projection": "full", "location": "US"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
+ )
query_results_path = f"/projects/{self.PROJECT}/queries/{self.JOB_ID}"
- query_page_1_call = mock.call(
+ query_page_waiting_call = mock.call(
method="GET",
path=query_results_path,
query_params={
- "maxResults": 3,
- "fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
+ # Waiting for the results should set maxResults and cache the
+ # first page if page_size is set. This allows customers to
+ # more finely tune when we fall back to the BQ Storage API.
+ # See internal issue: 344008814.
+ "maxResults": 9,
"location": "US",
"formatOptions.useInt64Timestamp": True,
},
timeout=None,
)
query_page_2_call = mock.call(
+ timeout=None,
method="GET",
path=query_results_path,
query_params={
- "pageToken": "some-page-token",
- "maxResults": 3,
+ "pageToken": "first-page-token",
+ "maxResults": 9,
"fields": _LIST_ROWS_FROM_QUERY_RESULTS_FIELDS,
"location": "US",
"formatOptions.useInt64Timestamp": True,
},
- timeout=None,
)
- conn.api_request.assert_has_calls([query_page_1_call, query_page_2_call])
+ conn.api_request.assert_has_calls(
+ [jobs_get_call, query_page_waiting_call, jobs_get_call, query_page_2_call]
+ )
def test_result_with_start_index(self):
from google.cloud.bigquery.table import RowIterator
@@ -1326,7 +1484,17 @@ def test_result_with_start_index(self):
start_index = 1
- result = job.result(start_index=start_index)
+ # Verifies that page_size isn't overwritten by max_results when
+ # start_index is not None. See
+ # https://github.com/googleapis/python-bigquery/issues/1950
+ page_size = 10
+ max_results = 100
+
+ result = job.result(
+ page_size=page_size,
+ max_results=max_results,
+ start_index=start_index,
+ )
self.assertIsInstance(result, RowIterator)
self.assertEqual(result.total_rows, 5)
@@ -1339,6 +1507,81 @@ def test_result_with_start_index(self):
self.assertEqual(
tabledata_list_request[1]["query_params"]["startIndex"], start_index
)
+ self.assertEqual(
+ tabledata_list_request[1]["query_params"]["maxResults"], page_size
+ )
+
+ def test_result_with_start_index_multi_page(self):
+ # When the response spans multiple pages and the user has set
+ # start_index, we should supply start_index to the server in the first
+ # request only. Subsequent requests pass page_token instead of
+ # start_index, because the server allows only one of them.
+ from google.cloud.bigquery.table import RowIterator
+
+ query_resource = {
+ "jobComplete": True,
+ "jobReference": {"projectId": self.PROJECT, "jobId": self.JOB_ID},
+ "schema": {"fields": [{"name": "col1", "type": "STRING"}]},
+ "totalRows": "7",
+ }
+
+ # Although the result has 7 rows, the response only returns 6, because
+ # start_index is 1.
+ tabledata_resource_1 = {
+ "totalRows": "7",
+ "pageToken": "page_token_1",
+ "rows": [
+ {"f": [{"v": "abc"}]},
+ {"f": [{"v": "def"}]},
+ {"f": [{"v": "ghi"}]},
+ ],
+ }
+ tabledata_resource_2 = {
+ "totalRows": "7",
+ "pageToken": None,
+ "rows": [
+ {"f": [{"v": "jkl"}]},
+ {"f": [{"v": "mno"}]},
+ {"f": [{"v": "pqe"}]},
+ ],
+ }
+
+ connection = make_connection(
+ query_resource, tabledata_resource_1, tabledata_resource_2
+ )
+ client = _make_client(self.PROJECT, connection=connection)
+ resource = self._make_resource(ended=True)
+ job = self._get_target_class().from_api_repr(resource, client)
+
+ start_index = 1
+ page_size = 3
+
+ result = job.result(page_size=page_size, start_index=start_index)
+
+ self.assertIsInstance(result, RowIterator)
+ self.assertEqual(result.total_rows, 7)
+
+ rows = list(result)
+
+ self.assertEqual(len(rows), 6)
+ self.assertEqual(len(connection.api_request.call_args_list), 3)
+
+ # First call has both startIndex and maxResults.
+ tabledata_list_request_1 = connection.api_request.call_args_list[1]
+ self.assertEqual(
+ tabledata_list_request_1[1]["query_params"]["startIndex"], start_index
+ )
+ self.assertEqual(
+ tabledata_list_request_1[1]["query_params"]["maxResults"], page_size
+ )
+
+ # Second call has maxResults but not startIndex.
+ tabledata_list_request_2 = connection.api_request.call_args_list[2]
+ self.assertFalse("startIndex" in tabledata_list_request_2[1]["query_params"])
+ self.assertEqual(
+ tabledata_list_request_2[1]["query_params"]["maxResults"], page_size
+ )
def test_result_error(self):
from google.cloud import exceptions
@@ -1964,12 +2207,23 @@ def test_reload_w_bound_client(self):
) as final_attributes:
job.reload()
- final_attributes.assert_called_with({"path": PATH}, client, job)
+ final_attributes.assert_called_with(
+ {
+ "path": PATH,
+ "job_id": self.JOB_ID,
+ "location": None,
+ },
+ client,
+ None,
+ )
self.assertNotEqual(job.destination, table_ref)
conn.api_request.assert_called_once_with(
- method="GET", path=PATH, query_params={}, timeout=None
+ method="GET",
+ path=PATH,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
self._verifyResourceProperties(job, RESOURCE)
@@ -1994,11 +2248,22 @@ def test_reload_w_alternate_client(self):
) as final_attributes:
job.reload(client=client2)
- final_attributes.assert_called_with({"path": PATH}, client2, job)
+ final_attributes.assert_called_with(
+ {
+ "path": PATH,
+ "job_id": self.JOB_ID,
+ "location": None,
+ },
+ client2,
+ None,
+ )
conn1.api_request.assert_not_called()
conn2.api_request.assert_called_once_with(
- method="GET", path=PATH, query_params={}, timeout=None
+ method="GET",
+ path=PATH,
+ query_params={"projection": "full"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
self._verifyResourceProperties(job, RESOURCE)
@@ -2021,13 +2286,23 @@ def test_reload_w_timeout(self):
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
job.reload(timeout=4.2)
-
- final_attributes.assert_called_with({"path": PATH}, client, job)
+ final_attributes.assert_called_with(
+ {
+ "path": PATH,
+ "job_id": self.JOB_ID,
+ "location": None,
+ },
+ client,
+ None,
+ )
self.assertNotEqual(job.destination, table_ref)
conn.api_request.assert_called_once_with(
- method="GET", path=PATH, query_params={}, timeout=4.2
+ method="GET",
+ path=PATH,
+ query_params={"projection": "full"},
+ timeout=4.2,
)
def test_iter(self):
@@ -2043,5 +2318,6 @@ def test_iter(self):
connection = make_connection(begun_resource, query_resource, done_resource)
client = _make_client(project=self.PROJECT, connection=connection)
job = self._make_one(self.JOB_ID, self.QUERY, client)
+ job._properties["status"] = {"state": "RUNNING"}
self.assertIsInstance(iter(job), types.GeneratorType)
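The reworked pagination tests above pin down how QueryJob.result() maps max_results, page_size, and start_index onto the underlying jobs.getQueryResults requests. A minimal usage-level sketch, assuming a project with credentials configured and an illustrative public-dataset query:

from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT name FROM `bigquery-public-data.usa_names.usa_1910_current` LIMIT 100"
)

# max_results caps the total rows returned; page_size controls how many rows
# each results page requests, which also tunes when the BigQuery Storage API
# fallback kicks in.
rows = job.result(max_results=9, page_size=3)
print(rows.total_rows)
for row in rows:  # iteration fetches later pages by page token
    print(row[0])

# start_index applies to the first request only; later pages are fetched by
# page token, never by startIndex.
subset = job.result(start_index=1, page_size=3)
print(len(list(subset)))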
diff --git a/tests/unit/job/test_query_config.py b/tests/unit/job/test_query_config.py
index 7818236f4..a63a14b73 100644
--- a/tests/unit/job/test_query_config.py
+++ b/tests/unit/job/test_query_config.py
@@ -167,6 +167,16 @@ def test_connection_properties(self):
self.assertEqual(config.connection_properties[1].key, "time_zone")
self.assertEqual(config.connection_properties[1].value, "America/Chicago")
+ def test_incremental_results(self):
+ config = self._get_target_class()()
+ config.write_incremental_results = True
+ self.assertEqual(config.write_incremental_results, True)
+
+ def test_max_slots(self):
+ config = self._get_target_class()()
+ config.max_slots = 99
+ self.assertEqual(config.max_slots, 99)
+
def test_create_session(self):
config = self._get_target_class()()
self.assertIsNone(config.create_session)
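The two new QueryJobConfig tests above set plain properties. A short sketch of using them when submitting a query, assuming the installed google-cloud-bigquery version exposes write_incremental_results and max_slots as this patch adds them:

from google.cloud import bigquery

config = bigquery.QueryJobConfig()
config.write_incremental_results = True  # opt in to incremental result writes
config.max_slots = 99                    # cap slot usage for this query

client = bigquery.Client()
job = client.query("SELECT 1 AS x", job_config=config)
print(job.result().total_rows)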
diff --git a/tests/unit/job/test_query_job_retry.py b/tests/unit/job/test_query_job_retry.py
new file mode 100644
index 000000000..c8355b688
--- /dev/null
+++ b/tests/unit/job/test_query_job_retry.py
@@ -0,0 +1,229 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from unittest import mock
+
+import concurrent.futures
+import freezegun
+from google.api_core import exceptions
+import google.api_core.retry
+import pytest
+
+from google.cloud.bigquery.client import _MIN_GET_QUERY_RESULTS_TIMEOUT
+from google.cloud.bigquery.job import QueryJob
+from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
+from google.cloud.bigquery.table import RowIterator
+
+from ..helpers import make_connection
+from .helpers import _make_client
+
+
+PROJECT = "test-project"
+JOB_ID = "test-job-id"
+QUERY = "select count(*) from persons"
+
+
+def _make_resource(started=False, ended=False, location="US"):
+ resource = {
+ "jobReference": {"projectId": PROJECT, "jobId": JOB_ID, "location": location},
+ "status": {"state": "PENDING"},
+ "configuration": {
+ "query": {"query": QUERY},
+ "job_type": "query",
+ },
+ "statistics": {"creationTime": "1"},
+ }
+
+ if started:
+ resource["status"]["state"] = "RUNNING"
+ resource["statistics"]["startTime"] = "2"
+
+ if ended:
+ resource["status"]["state"] = "DONE"
+ resource["statistics"]["endTime"] = "3"
+
+ return resource
+
+
+def test_result_w_custom_retry(global_time_lock):
+ query_resource = {
+ "jobComplete": False,
+ "jobReference": {"projectId": PROJECT, "jobId": JOB_ID},
+ }
+ query_resource_done = {
+ "jobComplete": True,
+ "jobReference": {"projectId": PROJECT, "jobId": JOB_ID},
+ "schema": {"fields": [{"name": "col1", "type": "STRING"}]},
+ "totalRows": "2",
+ }
+ job_resource = _make_resource(started=True, location="asia-northeast1")
+ job_resource_done = _make_resource(
+ started=True, ended=True, location="asia-northeast1"
+ )
+ job_resource_done["configuration"]["query"]["destinationTable"] = {
+ "projectId": "dest-project",
+ "datasetId": "dest_dataset",
+ "tableId": "dest_table",
+ }
+
+ connection = make_connection(
+ # For each API request, first raise an exception that the custom
+ # retry below treats as retryable. Because of this, each polling
+ # iteration issues jobs.get (x2) & jobs.getQueryResults (x2).
+ exceptions.NotFound("not normally retriable"),
+ job_resource,
+ exceptions.NotFound("not normally retriable"),
+ query_resource,
+ # Query still not done, repeat both.
+ exceptions.NotFound("not normally retriable"),
+ job_resource,
+ exceptions.NotFound("not normally retriable"),
+ query_resource,
+ exceptions.NotFound("not normally retriable"),
+ # Third iteration: jobs.get now reports the job as DONE.
+ job_resource_done,
+ exceptions.NotFound("not normally retriable"),
+ query_resource_done,
+ # Query finished!
+ )
+ client = _make_client(PROJECT, connection=connection)
+ job = QueryJob.from_api_repr(job_resource, client)
+
+ custom_predicate = mock.Mock()
+ custom_predicate.return_value = True
+ custom_retry = google.api_core.retry.Retry(
+ initial=0.001,
+ maximum=0.001,
+ multiplier=1.0,
+ deadline=0.1,
+ predicate=custom_predicate,
+ )
+
+ assert isinstance(job.result(retry=custom_retry), RowIterator)
+ query_results_call = mock.call(
+ method="GET",
+ path=f"/projects/{PROJECT}/queries/{JOB_ID}",
+ query_params={"maxResults": 0, "location": "asia-northeast1"},
+ timeout=mock.ANY,
+ )
+ reload_call = mock.call(
+ method="GET",
+ path=f"/projects/{PROJECT}/jobs/{JOB_ID}",
+ query_params={"projection": "full", "location": "asia-northeast1"},
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
+ )
+
+ connection.api_request.assert_has_calls(
+ [
+ reload_call,
+ reload_call,
+ query_results_call,
+ query_results_call,
+ reload_call,
+ reload_call,
+ query_results_call,
+ query_results_call,
+ reload_call,
+ reload_call,
+ query_results_call,
+ query_results_call,
+ ]
+ )
+
+
+def test_result_w_timeout_doesnt_raise(global_time_lock):
+ begun_resource = _make_resource()
+ query_resource = {
+ "jobComplete": True,
+ "jobReference": {"projectId": PROJECT, "jobId": JOB_ID},
+ "schema": {"fields": [{"name": "col1", "type": "STRING"}]},
+ }
+ done_resource = begun_resource.copy()
+ done_resource["status"] = {"state": "DONE"}
+ connection = make_connection(begun_resource, query_resource, done_resource)
+ client = _make_client(project=PROJECT, connection=connection)
+ job = QueryJob(JOB_ID, QUERY, client)
+ job._properties["jobReference"]["location"] = "US"
+ job._properties["status"] = {"state": "RUNNING"}
+
+ with freezegun.freeze_time("1970-01-01 00:00:00", tick=False):
+ job.result(
+ timeout=1.125,
+ )
+
+ reload_call = mock.call(
+ method="GET",
+ path=f"/projects/{PROJECT}/jobs/{JOB_ID}",
+ query_params={"projection": "full", "location": "US"},
+ timeout=1.125,
+ )
+ get_query_results_call = mock.call(
+ method="GET",
+ path=f"/projects/{PROJECT}/queries/{JOB_ID}",
+ query_params={
+ "maxResults": 0,
+ "location": "US",
+ },
+ timeout=_MIN_GET_QUERY_RESULTS_TIMEOUT,
+ )
+ connection.api_request.assert_has_calls(
+ [
+ reload_call,
+ get_query_results_call,
+ reload_call,
+ ]
+ )
+
+
+def test_result_w_timeout_raises_concurrent_futures_timeout(global_time_lock):
+ begun_resource = _make_resource()
+ begun_resource["jobReference"]["location"] = "US"
+ query_resource = {
+ "jobComplete": True,
+ "jobReference": {"projectId": PROJECT, "jobId": JOB_ID},
+ "schema": {"fields": [{"name": "col1", "type": "STRING"}]},
+ }
+ done_resource = begun_resource.copy()
+ done_resource["status"] = {"state": "DONE"}
+ connection = make_connection(begun_resource, query_resource, done_resource)
+ client = _make_client(project=PROJECT, connection=connection)
+ job = QueryJob(JOB_ID, QUERY, client)
+ job._properties["jobReference"]["location"] = "US"
+ job._properties["status"] = {"state": "RUNNING"}
+
+ with freezegun.freeze_time(
+ "1970-01-01 00:00:00", auto_tick_seconds=1.0
+ ), pytest.raises(concurrent.futures.TimeoutError):
+ job.result(timeout=1.125)
+
+ reload_call = mock.call(
+ method="GET",
+ path=f"/projects/{PROJECT}/jobs/{JOB_ID}",
+ query_params={"projection": "full", "location": "US"},
+ timeout=1.125,
+ )
+ get_query_results_call = mock.call(
+ method="GET",
+ path=f"/projects/{PROJECT}/queries/{JOB_ID}",
+ query_params={
+ "maxResults": 0,
+ "location": "US",
+ },
+ timeout=_MIN_GET_QUERY_RESULTS_TIMEOUT,
+ )
+ connection.api_request.assert_has_calls(
+ [
+ reload_call,
+ get_query_results_call,
+ ]
+ )
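The new test_query_job_retry.py module asserts the polling pattern when result() is given a custom retry and a timeout. A usage-level sketch, assuming a configured client; the NotFound predicate here mirrors the "not normally retriable" errors the tests inject:

import google.api_core.retry
from google.api_core import exceptions
from google.cloud import bigquery

client = bigquery.Client()
job = client.query("SELECT COUNT(*) FROM `bigquery-public-data.samples.shakespeare`")

custom_retry = google.api_core.retry.Retry(
    initial=0.1,
    maximum=1.0,
    multiplier=2.0,
    deadline=30.0,
    predicate=google.api_core.retry.if_exception_type(exceptions.NotFound),
)

# retry= governs the jobs.get / jobs.getQueryResults polling calls, while
# timeout= bounds the overall wait and raises concurrent.futures.TimeoutError
# if the job has not finished by then.
rows = job.result(retry=custom_retry, timeout=60.0)
print(list(rows))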
diff --git a/tests/unit/job/test_query_pandas.py b/tests/unit/job/test_query_pandas.py
index 84aab3aca..a6c59b158 100644
--- a/tests/unit/job/test_query_pandas.py
+++ b/tests/unit/job/test_query_pandas.py
@@ -15,47 +15,47 @@
import concurrent.futures
import copy
import json
+from unittest import mock
-import mock
-import pyarrow
import pytest
-from google.cloud import bigquery_storage
-import google.cloud.bigquery_storage_v1.reader
-import google.cloud.bigquery_storage_v1.services.big_query_read.client
+from ..helpers import make_connection
+from .helpers import _make_client
+from .helpers import _make_job_resource
+from google.cloud.bigquery.enums import DefaultPandasDTypes
try:
- import pandas
-except (ImportError, AttributeError): # pragma: NO COVER
- pandas = None
+ from google.cloud import bigquery_storage
+ import google.cloud.bigquery_storage_v1.reader
+ import google.cloud.bigquery_storage_v1.services.big_query_read.client
+except (ImportError, AttributeError):
+ bigquery_storage = None
+
+
try:
import shapely
-except (ImportError, AttributeError): # pragma: NO COVER
+except (ImportError, AttributeError):
shapely = None
try:
import geopandas
-except (ImportError, AttributeError): # pragma: NO COVER
+except (ImportError, AttributeError):
geopandas = None
try:
- from tqdm import tqdm
-except (ImportError, AttributeError): # pragma: NO COVER
+ import tqdm
+except (ImportError, AttributeError):
tqdm = None
-from ..helpers import make_connection
-from .helpers import _make_client
-from .helpers import _make_job_resource
+try:
+ import pyarrow
+ import pyarrow.types
+except ImportError:
+ pyarrow = None
pandas = pytest.importorskip("pandas")
@pytest.fixture
def table_read_options_kwarg():
- # Create a BigQuery Storage table read options object with pyarrow compression
- # enabled if a recent-enough version of google-cloud-bigquery-storage dependency is
- # installed to support the compression.
- if not hasattr(bigquery_storage, "ArrowSerializationOptions"):
- return {}
-
read_options = bigquery_storage.ReadSession.TableReadOptions(
arrow_serialization_options=bigquery_storage.ArrowSerializationOptions(
buffer_compression=bigquery_storage.ArrowSerializationOptions.CompressionCodec.LZ4_FRAME
@@ -89,6 +89,9 @@ def test__contains_order_by(query, expected):
assert not mut._contains_order_by(query)
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
@pytest.mark.parametrize(
"query",
(
@@ -179,6 +182,7 @@ def test_to_dataframe_bqstorage_preserve_order(query, table_read_options_kwarg):
)
+@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
def test_to_arrow():
from google.cloud.bigquery.job import QueryJob as target_class
@@ -265,6 +269,7 @@ def test_to_arrow():
]
+@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
def test_to_arrow_max_results_no_progress_bar():
from google.cloud.bigquery import table
from google.cloud.bigquery.job import QueryJob as target_class
@@ -300,8 +305,10 @@ def test_to_arrow_max_results_no_progress_bar():
assert tbl.num_rows == 2
+@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`")
-def test_to_arrow_w_tqdm_w_query_plan():
+@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm")
+def test_to_arrow_w_tqdm_w_query_plan(tqdm_mock):
from google.cloud.bigquery import table
from google.cloud.bigquery.job import QueryJob as target_class
from google.cloud.bigquery.schema import SchemaField
@@ -344,20 +351,21 @@ def test_to_arrow_w_tqdm_w_query_plan():
row_iterator,
],
)
-
- with result_patch as result_patch_tqdm, reload_patch:
+ with result_patch as tqdm_mock, reload_patch:
tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False)
- assert result_patch_tqdm.call_count == 3
+ assert tqdm_mock.call_count == 3
assert isinstance(tbl, pyarrow.Table)
assert tbl.num_rows == 2
- result_patch_tqdm.assert_called_with(
+ tqdm_mock.assert_called_with(
timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
)
+@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`")
-def test_to_arrow_w_tqdm_w_pending_status():
+@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm")
+def test_to_arrow_w_tqdm_w_pending_status(tqdm_mock):
from google.cloud.bigquery import table
from google.cloud.bigquery.job import QueryJob as target_class
from google.cloud.bigquery.schema import SchemaField
@@ -396,20 +404,21 @@ def test_to_arrow_w_tqdm_w_pending_status():
"google.cloud.bigquery.job.QueryJob.result",
side_effect=[concurrent.futures.TimeoutError, row_iterator],
)
-
- with result_patch as result_patch_tqdm, reload_patch:
+ with result_patch as tqdm_mock, reload_patch:
tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False)
- assert result_patch_tqdm.call_count == 2
+ assert tqdm_mock.call_count == 2
assert isinstance(tbl, pyarrow.Table)
assert tbl.num_rows == 2
- result_patch_tqdm.assert_called_with(
+ tqdm_mock.assert_called_with(
timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
)
+@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`")
-def test_to_arrow_w_tqdm_wo_query_plan():
+@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm")
+def test_to_arrow_w_tqdm_wo_query_plan(tqdm_mock):
from google.cloud.bigquery import table
from google.cloud.bigquery.job import QueryJob as target_class
from google.cloud.bigquery.schema import SchemaField
@@ -439,14 +448,13 @@ def test_to_arrow_w_tqdm_wo_query_plan():
"google.cloud.bigquery.job.QueryJob.result",
side_effect=[concurrent.futures.TimeoutError, row_iterator],
)
-
- with result_patch as result_patch_tqdm, reload_patch:
+ with result_patch as tqdm_mock, reload_patch:
tbl = job.to_arrow(progress_bar_type="tqdm", create_bqstorage_client=False)
- assert result_patch_tqdm.call_count == 2
+ assert tqdm_mock.call_count == 2
assert isinstance(tbl, pyarrow.Table)
assert tbl.num_rows == 2
- result_patch_tqdm.assert_called()
+ tqdm_mock.assert_called()
def _make_job(schema=(), rows=()):
@@ -510,6 +518,9 @@ def test_to_dataframe_ddl_query():
assert len(df) == 0
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
def test_to_dataframe_bqstorage(table_read_options_kwarg):
from google.cloud.bigquery.job import QueryJob as target_class
@@ -540,7 +551,7 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg):
[name_array, age_array], schema=arrow_schema
)
connection = make_connection(query_resource)
- client = _make_client(connection=connection)
+ client = _make_client(connection=connection, project="bqstorage-billing-project")
job = target_class.from_api_repr(resource, client)
session = bigquery_storage.types.ReadSession()
session.arrow_schema.serialized_schema = arrow_schema.serialize().to_pybytes()
@@ -577,13 +588,18 @@ def test_to_dataframe_bqstorage(table_read_options_kwarg):
**table_read_options_kwarg,
)
bqstorage_client.create_read_session.assert_called_once_with(
- parent=f"projects/{client.project}",
+ # The billing project can differ from the data project. Make sure we
+ # are charging to the billing project, not the data project.
+ parent="projects/bqstorage-billing-project",
read_session=expected_session,
max_stream_count=0, # Use default number of streams for best performance.
)
bqstorage_client.read_rows.assert_called_once_with(stream_id)
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
def test_to_dataframe_bqstorage_no_pyarrow_compression():
from google.cloud.bigquery.job import QueryJob as target_class
@@ -595,7 +611,7 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression():
"schema": {"fields": [{"name": "name", "type": "STRING", "mode": "NULLABLE"}]},
}
connection = make_connection(query_resource)
- client = _make_client(connection=connection)
+ client = _make_client(connection=connection, project="bqstorage-billing-project")
job = target_class.from_api_repr(resource, client)
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
session = bigquery_storage.types.ReadSession()
@@ -623,12 +639,15 @@ def test_to_dataframe_bqstorage_no_pyarrow_compression():
data_format=bigquery_storage.DataFormat.ARROW,
)
bqstorage_client.create_read_session.assert_called_once_with(
- parent=f"projects/{client.project}",
+ # The billing project can differ from the data project. Make sure we
+ # are charging to the billing project, not the data project.
+ parent="projects/bqstorage-billing-project",
read_session=expected_session,
max_stream_count=0,
)
+@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
def test_to_dataframe_column_dtypes():
from google.cloud.bigquery.job import QueryJob as target_class
@@ -679,7 +698,6 @@ def test_to_dataframe_column_dtypes():
exp_columns = [field["name"] for field in query_resource["schema"]["fields"]]
assert list(df) == exp_columns # verify the column names
- assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]"
assert df.seconds.dtype.name == "Int64"
assert df.miles.dtype.name == "float64"
assert df.km.dtype.name == "float16"
@@ -687,6 +705,11 @@ def test_to_dataframe_column_dtypes():
assert df.complete.dtype.name == "boolean"
assert df.date.dtype.name == "dbdate"
+ if pandas.__version__.startswith("2."):
+ assert df.start_timestamp.dtype.name == "datetime64[us, UTC]"
+ else:
+ assert df.start_timestamp.dtype.name == "datetime64[ns, UTC]"
+
def test_to_dataframe_column_date_dtypes():
from google.cloud.bigquery.job import QueryJob as target_class
@@ -720,7 +743,7 @@ def test_to_dataframe_column_date_dtypes():
@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`")
-@mock.patch("tqdm.tqdm")
+@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm")
def test_to_dataframe_with_progress_bar(tqdm_mock):
from google.cloud.bigquery.job import QueryJob as target_class
@@ -744,14 +767,15 @@ def test_to_dataframe_with_progress_bar(tqdm_mock):
job = target_class.from_api_repr(begun_resource, client)
job.to_dataframe(progress_bar_type=None, create_bqstorage_client=False)
- tqdm_mock.assert_not_called()
+ tqdm_mock.tqdm.assert_not_called()
job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False)
- tqdm_mock.assert_called()
+ tqdm_mock.tqdm.assert_called()
@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`")
-def test_to_dataframe_w_tqdm_pending():
+@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm")
+def test_to_dataframe_w_tqdm_pending(tqdm_mock):
from google.cloud.bigquery import table
from google.cloud.bigquery.job import QueryJob as target_class
from google.cloud.bigquery.schema import SchemaField
@@ -780,7 +804,7 @@ def test_to_dataframe_w_tqdm_pending():
job._properties["statistics"] = {
"query": {
"queryPlan": [
- {"name": "S00: Input", "id": "0", "status": "PRNDING"},
+ {"name": "S00: Input", "id": "0", "status": "PENDING"},
{"name": "S01: Output", "id": "1", "status": "COMPLETE"},
]
},
@@ -792,21 +816,21 @@ def test_to_dataframe_w_tqdm_pending():
"google.cloud.bigquery.job.QueryJob.result",
side_effect=[concurrent.futures.TimeoutError, row_iterator],
)
-
- with result_patch as result_patch_tqdm, reload_patch:
+ with result_patch as tqdm_mock, reload_patch:
df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False)
- assert result_patch_tqdm.call_count == 2
+ assert tqdm_mock.call_count == 2
assert isinstance(df, pandas.DataFrame)
assert len(df) == 4 # verify the number of rows
assert list(df) == ["name", "age"] # verify the column names
- result_patch_tqdm.assert_called_with(
+ tqdm_mock.assert_called_with(
timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
)
@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`")
-def test_to_dataframe_w_tqdm():
+@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm")
+def test_to_dataframe_w_tqdm(tqdm_mock):
from google.cloud.bigquery import table
from google.cloud.bigquery.job import QueryJob as target_class
from google.cloud.bigquery.schema import SchemaField
@@ -852,20 +876,21 @@ def test_to_dataframe_w_tqdm():
],
)
- with result_patch as result_patch_tqdm, reload_patch:
+ with result_patch as tqdm_mock, reload_patch:
df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False)
- assert result_patch_tqdm.call_count == 3
+ assert tqdm_mock.call_count == 3
assert isinstance(df, pandas.DataFrame)
assert len(df) == 4 # verify the number of rows
assert list(df), ["name", "age"] # verify the column names
- result_patch_tqdm.assert_called_with(
+ tqdm_mock.assert_called_with(
timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=None
)
@pytest.mark.skipif(tqdm is None, reason="Requires `tqdm`")
-def test_to_dataframe_w_tqdm_max_results():
+@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm")
+def test_to_dataframe_w_tqdm_max_results(tqdm_mock):
from google.cloud.bigquery import table
from google.cloud.bigquery.job import QueryJob as target_class
from google.cloud.bigquery.schema import SchemaField
@@ -901,16 +926,13 @@ def test_to_dataframe_w_tqdm_max_results():
"google.cloud.bigquery.job.QueryJob.result",
side_effect=[concurrent.futures.TimeoutError, row_iterator],
)
-
- with result_patch as result_patch_tqdm, reload_patch:
+ with result_patch as tqdm_mock, reload_patch:
job.to_dataframe(
progress_bar_type="tqdm", create_bqstorage_client=False, max_results=3
)
- assert result_patch_tqdm.call_count == 2
- result_patch_tqdm.assert_called_with(
- timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3
- )
+ assert tqdm_mock.call_count == 2
+ tqdm_mock.assert_called_with(timeout=_PROGRESS_BAR_UPDATE_INTERVAL, max_results=3)
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
@@ -997,5 +1019,9 @@ def test_query_job_to_geodataframe_delegation(wait_for_query):
progress_bar_type=progress_bar_type,
create_bqstorage_client=create_bqstorage_client,
geography_column=geography_column,
+ bool_dtype=DefaultPandasDTypes.BOOL_DTYPE,
+ int_dtype=DefaultPandasDTypes.INT_DTYPE,
+ float_dtype=None,
+ string_dtype=None,
)
assert df is row_iterator.to_geodataframe.return_value
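The pandas tests above patch tqdm at google.cloud.bigquery._tqdm_helpers and check that BigQuery Storage read sessions bill to the client's project. A usage sketch, assuming pandas and tqdm are installed alongside a configured client:

from google.cloud import bigquery

client = bigquery.Client()
job = client.query(
    "SELECT word, word_count FROM `bigquery-public-data.samples.shakespeare` LIMIT 1000"
)

# progress_bar_type="tqdm" goes through google.cloud.bigquery._tqdm_helpers,
# which is why the tests patch tqdm at that import path rather than "tqdm.tqdm".
df = job.to_dataframe(progress_bar_type="tqdm", create_bqstorage_client=False)
print(df.dtypes)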
diff --git a/tests/unit/job/test_query_stats.py b/tests/unit/job/test_query_stats.py
index 13e022ced..c7c7a31e0 100644
--- a/tests/unit/job/test_query_stats.py
+++ b/tests/unit/job/test_query_stats.py
@@ -13,6 +13,7 @@
# limitations under the License.
from .helpers import _Base
+import datetime
class TestBiEngineStats:
@@ -108,6 +109,75 @@ def test_from_api_repr_full_stats(self):
assert result.updated_row_count == 4
+class TestSearchStatistics:
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job.query import SearchStats
+
+ return SearchStats
+
+ def _make_one(self, *args, **kwargs):
+ return self._get_target_class()(*args, **kwargs)
+
+ def test_ctor_defaults(self):
+ search_stats = self._make_one()
+ assert search_stats.mode is None
+ assert search_stats.reason == []
+
+ def test_from_api_repr_unspecified(self):
+ klass = self._get_target_class()
+ result = klass.from_api_repr(
+ {"indexUsageMode": "INDEX_USAGE_MODE_UNSPECIFIED", "indexUnusedReasons": []}
+ )
+
+ assert isinstance(result, klass)
+ assert result.mode == "INDEX_USAGE_MODE_UNSPECIFIED"
+ assert result.reason == []
+
+
+class TestIndexUnusedReason:
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job.query import IndexUnusedReason
+
+ return IndexUnusedReason
+
+ def _make_one(self, *args, **kwargs):
+ return self._get_target_class()(*args, **kwargs)
+
+ def test_ctor_defaults(self):
+ search_reason = self._make_one()
+ assert search_reason.code is None
+ assert search_reason.message is None
+ assert search_reason.baseTable is None
+ assert search_reason.indexName is None
+
+ def test_from_api_repr_unspecified(self):
+ klass = self._get_target_class()
+ result = klass.from_api_repr(
+ {
+ "code": "INDEX_CONFIG_NOT_AVAILABLE",
+ "message": "There is no search index...",
+ "baseTable": {
+ "projectId": "bigquery-public-data",
+ "datasetId": "usa_names",
+ "tableId": "usa_1910_current",
+ },
+ "indexName": None,
+ }
+ )
+
+ assert isinstance(result, klass)
+ assert result.code == "INDEX_CONFIG_NOT_AVAILABLE"
+ assert result.message == "There is no search index..."
+ assert result.baseTable == {
+ "projectId": "bigquery-public-data",
+ "datasetId": "usa_names",
+ "tableId": "usa_1910_current",
+ }
+ assert result.indexName is None
+
+
class TestQueryPlanEntryStep(_Base):
KIND = "KIND"
SUBSTEPS = ("SUB1", "SUB2")
@@ -192,6 +262,7 @@ class TestQueryPlanEntry(_Base):
STATUS = "STATUS"
SHUFFLE_OUTPUT_BYTES = 1024
SHUFFLE_OUTPUT_BYTES_SPILLED = 1
+ SLOT_MS = 25
START_RFC3339_MICROS = "2018-04-01T00:00:00.000000Z"
END_RFC3339_MICROS = "2018-04-01T00:00:04.000000Z"
@@ -236,6 +307,7 @@ def test_from_api_repr_empty(self):
self.assertIsNone(entry.shuffle_output_bytes)
self.assertIsNone(entry.shuffle_output_bytes_spilled)
self.assertEqual(entry.steps, [])
+ self.assertIsNone(entry.slot_ms)
def test_from_api_repr_normal(self):
from google.cloud.bigquery.job import QueryPlanEntryStep
@@ -279,6 +351,7 @@ def test_from_api_repr_normal(self):
"substeps": TestQueryPlanEntryStep.SUBSTEPS,
}
],
+ "slotMs": self.SLOT_MS,
}
klass = self._get_target_class()
@@ -297,6 +370,7 @@ def test_from_api_repr_normal(self):
self.assertEqual(entry.records_written, self.RECORDS_WRITTEN)
self.assertEqual(entry.status, self.STATUS)
self.assertEqual(entry.steps, steps)
+ self.assertEqual(entry.slot_ms, self.SLOT_MS)
def test_start(self):
from google.cloud._helpers import _RFC3339_MICROS
@@ -447,3 +521,63 @@ def test_from_api_repr_normal(self):
self.assertEqual(entry.pending_units, self.PENDING_UNITS)
self.assertEqual(entry.completed_units, self.COMPLETED_UNITS)
self.assertEqual(entry.slot_millis, self.SLOT_MILLIS)
+
+
+class TestIncrementalResultStats:
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.job import IncrementalResultStats
+
+ return IncrementalResultStats
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor_defaults(self):
+ stats = self._make_one()
+ assert stats.disabled_reason is None
+ assert stats.result_set_last_replace_time is None
+ assert stats.result_set_last_modify_time is None
+
+ def test_from_api_repr_partial_stats(self):
+ klass = self._get_target_class()
+ stats = klass.from_api_repr({"disabledReason": "FOO"})
+
+ assert isinstance(stats, klass)
+ assert stats.disabled_reason == "FOO"
+ assert stats.result_set_last_replace_time is None
+ assert stats.result_set_last_modify_time is None
+
+ def test_from_api_repr_full_stats(self):
+ klass = self._get_target_class()
+ stats = klass.from_api_repr(
+ {
+ "disabledReason": "BAR",
+ "resultSetLastReplaceTime": "2025-01-02T03:04:05.06Z",
+ "resultSetLastModifyTime": "2025-02-02T02:02:02.02Z",
+ }
+ )
+
+ assert isinstance(stats, klass)
+ assert stats.disabled_reason == "BAR"
+ assert stats.result_set_last_replace_time == datetime.datetime(
+ 2025, 1, 2, 3, 4, 5, 60000, tzinfo=datetime.timezone.utc
+ )
+ assert stats.result_set_last_modify_time == datetime.datetime(
+ 2025, 2, 2, 2, 2, 2, 20000, tzinfo=datetime.timezone.utc
+ )
+
+ def test_from_api_repr_invalid_stats(self):
+ klass = self._get_target_class()
+ stats = klass.from_api_repr(
+ {
+ "disabledReason": "BAR",
+ "resultSetLastReplaceTime": "xxx",
+ "resultSetLastModifyTime": "yyy",
+ }
+ )
+
+ assert isinstance(stats, klass)
+ assert stats.disabled_reason == "BAR"
+ assert stats.result_set_last_replace_time is None
+ assert stats.result_set_last_modify_time is None
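The statistics tests above hydrate the new classes straight from REST payloads. A small sketch using the same shapes the tests assert on:

from google.cloud.bigquery.job.query import IndexUnusedReason, SearchStats

stats = SearchStats.from_api_repr(
    {"indexUsageMode": "INDEX_USAGE_MODE_UNSPECIFIED", "indexUnusedReasons": []}
)
print(stats.mode, stats.reason)

reason = IndexUnusedReason.from_api_repr(
    {"code": "INDEX_CONFIG_NOT_AVAILABLE", "message": "There is no search index..."}
)
print(reason.code, reason.message)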
diff --git a/tests/unit/line_arg_parser/test_parser.py b/tests/unit/line_arg_parser/test_parser.py
index 3f9e9ff41..b170d536a 100644
--- a/tests/unit/line_arg_parser/test_parser.py
+++ b/tests/unit/line_arg_parser/test_parser.py
@@ -44,7 +44,7 @@ def test_consume_unexpected_eol(parser_class):
fake_lexer = [Token(TokenType.EOL, lexeme="", pos=0)]
parser = parser_class(fake_lexer)
- with pytest.raises(ParseError, match=r"Unexpected end of input.*expected COLON.*"):
+ with pytest.raises(ParseError, match=r"Unexpected end of input.*expected.*COLON.*"):
parser.consume(TokenType.COLON)
diff --git a/tests/unit/model/test_model.py b/tests/unit/model/test_model.py
index 1ae988414..279a954c7 100644
--- a/tests/unit/model/test_model.py
+++ b/tests/unit/model/test_model.py
@@ -18,7 +18,9 @@
import pytest
+
import google.cloud._helpers
+import google.cloud.bigquery.model
KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1"
@@ -136,6 +138,7 @@ def test_from_api_repr(target_class):
google.cloud._helpers._rfc3339_to_datetime(got.training_runs[2]["startTime"])
== expiration_time
)
+ assert got.transform_columns == []
def test_from_api_repr_w_minimal_resource(target_class):
@@ -293,6 +296,71 @@ def test_feature_columns(object_under_test):
assert object_under_test.feature_columns == expected
+def test_from_api_repr_w_transform_columns(target_class):
+ resource = {
+ "modelReference": {
+ "projectId": "my-project",
+ "datasetId": "my_dataset",
+ "modelId": "my_model",
+ },
+ "transformColumns": [
+ {
+ "name": "transform_name",
+ "type": {"typeKind": "INT64"},
+ "transformSql": "transform_sql",
+ }
+ ],
+ }
+ got = target_class.from_api_repr(resource)
+ assert len(got.transform_columns) == 1
+ transform_column = got.transform_columns[0]
+ assert isinstance(transform_column, google.cloud.bigquery.model.TransformColumn)
+ assert transform_column.name == "transform_name"
+
+
+def test_transform_column_name():
+ transform_columns = google.cloud.bigquery.model.TransformColumn(
+ {"name": "is_female"}
+ )
+ assert transform_columns.name == "is_female"
+
+
+def test_transform_column_transform_sql():
+ transform_columns = google.cloud.bigquery.model.TransformColumn(
+ {"transformSql": "is_female"}
+ )
+ assert transform_columns.transform_sql == "is_female"
+
+
+def test_transform_column_type():
+ transform_columns = google.cloud.bigquery.model.TransformColumn(
+ {"type": {"typeKind": "BOOL"}}
+ )
+ assert transform_columns.type_.type_kind == "BOOL"
+
+
+def test_transform_column_type_none():
+ transform_columns = google.cloud.bigquery.model.TransformColumn({})
+ assert transform_columns.type_ is None
+
+
+def test_transform_column_from_api_repr_with_unknown_properties():
+ transform_column = google.cloud.bigquery.model.TransformColumn.from_api_repr(
+ {
+ "name": "is_female",
+ "type": {"typeKind": "BOOL"},
+ "transformSql": "is_female",
+ "test": "one",
+ }
+ )
+ assert transform_column._properties == {
+ "name": "is_female",
+ "type": {"typeKind": "BOOL"},
+ "transformSql": "is_female",
+ "test": "one",
+ }
+
+
def test_label_columns(object_under_test):
from google.cloud.bigquery import standard_sql
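The model tests above read transform columns off a model resource. A small sketch using the payload shape from those tests:

import google.cloud.bigquery.model

column = google.cloud.bigquery.model.TransformColumn.from_api_repr(
    {
        "name": "transform_name",
        "type": {"typeKind": "INT64"},
        "transformSql": "transform_sql",
    }
)
print(column.name, column.transform_sql, column.type_.type_kind)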
diff --git a/tests/unit/routine/test_external_runtime_options.py b/tests/unit/routine/test_external_runtime_options.py
new file mode 100644
index 000000000..d4edaae9a
--- /dev/null
+++ b/tests/unit/routine/test_external_runtime_options.py
@@ -0,0 +1,191 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2024 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+
+@pytest.fixture
+def target_class():
+ from google.cloud.bigquery.routine.routine import ExternalRuntimeOptions
+
+ return ExternalRuntimeOptions
+
+
+@pytest.fixture
+def object_under_test(target_class):
+ return target_class()
+
+
+def test_ctor(target_class):
+ container_memory = "1G"
+ container_cpu = 1
+ runtime_connection = (
+ "projects/my-project/locations/us-central1/connections/my-connection"
+ )
+ max_batching_rows = 100
+ runtime_version = "python-3.11"
+
+ instance = target_class(
+ container_memory=container_memory,
+ container_cpu=container_cpu,
+ runtime_connection=runtime_connection,
+ max_batching_rows=max_batching_rows,
+ runtime_version=runtime_version,
+ )
+
+ assert instance.container_memory == container_memory
+ assert instance.container_cpu == container_cpu
+ assert instance.runtime_connection == runtime_connection
+ assert instance.max_batching_rows == max_batching_rows
+ assert instance.runtime_version == runtime_version
+
+
+def test_container_memory(object_under_test):
+ container_memory = "512Mi"
+ object_under_test.container_memory = container_memory
+ assert object_under_test.container_memory == container_memory
+
+
+def test_container_cpu(object_under_test):
+ container_cpu = 1
+ object_under_test.container_cpu = container_cpu
+ assert object_under_test.container_cpu == container_cpu
+
+
+def test_runtime_connection(object_under_test):
+ runtime_connection = (
+ "projects/my-project/locations/us-central1/connections/my-connection"
+ )
+ object_under_test.runtime_connection = runtime_connection
+ assert object_under_test.runtime_connection == runtime_connection
+
+
+def test_max_batching_rows(object_under_test):
+ max_batching_rows = 100
+ object_under_test.max_batching_rows = max_batching_rows
+ assert object_under_test.max_batching_rows == max_batching_rows
+
+
+def test_runtime_version(object_under_test):
+ runtime_version = "python-3.11"
+ object_under_test.runtime_version = runtime_version
+ assert object_under_test.runtime_version == runtime_version
+
+
+def test_ctor_w_properties(target_class):
+ properties = {
+ "containerMemory": "1G",
+ "containerCpu": 1,
+ }
+ instance = target_class(_properties=properties)
+ assert instance._properties == properties
+
+
+def test_ne(target_class):
+ instance1 = target_class(container_memory="1G")
+ instance2 = target_class(container_memory="2G")
+ assert instance1 != instance2
+
+
+def test_ne_false(target_class):
+ instance1 = target_class(container_memory="1G")
+ instance2 = target_class(container_memory="1G")
+ assert not (instance1 != instance2)
+
+
+def test_eq_not_implemented(object_under_test):
+ assert not (object_under_test == object())
+ assert object_under_test != object()
+
+
+def test_from_api_repr(target_class):
+ resource = {
+ "containerMemory": "1G",
+ "containerCpu": 1,
+ "runtimeConnection": "projects/my-project/locations/us-central1/connections/my-connection",
+ "maxBatchingRows": "100",
+ "runtimeVersion": "python-3.11",
+ }
+ instance = target_class.from_api_repr(resource)
+
+ assert instance.container_memory == "1G"
+ assert instance.container_cpu == 1
+ assert (
+ instance.runtime_connection
+ == "projects/my-project/locations/us-central1/connections/my-connection"
+ )
+ assert instance.max_batching_rows == 100
+ assert instance.runtime_version == "python-3.11"
+
+
+def test_to_api_repr(target_class):
+ instance = target_class(
+ container_memory="1G",
+ container_cpu=1,
+ runtime_connection="projects/my-project/locations/us-central1/connections/my-connection",
+ max_batching_rows=100,
+ runtime_version="python-3.11",
+ )
+ resource = instance.to_api_repr()
+
+ assert resource == {
+ "containerMemory": "1G",
+ "containerCpu": 1,
+ "runtimeConnection": "projects/my-project/locations/us-central1/connections/my-connection",
+ "maxBatchingRows": "100",
+ "runtimeVersion": "python-3.11",
+ }
+
+
+def test_repr(target_class):
+ instance = target_class(
+ container_memory="1G",
+ container_cpu=1,
+ )
+ expected_repr = (
+ "ExternalRuntimeOptions(container_cpu=1, container_memory='1G', "
+ "max_batching_rows=None, runtime_connection=None, runtime_version=None)"
+ )
+ assert repr(instance) == expected_repr
+
+
+def test_invalid_container_memory(object_under_test):
+ with pytest.raises(ValueError, match="container_memory must be a string or None."):
+ object_under_test.container_memory = 123
+
+
+def test_invalid_container_cpu(object_under_test):
+ with pytest.raises(ValueError, match="container_cpu must be an integer or None."):
+ object_under_test.container_cpu = "1"
+
+
+def test_invalid_runtime_connection(object_under_test):
+ with pytest.raises(
+ ValueError, match="runtime_connection must be a string or None."
+ ):
+ object_under_test.runtime_connection = 123
+
+
+def test_invalid_max_batching_rows(object_under_test):
+ with pytest.raises(
+ ValueError, match="max_batching_rows must be an integer or None."
+ ):
+ object_under_test.max_batching_rows = "100"
+
+
+def test_invalid_runtime_version(object_under_test):
+ with pytest.raises(ValueError, match="runtime_version must be a string or None."):
+ object_under_test.runtime_version = 123
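The new ExternalRuntimeOptions tests above cover a property bag that round-trips through its REST form. A usage sketch with the same field names the tests assert on:

from google.cloud.bigquery.routine.routine import ExternalRuntimeOptions

options = ExternalRuntimeOptions(
    container_memory="1G",
    container_cpu=1,
    runtime_connection="projects/my-project/locations/us-central1/connections/my-connection",
    max_batching_rows=100,
    runtime_version="python-3.11",
)
resource = options.to_api_repr()
print(resource["maxBatchingRows"])  # serialized as the string "100"
roundtrip = ExternalRuntimeOptions.from_api_repr(resource)
print(roundtrip.max_batching_rows)  # parsed back to the integer 100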
diff --git a/tests/unit/routine/test_remote_function_options.py b/tests/unit/routine/test_remote_function_options.py
new file mode 100644
index 000000000..ffd57e8c1
--- /dev/null
+++ b/tests/unit/routine/test_remote_function_options.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+ENDPOINT = "https://some.endpoint"
+CONNECTION = "connection_string"
+MAX_BATCHING_ROWS = 50
+USER_DEFINED_CONTEXT = {
+ "foo": "bar",
+}
+
+
+@pytest.fixture
+def target_class():
+ from google.cloud.bigquery.routine import RemoteFunctionOptions
+
+ return RemoteFunctionOptions
+
+
+def test_ctor(target_class):
+ options = target_class(
+ endpoint=ENDPOINT,
+ connection=CONNECTION,
+ max_batching_rows=MAX_BATCHING_ROWS,
+ user_defined_context=USER_DEFINED_CONTEXT,
+ )
+ assert options.endpoint == ENDPOINT
+ assert options.connection == CONNECTION
+ assert options.max_batching_rows == MAX_BATCHING_ROWS
+ assert options.user_defined_context == USER_DEFINED_CONTEXT
+
+
+def test_empty_ctor(target_class):
+ options = target_class()
+ assert options._properties == {}
+ options = target_class(_properties=None)
+ assert options._properties == {}
+ options = target_class(_properties={})
+ assert options._properties == {}
+
+
+def test_ctor_bad_context(target_class):
+ with pytest.raises(ValueError, match="value must be dictionary"):
+ target_class(user_defined_context=[1, 2, 3, 4])
+
+
+def test_from_api_repr(target_class):
+ resource = {
+ "endpoint": ENDPOINT,
+ "connection": CONNECTION,
+ "maxBatchingRows": MAX_BATCHING_ROWS,
+ "userDefinedContext": USER_DEFINED_CONTEXT,
+ "someRandomField": "someValue",
+ }
+ options = target_class.from_api_repr(resource)
+ assert options.endpoint == ENDPOINT
+ assert options.connection == CONNECTION
+ assert options.max_batching_rows == MAX_BATCHING_ROWS
+ assert options.user_defined_context == USER_DEFINED_CONTEXT
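+    # Unknown fields returned by the API are preserved verbatim in _properties.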
+ assert options._properties["someRandomField"] == "someValue"
+
+
+def test_from_api_repr_w_minimal_resource(target_class):
+ resource = {}
+ options = target_class.from_api_repr(resource)
+ assert options.endpoint is None
+ assert options.connection is None
+ assert options.max_batching_rows is None
+ assert options.user_defined_context is None
+
+
+def test_from_api_repr_w_unknown_fields(target_class):
+ resource = {"thisFieldIsNotInTheProto": "just ignore me"}
+ options = target_class.from_api_repr(resource)
+ assert options._properties is resource
+
+
+def test_eq(target_class):
+ options = target_class(
+ endpoint=ENDPOINT,
+ connection=CONNECTION,
+ max_batching_rows=MAX_BATCHING_ROWS,
+ user_defined_context=USER_DEFINED_CONTEXT,
+ )
+ other_options = target_class(
+ endpoint=ENDPOINT,
+ connection=CONNECTION,
+ max_batching_rows=MAX_BATCHING_ROWS,
+ user_defined_context=USER_DEFINED_CONTEXT,
+ )
+ assert options == other_options
+ assert not (options != other_options)
+
+ empty_options = target_class()
+ assert not (options == empty_options)
+ assert options != empty_options
+
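+    # Comparing against an unrelated type must not raise; it is simply unequal.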
+ notanarg = object()
+ assert not (options == notanarg)
+ assert options != notanarg
+
+
+def test_repr(target_class):
+ options = target_class(
+ endpoint=ENDPOINT,
+ connection=CONNECTION,
+ max_batching_rows=MAX_BATCHING_ROWS,
+ user_defined_context=USER_DEFINED_CONTEXT,
+ )
+ actual_repr = repr(options)
+ assert actual_repr == (
+ "RemoteFunctionOptions(connection='connection_string', endpoint='https://some.endpoint', max_batching_rows=50, user_defined_context={'foo': 'bar'})"
+ )
diff --git a/tests/unit/routine/test_routine.py b/tests/unit/routine/test_routine.py
index 80a3def73..965c6b2eb 100644
--- a/tests/unit/routine/test_routine.py
+++ b/tests/unit/routine/test_routine.py
@@ -75,6 +75,20 @@ def test_ctor_w_properties(target_class):
description = "A routine description."
determinism_level = bigquery.DeterminismLevel.NOT_DETERMINISTIC
+ options = bigquery.RemoteFunctionOptions(
+ endpoint="https://some.endpoint",
+ connection="connection_string",
+ max_batching_rows=99,
+ user_defined_context={"foo": "bar"},
+ )
+ external_runtime_options = bigquery.ExternalRuntimeOptions(
+ container_memory="1G",
+ container_cpu=1,
+ runtime_connection="projects/p/locations/l/connections/c",
+ max_batching_rows=100,
+ runtime_version="python-3.11",
+ )
+
actual_routine = target_class(
routine_id,
arguments=arguments,
@@ -84,6 +98,8 @@ def test_ctor_w_properties(target_class):
type_=type_,
description=description,
determinism_level=determinism_level,
+ remote_function_options=options,
+ external_runtime_options=external_runtime_options,
)
ref = RoutineReference.from_string(routine_id)
@@ -97,6 +113,30 @@ def test_ctor_w_properties(target_class):
assert (
actual_routine.determinism_level == bigquery.DeterminismLevel.NOT_DETERMINISTIC
)
+ assert actual_routine.remote_function_options == options
+ assert actual_routine.external_runtime_options == external_runtime_options
+
+
+def test_ctor_invalid_remote_function_options(target_class):
+ with pytest.raises(
+ ValueError,
+ match=".*must be google.cloud.bigquery.routine.RemoteFunctionOptions.*",
+ ):
+ target_class(
+ "my-proj.my_dset.my_routine",
+ remote_function_options=object(),
+ )
+
+
+def test_ctor_invalid_external_runtime_options(target_class):
+ with pytest.raises(
+ ValueError,
+ match=".*must be google.cloud.bigquery.routine.ExternalRuntimeOptions.*",
+ ):
+ target_class(
+ "my-proj.my_dset.my_routine",
+ external_runtime_options=object(),
+ )
def test_from_api_repr(target_class):
@@ -126,6 +166,22 @@ def test_from_api_repr(target_class):
"someNewField": "someValue",
"description": "A routine description.",
"determinismLevel": bigquery.DeterminismLevel.DETERMINISTIC,
+ "remoteFunctionOptions": {
+ "endpoint": "https://some.endpoint",
+ "connection": "connection_string",
+ "maxBatchingRows": 50,
+ "userDefinedContext": {
+ "foo": "bar",
+ },
+ },
+ "dataGovernanceType": "DATA_MASKING",
+ "externalRuntimeOptions": {
+ "containerMemory": "1G",
+ "containerCpu": 1,
+ "runtimeConnection": "projects/p/locations/l/connections/c",
+ "maxBatchingRows": 100,
+ "runtimeVersion": "python-3.11",
+ },
}
actual_routine = target_class.from_api_repr(resource)
@@ -160,6 +216,19 @@ def test_from_api_repr(target_class):
assert actual_routine._properties["someNewField"] == "someValue"
assert actual_routine.description == "A routine description."
assert actual_routine.determinism_level == "DETERMINISTIC"
+ assert actual_routine.remote_function_options.endpoint == "https://some.endpoint"
+ assert actual_routine.remote_function_options.connection == "connection_string"
+ assert actual_routine.remote_function_options.max_batching_rows == 50
+ assert actual_routine.remote_function_options.user_defined_context == {"foo": "bar"}
+ assert actual_routine.data_governance_type == "DATA_MASKING"
+ assert actual_routine.external_runtime_options.container_memory == "1G"
+ assert actual_routine.external_runtime_options.container_cpu == 1
+ assert (
+ actual_routine.external_runtime_options.runtime_connection
+ == "projects/p/locations/l/connections/c"
+ )
+ assert actual_routine.external_runtime_options.max_batching_rows == 100
+ assert actual_routine.external_runtime_options.runtime_version == "python-3.11"
def test_from_api_repr_tvf_function(target_class):
@@ -261,6 +330,9 @@ def test_from_api_repr_w_minimal_resource(target_class):
assert actual_routine.type_ is None
assert actual_routine.description is None
assert actual_routine.determinism_level is None
+ assert actual_routine.remote_function_options is None
+ assert actual_routine.data_governance_type is None
+ assert actual_routine.external_runtime_options is None
def test_from_api_repr_w_unknown_fields(target_class):
@@ -395,6 +467,20 @@ def test_from_api_repr_w_unknown_fields(target_class):
"determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED
},
),
+ (
+ {
+ "arguments": [{"name": "x", "dataType": {"typeKind": "INT64"}}],
+ "definitionBody": "x * 3",
+ "language": "SQL",
+ "returnType": {"typeKind": "INT64"},
+ "routineType": "SCALAR_FUNCTION",
+ "description": "A routine description.",
+ "determinismLevel": bigquery.DeterminismLevel.DETERMINISM_LEVEL_UNSPECIFIED,
+ "dataGovernanceType": "DATA_MASKING",
+ },
+ ["data_governance_type"],
+ {"dataGovernanceType": "DATA_MASKING"},
+ ),
(
{},
[
@@ -421,6 +507,24 @@ def test_from_api_repr_w_unknown_fields(target_class):
["someNewField"],
{"someNewField": "someValue"},
),
+ (
+ {
+ "routineType": "SCALAR_FUNCTION",
+ "remoteFunctionOptions": {
+ "endpoint": "https://some_endpoint",
+ "connection": "connection_string",
+ "max_batching_rows": 101,
+ },
+ },
+ ["remote_function_options"],
+ {
+ "remoteFunctionOptions": {
+ "endpoint": "https://some_endpoint",
+ "connection": "connection_string",
+ "max_batching_rows": 101,
+ },
+ },
+ ),
],
)
def test_build_resource(object_under_test, resource, filter_fields, expected):
@@ -497,6 +601,48 @@ def test_set_description_w_none(object_under_test):
assert object_under_test._properties["description"] is None
+def test_set_remote_function_options_w_none(object_under_test):
+ object_under_test.remote_function_options = None
+ assert object_under_test.remote_function_options is None
+ assert object_under_test._properties["remoteFunctionOptions"] is None
+
+
+def test_set_external_runtime_options_w_none(object_under_test):
+ object_under_test.external_runtime_options = None
+ assert object_under_test.external_runtime_options is None
+ assert object_under_test._properties["externalRuntimeOptions"] is None
+
+
+def test_set_data_governance_type_w_none(object_under_test):
+ object_under_test.data_governance_type = None
+ assert object_under_test.data_governance_type is None
+ assert object_under_test._properties["dataGovernanceType"] is None
+
+
+def test_set_data_governance_type_valid(object_under_test):
+ object_under_test.data_governance_type = "DATA_MASKING"
+ assert object_under_test.data_governance_type == "DATA_MASKING"
+ assert object_under_test._properties["dataGovernanceType"] == "DATA_MASKING"
+
+
+def test_set_data_governance_type_wrong_type(object_under_test):
+ with pytest.raises(ValueError) as exp:
+ object_under_test.data_governance_type = 1
+ assert "invalid data_governance_type" in str(exp)
+ assert object_under_test.data_governance_type is None
+ assert object_under_test._properties.get("dataGovernanceType") is None
+
+
+def test_set_data_governance_type_wrong_str(object_under_test):
+ """Client does not verify the content of data_governance_type string to be
+ compatible with future upgrades. If the value is not supported, BigQuery
+ itself will report an error.
+ """
+ object_under_test.data_governance_type = "RANDOM_STRING"
+ assert object_under_test.data_governance_type == "RANDOM_STRING"
+ assert object_under_test._properties["dataGovernanceType"] == "RANDOM_STRING"
+
+
def test_repr(target_class):
model = target_class("my-proj.my_dset.my_routine")
actual_routine = repr(model)
diff --git a/tests/unit/test__helpers.py b/tests/unit/test__helpers.py
index 2e714c707..4e53236e3 100644
--- a/tests/unit/test__helpers.py
+++ b/tests/unit/test__helpers.py
@@ -12,450 +12,124 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import base64
import datetime
import decimal
+import json
+import os
+import warnings
+import pytest
+import packaging
import unittest
+from unittest import mock
-import mock
+import google.api_core
+from google.cloud.bigquery._helpers import _isinstance_or_raise
-class TestBQStorageVersions(unittest.TestCase):
- def tearDown(self):
- from google.cloud.bigquery import _helpers
+@pytest.mark.skipif(
+ packaging.version.parse(getattr(google.api_core, "__version__", "0.0.0"))
+ < packaging.version.Version("2.15.0"),
+ reason="universe_domain not supported with google-api-core < 2.15.0",
+)
+class Test_get_client_universe(unittest.TestCase):
+ def test_with_none(self):
+ from google.cloud.bigquery._helpers import _get_client_universe
- # Reset any cached versions since it may not match reality.
- _helpers.BQ_STORAGE_VERSIONS._installed_version = None
+ self.assertEqual("googleapis.com", _get_client_universe(None))
- def _object_under_test(self):
- from google.cloud.bigquery import _helpers
+ def test_with_dict(self):
+ from google.cloud.bigquery._helpers import _get_client_universe
- return _helpers.BQStorageVersions()
+ options = {"universe_domain": "foo.com"}
+ self.assertEqual("foo.com", _get_client_universe(options))
- def test_installed_version_returns_cached(self):
- versions = self._object_under_test()
- versions._installed_version = object()
- assert versions.installed_version is versions._installed_version
+ def test_with_dict_empty(self):
+ from google.cloud.bigquery._helpers import _get_client_universe
- def test_installed_version_returns_parsed_version(self):
- versions = self._object_under_test()
+ options = {"universe_domain": ""}
+ self.assertEqual("googleapis.com", _get_client_universe(options))
- with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"):
- version = versions.installed_version
+ def test_with_client_options(self):
+ from google.cloud.bigquery._helpers import _get_client_universe
+ from google.api_core import client_options
- assert version.major == 1
- assert version.minor == 2
- assert version.micro == 3
+ options = client_options.from_dict({"universe_domain": "foo.com"})
+ self.assertEqual("foo.com", _get_client_universe(options))
- def test_is_read_session_optional_true(self):
- versions = self._object_under_test()
- with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"):
- assert versions.is_read_session_optional
+ @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"})
+ def test_with_environ(self):
+ from google.cloud.bigquery._helpers import _get_client_universe
- def test_is_read_session_optional_false(self):
- versions = self._object_under_test()
- with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"):
- assert not versions.is_read_session_optional
+ self.assertEqual("foo.com", _get_client_universe(None))
+ @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"})
+ def test_with_environ_and_dict(self):
+ from google.cloud.bigquery._helpers import _get_client_universe
-class TestPyarrowVersions(unittest.TestCase):
- def tearDown(self):
- from google.cloud.bigquery import _helpers
+ options = ({"credentials_file": "file.json"},)
+ self.assertEqual("foo.com", _get_client_universe(options))
- # Reset any cached versions since it may not match reality.
- _helpers.PYARROW_VERSIONS._installed_version = None
+ @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"})
+ def test_with_environ_and_empty_options(self):
+ from google.cloud.bigquery._helpers import _get_client_universe
+ from google.api_core import client_options
- def _object_under_test(self):
- from google.cloud.bigquery import _helpers
+ options = client_options.from_dict({})
+ self.assertEqual("foo.com", _get_client_universe(options))
- return _helpers.PyarrowVersions()
+ @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": ""})
+ def test_with_environ_empty(self):
+ from google.cloud.bigquery._helpers import _get_client_universe
- def test_installed_version_returns_cached(self):
- versions = self._object_under_test()
- versions._installed_version = object()
- assert versions.installed_version is versions._installed_version
+ self.assertEqual("googleapis.com", _get_client_universe(None))
- def test_installed_version_returns_parsed_version(self):
- versions = self._object_under_test()
- with mock.patch("pyarrow.__version__", new="1.2.3"):
- version = versions.installed_version
+class Test_validate_universe(unittest.TestCase):
+ def test_with_none(self):
+ from google.cloud.bigquery._helpers import _validate_universe
- assert version.major == 1
- assert version.minor == 2
- assert version.micro == 3
+ # should not raise
+ _validate_universe("googleapis.com", None)
+ def test_with_no_universe_creds(self):
+ from google.cloud.bigquery._helpers import _validate_universe
+ from .helpers import make_creds
-class Test_not_null(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _not_null
-
- return _not_null(value, field)
-
- def test_w_none_nullable(self):
- self.assertFalse(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- self.assertTrue(self._call_fut(None, _Field("REQUIRED")))
-
- def test_w_value(self):
- self.assertTrue(self._call_fut(object(), object()))
-
-
-class Test_int_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _int_from_json
-
- return _int_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field("REQUIRED"))
-
- def test_w_string_value(self):
- coerced = self._call_fut("42", object())
- self.assertEqual(coerced, 42)
-
- def test_w_float_value(self):
- coerced = self._call_fut(42, object())
- self.assertEqual(coerced, 42)
-
-
-class Test_float_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _float_from_json
-
- return _float_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field("REQUIRED"))
-
- def test_w_string_value(self):
- coerced = self._call_fut("3.1415", object())
- self.assertEqual(coerced, 3.1415)
-
- def test_w_float_value(self):
- coerced = self._call_fut(3.1415, object())
- self.assertEqual(coerced, 3.1415)
-
-
-class Test_decimal_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _decimal_from_json
-
- return _decimal_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field("REQUIRED"))
-
- def test_w_string_value(self):
- coerced = self._call_fut("3.1415", object())
- self.assertEqual(coerced, decimal.Decimal("3.1415"))
-
- def test_w_float_value(self):
- coerced = self._call_fut(3.1415, object())
- # There is no exact float representation of 3.1415.
- self.assertEqual(coerced, decimal.Decimal(3.1415))
-
-
-class Test_bool_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _bool_from_json
-
- return _bool_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- with self.assertRaises(AttributeError):
- self._call_fut(None, _Field("REQUIRED"))
-
- def test_w_value_t(self):
- coerced = self._call_fut("T", object())
- self.assertTrue(coerced)
-
- def test_w_value_true(self):
- coerced = self._call_fut("True", object())
- self.assertTrue(coerced)
-
- def test_w_value_1(self):
- coerced = self._call_fut("1", object())
- self.assertTrue(coerced)
-
- def test_w_value_other(self):
- coerced = self._call_fut("f", object())
- self.assertFalse(coerced)
-
-
-class Test_string_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _string_from_json
-
- return _string_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- self.assertIsNone(self._call_fut(None, _Field("REQUIRED")))
-
- def test_w_string_value(self):
- coerced = self._call_fut("Wonderful!", object())
- self.assertEqual(coerced, "Wonderful!")
-
-
-class Test_bytes_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _bytes_from_json
-
- return _bytes_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field("REQUIRED"))
-
- def test_w_base64_encoded_bytes(self):
- expected = b"Wonderful!"
- encoded = base64.standard_b64encode(expected)
- coerced = self._call_fut(encoded, object())
- self.assertEqual(coerced, expected)
-
- def test_w_base64_encoded_text(self):
- expected = b"Wonderful!"
- encoded = base64.standard_b64encode(expected).decode("ascii")
- coerced = self._call_fut(encoded, object())
- self.assertEqual(coerced, expected)
-
-
-class Test_timestamp_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _timestamp_from_json
-
- return _timestamp_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field("REQUIRED"))
-
- def test_w_string_int_value(self):
- from google.cloud._helpers import _EPOCH
-
- coerced = self._call_fut("1234567", object())
- self.assertEqual(
- coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
- )
-
- def test_w_int_value(self):
- from google.cloud._helpers import _EPOCH
-
- coerced = self._call_fut(1234567, object())
- self.assertEqual(
- coerced, _EPOCH + datetime.timedelta(seconds=1, microseconds=234567)
- )
-
-
-class Test_timestamp_query_param_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery import _helpers
-
- return _helpers._timestamp_query_param_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_timestamp_valid(self):
- from google.cloud._helpers import UTC
-
- samples = [
- (
- "2016-12-20 15:58:27.339328+00:00",
- datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC),
- ),
- (
- "2016-12-20 15:58:27+00:00",
- datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC),
- ),
- (
- "2016-12-20T15:58:27.339328+00:00",
- datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC),
- ),
- (
- "2016-12-20T15:58:27+00:00",
- datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC),
- ),
- (
- "2016-12-20 15:58:27.339328Z",
- datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC),
- ),
- (
- "2016-12-20 15:58:27Z",
- datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC),
- ),
- (
- "2016-12-20T15:58:27.339328Z",
- datetime.datetime(2016, 12, 20, 15, 58, 27, 339328, tzinfo=UTC),
- ),
- (
- "2016-12-20T15:58:27Z",
- datetime.datetime(2016, 12, 20, 15, 58, 27, tzinfo=UTC),
- ),
- ]
- for timestamp_str, expected_result in samples:
- self.assertEqual(
- self._call_fut(timestamp_str, _Field("NULLABLE")), expected_result
- )
-
- def test_w_timestamp_invalid(self):
- with self.assertRaises(ValueError):
- self._call_fut("definitely-not-a-timestamp", _Field("NULLABLE"))
-
-
-class Test_datetime_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _datetime_from_json
-
- return _datetime_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field("REQUIRED"))
-
- def test_w_string_value(self):
- coerced = self._call_fut("2016-12-02T18:51:33", object())
- self.assertEqual(coerced, datetime.datetime(2016, 12, 2, 18, 51, 33))
-
- def test_w_microseconds(self):
- coerced = self._call_fut("2015-05-22T10:11:12.987654", object())
- self.assertEqual(coerced, datetime.datetime(2015, 5, 22, 10, 11, 12, 987654))
-
-
-class Test_date_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _date_from_json
-
- return _date_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field("REQUIRED"))
-
- def test_w_string_value(self):
- coerced = self._call_fut("1987-09-22", object())
- self.assertEqual(coerced, datetime.date(1987, 9, 22))
+ creds = make_creds(None)
+ # should not raise
+ _validate_universe("googleapis.com", creds)
+ def test_with_matched_universe_creds(self):
+ from google.cloud.bigquery._helpers import _validate_universe
+ from .helpers import make_creds
-class Test_time_from_json(unittest.TestCase):
- def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _time_from_json
-
- return _time_from_json(value, field)
-
- def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
-
- def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field("REQUIRED"))
-
- def test_w_string_value(self):
- coerced = self._call_fut("12:12:27", object())
- self.assertEqual(coerced, datetime.time(12, 12, 27))
+ creds = make_creds("googleapis.com")
+ # should not raise
+ _validate_universe("googleapis.com", creds)
- def test_w_subsecond_string_value(self):
- coerced = self._call_fut("12:12:27.123456", object())
- self.assertEqual(coerced, datetime.time(12, 12, 27, 123456))
+ def test_with_mismatched_universe_creds(self):
+ from google.cloud.bigquery._helpers import _validate_universe
+ from .helpers import make_creds
- def test_w_bogus_string_value(self):
+ creds = make_creds("foo.com")
with self.assertRaises(ValueError):
- self._call_fut("12:12:27.123", object())
+ _validate_universe("googleapis.com", creds)
-class Test_record_from_json(unittest.TestCase):
+class Test_not_null(unittest.TestCase):
def _call_fut(self, value, field):
- from google.cloud.bigquery._helpers import _record_from_json
+ from google.cloud.bigquery._helpers import _not_null
- return _record_from_json(value, field)
+ return _not_null(value, field)
def test_w_none_nullable(self):
- self.assertIsNone(self._call_fut(None, _Field("NULLABLE")))
+ self.assertFalse(self._call_fut(None, _Field("NULLABLE")))
def test_w_none_required(self):
- with self.assertRaises(TypeError):
- self._call_fut(None, _Field("REQUIRED"))
-
- def test_w_nullable_subfield_none(self):
- subfield = _Field("NULLABLE", "age", "INTEGER")
- field = _Field("REQUIRED", fields=[subfield])
- value = {"f": [{"v": None}]}
- coerced = self._call_fut(value, field)
- self.assertEqual(coerced, {"age": None})
-
- def test_w_scalar_subfield(self):
- subfield = _Field("REQUIRED", "age", "INTEGER")
- field = _Field("REQUIRED", fields=[subfield])
- value = {"f": [{"v": 42}]}
- coerced = self._call_fut(value, field)
- self.assertEqual(coerced, {"age": 42})
-
- def test_w_scalar_subfield_geography(self):
- subfield = _Field("REQUIRED", "geo", "GEOGRAPHY")
- field = _Field("REQUIRED", fields=[subfield])
- value = {"f": [{"v": "POINT(1, 2)"}]}
- coerced = self._call_fut(value, field)
- self.assertEqual(coerced, {"geo": "POINT(1, 2)"})
-
- def test_w_repeated_subfield(self):
- subfield = _Field("REPEATED", "color", "STRING")
- field = _Field("REQUIRED", fields=[subfield])
- value = {"f": [{"v": [{"v": "red"}, {"v": "yellow"}, {"v": "blue"}]}]}
- coerced = self._call_fut(value, field)
- self.assertEqual(coerced, {"color": ["red", "yellow", "blue"]})
+ self.assertTrue(self._call_fut(None, _Field("REQUIRED")))
- def test_w_record_subfield(self):
- full_name = _Field("REQUIRED", "full_name", "STRING")
- area_code = _Field("REQUIRED", "area_code", "STRING")
- local_number = _Field("REQUIRED", "local_number", "STRING")
- rank = _Field("REQUIRED", "rank", "INTEGER")
- phone = _Field(
- "NULLABLE", "phone", "RECORD", fields=[area_code, local_number, rank]
- )
- person = _Field("REQUIRED", "person", "RECORD", fields=[full_name, phone])
- value = {
- "f": [
- {"v": "Phred Phlyntstone"},
- {"v": {"f": [{"v": "800"}, {"v": "555-1212"}, {"v": 1}]}},
- ]
- }
- expected = {
- "full_name": "Phred Phlyntstone",
- "phone": {"area_code": "800", "local_number": "555-1212", "rank": 1},
- }
- coerced = self._call_fut(value, person)
- self.assertEqual(coerced, expected)
+ def test_w_value(self):
+ self.assertTrue(self._call_fut(object(), object()))
class Test_field_to_index_mapping(unittest.TestCase):
@@ -489,6 +163,17 @@ def test_w_single_scalar_column(self):
row = {"f": [{"v": "1"}]}
self.assertEqual(self._call_fut(row, schema=[col]), (1,))
+ def test_w_unknown_type(self):
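+        # An unrecognized field type leaves the raw cell value untouched and
+        # emits a single warning naming the field and its type.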
+ # SELECT 1 AS col
+ col = _Field("REQUIRED", "col", "UNKNOWN")
+ row = {"f": [{"v": "1"}]}
+ with warnings.catch_warnings(record=True) as warned:
+ self.assertEqual(self._call_fut(row, schema=[col]), ("1",))
+ self.assertEqual(len(warned), 1)
+ warning = warned[0]
+ self.assertTrue("UNKNOWN" in str(warning))
+ self.assertTrue("col" in str(warning))
+
def test_w_single_scalar_geography_column(self):
# SELECT 1 AS col
col = _Field("REQUIRED", "geo", "GEOGRAPHY")
@@ -509,6 +194,17 @@ def test_w_single_array_column(self):
row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
self.assertEqual(self._call_fut(row, schema=[col]), ([1, 2, 3],))
+ def test_w_unknown_type_repeated(self):
+        # SELECT [1, 2, 3] AS col
+ col = _Field("REPEATED", "col", "UNKNOWN")
+ row = {"f": [{"v": [{"v": "1"}, {"v": "2"}, {"v": "3"}]}]}
+ with warnings.catch_warnings(record=True) as warned:
+ self.assertEqual(self._call_fut(row, schema=[col]), (["1", "2", "3"],))
+ self.assertEqual(len(warned), 1)
+ warning = warned[0]
+ self.assertTrue("UNKNOWN" in str(warning))
+ self.assertTrue("col" in str(warning))
+
def test_w_struct_w_nested_array_column(self):
# SELECT ([1, 2], 3, [4, 5]) as col
first = _Field("REPEATED", "first", "INTEGER")
@@ -533,6 +229,39 @@ def test_w_struct_w_nested_array_column(self):
({"first": [1, 2], "second": 3, "third": [4, 5]},),
)
+ def test_w_unknown_type_subfield(self):
+        # SELECT ([1, 2], 3, [4, 5]) AS col
+ first = _Field("REPEATED", "first", "UNKNOWN1")
+ second = _Field("REQUIRED", "second", "UNKNOWN2")
+ third = _Field("REPEATED", "third", "INTEGER")
+ col = _Field("REQUIRED", "col", "RECORD", fields=[first, second, third])
+ row = {
+ "f": [
+ {
+ "v": {
+ "f": [
+ {"v": [{"v": "1"}, {"v": "2"}]},
+ {"v": "3"},
+ {"v": [{"v": "4"}, {"v": "5"}]},
+ ]
+ }
+ }
+ ]
+ }
+ with warnings.catch_warnings(record=True) as warned:
+ self.assertEqual(
+ self._call_fut(row, schema=[col]),
+ ({"first": ["1", "2"], "second": "3", "third": [4, 5]},),
+ )
+ self.assertEqual(len(warned), 2) # 1 warning per unknown field.
+ warned = [str(warning) for warning in warned]
+ self.assertTrue(
+ any(["first" in warning and "UNKNOWN1" in warning for warning in warned])
+ )
+ self.assertTrue(
+ any(["second" in warning and "UNKNOWN2" in warning for warning in warned])
+ )
+
def test_w_array_of_struct(self):
# SELECT [(1, 2, 3), (4, 5, 6)] as col
first = _Field("REQUIRED", "first", "INTEGER")
@@ -898,10 +627,22 @@ def test_w_datetime(self):
self.assertEqual(self._call_fut(when), "12:13:41")
-def _make_field(field_type, mode="NULLABLE", name="testing", fields=()):
+def _make_field(
+ field_type,
+ mode="NULLABLE",
+ name="testing",
+ fields=(),
+ range_element_type=None,
+):
from google.cloud.bigquery.schema import SchemaField
- return SchemaField(name=name, field_type=field_type, mode=mode, fields=fields)
+ return SchemaField(
+ name=name,
+ field_type=field_type,
+ mode=mode,
+ fields=fields,
+ range_element_type=range_element_type,
+ )
class Test_scalar_field_to_json(unittest.TestCase):
@@ -913,8 +654,12 @@ def _call_fut(self, field, value):
def test_w_unknown_field_type(self):
field = _make_field("UNKNOWN")
original = object()
- converted = self._call_fut(field, original)
+ with warnings.catch_warnings(record=True) as warned:
+ converted = self._call_fut(field, original)
self.assertIs(converted, original)
+ self.assertEqual(len(warned), 1)
+ warning = warned[0]
+ self.assertTrue("UNKNOWN" in str(warning))
def test_w_known_field_type(self):
field = _make_field("INT64")
@@ -922,6 +667,16 @@ def test_w_known_field_type(self):
converted = self._call_fut(field, original)
self.assertEqual(converted, str(original))
+ def test_w_scalar_none(self):
+ import google.cloud.bigquery._helpers as module_under_test
+
+ scalar_types = module_under_test._SCALAR_VALUE_TO_JSON_ROW.keys()
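+        # Every scalar field type should serialize a None value as None.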
+ for type_ in scalar_types:
+ field = _make_field(type_)
+ original = None
+ converted = self._call_fut(field, original)
+ self.assertIsNone(converted, msg=f"{type_} did not return None")
+
class Test_single_field_to_json(unittest.TestCase):
def _call_fut(self, field, value):
@@ -957,6 +712,12 @@ def test_w_scalar_ignores_mode(self):
converted = self._call_fut(field, original)
self.assertEqual(converted, original)
+ def test_w_scalar_json(self):
+ field = _make_field("JSON")
+ original = {"alpha": "abc", "num": [1, 2, 3]}
+ converted = self._call_fut(field, original)
+ self.assertEqual(converted, json.dumps(original))
+
class Test_repeated_field_to_json(unittest.TestCase):
def _call_fut(self, field, value):
@@ -1084,6 +845,98 @@ def test_w_dict_unknown_fields(self):
)
+class Test_range_field_to_json(unittest.TestCase):
+ def _call_fut(self, field, value):
+ from google.cloud.bigquery._helpers import _range_field_to_json
+
+ return _range_field_to_json(field, value)
+
+ def test_w_date(self):
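+        # An omitted "end" bound is serialized as None (the UNBOUNDED case).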
+ field = _make_field("RANGE", range_element_type="DATE")
+ start = datetime.date(2016, 12, 3)
+ original = {"start": start}
+ converted = self._call_fut(field.range_element_type, original)
+ expected = {"start": "2016-12-03", "end": None}
+ self.assertEqual(converted, expected)
+
+ def test_w_date_string(self):
+ field = _make_field("RANGE", range_element_type="DATE")
+ original = {"start": "2016-12-03"}
+ converted = self._call_fut(field.range_element_type, original)
+ expected = {"start": "2016-12-03", "end": None}
+ self.assertEqual(converted, expected)
+
+ def test_w_datetime(self):
+ field = _make_field("RANGE", range_element_type="DATETIME")
+ start = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456)
+ original = {"start": start}
+ converted = self._call_fut(field.range_element_type, original)
+ expected = {"start": "2016-12-03T14:11:27.123456", "end": None}
+ self.assertEqual(converted, expected)
+
+ def test_w_datetime_string(self):
+ field = _make_field("RANGE", range_element_type="DATETIME")
+ original = {"start": "2016-12-03T14:11:27.123456"}
+ converted = self._call_fut(field.range_element_type, original)
+ expected = {"start": "2016-12-03T14:11:27.123456", "end": None}
+ self.assertEqual(converted, expected)
+
+ def test_w_timestamp(self):
+ from google.cloud._helpers import UTC
+
+ field = _make_field("RANGE", range_element_type="TIMESTAMP")
+ start = datetime.datetime(2016, 12, 3, 14, 11, 27, 123456, tzinfo=UTC)
+ original = {"start": start}
+ converted = self._call_fut(field.range_element_type, original)
+ expected = {"start": "2016-12-03T14:11:27.123456Z", "end": None}
+ self.assertEqual(converted, expected)
+
+ def test_w_timestamp_string(self):
+ field = _make_field("RANGE", range_element_type="TIMESTAMP")
+ original = {"start": "2016-12-03T14:11:27.123456Z"}
+ converted = self._call_fut(field.range_element_type, original)
+ expected = {"start": "2016-12-03T14:11:27.123456Z", "end": None}
+ self.assertEqual(converted, expected)
+
+ def test_w_timestamp_float(self):
+ field = _make_field("RANGE", range_element_type="TIMESTAMP")
+ original = {"start": 12.34567}
+ converted = self._call_fut(field.range_element_type, original)
+ expected = {"start": 12.34567, "end": None}
+ self.assertEqual(converted, expected)
+
+ def test_w_string_literal(self):
+ field = _make_field("RANGE", range_element_type="DATE")
+ original = "[2016-12-03, UNBOUNDED)"
+ converted = self._call_fut(field.range_element_type, original)
+ expected = {"start": "2016-12-03", "end": None}
+ self.assertEqual(converted, expected)
+
+ def test_w_unsupported_range_element_type(self):
+ field = _make_field("RANGE", range_element_type="TIME")
+ with self.assertRaises(ValueError):
+ self._call_fut(
+ field.range_element_type,
+ {"start": datetime.time(12, 13, 41)},
+ )
+
+ def test_w_no_range_element_type(self):
+ field = _make_field("RANGE")
+ with self.assertRaises(ValueError):
+ self._call_fut(field.range_element_type, "2016-12-03")
+
+ def test_w_incorrect_literal_format(self):
+ field = _make_field("RANGE", range_element_type="DATE")
+ original = "[2016-12-03, UNBOUNDED]"
+ with self.assertRaises(ValueError):
+ self._call_fut(field.range_element_type, original)
+
+ def test_w_unsupported_representation(self):
+ field = _make_field("RANGE", range_element_type="DATE")
+ with self.assertRaises(ValueError):
+ self._call_fut(field.range_element_type, object())
+
+
class Test_field_to_json(unittest.TestCase):
def _call_fut(self, field, value):
from google.cloud.bigquery._helpers import _field_to_json
@@ -1118,6 +971,12 @@ def test_w_scalar(self):
converted = self._call_fut(field, original)
self.assertEqual(converted, str(original))
+ def test_w_range(self):
+ field = _make_field("RANGE", range_element_type="DATE")
+ original = {"start": "2016-12-03", "end": "2024-12-03"}
+ converted = self._call_fut(field, original)
+ self.assertEqual(converted, original)
+
class Test_snake_to_camel_case(unittest.TestCase):
def _call_fut(self, value):
@@ -1249,11 +1108,21 @@ def test_w_str(self):
class _Field(object):
- def __init__(self, mode, name="unknown", field_type="UNKNOWN", fields=()):
+ def __init__(
+ self,
+ mode,
+ name="unknown",
+ field_type="UNKNOWN",
+ fields=(),
+ range_element_type=None,
+ element_type=None,
+ ):
self.mode = mode
self.name = name
self.field_type = field_type
self.fields = fields
+ self.range_element_type = range_element_type
+ self.element_type = element_type
def _field_isinstance_patcher():
@@ -1314,3 +1183,34 @@ def test_w_env_var(self):
host = self._call_fut()
self.assertEqual(host, HOST)
+
+
+class Test__isinstance_or_raise:
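+    # _isinstance_or_raise returns the value unchanged when its type matches
+    # dtype (or when it is None and none_allowed=True); otherwise it raises
+    # TypeError.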
+ @pytest.mark.parametrize(
+ "value,dtype,none_allowed,expected",
+ [
+ (None, str, True, None),
+ ("hello world.uri", str, True, "hello world.uri"),
+ ("hello world.uri", str, False, "hello world.uri"),
+ (None, (str, float), True, None),
+ ("hello world.uri", (str, float), True, "hello world.uri"),
+ ("hello world.uri", (str, float), False, "hello world.uri"),
+ ],
+ )
+ def test__valid_isinstance_or_raise(self, value, dtype, none_allowed, expected):
+ result = _isinstance_or_raise(value, dtype, none_allowed=none_allowed)
+ assert result == expected
+
+ @pytest.mark.parametrize(
+ "value,dtype,none_allowed,expected",
+ [
+ (None, str, False, pytest.raises(TypeError)),
+ ({"key": "value"}, str, True, pytest.raises(TypeError)),
+ ({"key": "value"}, str, False, pytest.raises(TypeError)),
+ ({"key": "value"}, (str, float), True, pytest.raises(TypeError)),
+ ({"key": "value"}, (str, float), False, pytest.raises(TypeError)),
+ ],
+ )
+ def test__invalid_isinstance_or_raise(self, value, dtype, none_allowed, expected):
+ with expected:
+ _isinstance_or_raise(value, dtype, none_allowed=none_allowed)
diff --git a/tests/unit/test__http.py b/tests/unit/test__http.py
index 09f6d29d7..fd7ecdc42 100644
--- a/tests/unit/test__http.py
+++ b/tests/unit/test__http.py
@@ -13,8 +13,8 @@
# limitations under the License.
import unittest
+from unittest import mock
-import mock
import requests
diff --git a/tests/unit/test__job_helpers.py b/tests/unit/test__job_helpers.py
index 012352f4e..10cbefe13 100644
--- a/tests/unit/test__job_helpers.py
+++ b/tests/unit/test__job_helpers.py
@@ -15,14 +15,22 @@
from typing import Any, Dict, Optional
from unittest import mock
+import google.api_core.exceptions
from google.api_core import retry as retries
import pytest
-from google.cloud.bigquery.client import Client
from google.cloud.bigquery import _job_helpers
-from google.cloud.bigquery.job.query import QueryJob, QueryJobConfig
+from google.cloud.bigquery import enums
+from google.cloud.bigquery import retry
+from google.cloud.bigquery.client import Client
+from google.cloud.bigquery.job import copy_ as job_copy
+from google.cloud.bigquery.job import extract as job_extract
+from google.cloud.bigquery.job import load as job_load
+from google.cloud.bigquery.job import query as job_query
from google.cloud.bigquery.query import ConnectionProperty, ScalarQueryParameter
+from .helpers import make_client, make_connection
+
def make_query_request(additional_properties: Optional[Dict[str, Any]] = None):
request = {"useLegacySql": False, "formatOptions": {"useInt64Timestamp": True}}
@@ -54,10 +62,35 @@ def make_query_response(
@pytest.mark.parametrize(
("job_config", "expected"),
(
- (None, make_query_request()),
- (QueryJobConfig(), make_query_request()),
- (
- QueryJobConfig(default_dataset="my-project.my_dataset"),
+ pytest.param(
+ None,
+ make_query_request(),
+ id="job_config=None-default-request",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(),
+ make_query_request(),
+ id="job_config=QueryJobConfig()-default-request",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig.from_api_repr(
+ {
+ "unknownTopLevelProperty": "some-test-value",
+ "query": {
+ "unknownQueryProperty": "some-other-value",
+ },
+ },
+ ),
+ make_query_request(
+ {
+ "unknownTopLevelProperty": "some-test-value",
+ "unknownQueryProperty": "some-other-value",
+ }
+ ),
+ id="job_config-with-unknown-properties-includes-all-properties-in-request",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(default_dataset="my-project.my_dataset"),
make_query_request(
{
"defaultDataset": {
@@ -66,18 +99,25 @@ def make_query_response(
}
}
),
+ id="job_config-with-default_dataset",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(dry_run=True),
+ make_query_request({"dryRun": True}),
+ id="job_config-with-dry_run",
),
- (QueryJobConfig(dry_run=True), make_query_request({"dryRun": True})),
- (
- QueryJobConfig(use_query_cache=False),
+ pytest.param(
+ job_query.QueryJobConfig(use_query_cache=False),
make_query_request({"useQueryCache": False}),
+ id="job_config-with-use_query_cache",
),
- (
- QueryJobConfig(use_legacy_sql=True),
+ pytest.param(
+ job_query.QueryJobConfig(use_legacy_sql=True),
make_query_request({"useLegacySql": True}),
+ id="job_config-with-use_legacy_sql",
),
- (
- QueryJobConfig(
+ pytest.param(
+ job_query.QueryJobConfig(
query_parameters=[
ScalarQueryParameter("named_param1", "STRING", "param-value"),
ScalarQueryParameter("named_param2", "INT64", 123),
@@ -100,9 +140,10 @@ def make_query_response(
],
}
),
+ id="job_config-with-query_parameters-named",
),
- (
- QueryJobConfig(
+ pytest.param(
+ job_query.QueryJobConfig(
query_parameters=[
ScalarQueryParameter(None, "STRING", "param-value"),
ScalarQueryParameter(None, "INT64", 123),
@@ -123,9 +164,10 @@ def make_query_response(
],
}
),
+ id="job_config-with-query_parameters-positional",
),
- (
- QueryJobConfig(
+ pytest.param(
+ job_query.QueryJobConfig(
connection_properties=[
ConnectionProperty(key="time_zone", value="America/Chicago"),
ConnectionProperty(key="session_id", value="abcd-efgh-ijkl-mnop"),
@@ -139,28 +181,67 @@ def make_query_response(
]
}
),
+ id="job_config-with-connection_properties",
),
- (
- QueryJobConfig(labels={"abc": "def"}),
+ pytest.param(
+ job_query.QueryJobConfig(labels={"abc": "def"}),
make_query_request({"labels": {"abc": "def"}}),
+ id="job_config-with-labels",
),
- (
- QueryJobConfig(maximum_bytes_billed=987654),
+ pytest.param(
+ job_query.QueryJobConfig(maximum_bytes_billed=987654),
make_query_request({"maximumBytesBilled": "987654"}),
+ id="job_config-with-maximum_bytes_billed",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(
+ write_incremental_results=True,
+ ),
+ make_query_request({"writeIncrementalResults": True}),
+ id="job_config-with-incremental-results",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(
+ reservation="foo",
+ max_slots=100,
+ ),
+ make_query_request(
+ {
+ "maxSlots": "100",
+ "reservation": "foo",
+ }
+ ),
+ id="job_config-with-reservation-and-slots",
),
),
)
def test__to_query_request(job_config, expected):
- result = _job_helpers._to_query_request(job_config)
+ result = _job_helpers._to_query_request(job_config, query="SELECT 1")
+ expected["query"] = "SELECT 1"
assert result == expected
+@pytest.mark.parametrize(
+ ("job_config", "invalid_key"),
+ (
+ pytest.param(job_copy.CopyJobConfig(), "copy", id="copy"),
+ pytest.param(job_extract.ExtractJobConfig(), "extract", id="extract"),
+ pytest.param(job_load.LoadJobConfig(), "load", id="load"),
+ ),
+)
+def test__to_query_request_raises_for_invalid_config(job_config, invalid_key):
+ with pytest.raises(ValueError, match=f"{repr(invalid_key)} in job_config"):
+ _job_helpers._to_query_request(job_config, query="SELECT 1")
+
+
def test__to_query_job_defaults():
mock_client = mock.create_autospec(Client)
response = make_query_response(
job_id="test-job", project_id="some-project", location="asia-northeast1"
)
- job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response)
+ job: job_query.QueryJob = _job_helpers._to_query_job(
+ mock_client, "query-str", None, response
+ )
assert job.query == "query-str"
assert job._client is mock_client
assert job.job_id == "test-job"
@@ -175,9 +256,9 @@ def test__to_query_job_dry_run():
response = make_query_response(
job_id="test-job", project_id="some-project", location="asia-northeast1"
)
- job_config: QueryJobConfig = QueryJobConfig()
+ job_config: job_query.QueryJobConfig = job_query.QueryJobConfig()
job_config.dry_run = True
- job: QueryJob = _job_helpers._to_query_job(
+ job: job_query.QueryJob = _job_helpers._to_query_job(
mock_client, "query-str", job_config, response
)
assert job.dry_run is True
@@ -186,14 +267,18 @@ def test__to_query_job_dry_run():
@pytest.mark.parametrize(
("completed", "expected_state"),
(
- (True, "DONE"),
+ # Always pending so that we refresh the job state to get the
+ # destination table or job stats in case it's needed.
+ (True, "PENDING"),
(False, "PENDING"),
),
)
def test__to_query_job_sets_state(completed, expected_state):
mock_client = mock.create_autospec(Client)
response = make_query_response(completed=completed)
- job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response)
+ job: job_query.QueryJob = _job_helpers._to_query_job(
+ mock_client, "query-str", None, response
+ )
assert job.state == expected_state
@@ -206,7 +291,9 @@ def test__to_query_job_sets_errors():
{"message": "something else went wrong"},
]
)
- job: QueryJob = _job_helpers._to_query_job(mock_client, "query-str", None, response)
+ job: job_query.QueryJob = _job_helpers._to_query_job(
+ mock_client, "query-str", None, response
+ )
assert len(job.errors) == 2
# If we got back a response instead of an HTTP error status code, most
# likely the job didn't completely fail.
@@ -313,6 +400,599 @@ def test_query_jobs_query_sets_timeout(timeout, expected_timeout):
assert request["timeoutMs"] == expected_timeout
+def test_query_and_wait_uses_jobs_insert():
+ """With unsupported features, call jobs.insert instead of jobs.query."""
+ client = mock.create_autospec(Client)
+ client._call_api.return_value = {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "abc",
+ "location": "response-location",
+ },
+ "query": {
+ "query": "SELECT 1",
+ },
+ # Make sure the job has "started"
+ "status": {"state": "DONE"},
+ "jobComplete": True,
+ }
+ job_config = job_query.QueryJobConfig(
+ destination="dest-project.dest_dset.dest_table",
+ )
+ _job_helpers.query_and_wait(
+ client,
+ query="SELECT 1",
+ location="request-location",
+ project="request-project",
+ job_config=job_config,
+ retry=None,
+ job_retry=None,
+ page_size=None,
+ max_results=None,
+ )
+
+ # We should call jobs.insert since jobs.query doesn't support destination.
+ request_path = "/projects/request-project/jobs"
+ client._call_api.assert_any_call(
+ None, # retry,
+ span_name="BigQuery.job.begin",
+ span_attributes={"path": request_path},
+ job_ref=mock.ANY,
+ method="POST",
+ path=request_path,
+ data={
+ "jobReference": {
+ "jobId": mock.ANY,
+ "projectId": "request-project",
+ "location": "request-location",
+ },
+ "configuration": {
+ "query": {
+ "destinationTable": {
+ "projectId": "dest-project",
+ "datasetId": "dest_dset",
+ "tableId": "dest_table",
+ },
+ "useLegacySql": False,
+ "query": "SELECT 1",
+ }
+ },
+ },
+ timeout=None,
+ )
+
+
+def test_query_and_wait_sets_job_creation_mode():
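+    # The client's default_job_creation_mode should be forwarded as
+    # jobCreationMode in the jobs.query request body.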
+ client = mock.create_autospec(Client)
+ client.default_job_creation_mode = "JOB_CREATION_OPTIONAL"
+ client._call_api.return_value = {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "abc",
+ "location": "response-location",
+ },
+ "jobComplete": True,
+ }
+ _job_helpers.query_and_wait(
+ client,
+ query="SELECT 1",
+ location="request-location",
+ project="request-project",
+ job_config=None,
+ retry=None,
+ job_retry=None,
+ page_size=None,
+ max_results=None,
+ )
+
+ # We should only call jobs.query once, no additional row requests needed.
+ request_path = "/projects/request-project/queries"
+ client._call_api.assert_called_once_with(
+ None, # retry
+ span_name="BigQuery.query",
+ span_attributes={"path": request_path},
+ method="POST",
+ path=request_path,
+ data={
+ "query": "SELECT 1",
+ "location": "request-location",
+ "useLegacySql": False,
+ "formatOptions": {
+ "useInt64Timestamp": True,
+ },
+ "requestId": mock.ANY,
+ "jobCreationMode": "JOB_CREATION_OPTIONAL",
+ },
+ timeout=None,
+ )
+
+
+def test_query_and_wait_sets_location():
+ client = mock.create_autospec(Client)
+ client._call_api.return_value = {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "abc",
+ "location": "response-location",
+ },
+ "jobComplete": True,
+ }
+ rows = _job_helpers.query_and_wait(
+ client,
+ query="SELECT 1",
+ location="request-location",
+ project="request-project",
+ job_config=None,
+ retry=None,
+ job_retry=None,
+ page_size=None,
+ max_results=None,
+ )
+ assert rows.location == "response-location"
+
+ # We should only call jobs.query once, no additional row requests needed.
+ request_path = "/projects/request-project/queries"
+ client._call_api.assert_called_once_with(
+ None, # retry
+ span_name="BigQuery.query",
+ span_attributes={"path": request_path},
+ method="POST",
+ path=request_path,
+ data={
+ "query": "SELECT 1",
+ "location": "request-location",
+ "useLegacySql": False,
+ "formatOptions": {
+ "useInt64Timestamp": True,
+ },
+ "requestId": mock.ANY,
+ "jobCreationMode": mock.ANY,
+ },
+ timeout=None,
+ )
+
+
+@pytest.mark.parametrize(
+ ("max_results", "page_size", "expected"),
+ [
+ (10, None, 10),
+ (None, 11, 11),
+ (12, 100, 12),
+ (100, 13, 13),
+ ],
+)
+def test_query_and_wait_sets_max_results(max_results, page_size, expected):
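+    # The maxResults sent in the request is the smaller of max_results and page_size.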
+ client = mock.create_autospec(Client)
+ client.default_job_creation_mode = None
+ client._call_api.return_value = {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "abc",
+ "location": "response-location",
+ },
+ "jobComplete": True,
+ }
+ rows = _job_helpers.query_and_wait(
+ client,
+ query="SELECT 1",
+ location="request-location",
+ project="request-project",
+ job_config=None,
+ retry=None,
+ job_retry=None,
+ page_size=page_size,
+ max_results=max_results,
+ )
+ assert rows.location == "response-location"
+
+ # We should only call jobs.query once, no additional row requests needed.
+ request_path = "/projects/request-project/queries"
+ client._call_api.assert_called_once_with(
+ None, # retry
+ span_name="BigQuery.query",
+ span_attributes={"path": request_path},
+ method="POST",
+ path=request_path,
+ data={
+ "query": "SELECT 1",
+ "location": "request-location",
+ "useLegacySql": False,
+ "formatOptions": {
+ "useInt64Timestamp": True,
+ },
+ "requestId": mock.ANY,
+ "maxResults": expected,
+ },
+ timeout=None,
+ )
+
+
+def test_query_and_wait_caches_completed_query_results_one_page():
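+    # jobComplete=True with no pageToken means the first response already
+    # contains every row, so no further API calls should be needed.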
+ client = mock.create_autospec(Client)
+ client.default_job_creation_mode = None
+ client._call_api.return_value = {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "abc",
+ "location": "US",
+ },
+ "jobComplete": True,
+ "queryId": "xyz",
+ "schema": {
+ "fields": [
+ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "age", "type": "INT64", "mode": "NULLABLE"},
+ ],
+ },
+ "rows": [
+ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
+ {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ ],
+ # Even though totalRows > len(rows), we should use the presence of a
+ # next page token to decide if there are any more pages.
+ "totalRows": 8,
+ }
+ rows = _job_helpers.query_and_wait(
+ client,
+ query="SELECT full_name, age FROM people;",
+ job_config=None,
+ location=None,
+ project="request-project",
+ retry=None,
+ job_retry=None,
+ page_size=None,
+ max_results=None,
+ )
+ rows_list = list(rows)
+ assert rows.project == "response-project"
+ assert rows.job_id == "abc"
+ assert rows.location == "US"
+ assert rows.query_id == "xyz"
+ assert rows.total_rows == 8
+ assert len(rows_list) == 4
+
+ # We should only call jobs.query once, no additional row requests needed.
+ request_path = "/projects/request-project/queries"
+ client._call_api.assert_called_once_with(
+ None, # retry
+ span_name="BigQuery.query",
+ span_attributes={"path": request_path},
+ method="POST",
+ path=request_path,
+ data={
+ "query": "SELECT full_name, age FROM people;",
+ "useLegacySql": False,
+ "formatOptions": {
+ "useInt64Timestamp": True,
+ },
+ "requestId": mock.ANY,
+ },
+ timeout=None,
+ )
+
+
+def test_query_and_wait_caches_completed_query_results_one_page_no_rows():
+ client = mock.create_autospec(Client)
+ client.default_job_creation_mode = None
+ client._call_api.return_value = {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "abc",
+ "location": "US",
+ },
+ "jobComplete": True,
+ "queryId": "xyz",
+ }
+ rows = _job_helpers.query_and_wait(
+ client,
+ query="CREATE TABLE abc;",
+ project="request-project",
+ job_config=None,
+ location=None,
+ retry=None,
+ job_retry=None,
+ page_size=None,
+ max_results=None,
+ )
+ assert rows.project == "response-project"
+ assert rows.job_id == "abc"
+ assert rows.location == "US"
+ assert rows.query_id == "xyz"
+ assert list(rows) == []
+
+ # We should only call jobs.query once, no additional row requests needed.
+ request_path = "/projects/request-project/queries"
+ client._call_api.assert_called_once_with(
+ None, # retry
+ span_name="BigQuery.query",
+ span_attributes={"path": request_path},
+ method="POST",
+ path=request_path,
+ data={
+ "query": "CREATE TABLE abc;",
+ "useLegacySql": False,
+ "formatOptions": {
+ "useInt64Timestamp": True,
+ },
+ "requestId": mock.ANY,
+ },
+ timeout=None,
+ )
+
+
+def test_query_and_wait_caches_completed_query_results_more_pages():
+ client = make_client()
+ conn = client._connection = make_connection(
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "jobComplete": True,
+ "queryId": "xyz",
+ "schema": {
+ "fields": [
+ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "age", "type": "INT64", "mode": "NULLABLE"},
+ ],
+ },
+ "rows": [
+ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
+ {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ ],
+ # Even though totalRows <= len(rows), we should use the presence of a
+ # next page token to decide if there are any more pages.
+ "totalRows": 2,
+ "pageToken": "page-2",
+ },
+ # TODO(swast): This is a case where we can avoid a call to jobs.get,
+ # but currently do so because the RowIterator might need the
+ # destination table, since results aren't fully cached.
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "status": {"state": "DONE"},
+ },
+ {
+ "rows": [
+ {"f": [{"v": "Pebbles Phlyntstone"}, {"v": "4"}]},
+ {"f": [{"v": "Bamm-Bamm Rhubble"}, {"v": "5"}]},
+ {"f": [{"v": "Joseph Rockhead"}, {"v": "32"}]},
+ {"f": [{"v": "Perry Masonry"}, {"v": "33"}]},
+ ],
+ "totalRows": 3,
+ "pageToken": "page-3",
+ },
+ {
+ "rows": [
+ {"f": [{"v": "Pearl Slaghoople"}, {"v": "53"}]},
+ ],
+ "totalRows": 4,
+ },
+ )
+ rows = _job_helpers.query_and_wait(
+ client,
+ query="SELECT full_name, age FROM people;",
+ project="request-project",
+ job_config=None,
+ location=None,
+ retry=None,
+ job_retry=None,
+ page_size=None,
+ max_results=None,
+ )
+ assert rows.total_rows == 2 # Match the API response.
+ rows_list = list(rows)
+ assert rows.total_rows == 4 # Match the final API response.
+ assert len(rows_list) == 9
+
+ # Start the query.
+ jobs_query_path = "/projects/request-project/queries"
+ conn.api_request.assert_any_call(
+ method="POST",
+ path=jobs_query_path,
+ data={
+ "query": "SELECT full_name, age FROM people;",
+ "useLegacySql": False,
+ "formatOptions": {
+ "useInt64Timestamp": True,
+ },
+ "requestId": mock.ANY,
+ },
+ timeout=None,
+ )
+
+ # Note: There is no get call to
+ # "/projects/response-project/jobs/response-job-id", because fetching job
+ # metadata isn't necessary in this case. The job already completed in
+ # jobs.query and we don't need the full job metadata in query_and_wait.
+
+ # Fetch the remaining two pages.
+ jobs_get_query_results_path = "/projects/response-project/queries/response-job-id"
+ conn.api_request.assert_any_call(
+ timeout=None,
+ method="GET",
+ path=jobs_get_query_results_path,
+ query_params={
+ "pageToken": "page-2",
+ "fields": "jobReference,totalRows,pageToken,rows",
+ "location": "response-location",
+ "formatOptions.useInt64Timestamp": True,
+ },
+ )
+ conn.api_request.assert_any_call(
+ timeout=None,
+ method="GET",
+ path=jobs_get_query_results_path,
+ query_params={
+ "pageToken": "page-3",
+ "fields": "jobReference,totalRows,pageToken,rows",
+ "location": "response-location",
+ "formatOptions.useInt64Timestamp": True,
+ },
+ )
+
+
+def test_query_and_wait_incomplete_query():
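+    # When jobs.query returns jobComplete=False, the helper falls back to
+    # polling (jobs.get and jobs.getQueryResults with maxResults=0) before
+    # paging through the results.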
+ client = make_client()
+ conn = client._connection = make_connection(
+ # jobs.query
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "jobComplete": False,
+ },
+ # jobs.get
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "status": {"state": "RUNNING"},
+ },
+ # jobs.getQueryResults with max_results=0
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "jobComplete": True,
+ "totalRows": 2,
+ "queryId": "xyz",
+ "schema": {
+ "fields": [
+ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "age", "type": "INT64", "mode": "NULLABLE"},
+ ],
+ },
+ },
+ # jobs.get
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "status": {"state": "DONE"},
+ },
+ # jobs.getQueryResults
+ # Note: No more jobs.getQueryResults with max_results=0 because the
+ # previous call to jobs.getQueryResults returned with jobComplete=True.
+ {
+ "rows": [
+ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
+ {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ ],
+ # Even though totalRows <= len(rows), we should use the presence of a
+ # next page token to decide if there are any more pages.
+ "totalRows": 2,
+ "pageToken": "page-2",
+ },
+ # jobs.getQueryResults
+ {
+ "rows": [
+ {"f": [{"v": "Pearl Slaghoople"}, {"v": "53"}]},
+ ],
+ },
+ )
+ rows = _job_helpers.query_and_wait(
+ client,
+ query="SELECT full_name, age FROM people;",
+ project="request-project",
+ job_config=None,
+ location=None,
+ retry=None,
+ job_retry=None,
+ page_size=None,
+ max_results=None,
+ )
+ rows_list = list(rows)
+ assert rows.total_rows == 2 # Match the API response.
+ assert len(rows_list) == 5
+
+ # Start the query.
+ jobs_query_path = "/projects/request-project/queries"
+ conn.api_request.assert_any_call(
+ method="POST",
+ path=jobs_query_path,
+ data={
+ "query": "SELECT full_name, age FROM people;",
+ "useLegacySql": False,
+ "formatOptions": {
+ "useInt64Timestamp": True,
+ },
+ "requestId": mock.ANY,
+ },
+ timeout=None,
+ )
+
+ # Wait for the query to finish.
+ jobs_get_query_results_path = "/projects/response-project/queries/response-job-id"
+ conn.api_request.assert_any_call(
+ method="GET",
+ path=jobs_get_query_results_path,
+ query_params={
+ # job_query.QueryJob uses getQueryResults to wait for the query to finish.
+ # It avoids fetching the results because:
+ # (1) For large rows this can take a long time, much longer than
+ # our progress bar update frequency.
+ # See: https://github.com/googleapis/python-bigquery/issues/403
+            # (2) Caching the first page of results causes an unexpected increase in memory.
+ # See: https://github.com/googleapis/python-bigquery/issues/394
+ "maxResults": 0,
+ "location": "response-location",
+ },
+ timeout=None,
+ )
+
+ # Fetch the job metadata in case the RowIterator needs the destination table.
+ jobs_get_path = "/projects/response-project/jobs/response-job-id"
+ conn.api_request.assert_any_call(
+ method="GET",
+ path=jobs_get_path,
+ query_params={"projection": "full", "location": "response-location"},
+ timeout=retry.DEFAULT_GET_JOB_TIMEOUT,
+ )
+
+ # Fetch the remaining two pages.
+ conn.api_request.assert_any_call(
+ timeout=None,
+ method="GET",
+ path=jobs_get_query_results_path,
+ query_params={
+ "fields": "jobReference,totalRows,pageToken,rows",
+ "location": "response-location",
+ "formatOptions.useInt64Timestamp": True,
+ },
+ )
+ conn.api_request.assert_any_call(
+ timeout=None,
+ method="GET",
+ path=jobs_get_query_results_path,
+ query_params={
+ "pageToken": "page-2",
+ "fields": "jobReference,totalRows,pageToken,rows",
+ "location": "response-location",
+ "formatOptions.useInt64Timestamp": True,
+ },
+ )
+
+
def test_make_job_id_wo_suffix():
job_id = _job_helpers.make_job_id("job_id")
assert job_id == "job_id"
@@ -335,3 +1015,150 @@ def test_make_job_id_random():
def test_make_job_id_w_job_id_overrides_prefix():
job_id = _job_helpers.make_job_id("job_id", prefix="unused_prefix")
assert job_id == "job_id"
+
+
+@pytest.mark.parametrize(
+ ("job_config", "expected"),
+ (
+ pytest.param(None, True),
+ pytest.param(job_query.QueryJobConfig(), True, id="default"),
+ pytest.param(
+ job_query.QueryJobConfig(use_query_cache=False), True, id="use_query_cache"
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(maximum_bytes_billed=10_000_000),
+ True,
+ id="maximum_bytes_billed",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(clustering_fields=["a", "b", "c"]),
+ False,
+ id="clustering_fields",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(destination="p.d.t"), False, id="destination"
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(
+ destination_encryption_configuration=job_query.EncryptionConfiguration(
+ "key"
+ )
+ ),
+ False,
+ id="destination_encryption_configuration",
+ ),
+ # priority="BATCH" is not supported. See:
+ # https://github.com/googleapis/python-bigquery/issues/1867
+ pytest.param(
+ job_query.QueryJobConfig(
+ priority=enums.QueryPriority.BATCH,
+ ),
+ False,
+ id="priority=BATCH",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(write_incremental_results=True),
+ True,
+ id="write_incremental_results",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(job_timeout_ms=1000),
+ True,
+ id="job_timeout_ms",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(reservation="foo"),
+ True,
+ id="reservation",
+ ),
+ pytest.param(
+ job_query.QueryJobConfig(max_slots=20),
+ True,
+ id="max_slots",
+ ),
+ ),
+)
+def test_supported_by_jobs_query_from_queryjobconfig(
+ job_config: Optional[job_query.QueryJobConfig], expected: bool
+):
+ request_body = _job_helpers._to_query_request(job_config, query="SELECT 1")
+ assert _job_helpers._supported_by_jobs_query(request_body) == expected
+
+
+def test_wait_or_cancel_no_exception():
+ job = mock.create_autospec(job_query.QueryJob, instance=True)
+ expected_rows = object()
+ job.result.return_value = expected_rows
+ retry = retries.Retry()
+
+ rows = _job_helpers._wait_or_cancel(
+ job,
+ api_timeout=123,
+ wait_timeout=456,
+ retry=retry,
+ page_size=789,
+ max_results=101112,
+ )
+
+ job.result.assert_called_once_with(
+ timeout=456,
+ retry=retry,
+ page_size=789,
+ max_results=101112,
+ )
+ assert rows is expected_rows
+
+
+def test_wait_or_cancel_exception_cancels_job():
+ job = mock.create_autospec(job_query.QueryJob, instance=True)
+ job.result.side_effect = google.api_core.exceptions.BadGateway("test error")
+ retry = retries.Retry()
+
+ with pytest.raises(google.api_core.exceptions.BadGateway):
+ _job_helpers._wait_or_cancel(
+ job,
+ api_timeout=123,
+ wait_timeout=456,
+ retry=retry,
+ page_size=789,
+ max_results=101112,
+ )
+
+ job.result.assert_called_once_with(
+ timeout=456,
+ retry=retry,
+ page_size=789,
+ max_results=101112,
+ )
+ job.cancel.assert_called_once_with(
+ timeout=123,
+ retry=retry,
+ )
+
+
+def test_wait_or_cancel_exception_raises_original_exception():
+ job = mock.create_autospec(job_query.QueryJob, instance=True)
+ job.result.side_effect = google.api_core.exceptions.BadGateway("test error")
+ job.cancel.side_effect = google.api_core.exceptions.NotFound("don't raise me")
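+    # Even if cancel() fails, the original exception from result() should propagate.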
+ retry = retries.Retry()
+
+ with pytest.raises(google.api_core.exceptions.BadGateway):
+ _job_helpers._wait_or_cancel(
+ job,
+ api_timeout=123,
+ wait_timeout=456,
+ retry=retry,
+ page_size=789,
+ max_results=101112,
+ )
+
+ job.result.assert_called_once_with(
+ timeout=456,
+ retry=retry,
+ page_size=789,
+ max_results=101112,
+ )
+ job.cancel.assert_called_once_with(
+ timeout=123,
+ retry=retry,
+ )
diff --git a/tests/unit/test__job_helpers_retry.py b/tests/unit/test__job_helpers_retry.py
new file mode 100644
index 000000000..3ea4b1aae
--- /dev/null
+++ b/tests/unit/test__job_helpers_retry.py
@@ -0,0 +1,122 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import freezegun
+import google.api_core.exceptions
+from google.api_core import retry as retries
+import pytest
+
+from google.cloud.bigquery import _job_helpers
+
+from . import helpers
+
+
+def test_query_and_wait_retries_job(global_time_lock):
+ with freezegun.freeze_time(auto_tick_seconds=100):
+ conn = helpers.make_connection(
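+            # BadGateway is handled by the API-level `retry` predicate below;
+            # InternalServerError triggers the job-level `job_retry`.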
+ google.api_core.exceptions.BadGateway("retry me"),
+ google.api_core.exceptions.InternalServerError("job_retry me"),
+ google.api_core.exceptions.BadGateway("retry me"),
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "abc",
+ "location": "response-location",
+ },
+ "jobComplete": True,
+ "schema": {
+ "fields": [
+ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "age", "type": "INT64", "mode": "NULLABLE"},
+ ],
+ },
+ "rows": [
+ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
+ {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ ],
+ },
+ )
+ client = helpers.make_client(project="client-project")
+ client._connection = conn
+ rows = _job_helpers.query_and_wait(
+ client,
+ query="SELECT 1",
+ location="request-location",
+ project="request-project",
+ job_config=None,
+ page_size=None,
+ max_results=None,
+ retry=retries.Retry(
+ lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway),
+ multiplier=1.0,
+ ).with_deadline(
+ 200.0
+ ), # Since auto_tick_seconds is 100, we should get at least 1 retry.
+ job_retry=retries.Retry(
+ lambda exc: isinstance(
+ exc, google.api_core.exceptions.InternalServerError
+ ),
+ multiplier=1.0,
+ ).with_deadline(600.0),
+ )
+ assert len(list(rows)) == 4
+
+ # For this code path, where the query has finished immediately, we should
+ # only be calling the jobs.query API and no other request path.
+ request_path = "/projects/request-project/queries"
+ for call in client._connection.api_request.call_args_list:
+ _, kwargs = call
+ assert kwargs["method"] == "POST"
+ assert kwargs["path"] == request_path
+
+
+def test_query_and_wait_retries_job_times_out(global_time_lock):
+ with freezegun.freeze_time(auto_tick_seconds=100):
+ conn = helpers.make_connection(
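+            # Only transient errors, so the job_retry deadline is eventually
+            # exhausted and a RetryError is raised.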
+ google.api_core.exceptions.BadGateway("retry me"),
+ google.api_core.exceptions.InternalServerError("job_retry me"),
+ google.api_core.exceptions.BadGateway("retry me"),
+ google.api_core.exceptions.InternalServerError("job_retry me"),
+ )
+ client = helpers.make_client(project="client-project")
+ client._connection = conn
+
+ with pytest.raises(google.api_core.exceptions.RetryError) as exc_info:
+ _job_helpers.query_and_wait(
+ client,
+ query="SELECT 1",
+ location="request-location",
+ project="request-project",
+ job_config=None,
+ page_size=None,
+ max_results=None,
+ retry=retries.Retry(
+ lambda exc: isinstance(exc, google.api_core.exceptions.BadGateway),
+ multiplier=1.0,
+ ).with_deadline(
+ 200.0
+ ), # Since auto_tick_seconds is 100, we should get at least 1 retry.
+ job_retry=retries.Retry(
+ lambda exc: isinstance(
+ exc, google.api_core.exceptions.InternalServerError
+ ),
+ multiplier=1.0,
+ ).with_deadline(400.0),
+ )
+
+ assert isinstance(
+ exc_info.value.cause, google.api_core.exceptions.InternalServerError
+ )
diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py
index 1a3f918eb..bc94f5f54 100644
--- a/tests/unit/test__pandas_helpers.py
+++ b/tests/unit/test__pandas_helpers.py
@@ -16,43 +16,67 @@
import datetime
import decimal
import functools
+import gc
import operator
import queue
+from typing import Union
+from unittest import mock
import warnings
-import pkg_resources
-import mock
+import importlib.metadata as metadata
try:
import pandas
import pandas.api.types
import pandas.testing
-except ImportError: # pragma: NO COVER
+except ImportError:
pandas = None
-import pyarrow
-import pyarrow.types
+try:
+ import pandas_gbq.schema.pandas_to_bigquery
+except ImportError:
+ pandas_gbq = None
try:
import geopandas
-except ImportError: # pragma: NO COVER
+except ImportError:
geopandas = None
import pytest
from google import api_core
-from google.cloud import bigquery_storage
-from google.cloud.bigquery import _helpers
+
+from google.cloud.bigquery import exceptions
+from google.cloud.bigquery import _pyarrow_helpers
+from google.cloud.bigquery import _versions_helpers
from google.cloud.bigquery import schema
+from google.cloud.bigquery._pandas_helpers import determine_requested_streams
+
+pyarrow = _versions_helpers.PYARROW_VERSIONS.try_import()
+if pyarrow:
+ import pyarrow.parquet
+ import pyarrow.types
+
+ _BIGNUMERIC_SUPPORT = True
+else:
+ # Mock out pyarrow when missing, because methods from pyarrow.types are
+ # used in test parameterization.
+ pyarrow = mock.Mock()
+ _BIGNUMERIC_SUPPORT = False
-PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0")
+bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import()
if pandas is not None:
- PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
+ PANDAS_INSTALLED_VERSION = metadata.version("pandas")
else:
- # Set to less than MIN version.
- PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0")
+ PANDAS_INSTALLED_VERSION = "0.0.0"
+
+
+skip_if_no_bignumeric = pytest.mark.skipif(
+ not _BIGNUMERIC_SUPPORT,
+ reason="BIGNUMERIC support requires pyarrow>=3.0.0",
+)
@pytest.fixture
@@ -110,6 +134,7 @@ def all_(*functions):
return functools.partial(do_all, functions)
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_is_datetime():
assert is_datetime(pyarrow.timestamp("us", tz=None))
assert not is_datetime(pyarrow.timestamp("ms", tz=None))
@@ -142,7 +167,12 @@ def test_all_():
("FLOAT", "NULLABLE", pyarrow.types.is_float64),
("FLOAT64", "NULLABLE", pyarrow.types.is_float64),
("NUMERIC", "NULLABLE", is_numeric),
- ("BIGNUMERIC", "NULLABLE", is_bignumeric),
+ pytest.param(
+ "BIGNUMERIC",
+ "NULLABLE",
+ is_bignumeric,
+ marks=skip_if_no_bignumeric,
+ ),
("BOOLEAN", "NULLABLE", pyarrow.types.is_boolean),
("BOOL", "NULLABLE", pyarrow.types.is_boolean),
("TIMESTAMP", "NULLABLE", is_timestamp),
@@ -221,10 +251,11 @@ def test_all_():
"REPEATED",
all_(pyarrow.types.is_list, lambda type_: is_numeric(type_.value_type)),
),
- (
+ pytest.param(
"BIGNUMERIC",
"REPEATED",
all_(pyarrow.types.is_list, lambda type_: is_bignumeric(type_.value_type)),
+ marks=skip_if_no_bignumeric,
),
(
"BOOLEAN",
@@ -280,6 +311,7 @@ def test_all_():
("UNKNOWN_TYPE", "REPEATED", is_none),
],
)
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_type):
field = schema.SchemaField("ignored_name", bq_type, mode=bq_mode)
actual = module_under_test.bq_to_arrow_data_type(field)
@@ -287,6 +319,7 @@ def test_bq_to_arrow_data_type(module_under_test, bq_type, bq_mode, is_correct_t
@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"])
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type):
fields = (
schema.SchemaField("field01", "STRING"),
@@ -316,14 +349,14 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type):
pyarrow.field("field04", pyarrow.int64()),
pyarrow.field("field05", pyarrow.float64()),
pyarrow.field("field06", pyarrow.float64()),
- pyarrow.field("field07", module_under_test.pyarrow_numeric()),
- pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),
+ pyarrow.field("field07", _pyarrow_helpers.pyarrow_numeric()),
+ pyarrow.field("field08", _pyarrow_helpers.pyarrow_bignumeric()),
pyarrow.field("field09", pyarrow.bool_()),
pyarrow.field("field10", pyarrow.bool_()),
- pyarrow.field("field11", module_under_test.pyarrow_timestamp()),
+ pyarrow.field("field11", _pyarrow_helpers.pyarrow_timestamp()),
pyarrow.field("field12", pyarrow.date32()),
- pyarrow.field("field13", module_under_test.pyarrow_time()),
- pyarrow.field("field14", module_under_test.pyarrow_datetime()),
+ pyarrow.field("field13", _pyarrow_helpers.pyarrow_time()),
+ pyarrow.field("field14", _pyarrow_helpers.pyarrow_datetime()),
pyarrow.field("field15", pyarrow.string()),
)
expected = pyarrow.struct(expected)
@@ -334,6 +367,7 @@ def test_bq_to_arrow_data_type_w_struct(module_under_test, bq_type):
@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"])
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
fields = (
schema.SchemaField("field01", "STRING"),
@@ -363,14 +397,14 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
pyarrow.field("field04", pyarrow.int64()),
pyarrow.field("field05", pyarrow.float64()),
pyarrow.field("field06", pyarrow.float64()),
- pyarrow.field("field07", module_under_test.pyarrow_numeric()),
- pyarrow.field("field08", module_under_test.pyarrow_bignumeric()),
+ pyarrow.field("field07", _pyarrow_helpers.pyarrow_numeric()),
+ pyarrow.field("field08", _pyarrow_helpers.pyarrow_bignumeric()),
pyarrow.field("field09", pyarrow.bool_()),
pyarrow.field("field10", pyarrow.bool_()),
- pyarrow.field("field11", module_under_test.pyarrow_timestamp()),
+ pyarrow.field("field11", _pyarrow_helpers.pyarrow_timestamp()),
pyarrow.field("field12", pyarrow.date32()),
- pyarrow.field("field13", module_under_test.pyarrow_time()),
- pyarrow.field("field14", module_under_test.pyarrow_datetime()),
+ pyarrow.field("field13", _pyarrow_helpers.pyarrow_time()),
+ pyarrow.field("field14", _pyarrow_helpers.pyarrow_datetime()),
pyarrow.field("field15", pyarrow.string()),
)
expected_value_type = pyarrow.struct(expected)
@@ -381,6 +415,7 @@ def test_bq_to_arrow_data_type_w_array_struct(module_under_test, bq_type):
assert actual.value_type.equals(expected_value_type)
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test):
fields = (
schema.SchemaField("field1", "STRING"),
@@ -417,7 +452,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test):
decimal.Decimal("999.123456789"),
],
),
- (
+ pytest.param(
"BIGNUMERIC",
[
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
@@ -479,6 +514,7 @@ def test_bq_to_arrow_data_type_w_struct_unknown_subfield(module_under_test):
],
)
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows):
series = pandas.Series(rows, dtype="object")
bq_field = schema.SchemaField("field_name", bq_type)
@@ -513,6 +549,8 @@ def test_bq_to_arrow_array_w_nullable_scalars(module_under_test, bq_type, rows):
],
)
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(
+    PANDAS_INSTALLED_VERSION[0:2] not in ["0.", "1."], reason="Requires pandas < 2.0"
+)
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows):
rows = [pandas.Timestamp(row) for row in rows]
series = pandas.Series(rows)
@@ -523,24 +561,38 @@ def test_bq_to_arrow_array_w_pandas_timestamp(module_under_test, bq_type, rows):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_array_w_arrays(module_under_test):
rows = [[1, 2, 3], [], [4, 5, 6]]
- series = pandas.Series(rows, dtype="object")
+ series = pandas.Series(rows, name="test_col", dtype="object")
bq_field = schema.SchemaField("field_name", "INTEGER", mode="REPEATED")
arrow_array = module_under_test.bq_to_arrow_array(series, bq_field)
roundtrip = arrow_array.to_pylist()
assert rows == roundtrip
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(pyarrow is None, reason="Requires `pyarrow`")
+def test_bq_to_arrow_array_w_conversion_fail(module_under_test): # pragma: NO COVER
+ rows = [[1, 2, 3], [], [4, 5, 6]]
+ series = pandas.Series(rows, name="test_col", dtype="object")
+ bq_field = schema.SchemaField("field_name", "STRING", mode="REPEATED")
+ exc_msg = f"""Error converting Pandas column with name: "{series.name}" and datatype: "{series.dtype}" to an appropriate pyarrow datatype: Array, ListArray, or StructArray"""
+ with pytest.raises(pyarrow.ArrowTypeError, match=exc_msg):
+ module_under_test.bq_to_arrow_array(series, bq_field)
+ raise pyarrow.ArrowTypeError(exc_msg)
+
+
@pytest.mark.parametrize("bq_type", ["RECORD", "record", "STRUCT", "struct"])
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_array_w_structs(module_under_test, bq_type):
rows = [
{"int_col": 123, "string_col": "abc"},
None,
{"int_col": 456, "string_col": "def"},
]
- series = pandas.Series(rows, dtype="object")
+ series = pandas.Series(rows, name="test_col", dtype="object")
bq_field = schema.SchemaField(
"field_name",
bq_type,
@@ -555,6 +607,7 @@ def test_bq_to_arrow_array_w_structs(module_under_test, bq_type):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_array_w_special_floats(module_under_test):
bq_field = schema.SchemaField("field_name", "FLOAT64")
rows = [float("-inf"), float("nan"), float("inf"), None]
@@ -622,6 +675,68 @@ def test_bq_to_arrow_array_w_geography_type_wkb_data(module_under_test):
assert array.to_pylist() == list(series)
+@pytest.mark.parametrize(
+ "bq_schema,expected",
+ [
+ (
+ schema.SchemaField(
+ "field1",
+ "RANGE",
+ range_element_type=schema.FieldElementType("DATE"),
+ mode="NULLABLE",
+ ),
+ pyarrow.struct(
+ [
+ ("start", pyarrow.date32()),
+ ("end", pyarrow.date32()),
+ ]
+ ),
+ ),
+ (
+ schema.SchemaField(
+ "field2",
+ "RANGE",
+ range_element_type=schema.FieldElementType("DATETIME"),
+ mode="NULLABLE",
+ ),
+ pyarrow.struct(
+ [
+ ("start", pyarrow.timestamp("us", tz=None)),
+ ("end", pyarrow.timestamp("us", tz=None)),
+ ]
+ ),
+ ),
+ (
+ schema.SchemaField(
+ "field3",
+ "RANGE",
+ range_element_type=schema.FieldElementType("TIMESTAMP"),
+ mode="NULLABLE",
+ ),
+ pyarrow.struct(
+ [
+ ("start", pyarrow.timestamp("us", tz="UTC")),
+ ("end", pyarrow.timestamp("us", tz="UTC")),
+ ]
+ ),
+ ),
+ ],
+)
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_bq_to_arrow_data_type_w_range(module_under_test, bq_schema, expected):
+ actual = module_under_test.bq_to_arrow_data_type(bq_schema)
+ assert actual.equals(expected)
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_bq_to_arrow_data_type_w_range_no_element(module_under_test):
+ field = schema.SchemaField("field1", "RANGE", mode="NULLABLE")
+ with pytest.raises(ValueError, match="Range element type cannot be None"):
+ module_under_test.bq_to_arrow_data_type(field)
+
+
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_schema_w_unknown_type(module_under_test):
fields = (
schema.SchemaField("field1", "STRING"),
@@ -647,6 +762,7 @@ def test_get_column_or_index_not_found(module_under_test):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_get_column_or_index_with_multiindex_not_found(module_under_test):
dataframe = pandas.DataFrame(
{"column_name": [1, 2, 3, 4, 5, 6]},
@@ -768,47 +884,67 @@ def test_list_columns_and_indexes_with_named_index_same_as_column_name(
assert columns_and_indexes == expected
-@pytest.mark.skipif(
- pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION,
- reason="Requires `pandas version >= 1.0.0` which introduces pandas.NA",
-)
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_dataframe_to_json_generator(module_under_test):
- utcnow = datetime.datetime.utcnow()
- df_data = collections.OrderedDict(
- [
- ("a_series", [pandas.NA, 2, 3, 4]),
- ("b_series", [0.1, float("NaN"), 0.3, 0.4]),
- ("c_series", ["a", "b", pandas.NA, "d"]),
- ("d_series", [utcnow, utcnow, utcnow, pandas.NaT]),
- ("e_series", [True, False, True, None]),
- ]
- )
+ utcnow = datetime.datetime.now(datetime.timezone.utc)
dataframe = pandas.DataFrame(
- df_data, index=pandas.Index([4, 5, 6, 7], name="a_index")
+ {
+ "a_series": [1, 2, 3, 4],
+ "b_series": [0.1, float("NaN"), 0.3, 0.4],
+ "c_series": ["a", "b", pandas.NA, "d"],
+ "d_series": [utcnow, utcnow, utcnow, pandas.NaT],
+ "e_series": [True, False, True, None],
+ # Support nullable dtypes.
+ # https://github.com/googleapis/python-bigquery/issues/1815
+ "boolean_series": pandas.Series(
+ [True, False, pandas.NA, False], dtype="boolean"
+ ),
+ "int64_series": pandas.Series([-1, pandas.NA, -3, -4], dtype="Int64"),
+ }
)
- dataframe = dataframe.astype({"a_series": pandas.Int64Dtype()})
+ # Index is not included, even if it is not the default and has a name.
+ dataframe = dataframe.rename(index=lambda idx: idx + 4)
+ dataframe.index.name = "a_index"
- rows = module_under_test.dataframe_to_json_generator(dataframe)
+ rows = list(module_under_test.dataframe_to_json_generator(dataframe))
expected = [
- {"b_series": 0.1, "c_series": "a", "d_series": utcnow, "e_series": True},
- {"a_series": 2, "c_series": "b", "d_series": utcnow, "e_series": False},
- {"a_series": 3, "b_series": 0.3, "d_series": utcnow, "e_series": True},
- {"a_series": 4, "b_series": 0.4, "c_series": "d"},
+ {
+ "a_series": 1,
+ "b_series": 0.1,
+ "c_series": "a",
+ "d_series": utcnow,
+ "e_series": True,
+ "boolean_series": True,
+ "int64_series": -1,
+ },
+ {
+ "a_series": 2,
+ "c_series": "b",
+ "d_series": utcnow,
+ "e_series": False,
+ "boolean_series": False,
+ },
+ {
+ "a_series": 3,
+ "b_series": 0.3,
+ "d_series": utcnow,
+ "e_series": True,
+ "int64_series": -3,
+ },
+ {
+ "a_series": 4,
+ "b_series": 0.4,
+ "c_series": "d",
+ "boolean_series": False,
+ "int64_series": -4,
+ },
]
- assert list(rows) == expected
+ assert rows == expected
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_dataframe_to_json_generator_repeated_field(module_under_test):
- pytest.importorskip(
- "pandas",
- minversion=str(PANDAS_MINIUM_VERSION),
- reason=(
- f"Requires `pandas version >= {PANDAS_MINIUM_VERSION}` "
- "which introduces pandas.NA"
- ),
- )
-
df_data = [
collections.OrderedDict(
[("repeated_col", [pandas.NA, 2, None, 4]), ("not_repeated_col", "first")]
@@ -891,32 +1027,6 @@ def test_list_columns_and_indexes_with_multiindex(module_under_test):
assert columns_and_indexes == expected
-@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_dataframe_to_bq_schema_dict_sequence(module_under_test):
- df_data = collections.OrderedDict(
- [
- ("str_column", ["hello", "world"]),
- ("int_column", [42, 8]),
- ("bool_column", [True, False]),
- ]
- )
- dataframe = pandas.DataFrame(df_data)
-
- dict_schema = [
- {"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
- {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
- ]
-
- returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema)
-
- expected_schema = (
- schema.SchemaField("str_column", "STRING", "NULLABLE"),
- schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
- schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
- )
- assert returned_schema == expected_schema
-
-
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_dataframe_to_arrow_with_multiindex(module_under_test):
bq_schema = (
@@ -984,6 +1094,7 @@ def test_dataframe_to_arrow_with_multiindex(module_under_test):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_dataframe_to_arrow_with_required_fields(module_under_test):
bq_schema = (
schema.SchemaField("field01", "STRING", mode="REQUIRED"),
@@ -1004,30 +1115,41 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test):
)
data = {
- "field01": ["hello", "world"],
- "field02": [b"abd", b"efg"],
- "field03": [1, 2],
- "field04": [3, 4],
- "field05": [1.25, 9.75],
- "field06": [-1.75, -3.5],
- "field07": [decimal.Decimal("1.2345"), decimal.Decimal("6.7891")],
+ "field01": ["hello", None, "world"],
+ "field02": [b"abd", b"efg", b"hij"],
+ "field03": [1, 2, 3],
+ "field04": [4, None, 5],
+ "field05": [1.25, 0.0, 9.75],
+ "field06": [-1.75, None, -3.5],
+ "field07": [
+ decimal.Decimal("1.2345"),
+ decimal.Decimal("6.7891"),
+ -decimal.Decimal("10.111213"),
+ ],
"field08": [
decimal.Decimal("-{d38}.{d38}".format(d38="9" * 38)),
+ None,
decimal.Decimal("{d38}.{d38}".format(d38="9" * 38)),
],
- "field09": [True, False],
- "field10": [False, True],
+ "field09": [True, False, True],
+ "field10": [False, True, None],
"field11": [
datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc),
datetime.datetime(2012, 12, 21, 9, 7, 42, tzinfo=datetime.timezone.utc),
+ datetime.datetime(2022, 7, 14, 23, 59, 59, tzinfo=datetime.timezone.utc),
],
- "field12": [datetime.date(9999, 12, 31), datetime.date(1970, 1, 1)],
- "field13": [datetime.time(23, 59, 59, 999999), datetime.time(12, 0, 0)],
+ "field12": [datetime.date(9999, 12, 31), None, datetime.date(1970, 1, 1)],
+ "field13": [datetime.time(23, 59, 59, 999999), None, datetime.time(12, 0, 0)],
"field14": [
datetime.datetime(1970, 1, 1, 0, 0, 0),
+ None,
datetime.datetime(2012, 12, 21, 9, 7, 42),
],
- "field15": ["POINT(30 10)", "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))"],
+ "field15": [
+ None,
+ "POINT(30 10)",
+ "POLYGON ((30 10, 40 40, 20 40, 10 20, 30 10))",
+ ],
}
dataframe = pandas.DataFrame(data)
@@ -1036,10 +1158,15 @@ def test_dataframe_to_arrow_with_required_fields(module_under_test):
assert len(arrow_schema) == len(bq_schema)
for arrow_field in arrow_schema:
- assert not arrow_field.nullable
+        # Even if the remote schema is REQUIRED, there's a chance there are
+        # local NULL values. Arrow will gladly interpret these NULL values
+ # as non-NULL and give you an arbitrary value. See:
+ # https://github.com/googleapis/python-bigquery/issues/1692
+ assert arrow_field.nullable
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_dataframe_to_arrow_with_unknown_type(module_under_test):
bq_schema = (
schema.SchemaField("field00", "UNKNOWN_TYPE"),
@@ -1072,6 +1199,7 @@ def test_dataframe_to_arrow_with_unknown_type(module_under_test):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
dict_schema = [
{"name": "field01", "type": "STRING", "mode": "REQUIRED"},
@@ -1086,13 +1214,32 @@ def test_dataframe_to_arrow_dict_sequence_schema(module_under_test):
arrow_schema = arrow_table.schema
expected_fields = [
- pyarrow.field("field01", "string", nullable=False),
+        # Even if the remote schema is REQUIRED, there's a chance there are
+        # local NULL values. Arrow will gladly interpret these NULL values
+ # as non-NULL and give you an arbitrary value. See:
+ # https://github.com/googleapis/python-bigquery/issues/1692
+ pyarrow.field("field01", "string", nullable=True),
pyarrow.field("field02", "bool", nullable=True),
]
assert list(arrow_schema) == expected_fields
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_dataframe_to_parquet_without_pyarrow(module_under_test, monkeypatch):
+ mock_pyarrow_import = mock.Mock()
+ mock_pyarrow_import.side_effect = exceptions.LegacyPyarrowError(
+ "pyarrow not installed"
+ )
+ monkeypatch.setattr(
+ _versions_helpers.PYARROW_VERSIONS, "try_import", mock_pyarrow_import
+ )
+
+ with pytest.raises(exceptions.LegacyPyarrowError):
+ module_under_test.dataframe_to_parquet(pandas.DataFrame(), (), None)
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_dataframe_to_parquet_w_extra_fields(module_under_test):
with pytest.raises(ValueError) as exc_context:
module_under_test.dataframe_to_parquet(
@@ -1104,6 +1251,7 @@ def test_dataframe_to_parquet_w_extra_fields(module_under_test):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_dataframe_to_parquet_w_missing_fields(module_under_test):
with pytest.raises(ValueError) as exc_context:
module_under_test.dataframe_to_parquet(
@@ -1115,6 +1263,7 @@ def test_dataframe_to_parquet_w_missing_fields(module_under_test):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_dataframe_to_parquet_compression_method(module_under_test):
bq_schema = (schema.SchemaField("field00", "STRING"),)
dataframe = pandas.DataFrame({"field00": ["foo", "bar"]})
@@ -1130,11 +1279,150 @@ def test_dataframe_to_parquet_compression_method(module_under_test):
call_args = fake_write_table.call_args
assert call_args is not None
- assert call_args.kwargs.get("compression") == "ZSTD"
+ assert call_args[1].get("compression") == "ZSTD"
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(pandas_gbq is None, reason="Requires `pandas-gbq`")
+def test_dataframe_to_bq_schema_returns_schema_with_pandas_gbq(
+ module_under_test, monkeypatch
+):
+ monkeypatch.setattr(module_under_test, "pandas_gbq", None)
+ dataframe = pandas.DataFrame({"field00": ["foo", "bar"]})
+ got = module_under_test.dataframe_to_bq_schema(dataframe, [])
+    # Don't assert beyond this, since pandas-gbq is now the source of truth.
+ assert got is not None
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_dataframe_to_bq_schema_w_named_index(module_under_test, monkeypatch):
+ monkeypatch.setattr(module_under_test, "pandas_gbq", None)
+
+ df_data = collections.OrderedDict(
+ [
+ ("str_column", ["hello", "world"]),
+ ("int_column", [42, 8]),
+ ("bool_column", [True, False]),
+ ]
+ )
+ index = pandas.Index(["a", "b"], name="str_index")
+ dataframe = pandas.DataFrame(df_data, index=index)
+
+ with pytest.warns(FutureWarning, match="pandas-gbq"):
+ returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, [])
+
+ expected_schema = (
+ schema.SchemaField("str_index", "STRING", "NULLABLE"),
+ schema.SchemaField("str_column", "STRING", "NULLABLE"),
+ schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
+ schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"),
+ )
+ assert returned_schema == expected_schema
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_dataframe_to_bq_schema_w_multiindex(module_under_test, monkeypatch):
+ monkeypatch.setattr(module_under_test, "pandas_gbq", None)
+
+ df_data = collections.OrderedDict(
+ [
+ ("str_column", ["hello", "world"]),
+ ("int_column", [42, 8]),
+ ("bool_column", [True, False]),
+ ]
+ )
+ index = pandas.MultiIndex.from_tuples(
+ [
+ ("a", 0, datetime.datetime(1999, 12, 31, 23, 59, 59, 999999)),
+ ("a", 0, datetime.datetime(2000, 1, 1, 0, 0, 0)),
+ ],
+ names=["str_index", "int_index", "dt_index"],
+ )
+ dataframe = pandas.DataFrame(df_data, index=index)
+
+ with pytest.warns(FutureWarning, match="pandas-gbq"):
+ returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, [])
+
+ expected_schema = (
+ schema.SchemaField("str_index", "STRING", "NULLABLE"),
+ schema.SchemaField("int_index", "INTEGER", "NULLABLE"),
+ schema.SchemaField("dt_index", "DATETIME", "NULLABLE"),
+ schema.SchemaField("str_column", "STRING", "NULLABLE"),
+ schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
+ schema.SchemaField("bool_column", "BOOLEAN", "NULLABLE"),
+ )
+ assert returned_schema == expected_schema
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch):
+ monkeypatch.setattr(module_under_test, "pandas_gbq", None)
+
+ df_data = collections.OrderedDict(
+ [
+ ("str_column", ["hello", "world"]),
+ ("int_column", [42, 8]),
+ ("bool_column", [True, False]),
+ ]
+ )
+ dataframe = pandas.DataFrame(df_data)
+
+ dict_schema = [
+ {"name": "str_column", "type": "STRING", "mode": "NULLABLE"},
+ {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"},
+ ]
+
+ with pytest.warns(FutureWarning, match="pandas-gbq"):
+ returned_schema = module_under_test.dataframe_to_bq_schema(
+ dataframe, dict_schema
+ )
+
+ expected_schema = (
+ schema.SchemaField("str_column", "STRING", "NULLABLE"),
+ schema.SchemaField("int_column", "INTEGER", "NULLABLE"),
+ schema.SchemaField("bool_column", "BOOL", "REQUIRED"),
+ )
+ assert returned_schema == expected_schema
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):
+def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(
+ module_under_test, monkeypatch
+):
+ monkeypatch.setattr(module_under_test, "pandas_gbq", None)
+
+ dataframe = pandas.DataFrame(
+ data=[
+ {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)},
+ {"id": 20, "status": "BAR", "created_at": datetime.date(2018, 9, 12)},
+ ]
+ )
+
+ no_pyarrow_patch = mock.patch(module_under_test.__name__ + ".pyarrow", None)
+
+ with no_pyarrow_patch, warnings.catch_warnings(record=True) as warned:
+ detected_schema = module_under_test.dataframe_to_bq_schema(
+ dataframe, bq_schema=[]
+ )
+
+ assert detected_schema is None
+
+ # a warning should also be issued
+ expected_warnings = [
+ warning for warning in warned if "could not determine" in str(warning).lower()
+ ]
+ assert len(expected_warnings) == 1
+ msg = str(expected_warnings[0])
+ assert "execution_date" in msg and "created_at" in msg
+
+
+@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(
+ module_under_test, monkeypatch
+):
+ monkeypatch.setattr(module_under_test, "pandas_gbq", None)
+
dataframe = pandas.DataFrame(
data=[
{"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)},
@@ -1163,7 +1451,10 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test):
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test, monkeypatch):
+ monkeypatch.setattr(module_under_test, "pandas_gbq", None)
+
dataframe = pandas.DataFrame(
data=[
{"struct_field": {"one": 2}, "status": "FOO"},
@@ -1187,9 +1478,11 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test):
@pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`")
-def test_dataframe_to_bq_schema_geography(module_under_test):
+def test_dataframe_to_bq_schema_geography(module_under_test, monkeypatch):
from shapely import wkt
+ monkeypatch.setattr(module_under_test, "pandas_gbq", None)
+
df = geopandas.GeoDataFrame(
pandas.DataFrame(
dict(
@@ -1200,7 +1493,10 @@ def test_dataframe_to_bq_schema_geography(module_under_test):
),
geometry="geo1",
)
- bq_schema = module_under_test.dataframe_to_bq_schema(df, [])
+
+ with pytest.warns(FutureWarning, match="pandas-gbq"):
+ bq_schema = module_under_test.dataframe_to_bq_schema(df, [])
+
assert bq_schema == (
schema.SchemaField("name", "STRING"),
schema.SchemaField("geo1", "GEOGRAPHY"),
@@ -1249,6 +1545,7 @@ def test__first_array_valid_no_arrays_with_valid_items(module_under_test):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_augment_schema_type_detection_succeeds(module_under_test):
dataframe = pandas.DataFrame(
data=[
@@ -1271,31 +1568,7 @@ def test_augment_schema_type_detection_succeeds(module_under_test):
# set to "datetime64[ns]", and pyarrow converts that to pyarrow.TimestampArray.
# We thus cannot expect to get a DATETIME date when converting back to the
# BigQuery type.
-
- current_schema = (
- schema.SchemaField("bool_field", field_type=None, mode="NULLABLE"),
- schema.SchemaField("int_field", field_type=None, mode="NULLABLE"),
- schema.SchemaField("float_field", field_type=None, mode="NULLABLE"),
- schema.SchemaField("time_field", field_type=None, mode="NULLABLE"),
- schema.SchemaField("timestamp_field", field_type=None, mode="NULLABLE"),
- schema.SchemaField("date_field", field_type=None, mode="NULLABLE"),
- schema.SchemaField("bytes_field", field_type=None, mode="NULLABLE"),
- schema.SchemaField("string_field", field_type=None, mode="NULLABLE"),
- schema.SchemaField("numeric_field", field_type=None, mode="NULLABLE"),
- schema.SchemaField("bignumeric_field", field_type=None, mode="NULLABLE"),
- )
-
- with warnings.catch_warnings(record=True) as warned:
- augmented_schema = module_under_test.augment_schema(dataframe, current_schema)
-
- # there should be no relevant warnings
- unwanted_warnings = [
- warning for warning in warned if "Pyarrow could not" in str(warning)
- ]
- assert not unwanted_warnings
-
- # the augmented schema must match the expected
- expected_schema = (
+ expected_schemas = (
schema.SchemaField("bool_field", field_type="BOOL", mode="NULLABLE"),
schema.SchemaField("int_field", field_type="INT64", mode="NULLABLE"),
schema.SchemaField("float_field", field_type="FLOAT64", mode="NULLABLE"),
@@ -1310,11 +1583,17 @@ def test_augment_schema_type_detection_succeeds(module_under_test):
),
)
- by_name = operator.attrgetter("name")
- assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name)
+ for col_name, expected_schema in zip(dataframe, expected_schemas):
+ with warnings.catch_warnings(record=True) as warned:
+ schema_field = module_under_test._get_schema_by_pyarrow(
+ col_name, dataframe[col_name]
+ )
+ assert warned == []
+ assert schema_field == expected_schema
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_augment_schema_repeated_fields(module_under_test):
dataframe = pandas.DataFrame(
data=[
@@ -1341,30 +1620,20 @@ def test_augment_schema_repeated_fields(module_under_test):
]
)
- current_schema = (
- schema.SchemaField("string_array", field_type=None, mode="NULLABLE"),
- schema.SchemaField("timestamp_array", field_type=None, mode="NULLABLE"),
- schema.SchemaField("datetime_array", field_type=None, mode="NULLABLE"),
- )
-
- with warnings.catch_warnings(record=True) as warned:
- augmented_schema = module_under_test.augment_schema(dataframe, current_schema)
-
- # there should be no relevant warnings
- unwanted_warnings = [
- warning for warning in warned if "Pyarrow could not" in str(warning)
- ]
- assert not unwanted_warnings
-
# the augmented schema must match the expected
- expected_schema = (
+ expected_schemas = (
schema.SchemaField("string_array", field_type="STRING", mode="REPEATED"),
schema.SchemaField("timestamp_array", field_type="TIMESTAMP", mode="REPEATED"),
schema.SchemaField("datetime_array", field_type="DATETIME", mode="REPEATED"),
)
- by_name = operator.attrgetter("name")
- assert sorted(augmented_schema, key=by_name) == sorted(expected_schema, key=by_name)
+ for col_name, expected_schema in zip(dataframe, expected_schemas):
+ with warnings.catch_warnings(record=True) as warned:
+ schema_field = module_under_test._get_schema_by_pyarrow(
+ col_name, dataframe[col_name]
+ )
+ assert warned == []
+ assert schema_field == expected_schema
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
@@ -1383,24 +1652,21 @@ def test_augment_schema_type_detection_fails(module_under_test):
},
]
)
- current_schema = [
- schema.SchemaField("status", field_type="STRING", mode="NULLABLE"),
- schema.SchemaField("struct_field", field_type=None, mode="NULLABLE"),
- schema.SchemaField("struct_field_2", field_type=None, mode="NULLABLE"),
- ]
-
- with warnings.catch_warnings(record=True) as warned:
- augmented_schema = module_under_test.augment_schema(dataframe, current_schema)
- assert augmented_schema is None
+ expected_schemas = (
+ schema.SchemaField("status", field_type="STRING", mode="NULLABLE"),
+ # Could not determine the type of these columns
+ None,
+ None,
+ )
- expected_warnings = [
- warning for warning in warned if "could not determine" in str(warning)
- ]
- assert len(expected_warnings) == 1
- warning_msg = str(expected_warnings[0])
- assert "pyarrow" in warning_msg.lower()
- assert "struct_field" in warning_msg and "struct_field_2" in warning_msg
+ for col_name, expected_schema in zip(dataframe, expected_schemas):
+ with warnings.catch_warnings(record=True) as warned:
+ schema_field = module_under_test._get_schema_by_pyarrow(
+ col_name, dataframe[col_name]
+ )
+ assert warned == []
+ assert schema_field == expected_schema
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
@@ -1408,25 +1674,17 @@ def test_augment_schema_type_detection_fails_array_data(module_under_test):
dataframe = pandas.DataFrame(
data=[{"all_none_array": [None, float("NaN")], "empty_array": []}]
)
- current_schema = [
- schema.SchemaField("all_none_array", field_type=None, mode="NULLABLE"),
- schema.SchemaField("empty_array", field_type=None, mode="NULLABLE"),
- ]
- with warnings.catch_warnings(record=True) as warned:
- augmented_schema = module_under_test.augment_schema(dataframe, current_schema)
-
- assert augmented_schema is None
-
- expected_warnings = [
- warning for warning in warned if "could not determine" in str(warning)
- ]
- assert len(expected_warnings) == 1
- warning_msg = str(expected_warnings[0])
- assert "pyarrow" in warning_msg.lower()
- assert "all_none_array" in warning_msg and "empty_array" in warning_msg
+ for col_name in dataframe:
+ with warnings.catch_warnings(record=True) as warned:
+ schema_field = module_under_test._get_schema_by_pyarrow(
+ col_name, dataframe[col_name]
+ )
+ assert warned == []
+ assert schema_field is None
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_dataframe_to_parquet_dict_sequence_schema(module_under_test):
pandas = pytest.importorskip("pandas")
@@ -1453,17 +1711,22 @@ def test_dataframe_to_parquet_dict_sequence_schema(module_under_test):
schema.SchemaField("field01", "STRING", mode="REQUIRED"),
schema.SchemaField("field02", "BOOL", mode="NULLABLE"),
]
- schema_arg = fake_to_arrow.call_args.args[1]
+ schema_arg = fake_to_arrow.call_args[0][1]
assert schema_arg == expected_schema_arg
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
def test__download_table_bqstorage_stream_includes_read_session(
monkeypatch, module_under_test
):
import google.cloud.bigquery_storage_v1.reader
import google.cloud.bigquery_storage_v1.types
- monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None)
+ monkeypatch.setattr(
+ _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None
+ )
monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0")
bqstorage_client = mock.create_autospec(
bigquery_storage.BigQueryReadClient, instance=True
@@ -1487,7 +1750,8 @@ def test__download_table_bqstorage_stream_includes_read_session(
@pytest.mark.skipif(
- not _helpers.BQ_STORAGE_VERSIONS.is_read_session_optional,
+ bigquery_storage is None
+ or not _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional,
reason="Requires `google-cloud-bigquery-storage` >= 2.6.0",
)
def test__download_table_bqstorage_stream_omits_read_session(
@@ -1496,7 +1760,9 @@ def test__download_table_bqstorage_stream_omits_read_session(
import google.cloud.bigquery_storage_v1.reader
import google.cloud.bigquery_storage_v1.types
- monkeypatch.setattr(_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None)
+ monkeypatch.setattr(
+ _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None
+ )
monkeypatch.setattr(bigquery_storage, "__version__", "2.6.0")
bqstorage_client = mock.create_autospec(
bigquery_storage.BigQueryReadClient, instance=True
@@ -1527,6 +1793,9 @@ def test__download_table_bqstorage_stream_omits_read_session(
(7, {"max_queue_size": None}, 7, 0), # infinite queue size
],
)
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
def test__download_table_bqstorage(
module_under_test,
stream_count,
@@ -1577,6 +1846,100 @@ def fake_download_stream(
assert queue_used.maxsize == expected_maxsize
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+def test__download_table_bqstorage_shuts_down_workers(
+ monkeypatch,
+ module_under_test,
+):
+ """Regression test for https://github.com/googleapis/python-bigquery/issues/2032
+
+ Make sure that when the top-level iterator goes out of scope (is deleted),
+ the child threads are also stopped.
+ """
+ pytest.importorskip("google.cloud.bigquery_storage_v1")
+ from google.cloud.bigquery import dataset
+ from google.cloud.bigquery import table
+ import google.cloud.bigquery_storage_v1.reader
+ import google.cloud.bigquery_storage_v1.types
+
+ monkeypatch.setattr(
+ _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None
+ )
+ monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0")
+
+ # Create a fake stream with a decent number of rows.
+ arrow_schema = pyarrow.schema(
+ [
+ ("int_col", pyarrow.int64()),
+ ("str_col", pyarrow.string()),
+ ]
+ )
+ arrow_rows = pyarrow.record_batch(
+ [
+ pyarrow.array([0, 1, 2], type=pyarrow.int64()),
+ pyarrow.array(["a", "b", "c"], type=pyarrow.string()),
+ ],
+ schema=arrow_schema,
+ )
+ session = google.cloud.bigquery_storage_v1.types.ReadSession()
+ session.data_format = "ARROW"
+ session.arrow_schema = {"serialized_schema": arrow_schema.serialize().to_pybytes()}
+ session.streams = [
+ google.cloud.bigquery_storage_v1.types.ReadStream(name=name)
+ for name in ("stream/s0", "stream/s1", "stream/s2")
+ ]
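+    # Mock the BQ Storage client and reader so no network calls are made.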
+ bqstorage_client = mock.create_autospec(
+ bigquery_storage.BigQueryReadClient, instance=True
+ )
+ reader = mock.create_autospec(
+ google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True
+ )
+ reader.__iter__.return_value = [
+ google.cloud.bigquery_storage_v1.types.ReadRowsResponse(
+ arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()},
+ arrow_record_batch={
+ "serialized_record_batch": arrow_rows.serialize().to_pybytes()
+ },
+ )
+ for _ in range(100)
+ ]
+ reader.rows.return_value = google.cloud.bigquery_storage_v1.reader.ReadRowsIterable(
+ reader, read_session=session
+ )
+ bqstorage_client.read_rows.return_value = reader
+ bqstorage_client.create_read_session.return_value = session
+ table_ref = table.TableReference(
+ dataset.DatasetReference("project-x", "dataset-y"),
+ "table-z",
+ )
+ download_state = module_under_test._DownloadState()
+ assert download_state.started_workers == 0
+ assert download_state.finished_workers == 0
+
+ result_gen = module_under_test._download_table_bqstorage(
+ "some-project",
+ table_ref,
+ bqstorage_client,
+ max_queue_size=1,
+ page_to_item=module_under_test._bqstorage_page_to_arrow,
+ download_state=download_state,
+ )
+
+ result_gen_iter = iter(result_gen)
+ next(result_gen_iter)
+ assert download_state.started_workers == 3
+ assert download_state.finished_workers == 0
+
+ # Stop iteration early and simulate the variables going out of scope
+ # to be doubly sure that the worker threads are supposed to be cleaned up.
+ del result_gen, result_gen_iter
+ gc.collect()
+
+ assert download_state.started_workers == 3
+ assert download_state.finished_workers == 3
+
+
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_download_arrow_row_iterator_unknown_field_type(module_under_test):
fake_page = api_core.page_iterator.Page(
parent=mock.Mock(),
@@ -1612,6 +1975,7 @@ def test_download_arrow_row_iterator_unknown_field_type(module_under_test):
assert col.to_pylist() == [2.2, 22.22, 222.222]
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_download_arrow_row_iterator_known_field_type(module_under_test):
fake_page = api_core.page_iterator.Page(
parent=mock.Mock(),
@@ -1646,6 +2010,7 @@ def test_download_arrow_row_iterator_known_field_type(module_under_test):
assert col.to_pylist() == ["2.2", "22.22", "222.222"]
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_download_arrow_row_iterator_dict_sequence_schema(module_under_test):
fake_page = api_core.page_iterator.Page(
parent=mock.Mock(),
@@ -1712,6 +2077,7 @@ def test_table_data_listpage_to_dataframe_skips_stop_iteration(module_under_test
assert isinstance(dataframe, pandas.DataFrame)
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_field_type_override(module_under_test):
# When loading pandas data, we may need to override the type
# decision based on data contents, because GEOGRAPHY data can be
@@ -1731,6 +2097,23 @@ def test_bq_to_arrow_field_type_override(module_under_test):
)
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
+def test_bq_to_arrow_field_set_repeated_nullable_false(module_under_test):
+ assert (
+ module_under_test.bq_to_arrow_field(
+ schema.SchemaField("name", "STRING", mode="REPEATED")
+ ).nullable
+ is False
+ )
+
+ assert (
+ module_under_test.bq_to_arrow_field(
+ schema.SchemaField("name", "STRING", mode="NULLABLE")
+ ).nullable
+ is True
+ )
+
+
@pytest.mark.parametrize(
"field_type, metadata",
[
@@ -1744,6 +2127,7 @@ def test_bq_to_arrow_field_type_override(module_under_test):
),
],
)
+@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`")
def test_bq_to_arrow_field_metadata(module_under_test, field_type, metadata):
assert (
module_under_test.bq_to_arrow_field(
@@ -1764,3 +2148,32 @@ def test_verify_pandas_imports_no_db_dtypes(module_under_test, monkeypatch):
monkeypatch.setattr(module_under_test, "db_dtypes", None)
with pytest.raises(ValueError, match="Please install the 'db-dtypes' package"):
module_under_test.verify_pandas_imports()
+
+
+@pytest.mark.parametrize(
+ "preserve_order, max_stream_count, expected_requested_streams",
+ [
+ # If preserve_order is set/True, it takes precedence:
+ (True, 10, 1), # use 1
+ (True, None, 1), # use 1
+        # If preserve_order is not set, check max_stream_count:
+        (False, 10, 10),  # max_stream_count takes precedence
+ (False, None, 0), # Unbounded (0) when both are unset
+ ],
+)
+def test_determine_requested_streams(
+ preserve_order: bool,
+ max_stream_count: Union[int, None],
+ expected_requested_streams: int,
+):
+ """Tests various combinations of preserve_order and max_stream_count."""
+ actual_requested_streams = determine_requested_streams(
+ preserve_order, max_stream_count
+ )
+ assert actual_requested_streams == expected_requested_streams
+
+
+def test_determine_requested_streams_invalid_max_stream_count():
+ """Tests that a ValueError is raised if max_stream_count is negative."""
+ with pytest.raises(ValueError):
+ determine_requested_streams(preserve_order=False, max_stream_count=-1)
diff --git a/tests/unit/test__pyarrow_helpers.py b/tests/unit/test__pyarrow_helpers.py
new file mode 100644
index 000000000..c12a526de
--- /dev/null
+++ b/tests/unit/test__pyarrow_helpers.py
@@ -0,0 +1,46 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+numpy = pytest.importorskip("numpy")
+pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0")
+
+
+@pytest.fixture
+def module_under_test():
+ from google.cloud.bigquery import _pyarrow_helpers
+
+ return _pyarrow_helpers
+
+
+def test_bq_to_arrow_scalars(module_under_test):
+ assert (
+ module_under_test.bq_to_arrow_scalars("BIGNUMERIC")()
+ == module_under_test.pyarrow_bignumeric()
+ )
+ assert (
+        # Normally we'd prefer the JSON type built into pyarrow (added in 19.0.0),
+        # but we'd like this to map as closely to the BQ Storage API as
+        # possible, which uses the string() dtype, since JSON support in BigQuery
+        # predates the Arrow JSON type by several years.
+ module_under_test.bq_to_arrow_scalars("JSON")()
+ == pyarrow.string()
+ )
+ assert module_under_test.bq_to_arrow_scalars("UNKNOWN_TYPE") is None
+
+
+def test_arrow_scalar_ids_to_bq(module_under_test):
+ assert module_under_test.arrow_scalar_ids_to_bq(pyarrow.bool_().id) == "BOOL"
+ assert module_under_test.arrow_scalar_ids_to_bq("UNKNOWN_TYPE") is None
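
These assertions only pin the lookup behavior: bq_to_arrow_scalars returns a pyarrow type constructor (or None for unknown names) and arrow_scalar_ids_to_bq maps pyarrow type ids back to BigQuery type names. A plausible dict-backed shape, with deliberately tiny illustrative tables (the real module covers many more types), is:

import pyarrow

# Illustrative subsets only; the real tables are much larger.
_BQ_TO_ARROW_SCALARS = {
    "BOOL": pyarrow.bool_,
    "STRING": pyarrow.string,
    "JSON": pyarrow.string,  # the BQ Storage API ships JSON values as strings
}
_ARROW_SCALAR_IDS_TO_BQ = {
    pyarrow.bool_().id: "BOOL",
    pyarrow.string().id: "STRING",
}


def bq_to_arrow_scalars_sketch(bq_scalar: str):
    """Return the pyarrow type constructor for a BigQuery scalar type, else None."""
    return _BQ_TO_ARROW_SCALARS.get(bq_scalar)


def arrow_scalar_ids_to_bq_sketch(arrow_type_id):
    """Return the BigQuery type name for a pyarrow type id, else None."""
    return _ARROW_SCALAR_IDS_TO_BQ.get(arrow_type_id)
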
diff --git a/tests/unit/test__versions_helpers.py b/tests/unit/test__versions_helpers.py
new file mode 100644
index 000000000..8379c87c1
--- /dev/null
+++ b/tests/unit/test__versions_helpers.py
@@ -0,0 +1,248 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+
+import pytest
+
+try:
+ import pyarrow # type: ignore
+except ImportError:
+ pyarrow = None
+
+try:
+ from google.cloud import bigquery_storage # type: ignore
+except ImportError:
+ bigquery_storage = None
+
+try:
+ import pandas # type: ignore
+except ImportError:
+ pandas = None
+
+from google.cloud.bigquery import _versions_helpers
+from google.cloud.bigquery import exceptions
+
+
+@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed")
+def test_try_import_raises_no_error_w_recent_pyarrow():
+ versions = _versions_helpers.PyarrowVersions()
+ with mock.patch("pyarrow.__version__", new="5.0.0"):
+ pyarrow = versions.try_import(raise_if_error=True)
+ assert pyarrow is not None
+
+
+@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed")
+def test_try_import_returns_none_w_legacy_pyarrow():
+ versions = _versions_helpers.PyarrowVersions()
+ with mock.patch("pyarrow.__version__", new="2.0.0"):
+ pyarrow = versions.try_import()
+ assert pyarrow is None
+
+
+@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed")
+def test_try_import_raises_error_w_legacy_pyarrow():
+ versions = _versions_helpers.PyarrowVersions()
+ with mock.patch("pyarrow.__version__", new="2.0.0"):
+ with pytest.raises(exceptions.LegacyPyarrowError):
+ versions.try_import(raise_if_error=True)
+
+
+@pytest.mark.skipif(
+ pyarrow is not None,
+ reason="pyarrow is installed, but this test needs it not to be",
+)
+def test_try_import_raises_error_w_no_pyarrow():
+ versions = _versions_helpers.PyarrowVersions()
+ with pytest.raises(exceptions.LegacyPyarrowError):
+ versions.try_import(raise_if_error=True)
+
+
+@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed")
+def test_installed_pyarrow_version_returns_cached():
+ versions = _versions_helpers.PyarrowVersions()
+ versions._installed_version = object()
+ assert versions.installed_version is versions._installed_version
+
+
+@pytest.mark.skipif(pyarrow is None, reason="pyarrow is not installed")
+def test_installed_pyarrow_version_returns_parsed_version():
+ versions = _versions_helpers.PyarrowVersions()
+ with mock.patch("pyarrow.__version__", new="1.2.3"):
+ version = versions.installed_version
+
+ assert version.major == 1
+ assert version.minor == 2
+ assert version.micro == 3
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test_raises_no_error_w_recent_bqstorage():
+ with mock.patch("google.cloud.bigquery_storage.__version__", new="2.0.0"):
+ try:
+ bqstorage_versions = _versions_helpers.BQStorageVersions()
+ bqstorage_versions.try_import(raise_if_error=True)
+ except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER
+            raise AssertionError(
+                "Legacy error raised with a non-legacy dependency version."
+            )
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test_raises_error_w_legacy_bqstorage():
+ with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"):
+ with pytest.raises(exceptions.LegacyBigQueryStorageError):
+ bqstorage_versions = _versions_helpers.BQStorageVersions()
+ bqstorage_versions.try_import(raise_if_error=True)
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test_returns_none_with_legacy_bqstorage():
+ with mock.patch("google.cloud.bigquery_storage.__version__", new="1.9.9"):
+ try:
+ bqstorage_versions = _versions_helpers.BQStorageVersions()
+ bq_storage = bqstorage_versions.try_import()
+ except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER
+            raise AssertionError("Legacy error raised when raise_if_error == False.")
+ assert bq_storage is None
+
+
+@pytest.mark.skipif(
+ bigquery_storage is not None,
+ reason="Tests behavior when `google-cloud-bigquery-storage` isn't installed",
+)
+def test_returns_none_with_bqstorage_uninstalled():
+ try:
+ bqstorage_versions = _versions_helpers.BQStorageVersions()
+ bq_storage = bqstorage_versions.try_import()
+ except exceptions.LegacyBigQueryStorageError: # pragma: NO COVER
+        raise AssertionError("NotFound error raised when raise_if_error == False.")
+ assert bq_storage is None
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test_raises_error_w_unknown_bqstorage_version():
+ with mock.patch("google.cloud.bigquery_storage", autospec=True) as fake_module:
+ del fake_module.__version__
+ error_pattern = r"version found: 0.0.0"
+ with pytest.raises(exceptions.LegacyBigQueryStorageError, match=error_pattern):
+ bqstorage_versions = _versions_helpers.BQStorageVersions()
+ bqstorage_versions.try_import(raise_if_error=True)
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test_installed_bqstorage_version_returns_cached():
+ bqstorage_versions = _versions_helpers.BQStorageVersions()
+ bqstorage_versions._installed_version = object()
+ assert bqstorage_versions.installed_version is bqstorage_versions._installed_version
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test_installed_bqstorage_version_returns_parsed_version():
+    bqstorage_versions = _versions_helpers.BQStorageVersions()
+    with mock.patch("google.cloud.bigquery_storage.__version__", new="1.2.3"):
+        version = bqstorage_versions.installed_version
+
+    assert version.major == 1
+    assert version.minor == 2
+    assert version.micro == 3
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test_bqstorage_is_read_session_optional_true():
+ bqstorage_versions = _versions_helpers.BQStorageVersions()
+ with mock.patch("google.cloud.bigquery_storage.__version__", new="2.6.0"):
+ assert bqstorage_versions.is_read_session_optional
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test_bqstorage_is_read_session_optional_false():
+ bqstorage_versions = _versions_helpers.BQStorageVersions()
+ with mock.patch("google.cloud.bigquery_storage.__version__", new="2.5.0"):
+ assert not bqstorage_versions.is_read_session_optional
+
+
+@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
+@pytest.mark.parametrize("version", ["1.1.5", "2.0.0", "2.1.0"])
+def test_try_import_raises_no_error_w_recent_pandas(version):
+ # Comparing against the minimum allowed pandas version.
+ # As long as the installed version is greater than that, no
+ # error is raised.
+ versions = _versions_helpers.PandasVersions()
+ with mock.patch("pandas.__version__", new=version):
+ try:
+ pandas = versions.try_import(raise_if_error=True)
+ assert pandas is not None
+ # this exception should not fire unless there is something broken
+ # hence the pragma.
+ except exceptions.LegacyPandasError: # pragma: no cover
+            raise AssertionError(
+                "Legacy error raised with a non-legacy dependency version."
+            )
+
+
+@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
+def test_try_import_returns_none_w_legacy_pandas():
+ versions = _versions_helpers.PandasVersions()
+ with mock.patch("pandas.__version__", new="1.0.0"):
+ pandas = versions.try_import()
+ assert pandas is None
+
+
+@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
+def test_try_import_raises_error_w_legacy_pandas():
+ versions = _versions_helpers.PandasVersions()
+ with mock.patch("pandas.__version__", new="1.0.0"):
+ with pytest.raises(exceptions.LegacyPandasError):
+ versions.try_import(raise_if_error=True)
+
+
+@pytest.mark.skipif(
+ pandas is not None,
+ reason="pandas is installed, but this test needs it not to be",
+)
+def test_try_import_raises_error_w_no_pandas():
+ versions = _versions_helpers.PandasVersions()
+ with pytest.raises(exceptions.LegacyPandasError):
+ versions.try_import(raise_if_error=True)
+
+
+@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
+def test_installed_pandas_version_returns_cached():
+ versions = _versions_helpers.PandasVersions()
+ versions._installed_version = object()
+ assert versions.installed_version is versions._installed_version
+
+
+@pytest.mark.skipif(pandas is None, reason="pandas is not installed")
+def test_installed_pandas_version_returns_parsed_version():
+ versions = _versions_helpers.PandasVersions()
+ with mock.patch("pandas.__version__", new="1.1.0"):
+ version = versions.installed_version
+
+ assert version.major == 1
+ assert version.minor == 1
+ assert version.micro == 0
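
The same gate is exercised three times above (pyarrow, BQ Storage, pandas): try_import returns the module when it is installed and recent enough, returns None otherwise, and raises the matching Legacy*Error when raise_if_error=True. A condensed sketch for the pyarrow case, with an illustrative minimum version rather than the module's actual constant:

import packaging.version

from google.cloud.bigquery import exceptions

_MIN_PYARROW = packaging.version.Version("3.0.0")  # illustrative, not the real constant


def try_import_pyarrow_sketch(raise_if_error: bool = False):
    """Return pyarrow if usable, None if missing/too old (or raise when asked to)."""
    try:
        import pyarrow
    except ImportError as exc:
        if raise_if_error:
            raise exceptions.LegacyPyarrowError("pyarrow is not installed") from exc
        return None
    if packaging.version.parse(pyarrow.__version__) < _MIN_PYARROW:
        if raise_if_error:
            raise exceptions.LegacyPyarrowError(
                f"pyarrow {pyarrow.__version__} is older than {_MIN_PYARROW}"
            )
        return None
    return pyarrow
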
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
index 30bab8fa9..213f382dc 100644
--- a/tests/unit/test_client.py
+++ b/tests/unit/test_client.py
@@ -16,25 +16,21 @@
import collections
import datetime
import decimal
-import email
import gzip
import http.client
import io
import itertools
import json
import operator
+import os
import unittest
+from unittest import mock
import warnings
-import mock
-import requests
+import packaging
import pytest
-import pkg_resources
+import requests
-try:
- import pandas
-except (ImportError, AttributeError): # pragma: NO COVER
- pandas = None
try:
import opentelemetry
@@ -57,21 +53,18 @@
from google.api_core import client_info
import google.cloud._helpers
from google.cloud import bigquery
-from google.cloud import bigquery_storage
-from google.cloud.bigquery.dataset import DatasetReference
-from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+
+from google.cloud.bigquery.dataset import DatasetReference, Dataset
+from google.cloud.bigquery.enums import UpdateMode, DatasetView
+from google.cloud.bigquery import exceptions
from google.cloud.bigquery import ParquetOptions
+import google.cloud.bigquery.retry
+from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
+import google.cloud.bigquery.table
+from test_utils.imports import maybe_fail_import
from tests.unit.helpers import make_connection
-PANDAS_MINIUM_VERSION = pkg_resources.parse_version("1.0.0")
-
-if pandas is not None:
- PANDAS_INSTALLED_VERSION = pkg_resources.get_distribution("pandas").parsed_version
-else:
- # Set to less than MIN version.
- PANDAS_INSTALLED_VERSION = pkg_resources.parse_version("0.0.0")
-
def _make_credentials():
import google.auth.credentials
@@ -100,7 +93,6 @@ def _make_list_partitons_meta_info(project, dataset_id, table_id, num_rows=0):
class TestClient(unittest.TestCase):
-
PROJECT = "PROJECT"
DS_ID = "DATASET_ID"
TABLE_ID = "TABLE_ID"
@@ -158,8 +150,18 @@ def test_ctor_w_empty_client_options(self):
client._connection.API_BASE_URL, client._connection.DEFAULT_API_ENDPOINT
)
- def test_ctor_w_client_options_dict(self):
+ @mock.patch.dict(os.environ, {"GOOGLE_CLOUD_UNIVERSE_DOMAIN": "foo.com"})
+ def test_ctor_w_only_env_universe(self):
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(
+ project=self.PROJECT,
+ credentials=creds,
+ _http=http,
+ )
+        self.assertEqual(client._connection.API_BASE_URL, "https://bigquery.foo.com")
+
+    def test_ctor_w_client_options_dict(self):
creds = _make_credentials()
http = object()
client_options = {"api_endpoint": "https://www.foo-googleapis.com"}
@@ -189,6 +191,34 @@ def test_ctor_w_client_options_object(self):
client._connection.API_BASE_URL, "https://www.foo-googleapis.com"
)
+ @pytest.mark.skipif(
+ packaging.version.parse(getattr(google.api_core, "__version__", "0.0.0"))
+ < packaging.version.Version("2.15.0"),
+ reason="universe_domain not supported with google-api-core < 2.15.0",
+ )
+ def test_ctor_w_client_options_universe(self):
+ creds = _make_credentials()
+ http = object()
+ client_options = {"universe_domain": "foo.com"}
+ client = self._make_one(
+ project=self.PROJECT,
+ credentials=creds,
+ _http=http,
+ client_options=client_options,
+ )
+ self.assertEqual(client._connection.API_BASE_URL, "https://bigquery.foo.com")
+
+ def test_ctor_w_job_creation_mode(self):
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(
+ project=self.PROJECT,
+ credentials=creds,
+ _http=http,
+ default_job_creation_mode="foo",
+ )
+ self.assertEqual(client.default_job_creation_mode, "foo")
+
def test_ctor_w_location(self):
from google.cloud.bigquery._http import Connection
@@ -228,31 +258,55 @@ def test_ctor_w_query_job_config(self):
self.assertIsInstance(client._default_query_job_config, QueryJobConfig)
self.assertTrue(client._default_query_job_config.dry_run)
- def test__call_api_applying_custom_retry_on_timeout(self):
- from concurrent.futures import TimeoutError
- from google.cloud.bigquery.retry import DEFAULT_RETRY
+ def test_ctor_w_load_job_config(self):
+ from google.cloud.bigquery._http import Connection
+ from google.cloud.bigquery import LoadJobConfig
creds = _make_credentials()
- client = self._make_one(project=self.PROJECT, credentials=creds)
+ http = object()
+ location = "us-central"
+ job_config = LoadJobConfig()
+ job_config.create_session = True
- api_request_patcher = mock.patch.object(
- client._connection,
- "api_request",
- side_effect=[TimeoutError, "result"],
- )
- retry = DEFAULT_RETRY.with_deadline(1).with_predicate(
- lambda exc: isinstance(exc, TimeoutError)
+ client = self._make_one(
+ project=self.PROJECT,
+ credentials=creds,
+ _http=http,
+ location=location,
+ default_load_job_config=job_config,
)
+ self.assertIsInstance(client._connection, Connection)
+ self.assertIs(client._connection.credentials, creds)
+ self.assertIs(client._connection.http, http)
+ self.assertEqual(client.location, location)
- with api_request_patcher as fake_api_request:
- result = client._call_api(retry, foo="bar")
+ self.assertIsInstance(client._default_load_job_config, LoadJobConfig)
+ self.assertTrue(client._default_load_job_config.create_session)
- self.assertEqual(result, "result")
- self.assertEqual(
- fake_api_request.call_args_list,
- [mock.call(foo="bar"), mock.call(foo="bar")], # was retried once
+ def test__call_api_extra_headers(self):
+ # Note: We test at a lower layer to ensure that extra headers are
+ # populated when we actually make the call in requests.
+ # Arrange
+ http = mock.create_autospec(requests.Session, instance=True)
+ http.is_mtls = False
+ response = mock.create_autospec(requests.Response, instance=True)
+ response.status_code = 200
+ http.request.return_value = response
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+
+ # Act
+ client._connection.extra_headers = {"x-goog-request-reason": "because-friday"}
+ client._call_api(
+ retry=None, method="GET", path="/bigquery/v2/projects/my-proj/jobs/my-job"
)
+ # Assert
+ http.request.assert_called_once()
+ _, kwargs = http.request.call_args
+ headers = kwargs["headers"]
+ assert headers["x-goog-request-reason"] == "because-friday"
+
def test__call_api_span_creator_not_called(self):
from concurrent.futures import TimeoutError
from google.cloud.bigquery.retry import DEFAULT_RETRY
@@ -359,6 +413,31 @@ def test__get_query_results_miss_w_short_timeout(self):
timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT,
)
+ def test__get_query_results_miss_w_default_timeout(self):
+ import google.cloud.bigquery.client
+ from google.cloud.exceptions import NotFound
+
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = make_connection()
+ path = "/projects/other-project/queries/nothere"
+ with self.assertRaises(NotFound):
+ client._get_query_results(
+ "nothere",
+ None,
+ project="other-project",
+ location=self.LOCATION,
+ timeout_ms=500,
+ timeout=object(), # the api core default timeout
+ )
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path=path,
+ query_params={"maxResults": 0, "timeoutMs": 500, "location": self.LOCATION},
+ timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT,
+ )
+
def test__get_query_results_miss_w_client_location(self):
from google.cloud.exceptions import NotFound
@@ -402,6 +481,101 @@ def test__get_query_results_hit(self):
self.assertEqual(query_results.total_rows, 10)
self.assertTrue(query_results.complete)
+ def test__list_rows_from_query_results_w_none_timeout(self):
+ from google.cloud.exceptions import NotFound
+ from google.cloud.bigquery.schema import SchemaField
+
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = make_connection()
+ path = "/projects/project/queries/nothere"
+ iterator = client._list_rows_from_query_results(
+ "nothere",
+ location=None,
+ project="project",
+ schema=[
+ SchemaField("f1", "STRING", mode="REQUIRED"),
+ SchemaField("f2", "INTEGER", mode="REQUIRED"),
+ ],
+ timeout=None,
+ )
+
+ # trigger the iterator to request data
+ with self.assertRaises(NotFound):
+ iterator._get_next_page_response()
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path=path,
+ query_params={
+ "fields": "jobReference,totalRows,pageToken,rows",
+ "location": None,
+ "formatOptions.useInt64Timestamp": True,
+ },
+ timeout=None,
+ )
+
+ def test__list_rows_from_query_results_w_default_timeout(self):
+ import google.cloud.bigquery.client
+ from google.cloud.exceptions import NotFound
+ from google.cloud.bigquery.schema import SchemaField
+
+ creds = _make_credentials()
+ client = self._make_one(self.PROJECT, creds)
+ conn = client._connection = make_connection()
+ path = "/projects/project/queries/nothere"
+ iterator = client._list_rows_from_query_results(
+ "nothere",
+ location=None,
+ project="project",
+ schema=[
+ SchemaField("f1", "STRING", mode="REQUIRED"),
+ SchemaField("f2", "INTEGER", mode="REQUIRED"),
+ ],
+ timeout=object(),
+ )
+
+ # trigger the iterator to request data
+ with self.assertRaises(NotFound):
+ iterator._get_next_page_response()
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path=path,
+ query_params={
+ "fields": "jobReference,totalRows,pageToken,rows",
+ "location": None,
+ "formatOptions.useInt64Timestamp": True,
+ },
+ timeout=google.cloud.bigquery.client._MIN_GET_QUERY_RESULTS_TIMEOUT,
+ )
+
+ def test_default_query_job_config(self):
+ from google.cloud.bigquery import QueryJobConfig
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ self.assertIsNone(client.default_query_job_config)
+
+ job_config = QueryJobConfig()
+ job_config.dry_run = True
+ client.default_query_job_config = job_config
+ self.assertIsInstance(client.default_query_job_config, QueryJobConfig)
+
+ def test_default_load_job_config(self):
+ from google.cloud.bigquery import LoadJobConfig
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ self.assertIsNone(client.default_load_job_config)
+
+ job_config = LoadJobConfig()
+ job_config.create_session = True
+ client.default_load_job_config = job_config
+ self.assertIsInstance(client.default_load_job_config, LoadJobConfig)
+
def test_get_service_account_email(self):
path = "/projects/%s/serviceAccount" % (self.PROJECT,)
creds = _make_credentials()
@@ -439,48 +613,6 @@ def test_get_service_account_email_w_alternate_project(self):
)
self.assertEqual(service_account_email, email)
- def test_get_service_account_email_w_custom_retry(self):
- from google.cloud.bigquery.retry import DEFAULT_RETRY
-
- api_path = "/projects/{}/serviceAccount".format(self.PROJECT)
- creds = _make_credentials()
- http = object()
- client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
-
- resource = {
- "kind": "bigquery#getServiceAccountResponse",
- "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com",
- }
- api_request_patcher = mock.patch.object(
- client._connection,
- "api_request",
- side_effect=[ValueError, resource],
- )
-
- retry = DEFAULT_RETRY.with_deadline(1).with_predicate(
- lambda exc: isinstance(exc, ValueError)
- )
-
- with api_request_patcher as fake_api_request:
- with mock.patch(
- "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
- ) as final_attributes:
- service_account_email = client.get_service_account_email(
- retry=retry, timeout=7.5
- )
-
- final_attributes.assert_called_once_with({"path": api_path}, client, None)
- self.assertEqual(
- service_account_email, "bq-123@bigquery-encryption.iam.gserviceaccount.com"
- )
- self.assertEqual(
- fake_api_request.call_args_list,
- [
- mock.call(method="GET", path=api_path, timeout=7.5),
- mock.call(method="GET", path=api_path, timeout=7.5), # was retried once
- ],
- )
-
def test_dataset_with_specified_project(self):
from google.cloud.bigquery.dataset import DatasetReference
@@ -548,7 +680,7 @@ def test_get_dataset(self):
final_attributes.assert_called_once_with({"path": "/%s" % path}, client, None)
conn.api_request.assert_called_once_with(
- method="GET", path="/%s" % path, timeout=7.5
+ method="GET", path="/%s" % path, timeout=7.5, query_params={}
)
self.assertEqual(dataset.dataset_id, self.DS_ID)
@@ -614,7 +746,75 @@ def test_get_dataset(self):
self.assertEqual(dataset.dataset_id, self.DS_ID)
+ def test_get_dataset_with_dataset_view(self):
+ path = "projects/%s/datasets/%s" % (self.PROJECT, self.DS_ID)
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ resource = {
+ "id": "%s:%s" % (self.PROJECT, self.DS_ID),
+ "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
+ }
+ dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
+
+ test_cases = [
+ (None, None),
+ (DatasetView.DATASET_VIEW_UNSPECIFIED, "DATASET_VIEW_UNSPECIFIED"),
+ (DatasetView.METADATA, "METADATA"),
+ (DatasetView.ACL, "ACL"),
+ (DatasetView.FULL, "FULL"),
+ ]
+
+ for dataset_view_arg, expected_param_value in test_cases:
+ with self.subTest(
+ dataset_view_arg=dataset_view_arg,
+ expected_param_value=expected_param_value,
+ ):
+ # Re-initialize the connection mock for each sub-test to reset side_effect
+ conn = client._connection = make_connection(resource)
+
+ dataset = client.get_dataset(dataset_ref, dataset_view=dataset_view_arg)
+
+ self.assertEqual(dataset.dataset_id, self.DS_ID)
+
+ if expected_param_value:
+ expected_query_params = {"datasetView": expected_param_value}
+ else:
+ expected_query_params = {}
+
+ conn.api_request.assert_called_once_with(
+ method="GET",
+ path="/%s" % path,
+ timeout=DEFAULT_TIMEOUT,
+ query_params=expected_query_params if expected_query_params else {},
+ )
+
+ def test_get_dataset_with_invalid_dataset_view(self):
+ invalid_view_values = [
+ "INVALID_STRING",
+ 123,
+ 123.45,
+ object(),
+ ]
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ resource = {
+ "id": "%s:%s" % (self.PROJECT, self.DS_ID),
+ "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
+ }
+ conn = client._connection = make_connection(resource)
+ dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
+
+ for invalid_view_value in invalid_view_values:
+ with self.subTest(invalid_view_value=invalid_view_value):
+ conn.api_request.reset_mock() # Reset mock for each sub-test
+ with self.assertRaises(AttributeError):
+ client.get_dataset(dataset_ref, dataset_view=invalid_view_value)
+
def test_ensure_bqstorage_client_creating_new_instance(self):
+ bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")
+
mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
mock_client_instance = object()
mock_client.return_value = mock_client_instance
@@ -636,7 +836,50 @@ def test_ensure_bqstorage_client_creating_new_instance(self):
client_info=mock.sentinel.client_info,
)
+ def test_ensure_bqstorage_client_missing_dependency(self):
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+
+ def fail_bqstorage_import(name, globals, locals, fromlist, level):
+ # NOTE: *very* simplified, assuming a straightforward absolute import
+ return "bigquery_storage" in name or (
+ fromlist is not None and "bigquery_storage" in fromlist
+ )
+
+ no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import)
+
+ with no_bqstorage, warnings.catch_warnings(record=True) as warned:
+ bqstorage_client = client._ensure_bqstorage_client()
+
+ self.assertIsNone(bqstorage_client)
+ matching_warnings = [
+ warning
+ for warning in warned
+ if "not installed" in str(warning)
+ and "google-cloud-bigquery-storage" in str(warning)
+ ]
+ assert matching_warnings, "Missing dependency warning not raised."
+
+ def test_ensure_bqstorage_client_obsolete_dependency(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+
+ patcher = mock.patch(
+ "google.cloud.bigquery.client._versions_helpers.BQ_STORAGE_VERSIONS.try_import",
+ side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"),
+ )
+ with patcher, warnings.catch_warnings(record=True) as warned:
+ bqstorage_client = client._ensure_bqstorage_client()
+
+ self.assertIsNone(bqstorage_client)
+ matching_warnings = [
+ warning for warning in warned if "BQ Storage too old" in str(warning)
+ ]
+ assert matching_warnings, "Obsolete dependency warning not raised."
+
def test_ensure_bqstorage_client_existing_client_check_passes(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)
mock_storage_client = mock.sentinel.mock_storage_client
@@ -647,6 +890,40 @@ def test_ensure_bqstorage_client_existing_client_check_passes(self):
self.assertIs(bqstorage_client, mock_storage_client)
+ def test_ensure_bqstorage_client_is_none(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ bqstorage_client = None
+
+ assert bqstorage_client is None
+ bqstorage_client = client._ensure_bqstorage_client(
+ bqstorage_client=bqstorage_client,
+ )
+
+ assert isinstance(
+ bqstorage_client, google.cloud.bigquery_storage_v1.BigQueryReadClient
+ )
+
+ def test_ensure_bqstorage_client_existing_client_check_fails(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+ mock_storage_client = mock.sentinel.mock_storage_client
+
+ patcher = mock.patch(
+ "google.cloud.bigquery.client._versions_helpers.BQ_STORAGE_VERSIONS.try_import",
+ side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"),
+ )
+ with patcher, warnings.catch_warnings(record=True) as warned:
+ bqstorage_client = client._ensure_bqstorage_client(mock_storage_client)
+
+ self.assertIsNone(bqstorage_client)
+ matching_warnings = [
+ warning for warning in warned if "BQ Storage too old" in str(warning)
+ ]
+ assert matching_warnings, "Obsolete dependency warning not raised."
+
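
Read together, these _ensure_bqstorage_client tests describe a fallback ladder: warn and return None when the dependency is missing or too old, hand back a caller-supplied client when the version check passes, and otherwise construct a fresh BigQueryReadClient. The sketch below is not the client's actual code; the not-installed branch is an assumption (modeled here as an ImportError) and the warning text is abbreviated.

import warnings

from google.cloud.bigquery import _versions_helpers, exceptions


def ensure_bqstorage_client_sketch(
    existing_client=None, credentials=None, client_info=None
):
    try:
        bigquery_storage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import(
            raise_if_error=True
        )
    except exceptions.LegacyBigQueryStorageError as exc:
        # e.g. "BQ Storage too old" in the tests above.
        warnings.warn(str(exc))
        return None
    except ImportError:
        # Assumption: treat a missing package like an import failure; the real
        # path warns that google-cloud-bigquery-storage is not installed.
        warnings.warn("google-cloud-bigquery-storage is not installed")
        return None
    if existing_client is not None:
        return existing_client
    return bigquery_storage.BigQueryReadClient(
        credentials=credentials, client_info=client_info
    )
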
def test_create_routine_w_minimal_resource(self):
from google.cloud.bigquery.routine import Routine
from google.cloud.bigquery.routine import RoutineReference
@@ -715,8 +992,8 @@ def test_create_routine_w_conflict(self):
timeout=DEFAULT_TIMEOUT,
)
- @unittest.skipIf(opentelemetry is None, "Requires `opentelemetry`")
def test_span_status_is_set(self):
+ pytest.importorskip("opentelemetry")
from google.cloud.bigquery.routine import Routine
tracer_provider = TracerProvider()
@@ -1543,14 +1820,14 @@ def test_set_iam_policy(self):
{"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]},
{"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]},
]
- MASK = "bindings,etag"
+ FIELDS = ("bindings", "etag")
RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS}
policy = Policy()
for binding in BINDINGS:
policy[binding["role"]] = binding["members"]
- BODY = {"policy": policy.to_api_repr(), "updateMask": MASK}
+ BODY = {"policy": policy.to_api_repr(), "updateMask": "bindings,etag"}
creds = _make_credentials()
http = object()
@@ -1561,7 +1838,7 @@ def test_set_iam_policy(self):
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
returned_policy = client.set_iam_policy(
- self.TABLE_REF, policy, updateMask=MASK, timeout=7.5
+ self.TABLE_REF, policy, fields=FIELDS, timeout=7.5
)
final_attributes.assert_called_once_with({"path": PATH}, client, None)
@@ -1573,7 +1850,10 @@ def test_set_iam_policy(self):
self.assertEqual(returned_policy.version, VERSION)
self.assertEqual(dict(returned_policy), dict(policy))
- def test_set_iam_policy_no_mask(self):
+ def test_set_iam_policy_updateMask(self):
+ from google.cloud.bigquery.iam import BIGQUERY_DATA_OWNER_ROLE
+ from google.cloud.bigquery.iam import BIGQUERY_DATA_EDITOR_ROLE
+ from google.cloud.bigquery.iam import BIGQUERY_DATA_VIEWER_ROLE
from google.api_core.iam import Policy
PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % (
@@ -1581,9 +1861,60 @@ def test_set_iam_policy_no_mask(self):
self.DS_ID,
self.TABLE_ID,
)
- RETURNED = {"etag": "foo", "version": 1, "bindings": []}
-
- policy = Policy()
+ ETAG = "foo"
+ VERSION = 1
+ OWNER1 = "user:phred@example.com"
+ OWNER2 = "group:cloud-logs@google.com"
+ EDITOR1 = "domain:google.com"
+ EDITOR2 = "user:phred@example.com"
+ VIEWER1 = "serviceAccount:1234-abcdef@service.example.com"
+ VIEWER2 = "user:phred@example.com"
+ BINDINGS = [
+ {"role": BIGQUERY_DATA_OWNER_ROLE, "members": [OWNER1, OWNER2]},
+ {"role": BIGQUERY_DATA_EDITOR_ROLE, "members": [EDITOR1, EDITOR2]},
+ {"role": BIGQUERY_DATA_VIEWER_ROLE, "members": [VIEWER1, VIEWER2]},
+ ]
+ MASK = "bindings,etag"
+ RETURNED = {"etag": ETAG, "version": VERSION, "bindings": BINDINGS}
+
+ policy = Policy()
+ for binding in BINDINGS:
+ policy[binding["role"]] = binding["members"]
+
+ BODY = {"policy": policy.to_api_repr(), "updateMask": MASK}
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ conn = client._connection = make_connection(RETURNED)
+
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ returned_policy = client.set_iam_policy(
+ self.TABLE_REF, policy, updateMask=MASK, timeout=7.5
+ )
+
+ final_attributes.assert_called_once_with({"path": PATH}, client, None)
+
+ conn.api_request.assert_called_once_with(
+ method="POST", path=PATH, data=BODY, timeout=7.5
+ )
+ self.assertEqual(returned_policy.etag, ETAG)
+ self.assertEqual(returned_policy.version, VERSION)
+ self.assertEqual(dict(returned_policy), dict(policy))
+
+ def test_set_iam_policy_no_mask(self):
+ from google.api_core.iam import Policy
+
+ PATH = "/projects/%s/datasets/%s/tables/%s:setIamPolicy" % (
+ self.PROJECT,
+ self.DS_ID,
+ self.TABLE_ID,
+ )
+ RETURNED = {"etag": "foo", "version": 1, "bindings": []}
+
+ policy = Policy()
BODY = {"policy": policy.to_api_repr()}
creds = _make_credentials()
@@ -1601,6 +1932,19 @@ def test_set_iam_policy_no_mask(self):
method="POST", path=PATH, data=BODY, timeout=7.5
)
+    def test_set_iam_policy_updateMask_and_fields(self):
+ from google.api_core.iam import Policy
+
+ policy = Policy()
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+
+ with pytest.raises(ValueError, match="updateMask"):
+ client.set_iam_policy(
+ self.TABLE_REF, policy, updateMask="bindings", fields=("bindings",)
+ )
+
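
Between them, the three set_iam_policy tests pin down the argument handling: fields is the new way to spell the update mask, the legacy updateMask string is still honored, and supplying both raises ValueError. A short usage sketch; the project, dataset, and table ids are placeholders, and running it for real would need ambient credentials:

from google.api_core.iam import Policy
from google.cloud import bigquery

client = bigquery.Client()  # assumes ambient credentials/project
table_ref = bigquery.TableReference.from_string("my-project.my_dataset.my_table")
policy = Policy()

# New style: pass the mask as a tuple of field names.
client.set_iam_policy(table_ref, policy, fields=("bindings", "etag"))

# Legacy spelling: a comma-separated updateMask string is still accepted.
client.set_iam_policy(table_ref, policy, updateMask="bindings,etag")

# Supplying both raises ValueError (the message mentions "updateMask").
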
def test_set_iam_policy_invalid_policy(self):
from google.api_core.iam import Policy
@@ -1694,6 +2038,7 @@ def test_update_dataset(self):
LABELS = {"priority": "high"}
ACCESS = [{"role": "OWNER", "userByEmail": "phred@example.com"}]
EXP = 17
+ RESOURCE_TAGS = {"123456789012/key": "value"}
RESOURCE = {
"datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
"etag": "etag",
@@ -1703,6 +2048,7 @@ def test_update_dataset(self):
"defaultTableExpirationMs": EXP,
"labels": LABELS,
"access": ACCESS,
+ "resourceTags": RESOURCE_TAGS,
}
creds = _make_credentials()
client = self._make_one(project=self.PROJECT, credentials=creds)
@@ -1714,12 +2060,14 @@ def test_update_dataset(self):
ds.default_table_expiration_ms = EXP
ds.labels = LABELS
ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")]
- fields = [
+ ds.resource_tags = RESOURCE_TAGS
+ filter_fields = [
"description",
"friendly_name",
"location",
"labels",
"access_entries",
+ "resource_tags",
]
with mock.patch(
@@ -1727,12 +2075,12 @@ def test_update_dataset(self):
) as final_attributes:
ds2 = client.update_dataset(
ds,
- fields=fields,
+ fields=filter_fields,
timeout=7.5,
)
final_attributes.assert_called_once_with(
- {"path": "/%s" % PATH, "fields": fields}, client, None
+ {"path": "/%s" % PATH, "fields": filter_fields}, client, None
)
conn.api_request.assert_called_once_with(
@@ -1743,21 +2091,112 @@ def test_update_dataset(self):
"location": LOCATION,
"labels": LABELS,
"access": ACCESS,
+ "resourceTags": RESOURCE_TAGS,
},
path="/" + PATH,
timeout=7.5,
+ query_params={},
)
self.assertEqual(ds2.description, ds.description)
self.assertEqual(ds2.friendly_name, ds.friendly_name)
self.assertEqual(ds2.location, ds.location)
self.assertEqual(ds2.labels, ds.labels)
self.assertEqual(ds2.access_entries, ds.access_entries)
+ self.assertEqual(ds2.resource_tags, ds.resource_tags)
# ETag becomes If-Match header.
ds._properties["etag"] = "etag"
client.update_dataset(ds, [])
req = conn.api_request.call_args
self.assertEqual(req[1]["headers"]["If-Match"], "etag")
+ self.assertEqual(req[1].get("query_params"), {})
+
+ def test_update_dataset_w_update_mode(self):
+ PATH = f"projects/{self.PROJECT}/datasets/{self.DS_ID}"
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+
+ DESCRIPTION = "DESCRIPTION"
+ RESOURCE = {
+ "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
+ "etag": "etag",
+ "description": DESCRIPTION,
+ }
+ dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
+ orig_dataset = Dataset(dataset_ref)
+ orig_dataset.description = DESCRIPTION
+ filter_fields = ["description"]
+
+ test_cases = [
+ (None, None),
+ (UpdateMode.UPDATE_MODE_UNSPECIFIED, "UPDATE_MODE_UNSPECIFIED"),
+ (UpdateMode.UPDATE_METADATA, "UPDATE_METADATA"),
+ (UpdateMode.UPDATE_ACL, "UPDATE_ACL"),
+ (UpdateMode.UPDATE_FULL, "UPDATE_FULL"),
+ ]
+
+ for update_mode_arg, expected_param_value in test_cases:
+ with self.subTest(
+ update_mode_arg=update_mode_arg,
+ expected_param_value=expected_param_value,
+ ):
+ conn = client._connection = make_connection(RESOURCE, RESOURCE)
+
+ new_dataset = client.update_dataset(
+ orig_dataset,
+ fields=filter_fields,
+ update_mode=update_mode_arg,
+ )
+ self.assertEqual(orig_dataset.description, new_dataset.description)
+
+ if expected_param_value:
+ expected_query_params = {"updateMode": expected_param_value}
+ else:
+ expected_query_params = {}
+
+ conn.api_request.assert_called_once_with(
+ method="PATCH",
+ path="/" + PATH,
+ data={"description": DESCRIPTION},
+ timeout=DEFAULT_TIMEOUT,
+ query_params=expected_query_params if expected_query_params else {},
+ )
+
+ def test_update_dataset_w_invalid_update_mode(self):
+ creds = _make_credentials()
+ client = self._make_one(project=self.PROJECT, credentials=creds)
+
+ DESCRIPTION = "DESCRIPTION"
+ resource = {
+ "datasetReference": {"projectId": self.PROJECT, "datasetId": self.DS_ID},
+ "etag": "etag",
+ }
+
+ dataset_ref = DatasetReference(self.PROJECT, self.DS_ID)
+ orig_dataset = Dataset(dataset_ref)
+ orig_dataset.description = DESCRIPTION
+ filter_fields = ["description"] # A non-empty list of fields is required
+
+ # Mock the connection to prevent actual API calls
+ # and to provide a minimal valid response if the call were to proceed.
+ conn = client._connection = make_connection(resource)
+
+ test_cases = [
+ "INVALID_STRING",
+ 123,
+ 123.45,
+ object(),
+ ]
+
+ for invalid_update_mode in test_cases:
+ with self.subTest(invalid_update_mode=invalid_update_mode):
+ conn.api_request.reset_mock() # Reset mock for each sub-test
+ with self.assertRaises(AttributeError):
+ client.update_dataset(
+ orig_dataset,
+ fields=filter_fields,
+ update_mode=invalid_update_mode,
+ )
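
The update_mode tests reduce to a simple mapping: an UpdateMode enum member is forwarded as the updateMode query parameter, and omitting it sends no such parameter at all. A usage sketch; the project and dataset ids are placeholders and a real run would need credentials:

from google.cloud import bigquery
from google.cloud.bigquery.enums import UpdateMode

client = bigquery.Client()  # assumes ambient credentials/project
dataset = client.get_dataset("my-project.my_dataset")
dataset.description = "nightly snapshot"

# PATCH with ?updateMode=UPDATE_METADATA
client.update_dataset(
    dataset, fields=["description"], update_mode=UpdateMode.UPDATE_METADATA
)

# PATCH without any updateMode query parameter
client.update_dataset(dataset, fields=["description"])
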
def test_update_dataset_w_custom_property(self):
# The library should handle sending properties to the API that are not
@@ -1789,6 +2228,7 @@ def test_update_dataset_w_custom_property(self):
data={"newAlphaProperty": "unreleased property"},
path=path,
timeout=DEFAULT_TIMEOUT,
+ query_params={},
)
self.assertEqual(dataset.dataset_id, self.DS_ID)
@@ -1980,6 +2420,7 @@ def test_update_table(self):
"description": description,
"friendlyName": title,
"labels": {"x": "y"},
+ "resourceTags": {"123456789012/key": "value"},
}
)
schema = [
@@ -2003,7 +2444,8 @@ def test_update_table(self):
table.description = description
table.friendly_name = title
table.labels = {"x": "y"}
- fields = ["schema", "description", "friendly_name", "labels"]
+ table.resource_tags = {"123456789012/key": "value"}
+ fields = ["schema", "description", "friendly_name", "labels", "resource_tags"]
with mock.patch(
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
@@ -2035,14 +2477,16 @@ def test_update_table(self):
"description": description,
"friendlyName": title,
"labels": {"x": "y"},
+ "resourceTags": {"123456789012/key": "value"},
}
conn.api_request.assert_called_once_with(
- method="PATCH", data=sent, path="/" + path, timeout=7.5
+ method="PATCH", data=sent, path="/" + path, timeout=7.5, query_params={}
)
self.assertEqual(updated_table.description, table.description)
self.assertEqual(updated_table.friendly_name, table.friendly_name)
self.assertEqual(updated_table.schema, table.schema)
self.assertEqual(updated_table.labels, table.labels)
+ self.assertEqual(updated_table.resource_tags, table.resource_tags)
# ETag becomes If-Match header.
table._properties["etag"] = "etag"
@@ -2090,6 +2534,7 @@ def test_update_table_w_custom_property(self):
path="/%s" % path,
data={"newAlphaProperty": "unreleased property"},
timeout=DEFAULT_TIMEOUT,
+ query_params={},
)
self.assertEqual(
updated_table._properties["newAlphaProperty"], "unreleased property"
@@ -2126,6 +2571,7 @@ def test_update_table_only_use_legacy_sql(self):
path="/%s" % path,
data={"view": {"useLegacySql": True}},
timeout=DEFAULT_TIMEOUT,
+ query_params={},
)
self.assertEqual(updated_table.view_use_legacy_sql, table.view_use_legacy_sql)
@@ -2218,9 +2664,10 @@ def test_update_table_w_query(self):
"schema": schema_resource,
},
timeout=DEFAULT_TIMEOUT,
+ query_params={},
)
- def test_update_table_w_schema_None(self):
+ def test_update_table_w_schema_None_autodetect_schema(self):
# Simulate deleting schema: not sure if back-end will actually
# allow this operation, but the spec says it is optional.
path = "projects/%s/datasets/%s/tables/%s" % (
@@ -2262,7 +2709,9 @@ def test_update_table_w_schema_None(self):
with mock.patch(
"google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
) as final_attributes:
- updated_table = client.update_table(table, ["schema"])
+ updated_table = client.update_table(
+ table, ["schema"], autodetect_schema=True
+ )
final_attributes.assert_called_once_with(
{"path": "/%s" % path, "fields": ["schema"]}, client, None
@@ -2271,9 +2720,10 @@ def test_update_table_w_schema_None(self):
self.assertEqual(len(conn.api_request.call_args_list), 2)
req = conn.api_request.call_args_list[1]
self.assertEqual(req[1]["method"], "PATCH")
- sent = {"schema": None}
+ sent = {"schema": {"fields": None}}
self.assertEqual(req[1]["data"], sent)
self.assertEqual(req[1]["path"], "/%s" % path)
+ self.assertEqual(req[1]["query_params"], {"autodetect_schema": True})
self.assertEqual(len(updated_table.schema), 0)
def test_update_table_delete_property(self):
@@ -2644,17 +3094,21 @@ def _create_job_helper(self, job_config):
http = object()
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
- RESOURCE = {
+ resource = {
+ "jobReference": {"projectId": self.PROJECT, "jobId": "random-id"},
+ "configuration": job_config,
+ }
+ expected = {
"jobReference": {"projectId": self.PROJECT, "jobId": mock.ANY},
"configuration": job_config,
}
- conn = client._connection = make_connection(RESOURCE)
+ conn = client._connection = make_connection(resource)
client.create_job(job_config=job_config)
conn.api_request.assert_called_once_with(
method="POST",
path="/projects/%s/jobs" % self.PROJECT,
- data=RESOURCE,
+ data=expected,
timeout=DEFAULT_TIMEOUT,
)
@@ -2830,6 +3284,7 @@ def test_job_from_resource_unknown_type(self):
def test_get_job_miss_w_explict_project(self):
from google.cloud.exceptions import NotFound
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
OTHER_PROJECT = "OTHER_PROJECT"
JOB_ID = "NONESUCH"
@@ -2844,11 +3299,12 @@ def test_get_job_miss_w_explict_project(self):
method="GET",
path="/projects/OTHER_PROJECT/jobs/NONESUCH",
query_params={"projection": "full"},
- timeout=DEFAULT_TIMEOUT,
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
def test_get_job_miss_w_client_location(self):
from google.cloud.exceptions import NotFound
+ from google.cloud.bigquery.retry import DEFAULT_GET_JOB_TIMEOUT
JOB_ID = "NONESUCH"
creds = _make_credentials()
@@ -2862,7 +3318,7 @@ def test_get_job_miss_w_client_location(self):
method="GET",
path="/projects/client-proj/jobs/NONESUCH",
query_params={"projection": "full", "location": "client-loc"},
- timeout=DEFAULT_TIMEOUT,
+ timeout=DEFAULT_GET_JOB_TIMEOUT,
)
def test_get_job_hit_w_timeout(self):
@@ -3057,7 +3513,7 @@ def test_load_table_from_uri(self):
self.assertEqual(job_config.to_api_repr(), original_config_copy.to_api_repr())
self.assertIsInstance(job, LoadJob)
- self.assertIsInstance(job._configuration, LoadJobConfig)
+ self.assertIsInstance(job.configuration, LoadJobConfig)
self.assertIs(job._client, client)
self.assertEqual(job.job_id, JOB)
self.assertEqual(list(job.source_uris), [SOURCE_URI])
@@ -3179,175 +3635,145 @@ def test_load_table_from_uri_w_invalid_job_config(self):
self.assertIn("Expected an instance of LoadJobConfig", exc.exception.args[0])
- @staticmethod
- def _mock_requests_response(status_code, headers, content=b""):
- return mock.Mock(
- content=content,
- headers=headers,
- status_code=status_code,
- spec=["content", "headers", "status_code"],
- )
-
- def _mock_transport(self, status_code, headers, content=b""):
- fake_transport = mock.Mock(spec=["request"])
- fake_response = self._mock_requests_response(
- status_code, headers, content=content
- )
- fake_transport.request.return_value = fake_response
- return fake_transport
-
- def _initiate_resumable_upload_helper(self, num_retries=None, mtls=False):
- from google.resumable_media.requests import ResumableUpload
- from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE
- from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE
- from google.cloud.bigquery.client import _get_upload_headers
- from google.cloud.bigquery.job import LoadJob
+ def test_load_table_from_uri_w_explicit_job_config(self):
from google.cloud.bigquery.job import LoadJobConfig
- from google.cloud.bigquery.job import SourceFormat
- # Create mocks to be checked for doing transport.
- resumable_url = "http://test.invalid?upload_id=hey-you"
- response_headers = {"location": resumable_url}
- fake_transport = self._mock_transport(http.client.OK, response_headers)
- client = self._make_one(project=self.PROJECT, _http=fake_transport)
- conn = client._connection = make_connection()
- if mtls:
- conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls")
+ JOB = "job_name"
+ DESTINATION = "destination_table"
+ SOURCE_URI = "http://example.com/source.csv"
+ RESOURCE = {
+ "jobReference": {"jobId": JOB, "projectId": self.PROJECT},
+ "configuration": {
+ "load": {
+ "sourceUris": [SOURCE_URI],
+ "destinationTable": {
+ "projectId": self.PROJECT,
+ "datasetId": self.DS_ID,
+ "tableId": DESTINATION,
+ },
+ "createSession": True,
+ "encoding": "UTF-8",
+ }
+ },
+ }
- # Create some mock arguments and call the method under test.
- data = b"goodbye gudbi gootbee"
- stream = io.BytesIO(data)
- config = LoadJobConfig()
- config.source_format = SourceFormat.CSV
- job = LoadJob(None, None, self.TABLE_REF, client, job_config=config)
- metadata = job.to_api_repr()
- upload, transport = client._initiate_resumable_upload(
- stream, metadata, num_retries, None
- )
-
- # Check the returned values.
- self.assertIsInstance(upload, ResumableUpload)
-
- host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com"
- upload_url = (
- f"{host_name}/upload/bigquery/v2/projects/{self.PROJECT}"
- "/jobs?uploadType=resumable"
- )
- self.assertEqual(upload.upload_url, upload_url)
- expected_headers = _get_upload_headers(conn.user_agent)
- self.assertEqual(upload._headers, expected_headers)
- self.assertFalse(upload.finished)
- self.assertEqual(upload._chunk_size, _DEFAULT_CHUNKSIZE)
- self.assertIs(upload._stream, stream)
- self.assertIsNone(upload._total_bytes)
- self.assertEqual(upload._content_type, _GENERIC_CONTENT_TYPE)
- self.assertEqual(upload.resumable_url, resumable_url)
-
- retry_strategy = upload._retry_strategy
- self.assertEqual(retry_strategy.max_sleep, 64.0)
- if num_retries is None:
- self.assertEqual(retry_strategy.max_cumulative_retry, 600.0)
- self.assertIsNone(retry_strategy.max_retries)
- else:
- self.assertIsNone(retry_strategy.max_cumulative_retry)
- self.assertEqual(retry_strategy.max_retries, num_retries)
- self.assertIs(transport, fake_transport)
- # Make sure we never read from the stream.
- self.assertEqual(stream.tell(), 0)
-
- # Check the mocks.
- request_headers = expected_headers.copy()
- request_headers["x-upload-content-type"] = _GENERIC_CONTENT_TYPE
- fake_transport.request.assert_called_once_with(
- "POST",
- upload_url,
- data=json.dumps(metadata).encode("utf-8"),
- headers=request_headers,
- timeout=mock.ANY,
- )
-
- def test__initiate_resumable_upload(self):
- self._initiate_resumable_upload_helper()
-
- def test__initiate_resumable_upload_mtls(self):
- self._initiate_resumable_upload_helper(mtls=True)
-
- def test__initiate_resumable_upload_with_retry(self):
- self._initiate_resumable_upload_helper(num_retries=11)
-
- def _do_multipart_upload_success_helper(
- self, get_boundary, num_retries=None, project=None, mtls=False
- ):
- from google.cloud.bigquery.client import _get_upload_headers
- from google.cloud.bigquery.job import LoadJob
+ creds = _make_credentials()
+ http = object()
+
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ conn = client._connection = make_connection(RESOURCE)
+ destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION)
+
+ job_config = LoadJobConfig()
+ job_config.create_session = True
+ job_config.encoding = "UTF-8"
+ client.load_table_from_uri(
+ SOURCE_URI, destination, job_id=JOB, job_config=job_config
+ )
+
+ # Check that load_table_from_uri actually starts the job.
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/projects/%s/jobs" % self.PROJECT,
+ data=RESOURCE,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ def test_load_table_from_uri_w_explicit_job_config_override(self):
from google.cloud.bigquery.job import LoadJobConfig
- from google.cloud.bigquery.job import SourceFormat
- fake_transport = self._mock_transport(http.client.OK, {})
- client = self._make_one(project=self.PROJECT, _http=fake_transport)
- conn = client._connection = make_connection()
- if mtls:
- conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls")
+ JOB = "job_name"
+ DESTINATION = "destination_table"
+ SOURCE_URI = "http://example.com/source.csv"
+ RESOURCE = {
+ "jobReference": {"jobId": JOB, "projectId": self.PROJECT},
+ "configuration": {
+ "load": {
+ "sourceUris": [SOURCE_URI],
+ "destinationTable": {
+ "projectId": self.PROJECT,
+ "datasetId": self.DS_ID,
+ "tableId": DESTINATION,
+ },
+ "createSession": False,
+ "encoding": "ISO-8859-1",
+ }
+ },
+ }
- if project is None:
- project = self.PROJECT
+ creds = _make_credentials()
+ http = object()
+ default_job_config = LoadJobConfig()
+ default_job_config.create_session = True
+ default_job_config.encoding = "ISO-8859-1"
- # Create some mock arguments.
- data = b"Bzzzz-zap \x00\x01\xf4"
- stream = io.BytesIO(data)
- config = LoadJobConfig()
- config.source_format = SourceFormat.CSV
- job = LoadJob(None, None, self.TABLE_REF, client, job_config=config)
- metadata = job.to_api_repr()
- size = len(data)
-
- response = client._do_multipart_upload(
- stream, metadata, size, num_retries, None, project=project
- )
-
- # Check the mocks and the returned value.
- self.assertIs(response, fake_transport.request.return_value)
- self.assertEqual(stream.tell(), size)
- get_boundary.assert_called_once_with()
-
- host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com"
- upload_url = (
- f"{host_name}/upload/bigquery/v2/projects/{project}"
- "/jobs?uploadType=multipart"
- )
- payload = (
- b"--==0==\r\n"
- b"content-type: application/json; charset=UTF-8\r\n\r\n"
- b"%(json_metadata)s"
- b"\r\n"
- b"--==0==\r\n"
- b"content-type: */*\r\n\r\n"
- b"%(data)s"
- b"\r\n"
- b"--==0==--"
- ) % {b"json_metadata": json.dumps(metadata).encode("utf-8"), b"data": data}
-
- headers = _get_upload_headers(conn.user_agent)
- headers["content-type"] = b'multipart/related; boundary="==0=="'
- fake_transport.request.assert_called_once_with(
- "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY
- )
-
- @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==")
- def test__do_multipart_upload(self, get_boundary):
- self._do_multipart_upload_success_helper(get_boundary)
-
- @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==")
- def test__do_multipart_upload_mtls(self, get_boundary):
- self._do_multipart_upload_success_helper(get_boundary, mtls=True)
-
- @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==")
- def test__do_multipart_upload_with_retry(self, get_boundary):
- self._do_multipart_upload_success_helper(get_boundary, num_retries=8)
-
- @mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==")
- def test__do_multipart_upload_with_custom_project(self, get_boundary):
- self._do_multipart_upload_success_helper(get_boundary, project="custom-project")
+ client = self._make_one(
+ project=self.PROJECT,
+ credentials=creds,
+ _http=http,
+ default_load_job_config=default_job_config,
+ )
+ conn = client._connection = make_connection(RESOURCE)
+ destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION)
+
+ job_config = LoadJobConfig()
+ job_config.create_session = False
+ client.load_table_from_uri(
+ SOURCE_URI, destination, job_id=JOB, job_config=job_config
+ )
+
+ # Check that load_table_from_uri actually starts the job.
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/projects/%s/jobs" % self.PROJECT,
+ data=RESOURCE,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ def test_load_table_from_uri_w_default_load_config(self):
+ from google.cloud.bigquery.job import LoadJobConfig
+
+ JOB = "job_name"
+ DESTINATION = "destination_table"
+ SOURCE_URI = "http://example.com/source.csv"
+ RESOURCE = {
+ "jobReference": {"jobId": JOB, "projectId": self.PROJECT},
+ "configuration": {
+ "load": {
+ "sourceUris": [SOURCE_URI],
+ "destinationTable": {
+ "projectId": self.PROJECT,
+ "datasetId": self.DS_ID,
+ "tableId": DESTINATION,
+ },
+ "encoding": "ISO-8859-1",
+ }
+ },
+ }
+
+ creds = _make_credentials()
+ http = object()
+ default_job_config = LoadJobConfig()
+ default_job_config.encoding = "ISO-8859-1"
+
+ client = self._make_one(
+ project=self.PROJECT,
+ credentials=creds,
+ _http=http,
+ default_load_job_config=default_job_config,
+ )
+ conn = client._connection = make_connection(RESOURCE)
+ destination = DatasetReference(self.PROJECT, self.DS_ID).table(DESTINATION)
+
+ client.load_table_from_uri(SOURCE_URI, destination, job_id=JOB)
+
+ # Check that load_table_from_uri actually starts the job.
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path="/projects/%s/jobs" % self.PROJECT,
+ data=RESOURCE,
+ timeout=DEFAULT_TIMEOUT,
+ )
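
The three load_table_from_uri tests above describe the precedence rule for load configurations: a per-call job_config wins property-by-property, and anything it leaves unset is filled in from the client's default_load_job_config. An illustrative usage sketch; the bucket, table, and encoding values are placeholders:

from google.cloud import bigquery

default_config = bigquery.LoadJobConfig()
default_config.encoding = "ISO-8859-1"
default_config.create_session = True

client = bigquery.Client(default_load_job_config=default_config)

override = bigquery.LoadJobConfig()
override.create_session = False

# Resulting job config: createSession=False (explicit) plus encoding=ISO-8859-1
# (inherited from the client default), matching the override test above.
client.load_table_from_uri(
    "gs://my-bucket/data.csv",
    "my-project.my_dataset.my_table",
    job_config=override,
)
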
def test_copy_table(self):
from google.cloud.bigquery.job import CopyJob
@@ -3563,7 +3989,7 @@ def test_copy_table_w_source_strings(self):
creds = _make_credentials()
http = object()
client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
- client._connection = make_connection({})
+ conn = client._connection = make_connection({})
sources = [
"dataset_wo_proj.some_table",
"other_project.other_dataset.other_table",
@@ -3575,6 +4001,11 @@ def test_copy_table_w_source_strings(self):
job = client.copy_table(sources, destination)
+        # Swap the job's resource for the request payload (not the response) so
+        # we can verify the properties that were actually sent.
+ _, kwargs = conn.api_request.call_args
+ request = kwargs["data"]
+ job._properties = request
+
expected_sources = [
DatasetReference(client.project, "dataset_wo_proj").table("some_table"),
DatasetReference("other_project", "other_dataset").table("other_table"),
@@ -3651,7 +4082,7 @@ def test_copy_table_w_valid_job_config(self):
data=RESOURCE,
timeout=DEFAULT_TIMEOUT,
)
- self.assertIsInstance(job._configuration, CopyJobConfig)
+ self.assertIsInstance(job.configuration, CopyJobConfig)
# the original config object should not have been modified
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
@@ -4213,7 +4644,7 @@ def test_query_w_api_method_query_and_job_id_fails(self):
client._connection = make_connection({})
with self.assertRaises(TypeError) as exc:
- client.query(query, job_id="abcd", api_method="QUERY")
+ client.query(query, job_id="abcd", api_method="QUERY", job_retry=None)
self.assertIn(
"`job_id` was provided, but the 'QUERY' `api_method` was requested",
exc.exception.args[0],
@@ -4268,7 +4699,11 @@ def test_query_w_explicit_project(self):
conn = client._connection = make_connection(resource)
client.query(
- query, job_id=job_id, project="other-project", location=self.LOCATION
+ query,
+ job_id=job_id,
+ project="other-project",
+ location=self.LOCATION,
+ job_retry=None,
)
# Check that query actually starts the job.
@@ -4327,7 +4762,11 @@ def test_query_w_explicit_job_config(self):
original_config_copy = copy.deepcopy(job_config)
client.query(
- query, job_id=job_id, location=self.LOCATION, job_config=job_config
+ query,
+ job_id=job_id,
+ location=self.LOCATION,
+ job_config=job_config,
+ job_retry=None,
)
# Check that query actually starts the job.
@@ -4378,7 +4817,11 @@ def test_query_preserving_explicit_job_config(self):
original_config_copy = copy.deepcopy(job_config)
client.query(
- query, job_id=job_id, location=self.LOCATION, job_config=job_config
+ query,
+ job_id=job_id,
+ location=self.LOCATION,
+ job_config=job_config,
+ job_retry=None,
)
# Check that query actually starts the job.
@@ -4434,7 +4877,13 @@ def test_query_preserving_explicit_default_job_config(self):
)
conn = client._connection = make_connection(resource)
- client.query(query, job_id=job_id, location=self.LOCATION, job_config=None)
+ client.query(
+ query,
+ job_id=job_id,
+ location=self.LOCATION,
+ job_config=None,
+ job_retry=None,
+ )
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
@@ -4472,7 +4921,11 @@ def test_query_w_invalid_job_config(self):
with self.assertRaises(TypeError) as exc:
client.query(
- query, job_id=job_id, location=self.LOCATION, job_config=job_config
+ query,
+ job_id=job_id,
+ location=self.LOCATION,
+ job_config=job_config,
+ job_retry=None,
)
self.assertIn("Expected an instance of QueryJobConfig", exc.exception.args[0])
@@ -4521,7 +4974,11 @@ def test_query_w_explicit_job_config_override(self):
job_config.default_dataset = None
client.query(
- query, job_id=job_id, location=self.LOCATION, job_config=job_config
+ query,
+ job_id=job_id,
+ location=self.LOCATION,
+ job_config=job_config,
+ job_retry=None,
)
# Check that query actually starts the job.
@@ -4566,7 +5023,7 @@ def test_query_w_client_default_config_no_incoming(self):
)
conn = client._connection = make_connection(resource)
- client.query(query, job_id=job_id, location=self.LOCATION)
+ client.query(query, job_id=job_id, location=self.LOCATION, job_retry=None)
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
@@ -4577,20 +5034,17 @@ def test_query_w_client_default_config_no_incoming(self):
)
def test_query_w_invalid_default_job_config(self):
- job_id = "some-job-id"
- query = "select count(*) from persons"
creds = _make_credentials()
http = object()
default_job_config = object()
- client = self._make_one(
- project=self.PROJECT,
- credentials=creds,
- _http=http,
- default_query_job_config=default_job_config,
- )
with self.assertRaises(TypeError) as exc:
- client.query(query, job_id=job_id, location=self.LOCATION)
+ self._make_one(
+ project=self.PROJECT,
+ credentials=creds,
+ _http=http,
+ default_query_job_config=default_job_config,
+ )
self.assertIn("Expected an instance of QueryJobConfig", exc.exception.args[0])
def test_query_w_client_location(self):
@@ -4611,7 +5065,7 @@ def test_query_w_client_location(self):
)
conn = client._connection = make_connection(resource)
- client.query(query, job_id=job_id, project="other-project")
+ client.query(query, job_id=job_id, project="other-project", job_retry=None)
# Check that query actually starts the job.
conn.api_request.assert_called_once_with(
@@ -4675,7 +5129,7 @@ def test_query_w_udf_resources(self):
config.udf_resources = udf_resources
config.use_legacy_sql = True
- job = client.query(QUERY, job_config=config, job_id=JOB)
+ job = client.query(QUERY, job_config=config, job_id=JOB, job_retry=None)
self.assertIsInstance(job, QueryJob)
self.assertIs(job._client, client)
@@ -4731,7 +5185,7 @@ def test_query_w_query_parameters(self):
config = QueryJobConfig()
config.query_parameters = query_parameters
- job = client.query(QUERY, job_config=config, job_id=JOB)
+ job = client.query(QUERY, job_config=config, job_id=JOB, job_retry=None)
self.assertIsInstance(job, QueryJob)
self.assertIs(job._client, client)
@@ -4774,7 +5228,7 @@ def test_query_job_rpc_fail_w_random_error(self):
)
with job_begin_patcher:
with pytest.raises(Unknown, match="Not sure what went wrong."):
- client.query("SELECT 1;", job_id="123")
+ client.query("SELECT 1;", job_id="123", job_retry=None)
def test_query_job_rpc_fail_w_conflict_job_id_given(self):
from google.api_core.exceptions import Conflict
@@ -4790,7 +5244,7 @@ def test_query_job_rpc_fail_w_conflict_job_id_given(self):
)
with job_begin_patcher:
with pytest.raises(Conflict, match="Job already exists."):
- client.query("SELECT 1;", job_id="123")
+ client.query("SELECT 1;", job_id="123", job_retry=None)
def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self):
from google.api_core.exceptions import Conflict
@@ -4806,14 +5260,46 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self):
QueryJob, "_begin", side_effect=job_create_error
)
get_job_patcher = mock.patch.object(
- client, "get_job", side_effect=DataLoss("we lost yor job, sorry")
+ client, "get_job", side_effect=DataLoss("we lost your job, sorry")
)
with job_begin_patcher, get_job_patcher:
- # If get job request fails, the original exception should be raised.
- with pytest.raises(Conflict, match="Job already exists."):
+ # If the get_job request fails even though a job with this ID
+ # supposedly already exists, raise the exception that explains why
+ # we couldn't recover the job.
+ with pytest.raises(DataLoss, match="we lost your job, sorry"):
client.query("SELECT 1;", job_id=None)
+ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails_no_retries(self):
+ from google.api_core.exceptions import Conflict
+ from google.api_core.exceptions import DataLoss
+ from google.cloud.bigquery.job import QueryJob
+
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+
+ job_create_error = Conflict("Job already exists.")
+ job_begin_patcher = mock.patch.object(
+ QueryJob, "_begin", side_effect=job_create_error
+ )
+ get_job_patcher = mock.patch.object(
+ client, "get_job", side_effect=DataLoss("we lost your job, sorry")
+ )
+
+ with job_begin_patcher, get_job_patcher:
+ # If the get_job request fails even though a job with this ID
+ # supposedly already exists, raise the exception that explains why
+ # we couldn't recover the job.
+ with pytest.raises(DataLoss, match="we lost your job, sorry"):
+ client.query(
+ "SELECT 1;",
+ job_id=None,
+ # Explicitly test with no retries to make sure those branches are covered.
+ retry=None,
+ job_retry=None,
+ )
+
def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self):
from google.api_core.exceptions import Conflict
from google.cloud.bigquery.job import QueryJob
@@ -4835,6 +5321,282 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self):
assert result is mock.sentinel.query_job
+ def test_query_and_wait_defaults(self):
+ query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`"
+ jobs_query_response = {
+ "jobComplete": True,
+ "schema": {
+ "fields": [
+ {
+ "name": "f0_",
+ "type": "INTEGER",
+ "mode": "NULLABLE",
+ },
+ ],
+ },
+ "totalRows": "1",
+ "rows": [{"f": [{"v": "5552452"}]}],
+ "queryId": "job_abcDEF_",
+ "totalBytesProcessed": 1234,
+ "totalSlotMs": 5678,
+ "creationTime": "1437767599006",
+ "startTime": "1437767600007",
+ "endTime": "1437767601008",
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ conn = client._connection = make_connection(jobs_query_response)
+
+ rows = client.query_and_wait(query)
+
+ self.assertIsInstance(rows, google.cloud.bigquery.table.RowIterator)
+ self.assertEqual(rows.query_id, "job_abcDEF_")
+ self.assertEqual(rows.total_rows, 1)
+ # A missing job reference in the response is OK for a completed query.
+ self.assertIsNone(rows.job_id)
+ self.assertIsNone(rows.project)
+ self.assertIsNone(rows.location)
+ self.assertEqual(rows.query, query)
+ self.assertEqual(rows.total_bytes_processed, 1234)
+ self.assertEqual(rows.slot_millis, 5678)
+ self.assertEqual(rows.created.timestamp() * 1000, 1437767599006)
+ self.assertEqual(rows.started.timestamp() * 1000, 1437767600007)
+ self.assertEqual(rows.ended.timestamp() * 1000, 1437767601008)
+
+ # Verify the request we send is to jobs.query.
+ conn.api_request.assert_called_once()
+ _, req = conn.api_request.call_args
+ self.assertEqual(req["method"], "POST")
+ self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries")
+ self.assertEqual(req["timeout"], DEFAULT_TIMEOUT)
+ sent = req["data"]
+ self.assertEqual(sent["query"], query)
+ self.assertFalse(sent["useLegacySql"])
+
+ def test_query_and_wait_w_default_query_job_config(self):
+ from google.cloud.bigquery import job
+
+ query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`"
+ jobs_query_response = {
+ "jobComplete": True,
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(
+ project=self.PROJECT,
+ credentials=creds,
+ _http=http,
+ default_query_job_config=job.QueryJobConfig(
+ labels={
+ "default-label": "default-value",
+ },
+ ),
+ )
+ conn = client._connection = make_connection(jobs_query_response)
+
+ _ = client.query_and_wait(query)
+
+ # Verify the request we send is to jobs.query.
+ conn.api_request.assert_called_once()
+ _, req = conn.api_request.call_args
+ self.assertEqual(req["method"], "POST")
+ self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries")
+ sent = req["data"]
+ self.assertEqual(sent["labels"], {"default-label": "default-value"})
+
+ def test_query_and_wait_w_job_config(self):
+ from google.cloud.bigquery import job
+
+ query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`"
+ jobs_query_response = {
+ "jobComplete": True,
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(
+ project=self.PROJECT,
+ credentials=creds,
+ _http=http,
+ )
+ conn = client._connection = make_connection(jobs_query_response)
+
+ _ = client.query_and_wait(
+ query,
+ job_config=job.QueryJobConfig(
+ labels={
+ "job_config-label": "job_config-value",
+ },
+ ),
+ )
+
+ # Verify the request we send is to jobs.query.
+ conn.api_request.assert_called_once()
+ _, req = conn.api_request.call_args
+ self.assertEqual(req["method"], "POST")
+ self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries")
+ sent = req["data"]
+ self.assertEqual(sent["labels"], {"job_config-label": "job_config-value"})
+
+ def test_query_and_wait_w_location(self):
+ query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`"
+ jobs_query_response = {
+ "jobComplete": True,
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ conn = client._connection = make_connection(jobs_query_response)
+
+ _ = client.query_and_wait(query, location="not-the-client-location")
+
+ # Verify the request we send is to jobs.query.
+ conn.api_request.assert_called_once()
+ _, req = conn.api_request.call_args
+ self.assertEqual(req["method"], "POST")
+ self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries")
+ sent = req["data"]
+ self.assertEqual(sent["location"], "not-the-client-location")
+
+ def test_query_and_wait_w_max_results(self):
+ query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`"
+ jobs_query_response = {
+ "jobComplete": True,
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ conn = client._connection = make_connection(jobs_query_response)
+
+ _ = client.query_and_wait(query, max_results=11)
+
+ # Verify the request we send is to jobs.query.
+ conn.api_request.assert_called_once()
+ _, req = conn.api_request.call_args
+ self.assertEqual(req["method"], "POST")
+ self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries")
+ sent = req["data"]
+ self.assertTrue(sent["formatOptions"]["useInt64Timestamp"])
+ self.assertEqual(sent["maxResults"], 11)
+
+ def test_query_and_wait_w_page_size(self):
+ query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`"
+ jobs_query_response = {
+ "jobComplete": True,
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ conn = client._connection = make_connection(jobs_query_response)
+
+ _ = client.query_and_wait(query, page_size=11)
+
+ # Verify the request we send is to jobs.query.
+ conn.api_request.assert_called_once()
+ _, req = conn.api_request.call_args
+ self.assertEqual(req["method"], "POST")
+ self.assertEqual(req["path"], f"/projects/{self.PROJECT}/queries")
+ sent = req["data"]
+ self.assertTrue(sent["formatOptions"]["useInt64Timestamp"])
+ self.assertEqual(sent["maxResults"], 11)
+
+ def test_query_and_wait_w_page_size_multiple_requests(self):
+ """
+ For queries that last longer than the initial (roughly 10 second) call to
+ jobs.query, we should still pass through the page size to the
+ subsequent calls to jobs.getQueryResults.
+
+ See internal issue 344008814.
+ """
+ query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`"
+ job_reference = {
+ "projectId": "my-jobs-project",
+ "location": "my-jobs-location",
+ "jobId": "my-jobs-id",
+ }
+ jobs_query_response = {
+ "jobComplete": False,
+ "jobReference": job_reference,
+ }
+ jobs_get_response = {
+ "jobReference": job_reference,
+ "status": {"state": "DONE"},
+ }
+ get_query_results_response = {
+ "jobComplete": True,
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ conn = client._connection = make_connection(
+ jobs_query_response,
+ jobs_get_response,
+ get_query_results_response,
+ )
+
+ _ = client.query_and_wait(query, page_size=11)
+
+ conn.api_request.assert_has_calls(
+ [
+ # Verify the request we send is to jobs.query.
+ mock.call(
+ method="POST",
+ path=f"/projects/{self.PROJECT}/queries",
+ data={
+ "useLegacySql": False,
+ "query": query,
+ "formatOptions": {"useInt64Timestamp": True},
+ "maxResults": 11,
+ "requestId": mock.ANY,
+ },
+ timeout=None,
+ ),
+ # jobs.get: Check if the job has finished.
+ mock.call(
+ method="GET",
+ path="/projects/my-jobs-project/jobs/my-jobs-id",
+ query_params={
+ "projection": "full",
+ "location": "my-jobs-location",
+ },
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
+ ),
+ # jobs.getQueryResults: wait for the query / fetch first page
+ mock.call(
+ method="GET",
+ path="/projects/my-jobs-project/queries/my-jobs-id",
+ query_params={
+ # We should still pass through the page size to the
+ # subsequent calls to jobs.getQueryResults.
+ #
+ # See internal issue 344008814.
+ "maxResults": 11,
+ "formatOptions.useInt64Timestamp": True,
+ "location": "my-jobs-location",
+ },
+ timeout=None,
+ ),
+ ]
+ )
+
+ def test_query_and_wait_w_project(self):
+ query = "select count(*) from `bigquery-public-data.usa_names.usa_1910_2013`"
+ jobs_query_response = {
+ "jobComplete": True,
+ }
+ creds = _make_credentials()
+ http = object()
+ client = self._make_one(project=self.PROJECT, credentials=creds, _http=http)
+ conn = client._connection = make_connection(jobs_query_response)
+
+ _ = client.query_and_wait(query, project="not-the-client-project")
+
+ # Verify the request we send is to jobs.query.
+ conn.api_request.assert_called_once()
+ _, req = conn.api_request.call_args
+ self.assertEqual(req["method"], "POST")
+ self.assertEqual(req["path"], "/projects/not-the-client-project/queries")
+
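+ # Illustrative sketch (an editorial addition, not collected by pytest) of the
+ # query_and_wait API exercised above: it calls jobs.query and hands back a
+ # RowIterator directly, so a fast query may finish without ever exposing a job
+ # ID. The query text and project name are placeholders.
+ def _example_query_and_wait(self):  # pragma: NO COVER
+ from google.cloud import bigquery
+
+ client = bigquery.Client(project="example-project")
+ rows = client.query_and_wait(
+ "SELECT 1 AS x",
+ # page_size flows through as maxResults to jobs.query and to any
+ # follow-up jobs.getQueryResults calls, as the tests above verify.
+ page_size=100,
+ )
+ for row in rows:
+ print(row["x"])
+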
def test_insert_rows_w_timeout(self):
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import Table
@@ -4887,7 +5649,7 @@ def test_insert_rows_w_schema(self):
from google.cloud.bigquery.schema import SchemaField
WHEN_TS = 1437767599.006
- WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC)
+ WHEN = datetime.datetime.fromtimestamp(WHEN_TS, UTC).replace(tzinfo=UTC)
PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % (
self.PROJECT,
self.DS_ID,
@@ -4948,7 +5710,7 @@ def test_insert_rows_w_list_of_dictionaries(self):
from google.cloud.bigquery.table import Table
WHEN_TS = 1437767599.006
- WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS).replace(tzinfo=UTC)
+ WHEN = datetime.datetime.fromtimestamp(WHEN_TS, UTC).replace(tzinfo=UTC)
PATH = "projects/%s/datasets/%s/tables/%s/insertAll" % (
self.PROJECT,
self.DS_ID,
@@ -5131,6 +5893,7 @@ def _row_data(row):
)
def test_insert_rows_w_repeated_fields(self):
+ from google.cloud._helpers import UTC
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import Table
@@ -5160,12 +5923,8 @@ def test_insert_rows_w_repeated_fields(self):
(
12,
[
- datetime.datetime(
- 2018, 12, 1, 12, 0, 0, tzinfo=datetime.timezone.utc
- ),
- datetime.datetime(
- 2018, 12, 1, 13, 0, 0, tzinfo=datetime.timezone.utc
- ),
+ datetime.datetime(2018, 12, 1, 12, 0, 0, tzinfo=UTC),
+ datetime.datetime(2018, 12, 1, 13, 0, 0, tzinfo=UTC),
],
[1.25, 2.5],
),
@@ -5423,8 +6182,8 @@ def test_insert_rows_w_numeric(self):
timeout=DEFAULT_TIMEOUT,
)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_insert_rows_from_dataframe(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import Table
@@ -5510,8 +6269,8 @@ def test_insert_rows_from_dataframe(self):
)
assert call == expected_call
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_insert_rows_from_dataframe_nan(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import Table
@@ -5578,8 +6337,8 @@ def test_insert_rows_from_dataframe_nan(self):
)
assert call == expected_call
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_insert_rows_from_dataframe_many_columns(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import Table
@@ -5631,8 +6390,8 @@ def test_insert_rows_from_dataframe_many_columns(self):
assert len(actual_calls) == 1
assert actual_calls[0] == expected_call
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_insert_rows_from_dataframe_w_explicit_none_insert_ids(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.table import Table
@@ -6000,7 +6759,9 @@ def test_list_rows(self):
)
WHEN_TS = 1437767599006000
- WHEN = datetime.datetime.utcfromtimestamp(WHEN_TS / 1e6).replace(tzinfo=UTC)
+ WHEN = datetime.datetime.fromtimestamp(
+ WHEN_TS / 1e6, datetime.timezone.utc
+ ).replace(tzinfo=UTC)
WHEN_1 = WHEN + datetime.timedelta(microseconds=1)
WHEN_2 = WHEN + datetime.timedelta(microseconds=2)
ROWS = 1234
@@ -6024,11 +6785,16 @@ def test_list_rows(self):
age = SchemaField("age", "INTEGER", mode="NULLABLE")
joined = SchemaField("joined", "TIMESTAMP", mode="NULLABLE")
table = Table(self.TABLE_REF, schema=[full_name, age, joined])
+ table._properties["location"] = "us-central1"
table._properties["numRows"] = 7
iterator = client.list_rows(table, timeout=7.5)
- # Check that initial total_rows is populated from the table.
+ # Check that the initial RowIterator is populated from the table metadata.
+ self.assertIsNone(iterator.job_id)
+ self.assertEqual(iterator.location, "us-central1")
+ self.assertEqual(iterator.project, table.project)
+ self.assertIsNone(iterator.query_id)
self.assertEqual(iterator.total_rows, 7)
page = next(iterator.pages)
rows = list(page)
@@ -6144,6 +6910,10 @@ def test_list_rows_empty_table(self):
selected_fields=[],
)
+ self.assertIsNone(rows.job_id)
+ self.assertIsNone(rows.location)
+ self.assertEqual(rows.project, self.TABLE_REF.project)
+ self.assertIsNone(rows.query_id)
# When a table reference / string and selected_fields is provided,
# total_rows can't be populated until iteration starts.
self.assertIsNone(rows.total_rows)
@@ -6832,8 +7602,121 @@ def test_load_table_from_file_w_invalid_job_config(self):
err_msg = str(exc.value)
assert "Expected an instance of LoadJobConfig" in err_msg
- @unittest.skipIf(pandas is None, "Requires `pandas`")
+ def test_load_table_from_file_w_explicit_job_config(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+
+ client = self._make_client()
+ file_obj = self._make_file_obj()
+
+ job_config = self._make_config()
+ job_config.create_session = True
+ job_config.encoding = "UTF-8"
+ do_upload_patch = self._make_do_upload_patch(
+ client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION
+ )
+ with do_upload_patch as do_upload:
+ client.load_table_from_file(
+ file_obj,
+ self.TABLE_REF,
+ job_id="job_id",
+ project=self.PROJECT,
+ location=self.LOCATION,
+ job_config=job_config,
+ )
+
+ expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION)
+ expected_resource["jobReference"]["location"] = self.LOCATION
+ expected_resource["jobReference"]["projectId"] = self.PROJECT
+ expected_resource["configuration"]["load"]["createSession"] = True
+ expected_resource["configuration"]["load"]["encoding"] = "UTF-8"
+ do_upload.assert_called_once_with(
+ file_obj,
+ expected_resource,
+ _DEFAULT_NUM_RETRIES,
+ DEFAULT_TIMEOUT,
+ project=self.PROJECT,
+ )
+
+ def test_load_table_from_file_w_explicit_job_config_override(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery.job import LoadJobConfig
+
+ client = self._make_client()
+ file_obj = self._make_file_obj()
+
+ default_job_config = LoadJobConfig()
+ default_job_config.create_session = True
+ default_job_config.encoding = "ISO-8859-1"
+ client.default_load_job_config = default_job_config
+
+ job_config = self._make_config()
+ job_config.create_session = False
+ do_upload_patch = self._make_do_upload_patch(
+ client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION
+ )
+ with do_upload_patch as do_upload:
+ client.load_table_from_file(
+ file_obj,
+ self.TABLE_REF,
+ job_id="job_id",
+ project=self.PROJECT,
+ location=self.LOCATION,
+ job_config=job_config,
+ )
+
+ expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION)
+ expected_resource["jobReference"]["location"] = self.LOCATION
+ expected_resource["jobReference"]["projectId"] = self.PROJECT
+ expected_resource["configuration"]["load"]["createSession"] = False
+ expected_resource["configuration"]["load"]["encoding"] = "ISO-8859-1"
+ do_upload.assert_called_once_with(
+ file_obj,
+ expected_resource,
+ _DEFAULT_NUM_RETRIES,
+ DEFAULT_TIMEOUT,
+ project=self.PROJECT,
+ )
+
+ def test_load_table_from_file_w_default_load_config(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery.job import LoadJobConfig
+
+ client = self._make_client()
+ file_obj = self._make_file_obj()
+
+ default_job_config = LoadJobConfig()
+ default_job_config.encoding = "ISO-8859-1"
+ client.default_load_job_config = default_job_config
+
+ job_config = self._make_config()
+ do_upload_patch = self._make_do_upload_patch(
+ client, "_do_resumable_upload", self.EXPECTED_CONFIGURATION
+ )
+ with do_upload_patch as do_upload:
+ client.load_table_from_file(
+ file_obj,
+ self.TABLE_REF,
+ job_id="job_id",
+ project=self.PROJECT,
+ location=self.LOCATION,
+ job_config=job_config,
+ )
+
+ expected_resource = copy.deepcopy(self.EXPECTED_CONFIGURATION)
+ expected_resource["jobReference"]["location"] = self.LOCATION
+ expected_resource["jobReference"]["projectId"] = self.PROJECT
+ expected_resource["configuration"]["load"]["encoding"] = "ISO-8859-1"
+ do_upload.assert_called_once_with(
+ file_obj,
+ expected_resource,
+ _DEFAULT_NUM_RETRIES,
+ DEFAULT_TIMEOUT,
+ project=self.PROJECT,
+ )
+
def test_load_table_from_dataframe(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import PolicyTagList, SchemaField
@@ -6927,8 +7810,9 @@ def test_load_table_from_dataframe(self):
# (not passed in via job_config)
assert "description" not in field
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_client_location(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -6971,8 +7855,9 @@ def test_load_table_from_dataframe_w_client_location(self):
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
assert sent_config.source_format == job.SourceFormat.PARQUET
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7025,8 +7910,9 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel
# the original config object should not have been modified
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7080,8 +7966,9 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self):
# the original config object should not have been modified
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_parquet_options_none(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7131,8 +8018,9 @@ def test_load_table_from_dataframe_w_parquet_options_none(self):
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
assert sent_config.parquet_options.enable_list_inference is True
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_list_inference_none(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7190,8 +8078,9 @@ def test_load_table_from_dataframe_w_list_inference_none(self):
# the original config object should not have been modified
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- def test_load_table_from_dataframe_w_list_inference_false(self):
+ def test_load_table_from_dataframe_w_explicit_job_config_override(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7200,8 +8089,119 @@ def test_load_table_from_dataframe_w_list_inference_false(self):
records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
dataframe = pandas.DataFrame(records)
- parquet_options = ParquetOptions()
- parquet_options.enable_list_inference = False
+ client.default_load_job_config = job.LoadJobConfig(
+ encoding="ISO-8859-1",
+ write_disposition=job.WriteDisposition.WRITE_TRUNCATE,
+ source_format=job.SourceFormat.PARQUET,
+ )
+
+ job_config = job.LoadJobConfig(
+ write_disposition=job.WriteDisposition.WRITE_APPEND,
+ source_format=job.SourceFormat.PARQUET,
+ )
+ original_config_copy = copy.deepcopy(job_config)
+
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ return_value=mock.Mock(
+ schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")]
+ ),
+ )
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+ with load_patch as load_table_from_file, get_table_patch:
+ client.load_table_from_dataframe(
+ dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
+ )
+
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ rewind=True,
+ size=mock.ANY,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=self.LOCATION,
+ project=None,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.write_disposition == job.WriteDisposition.WRITE_APPEND
+ assert sent_config.source_format == job.SourceFormat.PARQUET
+ assert sent_config.encoding == "ISO-8859-1"
+
+ # the original config object should not have been modified
+ assert job_config.to_api_repr() == original_config_copy.to_api_repr()
+
+ def test_load_table_from_dataframe_w_default_load_config(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.schema import SchemaField
+
+ client = self._make_client()
+ records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
+ dataframe = pandas.DataFrame(records)
+
+ client.default_load_job_config = job.LoadJobConfig(
+ write_disposition=job.WriteDisposition.WRITE_TRUNCATE,
+ source_format=job.SourceFormat.PARQUET,
+ )
+
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ return_value=mock.Mock(
+ schema=[SchemaField("id", "INTEGER"), SchemaField("age", "INTEGER")]
+ ),
+ )
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+ with load_patch as load_table_from_file, get_table_patch:
+ client.load_table_from_dataframe(
+ dataframe, self.TABLE_REF, location=self.LOCATION
+ )
+
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ rewind=True,
+ size=mock.ANY,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=self.LOCATION,
+ project=None,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.write_disposition == job.WriteDisposition.WRITE_TRUNCATE
+ assert sent_config.source_format == job.SourceFormat.PARQUET
+
+ def test_load_table_from_dataframe_w_list_inference_false(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.schema import SchemaField
+
+ client = self._make_client()
+ records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
+ dataframe = pandas.DataFrame(records)
+
+ parquet_options = ParquetOptions()
+ parquet_options.enable_list_inference = False
job_config = job.LoadJobConfig(
write_disposition=job.WriteDisposition.WRITE_TRUNCATE,
@@ -7250,8 +8250,9 @@ def test_load_table_from_dataframe_w_list_inference_false(self):
# the original config object should not have been modified
assert job_config.to_api_repr() == original_config_copy.to_api_repr()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery import job
client = self._make_client()
@@ -7269,8 +8270,9 @@ def test_load_table_from_dataframe_w_custom_job_config_w_wrong_source_format(sel
assert "Got unexpected source_format:" in str(exc.value)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_automatic_schema(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7369,8 +8371,9 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
SchemaField("time_col", "TIME"),
)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
@@ -7390,8 +8393,12 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self):
autospec=True,
side_effect=google.api_core.exceptions.NotFound("Table not found"),
)
+ pandas_gbq_patch = mock.patch(
+ "google.cloud.bigquery._pandas_helpers.pandas_gbq",
+ new=None,
+ )
- with load_patch as load_table_from_file, get_table_patch:
+ with load_patch as load_table_from_file, get_table_patch, pandas_gbq_patch:
with warnings.catch_warnings(record=True) as warned:
client.load_table_from_dataframe(
dataframe, self.TABLE_REF, location=self.LOCATION
@@ -7428,8 +8435,9 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self):
assert sent_config.source_format == job.SourceFormat.PARQUET
assert sent_config.schema is None
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_index_and_auto_schema(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7446,7 +8454,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self):
load_patch = mock.patch(
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)
-
get_table_patch = mock.patch(
"google.cloud.bigquery.client.Client.get_table",
autospec=True,
@@ -7458,6 +8465,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self):
]
),
)
+
with load_patch as load_table_from_file, get_table_patch:
client.load_table_from_dataframe(
dataframe, self.TABLE_REF, location=self.LOCATION
@@ -7489,8 +8497,9 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self):
]
assert sent_schema == expected_sent_schema
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_unknown_table(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
client = self._make_client()
@@ -7524,11 +8533,9 @@ def test_load_table_from_dataframe_unknown_table(self):
timeout=DEFAULT_TIMEOUT,
)
- @unittest.skipIf(
- pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION,
- "Only `pandas version >=1.0.0` supported",
- )
def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7571,21 +8578,18 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
SchemaField("x", "INT64", "NULLABLE", None),
)
- @unittest.skipIf(
- pandas is None or PANDAS_INSTALLED_VERSION < PANDAS_MINIUM_VERSION,
- "Only `pandas version >=1.0.0` supported",
- )
def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
client = self._make_client()
dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64")
+
load_patch = mock.patch(
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)
-
get_table_patch = mock.patch(
"google.cloud.bigquery.client.Client.get_table",
autospec=True,
@@ -7614,12 +8618,16 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
assert sent_config.source_format == job.SourceFormat.PARQUET
- assert tuple(sent_config.schema) == (
- SchemaField("x", "INT64", "NULLABLE", None),
+ assert (
+ # Accept either the GoogleSQL or legacy SQL type name from pandas-gbq.
+ tuple(sent_config.schema) == (SchemaField("x", "INT64", "NULLABLE", None),)
+ or tuple(sent_config.schema)
+ == (SchemaField("x", "INTEGER", "NULLABLE", None),)
)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_struct_fields(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7677,12 +8685,13 @@ def test_load_table_from_dataframe_struct_fields(self):
assert sent_config.source_format == job.SourceFormat.PARQUET
assert sent_config.schema == schema
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_array_fields(self):
"""Test that a DataFrame with array columns can be uploaded correctly.
See: https://github.com/googleapis/python-bigquery/issues/19
"""
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7741,12 +8750,13 @@ def test_load_table_from_dataframe_array_fields(self):
assert sent_config.source_format == job.SourceFormat.PARQUET
assert sent_config.schema == schema
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_array_fields_w_auto_schema(self):
"""Test that a DataFrame with array columns can be uploaded correctly.
See: https://github.com/googleapis/python-bigquery/issues/19
"""
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7758,7 +8768,7 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self):
data=records, columns=["float_column", "array_column"]
)
- expected_schema = [
+ expected_schema_googlesql = [
SchemaField("float_column", "FLOAT"),
SchemaField(
"array_column",
@@ -7766,6 +8776,14 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self):
mode="REPEATED",
),
]
+ expected_schema_legacy_sql = [
+ SchemaField("float_column", "FLOAT"),
+ SchemaField(
+ "array_column",
+ "INTEGER",
+ mode="REPEATED",
+ ),
+ ]
load_patch = mock.patch(
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
@@ -7801,10 +8819,14 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self):
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
assert sent_config.source_format == job.SourceFormat.PARQUET
- assert sent_config.schema == expected_schema
+ assert (
+ sent_config.schema == expected_schema_googlesql
+ or sent_config.schema == expected_schema_legacy_sql
+ )
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_partial_schema(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7886,8 +8908,9 @@ def test_load_table_from_dataframe_w_partial_schema(self):
SchemaField("bytes_col", "BYTES"),
)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_partial_schema_extra_types(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7919,11 +8942,11 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self):
load_table_from_file.assert_not_called()
message = str(exc_context.value)
- assert "bq_schema contains fields not present in dataframe" in message
assert "unknown_col" in message
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -7950,17 +8973,76 @@ def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self):
parquet_compression="LZ4",
)
- call_args = fake_to_parquet.call_args
+ call_args = fake_to_parquet.call_args[1]
assert call_args is not None
- assert call_args.kwargs.get("parquet_compression") == "LZ4"
+ assert call_args.get("parquet_compression") == "LZ4"
+
+ def test_load_table_from_dataframe_wo_pyarrow_raises_error(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
+ client = self._make_client()
+ records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
+ dataframe = pandas.DataFrame(records)
+
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ side_effect=google.api_core.exceptions.NotFound("Table not found"),
+ )
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+ pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None)
+ to_parquet_patch = mock.patch.object(
+ dataframe, "to_parquet", wraps=dataframe.to_parquet
+ )
+
+ with load_patch, get_table_patch, pyarrow_patch, to_parquet_patch:
+ with pytest.raises(ValueError):
+ client.load_table_from_dataframe(
+ dataframe,
+ self.TABLE_REF,
+ location=self.LOCATION,
+ parquet_compression="gzip",
+ )
+
+ def test_load_table_from_dataframe_w_bad_pyarrow_issues_warning(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
+
+ client = self._make_client()
+ records = [{"id": 1, "age": 100}, {"id": 2, "age": 60}]
+ dataframe = pandas.DataFrame(records)
+
+ pyarrow_version_patch = mock.patch(
+ "google.cloud.bigquery._versions_helpers.PYARROW_VERSIONS._installed_version",
+ packaging.version.parse("2.0.0"), # A known bad version of pyarrow.
+ )
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ side_effect=google.api_core.exceptions.NotFound("Table not found"),
+ )
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+
+ with load_patch, get_table_patch, pyarrow_version_patch:
+ with pytest.raises(exceptions.LegacyPyarrowError):
+ client.load_table_from_dataframe(
+ dataframe,
+ self.TABLE_REF,
+ location=self.LOCATION,
+ )
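+
+ # Rough sketch (an editorial addition, not collected by pytest) of the guard
+ # the two tests above exercise. The real check lives in
+ # google.cloud.bigquery._versions_helpers; the 3.0.0 threshold below is a
+ # placeholder, not the library's actual minimum.
+ def _example_pyarrow_guard(self, pyarrow_module, installed_version):  # pragma: NO COVER
+ from google.cloud.bigquery import exceptions
+
+ if pyarrow_module is None:
+ # Missing pyarrow: serializing a DataFrame to Parquet is impossible.
+ raise ValueError("pyarrow is required to upload a DataFrame as Parquet.")
+ if installed_version < packaging.version.parse("3.0.0"):  # placeholder minimum
+ # Known-bad pyarrow versions are rejected outright.
+ raise exceptions.LegacyPyarrowError("pyarrow is too old; please upgrade.")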
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_nulls(self):
"""Test that a DataFrame with null columns can be uploaded if a
BigQuery schema is specified.
See: https://github.com/googleapis/google-cloud-python/issues/7370
"""
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.schema import SchemaField
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
@@ -7998,8 +9080,8 @@ def test_load_table_from_dataframe_w_nulls(self):
assert sent_config.schema == schema
assert sent_config.source_format == job.SourceFormat.PARQUET
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_w_invaild_job_config(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery import job
client = self._make_client()
@@ -8016,8 +9098,8 @@ def test_load_table_from_dataframe_w_invaild_job_config(self):
err_msg = str(exc.value)
assert "Expected an instance of LoadJobConfig" in err_msg
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_load_table_from_dataframe_with_csv_source_format(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
from google.cloud.bigquery.schema import SchemaField
@@ -8066,6 +9148,51 @@ def test_load_table_from_dataframe_with_csv_source_format(self):
sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
assert sent_config.source_format == job.SourceFormat.CSV
+ def test_load_table_from_dataframe_w_higher_scale_decimal128_datatype(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.schema import SchemaField
+ from decimal import Decimal
+
+ client = self._make_client()
+ dataframe = pandas.DataFrame({"x": [Decimal("0.1234567891")]})
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table", autospec=True
+ )
+ with load_patch as load_table_from_file, get_table_patch:
+ client.load_table_from_dataframe(
+ dataframe, self.TABLE_REF, location=self.LOCATION
+ )
+
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ rewind=True,
+ size=mock.ANY,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=self.LOCATION,
+ project=None,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.source_format == job.SourceFormat.PARQUET
+ assert tuple(sent_config.schema) == (
+ SchemaField("x", "BIGNUMERIC", "NULLABLE", None),
+ )
+
+ # When autodetect is specified explicitly, the client passes the value through as is. For more info, see
+ # https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
def test_load_table_from_json_basic_use(self):
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
from google.cloud.bigquery import job
@@ -8077,12 +9204,28 @@ def test_load_table_from_json_basic_use(self):
{"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True},
]
+ job_config = job.LoadJobConfig(autodetect=True)
+
load_patch = mock.patch(
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)
- with load_patch as load_table_from_file:
- client.load_table_from_json(json_rows, self.TABLE_REF)
+ # mock: remote table already exists
+ get_table_reference = {
+ "projectId": "project_id",
+ "datasetId": "test_dataset",
+ "tableId": "test_table",
+ }
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ return_value=mock.Mock(table_reference=get_table_reference),
+ )
+
+ with load_patch as load_table_from_file, get_table_patch:
+ client.load_table_from_json(
+ json_rows, self.TABLE_REF, job_config=job_config
+ )
load_table_from_file.assert_called_once_with(
client,
@@ -8181,21 +9324,47 @@ def test_load_table_from_json_w_invalid_job_config(self):
err_msg = str(exc.value)
assert "Expected an instance of LoadJobConfig" in err_msg
- def test_load_table_from_json_unicode_emoji_data_case(self):
+ # When all following are true:
+ # (1) no schema provided;
+ # (2) no autodetect value provided;
+ # (3) writeDisposition == WRITE_APPEND or None;
+ # (4) table already exists,
+ # client sets autodetect == False
+ # For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
+ def test_load_table_from_json_wo_schema_wo_autodetect_write_append_w_table(self):
from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.job import WriteDisposition
client = self._make_client()
- emoji = "\U0001F3E6"
- json_row = {"emoji": emoji}
- json_rows = [json_row]
+ json_rows = [
+ {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False},
+ {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True},
+ ]
+
+ job_config = job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_APPEND)
load_patch = mock.patch(
"google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)
- with load_patch as load_table_from_file:
- client.load_table_from_json(json_rows, self.TABLE_REF)
+ # mock: remote table already exists
+ get_table_reference = {
+ "projectId": "project_id",
+ "datasetId": "test_dataset",
+ "tableId": "test_table",
+ }
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ return_value=mock.Mock(table_reference=get_table_reference),
+ )
+
+ with load_patch as load_table_from_file, get_table_patch:
+ client.load_table_from_json(
+ json_rows, self.TABLE_REF, job_config=job_config
+ )
load_table_from_file.assert_called_once_with(
client,
@@ -8211,211 +9380,278 @@ def test_load_table_from_json_unicode_emoji_data_case(self):
timeout=DEFAULT_TIMEOUT,
)
- sent_data_file = load_table_from_file.mock_calls[0][1][1]
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
+ assert sent_config.schema is None
+ assert not sent_config.autodetect
- # make sure json_row's unicode characters are only encoded one time
- expected_bytes = b'{"emoji": "' + emoji.encode("utf8") + b'"}'
- assert sent_data_file.getvalue() == expected_bytes
+ # When all following are true:
+ # (1) no schema provided;
+ # (2) no autodetect value provided;
+ # (3) writeDisposition == WRITE_APPEND or None;
+ # (4) table does NOT exist,
+ # client sets autodetect == True
+ # For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
+ def test_load_table_from_json_wo_schema_wo_autodetect_write_append_wo_table(self):
+ import google.api_core.exceptions as core_exceptions
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.job import WriteDisposition
- # Low-level tests
+ client = self._make_client()
- @classmethod
- def _make_resumable_upload_responses(cls, size):
- """Make a series of responses for a successful resumable upload."""
- from google import resumable_media
+ json_rows = [
+ {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False},
+ {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True},
+ ]
- resumable_url = "http://test.invalid?upload_id=and-then-there-was-1"
- initial_response = cls._make_response(
- http.client.OK, "", {"location": resumable_url}
- )
- data_response = cls._make_response(
- resumable_media.PERMANENT_REDIRECT,
- "",
- {"range": "bytes=0-{:d}".format(size - 1)},
+ job_config = job.LoadJobConfig(write_disposition=WriteDisposition.WRITE_APPEND)
+
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)
- final_response = cls._make_response(
- http.client.OK,
- json.dumps({"size": size}),
- {"Content-Type": "application/json"},
+
+ # mock: remote table doesn't exist
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ side_effect=core_exceptions.NotFound(""),
)
- return [initial_response, data_response, final_response]
- @staticmethod
- def _make_transport(responses=None):
- import google.auth.transport.requests
+ with load_patch as load_table_from_file, get_table_patch:
+ client.load_table_from_json(
+ json_rows, self.TABLE_REF, job_config=job_config
+ )
- transport = mock.create_autospec(
- google.auth.transport.requests.AuthorizedSession, instance=True
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ size=mock.ANY,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=client.location,
+ project=client.project,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
)
- transport.request.side_effect = responses
- return transport
- def test__do_resumable_upload(self):
- file_obj = self._make_file_obj()
- file_obj_len = len(file_obj.getvalue())
- transport = self._make_transport(
- self._make_resumable_upload_responses(file_obj_len)
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
+ assert sent_config.schema is None
+ assert sent_config.autodetect
+
+ # When all following are true:
+ # (1) no schema provided;
+ # (2) no autodetect value provided;
+ # (3) writeDisposition == WRITE_TRUNCATE or WRITE_EMPTY;
+ # client sets autodetect == True
+ # For more details, see https://github.com/googleapis/python-bigquery/issues/1228#issuecomment-1910946297
+ def test_load_table_from_json_wo_schema_wo_autodetect_others(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.job import WriteDisposition
+
+ client = self._make_client()
+
+ json_rows = [
+ {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False},
+ {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True},
+ ]
+
+ job_config = job.LoadJobConfig(
+ write_disposition=WriteDisposition.WRITE_TRUNCATE
)
- client = self._make_client(transport)
- result = client._do_resumable_upload(
- file_obj, self.EXPECTED_CONFIGURATION, None, None
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)
- content = result.content.decode("utf-8")
- assert json.loads(content) == {"size": file_obj_len}
+ with load_patch as load_table_from_file:
+ client.load_table_from_json(
+ json_rows, self.TABLE_REF, job_config=job_config
+ )
- # Verify that configuration data was passed in with the initial
- # request.
- transport.request.assert_any_call(
- "POST",
+ load_table_from_file.assert_called_once_with(
+ client,
mock.ANY,
- data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"),
- headers=mock.ANY,
- timeout=mock.ANY,
+ self.TABLE_REF,
+ size=mock.ANY,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=client.location,
+ project=client.project,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
)
- def test__do_resumable_upload_custom_project(self):
- file_obj = self._make_file_obj()
- file_obj_len = len(file_obj.getvalue())
- transport = self._make_transport(
- self._make_resumable_upload_responses(file_obj_len)
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
+ assert sent_config.schema is None
+ assert sent_config.autodetect
+
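+ # A standalone sketch (an editorial addition, not the client's actual
+ # implementation) restating the autodetect defaults documented in the comments
+ # above: it is simply the decision table that the three preceding tests pin
+ # down.
+ def _example_autodetect_default(self, schema, autodetect, write_disposition, table_exists):  # pragma: NO COVER
+ if schema is not None or autodetect is not None:
+ # An explicit schema or autodetect value is passed through unchanged.
+ return autodetect
+ if write_disposition in ("WRITE_TRUNCATE", "WRITE_EMPTY"):
+ return True
+ # WRITE_APPEND (or no disposition): autodetect only when the table is missing.
+ return not table_exists
+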
+ def test_load_table_from_json_w_explicit_job_config_override(self):
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery.schema import SchemaField
+
+ client = self._make_client()
+
+ json_rows = [
+ {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False},
+ {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True},
+ ]
+
+ schema = [
+ SchemaField("name", "STRING"),
+ SchemaField("age", "INTEGER"),
+ SchemaField("adult", "BOOLEAN"),
+ ]
+ client.default_load_job_config = job.LoadJobConfig(
+ schema=schema, encoding="ISO-8859-1"
)
- client = self._make_client(transport)
- result = client._do_resumable_upload(
- file_obj,
- self.EXPECTED_CONFIGURATION,
- None,
- None,
- project="custom-project",
+ # Copy the schema list so mutating the override does not alias the original.
+ override_schema = list(schema)
+ override_schema[0] = SchemaField("username", "STRING")
+ job_config = job.LoadJobConfig(schema=override_schema)
+ original_config_copy = copy.deepcopy(job_config)
+
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)
- content = result.content.decode("utf-8")
- assert json.loads(content) == {"size": file_obj_len}
+ with load_patch as load_table_from_file:
+ client.load_table_from_json(
+ json_rows,
+ self.TABLE_REF,
+ job_config=job_config,
+ project="project-x",
+ location="EU",
+ )
- # Verify that configuration data was passed in with the initial
- # request.
- transport.request.assert_any_call(
- "POST",
+ load_table_from_file.assert_called_once_with(
+ client,
mock.ANY,
- data=json.dumps(self.EXPECTED_CONFIGURATION).encode("utf-8"),
- headers=mock.ANY,
- timeout=mock.ANY,
+ self.TABLE_REF,
+ size=mock.ANY,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location="EU",
+ project="project-x",
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
)
- # Check the project ID used in the call to initiate resumable upload.
- initiation_url = next(
- (
- call.args[1]
- for call in transport.request.call_args_list
- if call.args[0] == "POST" and "uploadType=resumable" in call.args[1]
- ),
- None,
- ) # pragma: NO COVER
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
+ assert sent_config.schema == override_schema
+ assert sent_config.encoding == "ISO-8859-1"
+ assert not sent_config.autodetect
- assert initiation_url is not None
- assert "projects/custom-project" in initiation_url
+ # the original config object should not have been modified
+ assert job_config.to_api_repr() == original_config_copy.to_api_repr()
- def test__do_resumable_upload_custom_timeout(self):
- file_obj = self._make_file_obj()
- file_obj_len = len(file_obj.getvalue())
- transport = self._make_transport(
- self._make_resumable_upload_responses(file_obj_len)
- )
- client = self._make_client(transport)
+ def test_load_table_from_json_w_default_job_config(self):
+ from google.cloud.bigquery import job
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+ from google.cloud.bigquery.schema import SchemaField
- client._do_resumable_upload(
- file_obj, self.EXPECTED_CONFIGURATION, num_retries=0, timeout=3.14
- )
+ client = self._make_client()
- # The timeout should be applied to all underlying calls.
- for call_args in transport.request.call_args_list:
- assert call_args.kwargs.get("timeout") == 3.14
+ json_rows = [
+ {"name": "One", "age": 11, "birthday": "2008-09-10", "adult": False},
+ {"name": "Two", "age": 22, "birthday": "1997-08-09", "adult": True},
+ ]
- def test__do_multipart_upload(self):
- transport = self._make_transport([self._make_response(http.client.OK)])
- client = self._make_client(transport)
- file_obj = self._make_file_obj()
- file_obj_len = len(file_obj.getvalue())
+ schema = [
+ SchemaField("name", "STRING"),
+ SchemaField("age", "INTEGER"),
+ SchemaField("adult", "BOOLEAN"),
+ ]
+ client.default_load_job_config = job.LoadJobConfig(schema=schema)
- client._do_multipart_upload(
- file_obj, self.EXPECTED_CONFIGURATION, file_obj_len, None, None
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
)
- # Verify that configuration data was passed in with the initial
- # request.
- request_args = transport.request.mock_calls[0][2]
- request_data = request_args["data"].decode("utf-8")
- request_headers = request_args["headers"]
-
- request_content = email.message_from_string(
- "Content-Type: {}\r\n{}".format(
- request_headers["content-type"].decode("utf-8"), request_data
+ with load_patch as load_table_from_file:
+ client.load_table_from_json(
+ json_rows,
+ self.TABLE_REF,
+ job_config=None,
+ project="project-x",
+ location="EU",
)
+
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ size=mock.ANY,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location="EU",
+ project="project-x",
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
)
- # There should be two payloads: the configuration and the binary daya.
- configuration_data = request_content.get_payload(0).get_payload()
- binary_data = request_content.get_payload(1).get_payload()
+ sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+ assert sent_config.source_format == job.SourceFormat.NEWLINE_DELIMITED_JSON
+ assert sent_config.schema == schema
- assert json.loads(configuration_data) == self.EXPECTED_CONFIGURATION
- assert binary_data.encode("utf-8") == file_obj.getvalue()
+ def test_load_table_from_json_unicode_emoji_data_case(self):
+ from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
- def test__do_multipart_upload_wrong_size(self):
client = self._make_client()
- file_obj = self._make_file_obj()
- file_obj_len = len(file_obj.getvalue())
-
- with pytest.raises(ValueError):
- client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None, None)
-
- def test_schema_from_json_with_file_path(self):
- from google.cloud.bigquery.schema import SchemaField
- file_content = """[
- {
- "description": "quarter",
- "mode": "REQUIRED",
- "name": "qtr",
- "type": "STRING"
- },
- {
- "description": "sales representative",
- "mode": "NULLABLE",
- "name": "rep",
- "type": "STRING"
- },
- {
- "description": "total sales",
- "mode": "NULLABLE",
- "name": "sales",
- "type": "FLOAT"
- }
- ]"""
+ emoji = "\U0001F3E6"
+ json_row = {"emoji": emoji}
+ json_rows = [json_row]
- expected = [
- SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
- SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
- SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
- ]
+ load_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+ )
+ # mock: remote table already exists
+ get_table_reference = {
+ "projectId": "project_id",
+ "datasetId": "test_dataset",
+ "tableId": "test_table",
+ }
+ get_table_patch = mock.patch(
+ "google.cloud.bigquery.client.Client.get_table",
+ autospec=True,
+ return_value=mock.Mock(table_reference=get_table_reference),
+ )
- client = self._make_client()
- mock_file_path = "/mocked/file.json"
+ with load_patch as load_table_from_file, get_table_patch:
+ client.load_table_from_json(json_rows, self.TABLE_REF)
- open_patch = mock.patch(
- "builtins.open", new=mock.mock_open(read_data=file_content)
+ load_table_from_file.assert_called_once_with(
+ client,
+ mock.ANY,
+ self.TABLE_REF,
+ size=mock.ANY,
+ num_retries=_DEFAULT_NUM_RETRIES,
+ job_id=mock.ANY,
+ job_id_prefix=None,
+ location=client.location,
+ project=client.project,
+ job_config=mock.ANY,
+ timeout=DEFAULT_TIMEOUT,
)
- with open_patch as _mock_file:
- actual = client.schema_from_json(mock_file_path)
- _mock_file.assert_called_once_with(mock_file_path)
- # This assert is to make sure __exit__ is called in the context
- # manager that opens the file in the function
- _mock_file().__exit__.assert_called_once()
+ sent_data_file = load_table_from_file.mock_calls[0][1][1]
- assert expected == actual
+ # make sure json_row's unicode characters are only encoded one time
+ expected_bytes = b'{"emoji": "' + emoji.encode("utf8") + b'"}'
+ assert sent_data_file.getvalue() == expected_bytes
+ # Low-level tests
def test_schema_from_json_with_file_object(self):
from google.cloud.bigquery.schema import SchemaField
@@ -8441,9 +9677,11 @@ def test_schema_from_json_with_file_object(self):
]"""
expected = [
- SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
- SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
- SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
+ SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
+ SchemaField(
+ "rep", "STRING", "NULLABLE", description="sales representative"
+ ),
+ SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
]
client = self._make_client()
@@ -8477,9 +9715,11 @@ def test_schema_to_json_with_file_path(self):
]
schema_list = [
- SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
- SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
- SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
+ SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
+ SchemaField(
+ "rep", "STRING", "NULLABLE", description="sales representative"
+ ),
+ SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
]
client = self._make_client()
@@ -8521,9 +9761,11 @@ def test_schema_to_json_with_file_object(self):
]
schema_list = [
- SchemaField("qtr", "STRING", "REQUIRED", "quarter"),
- SchemaField("rep", "STRING", "NULLABLE", "sales representative"),
- SchemaField("sales", "FLOAT", "NULLABLE", "total sales"),
+ SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
+ SchemaField(
+ "rep", "STRING", "NULLABLE", description="sales representative"
+ ),
+ SchemaField("sales", "FLOAT", "NULLABLE", description="total sales"),
]
fake_file = io.StringIO()
diff --git a/tests/unit/test_client_bigframes.py b/tests/unit/test_client_bigframes.py
new file mode 100644
index 000000000..0260da5e4
--- /dev/null
+++ b/tests/unit/test_client_bigframes.py
@@ -0,0 +1,411 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for Client features enabling the bigframes integration."""
+
+from __future__ import annotations
+
+import datetime
+from unittest import mock
+
+import pytest
+
+import google.auth.credentials
+from google.api_core import exceptions
+from google.cloud import bigquery
+import google.cloud.bigquery.client
+from google.cloud.bigquery import _job_helpers
+
+
+PROJECT = "test-project"
+LOCATION = "test-location"
+
+
+def make_response(body, *, status_code: int = 200):
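+    """Build a minimal mock HTTP response whose .json() returns the given body."""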
+ response = mock.Mock()
+ type(response).status_code = mock.PropertyMock(return_value=status_code)
+ response.json.return_value = body
+ return response
+
+
+@pytest.fixture
+def client():
+ """A real client object with mocked API requests."""
+ credentials = mock.create_autospec(
+ google.auth.credentials.Credentials, instance=True
+ )
+ http_session = mock.Mock()
+ return google.cloud.bigquery.client.Client(
+ project=PROJECT,
+ credentials=credentials,
+ _http=http_session,
+ location=LOCATION,
+ )
+
+
+def test_query_and_wait_bigframes_dry_run_no_callback(client):
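+    """A dry run via jobs.query returns statistics without invoking the callback."""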
+ client._http.request.side_effect = [
+ make_response(
+ {
+ # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
+ "location": LOCATION,
+ "queryId": "abcdefg",
+ "totalBytesProcessed": "123",
+ "jobComplete": True,
+ }
+ ),
+ ]
+ callback = mock.Mock()
+ job_config = bigquery.QueryJobConfig(dry_run=True)
+ response = client._query_and_wait_bigframes(
+ query="SELECT 1", job_config=job_config, callback=callback
+ )
+ callback.assert_not_called()
+ assert response.total_bytes_processed == 123
+ assert response.query_id == "abcdefg"
+
+
+def test_query_and_wait_bigframes_callback(client):
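+    """A successful jobs.query emits a QuerySentEvent then a QueryFinishedEvent."""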
+ created = datetime.datetime(
+ 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc
+ )
+ started = datetime.datetime(
+ 2025, 8, 18, 10, 11, 13, 456000, tzinfo=datetime.timezone.utc
+ )
+ ended = datetime.datetime(
+ 2025, 8, 18, 10, 11, 14, 567000, tzinfo=datetime.timezone.utc
+ )
+ client._http.request.side_effect = [
+ make_response(
+ {
+ # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
+ "location": LOCATION,
+ "queryId": "abcdefg",
+ "totalRows": "100",
+ "totalBytesProcessed": "123",
+ "totalSlotMs": "987",
+ "jobComplete": True,
+ "creationTime": _to_millis(created),
+ "startTime": _to_millis(started),
+ "endTime": _to_millis(ended),
+ }
+ ),
+ ]
+ callback = mock.Mock()
+ client._query_and_wait_bigframes(query="SELECT 1", callback=callback)
+ callback.assert_has_calls(
+ [
+ mock.call(
+ _job_helpers.QuerySentEvent(
+ query="SELECT 1",
+ billing_project=PROJECT,
+ location=LOCATION,
+ # No job ID, because a basic query is eligible for jobs.query.
+ job_id=None,
+ request_id=mock.ANY,
+ )
+ ),
+ mock.call(
+ _job_helpers.QueryFinishedEvent(
+ billing_project=PROJECT,
+ location=LOCATION,
+ query_id="abcdefg",
+ total_rows=100,
+ total_bytes_processed=123,
+ slot_millis=987,
+ created=created,
+ started=started,
+ ended=ended,
+ # No job ID or destination, because a basic query is eligible for jobs.query.
+ job_id=None,
+ destination=None,
+ ),
+ ),
+ ]
+ )
+
+
+def _to_millis(dt: datetime.datetime) -> str:
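+    """Convert a datetime to milliseconds since the Unix epoch, as a string."""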
+ return str(
+ int(
+ (dt - datetime.datetime(1970, 1, 1, 0, 0, 0, tzinfo=datetime.timezone.utc))
+ / datetime.timedelta(milliseconds=1)
+ )
+ )
+
+
+def test_query_and_wait_bigframes_with_jobs_insert_callback_empty_results(client):
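+    """jobs.insert path (destination set): verify callbacks for an empty result."""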
+ client._http.request.side_effect = [
+ # jobs.insert because destination table present in job_config
+ make_response(
+ {
+ # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert
+ # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "statistics": {
+ "creationTime": _to_millis(
+ datetime.datetime(
+ 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc
+ )
+ ),
+ "query": {
+ "statementType": "SELECT",
+ # "queryPlan": [{"name": "part1"}, {"name": "part2"}],
+ },
+ },
+ "status": {
+ "state": "PENDING",
+ },
+ }
+ ),
+ # jobs.get waiting for query to finish
+ make_response(
+ {
+ # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/insert
+ # https://cloud.google.com/bigquery/docs/reference/rest/v2/Job
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "status": {
+ "state": "PENDING",
+ },
+ }
+ ),
+ # jobs.getQueryResults with max_results=0
+ make_response(
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "jobComplete": True,
+ # totalRows is intentionally missing so we end up in the _EmptyRowIterator code path.
+ }
+ ),
+ # jobs.get
+ make_response(
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "statistics": {
+ "creationTime": _to_millis(
+ datetime.datetime(
+ 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc
+ )
+ ),
+ "startTime": _to_millis(
+ datetime.datetime(
+ 2025, 8, 13, 13, 7, 32, 123000, tzinfo=datetime.timezone.utc
+ )
+ ),
+ "endTime": _to_millis(
+ datetime.datetime(
+ 2025, 8, 13, 13, 7, 33, 123000, tzinfo=datetime.timezone.utc
+ )
+ ),
+ "query": {
+ "statementType": "SELECT",
+ "totalBytesProcessed": 123,
+ "totalSlotMs": 987,
+ },
+ },
+ "status": {"state": "DONE"},
+ }
+ ),
+ ]
+ callback = mock.Mock()
+ config = bigquery.QueryJobConfig()
+ config.destination = "proj.dset.table"
+ client._query_and_wait_bigframes(
+ query="SELECT 1", job_config=config, callback=callback
+ )
+ callback.assert_has_calls(
+ [
+ mock.call(
+ _job_helpers.QuerySentEvent(
+ query="SELECT 1",
+ billing_project="response-project",
+ location="response-location",
+ job_id="response-job-id",
+ # We use jobs.insert not jobs.query because destination is
+ # present on job_config.
+ request_id=None,
+ )
+ ),
+ mock.call(
+ _job_helpers.QueryReceivedEvent(
+ billing_project="response-project",
+ location="response-location",
+ job_id="response-job-id",
+ statement_type="SELECT",
+ state="PENDING",
+ query_plan=[],
+ created=datetime.datetime(
+ 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc
+ ),
+ started=None,
+ ended=None,
+ )
+ ),
+ mock.call(
+ _job_helpers.QueryFinishedEvent(
+ billing_project="response-project",
+ location="response-location",
+ job_id="response-job-id",
+ query_id=None,
+ total_rows=0,
+ total_bytes_processed=123,
+ slot_millis=987,
+ created=datetime.datetime(
+ 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc
+ ),
+ started=datetime.datetime(
+ 2025, 8, 13, 13, 7, 32, 123000, tzinfo=datetime.timezone.utc
+ ),
+ ended=datetime.datetime(
+ 2025, 8, 13, 13, 7, 33, 123000, tzinfo=datetime.timezone.utc
+ ),
+ destination=None,
+ ),
+ ),
+ ]
+ )
+
+
+def test_query_and_wait_bigframes_with_jobs_insert_dry_run_no_callback(client):
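+    """A dry run via jobs.insert skips callbacks and reports bytes and schema."""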
+ client._http.request.side_effect = [
+ # jobs.insert because destination table present in job_config
+ make_response(
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "response-job-id",
+ "location": "response-location",
+ },
+ "statistics": {
+ "creationTime": _to_millis(
+ datetime.datetime(
+ 2025, 8, 13, 13, 7, 31, 123000, tzinfo=datetime.timezone.utc
+ )
+ ),
+ "query": {
+ "statementType": "SELECT",
+ "totalBytesProcessed": 123,
+ "schema": {
+ "fields": [
+ {"name": "_f0", "type": "INTEGER"},
+ ],
+ },
+ },
+ },
+ "configuration": {
+ "dryRun": True,
+ },
+ "status": {"state": "DONE"},
+ }
+ ),
+ ]
+ callback = mock.Mock()
+ config = bigquery.QueryJobConfig()
+ config.destination = "proj.dset.table"
+ config.dry_run = True
+ result = client._query_and_wait_bigframes(
+ query="SELECT 1", job_config=config, callback=callback
+ )
+ callback.assert_not_called()
+ assert result.total_bytes_processed == 123
+ assert result.schema == [bigquery.SchemaField("_f0", "INTEGER")]
+
+
+def test_query_and_wait_bigframes_with_query_retry_callbacks(client, global_time_lock):
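+    """A retryable jobs.query error emits a QueryRetryEvent before the final result."""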
+ created = datetime.datetime(
+ 2025, 8, 18, 10, 11, 12, 345000, tzinfo=datetime.timezone.utc
+ )
+ started = datetime.datetime(
+ 2025, 8, 18, 10, 11, 13, 456000, tzinfo=datetime.timezone.utc
+ )
+ ended = datetime.datetime(
+ 2025, 8, 18, 10, 11, 14, 567000, tzinfo=datetime.timezone.utc
+ )
+ client._http.request.side_effect = [
+ exceptions.InternalServerError(
+ "first try", errors=({"reason": "jobInternalError"},)
+ ),
+ make_response(
+ {
+ # https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
+ "location": LOCATION,
+ "queryId": "abcdefg",
+ "totalRows": "100",
+ "totalBytesProcessed": "123",
+ "totalSlotMs": "987",
+ "jobComplete": True,
+ "creationTime": _to_millis(created),
+ "startTime": _to_millis(started),
+ "endTime": _to_millis(ended),
+ }
+ ),
+ ]
+ callback = mock.Mock()
+ client._query_and_wait_bigframes(query="SELECT 1", callback=callback)
+ callback.assert_has_calls(
+ [
+ mock.call(
+ _job_helpers.QuerySentEvent(
+ query="SELECT 1",
+ billing_project=PROJECT,
+ location=LOCATION,
+ # No job ID, because a basic query is eligible for jobs.query.
+ job_id=None,
+ request_id=mock.ANY,
+ )
+ ),
+ mock.call(
+ _job_helpers.QueryRetryEvent(
+ query="SELECT 1",
+ billing_project=PROJECT,
+ location=LOCATION,
+ # No job ID, because a basic query is eligible for jobs.query.
+ job_id=None,
+ request_id=mock.ANY,
+ )
+ ),
+ mock.call(
+ _job_helpers.QueryFinishedEvent(
+ billing_project=PROJECT,
+ location=LOCATION,
+ query_id=mock.ANY,
+ total_rows=100,
+ total_bytes_processed=123,
+ slot_millis=987,
+ created=created,
+ started=started,
+ ended=ended,
+ # No job ID or destination, because a basic query is eligible for jobs.query.
+ job_id=None,
+ destination=None,
+ ),
+ ),
+ ]
+ )
diff --git a/tests/unit/test_client_resumable_media_upload.py b/tests/unit/test_client_resumable_media_upload.py
new file mode 100644
index 000000000..642c18d15
--- /dev/null
+++ b/tests/unit/test_client_resumable_media_upload.py
@@ -0,0 +1,433 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+import email
+import http.client
+import io
+import json
+
+import pytest
+
+from google.cloud.bigquery.table import TableReference
+
+from .helpers import make_connection
+
+
+PROJECT = "test-project"
+TABLE_REF = TableReference.from_string(f"{PROJECT}.test_dataset.test_table")
+EXPECTED_CONFIGURATION = {
+ "load": {
+ "destinationTable": {
+ "projectId": PROJECT,
+ "datasetId": "test_dataset",
+ "tableId": "test_table",
+ },
+ "sourceFormat": "CSV",
+ }
+}
+
+
+@pytest.fixture(autouse=True)
+def mock_sleep(monkeypatch):
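+    """Patch time.sleep so retry backoff in these tests does not actually wait."""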
+ sleep = mock.Mock()
+ monkeypatch.setattr("time.sleep", sleep)
+
+
+def _make_credentials():
+ import google.auth.credentials
+
+ return mock.Mock(spec=google.auth.credentials.Credentials)
+
+
+def _make_client(*args, **kw):
+ from google.cloud.bigquery.client import Client
+
+ kw["credentials"] = _make_credentials()
+ kw["project"] = PROJECT
+ return Client(*args, **kw)
+
+
+def _make_file_obj(contents=b"some data"):
+ return io.BytesIO(contents)
+
+
+def _make_response(status_code, content=b"", headers=None):
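+    """Build a mock requests-style response with the given status and headers."""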
+ response = mock.Mock(spec=["status_code", "content", "request", "headers"])
+ response.status_code = status_code
+ response.content = content
+ response.headers = headers or {}
+ response.request = mock.Mock(spec=["headers"])
+ return response
+
+
+def _make_resumable_upload_responses(num_bytes):
+ # In a real scenario, the upload URL is returned in a 'Location'
+ # header.
+ return [
+ _make_response(
+ http.client.OK,
+ headers={"location": "http://test.invalid/upload-id"},
+ ),
+ _make_response(
+ http.client.OK, content=json.dumps({"size": num_bytes}).encode("utf-8")
+ ),
+ ]
+
+
+def _make_transport(responses=None):
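+    """An autospec'd AuthorizedSession that replays the given responses in order."""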
+ import google.auth.transport.requests
+
+ transport = mock.create_autospec(
+ google.auth.transport.requests.AuthorizedSession, instance=True
+ )
+ transport.request.side_effect = responses
+ return transport
+
+
+def _mock_requests_response(status_code, headers, content=b""):
+ return mock.Mock(
+ content=content,
+ headers=headers,
+ status_code=status_code,
+ spec=["content", "headers", "status_code"],
+ )
+
+
+def _mock_transport(status_code, headers, content=b""):
+ fake_transport = mock.Mock(spec=["request"])
+ fake_response = _mock_requests_response(status_code, headers, content=content)
+ fake_transport.request.return_value = fake_response
+ return fake_transport
+
+
+def _initiate_resumable_upload_helper(num_retries=None, mtls=False):
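+    """Exercise Client._initiate_resumable_upload and inspect the returned upload."""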
+ from google.resumable_media.requests import ResumableUpload
+ from google.cloud.bigquery.client import _DEFAULT_CHUNKSIZE
+ from google.cloud.bigquery.client import _GENERIC_CONTENT_TYPE
+ from google.cloud.bigquery.client import _get_upload_headers
+ from google.cloud.bigquery.job import LoadJob
+ from google.cloud.bigquery.job import LoadJobConfig
+ from google.cloud.bigquery.job import SourceFormat
+
+ # Create mocks to be checked for doing transport.
+ resumable_url = "http://test.invalid?upload_id=hey-you"
+ response_headers = {"location": resumable_url}
+ fake_transport = _mock_transport(http.client.OK, response_headers)
+ client = _make_client(_http=fake_transport)
+ conn = client._connection = make_connection()
+ if mtls:
+ conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls")
+
+ # Create some mock arguments and call the method under test.
+ data = b"goodbye gudbi gootbee"
+ stream = io.BytesIO(data)
+ config = LoadJobConfig()
+ config.source_format = SourceFormat.CSV
+ job = LoadJob(None, None, TABLE_REF, client, job_config=config)
+ metadata = job.to_api_repr()
+ upload, transport_out = client._initiate_resumable_upload(
+ stream, metadata, num_retries, None
+ )
+
+ # Check the returned values.
+ assert isinstance(upload, ResumableUpload)
+
+ host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com"
+ upload_url = (
+ f"{host_name}/upload/bigquery/v2/projects/{PROJECT}/jobs?uploadType=resumable"
+ )
+ assert upload.upload_url == upload_url
+ expected_headers = _get_upload_headers(conn.user_agent)
+ assert upload._headers == expected_headers
+ assert not upload.finished
+ assert upload._chunk_size == _DEFAULT_CHUNKSIZE
+ assert upload._stream is stream
+ assert upload._total_bytes is None
+ assert upload._content_type == _GENERIC_CONTENT_TYPE
+ assert upload.resumable_url == resumable_url
+
+ retry_strategy = upload._retry_strategy
+ assert retry_strategy.max_sleep == 64.0
+ if num_retries is None:
+ assert retry_strategy.max_cumulative_retry == 600.0
+ assert retry_strategy.max_retries is None
+ else:
+ assert retry_strategy.max_cumulative_retry is None
+ assert retry_strategy.max_retries == num_retries
+ assert transport_out is fake_transport
+ # Make sure we never read from the stream.
+ assert stream.tell() == 0
+
+ # Check the mocks.
+ request_headers = expected_headers.copy()
+ request_headers["x-upload-content-type"] = _GENERIC_CONTENT_TYPE
+ fake_transport.request.assert_called_once_with(
+ "POST",
+ upload_url,
+ data=json.dumps(metadata).encode("utf-8"),
+ headers=request_headers,
+ timeout=mock.ANY,
+ )
+
+
+def test__initiate_resumable_upload():
+ _initiate_resumable_upload_helper()
+
+
+def test__initiate_resumable_upload_mtls():
+ _initiate_resumable_upload_helper(mtls=True)
+
+
+def test_initiate_resumable_upload_with_retry():
+ _initiate_resumable_upload_helper(num_retries=11)
+
+
+def _do_multipart_upload_success_helper(
+ get_boundary, num_retries=None, project=None, mtls=False
+):
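+    """Exercise Client._do_multipart_upload and verify the multipart request body."""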
+ from google.cloud.bigquery.client import _get_upload_headers
+ from google.cloud.bigquery.job import LoadJob
+ from google.cloud.bigquery.job import LoadJobConfig
+ from google.cloud.bigquery.job import SourceFormat
+
+ fake_transport = _mock_transport(http.client.OK, {})
+ client = _make_client(_http=fake_transport)
+ conn = client._connection = make_connection()
+ if mtls:
+ conn.get_api_base_url_for_mtls = mock.Mock(return_value="https://foo.mtls")
+
+ if project is None:
+ project = PROJECT
+
+ # Create some mock arguments.
+ data = b"Bzzzz-zap \x00\x01\xf4"
+ stream = io.BytesIO(data)
+ config = LoadJobConfig()
+ config.source_format = SourceFormat.CSV
+ job = LoadJob(None, None, TABLE_REF, client, job_config=config)
+ metadata = job.to_api_repr()
+ size = len(data)
+
+ response = client._do_multipart_upload(
+ stream, metadata, size, num_retries, None, project=project
+ )
+
+ # Check the mocks and the returned value.
+ assert response is fake_transport.request.return_value
+ assert stream.tell() == size
+ get_boundary.assert_called_once_with()
+
+ host_name = "https://foo.mtls" if mtls else "https://bigquery.googleapis.com"
+ upload_url = (
+ f"{host_name}/upload/bigquery/v2/projects/{project}/jobs?uploadType=multipart"
+ )
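+    # Expected multipart/related body: a JSON job-configuration part, then the
+    # raw file bytes, delimited by the mocked "==0==" boundary.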
+ payload = (
+ b"--==0==\r\n"
+ b"content-type: application/json; charset=UTF-8\r\n\r\n"
+ b"%(json_metadata)s"
+ b"\r\n"
+ b"--==0==\r\n"
+ b"content-type: */*\r\n\r\n"
+ b"%(data)s"
+ b"\r\n"
+ b"--==0==--"
+ ) % {b"json_metadata": json.dumps(metadata).encode("utf-8"), b"data": data}
+
+ headers = _get_upload_headers(conn.user_agent)
+ headers["content-type"] = b'multipart/related; boundary="==0=="'
+ fake_transport.request.assert_called_once_with(
+ "POST", upload_url, data=payload, headers=headers, timeout=mock.ANY
+ )
+
+
+@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==")
+def test__do_multipart_upload(get_boundary):
+ _do_multipart_upload_success_helper(get_boundary)
+
+
+@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==")
+def test__do_multipart_upload_mtls(get_boundary):
+ _do_multipart_upload_success_helper(get_boundary, mtls=True)
+
+
+@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==")
+def test_do_multipart_upload_with_retry(get_boundary):
+ _do_multipart_upload_success_helper(get_boundary, num_retries=8)
+
+
+@mock.patch("google.resumable_media._upload.get_boundary", return_value=b"==0==")
+def test__do_multipart_upload_with_custom_project(get_boundary):
+ _do_multipart_upload_success_helper(get_boundary, project="custom-project")
+
+
+def test__do_resumable_upload():
+ file_obj = _make_file_obj()
+ file_obj_len = len(file_obj.getvalue())
+ transport = _make_transport(_make_resumable_upload_responses(file_obj_len))
+ client = _make_client(_http=transport)
+
+ result = client._do_resumable_upload(file_obj, EXPECTED_CONFIGURATION, None, None)
+
+ content = result.content.decode("utf-8")
+ assert json.loads(content) == {"size": file_obj_len}
+
+ transport.request.assert_any_call(
+ "POST",
+ mock.ANY,
+ data=json.dumps(EXPECTED_CONFIGURATION).encode("utf-8"),
+ headers=mock.ANY,
+ timeout=mock.ANY,
+ )
+
+
+def test__do_resumable_upload_custom_project():
+ file_obj = _make_file_obj()
+ file_obj_len = len(file_obj.getvalue())
+ transport = _make_transport(_make_resumable_upload_responses(file_obj_len))
+ client = _make_client(_http=transport)
+
+ result = client._do_resumable_upload(
+ file_obj,
+ EXPECTED_CONFIGURATION,
+ None,
+ None,
+ project="custom-project",
+ )
+
+ content = result.content.decode("utf-8")
+ assert json.loads(content) == {"size": file_obj_len}
+
+ transport.request.assert_any_call(
+ "POST",
+ mock.ANY,
+ data=json.dumps(EXPECTED_CONFIGURATION).encode("utf-8"),
+ headers=mock.ANY,
+ timeout=mock.ANY,
+ )
+
+ initiation_url = next(
+ (
+ call[0][1]
+ for call in transport.request.call_args_list
+ if call[0][0] == "POST" and "uploadType=resumable" in call[0][1]
+ ),
+ None,
+ )
+ assert initiation_url is not None
+ assert "projects/custom-project" in initiation_url
+
+
+def test__do_resumable_upload_custom_timeout():
+ file_obj = _make_file_obj()
+ file_obj_len = len(file_obj.getvalue())
+ transport = _make_transport(_make_resumable_upload_responses(file_obj_len))
+ client = _make_client(_http=transport)
+
+ client._do_resumable_upload(
+ file_obj, EXPECTED_CONFIGURATION, num_retries=0, timeout=3.14
+ )
+
+ for call_args in transport.request.call_args_list:
+ assert call_args[1].get("timeout") == 3.14
+
+
+def test__do_multipart_upload_request_body():
+ transport = _make_transport([_make_response(http.client.OK)])
+ client = _make_client(_http=transport)
+ file_obj = _make_file_obj()
+ file_obj_len = len(file_obj.getvalue())
+
+ client._do_multipart_upload(
+ file_obj, EXPECTED_CONFIGURATION, file_obj_len, None, None
+ )
+
+ request_args = transport.request.mock_calls[0][2]
+ request_data = request_args["data"].decode("utf-8")
+ request_headers = request_args["headers"]
+
+ request_content = email.message_from_string(
+ "Content-Type: {}\n{}".format(
+ request_headers["content-type"].decode("utf-8"), request_data
+ )
+ )
+
+ configuration_data = request_content.get_payload(0).get_payload()
+ binary_data = request_content.get_payload(1).get_payload()
+
+ assert json.loads(configuration_data) == EXPECTED_CONFIGURATION
+ assert binary_data.encode("utf-8") == file_obj.getvalue()
+
+
+def test__do_multipart_upload_wrong_size():
+ client = _make_client()
+ file_obj = _make_file_obj()
+ file_obj_len = len(file_obj.getvalue())
+
+ with pytest.raises(ValueError):
+ client._do_multipart_upload(file_obj, {}, file_obj_len + 1, None, None)
+
+
+def test_schema_from_json_with_file_path():
+ from google.cloud.bigquery.schema import SchemaField
+
+ file_content = """
+ [
+ {
+ "description": "quarter",
+ "mode": "REQUIRED",
+ "name": "qtr",
+ "type": "STRING"
+ },
+ {
+ "description": "sales representative",
+ "mode": "NULLABLE",
+ "name": "rep",
+ "type": "STRING"
+ },
+ {
+ "description": "total sales",
+ "mode": "NULLABLE",
+ "name": "sales",
+ "type": "FLOAT"
+ }
+ ]"""
+
+ expected = [
+ SchemaField("qtr", "STRING", "REQUIRED", description="quarter"),
+ SchemaField(
+ "rep",
+ "STRING",
+ "NULLABLE",
+ description="sales representative",
+ ),
+ SchemaField(
+ "sales",
+ "FLOAT",
+ "NULLABLE",
+ description="total sales",
+ ),
+ ]
+
+ client = _make_client()
+ mock_file_path = "/mocked/file.json"
+
+ open_patch = mock.patch("builtins.open", new=mock.mock_open(read_data=file_content))
+
+ with open_patch as _mock_file:
+ actual = client.schema_from_json(mock_file_path)
+ _mock_file.assert_called_once_with(mock_file_path)
+ _mock_file.return_value.read.assert_called_once()
+
+ assert expected == actual
diff --git a/tests/unit/test_client_retry.py b/tests/unit/test_client_retry.py
new file mode 100644
index 000000000..6e49cc464
--- /dev/null
+++ b/tests/unit/test_client_retry.py
@@ -0,0 +1,279 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from unittest import mock
+
+import freezegun
+import google.api_core.exceptions
+from google.cloud.bigquery import job as bqjob
+from google.cloud.bigquery.retry import DEFAULT_RETRY
+from .helpers import make_connection
+
+
+PROJECT = "test-project"
+
+
+def _make_credentials():
+ import google.auth.credentials
+
+ return mock.Mock(spec=google.auth.credentials.Credentials)
+
+
+def _make_client(*args, **kw):
+ from google.cloud.bigquery.client import Client
+
+ return Client(*args, **kw)
+
+
+def test_get_service_account_email_w_custom_retry(global_time_lock):
+ api_path = f"/projects/{PROJECT}/serviceAccount"
+ creds = _make_credentials()
+ http = object()
+ client = _make_client(project=PROJECT, credentials=creds, _http=http)
+
+ resource = {
+ "kind": "bigquery#getServiceAccountResponse",
+ "email": "bq-123@bigquery-encryption.iam.gserviceaccount.com",
+ }
+ api_request_patcher = mock.patch.object(
+ client._connection,
+ "api_request",
+ side_effect=[ValueError, resource],
+ )
+
+ retry = DEFAULT_RETRY.with_deadline(1).with_predicate(
+ lambda exc: isinstance(exc, ValueError)
+ )
+
+ with api_request_patcher as fake_api_request:
+ with mock.patch(
+ "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes"
+ ) as final_attributes:
+ service_account_email = client.get_service_account_email(
+ retry=retry, timeout=7.5
+ )
+
+ final_attributes.assert_called_once_with({"path": api_path}, client, None)
+ assert service_account_email == "bq-123@bigquery-encryption.iam.gserviceaccount.com"
+ assert fake_api_request.call_args_list == [
+ mock.call(method="GET", path=api_path, timeout=7.5),
+ mock.call(method="GET", path=api_path, timeout=7.5), # was retried once
+ ]
+
+
+def test_call_api_applying_custom_retry_on_timeout(global_time_lock):
+ from concurrent.futures import TimeoutError
+
+ creds = _make_credentials()
+ client = _make_client(project=PROJECT, credentials=creds)
+
+ api_request_patcher = mock.patch.object(
+ client._connection,
+ "api_request",
+ side_effect=[TimeoutError, "result"],
+ )
+ retry = DEFAULT_RETRY.with_deadline(1).with_predicate(
+ lambda exc: isinstance(exc, TimeoutError)
+ )
+
+ with api_request_patcher as fake_api_request:
+ result = client._call_api(retry, foo="bar")
+
+ assert result == "result"
+ assert fake_api_request.call_args_list == [
+ mock.call(foo="bar"),
+ mock.call(foo="bar"),
+ ]
+
+
+def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404(
+ global_time_lock,
+):
+ """Regression test for https://github.com/googleapis/python-bigquery/issues/2134
+
+ Sometimes after a Conflict, the fetch fails with a 404, but we know
+ because of the conflict that really the job does exist. Retry until we
+ get the job status (or timeout).
+ """
+ job_id = "abc123"
+ creds = _make_credentials()
+ http = object()
+ client = _make_client(project=PROJECT, credentials=creds, _http=http)
+ conn = client._connection = make_connection(
+ # We're mocking QueryJob._begin, so this is only going to be
+ # jobs.get requests and responses.
+ google.api_core.exceptions.TooManyRequests("this is retriable by default"),
+ google.api_core.exceptions.NotFound("we lost your job"),
+ google.api_core.exceptions.NotFound("we lost your job again, sorry"),
+ {
+ "jobReference": {
+ "projectId": PROJECT,
+ "location": "TESTLOC",
+ "jobId": job_id,
+ }
+ },
+ )
+
+ job_create_error = google.api_core.exceptions.Conflict("Job already exists.")
+ job_begin_patcher = mock.patch.object(
+ bqjob.QueryJob, "_begin", side_effect=job_create_error
+ )
+ job_id_patcher = mock.patch.object(
+ google.cloud.bigquery._job_helpers,
+ "make_job_id",
+ return_value=job_id,
+ )
+
+ with job_begin_patcher, job_id_patcher:
+        # The Conflict means a job with this ID already exists, so retry the
+        # jobs.get 404s until we can fetch it (or it fails for a
+        # non-retriable reason; see other tests).
+ result = client.query("SELECT 1;", job_id=None)
+
+ jobs_get_path = mock.call(
+ method="GET",
+ path=f"/projects/{PROJECT}/jobs/{job_id}",
+ query_params={
+ "projection": "full",
+ },
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
+ )
+ conn.api_request.assert_has_calls(
+ # Double-check that it was jobs.get that was called for each of our
+ # mocked responses.
+ [jobs_get_path]
+ * 4,
+ )
+ assert result.job_id == job_id
+
+
+def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert(
+ global_time_lock,
+):
+ """Regression test for https://github.com/googleapis/python-bigquery/issues/2134
+
+ Sometimes after a Conflict, the fetch fails with a 404. If it keeps
+ failing with a 404, assume that the job actually doesn't exist.
+ """
+ job_id_1 = "abc123"
+ job_id_2 = "xyz789"
+ creds = _make_credentials()
+ http = object()
+ client = _make_client(project=PROJECT, credentials=creds, _http=http)
+
+ # We're mocking QueryJob._begin, so that the connection should only get
+ # jobs.get requests.
+ job_create_error = google.api_core.exceptions.Conflict("Job already exists.")
+ job_begin_patcher = mock.patch.object(
+ bqjob.QueryJob, "_begin", side_effect=job_create_error
+ )
+ conn = client._connection = make_connection(
+ google.api_core.exceptions.NotFound("we lost your job again, sorry"),
+ {
+ "jobReference": {
+ "projectId": PROJECT,
+ "location": "TESTLOC",
+ "jobId": job_id_2,
+ }
+ },
+ )
+
+ # Choose a small deadline so the 404 retries give up.
+ retry = google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1)
+ job_id_patcher = mock.patch.object(
+ google.cloud.bigquery._job_helpers,
+ "make_job_id",
+ side_effect=[job_id_1, job_id_2],
+ )
+ retry_patcher = mock.patch.object(
+ google.cloud.bigquery.retry,
+ "_DEFAULT_GET_JOB_CONFLICT_RETRY",
+ retry,
+ )
+
+ with freezegun.freeze_time(
+ "2025-01-01 00:00:00",
+ # 10x the retry deadline to guarantee a timeout.
+ auto_tick_seconds=10,
+ ), job_begin_patcher, job_id_patcher, retry_patcher:
+        # A job with this ID should already exist because of the Conflict, but
+        # the jobs.get retries keep returning 404 until the short deadline
+        # expires, so the client falls back to a fresh job ID.
+ result = client.query("SELECT 1;", job_id=None)
+
+ jobs_get_path_1 = mock.call(
+ method="GET",
+ path=f"/projects/{PROJECT}/jobs/{job_id_1}",
+ query_params={
+ "projection": "full",
+ },
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
+ )
+ jobs_get_path_2 = mock.call(
+ method="GET",
+ path=f"/projects/{PROJECT}/jobs/{job_id_2}",
+ query_params={
+ "projection": "full",
+ },
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
+ )
+ conn.api_request.assert_has_calls(
+ # Double-check that it was jobs.get that was called for each of our
+ # mocked responses.
+ [jobs_get_path_1, jobs_get_path_2],
+ )
+ assert result.job_id == job_id_2
+
+
+def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retry(global_time_lock):
+ """Regression test for https://github.com/googleapis/python-bigquery/issues/2134
+
+ If we get a 409 conflict on jobs.insert, and we are using a random
+ job ID, we should retry by getting the job by ID. This test ensures that
+ if the get job by ID fails, we retry the whole sequence.
+ """
+ from google.cloud.bigquery import job
+
+ client = _make_client(project=PROJECT, credentials=_make_credentials())
+ job_id = "some-random-job-id"
+ query_text = "SELECT 1"
+ job_config = job.QueryJobConfig()
+ job_config.use_legacy_sql = False
+
+ job_resource = {
+ "jobReference": {"projectId": PROJECT, "jobId": job_id},
+ "configuration": {"query": {"query": query_text}},
+ "status": {"state": "DONE"},
+ }
+
+ conn = make_connection(
+ # First attempt at jobs.insert fails with a 409
+ google.api_core.exceptions.Conflict("Job already exists."),
+ # First attempt at jobs.get fails with a 500
+ google.api_core.exceptions.InternalServerError("get job failed"),
+ # Second attempt at jobs.insert succeeds
+ job_resource,
+ )
+ client._connection = conn
+
+ job_id_patcher = mock.patch.object(
+ google.cloud.bigquery._job_helpers,
+ "make_job_id",
+ return_value=job_id,
+ )
+
+ with job_id_patcher:
+ query_job = client.query(query_text, job_config=job_config, job_id=None)
+
+ assert query_job.job_id == job_id
diff --git a/tests/unit/test_create_dataset.py b/tests/unit/test_create_dataset.py
index 81af52261..b144471ca 100644
--- a/tests/unit/test_create_dataset.py
+++ b/tests/unit/test_create_dataset.py
@@ -12,11 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from unittest import mock
+
from google.cloud.bigquery.dataset import Dataset, DatasetReference
from .helpers import make_connection, dataset_polymorphic, make_client
import google.cloud.bigquery.dataset
from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
-import mock
import pytest
@@ -63,6 +64,8 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
"datasetId": "starry-skies",
"tableId": "northern-hemisphere",
}
+ DEFAULT_ROUNDING_MODE = "ROUND_HALF_EVEN"
+ RESOURCE_TAGS = {"123456789012/foo": "bar"}
RESOURCE = {
"datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
"etag": "etag",
@@ -73,6 +76,8 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
"defaultTableExpirationMs": "3600",
"labels": LABELS,
"access": [{"role": "OWNER", "userByEmail": USER_EMAIL}, {"view": VIEW}],
+ "defaultRoundingMode": DEFAULT_ROUNDING_MODE,
+ "resourceTags": RESOURCE_TAGS,
}
conn = client._connection = make_connection(RESOURCE)
entries = [
@@ -88,8 +93,9 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
before.default_table_expiration_ms = 3600
before.location = LOCATION
before.labels = LABELS
+ before.resource_tags = RESOURCE_TAGS
+ before.default_rounding_mode = DEFAULT_ROUNDING_MODE
after = client.create_dataset(before)
-
assert after.dataset_id == DS_ID
assert after.project == PROJECT
assert after.etag == RESOURCE["etag"]
@@ -99,6 +105,8 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
assert after.location == LOCATION
assert after.default_table_expiration_ms == 3600
assert after.labels == LABELS
+ assert after.default_rounding_mode == DEFAULT_ROUNDING_MODE
+ assert after.resource_tags == RESOURCE_TAGS
conn.api_request.assert_called_once_with(
method="POST",
@@ -109,11 +117,13 @@ def test_create_dataset_w_attrs(client, PROJECT, DS_ID):
"friendlyName": FRIENDLY_NAME,
"location": LOCATION,
"defaultTableExpirationMs": "3600",
+ "defaultRoundingMode": DEFAULT_ROUNDING_MODE,
"access": [
{"role": "OWNER", "userByEmail": USER_EMAIL},
{"view": VIEW, "role": None},
],
"labels": LABELS,
+ "resourceTags": RESOURCE_TAGS,
},
timeout=DEFAULT_TIMEOUT,
)
@@ -362,6 +372,187 @@ def test_create_dataset_alreadyexists_w_exists_ok_true(PROJECT, DS_ID, LOCATION)
},
timeout=DEFAULT_TIMEOUT,
),
- mock.call(method="GET", path=get_path, timeout=DEFAULT_TIMEOUT),
+ mock.call(
+ method="GET",
+ path=get_path,
+ timeout=DEFAULT_TIMEOUT,
+ query_params={},
+ ),
]
)
+
+
+def test_create_dataset_with_default_rounding_mode_if_value_is_none(
+ PROJECT, DS_ID, LOCATION
+):
+ default_rounding_mode = None
+ path = "/projects/%s/datasets" % PROJECT
+ resource = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "{}:{}".format(PROJECT, DS_ID),
+ "location": LOCATION,
+ }
+ client = make_client(location=LOCATION)
+ conn = client._connection = make_connection(resource)
+
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ before = Dataset(ds_ref)
+ before.default_rounding_mode = default_rounding_mode
+ after = client.create_dataset(before)
+
+ assert after.dataset_id == DS_ID
+ assert after.project == PROJECT
+ assert after.default_rounding_mode is None
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path=path,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "labels": {},
+ "location": LOCATION,
+ "defaultRoundingMode": "ROUNDING_MODE_UNSPECIFIED",
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_create_dataset_with_default_rounding_mode_if_value_is_not_string(
+ PROJECT, DS_ID, LOCATION
+):
+ default_rounding_mode = 10
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ dataset = Dataset(ds_ref)
+ with pytest.raises(ValueError) as e:
+ dataset.default_rounding_mode = default_rounding_mode
+ assert str(e.value) == "Pass a string, or None"
+
+
+def test_create_dataset_with_default_rounding_mode_if_value_is_not_in_possible_values(
+ PROJECT, DS_ID
+):
+ default_rounding_mode = "ROUND_HALF_AWAY_FROM_ZEROS"
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ dataset = Dataset(ds_ref)
+ with pytest.raises(ValueError) as e:
+ dataset.default_rounding_mode = default_rounding_mode
+ assert (
+ str(e.value)
+ == "rounding mode needs to be one of ROUNDING_MODE_UNSPECIFIED,ROUND_HALF_AWAY_FROM_ZERO,ROUND_HALF_EVEN"
+ )
+
+
+def test_create_dataset_with_default_rounding_mode_if_value_is_in_possible_values(
+ PROJECT, DS_ID, LOCATION
+):
+ default_rounding_mode = "ROUND_HALF_AWAY_FROM_ZERO"
+ path = "/projects/%s/datasets" % PROJECT
+ resource = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "{}:{}".format(PROJECT, DS_ID),
+ "location": LOCATION,
+ }
+ client = make_client(location=LOCATION)
+ conn = client._connection = make_connection(resource)
+
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ before = Dataset(ds_ref)
+ before.default_rounding_mode = default_rounding_mode
+ after = client.create_dataset(before)
+
+ assert after.dataset_id == DS_ID
+ assert after.project == PROJECT
+ assert after.default_rounding_mode is None
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path=path,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "labels": {},
+ "location": LOCATION,
+ "defaultRoundingMode": default_rounding_mode,
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_create_dataset_with_max_time_travel_hours(PROJECT, DS_ID, LOCATION):
+ path = "/projects/%s/datasets" % PROJECT
+ max_time_travel_hours = 24 * 3
+
+ resource = {
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "etag": "etag",
+ "id": "{}:{}".format(PROJECT, DS_ID),
+ "location": LOCATION,
+ "maxTimeTravelHours": max_time_travel_hours,
+ }
+ client = make_client(location=LOCATION)
+ conn = client._connection = make_connection(resource)
+
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ before = Dataset(ds_ref)
+ before.max_time_travel_hours = max_time_travel_hours
+ after = client.create_dataset(before)
+ assert after.dataset_id == DS_ID
+ assert after.project == PROJECT
+ assert after.max_time_travel_hours == max_time_travel_hours
+
+ conn.api_request.assert_called_once_with(
+ method="POST",
+ path=path,
+ data={
+ "datasetReference": {"projectId": PROJECT, "datasetId": DS_ID},
+ "labels": {},
+ "location": LOCATION,
+ "maxTimeTravelHours": max_time_travel_hours,
+ },
+ timeout=DEFAULT_TIMEOUT,
+ )
+
+
+def test_create_dataset_with_max_time_travel_hours_not_multiple_of_24(
+ PROJECT, DS_ID, LOCATION
+):
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ dataset = Dataset(ds_ref)
+ with pytest.raises(ValueError) as e:
+ dataset.max_time_travel_hours = 50
+ assert str(e.value) == "Time Travel Window should be multiple of 24"
+
+
+def test_create_dataset_with_max_time_travel_hours_is_less_than_2_days(
+ PROJECT, DS_ID, LOCATION
+):
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ dataset = Dataset(ds_ref)
+ with pytest.raises(ValueError) as e:
+ dataset.max_time_travel_hours = 24
+ assert (
+ str(e.value)
+ == "Time Travel Window should be from 48 to 168 hours (2 to 7 days)"
+ )
+
+
+def test_create_dataset_with_max_time_travel_hours_is_greater_than_7_days(
+ PROJECT, DS_ID, LOCATION
+):
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ dataset = Dataset(ds_ref)
+ with pytest.raises(ValueError) as e:
+ dataset.max_time_travel_hours = 192
+ assert (
+ str(e.value)
+ == "Time Travel Window should be from 48 to 168 hours (2 to 7 days)"
+ )
+
+
+def test_create_dataset_with_max_time_travel_hours_is_not_int(PROJECT, DS_ID, LOCATION):
+ ds_ref = DatasetReference(PROJECT, DS_ID)
+ dataset = Dataset(ds_ref)
+ with pytest.raises(ValueError) as e:
+ dataset.max_time_travel_hours = "50"
+ assert str(e.value) == "max_time_travel_hours must be an integer. Got 50"
diff --git a/tests/unit/test_dataset.py b/tests/unit/test_dataset.py
index 856674daf..604e5ed2e 100644
--- a/tests/unit/test_dataset.py
+++ b/tests/unit/test_dataset.py
@@ -13,12 +13,13 @@
# limitations under the License.
import unittest
+from unittest import mock
-import mock
from google.cloud.bigquery.routine.routine import Routine, RoutineReference
import pytest
from google.cloud.bigquery.dataset import (
AccessEntry,
+ Condition,
Dataset,
DatasetReference,
Table,
@@ -152,6 +153,25 @@ def test_from_api_repr_w_unknown_entity_type(self):
exp_resource = entry.to_api_repr()
self.assertEqual(resource, exp_resource)
+ def test_from_api_repr_wo_role(self):
+ resource = {
+ "view": {
+ "projectId": "my-project",
+ "datasetId": "my_dataset",
+ "tableId": "my_table",
+ }
+ }
+ entry = self._get_target_class().from_api_repr(resource)
+ exp_entry = self._make_one(
+ role=None,
+ entity_type="view",
+ entity_id=resource["view"],
+ )
+
+ assert entry.entity_type == exp_entry.entity_type
+ assert entry.entity_id == exp_entry.entity_id
+ assert entry.role is None
+
def test_to_api_repr_w_extra_properties(self):
resource = {
"role": "READER",
@@ -162,15 +182,6 @@ def test_to_api_repr_w_extra_properties(self):
exp_resource = entry.to_api_repr()
self.assertEqual(resource, exp_resource)
- def test_from_api_repr_entries_w_extra_keys(self):
- resource = {
- "role": "READER",
- "specialGroup": "projectReaders",
- "userByEmail": "salmon@example.com",
- }
- with self.assertRaises(ValueError):
- self._get_target_class().from_api_repr(resource)
-
def test_view_getter_setter(self):
view = {
"projectId": "my_project",
@@ -290,7 +301,10 @@ def test_dataset_getter_setter_dataset_ref(self):
entry.dataset = dataset_ref
resource = entry.to_api_repr()
exp_resource = {
- "dataset": {"dataset": dataset_ref, "targetTypes": None},
+ "dataset": {
+ "dataset": {"datasetId": "my_dataset", "projectId": "my-project"},
+ "targetTypes": None,
+ },
"role": None,
}
self.assertEqual(resource, exp_resource)
@@ -477,6 +491,278 @@ def test_dataset_target_types_getter_setter_w_dataset(self):
self.assertEqual(entry.dataset_target_types, target_types)
+# --- Tests for AccessEntry when using Condition ---
+
+EXPRESSION = "request.time < timestamp('2026-01-01T00:00:00Z')"
+TITLE = "Expires end 2025"
+DESCRIPTION = "Access expires at the start of 2026."
+
+
+@pytest.fixture
+def condition_1():
+ """Provides a sample Condition object."""
+ return Condition(
+ expression=EXPRESSION,
+ title=TITLE,
+ description=DESCRIPTION,
+ )
+
+
+@pytest.fixture
+def condition_1_api_repr():
+ """Provides the API representation for condition_1."""
+ # Use the actual to_api_repr method
+ return Condition(
+ expression=EXPRESSION,
+ title=TITLE,
+ description=DESCRIPTION,
+ ).to_api_repr()
+
+
+@pytest.fixture
+def condition_2():
+ """Provides a second, different Condition object."""
+ return Condition(
+ expression="resource.name.startsWith('projects/_/buckets/restricted/')",
+ title="Restricted Buckets",
+ )
+
+
+@pytest.fixture
+def condition_2_api_repr():
+ """Provides the API representation for condition2."""
+ # Use the actual to_api_repr method
+ return Condition(
+ expression="resource.name.startsWith('projects/_/buckets/restricted/')",
+ title="Restricted Buckets",
+ ).to_api_repr()
+
+
+class TestAccessEntryAndCondition:
+ @staticmethod
+ def _get_target_class():
+ return AccessEntry
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ # Test __init__ without condition
+ def test_init_without_condition(self):
+ entry = AccessEntry("READER", "userByEmail", "test@example.com")
+ assert entry.role == "READER"
+ assert entry.entity_type == "userByEmail"
+ assert entry.entity_id == "test@example.com"
+ assert entry.condition is None
+ # Accessing _properties is for internal verification in tests
+ assert "condition" not in entry._properties
+
+ # Test __init__ with condition object
+ def test_init_with_condition_object(self, condition_1, condition_1_api_repr):
+ entry = AccessEntry(
+ "READER", "userByEmail", "test@example.com", condition=condition_1
+ )
+ assert entry.condition == condition_1
+ assert entry._properties.get("condition") == condition_1_api_repr
+
+ # Test __init__ with condition=None
+ def test_init_with_condition_none(self):
+ entry = AccessEntry("READER", "userByEmail", "test@example.com", condition=None)
+ assert entry.condition is None
+
+ # Test condition getter/setter
+ def test_condition_getter_setter(
+ self, condition_1, condition_1_api_repr, condition_2, condition_2_api_repr
+ ):
+ entry = AccessEntry("WRITER", "group", "admins@example.com")
+ assert entry.condition is None
+
+ # Set condition 1
+ entry.condition = condition_1
+ assert entry.condition.to_api_repr() == condition_1_api_repr
+ assert entry._properties.get("condition") == condition_1_api_repr
+
+ # Set condition 2
+ entry.condition = condition_2
+ assert entry.condition.to_api_repr() == condition_2_api_repr
+ assert entry._properties.get("condition") != condition_1_api_repr
+ assert entry._properties.get("condition") == condition_2.to_api_repr()
+
+ # Set back to None
+ entry.condition = None
+ assert entry.condition is None
+
+ # Set condition using a dict
+ entry.condition = condition_1_api_repr
+ assert entry._properties.get("condition") == condition_1_api_repr
+
+ # Test setter validation
+ def test_condition_setter_invalid_type(self):
+ entry = AccessEntry("READER", "domain", "example.com")
+ with pytest.raises(
+ TypeError, match="condition must be a Condition object, dict, or None"
+ ):
+ entry.condition = 123 # type: ignore
+
+ # Test equality/hash without condition
+ def test_equality_and_hash_without_condition(self):
+ entry1 = AccessEntry("OWNER", "specialGroup", "projectOwners")
+ entry2 = AccessEntry("OWNER", "specialGroup", "projectOwners")
+ entry3 = AccessEntry("WRITER", "specialGroup", "projectOwners")
+ assert entry1 == entry2
+ assert entry1 != entry3
+ assert hash(entry1) == hash(entry2)
+ assert hash(entry1) != hash(entry3) # Usually true
+
+ def test_equality_and_hash_from_api_repr(self):
+ """Compare equal entries where one was created via from_api_repr."""
+ entry1 = AccessEntry("OWNER", "specialGroup", "projectOwners")
+ entry2 = AccessEntry.from_api_repr(
+ {"role": "OWNER", "specialGroup": "projectOwners"}
+ )
+ assert entry1 == entry2
+ assert hash(entry1) == hash(entry2)
+
+ def test_equality_and_hash_with_condition(self, condition_1, condition_2):
+ cond1a = Condition(
+ condition_1.expression, condition_1.title, condition_1.description
+ )
+ cond1b = Condition(
+ condition_1.expression, condition_1.title, condition_1.description
+ ) # Same values, different object
+
+ entry1a = AccessEntry(
+ "READER", "userByEmail", "a@example.com", condition=cond1a
+ )
+ entry1b = AccessEntry(
+ "READER", "userByEmail", "a@example.com", condition=cond1b
+ ) # Different Condition instance
+ entry2 = AccessEntry(
+ "READER", "userByEmail", "a@example.com", condition=condition_2
+ )
+ entry3 = AccessEntry("READER", "userByEmail", "a@example.com") # No condition
+ entry4 = AccessEntry(
+ "WRITER", "userByEmail", "a@example.com", condition=cond1a
+ ) # Different role
+
+ assert entry1a == entry1b
+ assert entry1a != entry2
+ assert entry1a != entry3
+ assert entry1a != entry4
+ assert entry2 != entry3
+
+ assert hash(entry1a) == hash(entry1b)
+ assert hash(entry1a) != hash(entry2) # Usually true
+ assert hash(entry1a) != hash(entry3) # Usually true
+ assert hash(entry1a) != hash(entry4) # Usually true
+
+ # Test to_api_repr with condition
+ def test_to_api_repr_with_condition(self, condition_1, condition_1_api_repr):
+ entry = AccessEntry(
+ "WRITER", "groupByEmail", "editors@example.com", condition=condition_1
+ )
+ expected_repr = {
+ "role": "WRITER",
+ "groupByEmail": "editors@example.com",
+ "condition": condition_1_api_repr,
+ }
+ assert entry.to_api_repr() == expected_repr
+
+ def test_view_property_with_condition(self, condition_1):
+ """Test setting/getting view property when condition is present."""
+ entry = AccessEntry(role=None, entity_type="view", condition=condition_1)
+ view_ref = TableReference(DatasetReference("proj", "dset"), "view_tbl")
+ entry.view = view_ref # Use the setter
+ assert entry.view == view_ref
+ assert entry.condition == condition_1 # Condition should persist
+ assert entry.role is None
+ assert entry.entity_type == "view"
+
+ # Check internal representation
+ assert "view" in entry._properties
+ assert "condition" in entry._properties
+
+ def test_user_by_email_property_with_condition(self, condition_1):
+ """Test setting/getting user_by_email property when condition is present."""
+ entry = AccessEntry(
+ role="READER", entity_type="userByEmail", condition=condition_1
+ )
+ email = "test@example.com"
+ entry.user_by_email = email # Use the setter
+ assert entry.user_by_email == email
+ assert entry.condition == condition_1 # Condition should persist
+ assert entry.role == "READER"
+ assert entry.entity_type == "userByEmail"
+
+ # Check internal representation
+ assert "userByEmail" in entry._properties
+ assert "condition" in entry._properties
+
+ # Test from_api_repr without condition
+ def test_from_api_repr_without_condition(self):
+ api_repr = {"role": "OWNER", "userByEmail": "owner@example.com"}
+ entry = AccessEntry.from_api_repr(api_repr)
+ assert entry.role == "OWNER"
+ assert entry.entity_type == "userByEmail"
+ assert entry.entity_id == "owner@example.com"
+ assert entry.condition is None
+
+ # Test from_api_repr with condition
+ def test_from_api_repr_with_condition(self, condition_1, condition_1_api_repr):
+ api_repr = {
+ "role": "READER",
+ "view": {"projectId": "p", "datasetId": "d", "tableId": "v"},
+ "condition": condition_1_api_repr,
+ }
+ entry = AccessEntry.from_api_repr(api_repr)
+ assert entry.role == "READER"
+ assert entry.entity_type == "view"
+ # The entity_id for view/routine/dataset is the dict itself
+ assert entry.entity_id == {"projectId": "p", "datasetId": "d", "tableId": "v"}
+ assert entry.condition == condition_1
+
+ # Test from_api_repr edge case
+ def test_from_api_repr_no_entity(self, condition_1, condition_1_api_repr):
+ api_repr = {"role": "READER", "condition": condition_1_api_repr}
+ entry = AccessEntry.from_api_repr(api_repr)
+ assert entry.role == "READER"
+ assert entry.entity_type is None
+ assert entry.entity_id is None
+ assert entry.condition == condition_1
+
+ def test_dataset_property_with_condition(self, condition_1):
+ project = "my-project"
+ dataset_id = "my_dataset"
+ dataset_ref = DatasetReference(project, dataset_id)
+ entry = self._make_one(None)
+ entry.dataset = dataset_ref
+ entry.condition = condition_1
+
+ resource = entry.to_api_repr()
+ exp_resource = {
+ "role": None,
+ "dataset": {
+ "dataset": {"datasetId": "my_dataset", "projectId": "my-project"},
+ "targetTypes": None,
+ },
+ "condition": {
+ "expression": "request.time < timestamp('2026-01-01T00:00:00Z')",
+ "title": "Expires end 2025",
+ "description": "Access expires at the start of 2026.",
+ },
+ }
+ assert resource == exp_resource
+ # Check internal representation
+ assert "dataset" in entry._properties
+ assert "condition" in entry._properties
+
+ def test_repr_from_api_repr(self):
+ """Check that repr() includes the correct entity_type when the object is initialized from a dictionary."""
+ api_repr = {"role": "OWNER", "userByEmail": "owner@example.com"}
+ entry = AccessEntry.from_api_repr(api_repr)
+ entry_str = repr(entry)
+ assert entry_str == ""
+
+
class TestDatasetReference(unittest.TestCase):
@staticmethod
def _get_target_class():
@@ -622,6 +908,10 @@ def test___repr__(self):
expected = "DatasetReference('project1', 'dataset1')"
self.assertEqual(repr(dataset), expected)
+ def test___str__(self):
+ dataset = self._make_one("project1", "dataset1")
+ self.assertEqual(str(dataset), "project1.dataset1")
+
class TestDataset(unittest.TestCase):
from google.cloud.bigquery.dataset import DatasetReference
@@ -630,6 +920,16 @@ class TestDataset(unittest.TestCase):
DS_ID = "dataset-id"
DS_REF = DatasetReference(PROJECT, DS_ID)
KMS_KEY_NAME = "projects/1/locations/us/keyRings/1/cryptoKeys/1"
+ DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path"
+ PARAMETERS = {"key": "value"}
+ API_REPR = {
+ "datasetReference": {"projectId": "project", "datasetId": "dataset-id"},
+ "labels": {},
+ "externalCatalogDatasetOptions": {
+ "defaultStorageLocationUri": DEFAULT_STORAGE_LOCATION_URI,
+ "parameters": PARAMETERS,
+ },
+ }
@staticmethod
def _get_target_class():
@@ -645,7 +945,9 @@ def _setUpConstants(self):
from google.cloud._helpers import UTC
self.WHEN_TS = 1437767599.006
- self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC)
+ self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC).replace(
+ tzinfo=UTC
+ )
self.ETAG = "ETAG"
self.DS_FULL_ID = "%s:%s" % (self.PROJECT, self.DS_ID)
self.RESOURCE_URL = "http://example.com/path/to/resource"
@@ -663,6 +965,7 @@ def _make_resource(self):
"location": "US",
"selfLink": self.RESOURCE_URL,
"defaultTableExpirationMs": 3600,
+ "storageBillingModel": "LOGICAL",
"access": [
{"role": "OWNER", "userByEmail": USER_EMAIL},
{"role": "OWNER", "groupByEmail": GROUP_EMAIL},
@@ -688,7 +991,6 @@ def _verify_access_entry(self, access_entries, resource):
self.assertEqual(a_entry.entity_id, r_entry["entity_id"])
def _verify_readonly_resource_properties(self, dataset, resource):
-
self.assertEqual(dataset.project, self.PROJECT)
self.assertEqual(dataset.dataset_id, self.DS_ID)
self.assertEqual(dataset.reference.project, self.PROJECT)
@@ -712,7 +1014,6 @@ def _verify_readonly_resource_properties(self, dataset, resource):
self.assertIsNone(dataset.self_link)
def _verify_resource_properties(self, dataset, resource):
-
self._verify_readonly_resource_properties(dataset, resource)
if "defaultTableExpirationMs" in resource:
@@ -725,6 +1026,9 @@ def _verify_resource_properties(self, dataset, resource):
self.assertEqual(dataset.description, resource.get("description"))
self.assertEqual(dataset.friendly_name, resource.get("friendlyName"))
self.assertEqual(dataset.location, resource.get("location"))
+ self.assertEqual(
+ dataset.is_case_insensitive, resource.get("isCaseInsensitive") or False
+ )
if "defaultEncryptionConfiguration" in resource:
self.assertEqual(
dataset.default_encryption_configuration.kms_key_name,
@@ -732,7 +1036,12 @@ def _verify_resource_properties(self, dataset, resource):
)
else:
self.assertIsNone(dataset.default_encryption_configuration)
-
+ if "storageBillingModel" in resource:
+ self.assertEqual(
+ dataset.storage_billing_model, resource.get("storageBillingModel")
+ )
+ else:
+ self.assertIsNone(dataset.storage_billing_model)
if "access" in resource:
self._verify_access_entry(dataset.access_entries, resource)
else:
@@ -757,6 +1066,8 @@ def test_ctor_defaults(self):
self.assertIsNone(dataset.description)
self.assertIsNone(dataset.friendly_name)
self.assertIsNone(dataset.location)
+ self.assertEqual(dataset.is_case_insensitive, False)
+ self.assertIsNone(dataset.access_policy_version)
def test_ctor_string(self):
dataset = self._make_one("some-project.some_dset")
@@ -782,7 +1093,15 @@ def test_ctor_explicit(self):
self.assertEqual(
dataset.path, "/projects/%s/datasets/%s" % (OTHER_PROJECT, self.DS_ID)
)
- self.assertEqual(dataset.access_entries, entries)
+    # Creating a list of entries relies on AccessEntry.from_api_repr, which does
+    # not create an object in exactly the same way as calling the class directly,
+    # so we compare the .role, .entity_type, and .entity_id of each entry instead
+    # of comparing the AccessEntry objects themselves.
+ entry_pairs = zip(dataset.access_entries, entries)
+ for pair in entry_pairs:
+ assert pair[0].role == pair[1].role
+ assert pair[0].entity_type == pair[1].entity_type
+ assert pair[0].entity_id == pair[1].entity_id
self.assertIsNone(dataset.created)
self.assertIsNone(dataset.full_dataset_id)
@@ -794,6 +1113,35 @@ def test_ctor_explicit(self):
self.assertIsNone(dataset.description)
self.assertIsNone(dataset.friendly_name)
self.assertIsNone(dataset.location)
+ self.assertEqual(dataset.is_case_insensitive, False)
+
+ def test_access_entries_getter_from_api_repr(self):
+ """Check that `in` works correctly when Dataset is made via from_api_repr()."""
+ from google.cloud.bigquery.dataset import AccessEntry
+
+ dataset = self._get_target_class().from_api_repr(
+ {
+ "datasetReference": {"projectId": "my-proj", "datasetId": "my_dset"},
+ "access": [
+ {
+ "role": "OWNER",
+ "userByEmail": "uilma@example.com",
+ },
+ {
+ "role": "READER",
+ "groupByEmail": "rhubbles@example.com",
+ },
+ ],
+ }
+ )
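+    # Membership via `in` relies on AccessEntry.__eq__ treating entries built
+    # by the constructor as equal to ones rebuilt by from_api_repr().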
+ assert (
+ AccessEntry("OWNER", "userByEmail", "uilma@example.com")
+ in dataset.access_entries
+ )
+ assert (
+ AccessEntry("READER", "groupByEmail", "rhubbles@example.com")
+ in dataset.access_entries
+ )
def test_access_entries_setter_non_list(self):
dataset = self._make_one(self.DS_REF)
@@ -814,8 +1162,18 @@ def test_access_entries_setter(self):
dataset = self._make_one(self.DS_REF)
phred = AccessEntry("OWNER", "userByEmail", "phred@example.com")
bharney = AccessEntry("OWNER", "userByEmail", "bharney@example.com")
- dataset.access_entries = [phred, bharney]
- self.assertEqual(dataset.access_entries, [phred, bharney])
+ entries = [phred, bharney]
+ dataset.access_entries = entries
+
+    # Creating a list of entries relies on AccessEntry.from_api_repr, which does
+    # not create an object in exactly the same way as calling the class directly,
+    # so we compare the .role, .entity_type, and .entity_id of each entry instead
+    # of comparing the AccessEntry objects themselves.
+ entry_pairs = zip(dataset.access_entries, entries)
+ for pair in entry_pairs:
+ assert pair[0].role == pair[1].role
+ assert pair[0].entity_type == pair[1].entity_type
+ assert pair[0].entity_id == pair[1].entity_id
def test_default_partition_expiration_ms(self):
dataset = self._make_one("proj.dset")
@@ -865,6 +1223,28 @@ def test_location_setter(self):
dataset.location = "LOCATION"
self.assertEqual(dataset.location, "LOCATION")
+ def test_resource_tags_update_in_place(self):
+ dataset = self._make_one(self.DS_REF)
+ tags = dataset.resource_tags
+ tags["123456789012/foo"] = "bar" # update in place
+ self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"})
+
+ def test_resource_tags_setter(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.resource_tags = {"123456789012/foo": "bar"}
+ self.assertEqual(dataset.resource_tags, {"123456789012/foo": "bar"})
+
+ def test_resource_tags_setter_bad_value(self):
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(ValueError):
+ dataset.resource_tags = "invalid"
+ with self.assertRaises(ValueError):
+ dataset.resource_tags = 123
+
+ def test_resource_tags_getter_missing_value(self):
+ dataset = self._make_one(self.DS_REF)
+ self.assertEqual(dataset.resource_tags, {})
+
def test_labels_update_in_place(self):
dataset = self._make_one(self.DS_REF)
del dataset._properties["labels"] # don't start w/ existing dict
@@ -886,6 +1266,26 @@ def test_labels_getter_missing_value(self):
dataset = self._make_one(self.DS_REF)
self.assertEqual(dataset.labels, {})
+ def test_is_case_insensitive_setter_bad_value(self):
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(ValueError):
+ dataset.is_case_insensitive = 0
+
+ def test_is_case_insensitive_setter_true(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.is_case_insensitive = True
+ self.assertEqual(dataset.is_case_insensitive, True)
+
+ def test_is_case_insensitive_setter_none(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.is_case_insensitive = None
+ self.assertEqual(dataset.is_case_insensitive, False)
+
+ def test_is_case_insensitive_setter_false(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.is_case_insensitive = False
+ self.assertEqual(dataset.is_case_insensitive, False)
+
def test_from_api_repr_missing_identity(self):
self._setUpConstants()
RESOURCE = {}
@@ -937,6 +1337,23 @@ def test_default_encryption_configuration_setter(self):
dataset.default_encryption_configuration = None
self.assertIsNone(dataset.default_encryption_configuration)
+ def test_storage_billing_model_setter(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.storage_billing_model = "PHYSICAL"
+ self.assertEqual(dataset.storage_billing_model, "PHYSICAL")
+
+ def test_storage_billing_model_setter_with_none(self):
+ dataset = self._make_one(self.DS_REF)
+ dataset.storage_billing_model = None
+ self.assertIsNone(dataset.storage_billing_model)
+
+ def test_storage_billing_model_setter_with_invalid_type(self):
+ dataset = self._make_one(self.DS_REF)
+ with self.assertRaises(ValueError) as raises:
+ dataset.storage_billing_model = object()
+
+ self.assertIn("storage_billing_model", str(raises.exception))
+
def test_from_string(self):
cls = self._get_target_class()
got = cls.from_string("string-project.string_dataset")
@@ -979,6 +1396,109 @@ def test___repr__(self):
expected = "Dataset(DatasetReference('project1', 'dataset1'))"
self.assertEqual(repr(dataset), expected)
+ def test_external_catalog_dataset_options_setter(self):
+ # GIVEN the parameters DEFAULT_STORAGE_LOCATION_URI and PARAMETERS
+ # WHEN an ExternalCatalogDatasetOptions obj is created
+ # and added to a dataset.
+ # THEN the api representation of the dataset will match API_REPR
+
+ from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions
+
+ dataset = self._make_one(self.DS_REF)
+
+ ecdo_obj = ExternalCatalogDatasetOptions(
+ default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
+ parameters=self.PARAMETERS,
+ )
+ dataset.external_catalog_dataset_options = ecdo_obj
+
+ result = dataset.to_api_repr()
+ expected = self.API_REPR
+ assert result == expected
+
+ def test_external_catalog_dataset_options_getter_prop_exists(self):
+ # GIVEN default dataset PLUS an ExternalCatalogDatasetOptions
+ # THEN confirm that the api_repr of the ExternalCatalogDatasetsOptions
+ # matches the api_repr of the external_catalog_dataset_options attribute.
+
+ from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions
+
+ dataset = self._make_one(self.DS_REF)
+ ecdo_obj = ExternalCatalogDatasetOptions(
+ default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
+ parameters=self.PARAMETERS,
+ )
+ dataset.external_catalog_dataset_options = ecdo_obj
+ result = dataset.external_catalog_dataset_options.to_api_repr()
+ expected = ecdo_obj.to_api_repr()
+ assert result == expected
+
+ def test_external_catalog_dataset_options_getter_prop_is_none(self):
+ # GIVEN only a default dataset
+ # THEN confirm that external_catalog_dataset_options is None
+
+ dataset = self._make_one(self.DS_REF)
+ expected = None
+ result = dataset.external_catalog_dataset_options
+ assert result == expected
+
+ def test_external_catalog_dataset_options_from_api_repr(self):
+ # GIVEN default dataset including an ExternalCatalogDatasetOptions
+ # THEN confirm that the api_repr of the ExternalCatalogDatasetsOptions
+ # on a dataset object created via from_api_repr matches the api_repr
+ # of the "externalCatalogDatasetOptions" key.
+
+ api_repr = self.API_REPR
+ klass = self._get_target_class()
+ dataset = klass.from_api_repr(api_repr)
+
+ result = dataset.external_catalog_dataset_options.to_api_repr()
+ expected = api_repr["externalCatalogDatasetOptions"]
+ assert result == expected
+
+ def test_external_catalog_dataset_options_to_api_repr(self):
+ # GIVEN a dataset api_repr including an ExternalCatalogDatasetOptions key
+ # THEN confirm that the api_repr of that key from a dataset object created
+ # via the to_api_repr() method matches the value of the key
+ # used to create the dataset object
+
+ api_repr = self.API_REPR
+ klass = self._get_target_class()
+ dataset = klass.from_api_repr(api_repr)
+
+ result = dataset.to_api_repr()["externalCatalogDatasetOptions"]
+ expected = api_repr["externalCatalogDatasetOptions"]
+ assert result == expected
+
+ def test_access_policy_version_valid_input(self):
+ dataset = self._make_one(self.DS_REF)
+ # Valid inputs for access_policy_version are currently
+ # ints 1, 2, 3, and None
+ # We rely upon the BQ backend to validate acceptable integer
+ # values, rather than perform that validation in the client.
+ for expected in [1, 2, 3, None]:
+ # set property using setter and integer
+ dataset.access_policy_version = expected
+
+ # check getter and _properties dict
+ assert (
+ dataset.access_policy_version == expected
+ ), f"Expected {expected} but got {dataset.access_policy_version}"
+ assert dataset._properties["accessPolicyVersion"] == expected
+
+ def test_access_policy_version_invalid_input(self):
+ dataset = self._make_one(self.DS_REF)
+ # Valid inputs for access_policy_version are currently
+ # ints 1, 2, 3, and None
+
+ with pytest.raises(ValueError):
+ invalid_value = "a string"
+ dataset.access_policy_version = invalid_value
+
+ with pytest.raises(ValueError):
+ invalid_value = 42.0
+ dataset.access_policy_version = invalid_value
+
class TestDatasetListItem(unittest.TestCase):
@staticmethod
@@ -1056,3 +1576,261 @@ def test_table(self):
self.assertEqual(table.table_id, "table_id")
self.assertEqual(table.dataset_id, dataset_id)
self.assertEqual(table.project, project)
+
+
+class TestCondition:
+ EXPRESSION = 'resource.name.startsWith("projects/my-project/instances/")'
+ TITLE = "Instance Access"
+ DESCRIPTION = "Access to instances in my-project"
+
+ @pytest.fixture
+ def condition_instance(self):
+ """Provides a Condition instance for tests."""
+ return Condition(
+ expression=self.EXPRESSION,
+ title=self.TITLE,
+ description=self.DESCRIPTION,
+ )
+
+ @pytest.fixture
+ def condition_api_repr(self):
+ """Provides the API representation for the test Condition."""
+ return {
+ "expression": self.EXPRESSION,
+ "title": self.TITLE,
+ "description": self.DESCRIPTION,
+ }
+
+ # --- Basic Functionality Tests ---
+
+ def test_constructor_and_getters_full(self, condition_instance):
+ """Test initialization with all arguments and subsequent attribute access."""
+ assert condition_instance.expression == self.EXPRESSION
+ assert condition_instance.title == self.TITLE
+ assert condition_instance.description == self.DESCRIPTION
+
+ def test_constructor_and_getters_minimal(self):
+ """Test initialization with only the required expression."""
+ condition = Condition(expression=self.EXPRESSION)
+ assert condition.expression == self.EXPRESSION
+ assert condition.title is None
+ assert condition.description is None
+
+ def test_setters(self, condition_instance):
+ """Test setting attributes after initialization."""
+ new_title = "New Title"
+ new_desc = "New Description"
+ new_expr = "request.time < timestamp('2024-01-01T00:00:00Z')"
+
+ condition_instance.title = new_title
+ assert condition_instance.title == new_title
+
+ condition_instance.description = new_desc
+ assert condition_instance.description == new_desc
+
+ condition_instance.expression = new_expr
+ assert condition_instance.expression == new_expr
+
+ # Test setting title and description to empty strings
+ condition_instance.title = ""
+ assert condition_instance.title == ""
+
+ condition_instance.description = ""
+ assert condition_instance.description == ""
+
+ # Test setting optional fields back to None
+ condition_instance.title = None
+ assert condition_instance.title is None
+ condition_instance.description = None
+ assert condition_instance.description is None
+
+ # --- API Representation Tests ---
+
+ def test_to_api_repr_full(self, condition_instance, condition_api_repr):
+ """Test converting a fully populated Condition to API representation."""
+ api_repr = condition_instance.to_api_repr()
+ assert api_repr == condition_api_repr
+
+ def test_to_api_repr_minimal(self):
+ """Test converting a minimally populated Condition to API representation."""
+ condition = Condition(expression=self.EXPRESSION)
+ expected_api_repr = {
+ "expression": self.EXPRESSION,
+ "title": None,
+ "description": None,
+ }
+ api_repr = condition.to_api_repr()
+ assert api_repr == expected_api_repr
+
+ def test_from_api_repr_full(self, condition_api_repr):
+ """Test creating a Condition from a full API representation."""
+ condition = Condition.from_api_repr(condition_api_repr)
+ assert condition.expression == self.EXPRESSION
+ assert condition.title == self.TITLE
+ assert condition.description == self.DESCRIPTION
+
+ def test_from_api_repr_minimal(self):
+ """Test creating a Condition from a minimal API representation."""
+ minimal_repr = {"expression": self.EXPRESSION}
+ condition = Condition.from_api_repr(minimal_repr)
+ assert condition.expression == self.EXPRESSION
+ assert condition.title is None
+ assert condition.description is None
+
+ def test_from_api_repr_with_extra_fields(self):
+ """Test creating a Condition from an API repr with unexpected fields."""
+ api_repr = {
+ "expression": self.EXPRESSION,
+ "title": self.TITLE,
+ "unexpected_field": "some_value",
+ }
+ condition = Condition.from_api_repr(api_repr)
+ assert condition.expression == self.EXPRESSION
+ assert condition.title == self.TITLE
+ assert condition.description is None
+ # Check that the extra field didn't get added to internal properties
+ assert "unexpected_field" not in condition._properties
+
+    # --- Validation Tests ---
+
+ @pytest.mark.parametrize(
+ "kwargs, error_msg",
+ [
+ ({"expression": None}, "Pass a non-empty string for expression"), # type: ignore
+ ({"expression": ""}, "expression cannot be an empty string"),
+ ({"expression": 123}, "Pass a non-empty string for expression"), # type: ignore
+ ({"expression": EXPRESSION, "title": 123}, "Pass a string for title, or None"), # type: ignore
+ ({"expression": EXPRESSION, "description": False}, "Pass a string for description, or None"), # type: ignore
+ ],
+ )
+ def test_validation_init(self, kwargs, error_msg):
+ """Test validation during __init__."""
+ with pytest.raises(ValueError, match=error_msg):
+ Condition(**kwargs)
+
+ @pytest.mark.parametrize(
+ "attribute, value, error_msg",
+ [
+ ("expression", None, "Pass a non-empty string for expression"), # type: ignore
+ ("expression", "", "expression cannot be an empty string"),
+ ("expression", 123, "Pass a non-empty string for expression"), # type: ignore
+ ("title", 123, "Pass a string for title, or None"), # type: ignore
+ ("description", [], "Pass a string for description, or None"), # type: ignore
+ ],
+ )
+ def test_validation_setters(self, condition_instance, attribute, value, error_msg):
+ """Test validation via setters."""
+ with pytest.raises(ValueError, match=error_msg):
+ setattr(condition_instance, attribute, value)
+
+ def test_validation_expression_required_from_api(self):
+ """Test ValueError is raised if expression is missing in from_api_repr."""
+ api_repr = {"title": self.TITLE}
+ with pytest.raises(
+ ValueError, match="API representation missing required 'expression' field."
+ ):
+ Condition.from_api_repr(api_repr)
+
+ def test___eq___equality(self, condition_1):
+ result = condition_1
+ expected = condition_1
+ assert result == expected
+
+ def test___eq___equality_not_condition(self, condition_1):
+ result = condition_1
+ other = "not a condition"
+ expected = result.__eq__(other)
+ assert expected is NotImplemented
+
+    def test__ne__not_equality(self, condition_1, condition_2):
+        result = condition_1
+        other = condition_2
+        assert result != other
+
+    def test__hash__function(self, condition_2):
+        cond1 = Condition(
+            expression=self.EXPRESSION, title=self.TITLE, description=self.DESCRIPTION
+        )
+        # An equal but distinct instance, so __hash__ is exercised on two objects.
+        cond2 = Condition(
+            expression=self.EXPRESSION, title=self.TITLE, description=self.DESCRIPTION
+        )
+        cond_not_equal = condition_2
+        assert cond1 == cond2
+        assert cond1 is not cond2
+        assert hash(cond1) == hash(cond2)
+        assert hash(cond1) is not None
+        assert cond_not_equal != cond1
+        assert hash(cond_not_equal) != hash(cond1)
+
+ def test__hash__with_minimal_inputs(self):
+ cond1 = Condition(
+ expression="example",
+ title=None,
+ description=None,
+ )
+ assert hash(cond1) is not None
+
+ def test_access_entry_view_equality(self):
+ from google.cloud import bigquery
+
+ entry1 = bigquery.dataset.AccessEntry(
+ entity_type="view",
+ entity_id={
+ "projectId": "my_project",
+ "datasetId": "my_dataset",
+ "tableId": "my_table",
+ },
+ )
+ entry2 = bigquery.dataset.AccessEntry.from_api_repr(
+ {
+ "view": {
+ "projectId": "my_project",
+ "datasetId": "my_dataset",
+ "tableId": "my_table",
+ }
+ }
+ )
+
+ entry3 = bigquery.dataset.AccessEntry(
+ entity_type="routine",
+ entity_id={
+ "projectId": "my_project",
+ "datasetId": "my_dataset",
+ "routineId": "my_routine",
+ },
+ )
+
+ entry4 = bigquery.dataset.AccessEntry.from_api_repr(
+ {
+ "routine": {
+ "projectId": "my_project",
+ "datasetId": "my_dataset",
+ "routineId": "my_routine",
+ }
+ }
+ )
+
+ entry5 = bigquery.dataset.AccessEntry(
+ entity_type="dataset",
+ entity_id={
+ "dataset": {
+ "projectId": "my_project",
+ "datasetId": "my_dataset",
+ },
+ "target_types": "VIEWS",
+ },
+ )
+
+ entry6 = bigquery.dataset.AccessEntry.from_api_repr(
+ {
+ "dataset": {
+ "dataset": {
+ "projectId": "my_project",
+ "datasetId": "my_dataset",
+ },
+ "target_types": "VIEWS",
+ }
+ }
+ )
+
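+    # Each constructor-built entry should compare equal to its from_api_repr()
+    # counterpart for the view, routine, and dataset entity types.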
+ assert entry1 == entry2
+ assert entry3 == entry4
+ assert entry5 == entry6
diff --git a/tests/unit/test_dbapi__helpers.py b/tests/unit/test_dbapi__helpers.py
index 7cc1f11c3..9907df97b 100644
--- a/tests/unit/test_dbapi__helpers.py
+++ b/tests/unit/test_dbapi__helpers.py
@@ -25,7 +25,6 @@
from google.cloud.bigquery import query, table
from google.cloud.bigquery.dbapi import _helpers
from google.cloud.bigquery.dbapi import exceptions
-from tests.unit.helpers import _to_pyarrow
class TestQueryParameters(unittest.TestCase):
@@ -211,6 +210,10 @@ def test_empty_iterable(self):
self.assertEqual(list(result), [])
def test_non_empty_iterable(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("pyarrow")
+ from tests.unit.helpers import _to_pyarrow
+
rows_iterable = [
dict(
one=_to_pyarrow(1.1),
@@ -249,7 +252,6 @@ def test_non_empty_iterable(self):
class TestRaiseOnClosedDecorator(unittest.TestCase):
def _make_class(self):
class Foo(object):
-
class_member = "class member"
def __init__(self):
diff --git a/tests/unit/test_dbapi_connection.py b/tests/unit/test_dbapi_connection.py
index e96ab55d7..f5c77c448 100644
--- a/tests/unit/test_dbapi_connection.py
+++ b/tests/unit/test_dbapi_connection.py
@@ -13,11 +13,9 @@
# limitations under the License.
import gc
+import pytest
import unittest
-
-import mock
-
-from google.cloud import bigquery_storage
+from unittest import mock
class TestConnection(unittest.TestCase):
@@ -37,6 +35,10 @@ def _mock_client(self):
return mock_client
def _mock_bqstorage_client(self):
+        # Assumption: bigquery_storage exists. It's the test's responsibility to
+        # not use this helper or skip itself if bqstorage is not installed.
+ from google.cloud import bigquery_storage
+
mock_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
mock_client._transport = mock.Mock(spec=["channel"])
mock_client._transport.grpc_channel = mock.Mock(spec=["close"])
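+        # A fake transport and gRPC channel let tests assert whether close() was
+        # called on the channel when the DB-API connection is closed.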
@@ -54,6 +56,7 @@ def test_ctor_wo_bqstorage_client(self):
self.assertIs(connection._bqstorage_client, None)
def test_ctor_w_bqstorage_client(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery.dbapi import Connection
mock_client = self._mock_client()
@@ -83,6 +86,7 @@ def test_connect_wo_client(self, mock_client):
self.assertIsNotNone(connection._bqstorage_client)
def test_connect_w_client(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery.dbapi import connect
from google.cloud.bigquery.dbapi import Connection
@@ -98,6 +102,7 @@ def test_connect_w_client(self):
self.assertIs(connection._bqstorage_client, mock_bqstorage_client)
def test_connect_w_both_clients(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery.dbapi import connect
from google.cloud.bigquery.dbapi import Connection
@@ -117,6 +122,26 @@ def test_connect_w_both_clients(self):
self.assertIs(connection._client, mock_client)
self.assertIs(connection._bqstorage_client, mock_bqstorage_client)
+ def test_connect_prefer_bqstorage_client_false(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
+ from google.cloud.bigquery.dbapi import connect
+ from google.cloud.bigquery.dbapi import Connection
+
+ mock_client = self._mock_client()
+ mock_bqstorage_client = self._mock_bqstorage_client()
+ mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client
+
+ connection = connect(
+ client=mock_client,
+ bqstorage_client=mock_bqstorage_client,
+ prefer_bqstorage_client=False,
+ )
+
+ mock_client._ensure_bqstorage_client.assert_not_called()
+ self.assertIsInstance(connection, Connection)
+ self.assertIs(connection._client, mock_client)
+ self.assertIs(connection._bqstorage_client, None)
+
def test_raises_error_if_closed(self):
from google.cloud.bigquery.dbapi.exceptions import ProgrammingError
@@ -131,6 +156,7 @@ def test_raises_error_if_closed(self):
getattr(connection, method)()
def test_close_closes_all_created_bigquery_clients(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
client = self._mock_client()
bqstorage_client = self._mock_bqstorage_client()
@@ -153,6 +179,7 @@ def test_close_closes_all_created_bigquery_clients(self):
self.assertTrue(bqstorage_client._transport.grpc_channel.close.called)
def test_close_does_not_close_bigquery_clients_passed_to_it(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
client = self._mock_client()
bqstorage_client = self._mock_bqstorage_client()
connection = self._make_one(client=client, bqstorage_client=bqstorage_client)
diff --git a/tests/unit/test_dbapi_cursor.py b/tests/unit/test_dbapi_cursor.py
index d672c0f6c..6fca4cec0 100644
--- a/tests/unit/test_dbapi_cursor.py
+++ b/tests/unit/test_dbapi_cursor.py
@@ -12,14 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import mock
+import functools
import operator as op
import unittest
+from unittest import mock
import pytest
+import google.cloud.bigquery.table as bq_table
+
from google.api_core import exceptions
-from google.cloud import bigquery_storage
from tests.unit.helpers import _to_pyarrow
@@ -39,27 +41,45 @@ def _mock_client(
rows=None,
schema=None,
num_dml_affected_rows=None,
- default_query_job_config=None,
dry_run_job=False,
total_bytes_processed=0,
+ total_rows=None,
+ destination_table="test-project.test_dataset.test_table",
):
from google.cloud.bigquery import client
- if rows is None:
+ if total_rows is None:
total_rows = 0
- else:
- total_rows = len(rows)
+ if rows is not None:
+ total_rows = len(rows)
+ table = bq_table.TableReference.from_string(destination_table)
mock_client = mock.create_autospec(client.Client)
- mock_client.query.return_value = self._mock_job(
+ mock_job = self._mock_job(
total_rows=total_rows,
schema=schema,
num_dml_affected_rows=num_dml_affected_rows,
dry_run=dry_run_job,
total_bytes_processed=total_bytes_processed,
- rows=rows,
+ rows=self._mock_rows(
+ rows,
+ total_rows=total_rows,
+ schema=schema,
+ num_dml_affected_rows=num_dml_affected_rows,
+ table=table,
+ ),
+ )
+ mock_client.get_job.return_value = mock_job
+ mock_client.query.return_value = mock_job
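+        # Cover both entry points: query() returns the mocked job, while
+        # query_and_wait() returns a row iterator directly.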
+ mock_client.query_and_wait.return_value = self._mock_rows(
+ rows,
+ total_rows=total_rows,
+ schema=schema,
+ num_dml_affected_rows=num_dml_affected_rows,
+ # Sometimes all the results will be available in the initial
+            # response, in which case there may be no job and no destination table.
+ table=table if rows is not None and total_rows > len(rows) else None,
)
- mock_client._default_query_job_config = default_query_job_config
# Assure that the REST client gets used, not the BQ Storage client.
mock_client._ensure_bqstorage_client.return_value = None
@@ -67,6 +87,8 @@ def _mock_client(
return mock_client
def _mock_bqstorage_client(self, rows=None, stream_count=0):
+ from google.cloud import bigquery_storage
+
if rows is None:
rows = []
@@ -97,9 +119,6 @@ def _mock_job(
):
from google.cloud.bigquery import job
- if rows is None:
- rows = []
-
mock_job = mock.create_autospec(job.QueryJob)
mock_job.error_result = None
mock_job.state = "DONE"
@@ -127,6 +146,30 @@ def _mock_job(
return mock_job
+ def _mock_rows(
+ self, rows, total_rows=0, schema=None, num_dml_affected_rows=None, table=None
+ ):
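+        # Build an autospec'd RowIterator. _should_use_bqstorage and
+        # _is_almost_completely_cached are bound to the real implementations
+        # below so callers exercise the genuine pagination heuristics rather
+        # than Mock return values.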
+ mock_rows = mock.create_autospec(bq_table.RowIterator, instance=True)
+ mock_rows.__iter__.return_value = rows
+ mock_rows._table = table
+ mock_rows._should_use_bqstorage = functools.partial(
+ bq_table.RowIterator._should_use_bqstorage,
+ mock_rows,
+ )
+ mock_rows._is_almost_completely_cached = functools.partial(
+ bq_table.RowIterator._is_almost_completely_cached,
+ mock_rows,
+ )
+ mock_rows.max_results = None
+ type(mock_rows).job_id = mock.PropertyMock(return_value="test-job-id")
+ type(mock_rows).location = mock.PropertyMock(return_value="test-location")
+ type(mock_rows).num_dml_affected_rows = mock.PropertyMock(
+ return_value=num_dml_affected_rows
+ )
+ type(mock_rows).total_rows = mock.PropertyMock(return_value=total_rows)
+ type(mock_rows).schema = mock.PropertyMock(return_value=schema)
+ return mock_rows
+
def _mock_results(self, total_rows=0, schema=None, num_dml_affected_rows=None):
from google.cloud.bigquery import query
@@ -270,13 +313,18 @@ def test_fetchall_w_row(self):
self.assertEqual(rows[0], (1,))
def test_fetchall_w_bqstorage_client_fetch_success(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery import dbapi
- from google.cloud.bigquery import table
# use unordered data to also test any non-determenistic key order in dicts
row_data = [
- table.Row([1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}),
- table.Row([2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}),
+ bq_table.Row(
+ [1.4, 1.1, 1.3, 1.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}
+ ),
+ bq_table.Row(
+ [2.4, 2.1, 2.3, 2.2], {"bar": 3, "baz": 2, "foo": 1, "quux": 0}
+ ),
]
bqstorage_streamed_rows = [
{
@@ -323,9 +371,15 @@ def test_fetchall_w_bqstorage_client_fetch_success(self):
self.assertEqual(sorted_row_data, expected_row_data)
def test_fetchall_w_bqstorage_client_fetch_no_rows(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import dbapi
- mock_client = self._mock_client(rows=[])
+ mock_client = self._mock_client(
+ rows=[],
+ # Assume there are many more pages of data to look at so that the
+ # BQ Storage API is necessary.
+ total_rows=1000,
+ )
mock_bqstorage_client = self._mock_bqstorage_client(stream_count=0)
mock_client._ensure_bqstorage_client.return_value = mock_bqstorage_client
@@ -345,15 +399,20 @@ def test_fetchall_w_bqstorage_client_fetch_no_rows(self):
self.assertEqual(rows, [])
def test_fetchall_w_bqstorage_client_fetch_error_no_fallback(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import dbapi
- from google.cloud.bigquery import table
- row_data = [table.Row([1.1, 1.2], {"foo": 0, "bar": 1})]
+ row_data = [bq_table.Row([1.1, 1.2], {"foo": 0, "bar": 1})]
def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs):
return bqstorage_client
- mock_client = self._mock_client(rows=row_data)
+ mock_client = self._mock_client(
+ rows=row_data,
+ # Assume there are many more pages of data to look at so that the
+ # BQ Storage API is necessary.
+ total_rows=1000,
+ )
mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client
mock_bqstorage_client = self._mock_bqstorage_client(
stream_count=1,
@@ -376,17 +435,25 @@ def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs):
mock_client.list_rows.assert_not_called()
def test_fetchall_w_bqstorage_client_no_arrow_compression(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
+ pytest.importorskip("pyarrow")
+ from google.cloud import bigquery_storage
from google.cloud.bigquery import dbapi
- from google.cloud.bigquery import table
# Use unordered data to also test any non-determenistic key order in dicts.
- row_data = [table.Row([1.2, 1.1], {"bar": 1, "foo": 0})]
+ row_data = [bq_table.Row([1.2, 1.1], {"bar": 1, "foo": 0})]
bqstorage_streamed_rows = [{"bar": _to_pyarrow(1.2), "foo": _to_pyarrow(1.1)}]
def fake_ensure_bqstorage_client(bqstorage_client=None, **kwargs):
return bqstorage_client
- mock_client = self._mock_client(rows=row_data)
+ mock_client = self._mock_client(
+ rows=row_data,
+ # Assume there are many more pages of data to look at so that the
+ # BQ Storage API is necessary.
+ total_rows=1000,
+ destination_table="P.DS.T",
+ )
mock_client._ensure_bqstorage_client.side_effect = fake_ensure_bqstorage_client
mock_bqstorage_client = self._mock_bqstorage_client(
stream_count=1,
@@ -436,12 +503,8 @@ def test_execute_custom_job_id(self):
def test_execute_w_default_config(self):
from google.cloud.bigquery.dbapi import connect
- from google.cloud.bigquery import job
- default_config = job.QueryJobConfig(use_legacy_sql=False, flatten_results=True)
- client = self._mock_client(
- rows=[], num_dml_affected_rows=0, default_query_job_config=default_config
- )
+ client = self._mock_client(rows=[], num_dml_affected_rows=0)
connection = connect(client)
cursor = connection.cursor()
@@ -449,10 +512,7 @@ def test_execute_w_default_config(self):
_, kwargs = client.query.call_args
used_config = kwargs["job_config"]
- expected_config = job.QueryJobConfig(
- use_legacy_sql=False, flatten_results=True, query_parameters=[]
- )
- self.assertEqual(used_config._properties, expected_config._properties)
+ self.assertIsNone(used_config)
def test_execute_custom_job_config_wo_default_config(self):
from google.cloud.bigquery.dbapi import connect
@@ -472,10 +532,7 @@ def test_execute_custom_job_config_w_default_config(self):
from google.cloud.bigquery.dbapi import connect
from google.cloud.bigquery import job
- default_config = job.QueryJobConfig(use_legacy_sql=False, flatten_results=True)
- client = self._mock_client(
- rows=[], num_dml_affected_rows=0, default_query_job_config=default_config
- )
+ client = self._mock_client(rows=[], num_dml_affected_rows=0)
connection = connect(client)
cursor = connection.cursor()
config = job.QueryJobConfig(use_legacy_sql=True)
@@ -486,7 +543,6 @@ def test_execute_custom_job_config_w_default_config(self):
used_config = kwargs["job_config"]
expected_config = job.QueryJobConfig(
use_legacy_sql=True, # the config passed to execute() prevails
- flatten_results=True, # from the default
query_parameters=[],
)
self.assertEqual(used_config._properties, expected_config._properties)
@@ -553,7 +609,7 @@ def test_execute_w_query_dry_run(self):
connection = dbapi.connect(
self._mock_client(
- rows=[("hello", "world", 1), ("howdy", "y'all", 2)],
+ rows=[],
schema=[
SchemaField("a", "STRING", mode="NULLABLE"),
SchemaField("b", "STRING", mode="REQUIRED"),
@@ -571,7 +627,7 @@ def test_execute_w_query_dry_run(self):
)
self.assertEqual(cursor.rowcount, 0)
- self.assertIsNone(cursor.description)
+ self.assertIsNotNone(cursor.description)
rows = cursor.fetchall()
self.assertEqual(list(rows), [])
@@ -579,16 +635,11 @@ def test_execute_raises_if_result_raises(self):
import google.cloud.exceptions
from google.cloud.bigquery import client
- from google.cloud.bigquery import job
from google.cloud.bigquery.dbapi import connect
from google.cloud.bigquery.dbapi import exceptions
- job = mock.create_autospec(job.QueryJob)
- job.dry_run = None
- job.result.side_effect = google.cloud.exceptions.GoogleCloudError("")
client = mock.create_autospec(client.Client)
- client._default_query_job_config = None
- client.query.return_value = job
+ client.query_and_wait.side_effect = google.cloud.exceptions.GoogleCloudError("")
connection = connect(client)
cursor = connection.cursor()
@@ -639,6 +690,41 @@ def test_is_iterable(self):
"Iterating again over the same results should produce no rows.",
)
+ def test_query_job_wo_execute(self):
+ from google.cloud.bigquery import dbapi
+
+ connection = dbapi.connect(self._mock_client())
+ cursor = connection.cursor()
+ self.assertIsNone(cursor.query_job)
+
+ def test_query_job_w_execute(self):
+ from google.cloud.bigquery import dbapi, QueryJob
+
+ connection = dbapi.connect(self._mock_client())
+ cursor = connection.cursor()
+ cursor.execute("SELECT 1;")
+ self.assertIsInstance(cursor.query_job, QueryJob)
+
+ def test_query_job_w_execute_no_job(self):
+ from google.cloud.bigquery import dbapi
+
+ connection = dbapi.connect(self._mock_client())
+ cursor = connection.cursor()
+ cursor.execute("SELECT 1;")
+
+ # Simulate jobless execution.
+ type(cursor._query_rows).job_id = mock.PropertyMock(return_value=None)
+
+ self.assertIsNone(cursor.query_job)
+
+ def test_query_job_w_executemany(self):
+ from google.cloud.bigquery import dbapi, QueryJob
+
+ connection = dbapi.connect(self._mock_client())
+ cursor = connection.cursor()
+ cursor.executemany("SELECT %s;", (("1",), ("2",)))
+ self.assertIsInstance(cursor.query_job, QueryJob)
+
def test__format_operation_w_dict(self):
from google.cloud.bigquery.dbapi import cursor
diff --git a/tests/unit/test_encryption_configuration.py b/tests/unit/test_encryption_configuration.py
index f432a903b..cdd944a8f 100644
--- a/tests/unit/test_encryption_configuration.py
+++ b/tests/unit/test_encryption_configuration.py
@@ -13,7 +13,7 @@
# limitations under the License.
import unittest
-import mock
+from unittest import mock
class TestEncryptionConfiguration(unittest.TestCase):
diff --git a/tests/unit/test_external_config.py b/tests/unit/test_external_config.py
index 3ef61d738..ea827a560 100644
--- a/tests/unit/test_external_config.py
+++ b/tests/unit/test_external_config.py
@@ -14,15 +14,24 @@
import base64
import copy
+from typing import Any, Dict, Optional
import unittest
from google.cloud.bigquery import external_config
from google.cloud.bigquery import schema
+from google.cloud.bigquery.enums import SourceColumnMatch
+import pytest
-class TestExternalConfig(unittest.TestCase):
+class TestExternalConfig(unittest.TestCase):
SOURCE_URIS = ["gs://foo", "gs://bar"]
+ SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME
+ DATE_FORMAT = "MM/DD/YYYY"
+ DATETIME_FORMAT = "MM/DD/YYYY HH24:MI:SS"
+ TIME_ZONE = "America/Los_Angeles"
+ TIME_FORMAT = "HH24:MI:SS"
+ TIMESTAMP_FORMAT = "MM/DD/YYYY HH24:MI:SS.FF6 TZR"
BASE_RESOURCE = {
"sourceFormat": "",
@@ -31,6 +40,11 @@ class TestExternalConfig(unittest.TestCase):
"autodetect": True,
"ignoreUnknownValues": False,
"compression": "compression",
+ "dateFormat": DATE_FORMAT,
+ "datetimeFormat": DATETIME_FORMAT,
+ "timeZone": TIME_ZONE,
+ "timeFormat": TIME_FORMAT,
+ "timestampFormat": TIMESTAMP_FORMAT,
}
def test_from_api_repr_base(self):
@@ -77,6 +91,12 @@ def test_to_api_repr_base(self):
ec.connection_id = "path/to/connection"
ec.schema = [schema.SchemaField("full_name", "STRING", mode="REQUIRED")]
+ ec.date_format = self.DATE_FORMAT
+ ec.datetime_format = self.DATETIME_FORMAT
+ ec.time_zone = self.TIME_ZONE
+ ec.time_format = self.TIME_FORMAT
+ ec.timestamp_format = self.TIMESTAMP_FORMAT
+
exp_schema = {
"fields": [{"name": "full_name", "type": "STRING", "mode": "REQUIRED"}]
}
@@ -90,6 +110,11 @@ def test_to_api_repr_base(self):
"compression": "compression",
"connectionId": "path/to/connection",
"schema": exp_schema,
+ "dateFormat": self.DATE_FORMAT,
+ "datetimeFormat": self.DATETIME_FORMAT,
+ "timeZone": self.TIME_ZONE,
+ "timeFormat": self.TIME_FORMAT,
+ "timestampFormat": self.TIMESTAMP_FORMAT,
}
self.assertEqual(got_resource, exp_resource)
@@ -99,6 +124,12 @@ def test_connection_id(self):
ec.connection_id = "path/to/connection"
self.assertEqual(ec.connection_id, "path/to/connection")
+ def test_reference_file_schema_uri(self):
+ ec = external_config.ExternalConfig("")
+ self.assertIsNone(ec.reference_file_schema_uri)
+ ec.reference_file_schema_uri = "path/to/reference"
+ self.assertEqual(ec.reference_file_schema_uri, "path/to/reference")
+
def test_schema_None(self):
ec = external_config.ExternalConfig("")
ec.schema = None
@@ -119,6 +150,11 @@ def _verify_base(self, ec):
self.assertEqual(ec.ignore_unknown_values, False)
self.assertEqual(ec.max_bad_records, 17)
self.assertEqual(ec.source_uris, self.SOURCE_URIS)
+ self.assertEqual(ec.date_format, self.DATE_FORMAT)
+ self.assertEqual(ec.datetime_format, self.DATETIME_FORMAT)
+ self.assertEqual(ec.time_zone, self.TIME_ZONE)
+ self.assertEqual(ec.time_format, self.TIME_FORMAT)
+ self.assertEqual(ec.timestamp_format, self.TIMESTAMP_FORMAT)
def test_to_api_repr_source_format(self):
ec = external_config.ExternalConfig("CSV")
@@ -242,6 +278,9 @@ def test_from_api_repr_csv(self):
"allowQuotedNewlines": True,
"allowJaggedRows": False,
"encoding": "encoding",
+ "preserveAsciiControlCharacters": False,
+ "sourceColumnMatch": self.SOURCE_COLUMN_MATCH,
+ "nullMarkers": ["", "NA"],
},
},
)
@@ -257,6 +296,12 @@ def test_from_api_repr_csv(self):
self.assertEqual(ec.options.allow_quoted_newlines, True)
self.assertEqual(ec.options.allow_jagged_rows, False)
self.assertEqual(ec.options.encoding, "encoding")
+ self.assertEqual(ec.options.preserve_ascii_control_characters, False)
+ self.assertEqual(
+ ec.options.source_column_match,
+ self.SOURCE_COLUMN_MATCH,
+ )
+ self.assertEqual(ec.options.null_markers, ["", "NA"])
got_resource = ec.to_api_repr()
@@ -277,6 +322,10 @@ def test_to_api_repr_csv(self):
options.quote_character = "quote"
options.skip_leading_rows = 123
options.allow_jagged_rows = False
+ options.preserve_ascii_control_characters = False
+ options.source_column_match = self.SOURCE_COLUMN_MATCH
+ options.null_markers = ["", "NA"]
+
ec.csv_options = options
exp_resource = {
@@ -288,6 +337,9 @@ def test_to_api_repr_csv(self):
"allowQuotedNewlines": True,
"allowJaggedRows": False,
"encoding": "encoding",
+ "preserveAsciiControlCharacters": False,
+ "sourceColumnMatch": self.SOURCE_COLUMN_MATCH,
+ "nullMarkers": ["", "NA"],
},
}
@@ -508,17 +560,23 @@ def test_csv_options_getter_and_setter(self):
from google.cloud.bigquery.external_config import CSVOptions
options = CSVOptions.from_api_repr(
- {"allowJaggedRows": True, "allowQuotedNewlines": False}
+ {
+ "allowJaggedRows": True,
+ "allowQuotedNewlines": False,
+ "preserveAsciiControlCharacters": False,
+ }
)
ec = external_config.ExternalConfig(external_config.ExternalSourceFormat.CSV)
self.assertIsNone(ec.csv_options.allow_jagged_rows)
self.assertIsNone(ec.csv_options.allow_quoted_newlines)
+ self.assertIsNone(ec.csv_options.preserve_ascii_control_characters)
ec.csv_options = options
self.assertTrue(ec.csv_options.allow_jagged_rows)
self.assertFalse(ec.csv_options.allow_quoted_newlines)
+ self.assertFalse(ec.csv_options.preserve_ascii_control_characters)
self.assertIs(ec.options._properties, ec._properties[CSVOptions._RESOURCE_NAME])
self.assertIs(
ec.csv_options._properties, ec._properties[CSVOptions._RESOURCE_NAME]
@@ -833,7 +891,9 @@ def test_to_api_repr(self):
)
-class CSVOptions(unittest.TestCase):
+class TestCSVOptions(unittest.TestCase):
+ SOURCE_COLUMN_MATCH = SourceColumnMatch.NAME
+
def test_to_api_repr(self):
options = external_config.CSVOptions()
options.field_delimiter = "\t"
@@ -842,6 +902,8 @@ def test_to_api_repr(self):
options.allow_quoted_newlines = True
options.allow_jagged_rows = False
options.encoding = "UTF-8"
+ options.preserve_ascii_control_characters = False
+ options.source_column_match = self.SOURCE_COLUMN_MATCH
resource = options.to_api_repr()
@@ -854,9 +916,38 @@ def test_to_api_repr(self):
"allowQuotedNewlines": True,
"allowJaggedRows": False,
"encoding": "UTF-8",
+ "preserveAsciiControlCharacters": False,
+ "sourceColumnMatch": self.SOURCE_COLUMN_MATCH,
},
)
+ def test_source_column_match_None(self):
+ ec = external_config.CSVOptions()
+ ec.source_column_match = None
+ expected = None
+ result = ec.source_column_match
+ self.assertEqual(expected, result)
+
+ def test_source_column_match_valid_input(self):
+ ec = external_config.CSVOptions()
+ ec.source_column_match = SourceColumnMatch.NAME
+ expected = "NAME"
+ result = ec.source_column_match
+ self.assertEqual(expected, result)
+
+ ec.source_column_match = "POSITION"
+ expected = "POSITION"
+ result = ec.source_column_match
+ self.assertEqual(expected, result)
+
+ def test_source_column_match_invalid_input(self):
+ ec = external_config.CSVOptions()
+ with self.assertRaisesRegex(
+ TypeError,
+ "value must be a google.cloud.bigquery.enums.SourceColumnMatch, str, or None",
+ ):
+ ec.source_column_match = 3.14
+
class TestGoogleSheetsOptions(unittest.TestCase):
def test_to_api_repr(self):
@@ -873,3 +964,226 @@ def _copy_and_update(d, u):
d = copy.deepcopy(d)
d.update(u)
return d
+
+
+class TestExternalCatalogDatasetOptions:
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.external_config import ExternalCatalogDatasetOptions
+
+ return ExternalCatalogDatasetOptions
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ DEFAULT_STORAGE_LOCATION_URI = "gs://test-bucket/test-path"
+ PARAMETERS = {"key": "value"}
+
+ @pytest.mark.parametrize(
+ "default_storage_location_uri,parameters",
+ [
+ (DEFAULT_STORAGE_LOCATION_URI, PARAMETERS), # set all params
+ (DEFAULT_STORAGE_LOCATION_URI, None), # set only one argument at a time
+ (None, PARAMETERS),
+ (None, None), # use default parameters
+ ],
+ )
+ def test_ctor_initialization(
+ self,
+ default_storage_location_uri,
+ parameters,
+ ):
+ """Test ExternalCatalogDatasetOptions constructor with explicit values."""
+
+ instance = self._make_one(
+ default_storage_location_uri=default_storage_location_uri,
+ parameters=parameters,
+ )
+
+ assert instance.default_storage_location_uri == default_storage_location_uri
+ assert instance.parameters == parameters
+
+ @pytest.mark.parametrize(
+ "default_storage_location_uri,parameters",
+ [
+ (123, None), # does not accept integers
+ (None, 123),
+ ],
+ )
+ def test_ctor_invalid_input(self, default_storage_location_uri, parameters):
+ """Test ExternalCatalogDatasetOptions constructor with invalid input."""
+
+ with pytest.raises(TypeError) as e:
+ self._make_one(
+ default_storage_location_uri=default_storage_location_uri,
+ parameters=parameters,
+ )
+
+        # The TypeError messages for these invalid arguments contain the word "Pass".
+ assert "Pass " in str(e.value)
+
+ def test_to_api_repr(self):
+ """Test ExternalCatalogDatasetOptions.to_api_repr method."""
+
+ instance = self._make_one(
+ default_storage_location_uri=self.DEFAULT_STORAGE_LOCATION_URI,
+ parameters=self.PARAMETERS,
+ )
+ resource = instance.to_api_repr()
+ assert (
+ resource["defaultStorageLocationUri"] == self.DEFAULT_STORAGE_LOCATION_URI
+ )
+ assert resource["parameters"] == self.PARAMETERS
+
+ def test_from_api_repr(self):
+ """GIVEN an api representation of an ExternalCatalogDatasetOptions object (i.e. api_repr)
+ WHEN converted into an ExternalCatalogDatasetOptions object using from_api_repr()
+        THEN the resulting object's internal properties will match the original
+        api_repr dict.
+ """
+
+ instance = self._make_one()
+ api_repr = {
+ "defaultStorageLocationUri": self.DEFAULT_STORAGE_LOCATION_URI,
+ "parameters": self.PARAMETERS,
+ }
+ result = instance.from_api_repr(api_repr)
+
+ assert isinstance(result, external_config.ExternalCatalogDatasetOptions)
+ assert result._properties == api_repr
+
+
+class TestExternalCatalogTableOptions:
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.external_config import ExternalCatalogTableOptions
+
+ return ExternalCatalogTableOptions
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ storage_descriptor_repr = {
+ "inputFormat": "testpath.to.OrcInputFormat",
+ "locationUri": "gs://test/path/",
+ "outputFormat": "testpath.to.OrcOutputFormat",
+ "serDeInfo": {
+ "serializationLibrary": "testpath.to.LazySimpleSerDe",
+ "name": "serde_lib_name",
+ "parameters": {"key": "value"},
+ },
+ }
+
+ CONNECTIONID = "connection123"
+ PARAMETERS = {"key": "value"}
+ STORAGEDESCRIPTOR = schema.StorageDescriptor.from_api_repr(storage_descriptor_repr)
+ EXTERNALCATALOGTABLEOPTIONS = {
+ "connectionId": "connection123",
+ "parameters": {"key": "value"},
+ "storageDescriptor": STORAGEDESCRIPTOR.to_api_repr(),
+ }
+
+ @pytest.mark.parametrize(
+ "connection_id,parameters,storage_descriptor",
+ [
+ (
+ CONNECTIONID,
+ PARAMETERS,
+ STORAGEDESCRIPTOR,
+ ), # set all parameters at once
+ (CONNECTIONID, None, None), # set only one parameter at a time
+ (None, PARAMETERS, None),
+ (None, None, STORAGEDESCRIPTOR), # set storage descriptor using obj
+ (None, None, storage_descriptor_repr), # set storage descriptor using dict
+ (None, None, None), # use default parameters
+ ],
+ )
+ def test_ctor_initialization(
+ self,
+ connection_id,
+ parameters,
+ storage_descriptor,
+ ):
+ instance = self._make_one(
+ connection_id=connection_id,
+ parameters=parameters,
+ storage_descriptor=storage_descriptor,
+ )
+
+ assert instance.connection_id == connection_id
+ assert instance.parameters == parameters
+
+ if isinstance(storage_descriptor, schema.StorageDescriptor):
+ assert (
+ instance.storage_descriptor.to_api_repr()
+ == storage_descriptor.to_api_repr()
+ )
+ elif isinstance(storage_descriptor, dict):
+ assert instance.storage_descriptor.to_api_repr() == storage_descriptor
+ else:
+ assert instance.storage_descriptor is None
+
+ @pytest.mark.parametrize(
+ "connection_id,parameters,storage_descriptor",
+ [
+ pytest.param(
+ 123,
+ PARAMETERS,
+ STORAGEDESCRIPTOR,
+ id="connection_id-invalid-type",
+ ),
+ pytest.param(
+ CONNECTIONID,
+ 123,
+ STORAGEDESCRIPTOR,
+ id="parameters-invalid-type",
+ ),
+ pytest.param(
+ CONNECTIONID,
+ PARAMETERS,
+ 123,
+ id="storage_descriptor-invalid-type",
+ ),
+ ],
+ )
+ def test_ctor_invalid_input(
+ self,
+ connection_id: str,
+ parameters: Dict[str, Any],
+ storage_descriptor: Optional[schema.StorageDescriptor],
+ ):
+ with pytest.raises(TypeError) as e:
+ external_config.ExternalCatalogTableOptions(
+ connection_id=connection_id,
+ parameters=parameters,
+ storage_descriptor=storage_descriptor,
+ )
+
+        # The TypeError messages for these invalid arguments contain the word "Pass".
+ assert "Pass " in str(e.value)
+
+ def test_to_api_repr(self):
+ instance = self._make_one(
+ connection_id=self.CONNECTIONID,
+ parameters=self.PARAMETERS,
+ storage_descriptor=self.STORAGEDESCRIPTOR,
+ )
+
+ result = instance.to_api_repr()
+ expected = self.EXTERNALCATALOGTABLEOPTIONS
+
+ assert result == expected
+
+ def test_from_api_repr(self):
+ result = self._make_one(
+ connection_id=self.CONNECTIONID,
+ parameters=self.PARAMETERS,
+ storage_descriptor=self.STORAGEDESCRIPTOR,
+ )
+
+ instance = self._make_one()
+ api_repr = self.EXTERNALCATALOGTABLEOPTIONS
+ result = instance.from_api_repr(api_repr)
+
+ assert isinstance(result, external_config.ExternalCatalogTableOptions)
+ assert result._properties == api_repr
diff --git a/tests/unit/test_format_options.py b/tests/unit/test_format_options.py
index c8fecbfa6..94a01570f 100644
--- a/tests/unit/test_format_options.py
+++ b/tests/unit/test_format_options.py
@@ -54,11 +54,17 @@ def test_from_api_repr(self):
)
assert not config.enum_as_string
assert config.enable_list_inference
+ assert config.map_target_type is None
def test_to_api_repr(self):
config = self._get_target_class()()
config.enum_as_string = True
config.enable_list_inference = False
+ config.map_target_type = "ARRAY_OF_STRUCT"
result = config.to_api_repr()
- assert result == {"enumAsString": True, "enableListInference": False}
+ assert result == {
+ "enumAsString": True,
+ "enableListInference": False,
+ "mapTargetType": "ARRAY_OF_STRUCT",
+ }
diff --git a/tests/unit/test_job_retry.py b/tests/unit/test_job_retry.py
index b2095d2f2..fa55e8f6a 100644
--- a/tests/unit/test_job_retry.py
+++ b/tests/unit/test_job_retry.py
@@ -14,88 +14,102 @@
import datetime
import re
+from unittest import mock
-import mock
import pytest
import google.api_core.exceptions
import google.api_core.retry
+import freezegun
+import requests.exceptions
-from .helpers import make_connection
+from google.cloud.bigquery import _job_helpers
+import google.cloud.bigquery.retry
+from .helpers import make_client, make_connection
-# With job_retry_on_query, we're testing 4 scenarios:
-# - No `job_retry` passed, retry on default rateLimitExceeded.
-# - Pass NotFound retry to `query`.
-# - Pass NotFound retry to `result`.
-# - Pass BadRequest retry to query, with the value passed to `result` overriding.
-@pytest.mark.parametrize("job_retry_on_query", [None, "Query", "Result", "Both"])
-@mock.patch("time.sleep")
-def test_retry_failed_jobs(sleep, client, job_retry_on_query):
- """
- Test retry of job failures, as opposed to API-invocation failures.
- """
- retry_notfound = google.api_core.retry.Retry(
+_RETRY_NOT_FOUND = {
+ "job_retry": google.api_core.retry.Retry(
predicate=google.api_core.retry.if_exception_type(
- google.api_core.exceptions.NotFound
- )
- )
- retry_badrequest = google.api_core.retry.Retry(
+ google.api_core.exceptions.NotFound,
+ ),
+ ),
+}
+_RETRY_BAD_REQUEST = {
+ "job_retry": google.api_core.retry.Retry(
predicate=google.api_core.retry.if_exception_type(
- google.api_core.exceptions.BadRequest
- )
- )
+ google.api_core.exceptions.BadRequest,
+ ),
+ ),
+}
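+# These dicts are splatted into query()/result() calls as keyword arguments to
+# override the default job_retry predicate.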
- if job_retry_on_query is None:
- reason = "rateLimitExceeded"
- else:
- reason = "notFound"
+# Test retry of job failures, instead of API-invocation failures. 4 scenarios:
+# - No `job_retry` passed, retry on default rateLimitExceeded.
+# - Pass NotFound retry to `query`.
+# - Pass NotFound retry to `result`.
+# - Pass BadRequest retry to query, with the value passed to `result` overriding.
+@mock.patch("time.sleep")
+@pytest.mark.parametrize(
+ "reason, job_retry, result_retry",
+ [
+ pytest.param(
+ "rateLimitExceeded",
+ {},
+ {},
+ id="no job_retry",
+ ),
+ pytest.param(
+ "notFound",
+ _RETRY_NOT_FOUND,
+ {},
+ id="Query NotFound",
+ ),
+ pytest.param(
+ "notFound",
+ _RETRY_NOT_FOUND,
+ _RETRY_NOT_FOUND,
+ id="Result NotFound",
+ ),
+ pytest.param(
+ "notFound",
+ _RETRY_BAD_REQUEST,
+ _RETRY_NOT_FOUND,
+ id="BadRequest",
+ ),
+ ],
+)
+def test_retry_failed_jobs(sleep, reason, job_retry, result_retry, global_time_lock):
+ client = make_client()
err = dict(reason=reason)
- responses = [
- dict(status=dict(state="DONE", errors=[err], errorResult=err)),
- dict(status=dict(state="DONE", errors=[err], errorResult=err)),
- dict(status=dict(state="DONE", errors=[err], errorResult=err)),
- dict(status=dict(state="DONE")),
+ conn = client._connection = make_connection(
+ dict(
+ status=dict(state="DONE", errors=[err], errorResult=err),
+ jobReference={"jobId": "id_1"},
+ ),
+ dict(
+ status=dict(state="DONE", errors=[err], errorResult=err),
+ jobReference={"jobId": "id_1"},
+ ),
+ dict(
+ status=dict(state="DONE", errors=[err], errorResult=err),
+ jobReference={"jobId": "id_1"},
+ ),
+ dict(status=dict(state="DONE"), jobReference={"jobId": "id_2"}),
dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"),
- ]
-
- def api_request(method, path, query_params=None, data=None, **kw):
- response = responses.pop(0)
- if data:
- response["jobReference"] = data["jobReference"]
- else:
- response["jobReference"] = dict(
- jobId=path.split("/")[-1], projectId="PROJECT"
- )
- return response
-
- conn = client._connection = make_connection()
- conn.api_request.side_effect = api_request
+ )
- if job_retry_on_query == "Query":
- job_retry = dict(job_retry=retry_notfound)
- elif job_retry_on_query == "Both":
- # This will be overridden in `result`
- job_retry = dict(job_retry=retry_badrequest)
- else:
- job_retry = {}
job = client.query("select 1", **job_retry)
+ result = job.result(**result_retry)
- orig_job_id = job.job_id
- job_retry = (
- dict(job_retry=retry_notfound)
- if job_retry_on_query in ("Result", "Both")
- else {}
- )
- result = job.result(**job_retry)
assert result.total_rows == 1
- assert not responses # We made all the calls we expected to.
+
+ # We made all the calls we expected to.
+ assert conn.api_request.call_count == 5
# The job adjusts its job id based on the id of the last attempt.
- assert job.job_id != orig_job_id
- assert job.job_id == conn.mock_calls[3][2]["data"]["jobReference"]["jobId"]
+ assert job.job_id == "id_2"
# We had to sleep three times
assert len(sleep.mock_calls) == 3
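
For readers of the parametrization above: `retry` controls retries of individual API requests, while `job_retry` controls whether a failed job is re-issued as a brand-new job, and the value passed to `result` overrides the one passed to `query`. A hedged caller-side sketch using the same objects this module exercises; the project name is a placeholder:

    import google.api_core.exceptions
    import google.api_core.retry
    from google.cloud import bigquery
    from google.cloud.bigquery.retry import DEFAULT_RETRY

    client = bigquery.Client(project="my-project")  # placeholder project

    # Re-issue the job only when it fails with NotFound (mirrors _RETRY_NOT_FOUND).
    retry_not_found = google.api_core.retry.Retry(
        predicate=google.api_core.retry.if_exception_type(
            google.api_core.exceptions.NotFound,
        ),
    )

    job = client.query("select 1", job_retry=retry_not_found)
    rows = job.result(
        retry=DEFAULT_RETRY,  # retries transient API-call failures
        job_retry=retry_not_found,  # overrides the value given to `query`
    )
    print(rows.total_rows)
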
@@ -108,17 +122,296 @@ def api_request(method, path, query_params=None, data=None, **kw):
assert max(c[1][0] for c in sleep.mock_calls) <= 8
# We can ask for the result again:
- responses = [
+ conn = client._connection = make_connection(
dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"),
- ]
- orig_job_id = job.job_id
+ )
result = job.result()
+
assert result.total_rows == 1
- assert not responses # We made all the calls we expected to.
+
+ # We made all the calls we expected to.
+ assert conn.api_request.call_count == 1
# We wouldn't (and didn't) fail, because we're dealing with a successful job.
# So the job id hasn't changed.
- assert job.job_id == orig_job_id
+ assert job.job_id == "id_2"
+
+
+def test_retry_connection_error_with_default_retries_and_successful_first_job(
+ monkeypatch, client, global_time_lock
+):
+ """
+    Make sure ConnectionError can be retried at the `is_job_done` level, even
+    if retries are exhausted by the API-level retry.
+
+ Note: Because restart_query_job is set to True only in the case of a
+ confirmed job failure, this should be safe to do even when a job is not
+ idempotent.
+
+ Regression test for issue
+ https://github.com/googleapis/python-bigquery/issues/1929
+ """
+ job_counter = 0
+
+ def make_job_id(*args, **kwargs):
+ nonlocal job_counter
+ job_counter += 1
+ return f"{job_counter}"
+
+ monkeypatch.setattr(_job_helpers, "make_job_id", make_job_id)
+ conn = client._connection = make_connection()
+ project = client.project
+ job_reference_1 = {"projectId": project, "jobId": "1", "location": "test-loc"}
+ NUM_API_RETRIES = 2
+
+ with freezegun.freeze_time(
+ "2024-01-01 00:00:00",
+ # Note: because of exponential backoff and a bit of jitter,
+        # NUM_API_RETRIES becomes less accurate as its value grows.
+ # We add 1 because we know there will be at least some additional
+ # calls to fetch the time / sleep before the retry deadline is hit.
+ auto_tick_seconds=(
+ google.cloud.bigquery.retry._DEFAULT_RETRY_DEADLINE / NUM_API_RETRIES
+ )
+ + 1,
+ ):
+ conn.api_request.side_effect = [
+ # jobs.insert
+ {"jobReference": job_reference_1, "status": {"state": "PENDING"}},
+ # jobs.get
+ {"jobReference": job_reference_1, "status": {"state": "RUNNING"}},
+ # jobs.getQueryResults x2
+ requests.exceptions.ConnectionError(),
+ requests.exceptions.ConnectionError(),
+ # jobs.get
+            # Job actually succeeded, so we shouldn't be restarting the job,
+ # even though we are retrying at the `is_job_done` level.
+ {"jobReference": job_reference_1, "status": {"state": "DONE"}},
+ # jobs.getQueryResults
+ {"jobReference": job_reference_1, "jobComplete": True},
+ ]
+
+ job = client.query("select 1")
+ rows_iter = job.result()
+
+ assert job.done() # Shouldn't make any additional API calls.
+ assert rows_iter is not None
+
+ # Should only have created one job, even though we did call job_retry.
+ assert job_counter == 1
+
+ # Double-check that we made the API calls we expected to make.
+ conn.api_request.assert_has_calls(
+ [
+ # jobs.insert
+ mock.call(
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data={
+ "jobReference": {"jobId": "1", "projectId": "PROJECT"},
+ "configuration": {
+ "query": {"useLegacySql": False, "query": "select 1"}
+ },
+ },
+ timeout=None,
+ ),
+ # jobs.get
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/jobs/1",
+ query_params={"location": "test-loc", "projection": "full"},
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
+ ),
+ # jobs.getQueryResults x2
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/queries/1",
+ query_params={"maxResults": 0, "location": "test-loc"},
+ timeout=None,
+ ),
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/queries/1",
+ query_params={"maxResults": 0, "location": "test-loc"},
+ timeout=None,
+ ),
+ # jobs.get -- is_job_done checking again
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/jobs/1",
+ query_params={"location": "test-loc", "projection": "full"},
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
+ ),
+ # jobs.getQueryResults
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/queries/1",
+ query_params={"maxResults": 0, "location": "test-loc"},
+ timeout=None,
+ ),
+ ],
+ )
+
+
+def test_query_retry_with_default_retry_and_ambiguous_errors_only_retries_with_failed_job(
+ client, monkeypatch, global_time_lock
+):
+ """
+ Some errors like 'rateLimitExceeded' can be ambiguous. Make sure we only
+ retry the job when we know for sure that the job has failed for a retriable
+ reason. We can only be sure after a "successful" call to jobs.get to fetch
+ the failed job status.
+ """
+ job_counter = 0
+
+ def make_job_id(*args, **kwargs):
+ nonlocal job_counter
+ job_counter += 1
+ return f"{job_counter}"
+
+ monkeypatch.setattr(_job_helpers, "make_job_id", make_job_id)
+
+ project = client.project
+ job_reference_1 = {"projectId": project, "jobId": "1", "location": "test-loc"}
+ job_reference_2 = {"projectId": project, "jobId": "2", "location": "test-loc"}
+ NUM_API_RETRIES = 2
+
+ # This error is modeled after a real customer exception in
+ # https://github.com/googleapis/python-bigquery/issues/707.
+ internal_error = google.api_core.exceptions.InternalServerError(
+ "Job failed just because...",
+ errors=[
+ {"reason": "internalError"},
+ ],
+ )
+ responses = [
+ # jobs.insert
+ {"jobReference": job_reference_1, "status": {"state": "PENDING"}},
+ # jobs.get
+ {"jobReference": job_reference_1, "status": {"state": "RUNNING"}},
+ # jobs.getQueryResults x2
+ #
+ # Note: internalError is ambiguous in jobs.getQueryResults. The
+ # problem could be at the Google Frontend level or it could be because
+        # the job has failed due to some transient issue and the BigQuery
+        # REST API is translating the failed job status into failure HTTP
+        # codes.
+ #
+ # TODO(GH#1903): We shouldn't retry nearly this many times when we get
+ # ambiguous errors from jobs.getQueryResults.
+ # See: https://github.com/googleapis/python-bigquery/issues/1903
+ internal_error,
+ internal_error,
+ # jobs.get -- the job has failed
+ {
+ "jobReference": job_reference_1,
+ "status": {"state": "DONE", "errorResult": {"reason": "internalError"}},
+ },
+ # jobs.insert
+ {"jobReference": job_reference_2, "status": {"state": "PENDING"}},
+ # jobs.get
+ {"jobReference": job_reference_2, "status": {"state": "RUNNING"}},
+ # jobs.getQueryResults
+ {"jobReference": job_reference_2, "jobComplete": True},
+ # jobs.get
+ {"jobReference": job_reference_2, "status": {"state": "DONE"}},
+ ]
+
+ conn = client._connection = make_connection(*responses)
+
+ with freezegun.freeze_time(
+ # Note: because of exponential backoff and a bit of jitter,
+        # NUM_API_RETRIES becomes less accurate as its value grows.
+ # We add 1 because we know there will be at least some additional
+ # calls to fetch the time / sleep before the retry deadline is hit.
+ auto_tick_seconds=(
+ google.cloud.bigquery.retry._DEFAULT_RETRY_DEADLINE / NUM_API_RETRIES
+ )
+ + 1,
+ ):
+ job = client.query("select 1")
+ job.result()
+
+ conn.api_request.assert_has_calls(
+ [
+ # jobs.insert
+ mock.call(
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data={
+ "jobReference": {"jobId": "1", "projectId": "PROJECT"},
+ "configuration": {
+ "query": {"useLegacySql": False, "query": "select 1"}
+ },
+ },
+ timeout=None,
+ ),
+ # jobs.get
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/jobs/1",
+ query_params={"location": "test-loc", "projection": "full"},
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
+ ),
+ # jobs.getQueryResults x2
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/queries/1",
+ query_params={"maxResults": 0, "location": "test-loc"},
+ timeout=None,
+ ),
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/queries/1",
+ query_params={"maxResults": 0, "location": "test-loc"},
+ timeout=None,
+ ),
+ # jobs.get -- verify that the job has failed
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/jobs/1",
+ query_params={"location": "test-loc", "projection": "full"},
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
+ ),
+ # jobs.insert
+ mock.call(
+ method="POST",
+ path="/projects/PROJECT/jobs",
+ data={
+ "jobReference": {
+ # Make sure that we generated a new job ID.
+ "jobId": "2",
+ "projectId": "PROJECT",
+ },
+ "configuration": {
+ "query": {"useLegacySql": False, "query": "select 1"}
+ },
+ },
+ timeout=None,
+ ),
+ # jobs.get
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/jobs/2",
+ query_params={"location": "test-loc", "projection": "full"},
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
+ ),
+ # jobs.getQueryResults
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/queries/2",
+ query_params={"maxResults": 0, "location": "test-loc"},
+ timeout=None,
+ ),
+ # jobs.get
+ mock.call(
+ method="GET",
+ path="/projects/PROJECT/jobs/2",
+ query_params={"location": "test-loc", "projection": "full"},
+ timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT,
+ ),
+ ]
+ )
# With job_retry_on_query, we're testing 4 scenarios:
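
Both tests above rely on freezegun's `auto_tick_seconds` so that a retry deadline measured in minutes expires after only a few simulated attempts. A standalone sketch of that trick; the 61-second tick and the 120-second threshold are illustrative:

    import datetime
    import freezegun

    with freezegun.freeze_time("2024-01-01 00:00:00", auto_tick_seconds=61):
        start = datetime.datetime.now()  # returns the frozen start time
        datetime.datetime.now()  # each subsequent read advances the clock by 61s
        elapsed = datetime.datetime.now() - start
        # Two ticks later, over two minutes have "passed" without any real sleep,
        # which is how a retry deadline gets exhausted almost instantly.
        assert elapsed >= datetime.timedelta(seconds=120)
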
@@ -126,7 +419,7 @@ def api_request(method, path, query_params=None, data=None, **kw):
# - Pass None retry to `result`.
@pytest.mark.parametrize("job_retry_on_query", ["Query", "Result"])
@mock.patch("time.sleep")
-def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query):
+def test_disable_retry_failed_jobs(sleep, client, job_retry_on_query, global_time_lock):
"""
Test retry of job failures, as opposed to API-invocation failures.
"""
@@ -149,99 +442,253 @@ def api_request(method, path, query_params=None, data=None, **kw):
orig_job_id = job.job_id
job_retry = dict(job_retry=None) if job_retry_on_query == "Result" else {}
- with pytest.raises(google.api_core.exceptions.Forbidden):
+ with pytest.raises(google.api_core.exceptions.TooManyRequests):
job.result(**job_retry)
assert job.job_id == orig_job_id
assert len(sleep.mock_calls) == 0
-@mock.patch("google.api_core.retry.datetime_helpers")
@mock.patch("time.sleep")
-def test_retry_failed_jobs_after_retry_failed(sleep, datetime_helpers, client):
+def test_retry_failed_jobs_after_retry_failed(sleep, client, global_time_lock):
"""
If at first you don't succeed, maybe you will later. :)
"""
conn = client._connection = make_connection()
- datetime_helpers.utcnow.return_value = datetime.datetime(2021, 7, 29, 10, 43, 2)
-
- err = dict(reason="rateLimitExceeded")
-
- def api_request(method, path, query_params=None, data=None, **kw):
- calls = sleep.mock_calls
- if calls:
- datetime_helpers.utcnow.return_value += datetime.timedelta(
- seconds=calls[-1][1][0]
- )
- response = dict(status=dict(state="DONE", errors=[err], errorResult=err))
- response["jobReference"] = data["jobReference"]
- return response
-
- conn.api_request.side_effect = api_request
-
- job = client.query("select 1")
- orig_job_id = job.job_id
+ with freezegun.freeze_time("2024-01-01 00:00:00") as frozen_datetime:
+ err = dict(reason="rateLimitExceeded")
- with pytest.raises(google.api_core.exceptions.RetryError):
- job.result()
-
- # We never got a successful job, so the job id never changed:
- assert job.job_id == orig_job_id
-
- # We failed because we couldn't succeed after 120 seconds.
- # But we can try again:
- err2 = dict(reason="backendError") # We also retry on this
- responses = [
- dict(status=dict(state="DONE", errors=[err2], errorResult=err2)),
- dict(status=dict(state="DONE", errors=[err], errorResult=err)),
- dict(status=dict(state="DONE", errors=[err2], errorResult=err2)),
- dict(status=dict(state="DONE")),
- dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"),
- ]
-
- def api_request(method, path, query_params=None, data=None, **kw):
- calls = sleep.mock_calls
- datetime_helpers.utcnow.return_value += datetime.timedelta(
- seconds=calls[-1][1][0]
- )
- response = responses.pop(0)
- if data:
+ def api_request(method, path, query_params=None, data=None, **kw):
+ calls = sleep.mock_calls
+ if calls:
+ frozen_datetime.tick(delta=datetime.timedelta(seconds=calls[-1][1][0]))
+ response = dict(status=dict(state="DONE", errors=[err], errorResult=err))
response["jobReference"] = data["jobReference"]
- else:
- response["jobReference"] = dict(
- jobId=path.split("/")[-1], projectId="PROJECT"
- )
- return response
-
- conn.api_request.side_effect = api_request
- result = job.result()
- assert result.total_rows == 1
- assert not responses # We made all the calls we expected to.
- assert job.job_id != orig_job_id
-
-
-def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client):
+ return response
+
+ conn.api_request.side_effect = api_request
+
+ job = client.query("select 1")
+ orig_job_id = job.job_id
+
+ with pytest.raises(google.api_core.exceptions.RetryError):
+ job.result()
+
+ # We retried the job at least once, so we should have generated a new job ID.
+ assert job.job_id != orig_job_id
+
+ # We failed because we couldn't succeed after 120 seconds.
+ # But we can try again:
+ err2 = dict(reason="backendError") # We also retry on this
+ responses = [
+ dict(status=dict(state="DONE", errors=[err2], errorResult=err2)),
+ dict(status=dict(state="DONE", errors=[err], errorResult=err)),
+ dict(status=dict(state="DONE", errors=[err2], errorResult=err2)),
+ dict(status=dict(state="DONE")),
+ dict(rows=[{"f": [{"v": "1"}]}], totalRows="1"),
+ ]
+
+ def api_request(method, path, query_params=None, data=None, **kw):
+ calls = sleep.mock_calls
+ frozen_datetime.tick(delta=datetime.timedelta(seconds=calls[-1][1][0]))
+ response = responses.pop(0)
+ if data:
+ response["jobReference"] = data["jobReference"]
+ else:
+ response["jobReference"] = dict(
+ jobId=path.split("/")[-1], projectId="PROJECT"
+ )
+ return response
+
+ conn.api_request.side_effect = api_request
+ result = job.result()
+ assert result.total_rows == 1
+ assert not responses # We made all the calls we expected to.
+ assert job.job_id != orig_job_id
+
+
+def test_raises_on_job_retry_on_query_with_non_retryable_jobs(client, global_time_lock):
with pytest.raises(
TypeError,
- match=re.escape(
+ match=(
"`job_retry` was provided, but the returned job is"
" not retryable, because a custom `job_id` was"
" provided."
- ),
+ ).replace(" ", r"\s"),
):
client.query("select 42", job_id=42, job_retry=google.api_core.retry.Retry())
-def test_raises_on_job_retry_on_result_with_non_retryable_jobs(client):
+def test_raises_on_job_retry_on_result_with_non_retryable_jobs(
+ client, global_time_lock
+):
client._connection = make_connection({})
- job = client.query("select 42", job_id=42)
+
+ with pytest.warns(
+ FutureWarning,
+ match=re.escape("job_retry must be explicitly set to None if job_id is set."),
+ ):
+ # Implicitly providing a job_retry is a warning and will be an error in the future.
+ job = client.query("select 42", job_id=42)
+
with pytest.raises(
TypeError,
- match=re.escape(
+ match=(
"`job_retry` was provided, but this job is"
" not retryable, because a custom `job_id` was"
" provided to the query that created this job."
- ),
+ ).replace(" ", r"\s"),
):
+ # Explicitly providing a job_retry is an error.
job.result(job_retry=google.api_core.retry.Retry())
+
+
+def test_query_and_wait_retries_job_for_DDL_queries(global_time_lock):
+ """
+ Specific test for retrying DDL queries with "jobRateLimitExceeded" error:
+ https://github.com/googleapis/python-bigquery/issues/1790
+ """
+ freezegun.freeze_time(auto_tick_seconds=1)
+
+ client = make_client()
+ conn = client._connection = make_connection(
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "abc",
+ "location": "response-location",
+ },
+ "jobComplete": False,
+ },
+ google.api_core.exceptions.InternalServerError(
+ "job_retry me", errors=[{"reason": "jobRateLimitExceeded"}]
+ ),
+ google.api_core.exceptions.BadRequest(
+ "retry me", errors=[{"reason": "jobRateLimitExceeded"}]
+ ),
+ {
+ "jobReference": {
+ "projectId": "response-project",
+ "jobId": "abc",
+ "location": "response-location",
+ },
+ "jobComplete": True,
+ "schema": {
+ "fields": [
+ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "age", "type": "INT64", "mode": "NULLABLE"},
+ ],
+ },
+ "rows": [
+ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
+ {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ ],
+ },
+ )
+ rows = _job_helpers.query_and_wait(
+ client,
+ query="SELECT 1",
+ location="request-location",
+ project="request-project",
+ job_config=None,
+ page_size=None,
+ max_results=None,
+ retry=google.cloud.bigquery.retry.DEFAULT_RETRY,
+ job_retry=google.cloud.bigquery.retry.DEFAULT_JOB_RETRY,
+ )
+ assert len(list(rows)) == 4
+
+ # Relevant docs for the REST API path: https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/query
+ # and https://cloud.google.com/bigquery/docs/reference/rest/v2/jobs/getQueryResults
+ query_request_path = "/projects/request-project/queries"
+
+ calls = conn.api_request.call_args_list
+ _, kwargs = calls[0]
+ assert kwargs["method"] == "POST"
+ assert kwargs["path"] == query_request_path
+
+ # TODO: Add assertion statements for response paths after PR#1797 is fixed
+
+ _, kwargs = calls[3]
+ assert kwargs["method"] == "POST"
+ assert kwargs["path"] == query_request_path
+
+
+@pytest.mark.parametrize(
+ "result_retry_param",
+ [
+ pytest.param(
+ {},
+ id="default retry {}",
+ ),
+ pytest.param(
+ {
+ "retry": google.cloud.bigquery.retry.DEFAULT_RETRY.with_timeout(
+ timeout=10.0
+ )
+ },
+ id="custom retry object with timeout 10.0",
+ ),
+ ],
+)
+def test_retry_load_job_result(result_retry_param, PROJECT, DS_ID):
+ from google.cloud.bigquery.dataset import DatasetReference
+ from google.cloud.bigquery.job.load import LoadJob
+ import google.cloud.bigquery.retry
+
+ client = make_client()
+ conn = client._connection = make_connection(
+ dict(
+ status=dict(state="RUNNING"),
+ jobReference={"jobId": "id_1"},
+ ),
+ google.api_core.exceptions.ServiceUnavailable("retry me"),
+ dict(
+ status=dict(state="DONE"),
+ jobReference={"jobId": "id_1"},
+ statistics={"load": {"outputRows": 1}},
+ ),
+ )
+
+ table_ref = DatasetReference(project=PROJECT, dataset_id=DS_ID).table("new_table")
+ job = LoadJob("id_1", source_uris=None, destination=table_ref, client=client)
+
+ with mock.patch.object(
+ client, "_call_api", wraps=client._call_api
+ ) as wrapped_call_api:
+ result = job.result(**result_retry_param)
+
+ assert job.state == "DONE"
+ assert result.output_rows == 1
+
+ # Check that _call_api was called multiple times due to retry
+ assert wrapped_call_api.call_count > 1
+
+ # Verify the retry object used in the calls to _call_api
+ expected_retry = result_retry_param.get(
+ "retry", google.cloud.bigquery.retry.DEFAULT_RETRY
+ )
+
+ for call in wrapped_call_api.mock_calls:
+ name, args, kwargs = call
+ # The retry object is the first positional argument to _call_api
+ called_retry = args[0]
+
+ # We only care about the calls made during the job.result() polling
+ if kwargs.get("method") == "GET" and "jobs/id_1" in kwargs.get("path", ""):
+ assert called_retry._predicate == expected_retry._predicate
+ assert called_retry._initial == expected_retry._initial
+ assert called_retry._maximum == expected_retry._maximum
+ assert called_retry._multiplier == expected_retry._multiplier
+ assert called_retry._deadline == expected_retry._deadline
+ if "retry" in result_retry_param:
+ # Specifically check the timeout for the custom retry case
+ assert called_retry._timeout == 10.0
+ else:
+ assert called_retry._timeout == expected_retry._timeout
+
+ # The number of api_request calls should still be 3
+ assert conn.api_request.call_count == 3
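
The load-job cases above forward the caller's `retry` straight into the polling calls. A hedged sketch of the caller-facing pattern, assuming an already-created load job; the job ID and location are placeholders:

    from google.cloud import bigquery
    from google.cloud.bigquery.retry import DEFAULT_RETRY

    client = bigquery.Client()  # assumes application default credentials

    # Look up an existing load job; "id_1" and "US" are placeholders.
    load_job = client.get_job("id_1", location="US")

    # Bound how long polling may retry transient API errors (10 s is illustrative).
    custom_retry = DEFAULT_RETRY.with_timeout(timeout=10.0)
    load_job.result(retry=custom_retry)

    print(load_job.state, load_job.output_rows)
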
diff --git a/tests/unit/test_legacy_types.py b/tests/unit/test_legacy_types.py
index 3f51cc511..75f3e77d7 100644
--- a/tests/unit/test_legacy_types.py
+++ b/tests/unit/test_legacy_types.py
@@ -13,9 +13,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import pytest
+
import warnings
+try:
+ import proto
+except ImportError:
+ proto = None # type: ignore
+
+@pytest.mark.skipif(proto is None, reason="proto is not installed")
def test_importing_legacy_types_emits_warning():
with warnings.catch_warnings(record=True) as warned:
from google.cloud.bigquery_v2 import types # noqa: F401
diff --git a/tests/unit/test_list_datasets.py b/tests/unit/test_list_datasets.py
index 6f0b55c5e..4ef99fd86 100644
--- a/tests/unit/test_list_datasets.py
+++ b/tests/unit/test_list_datasets.py
@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import mock
+from unittest import mock
+
import pytest
from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
diff --git a/tests/unit/test_list_jobs.py b/tests/unit/test_list_jobs.py
index 1db6b5668..edb85af0a 100644
--- a/tests/unit/test_list_jobs.py
+++ b/tests/unit/test_list_jobs.py
@@ -13,8 +13,8 @@
# limitations under the License.
import datetime
+from unittest import mock
-import mock
import pytest
from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
diff --git a/tests/unit/test_list_projects.py b/tests/unit/test_list_projects.py
index 190612b44..5260e5246 100644
--- a/tests/unit/test_list_projects.py
+++ b/tests/unit/test_list_projects.py
@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import mock
+from unittest import mock
+
import pytest
from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
diff --git a/tests/unit/test_magics.py b/tests/unit/test_magics.py
index ea8fe568f..c79e923f8 100644
--- a/tests/unit/test_magics.py
+++ b/tests/unit/test_magics.py
@@ -15,16 +15,17 @@
import copy
import re
from concurrent import futures
+from unittest import mock
import warnings
from google.api_core import exceptions
import google.auth.credentials
-import mock
import pytest
from tests.unit.helpers import make_connection
from test_utils.imports import maybe_fail_import
from google.cloud import bigquery
+from google.cloud.bigquery import exceptions as bq_exceptions
from google.cloud.bigquery import job
from google.cloud.bigquery import table
from google.cloud.bigquery.retry import DEFAULT_TIMEOUT
@@ -35,6 +36,7 @@
except ImportError:
magics = None
+
bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")
IPython = pytest.importorskip("IPython")
interactiveshell = pytest.importorskip("IPython.terminal.interactiveshell")
@@ -76,6 +78,19 @@ def ipython_ns_cleanup():
del ip.user_ns[name]
+@pytest.fixture(scope="session")
+def missing_bq_storage():
+ """Provide a patcher that can make the bigquery storage import to fail."""
+
+ def fail_if(name, globals, locals, fromlist, level):
+ # NOTE: *very* simplified, assuming a straightforward absolute import
+ return "bigquery_storage" in name or (
+ fromlist is not None and "bigquery_storage" in fromlist
+ )
+
+ return maybe_fail_import(predicate=fail_if)
+
+
@pytest.fixture(scope="session")
def missing_grpcio_lib():
"""Provide a patcher that can make the gapic library import to fail."""
@@ -141,9 +156,10 @@ def test_context_with_default_credentials():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_context_with_default_connection():
+def test_context_with_default_connection(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._credentials = None
magics.context._project = None
magics.context._connection = None
@@ -204,9 +220,10 @@ def test_context_credentials_and_project_can_be_set_explicitly():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_context_with_custom_connection():
+def test_context_with_custom_connection(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
magics.context._credentials = None
context_conn = magics.context._connection = make_connection(
@@ -278,7 +295,6 @@ def test__run_query():
assert len(execution_updates) == 3 # one update per API response
for line in execution_updates:
assert re.match("Query executing: .*s", line)
- assert re.match("Query complete after .*s", updates[-1])
def test__run_query_dry_run_without_errors_is_silent():
@@ -311,6 +327,9 @@ def test__make_bqstorage_client_false():
assert got is None
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
def test__make_bqstorage_client_true():
credentials_mock = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
@@ -322,6 +341,61 @@ def test__make_bqstorage_client_true():
assert isinstance(got, bigquery_storage.BigQueryReadClient)
+def test__make_bqstorage_client_true_raises_import_error(missing_bq_storage):
+ """When package `google-cloud-bigquery-storage` is not installed, reports
+ ImportError.
+ """
+ credentials_mock = mock.create_autospec(
+ google.auth.credentials.Credentials, instance=True
+ )
+ test_client = bigquery.Client(
+ project="test_project", credentials=credentials_mock, location="test_location"
+ )
+
+ with pytest.raises(ImportError) as exc_context, missing_bq_storage:
+ magics._make_bqstorage_client(test_client, True, {})
+
+ error_msg = str(exc_context.value)
+ assert "google-cloud-bigquery-storage" in error_msg
+ assert "pyarrow" in error_msg
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
+def test__make_bqstorage_client_true_obsolete_dependency():
+ """When package `google-cloud-bigquery-storage` is installed but has outdated
+ version, returns None, and raises a warning.
+ """
+ credentials_mock = mock.create_autospec(
+ google.auth.credentials.Credentials, instance=True
+ )
+ test_client = bigquery.Client(
+ project="test_project", credentials=credentials_mock, location="test_location"
+ )
+
+ patcher = mock.patch(
+ "google.cloud.bigquery._versions_helpers.BQ_STORAGE_VERSIONS.try_import",
+ side_effect=bq_exceptions.LegacyBigQueryStorageError(
+ "google-cloud-bigquery-storage is outdated"
+ ),
+ )
+ with patcher, warnings.catch_warnings(record=True) as warned:
+ got = magics._make_bqstorage_client(test_client, True, {})
+
+ assert got is None
+
+ matching_warnings = [
+ warning
+ for warning in warned
+ if "google-cloud-bigquery-storage is outdated" in str(warning)
+ ]
+ assert matching_warnings, "Obsolete dependency warning not raised."
+
+
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test__make_bqstorage_client_true_missing_gapic(missing_grpcio_lib):
credentials_mock = mock.create_autospec(
@@ -368,18 +442,20 @@ def test__create_dataset_if_necessary_not_exist():
@pytest.mark.usefixtures("ipython_interactive")
def test_extension_load():
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
- # verify that the magic is registered and has the correct source
- magic = ip.magics_manager.magics["cell"].get("bigquery")
- assert magic.__module__ == "google.cloud.bigquery.magics.magics"
+ with pytest.warns(FutureWarning, match="bigquery_magics"):
+ bigquery.load_ipython_extension(ip)
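
The FutureWarning matched here points at the standalone `bigquery-magics` distribution. A hedged sketch of the forward-looking way to load the cell magic once that package is installed; the package and extension names are inferred from the warning text rather than stated in this diff:

    import IPython

    ip = IPython.get_ipython()
    # Preferred going forward (requires the `bigquery-magics` package):
    ip.run_line_magic("load_ext", "bigquery_magics")
    # Loading through google-cloud-bigquery still works, but emits a FutureWarning:
    # bigquery.load_ipython_extension(ip)
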
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
def test_bigquery_magic_without_optional_arguments(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
mock_credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -404,6 +480,7 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch):
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
)
+ magics.context.project = "unit-test-project"
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
)
@@ -420,9 +497,10 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch):
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_default_connection_user_agent():
+def test_bigquery_magic_default_connection_user_agent(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._connection = None
credentials_mock = mock.create_autospec(
@@ -439,15 +517,16 @@ def test_bigquery_magic_default_connection_user_agent():
with conn_patch as conn, run_query_patch, default_patch:
ip.run_cell_magic("bigquery", "", "SELECT 17 as num")
- client_info_arg = conn.call_args.kwargs.get("client_info")
+ client_info_arg = conn.call_args[1].get("client_info")
assert client_info_arg is not None
assert client_info_arg.user_agent == "ipython-" + IPython.__version__
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_with_legacy_sql():
+def test_bigquery_magic_with_legacy_sql(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -464,9 +543,10 @@ def test_bigquery_magic_with_legacy_sql():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_result_saved_to_variable(ipython_ns_cleanup):
+def test_bigquery_magic_with_result_saved_to_variable(ipython_ns_cleanup, monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -497,9 +577,10 @@ def test_bigquery_magic_with_result_saved_to_variable(ipython_ns_cleanup):
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_does_not_clear_display_in_verbose_mode():
+def test_bigquery_magic_does_not_clear_display_in_verbose_mode(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -518,9 +599,10 @@ def test_bigquery_magic_does_not_clear_display_in_verbose_mode():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_clears_display_in_non_verbose_mode():
+def test_bigquery_magic_clears_display_in_non_verbose_mode(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -539,11 +621,13 @@ def test_bigquery_magic_clears_display_in_non_verbose_mode():
@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch):
- pandas = pytest.importorskip("pandas")
-
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
mock_credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -572,9 +656,9 @@ def test_bigquery_magic_with_bqstorage_from_argument(monkeypatch):
google.cloud.bigquery.job.QueryJob, instance=True
)
query_job_mock.to_dataframe.return_value = result
- with run_query_patch as run_query_mock, bqstorage_client_patch, warnings.catch_warnings(
- record=True
- ) as warned:
+ with run_query_patch as run_query_mock, (
+ bqstorage_client_patch
+ ), warnings.catch_warnings(record=True) as warned:
run_query_mock.return_value = query_job_mock
return_value = ip.run_cell_magic("bigquery", "--use_bqstorage_api", sql)
@@ -588,7 +672,7 @@ def warning_match(warning):
assert len(expected_warnings) == 1
assert len(bqstorage_mock.call_args_list) == 1
- kwargs = bqstorage_mock.call_args_list[0].kwargs
+ kwargs = bqstorage_mock.call_args_list[0][1]
assert kwargs.get("credentials") is mock_credentials
client_info = kwargs.get("client_info")
assert client_info is not None
@@ -597,18 +681,22 @@ def warning_match(warning):
query_job_mock.to_dataframe.assert_called_once_with(
bqstorage_client=bqstorage_instance_mock,
create_bqstorage_client=mock.ANY,
- progress_bar_type="tqdm",
+ progress_bar_type="tqdm_notebook",
)
assert isinstance(return_value, pandas.DataFrame)
@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
def test_bigquery_magic_with_rest_client_requested(monkeypatch):
pandas = pytest.importorskip("pandas")
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
mock_credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -641,16 +729,17 @@ def test_bigquery_magic_with_rest_client_requested(monkeypatch):
query_job_mock.to_dataframe.assert_called_once_with(
bqstorage_client=None,
create_bqstorage_client=False,
- progress_bar_type="tqdm",
+ progress_bar_type="tqdm_notebook",
)
assert isinstance(return_value, pandas.DataFrame)
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_w_max_results_invalid():
+def test_bigquery_magic_w_max_results_invalid(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
credentials_mock = mock.create_autospec(
@@ -670,9 +759,10 @@ def test_bigquery_magic_w_max_results_invalid():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_w_max_results_valid_calls_queryjob_result():
+def test_bigquery_magic_w_max_results_valid_calls_queryjob_result(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
credentials_mock = mock.create_autospec(
@@ -704,9 +794,10 @@ def test_bigquery_magic_w_max_results_valid_calls_queryjob_result():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_w_max_results_query_job_results_fails():
+def test_bigquery_magic_w_max_results_query_job_results_fails(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
credentials_mock = mock.create_autospec(
@@ -732,16 +823,20 @@ def test_bigquery_magic_w_max_results_query_job_results_fails():
with pytest.raises(
OSError
- ), client_query_patch as client_query_mock, default_patch, close_transports_patch as close_transports:
+ ), client_query_patch as client_query_mock, (
+ default_patch
+ ), close_transports_patch as close_transports:
client_query_mock.return_value = query_job_mock
ip.run_cell_magic("bigquery", "--max_results=5", sql)
assert close_transports.called
-def test_bigquery_magic_w_table_id_invalid():
+@pytest.mark.usefixtures("ipython_interactive")
+def test_bigquery_magic_w_table_id_invalid(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
credentials_mock = mock.create_autospec(
@@ -768,9 +863,11 @@ def test_bigquery_magic_w_table_id_invalid():
assert "Traceback (most recent call last)" not in output
-def test_bigquery_magic_w_missing_query():
+@pytest.mark.usefixtures("ipython_interactive")
+def test_bigquery_magic_w_missing_query(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
credentials_mock = mock.create_autospec(
@@ -793,9 +890,10 @@ def test_bigquery_magic_w_missing_query():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup):
+def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup, monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
ipython_ns_cleanup.append((ip, "df"))
@@ -831,10 +929,14 @@ def test_bigquery_magic_w_table_id_and_destination_var(ipython_ns_cleanup):
@pytest.mark.usefixtures("ipython_interactive")
+@pytest.mark.skipif(
+ bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
+)
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_w_table_id_and_bqstorage_client():
+def test_bigquery_magic_w_table_id_and_bqstorage_client(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
credentials_mock = mock.create_autospec(
@@ -876,13 +978,15 @@ def test_bigquery_magic_w_table_id_and_bqstorage_client():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_dryrun_option_sets_job_config():
+def test_bigquery_magic_dryrun_option_sets_job_config(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ magics.context.project = "project-from-context"
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
)
@@ -897,12 +1001,14 @@ def test_bigquery_magic_dryrun_option_sets_job_config():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_dryrun_option_returns_query_job():
+def test_bigquery_magic_dryrun_option_returns_query_job(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ magics.context.project = "project-from-context"
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
)
@@ -921,13 +1027,17 @@ def test_bigquery_magic_dryrun_option_returns_query_job():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_dryrun_option_variable_error_message(ipython_ns_cleanup):
+def test_bigquery_magic_dryrun_option_variable_error_message(
+ ipython_ns_cleanup, monkeypatch
+):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ magics.context.project = "project-from-context"
ipython_ns_cleanup.append((ip, "q_job"))
run_query_patch = mock.patch(
@@ -948,12 +1058,16 @@ def test_bigquery_magic_dryrun_option_variable_error_message(ipython_ns_cleanup)
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(ipython_ns_cleanup):
+def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(
+ ipython_ns_cleanup, monkeypatch
+):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ magics.context.project = "project-from-context"
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
)
@@ -978,13 +1092,17 @@ def test_bigquery_magic_dryrun_option_saves_query_job_to_variable(ipython_ns_cle
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_saves_query_job_to_variable_on_error(ipython_ns_cleanup):
+def test_bigquery_magic_saves_query_job_to_variable_on_error(
+ ipython_ns_cleanup, monkeypatch
+):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ magics.context.project = "project-from-context"
ipython_ns_cleanup.append((ip, "result"))
client_query_patch = mock.patch(
@@ -1011,9 +1129,10 @@ def test_bigquery_magic_saves_query_job_to_variable_on_error(ipython_ns_cleanup)
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_w_maximum_bytes_billed_invalid():
+def test_bigquery_magic_w_maximum_bytes_billed_invalid(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
credentials_mock = mock.create_autospec(
@@ -1035,9 +1154,12 @@ def test_bigquery_magic_w_maximum_bytes_billed_invalid():
)
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, expected):
+def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(
+ param_value, expected, monkeypatch
+):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
# Set the default maximum bytes billed, so we know it's overridable by the param.
@@ -1075,9 +1197,10 @@ def test_bigquery_magic_w_maximum_bytes_billed_overrides_context(param_value, ex
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace():
+def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
magics.context.default_query_job_config.maximum_bytes_billed = 1337
@@ -1112,9 +1235,10 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_inplace():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter():
+def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
magics.context.default_query_job_config = job.QueryJobConfig(
@@ -1153,9 +1277,15 @@ def test_bigquery_magic_w_maximum_bytes_billed_w_context_setter():
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_bigquery_magic_with_no_query_cache(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
conn = make_connection()
monkeypatch.setattr(magics.context, "_connection", conn)
+ monkeypatch.setattr(
+ magics.context,
+ "credentials",
+ mock.create_autospec(google.auth.credentials.Credentials, instance=True),
+ )
monkeypatch.setattr(magics.context, "project", "project-from-context")
# --no_query_cache option should override context.
@@ -1183,7 +1313,8 @@ def test_bigquery_magic_with_no_query_cache(monkeypatch):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_context_with_no_query_cache_from_context(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
conn = make_connection()
monkeypatch.setattr(magics.context, "_connection", conn)
monkeypatch.setattr(magics.context, "project", "project-from-context")
@@ -1211,7 +1342,8 @@ def test_context_with_no_query_cache_from_context(monkeypatch):
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
magics.context.progress_bar_type = "tqdm_gui"
@@ -1235,6 +1367,8 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch):
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
)
+ magics.context.project = "unit-test-project"
+
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
)
@@ -1255,14 +1389,17 @@ def test_bigquery_magic_w_progress_bar_type_w_context_setter(monkeypatch):
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_with_progress_bar_type():
+def test_bigquery_magic_with_progress_bar_type(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.progress_bar_type = None
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
)
+ magics.context.project = "unit-test-project"
+
with run_query_patch as run_query_mock:
ip.run_cell_magic(
"bigquery", "--progress_bar_type=tqdm_gui", "SELECT 17 as num"
@@ -1275,9 +1412,10 @@ def test_bigquery_magic_with_progress_bar_type():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_with_project():
+def test_bigquery_magic_with_project(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
credentials_mock = mock.create_autospec(
@@ -1299,9 +1437,10 @@ def test_bigquery_magic_with_project():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup):
+def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup, monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._connection = None
run_query_patch = mock.patch(
@@ -1321,9 +1460,10 @@ def test_bigquery_magic_with_bigquery_api_endpoint(ipython_ns_cleanup):
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_with_bigquery_api_endpoint_context_dict():
+def test_bigquery_magic_with_bigquery_api_endpoint_context_dict(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._connection = None
magics.context.bigquery_client_options = {}
@@ -1344,9 +1484,10 @@ def test_bigquery_magic_with_bigquery_api_endpoint_context_dict():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup):
+def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup, monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._connection = None
run_query_patch = mock.patch(
@@ -1366,9 +1507,10 @@ def test_bigquery_magic_with_bqstorage_api_endpoint(ipython_ns_cleanup):
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict():
+def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._connection = None
magics.context.bqstorage_client_options = {}
@@ -1389,9 +1531,10 @@ def test_bigquery_magic_with_bqstorage_api_endpoint_context_dict():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_with_multiple_options():
+def test_bigquery_magic_with_multiple_options(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
credentials_mock = mock.create_autospec(
@@ -1421,9 +1564,10 @@ def test_bigquery_magic_with_multiple_options():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_string_params(ipython_ns_cleanup):
+def test_bigquery_magic_with_string_params(ipython_ns_cleanup, monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1438,6 +1582,8 @@ def test_bigquery_magic_with_string_params(ipython_ns_cleanup):
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
)
+ magics.context.project = "unit-test-project"
+
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
)
@@ -1458,9 +1604,10 @@ def test_bigquery_magic_with_string_params(ipython_ns_cleanup):
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_dict_params(ipython_ns_cleanup):
+def test_bigquery_magic_with_dict_params(ipython_ns_cleanup, monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1477,6 +1624,8 @@ def test_bigquery_magic_with_dict_params(ipython_ns_cleanup):
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
)
+ magics.context.project = "unit-test-project"
+
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
)
@@ -1502,9 +1651,10 @@ def test_bigquery_magic_with_dict_params(ipython_ns_cleanup):
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_dict_params_nonexisting():
+def test_bigquery_magic_with_dict_params_nonexisting(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1517,9 +1667,10 @@ def test_bigquery_magic_with_dict_params_nonexisting():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_dict_params_incorrect_syntax():
+def test_bigquery_magic_with_dict_params_incorrect_syntax(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1533,9 +1684,10 @@ def test_bigquery_magic_with_dict_params_incorrect_syntax():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_dict_params_duplicate():
+def test_bigquery_magic_with_dict_params_duplicate(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1551,12 +1703,14 @@ def test_bigquery_magic_with_dict_params_duplicate():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_option_value_incorrect():
+def test_bigquery_magic_with_option_value_incorrect(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ magics.context.project = "unit-test-project"
sql = "SELECT @foo AS foo"
@@ -1567,9 +1721,12 @@ def test_bigquery_magic_with_option_value_incorrect():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_dict_params_negative_value(ipython_ns_cleanup):
+def test_bigquery_magic_with_dict_params_negative_value(
+ ipython_ns_cleanup, monkeypatch
+):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1584,6 +1741,8 @@ def test_bigquery_magic_with_dict_params_negative_value(ipython_ns_cleanup):
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
)
+ magics.context.project = "unit-test-project"
+
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
)
@@ -1607,9 +1766,10 @@ def test_bigquery_magic_with_dict_params_negative_value(ipython_ns_cleanup):
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup):
+def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup, monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1624,6 +1784,8 @@ def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup):
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
)
+ magics.context.project = "unit-test-project"
+
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
)
@@ -1647,9 +1809,10 @@ def test_bigquery_magic_with_dict_params_array_value(ipython_ns_cleanup):
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup):
+def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup, monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1664,6 +1827,8 @@ def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup):
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
)
+ magics.context.project = "unit-test-project"
+
query_job_mock = mock.create_autospec(
google.cloud.bigquery.job.QueryJob, instance=True
)
@@ -1687,9 +1852,10 @@ def test_bigquery_magic_with_dict_params_tuple_value(ipython_ns_cleanup):
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_improperly_formatted_params():
+def test_bigquery_magic_with_improperly_formatted_params(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1705,12 +1871,16 @@ def test_bigquery_magic_with_improperly_formatted_params():
)
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_valid_query_in_existing_variable(ipython_ns_cleanup, raw_sql):
+def test_bigquery_magic_valid_query_in_existing_variable(
+ ipython_ns_cleanup, raw_sql, monkeypatch
+):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ magics.context.project = "unit-test-project"
ipython_ns_cleanup.append((ip, "custom_query"))
ipython_ns_cleanup.append((ip, "query_results_df"))
@@ -1744,12 +1914,14 @@ def test_bigquery_magic_valid_query_in_existing_variable(ipython_ns_cleanup, raw
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_nonexisting_query_variable():
+def test_bigquery_magic_nonexisting_query_variable(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ magics.context.project = "unit-test-project"
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
@@ -1768,13 +1940,14 @@ def test_bigquery_magic_nonexisting_query_variable():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_empty_query_variable_name():
+def test_bigquery_magic_empty_query_variable_name(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
-
+ magics.context.project = "unit-test-project"
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
)
@@ -1790,12 +1963,14 @@ def test_bigquery_magic_empty_query_variable_name():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup):
+def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup, monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ magics.context.project = "unit-test-project"
run_query_patch = mock.patch(
"google.cloud.bigquery.magics.magics._run_query", autospec=True
@@ -1816,16 +1991,22 @@ def test_bigquery_magic_query_variable_non_string(ipython_ns_cleanup):
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_query_variable_not_identifier():
+def test_bigquery_magic_query_variable_not_identifier(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
+ magics.context.project = "unit-test-project"
cell_body = "$123foo" # 123foo is not valid Python identifier
- with io.capture_output() as captured_io:
+ run_query_patch = mock.patch(
+ "google.cloud.bigquery.magics.magics._run_query", autospec=True
+ )
+
+ with run_query_patch, io.capture_output() as captured_io:
ip.run_cell_magic("bigquery", "", cell_body)
# If "$" prefixes a string that is not a Python identifier, we do not treat such
@@ -1839,9 +2020,10 @@ def test_bigquery_magic_query_variable_not_identifier():
@pytest.mark.usefixtures("ipython_interactive")
@pytest.mark.skipif(pandas is None, reason="Requires `pandas`")
-def test_bigquery_magic_with_invalid_multiple_option_values():
+def test_bigquery_magic_with_invalid_multiple_option_values(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1856,9 +2038,10 @@ def test_bigquery_magic_with_invalid_multiple_option_values():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_omits_tracebacks_from_error_message():
+def test_bigquery_magic_omits_tracebacks_from_error_message(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
credentials_mock = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
@@ -1883,9 +2066,10 @@ def test_bigquery_magic_omits_tracebacks_from_error_message():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_w_destination_table_invalid_format():
+def test_bigquery_magic_w_destination_table_invalid_format(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context._project = None
credentials_mock = mock.create_autospec(
@@ -1911,9 +2095,10 @@ def test_bigquery_magic_w_destination_table_invalid_format():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_w_destination_table():
+def test_bigquery_magic_w_destination_table(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1943,9 +2128,10 @@ def test_bigquery_magic_w_destination_table():
@pytest.mark.usefixtures("ipython_interactive")
-def test_bigquery_magic_create_dataset_fails():
+def test_bigquery_magic_create_dataset_fails(monkeypatch):
ip = IPython.get_ipython()
- ip.extension_manager.load_extension("google.cloud.bigquery")
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
magics.context.credentials = mock.create_autospec(
google.auth.credentials.Credentials, instance=True
)
@@ -1970,3 +2156,22 @@ def test_bigquery_magic_create_dataset_fails():
)
assert close_transports.called
+
+
+@pytest.mark.usefixtures("ipython_interactive")
+def test_bigquery_magic_with_location(monkeypatch):
+ ip = IPython.get_ipython()
+ monkeypatch.setattr(bigquery, "bigquery_magics", None)
+ bigquery.load_ipython_extension(ip)
+ magics.context.credentials = mock.create_autospec(
+ google.auth.credentials.Credentials, instance=True
+ )
+
+ run_query_patch = mock.patch(
+ "google.cloud.bigquery.magics.magics._run_query", autospec=True
+ )
+ with run_query_patch as run_query_mock:
+ ip.run_cell_magic("bigquery", "--location=us-east1", "SELECT 17 AS num")
+
+ client_options_used = run_query_mock.call_args_list[0][0][0]
+ assert client_options_used.location == "us-east1"
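
For reference, the setup repeated throughout the magics tests above is: pretend the optional bigquery-magics integration is absent, then register the in-package %%bigquery magic directly. A minimal, illustrative sketch of that setup follows; the helper name is hypothetical, while the monkeypatch target and load_ipython_extension call mirror the tests above.

# Illustrative sketch only: the per-test setup used above, factored out.
from google.cloud import bigquery


def load_legacy_bigquery_magic(ip, monkeypatch):
    # Pretend the optional ``bigquery-magics`` package is not installed so the
    # in-package %%bigquery magic implementation is the one exercised.
    monkeypatch.setattr(bigquery, "bigquery_magics", None)
    bigquery.load_ipython_extension(ip)
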
diff --git a/tests/unit/test_opentelemetry_tracing.py b/tests/unit/test_opentelemetry_tracing.py
index 3021a3dbf..57132a1b9 100644
--- a/tests/unit/test_opentelemetry_tracing.py
+++ b/tests/unit/test_opentelemetry_tracing.py
@@ -15,12 +15,11 @@
import datetime
import importlib
import sys
-
-import mock
+from unittest import mock
try:
import opentelemetry
-except ImportError: # pragma: NO COVER
+except ImportError:
opentelemetry = None
if opentelemetry is not None:
@@ -43,7 +42,6 @@
TEST_SPAN_ATTRIBUTES = {"foo": "baz"}
-@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`")
@pytest.fixture
def setup():
importlib.reload(opentelemetry_tracing)
@@ -143,6 +141,8 @@ def test_default_job_attributes(setup):
"timeEnded": ended_time.isoformat(),
"hasErrors": True,
"state": "some_job_state",
+ "total_bytes_billed": 42,
+ "total_bytes_processed": 13,
}
with mock.patch("google.cloud.bigquery.job._AsyncJob") as test_job_ref:
test_job_ref.job_id = "test_job_id"
@@ -155,6 +155,8 @@ def test_default_job_attributes(setup):
test_job_ref.ended = ended_time
test_job_ref.error_result = error_result
test_job_ref.state = "some_job_state"
+ test_job_ref.total_bytes_billed = 42
+ test_job_ref.total_bytes_processed = 13
with opentelemetry_tracing.create_span(
TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref
@@ -164,6 +166,34 @@ def test_default_job_attributes(setup):
assert span.attributes == expected_attributes
+@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`")
+def test_optional_job_attributes(setup):
+ # This test ensures we don't propagate unset values into span attributes
+ import google.cloud._helpers
+
+ time_created = datetime.datetime(
+ 2010, 5, 19, 16, 0, 0, tzinfo=google.cloud._helpers.UTC
+ )
+
+ with mock.patch("google.cloud.bigquery.job._AsyncJob") as test_job_ref:
+ test_job_ref.job_id = "test_job_id"
+ test_job_ref.location = None
+ test_job_ref.project = "test_project_id"
+ test_job_ref.created = time_created
+ test_job_ref.state = "some_job_state"
+ test_job_ref.num_child_jobs = None
+ test_job_ref.parent_job_id = None
+ test_job_ref.total_bytes_billed = None
+ test_job_ref.total_bytes_processed = None
+
+ with opentelemetry_tracing.create_span(
+ TEST_SPAN_NAME, attributes=TEST_SPAN_ATTRIBUTES, job_ref=test_job_ref
+ ) as span:
+ assert span is not None
+ for val in span.attributes.values():
+ assert val is not None
+
+
@pytest.mark.skipif(opentelemetry is None, reason="Require `opentelemetry`")
def test_default_no_data_leakage(setup):
import google.auth.credentials
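
test_optional_job_attributes above asserts that unset job fields never become span attributes. As a hedged illustration of that contract (not the library's actual internal helper; the function name here is made up), the filtering amounts to dropping None values before they reach the tracer:

# Illustrative sketch of the behavior asserted above: None-valued job
# attributes are filtered out before being attached to a span.
from typing import Any, Dict, Optional


def non_null_attributes(attributes: Dict[str, Optional[Any]]) -> Dict[str, Any]:
    return {key: value for key, value in attributes.items() if value is not None}


assert non_null_attributes({"job_id": "abc", "num_child_jobs": None}) == {"job_id": "abc"}
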
diff --git a/tests/unit/test_packaging.py b/tests/unit/test_packaging.py
new file mode 100644
index 000000000..6f1b16c66
--- /dev/null
+++ b/tests/unit/test_packaging.py
@@ -0,0 +1,37 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import subprocess
+import sys
+
+
+def test_namespace_package_compat(tmp_path):
+ # The ``google`` namespace package should not be masked
+ # by the presence of ``google-cloud-bigquery``.
+ google = tmp_path / "google"
+ google.mkdir()
+ google.joinpath("othermod.py").write_text("")
+ env = dict(os.environ, PYTHONPATH=str(tmp_path))
+ cmd = [sys.executable, "-m", "google.othermod"]
+ subprocess.check_call(cmd, env=env)
+
+ # The ``google.cloud`` namespace package should not be masked
+ # by the presence of ``google-cloud-bigquery``.
+ google_cloud = tmp_path / "google" / "cloud"
+ google_cloud.mkdir()
+ google_cloud.joinpath("othermod.py").write_text("")
+ env = dict(os.environ, PYTHONPATH=str(tmp_path))
+ cmd = [sys.executable, "-m", "google.cloud.othermod"]
+ subprocess.check_call(cmd, env=env)
diff --git a/tests/unit/test_query.py b/tests/unit/test_query.py
index 4b687152f..adb43bcd9 100644
--- a/tests/unit/test_query.py
+++ b/tests/unit/test_query.py
@@ -15,8 +15,7 @@
import datetime
import decimal
import unittest
-
-import mock
+from unittest import mock
class Test_UDFResource(unittest.TestCase):
@@ -376,6 +375,100 @@ def test_repr_all_optional_attrs(self):
self.assertEqual(repr(param_type), expected)
+class Test_RangeQueryParameterType(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ return RangeQueryParameterType
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor_str(self):
+ param_type = self._make_one("DATE", name="foo", description="bar")
+ self.assertEqual(param_type.type_._type, "DATE")
+ self.assertEqual(param_type.name, "foo")
+ self.assertEqual(param_type.description, "bar")
+
+ def test_ctor_type(self):
+ from google.cloud.bigquery import ScalarQueryParameterType
+
+ scalar_type = ScalarQueryParameterType("DATE")
+ param_type = self._make_one(scalar_type, name="foo", description="bar")
+ self.assertEqual(param_type.type_._type, "DATE")
+ self.assertEqual(param_type.name, "foo")
+ self.assertEqual(param_type.description, "bar")
+
+ def test_ctor_unsupported_type_str(self):
+ with self.assertRaises(ValueError):
+ self._make_one("TIME")
+
+ def test_ctor_unsupported_type_type(self):
+ from google.cloud.bigquery import ScalarQueryParameterType
+
+ scalar_type = ScalarQueryParameterType("TIME")
+ with self.assertRaises(ValueError):
+ self._make_one(scalar_type)
+
+ def test_ctor_wrong_type(self):
+ with self.assertRaises(ValueError):
+ self._make_one(None)
+
+ def test_from_api_repr(self):
+ RESOURCE = {
+ "type": "RANGE",
+ "rangeElementType": {"type": "DATE"},
+ }
+
+ klass = self._get_target_class()
+ result = klass.from_api_repr(RESOURCE)
+ self.assertEqual(result.type_._type, "DATE")
+ self.assertIsNone(result.name)
+ self.assertIsNone(result.description)
+
+ def test_to_api_repr(self):
+ EXPECTED = {
+ "type": "RANGE",
+ "rangeElementType": {"type": "DATE"},
+ }
+ param_type = self._make_one("DATE", name="foo", description="bar")
+ result = param_type.to_api_repr()
+ self.assertEqual(result, EXPECTED)
+
+ def test__repr__(self):
+ param_type = self._make_one("DATE", name="foo", description="bar")
+ param_repr = "RangeQueryParameterType(ScalarQueryParameterType('DATE'), name='foo', description='bar')"
+ self.assertEqual(repr(param_type), param_repr)
+
+ def test__eq__(self):
+ param_type1 = self._make_one("DATE", name="foo", description="bar")
+ self.assertEqual(param_type1, param_type1)
+ self.assertNotEqual(param_type1, object())
+
+ alias = self._make_one("DATE", name="foo", description="bar")
+ self.assertIsNot(param_type1, alias)
+ self.assertEqual(param_type1, alias)
+
+ wrong_type = self._make_one("DATETIME", name="foo", description="bar")
+ self.assertNotEqual(param_type1, wrong_type)
+
+        wrong_name = self._make_one("DATE", name="foo2", description="bar")
+ self.assertNotEqual(param_type1, wrong_name)
+
+        wrong_description = self._make_one("DATE", name="foo", description="bar2")
+ self.assertNotEqual(param_type1, wrong_description)
+
+ def test_with_name(self):
+ param_type1 = self._make_one("DATE", name="foo", description="bar")
+ param_type2 = param_type1.with_name("foo2")
+
+ self.assertIsNot(param_type1, param_type2)
+ self.assertEqual(param_type2.type_._type, "DATE")
+ self.assertEqual(param_type2.name, "foo2")
+ self.assertEqual(param_type2.description, "bar")
+
+
class Test__AbstractQueryParameter(unittest.TestCase):
@staticmethod
def _get_target_class():
@@ -544,9 +637,9 @@ def test_to_api_repr_w_timestamp_datetime(self):
self.assertEqual(param.to_api_repr(), EXPECTED)
def test_to_api_repr_w_timestamp_micros(self):
- from google.cloud._helpers import _microseconds_from_datetime
+ from google.cloud._helpers import _microseconds_from_datetime, UTC
- now = datetime.datetime.utcnow()
+ now = datetime.datetime.now(UTC)
seconds = _microseconds_from_datetime(now) / 1.0e6
EXPECTED = {
"parameterType": {"type": "TIMESTAMP"},
@@ -557,9 +650,9 @@ def test_to_api_repr_w_timestamp_micros(self):
self.assertEqual(param.to_api_repr(), EXPECTED)
def test_to_api_repr_w_datetime_datetime(self):
- from google.cloud._helpers import _datetime_to_rfc3339
+ from google.cloud._helpers import _datetime_to_rfc3339, UTC
- now = datetime.datetime.utcnow()
+ now = datetime.datetime.now(UTC)
EXPECTED = {
"parameterType": {"type": "DATETIME"},
"parameterValue": {
@@ -571,9 +664,9 @@ def test_to_api_repr_w_datetime_datetime(self):
self.assertEqual(param.to_api_repr(), EXPECTED)
def test_to_api_repr_w_datetime_string(self):
- from google.cloud._helpers import _datetime_to_rfc3339
+ from google.cloud._helpers import _datetime_to_rfc3339, UTC
- now = datetime.datetime.utcnow()
+ now = datetime.datetime.now(UTC)
now_str = _datetime_to_rfc3339(now)
EXPECTED = {
"parameterType": {"type": "DATETIME"},
@@ -663,6 +756,461 @@ def test___repr__(self):
self.assertEqual(repr(field1), expected)
+class Test_RangeQueryParameter(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.query import RangeQueryParameter
+
+ return RangeQueryParameter
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ def test_ctor(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="DATE")
+ param = self._make_one(
+ range_element_type="DATE", start="2016-08-11", name="foo"
+ )
+ self.assertEqual(param.name, "foo")
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertEqual(param.start, "2016-08-11")
+ self.assertIs(param.end, None)
+
+ def test_ctor_w_datetime_query_parameter_type_str(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="DATETIME")
+ start_datetime = datetime.datetime(year=2020, month=12, day=31, hour=12)
+ end_datetime = datetime.datetime(year=2021, month=12, day=31, hour=12)
+ param = self._make_one(
+ range_element_type="DATETIME",
+ start=start_datetime,
+ end=end_datetime,
+ name="foo",
+ )
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertEqual(param.start, start_datetime)
+ self.assertEqual(param.end, end_datetime)
+ self.assertEqual(param.name, "foo")
+
+ def test_ctor_w_datetime_query_parameter_type_type(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="DATETIME")
+ param = self._make_one(range_element_type=range_element_type)
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertEqual(param.start, None)
+ self.assertEqual(param.end, None)
+ self.assertEqual(param.name, None)
+
+    def test_ctor_w_timestamp_query_parameter_type_str(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="TIMESTAMP")
+ start_datetime = datetime.datetime(year=2020, month=12, day=31, hour=12)
+ end_datetime = datetime.datetime(year=2021, month=12, day=31, hour=12)
+ param = self._make_one(
+ range_element_type="TIMESTAMP",
+ start=start_datetime,
+ end=end_datetime,
+ name="foo",
+ )
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertEqual(param.start, start_datetime)
+ self.assertEqual(param.end, end_datetime)
+ self.assertEqual(param.name, "foo")
+
+ def test_ctor_w_timestamp_query_parameter_type_type(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="TIMESTAMP")
+ param = self._make_one(range_element_type=range_element_type)
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertEqual(param.start, None)
+ self.assertEqual(param.end, None)
+ self.assertEqual(param.name, None)
+
+ def test_ctor_w_date_query_parameter_type_str(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="DATE")
+ start_date = datetime.date(year=2020, month=12, day=31)
+ end_date = datetime.date(year=2021, month=12, day=31)
+ param = self._make_one(
+ range_element_type="DATE",
+ start=start_date,
+ end=end_date,
+ name="foo",
+ )
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertEqual(param.start, start_date)
+ self.assertEqual(param.end, end_date)
+ self.assertEqual(param.name, "foo")
+
+ def test_ctor_w_date_query_parameter_type_type(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="DATE")
+ param = self._make_one(range_element_type=range_element_type)
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertEqual(param.start, None)
+ self.assertEqual(param.end, None)
+ self.assertEqual(param.name, None)
+
+ def test_ctor_w_name_empty_str(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="DATE")
+ param = self._make_one(
+ range_element_type="DATE",
+ name="",
+ )
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertIs(param.start, None)
+ self.assertIs(param.end, None)
+ self.assertEqual(param.name, "")
+
+ def test_ctor_wo_value(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="DATETIME")
+ param = self._make_one(range_element_type="DATETIME", name="foo")
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertIs(param.start, None)
+ self.assertIs(param.end, None)
+ self.assertEqual(param.name, "foo")
+
+ def test_ctor_w_unsupported_query_parameter_type_str(self):
+ with self.assertRaises(ValueError):
+ self._make_one(range_element_type="TIME", name="foo")
+
+ def test_ctor_w_unsupported_query_parameter_type_type(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="DATE")
+ range_element_type.type_._type = "TIME"
+ with self.assertRaises(ValueError):
+ self._make_one(range_element_type=range_element_type, name="foo")
+
+ def test_ctor_w_unsupported_query_parameter_type_input(self):
+ with self.assertRaises(ValueError):
+ self._make_one(range_element_type=None, name="foo")
+
+ def test_positional(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ range_element_type = RangeQueryParameterType(type_="DATE")
+ klass = self._get_target_class()
+ param = klass.positional(
+ range_element_type="DATE", start="2016-08-11", end="2016-08-12"
+ )
+ self.assertIs(param.name, None)
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertEqual(param.start, "2016-08-11")
+ self.assertEqual(param.end, "2016-08-12")
+
+ def test_from_api_repr_w_name(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ RESOURCE = {
+ "name": "foo",
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "DATE",
+ },
+ },
+ "parameterValue": {
+ "rangeValue": {"start": {"value": None}, "end": {"value": "2020-12-31"}}
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+ range_element_type = RangeQueryParameterType(type_="DATE")
+ self.assertEqual(param.name, "foo")
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertEqual(param.start, None)
+ self.assertEqual(param.end, "2020-12-31")
+
+ def test_from_api_repr_wo_name(self):
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ RESOURCE = {
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "DATE",
+ },
+ },
+ "parameterValue": {
+ "rangeValue": {"start": {"value": None}, "end": {"value": "2020-12-31"}}
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+ range_element_type = RangeQueryParameterType(type_="DATE")
+ self.assertEqual(param.name, None)
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertEqual(param.start, None)
+ self.assertEqual(param.end, "2020-12-31")
+
+ def test_from_api_repr_wo_value(self):
+ # Back-end may not send back values for None params. See #9027
+ from google.cloud.bigquery.query import RangeQueryParameterType
+
+ RESOURCE = {
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "DATE",
+ },
+ },
+ }
+ range_element_type = RangeQueryParameterType(type_="DATE")
+ klass = self._get_target_class()
+ param = klass.from_api_repr(RESOURCE)
+ self.assertIs(param.name, None)
+ self.assertEqual(param.range_element_type, range_element_type)
+ self.assertIs(param.start, None)
+ self.assertIs(param.end, None)
+
+ def test_to_api_repr_w_name(self):
+ EXPECTED = {
+ "name": "foo",
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "DATE",
+ },
+ },
+ "parameterValue": {
+ "rangeValue": {"start": {"value": None}, "end": {"value": "2016-08-11"}}
+ },
+ }
+ param = self._make_one(range_element_type="DATE", end="2016-08-11", name="foo")
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_wo_name(self):
+ EXPECTED = {
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "DATE",
+ },
+ },
+ "parameterValue": {
+ "rangeValue": {"start": {"value": None}, "end": {"value": "2016-08-11"}}
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(range_element_type="DATE", end="2016-08-11")
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_date_date(self):
+ today = datetime.date.today()
+ today_str = today.strftime("%Y-%m-%d")
+ EXPECTED = {
+ "name": "foo",
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "DATE",
+ },
+ },
+ "parameterValue": {
+ "rangeValue": {"start": {"value": None}, "end": {"value": today_str}}
+ },
+ }
+ param = self._make_one(range_element_type="DATE", end=today, name="foo")
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_datetime_str(self):
+ EXPECTED = {
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "DATETIME",
+ },
+ },
+ "parameterValue": {
+ "rangeValue": {
+ "start": {"value": None},
+ "end": {"value": "2020-01-01T12:00:00.000000"},
+ }
+ },
+ }
+ klass = self._get_target_class()
+ end_datetime = datetime.datetime(year=2020, month=1, day=1, hour=12)
+ param = klass.positional(range_element_type="DATETIME", end=end_datetime)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_datetime_datetime(self):
+ from google.cloud._helpers import UTC # type: ignore
+ from google.cloud.bigquery._helpers import _RFC3339_MICROS_NO_ZULU
+
+ now = datetime.datetime.now(UTC)
+ now_str = now.strftime(_RFC3339_MICROS_NO_ZULU)
+ EXPECTED = {
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "DATETIME",
+ },
+ },
+ "parameterValue": {
+ "rangeValue": {"start": {"value": None}, "end": {"value": now_str}}
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(range_element_type="DATETIME", end=now)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_timestamp_str(self):
+ EXPECTED = {
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "TIMESTAMP",
+ },
+ },
+ "parameterValue": {
+ "rangeValue": {
+ "start": {"value": None},
+ "end": {"value": "2020-01-01 12:00:00+00:00"},
+ }
+ },
+ }
+ klass = self._get_target_class()
+ end_timestamp = datetime.datetime(year=2020, month=1, day=1, hour=12)
+ param = klass.positional(range_element_type="TIMESTAMP", end=end_timestamp)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_w_timestamp_timestamp(self):
+ from google.cloud._helpers import UTC # type: ignore
+
+ now = datetime.datetime.now(UTC)
+ now = now.astimezone(UTC)
+ now_str = str(now)
+ EXPECTED = {
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "TIMESTAMP",
+ },
+ },
+ "parameterValue": {
+ "rangeValue": {"start": {"value": None}, "end": {"value": now_str}}
+ },
+ }
+ klass = self._get_target_class()
+ param = klass.positional(range_element_type="TIMESTAMP", end=now)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_wo_values(self):
+ EXPECTED = {
+ "name": "foo",
+ "parameterType": {
+ "type": "RANGE",
+ "rangeElementType": {
+ "type": "DATE",
+ },
+ },
+ "parameterValue": {
+ "rangeValue": {"start": {"value": None}, "end": {"value": None}}
+ },
+ }
+ param = self._make_one(range_element_type="DATE", name="foo")
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
+ def test_to_api_repr_unsupported_value_type(self):
+ with self.assertRaisesRegex(
+ ValueError, "Cannot convert range element value from type"
+ ):
+ range_param = self._make_one(
+ range_element_type="DATE", start=datetime.date.today()
+ )
+ range_param.range_element_type.type_._type = "LONG"
+ range_param.to_api_repr()
+
+ def test___eq__(self):
+ param = self._make_one(
+ range_element_type="DATE", start="2016-08-11", name="foo"
+ )
+ self.assertEqual(param, param)
+ self.assertNotEqual(param, object())
+ alias = self._make_one(
+ range_element_type="DATE", start="2016-08-11", name="bar"
+ )
+ self.assertNotEqual(param, alias)
+ wrong_type = self._make_one(
+ range_element_type="DATETIME",
+ start="2020-12-31 12:00:00.000000",
+ name="foo",
+ )
+ self.assertNotEqual(param, wrong_type)
+ wrong_val = self._make_one(
+ range_element_type="DATE", start="2016-08-12", name="foo"
+ )
+ self.assertNotEqual(param, wrong_val)
+
+ def test___eq___wrong_type(self):
+ param = self._make_one(
+ range_element_type="DATE", start="2016-08-11", name="foo"
+ )
+ other = object()
+ self.assertNotEqual(param, other)
+ self.assertEqual(param, mock.ANY)
+
+ def test___eq___name_mismatch(self):
+ param = self._make_one(
+ range_element_type="DATE", start="2016-08-11", name="foo"
+ )
+ other = self._make_one(
+ range_element_type="DATE", start="2016-08-11", name="bar"
+ )
+ self.assertNotEqual(param, other)
+
+ def test___eq___field_type_mismatch(self):
+ param = self._make_one(range_element_type="DATE")
+ other = self._make_one(range_element_type="DATETIME")
+ self.assertNotEqual(param, other)
+
+ def test___eq___value_mismatch(self):
+ param = self._make_one(range_element_type="DATE", start="2016-08-11")
+ other = self._make_one(range_element_type="DATE", start="2016-08-12")
+ self.assertNotEqual(param, other)
+
+ def test___eq___hit(self):
+ param = self._make_one(range_element_type="DATE", start="2016-08-12")
+ other = self._make_one(range_element_type="DATE", start="2016-08-12")
+ self.assertEqual(param, other)
+
+ def test___ne___wrong_type(self):
+ param = self._make_one(range_element_type="DATE")
+ other = object()
+ self.assertNotEqual(param, other)
+ self.assertEqual(param, mock.ANY)
+
+ def test___ne___same_value(self):
+ param1 = self._make_one(range_element_type="DATE")
+ param2 = self._make_one(range_element_type="DATE")
+ # unittest ``assertEqual`` uses ``==`` not ``!=``.
+ comparison_val = param1 != param2
+ self.assertFalse(comparison_val)
+
+ def test___ne___different_values(self):
+ param1 = self._make_one(range_element_type="DATE", start="2016-08-12")
+ param2 = self._make_one(range_element_type="DATE")
+ self.assertNotEqual(param1, param2)
+
+ def test___repr__(self):
+ param1 = self._make_one(range_element_type="DATE", start="2016-08-12")
+ expected = "RangeQueryParameter(None, {'type': 'RANGE', 'rangeElementType': {'type': 'DATE'}}, '2016-08-12', None)"
+ self.assertEqual(repr(param1), expected)
+
+
def _make_subparam(name, type_, value):
from google.cloud.bigquery.query import ScalarQueryParameter
@@ -1233,6 +1781,25 @@ def test_to_api_repr_w_nested_struct(self):
param = self._make_one("foo", scalar_1, sub)
self.assertEqual(param.to_api_repr(), EXPECTED)
+ def test_to_api_repr_w_unknown_type(self):
+ EXPECTED = {
+ "name": "foo",
+ "parameterType": {
+ "type": "STRUCT",
+ "structTypes": [
+ {"name": "bar", "type": {"type": "INT64"}},
+ {"name": "baz", "type": {"type": "UNKNOWN_TYPE"}},
+ ],
+ },
+ "parameterValue": {
+ "structValues": {"bar": {"value": "123"}, "baz": {"value": "abc"}}
+ },
+ }
+ sub_1 = _make_subparam("bar", "INT64", 123)
+ sub_2 = _make_subparam("baz", "UNKNOWN_TYPE", "abc")
+ param = self._make_one("foo", sub_1, sub_2)
+ self.assertEqual(param.to_api_repr(), EXPECTED)
+
def test___eq___wrong_type(self):
field = self._make_one("test", _make_subparam("bar", "STRING", "abc"))
other = object()
@@ -1281,7 +1848,6 @@ def test___repr__(self):
field1 = self._make_one("test", _make_subparam("field1", "STRING", "hello"))
got = repr(field1)
self.assertIn("StructQueryParameter", got)
- self.assertIn("'field1', 'STRING'", got)
self.assertIn("'field1': 'hello'", got)
@@ -1362,13 +1928,13 @@ def test_errors_present(self):
self.assertEqual(query.errors, ERRORS)
def test_job_id_missing(self):
- with self.assertRaises(ValueError):
- self._make_one({})
+ query = self._make_one({})
+ self.assertIsNone(query.job_id)
def test_job_id_broken_job_reference(self):
resource = {"jobReference": {"bogus": "BOGUS"}}
- with self.assertRaises(ValueError):
- self._make_one(resource)
+ query = self._make_one(resource)
+ self.assertIsNone(query.job_id)
def test_job_id_present(self):
resource = self._make_resource()
@@ -1376,6 +1942,16 @@ def test_job_id_present(self):
query = self._make_one(resource)
self.assertEqual(query.job_id, "custom-job")
+ def test_location_missing(self):
+ query = self._make_one({})
+ self.assertIsNone(query.location)
+
+ def test_location_present(self):
+ resource = self._make_resource()
+ resource["jobReference"]["location"] = "test-location"
+ query = self._make_one(resource)
+ self.assertEqual(query.location, "test-location")
+
def test_page_token_missing(self):
query = self._make_one(self._make_resource())
self.assertIsNone(query.page_token)
@@ -1386,6 +1962,16 @@ def test_page_token_present(self):
query = self._make_one(resource)
self.assertEqual(query.page_token, "TOKEN")
+ def test_query_id_missing(self):
+ query = self._make_one(self._make_resource())
+ self.assertIsNone(query.query_id)
+
+ def test_query_id_present(self):
+ resource = self._make_resource()
+ resource["queryId"] = "test-query-id"
+ query = self._make_one(resource)
+ self.assertEqual(query.query_id, "test-query-id")
+
def test_total_rows_present_integer(self):
resource = self._make_resource()
resource["totalRows"] = 42
@@ -1414,6 +2000,70 @@ def test_total_bytes_processed_present_string(self):
query = self._make_one(resource)
self.assertEqual(query.total_bytes_processed, 123456)
+ def test_slot_millis_missing(self):
+ query = self._make_one(self._make_resource())
+ self.assertIsNone(query.slot_millis)
+
+ def test_slot_millis_present_integer(self):
+ resource = self._make_resource()
+ resource["totalSlotMs"] = 123456
+ query = self._make_one(resource)
+ self.assertEqual(query.slot_millis, 123456)
+
+ def test_slot_millis_present_string(self):
+ resource = self._make_resource()
+ resource["totalSlotMs"] = "123456"
+ query = self._make_one(resource)
+ self.assertEqual(query.slot_millis, 123456)
+
+ def test_created_missing(self):
+ query = self._make_one(self._make_resource())
+ self.assertIsNone(query.created)
+
+ def test_created_present_integer(self):
+ resource = self._make_resource()
+ resource["creationTime"] = 1437767599006
+ query = self._make_one(resource)
+ self.assertEqual(query.created.timestamp() * 1000, 1437767599006)
+
+ def test_created_present_string(self):
+ resource = self._make_resource()
+ resource["creationTime"] = "1437767599006"
+ query = self._make_one(resource)
+ self.assertEqual(query.created.timestamp() * 1000, 1437767599006)
+
+ def test_started_missing(self):
+ query = self._make_one(self._make_resource())
+ self.assertIsNone(query.started)
+
+ def test_started_present_integer(self):
+ resource = self._make_resource()
+ resource["startTime"] = 1437767599006
+ query = self._make_one(resource)
+ self.assertEqual(query.started.timestamp() * 1000, 1437767599006)
+
+ def test_started_present_string(self):
+ resource = self._make_resource()
+ resource["startTime"] = "1437767599006"
+ query = self._make_one(resource)
+ self.assertEqual(query.started.timestamp() * 1000, 1437767599006)
+
+ def test_ended_missing(self):
+ query = self._make_one(self._make_resource())
+ self.assertIsNone(query.ended)
+
+ def test_ended_present_integer(self):
+ resource = self._make_resource()
+ resource["endTime"] = 1437767599006
+ query = self._make_one(resource)
+ self.assertEqual(query.ended.timestamp() * 1000, 1437767599006)
+
+ def test_ended_present_string(self):
+ resource = self._make_resource()
+ resource["endTime"] = "1437767599006"
+ query = self._make_one(resource)
+ self.assertEqual(query.ended.timestamp() * 1000, 1437767599006)
+
def test_num_dml_affected_rows_missing(self):
query = self._make_one(self._make_resource())
self.assertIsNone(query.num_dml_affected_rows)
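
The RangeQueryParameter tests above pin down construction and the API wire format. A hedged usage sketch follows, assuming only the keyword arguments those tests exercise; the project ID, parameter name, and SQL text are placeholders, not values from this diff.

# Hedged usage sketch: bind a RANGE<DATE> parameter to a query job config.
from google.cloud import bigquery
from google.cloud.bigquery.query import RangeQueryParameter

window = RangeQueryParameter(
    range_element_type="DATE",
    start="2016-08-11",
    end="2016-08-12",
    name="window",
)
job_config = bigquery.QueryJobConfig(query_parameters=[window])
# client = bigquery.Client(project="my-project")
# client.query("SELECT @window AS window", job_config=job_config)
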
diff --git a/tests/unit/test_retry.py b/tests/unit/test_retry.py
index e0a992f78..6e533c849 100644
--- a/tests/unit/test_retry.py
+++ b/tests/unit/test_retry.py
@@ -13,8 +13,8 @@
# limitations under the License.
import unittest
+from unittest import mock
-import mock
import requests.exceptions
@@ -79,6 +79,12 @@ def test_w_unstructured_too_many_requests(self):
exc = TooManyRequests("testing")
self.assertTrue(self._call_fut(exc))
+ def test_w_unstructured_service_unavailable(self):
+ from google.api_core.exceptions import ServiceUnavailable
+
+ exc = ServiceUnavailable("testing")
+ self.assertTrue(self._call_fut(exc))
+
def test_w_internalError(self):
exc = mock.Mock(errors=[{"reason": "internalError"}], spec=["errors"])
self.assertTrue(self._call_fut(exc))
@@ -119,6 +125,34 @@ def test_DEFAULT_JOB_RETRY_predicate():
def test_DEFAULT_JOB_RETRY_deadline():
+ from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY, DEFAULT_RETRY
+
+ # Make sure we can retry the job at least once.
+ assert DEFAULT_JOB_RETRY._deadline > DEFAULT_RETRY._deadline
+
+
+def test_DEFAULT_JOB_RETRY_job_rate_limit_exceeded_retry_predicate():
+ """Tests the retry predicate specifically for jobRateLimitExceeded."""
from google.cloud.bigquery.retry import DEFAULT_JOB_RETRY
+ from google.api_core.exceptions import ClientError
+
+ # Non-ClientError exceptions should never trigger a retry
+ assert not DEFAULT_JOB_RETRY._predicate(TypeError())
+
+ # ClientError without specific reason shouldn't trigger a retry
+ assert not DEFAULT_JOB_RETRY._predicate(ClientError("fail"))
- assert DEFAULT_JOB_RETRY._deadline == 600
+    # ClientError with an unrecognized reason ("idk") shouldn't trigger a retry
+ assert not DEFAULT_JOB_RETRY._predicate(
+ ClientError("fail", errors=[dict(reason="idk")])
+ )
+
+ # ClientError with reason "jobRateLimitExceeded" should trigger a retry
+ assert DEFAULT_JOB_RETRY._predicate(
+ ClientError("fail", errors=[dict(reason="jobRateLimitExceeded")])
+ )
+
+ # Other retryable reasons should still work as expected
+ assert DEFAULT_JOB_RETRY._predicate(
+ ClientError("fail", errors=[dict(reason="backendError")])
+ )
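
The retry test above enumerates which error reasons count as retryable for jobs. A minimal reason-based predicate in the same spirit is sketched below; it illustrates the idea being tested and is not the library's actual DEFAULT_JOB_RETRY predicate (the helper name and reason set are assumptions drawn from the assertions above).

# Illustrative reason-based predicate mirroring the cases asserted above.
from google.api_core.exceptions import ClientError

_RETRYABLE_REASONS = frozenset({"jobRateLimitExceeded", "backendError"})


def is_retryable_job_error(exc: BaseException) -> bool:
    # Only ClientError instances carry the structured ``errors`` payload.
    if not isinstance(exc, ClientError):
        return False
    errors = getattr(exc, "errors", None) or []
    return any(error.get("reason") in _RETRYABLE_REASONS for error in errors)


assert not is_retryable_job_error(TypeError())
assert is_retryable_job_error(ClientError("fail", errors=[{"reason": "backendError"}]))
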
diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py
index 6a547cb13..f61b22035 100644
--- a/tests/unit/test_schema.py
+++ b/tests/unit/test_schema.py
@@ -12,14 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from google.cloud import bigquery
-from google.cloud.bigquery.standard_sql import StandardSqlStructType
-from google.cloud.bigquery.schema import PolicyTagList
+import copy
import unittest
+from unittest import mock
-import mock
import pytest
+from google.cloud import bigquery
+from google.cloud.bigquery import enums
+from google.cloud.bigquery.standard_sql import StandardSqlStructType
+from google.cloud.bigquery import schema
+from google.cloud.bigquery.schema import PolicyTagList
+
class TestSchemaField(unittest.TestCase):
@staticmethod
@@ -45,8 +49,15 @@ def test_constructor_defaults(self):
self.assertIsNone(field.description)
self.assertEqual(field.fields, ())
self.assertIsNone(field.policy_tags)
+ self.assertIsNone(field.default_value_expression)
+ self.assertEqual(field.rounding_mode, None)
+ self.assertEqual(field.foreign_type_definition, None)
+ self.assertEqual(
+ field.timestamp_precision, enums.TimestampPrecision.MICROSECOND
+ )
def test_constructor_explicit(self):
+ FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field"
field = self._make_one(
"test",
"STRING",
@@ -58,10 +69,15 @@ def test_constructor_explicit(self):
"projects/f/locations/g/taxonomies/h/policyTags/i",
)
),
+ default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION,
+ rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED,
+ foreign_type_definition="INTEGER",
+ timestamp_precision=enums.TimestampPrecision.PICOSECOND,
)
self.assertEqual(field.name, "test")
self.assertEqual(field.field_type, "STRING")
self.assertEqual(field.mode, "REQUIRED")
+ self.assertEqual(field.default_value_expression, FIELD_DEFAULT_VALUE_EXPRESSION)
self.assertEqual(field.description, "Testing")
self.assertEqual(field.fields, ())
self.assertEqual(
@@ -73,6 +89,12 @@ def test_constructor_explicit(self):
)
),
)
+ self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")
+ self.assertEqual(field.foreign_type_definition, "INTEGER")
+ self.assertEqual(
+ field.timestamp_precision,
+ enums.TimestampPrecision.PICOSECOND,
+ )
def test_constructor_explicit_none(self):
field = self._make_one("test", "STRING", description=None, policy_tags=None)
@@ -93,17 +115,53 @@ def test_constructor_subfields(self):
self.assertEqual(field.fields[0], sub_field1)
self.assertEqual(field.fields[1], sub_field2)
- def test_to_api_repr(self):
- from google.cloud.bigquery.schema import PolicyTagList
+ def test_constructor_range(self):
+ from google.cloud.bigquery.schema import FieldElementType
+
+ field = self._make_one(
+ "test",
+ "RANGE",
+ mode="REQUIRED",
+ description="Testing",
+ range_element_type=FieldElementType("DATETIME"),
+ )
+ self.assertEqual(field.name, "test")
+ self.assertEqual(field.field_type, "RANGE")
+ self.assertEqual(field.mode, "REQUIRED")
+ self.assertEqual(field.description, "Testing")
+        self.assertEqual(field.range_element_type.element_type, "DATETIME")
+
+    def test_constructor_range_str(self):
+ field = self._make_one(
+ "test",
+ "RANGE",
+ mode="REQUIRED",
+ description="Testing",
+ range_element_type="DATETIME",
+ )
+ self.assertEqual(field.name, "test")
+ self.assertEqual(field.field_type, "RANGE")
+ self.assertEqual(field.mode, "REQUIRED")
+ self.assertEqual(field.description, "Testing")
+ self.assertEqual(field.range_element_type.element_type, "DATETIME")
+
+ def test_to_api_repr(self):
policy = PolicyTagList(names=("foo", "bar"))
self.assertEqual(
policy.to_api_repr(),
{"names": ["foo", "bar"]},
)
+ ROUNDINGMODE = enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED
+
field = self._make_one(
- "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy
+ "foo",
+ "INTEGER",
+ "NULLABLE",
+ description="hello world",
+ policy_tags=policy,
+ rounding_mode=ROUNDINGMODE,
+ foreign_type_definition=None,
)
self.assertEqual(
field.to_api_repr(),
@@ -113,6 +171,7 @@ def test_to_api_repr(self):
"type": "INTEGER",
"description": "hello world",
"policyTags": {"names": ["foo", "bar"]},
+ "roundingMode": "ROUNDING_MODE_UNSPECIFIED",
},
)
@@ -138,6 +197,23 @@ def test_to_api_repr_with_subfield(self):
},
)
+ def test_to_api_repr_w_timestamp_precision(self):
+ field = self._make_one(
+ "foo",
+ "TIMESTAMP",
+ "NULLABLE",
+ timestamp_precision=enums.TimestampPrecision.PICOSECOND,
+ )
+ self.assertEqual(
+ field.to_api_repr(),
+ {
+ "mode": "NULLABLE",
+ "name": "foo",
+ "type": "TIMESTAMP",
+ "timestampPrecision": 12,
+ },
+ )
+
def test_from_api_repr(self):
field = self._get_target_class().from_api_repr(
{
@@ -146,6 +222,8 @@ def test_from_api_repr(self):
"description": "test_description",
"name": "foo",
"type": "record",
+ "roundingMode": "ROUNDING_MODE_UNSPECIFIED",
+ "timestampPrecision": 12,
}
)
self.assertEqual(field.name, "foo")
@@ -156,6 +234,12 @@ def test_from_api_repr(self):
self.assertEqual(field.fields[0].name, "bar")
self.assertEqual(field.fields[0].field_type, "INTEGER")
self.assertEqual(field.fields[0].mode, "NULLABLE")
+ self.assertEqual(field.range_element_type, None)
+ self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED")
+ self.assertEqual(
+ field.timestamp_precision,
+ enums.TimestampPrecision.PICOSECOND,
+ )
def test_from_api_repr_policy(self):
field = self._get_target_class().from_api_repr(
@@ -174,6 +258,23 @@ def test_from_api_repr_policy(self):
self.assertEqual(field.fields[0].field_type, "INTEGER")
self.assertEqual(field.fields[0].mode, "NULLABLE")
+ def test_from_api_repr_range(self):
+ field = self._get_target_class().from_api_repr(
+ {
+ "mode": "nullable",
+ "description": "test_range",
+ "name": "foo",
+ "type": "range",
+ "rangeElementType": {"type": "DATETIME"},
+ }
+ )
+ self.assertEqual(field.name, "foo")
+ self.assertEqual(field.field_type, "RANGE")
+ self.assertEqual(field.mode, "NULLABLE")
+ self.assertEqual(field.description, "test_range")
+ self.assertEqual(len(field.fields), 0)
+ self.assertEqual(field.range_element_type.element_type, "DATETIME")
+
def test_from_api_repr_defaults(self):
field = self._get_target_class().from_api_repr(
{"name": "foo", "type": "record"}
@@ -182,13 +283,27 @@ def test_from_api_repr_defaults(self):
self.assertEqual(field.field_type, "RECORD")
self.assertEqual(field.mode, "NULLABLE")
self.assertEqual(len(field.fields), 0)
+ self.assertEqual(field.default_value_expression, None)
# Keys not present in API representation shouldn't be included in
# _properties.
self.assertIsNone(field.description)
self.assertIsNone(field.policy_tags)
+ self.assertIsNone(field.range_element_type)
self.assertNotIn("description", field._properties)
self.assertNotIn("policyTags", field._properties)
+ self.assertNotIn("rangeElementType", field._properties)
+
+ def test_from_api_repr_timestamp_precision_str(self):
+        # The backend returns the timestampPrecision field as a string, even
+        # if we send an integer. This test verifies that we convert it back to
+        # an integer so that resending the value to the API succeeds.
+ field = self._get_target_class().from_api_repr(
+ {
+ "timestampPrecision": "12",
+ }
+ )
+ self.assertEqual(field._properties["timestampPrecision"], 12)
def test_name_property(self):
name = "lemon-ness"
@@ -227,6 +342,44 @@ def test_fields_property(self):
schema_field = self._make_one("boat", "RECORD", fields=fields)
self.assertEqual(schema_field.fields, fields)
+ def test_roundingmode_property_str(self):
+ ROUNDINGMODE = "ROUND_HALF_AWAY_FROM_ZERO"
+ schema_field = self._make_one("test", "STRING", rounding_mode=ROUNDINGMODE)
+ self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE)
+
+ del schema_field
+ schema_field = self._make_one("test", "STRING")
+ schema_field._properties["roundingMode"] = ROUNDINGMODE
+ self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE)
+
+ def test_foreign_type_definition_property_str(self):
+ FOREIGN_TYPE_DEFINITION = "INTEGER"
+ schema_field = self._make_one(
+ "test", "STRING", foreign_type_definition=FOREIGN_TYPE_DEFINITION
+ )
+ self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION)
+
+ del schema_field
+ schema_field = self._make_one("test", "STRING")
+ schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION
+ self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION)
+
+ def test_timestamp_precision_unsupported_type(self):
+ with pytest.raises(ValueError) as e:
+ self._make_one("test", "TIMESTAMP", timestamp_precision=12)
+
+ assert "timestamp_precision must be class enums.TimestampPrecision" in str(
+ e.value
+ )
+
+ def test_timestamp_precision_property(self):
+ TIMESTAMP_PRECISION = enums.TimestampPrecision.PICOSECOND
+ schema_field = self._make_one("test", "TIMESTAMP")
+ schema_field._properties[
+ "timestampPrecision"
+ ] = enums.TimestampPrecision.PICOSECOND.value
+ self.assertEqual(schema_field.timestamp_precision, TIMESTAMP_PRECISION)
+
def test_to_standard_sql_simple_type(self):
examples = (
# a few legacy types
@@ -401,6 +554,20 @@ def test_to_standard_sql_unknown_type(self):
bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED,
)
+ def test_to_standard_sql_foreign_type_valid(self):
+ legacy_type = "FOREIGN"
+ standard_type = bigquery.StandardSqlTypeNames.FOREIGN
+ foreign_type_definition = "INTEGER"
+
+ field = self._make_one(
+ "some_field",
+ field_type=legacy_type,
+ foreign_type_definition=foreign_type_definition,
+ )
+ standard_field = field.to_standard_sql()
+ self.assertEqual(standard_field.name, "some_field")
+ self.assertEqual(standard_field.type.type_kind, standard_type)
+
def test___eq___wrong_type(self):
field = self._make_one("test", "STRING")
other = object()
@@ -527,12 +694,9 @@ def test___hash__not_equals(self):
def test___repr__(self):
field1 = self._make_one("field1", "STRING")
- expected = "SchemaField('field1', 'STRING', 'NULLABLE', None, (), None)"
- self.assertEqual(repr(field1), expected)
-
- def test___repr__type_not_set(self):
- field1 = self._make_one("field1", field_type=None)
- expected = "SchemaField('field1', None, 'NULLABLE', None, (), None)"
+ expected = (
+ "SchemaField('field1', 'STRING', 'NULLABLE', None, None, (), None, None)"
+ )
self.assertEqual(repr(field1), expected)
def test___repr__evaluable_no_policy_tags(self):
@@ -561,6 +725,40 @@ def test___repr__evaluable_with_policy_tags(self):
assert field == evaled_field
+class TestFieldElementType(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.schema import FieldElementType
+
+ return FieldElementType
+
+ def _make_one(self, *args):
+ return self._get_target_class()(*args)
+
+ def test_constructor(self):
+ element_type = self._make_one("DATETIME")
+ self.assertEqual(element_type.element_type, "DATETIME")
+ self.assertEqual(element_type._properties["type"], "DATETIME")
+
+ def test_to_api_repr(self):
+ element_type = self._make_one("DATETIME")
+ self.assertEqual(element_type.to_api_repr(), {"type": "DATETIME"})
+
+ def test_from_api_repr(self):
+ api_repr = {"type": "DATETIME"}
+ expected_element_type = self._make_one("DATETIME")
+ self.assertEqual(
+ expected_element_type.element_type,
+ self._get_target_class().from_api_repr(api_repr).element_type,
+ )
+
+ def test_from_api_repr_empty(self):
+ self.assertEqual(None, self._get_target_class().from_api_repr({}))
+
+ def test_from_api_repr_none(self):
+ self.assertEqual(None, self._get_target_class().from_api_repr(None))
+
+
# TODO: dedup with the same class in test_table.py.
class _SchemaBase(object):
def _verify_field(self, field, r_field):
@@ -621,27 +819,62 @@ def test__parse_schema_resource_fields_without_mode(self):
self._verifySchema(schema, RESOURCE)
-class Test_build_schema_resource(unittest.TestCase, _SchemaBase):
+class Test_build_schema_resource:
+ """Tests for the _build_schema_resource function."""
+
def _call_fut(self, resource):
- from google.cloud.bigquery.schema import _build_schema_resource
+ return schema._build_schema_resource(resource)
+
+ FULL_NAME = schema.SchemaField(
+ name="full_name", field_type="STRING", mode="REQUIRED"
+ )
+ AGE = schema.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED")
+ LIST_RESOURCE = [
+ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "age", "type": "INTEGER", "mode": "REQUIRED"},
+ ]
+ FOREIGN_TYPE_INFO = schema.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED")
+ FOREIGN_TYPE_INFO_RESOURCE = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}
+
+ @pytest.mark.parametrize(
+ "schema,expected",
+ [
+ pytest.param([], [], id="empty list"),
+ pytest.param([FULL_NAME, AGE], LIST_RESOURCE, id="list"),
+ ],
+ )
+ def test_ctor_valid_input(self, schema, expected):
+ result = self._call_fut(schema)
- return _build_schema_resource(resource)
+ assert result == expected
+
+ @pytest.mark.parametrize(
+ "schema,expected",
+ [
+ pytest.param(123, TypeError, id="invalid type"),
+ ],
+ )
+ def test_ctor_invalid_input(self, schema, expected):
+        with pytest.raises(expected) as e:
+            self._call_fut(schema)
+
+        # Check for the start of the error message: "Schema must be a ...".
+        assert "Schema must be a " in str(e.value)
def test_defaults(self):
from google.cloud.bigquery.schema import SchemaField
full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
age = SchemaField("age", "INTEGER", mode="REQUIRED")
+ # test with simple list
resource = self._call_fut([full_name, age])
- self.assertEqual(len(resource), 2)
- self.assertEqual(
- resource[0],
- {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
- )
- self.assertEqual(
- resource[1],
- {"name": "age", "type": "INTEGER", "mode": "REQUIRED"},
- )
+ assert len(resource) == 2
+ assert resource[0] == {
+ "name": "full_name",
+ "type": "STRING",
+ "mode": "REQUIRED",
+ }
+ assert resource[1] == {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}
def test_w_description(self):
from google.cloud.bigquery.schema import SchemaField
@@ -658,25 +891,20 @@ def test_w_description(self):
description=None,
)
resource = self._call_fut([full_name, age])
- self.assertEqual(len(resource), 2)
- self.assertEqual(
- resource[0],
- {
- "name": "full_name",
- "type": "STRING",
- "mode": "REQUIRED",
- "description": DESCRIPTION,
- },
- )
- self.assertEqual(
- resource[1],
- {
- "name": "age",
- "type": "INTEGER",
- "mode": "REQUIRED",
- "description": None,
- },
- )
+ assert len(resource) == 2
+ assert resource[0] == {
+ "name": "full_name",
+ "type": "STRING",
+ "mode": "REQUIRED",
+ "description": DESCRIPTION,
+ }
+
+ assert resource[1] == {
+ "name": "age",
+ "type": "INTEGER",
+ "mode": "REQUIRED",
+ "description": None,
+ }
def test_w_subfields(self):
from google.cloud.bigquery.schema import SchemaField
@@ -688,57 +916,99 @@ def test_w_subfields(self):
"phone", "RECORD", mode="REPEATED", fields=[ph_type, ph_num]
)
resource = self._call_fut([full_name, phone])
- self.assertEqual(len(resource), 2)
- self.assertEqual(
- resource[0],
- {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
- )
- self.assertEqual(
- resource[1],
- {
- "name": "phone",
- "type": "RECORD",
- "mode": "REPEATED",
- "fields": [
- {"name": "type", "type": "STRING", "mode": "REQUIRED"},
- {"name": "number", "type": "STRING", "mode": "REQUIRED"},
- ],
- },
- )
+ assert len(resource) == 2
+ assert resource[0] == {
+ "name": "full_name",
+ "type": "STRING",
+ "mode": "REQUIRED",
+ }
+ assert resource[1] == {
+ "name": "phone",
+ "type": "RECORD",
+ "mode": "REPEATED",
+ "fields": [
+ {"name": "type", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "number", "type": "STRING", "mode": "REQUIRED"},
+ ],
+ }
+
-class Test_to_schema_fields(unittest.TestCase):
+class Test_to_schema_fields:
+ """Tests for the _to_schema_fields function."""
@staticmethod
def _call_fut(schema):
from google.cloud.bigquery.schema import _to_schema_fields
return _to_schema_fields(schema)
- def test_invalid_type(self):
- schema = [
- ("full_name", "STRING", "REQUIRED"),
- ("address", "STRING", "REQUIRED"),
- ]
- with self.assertRaises(ValueError):
- self._call_fut(schema)
+ FULL_NAME = schema.SchemaField(
+ name="full_name", field_type="STRING", mode="REQUIRED"
+ )
+ AGE = schema.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED")
+ LIST_RESOURCE = [
+ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "age", "type": "INTEGER", "mode": "REQUIRED"},
+ ]
+ FOREIGN_TYPE_INFO = schema.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED")
+ FOREIGN_TYPE_INFO_RESOURCE = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}
+
+ @pytest.mark.parametrize(
+ "schema,expected",
+ [
+ pytest.param([], [], id="empty list"),
+ pytest.param((), [], id="empty tuple"),
+ pytest.param(LIST_RESOURCE, [FULL_NAME, AGE], id="list"),
+ ],
+ )
- def test_schema_fields_sequence(self):
- from google.cloud.bigquery.schema import SchemaField
+ def test_ctor_valid_input(self, schema, expected):
+ result = self._call_fut(schema)
+
+ assert result == expected
+
+ @pytest.mark.parametrize(
+ "schema,expected",
+ [
+ pytest.param(123, TypeError, id="invalid schema type"),
+ pytest.param([123, 123], TypeError, id="invalid SchemaField type"),
+ pytest.param({"fields": 123}, TypeError, id="invalid type, dict"),
+ pytest.param(
+ {"fields": 123, "foreignTypeInfo": 123},
+ TypeError,
+ id="invalid type, dict",
+ ),
+ ],
+ )
+ def test_ctor_invalid_input(self, schema, expected):
+ with pytest.raises(expected):
+ self._call_fut(schema)
+ def test_unknown_properties(self):
schema = [
- SchemaField("full_name", "STRING", mode="REQUIRED"),
- SchemaField("age", "INT64", mode="NULLABLE"),
+ {
+ "name": "full_name",
+ "type": "STRING",
+ "mode": "REQUIRED",
+ "someNewProperty": "test-value",
+ },
+ {
+ "name": "age",
+ # Note: This type should be included, too. Avoid client-side
+ # validation, as it could prevent backwards-compatible
+ # evolution of the server-side behavior.
+ "typo": "INTEGER",
+ "mode": "REQUIRED",
+ "anotherNewProperty": "another-test",
+ },
]
+
+ # Make sure the conversion doesn't mutate the original schema list.
+ expected_schema = copy.deepcopy(schema)
+
result = self._call_fut(schema)
- self.assertEqual(result, schema)
- def test_invalid_mapping_representation(self):
- schema = [
- {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
- {"name": "address", "typeooo": "STRING", "mode": "REQUIRED"},
- ]
- with self.assertRaises(Exception):
- self._call_fut(schema)
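+ # Each field, including its unknown properties, should round-trip to the same API representation.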
+ for api_repr, field in zip(expected_schema, result):
+ assert field.to_api_repr() == api_repr
def test_valid_mapping_representation(self):
from google.cloud.bigquery.schema import SchemaField
@@ -770,14 +1040,12 @@ def test_valid_mapping_representation(self):
]
result = self._call_fut(schema)
- self.assertEqual(result, expected_schema)
+ assert result == expected_schema
class TestPolicyTags(unittest.TestCase):
@staticmethod
def _get_target_class():
- from google.cloud.bigquery.schema import PolicyTagList
-
return PolicyTagList
def _make_one(self, *args, **kw):
@@ -1019,3 +1287,285 @@ def test_to_api_repr_parameterized(field, api):
from google.cloud.bigquery.schema import SchemaField
assert SchemaField(**field).to_api_repr() == api
+
+
+class TestForeignTypeInfo:
+ """Tests for ForeignTypeInfo objects."""
+
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.schema import ForeignTypeInfo
+
+ return ForeignTypeInfo
+
+ def _make_one(self, *args, **kw):
+ return self._get_target_class()(*args, **kw)
+
+ @pytest.mark.parametrize(
+ "type_system,expected",
+ [
+ (None, None),
+ ("TYPE_SYSTEM_UNSPECIFIED", "TYPE_SYSTEM_UNSPECIFIED"),
+ ("HIVE", "HIVE"),
+ ],
+ )
+ def test_ctor_valid_input(self, type_system, expected):
+ result = self._make_one(type_system=type_system)
+
+ assert result.type_system == expected
+
+ def test_ctor_invalid_input(self):
+ with pytest.raises(TypeError) as e:
+ self._make_one(type_system=123)
+
+ # Looking for the first word from the string "Pass as..."
+ assert "Pass " in str(e.value)
+
+ @pytest.mark.parametrize(
+ "type_system,expected",
+ [
+ ("TYPE_SYSTEM_UNSPECIFIED", {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}),
+ ("HIVE", {"typeSystem": "HIVE"}),
+ (None, {"typeSystem": None}),
+ ],
+ )
+ def test_to_api_repr(self, type_system, expected):
+ result = self._make_one(type_system=type_system)
+
+ assert result.to_api_repr() == expected
+
+ def test_from_api_repr(self):
+ """GIVEN an api representation of a ForeignTypeInfo object (i.e. api_repr)
+ WHEN converted into a ForeignTypeInfo object using from_api_repr()
+ THEN it will have the same representation in dict format as a ForeignTypeInfo
+ object made directly (via _make_one()) and represented in dict format.
+ """
+ api_repr = {
+ "typeSystem": "TYPE_SYSTEM_UNSPECIFIED",
+ }
+
+ expected = self._make_one(
+ type_system="TYPE_SYSTEM_UNSPECIFIED",
+ )
+
+ klass = self._get_target_class()
+ result = klass.from_api_repr(api_repr)
+
+ # We convert both to dict format because these classes do not have a
+ # __eq__() method to facilitate direct equality comparisons.
+ assert result.to_api_repr() == expected.to_api_repr()
+
+
+class TestSerDeInfo:
+ """Tests for the SerDeInfo class."""
+
+ @staticmethod
+ def _get_target_class():
+ return schema.SerDeInfo
+
+ def _make_one(self, *args, **kwargs):
+ return self._get_target_class()(*args, **kwargs)
+
+ @pytest.mark.parametrize(
+ "serialization_library,name,parameters",
+ [
+ ("testpath.to.LazySimpleSerDe", None, None),
+ ("testpath.to.LazySimpleSerDe", "serde_name", None),
+ ("testpath.to.LazySimpleSerDe", None, {"key": "value"}),
+ ("testpath.to.LazySimpleSerDe", "serde_name", {"key": "value"}),
+ ],
+ )
+ def test_ctor_valid_input(self, serialization_library, name, parameters):
+ serde_info = self._make_one(
+ serialization_library=serialization_library,
+ name=name,
+ parameters=parameters,
+ )
+ assert serde_info.serialization_library == serialization_library
+ assert serde_info.name == name
+ assert serde_info.parameters == parameters
+
+ @pytest.mark.parametrize(
+ "serialization_library,name,parameters",
+ [
+ (123, None, None),
+ ("testpath.to.LazySimpleSerDe", 123, None),
+ ("testpath.to.LazySimpleSerDe", None, ["test", "list"]),
+ ("testpath.to.LazySimpleSerDe", None, 123),
+ ],
+ )
+ def test_ctor_invalid_input(self, serialization_library, name, parameters):
+ with pytest.raises(TypeError) as e:
+ self._make_one(
+ serialization_library=serialization_library,
+ name=name,
+ parameters=parameters,
+ )
+ # Looking for the first word from the string "Pass as..."
+ assert "Pass " in str(e.value)
+
+ def test_to_api_repr(self):
+ serde_info = self._make_one(
+ serialization_library="testpath.to.LazySimpleSerDe",
+ name="serde_name",
+ parameters={"key": "value"},
+ )
+ expected_repr = {
+ "serializationLibrary": "testpath.to.LazySimpleSerDe",
+ "name": "serde_name",
+ "parameters": {"key": "value"},
+ }
+ assert serde_info.to_api_repr() == expected_repr
+
+ def test_from_api_repr(self):
+ """GIVEN an api representation of a SerDeInfo object (i.e. api_repr)
+ WHEN converted into a SerDeInfo object using from_api_repr()
+ THEN it will have the same representation in dict format as a SerDeInfo
+ object made directly (via _make_one()) and represented in dict format.
+ """
+ api_repr = {
+ "serializationLibrary": "testpath.to.LazySimpleSerDe",
+ "name": "serde_name",
+ "parameters": {"key": "value"},
+ }
+
+ expected = self._make_one(
+ serialization_library="testpath.to.LazySimpleSerDe",
+ name="serde_name",
+ parameters={"key": "value"},
+ )
+
+ klass = self._get_target_class()
+ result = klass.from_api_repr(api_repr)
+
+ # We convert both to dict format because these classes do not have a
+ # __eq__() method to facilitate direct equality comparisons.
+ assert result.to_api_repr() == expected.to_api_repr()
+
+
+class TestStorageDescriptor:
+ """Tests for the StorageDescriptor class."""
+
+ @staticmethod
+ def _get_target_class():
+ return schema.StorageDescriptor
+
+ def _make_one(self, *args, **kwargs):
+ return self._get_target_class()(*args, **kwargs)
+
+ serdeinfo_resource = {
+ "serialization_library": "testpath.to.LazySimpleSerDe",
+ "name": "serde_lib_name",
+ "parameters": {"key": "value"},
+ }
+
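+ # Shared SerDeInfo fixture built from the resource above; reused by the test cases below.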
+ SERDEINFO = schema.SerDeInfo("PLACEHOLDER").from_api_repr(serdeinfo_resource)
+
+ STORAGEDESCRIPTOR = {
+ "inputFormat": "testpath.to.OrcInputFormat",
+ "locationUri": "gs://test/path/",
+ "outputFormat": "testpath.to.OrcOutputFormat",
+ "serDeInfo": SERDEINFO.to_api_repr(),
+ }
+
+ @pytest.mark.parametrize(
+ "input_format,location_uri,output_format,serde_info",
+ [
+ (None, None, None, None),
+ ("testpath.to.OrcInputFormat", None, None, None),
+ (None, "gs://test/path/", None, None),
+ (None, None, "testpath.to.OrcOutputFormat", None),
+ (None, None, None, SERDEINFO),
+ (
+ "testpath.to.OrcInputFormat",
+ "gs://test/path/",
+ "testpath.to.OrcOutputFormat",
+ SERDEINFO, # uses SERDEINFO class format
+ ),
+ (
+ "testpath.to.OrcInputFormat",
+ "gs://test/path/",
+ "testpath.to.OrcOutputFormat",
+ serdeinfo_resource, # uses api resource format (dict)
+ ),
+ ],
+ )
+ def test_ctor_valid_input(
+ self, input_format, location_uri, output_format, serde_info
+ ):
+ storage_descriptor = self._make_one(
+ input_format=input_format,
+ location_uri=location_uri,
+ output_format=output_format,
+ serde_info=serde_info,
+ )
+ assert storage_descriptor.input_format == input_format
+ assert storage_descriptor.location_uri == location_uri
+ assert storage_descriptor.output_format == output_format
+ if isinstance(serde_info, schema.SerDeInfo):
+ assert (
+ storage_descriptor.serde_info.to_api_repr() == serde_info.to_api_repr()
+ )
+ elif isinstance(serde_info, dict):
+ assert storage_descriptor.serde_info.to_api_repr() == serde_info
+ else:
+ assert storage_descriptor.serde_info is None
+
+ @pytest.mark.parametrize(
+ "input_format,location_uri,output_format,serde_info",
+ [
+ (123, None, None, None),
+ (None, 123, None, None),
+ (None, None, 123, None),
+ (None, None, None, 123),
+ ],
+ )
+ def test_ctor_invalid_input(
+ self, input_format, location_uri, output_format, serde_info
+ ):
+ with pytest.raises(TypeError) as e:
+ self._make_one(
+ input_format=input_format,
+ location_uri=location_uri,
+ output_format=output_format,
+ serde_info=serde_info,
+ )
+
+ # Looking for the first word from the string "Pass as..."
+ assert "Pass " in str(e.value)
+
+ def test_to_api_repr(self):
+ storage_descriptor = self._make_one(
+ input_format="input_format",
+ location_uri="location_uri",
+ output_format="output_format",
+ serde_info=self.SERDEINFO,
+ )
+ expected_repr = {
+ "inputFormat": "input_format",
+ "locationUri": "location_uri",
+ "outputFormat": "output_format",
+ "serDeInfo": self.SERDEINFO.to_api_repr(),
+ }
+ assert storage_descriptor.to_api_repr() == expected_repr
+
+ def test_from_api_repr(self):
+ """GIVEN an api representation of a StorageDescriptor (i.e. STORAGEDESCRIPTOR)
+ WHEN converted into a StorageDescriptor using from_api_repr() and
+ displayed as a dict
+ THEN it will have the same representation as a StorageDescriptor created
+ directly (via the _make_one() func) and displayed as a dict.
+ """
+
+ # Build the result from the class-level STORAGEDESCRIPTOR resource.
+ resource = self.STORAGEDESCRIPTOR
+ result = self._get_target_class().from_api_repr(resource)
+
+ expected = self._make_one(
+ input_format="testpath.to.OrcInputFormat",
+ location_uri="gs://test/path/",
+ output_format="testpath.to.OrcOutputFormat",
+ serde_info=self.SERDEINFO,
+ )
+ assert result.to_api_repr() == expected.to_api_repr()
diff --git a/tests/unit/test_standard_sql_types.py b/tests/unit/test_standard_sql_types.py
index 0ba0e0cfd..3ed912b5a 100644
--- a/tests/unit/test_standard_sql_types.py
+++ b/tests/unit/test_standard_sql_types.py
@@ -129,6 +129,28 @@ def test_to_api_repr_struct_type_w_field_types(self):
}
assert result == expected
+ def test_to_api_repr_range_type_element_type_missing(self):
+ instance = self._make_one(
+ bq.StandardSqlTypeNames.RANGE, range_element_type=None
+ )
+
+ result = instance.to_api_repr()
+
+ assert result == {"typeKind": "RANGE"}
+
+ def test_to_api_repr_range_type_w_element_type(self):
+ range_element_type = self._make_one(type_kind=bq.StandardSqlTypeNames.DATE)
+ instance = self._make_one(
+ bq.StandardSqlTypeNames.RANGE, range_element_type=range_element_type
+ )
+
+ result = instance.to_api_repr()
+
+ assert result == {
+ "typeKind": "RANGE",
+ "rangeElementType": {"typeKind": "DATE"},
+ }
+
def test_from_api_repr_empty_resource(self):
klass = self._get_target_class()
result = klass.from_api_repr(resource={})
@@ -276,6 +298,31 @@ def test_from_api_repr_struct_type_incomplete_field_info(self):
)
assert result == expected
+ def test_from_api_repr_range_type_full(self):
+ klass = self._get_target_class()
+ resource = {"typeKind": "RANGE", "rangeElementType": {"typeKind": "DATE"}}
+
+ result = klass.from_api_repr(resource=resource)
+
+ expected = klass(
+ type_kind=bq.StandardSqlTypeNames.RANGE,
+ range_element_type=klass(type_kind=bq.StandardSqlTypeNames.DATE),
+ )
+ assert result == expected
+
+ def test_from_api_repr_range_type_missing_element_type(self):
+ klass = self._get_target_class()
+ resource = {"typeKind": "RANGE"}
+
+ result = klass.from_api_repr(resource=resource)
+
+ expected = klass(
+ type_kind=bq.StandardSqlTypeNames.RANGE,
+ range_element_type=None,
+ struct_type=None,
+ )
+ assert result == expected
+
def test__eq__another_type(self):
instance = self._make_one()
@@ -321,6 +368,11 @@ def test__eq__similar_instance(self):
bq.StandardSqlStructType(fields=[bq.StandardSqlField(name="foo")]),
bq.StandardSqlStructType(fields=[bq.StandardSqlField(name="bar")]),
),
+ (
+ "range_element_type",
+ bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.DATE),
+ bq.StandardSqlDataType(type_kind=bq.StandardSqlTypeNames.DATETIME),
+ ),
),
)
def test__eq__attribute_differs(self, attr_name, value, value2):
diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py
index b5f2e58c6..af31d116b 100644
--- a/tests/unit/test_table.py
+++ b/tests/unit/test_table.py
@@ -12,43 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import copy
import datetime
import logging
import re
import time
import types
import unittest
+from unittest import mock
import warnings
-import mock
-import pyarrow
-import pyarrow.types
import pytest
import google.api_core.exceptions
+from test_utils.imports import maybe_fail_import
+from google.cloud.bigquery import _versions_helpers
+from google.cloud.bigquery import exceptions
+from google.cloud.bigquery import external_config
+from google.cloud.bigquery import schema
+from google.cloud.bigquery.enums import DefaultPandasDTypes
from google.cloud.bigquery.table import TableReference
-
-from google.cloud import bigquery_storage
-from google.cloud.bigquery_storage_v1.services.big_query_read.transports import (
- grpc as big_query_read_grpc_transport,
-)
-
-try:
- import pandas
-except (ImportError, AttributeError): # pragma: NO COVER
- pandas = None
-
-try:
- import geopandas
-except (ImportError, AttributeError): # pragma: NO COVER
- geopandas = None
-
-try:
- from tqdm import tqdm
-except (ImportError, AttributeError): # pragma: NO COVER
- tqdm = None
-
from google.cloud.bigquery.dataset import DatasetReference
@@ -387,7 +371,6 @@ def test___str__(self):
class TestTable(unittest.TestCase, _SchemaBase):
-
PROJECT = "prahj-ekt"
DS_ID = "dataset-name"
TABLE_NAME = "table-name"
@@ -412,7 +395,9 @@ def _setUpConstants(self):
from google.cloud._helpers import UTC
self.WHEN_TS = 1437767599.006
- self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC)
+ self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC)
self.ETAG = "ETAG"
self.TABLE_FULL_ID = "%s:%s.%s" % (self.PROJECT, self.DS_ID, self.TABLE_NAME)
self.RESOURCE_URL = "http://example.com/path/to/resource"
@@ -453,6 +438,12 @@ def _make_resource(self):
"sourceFormat": "CSV",
"csvOptions": {"allowJaggedRows": True, "encoding": "encoding"},
},
+ "biglakeConfiguration": {
+ "connectionId": "connection",
+ "storageUri": "uri",
+ "fileFormat": "PARQUET",
+ "tableFormat": "ICEBERG",
+ },
"labels": {"x": "y"},
}
@@ -495,7 +486,6 @@ def _verifyReadonlyResourceProperties(self, table, resource):
)
def _verifyResourceProperties(self, table, resource):
-
self._verifyReadonlyResourceProperties(table, resource)
if "expirationTime" in resource:
@@ -540,6 +530,15 @@ def _verifyResourceProperties(self, table, resource):
else:
self.assertIsNone(table.encryption_configuration)
+ if "biglakeConfiguration" in resource:
+ self.assertIsNotNone(table.biglake_configuration)
+ self.assertEqual(table.biglake_configuration.connection_id, "connection")
+ self.assertEqual(table.biglake_configuration.storage_uri, "uri")
+ self.assertEqual(table.biglake_configuration.file_format, "PARQUET")
+ self.assertEqual(table.biglake_configuration.table_format, "ICEBERG")
+ else:
+ self.assertIsNone(table.biglake_configuration)
+
def test_ctor(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
@@ -577,6 +576,7 @@ def test_ctor(self):
self.assertIsNone(table.encryption_configuration)
self.assertIsNone(table.time_partitioning)
self.assertIsNone(table.clustering_fields)
+ self.assertIsNone(table.table_constraints)
def test_ctor_w_schema(self):
from google.cloud.bigquery.schema import SchemaField
@@ -717,7 +717,7 @@ def test_schema_setter_invalid_field(self):
table_ref = dataset.table(self.TABLE_NAME)
table = self._make_one(table_ref)
full_name = SchemaField("full_name", "STRING", mode="REQUIRED")
- with self.assertRaises(ValueError):
+ with self.assertRaises(TypeError):
table.schema = [full_name, object()]
def test_schema_setter_valid_fields(self):
@@ -731,14 +731,35 @@ def test_schema_setter_valid_fields(self):
table.schema = [full_name, age]
self.assertEqual(table.schema, [full_name, age])
- def test_schema_setter_invalid_mapping_representation(self):
+ def test_schema_setter_allows_unknown_properties(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
table = self._make_one(table_ref)
- full_name = {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}
- invalid_field = {"name": "full_name", "typeooo": "STRING", "mode": "REQUIRED"}
- with self.assertRaises(Exception):
- table.schema = [full_name, invalid_field]
+ schema = [
+ {
+ "name": "full_name",
+ "type": "STRING",
+ "mode": "REQUIRED",
+ "someNewProperty": "test-value",
+ },
+ {
+ "name": "age",
+ # Note: This type should be included, too. Avoid client-side
+ # validation, as it could prevent backwards-compatible
+ # evolution of the server-side behavior.
+ "typo": "INTEGER",
+ "mode": "REQUIRED",
+ "anotherNewProperty": "another-test",
+ },
+ ]
+
+ # Make sure the setter doesn't mutate schema.
+ expected_schema = copy.deepcopy(schema)
+
+ table.schema = schema
+
+ # _properties should include all fields, including unknown ones.
+ assert table._properties["schema"]["fields"] == expected_schema
def test_schema_setter_valid_mapping_representation(self):
from google.cloud.bigquery.schema import SchemaField
@@ -875,6 +896,227 @@ def test_clone_definition_set(self):
2010, 9, 28, 10, 20, 30, 123000, tzinfo=UTC
)
+ def test_table_constraints_property_getter(self):
+ from google.cloud.bigquery.table import PrimaryKey, TableConstraints
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ table._properties["tableConstraints"] = {
+ "primaryKey": {"columns": ["id"]},
+ }
+
+ table_constraints = table.table_constraints
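+ # With BQ Storage reported as too old, no BQ Storage client is created and the rows come from the REST API.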
+
+ assert isinstance(table_constraints, TableConstraints)
+ assert table_constraints.primary_key == PrimaryKey(columns=["id"])
+
+ def test_biglake_configuration_not_set(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ assert table.biglake_configuration is None
+
+ def test_biglake_configuration_set(self):
+ from google.cloud.bigquery.table import BigLakeConfiguration
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ table._properties["biglakeConfiguration"] = {
+ "connectionId": "connection",
+ "storageUri": "uri",
+ "fileFormat": "PARQUET",
+ "tableFormat": "ICEBERG",
+ }
+
+ config = table.biglake_configuration
+
+ assert isinstance(config, BigLakeConfiguration)
+ assert config.connection_id == "connection"
+ assert config.storage_uri == "uri"
+ assert config.file_format == "PARQUET"
+ assert config.table_format == "ICEBERG"
+
+ def test_biglake_configuration_property_setter(self):
+ from google.cloud.bigquery.table import BigLakeConfiguration
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ config = BigLakeConfiguration(
+ connection_id="connection",
+ storage_uri="uri",
+ file_format="PARQUET",
+ table_format="ICEBERG",
+ )
+ table.biglake_configuration = config
+
+ assert table._properties["biglakeConfiguration"] == {
+ "connectionId": "connection",
+ "storageUri": "uri",
+ "fileFormat": "PARQUET",
+ "tableFormat": "ICEBERG",
+ }
+
+ table.biglake_configuration = None
+ assert table.biglake_configuration is None
+
+ def test_table_constraints_property_setter(self):
+ from google.cloud.bigquery.table import (
+ ColumnReference,
+ ForeignKey,
+ PrimaryKey,
+ TableConstraints,
+ TableReference,
+ )
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ primary_key = PrimaryKey(columns=["id"])
+ foreign_keys = [
+ ForeignKey(
+ name="fk_name",
+ referenced_table=TableReference.from_string(
+ "my_project.my_dataset.table"
+ ),
+ column_references=[
+ ColumnReference(
+ referenced_column="product_id", referencing_column="id"
+ )
+ ],
+ )
+ ]
+ table_constraints = TableConstraints(
+ primary_key=primary_key, foreign_keys=foreign_keys
+ )
+ table.table_constraints = table_constraints
+
+ assert table._properties["tableConstraints"] == {
+ "primaryKey": {"columns": ["id"]},
+ "foreignKeys": [
+ {
+ "name": "fk_name",
+ "referencedTable": {
+ "projectId": "my_project",
+ "datasetId": "my_dataset",
+ "tableId": "table",
+ },
+ "columnReferences": [
+ {"referencedColumn": "product_id", "referencingColumn": "id"}
+ ],
+ }
+ ],
+ }
+
+ def test_table_constraints_property_setter_empty_value(self):
+ from google.cloud.bigquery.table import TableConstraints
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ table.table_constraints = TableConstraints(primary_key=None, foreign_keys=None)
+ assert table._properties["tableConstraints"] == {}
+
+ def test_table_constraints_property_setter_invalid_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ with pytest.raises(
+ ValueError,
+ match="value must be google.cloud.bigquery.table.TableConstraints or None",
+ ):
+ table.table_constraints = "invalid_value"
+
+ def test_table_constraints_property_setter_none_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ table.table_constraints = None
+ assert table._properties["tableConstraints"] is None
+
+ def test_table_constraints_property_setter_only_primary_key_set(self):
+ from google.cloud.bigquery.table import PrimaryKey, TableConstraints
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ primary_key = PrimaryKey(columns=["id"])
+
+ table_constraints = TableConstraints(primary_key=primary_key, foreign_keys=None)
+ table.table_constraints = table_constraints
+
+ assert table._properties["tableConstraints"] == {
+ "primaryKey": {"columns": ["id"]}
+ }
+
+ def test_table_constraints_property_setter_only_foreign_keys(self):
+ from google.cloud.bigquery.table import (
+ ColumnReference,
+ ForeignKey,
+ TableConstraints,
+ TableReference,
+ )
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ foreign_keys = [
+ ForeignKey(
+ name="fk_name",
+ referenced_table=TableReference.from_string(
+ "my_project.my_dataset.table"
+ ),
+ column_references=[
+ ColumnReference(
+ referenced_column="product_id", referencing_column="id"
+ )
+ ],
+ )
+ ]
+ table_constraints = TableConstraints(
+ primary_key=None, foreign_keys=foreign_keys
+ )
+ table.table_constraints = table_constraints
+
+ assert table._properties["tableConstraints"] == {
+ "foreignKeys": [
+ {
+ "name": "fk_name",
+ "referencedTable": {
+ "projectId": "my_project",
+ "datasetId": "my_dataset",
+ "tableId": "table",
+ },
+ "columnReferences": [
+ {"referencedColumn": "product_id", "referencingColumn": "id"}
+ ],
+ }
+ ]
+ }
+
+ def test_table_constraints_property_setter_empty_constraints(self):
+ from google.cloud.bigquery.table import TableConstraints
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+
+ table_constraints = TableConstraints(primary_key=None, foreign_keys=None)
+ table.table_constraints = table_constraints
+
+ assert table._properties["tableConstraints"] == {}
+
def test_description_setter_bad_value(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
@@ -1055,6 +1297,16 @@ def test_mview_refresh_interval(self):
table.mview_refresh_interval = None
self.assertIsNone(table.mview_refresh_interval)
+ def test_mview_allow_non_incremental_definition(self):
+ table = self._make_one()
+ self.assertIsNone(table.mview_allow_non_incremental_definition)
+ table.mview_allow_non_incremental_definition = True
+ self.assertTrue(table.mview_allow_non_incremental_definition)
+ table.mview_allow_non_incremental_definition = False
+ self.assertFalse(table.mview_allow_non_incremental_definition)
+ table.mview_allow_non_incremental_definition = None
+ self.assertIsNone(table.mview_allow_non_incremental_definition)
+
def test_from_string(self):
cls = self._get_target_class()
got = cls.from_string("string-project.string_dataset.string_table")
@@ -1166,6 +1418,102 @@ def test_to_api_repr_w_custom_field(self):
}
self.assertEqual(resource, exp_resource)
+ def test_to_api_repr_w_unsetting_expiration(self):
+ from google.cloud.bigquery.table import TimePartitioningType
+
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
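+ # Unsetting the expiration emits a timePartitioning entry with the default DAY type and an explicit None expirationMs.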
+ table.partition_expiration = None
+ resource = table.to_api_repr()
+
+ exp_resource = {
+ "tableReference": table_ref.to_api_repr(),
+ "labels": {},
+ "timePartitioning": {
+ "expirationMs": None,
+ "type": TimePartitioningType.DAY,
+ },
+ }
+ self.assertEqual(resource, exp_resource)
+
+ def test_to_api_repr_w_schema_and_foreign_type_info(self):
+ """Tests to ensure that to_api_repr works correctly with
+ both schema and foreign_type_info fields
+ """
+
+ PROJECT = "test-project"
+ DATASET_ID = "test_dataset"
+ TABLE_ID = "coffee_table"
+ FOREIGNTYPEINFO = {
+ "typeSystem": "TYPE_SYSTEM_UNSPECIFIED",
+ }
+ SCHEMA = {
+ "fields": [
+ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "age", "type": "INTEGER", "mode": "REQUIRED"},
+ ],
+ "foreignTypeInfo": FOREIGNTYPEINFO,
+ }
+
+ API_REPR = {
+ "tableReference": {
+ "projectId": PROJECT,
+ "datasetId": DATASET_ID,
+ "tableId": TABLE_ID,
+ },
+ "schema": SCHEMA,
+ }
+
+ table = self._get_target_class().from_api_repr(API_REPR)
+ assert table._properties == table.to_api_repr()
+
+ # update schema (i.e. the fields), ensure foreign_type_info is unchanged
+ table.schema = []
+ expected = {
+ "fields": [],
+ "foreignTypeInfo": {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"},
+ }
+ assert table.to_api_repr()["schema"] == expected
+
+ # update foreign_type_info, ensure schema (i.e. the fields), is unchanged
+ table.foreign_type_info = {"typeSystem": "SCHEMA_SHOULD_NOT_CHANGE"}
+ expected = {
+ "fields": [],
+ "foreignTypeInfo": {"typeSystem": "SCHEMA_SHOULD_NOT_CHANGE"},
+ }
+ assert table.to_api_repr()["schema"] == expected
+
+ def test_from_api_repr_w_schema_and_foreign_type_info(self):
+ """Tests to ensure that to_api_repr works correctly with
+ both schema and foreign_type_info fields
+ """
+
+ PROJECT = "test-project"
+ DATASET_ID = "test_dataset"
+ TABLE_ID = "coffee_table"
+ FOREIGNTYPEINFO = {
+ "typeSystem": "TYPE_SYSTEM_UNSPECIFIED",
+ }
+ SCHEMA = {
+ "fields": [
+ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "age", "type": "INTEGER", "mode": "REQUIRED"},
+ ],
+ "foreignTypeInfo": FOREIGNTYPEINFO,
+ }
+ API_REPR = {
+ "tableReference": {
+ "projectId": PROJECT,
+ "datasetId": DATASET_ID,
+ "tableId": TABLE_ID,
+ },
+ "schema": SCHEMA,
+ }
+
+ table = self._get_target_class().from_api_repr(API_REPR)
+ assert table._properties == API_REPR
+
def test__build_resource_w_custom_field(self):
dataset = DatasetReference(self.PROJECT, self.DS_ID)
table_ref = dataset.table(self.TABLE_NAME)
@@ -1434,6 +1782,33 @@ def test_encryption_configuration_setter(self):
table.encryption_configuration = None
self.assertIsNone(table.encryption_configuration)
+ def test_resource_tags_getter_empty(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ self.assertEqual(table.resource_tags, {})
+
+ def test_resource_tags_update_in_place(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
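+ # Mutating the mapping returned by the getter persists on the table.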
+ table.resource_tags["123456789012/key"] = "value"
+ self.assertEqual(table.resource_tags, {"123456789012/key": "value"})
+
+ def test_resource_tags_setter(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ table.resource_tags = {"123456789012/key": "value"}
+ self.assertEqual(table.resource_tags, {"123456789012/key": "value"})
+
+ def test_resource_tags_setter_bad_value(self):
+ dataset = DatasetReference(self.PROJECT, self.DS_ID)
+ table_ref = dataset.table(self.TABLE_NAME)
+ table = self._make_one(table_ref)
+ with self.assertRaises(ValueError):
+ table.resource_tags = 12345
+
def test___repr__(self):
from google.cloud.bigquery.table import TableReference
@@ -1451,9 +1826,51 @@ def test___str__(self):
table1 = self._make_one(TableReference(dataset, "table1"))
self.assertEqual(str(table1), "project1.dataset1.table1")
+ def test_max_staleness_getter(self):
+ """Test getting max_staleness property."""
+ dataset = DatasetReference("test-project", "test_dataset")
+ table_ref = dataset.table("test_table")
+ table = self._make_one(table_ref)
+ # Initially None
+ self.assertIsNone(table.max_staleness)
+ # Set max_staleness using setter
+ table.max_staleness = "1h"
+ self.assertEqual(table.max_staleness, "1h")
+
+ def test_max_staleness_setter(self):
+ """Test setting max_staleness property."""
+ dataset = DatasetReference("test-project", "test_dataset")
+ table_ref = dataset.table("test_table")
+ table = self._make_one(table_ref)
+ # Set valid max_staleness
+ table.max_staleness = "30m"
+ self.assertEqual(table.max_staleness, "30m")
+ # Set to None
+ table.max_staleness = None
+ self.assertIsNone(table.max_staleness)
+
+ def test_max_staleness_setter_invalid_type(self):
+ """Test setting max_staleness with an invalid type raises ValueError."""
+ dataset = DatasetReference("test-project", "test_dataset")
+ table_ref = dataset.table("test_table")
+ table = self._make_one(table_ref)
+ # Try setting invalid type
+ with self.assertRaises(ValueError):
+ table.max_staleness = 123 # Not a string
+
+ def test_max_staleness_to_api_repr(self):
+ """Test max_staleness is correctly represented in API representation."""
+ dataset = DatasetReference("test-project", "test_dataset")
+ table_ref = dataset.table("test_table")
+ table = self._make_one(table_ref)
+ # Set max_staleness
+ table.max_staleness = "1h"
+ # Convert to API representation
+ resource = table.to_api_repr()
+ self.assertEqual(resource.get("maxStaleness"), "1h")
+
class Test_row_from_mapping(unittest.TestCase, _SchemaBase):
-
PROJECT = "prahj-ekt"
DS_ID = "dataset-name"
TABLE_NAME = "table-name"
@@ -1537,7 +1954,9 @@ def _setUpConstants(self):
from google.cloud._helpers import UTC
self.WHEN_TS = 1437767599.125
- self.WHEN = datetime.datetime.utcfromtimestamp(self.WHEN_TS).replace(tzinfo=UTC)
+ self.WHEN = datetime.datetime.fromtimestamp(self.WHEN_TS, UTC)
self.EXP_TIME = datetime.datetime(2015, 8, 1, 23, 59, 59, tzinfo=UTC)
def test_ctor(self):
@@ -1742,7 +2161,6 @@ def _make_table_list_item(*args, **kwargs):
return TableListItem(*args, **kwargs)
def test_table_eq_table_ref(self):
-
table = self._make_table("project_foo.dataset_bar.table_baz")
dataset_ref = DatasetReference("project_foo", "dataset_bar")
table_ref = self._make_table_ref(dataset_ref, "table_baz")
@@ -1766,7 +2184,6 @@ def test_table_eq_table_list_item(self):
assert table_list_item == table
def test_table_ref_eq_table_list_item(self):
-
dataset_ref = DatasetReference("project_foo", "dataset_bar")
table_ref = self._make_table_ref(dataset_ref, "table_baz")
table_list_item = self._make_table_list_item(
@@ -1823,6 +2240,97 @@ def test_ctor_full_resource(self):
assert instance.snapshot_time == expected_time
+class TestBigLakeConfiguration(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.table import BigLakeConfiguration
+
+ return BigLakeConfiguration
+
+ @classmethod
+ def _make_one(cls, *args, **kwargs):
+ klass = cls._get_target_class()
+ return klass(*args, **kwargs)
+
+ def test_ctor_empty_resource(self):
+ instance = self._make_one()
+ self.assertIsNone(instance.connection_id)
+ self.assertIsNone(instance.storage_uri)
+ self.assertIsNone(instance.file_format)
+ self.assertIsNone(instance.table_format)
+
+ def test_ctor_kwargs(self):
+ instance = self._make_one(
+ connection_id="conn",
+ storage_uri="uri",
+ file_format="FILE",
+ table_format="TABLE",
+ )
+ self.assertEqual(instance.connection_id, "conn")
+ self.assertEqual(instance.storage_uri, "uri")
+ self.assertEqual(instance.file_format, "FILE")
+ self.assertEqual(instance.table_format, "TABLE")
+
+ def test_ctor_full_resource(self):
+ resource = {
+ "connectionId": "conn",
+ "storageUri": "uri",
+ "fileFormat": "FILE",
+ "tableFormat": "TABLE",
+ }
+ instance = self._make_one(_properties=resource)
+ self.assertEqual(instance.connection_id, "conn")
+ self.assertEqual(instance.storage_uri, "uri")
+ self.assertEqual(instance.file_format, "FILE")
+ self.assertEqual(instance.table_format, "TABLE")
+
+ def test_to_api_repr(self):
+ resource = {
+ "connectionId": "conn",
+ "storageUri": "uri",
+ "fileFormat": "FILE",
+ "tableFormat": "TABLE",
+ }
+ instance = self._make_one(_properties=resource)
+ self.assertEqual(instance.to_api_repr(), resource)
+
+ def test_from_api_repr_partial(self):
+ klass = self._get_target_class()
+ api_repr = {"fileFormat": "FILE"}
+ instance = klass.from_api_repr(api_repr)
+
+ self.assertIsNone(instance.connection_id)
+ self.assertIsNone(instance.storage_uri)
+ self.assertEqual(instance.file_format, "FILE")
+ self.assertIsNone(instance.table_format)
+
+ def test_comparisons(self):
+ resource = {
+ "connectionId": "conn",
+ "storageUri": "uri",
+ "fileFormat": "FILE",
+ "tableFormat": "TABLE",
+ }
+
+ first = self._make_one(_properties=resource)
+ second = self._make_one(_properties=copy.deepcopy(resource))
+ # Exercise comparator overloads.
+ # first and second should be equivalent.
+ self.assertNotEqual(first, resource)
+ self.assertEqual(first, second)
+ self.assertEqual(hash(first), hash(second))
+
+ # Update second to ensure that first and second are no longer equivalent.
+ second.connection_id = "foo"
+ self.assertNotEqual(first, second)
+ self.assertNotEqual(hash(first), hash(second))
+
+ # Update first with the same change, restoring equivalence.
+ first.connection_id = "foo"
+ self.assertEqual(first, second)
+ self.assertEqual(hash(first), hash(second))
+
+
class TestCloneDefinition:
@staticmethod
def _get_target_class():
@@ -1890,6 +2398,8 @@ def test_row(self):
class Test_EmptyRowIterator(unittest.TestCase):
+ PYARROW_MINIMUM_VERSION = str(_versions_helpers._MIN_PYARROW_VERSION)
+
def _make_one(self):
from google.cloud.bigquery.table import _EmptyRowIterator
@@ -1899,13 +2409,25 @@ def test_total_rows_eq_zero(self):
row_iterator = self._make_one()
self.assertEqual(row_iterator.total_rows, 0)
+ @mock.patch("google.cloud.bigquery.table.pyarrow", new=None)
+ def test_to_arrow_error_if_pyarrow_is_none(self):
+ row_iterator = self._make_one()
+ with self.assertRaises(ValueError):
+ row_iterator.to_arrow()
+
def test_to_arrow(self):
+ pytest.importorskip("numpy")
+ pyarrow = pytest.importorskip("pyarrow")
row_iterator = self._make_one()
tbl = row_iterator.to_arrow()
self.assertIsInstance(tbl, pyarrow.Table)
self.assertEqual(tbl.num_rows, 0)
def test_to_arrow_iterable(self):
+ pytest.importorskip("numpy")
+ pyarrow = pytest.importorskip(
+ "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION
+ )
row_iterator = self._make_one()
arrow_iter = row_iterator.to_arrow_iterable()
@@ -1923,8 +2445,8 @@ def test_to_dataframe_error_if_pandas_is_none(self):
with self.assertRaises(ValueError):
row_iterator.to_dataframe()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe(self):
+ pandas = pytest.importorskip("pandas")
row_iterator = self._make_one()
df = row_iterator.to_dataframe(create_bqstorage_client=False)
self.assertIsInstance(df, pandas.DataFrame)
@@ -1936,8 +2458,8 @@ def test_to_dataframe_iterable_error_if_pandas_is_none(self):
with self.assertRaises(ValueError):
row_iterator.to_dataframe_iterable()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_iterable(self):
+ pandas = pytest.importorskip("pandas")
row_iterator = self._make_one()
df_iter = row_iterator.to_dataframe_iterable()
@@ -1961,16 +2483,22 @@ def test_to_geodataframe_if_geopandas_is_none(self):
):
row_iterator.to_geodataframe(create_bqstorage_client=False)
- @unittest.skipIf(geopandas is None, "Requires `geopandas`")
def test_to_geodataframe(self):
+ geopandas = pytest.importorskip("geopandas")
row_iterator = self._make_one()
df = row_iterator.to_geodataframe(create_bqstorage_client=False)
self.assertIsInstance(df, geopandas.GeoDataFrame)
self.assertEqual(len(df), 0) # verify the number of rows
- self.assertIsNone(df.crs)
+
+ if geopandas.__version__ == "0.9.0":
+ assert hasattr(df, "crs")
+ else:
+ assert not hasattr(df, "crs")
class TestRowIterator(unittest.TestCase):
+ PYARROW_MINIMUM_VERSION = str(_versions_helpers._MIN_PYARROW_VERSION)
+
def _class_under_test(self):
from google.cloud.bigquery.table import RowIterator
@@ -1983,7 +2511,7 @@ def _make_one(
path=None,
schema=None,
table=None,
- **kwargs
+ **kwargs,
):
from google.cloud.bigquery.table import TableReference
@@ -2064,6 +2592,46 @@ def test_constructor_with_dict_schema(self):
]
self.assertEqual(iterator.schema, expected_schema)
+ def test_job_id_missing(self):
+ rows = self._make_one()
+ self.assertIsNone(rows.job_id)
+
+ def test_job_id_present(self):
+ rows = self._make_one(job_id="abc-123")
+ self.assertEqual(rows.job_id, "abc-123")
+
+ def test_location_missing(self):
+ rows = self._make_one()
+ self.assertIsNone(rows.location)
+
+ def test_location_present(self):
+ rows = self._make_one(location="asia-northeast1")
+ self.assertEqual(rows.location, "asia-northeast1")
+
+ def test_num_dml_affected_rows_missing(self):
+ rows = self._make_one()
+ self.assertIsNone(rows.num_dml_affected_rows)
+
+ def test_num_dml_affected_rows_present(self):
+ rows = self._make_one(num_dml_affected_rows=1234)
+ self.assertEqual(rows.num_dml_affected_rows, 1234)
+
+ def test_project_missing(self):
+ rows = self._make_one()
+ self.assertIsNone(rows.project)
+
+ def test_project_present(self):
+ rows = self._make_one(project="test-project")
+ self.assertEqual(rows.project, "test-project")
+
+ def test_query_id_missing(self):
+ rows = self._make_one()
+ self.assertIsNone(rows.query_id)
+
+ def test_query_id_present(self):
+ rows = self._make_one(query_id="xyz-987")
+ self.assertEqual(rows.query_id, "xyz-987")
+
def test_iterate(self):
from google.cloud.bigquery.schema import SchemaField
@@ -2116,9 +2684,18 @@ def test_iterate_with_cached_first_page(self):
path = "/foo"
api_request = mock.Mock(return_value={"rows": rows})
row_iterator = self._make_one(
- _mock_client(), api_request, path, schema, first_page_response=first_page
+ _mock_client(),
+ api_request,
+ path,
+ schema,
+ first_page_response=first_page,
+ total_rows=4,
)
+ self.assertEqual(row_iterator.total_rows, 4)
rows = list(row_iterator)
+ # Total rows should be maintained, even though subsequent API calls
+ # don't include it.
+ self.assertEqual(row_iterator.total_rows, 4)
self.assertEqual(len(rows), 4)
self.assertEqual(rows[0].age, 27)
self.assertEqual(rows[1].age, 28)
@@ -2129,7 +2706,40 @@ def test_iterate_with_cached_first_page(self):
method="GET", path=path, query_params={"pageToken": "next-page"}
)
- def test_page_size(self):
+ def test_iterate_with_cached_first_page_max_results(self):
+ from google.cloud.bigquery.schema import SchemaField
+
+ first_page = {
+ "rows": [
+ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
+ {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ ],
+ "pageToken": "next-page",
+ }
+ schema = [
+ SchemaField("name", "STRING", mode="REQUIRED"),
+ SchemaField("age", "INTEGER", mode="REQUIRED"),
+ ]
+ path = "/foo"
+ api_request = mock.Mock(return_value=first_page)
+ row_iterator = self._make_one(
+ _mock_client(),
+ api_request,
+ path,
+ schema,
+ max_results=3,
+ first_page_response=first_page,
+ )
+ rows = list(row_iterator)
+ self.assertEqual(len(rows), 3)
+ self.assertEqual(rows[0].age, 27)
+ self.assertEqual(rows[1].age, 28)
+ self.assertEqual(rows[2].age, 32)
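+ # All three requested rows came from the cached first page, so no API request is needed.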
+ api_request.assert_not_called()
+
+ def test_page_size(self):
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2154,39 +2764,151 @@ def test_page_size(self):
query_params={"maxResults": row_iterator._page_size},
)
- def test__is_completely_cached_returns_false_without_first_page(self):
+ def test__is_almost_completely_cached_returns_false_without_first_page(self):
iterator = self._make_one(first_page_response=None)
- self.assertFalse(iterator._is_completely_cached())
+ self.assertFalse(iterator._is_almost_completely_cached())
+
+ def test__is_almost_completely_cached_returns_true_with_more_rows_than_max_results(
+ self,
+ ):
+ rows = [
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
+ {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
+ ]
+ first_page = {"pageToken": "next-page", "rows": rows}
+ iterator = self._make_one(max_results=4, first_page_response=first_page)
+ self.assertTrue(iterator._is_almost_completely_cached())
+
+ def test__is_almost_completely_cached_returns_false_with_too_many_rows_remaining(
+ self,
+ ):
+ rows = [
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ ]
+ first_page = {"pageToken": "next-page", "rows": rows}
+ iterator = self._make_one(first_page_response=first_page, total_rows=100)
+ self.assertFalse(iterator._is_almost_completely_cached())
- def test__is_completely_cached_returns_false_with_page_token(self):
- first_page = {"pageToken": "next-page"}
+ def test__is_almost_completely_cached_returns_false_with_rows_remaining_and_no_total_rows(
+ self,
+ ):
+ rows = [
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ ]
+ first_page = {"pageToken": "next-page", "rows": rows}
iterator = self._make_one(first_page_response=first_page)
- self.assertFalse(iterator._is_completely_cached())
+ self.assertFalse(iterator._is_almost_completely_cached())
- def test__is_completely_cached_returns_true(self):
+ def test__is_almost_completely_cached_returns_true_with_some_rows_remaining(self):
+ rows = [
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ {"f": [{"v": "Whillma Phlyntstone"}, {"v": "27"}]},
+ {"f": [{"v": "Bhetty Rhubble"}, {"v": "28"}]},
+ {"f": [{"v": "Pebbles Phlyntstone"}, {"v": "4"}]},
+ {"f": [{"v": "Bamm-Bamm Rhubble"}, {"v": "5"}]},
+ {"f": [{"v": "Joseph Rockhead"}, {"v": "32"}]},
+ {"f": [{"v": "Perry Masonry"}, {"v": "33"}]},
+ ]
+ first_page = {"pageToken": "next-page", "rows": rows}
+ iterator = self._make_one(
+ first_page_response=first_page, total_rows=len(rows) + 1
+ )
+ self.assertTrue(iterator._is_almost_completely_cached())
+
+ def test__is_almost_completely_cached_returns_true_with_no_rows_remaining(self):
first_page = {"rows": []}
iterator = self._make_one(first_page_response=first_page)
- self.assertTrue(iterator._is_completely_cached())
+ self.assertTrue(iterator._is_almost_completely_cached())
- def test__validate_bqstorage_returns_false_when_completely_cached(self):
+ def test__should_use_bqstorage_returns_false_when_completely_cached(self):
first_page = {"rows": []}
iterator = self._make_one(first_page_response=first_page)
self.assertFalse(
- iterator._validate_bqstorage(
+ iterator._should_use_bqstorage(
bqstorage_client=None, create_bqstorage_client=True
)
)
- def test__validate_bqstorage_returns_false_if_max_results_set(self):
+ def test__should_use_bqstorage_returns_true_if_no_cached_results(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
+ iterator = self._make_one(first_page_response=None) # not cached
+ result = iterator._should_use_bqstorage(
+ bqstorage_client=None, create_bqstorage_client=True
+ )
+ self.assertTrue(result)
+
+ def test__should_use_bqstorage_returns_false_if_page_token_set(self):
+ iterator = self._make_one(
+ page_token="abc", first_page_response=None # not cached
+ )
+ result = iterator._should_use_bqstorage(
+ bqstorage_client=None, create_bqstorage_client=True
+ )
+ self.assertFalse(result)
+
+ def test__should_use_bqstorage_returns_false_if_max_results_set(self):
iterator = self._make_one(
max_results=10, first_page_response=None # not cached
)
- result = iterator._validate_bqstorage(
+ result = iterator._should_use_bqstorage(
bqstorage_client=None, create_bqstorage_client=True
)
self.assertFalse(result)
+ def test__should_use_bqstorage_returns_false_w_warning_if_missing_dependency(self):
+ iterator = self._make_one(first_page_response=None) # not cached
+
+ def fail_bqstorage_import(name, globals, locals, fromlist, level):
+ """Returns True if bigquery_storage has been imported."""
+ # NOTE: *very* simplified, assuming a straightforward absolute import
+ return "bigquery_storage" in name or (
+ fromlist is not None and "bigquery_storage" in fromlist
+ )
+
+ # maybe_fail_import() returns ImportError if the predicate is True
+ no_bqstorage = maybe_fail_import(predicate=fail_bqstorage_import)
+
+ with no_bqstorage, warnings.catch_warnings(record=True) as warned:
+ result = iterator._should_use_bqstorage(
+ bqstorage_client=None, create_bqstorage_client=True
+ )
+
+ self.assertFalse(result)
+
+ matching_warnings = [
+ warning for warning in warned if "Storage module not found" in str(warning)
+ ]
+ assert matching_warnings, "Dependency not found warning not raised."
+
+ def test__should_use_bqstorage_returns_false_w_warning_if_obsolete_version(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
+ iterator = self._make_one(first_page_response=None) # not cached
+
+ patcher = mock.patch(
+ "google.cloud.bigquery.table._versions_helpers.BQ_STORAGE_VERSIONS.try_import",
+ side_effect=exceptions.LegacyBigQueryStorageError("BQ Storage too old"),
+ )
+ with patcher, warnings.catch_warnings(record=True) as warned:
+ result = iterator._should_use_bqstorage(
+ bqstorage_client=None, create_bqstorage_client=True
+ )
+
+ self.assertFalse(result)
+
+ matching_warnings = [
+ warning for warning in warned if "BQ Storage too old" in str(warning)
+ ]
+ assert matching_warnings, "Obsolete dependency warning not raised."
+
def test_to_arrow_iterable(self):
+ pyarrow = pytest.importorskip(
+ "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION
+ )
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2287,9 +3009,16 @@ def test_to_arrow_iterable(self):
)
def test_to_arrow_iterable_w_bqstorage(self):
+ pyarrow = pytest.importorskip("pyarrow")
+ pytest.importorskip("google.cloud.bigquery_storage")
+ from google.cloud import bigquery_storage
+ from google.cloud.bigquery_storage_v1 import reader
+ from google.cloud.bigquery_storage_v1.services.big_query_read.transports import (
+ grpc as big_query_read_grpc_transport,
+ )
+
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
- from google.cloud.bigquery_storage_v1 import reader
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
bqstorage_client._transport = mock.create_autospec(
@@ -2362,6 +3091,10 @@ def test_to_arrow_iterable_w_bqstorage(self):
bqstorage_client._transport.grpc_channel.close.assert_not_called()
def test_to_arrow(self):
+ pytest.importorskip("numpy")
+ pyarrow = pytest.importorskip(
+ "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION
+ )
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2443,6 +3176,11 @@ def test_to_arrow(self):
)
def test_to_arrow_w_nulls(self):
+ pytest.importorskip("numpy")
+ pyarrow = pytest.importorskip(
+ "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION
+ )
+ import pyarrow.types
from google.cloud.bigquery.schema import SchemaField
schema = [SchemaField("name", "STRING"), SchemaField("age", "INTEGER")]
@@ -2475,6 +3213,10 @@ def test_to_arrow_w_nulls(self):
self.assertEqual(ages, [32, 29, None, 111])
def test_to_arrow_w_unknown_type(self):
+ pytest.importorskip("numpy")
+ pyarrow = pytest.importorskip(
+ "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION
+ )
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2512,11 +3254,15 @@ def test_to_arrow_w_unknown_type(self):
self.assertEqual(ages, [33, 29])
self.assertEqual(sports, ["volleyball", "basketball"])
- self.assertEqual(len(warned), 1)
- warning = warned[0]
- self.assertTrue("sport" in str(warning))
+ # Expect warnings from both the arrow conversion and the JSON deserialization.
+ self.assertEqual(len(warned), 2)
+ self.assertTrue(all("sport" in str(warning) for warning in warned))
def test_to_arrow_w_empty_table(self):
+ pytest.importorskip("numpy")
+ pyarrow = pytest.importorskip(
+ "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION
+ )
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2555,6 +3301,9 @@ def test_to_arrow_w_empty_table(self):
self.assertEqual(child_field.type.value_type[1].name, "age")
def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("pyarrow")
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2595,6 +3344,9 @@ def test_to_arrow_max_results_w_explicit_bqstorage_client_warning(self):
mock_client._ensure_bqstorage_client.assert_not_called()
def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("pyarrow")
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2631,9 +3383,16 @@ def test_to_arrow_max_results_w_create_bqstorage_client_no_warning(self):
mock_client._ensure_bqstorage_client.assert_not_called()
def test_to_arrow_w_bqstorage(self):
+ pytest.importorskip("numpy")
+ pyarrow = pytest.importorskip("pyarrow")
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
+ from google.cloud import bigquery_storage
from google.cloud.bigquery_storage_v1 import reader
+ from google.cloud.bigquery_storage_v1.services.big_query_read.transports import (
+ grpc as big_query_read_grpc_transport,
+ )
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
bqstorage_client._transport = mock.create_autospec(
@@ -2708,8 +3467,15 @@ def test_to_arrow_w_bqstorage(self):
bqstorage_client._transport.grpc_channel.close.assert_not_called()
def test_to_arrow_w_bqstorage_creates_client(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("pyarrow")
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
+ from google.cloud import bigquery_storage
+ from google.cloud.bigquery_storage_v1.services.big_query_read.transports import (
+ grpc as big_query_read_grpc_transport,
+ )
mock_client = _mock_client()
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
@@ -2735,6 +3501,10 @@ def test_to_arrow_w_bqstorage_creates_client(self):
bqstorage_client._transport.grpc_channel.close.assert_called_once()
def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self):
+ pytest.importorskip("numpy")
+ pyarrow = pytest.importorskip(
+ "pyarrow", minversion=self.PYARROW_MINIMUM_VERSION
+ )
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2749,20 +3519,28 @@ def test_to_arrow_ensure_bqstorage_client_wo_bqstorage(self):
api_request = mock.Mock(return_value={"rows": rows})
mock_client = _mock_client()
- mock_client._ensure_bqstorage_client.return_value = None
row_iterator = self._make_one(mock_client, api_request, path, schema)
- tbl = row_iterator.to_arrow(create_bqstorage_client=True)
+ def mock_verify_version(raise_if_error: bool = False):
+ raise exceptions.LegacyBigQueryStorageError("no bqstorage")
- # The client attempted to create a BQ Storage client, and even though
- # that was not possible, results were still returned without errors.
- mock_client._ensure_bqstorage_client.assert_called_once()
+ with mock.patch(
+ "google.cloud.bigquery._versions_helpers.BQ_STORAGE_VERSIONS.try_import",
+ mock_verify_version,
+ ):
+ tbl = row_iterator.to_arrow(create_bqstorage_client=True)
+
+ mock_client._ensure_bqstorage_client.assert_not_called()
self.assertIsInstance(tbl, pyarrow.Table)
self.assertEqual(tbl.num_rows, 2)
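An aside on the patch target used here: BQ_STORAGE_VERSIONS.try_import lives in the private _versions_helpers module and either returns the google.cloud.bigquery_storage package or raises LegacyBigQueryStorageError; the test simulates the failing case, so to_arrow() falls back to the REST download path without calling _ensure_bqstorage_client. A minimal sketch of that contract (private API, shown only for orientation):

from google.cloud.bigquery import _versions_helpers, exceptions

try:
    bqstorage = _versions_helpers.BQ_STORAGE_VERSIONS.try_import(raise_if_error=True)
except exceptions.LegacyBigQueryStorageError:
    bqstorage = None  # to_arrow()/to_dataframe() then download over the REST API.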
def test_to_arrow_w_bqstorage_no_streams(self):
+ pytest.importorskip("numpy")
+ pyarrow = pytest.importorskip("pyarrow")
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
+ from google.cloud import bigquery_storage
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
session = bigquery_storage.types.ReadSession()
@@ -2796,11 +3574,11 @@ def test_to_arrow_w_bqstorage_no_streams(self):
self.assertEqual(actual_table.schema[1].name, "colC")
self.assertEqual(actual_table.schema[2].name, "colB")
- @unittest.skipIf(tqdm is None, "Requires `tqdm`")
- @mock.patch("tqdm.tqdm_gui")
- @mock.patch("tqdm.tqdm_notebook")
- @mock.patch("tqdm.tqdm")
- def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock):
+ def test_to_arrow_progress_bar(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("pyarrow")
+ pytest.importorskip("tqdm")
+ pytest.importorskip("tqdm.notebook")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2817,12 +3595,13 @@ def test_to_arrow_progress_bar(self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_moc
api_request = mock.Mock(return_value={"rows": rows})
progress_bars = (
- ("tqdm", tqdm_mock),
- ("tqdm_notebook", tqdm_notebook_mock),
- ("tqdm_gui", tqdm_gui_mock),
+ ("tqdm", mock.patch("tqdm.tqdm")),
+ ("tqdm_notebook", mock.patch("tqdm.notebook.tqdm")),
+ ("tqdm_gui", mock.patch("tqdm.tqdm_gui")),
)
- for progress_bar_type, progress_bar_mock in progress_bars:
+ for progress_bar_type, bar_patch in progress_bars:
+ progress_bar_mock = bar_patch.start()
row_iterator = self._make_one(_mock_client(), api_request, path, schema)
tbl = row_iterator.to_arrow(
progress_bar_type=progress_bar_type,
@@ -2845,8 +3624,8 @@ def test_to_arrow_w_pyarrow_none(self):
with self.assertRaises(ValueError):
row_iterator.to_arrow()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_iterable(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2887,8 +3666,8 @@ def test_to_dataframe_iterable(self):
self.assertEqual(df_2["name"][0], "Sven")
self.assertEqual(df_2["age"][0], 33)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_iterable_with_dtypes(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -2929,11 +3708,18 @@ def test_to_dataframe_iterable_with_dtypes(self):
self.assertEqual(df_2["name"][0], "Sven")
self.assertEqual(df_2["age"][0], 33)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_iterable_w_bqstorage(self):
+ pytest.importorskip("numpy")
+ pandas = pytest.importorskip("pandas")
+ pyarrow = pytest.importorskip("pyarrow")
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
+ from google.cloud import bigquery_storage
from google.cloud.bigquery_storage_v1 import reader
+ from google.cloud.bigquery_storage_v1.services.big_query_read.transports import (
+ grpc as big_query_read_grpc_transport,
+ )
arrow_fields = [
pyarrow.field("colA", pyarrow.int64()),
@@ -2997,10 +3783,13 @@ def test_to_dataframe_iterable_w_bqstorage(self):
# Don't close the client if it was passed in.
bqstorage_client._transport.grpc_channel.close.assert_not_called()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_iterable_w_bqstorage_max_results_warning(self):
+ pytest.importorskip("numpy")
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
+ from google.cloud import bigquery_storage
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
@@ -3067,8 +3856,8 @@ def test_to_dataframe_iterable_error_if_pandas_is_none(self):
with pytest.raises(ValueError, match="pandas"):
row_iterator.to_dataframe_iterable()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3093,8 +3882,9 @@ def test_to_dataframe(self):
self.assertEqual(df.name.dtype.name, "object")
self.assertEqual(df.age.dtype.name, "Int64")
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.schema import SchemaField
schema = [SchemaField("some_timestamp", "TIMESTAMP")]
@@ -3120,8 +3910,9 @@ def test_to_dataframe_timestamp_out_of_pyarrow_bounds(self):
],
)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_datetime_out_of_pyarrow_bounds(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.schema import SchemaField
schema = [SchemaField("some_datetime", "DATETIME")]
@@ -3143,14 +3934,11 @@ def test_to_dataframe_datetime_out_of_pyarrow_bounds(self):
[datetime.datetime(4567, 1, 1), datetime.datetime(9999, 12, 31)],
)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- @unittest.skipIf(tqdm is None, "Requires `tqdm`")
- @mock.patch("tqdm.tqdm_gui")
- @mock.patch("tqdm.tqdm_notebook")
- @mock.patch("tqdm.tqdm")
- def test_to_dataframe_progress_bar(
- self, tqdm_mock, tqdm_notebook_mock, tqdm_gui_mock
- ):
+ def test_to_dataframe_progress_bar(self):
+ pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
+ pytest.importorskip("tqdm")
+
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3167,12 +3955,12 @@ def test_to_dataframe_progress_bar(
api_request = mock.Mock(return_value={"rows": rows})
progress_bars = (
- ("tqdm", tqdm_mock),
- ("tqdm_notebook", tqdm_notebook_mock),
- ("tqdm_gui", tqdm_gui_mock),
+ ("tqdm", mock.patch("tqdm.tqdm")),
+ ("tqdm_gui", mock.patch("tqdm.tqdm_gui")),
)
- for progress_bar_type, progress_bar_mock in progress_bars:
+ for progress_bar_type, bar_patch in progress_bars:
+ progress_bar_mock = bar_patch.start()
row_iterator = self._make_one(_mock_client(), api_request, path, schema)
df = row_iterator.to_dataframe(
progress_bar_type=progress_bar_type,
@@ -3184,9 +3972,43 @@ def test_to_dataframe_progress_bar(
progress_bar_mock().close.assert_called_once()
self.assertEqual(len(df), 4)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
+ def test_to_dataframe_progress_bar_notebook(self):
+ pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
+ pytest.importorskip("tqdm")
+ pytest.importorskip("tqdm.notebook")
+
+ from google.cloud.bigquery.schema import SchemaField
+
+ schema = [
+ SchemaField("name", "STRING", mode="REQUIRED"),
+ SchemaField("age", "INTEGER", mode="REQUIRED"),
+ ]
+ rows = [
+ {"f": [{"v": "Phred Phlyntstone"}, {"v": "32"}]},
+ {"f": [{"v": "Bharney Rhubble"}, {"v": "33"}]},
+ {"f": [{"v": "Wylma Phlyntstone"}, {"v": "29"}]},
+ {"f": [{"v": "Bhettye Rhubble"}, {"v": "27"}]},
+ ]
+ path = "/foo"
+ api_request = mock.Mock(return_value={"rows": rows})
+
+ with mock.patch("tqdm.notebook.tqdm") as progress_bar_mock:
+ row_iterator = self._make_one(_mock_client(), api_request, path, schema)
+ df = row_iterator.to_dataframe(
+ progress_bar_type="tqdm_notebook",
+ create_bqstorage_client=False,
+ )
+
+ progress_bar_mock.assert_called()
+ progress_bar_mock().update.assert_called()
+ progress_bar_mock().close.assert_called_once()
+ self.assertEqual(len(df), 4)
+
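The notebook flavor gets its own test above because current tqdm releases expose it at tqdm.notebook.tqdm (the old tqdm.tqdm_notebook alias is deprecated), which is also why the patch target changed. A short usage sketch with a hypothetical table id:

from google.cloud import bigquery

client = bigquery.Client()
rows = client.list_rows("my-project.my_dataset.my_table")  # hypothetical table
df = rows.to_dataframe(
    progress_bar_type="tqdm_notebook",  # or "tqdm" / "tqdm_gui"
    create_bqstorage_client=False,
)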
@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None)
def test_to_dataframe_no_tqdm_no_progress_bar(self):
+ pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3209,12 +4031,16 @@ def test_to_dataframe_no_tqdm_no_progress_bar(self):
user_warnings = [
warning for warning in warned if warning.category is UserWarning
]
- self.assertEqual(len(user_warnings), 0)
+        # With pandas < 1.5, pandas.ArrowDtype is not supported, so the RANGE
+        # columns have to be converted to object dtype and warnings are raised
+        # (len(user_warnings) == 3). With newer pandas versions, and in the
+        # noextra test runs, len(user_warnings) == 0.
+ self.assertIn(len(user_warnings), [0, 3])
self.assertEqual(len(df), 4)
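The accepted warning counts above reflect a pandas capability difference; a hedged sketch of the feature check involved (pandas.ArrowDtype arrived in pandas 1.5):

import pandas

HAS_ARROW_DTYPE = hasattr(pandas, "ArrowDtype")  # True on pandas >= 1.5
# With ArrowDtype available, RANGE columns keep an Arrow-backed struct dtype and
# no conversion warnings are emitted; without it they fall back to object dtype
# and a UserWarning is raised for each converted column.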
- @unittest.skipIf(pandas is None, "Requires `pandas`")
@mock.patch("google.cloud.bigquery._tqdm_helpers.tqdm", new=None)
def test_to_dataframe_no_tqdm(self):
+ pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3240,18 +4066,22 @@ def test_to_dataframe_no_tqdm(self):
user_warnings = [
warning for warning in warned if warning.category is UserWarning
]
- self.assertEqual(len(user_warnings), 1)
+        # With pandas < 1.5, pandas.ArrowDtype is not supported, so the RANGE
+        # columns have to be converted to object dtype and warnings are raised
+        # (len(user_warnings) == 4). With newer pandas versions, and in the
+        # noextra test runs, len(user_warnings) == 1.
+ self.assertIn(len(user_warnings), [1, 4])
# Even though the progress bar won't show, downloading the dataframe
# should still work.
self.assertEqual(len(df), 4)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- @unittest.skipIf(tqdm is None, "Requires `tqdm`")
- @mock.patch("tqdm.tqdm_gui", new=None) # will raise TypeError on call
- @mock.patch("tqdm.tqdm_notebook", new=None) # will raise TypeError on call
- @mock.patch("tqdm.tqdm", new=None) # will raise TypeError on call
def test_to_dataframe_tqdm_error(self):
+ pytest.importorskip("pandas")
+ tqdm = pytest.importorskip("tqdm")
+        # Note: these patchers are constructed but never started, so tqdm stays
+        # importable here; the warning check below only verifies that any
+        # warnings emitted while requesting a progress bar have an expected
+        # category.
+        mock.patch("tqdm.tqdm_gui", new=None)
+        mock.patch("tqdm.notebook.tqdm", new=None)
+        mock.patch("tqdm.tqdm", new=None)
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3280,11 +4110,14 @@ def test_to_dataframe_tqdm_error(self):
# Warn that a progress bar was requested, but creating the tqdm
# progress bar failed.
- for warning in warned:
- self.assertIs(warning.category, UserWarning)
+ for warning in warned: # pragma: NO COVER
+ self.assertIn(
+ warning.category,
+ [UserWarning, DeprecationWarning, tqdm.TqdmExperimentalWarning],
+ )
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_w_empty_results(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3300,8 +4133,8 @@ def test_to_dataframe_w_empty_results(self):
self.assertEqual(len(df), 0) # verify the number of rows
self.assertEqual(list(df), ["name", "age"]) # verify the column names
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_w_various_types_nullable(self):
+ pandas = pytest.importorskip("pandas")
import datetime
from google.cloud.bigquery.schema import SchemaField
@@ -3341,8 +4174,420 @@ def test_to_dataframe_w_various_types_nullable(self):
self.assertIsInstance(row.complete, bool)
self.assertIsInstance(row.date, datetime.date)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
+ def test_to_dataframe_w_dtypes_mapper(self):
+ pandas = pytest.importorskip("pandas")
+ pyarrow = pytest.importorskip("pyarrow")
+ from google.cloud.bigquery.schema import SchemaField
+
+ schema = [
+ SchemaField("name", "STRING"),
+ SchemaField("complete", "BOOL"),
+ SchemaField("age", "INTEGER"),
+ SchemaField("seconds", "INT64"),
+            foreign_keys=foreign_keys,
+ SchemaField("date", "DATE"),
+ SchemaField("datetime", "DATETIME"),
+ SchemaField("time", "TIME"),
+        self.assertEqual(table_constraint.foreign_keys, foreign_keys)
+ SchemaField("range_timestamp", "RANGE", range_element_type="TIMESTAMP"),
+ SchemaField("range_datetime", "RANGE", range_element_type="DATETIME"),
+ SchemaField("range_date", "RANGE", range_element_type="DATE"),
+ ]
+ row_data = [
+ [
+ "Phred Phlyntstone",
+ "true",
+ "32",
+ "23000",
+ "1.77",
+ "1999-12-01",
+ "1999-12-31T00:00:00.000000",
+ "00:00:00.000000",
+ "1433836800000000",
+ "[1433836800000000, 1433999900000000)",
+ "[2009-06-17T13:45:30, 2019-07-17T13:45:30)",
+ "[2020-10-01, 2021-10-02)",
+ ],
+ [
+ "Bharney Rhubble",
+ "false",
+ "33",
+ "454000",
+ "6.66",
+ "4567-06-14",
+ "4567-12-31T00:00:00.000000",
+ "12:00:00.232413",
+ "81953424000000000",
+ "[1433836800000000, UNBOUNDED)",
+ "[2009-06-17T13:45:30, UNBOUNDED)",
+ "[2020-10-01, UNBOUNDED)",
+ ],
+ [
+ "Wylma Phlyntstone",
+ "true",
+ "29",
+ "341000",
+ "2.0",
+ "9999-12-31",
+ "9999-12-31T23:59:59.999999",
+ "23:59:59.999999",
+ "253402261199999999",
+ "[UNBOUNDED, UNBOUNDED)",
+ "[UNBOUNDED, UNBOUNDED)",
+ "[UNBOUNDED, UNBOUNDED)",
+ ],
+ ]
+ rows = [{"f": [{"v": field} for field in row]} for row in row_data]
+ path = "/foo"
+ api_request = mock.Mock(return_value={"rows": rows})
+ row_iterator = self._make_one(_mock_client(), api_request, path, schema)
+
+ df = row_iterator.to_dataframe(
+ create_bqstorage_client=False,
+ bool_dtype=pandas.BooleanDtype(),
+ int_dtype=pandas.Int32Dtype(),
+ float_dtype=(
+ pandas.Float64Dtype()
+ if hasattr(pandas, "Float64Dtype")
+ else pandas.StringDtype()
+ ),
+ string_dtype=pandas.StringDtype(),
+ date_dtype=(
+ pandas.ArrowDtype(pyarrow.date32())
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ datetime_dtype=(
+ pandas.ArrowDtype(pyarrow.timestamp("us"))
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ time_dtype=(
+ pandas.ArrowDtype(pyarrow.time64("us"))
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ timestamp_dtype=(
+ pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC"))
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ range_date_dtype=(
+ pandas.ArrowDtype(
+ pyarrow.struct(
+ [("start", pyarrow.date32()), ("end", pyarrow.date32())]
+ )
+ )
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ range_datetime_dtype=(
+ pandas.ArrowDtype(
+ pyarrow.struct(
+ [
+ ("start", pyarrow.timestamp("us")),
+ ("end", pyarrow.timestamp("us")),
+ ]
+ )
+ )
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ range_timestamp_dtype=(
+ pandas.ArrowDtype(
+ pyarrow.struct(
+ [
+ ("start", pyarrow.timestamp("us", tz="UTC")),
+ ("end", pyarrow.timestamp("us", tz="UTC")),
+ ]
+ )
+ )
+ if hasattr(pandas, "ArrowDtype")
+ else None
+ ),
+ )
+
+ self.assertIsInstance(df, pandas.DataFrame)
+
+ self.assertEqual(list(df.complete), [True, False, True])
+ self.assertEqual(df.complete.dtype.name, "boolean")
+
+ self.assertEqual(list(df.age), [32, 33, 29])
+ self.assertEqual(df.age.dtype.name, "Int32")
+
+ self.assertEqual(list(df.seconds), [23000, 454000, 341000])
+ self.assertEqual(df.seconds.dtype.name, "Int32")
+
+ self.assertEqual(
+ list(df.name), ["Phred Phlyntstone", "Bharney Rhubble", "Wylma Phlyntstone"]
+ )
+ self.assertEqual(df.name.dtype.name, "string")
+
+ self.assertEqual(list(df.miles), [1.77, 6.66, 2.0])
+ self.assertEqual(df.miles.dtype.name, "Float64")
+
+ if hasattr(pandas, "ArrowDtype"):
+ self.assertEqual(
+ list(df.date),
+ [
+ datetime.date(1999, 12, 1),
+ datetime.date(4567, 6, 14),
+ datetime.date(9999, 12, 31),
+ ],
+ )
+ self.assertEqual(df.date.dtype.name, "date32[day][pyarrow]")
+
+ self.assertEqual(
+ list(df.datetime),
+ [
+ datetime.datetime(1999, 12, 31, 0, 0),
+ datetime.datetime(4567, 12, 31, 0, 0),
+ datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
+ ],
+ )
+ self.assertEqual(df.datetime.dtype.name, "timestamp[us][pyarrow]")
+
+ self.assertEqual(
+ list(df.time),
+ [
+ datetime.time(0, 0),
+ datetime.time(12, 0, 0, 232413),
+ datetime.time(23, 59, 59, 999999),
+ ],
+ )
+ self.assertEqual(df.time.dtype.name, "time64[us][pyarrow]")
+
+ self.assertEqual(
+ list(df.timestamp),
+ [
+ datetime.datetime(2015, 6, 9, 8, 0, tzinfo=datetime.timezone.utc),
+ datetime.datetime(4567, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
+ datetime.datetime(
+ 9999, 12, 31, 12, 59, 59, 999999, tzinfo=datetime.timezone.utc
+ ),
+ ],
+ )
+ self.assertEqual(df.timestamp.dtype.name, "timestamp[us, tz=UTC][pyarrow]")
+
+ self.assertEqual(
+ list(df.range_timestamp),
+ [
+ {
+ "start": datetime.datetime(
+ 2015, 6, 9, 8, 0, 0, tzinfo=datetime.timezone.utc
+ ),
+ "end": datetime.datetime(
+ 2015, 6, 11, 5, 18, 20, tzinfo=datetime.timezone.utc
+ ),
+ },
+ {
+ "start": datetime.datetime(
+ 2015, 6, 9, 8, 0, 0, tzinfo=datetime.timezone.utc
+ ),
+ "end": None,
+ },
+ {"start": None, "end": None},
+ ],
+ )
+
+ self.assertEqual(
+ list(df.range_datetime),
+ [
+ {
+ "start": datetime.datetime(2009, 6, 17, 13, 45, 30),
+ "end": datetime.datetime(2019, 7, 17, 13, 45, 30),
+ },
+ {"start": datetime.datetime(2009, 6, 17, 13, 45, 30), "end": None},
+ {"start": None, "end": None},
+ ],
+ )
+
+ self.assertEqual(
+ list(df.range_date),
+ [
+ {
+ "start": datetime.date(2020, 10, 1),
+ "end": datetime.date(2021, 10, 2),
+ },
+ {"start": datetime.date(2020, 10, 1), "end": None},
+ {"start": None, "end": None},
+ ],
+ )
+ else:
+ self.assertEqual(
+ list(df.date),
+ [
+ pandas.Timestamp("1999-12-01 00:00:00"),
+ pandas.Timestamp("2229-03-27 01:41:45.161793536"),
+ pandas.Timestamp("1816-03-29 05:56:08.066277376"),
+ ],
+ )
+ self.assertEqual(df.date.dtype.name, "datetime64[ns]")
+
+ self.assertEqual(
+ list(df.datetime),
+ [
+ datetime.datetime(1999, 12, 31, 0, 0),
+ datetime.datetime(4567, 12, 31, 0, 0),
+ datetime.datetime(9999, 12, 31, 23, 59, 59, 999999),
+ ],
+ )
+ self.assertEqual(df.datetime.dtype.name, "object")
+
+ self.assertEqual(
+ list(df.time),
+ [
+ datetime.time(0, 0),
+ datetime.time(12, 0, 0, 232413),
+ datetime.time(23, 59, 59, 999999),
+ ],
+ )
+ self.assertEqual(df.time.dtype.name, "object")
+
+ self.assertEqual(
+ list(df.timestamp),
+ [
+ datetime.datetime(2015, 6, 9, 8, 0, tzinfo=datetime.timezone.utc),
+ datetime.datetime(4567, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
+ datetime.datetime(
+ 9999, 12, 31, 12, 59, 59, 999999, tzinfo=datetime.timezone.utc
+ ),
+ ],
+ )
+ self.assertEqual(df.timestamp.dtype.name, "object")
+
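Outside the test harness, the same dtype-mapper arguments go straight to RowIterator.to_dataframe; a minimal sketch assuming pandas >= 1.5 and pyarrow are installed (project and table ids are hypothetical):

import pandas
import pyarrow
from google.cloud import bigquery

client = bigquery.Client()
rows = client.list_rows("my-project.my_dataset.my_table")  # hypothetical table
df = rows.to_dataframe(
    bool_dtype=pandas.BooleanDtype(),
    int_dtype=pandas.Int32Dtype(),
    float_dtype=pandas.Float64Dtype(),
    string_dtype=pandas.StringDtype(),
    timestamp_dtype=pandas.ArrowDtype(pyarrow.timestamp("us", tz="UTC")),
)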
+ def test_to_dataframe_w_none_dtypes_mapper(self):
+ pandas = pytest.importorskip("pandas")
+ pandas_major_version = pandas.__version__[0:2]
+ if pandas_major_version not in ["0.", "1."]:
+            pytest.skip("Requires a version of pandas less than 2.0")
+ from google.cloud.bigquery.schema import SchemaField
+
+ schema = [
+ SchemaField("name", "STRING"),
+ SchemaField("complete", "BOOL"),
+ SchemaField("age", "INTEGER"),
+ SchemaField("seconds", "INT64"),
+ SchemaField("miles", "FLOAT64"),
+ SchemaField("date", "DATE"),
+ SchemaField("datetime", "DATETIME"),
+ SchemaField("time", "TIME"),
+ SchemaField("timestamp", "TIMESTAMP"),
+ SchemaField("range_timestamp", "RANGE", range_element_type="TIMESTAMP"),
+ SchemaField("range_datetime", "RANGE", range_element_type="DATETIME"),
+ SchemaField("range_date", "RANGE", range_element_type="DATE"),
+ ]
+ row_data = [
+ [
+ "Phred Phlyntstone",
+ "true",
+ "32",
+ "23000",
+ "1.77",
+ "1999-12-01",
+ "1999-12-31T00:00:00.000000",
+ "23:59:59.999999",
+ "1433836800000000",
+ "[1433836800000000, 1433999900000000)",
+ "[2009-06-17T13:45:30, 2019-07-17T13:45:30)",
+ "[2020-10-01, 2021-10-02)",
+ ],
+ ]
+ rows = [{"f": [{"v": field} for field in row]} for row in row_data]
+ path = "/foo"
+ api_request = mock.Mock(return_value={"rows": rows})
+ row_iterator = self._make_one(_mock_client(), api_request, path, schema)
+
+ df = row_iterator.to_dataframe(
+ create_bqstorage_client=False,
+ bool_dtype=None,
+ int_dtype=None,
+ float_dtype=None,
+ string_dtype=None,
+ date_dtype=None,
+ datetime_dtype=None,
+ time_dtype=None,
+ timestamp_dtype=None,
+ range_timestamp_dtype=None,
+ range_datetime_dtype=None,
+ range_date_dtype=None,
+ )
+ self.assertIsInstance(df, pandas.DataFrame)
+ self.assertEqual(df.complete.dtype.name, "bool")
+ self.assertEqual(df.age.dtype.name, "int64")
+ self.assertEqual(df.seconds.dtype.name, "int64")
+ self.assertEqual(df.miles.dtype.name, "float64")
+ self.assertEqual(df.name.dtype.name, "object")
+ self.assertEqual(df.date.dtype.name, "datetime64[ns]")
+ self.assertEqual(df.datetime.dtype.name, "datetime64[ns]")
+ self.assertEqual(df.time.dtype.name, "object")
+ self.assertEqual(df.timestamp.dtype.name, "datetime64[ns, UTC]")
+ self.assertEqual(df.range_timestamp.dtype.name, "object")
+ self.assertEqual(df.range_datetime.dtype.name, "object")
+ self.assertEqual(df.range_date.dtype.name, "object")
+
+ def test_to_dataframe_w_unsupported_dtypes_mapper(self):
+ pytest.importorskip("pandas")
+ numpy = pytest.importorskip("numpy")
+ from google.cloud.bigquery.schema import SchemaField
+
+ schema = [
+ SchemaField("name", "STRING"),
+ ]
+ row_data = [
+ ["Phred Phlyntstone"],
+ ]
+ rows = [{"f": [{"v": field} for field in row]} for row in row_data]
+ path = "/foo"
+ api_request = mock.Mock(return_value={"rows": rows})
+ row_iterator = self._make_one(_mock_client(), api_request, path, schema)
+
+ with self.assertRaises(ValueError):
+ row_iterator.to_dataframe(
+ create_bqstorage_client=False,
+ bool_dtype=numpy.dtype("bool"),
+ )
+ with self.assertRaises(ValueError):
+ row_iterator.to_dataframe(
+ create_bqstorage_client=False,
+ int_dtype=numpy.dtype("int64"),
+ )
+ with self.assertRaises(ValueError):
+ row_iterator.to_dataframe(
+ create_bqstorage_client=False,
+ float_dtype=numpy.dtype("float64"),
+ )
+ with self.assertRaises(ValueError):
+ row_iterator.to_dataframe(
+ create_bqstorage_client=False,
+ string_dtype=numpy.dtype("object"),
+ )
+ with self.assertRaises(ValueError):
+ row_iterator.to_dataframe(
+ create_bqstorage_client=False,
+ date_dtype=numpy.dtype("object"),
+ )
+ with self.assertRaises(ValueError):
+ row_iterator.to_dataframe(
+ create_bqstorage_client=False,
+ datetime_dtype=numpy.dtype("datetime64[us]"),
+ )
+ with self.assertRaises(ValueError):
+ row_iterator.to_dataframe(
+ create_bqstorage_client=False,
+ time_dtype=numpy.dtype("datetime64[us]"),
+ )
+ with self.assertRaises(ValueError):
+ row_iterator.to_dataframe(
+ create_bqstorage_client=False,
+ timestamp_dtype=numpy.dtype("datetime64[us]"),
+ )
+
def test_to_dataframe_column_dtypes(self):
+ pandas = pytest.importorskip("pandas")
+ pandas_major_version = pandas.__version__[0:2]
+ if pandas_major_version not in ["0.", "1."]:
+ pytest.skip("Requires a version of pandas less than 2.0")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3355,9 +4600,9 @@ def test_to_dataframe_column_dtypes(self):
SchemaField("date", "DATE"),
]
row_data = [
- ["1433836800000000", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"],
+ ["1433836800000", "420", "1.1", "1.77", "Cash", "true", "1999-12-01"],
[
- "1387811700000000",
+ "1387811700000",
"2580",
"17.7",
"28.5",
@@ -3365,7 +4610,7 @@ def test_to_dataframe_column_dtypes(self):
"false",
"1953-06-14",
],
- ["1385565300000000", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"],
+ ["1385565300000", "2280", "4.4", "7.1", "Credit", "true", "1981-11-04"],
]
rows = [{"f": [{"v": field} for field in row]} for row in row_data]
path = "/foo"
@@ -3390,13 +4635,12 @@ def test_to_dataframe_column_dtypes(self):
self.assertEqual(df.complete.dtype.name, "boolean")
self.assertEqual(df.date.dtype.name, "dbdate")
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_datetime_objects(self):
# When converting date or timestamp values to nanosecond
# precision, the result can be out of pyarrow bounds. To avoid
# the error when converting to Pandas, we use object type if
# necessary.
-
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3439,9 +4683,10 @@ def test_to_dataframe_error_if_pandas_is_none(self):
with self.assertRaises(ValueError):
row_iterator.to_dataframe()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
@mock.patch("google.cloud.bigquery.table.shapely", new=None)
def test_to_dataframe_error_if_shapely_is_none(self):
+ pytest.importorskip("pandas")
+
with self.assertRaisesRegex(
ValueError,
re.escape(
@@ -3451,8 +4696,9 @@ def test_to_dataframe_error_if_shapely_is_none(self):
):
self._make_one_from_data().to_dataframe(geography_as_object=True)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_max_results_w_bqstorage_warning(self):
+ pytest.importorskip("pandas")
+
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3487,8 +4733,8 @@ def test_to_dataframe_max_results_w_bqstorage_warning(self):
]
self.assertEqual(len(matches), 1, msg="User warning was not emitted.")
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self):
+ pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3528,8 +4774,8 @@ def test_to_dataframe_max_results_w_explicit_bqstorage_client_warning(self):
)
mock_client._ensure_bqstorage_client.assert_not_called()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self):
+ pytest.importorskip("pandas")
from google.cloud.bigquery.schema import SchemaField
schema = [
@@ -3565,10 +4811,16 @@ def test_to_dataframe_max_results_w_create_bqstorage_client_no_warning(self):
self.assertFalse(matches)
mock_client._ensure_bqstorage_client.assert_not_called()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_w_bqstorage_creates_client(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("pandas")
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
+ from google.cloud import bigquery_storage
+ from google.cloud.bigquery_storage_v1.services.big_query_read.transports import (
+ grpc as big_query_read_grpc_transport,
+ )
mock_client = _mock_client()
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
@@ -3593,10 +4845,13 @@ def test_to_dataframe_w_bqstorage_creates_client(self):
mock_client._ensure_bqstorage_client.assert_called_once()
bqstorage_client._transport.grpc_channel.close.assert_called_once()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_w_bqstorage_no_streams(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("pandas")
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
+ from google.cloud import bigquery_storage
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
session = bigquery_storage.types.ReadSession()
@@ -3619,9 +4874,13 @@ def test_to_dataframe_w_bqstorage_no_streams(self):
self.assertEqual(list(got), column_names)
self.assertTrue(got.empty)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_w_bqstorage_logs_session(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("google.cloud.bigquery_storage")
+ pytest.importorskip("pandas")
+ pytest.importorskip("pyarrow")
from google.cloud.bigquery.table import Table
+ from google.cloud import bigquery_storage
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
session = bigquery_storage.types.ReadSession()
@@ -3640,8 +4899,12 @@ def test_to_dataframe_w_bqstorage_logs_session(self):
"with BQ Storage API session 'projects/test-proj/locations/us/sessions/SOMESESSION'."
)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_w_bqstorage_empty_streams(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("google.cloud.bigquery_storage")
+ pytest.importorskip("pandas")
+ pyarrow = pytest.importorskip("pyarrow")
+ from google.cloud import bigquery_storage
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
from google.cloud.bigquery_storage_v1 import reader
@@ -3691,11 +4954,18 @@ def test_to_dataframe_w_bqstorage_empty_streams(self):
self.assertEqual(list(got), column_names)
self.assertTrue(got.empty)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_w_bqstorage_nonempty(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("google.cloud.bigquery_storage")
+ pytest.importorskip("pandas")
+ pyarrow = pytest.importorskip("pyarrow")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
+ from google.cloud import bigquery_storage
from google.cloud.bigquery_storage_v1 import reader
+ from google.cloud.bigquery_storage_v1.services.big_query_read.transports import (
+ grpc as big_query_read_grpc_transport,
+ )
arrow_fields = [
pyarrow.field("colA", pyarrow.int64()),
@@ -3767,8 +5037,11 @@ def test_to_dataframe_w_bqstorage_nonempty(self):
# Don't close the client if it was passed in.
bqstorage_client._transport.grpc_channel.close.assert_not_called()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self):
+ pytest.importorskip("numpy")
+ bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")
+ pytest.importorskip("pandas")
+ pyarrow = pytest.importorskip("pyarrow")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
from google.cloud.bigquery_storage_v1 import reader
@@ -3817,10 +5090,12 @@ def test_to_dataframe_w_bqstorage_multiple_streams_return_unique_index(self):
self.assertEqual(len(got.index), total_rows)
self.assertTrue(got.index.is_unique)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
- @unittest.skipIf(tqdm is None, "Requires `tqdm`")
- @mock.patch("tqdm.tqdm")
- def test_to_dataframe_w_bqstorage_updates_progress_bar(self, tqdm_mock):
+ def test_to_dataframe_w_bqstorage_updates_progress_bar(self):
+ pytest.importorskip("numpy")
+ bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")
+ pytest.importorskip("pandas")
+ pyarrow = pytest.importorskip("pyarrow")
+ pytest.importorskip("tqdm")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
from google.cloud.bigquery_storage_v1 import reader
@@ -3876,24 +5151,28 @@ def blocking_to_arrow(*args, **kwargs):
selected_fields=schema,
)
- row_iterator.to_dataframe(
- bqstorage_client=bqstorage_client, progress_bar_type="tqdm"
- )
+ with mock.patch("tqdm.tqdm") as tqdm_mock:
+ row_iterator.to_dataframe(
+ bqstorage_client=bqstorage_client, progress_bar_type="tqdm"
+ )
- # Make sure that this test updated the progress bar once per page from
- # each stream.
- total_pages = len(streams) * len(mock_pages)
- expected_total_rows = total_pages * len(page_items)
- progress_updates = [
- args[0] for args, kwargs in tqdm_mock().update.call_args_list
- ]
- # Should have sent >1 update due to delay in blocking_to_arrow.
- self.assertGreater(len(progress_updates), 1)
- self.assertEqual(sum(progress_updates), expected_total_rows)
- tqdm_mock().close.assert_called_once()
+ # Make sure that this test updated the progress bar once per page from
+ # each stream.
+ total_pages = len(streams) * len(mock_pages)
+ expected_total_rows = total_pages * len(page_items)
+ progress_updates = [
+ args[0] for args, kwargs in tqdm_mock().update.call_args_list
+ ]
+ # Should have sent >1 update due to delay in blocking_to_arrow.
+ self.assertGreater(len(progress_updates), 1)
+ self.assertEqual(sum(progress_updates), expected_total_rows)
+ tqdm_mock().close.assert_called_once()
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_w_bqstorage_exits_on_keyboardinterrupt(self):
+ pytest.importorskip("numpy")
+ bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")
+ pytest.importorskip("pandas")
+ pyarrow = pytest.importorskip("pyarrow")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
from google.cloud.bigquery_storage_v1 import reader
@@ -3976,8 +5255,8 @@ def blocking_to_arrow(*args, **kwargs):
# should have been set.
self.assertLessEqual(mock_page.to_dataframe.call_count, 2)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self):
+ pandas = pytest.importorskip("pandas")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
@@ -4008,8 +5287,10 @@ def test_to_dataframe_tabledata_list_w_multiple_pages_return_unique_index(self):
self.assertEqual(df.name.dtype.name, "object")
self.assertTrue(df.index.is_unique)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_w_bqstorage_raises_auth_error(self):
+ pytest.importorskip("google.cloud.bigquery_storage")
+ pytest.importorskip("pandas")
+ from google.cloud import bigquery_storage
from google.cloud.bigquery import table as mut
bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
@@ -4028,6 +5309,7 @@ def test_to_dataframe_w_bqstorage_raises_auth_error(self):
row_iterator.to_dataframe(bqstorage_client=bqstorage_client)
def test_to_dataframe_w_bqstorage_partition(self):
+ bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
@@ -4045,6 +5327,7 @@ def test_to_dataframe_w_bqstorage_partition(self):
row_iterator.to_dataframe(bqstorage_client)
def test_to_dataframe_w_bqstorage_snapshot(self):
+ bigquery_storage = pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
@@ -4061,11 +5344,18 @@ def test_to_dataframe_w_bqstorage_snapshot(self):
with pytest.raises(ValueError):
row_iterator.to_dataframe(bqstorage_client)
- @unittest.skipIf(pandas is None, "Requires `pandas`")
def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self):
+ pytest.importorskip("numpy")
+ pytest.importorskip("google.cloud.bigquery_storage")
+ pandas = pytest.importorskip("pandas")
+ pyarrow = pytest.importorskip("pyarrow")
+ from google.cloud import bigquery_storage
from google.cloud.bigquery import schema
from google.cloud.bigquery import table as mut
from google.cloud.bigquery_storage_v1 import reader
+ from google.cloud.bigquery_storage_v1.services.big_query_read.transports import (
+ grpc as big_query_read_grpc_transport,
+ )
arrow_fields = [
# Not alphabetical to test column order.
@@ -4170,8 +5460,9 @@ def test_to_dataframe_concat_categorical_dtype_w_pyarrow(self):
# Don't close the client if it was passed in.
bqstorage_client._transport.grpc_channel.close.assert_not_called()
- @unittest.skipIf(geopandas is None, "Requires `geopandas`")
def test_to_dataframe_geography_as_object(self):
+ pandas = pytest.importorskip("pandas")
+ pytest.importorskip("geopandas")
row_iterator = self._make_one_from_data(
(("name", "STRING"), ("geog", "GEOGRAPHY")),
(
@@ -4205,8 +5496,8 @@ def test_to_geodataframe_error_if_geopandas_is_none(self):
):
self._make_one_from_data().to_geodataframe()
- @unittest.skipIf(geopandas is None, "Requires `geopandas`")
def test_to_geodataframe(self):
+ geopandas = pytest.importorskip("geopandas")
row_iterator = self._make_one_from_data(
(("name", "STRING"), ("geog", "GEOGRAPHY")),
(
@@ -4235,8 +5526,8 @@ def test_to_geodataframe(self):
self.assertEqual(df.geog.crs.srs, "EPSG:4326")
self.assertEqual(df.geog.crs.name, "WGS 84")
- @unittest.skipIf(geopandas is None, "Requires `geopandas`")
def test_to_geodataframe_ambiguous_geog(self):
+ pytest.importorskip("geopandas")
row_iterator = self._make_one_from_data(
(("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), ()
)
@@ -4250,8 +5541,8 @@ def test_to_geodataframe_ambiguous_geog(self):
):
row_iterator.to_geodataframe(create_bqstorage_client=False)
- @unittest.skipIf(geopandas is None, "Requires `geopandas`")
def test_to_geodataframe_bad_geography_column(self):
+ pytest.importorskip("geopandas")
row_iterator = self._make_one_from_data(
(("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")), ()
)
@@ -4266,8 +5557,8 @@ def test_to_geodataframe_bad_geography_column(self):
create_bqstorage_client=False, geography_column="xxx"
)
- @unittest.skipIf(geopandas is None, "Requires `geopandas`")
def test_to_geodataframe_no_geog(self):
+ pytest.importorskip("geopandas")
row_iterator = self._make_one_from_data(
(("name", "STRING"), ("geog", "STRING")), ()
)
@@ -4280,8 +5571,9 @@ def test_to_geodataframe_no_geog(self):
):
row_iterator.to_geodataframe(create_bqstorage_client=False)
- @unittest.skipIf(geopandas is None, "Requires `geopandas`")
def test_to_geodataframe_w_geography_column(self):
+ geopandas = pytest.importorskip("geopandas")
+ pandas = pytest.importorskip("pandas")
row_iterator = self._make_one_from_data(
(("name", "STRING"), ("geog", "GEOGRAPHY"), ("geog2", "GEOGRAPHY")),
(
@@ -4326,7 +5618,6 @@ def test_to_geodataframe_w_geography_column(self):
["0.0", "0.0", "0.0"],
)
- @unittest.skipIf(geopandas is None, "Requires `geopandas`")
@mock.patch("google.cloud.bigquery.table.RowIterator.to_dataframe")
def test_rowiterator_to_geodataframe_delegation(self, to_dataframe):
"""
@@ -4335,7 +5626,9 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe):
This test just demonstrates that. We don't need to test all the
variations, which are tested for to_dataframe.
"""
- import numpy
+ pandas = pytest.importorskip("pandas")
+ geopandas = pytest.importorskip("geopandas")
+ numpy = pytest.importorskip("numpy")
from shapely import wkt
row_iterator = self._make_one_from_data(
@@ -4368,6 +5661,10 @@ def test_rowiterator_to_geodataframe_delegation(self, to_dataframe):
progress_bar_type,
create_bqstorage_client,
geography_as_object=True,
+ bool_dtype=DefaultPandasDTypes.BOOL_DTYPE,
+ int_dtype=DefaultPandasDTypes.INT_DTYPE,
+ float_dtype=None,
+ string_dtype=None,
)
self.assertIsInstance(df, geopandas.GeoDataFrame)
@@ -4764,6 +6061,731 @@ def test_set_expiration_w_none(self):
assert time_partitioning._properties["expirationMs"] is None
+class TestPrimaryKey(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.table import PrimaryKey
+
+ return PrimaryKey
+
+ @classmethod
+ def _make_one(cls, *args, **kwargs):
+ return cls._get_target_class()(*args, **kwargs)
+
+ def test_constructor_explicit(self):
+ columns = ["id", "product_id"]
+ primary_key = self._make_one(columns)
+
+ self.assertEqual(primary_key.columns, columns)
+
+ def test__eq__columns_mismatch(self):
+ primary_key = self._make_one(columns=["id", "product_id"])
+ other_primary_key = self._make_one(columns=["id"])
+
+ self.assertNotEqual(primary_key, other_primary_key)
+
+ def test__eq__other_type(self):
+ primary_key = self._make_one(columns=["id", "product_id"])
+ with self.assertRaises(TypeError):
+ primary_key == "This is not a Primary Key"
+
+
+class TestColumnReference(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.table import ColumnReference
+
+ return ColumnReference
+
+ @classmethod
+ def _make_one(cls, *args, **kwargs):
+ return cls._get_target_class()(*args, **kwargs)
+
+ def test_constructor_explicit(self):
+ referencing_column = "product_id"
+ referenced_column = "id"
+ column_reference = self._make_one(referencing_column, referenced_column)
+
+ self.assertEqual(column_reference.referencing_column, referencing_column)
+ self.assertEqual(column_reference.referenced_column, referenced_column)
+
+ def test__eq__referencing_column_mismatch(self):
+ column_reference = self._make_one(
+ referencing_column="product_id",
+ referenced_column="id",
+ )
+ other_column_reference = self._make_one(
+ referencing_column="item_id",
+ referenced_column="id",
+ )
+
+ self.assertNotEqual(column_reference, other_column_reference)
+
+ def test__eq__referenced_column_mismatch(self):
+ column_reference = self._make_one(
+ referencing_column="product_id",
+ referenced_column="id",
+ )
+ other_column_reference = self._make_one(
+ referencing_column="product_id",
+ referenced_column="id_1",
+ )
+
+ self.assertNotEqual(column_reference, other_column_reference)
+
+ def test__eq__other_type(self):
+ column_reference = self._make_one(
+ referencing_column="product_id",
+ referenced_column="id",
+ )
+ with self.assertRaises(TypeError):
+ column_reference == "This is not a Column Reference"
+
+
+class TestForeignKey(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.table import ForeignKey
+
+ return ForeignKey
+
+ @classmethod
+ def _make_one(cls, *args, **kwargs):
+ return cls._get_target_class()(*args, **kwargs)
+
+ def test_constructor_explicit(self):
+ name = "my_fk"
+ referenced_table = TableReference.from_string("my-project.mydataset.mytable")
+ column_references = []
+ foreign_key = self._make_one(name, referenced_table, column_references)
+
+ self.assertEqual(foreign_key.name, name)
+ self.assertEqual(foreign_key.referenced_table, referenced_table)
+ self.assertEqual(foreign_key.column_references, column_references)
+
+ def test__eq__name_mismatch(self):
+ referenced_table = TableReference.from_string("my-project.mydataset.mytable")
+ column_references = []
+ foreign_key = self._make_one(
+ name="my_fk",
+ referenced_table=referenced_table,
+ column_references=column_references,
+ )
+ other_foreign_key = self._make_one(
+ name="my_other_fk",
+ referenced_table=referenced_table,
+ column_references=column_references,
+ )
+
+ self.assertNotEqual(foreign_key, other_foreign_key)
+
+ def test__eq__referenced_table_mismatch(self):
+ name = "my_fk"
+ column_references = []
+ foreign_key = self._make_one(
+ name=name,
+ referenced_table=TableReference.from_string("my-project.mydataset.mytable"),
+ column_references=column_references,
+ )
+ other_foreign_key = self._make_one(
+ name=name,
+ referenced_table=TableReference.from_string(
+ "my-project.mydataset.my-other-table"
+ ),
+ column_references=column_references,
+ )
+
+ self.assertNotEqual(foreign_key, other_foreign_key)
+
+ def test__eq__column_references_mismatch(self):
+ from google.cloud.bigquery.table import ColumnReference
+
+ name = "my_fk"
+ referenced_table = TableReference.from_string("my-project.mydataset.mytable")
+ foreign_key = self._make_one(
+ name=name,
+ referenced_table=referenced_table,
+ column_references=[],
+ )
+ other_foreign_key = self._make_one(
+ name=name,
+ referenced_table=referenced_table,
+ column_references=[
+ ColumnReference(
+ referencing_column="product_id", referenced_column="id"
+ ),
+ ],
+ )
+
+ self.assertNotEqual(foreign_key, other_foreign_key)
+
+ def test__eq__other_type(self):
+ foreign_key = self._make_one(
+ name="my_fk",
+ referenced_table=TableReference.from_string("my-project.mydataset.mytable"),
+ column_references=[],
+ )
+ with self.assertRaises(TypeError):
+ foreign_key == "This is not a Foreign Key"
+
+ def test_to_api_repr(self):
+ from google.cloud.bigquery.table import ColumnReference, TableReference
+
+ name = "my_fk"
+ referenced_table = TableReference.from_string("my-project.mydataset.mytable")
+ column_references = [
+ ColumnReference(referencing_column="product_id", referenced_column="id")
+ ]
+ foreign_key = self._make_one(name, referenced_table, column_references)
+
+ expected = {
+ "name": name,
+ "referencedTable": {
+ "projectId": "my-project",
+ "datasetId": "mydataset",
+ "tableId": "mytable",
+ },
+ "columnReferences": [
+ {"referencingColumn": "product_id", "referencedColumn": "id"}
+ ],
+ }
+ self.assertEqual(foreign_key.to_api_repr(), expected)
+
+ def test_to_api_repr_empty_column_references(self):
+ from google.cloud.bigquery.table import TableReference
+
+ name = "my_fk"
+ referenced_table = TableReference.from_string("my-project.mydataset.mytable")
+ column_references = []
+ foreign_key = self._make_one(name, referenced_table, column_references)
+
+ expected = {
+ "name": name,
+ "referencedTable": {
+ "projectId": "my-project",
+ "datasetId": "mydataset",
+ "tableId": "mytable",
+ },
+ "columnReferences": [],
+ }
+ self.assertEqual(foreign_key.to_api_repr(), expected)
+
+
+class TestTableConstraint(unittest.TestCase):
+ @staticmethod
+ def _get_target_class():
+ from google.cloud.bigquery.table import TableConstraints
+
+ return TableConstraints
+
+ @classmethod
+ def _make_one(cls, *args, **kwargs):
+ return cls._get_target_class()(*args, **kwargs)
+
+ def test_constructor_defaults(self):
+ instance = self._make_one(primary_key=None, foreign_keys=None)
+ self.assertIsNone(instance.primary_key)
+ self.assertIsNone(instance.foreign_keys)
+
+ def test_constructor_explicit(self):
+ from google.cloud.bigquery.table import (
+ PrimaryKey,
+ ForeignKey,
+ TableReference,
+ ColumnReference,
+ )
+
+ primary_key = PrimaryKey(columns=["my_pk_id"])
+ foriegn_keys = [
+ ForeignKey(
+ name="my_fk_id",
+ referenced_table=TableReference.from_string(
+ "my-project.my-dataset.my-table"
+ ),
+ column_references=[
+ ColumnReference(referencing_column="id", referenced_column="id"),
+ ],
+ ),
+ ]
+
+ table_constraint = self._make_one(
+ primary_key=primary_key,
+ foreign_keys=foriegn_keys,
+ )
+
+ self.assertEqual(table_constraint.primary_key, primary_key)
+ self.assertEqual(table_constraint.foreign_keys, foriegn_keys)
+
+ def test_constructor_explicit_with_none(self):
+ table_constraint = self._make_one(primary_key=None, foreign_keys=None)
+
+ self.assertIsNone(table_constraint.primary_key)
+ self.assertIsNone(table_constraint.foreign_keys)
+
+ def test__eq__other_type(self):
+ from google.cloud.bigquery.table import (
+ PrimaryKey,
+ ForeignKey,
+ TableReference,
+ ColumnReference,
+ )
+
+ table_constraint = self._make_one(
+ primary_key=PrimaryKey(columns=["my_pk_id"]),
+ foreign_keys=[
+ ForeignKey(
+ name="my_fk_id",
+ referenced_table=TableReference.from_string(
+ "my-project.my-dataset.my-table"
+ ),
+ column_references=[
+ ColumnReference(
+ referencing_column="id", referenced_column="id"
+ ),
+ ],
+ ),
+ ],
+ )
+ with self.assertRaises(TypeError):
+ table_constraint == "This is not a Table Constraint"
+
+ def test_from_api_repr_full_resource(self):
+ from google.cloud.bigquery.table import (
+ ColumnReference,
+ ForeignKey,
+ TableReference,
+ )
+
+ resource = {
+ "primaryKey": {
+ "columns": ["id", "product_id"],
+ },
+ "foreignKeys": [
+ {
+ "name": "my_fk_name",
+ "referencedTable": {
+ "projectId": "my-project",
+ "datasetId": "your-dataset",
+ "tableId": "products",
+ },
+ "columnReferences": [
+ {"referencingColumn": "product_id", "referencedColumn": "id"},
+ ],
+ }
+ ],
+ }
+ instance = self._get_target_class().from_api_repr(resource)
+
+ self.assertIsNotNone(instance.primary_key)
+ self.assertEqual(instance.primary_key.columns, ["id", "product_id"])
+ self.assertEqual(
+ instance.foreign_keys,
+ [
+ ForeignKey(
+ name="my_fk_name",
+ referenced_table=TableReference.from_string(
+ "my-project.your-dataset.products"
+ ),
+ column_references=[
+ ColumnReference(
+ referencing_column="product_id", referenced_column="id"
+ ),
+ ],
+ ),
+ ],
+ )
+
+ def test_from_api_repr_only_primary_key_resource(self):
+ resource = {
+ "primaryKey": {
+ "columns": ["id"],
+ },
+ }
+ instance = self._get_target_class().from_api_repr(resource)
+
+ self.assertIsNotNone(instance.primary_key)
+ self.assertEqual(instance.primary_key.columns, ["id"])
+ self.assertIsNone(instance.foreign_keys)
+
+ def test_from_api_repr_only_foreign_keys_resource(self):
+ resource = {
+ "foreignKeys": [
+ {
+ "name": "my_fk_name",
+ "referencedTable": {
+ "projectId": "my-project",
+ "datasetId": "your-dataset",
+ "tableId": "products",
+ },
+ "columnReferences": [
+ {"referencingColumn": "product_id", "referencedColumn": "id"},
+ ],
+ }
+ ]
+ }
+ instance = self._get_target_class().from_api_repr(resource)
+
+ self.assertIsNone(instance.primary_key)
+ self.assertIsNotNone(instance.foreign_keys)
+
+ def test_to_api_repr(self):
+ from google.cloud.bigquery.table import ColumnReference, ForeignKey, PrimaryKey
+
+ primary_key = PrimaryKey(columns=["id", "product_id"])
+ foreign_keys = [
+ ForeignKey(
+ name="my_fk_name",
+ referenced_table=TableReference.from_string(
+ "my-project.my-dataset.products"
+ ),
+ column_references=[
+ ColumnReference(
+ referencing_column="product_id", referenced_column="id"
+ ),
+ ],
+ )
+ ]
+ instance = self._make_one(primary_key=primary_key, foreign_keys=foreign_keys)
+
+ expected = {
+ "primaryKey": {
+ "columns": ["id", "product_id"],
+ },
+ "foreignKeys": [
+ {
+ "name": "my_fk_name",
+ "referencedTable": {
+ "projectId": "my-project",
+ "datasetId": "my-dataset",
+ "tableId": "products",
+ },
+ "columnReferences": [
+ {"referencingColumn": "product_id", "referencedColumn": "id"},
+ ],
+ }
+ ],
+ }
+ self.assertEqual(instance.to_api_repr(), expected)
+
+ def test_to_api_repr_only_primary_key(self):
+ from google.cloud.bigquery.table import PrimaryKey
+
+ primary_key = PrimaryKey(columns=["id", "product_id"])
+ instance = self._make_one(primary_key=primary_key, foreign_keys=None)
+ expected = {
+ "primaryKey": {
+ "columns": ["id", "product_id"],
+ },
+ }
+ self.assertEqual(instance.to_api_repr(), expected)
+
+ def test_to_api_repr_empty_primary_key(self):
+ from google.cloud.bigquery.table import PrimaryKey
+
+ primary_key = PrimaryKey(columns=[])
+ instance = self._make_one(primary_key=primary_key, foreign_keys=None)
+
+ expected = {
+ "primaryKey": {
+ "columns": [],
+ },
+ }
+ self.assertEqual(instance.to_api_repr(), expected)
+
+ def test_to_api_repr_only_foreign_keys(self):
+ from google.cloud.bigquery.table import ColumnReference, ForeignKey
+
+ foreign_keys = [
+ ForeignKey(
+ name="my_fk_name",
+ referenced_table=TableReference.from_string(
+ "my-project.my-dataset.products"
+ ),
+ column_references=[
+ ColumnReference(
+ referencing_column="product_id", referenced_column="id"
+ ),
+ ],
+ )
+ ]
+ instance = self._make_one(primary_key=None, foreign_keys=foreign_keys)
+ expected = {
+ "foreignKeys": [
+ {
+ "name": "my_fk_name",
+ "referencedTable": {
+ "projectId": "my-project",
+ "datasetId": "my-dataset",
+ "tableId": "products",
+ },
+ "columnReferences": [
+ {"referencingColumn": "product_id", "referencedColumn": "id"},
+ ],
+ }
+ ],
+ }
+ self.assertEqual(instance.to_api_repr(), expected)
+
+ def test_to_api_repr_empty_foreign_keys(self):
+ foreign_keys = []
+ instance = self._make_one(primary_key=None, foreign_keys=foreign_keys)
+
+ expected = {}
+ self.assertEqual(instance.to_api_repr(), expected)
+
+ def test_to_api_repr_empty_constraints(self):
+ instance = self._make_one(primary_key=None, foreign_keys=None)
+ expected = {}
+ self.assertEqual(instance.to_api_repr(), expected)
+
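For orientation, a small construction sketch of the objects these constraint tests cover (no API calls are made; the table path is hypothetical):

from google.cloud.bigquery.table import (
    ColumnReference,
    ForeignKey,
    PrimaryKey,
    TableConstraints,
    TableReference,
)

constraints = TableConstraints(
    primary_key=PrimaryKey(columns=["id"]),
    foreign_keys=[
        ForeignKey(
            name="orders_to_products",
            referenced_table=TableReference.from_string("my-project.my_dataset.products"),
            column_references=[
                ColumnReference(referencing_column="product_id", referenced_column="id")
            ],
        )
    ],
)
# Serializes to the camelCase REST representation asserted in the tests above.
api_repr = constraints.to_api_repr()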
+
+@pytest.mark.parametrize(
+ "self_pk_name,self_fk_name,other_pk_name,other_fk_name,expected_equal",
+ [
+ (None, None, None, None, True),
+ ("pkey", None, "pkey", None, True),
+ ("pkey", "fkey", "pkey", "fkey", True),
+ (None, "fkey", None, "fkey", True),
+ ("pkey", None, "pkey_no_match", None, False),
+ ("pkey", "fkey", "pkey_no_match", "fkey_no_match", False),
+ (None, "fkey", None, "fkey_no_match", False),
+ ("pkey", "fkey", "pkey_no_match", "fkey", False),
+ ("pkey", "fkey", "pkey", "fkey_no_match", False),
+ ],
+)
+def test_table_constraint_eq_parametrized(
+ self_pk_name, self_fk_name, other_pk_name, other_fk_name, expected_equal
+):
+    # Imports are kept local so this parametrized test stays self-contained.
+ from google.cloud.bigquery.table import (
+ ColumnReference,
+ ForeignKey,
+ PrimaryKey,
+ TableReference,
+ TableConstraints,
+ )
+
+ # Helper function to create a PrimaryKey object or None
+ def _create_primary_key(name):
+ if name is None:
+ return None
+ return PrimaryKey(columns=[name])
+
+ # Helper function to create a list of ForeignKey objects or None
+ def _create_foreign_keys(name):
+ if name is None:
+ return None
+ # Using a generic referenced_table and column_references for simplicity
+ # The 'name' parameter ensures different ForeignKey objects for different names
+ return [
+ ForeignKey(
+ name=name,
+ referenced_table=TableReference.from_string(
+ f"my-project.my-dataset.{name}_referenced_table"
+ ),
+ column_references=[
+ ColumnReference(
+ referencing_column=f"{name}_ref_col",
+ referenced_column=f"{name}_pk_col",
+ )
+ ],
+ )
+ ]
+
+ # Create the two TableConstraints instances for comparison
+ tc1 = TableConstraints(
+ primary_key=_create_primary_key(self_pk_name),
+ foreign_keys=_create_foreign_keys(self_fk_name),
+ )
+ tc2 = TableConstraints(
+ primary_key=_create_primary_key(other_pk_name),
+ foreign_keys=_create_foreign_keys(other_fk_name),
+ )
+
+ # Assert the equality based on the expected outcome
+ assert (tc1 == tc2) == expected_equal
+
+
+class TestExternalCatalogTableOptions:
+ PROJECT = "test-project"
+ DATASET_ID = "test_dataset"
+ TABLE_ID = "coffee_table"
+ DATASET = DatasetReference(PROJECT, DATASET_ID)
+ TABLEREF = DATASET.table(TABLE_ID)
+
+ @staticmethod
+    def _get_target_class():
+ from google.cloud.bigquery.table import Table
+
+ return Table
+
+ def _make_one(self, *args, **kw):
+        return self._get_target_class()(*args, **kw)
+
+ EXTERNALCATALOGTABLEOPTIONS = {
+ "connection_id": "connection123",
+ "parameters": {"key": "value"},
+ "storage_descriptor": {
+ "input_format": "testpath.to.OrcInputFormat",
+ "location_uri": "gs://test/path/",
+ "output_format": "testpath.to.OrcOutputFormat",
+ "serde_info": {
+ "serialization_library": "testpath.to.LazySimpleSerDe",
+ "name": "serde_lib_name",
+ "parameters": {"key": "value"},
+ },
+ },
+ }
+
+ def test_external_catalog_table_options_default_initialization(self):
+ table = self._make_one(self.TABLEREF)
+
+ assert table.external_catalog_table_options is None
+
+ def test_external_catalog_table_options_valid_inputs(self):
+ table = self._make_one(self.TABLEREF)
+
+ # supplied in api_repr format
+ table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS
+ result = table.external_catalog_table_options.to_api_repr()
+ expected = self.EXTERNALCATALOGTABLEOPTIONS
+ assert result == expected
+
+ # supplied in obj format
+ ecto = external_config.ExternalCatalogTableOptions.from_api_repr(
+ self.EXTERNALCATALOGTABLEOPTIONS
+ )
+ assert isinstance(ecto, external_config.ExternalCatalogTableOptions)
+
+ table.external_catalog_table_options = ecto
+ result = table.external_catalog_table_options.to_api_repr()
+ expected = self.EXTERNALCATALOGTABLEOPTIONS
+ assert result == expected
+
+ def test_external_catalog_table_options_invalid_input(self):
+ table = self._make_one(self.TABLEREF)
+
+ # invalid on the whole
+ with pytest.raises(TypeError) as e:
+ table.external_catalog_table_options = 123
+
+        # The error message is expected to begin with "Pass ...".
+        assert "Pass " in str(e.value)
+
+ def test_external_catalog_table_options_to_api_repr(self):
+ table = self._make_one(self.TABLEREF)
+
+ table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS
+ result = table.external_catalog_table_options.to_api_repr()
+ expected = self.EXTERNALCATALOGTABLEOPTIONS
+ assert result == expected
+
+ def test_external_catalog_table_options_from_api_repr(self):
+ table = self._make_one(self.TABLEREF)
+
+ table.external_catalog_table_options = self.EXTERNALCATALOGTABLEOPTIONS
+ ecto = external_config.ExternalCatalogTableOptions.from_api_repr(
+ self.EXTERNALCATALOGTABLEOPTIONS
+ )
+ result = ecto.to_api_repr()
+ expected = self.EXTERNALCATALOGTABLEOPTIONS
+ assert result == expected
+
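A brief usage sketch of the two accepted input forms exercised above (table id hypothetical; the dict is a subset of EXTERNALCATALOGTABLEOPTIONS):

from google.cloud import bigquery
from google.cloud.bigquery import external_config

table = bigquery.Table("my-project.test_dataset.coffee_table")

# Either assign the raw API representation directly...
table.external_catalog_table_options = {"connection_id": "connection123"}

# ...or build the typed object first and assign that.
table.external_catalog_table_options = (
    external_config.ExternalCatalogTableOptions.from_api_repr(
        {"connection_id": "connection123"}
    )
)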
+
+class TestForeignTypeInfo:
+ PROJECT = "test-project"
+ DATASET_ID = "test_dataset"
+ TABLE_ID = "coffee_table"
+ DATASET = DatasetReference(PROJECT, DATASET_ID)
+ TABLEREF = DATASET.table(TABLE_ID)
+ FOREIGNTYPEINFO = {
+ "typeSystem": "TYPE_SYSTEM_UNSPECIFIED",
+ }
+ API_REPR = {
+ "tableReference": {
+ "projectId": PROJECT,
+ "datasetId": DATASET_ID,
+ "tableId": TABLE_ID,
+ },
+ "schema": {
+ "fields": [
+ {"name": "full_name", "type": "STRING", "mode": "REQUIRED"},
+ {"name": "age", "type": "INTEGER", "mode": "REQUIRED"},
+ ],
+            "foreignTypeInfo": FOREIGNTYPEINFO,
+ },
+ }
+
+ from google.cloud.bigquery.schema import ForeignTypeInfo
+
+ @staticmethod
+    def _get_target_class():
+ from google.cloud.bigquery.table import Table
+
+ return Table
+
+ def _make_one(self, *args, **kw):
+        return self._get_target_class()(*args, **kw)
+
+ def test_foreign_type_info_default_initialization(self):
+ table = self._make_one(self.TABLEREF)
+ assert table.foreign_type_info is None
+
+ @pytest.mark.parametrize(
+ "foreign_type_info, expected",
+ [
+ (
+ {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"},
+ "TYPE_SYSTEM_UNSPECIFIED",
+ ),
+ (None, None),
+ (
+ ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED"),
+ "TYPE_SYSTEM_UNSPECIFIED",
+ ),
+ ],
+ )
+ def test_foreign_type_info_valid_inputs(self, foreign_type_info, expected):
+ table = self._make_one(self.TABLEREF)
+
+ table.foreign_type_info = foreign_type_info
+
+ if foreign_type_info is None:
+ result = table.foreign_type_info
+ else:
+ result = table.foreign_type_info.type_system
+ assert result == expected
+
+ def test_foreign_type_info_invalid_inputs(self):
+ table = self._make_one(self.TABLEREF)
+
+ # invalid on the whole
+ with pytest.raises(TypeError, match="Pass .*"):
+ table.foreign_type_info = 123
+
+ def test_foreign_type_info_to_api_repr(self):
+ table = self._make_one(self.TABLEREF)
+
+ table.foreign_type_info = self.ForeignTypeInfo(
+ type_system="TYPE_SYSTEM_UNSPECIFIED",
+ )
+
+ result = table.to_api_repr()["schema"]["foreignTypeInfo"]
+ expected = self.FOREIGNTYPEINFO
+ assert result == expected
+
+ def test_foreign_type_info_from_api_repr(self):
+ table = self._make_one(self.TABLEREF)
+ table.foreign_type_info = self.FOREIGNTYPEINFO
+
+ fti = schema.ForeignTypeInfo.from_api_repr(self.FOREIGNTYPEINFO)
+
+ result = fti.to_api_repr()
+ expected = self.FOREIGNTYPEINFO
+ assert result == expected
+
+
@pytest.mark.parametrize(
"table_path",
(
@@ -4774,6 +6796,7 @@ def test_set_expiration_w_none(self):
),
)
def test_table_reference_to_bqstorage_v1_stable(table_path):
+ pytest.importorskip("google.cloud.bigquery_storage")
from google.cloud.bigquery import table as mut
expected = "projects/my-project/datasets/my_dataset/tables/my_table"
@@ -4781,3 +6804,73 @@ def test_table_reference_to_bqstorage_v1_stable(table_path):
for klass in (mut.TableReference, mut.Table, mut.TableListItem):
got = klass.from_string(table_path).to_bqstorage()
assert got == expected
+
+
+@pytest.mark.parametrize("preserve_order", [True, False])
+def test_to_arrow_iterable_w_bqstorage_max_stream_count(preserve_order):
+ pytest.importorskip("pandas")
+ pytest.importorskip("google.cloud.bigquery_storage")
+ from google.cloud.bigquery import schema
+ from google.cloud.bigquery import table as mut
+ from google.cloud import bigquery_storage
+
+ bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+ session = bigquery_storage.types.ReadSession()
+ bqstorage_client.create_read_session.return_value = session
+
+ row_iterator = mut.RowIterator(
+ _mock_client(),
+ api_request=None,
+ path=None,
+ schema=[
+ schema.SchemaField("colA", "INTEGER"),
+ ],
+ table=mut.TableReference.from_string("proj.dset.tbl"),
+ )
+ row_iterator._preserve_order = preserve_order
+
+ max_stream_count = 132
+ result_iterable = row_iterator.to_arrow_iterable(
+ bqstorage_client=bqstorage_client, max_stream_count=max_stream_count
+ )
+ list(result_iterable)
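+ # When preserve_order is True, the read session is expected to be capped at a
+ # single stream so that row order is kept; otherwise the caller-supplied
+ # max_stream_count should be forwarded unchanged.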
+ bqstorage_client.create_read_session.assert_called_once_with(
+ parent=mock.ANY,
+ read_session=mock.ANY,
+ max_stream_count=max_stream_count if not preserve_order else 1,
+ )
+
+
+@pytest.mark.parametrize("preserve_order", [True, False])
+def test_to_dataframe_iterable_w_bqstorage_max_stream_count(preserve_order):
+ pytest.importorskip("pandas")
+ pytest.importorskip("google.cloud.bigquery_storage")
+ from google.cloud.bigquery import schema
+ from google.cloud.bigquery import table as mut
+ from google.cloud import bigquery_storage
+
+ bqstorage_client = mock.create_autospec(bigquery_storage.BigQueryReadClient)
+ session = bigquery_storage.types.ReadSession()
+ bqstorage_client.create_read_session.return_value = session
+
+ row_iterator = mut.RowIterator(
+ _mock_client(),
+ api_request=None,
+ path=None,
+ schema=[
+ schema.SchemaField("colA", "INTEGER"),
+ ],
+ table=mut.TableReference.from_string("proj.dset.tbl"),
+ )
+ row_iterator._preserve_order = preserve_order
+
+ max_stream_count = 132
+ result_iterable = row_iterator.to_dataframe_iterable(
+ bqstorage_client=bqstorage_client, max_stream_count=max_stream_count
+ )
+ list(result_iterable)
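+ # As with the Arrow variant above, an ordered read is expected to request at
+ # most one stream.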
+ bqstorage_client.create_read_session.assert_called_once_with(
+ parent=mock.ANY,
+ read_session=mock.ANY,
+ max_stream_count=max_stream_count if not preserve_order else 1,
+ )
diff --git a/tests/unit/test_table_arrow.py b/tests/unit/test_table_arrow.py
new file mode 100644
index 000000000..fdd1b7b78
--- /dev/null
+++ b/tests/unit/test_table_arrow.py
@@ -0,0 +1,161 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pytest
+
+from google.cloud import bigquery
+import google.cloud.bigquery.table
+
+
+pytest.importorskip("numpy")
+pytest.importorskip("pyarrow", minversion="3.0.0")
+
+
+def test_to_arrow_with_jobs_query_response():
+ resource = {
+ "kind": "bigquery#queryResponse",
+ "schema": {
+ "fields": [
+ {"name": "name", "type": "STRING", "mode": "NULLABLE"},
+ {"name": "number", "type": "INTEGER", "mode": "NULLABLE"},
+ {"name": "json", "type": "JSON", "mode": "NULLABLE"},
+ ]
+ },
+ "jobReference": {
+ "projectId": "test-project",
+ "jobId": "job_ocd3cb-N62QIslU7R5qKKa2_427J",
+ "location": "US",
+ },
+ "totalRows": "9",
+ "rows": [
+ {"f": [{"v": "Tiarra"}, {"v": "6"}, {"v": "123"}]},
+ {"f": [{"v": "Timothy"}, {"v": "325"}, {"v": '{"key":"value"}'}]},
+ {"f": [{"v": "Tina"}, {"v": "26"}, {"v": "[1,2,3]"}]},
+ {
+ "f": [
+ {"v": "Tierra"},
+ {"v": "10"},
+ {"v": '{"aKey": {"bKey": {"cKey": -123}}}'},
+ ]
+ },
+ {"f": [{"v": "Tia"}, {"v": "17"}, {"v": None}]},
+ {"f": [{"v": "Tiara"}, {"v": "22"}, {"v": '"some-json-string"'}]},
+ {"f": [{"v": "Tiana"}, {"v": "6"}, {"v": '{"nullKey":null}'}]},
+ {"f": [{"v": "Tiffany"}, {"v": "229"}, {"v": '""'}]},
+ {"f": [{"v": "Tiffani"}, {"v": "8"}, {"v": "[]"}]},
+ ],
+ "totalBytesProcessed": "154775150",
+ "jobComplete": True,
+ "cacheHit": False,
+ "queryId": "job_ocd3cb-N62QIslU7R5qKKa2_427J",
+ }
+
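+ # With a complete first_page_response, no further API calls should be needed,
+ # so both client and api_request can be None here.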
+ rows = google.cloud.bigquery.table.RowIterator(
+ client=None,
+ api_request=None,
+ path=None,
+ schema=[
+ bigquery.SchemaField.from_api_repr(field)
+ for field in resource["schema"]["fields"]
+ ],
+ first_page_response=resource,
+ )
+ records = rows.to_arrow()
+
+ assert records.column_names == ["name", "number", "json"]
+ assert records["name"].to_pylist() == [
+ "Tiarra",
+ "Timothy",
+ "Tina",
+ "Tierra",
+ "Tia",
+ "Tiara",
+ "Tiana",
+ "Tiffany",
+ "Tiffani",
+ ]
+ assert records["number"].to_pylist() == [6, 325, 26, 10, 17, 22, 6, 229, 8]
+ assert records["json"].to_pylist() == [
+ "123",
+ '{"key":"value"}',
+ "[1,2,3]",
+ '{"aKey": {"bKey": {"cKey": -123}}}',
+ None,
+ '"some-json-string"',
+ '{"nullKey":null}',
+ '""',
+ "[]",
+ ]
+
+
+def test_to_arrow_with_jobs_query_response_and_max_results():
+ resource = {
+ "kind": "bigquery#queryResponse",
+ "schema": {
+ "fields": [
+ {"name": "name", "type": "STRING", "mode": "NULLABLE"},
+ {"name": "number", "type": "INTEGER", "mode": "NULLABLE"},
+ {"name": "json", "type": "JSON", "mode": "NULLABLE"},
+ ]
+ },
+ "jobReference": {
+ "projectId": "test-project",
+ "jobId": "job_ocd3cb-N62QIslU7R5qKKa2_427J",
+ "location": "US",
+ },
+ "totalRows": "9",
+ "rows": [
+ {"f": [{"v": "Tiarra"}, {"v": "6"}, {"v": "123"}]},
+ {"f": [{"v": "Timothy"}, {"v": "325"}, {"v": '{"key":"value"}'}]},
+ {"f": [{"v": "Tina"}, {"v": "26"}, {"v": "[1,2,3]"}]},
+ {
+ "f": [
+ {"v": "Tierra"},
+ {"v": "10"},
+ {"v": '{"aKey": {"bKey": {"cKey": -123}}}'},
+ ]
+ },
+ {"f": [{"v": "Tia"}, {"v": "17"}, {"v": None}]},
+ {"f": [{"v": "Tiara"}, {"v": "22"}, {"v": '"some-json-string"'}]},
+ {"f": [{"v": "Tiana"}, {"v": "6"}, {"v": '{"nullKey":null}'}]},
+ {"f": [{"v": "Tiffany"}, {"v": "229"}, {"v": '""'}]},
+ {"f": [{"v": "Tiffani"}, {"v": "8"}, {"v": "[]"}]},
+ ],
+ "totalBytesProcessed": "154775150",
+ "jobComplete": True,
+ "cacheHit": False,
+ "queryId": "job_ocd3cb-N62QIslU7R5qKKa2_427J",
+ }
+
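+ # max_results=3 is smaller than the first page, so only the first three rows
+ # are expected to appear in the resulting Arrow table.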
+ rows = google.cloud.bigquery.table.RowIterator(
+ client=None,
+ api_request=None,
+ path=None,
+ schema=[
+ bigquery.SchemaField.from_api_repr(field)
+ for field in resource["schema"]["fields"]
+ ],
+ first_page_response=resource,
+ max_results=3,
+ )
+ records = rows.to_arrow()
+
+ assert records.column_names == ["name", "number", "json"]
+ assert records["name"].to_pylist() == [
+ "Tiarra",
+ "Timothy",
+ "Tina",
+ ]
+ assert records["number"].to_pylist() == [6, 325, 26]
+ assert records["json"].to_pylist() == ["123", '{"key":"value"}', "[1,2,3]"]
diff --git a/tests/unit/test_table_pandas.py b/tests/unit/test_table_pandas.py
index 943baa326..a4fa3fa39 100644
--- a/tests/unit/test_table_pandas.py
+++ b/tests/unit/test_table_pandas.py
@@ -12,16 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from unittest import mock
import datetime
import decimal
-from unittest import mock
-import pyarrow
import pytest
from google.cloud import bigquery
pandas = pytest.importorskip("pandas")
+pyarrow = pytest.importorskip("pyarrow", minversion="3.0.0")
TEST_PATH = "/v1/project/test-proj/dataset/test-dset/table/test-tbl/data"
@@ -34,8 +34,10 @@ def class_under_test():
return RowIterator
-def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test):
- # See tests/system/test_arrow.py for the actual types we get from the API.
+def test_to_dataframe_nullable_scalars(
+ monkeypatch, class_under_test
+): # pragma: NO COVER
+ """See tests/system/test_arrow.py for the actual types we get from the API."""
arrow_schema = pyarrow.schema(
[
pyarrow.field("bignumeric_col", pyarrow.decimal256(76, scale=38)),
@@ -51,6 +53,7 @@ def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test):
pyarrow.field(
"timestamp_col", pyarrow.timestamp("us", tz=datetime.timezone.utc)
),
+ pyarrow.field("json_col", pyarrow.string()),
]
)
arrow_table = pyarrow.Table.from_pydict(
@@ -70,6 +73,7 @@ def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test):
2021, 8, 9, 13, 30, 44, 123456, tzinfo=datetime.timezone.utc
)
],
+ "json_col": ["{}"],
},
schema=arrow_schema,
)
@@ -86,6 +90,7 @@ def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test):
bigquery.SchemaField("string_col", "STRING"),
bigquery.SchemaField("time_col", "TIME"),
bigquery.SchemaField("timestamp_col", "TIMESTAMP"),
+ bigquery.SchemaField("json_col", "JSON"),
]
mock_client = mock.create_autospec(bigquery.Client)
mock_client.project = "test-proj"
@@ -102,13 +107,18 @@ def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test):
assert df.dtypes["bool_col"].name == "boolean"
assert df.dtypes["bytes_col"].name == "object"
assert df.dtypes["date_col"].name == "dbdate"
- assert df.dtypes["datetime_col"].name == "datetime64[ns]"
assert df.dtypes["float64_col"].name == "float64"
assert df.dtypes["int64_col"].name == "Int64"
assert df.dtypes["numeric_col"].name == "object"
assert df.dtypes["string_col"].name == "object"
assert df.dtypes["time_col"].name == "dbtime"
- assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]"
+ assert df.dtypes["json_col"].name == "object"
+ if pandas.__version__.startswith("2."):
+ assert df.dtypes["datetime_col"].name == "datetime64[us]"
+ assert df.dtypes["timestamp_col"].name == "datetime64[us, UTC]"
+ else:
+ assert df.dtypes["datetime_col"].name == "datetime64[ns]"
+ assert df.dtypes["timestamp_col"].name == "datetime64[ns, UTC]"
# Check for expected values.
assert df["bignumeric_col"][0] == decimal.Decimal("123.456789101112131415")
@@ -125,12 +135,10 @@ def test_to_dataframe_nullable_scalars(monkeypatch, class_under_test):
assert df["int64_col"][0] == -7
assert df["numeric_col"][0] == decimal.Decimal("-123.456789")
assert df["string_col"][0] == "abcdefg"
-
# Pandas timedelta64 might be a better choice for pandas time columns. Then
# they can more easily be combined with date columns to form datetimes.
# https://github.com/googleapis/python-bigquery/issues/862
assert df["time_col"][0] == datetime.time(14, 21, 17, 123456)
-
assert df["timestamp_col"][0] == pandas.to_datetime("2021-08-09 13:30:44.123456Z")
@@ -192,3 +200,165 @@ def test_to_dataframe_arrays(monkeypatch, class_under_test):
assert df.dtypes["int64_repeated"].name == "object"
assert tuple(df["int64_repeated"][0]) == (-1, 0, 2)
+
+
+def test_to_dataframe_with_jobs_query_response(class_under_test):
+ resource = {
+ "kind": "bigquery#queryResponse",
+ "schema": {
+ "fields": [
+ {"name": "name", "type": "STRING", "mode": "NULLABLE"},
+ {"name": "number", "type": "INTEGER", "mode": "NULLABLE"},
+ ]
+ },
+ "jobReference": {
+ "projectId": "test-project",
+ "jobId": "job_ocd3cb-N62QIslU7R5qKKa2_427J",
+ "location": "US",
+ },
+ "totalRows": "9",
+ "rows": [
+ {"f": [{"v": "Tiarra"}, {"v": "6"}]},
+ {"f": [{"v": "Timothy"}, {"v": "325"}]},
+ {"f": [{"v": "Tina"}, {"v": "26"}]},
+ {"f": [{"v": "Tierra"}, {"v": "10"}]},
+ {"f": [{"v": "Tia"}, {"v": "17"}]},
+ {"f": [{"v": "Tiara"}, {"v": "22"}]},
+ {"f": [{"v": "Tiana"}, {"v": "6"}]},
+ {"f": [{"v": "Tiffany"}, {"v": "229"}]},
+ {"f": [{"v": "Tiffani"}, {"v": "8"}]},
+ ],
+ "totalBytesProcessed": "154775150",
+ "jobComplete": True,
+ "cacheHit": False,
+ "queryId": "job_ocd3cb-N62QIslU7R5qKKa2_427J",
+ }
+
+ rows = class_under_test(
+ client=None,
+ api_request=None,
+ path=None,
+ schema=[
+ bigquery.SchemaField.from_api_repr(field)
+ for field in resource["schema"]["fields"]
+ ],
+ first_page_response=resource,
+ )
+ df = rows.to_dataframe()
+
+ assert list(df.columns) == ["name", "number"]
+ assert list(df["name"]) == [
+ "Tiarra",
+ "Timothy",
+ "Tina",
+ "Tierra",
+ "Tia",
+ "Tiara",
+ "Tiana",
+ "Tiffany",
+ "Tiffani",
+ ]
+ assert list(df["number"]) == [6, 325, 26, 10, 17, 22, 6, 229, 8]
+
+
+@mock.patch("google.cloud.bigquery.table.geopandas")
+def test_rowiterator_to_geodataframe_with_default_dtypes(
+ mock_geopandas, monkeypatch, class_under_test
+):
+ mock_geopandas.GeoDataFrame = mock.Mock(spec=True)
+ mock_client = mock.create_autospec(bigquery.Client)
+ mock_client.project = "test-proj"
+ mock_api_request = mock.Mock()
+ schema = [
+ bigquery.SchemaField("geo_col", "GEOGRAPHY"),
+ bigquery.SchemaField("bool_col", "BOOLEAN"),
+ bigquery.SchemaField("int_col", "INTEGER"),
+ bigquery.SchemaField("float_col", "FLOAT"),
+ bigquery.SchemaField("string_col", "STRING"),
+ ]
+ rows = class_under_test(mock_client, mock_api_request, TEST_PATH, schema)
+
+ mock_df = pandas.DataFrame(
+ {
+ "geo_col": ["POINT (1 2)"],
+ "bool_col": [True],
+ "int_col": [123],
+ "float_col": [1.23],
+ "string_col": ["abc"],
+ }
+ )
+ rows.to_dataframe = mock.Mock(return_value=mock_df)
+
+ rows.to_geodataframe(geography_column="geo_col")
+
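+ # With no dtype overrides, to_geodataframe is expected to delegate to
+ # to_dataframe with geography_as_object=True and the library-default
+ # BOOL/INT dtypes.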
+ rows.to_dataframe.assert_called_once_with(
+ None, # bqstorage_client
+ None, # dtypes
+ None, # progress_bar_type
+ True, # create_bqstorage_client
+ geography_as_object=True,
+ bool_dtype=bigquery.enums.DefaultPandasDTypes.BOOL_DTYPE,
+ int_dtype=bigquery.enums.DefaultPandasDTypes.INT_DTYPE,
+ float_dtype=None,
+ string_dtype=None,
+ )
+ mock_geopandas.GeoDataFrame.assert_called_once_with(
+ mock_df, crs="EPSG:4326", geometry="geo_col"
+ )
+
+
+@mock.patch("google.cloud.bigquery.table.geopandas")
+def test_rowiterator_to_geodataframe_with_custom_dtypes(
+ mock_geopandas, monkeypatch, class_under_test
+):
+ mock_geopandas.GeoDataFrame = mock.Mock(spec=True)
+ mock_client = mock.create_autospec(bigquery.Client)
+ mock_client.project = "test-proj"
+ mock_api_request = mock.Mock()
+ schema = [
+ bigquery.SchemaField("geo_col", "GEOGRAPHY"),
+ bigquery.SchemaField("bool_col", "BOOLEAN"),
+ bigquery.SchemaField("int_col", "INTEGER"),
+ bigquery.SchemaField("float_col", "FLOAT"),
+ bigquery.SchemaField("string_col", "STRING"),
+ ]
+ rows = class_under_test(mock_client, mock_api_request, TEST_PATH, schema)
+
+ mock_df = pandas.DataFrame(
+ {
+ "geo_col": ["POINT (3 4)"],
+ "bool_col": [False],
+ "int_col": [456],
+ "float_col": [4.56],
+ "string_col": ["def"],
+ }
+ )
+ rows.to_dataframe = mock.Mock(return_value=mock_df)
+
+ custom_bool_dtype = "bool"
+ custom_int_dtype = "int32"
+ custom_float_dtype = "float32"
+ custom_string_dtype = "string"
+
+ rows.to_geodataframe(
+ geography_column="geo_col",
+ bool_dtype=custom_bool_dtype,
+ int_dtype=custom_int_dtype,
+ float_dtype=custom_float_dtype,
+ string_dtype=custom_string_dtype,
+ )
+
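+ # Custom dtypes are expected to be passed straight through to to_dataframe.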
+ rows.to_dataframe.assert_called_once_with(
+ None, # bqstorage_client
+ None, # dtypes
+ None, # progress_bar_type
+ True, # create_bqstorage_client
+ geography_as_object=True,
+ bool_dtype=custom_bool_dtype,
+ int_dtype=custom_int_dtype,
+ float_dtype=custom_float_dtype,
+ string_dtype=custom_string_dtype,
+ )
+ mock_geopandas.GeoDataFrame.assert_called_once_with(
+ mock_df, crs="EPSG:4326", geometry="geo_col"
+ )