diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 453e7f5dff..d0cce9492b 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -37,14 +37,33 @@ def __init__( location: Optional[str] = None, bq_connection: Optional[str] = None, use_regional_endpoints: bool = False, + application_name: Optional[str] = None, ): self._credentials = credentials self._project = project self._location = location self._bq_connection = bq_connection self._use_regional_endpoints = use_regional_endpoints + self._application_name = application_name self._session_started = False + @property + def application_name(self) -> Optional[str]: + """The application name to append to the user-agent sent to Google APIs. + + Recommended format is ``"application-name/major.minor.patch_version"`` + or ``"(gpn:PartnerName;)"`` for official Google partners. + """ + return self._application_name + + @application_name.setter + def application_name(self, value: Optional[str]): + if self._session_started and self._application_name != value: + raise ValueError( + SESSION_STARTED_MESSAGE.format(attribute="application_name") + ) + self._application_name = value + @property def credentials(self) -> Optional[google.auth.credentials.Credentials]: """The OAuth2 Credentials to use for this client.""" diff --git a/bigframes/pandas/__init__.py b/bigframes/pandas/__init__.py index 833f0d1d1d..1b9144fb62 100644 --- a/bigframes/pandas/__init__.py +++ b/bigframes/pandas/__init__.py @@ -51,6 +51,7 @@ import bigframes.dataframe import bigframes.series import bigframes.session +import bigframes.session.clients import third_party.bigframes_vendored.pandas.core.reshape.concat as vendored_pandas_concat import third_party.bigframes_vendored.pandas.core.reshape.merge as vendored_pandas_merge import third_party.bigframes_vendored.pandas.core.reshape.tile as vendored_pandas_tile @@ -180,11 +181,12 @@ def 
_set_default_session_location_if_possible(query): ): return - clients_provider = bigframes.session.ClientsProvider( + clients_provider = bigframes.session.clients.ClientsProvider( project=options.bigquery.project, location=options.bigquery.location, use_regional_endpoints=options.bigquery.use_regional_endpoints, credentials=options.bigquery.credentials, + application_name=options.bigquery.application_name, ) bqclient = clients_provider.bqclient diff --git a/bigframes/session.py b/bigframes/session/__init__.py similarity index 90% rename from bigframes/session.py rename to bigframes/session/__init__.py index fa5b415350..1031fde9b5 100644 --- a/bigframes/session.py +++ b/bigframes/session/__init__.py @@ -61,7 +61,6 @@ ReadPickleBuffer, StorageOptions, ) -import pydata_google_auth import bigframes._config.bigquery_options as bigquery_options import bigframes.constants as constants @@ -75,6 +74,7 @@ import bigframes.formatting_helpers as formatting_helpers from bigframes.remote_function import read_gbq_function as bigframes_rgf from bigframes.remote_function import remote_function as bigframes_rf +import bigframes.session.clients import bigframes.version # Even though the ibis.backends.bigquery.registry import is unused, it's needed @@ -85,18 +85,6 @@ import third_party.bigframes_vendored.pandas.io.parsers.readers as third_party_pandas_readers import third_party.bigframes_vendored.pandas.io.pickle as third_party_pandas_pickle -_ENV_DEFAULT_PROJECT = "GOOGLE_CLOUD_PROJECT" -_APPLICATION_NAME = f"bigframes/{bigframes.version.__version__}" -_SCOPES = ["https://www.googleapis.com/auth/cloud-platform"] - -# BigQuery is a REST API, which requires the protocol as part of the URL. -_BIGQUERY_REGIONAL_ENDPOINT = "https://{location}-bigquery.googleapis.com" - -# BigQuery Connection and Storage are gRPC APIs, which don't support the -# https:// protocol in the API endpoint URL. 
-_BIGQUERYCONNECTION_REGIONAL_ENDPOINT = "{location}-bigqueryconnection.googleapis.com" -_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "{location}-bigquerystorage.googleapis.com" - _BIGFRAMES_DEFAULT_CONNECTION_ID = "bigframes-default-connection" _MAX_CLUSTER_COLUMNS = 4 @@ -122,149 +110,6 @@ def _is_query(query_or_table: str) -> bool: return re.search(r"\s", query_or_table.strip(), re.MULTILINE) is not None -def _get_default_credentials_with_project(): - return pydata_google_auth.default(scopes=_SCOPES, use_local_webserver=False) - - -class ClientsProvider: - """Provides client instances necessary to perform cloud operations.""" - - def __init__( - self, - project: Optional[str], - location: Optional[str], - use_regional_endpoints: Optional[bool], - credentials: Optional[google.auth.credentials.Credentials], - ): - credentials_project = None - if credentials is None: - credentials, credentials_project = _get_default_credentials_with_project() - - # Prefer the project in this order: - # 1. Project explicitly specified by the user - # 2. Project set in the environment - # 3. Project associated with the default credentials - project = ( - project - or os.getenv(_ENV_DEFAULT_PROJECT) - or typing.cast(Optional[str], credentials_project) - ) - - if not project: - raise ValueError( - "Project must be set to initialize BigQuery client. " - "Try setting `bigframes.options.bigquery.project` first." 
- ) - - self._project = project - self._location = location - self._use_regional_endpoints = use_regional_endpoints - self._credentials = credentials - - # cloud clients initialized for lazy load - self._bqclient = None - self._bqconnectionclient = None - self._bqstorageclient = None - self._cloudfunctionsclient = None - self._resourcemanagerclient = None - - @property - def bqclient(self): - if not self._bqclient: - bq_options = None - if self._use_regional_endpoints: - bq_options = google.api_core.client_options.ClientOptions( - api_endpoint=_BIGQUERY_REGIONAL_ENDPOINT.format( - location=self._location - ), - ) - bq_info = google.api_core.client_info.ClientInfo( - user_agent=_APPLICATION_NAME - ) - self._bqclient = bigquery.Client( - client_info=bq_info, - client_options=bq_options, - credentials=self._credentials, - project=self._project, - location=self._location, - ) - - return self._bqclient - - @property - def bqconnectionclient(self): - if not self._bqconnectionclient: - bqconnection_options = None - if self._use_regional_endpoints: - bqconnection_options = google.api_core.client_options.ClientOptions( - api_endpoint=_BIGQUERYCONNECTION_REGIONAL_ENDPOINT.format( - location=self._location - ) - ) - bqconnection_info = google.api_core.gapic_v1.client_info.ClientInfo( - user_agent=_APPLICATION_NAME - ) - self._bqconnectionclient = ( - google.cloud.bigquery_connection_v1.ConnectionServiceClient( - client_info=bqconnection_info, - client_options=bqconnection_options, - credentials=self._credentials, - ) - ) - - return self._bqconnectionclient - - @property - def bqstorageclient(self): - if not self._bqstorageclient: - bqstorage_options = None - if self._use_regional_endpoints: - bqstorage_options = google.api_core.client_options.ClientOptions( - api_endpoint=_BIGQUERYSTORAGE_REGIONAL_ENDPOINT.format( - location=self._location - ) - ) - bqstorage_info = google.api_core.gapic_v1.client_info.ClientInfo( - user_agent=_APPLICATION_NAME - ) - self._bqstorageclient = 
google.cloud.bigquery_storage_v1.BigQueryReadClient( - client_info=bqstorage_info, - client_options=bqstorage_options, - credentials=self._credentials, - ) - - return self._bqstorageclient - - @property - def cloudfunctionsclient(self): - if not self._cloudfunctionsclient: - functions_info = google.api_core.gapic_v1.client_info.ClientInfo( - user_agent=_APPLICATION_NAME - ) - self._cloudfunctionsclient = ( - google.cloud.functions_v2.FunctionServiceClient( - client_info=functions_info, - credentials=self._credentials, - ) - ) - - return self._cloudfunctionsclient - - @property - def resourcemanagerclient(self): - if not self._resourcemanagerclient: - resourcemanager_info = google.api_core.gapic_v1.client_info.ClientInfo( - user_agent=_APPLICATION_NAME - ) - self._resourcemanagerclient = ( - google.cloud.resourcemanager_v3.ProjectsClient( - credentials=self._credentials, client_info=resourcemanager_info - ) - ) - - return self._resourcemanagerclient - - class Session( third_party_pandas_gbq.GBQIOMixin, third_party_pandas_parquet.ParquetIOMixin, @@ -279,14 +124,14 @@ class Session( Configuration adjusting how to connect to BigQuery and related APIs. Note that some options are ignored if ``clients_provider`` is set. - clients_provider (bigframes.session.ClientsProvider): + clients_provider (bigframes.session.clients.ClientsProvider): An object providing client library objects. 
""" def __init__( self, context: Optional[bigquery_options.BigQueryOptions] = None, - clients_provider: Optional[ClientsProvider] = None, + clients_provider: Optional[bigframes.session.clients.ClientsProvider] = None, ): if context is None: context = bigquery_options.BigQueryOptions() @@ -306,11 +151,12 @@ def __init__( if clients_provider: self._clients_provider = clients_provider else: - self._clients_provider = ClientsProvider( + self._clients_provider = bigframes.session.clients.ClientsProvider( project=context.project, location=self._location, use_regional_endpoints=context.use_regional_endpoints, credentials=context.credentials, + application_name=context.application_name, ) self._create_and_bind_bq_session() @@ -319,7 +165,7 @@ def __init__( ibis.bigquery.connect( project_id=context.project, client=self.bqclient, - storage_client=self.bqstorageclient, + storage_client=self.bqstoragereadclient, ), ) @@ -338,8 +184,8 @@ def bqconnectionclient(self): return self._clients_provider.bqconnectionclient @property - def bqstorageclient(self): - return self._clients_provider.bqstorageclient + def bqstoragereadclient(self): + return self._clients_provider.bqstoragereadclient @property def cloudfunctionsclient(self): diff --git a/bigframes/session/clients.py b/bigframes/session/clients.py new file mode 100644 index 0000000000..544f74265f --- /dev/null +++ b/bigframes/session/clients.py @@ -0,0 +1,196 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Clients manages the connection to Google APIs.""" + +import os +import typing +from typing import Optional + +import google.api_core.client_info +import google.api_core.client_options +import google.api_core.exceptions +import google.api_core.gapic_v1.client_info +import google.auth.credentials +import google.cloud.bigquery as bigquery +import google.cloud.bigquery_connection_v1 +import google.cloud.bigquery_storage_v1 +import google.cloud.functions_v2 +import google.cloud.resourcemanager_v3 +import pydata_google_auth + +import bigframes.version + +_ENV_DEFAULT_PROJECT = "GOOGLE_CLOUD_PROJECT" +_APPLICATION_NAME = f"bigframes/{bigframes.version.__version__}" +_SCOPES = ["https://www.googleapis.com/auth/cloud-platform"] + +# BigQuery is a REST API, which requires the protocol as part of the URL. +_BIGQUERY_REGIONAL_ENDPOINT = "https://{location}-bigquery.googleapis.com" + +# BigQuery Connection and Storage are gRPC APIs, which don't support the +# https:// protocol in the API endpoint URL. +_BIGQUERYCONNECTION_REGIONAL_ENDPOINT = "{location}-bigqueryconnection.googleapis.com" +_BIGQUERYSTORAGE_REGIONAL_ENDPOINT = "{location}-bigquerystorage.googleapis.com" + + +def _get_default_credentials_with_project(): + return pydata_google_auth.default(scopes=_SCOPES, use_local_webserver=False) + + +class ClientsProvider: + """Provides client instances necessary to perform cloud operations.""" + + def __init__( + self, + project: Optional[str], + location: Optional[str], + use_regional_endpoints: Optional[bool], + credentials: Optional[google.auth.credentials.Credentials], + application_name: Optional[str], + ): + credentials_project = None + if credentials is None: + credentials, credentials_project = _get_default_credentials_with_project() + + # Prefer the project in this order: + # 1. Project explicitly specified by the user + # 2. 
Project set in the environment + # 3. Project associated with the default credentials + project = ( + project + or os.getenv(_ENV_DEFAULT_PROJECT) + or typing.cast(Optional[str], credentials_project) + ) + + if not project: + raise ValueError( + "Project must be set to initialize BigQuery client. " + "Try setting `bigframes.options.bigquery.project` first." + ) + + self._application_name = ( + f"{_APPLICATION_NAME} {application_name}" + if application_name + else _APPLICATION_NAME + ) + self._project = project + self._location = location + self._use_regional_endpoints = use_regional_endpoints + self._credentials = credentials + + # cloud clients initialized for lazy load + self._bqclient = None + self._bqconnectionclient = None + self._bqstoragereadclient = None + self._cloudfunctionsclient = None + self._resourcemanagerclient = None + + @property + def bqclient(self): + if not self._bqclient: + bq_options = None + if self._use_regional_endpoints: + bq_options = google.api_core.client_options.ClientOptions( + api_endpoint=_BIGQUERY_REGIONAL_ENDPOINT.format( + location=self._location + ), + ) + bq_info = google.api_core.client_info.ClientInfo( + user_agent=self._application_name + ) + self._bqclient = bigquery.Client( + client_info=bq_info, + client_options=bq_options, + credentials=self._credentials, + project=self._project, + location=self._location, + ) + + return self._bqclient + + @property + def bqconnectionclient(self): + if not self._bqconnectionclient: + bqconnection_options = None + if self._use_regional_endpoints: + bqconnection_options = google.api_core.client_options.ClientOptions( + api_endpoint=_BIGQUERYCONNECTION_REGIONAL_ENDPOINT.format( + location=self._location + ) + ) + bqconnection_info = google.api_core.gapic_v1.client_info.ClientInfo( + user_agent=self._application_name + ) + self._bqconnectionclient = ( + google.cloud.bigquery_connection_v1.ConnectionServiceClient( + client_info=bqconnection_info, + client_options=bqconnection_options, + 
credentials=self._credentials, + ) + ) + + return self._bqconnectionclient + + @property + def bqstoragereadclient(self): + if not self._bqstoragereadclient: + bqstorage_options = None + if self._use_regional_endpoints: + bqstorage_options = google.api_core.client_options.ClientOptions( + api_endpoint=_BIGQUERYSTORAGE_REGIONAL_ENDPOINT.format( + location=self._location + ) + ) + bqstorage_info = google.api_core.gapic_v1.client_info.ClientInfo( + user_agent=self._application_name + ) + self._bqstoragereadclient = ( + google.cloud.bigquery_storage_v1.BigQueryReadClient( + client_info=bqstorage_info, + client_options=bqstorage_options, + credentials=self._credentials, + ) + ) + + return self._bqstoragereadclient + + @property + def cloudfunctionsclient(self): + if not self._cloudfunctionsclient: + functions_info = google.api_core.gapic_v1.client_info.ClientInfo( + user_agent=self._application_name + ) + self._cloudfunctionsclient = ( + google.cloud.functions_v2.FunctionServiceClient( + client_info=functions_info, + credentials=self._credentials, + ) + ) + + return self._cloudfunctionsclient + + @property + def resourcemanagerclient(self): + if not self._resourcemanagerclient: + resourcemanager_info = google.api_core.gapic_v1.client_info.ClientInfo( + user_agent=self._application_name + ) + self._resourcemanagerclient = ( + google.cloud.resourcemanager_v3.ProjectsClient( + credentials=self._credentials, client_info=resourcemanager_info + ) + ) + + return self._resourcemanagerclient diff --git a/tests/unit/_config/test_bigquery_options.py b/tests/unit/_config/test_bigquery_options.py index aeee058319..e5b6cfe2f1 100644 --- a/tests/unit/_config/test_bigquery_options.py +++ b/tests/unit/_config/test_bigquery_options.py @@ -22,11 +22,13 @@ @pytest.mark.parametrize( ["attribute", "original_value", "new_value"], [ + ("application_name", None, "test-partner"), # For credentials, the match is by reference. 
("credentials", object(), object()), ("location", "us-east1", "us-central1"), ("project", "my-project", "my-other-project"), ("bq_connection", "path/to/connection/1", "path/to/connection/2"), + ("use_regional_endpoints", False, True), ], ) def test_setter_raises_if_session_started(attribute, original_value, new_value): @@ -53,10 +55,12 @@ def test_setter_raises_if_session_started(attribute, original_value, new_value): [ (attribute,) for attribute in [ + "application_name", "credentials", "location", "project", "bq_connection", + "use_regional_endpoints", ] ], ) diff --git a/tests/unit/resources.py b/tests/unit/resources.py index c8ed6e86ed..0a68600a35 100644 --- a/tests/unit/resources.py +++ b/tests/unit/resources.py @@ -22,6 +22,7 @@ import bigframes import bigframes.core as core +import bigframes.session.clients """Utilities for creating test resources.""" @@ -37,7 +38,7 @@ def create_bigquery_session( bqclient = mock.create_autospec(google.cloud.bigquery.Client, instance=True) bqclient.project = "test-project" - clients_provider = mock.create_autospec(bigframes.session.ClientsProvider) + clients_provider = mock.create_autospec(bigframes.session.clients.ClientsProvider) type(clients_provider).bqclient = mock.PropertyMock(return_value=bqclient) clients_provider._credentials = credentials diff --git a/tests/unit/session/__init__.py b/tests/unit/session/__init__.py new file mode 100644 index 0000000000..1dc90d1848 --- /dev/null +++ b/tests/unit/session/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/unit/session/test_clients.py b/tests/unit/session/test_clients.py new file mode 100644 index 0000000000..f1b2a5045a --- /dev/null +++ b/tests/unit/session/test_clients.py @@ -0,0 +1,114 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional +import unittest.mock as mock + +import google.api_core.client_info +import google.api_core.client_options +import google.api_core.exceptions +import google.api_core.gapic_v1.client_info +import google.auth.credentials +import google.cloud.bigquery +import google.cloud.bigquery_connection_v1 +import google.cloud.bigquery_storage_v1 +import google.cloud.functions_v2 +import google.cloud.resourcemanager_v3 + +import bigframes.session.clients as clients +import bigframes.version + + +def create_clients_provider(application_name: Optional[str] = None): + credentials = mock.create_autospec(google.auth.credentials.Credentials) + return clients.ClientsProvider( + project="test-project", + location="test-region", + use_regional_endpoints=False, + credentials=credentials, + application_name=application_name, + ) + + +def monkeypatch_client_constructors(monkeypatch): + bqclient = mock.create_autospec(google.cloud.bigquery.Client) + bqclient.return_value = bqclient + monkeypatch.setattr(google.cloud.bigquery, "Client", bqclient) + + bqconnectionclient = 
mock.create_autospec( + google.cloud.bigquery_connection_v1.ConnectionServiceClient + ) + bqconnectionclient.return_value = bqconnectionclient + monkeypatch.setattr( + google.cloud.bigquery_connection_v1, + "ConnectionServiceClient", + bqconnectionclient, + ) + + bqstoragereadclient = mock.create_autospec( + google.cloud.bigquery_storage_v1.BigQueryReadClient + ) + bqstoragereadclient.return_value = bqstoragereadclient + monkeypatch.setattr( + google.cloud.bigquery_storage_v1, "BigQueryReadClient", bqstoragereadclient + ) + + cloudfunctionsclient = mock.create_autospec( + google.cloud.functions_v2.FunctionServiceClient + ) + cloudfunctionsclient.return_value = cloudfunctionsclient + monkeypatch.setattr( + google.cloud.functions_v2, "FunctionServiceClient", cloudfunctionsclient + ) + + resourcemanagerclient = mock.create_autospec( + google.cloud.resourcemanager_v3.ProjectsClient + ) + resourcemanagerclient.return_value = resourcemanagerclient + monkeypatch.setattr( + google.cloud.resourcemanager_v3, "ProjectsClient", resourcemanagerclient + ) + + +def assert_constructed_w_user_agent(mock_client: mock.Mock, expected_user_agent: str): + assert ( + expected_user_agent + in mock_client.call_args.kwargs["client_info"].to_user_agent() + ) + + +def assert_clients_w_user_agent( + provider: clients.ClientsProvider, expected_user_agent: str +): + assert_constructed_w_user_agent(provider.bqclient, expected_user_agent) + assert_constructed_w_user_agent(provider.bqconnectionclient, expected_user_agent) + assert_constructed_w_user_agent(provider.bqstoragereadclient, expected_user_agent) + assert_constructed_w_user_agent(provider.cloudfunctionsclient, expected_user_agent) + assert_constructed_w_user_agent(provider.resourcemanagerclient, expected_user_agent) + + +def test_user_agent_default(monkeypatch): + monkeypatch_client_constructors(monkeypatch) + provider = create_clients_provider(application_name=None) + assert_clients_w_user_agent(provider, 
f"bigframes/{bigframes.version.__version__}") + + +def test_user_agent_custom(monkeypatch): + monkeypatch_client_constructors(monkeypatch) + provider = create_clients_provider(application_name="(gpn:testpartner;)") + assert_clients_w_user_agent(provider, "(gpn:testpartner;)") + + # We still need to include attribution to bigframes, even if there's also a + # partner using the package. + assert_clients_w_user_agent(provider, f"bigframes/{bigframes.version.__version__}") diff --git a/tests/unit/test_session.py b/tests/unit/session/test_session.py similarity index 98% rename from tests/unit/test_session.py rename to tests/unit/session/test_session.py index e39a316e5b..18fd42e0f3 100644 --- a/tests/unit/test_session.py +++ b/tests/unit/session/test_session.py @@ -20,7 +20,7 @@ import bigframes -from . import resources +from .. import resources @pytest.mark.parametrize("missing_parts_table_id", [(""), ("table")])