From 2448dd0d5bbc606b41fa47a93be0b9995409b1dc Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Tue, 2 Jun 2020 03:19:33 +0000 Subject: [PATCH 1/6] [dlp] opt in using BUILD_SPECIFIC_GCLOUD_PROJECT --- dlp/README.rst | 34 +++++++++--------- dlp/README.rst.in | 8 ++++- dlp/noxfile_config.py | 37 ++++++++++++++++++++ scripts/readme-gen/templates/README.tmpl.rst | 7 ++++ 4 files changed, 69 insertions(+), 17 deletions(-) create mode 100644 dlp/noxfile_config.py diff --git a/dlp/README.rst b/dlp/README.rst index ce8b8550024..76bd9dd8dfc 100644 --- a/dlp/README.rst +++ b/dlp/README.rst @@ -14,6 +14,15 @@ This directory contains samples for Google Data Loss Prevention. `Google Data Lo .. _Google Data Loss Prevention: https://cloud.google.com/dlp/docs/ +To run the sample, you need to enable the API at: https://console.cloud.google.com/apis/library/dlp.googleapis.com + + +To run the sample, you need to have the following roles: +* `DLP Administrator` +* `DLP API Service Agent` + + + Setup ------------------------------------------------------------------------------- @@ -58,15 +67,6 @@ Install Dependencies .. _pip: https://pip.pypa.io/ .. _virtualenv: https://virtualenv.pypa.io/ -#. For running *_test.py files, install test dependencies - - .. code-block:: bash - - $ pip install -r requirements-test.txt - $ pytest inspect_content_test.py - -** *_test.py files are demo wrappers and make API calls. You may get rate limited for making high number of requests. ** - Samples ------------------------------------------------------------------------------- @@ -83,7 +83,7 @@ To run this sample: .. code-block:: bash - $ python quickstart.py + $ python quickstart.py Inspect Content @@ -101,15 +101,16 @@ To run this sample: $ python inspect_content.py - usage: inspect_content.py [-h] {string,file,gcs,datastore,bigquery} ... + usage: inspect_content.py [-h] {string,table,file,gcs,datastore,bigquery} ... Sample app that uses the Data Loss Prevention API to inspect a string, a local file or a file on Google Cloud Storage. positional arguments: - {string,file,gcs,datastore,bigquery} + {string,table,file,gcs,datastore,bigquery} Select how to submit content to the API. string Inspect a string. + table Inspect a table. file Inspect a local file. gcs Inspect files on Google Cloud Storage. datastore Inspect files on Google Datastore. @@ -135,13 +136,14 @@ To run this sample: $ python redact.py - usage: redact.py [-h] [--project PROJECT] [--info_types INFO_TYPES] + usage: redact.py [-h] [--project PROJECT] + [--info_types INFO_TYPES [INFO_TYPES ...]] [--min_likelihood {LIKELIHOOD_UNSPECIFIED,VERY_UNLIKELY,UNLIKELY,POSSIBLE,LIKELY,VERY_LIKELY}] [--mime_type MIME_TYPE] filename output_filename - Sample app that uses the Data Loss Prevent API to redact the contents of a - string or an image file. + Sample app that uses the Data Loss Prevent API to redact the contents of an + image file. positional arguments: filename The path to the file to inspect. @@ -151,7 +153,7 @@ To run this sample: -h, --help show this help message and exit --project PROJECT The Google Cloud project id to use as a parent resource. - --info_types INFO_TYPES + --info_types INFO_TYPES [INFO_TYPES ...] Strings representing info types to look for. A full list of info categories and types is available from the API. 
Examples include "FIRST_NAME", "LAST_NAME",
diff --git a/dlp/README.rst.in b/dlp/README.rst.in
index 8a143392b17..708e870fa08 100644
--- a/dlp/README.rst.in
+++ b/dlp/README.rst.in
@@ -4,7 +4,7 @@ product:
   name: Google Data Loss Prevention
   short_name: Data Loss Prevention
   url: https://cloud.google.com/dlp/docs/
-  description: > 
+  description: >
     `Google Data Loss Prevention`_ provides programmatic access to a powerful
     detection engine for personally identifiable information and other
     privacy-sensitive data in unstructured data streams.
@@ -13,6 +13,12 @@ setup:
 - auth
 - install_deps
 
+required_api_url: https://console.cloud.google.com/apis/library/dlp.googleapis.com
+
+required_roles:
+- DLP Administrator
+- DLP API Service Agent
+
 samples:
 - name: Quickstart
   file: quickstart.py
diff --git a/dlp/noxfile_config.py b/dlp/noxfile_config.py
new file mode 100644
index 00000000000..950c3a070bd
--- /dev/null
+++ b/dlp/noxfile_config.py
@@ -0,0 +1,37 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Default TEST_CONFIG_OVERRIDE for python repos.
+
+# You can copy this file into your directory, then it will be imported from
+# the noxfile.py.
+
+# The source of truth:
+# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py
+
+TEST_CONFIG_OVERRIDE = {
+    # You can opt out from the test for specific Python versions.
+    'ignored_versions': ["2.7"],
+
+    # An envvar key for determining the project id to use. Change it
+    # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+    # build specific Cloud project. You can also use your own string
+    # to use your own Cloud project.
+    # 'gcloud_project_env': 'GCLOUD_PROJECT',
+    'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+
+    # A dictionary you want to inject into your test. Don't put any
+    # secrets here. These values will override predefined values.
+    'envs': {},
+}
diff --git a/scripts/readme-gen/templates/README.tmpl.rst b/scripts/readme-gen/templates/README.tmpl.rst
index 30ad03d050d..1d0432d0d92 100644
--- a/scripts/readme-gen/templates/README.tmpl.rst
+++ b/scripts/readme-gen/templates/README.tmpl.rst
@@ -23,6 +23,13 @@ To run the sample, you need to enable the API at: {{required_api_url}}
 To run the sample, you need to have `{{required_role}}` role.
{% endif %} +{% if required_roles %} +To run the sample, you need to have the following roles: +{% for role in required_roles %} +* `{{role}}` +{% endfor %} +{% endif %} + {{other_required_steps}} {% if setup %} From 4c27b63610c97ce034cef7849505a91b4a116467 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 3 Jun 2020 16:14:59 +0000 Subject: [PATCH 2/6] increase the operation wait to 3 minutes --- dlp/risk_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlp/risk_test.py b/dlp/risk_test.py index 0164cf3b8c0..b5b7021039f 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -37,7 +37,7 @@ BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING -TIMEOUT = 30 +TIMEOUT = 180 # 3 minutes # Create new custom topic/subscription From 91d30e2b91788ecf80227a16e31b9a895839d53c Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 3 Jun 2020 18:52:28 +0000 Subject: [PATCH 3/6] correct comparison of the job name * re-enabled some tests * remove delay between retries * appropriate timeout value --- dlp/inspect_content.py | 18 +++++++++++++--- dlp/inspect_content_test.py | 37 +++++++++++++++++++------------- dlp/risk.py | 30 +++++++++++++++++++++----- dlp/risk_test.py | 42 ++++++++++++------------------------- 4 files changed, 76 insertions(+), 51 deletions(-) diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index 6d6baad4827..aa8bd5b051a 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -486,7 +486,11 @@ def inspect_gcs_file( def callback(message): try: - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -650,7 +654,11 @@ def inspect_datastore( def callback(message): try: - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -817,7 +825,11 @@ def inspect_bigquery( def callback(message): try: - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. 
message.ack() diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index ea100d16d84..12fa40610f6 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -40,6 +40,8 @@ BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING +TIMEOUT = 180 # 3 minutes + @pytest.fixture(scope="module") def bucket(): @@ -298,6 +300,7 @@ def cancel_operation(out): client.cancel_dlp_job(operation_id) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys): try: inspect_content.inspect_gcs_file( @@ -307,15 +310,16 @@ def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys): topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1 + timeout=TIMEOUT ) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_gcs_file_with_custom_info_types( bucket, topic_id, subscription_id, capsys): try: @@ -331,15 +335,16 @@ def test_inspect_gcs_file_with_custom_info_types( [], custom_dictionaries=dictionaries, custom_regexes=regexes, - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_gcs_file_no_results( bucket, topic_id, subscription_id, capsys): try: @@ -350,15 +355,16 @@ def test_inspect_gcs_file_no_results( topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "No findings" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys): try: inspect_content.inspect_gcs_file( @@ -368,14 +374,15 @@ def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys): topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys): try: inspect_content.inspect_gcs_file( @@ -385,15 +392,16 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys): topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_datastore( datastore_project, topic_id, subscription_id, capsys): try: @@ -404,14 +412,15 @@ def test_inspect_datastore( topic_id, subscription_id, ["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_datastore_no_results( datastore_project, topic_id, subscription_id, capsys): try: @@ -422,10 +431,10 @@ def test_inspect_datastore_no_results( topic_id, subscription_id, 
["PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "No findings" in out finally: cancel_operation(out) diff --git a/dlp/risk.py b/dlp/risk.py index a31dfb12c6e..9c82dc58bc2 100644 --- a/dlp/risk.py +++ b/dlp/risk.py @@ -86,7 +86,11 @@ def numerical_risk_analysis( operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -196,7 +200,11 @@ def categorical_risk_analysis( operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -324,7 +332,11 @@ def map_fields(field): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -460,7 +472,11 @@ def map_fields(field): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -617,7 +633,11 @@ def map_fields(quasi_id, info_type): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() diff --git a/dlp/risk_test.py b/dlp/risk_test.py index b5b7021039f..36f7f54a095 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import time import uuid import google.cloud.bigquery @@ -37,14 +36,14 @@ BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING -TIMEOUT = 180 # 3 minutes +TIMEOUT = 60 # 1 minutes # Create new custom topic/subscription # We observe sometimes all the tests in this file fail. In a # hypothesis where DLP service somehow loses the connection to the # topic, now we use function scope for Pub/Sub fixtures. 
-@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def topic_id(): # Creates a pubsub topic, and tears it down. publisher = google.cloud.pubsub.PublisherClient() @@ -59,7 +58,7 @@ def topic_id(): publisher.delete_topic(topic_path) -@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def subscription_id(topic_id): # Subscribes to a topic. subscriber = google.cloud.pubsub.SubscriberClient() @@ -166,22 +165,7 @@ def bigquery_project(): bigquery_client.delete_dataset(dataset_ref, delete_contents=True) -def delay(err, *args): - # 20 mins of delay. This sounds like too long a delay, but we - # occasionally observe consequtive time block where operations are - # slow which leads to the test failures. These situations tend to - # get self healed in 20 minutes or so, so I'm trying this strategy. - # - # There are 10 tests, so we don't want the retry delay happening - # for all the tests. When we exhaust the MAX_FLAKY_WAIT, we retry - # the test immediately. - wait_time = min(pytest.MAX_FLAKY_WAIT, 60*20) - pytest.MAX_FLAKY_WAIT -= wait_time - time.sleep(wait_time) - return True - - -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_numerical_risk_analysis( topic_id, subscription_id, bigquery_project, capsys ): @@ -200,7 +184,7 @@ def test_numerical_risk_analysis( assert "Value Range:" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_categorical_risk_analysis_on_string_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -219,7 +203,7 @@ def test_categorical_risk_analysis_on_string_field( assert "Most common value occurs" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_categorical_risk_analysis_on_number_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -238,7 +222,7 @@ def test_categorical_risk_analysis_on_number_field( assert "Most common value occurs" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_k_anonymity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -258,7 +242,7 @@ def test_k_anonymity_analysis_single_field( assert "Class size:" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_k_anonymity_analysis_multiple_fields( topic_id, subscription_id, bigquery_project, capsys ): @@ -278,7 +262,7 @@ def test_k_anonymity_analysis_multiple_fields( assert "Class size:" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_l_diversity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -300,7 +284,7 @@ def test_l_diversity_analysis_single_field( assert "Sensitive value" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_l_diversity_analysis_multiple_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -322,7 +306,7 @@ def test_l_diversity_analysis_multiple_field( assert "Sensitive value" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_k_map_estimate_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -344,7 +328,7 @@ def 
test_k_map_estimate_analysis_single_field(
     assert "Values" in out
 
 
-@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
+@pytest.mark.flaky(max_runs=2, min_passes=1)
 def test_k_map_estimate_analysis_multiple_field(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -366,7 +350,7 @@ def test_k_map_estimate_analysis_multiple_field(
     assert "Values" in out
 
 
-@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
+@pytest.mark.flaky(max_runs=2, min_passes=1)
 def test_k_map_estimate_analysis_quasi_ids_info_types_equal(
     topic_id, subscription_id, bigquery_project
 ):

From 26d4b4b121ad3d7ff8a17748aaa80b79aaf09090 Mon Sep 17 00:00:00 2001
From: Takashi Matsuo
Date: Wed, 3 Jun 2020 20:00:32 +0000
Subject: [PATCH 4/6] opt out from using BUILD_SPECIFIC_GCLOUD_PROJECT

---
 dlp/conftest.py       | 20 --------------------
 dlp/noxfile_config.py | 37 -------------------------------
 2 files changed, 57 deletions(-)
 delete mode 100644 dlp/conftest.py
 delete mode 100644 dlp/noxfile_config.py

diff --git a/dlp/conftest.py b/dlp/conftest.py
deleted file mode 100644
index 362e5a2c271..00000000000
--- a/dlp/conftest.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the 'License');
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an 'AS IS' BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-
-# Used in risk_test.py to limit the maximum wait time before the flaky retries.
-def pytest_configure(config):
-    pytest.MAX_FLAKY_WAIT = 3600  # maximum of an hour
diff --git a/dlp/noxfile_config.py b/dlp/noxfile_config.py
deleted file mode 100644
index 950c3a070bd..00000000000
--- a/dlp/noxfile_config.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Default TEST_CONFIG_OVERRIDE for python repos.
-
-# You can copy this file into your directory, then it will be imported from
-# the noxfile.py.
-
-# The source of truth:
-# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py
-
-TEST_CONFIG_OVERRIDE = {
-    # You can opt out from the test for specific Python versions.
-    'ignored_versions': ["2.7"],
-
-    # An envvar key for determining the project id to use. Change it
-    # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
-    # build specific Cloud project. You can also use your own string
-    # to use your own Cloud project.
-    # 'gcloud_project_env': 'GCLOUD_PROJECT',
-    'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
-
-    # A dictionary you want to inject into your test. Don't put any
-    # secrets here. 
These values will override predefined values. - 'envs': {}, -} From e0ee53d83ada5cddc7316cc0705a594aa540a9fd Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 3 Jun 2020 20:50:49 +0000 Subject: [PATCH 5/6] use parent with the global location specified --- dlp/inspect_content.py | 39 +++++++++++---------------- dlp/risk.py | 61 ++++++++++++++++-------------------------- 2 files changed, 38 insertions(+), 62 deletions(-) diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index aa8bd5b051a..1c5f9c4df95 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -459,11 +459,12 @@ def inspect_gcs_file( url = "gs://{}/{}".format(bucket, filename) storage_config = {"cloud_storage_options": {"file_set": {"url": url}}} - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Construct the inspect_job, which defines the entire inspect content task. inspect_job = { @@ -486,11 +487,7 @@ def inspect_gcs_file( def callback(message): try: - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -627,11 +624,12 @@ def inspect_datastore( } } - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Construct the inspect_job, which defines the entire inspect content task. inspect_job = { @@ -654,11 +652,7 @@ def inspect_datastore( def callback(message): try: - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -798,11 +792,12 @@ def inspect_bigquery( } } - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Construct the inspect_job, which defines the entire inspect content task. 
inspect_job = { @@ -825,11 +820,7 @@ def inspect_bigquery( def callback(message): try: - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() diff --git a/dlp/risk.py b/dlp/risk.py index 9c82dc58bc2..518f947eee6 100644 --- a/dlp/risk.py +++ b/dlp/risk.py @@ -59,8 +59,9 @@ def numerical_risk_analysis( # Instantiate a client. dlp = google.cloud.dlp_v2.DlpServiceClient() - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Location info of the BigQuery table. source_table = { @@ -70,7 +71,7 @@ def numerical_risk_analysis( } # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for @@ -86,11 +87,7 @@ def numerical_risk_analysis( operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -173,8 +170,9 @@ def categorical_risk_analysis( # Instantiate a client. dlp = google.cloud.dlp_v2.DlpServiceClient() - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Location info of the BigQuery table. source_table = { @@ -184,7 +182,7 @@ def categorical_risk_analysis( } # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for @@ -200,11 +198,7 @@ def categorical_risk_analysis( operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -302,7 +296,8 @@ def get_values(obj): dlp = google.cloud.dlp_v2.DlpServiceClient() # Convert the project id into a full resource id. - parent = dlp.project_path(project) + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Location info of the BigQuery table. 
source_table = { @@ -318,7 +313,7 @@ def map_fields(field): quasi_ids = map(map_fields, quasi_ids) # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for @@ -332,11 +327,7 @@ def map_fields(field): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -437,7 +428,8 @@ def get_values(obj): dlp = google.cloud.dlp_v2.DlpServiceClient() # Convert the project id into a full resource id. - parent = dlp.project_path(project) + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Location info of the BigQuery table. source_table = { @@ -453,7 +445,7 @@ def map_fields(field): quasi_ids = map(map_fields, quasi_ids) # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for @@ -472,11 +464,7 @@ def map_fields(field): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -590,8 +578,9 @@ def get_values(obj): # Instantiate a client. dlp = google.cloud.dlp_v2.DlpServiceClient() - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Location info of the BigQuery table. source_table = { @@ -614,7 +603,7 @@ def map_fields(quasi_id, info_type): quasi_ids = map(map_fields, quasi_ids, info_types) # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for @@ -633,11 +622,7 @@ def map_fields(quasi_id, info_type): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. 
message.ack() From b165a8175e516ee5cf4451bb847a54a22567506d Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 3 Jun 2020 21:32:00 +0000 Subject: [PATCH 6/6] increase the timeout to 5 mins --- dlp/inspect_content_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index 12fa40610f6..e2192bdd6c4 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -40,7 +40,7 @@ BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING -TIMEOUT = 180 # 3 minutes +TIMEOUT = 300 # 5 minutes @pytest.fixture(scope="module")
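
A note on the noxfile_config.py mechanism that patch 1 adds and patch 4 backs out: per the file's own comments, a sample directory copies the file in and the shared noxfile.py imports it, layering TEST_CONFIG_OVERRIDE over repo-wide defaults. A minimal sketch of that consuming side, under assumed names (TEST_CONFIG and get_project_id are hypothetical; the real noxfile.py is not part of this series):

    import os

    # Hypothetical repo-wide defaults; the real ones live in the shared
    # noxfile.py, which this series does not show.
    TEST_CONFIG = {
        'ignored_versions': [],
        'gcloud_project_env': 'GCLOUD_PROJECT',
        'envs': {},
    }

    try:
        # The per-sample override file, e.g. dlp/noxfile_config.py.
        from noxfile_config import TEST_CONFIG_OVERRIDE
    except ImportError:
        TEST_CONFIG_OVERRIDE = {}

    # Later keys win, so a sample's overrides replace the defaults.
    TEST_CONFIG.update(TEST_CONFIG_OVERRIDE)

    def get_project_id():
        # With 'BUILD_SPECIFIC_GCLOUD_PROJECT' opted in, each build
        # resolves its own Cloud project instead of the shared one.
        return os.environ[TEST_CONFIG['gcloud_project_env']]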
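Patches 3 and 5 are two answers to the same mismatch: a job created under a project-scoped parent reports an operation.name without a location component, while the Pub/Sub notification's DlpJobName attribute carries one. A rough illustration with made-up identifiers (not captured from a real job):

    # Name of a job created under dlp.project_path(project):
    operation_name = "projects/my-project/dlpJobs/i-1234567890"

    # Name delivered in the notification's DlpJobName attribute:
    pubsub_job_name = (
        "projects/my-project/locations/global/dlpJobs/i-1234567890")

    # Patch 3 strips the location before comparing (the workaround):
    assert pubsub_job_name.replace("/locations/global", "") == operation_name

Patch 5 removes the workaround at the source: with dlp.location_path(project, 'global') as the parent, operation.name itself carries the location component, so the direct equality check in the callbacks works again.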
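The timeout churn (timeout=1 -> 180 -> 300 seconds in inspect_content_test.py, 30 -> 180 -> 60 in risk_test.py) all feeds one wait: the test blocks until the subscriber callback sees the finished job's notification or the deadline passes, and the new assertions ("Info type: ...", "No findings") can only pass once the job has actually completed. A compressed sketch of that pattern, assuming a hypothetical helper name (wait_for_dlp_job) and eliding the result printing the samples do:

    import threading

    import google.cloud.pubsub

    TIMEOUT = 300  # 5 minutes, the final value from patch 6

    def wait_for_dlp_job(project, subscription_id, job_name):
        # Signaled by the callback once the job's message arrives.
        job_done = threading.Event()

        def callback(message):
            if message.attributes["DlpJobName"] == job_name:
                message.ack()
                job_done.set()
            else:
                # Not the message we are looking for.
                message.drop()

        subscriber = google.cloud.pubsub.SubscriberClient()
        subscription_path = subscriber.subscription_path(
            project, subscription_id)
        subscriber.subscribe(subscription_path, callback=callback)

        # With timeout=1 the old tests could only assert that the
        # operation had started; at 180-300 seconds the findings
        # assertions have a realistic chance to pass.
        return job_done.wait(timeout=TIMEOUT)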