From 2448dd0d5bbc606b41fa47a93be0b9995409b1dc Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Tue, 2 Jun 2020 03:19:33 +0000 Subject: [PATCH 1/6] [dlp] opt in using BUILD_SPECIFIC_GCLOUD_PROJECT --- dlp/README.rst | 34 +++++++++--------- dlp/README.rst.in | 8 ++++- dlp/noxfile_config.py | 37 ++++++++++++++++++++ scripts/readme-gen/templates/README.tmpl.rst | 7 ++++ 4 files changed, 69 insertions(+), 17 deletions(-) create mode 100644 dlp/noxfile_config.py diff --git a/dlp/README.rst b/dlp/README.rst index ce8b8550024..76bd9dd8dfc 100644 --- a/dlp/README.rst +++ b/dlp/README.rst @@ -14,6 +14,15 @@ This directory contains samples for Google Data Loss Prevention. `Google Data Lo .. _Google Data Loss Prevention: https://cloud.google.com/dlp/docs/ +To run the sample, you need to enable the API at: https://console.cloud.google.com/apis/library/dlp.googleapis.com + + +To run the sample, you need to have the following roles: +* `DLP Administrator` +* `DLP API Service Agent` + + + Setup ------------------------------------------------------------------------------- @@ -58,15 +67,6 @@ Install Dependencies .. _pip: https://pip.pypa.io/ .. _virtualenv: https://virtualenv.pypa.io/ -#. For running *_test.py files, install test dependencies - - .. code-block:: bash - - $ pip install -r requirements-test.txt - $ pytest inspect_content_test.py - -** *_test.py files are demo wrappers and make API calls. You may get rate limited for making high number of requests. ** - Samples ------------------------------------------------------------------------------- @@ -83,7 +83,7 @@ To run this sample: .. code-block:: bash - $ python quickstart.py + $ python quickstart.py Inspect Content @@ -101,15 +101,16 @@ To run this sample: $ python inspect_content.py - usage: inspect_content.py [-h] {string,file,gcs,datastore,bigquery} ... + usage: inspect_content.py [-h] {string,table,file,gcs,datastore,bigquery} ... Sample app that uses the Data Loss Prevention API to inspect a string, a local file or a file on Google Cloud Storage. positional arguments: - {string,file,gcs,datastore,bigquery} + {string,table,file,gcs,datastore,bigquery} Select how to submit content to the API. string Inspect a string. + table Inspect a table. file Inspect a local file. gcs Inspect files on Google Cloud Storage. datastore Inspect files on Google Datastore. @@ -135,13 +136,14 @@ To run this sample: $ python redact.py - usage: redact.py [-h] [--project PROJECT] [--info_types INFO_TYPES] + usage: redact.py [-h] [--project PROJECT] + [--info_types INFO_TYPES [INFO_TYPES ...]] [--min_likelihood {LIKELIHOOD_UNSPECIFIED,VERY_UNLIKELY,UNLIKELY,POSSIBLE,LIKELY,VERY_LIKELY}] [--mime_type MIME_TYPE] filename output_filename - Sample app that uses the Data Loss Prevent API to redact the contents of a - string or an image file. + Sample app that uses the Data Loss Prevent API to redact the contents of an + image file. positional arguments: filename The path to the file to inspect. @@ -151,7 +153,7 @@ To run this sample: -h, --help show this help message and exit --project PROJECT The Google Cloud project id to use as a parent resource. - --info_types INFO_TYPES + --info_types INFO_TYPES [INFO_TYPES ...] Strings representing info types to look for. A full list of info categories and types is available from the API. 
Examples include "FIRST_NAME", "LAST_NAME",
diff --git a/dlp/README.rst.in b/dlp/README.rst.in
index 8a143392b17..708e870fa08 100644
--- a/dlp/README.rst.in
+++ b/dlp/README.rst.in
@@ -4,7 +4,7 @@ product:
   name: Google Data Loss Prevention
   short_name: Data Loss Prevention
   url: https://cloud.google.com/dlp/docs/
-  description: > 
+  description: >
     `Google Data Loss Prevention`_ provides programmatic access to a powerful
     detection engine for personally identifiable information and other
     privacy-sensitive data in unstructured data streams.
@@ -13,6 +13,12 @@ setup:
 - auth
 - install_deps
 
+required_api_url: https://console.cloud.google.com/apis/library/dlp.googleapis.com
+
+required_roles:
+- DLP Administrator
+- DLP API Service Agent
+
 samples:
 - name: Quickstart
   file: quickstart.py
diff --git a/dlp/noxfile_config.py b/dlp/noxfile_config.py
new file mode 100644
index 00000000000..950c3a070bd
--- /dev/null
+++ b/dlp/noxfile_config.py
@@ -0,0 +1,37 @@
+# Copyright 2020 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Default TEST_CONFIG_OVERRIDE for python repos.
+
+# You can copy this file into your directory, then it will be imported from
+# the noxfile.py.
+
+# The source of truth:
+# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py
+
+TEST_CONFIG_OVERRIDE = {
+    # You can opt out from the test for specific Python versions.
+    'ignored_versions': ["2.7"],
+
+    # An envvar key for determining the project id to use. Change it
+    # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
+    # build specific Cloud project. You can also use your own string
+    # to use your own Cloud project.
+    # 'gcloud_project_env': 'GCLOUD_PROJECT',
+    'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
+
+    # A dictionary you want to inject into your test. Don't put any
+    # secrets here. These values will override predefined values.
+    'envs': {},
+}
diff --git a/scripts/readme-gen/templates/README.tmpl.rst b/scripts/readme-gen/templates/README.tmpl.rst
index 30ad03d050d..1d0432d0d92 100644
--- a/scripts/readme-gen/templates/README.tmpl.rst
+++ b/scripts/readme-gen/templates/README.tmpl.rst
@@ -23,6 +23,13 @@ To run the sample, you need to enable the API at: {{required_api_url}}
 To run the sample, you need to have `{{required_role}}` role.
{% endif %} +{% if required_roles %} +To run the sample, you need to have the following roles: +{% for role in required_roles %} +* `{{role}}` +{% endfor %} +{% endif %} + {{other_required_steps}} {% if setup %} From 4c27b63610c97ce034cef7849505a91b4a116467 Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 3 Jun 2020 16:14:59 +0000 Subject: [PATCH 2/6] increase the operation wait to 3 minutes --- dlp/risk_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlp/risk_test.py b/dlp/risk_test.py index 0164cf3b8c0..b5b7021039f 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -37,7 +37,7 @@ BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING -TIMEOUT = 30 +TIMEOUT = 180 # 3 minutes # Create new custom topic/subscription From 91d30e2b91788ecf80227a16e31b9a895839d53c Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 3 Jun 2020 18:52:28 +0000 Subject: [PATCH 3/6] correct comparison of the job name * re-enabled some tests * remove delay between retries * appropriate timeout value --- dlp/inspect_content.py | 18 +++++++++++++--- dlp/inspect_content_test.py | 37 +++++++++++++++++++------------- dlp/risk.py | 30 +++++++++++++++++++++----- dlp/risk_test.py | 42 ++++++++++++------------------------- 4 files changed, 76 insertions(+), 51 deletions(-) diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index 6d6baad4827..aa8bd5b051a 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -486,7 +486,11 @@ def inspect_gcs_file( def callback(message): try: - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -650,7 +654,11 @@ def inspect_datastore( def callback(message): try: - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -817,7 +825,11 @@ def inspect_bigquery( def callback(message): try: - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. 
message.ack() diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index ea100d16d84..12fa40610f6 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -40,6 +40,8 @@ BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING +TIMEOUT = 180 # 3 minutes + @pytest.fixture(scope="module") def bucket(): @@ -298,6 +300,7 @@ def cancel_operation(out): client.cancel_dlp_job(operation_id) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys): try: inspect_content.inspect_gcs_file( @@ -307,15 +310,16 @@ def test_inspect_gcs_file(bucket, topic_id, subscription_id, capsys): topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1 + timeout=TIMEOUT ) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_gcs_file_with_custom_info_types( bucket, topic_id, subscription_id, capsys): try: @@ -331,15 +335,16 @@ def test_inspect_gcs_file_with_custom_info_types( [], custom_dictionaries=dictionaries, custom_regexes=regexes, - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_gcs_file_no_results( bucket, topic_id, subscription_id, capsys): try: @@ -350,15 +355,16 @@ def test_inspect_gcs_file_no_results( topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "No findings" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys): try: inspect_content.inspect_gcs_file( @@ -368,14 +374,15 @@ def test_inspect_gcs_image_file(bucket, topic_id, subscription_id, capsys): topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys): try: inspect_content.inspect_gcs_file( @@ -385,15 +392,16 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys): topic_id, subscription_id, ["EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_datastore( datastore_project, topic_id, subscription_id, capsys): try: @@ -404,14 +412,15 @@ def test_inspect_datastore( topic_id, subscription_id, ["FIRST_NAME", "EMAIL_ADDRESS", "PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "Info type: EMAIL_ADDRESS" in out finally: cancel_operation(out) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_inspect_datastore_no_results( datastore_project, topic_id, subscription_id, capsys): try: @@ -422,10 +431,10 @@ def test_inspect_datastore_no_results( topic_id, subscription_id, 
["PHONE_NUMBER"], - timeout=1) + timeout=TIMEOUT) out, _ = capsys.readouterr() - assert "Inspection operation started" in out + assert "No findings" in out finally: cancel_operation(out) diff --git a/dlp/risk.py b/dlp/risk.py index a31dfb12c6e..9c82dc58bc2 100644 --- a/dlp/risk.py +++ b/dlp/risk.py @@ -86,7 +86,11 @@ def numerical_risk_analysis( operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -196,7 +200,11 @@ def categorical_risk_analysis( operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -324,7 +332,11 @@ def map_fields(field): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -460,7 +472,11 @@ def map_fields(field): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -617,7 +633,11 @@ def map_fields(quasi_id, info_type): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - if message.attributes["DlpJobName"] == operation.name: + # The DlpJobName in the Pub/Sub message has the location indicator + # and we need to remove that part for comparison. + dlp_job_name = message.attributes["DlpJobName"].replace( + '/locations/global', '') + if dlp_job_name == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() diff --git a/dlp/risk_test.py b/dlp/risk_test.py index b5b7021039f..36f7f54a095 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -13,7 +13,6 @@ # limitations under the License. import os -import time import uuid import google.cloud.bigquery @@ -37,14 +36,14 @@ BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING -TIMEOUT = 180 # 3 minutes +TIMEOUT = 60 # 1 minutes # Create new custom topic/subscription # We observe sometimes all the tests in this file fail. In a # hypothesis where DLP service somehow loses the connection to the # topic, now we use function scope for Pub/Sub fixtures. 
-@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def topic_id(): # Creates a pubsub topic, and tears it down. publisher = google.cloud.pubsub.PublisherClient() @@ -59,7 +58,7 @@ def topic_id(): publisher.delete_topic(topic_path) -@pytest.fixture(scope="function") +@pytest.fixture(scope="module") def subscription_id(topic_id): # Subscribes to a topic. subscriber = google.cloud.pubsub.SubscriberClient() @@ -166,22 +165,7 @@ def bigquery_project(): bigquery_client.delete_dataset(dataset_ref, delete_contents=True) -def delay(err, *args): - # 20 mins of delay. This sounds like too long a delay, but we - # occasionally observe consequtive time block where operations are - # slow which leads to the test failures. These situations tend to - # get self healed in 20 minutes or so, so I'm trying this strategy. - # - # There are 10 tests, so we don't want the retry delay happening - # for all the tests. When we exhaust the MAX_FLAKY_WAIT, we retry - # the test immediately. - wait_time = min(pytest.MAX_FLAKY_WAIT, 60*20) - pytest.MAX_FLAKY_WAIT -= wait_time - time.sleep(wait_time) - return True - - -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_numerical_risk_analysis( topic_id, subscription_id, bigquery_project, capsys ): @@ -200,7 +184,7 @@ def test_numerical_risk_analysis( assert "Value Range:" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_categorical_risk_analysis_on_string_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -219,7 +203,7 @@ def test_categorical_risk_analysis_on_string_field( assert "Most common value occurs" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_categorical_risk_analysis_on_number_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -238,7 +222,7 @@ def test_categorical_risk_analysis_on_number_field( assert "Most common value occurs" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_k_anonymity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -258,7 +242,7 @@ def test_k_anonymity_analysis_single_field( assert "Class size:" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_k_anonymity_analysis_multiple_fields( topic_id, subscription_id, bigquery_project, capsys ): @@ -278,7 +262,7 @@ def test_k_anonymity_analysis_multiple_fields( assert "Class size:" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_l_diversity_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -300,7 +284,7 @@ def test_l_diversity_analysis_single_field( assert "Sensitive value" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_l_diversity_analysis_multiple_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -322,7 +306,7 @@ def test_l_diversity_analysis_multiple_field( assert "Sensitive value" in out -@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay) +@pytest.mark.flaky(max_runs=2, min_passes=1) def test_k_map_estimate_analysis_single_field( topic_id, subscription_id, bigquery_project, capsys ): @@ -344,7 +328,7 @@ def 
test_k_map_estimate_analysis_single_field(
     assert "Values" in out
 
 
-@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
+@pytest.mark.flaky(max_runs=2, min_passes=1)
 def test_k_map_estimate_analysis_multiple_field(
     topic_id, subscription_id, bigquery_project, capsys
 ):
@@ -366,7 +350,7 @@ def test_k_map_estimate_analysis_multiple_field(
     assert "Values" in out
 
 
-@pytest.mark.flaky(max_runs=2, min_passes=1, rerun_filter=delay)
+@pytest.mark.flaky(max_runs=2, min_passes=1)
 def test_k_map_estimate_analysis_quasi_ids_info_types_equal(
     topic_id, subscription_id, bigquery_project
 ):

From 26d4b4b121ad3d7ff8a17748aaa80b79aaf09090 Mon Sep 17 00:00:00 2001
From: Takashi Matsuo
Date: Wed, 3 Jun 2020 20:00:32 +0000
Subject: [PATCH 4/6] opt out from using BUILD_SPECIFIC_GCLOUD_PROJECT

---
 dlp/conftest.py       | 20 --------------------
 dlp/noxfile_config.py | 37 -------------------------------
 2 files changed, 57 deletions(-)
 delete mode 100644 dlp/conftest.py
 delete mode 100644 dlp/noxfile_config.py

diff --git a/dlp/conftest.py b/dlp/conftest.py
deleted file mode 100644
index 362e5a2c271..00000000000
--- a/dlp/conftest.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the 'License');
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an 'AS IS' BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import pytest
-
-
-# Used in risk_test.py to limit the maximum wait time before the flaky retries.
-def pytest_configure(config):
-    pytest.MAX_FLAKY_WAIT = 3600  # maximum of an hour
diff --git a/dlp/noxfile_config.py b/dlp/noxfile_config.py
deleted file mode 100644
index 950c3a070bd..00000000000
--- a/dlp/noxfile_config.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright 2020 Google LLC
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Default TEST_CONFIG_OVERRIDE for python repos.
-
-# You can copy this file into your directory, then it will be imported from
-# the noxfile.py.
-
-# The source of truth:
-# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py
-
-TEST_CONFIG_OVERRIDE = {
-    # You can opt out from the test for specific Python versions.
-    'ignored_versions': ["2.7"],
-
-    # An envvar key for determining the project id to use. Change it
-    # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a
-    # build specific Cloud project. You can also use your own string
-    # to use your own Cloud project.
-    # 'gcloud_project_env': 'GCLOUD_PROJECT',
-    'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT',
-
-    # A dictionary you want to inject into your test. Don't put any
-    # secrets here. 
These values will override predefined values. - 'envs': {}, -} From e0ee53d83ada5cddc7316cc0705a594aa540a9fd Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 3 Jun 2020 20:50:49 +0000 Subject: [PATCH 5/6] use parent with the global location specified --- dlp/inspect_content.py | 39 +++++++++++---------------- dlp/risk.py | 61 ++++++++++++++++-------------------------- 2 files changed, 38 insertions(+), 62 deletions(-) diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index aa8bd5b051a..1c5f9c4df95 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -459,11 +459,12 @@ def inspect_gcs_file( url = "gs://{}/{}".format(bucket, filename) storage_config = {"cloud_storage_options": {"file_set": {"url": url}}} - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Construct the inspect_job, which defines the entire inspect content task. inspect_job = { @@ -486,11 +487,7 @@ def inspect_gcs_file( def callback(message): try: - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -627,11 +624,12 @@ def inspect_datastore( } } - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Construct the inspect_job, which defines the entire inspect content task. inspect_job = { @@ -654,11 +652,7 @@ def inspect_datastore( def callback(message): try: - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -798,11 +792,12 @@ def inspect_bigquery( } } - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Construct the inspect_job, which defines the entire inspect content task. 
inspect_job = { @@ -825,11 +820,7 @@ def inspect_bigquery( def callback(message): try: - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() diff --git a/dlp/risk.py b/dlp/risk.py index 9c82dc58bc2..518f947eee6 100644 --- a/dlp/risk.py +++ b/dlp/risk.py @@ -59,8 +59,9 @@ def numerical_risk_analysis( # Instantiate a client. dlp = google.cloud.dlp_v2.DlpServiceClient() - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Location info of the BigQuery table. source_table = { @@ -70,7 +71,7 @@ def numerical_risk_analysis( } # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for @@ -86,11 +87,7 @@ def numerical_risk_analysis( operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -173,8 +170,9 @@ def categorical_risk_analysis( # Instantiate a client. dlp = google.cloud.dlp_v2.DlpServiceClient() - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Location info of the BigQuery table. source_table = { @@ -184,7 +182,7 @@ def categorical_risk_analysis( } # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for @@ -200,11 +198,7 @@ def categorical_risk_analysis( operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -302,7 +296,8 @@ def get_values(obj): dlp = google.cloud.dlp_v2.DlpServiceClient() # Convert the project id into a full resource id. - parent = dlp.project_path(project) + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Location info of the BigQuery table. 
source_table = { @@ -318,7 +313,7 @@ def map_fields(field): quasi_ids = map(map_fields, quasi_ids) # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for @@ -332,11 +327,7 @@ def map_fields(field): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -437,7 +428,8 @@ def get_values(obj): dlp = google.cloud.dlp_v2.DlpServiceClient() # Convert the project id into a full resource id. - parent = dlp.project_path(project) + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Location info of the BigQuery table. source_table = { @@ -453,7 +445,7 @@ def map_fields(field): quasi_ids = map(map_fields, quasi_ids) # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for @@ -472,11 +464,7 @@ def map_fields(field): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. message.ack() @@ -590,8 +578,9 @@ def get_values(obj): # Instantiate a client. dlp = google.cloud.dlp_v2.DlpServiceClient() - # Convert the project id into a full resource id. - parent = dlp.project_path(project) + # Convert the project id into full resource ids. + topic = google.cloud.pubsub.PublisherClient.topic_path(project, topic_id) + parent = dlp.location_path(project, 'global') # Location info of the BigQuery table. source_table = { @@ -614,7 +603,7 @@ def map_fields(quasi_id, info_type): quasi_ids = map(map_fields, quasi_ids, info_types) # Tell the API where to send a notification when the job is complete. - actions = [{"pub_sub": {"topic": "{}/topics/{}".format(parent, topic_id)}}] + actions = [{"pub_sub": {"topic": topic}}] # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for @@ -633,11 +622,7 @@ def map_fields(quasi_id, info_type): operation = dlp.create_dlp_job(parent, risk_job=risk_job) def callback(message): - # The DlpJobName in the Pub/Sub message has the location indicator - # and we need to remove that part for comparison. - dlp_job_name = message.attributes["DlpJobName"].replace( - '/locations/global', '') - if dlp_job_name == operation.name: + if message.attributes["DlpJobName"] == operation.name: # This is the message we're looking for, so acknowledge it. 
message.ack() From b165a8175e516ee5cf4451bb847a54a22567506d Mon Sep 17 00:00:00 2001 From: Takashi Matsuo Date: Wed, 3 Jun 2020 21:32:00 +0000 Subject: [PATCH 6/6] increase the timeout to 5 mins --- dlp/inspect_content_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index 12fa40610f6..e2192bdd6c4 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -40,7 +40,7 @@ BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING -TIMEOUT = 180 # 3 minutes +TIMEOUT = 300 # 5 minutes @pytest.fixture(scope="module")
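
A note on the noxfile_config.py mechanism that patch 1 adds and patch 4 backs out: per the file's own comments, a sample directory copies the file in and the shared noxfile.py imports it, layering TEST_CONFIG_OVERRIDE over repo-wide defaults. A minimal sketch of that consuming side, under assumed names (TEST_CONFIG and get_project_id are hypothetical; the real noxfile.py is not part of this series):

    import os

    # Hypothetical repo-wide defaults; the real ones live in the shared
    # noxfile.py, which this series does not show.
    TEST_CONFIG = {
        'ignored_versions': [],
        'gcloud_project_env': 'GCLOUD_PROJECT',
        'envs': {},
    }

    try:
        # The per-sample override file, e.g. dlp/noxfile_config.py.
        from noxfile_config import TEST_CONFIG_OVERRIDE
    except ImportError:
        TEST_CONFIG_OVERRIDE = {}

    # Later keys win, so a sample's overrides replace the defaults.
    TEST_CONFIG.update(TEST_CONFIG_OVERRIDE)

    def get_project_id():
        # With 'BUILD_SPECIFIC_GCLOUD_PROJECT' opted in, each build
        # resolves its own Cloud project instead of the shared one.
        return os.environ[TEST_CONFIG['gcloud_project_env']]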
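Patches 3 and 5 are two answers to the same mismatch: a job created under a project-scoped parent reports an operation.name without a location component, while the Pub/Sub notification's DlpJobName attribute carries one. A rough illustration with made-up identifiers (not captured from a real job):

    # Name of a job created under dlp.project_path(project):
    operation_name = "projects/my-project/dlpJobs/i-1234567890"

    # Name delivered in the notification's DlpJobName attribute:
    pubsub_job_name = (
        "projects/my-project/locations/global/dlpJobs/i-1234567890")

    # Patch 3 strips the location before comparing (the workaround):
    assert pubsub_job_name.replace("/locations/global", "") == operation_name

Patch 5 removes the workaround at the source: with dlp.location_path(project, 'global') as the parent, operation.name itself carries the location component, so the direct equality check in the callbacks works again.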
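The timeout churn (timeout=1 -> 180 -> 300 seconds in inspect_content_test.py, 30 -> 180 -> 60 in risk_test.py) all feeds one wait: the test blocks until the subscriber callback sees the finished job's notification or the deadline passes, and the new assertions ("Info type: ...", "No findings") can only pass once the job has actually completed. A compressed sketch of that pattern, assuming a hypothetical helper name (wait_for_dlp_job) and eliding the result printing the samples do:

    import threading

    import google.cloud.pubsub

    TIMEOUT = 300  # 5 minutes, the final value from patch 6

    def wait_for_dlp_job(project, subscription_id, job_name):
        # Signaled by the callback once the job's message arrives.
        job_done = threading.Event()

        def callback(message):
            if message.attributes["DlpJobName"] == job_name:
                message.ack()
                job_done.set()
            else:
                # Not the message we are looking for.
                message.drop()

        subscriber = google.cloud.pubsub.SubscriberClient()
        subscription_path = subscriber.subscription_path(
            project, subscription_id)
        subscriber.subscribe(subscription_path, callback=callback)

        # With timeout=1 the old tests could only assert that the
        # operation had started; at 180-300 seconds the findings
        # assertions have a realistic chance to pass.
        return job_done.wait(timeout=TIMEOUT)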