From 1e4e04372c8c2ef74f6a272cca7f99cefdb59dc2 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Mon, 12 Jul 2021 11:28:19 +0200 Subject: [PATCH 1/2] fix: avoid possible job already exists error If job create request fails, a query job might still have started successfully. This commit handles this edge case and returns such query job if one can be found. --- google/cloud/bigquery/client.py | 27 +++++++++++++-- tests/unit/test_client.py | 59 +++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 2 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 2a02c7629..9a419f7f2 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3190,6 +3190,7 @@ def query( If ``job_config`` is not an instance of :class:`~google.cloud.bigquery.job.QueryJobConfig` class. """ + job_id_given = job_id is not None job_id = _make_job_id(job_id, job_id_prefix) if project is None: @@ -3221,9 +3222,31 @@ def query( job_ref = job._JobReference(job_id, project=project, location=location) query_job = job.QueryJob(job_ref, query, client=self, job_config=job_config) - query_job._begin(retry=retry, timeout=timeout) - return query_job + try: + query_job._begin(retry=retry, timeout=timeout) + except core_exceptions.GoogleAPICallError as create_exc: + # Even if create job request fails, it is still possible that the job has + # actually been successfully created. We check this by trying to fetch it, + # but only if we created a random job ID ourselves (otherwise we might + # mistakenly fetch a job created by someone else). 
+ if job_id_given: + raise create_exc + + try: + query_job = self.get_job( + job_id, + project=project, + location=location, + retry=retry, + timeout=timeout, + ) + except core_exceptions.GoogleAPIError: # (includes RetryError) + raise create_exc + else: + return query_job + else: + return query_job def insert_rows( self, diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index dffe7bdba..6e50b505d 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -4617,6 +4617,65 @@ def test_query_w_query_parameters(self): }, ) + def test_query_job_rpc_fail_w_job_id_given(self): + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = GoogleAPICallError("Dubious oops.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + with job_begin_patcher: + with pytest.raises(GoogleAPICallError, match="Dubious oops."): + client.query("SELECT 1;", job_id="123") + + def test_query_job_rpc_fail_w_random_id_job_create_indeed_failed(self): + from google.api_core.exceptions import GoogleAPICallError + from google.api_core.exceptions import NotFound + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = GoogleAPICallError("Dubious oops.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", side_effect=NotFound("no such job") + ) + + with job_begin_patcher, get_job_patcher: + # If get job request fails, the original exception should be raised. 
+ with pytest.raises(GoogleAPICallError, match="Dubious oops."): + client.query("SELECT 1;", job_id=None) + + def test_query_job_rpc_fail_w_random_id_job_create_succeeded(self): + from google.api_core.exceptions import GoogleAPICallError + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = GoogleAPICallError("Dubious oops.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", return_value=mock.sentinel.query_job + ) + + with job_begin_patcher, get_job_patcher: + result = client.query("SELECT 1;", job_id=None) + + assert result is mock.sentinel.query_job + def test_insert_rows_w_timeout(self): from google.cloud.bigquery.schema import SchemaField from google.cloud.bigquery.table import Table From 2b1a8b5b76642362a112a46d834a2cf0c6157d22 Mon Sep 17 00:00:00 2001 From: Peter Lamut Date: Wed, 14 Jul 2021 13:02:53 +0200 Subject: [PATCH 2/2] Catch only Conflict errors on query job create --- google/cloud/bigquery/client.py | 9 ++++--- tests/unit/test_client.py | 42 +++++++++++++++++++++++---------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/google/cloud/bigquery/client.py b/google/cloud/bigquery/client.py index 9a419f7f2..de259abce 100644 --- a/google/cloud/bigquery/client.py +++ b/google/cloud/bigquery/client.py @@ -3225,11 +3225,10 @@ def query( try: query_job._begin(retry=retry, timeout=timeout) - except core_exceptions.GoogleAPICallError as create_exc: - # Even if create job request fails, it is still possible that the job has - # actually been successfully created. We check this by trying to fetch it, - # but only if we created a random job ID ourselves (otherwise we might - # mistakenly fetch a job created by someone else). 
+ except core_exceptions.Conflict as create_exc: + # The thought is if someone is providing their own job IDs and they get + # their job ID generation wrong, this could end up returning results for + # the wrong query. We thus only try to recover if job ID was not given. if job_id_given: raise create_exc diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 6e50b505d..2be8daab6 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -4617,53 +4617,69 @@ def test_query_w_query_parameters(self): }, ) - def test_query_job_rpc_fail_w_job_id_given(self): - from google.api_core.exceptions import GoogleAPICallError + def test_query_job_rpc_fail_w_random_error(self): + from google.api_core.exceptions import Unknown from google.cloud.bigquery.job import QueryJob creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - job_create_error = GoogleAPICallError("Dubious oops.") + job_create_error = Unknown("Not sure what went wrong.") job_begin_patcher = mock.patch.object( QueryJob, "_begin", side_effect=job_create_error ) with job_begin_patcher: - with pytest.raises(GoogleAPICallError, match="Dubious oops."): + with pytest.raises(Unknown, match="Not sure what went wrong."): client.query("SELECT 1;", job_id="123") - def test_query_job_rpc_fail_w_random_id_job_create_indeed_failed(self): - from google.api_core.exceptions import GoogleAPICallError - from google.api_core.exceptions import NotFound + def test_query_job_rpc_fail_w_conflict_job_id_given(self): + from google.api_core.exceptions import Conflict from google.cloud.bigquery.job import QueryJob creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - job_create_error = GoogleAPICallError("Dubious oops.") + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + with 
job_begin_patcher: + with pytest.raises(Conflict, match="Job already exists."): + client.query("SELECT 1;", job_id="123") + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): + from google.api_core.exceptions import Conflict + from google.api_core.exceptions import DataLoss + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") job_begin_patcher = mock.patch.object( QueryJob, "_begin", side_effect=job_create_error ) get_job_patcher = mock.patch.object( - client, "get_job", side_effect=NotFound("no such job") + client, "get_job", side_effect=DataLoss("we lost yor job, sorry") ) with job_begin_patcher, get_job_patcher: # If get job request fails, the original exception should be raised. - with pytest.raises(GoogleAPICallError, match="Dubious oops."): + with pytest.raises(Conflict, match="Job already exists."): client.query("SELECT 1;", job_id=None) - def test_query_job_rpc_fail_w_random_id_job_create_succeeded(self): - from google.api_core.exceptions import GoogleAPICallError + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): + from google.api_core.exceptions import Conflict from google.cloud.bigquery.job import QueryJob creds = _make_credentials() http = object() client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) - job_create_error = GoogleAPICallError("Dubious oops.") + job_create_error = Conflict("Job already exists.") job_begin_patcher = mock.patch.object( QueryJob, "_begin", side_effect=job_create_error )