From 2a33272a821880b42501630b496800c40d8816f4 Mon Sep 17 00:00:00 2001 From: Mauricio Martinez Date: Thu, 30 Nov 2023 14:32:37 -0600 Subject: [PATCH 01/10] Bigquery spark stored procedures --- docs/source/infrastructures.rst | 111 +++++- .../README.md | 10 + .../additional.py | 6 + .../example_bq_spark_stored_procedure/main.py | 23 ++ .../requirements.txt | 1 + .../spark.py | 23 ++ goblet/cli.py | 1 + goblet/client.py | 1 + goblet/decorators.py | 48 +++ .../bq_spark_stored_procedure.py | 322 ++++++++++++++++++ goblet/resource_manager.py | 3 + ...ects-goblet-datasets-blogs-routines_1.json | 20 ++ ...cts-goblet-locations-us-connections_1.json | 11 + ...outines-test_spark_stored_procedure_1.json | 4 + ...ections-bqsparkstoredprocedure_test_1.json | 4 + goblet/tests/test_bqsparkstoredprocedure.py | 101 ++++++ requirements.txt | 1 + 17 files changed, 689 insertions(+), 1 deletion(-) create mode 100644 examples/example_bq_spark_stored_procedure/README.md create mode 100644 examples/example_bq_spark_stored_procedure/additional.py create mode 100644 examples/example_bq_spark_stored_procedure/main.py create mode 100644 examples/example_bq_spark_stored_procedure/requirements.txt create mode 100644 examples/example_bq_spark_stored_procedure/spark.py create mode 100644 goblet/infrastructures/bq_spark_stored_procedure.py create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-deploy/post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-deploy/post-v1-projects-goblet-locations-us-connections_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-destroy/delete-bigquery-v2-projects-goblet-datasets-blogs-routines-test_spark_stored_procedure_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-destroy/delete-v1-projects-goblet-locations-us-connections-bqsparkstoredprocedure_test_1.json create mode 100644 goblet/tests/test_bqsparkstoredprocedure.py diff --git a/docs/source/infrastructures.rst b/docs/source/infrastructures.rst index d1d2f461..3707f24f 100644 --- a/docs/source/infrastructures.rst +++ b/docs/source/infrastructures.rst @@ -140,4 +140,113 @@ PubSub Topics config = { ... } app.pubsub_topic("topic", config=config) -To further configure your PubSub topic within Goblet, provide the config parameter base on the documentation. `Topic Resource `_. \ No newline at end of file +To further configure your PubSub topic within Goblet, provide the config parameter base on the documentation. `Topic Resource `_. + +BigQuery Spark Stored Procedures +^^^^^^^^^^^^^^^^^^^^^^^^^ + +To deploy BigQuery stored procedures using Spark follow the example below. +BigQuery stored procedures documentation can be found `here `_. + +Using a function from the same python file: +.. code:: python + import logging + from goblet import Goblet, goblet_entrypoint + + app = Goblet(function_name="create-bq-spark-stored-procedure") + + app.log.setLevel(logging.DEBUG) # configure goblet logger level + goblet_entrypoint(app) + + # Create a bq spark stored procedure with the spark code and additional python files + def spark_handler(): + from pyspark.sql import SparkSession + import pyspark.sql.functions as F + spark = SparkSession.builder.appName("spark-bigquery-demo").getOrCreate() + + # Load data from BigQuery. + texts = spark.read.format("bigquery") \ + .option("table", "tutorial.poc") \ + .load() + texts.createOrReplaceTempView("words") + + # Perform word count. 
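+        # Note: F.length returns the character length of each "text" value, so the
+        # column created below is a per-row text length rather than a word count; an
+        # actual word count would need something like F.size(F.split("text", " ")).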
+ text_count = texts.select("id", "text", F.length("text").alias("sum_text_count")) + text_count.show() + text_count.printSchema() + + # Saving the data to BigQuery + text_count.write.mode("append").format("bigquery") \ + .option("writeMethod", "direct") \ + .save("tutorial.wordcount_output") + + app.bqsparkstoredprocedure( + name="count_words_procedure_external", + dataset_id="tutorial", + func=spark_handler, + ) + +Using a function from a different python file and loading additional python files: +`spark.py`: +.. code:: python + def spark_handler(): + from pyspark.sql import SparkSession + import pyspark.sql.functions as F + spark = SparkSession.builder.appName("spark-bigquery-demo").getOrCreate() + + # Load data from BigQuery. + texts = spark.read.format("bigquery") \ + .option("table", "tutorial.poc") \ + .load() + texts.createOrReplaceTempView("words") + + # Perform word count. + text_count = texts.select("id", "text", F.length("text").alias("sum_text_count")) + text_count.show() + text_count.printSchema() + + # Saving the data to BigQuery + text_count.write.mode("append").format("bigquery") \ + .option("writeMethod", "direct") \ + .save("tutorial.wordcount_output") + + if __name__ == "__main__": + spark_handler() + +`additional.py`: +.. code:: python + import logging + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) + + def additional_func(): + logger.info("additional_func") + +`main.py`: +.. code:: python + import logging + from goblet import Goblet, goblet_entrypoint + + app = Goblet(function_name="create-bq-spark-stored-procedure") + + app.log.setLevel(logging.DEBUG) # configure goblet logger level + goblet_entrypoint(app) + + # Create a bq spark stored procedure with the spark code and additional python files + app.bqsparkstoredprocedure( + name="count_words_procedure_external", + dataset_id="tutorial", + spark_file="spark.py", + additional_python_files=["additional.py"], + ) + +Options that can be passed to the `bqsparkstoredprocedure` method are: +- name: name of resource +- dataset_id: dataset id where the routine will be created +- func (optional): function/method to be executed +- runtime_version (optional): runtime version of the spark code +- container_image (optional): container image to use +- spark_file (optional): file from local path with the spark code +- additional_python_files (optional): List of files from local path with additional code (Ex: libraries) +- additional_files (optional): List of files from local path with additional files (Ex: csvs) +- properties (optional): Dictionary with additional properties. `Supported properties `_ \ No newline at end of file diff --git a/examples/example_bq_spark_stored_procedure/README.md b/examples/example_bq_spark_stored_procedure/README.md new file mode 100644 index 00000000..1b539e4e --- /dev/null +++ b/examples/example_bq_spark_stored_procedure/README.md @@ -0,0 +1,10 @@ +# BigQuery Spark Stored Procedure Example +This example demonstrates how to use Goblet to create a BigQuery stored procedure that uses Spark. https://cloud.google.com/bigquery/docs/spark-procedures + +## Running Example +```bash +# Run the example. This will create a topic and a subscription on the emulator. 
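+# For this example, `goblet deploy` creates the BigQuery connection, uploads spark.py
+# and additional.py to a staging bucket named after the app, and registers the stored
+# procedure in the target dataset. To tear it all down afterwards, `goblet destroy
+# --skip-backend` should work, assuming destroy mirrors deploy's --skip-backend flag.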
+goblet deploy --skip-backend +bq query --use_legacy_sql=false --destination_table=myDataset.myTable \ + 'CALL `project.myDataset.count_words_procedure_external`();' +``` diff --git a/examples/example_bq_spark_stored_procedure/additional.py b/examples/example_bq_spark_stored_procedure/additional.py new file mode 100644 index 00000000..34ae993d --- /dev/null +++ b/examples/example_bq_spark_stored_procedure/additional.py @@ -0,0 +1,6 @@ +import logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def additional_func(): + logger.info("additional_func") \ No newline at end of file diff --git a/examples/example_bq_spark_stored_procedure/main.py b/examples/example_bq_spark_stored_procedure/main.py new file mode 100644 index 00000000..2d6a39db --- /dev/null +++ b/examples/example_bq_spark_stored_procedure/main.py @@ -0,0 +1,23 @@ +import logging +from goblet import Goblet, goblet_entrypoint +# from spark import spark_handler + +app = Goblet(function_name="create-bq-spark-stored-procedure") + +app.log.setLevel(logging.DEBUG) # configure goblet logger level +goblet_entrypoint(app) + +# Create a bq spark stored procedure with the spark code and additional python files +app.bqsparkstoredprocedure( + name="count_words_procedure_external", + dataset_id="tutorial", + spark_file="spark.py", + additional_python_files=["additional.py"], +) + +# Create a bq spark stored procedure with the spark code from the function +# app.bqsparkstoredprocedure( +# name="count_words_procedure", +# dataset_id="tutorial", +# func=spark_handler, +# ) diff --git a/examples/example_bq_spark_stored_procedure/requirements.txt b/examples/example_bq_spark_stored_procedure/requirements.txt new file mode 100644 index 00000000..7c579ecd --- /dev/null +++ b/examples/example_bq_spark_stored_procedure/requirements.txt @@ -0,0 +1 @@ +pyspark==3.5.0 \ No newline at end of file diff --git a/examples/example_bq_spark_stored_procedure/spark.py b/examples/example_bq_spark_stored_procedure/spark.py new file mode 100644 index 00000000..6d7ad0d9 --- /dev/null +++ b/examples/example_bq_spark_stored_procedure/spark.py @@ -0,0 +1,23 @@ +def spark_handler(): + from pyspark.sql import SparkSession + import pyspark.sql.functions as F + spark = SparkSession.builder.appName("spark-bigquery-demo").getOrCreate() + + # Load data from BigQuery. + texts = spark.read.format("bigquery") \ + .option("table", "tutorial.poc") \ + .load() + texts.createOrReplaceTempView("words") + + # Perform word count. 
+ text_count = texts.select("id", "text", F.length("text").alias("sum_text_count")) + text_count.show() + text_count.printSchema() + + # Saving the data to BigQuery + text_count.write.mode("append").format("bigquery") \ + .option("writeMethod", "direct") \ + .save("tutorial.wordcount_output") + +if __name__ == "__main__": + spark_handler() \ No newline at end of file diff --git a/goblet/cli.py b/goblet/cli.py index b9691f63..13567ca9 100644 --- a/goblet/cli.py +++ b/goblet/cli.py @@ -34,6 +34,7 @@ "pubsub", "redis", "vpcconnector", + "bqsparkstoredprocedure", ] diff --git a/goblet/client.py b/goblet/client.py index 08671c80..29fabb42 100644 --- a/goblet/client.py +++ b/goblet/client.py @@ -24,6 +24,7 @@ "iam": "v1", "cloudresourcemanager": "v3", "artifactregistry": "v1", + "storage": "v1", } diff --git a/goblet/decorators.py b/goblet/decorators.py index 09a2468c..a0108a71 100644 --- a/goblet/decorators.py +++ b/goblet/decorators.py @@ -5,6 +5,7 @@ import logging from goblet_gcp_client.client import get_default_location, get_default_project +from goblet.client import VersionedClients from goblet.backends.cloudfunctionv1 import CloudFunctionV1 from goblet.backends.cloudfunctionv2 import CloudFunctionV2 @@ -15,6 +16,9 @@ from goblet.infrastructures.cloudtask import CloudTaskQueue from goblet.infrastructures.pubsub import PubSubTopic from goblet.infrastructures.alerts import PubSubDLQCondition +from goblet.infrastructures.bq_spark_stored_procedure import ( + BigQuerySparkStoredProcedure, +) log = logging.getLogger(__name__) log.setLevel(logging.getLevelName(os.getenv("GOBLET_LOG_LEVEL", "INFO"))) @@ -43,6 +47,7 @@ "vpcconnector": VPCConnector, "cloudtaskqueue": CloudTaskQueue, "pubsub_topic": PubSubTopic, + "bqsparkstoredprocedure": BigQuerySparkStoredProcedure, } @@ -346,6 +351,49 @@ def vpcconnector(self, name, **kwargs): kwargs={"name": name, "kwargs": kwargs}, ) + def bqsparkstoredprocedure( + self, + name, + dataset_id, + runtime_version="1.1", + container_image=None, + func=None, + spark_file=None, + additional_python_files=None, + additional_files=None, + properties=None, + **kwargs, + ): + """ + BigQuery Spark Stored Procedure trigger + :param name: name of resource + :param dataset_id: dataset id where the routine will be created + :param func (optional): function/method + :param runtime_version (optional): runtime version of the spark code + :param container_image (optional): container image to use + :param spark_file (optional): file from a local path with the spark code + :param additional_python_files (optional): List of files from a local path with additional code (Ex: libraries) + :param additional_files (optional): List of files from a local path with additional files (Ex: csvs) + :param properties (optional): Dictionary with additional properties. 
Supported properties: https://spark.apache.org/docs/latest/configuration.html#spark-properties + """ + return self._register_infrastructure( + handler_type="bqsparkstoredprocedure", + kwargs={ + "name": name, + "kwargs": { + "dataset_id": dataset_id, + "func": func, + "runtime_version": runtime_version, + "container_image": container_image, + "spark_file": spark_file, + "additional_python_files": additional_python_files, + "additional_files": additional_files, + "properties": properties, + **kwargs, + }, + }, + ) + def errorhandler(self, error): def _register_error_handler(error_handler): self.error_handlers[error] = error_handler diff --git a/goblet/infrastructures/bq_spark_stored_procedure.py b/goblet/infrastructures/bq_spark_stored_procedure.py new file mode 100644 index 00000000..5e3bc447 --- /dev/null +++ b/goblet/infrastructures/bq_spark_stored_procedure.py @@ -0,0 +1,322 @@ +import logging +import os +import inspect + +from googleapiclient.errors import HttpError + +from goblet.infrastructures.infrastructure import Infrastructure +from goblet_gcp_client.client import get_default_project, get_default_location +from goblet.permissions import gcp_generic_resource_permissions +from google.cloud import storage +from google.api_core.exceptions import Conflict, NotFound + + +log = logging.getLogger("goblet.deployer") +log.setLevel(logging.getLevelName(os.getenv("GOBLET_LOG_LEVEL", "INFO"))) + +storage_client = storage.Client() + +class BigQuerySparkStoredProcedure(Infrastructure): + """ + Cloud Big Query Spark Stored procedures. + https://cloud.google.com/bigquery/docs/spark-procedures + + Limitations + https://cloud.google.com/bigquery/docs/spark-procedures#limitations + + """ + resource_type = "bqsparkstoredprocedure" + required_apis = ["bigquery", "bigqueryconnection"] + permissions = [ + "bigquery.jobs.create", + "bigquery.connections.delegate", + "bigquery.connections.use", + *gcp_generic_resource_permissions("storage", "objects"), + *gcp_generic_resource_permissions("storage", "buckets"), + *gcp_generic_resource_permissions("bigquery", "connections"), + *gcp_generic_resource_permissions("bigquery", "table"), + *gcp_generic_resource_permissions("bigquery", "routines"), + ] + + def register(self, name, kwargs): + """ + Register in handler resources + :param name: name of resource + :param kwargs: + :return: + """ + config = self.config.bqsparkstoredprocedure.copy() if self.config.bqsparkstoredprocedure else {} + # Routine names must contain only letters, numbers, and underscores, and be at most 256 characters long. 
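+        # Values set under "bqsparkstoredprocedure" in the backend config override the
+        # decorator kwargs, and dashes are swapped for underscores so Goblet-style
+        # resource names remain valid routine IDs.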
+ routine_name = config.get("name", name).replace("-", "_") + dataset_id = config.get("dataset_id", kwargs["dataset_id"]) + runtime_version = config.get("runtime_version", kwargs["runtime_version"]) + # Func cannot be loaded from config file as it is a function + func = kwargs.get("func") + spark_file = config.get("spark_file", kwargs["spark_file"]) + container_image = config.get("container_image", kwargs.get("container_image", None)) + additional_python_files = config.get("additional_python_files", kwargs.get("additional_python_files", [])) + additional_files = config.get("additional_files", kwargs.get("additional_files", [])) + properties = config.get("properties", kwargs.get("properties", {})) + + local_code = False + if func is not None: + func = self.stringify_func(func) + local_code = True + + self.connection_location = config.get("location", kwargs.get("location", get_default_location())) + self.resources[routine_name] = { + "routine_name": routine_name, + "dataset_id": dataset_id, + "func": func, + "location": self.connection_location, + "runtime_version": runtime_version, + "spark_file": spark_file, + "local_code": local_code, + "container_image": container_image, + "additional_python_files": additional_python_files, + "additional_files": additional_files, + "properties": properties, + } + return True + + def _deploy(self): + if not self.resources: + return + log.info("Deploying bigquery remote functions") + + try: + self.deploy_bigquery_connection( + self.name, self.connection_location + ) + except HttpError as exception: + if exception.resp.status == 409: + log.info( + "Connection already created bigquery query: for %s", self.name + ) + else: + log.error("Create connection %s", exception.error_details) + raise exception + + for _, resource in self.resources.items(): + if not resource["local_code"]: + self.deploy_bucket(self.name) + resource["spark_file"] = self.upload_file(resource["spark_file"], self.name) + if resource["additional_python_files"]: + for i in range(len(resource["additional_python_files"])): + resource["additional_python_files"][i] = self.upload_file(resource["additional_python_files"][i], self.name) + if resource["additional_files"]: + for i in range(len(resource["additional_files"])): + resource["additional_files"][i] = self.upload_file(resource["additional_files"][i], self.name) + + create_routine_query = self.create_routine_payload(resource) + routine_name = resource["routine_name"] + try: + self.versioned_clients.bigquery_routines.execute( + "insert", + params={ + "body": create_routine_query, + "projectId": get_default_project(), + "datasetId": resource["dataset_id"], + }, + parent_key="projectId", + ) + log.info("Created bq routine %s", routine_name) + except HttpError as exception: + if exception.resp.status == 409: + self.versioned_clients.bigquery_routines.execute( + "update", + params={ + "body": create_routine_query, + "projectId": get_default_project(), + "datasetId": resource["dataset_id"], + "routineId": routine_name, + }, + parent_key="projectId", + ) + log.info("Updated Spark Stored Procedure %s", routine_name) + else: + log.error( + "Bigquery Spark Stored Procedure couldn't be created " + "nor updated name %s with error: %s", + routine_name, + exception.error_details, + ) + raise exception + + def destroy(self): + """ + Destroy connection then destroy one by one every routine + :return: + """ + if not self.resources: + return + self.destroy_bigquery_connection() + for _, resource in self.resources.items(): + 
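+            # The shared connection (named after the app) was removed above; each
+            # resource's routine is deleted individually here.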
self.destroy_routine(resource["dataset_id"], resource["routine_name"]) + self.destroy_bucket(self.name) + + def deploy_bigquery_connection(self, connection_name, location): + """ + Creates (or get if exists) a connection resource with Handler.name + :param connection_name: name for the connection + :return: + """ + connection_id = f"{self.name}" + resource_type = {"spark": {}} + try: + bq_connection = self.versioned_clients.bigquery_connections.execute( + "create", + params={"body": resource_type, "connectionId": connection_id}, + parent_schema=f"projects/{get_default_project()}/locations/{location}", + ) + log.info(f"Created bigquery connection name: {connection_id}") + + except HttpError as exception: + if exception.resp.status == 409: + log.info( + f"Bigquery connection already exist with name: {connection_name} for {self.name} and location {location}" + ) + client = self.versioned_clients.bigquery_connections + bq_connection = client.execute( + "get", + params={"name": client.parent + "/connections/" + connection_id}, + parent=False, + ) + log.info(f"Returning connection {bq_connection['name']}") + else: + log.error(exception.error_details) + raise exception + return bq_connection + + def destroy_bigquery_connection(self): + """ + Destroy bigquery connection, if already exist do nothing + :return: + """ + client = self.versioned_clients.bigquery_connections + try: + client.execute( + "delete", + params={ + "name": f"projects/{get_default_project()}/locations/{self.connection_location}" + + "/connections/" + + self.name + }, + parent=False, + ) + except HttpError as exception: + if exception.resp.status == 404: + log.info(f"Connection {self.name} already destroyed") + else: + raise exception + return True + + def destroy_routine(self, dataset_id, routine_id): + """ + :param dataset_id: + :param routine_id: + """ + try: + self.versioned_clients.bigquery_routines.execute( + "delete", + params={ + "projectId": get_default_project(), + "datasetId": dataset_id, + "routineId": routine_id, + }, + parent=False, + ) + log.info(f"Destroyed routine {routine_id} for dataset {dataset_id}") + except HttpError as exception: + if exception.resp.status == 409: + log.info(f"Routine {routine_id} already destroyed") + elif exception.resp.status == 404: + log.info(f"Routine {routine_id} doesn't exist. already destroyed?") + else: + log.error( + f"Couldn't destroy {routine_id} for dataset {dataset_id}. 
{exception.error_details}" + ) + raise exception + + def create_routine_payload(self, resource): + """ + Create a routine object according to BigQuery specification + :param resource: a resource saved in resources in Handler + :return: a dict representing a routine according to + https://cloud.google.com/bigquery/docs/reference/rest/v2/routines + """ + spark_options = { + "connection": f"projects/{get_default_project()}/locations/{resource['location']}/connections/{self.name}", + "runtimeVersion": resource["runtime_version"], + "containerImage": resource["container_image"], + "properties": resource["properties"], + } + routine_reference = { + "projectId": get_default_project(), + "datasetId": resource["dataset_id"], + "routineId": resource["routine_name"], + } + language = "PYTHON" + + query_request = { + "language": language, + "routineReference": routine_reference, + "routineType": "PROCEDURE", + "sparkOptions": spark_options, + } + + if resource["local_code"]: + query_request["definitionBody"] = resource["func"] + else: + spark_options["mainFileUri"]= resource["spark_file"] + query_request["sparkOptions"] = spark_options + + if resource["additional_python_files"]: + spark_options["pyFileUris"] = resource["additional_python_files"] + query_request["sparkOptions"] = spark_options + + if resource["additional_files"]: + spark_options["archiveUris"] = resource["additional_files"] + query_request["sparkOptions"] = spark_options + + log.debug("Routine payload %s", query_request) + + return query_request + + def deploy_bucket(self, bucket_name): + try: + log.info(f"creating storage bucket {bucket_name}") + storage_client.create_bucket( + bucket_name, + project=get_default_project(), + location=get_default_location(), + ) + log.info(f"bucket {bucket_name} created") + except Conflict: + log.info(f"storage bucket {bucket_name} already exists") + + def upload_file(self, file, bucket_name): + bucket = storage_client.bucket(bucket_name) + destination_blob_name = file.split("/")[-1] + blob = bucket.blob(destination_blob_name) + blob.upload_from_filename(file) + log.info(f"uploaded file {file} to bucket {bucket_name}") + log.debug(f"gs://{bucket_name}/{destination_blob_name}") + return f"gs://{bucket_name}/{destination_blob_name}" + + def destroy_bucket(self, bucket_name): + try: + bucket = storage_client.get_bucket( + bucket_name, + ) + bucket.delete(force=True) + log.info(f"bucket {bucket_name} deleted") + except NotFound: + log.info(f"bucket {bucket_name} already deleted") + + @staticmethod + def stringify_func(func): + lines, _ = inspect.getsourcelines(func) + if lines[0].startswith("def"): + lines.pop(0) + return "".join(map(str.lstrip, lines)) \ No newline at end of file diff --git a/goblet/resource_manager.py b/goblet/resource_manager.py index 2f92a2bc..f697b428 100644 --- a/goblet/resource_manager.py +++ b/goblet/resource_manager.py @@ -25,6 +25,7 @@ from goblet.infrastructures.apigateway import ApiGateway from goblet.infrastructures.cloudtask import CloudTaskQueue from goblet.infrastructures.pubsub import PubSubTopic +from goblet.infrastructures.bq_spark_stored_procedure import BigQuerySparkStoredProcedure from goblet.response import default_missing_route @@ -61,6 +62,7 @@ "vpcconnector": VPCConnector, "cloudtaskqueue": CloudTaskQueue, "pubsub_topic": PubSubTopic, + "bqsparkstoredprocedure": BigQuerySparkStoredProcedure, } @@ -109,6 +111,7 @@ def __init__( "alerts": Alerts(function_name, backend=backend), "apigateway": ApiGateway(function_name, backend=backend), "pubsub_topic": 
PubSubTopic(function_name, backend=backend), + "bqsparkstoredprocedure": BigQuerySparkStoredProcedure(function_name, backend=backend), } self.middleware_handlers = { diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-deploy/post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-deploy/post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json new file mode 100644 index 00000000..a42cb7ea --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-deploy/post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json @@ -0,0 +1,20 @@ +{ + "headers": {}, + "body": { + "etag": "Mg9Ctdrelv6gNGbkcZFsuQ==", + "routineReference": { + "projectId": "goblet", + "datasetId": "blogs", + "routineId": "test_spark_stored_procedure" + }, + "routineType": "PROCEDURE", + "creationTime": "1701375036581", + "lastModifiedTime": "1701375036581", + "language": "PYTHON", + "definitionBody": "def spark_handler():\npass\n", + "sparkOptions": { + "connection": "projects/goblet/locations/us/connections/bqsparkstoredprocedure_test", + "runtimeVersion": "1.1" + } + } +} \ No newline at end of file diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-deploy/post-v1-projects-goblet-locations-us-connections_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-deploy/post-v1-projects-goblet-locations-us-connections_1.json new file mode 100644 index 00000000..abf874e3 --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-deploy/post-v1-projects-goblet-locations-us-connections_1.json @@ -0,0 +1,11 @@ +{ + "headers": {}, + "body": { + "name": "projects/goblet/locations/us/connections/bqsparkstoredprocedure_test", + "creationTime": "1701375034518", + "lastModifiedTime": "1701375034518", + "spark": { + "serviceAccountId": "bqcx-98058317567-d1ht@gcp-sa-bigquery-consp.iam.gserviceaccount.com" + } + } +} \ No newline at end of file diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-destroy/delete-bigquery-v2-projects-goblet-datasets-blogs-routines-test_spark_stored_procedure_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-destroy/delete-bigquery-v2-projects-goblet-datasets-blogs-routines-test_spark_stored_procedure_1.json new file mode 100644 index 00000000..57238766 --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-destroy/delete-bigquery-v2-projects-goblet-datasets-blogs-routines-test_spark_stored_procedure_1.json @@ -0,0 +1,4 @@ +{ + "headers": {}, + "body": {} +} \ No newline at end of file diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-destroy/delete-v1-projects-goblet-locations-us-connections-bqsparkstoredprocedure_test_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-destroy/delete-v1-projects-goblet-locations-us-connections-bqsparkstoredprocedure_test_1.json new file mode 100644 index 00000000..57238766 --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-destroy/delete-v1-projects-goblet-locations-us-connections-bqsparkstoredprocedure_test_1.json @@ -0,0 +1,4 @@ +{ + "headers": {}, + "body": {} +} \ No newline at end of file diff --git a/goblet/tests/test_bqsparkstoredprocedure.py b/goblet/tests/test_bqsparkstoredprocedure.py new file mode 100644 index 00000000..7fca6d07 --- /dev/null +++ b/goblet/tests/test_bqsparkstoredprocedure.py @@ -0,0 +1,101 @@ +import json +from unittest.mock import Mock +from goblet import Goblet +from goblet_gcp_client import get_responses, get_response +from goblet.infrastructures.bq_spark_stored_procedure import 
BigQuerySparkStoredProcedure + +class TestBqSparkStoredProcedure: + def test_register_bqsparkstoredprocedure(self, monkeypatch): + app = Goblet(function_name="bqsparkstoredprocedure_test") + monkeypatch.setenv("GOOGLE_PROJECT", "goblet") + monkeypatch.setenv("GOOGLE_LOCATION", "us") + + test_dataset_id = "blogs" + + def spark_handler(): + pass + + app.bqsparkstoredprocedure( + name="test_spark_stored_procedure", + dataset_id=test_dataset_id, + func=spark_handler, + ) + + resources = app.infrastructure["bqsparkstoredprocedure"].resources["test_spark_stored_procedure"] + + expected_resources = { + "routine_name": "test_spark_stored_procedure", + "dataset_id": test_dataset_id, + "func": BigQuerySparkStoredProcedure.stringify_func(spark_handler), + "location": "us", + "runtime_version": "1.1", + "spark_file": None, + "local_code": True, + "container_image": None, + "additional_python_files": None, + "additional_files": None, + "properties": None, + } + + for key, value in resources.items(): + assert expected_resources.get(key) == value + + def test_deploy_bqsparkstoredprocedure(self, monkeypatch): + test_deploy_name = "bqsparkstoredprocedure-deploy" + monkeypatch.setenv("GOOGLE_PROJECT", "goblet") + monkeypatch.setenv("GOOGLE_LOCATION", "us") + monkeypatch.setenv("G_TEST_NAME", test_deploy_name) + monkeypatch.setenv("G_HTTP_TEST", "REPLAY") + + test_name = "bqsparkstoredprocedure_test" + procedure_name = "test_spark_stored_procedure" + app = Goblet(function_name=test_name) + test_dataset_id = "blogs" + + def spark_handler(): + pass + + app.bqsparkstoredprocedure( + name=procedure_name, + dataset_id=test_dataset_id, + func=spark_handler, + ) + + app.deploy(skip_backend=True) + responses = get_responses(test_deploy_name) + assert len(responses) > 0 + + connection_response = get_response(test_deploy_name, "post-v1-projects-goblet-locations-us-connections_1.json") + assert connection_response["body"]["name"] == f"projects/goblet/locations/us/connections/{test_name}" + assert "spark" in connection_response["body"] + + routine_response = get_response(test_deploy_name, "post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json") + assert routine_response["body"]["routineReference"]["routineId"] == procedure_name + assert routine_response["body"]["routineReference"]["datasetId"] == test_dataset_id + assert routine_response["body"]["sparkOptions"]["connection"] == connection_response["body"]["name"] + + + def test_destroy_bqsparkstoredprocedure(self, monkeypatch): + test_deploy_name = "bqsparkstoredprocedure-destroy" + monkeypatch.setenv("GOOGLE_PROJECT", "goblet") + monkeypatch.setenv("GOOGLE_LOCATION", "us") + monkeypatch.setenv("G_TEST_NAME", test_deploy_name) + monkeypatch.setenv("G_HTTP_TEST", "REPLAY") + + test_name = "bqsparkstoredprocedure_test" + app = Goblet(function_name=test_name) + test_dataset_id = "blogs" + + def spark_handler(): + pass + + app.bqsparkstoredprocedure( + name="test_spark_stored_procedure", + dataset_id=test_dataset_id, + func=spark_handler, + ) + + app.destroy(skip_backend=True) + responses = get_responses(test_deploy_name) + + assert len(responses) != 0 diff --git a/requirements.txt b/requirements.txt index 1dd1acfd..f44010f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,6 @@ pydantic==2.1.1 PyYAML==6.0.1 google-cloud-logging==3.6.0 google-cloud-appengine-logging==1.3.1 +google-cloud-storage==2.13.0 goblet-gcp-client==0.1.9 protobuf==4.24.0 From e8dd8f9f3eca8529fc04b0665865515a8623e481 Mon Sep 17 00:00:00 2001 From: Mauricio Martinez Date: Thu, 30 
Nov 2023 14:33:40 -0600 Subject: [PATCH 02/10] Cleaning for PR --- goblet/client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/goblet/client.py b/goblet/client.py index 29fabb42..08671c80 100644 --- a/goblet/client.py +++ b/goblet/client.py @@ -24,7 +24,6 @@ "iam": "v1", "cloudresourcemanager": "v3", "artifactregistry": "v1", - "storage": "v1", } From 6579106f27b15382352793a7c733760168b8bcf4 Mon Sep 17 00:00:00 2001 From: Mauricio Martinez Date: Thu, 30 Nov 2023 14:34:51 -0600 Subject: [PATCH 03/10] Lint --- .../bq_spark_stored_procedure.py | 56 ++++++++++++------- goblet/resource_manager.py | 8 ++- goblet/tests/test_bqsparkstoredprocedure.py | 49 +++++++++++----- 3 files changed, 76 insertions(+), 37 deletions(-) diff --git a/goblet/infrastructures/bq_spark_stored_procedure.py b/goblet/infrastructures/bq_spark_stored_procedure.py index 5e3bc447..def28045 100644 --- a/goblet/infrastructures/bq_spark_stored_procedure.py +++ b/goblet/infrastructures/bq_spark_stored_procedure.py @@ -16,6 +16,7 @@ storage_client = storage.Client() + class BigQuerySparkStoredProcedure(Infrastructure): """ Cloud Big Query Spark Stored procedures. @@ -25,6 +26,7 @@ class BigQuerySparkStoredProcedure(Infrastructure): https://cloud.google.com/bigquery/docs/spark-procedures#limitations """ + resource_type = "bqsparkstoredprocedure" required_apis = ["bigquery", "bigqueryconnection"] permissions = [ @@ -45,7 +47,11 @@ def register(self, name, kwargs): :param kwargs: :return: """ - config = self.config.bqsparkstoredprocedure.copy() if self.config.bqsparkstoredprocedure else {} + config = ( + self.config.bqsparkstoredprocedure.copy() + if self.config.bqsparkstoredprocedure + else {} + ) # Routine names must contain only letters, numbers, and underscores, and be at most 256 characters long. 
routine_name = config.get("name", name).replace("-", "_") dataset_id = config.get("dataset_id", kwargs["dataset_id"]) @@ -53,9 +59,15 @@ def register(self, name, kwargs): # Func cannot be loaded from config file as it is a function func = kwargs.get("func") spark_file = config.get("spark_file", kwargs["spark_file"]) - container_image = config.get("container_image", kwargs.get("container_image", None)) - additional_python_files = config.get("additional_python_files", kwargs.get("additional_python_files", [])) - additional_files = config.get("additional_files", kwargs.get("additional_files", [])) + container_image = config.get( + "container_image", kwargs.get("container_image", None) + ) + additional_python_files = config.get( + "additional_python_files", kwargs.get("additional_python_files", []) + ) + additional_files = config.get( + "additional_files", kwargs.get("additional_files", []) + ) properties = config.get("properties", kwargs.get("properties", {})) local_code = False @@ -63,7 +75,9 @@ def register(self, name, kwargs): func = self.stringify_func(func) local_code = True - self.connection_location = config.get("location", kwargs.get("location", get_default_location())) + self.connection_location = config.get( + "location", kwargs.get("location", get_default_location()) + ) self.resources[routine_name] = { "routine_name": routine_name, "dataset_id": dataset_id, @@ -85,14 +99,10 @@ def _deploy(self): log.info("Deploying bigquery remote functions") try: - self.deploy_bigquery_connection( - self.name, self.connection_location - ) + self.deploy_bigquery_connection(self.name, self.connection_location) except HttpError as exception: if exception.resp.status == 409: - log.info( - "Connection already created bigquery query: for %s", self.name - ) + log.info("Connection already created bigquery query: for %s", self.name) else: log.error("Create connection %s", exception.error_details) raise exception @@ -100,13 +110,19 @@ def _deploy(self): for _, resource in self.resources.items(): if not resource["local_code"]: self.deploy_bucket(self.name) - resource["spark_file"] = self.upload_file(resource["spark_file"], self.name) + resource["spark_file"] = self.upload_file( + resource["spark_file"], self.name + ) if resource["additional_python_files"]: for i in range(len(resource["additional_python_files"])): - resource["additional_python_files"][i] = self.upload_file(resource["additional_python_files"][i], self.name) + resource["additional_python_files"][i] = self.upload_file( + resource["additional_python_files"][i], self.name + ) if resource["additional_files"]: for i in range(len(resource["additional_files"])): - resource["additional_files"][i] = self.upload_file(resource["additional_files"][i], self.name) + resource["additional_files"][i] = self.upload_file( + resource["additional_files"][i], self.name + ) create_routine_query = self.create_routine_payload(resource) routine_name = resource["routine_name"] @@ -268,13 +284,13 @@ def create_routine_payload(self, resource): if resource["local_code"]: query_request["definitionBody"] = resource["func"] else: - spark_options["mainFileUri"]= resource["spark_file"] + spark_options["mainFileUri"] = resource["spark_file"] query_request["sparkOptions"] = spark_options if resource["additional_python_files"]: spark_options["pyFileUris"] = resource["additional_python_files"] query_request["sparkOptions"] = spark_options - + if resource["additional_files"]: spark_options["archiveUris"] = resource["additional_files"] query_request["sparkOptions"] = spark_options @@ 
-283,7 +299,7 @@ def create_routine_payload(self, resource): return query_request - def deploy_bucket(self, bucket_name): + def deploy_bucket(self, bucket_name): try: log.info(f"creating storage bucket {bucket_name}") storage_client.create_bucket( @@ -294,7 +310,7 @@ def deploy_bucket(self, bucket_name): log.info(f"bucket {bucket_name} created") except Conflict: log.info(f"storage bucket {bucket_name} already exists") - + def upload_file(self, file, bucket_name): bucket = storage_client.bucket(bucket_name) destination_blob_name = file.split("/")[-1] @@ -313,10 +329,10 @@ def destroy_bucket(self, bucket_name): log.info(f"bucket {bucket_name} deleted") except NotFound: log.info(f"bucket {bucket_name} already deleted") - + @staticmethod def stringify_func(func): lines, _ = inspect.getsourcelines(func) if lines[0].startswith("def"): lines.pop(0) - return "".join(map(str.lstrip, lines)) \ No newline at end of file + return "".join(map(str.lstrip, lines)) diff --git a/goblet/resource_manager.py b/goblet/resource_manager.py index f697b428..a1b03f65 100644 --- a/goblet/resource_manager.py +++ b/goblet/resource_manager.py @@ -25,7 +25,9 @@ from goblet.infrastructures.apigateway import ApiGateway from goblet.infrastructures.cloudtask import CloudTaskQueue from goblet.infrastructures.pubsub import PubSubTopic -from goblet.infrastructures.bq_spark_stored_procedure import BigQuerySparkStoredProcedure +from goblet.infrastructures.bq_spark_stored_procedure import ( + BigQuerySparkStoredProcedure, +) from goblet.response import default_missing_route @@ -111,7 +113,9 @@ def __init__( "alerts": Alerts(function_name, backend=backend), "apigateway": ApiGateway(function_name, backend=backend), "pubsub_topic": PubSubTopic(function_name, backend=backend), - "bqsparkstoredprocedure": BigQuerySparkStoredProcedure(function_name, backend=backend), + "bqsparkstoredprocedure": BigQuerySparkStoredProcedure( + function_name, backend=backend + ), } self.middleware_handlers = { diff --git a/goblet/tests/test_bqsparkstoredprocedure.py b/goblet/tests/test_bqsparkstoredprocedure.py index 7fca6d07..34783c6e 100644 --- a/goblet/tests/test_bqsparkstoredprocedure.py +++ b/goblet/tests/test_bqsparkstoredprocedure.py @@ -2,7 +2,10 @@ from unittest.mock import Mock from goblet import Goblet from goblet_gcp_client import get_responses, get_response -from goblet.infrastructures.bq_spark_stored_procedure import BigQuerySparkStoredProcedure +from goblet.infrastructures.bq_spark_stored_procedure import ( + BigQuerySparkStoredProcedure, +) + class TestBqSparkStoredProcedure: def test_register_bqsparkstoredprocedure(self, monkeypatch): @@ -21,7 +24,9 @@ def spark_handler(): func=spark_handler, ) - resources = app.infrastructure["bqsparkstoredprocedure"].resources["test_spark_stored_procedure"] + resources = app.infrastructure["bqsparkstoredprocedure"].resources[ + "test_spark_stored_procedure" + ] expected_resources = { "routine_name": "test_spark_stored_procedure", @@ -35,8 +40,8 @@ def spark_handler(): "additional_python_files": None, "additional_files": None, "properties": None, - } - + } + for key, value in resources.items(): assert expected_resources.get(key) == value @@ -51,10 +56,10 @@ def test_deploy_bqsparkstoredprocedure(self, monkeypatch): procedure_name = "test_spark_stored_procedure" app = Goblet(function_name=test_name) test_dataset_id = "blogs" - + def spark_handler(): pass - + app.bqsparkstoredprocedure( name=procedure_name, dataset_id=test_dataset_id, @@ -65,15 +70,29 @@ def spark_handler(): responses = 
get_responses(test_deploy_name) assert len(responses) > 0 - connection_response = get_response(test_deploy_name, "post-v1-projects-goblet-locations-us-connections_1.json") - assert connection_response["body"]["name"] == f"projects/goblet/locations/us/connections/{test_name}" + connection_response = get_response( + test_deploy_name, "post-v1-projects-goblet-locations-us-connections_1.json" + ) + assert ( + connection_response["body"]["name"] + == f"projects/goblet/locations/us/connections/{test_name}" + ) assert "spark" in connection_response["body"] - routine_response = get_response(test_deploy_name, "post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json") - assert routine_response["body"]["routineReference"]["routineId"] == procedure_name - assert routine_response["body"]["routineReference"]["datasetId"] == test_dataset_id - assert routine_response["body"]["sparkOptions"]["connection"] == connection_response["body"]["name"] - + routine_response = get_response( + test_deploy_name, + "post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json", + ) + assert ( + routine_response["body"]["routineReference"]["routineId"] == procedure_name + ) + assert ( + routine_response["body"]["routineReference"]["datasetId"] == test_dataset_id + ) + assert ( + routine_response["body"]["sparkOptions"]["connection"] + == connection_response["body"]["name"] + ) def test_destroy_bqsparkstoredprocedure(self, monkeypatch): test_deploy_name = "bqsparkstoredprocedure-destroy" @@ -85,10 +104,10 @@ def test_destroy_bqsparkstoredprocedure(self, monkeypatch): test_name = "bqsparkstoredprocedure_test" app = Goblet(function_name=test_name) test_dataset_id = "blogs" - + def spark_handler(): pass - + app.bqsparkstoredprocedure( name="test_spark_stored_procedure", dataset_id=test_dataset_id, From 6b40636a60250a07885076ecc127f48b37d5648a Mon Sep 17 00:00:00 2001 From: Mauricio Martinez Date: Thu, 30 Nov 2023 14:35:33 -0600 Subject: [PATCH 04/10] Lint --- goblet/tests/test_bqsparkstoredprocedure.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/goblet/tests/test_bqsparkstoredprocedure.py b/goblet/tests/test_bqsparkstoredprocedure.py index 34783c6e..c57eef4a 100644 --- a/goblet/tests/test_bqsparkstoredprocedure.py +++ b/goblet/tests/test_bqsparkstoredprocedure.py @@ -1,5 +1,3 @@ -import json -from unittest.mock import Mock from goblet import Goblet from goblet_gcp_client import get_responses, get_response from goblet.infrastructures.bq_spark_stored_procedure import ( From 47d7ff0ffe007b63f828dd07592c78a36586174e Mon Sep 17 00:00:00 2001 From: Mauricio Martinez Date: Thu, 30 Nov 2023 14:36:58 -0600 Subject: [PATCH 05/10] Lint --- goblet/decorators.py | 1 - 1 file changed, 1 deletion(-) diff --git a/goblet/decorators.py b/goblet/decorators.py index a0108a71..3ab58c0b 100644 --- a/goblet/decorators.py +++ b/goblet/decorators.py @@ -5,7 +5,6 @@ import logging from goblet_gcp_client.client import get_default_location, get_default_project -from goblet.client import VersionedClients from goblet.backends.cloudfunctionv1 import CloudFunctionV1 from goblet.backends.cloudfunctionv2 import CloudFunctionV2 From 61b7217da02d76681ef3d028c70cfee2a1006c13 Mon Sep 17 00:00:00 2001 From: Mauricio Martinez Date: Fri, 1 Dec 2023 10:50:58 -0600 Subject: [PATCH 06/10] Triggering actions From 85e059a69710697b4b678aa41e286cb1b378d4e3 Mon Sep 17 00:00:00 2001 From: Mauricio Martinez Date: Fri, 1 Dec 2023 13:05:29 -0600 Subject: [PATCH 07/10] Adding storage client with goblet_gcp_client --- goblet/client.py | 17 ++++ 
.../bq_spark_stored_procedure.py | 78 +++++++++++------ ...outines-test_spark_stored_procedure_1.json | 4 + ...dprocedure-remote-deploy-o-spark.py_1.json | 4 + ...qsparkstoredprocedure-remote-deploy_1.json | 4 + ...qsparkstoredprocedure-remote-deploy_1.json | 4 + ...parkstoredprocedure-remote-deploy-o_1.json | 27 ++++++ ...ects-goblet-datasets-blogs-routines_1.json | 20 +++++ .../post-storage-v1-b_1.json | 29 +++++++ ...parkstoredprocedure-remote-deploy-o_1.json | 22 +++++ ...cts-goblet-locations-us-connections_1.json | 11 +++ goblet/tests/test_bqsparkstoredprocedure.py | 84 ++++++++++++++++++- requirements.txt | 1 - 13 files changed, 277 insertions(+), 28 deletions(-) create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-bigquery-v2-projects-goblet-datasets-blogs-routines-test_spark_stored_procedure_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o-spark.py_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-storage-v1-b-bqsparkstoredprocedure-remote-deploy_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-v1-projects-goblet-locations-us-connections-bqsparkstoredprocedure-remote-deploy_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/get-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-storage-v1-b_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-upload-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o_1.json create mode 100644 goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-v1-projects-goblet-locations-us-connections_1.json diff --git a/goblet/client.py b/goblet/client.py index 08671c80..989f7fd1 100644 --- a/goblet/client.py +++ b/goblet/client.py @@ -24,6 +24,7 @@ "iam": "v1", "cloudresourcemanager": "v3", "artifactregistry": "v1", + "storage": "v1", } @@ -284,3 +285,19 @@ def artifactregistry_repositories(self): calls="projects.locations.repositories", parent_schema="projects/{project_id}/locations/{location_id}", ) + + @property + def storage_buckets(self): + return Client( + "storage", + self.client_versions.get("storage", "v1"), + calls="buckets", + ) + + @property + def storage_objects(self): + return Client( + "storage", + self.client_versions.get("storage", "v1"), + calls="objects", + ) diff --git a/goblet/infrastructures/bq_spark_stored_procedure.py b/goblet/infrastructures/bq_spark_stored_procedure.py index def28045..d0c4fdc7 100644 --- a/goblet/infrastructures/bq_spark_stored_procedure.py +++ b/goblet/infrastructures/bq_spark_stored_procedure.py @@ -3,20 +3,15 @@ import inspect from googleapiclient.errors import HttpError - from goblet.infrastructures.infrastructure import Infrastructure -from goblet_gcp_client.client import get_default_project, get_default_location +from goblet_gcp_client.client import get_default_project, get_default_location, get_credentials from goblet.permissions import gcp_generic_resource_permissions -from google.cloud import storage -from google.api_core.exceptions import Conflict, NotFound +from goblet.client import VersionedClients log = logging.getLogger("goblet.deployer") 
log.setLevel(logging.getLevelName(os.getenv("GOBLET_LOG_LEVEL", "INFO"))) -storage_client = storage.Client() - - class BigQuerySparkStoredProcedure(Infrastructure): """ Cloud Big Query Spark Stored procedures. @@ -169,7 +164,8 @@ def destroy(self): self.destroy_bigquery_connection() for _, resource in self.resources.items(): self.destroy_routine(resource["dataset_id"], resource["routine_name"]) - self.destroy_bucket(self.name) + if not resource["local_code"]: + self.destroy_bucket(self.name) def deploy_bigquery_connection(self, connection_name, location): """ @@ -302,33 +298,65 @@ def create_routine_payload(self, resource): def deploy_bucket(self, bucket_name): try: log.info(f"creating storage bucket {bucket_name}") - storage_client.create_bucket( - bucket_name, - project=get_default_project(), - location=get_default_location(), + VersionedClients().storage_buckets.execute( + "insert", + params={ + "project": get_default_project(), + "body": { + "name": bucket_name, + }, + }, ) log.info(f"bucket {bucket_name} created") - except Conflict: - log.info(f"storage bucket {bucket_name} already exists") + except HttpError as e: + if e.resp.status == 409: + log.info(f"storage bucket {bucket_name} already exists") def upload_file(self, file, bucket_name): - bucket = storage_client.bucket(bucket_name) - destination_blob_name = file.split("/")[-1] - blob = bucket.blob(destination_blob_name) - blob.upload_from_filename(file) + log.debug(f"gs://{bucket_name}/{file}") + VersionedClients().storage_objects.execute( + "insert", + params={ + "bucket": bucket_name, + "uploadType": "media", + "media_body": file, + "body": { + "name": file, + }, + }, + ) log.info(f"uploaded file {file} to bucket {bucket_name}") - log.debug(f"gs://{bucket_name}/{destination_blob_name}") - return f"gs://{bucket_name}/{destination_blob_name}" + return f"gs://{bucket_name}/{file}" def destroy_bucket(self, bucket_name): + # Empty bucket + log.info(f"emptying storage bucket {bucket_name}") + objects = VersionedClients().storage_objects.execute( + "list", + params={ + "bucket": bucket_name, + }, + )["items"] + for obj in objects: + VersionedClients().storage_objects.execute( + "delete", + params={ + "bucket": bucket_name, + "object": obj["name"], + }, + ) try: - bucket = storage_client.get_bucket( - bucket_name, + log.info(f"deleting storage bucket {bucket_name}") + VersionedClients().storage_buckets.execute( + "delete", + params={ + "bucket": bucket_name, + }, ) - bucket.delete(force=True) log.info(f"bucket {bucket_name} deleted") - except NotFound: - log.info(f"bucket {bucket_name} already deleted") + except HttpError as e: + if e.resp.status == 404: + log.info(f"storage bucket {bucket_name} already deleted") @staticmethod def stringify_func(func): diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-bigquery-v2-projects-goblet-datasets-blogs-routines-test_spark_stored_procedure_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-bigquery-v2-projects-goblet-datasets-blogs-routines-test_spark_stored_procedure_1.json new file mode 100644 index 00000000..57238766 --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-bigquery-v2-projects-goblet-datasets-blogs-routines-test_spark_stored_procedure_1.json @@ -0,0 +1,4 @@ +{ + "headers": {}, + "body": {} +} \ No newline at end of file diff --git 
a/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o-spark.py_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o-spark.py_1.json new file mode 100644 index 00000000..57238766 --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o-spark.py_1.json @@ -0,0 +1,4 @@ +{ + "headers": {}, + "body": {} +} \ No newline at end of file diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-storage-v1-b-bqsparkstoredprocedure-remote-deploy_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-storage-v1-b-bqsparkstoredprocedure-remote-deploy_1.json new file mode 100644 index 00000000..57238766 --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-storage-v1-b-bqsparkstoredprocedure-remote-deploy_1.json @@ -0,0 +1,4 @@ +{ + "headers": {}, + "body": {} +} \ No newline at end of file diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-v1-projects-goblet-locations-us-connections-bqsparkstoredprocedure-remote-deploy_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-v1-projects-goblet-locations-us-connections-bqsparkstoredprocedure-remote-deploy_1.json new file mode 100644 index 00000000..57238766 --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/delete-v1-projects-goblet-locations-us-connections-bqsparkstoredprocedure-remote-deploy_1.json @@ -0,0 +1,4 @@ +{ + "headers": {}, + "body": {} +} \ No newline at end of file diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/get-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/get-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o_1.json new file mode 100644 index 00000000..d07fd4ad --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy-destroy/get-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o_1.json @@ -0,0 +1,27 @@ +{ + "headers": {}, + "body": { + "kind": "storage#objects", + "items": [ + { + "kind": "storage#object", + "id": "bqsparkstoredprocedure-remote-deploy/spark.py/1701456256852863", + "selfLink": "https://www.googleapis.com/storage/v1/b/bqsparkstoredprocedure-remote-deploy/o/spark.py", + "mediaLink": "https://storage.googleapis.com/download/storage/v1/b/bqsparkstoredprocedure-remote-deploy/o/spark.py?generation=1701456256852863&alt=media", + "name": "spark.py", + "bucket": "bqsparkstoredprocedure-remote-deploy", + "generation": "1701456256852863", + "metageneration": "1", + "contentType": "text/x-python", + "storageClass": "STANDARD", + "size": "26", + "md5Hash": "uPtlCcB2DsVh35EJS1MnOg==", + "crc32c": "tvHweQ==", + "etag": "CP/Gqb7y7oIDEAE=", + "timeCreated": "2023-12-01T18:44:16.878Z", + "updated": "2023-12-01T18:44:16.878Z", + "timeStorageClassUpdated": "2023-12-01T18:44:16.878Z" + } + ] + } +} \ No newline at end of file diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json new file mode 100644 index 00000000..84ba0806 --- /dev/null +++ 
b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json @@ -0,0 +1,20 @@ +{ + "headers": {}, + "body": { + "etag": "G3aGhxZhhdd0NxWe7/aYag==", + "routineReference": { + "projectId": "goblet", + "datasetId": "blogs", + "routineId": "test_spark_stored_procedure" + }, + "routineType": "PROCEDURE", + "creationTime": "1701456259115", + "lastModifiedTime": "1701456259115", + "language": "PYTHON", + "sparkOptions": { + "connection": "projects/goblet/locations/us/connections/bqsparkstoredprocedure-remote-deploy", + "runtimeVersion": "1.1", + "mainFileUri": "gs://bqsparkstoredprocedure-remote-deploy/spark.py" + } + } +} \ No newline at end of file diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-storage-v1-b_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-storage-v1-b_1.json new file mode 100644 index 00000000..0e3998b7 --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-storage-v1-b_1.json @@ -0,0 +1,29 @@ +{ + "headers": {}, + "body": { + "kind": "storage#bucket", + "selfLink": "https://www.googleapis.com/storage/v1/b/bqsparkstoredprocedure-remote-deploy", + "id": "bqsparkstoredprocedure-remote-deploy", + "name": "bqsparkstoredprocedure-remote-deploy", + "projectNumber": "98058317567", + "metageneration": "1", + "location": "US", + "storageClass": "STANDARD", + "etag": "CAE=", + "timeCreated": "2023-12-01T18:44:15.380Z", + "updated": "2023-12-01T18:44:15.380Z", + "iamConfiguration": { + "bucketPolicyOnly": { + "enabled": true, + "lockedTime": "2024-02-29T18:44:15.380Z" + }, + "uniformBucketLevelAccess": { + "enabled": true, + "lockedTime": "2024-02-29T18:44:15.380Z" + }, + "publicAccessPrevention": "inherited" + }, + "locationType": "multi-region", + "rpo": "DEFAULT" + } +} \ No newline at end of file diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-upload-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-upload-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o_1.json new file mode 100644 index 00000000..a44e0ca7 --- /dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-upload-storage-v1-b-bqsparkstoredprocedure-remote-deploy-o_1.json @@ -0,0 +1,22 @@ +{ + "headers": {}, + "body": { + "kind": "storage#object", + "id": "bqsparkstoredprocedure-remote-deploy/spark.py/1701456256852863", + "selfLink": "https://www.googleapis.com/storage/v1/b/bqsparkstoredprocedure-remote-deploy/o/spark.py", + "mediaLink": "https://storage.googleapis.com/download/storage/v1/b/bqsparkstoredprocedure-remote-deploy/o/spark.py?generation=1701456256852863&alt=media", + "name": "spark.py", + "bucket": "bqsparkstoredprocedure-remote-deploy", + "generation": "1701456256852863", + "metageneration": "1", + "contentType": "text/x-python", + "storageClass": "STANDARD", + "size": "26", + "md5Hash": "uPtlCcB2DsVh35EJS1MnOg==", + "crc32c": "tvHweQ==", + "etag": "CP/Gqb7y7oIDEAE=", + "timeCreated": "2023-12-01T18:44:16.878Z", + "updated": "2023-12-01T18:44:16.878Z", + "timeStorageClassUpdated": "2023-12-01T18:44:16.878Z" + } +} \ No newline at end of file diff --git a/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-v1-projects-goblet-locations-us-connections_1.json b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-v1-projects-goblet-locations-us-connections_1.json new file mode 100644 index 00000000..783d1a59 --- 
/dev/null +++ b/goblet/tests/data/http/bqsparkstoredprocedure-remote-deploy/post-v1-projects-goblet-locations-us-connections_1.json @@ -0,0 +1,11 @@ +{ + "headers": {}, + "body": { + "name": "projects/goblet/locations/us/connections/bqsparkstoredprocedure-remote-deploy", + "creationTime": "1701456253237", + "lastModifiedTime": "1701456253237", + "spark": { + "serviceAccountId": "bqcx-98058317567-w3og@gcp-sa-bigquery-consp.iam.gserviceaccount.com" + } + } +} \ No newline at end of file diff --git a/goblet/tests/test_bqsparkstoredprocedure.py b/goblet/tests/test_bqsparkstoredprocedure.py index c57eef4a..f26aac4c 100644 --- a/goblet/tests/test_bqsparkstoredprocedure.py +++ b/goblet/tests/test_bqsparkstoredprocedure.py @@ -1,10 +1,10 @@ +import os from goblet import Goblet from goblet_gcp_client import get_responses, get_response from goblet.infrastructures.bq_spark_stored_procedure import ( BigQuerySparkStoredProcedure, ) - class TestBqSparkStoredProcedure: def test_register_bqsparkstoredprocedure(self, monkeypatch): app = Goblet(function_name="bqsparkstoredprocedure_test") @@ -42,7 +42,7 @@ def spark_handler(): for key, value in resources.items(): assert expected_resources.get(key) == value - + def test_deploy_bqsparkstoredprocedure(self, monkeypatch): test_deploy_name = "bqsparkstoredprocedure-deploy" monkeypatch.setenv("GOOGLE_PROJECT", "goblet") @@ -116,3 +116,83 @@ def spark_handler(): responses = get_responses(test_deploy_name) assert len(responses) != 0 + + def test_deploy_bqsparkstoredprocedure_remote_code(self, monkeypatch): + test_name = "bqsparkstoredprocedure-remote-deploy" + monkeypatch.setenv("GOOGLE_PROJECT", "goblet") + monkeypatch.setenv("GOOGLE_LOCATION", "us") + monkeypatch.setenv("G_TEST_NAME", test_name) + monkeypatch.setenv("G_HTTP_TEST", "REPLAY") + + procedure_name = "test_spark_stored_procedure" + app = Goblet(function_name=test_name) + test_dataset_id = "blogs" + + with open("spark.py", "w") as f: + f.write( + """ + def main(): + print("Hello World!") + """ + ) + app.bqsparkstoredprocedure( + name=procedure_name, + dataset_id=test_dataset_id, + spark_file="spark.py", + ) + + app.deploy(skip_backend=True) + responses = get_responses(test_name) + assert len(responses) > 0 + + connection_response = get_response( + test_name, "post-v1-projects-goblet-locations-us-connections_1.json" + ) + assert ( + connection_response["body"]["name"] + == f"projects/goblet/locations/us/connections/{test_name}" + ) + assert "spark" in connection_response["body"] + + routine_response = get_response( + test_name, + "post-bigquery-v2-projects-goblet-datasets-blogs-routines_1.json", + ) + assert ( + routine_response["body"]["routineReference"]["routineId"] == procedure_name + ) + assert ( + routine_response["body"]["routineReference"]["datasetId"] == test_dataset_id + ) + assert ( + routine_response["body"]["sparkOptions"]["connection"] + == connection_response["body"]["name"] + ) + assert ( + routine_response["body"]["sparkOptions"]["mainFileUri"] + == f"gs://{test_name}/spark.py" + ) + os.remove("spark.py") + + def test_destroy_bqsparkstoredprocedure_remote_code(self, monkeypatch): + test_deploy_name = "bqsparkstoredprocedure-remote-deploy-destroy" + monkeypatch.setenv("GOOGLE_PROJECT", "goblet") + monkeypatch.setenv("GOOGLE_LOCATION", "us") + monkeypatch.setenv("G_TEST_NAME", test_deploy_name) + monkeypatch.setenv("G_HTTP_TEST", "REPLAY") + + test_name = "bqsparkstoredprocedure-remote-deploy" + procedure_name = "test_spark_stored_procedure" + app = Goblet(function_name=test_name) + 
test_dataset_id = "blogs" + + app.bqsparkstoredprocedure( + name=procedure_name, + dataset_id=test_dataset_id, + spark_file="spark.py", + ) + + app.destroy(skip_backend=True) + responses = get_responses(test_deploy_name) + + assert len(responses) != 0 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f44010f0..1dd1acfd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,5 @@ pydantic==2.1.1 PyYAML==6.0.1 google-cloud-logging==3.6.0 google-cloud-appengine-logging==1.3.1 -google-cloud-storage==2.13.0 goblet-gcp-client==0.1.9 protobuf==4.24.0 From f1de01a513edef12dcf6defdda349eec0f424dfd Mon Sep 17 00:00:00 2001 From: Mauricio Martinez Date: Fri, 1 Dec 2023 13:06:46 -0600 Subject: [PATCH 08/10] Lint --- goblet/client.py | 4 ++-- goblet/infrastructures/bq_spark_stored_procedure.py | 6 +++++- goblet/tests/test_bqsparkstoredprocedure.py | 11 ++++++----- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/goblet/client.py b/goblet/client.py index 989f7fd1..ceeb9584 100644 --- a/goblet/client.py +++ b/goblet/client.py @@ -285,7 +285,7 @@ def artifactregistry_repositories(self): calls="projects.locations.repositories", parent_schema="projects/{project_id}/locations/{location_id}", ) - + @property def storage_buckets(self): return Client( @@ -293,7 +293,7 @@ def storage_buckets(self): self.client_versions.get("storage", "v1"), calls="buckets", ) - + @property def storage_objects(self): return Client( diff --git a/goblet/infrastructures/bq_spark_stored_procedure.py b/goblet/infrastructures/bq_spark_stored_procedure.py index d0c4fdc7..a9ff9069 100644 --- a/goblet/infrastructures/bq_spark_stored_procedure.py +++ b/goblet/infrastructures/bq_spark_stored_procedure.py @@ -4,7 +4,10 @@ from googleapiclient.errors import HttpError from goblet.infrastructures.infrastructure import Infrastructure -from goblet_gcp_client.client import get_default_project, get_default_location, get_credentials +from goblet_gcp_client.client import ( + get_default_project, + get_default_location, +) from goblet.permissions import gcp_generic_resource_permissions from goblet.client import VersionedClients @@ -12,6 +15,7 @@ log = logging.getLogger("goblet.deployer") log.setLevel(logging.getLevelName(os.getenv("GOBLET_LOG_LEVEL", "INFO"))) + class BigQuerySparkStoredProcedure(Infrastructure): """ Cloud Big Query Spark Stored procedures. 
diff --git a/goblet/tests/test_bqsparkstoredprocedure.py b/goblet/tests/test_bqsparkstoredprocedure.py index f26aac4c..0f4a4b02 100644 --- a/goblet/tests/test_bqsparkstoredprocedure.py +++ b/goblet/tests/test_bqsparkstoredprocedure.py @@ -5,6 +5,7 @@ BigQuerySparkStoredProcedure, ) + class TestBqSparkStoredProcedure: def test_register_bqsparkstoredprocedure(self, monkeypatch): app = Goblet(function_name="bqsparkstoredprocedure_test") @@ -42,7 +43,7 @@ def spark_handler(): for key, value in resources.items(): assert expected_resources.get(key) == value - + def test_deploy_bqsparkstoredprocedure(self, monkeypatch): test_deploy_name = "bqsparkstoredprocedure-deploy" monkeypatch.setenv("GOOGLE_PROJECT", "goblet") @@ -116,7 +117,7 @@ def spark_handler(): responses = get_responses(test_deploy_name) assert len(responses) != 0 - + def test_deploy_bqsparkstoredprocedure_remote_code(self, monkeypatch): test_name = "bqsparkstoredprocedure-remote-deploy" monkeypatch.setenv("GOOGLE_PROJECT", "goblet") @@ -127,7 +128,7 @@ def test_deploy_bqsparkstoredprocedure_remote_code(self, monkeypatch): procedure_name = "test_spark_stored_procedure" app = Goblet(function_name=test_name) test_dataset_id = "blogs" - + with open("spark.py", "w") as f: f.write( """ @@ -173,7 +174,7 @@ def main(): == f"gs://{test_name}/spark.py" ) os.remove("spark.py") - + def test_destroy_bqsparkstoredprocedure_remote_code(self, monkeypatch): test_deploy_name = "bqsparkstoredprocedure-remote-deploy-destroy" monkeypatch.setenv("GOOGLE_PROJECT", "goblet") @@ -195,4 +196,4 @@ def test_destroy_bqsparkstoredprocedure_remote_code(self, monkeypatch): app.destroy(skip_backend=True) responses = get_responses(test_deploy_name) - assert len(responses) != 0 \ No newline at end of file + assert len(responses) != 0 From 6b52122de95cc2493b08e4e68656cccf3827c839 Mon Sep 17 00:00:00 2001 From: Mauricio Martinez Date: Fri, 1 Dec 2023 13:17:31 -0600 Subject: [PATCH 09/10] Add assert for replay_counts --- goblet/tests/test_bqsparkstoredprocedure.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/goblet/tests/test_bqsparkstoredprocedure.py b/goblet/tests/test_bqsparkstoredprocedure.py index 0f4a4b02..233f9de3 100644 --- a/goblet/tests/test_bqsparkstoredprocedure.py +++ b/goblet/tests/test_bqsparkstoredprocedure.py @@ -1,16 +1,21 @@ import os from goblet import Goblet -from goblet_gcp_client import get_responses, get_response +from goblet_gcp_client import ( + get_responses, + get_response, + reset_replay_count, + get_replay_count, +) from goblet.infrastructures.bq_spark_stored_procedure import ( BigQuerySparkStoredProcedure, ) - class TestBqSparkStoredProcedure: def test_register_bqsparkstoredprocedure(self, monkeypatch): app = Goblet(function_name="bqsparkstoredprocedure_test") monkeypatch.setenv("GOOGLE_PROJECT", "goblet") monkeypatch.setenv("GOOGLE_LOCATION", "us") + reset_replay_count() test_dataset_id = "blogs" @@ -43,6 +48,8 @@ def spark_handler(): for key, value in resources.items(): assert expected_resources.get(key) == value + + assert 0 == get_replay_count() def test_deploy_bqsparkstoredprocedure(self, monkeypatch): test_deploy_name = "bqsparkstoredprocedure-deploy" @@ -50,6 +57,7 @@ def test_deploy_bqsparkstoredprocedure(self, monkeypatch): monkeypatch.setenv("GOOGLE_LOCATION", "us") monkeypatch.setenv("G_TEST_NAME", test_deploy_name) monkeypatch.setenv("G_HTTP_TEST", "REPLAY") + reset_replay_count() test_name = "bqsparkstoredprocedure_test" procedure_name = "test_spark_stored_procedure" @@ -92,6 +100,7 
@@ def spark_handler(): routine_response["body"]["sparkOptions"]["connection"] == connection_response["body"]["name"] ) + assert 3 == get_replay_count() def test_destroy_bqsparkstoredprocedure(self, monkeypatch): test_deploy_name = "bqsparkstoredprocedure-destroy" @@ -99,7 +108,8 @@ def test_destroy_bqsparkstoredprocedure(self, monkeypatch): monkeypatch.setenv("GOOGLE_LOCATION", "us") monkeypatch.setenv("G_TEST_NAME", test_deploy_name) monkeypatch.setenv("G_HTTP_TEST", "REPLAY") - + reset_replay_count() + test_name = "bqsparkstoredprocedure_test" app = Goblet(function_name=test_name) test_dataset_id = "blogs" @@ -117,6 +127,7 @@ def spark_handler(): responses = get_responses(test_deploy_name) assert len(responses) != 0 + assert 3 == get_replay_count() def test_deploy_bqsparkstoredprocedure_remote_code(self, monkeypatch): test_name = "bqsparkstoredprocedure-remote-deploy" @@ -124,6 +135,7 @@ def test_deploy_bqsparkstoredprocedure_remote_code(self, monkeypatch): monkeypatch.setenv("GOOGLE_LOCATION", "us") monkeypatch.setenv("G_TEST_NAME", test_name) monkeypatch.setenv("G_HTTP_TEST", "REPLAY") + reset_replay_count() procedure_name = "test_spark_stored_procedure" app = Goblet(function_name=test_name) @@ -173,6 +185,7 @@ def main(): routine_response["body"]["sparkOptions"]["mainFileUri"] == f"gs://{test_name}/spark.py" ) + assert 5 == get_replay_count() os.remove("spark.py") def test_destroy_bqsparkstoredprocedure_remote_code(self, monkeypatch): @@ -181,6 +194,7 @@ def test_destroy_bqsparkstoredprocedure_remote_code(self, monkeypatch): monkeypatch.setenv("GOOGLE_LOCATION", "us") monkeypatch.setenv("G_TEST_NAME", test_deploy_name) monkeypatch.setenv("G_HTTP_TEST", "REPLAY") + reset_replay_count() test_name = "bqsparkstoredprocedure-remote-deploy" procedure_name = "test_spark_stored_procedure" @@ -197,3 +211,4 @@ def test_destroy_bqsparkstoredprocedure_remote_code(self, monkeypatch): responses = get_responses(test_deploy_name) assert len(responses) != 0 + assert 6 == get_replay_count() From ffa0cea7927d12951e71dcc0f0070bc4ef086c0d Mon Sep 17 00:00:00 2001 From: Mauricio Martinez Date: Fri, 1 Dec 2023 13:19:26 -0600 Subject: [PATCH 10/10] Lint --- goblet/tests/test_bqsparkstoredprocedure.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/goblet/tests/test_bqsparkstoredprocedure.py b/goblet/tests/test_bqsparkstoredprocedure.py index 233f9de3..83c15edf 100644 --- a/goblet/tests/test_bqsparkstoredprocedure.py +++ b/goblet/tests/test_bqsparkstoredprocedure.py @@ -1,7 +1,7 @@ import os from goblet import Goblet -from goblet_gcp_client import ( - get_responses, +from goblet_gcp_client import ( + get_responses, get_response, reset_replay_count, get_replay_count, @@ -10,6 +10,7 @@ BigQuerySparkStoredProcedure, ) + class TestBqSparkStoredProcedure: def test_register_bqsparkstoredprocedure(self, monkeypatch): app = Goblet(function_name="bqsparkstoredprocedure_test") @@ -48,7 +49,7 @@ def spark_handler(): for key, value in resources.items(): assert expected_resources.get(key) == value - + assert 0 == get_replay_count() def test_deploy_bqsparkstoredprocedure(self, monkeypatch): @@ -109,7 +110,7 @@ def test_destroy_bqsparkstoredprocedure(self, monkeypatch): monkeypatch.setenv("G_TEST_NAME", test_deploy_name) monkeypatch.setenv("G_HTTP_TEST", "REPLAY") reset_replay_count() - + test_name = "bqsparkstoredprocedure_test" app = Goblet(function_name=test_name) test_dataset_id = "blogs"