4 changes: 3 additions & 1 deletion .devcontainer/devcontainer.json
@@ -28,13 +28,15 @@
"python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
"python.linting.pylintPath": "/opt/conda/bin/pylint",
"python.testing.pytestArgs": [
"--cov=.",
"--cov-report=xml:cov.xml",
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.cwd": "${workspaceFolder}",
"python.analysis.extraPaths": ["${workspaceFolder}"],
"python.defaultInterpreterPath": "/opt/conda/envs/rtdip-sdk/bin/python",
"python.defaultInterpreterPath": "~/micromamba/envs/rtdip-sdk/bin/python",
"terminal.integrated.env.linux":{
"PYTHONPATH": "${workspaceFolder}:${env:PYTHONPATH}"
},
14 changes: 13 additions & 1 deletion .vscode/launch.json
@@ -7,6 +7,18 @@
"request": "attach",
"port": 9091,
"preLaunchTask": "func: host start",
}
},
{
"name": "Python: Debug Tests",
"type": "python",
"request": "launch",
"program": "${file}",
"purpose": ["debug-test"],
"console": "internalConsole",
"env": {
"PYTEST_ADDOPTS": "--no-cov"
},
"justMyCode": false
}
]
}
4 changes: 3 additions & 1 deletion .vscode/settings.json
@@ -12,13 +12,15 @@
"python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
"python.linting.pylintPath": "/opt/conda/bin/pylint",
"python.testing.pytestArgs": [
"--cov=.",
"--cov-report=xml:cov.xml",
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
"python.testing.cwd": "${workspaceFolder}",
"python.analysis.extraPaths": ["${workspaceFolder}"],
"python.defaultInterpreterPath": "/opt/conda/envs/rtdip-sdk/bin/python",
"python.defaultInterpreterPath": "~/micromamba/envs/rtdip-sdk/bin/python",
"terminal.integrated.env.osx":{
"PYTHONPATH": "${workspaceFolder}:${env:PYTHONPATH}"
},
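
The coverage flags added to pytestArgs in both the devcontainer and workspace settings mean the IDE test runner now writes a cov.xml report on every run. For reference, a minimal sketch of the equivalent invocation from Python, assuming pytest and pytest-cov are installed in the rtdip-sdk environment:

import sys
import pytest

# Mirror of the pytestArgs above: collect tests from ./tests, measure coverage
# across the repository and write an XML coverage report to cov.xml.
sys.exit(pytest.main(["--cov=.", "--cov-report=xml:cov.xml", "tests"]))
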
2 changes: 2 additions & 0 deletions docs/sdk/code-reference/pipelines/secrets/databricks.md
@@ -0,0 +1,2 @@
# Databricks Secret Scope
::: src.sdk.python.rtdip_sdk.pipelines.secrets.databricks
2 changes: 2 additions & 0 deletions mkdocs.yml
@@ -139,6 +139,8 @@ nav:
- Utilities:
- Spark:
- Delta Table Create: sdk/code-reference/pipelines/utilities/spark/delta_table_create.md
- Secrets:
- Databricks: sdk/code-reference/pipelines/secrets/databricks.md
- API:
- Overview: api/overview.md
- Authentication: api/authentication.md
16 changes: 15 additions & 1 deletion src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py
@@ -55,9 +55,23 @@ def get_spark_session(self) -> SparkSession:
return spark_session

except Exception as e:
logging.exception('error with spark session function', e.__traceback__)
logging.exception(str(e))
raise e

def get_dbutils(
spark: SparkSession,
): # please note that this function is used in mocking by its name
try:
from pyspark.dbutils import DBUtils # noqa

if "dbutils" not in locals():
utils = DBUtils(spark)
return utils
else:
return locals().get("dbutils")
except ImportError:
return None

# # TODO: Implemented in DBR 11 but not yet available in open source pyspark
# from pyspark.sql.streaming import StreamingQueryListener
# class SparkStreamingListener(StreamingQueryListener):
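
The newly added get_dbutils helper returns a DBUtils handle when pyspark.dbutils is importable (i.e. on Databricks) and None otherwise, so callers can degrade gracefully. A minimal usage sketch, assuming the module is importable as rtdip_sdk.pipelines._pipeline_utils.spark once the package is installed; the secret scope and key names are illustrative:

from pyspark.sql import SparkSession
from rtdip_sdk.pipelines._pipeline_utils.spark import get_dbutils  # assumed installed import path

spark = SparkSession.builder.getOrCreate()
dbutils = get_dbutils(spark)

if dbutils is not None:
    # On Databricks, DBUtils(spark) is returned and the secrets API is available.
    connection_string = dbutils.secrets.get(scope="my-scope", key="my-key")  # illustrative names
else:
    # Locally, the ImportError branch returns None, so fall back to another configuration source.
    connection_string = None
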
3 changes: 0 additions & 3 deletions src/sdk/python/rtdip_sdk/pipelines/deploy/databricks.py
@@ -64,9 +64,6 @@ def deploy(self) -> bool:
databricks_job_task.libraries.append(DatabricksLibraries(maven=DatabricksLibrariesMaven(coordinates=maven_library.to_string(), repo=maven_library.repo)))
for wheel_library in libraries.pythonwheel_libraries:
databricks_job_task.libraries.append(DatabricksLibraries(whl=wheel_library))

# convert to string for json conversion later
step.component = step.component.__name__

try:
rtdip_version = version("rtdip-sdk")
Changes to the module defining the Task base class (file name not captured):
@@ -6,21 +6,7 @@
from pyspark.sql import SparkSession
import sys


def get_dbutils(
spark: SparkSession,
): # please note that this function is used in mocking by its name
try:
from pyspark.dbutils import DBUtils # noqa

if "dbutils" not in locals():
utils = DBUtils(spark)
return utils
else:
return locals().get("dbutils")
except ImportError:
return None

from ....._pipeline_utils.spark import get_dbutils

class Task(ABC):
"""
Changes to the RTDIP pipeline task module (file name not captured):
@@ -24,10 +24,10 @@ class RTDIPPipelineTask(Task):
def launch(self):
self.logger.info("Launching RTDIP Pipeline Task")
self.logger.info("Job to execute {}".format(sys.argv[0]))
pipeline_job = PipelineJobFromJson(sys.argv[0])
pipeline_job = PipelineJobFromJson(sys.argv[0]).convert()
pipeline_job_execute = PipelineJobExecute(pipeline_job)
pipeline_job_execute.run()
self.logger.info("RTDIP Pipeline Task finished!")
self.logger.info("RTDIP Pipeline Task completed")

# if you're using python_wheel_task, you'll need the entrypoint function to be used in setup.py
def entrypoint(): # pragma: no cover
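
The .convert() fix above matters because PipelineJobFromJson only stores the JSON string; convert() is what deserialises it into a PipelineJob that PipelineJobExecute can run. A minimal sketch of the corrected flow, assuming the classes are importable from rtdip_sdk.pipelines.execute.job; the JSON payload arrives via sys.argv exactly as in launch() above:

import sys
from rtdip_sdk.pipelines.execute.job import PipelineJobFromJson, PipelineJobExecute  # assumed import path

pipeline_json = sys.argv[0]                                   # serialised pipeline job passed to the task
pipeline_job = PipelineJobFromJson(pipeline_json).convert()   # JSON string -> PipelineJob model
PipelineJobExecute(pipeline_job).run()                        # execute the deserialised job
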
Changes to the Spark Delta destination module (file name not captured):
@@ -56,6 +56,10 @@ def __init__(self, table_name:str, options: dict, mode: str = "append", trigger=

@staticmethod
def system_type():
'''
Attributes:
SystemType (Environment): Requires PYSPARK
'''
return SystemType.PYSPARK

@staticmethod
@@ -92,7 +96,7 @@ def write_batch(self, df: DataFrame):
)

except Py4JJavaError as e:
logging.exception('error with spark write batch delta function', e.errmsg)
logging.exception(e.errmsg)
raise e
except Exception as e:
logging.exception(str(e))
@@ -119,7 +123,7 @@ def write_stream(self, df: DataFrame) -> DataFrame:
time.sleep(30)

except Py4JJavaError as e:
logging.exception('error with spark write stream delta function', e.errmsg)
logging.exception(e.errmsg)
raise e
except Exception as e:
logging.exception(str(e))
Changes to the Spark Eventhub destination module (file name not captured):
@@ -43,6 +43,10 @@ def __init__(self, options: dict) -> None:

@staticmethod
def system_type():
'''
Attributes:
SystemType (Environment): Requires PYSPARK
'''
return SystemType.PYSPARK

@staticmethod
@@ -75,7 +79,7 @@ def write_batch(self, df: DataFrame):
)

except Py4JJavaError as e:
logging.exception('error with spark write batch eventhub function', e.errmsg)
logging.exception(e.errmsg)
raise e
except Exception as e:
logging.exception(str(e))
@@ -98,7 +102,7 @@ def write_stream(self, df: DataFrame):
time.sleep(30)

except Py4JJavaError as e:
logging.exception('error with spark write stream eventhub function', e.errmsg)
logging.exception(e.errmsg)
raise e
except Exception as e:
logging.exception(str(e))
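
The logging change repeated across the Delta and Eventhub writers drops the extra positional argument. A small standard-library-only illustration of why: logging.exception already attaches the active traceback, and any additional positional arguments are treated as %-format arguments for the message rather than extra context.

import logging

try:
    raise RuntimeError("simulated Py4J failure")  # stand-in for a Py4JJavaError
except Exception as e:
    # Logs at ERROR level with the full traceback appended automatically;
    # passing e.errmsg or e.__traceback__ as a second argument is unnecessary.
    logging.exception(str(e))
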
57 changes: 50 additions & 7 deletions src/sdk/python/rtdip_sdk/pipelines/execute/job.py
@@ -19,10 +19,18 @@
from .container import Clients, Configs
from .models import PipelineJob, PipelineTask, PipelineStep
from .._pipeline_utils.models import Libraries, SystemType
from ..sources import * # NOSONAR
from ..sources.interfaces import SourceInterface
from ..transformers import * # NOSONAR
from ..transformers.interfaces import TransformerInterface
from ..destinations import * # NOSONAR
from ..destinations.interfaces import DestinationInterface
from ..utilities import * # NOSONAR
from ..utilities.interfaces import UtilitiesInterface
from ..secrets import * # NOSONAR
from ..secrets.models import PipelineSecret



class PipelineJobExecute():
'''
@@ -37,6 +45,21 @@ class PipelineJobExecute():
def __init__(self, job: PipelineJob, batch_job: bool = False):
self.job = job

def _get_provider_attributes(self, provider: providers.Factory, component: object) -> providers.Factory:
attributes = getattr(component, '__annotations__', {}).items()
# add spark session, if needed
for key, value in attributes:
# if isinstance(value, SparkSession): # TODO: fix this as value does not seem to be an instance of SparkSession
if key == "spark":
provider.add_kwargs(spark=Clients.spark_client().spark_session)
return provider

def _get_secret_provider_attributes(self, pipeline_secret: PipelineSecret) -> providers.Factory:
secret_provider = providers.Factory(pipeline_secret.type)
secret_provider = self._get_provider_attributes(secret_provider, pipeline_secret.type)
secret_provider.add_kwargs(vault=pipeline_secret.vault, key=pipeline_secret.key)
return secret_provider

def _tasks_order(self, task_list: list[PipelineTask]):
'''
Orders tasks within a job
@@ -107,16 +130,23 @@ def _task_setup_dependency_injection(self, step_list: list[PipelineStep]):
for step in step_list:
# setup factory provider for component
provider = providers.Factory(step.component)
attributes = getattr(step.component, '__annotations__', {}).items()
# add spark session, if needed
for key, value in attributes:
# if isinstance(value, SparkSession): # TODO: fix this as value does not seem to be an instance of SparkSession
if key == "spark":
provider.add_kwargs(spark=Clients.spark_client().spark_session)
provider = self._get_provider_attributes(provider, step.component)

# add parameters
if isinstance(step.component, DestinationInterface):
step.component_parameters["query_name"] = step.name

# get secrets
for param_key, param_value in step.component_parameters.items():
if isinstance(param_value, PipelineSecret):
step.component_parameters[param_key] = self._get_secret_provider_attributes(param_value)().get()
if isinstance(param_value, dict):
for key, value in param_value.items():
if isinstance(value, PipelineSecret):
step.component_parameters[param_key][key] = self._get_secret_provider_attributes(value)().get()

provider.add_kwargs(**step.component_parameters)

# set provider
container.set_provider(
step.name,
@@ -175,8 +205,16 @@ class PipelineJobFromJson():
'''
pipeline_json: str

def init(self, pipeline_json: str):
def __init__(self, pipeline_json: str):
self.pipeline_json = pipeline_json

def _try_convert_to_pipeline_secret(self, value):
try:
if "type" in value:
value["type"] = getattr(sys.modules[__name__], value["type"])
return PipelineSecret.parse_obj(value)
except: # NOSONAR
return value

def convert(self) -> PipelineJob:
pipeline_job_dict = json.loads(self.pipeline_json)
@@ -185,6 +223,11 @@ def convert(self) -> PipelineJob:
for task in pipeline_job_dict["task_list"]:
for step in task["step_list"]:
step["component"] = getattr(sys.modules[__name__], step["component"])
for param_key, param_value in step["component_parameters"].items():
step["component_parameters"][param_key] = self._try_convert_to_pipeline_secret(param_value)
if not isinstance(step["component_parameters"][param_key], PipelineSecret) and isinstance(param_value, dict):
for key, value in param_value.items():
step["component_parameters"][param_key][key] = self._try_convert_to_pipeline_secret(value)

return PipelineJob(**pipeline_job_dict)
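
A self-contained sketch of the new secret handling in _task_setup_dependency_injection, using stand-in classes so it runs anywhere; the real secret implementations in the new secrets package expose the same vault/key constructor arguments and get() method that job.py relies on:

class FakeVaultSecret:
    """Stand-in secret implementation: constructed with vault and key, value returned by get()."""
    def __init__(self, vault: str, key: str):
        self.vault = vault
        self.key = key

    def get(self) -> str:
        return "secret-for-{}/{}".format(self.vault, self.key)


class PipelineSecret:
    """Stand-in mirroring the fields job.py relies on: type, vault and key."""
    def __init__(self, type: type, vault: str, key: str):
        self.type = type
        self.vault = vault
        self.key = key


# Parameters as a user might declare them, with a secret nested inside an options dict.
component_parameters = {
    "options": {
        "eventhubs.connectionString": PipelineSecret(
            type=FakeVaultSecret, vault="rtdip", key="eventhub-connection-string"
        )
    }
}

# Outline of what the dependency-injection step now does before adding kwargs:
# PipelineSecret values at the top level, or nested one dictionary level down,
# are replaced by the result of instantiating the secret class and calling get().
for param_key, param_value in component_parameters.items():
    if isinstance(param_value, PipelineSecret):
        component_parameters[param_key] = param_value.type(
            vault=param_value.vault, key=param_value.key
        ).get()
    elif isinstance(param_value, dict):
        for key, value in param_value.items():
            if isinstance(value, PipelineSecret):
                param_value[key] = value.type(vault=value.vault, key=value.key).get()

print(component_parameters)
# {'options': {'eventhubs.connectionString': 'secret-for-rtdip/eventhub-connection-string'}}
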

22 changes: 18 additions & 4 deletions src/sdk/python/rtdip_sdk/pipelines/execute/models.py
@@ -12,13 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Optional, List, Type, Union
from typing import Optional, Type, Union
from pydantic import BaseModel

from abc import ABCMeta
from ..sources.interfaces import SourceInterface
from ..transformers.interfaces import TransformerInterface
from ..destinations.interfaces import DestinationInterface
from ..utilities.interfaces import UtilitiesInterface
from ..secrets.models import PipelineSecret

class PipelineStep(BaseModel):
name: str
@@ -30,7 +31,8 @@ class PipelineStep(BaseModel):

class Config:
json_encoders = {
Union[Type[SourceInterface], Type[TransformerInterface], Type[DestinationInterface], Type[UtilitiesInterface]]: lambda x: x.__name__
ABCMeta: lambda x: x.__name__,
PipelineSecret: lambda x: {'__type__': "PipelineSecret", "__values__": x.dict()}
}

class PipelineTask(BaseModel):
@@ -40,8 +42,20 @@ class PipelineTask(BaseModel):
step_list: list[PipelineStep]
batch_task: Optional[bool]

class Config:
json_encoders = {
ABCMeta: lambda x: x.__name__,
PipelineSecret: lambda x: {'__type__': "PipelineSecret", "__values__": x.dict()}
}

class PipelineJob(BaseModel):
name: str
description: str
version: str
task_list: list[PipelineTask]
task_list: list[PipelineTask]

class Config:
json_encoders = {
ABCMeta: lambda x: x.__name__,
PipelineSecret: lambda x: {'__type__': "PipelineSecret", "__values__": x.dict()}
}
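
The ABCMeta encoder serialises component classes by name when a pipeline job is dumped to JSON, which is presumably why the manual step.component = step.component.__name__ conversion in deploy/databricks.py could be deleted. A tiny self-contained illustration of why keying the encoder on ABCMeta catches every component class:

from abc import ABC, ABCMeta

class SourceInterface(ABC):               # stand-in for the SDK's component interfaces
    pass

class MyEventhubSource(SourceInterface):  # stand-in concrete component
    pass

# Component classes inherit (indirectly) from ABC, so the class object itself
# has ABCMeta as its metaclass; an encoder keyed on ABCMeta therefore matches
# any component class and can serialise it as its name.
print(type(MyEventhubSource) is ABCMeta)  # True
print(MyEventhubSource.__name__)          # MyEventhubSource
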
15 changes: 15 additions & 0 deletions src/sdk/python/rtdip_sdk/pipelines/secrets/__init__.py
@@ -0,0 +1,15 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .databricks import *