1 change: 1 addition & 0 deletions .gitignore
@@ -43,6 +43,7 @@ htmlcov/
.nox/
.coverage
.coverage.*
+cov.xml
.cache
nosetests.xml
coverage.xml
@@ -95,7 +95,7 @@ def write_batch(self, df: DataFrame):
logging.exception('error with spark write batch delta function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark write batch delta function', e.__traceback__)
+logging.exception(str(e))
raise e

def write_stream(self, df: DataFrame) -> DataFrame:
@@ -122,5 +122,5 @@ def write_stream(self, df: DataFrame) -> DataFrame:
logging.exception('error with spark write stream delta function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark write stream delta function', e.__traceback__)
+logging.exception(str(e))
raise e
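Across these handlers the generic except branch now logs only str(e). This is the safer form: logging.exception() attaches the active traceback to the record by itself, whereas the old calls passed e.__traceback__ as an extra %-format argument to a message with no placeholders, which fails at format time. A minimal sketch of the resulting pattern (write_batch_sketch and do_write are illustrative placeholders, not part of the PR):

import logging

def do_write(df):
    # Placeholder for the actual Spark write call.
    raise RuntimeError("simulated write failure")

def write_batch_sketch(df):
    try:
        do_write(df)
    except Exception as e:
        # logging.exception() records the active traceback automatically,
        # so only the message is passed here.
        logging.exception(str(e))
        raise e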
20 changes: 11 additions & 9 deletions src/sdk/python/rtdip_sdk/pipelines/destinations/spark/eventhub.py
@@ -13,6 +13,7 @@
# limitations under the License.

import logging
+import time
from pyspark.sql import DataFrame, SparkSession
from py4j.protocol import Py4JJavaError

@@ -37,7 +38,7 @@ class SparkEventhubDestination(DestinationInterface):
'''
options: dict

-def __init__(self,options: dict) -> None:
+def __init__(self, options: dict) -> None:
self.options = options

@staticmethod
@@ -52,10 +53,7 @@ def libraries():

@staticmethod
def settings() -> dict:
-return {
-    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
-    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog"
-}
+return {}

def pre_write_validation(self):
return True
@@ -80,24 +78,28 @@ def write_batch(self, df: DataFrame):
logging.exception('error with spark write batch eventhub function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark write batch eventhub function', e.__traceback__)
+logging.exception(str(e))
raise e

-def write_stream(self, df: DataFrame, options: dict, mode: str = "append") -> DataFrame:
+def write_stream(self, df: DataFrame):
'''
Writes steaming data to Eventhubs.
'''
try:
-return (df
+query = (df
.writeStream
.format("eventhubs")
.options(**self.options)
.start()
)
+while query.isActive:
+    if query.lastProgress:
+        logging.info(query.lastProgress)
+    time.sleep(30)

except Py4JJavaError as e:
logging.exception('error with spark write stream eventhub function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark write stream eventhub function', e.__traceback__)
+logging.exception(str(e))
raise e
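With this change write_stream() no longer returns the started query: it starts the stream and then blocks, logging query.lastProgress every 30 seconds while the query stays active. A usage sketch under that behaviour (run_eventhub_sink, its parameters, and the connection-string handling are assumptions for illustration; only SparkEventhubDestination comes from the PR):

import logging
from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.eventhub import SparkEventhubDestination

def run_eventhub_sink(streaming_df, encrypted_connection_string):
    # streaming_df: any streaming DataFrame; encrypted_connection_string: an
    # Event Hubs connection string prepared for the connector (assumed here).
    logging.basicConfig(level=logging.INFO)
    destination = SparkEventhubDestination({
        "eventhubs.connectionString": encrypted_connection_string
    })
    # Blocks here: progress is logged every 30 seconds until the query stops.
    destination.write_stream(streaming_df)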
@@ -79,5 +79,5 @@ def read_stream(self) -> DataFrame:
)

except Exception as e:
-logging.exception('error with spark read stream auto loader function', e.__traceback__)
+logging.exception(str(e))
raise e
4 changes: 2 additions & 2 deletions src/sdk/python/rtdip_sdk/pipelines/sources/spark/delta.py
@@ -81,7 +81,7 @@ def read_batch(self):
)

except Exception as e:
-logging.exception('error with spark read batch delta function', e.__traceback__)
+logging.exception(str(e))
raise e

def read_stream(self) -> DataFrame:
@@ -97,5 +97,5 @@ def read_stream(self) -> DataFrame:
)

except Exception as e:
-logging.exception('error with spark read stream delta function', e.__traceback__)
+logging.exception(str(e))
raise e
@@ -86,7 +86,7 @@ def read_batch(self):
logging.exception('error with spark read batch delta sharing function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark read batch delta sharing function', e.__traceback__)
+logging.exception(str(e))
raise e

def read_stream(self) -> DataFrame:
@@ -105,5 +105,5 @@ def read_stream(self) -> DataFrame:
logging.exception('error with spark read stream delta sharing function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark read stream delta sharing function', e.__traceback__)
+logging.exception(str(e))
raise e
6 changes: 2 additions & 4 deletions src/sdk/python/rtdip_sdk/pipelines/sources/spark/eventhub.py
@@ -85,8 +85,7 @@ def read_batch(self) -> DataFrame:
)

except Exception as e:
-print(e)
-logging.exception("error with spark read batch eventhub function")
+logging.exception(str(e))
raise e

def read_stream(self) -> DataFrame:
@@ -107,6 +106,5 @@ def read_stream(self) -> DataFrame:
)

except Exception as e:
-print(e)
-logging.exception("error with spark read stream eventhub function")
+logging.exception(str(e))
raise e
@@ -95,5 +95,5 @@ def execute(self) -> bool:
logging.exception('error with spark delta table create function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark delta table create function', e.__traceback__)
+logging.exception(str(e))
raise e
@@ -17,6 +17,7 @@
import os
import shutil
from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.delta import SparkDeltaDestination
+from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.eventhub import SparkEventhubDestination
from src.sdk.python.rtdip_sdk.pipelines.sources.spark.delta import SparkDeltaSource
from src.sdk.python.rtdip_sdk.pipelines.sources.spark.delta_sharing import SparkDeltaSharingSource
from src.sdk.python.rtdip_sdk.pipelines.sources.spark.eventhub import SparkEventhubSource
@@ -34,7 +35,7 @@

@pytest.fixture(scope="session")
def spark_session():
-component_list = [SparkDeltaSource(None, {}, "test_table"), SparkDeltaSharingSource(None, {}, "test_table"), SparkDeltaDestination("test_table", {}), SparkEventhubSource(None, {})]
+component_list = [SparkDeltaSource(None, {}, "test_table"), SparkDeltaSharingSource(None, {}, "test_table"), SparkDeltaDestination("test_table", {}), SparkEventhubSource(None, {}), SparkEventhubDestination({})]
task_libraries = Libraries()
task_libraries.get_libraries_from_components(component_list)
spark_configuration = SPARK_TESTING_CONFIGURATION.copy()
@@ -0,0 +1,39 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
sys.path.insert(0, '.')
import pytest
from pytest_mock import MockerFixture
from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.eventhub import SparkEventhubDestination
from tests.sdk.python.rtdip_sdk.pipelines._pipeline_utils.spark_configuration_constants import spark_session
from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.streaming import StreamingQuery

class TestStreamingQueryClass():
    isActive: bool = False

def test_spark_eventhub_write_batch(spark_session: SparkSession, mocker: MockerFixture):
    mocker.patch("pyspark.sql.DataFrame.write", new_callable=mocker.Mock(return_value=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(save=mocker.Mock(return_value=None))))))))
    expected_df = spark_session.createDataFrame([{"id": "1"}])
    eventhub_destination = SparkEventhubDestination({})
    actual = eventhub_destination.write_batch(expected_df)
    assert actual is None

def test_spark_eventhub_write_stream(spark_session: SparkSession, mocker: MockerFixture):
    mocker.patch("pyspark.sql.DataFrame.writeStream", new_callable=mocker.Mock(return_value=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(start=mocker.Mock(return_value=TestStreamingQueryClass()))))))))
    expected_df = spark_session.createDataFrame([{"id": "1"}])
    eventhub_destination = SparkEventhubDestination({})
    actual = eventhub_destination.write_stream(expected_df)
    assert actual is None
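The write_stream test patches DataFrame.writeStream with a chain of mocks so that .format().options().start() returns a TestStreamingQueryClass instance; because its isActive attribute is False, the 30-second polling loop added to write_stream never runs and the method returns None. A stripped-down sketch of that interaction (FakeQuery and monitor are illustrative names, not from the PR):

import logging
import time

class FakeQuery:
    # Mirrors TestStreamingQueryClass: the query reports itself as inactive.
    isActive: bool = False
    lastProgress = None

def monitor(query):
    # Same loop shape as the new write_stream: with isActive False the body
    # never executes, so the call returns immediately.
    while query.isActive:
        if query.lastProgress:
            logging.info(query.lastProgress)
        time.sleep(30)

monitor(FakeQuery())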