diff --git a/.gitignore b/.gitignore
index 2868da4a7..92c504d28 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,6 +43,7 @@ htmlcov/
 .nox/
 .coverage
 .coverage.*
+cov.xml
 .cache
 nosetests.xml
 coverage.xml
diff --git a/src/sdk/python/rtdip_sdk/pipelines/destinations/spark/delta.py b/src/sdk/python/rtdip_sdk/pipelines/destinations/spark/delta.py
index 6236bb8f8..d3c6f5bbc 100644
--- a/src/sdk/python/rtdip_sdk/pipelines/destinations/spark/delta.py
+++ b/src/sdk/python/rtdip_sdk/pipelines/destinations/spark/delta.py
@@ -95,7 +95,7 @@ def write_batch(self, df: DataFrame):
             logging.exception('error with spark write batch delta function', e.errmsg)
             raise e
         except Exception as e:
-            logging.exception('error with spark write batch delta function', e.__traceback__)
+            logging.exception(str(e))
             raise e
 
     def write_stream(self, df: DataFrame) -> DataFrame:
@@ -122,5 +122,5 @@ def write_stream(self, df: DataFrame) -> DataFrame:
             logging.exception('error with spark write stream delta function', e.errmsg)
             raise e
         except Exception as e:
-            logging.exception('error with spark write stream delta function', e.__traceback__)
+            logging.exception(str(e))
             raise e
\ No newline at end of file
diff --git a/src/sdk/python/rtdip_sdk/pipelines/destinations/spark/eventhub.py b/src/sdk/python/rtdip_sdk/pipelines/destinations/spark/eventhub.py
index d1abdf09f..9809ec808 100644
--- a/src/sdk/python/rtdip_sdk/pipelines/destinations/spark/eventhub.py
+++ b/src/sdk/python/rtdip_sdk/pipelines/destinations/spark/eventhub.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import logging
+import time
 
 from pyspark.sql import DataFrame, SparkSession
 from py4j.protocol import Py4JJavaError
@@ -37,7 +38,7 @@ class SparkEventhubDestination(DestinationInterface):
     '''
     options: dict
 
-    def __init__(self,options: dict) -> None:
+    def __init__(self, options: dict) -> None:
         self.options = options
 
     @staticmethod
@@ -52,10 +53,7 @@ def libraries():
 
     @staticmethod
    def settings() -> dict:
-        return {
-            "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
-            "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog"
-        }
+        return {}
 
     def pre_write_validation(self):
         return True
@@ -80,24 +78,28 @@ def write_batch(self, df: DataFrame):
             logging.exception('error with spark write batch eventhub function', e.errmsg)
             raise e
         except Exception as e:
-            logging.exception('error with spark write batch eventhub function', e.__traceback__)
+            logging.exception(str(e))
             raise e
 
-    def write_stream(self, df: DataFrame, options: dict, mode: str = "append") -> DataFrame:
+    def write_stream(self, df: DataFrame):
         '''
         Writes steaming data to Eventhubs.
 
         '''
         try:
-            return (df
+            query = (df
                 .writeStream
                 .format("eventhubs")
                 .options(**self.options)
                 .start()
             )
+            while query.isActive:
+                if query.lastProgress:
+                    logging.info(query.lastProgress)
+                time.sleep(30)
         except Py4JJavaError as e:
             logging.exception('error with spark write stream eventhub function', e.errmsg)
             raise e
         except Exception as e:
-            logging.exception('error with spark write stream eventhub function', e.__traceback__)
+            logging.exception(str(e))
             raise e
\ No newline at end of file
diff --git a/src/sdk/python/rtdip_sdk/pipelines/sources/spark/autoloader.py b/src/sdk/python/rtdip_sdk/pipelines/sources/spark/autoloader.py
index dda3d4c6c..c11f2717e 100644
--- a/src/sdk/python/rtdip_sdk/pipelines/sources/spark/autoloader.py
+++ b/src/sdk/python/rtdip_sdk/pipelines/sources/spark/autoloader.py
@@ -79,5 +79,5 @@ def read_stream(self) -> DataFrame:
             )
 
         except Exception as e:
-            logging.exception('error with spark read stream auto loader function', e.__traceback__)
+            logging.exception(str(e))
             raise e
\ No newline at end of file
diff --git a/src/sdk/python/rtdip_sdk/pipelines/sources/spark/delta.py b/src/sdk/python/rtdip_sdk/pipelines/sources/spark/delta.py
index c1a246992..af6c0057f 100644
--- a/src/sdk/python/rtdip_sdk/pipelines/sources/spark/delta.py
+++ b/src/sdk/python/rtdip_sdk/pipelines/sources/spark/delta.py
@@ -81,7 +81,7 @@ def read_batch(self):
             )
 
         except Exception as e:
-            logging.exception('error with spark read batch delta function', e.__traceback__)
+            logging.exception(str(e))
             raise e
 
     def read_stream(self) -> DataFrame:
@@ -97,5 +97,5 @@ def read_stream(self) -> DataFrame:
             )
 
         except Exception as e:
-            logging.exception('error with spark read stream delta function', e.__traceback__)
+            logging.exception(str(e))
             raise e
\ No newline at end of file
diff --git a/src/sdk/python/rtdip_sdk/pipelines/sources/spark/delta_sharing.py b/src/sdk/python/rtdip_sdk/pipelines/sources/spark/delta_sharing.py
index b5c254303..fcfed0d35 100644
--- a/src/sdk/python/rtdip_sdk/pipelines/sources/spark/delta_sharing.py
+++ b/src/sdk/python/rtdip_sdk/pipelines/sources/spark/delta_sharing.py
@@ -86,7 +86,7 @@ def read_batch(self):
             logging.exception('error with spark read batch delta sharing function', e.errmsg)
             raise e
         except Exception as e:
-            logging.exception('error with spark read batch delta sharing function', e.__traceback__)
+            logging.exception(str(e))
             raise e
 
     def read_stream(self) -> DataFrame:
@@ -105,5 +105,5 @@ def read_stream(self) -> DataFrame:
             logging.exception('error with spark read stream delta sharing function', e.errmsg)
             raise e
         except Exception as e:
-            logging.exception('error with spark read stream delta sharing function', e.__traceback__)
+            logging.exception(str(e))
             raise e
\ No newline at end of file
diff --git a/src/sdk/python/rtdip_sdk/pipelines/sources/spark/eventhub.py b/src/sdk/python/rtdip_sdk/pipelines/sources/spark/eventhub.py
index d12d69cb7..a9e3391ce 100644
--- a/src/sdk/python/rtdip_sdk/pipelines/sources/spark/eventhub.py
+++ b/src/sdk/python/rtdip_sdk/pipelines/sources/spark/eventhub.py
@@ -85,8 +85,7 @@ def read_batch(self) -> DataFrame:
             )
 
         except Exception as e:
-            print(e)
-            logging.exception("error with spark read batch eventhub function")
+            logging.exception(str(e))
             raise e
 
     def read_stream(self) -> DataFrame:
@@ -107,6 +106,5 @@ def read_stream(self) -> DataFrame:
             )
 
         except Exception as e:
-            print(e)
-            logging.exception("error with spark read stream eventhub function")
+            logging.exception(str(e))
             raise e
\ No newline at end of file
diff --git a/src/sdk/python/rtdip_sdk/pipelines/utilities/spark/delta_table_create.py b/src/sdk/python/rtdip_sdk/pipelines/utilities/spark/delta_table_create.py
index 20d6434e8..c9c8b2715 100644
--- a/src/sdk/python/rtdip_sdk/pipelines/utilities/spark/delta_table_create.py
+++ b/src/sdk/python/rtdip_sdk/pipelines/utilities/spark/delta_table_create.py
@@ -95,5 +95,5 @@ def execute(self) -> bool:
             logging.exception('error with spark delta table create function', e.errmsg)
             raise e
         except Exception as e:
-            logging.exception('error with spark delta table create function', e.__traceback__)
+            logging.exception(str(e))
             raise e
\ No newline at end of file
diff --git a/tests/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark_configuration_constants.py b/tests/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark_configuration_constants.py
index 3511a6b1d..98876144d 100644
--- a/tests/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark_configuration_constants.py
+++ b/tests/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark_configuration_constants.py
@@ -17,6 +17,7 @@
 import os
 import shutil
 from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.delta import SparkDeltaDestination
+from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.eventhub import SparkEventhubDestination
 from src.sdk.python.rtdip_sdk.pipelines.sources.spark.delta import SparkDeltaSource
 from src.sdk.python.rtdip_sdk.pipelines.sources.spark.delta_sharing import SparkDeltaSharingSource
 from src.sdk.python.rtdip_sdk.pipelines.sources.spark.eventhub import SparkEventhubSource
@@ -34,7 +35,7 @@
 
 @pytest.fixture(scope="session")
 def spark_session():
-    component_list = [SparkDeltaSource(None, {}, "test_table"), SparkDeltaSharingSource(None, {}, "test_table"), SparkDeltaDestination("test_table", {}), SparkEventhubSource(None, {})]
+    component_list = [SparkDeltaSource(None, {}, "test_table"), SparkDeltaSharingSource(None, {}, "test_table"), SparkDeltaDestination("test_table", {}), SparkEventhubSource(None, {}), SparkEventhubDestination({})]
     task_libraries = Libraries()
     task_libraries.get_libraries_from_components(component_list)
     spark_configuration = SPARK_TESTING_CONFIGURATION.copy()
diff --git a/tests/sdk/python/rtdip_sdk/pipelines/destinations/spark/test_eventhub.py b/tests/sdk/python/rtdip_sdk/pipelines/destinations/spark/test_eventhub.py
new file mode 100644
index 000000000..d3122ff04
--- /dev/null
+++ b/tests/sdk/python/rtdip_sdk/pipelines/destinations/spark/test_eventhub.py
@@ -0,0 +1,39 @@
+# Copyright 2022 RTDIP
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+sys.path.insert(0, '.')
+import pytest
+from pytest_mock import MockerFixture
+from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.eventhub import SparkEventhubDestination
+from tests.sdk.python.rtdip_sdk.pipelines._pipeline_utils.spark_configuration_constants import spark_session
+from pyspark.sql import SparkSession, DataFrame
+from pyspark.sql.streaming import StreamingQuery
+
+class TestStreamingQueryClass():
+    isActive: bool = False
+
+def test_spark_eventhub_write_batch(spark_session: SparkSession, mocker: MockerFixture):
+    mocker.patch("pyspark.sql.DataFrame.write", new_callable=mocker.Mock(return_value=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(save=mocker.Mock(return_value=None))))))))
+    expected_df = spark_session.createDataFrame([{"id": "1"}])
+    eventhub_destination = SparkEventhubDestination({})
+    actual = eventhub_destination.write_batch(expected_df)
+    assert actual is None
+
+def test_spark_eventhub_write_stream(spark_session: SparkSession, mocker: MockerFixture):
+    mocker.patch("pyspark.sql.DataFrame.writeStream", new_callable=mocker.Mock(return_value=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(start=mocker.Mock(return_value=TestStreamingQueryClass()))))))))
+    expected_df = spark_session.createDataFrame([{"id": "1"}])
+    eventhub_destination = SparkEventhubDestination({})
+    actual = eventhub_destination.write_stream(expected_df)
+    assert actual is None
\ No newline at end of file