1 change: 1 addition & 0 deletions .gitignore
@@ -43,6 +43,7 @@ htmlcov/
.nox/
.coverage
.coverage.*
+cov.xml
.cache
nosetests.xml
coverage.xml
@@ -95,7 +95,7 @@ def write_batch(self, df: DataFrame):
logging.exception('error with spark write batch delta function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark write batch delta function', e.__traceback__)
+logging.exception(str(e))
raise e

def write_stream(self, df: DataFrame) -> DataFrame:
@@ -122,5 +122,5 @@ def write_stream(self, df: DataFrame) -> DataFrame:
logging.exception('error with spark write stream delta function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark write stream delta function', e.__traceback__)
+logging.exception(str(e))
raise e
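Across these handlers the generic except branch now logs only str(e). This is the safer form: logging.exception() attaches the active traceback to the record by itself, whereas the old calls passed e.__traceback__ as an extra %-format argument to a message with no placeholders, which fails at format time. A minimal sketch of the resulting pattern (write_batch_sketch and do_write are illustrative placeholders, not part of the PR):

import logging

def do_write(df):
    # Placeholder for the actual Spark write call.
    raise RuntimeError("simulated write failure")

def write_batch_sketch(df):
    try:
        do_write(df)
    except Exception as e:
        # logging.exception() records the active traceback automatically,
        # so only the message is passed here.
        logging.exception(str(e))
        raise e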
20 changes: 11 additions & 9 deletions src/sdk/python/rtdip_sdk/pipelines/destinations/spark/eventhub.py
@@ -13,6 +13,7 @@
# limitations under the License.

import logging
+import time
from pyspark.sql import DataFrame, SparkSession
from py4j.protocol import Py4JJavaError

@@ -37,7 +38,7 @@ class SparkEventhubDestination(DestinationInterface):
'''
options: dict

-def __init__(self,options: dict) -> None:
+def __init__(self, options: dict) -> None:
self.options = options

@staticmethod
@@ -52,10 +53,7 @@ def libraries():

@staticmethod
def settings() -> dict:
-return {
-    "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
-    "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog"
-}
+return {}

def pre_write_validation(self):
return True
@@ -80,24 +78,28 @@ def write_batch(self, df: DataFrame):
logging.exception('error with spark write batch eventhub function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark write batch eventhub function', e.__traceback__)
+logging.exception(str(e))
raise e

-def write_stream(self, df: DataFrame, options: dict, mode: str = "append") -> DataFrame:
+def write_stream(self, df: DataFrame):
'''
Writes steaming data to Eventhubs.
'''
try:
-return (df
+query = (df
.writeStream
.format("eventhubs")
.options(**self.options)
.start()
)
+while query.isActive:
+    if query.lastProgress:
+        logging.info(query.lastProgress)
+    time.sleep(30)

except Py4JJavaError as e:
logging.exception('error with spark write stream eventhub function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark write stream eventhub function', e.__traceback__)
+logging.exception(str(e))
raise e
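With this change write_stream() no longer returns the started query: it starts the stream and then blocks, logging query.lastProgress every 30 seconds while the query stays active. A usage sketch under that behaviour (run_eventhub_sink, its parameters, and the connection-string handling are assumptions for illustration; only SparkEventhubDestination comes from the PR):

import logging
from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.eventhub import SparkEventhubDestination

def run_eventhub_sink(streaming_df, encrypted_connection_string):
    # streaming_df: any streaming DataFrame; encrypted_connection_string: an
    # Event Hubs connection string prepared for the connector (assumed here).
    logging.basicConfig(level=logging.INFO)
    destination = SparkEventhubDestination({
        "eventhubs.connectionString": encrypted_connection_string
    })
    # Blocks here: progress is logged every 30 seconds until the query stops.
    destination.write_stream(streaming_df)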
@@ -79,5 +79,5 @@ def read_stream(self) -> DataFrame:
)

except Exception as e:
-logging.exception('error with spark read stream auto loader function', e.__traceback__)
+logging.exception(str(e))
raise e
4 changes: 2 additions & 2 deletions src/sdk/python/rtdip_sdk/pipelines/sources/spark/delta.py
@@ -81,7 +81,7 @@ def read_batch(self):
)

except Exception as e:
-logging.exception('error with spark read batch delta function', e.__traceback__)
+logging.exception(str(e))
raise e

def read_stream(self) -> DataFrame:
@@ -97,5 +97,5 @@ def read_stream(self) -> DataFrame:
)

except Exception as e:
-logging.exception('error with spark read stream delta function', e.__traceback__)
+logging.exception(str(e))
raise e
@@ -86,7 +86,7 @@ def read_batch(self):
logging.exception('error with spark read batch delta sharing function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark read batch delta sharing function', e.__traceback__)
+logging.exception(str(e))
raise e

def read_stream(self) -> DataFrame:
@@ -105,5 +105,5 @@ def read_stream(self) -> DataFrame:
logging.exception('error with spark read stream delta sharing function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark read stream delta sharing function', e.__traceback__)
+logging.exception(str(e))
raise e
6 changes: 2 additions & 4 deletions src/sdk/python/rtdip_sdk/pipelines/sources/spark/eventhub.py
@@ -85,8 +85,7 @@ def read_batch(self) -> DataFrame:
)

except Exception as e:
-print(e)
-logging.exception("error with spark read batch eventhub function")
+logging.exception(str(e))
raise e

def read_stream(self) -> DataFrame:
@@ -107,6 +106,5 @@ def read_stream(self) -> DataFrame:
)

except Exception as e:
-print(e)
-logging.exception("error with spark read stream eventhub function")
+logging.exception(str(e))
raise e
@@ -95,5 +95,5 @@ def execute(self) -> bool:
logging.exception('error with spark delta table create function', e.errmsg)
raise e
except Exception as e:
-logging.exception('error with spark delta table create function', e.__traceback__)
+logging.exception(str(e))
raise e
@@ -17,6 +17,7 @@
import os
import shutil
from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.delta import SparkDeltaDestination
+from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.eventhub import SparkEventhubDestination
from src.sdk.python.rtdip_sdk.pipelines.sources.spark.delta import SparkDeltaSource
from src.sdk.python.rtdip_sdk.pipelines.sources.spark.delta_sharing import SparkDeltaSharingSource
from src.sdk.python.rtdip_sdk.pipelines.sources.spark.eventhub import SparkEventhubSource
@@ -34,7 +35,7 @@

@pytest.fixture(scope="session")
def spark_session():
-component_list = [SparkDeltaSource(None, {}, "test_table"), SparkDeltaSharingSource(None, {}, "test_table"), SparkDeltaDestination("test_table", {}), SparkEventhubSource(None, {})]
+component_list = [SparkDeltaSource(None, {}, "test_table"), SparkDeltaSharingSource(None, {}, "test_table"), SparkDeltaDestination("test_table", {}), SparkEventhubSource(None, {}), SparkEventhubDestination({})]
task_libraries = Libraries()
task_libraries.get_libraries_from_components(component_list)
spark_configuration = SPARK_TESTING_CONFIGURATION.copy()
@@ -0,0 +1,39 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
sys.path.insert(0, '.')
import pytest
from pytest_mock import MockerFixture
from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.eventhub import SparkEventhubDestination
from tests.sdk.python.rtdip_sdk.pipelines._pipeline_utils.spark_configuration_constants import spark_session
from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.streaming import StreamingQuery

class TestStreamingQueryClass():
    isActive: bool = False

def test_spark_eventhub_write_batch(spark_session: SparkSession, mocker: MockerFixture):
    mocker.patch("pyspark.sql.DataFrame.write", new_callable=mocker.Mock(return_value=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(save=mocker.Mock(return_value=None))))))))
    expected_df = spark_session.createDataFrame([{"id": "1"}])
    eventhub_destination = SparkEventhubDestination({})
    actual = eventhub_destination.write_batch(expected_df)
    assert actual is None

def test_spark_eventhub_write_stream(spark_session: SparkSession, mocker: MockerFixture):
    mocker.patch("pyspark.sql.DataFrame.writeStream", new_callable=mocker.Mock(return_value=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(start=mocker.Mock(return_value=TestStreamingQueryClass()))))))))
    expected_df = spark_session.createDataFrame([{"id": "1"}])
    eventhub_destination = SparkEventhubDestination({})
    actual = eventhub_destination.write_stream(expected_df)
    assert actual is None
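The write_stream test patches DataFrame.writeStream with a chain of mocks so that .format().options().start() returns a TestStreamingQueryClass instance; because its isActive attribute is False, the 30-second polling loop added to write_stream never runs and the method returns None. A stripped-down sketch of that interaction (FakeQuery and monitor are illustrative names, not from the PR):

import logging
import time

class FakeQuery:
    # Mirrors TestStreamingQueryClass: the query reports itself as inactive.
    isActive: bool = False
    lastProgress = None

def monitor(query):
    # Same loop shape as the new write_stream: with isActive False the body
    # never executes, so the call returns immediately.
    while query.isActive:
        if query.lastProgress:
            logging.info(query.lastProgress)
        time.sleep(30)

monitor(FakeQuery())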