@@ -14,6 +14,7 @@

import sys
sys.path.insert(0, '.')
import pytest
from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.delta import SparkDeltaDestination
from tests.sdk.python.rtdip_sdk.pipelines._pipeline_utils.spark_configuration_constants import spark_session
from pyspark.sql.functions import lit
@@ -36,4 +37,18 @@ def test_spark_delta_write_stream(spark_session: SparkSession, mocker: MockerFixture):
    expected_df = spark_session.createDataFrame([{"id": "1"}])
    eventhub_destination = SparkDeltaDestination("test_spark_delta_write_stream", {}, "overwrite")
    actual = eventhub_destination.write_stream(expected_df)
    assert actual is None

def test_spark_delta_write_batch_fails(spark_session: SparkSession, mocker: MockerFixture):
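    # Patch DataFrame.write so the chained format().mode().options().saveAsTable() call raises, simulating a failed Delta batch write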
mocker.patch("pyspark.sql.DataFrame.write", new_callable=mocker.Mock(return_value=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(mode=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(saveAsTable=mocker.Mock(side_effect=Exception))))))))))
expected_df = spark_session.createDataFrame([{"id": "1"}])
eventhub_destination = SparkDeltaDestination("test_spark_delta_write_batch", {}, "overwrite")
with pytest.raises(Exception):
eventhub_destination.write_batch(expected_df)

def test_spark_delta_write_stream_fails(spark_session: SparkSession, mocker: MockerFixture):
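    # Patch DataFrame.writeStream so the chained trigger().format().queryName().outputMode().options().toTable() call raises, simulating a failed Delta streaming write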
mocker.patch("pyspark.sql.DataFrame.writeStream", new_callable=mocker.Mock(return_value=mocker.Mock(trigger=mocker.Mock(return_value=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(queryName=mocker.Mock(return_value=mocker.Mock(outputMode=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(toTable=mocker.Mock(side_effect=Exception))))))))))))))
expected_df = spark_session.createDataFrame([{"id": "1"}])
eventhub_destination = SparkDeltaDestination("test_spark_delta_write_stream", {}, "overwrite")
with pytest.raises(Exception):
eventhub_destination.write_stream(expected_df)
@@ -36,4 +36,18 @@ def test_spark_eventhub_write_stream(spark_session: SparkSession, mocker: MockerFixture):
    expected_df = spark_session.createDataFrame([{"id": "1"}])
    eventhub_destination = SparkEventhubDestination({})
    actual = eventhub_destination.write_stream(expected_df)
    assert actual is None

def test_spark_eventhub_write_batch_fails(spark_session: SparkSession, mocker: MockerFixture):
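    # Patch DataFrame.write so the chained format().options().save() call raises, simulating a failed Eventhub batch write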
mocker.patch("pyspark.sql.DataFrame.write", new_callable=mocker.Mock(return_value=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(save=mocker.Mock(side_effect=Exception))))))))
expected_df = spark_session.createDataFrame([{"id": "1"}])
eventhub_destination = SparkEventhubDestination({})
with pytest.raises(Exception):
eventhub_destination.write_batch(expected_df)

def test_spark_eventhub_write_stream_fails(spark_session: SparkSession, mocker: MockerFixture):
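    # Patch DataFrame.writeStream so the chained format().options().start() call raises, simulating a failed Eventhub streaming write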
mocker.patch("pyspark.sql.DataFrame.writeStream", new_callable=mocker.Mock(return_value=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(start=mocker.Mock(side_effect=Exception))))))))
expected_df = spark_session.createDataFrame([{"id": "1"}])
eventhub_destination = SparkEventhubDestination({})
with pytest.raises(Exception):
eventhub_destination.write_stream(expected_df)
@@ -20,17 +20,26 @@
from tests.sdk.python.rtdip_sdk.pipelines._pipeline_utils.spark_configuration_constants import spark_session
from pyspark.sql import DataFrame, SparkSession

path = "/path"

def test_databricks_autoloader_read_batch(spark_session: SparkSession):
    with pytest.raises(NotImplementedError) as excinfo:
        autoloader_source = DataBricksAutoLoaderSource(spark_session, {}, "/path", "parquet")
        autoloader_source = DataBricksAutoLoaderSource(spark_session, {}, path, "parquet")
        autoloader_source.read_batch()
    assert str(excinfo.value) == 'Auto Loader only supports streaming reads. To perform a batch read, use the read_stream method and specify Trigger on the write_stream as `availableNow=True`'

def test_databricks_autoloader_read_stream(spark_session: SparkSession, mocker: MockerFixture):
    autoloader_source = DataBricksAutoLoaderSource(spark_session, {}, "/path", "parquet")
    autoloader_source = DataBricksAutoLoaderSource(spark_session, {}, path, "parquet")
    expected_df = spark_session.createDataFrame([{"a": "x"}])
    mocker.patch.object(autoloader_source, "spark", new_callable=mocker.PropertyMock(return_value=mocker.Mock(readStream=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(load=mocker.Mock(return_value=expected_df)))))))))
    assert autoloader_source.pre_read_validation()
    df = autoloader_source.read_stream()
    assert isinstance(df, DataFrame)
    assert autoloader_source.post_read_validation(df)

def test_databricks_autoloader_read_stream_fails(spark_session: SparkSession, mocker: MockerFixture):
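    # The mocked spark.readStream chain raises on load(), so read_stream should propagate the Exception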
    autoloader_source = DataBricksAutoLoaderSource(spark_session, {}, path, "parquet")
    mocker.patch.object(autoloader_source, "spark", new_callable=mocker.PropertyMock(return_value=mocker.Mock(readStream=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(load=mocker.Mock(side_effect=Exception)))))))))

    with pytest.raises(Exception):
        autoloader_source.read_stream()
20 changes: 18 additions & 2 deletions tests/sdk/python/rtdip_sdk/pipelines/sources/spark/test_delta.py
@@ -14,6 +14,7 @@

import sys
sys.path.insert(0, '.')
import pytest
from src.sdk.python.rtdip_sdk.pipelines.destinations.spark.delta import SparkDeltaDestination
from src.sdk.python.rtdip_sdk.pipelines.sources.spark.delta import SparkDeltaSource
from tests.sdk.python.rtdip_sdk.pipelines._pipeline_utils.spark_configuration_constants import spark_session
@@ -31,10 +32,25 @@ def test_spark_delta_read_batch(spark_session: SparkSession):
    assert actual_df.schema == StructType([StructField('id', StringType(), True)])

def test_spark_delta_read_stream(spark_session: SparkSession, mocker: MockerFixture):
    delta_source = SparkDeltaSource(spark_session, {}, "test_spark_delta_read_batch")
    delta_source = SparkDeltaSource(spark_session, {}, "test_spark_delta_read_stream")
    expected_df = spark_session.createDataFrame([{"a": "x"}])
    mocker.patch.object(delta_source, "spark", new_callable=mocker.PropertyMock(return_value=mocker.Mock(readStream=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(load=mocker.Mock(return_value=expected_df)))))))))
    assert delta_source.pre_read_validation()
    df = delta_source.read_stream()
    assert isinstance(df, DataFrame)
    assert delta_source.post_read_validation()

def test_spark_delta_read_batch_fails(spark_session: SparkSession, mocker: MockerFixture):
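    # The mocked spark.read chain raises on table(), so read_batch should propagate the Exception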
    delta_source = SparkDeltaSource(spark_session, {}, "test_spark_delta_read_batch")
    mocker.patch.object(delta_source, "spark", new_callable=mocker.PropertyMock(return_value=mocker.Mock(read=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(table=mocker.Mock(side_effect=Exception)))))))))

    with pytest.raises(Exception):
        delta_source.read_batch()


def test_spark_delta_read_stream_fails(spark_session: SparkSession, mocker: MockerFixture):
    delta_source = SparkDeltaSource(spark_session, {}, "test_spark_delta_read_stream")
    mocker.patch.object(delta_source, "spark", new_callable=mocker.PropertyMock(return_value=mocker.Mock(readStream=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(load=mocker.Mock(side_effect=Exception)))))))))

    with pytest.raises(Exception):
        delta_source.read_stream()
@@ -14,6 +14,7 @@

import sys
sys.path.insert(0, '.')
import pytest
from src.sdk.python.rtdip_sdk.pipelines.sources.spark.eventhub import SparkEventhubSource
from tests.sdk.python.rtdip_sdk.pipelines._pipeline_utils.spark_configuration_constants import spark_session
import json
@@ -44,4 +45,19 @@ def test_spark_eventhub_read_stream(spark_session: SparkSession):
    assert eventhub_source.pre_read_validation()
    df = eventhub_source.read_stream()
    assert isinstance(df, DataFrame)
    assert eventhub_source.post_read_validation(df)

def test_spark_eventhub_read_batch_fails(spark_session: SparkSession, mocker: MockerFixture):
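    # The mocked spark.read chain raises on load(), so read_batch should propagate the Exception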
    eventhub_source = SparkEventhubSource(spark_session, {})
    mocker.patch.object(eventhub_source, "spark", new_callable=mocker.PropertyMock(return_value=mocker.Mock(read=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(load=mocker.Mock(side_effect=Exception)))))))))
    assert eventhub_source.pre_read_validation()
    with pytest.raises(Exception):
        eventhub_source.read_batch()

def test_spark_eventhub_read_stream_fails(spark_session: SparkSession, mocker: MockerFixture):
    eventhub_source = SparkEventhubSource(spark_session, {})
    mocker.patch.object(eventhub_source, "spark", new_callable=mocker.PropertyMock(return_value=mocker.Mock(readStream=mocker.Mock(format=mocker.Mock(return_value=mocker.Mock(options=mocker.Mock(return_value=mocker.Mock(load=mocker.Mock(side_effect=Exception)))))))))
    assert eventhub_source.pre_read_validation()
    with pytest.raises(Exception):
        eventhub_source.read_stream()