From c6259a0a5363323c87e64a3171e9a4a55897456d Mon Sep 17 00:00:00 2001 From: Amber-Rigg Date: Thu, 14 Dec 2023 09:38:41 +0000 Subject: [PATCH 1/8] OPC AE schema and script Signed-off-by: Amber-Rigg --- .../pipelines/_pipeline_utils/spark.py | 122 ++++++++++++ .../pipelines/transformers/__init__.py | 1 + .../spark/opc_publisher_opcae_json_to_pcdm.py | 176 ++++++++++++++++++ 3 files changed, 299 insertions(+) create mode 100644 src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py diff --git a/src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py b/src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py index 0f60d035e..93d75e213 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py +++ b/src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py @@ -20,6 +20,7 @@ TimestampType, StringType, BinaryType, + BooleanType, LongType, MapType, IntegerType, @@ -160,6 +161,127 @@ def get_dbutils( ] ) +OPC_PUBLISHER_AE_SCHEMA = StructType([ + StructField("NodeId", StringType(), True), + StructField("EndpointUrl", StringType(), True), + StructField("DisplayName", StringType(), True), + StructField("Value", StructType([ + StructField("ConditionId", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("AckedState", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("AckedState/FalseState", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("AckedState/Id", StructType([ + StructField("Value", BooleanType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("AckedState/TrueState", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + 
StructField("ActiveState", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("ActiveState/FalseState", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("ActiveState/Id", StructType([ + StructField("Value", BooleanType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("ActiveState/TrueState", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("EnabledState", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("EnabledState/FalseState", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("EnabledState/Id", StructType([ + StructField("Value", BooleanType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("EnabledState/TrueState", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("EventId", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("EventType", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("HighHighLimit", StructType([ + StructField("Value", DoubleType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("HighLimit", StructType([ + StructField("Value", DoubleType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("InputNode", StructType([ + StructField("Value", StringType(), True), + 
StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("LowLimit", StructType([ + StructField("Value", DoubleType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("LowLowLimit", StructType([ + StructField("Value", DoubleType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("Message", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("Quality", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("ReceiveTime", StructType([ + StructField("Value", TimestampType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("Retain", StructType([ + StructField("Value", BooleanType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("Severity", StructType([ + StructField("Value", DoubleType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("SourceName", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("SourceNode", StructType([ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True), + StructField("Time", StructType([ + StructField("Value", TimestampType(), True), + StructField("SourceTimestamp", TimestampType(), True) + ]), True) + ]), True) +]) + + PROCESS_DATA_MODEL_SCHEMA = StructType( [ StructField("TagName", StringType(), True), diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/__init__.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/__init__.py index 2288a130c..8dbccb8b9 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/transformers/__init__.py +++ 
b/src/sdk/python/rtdip_sdk/pipelines/transformers/__init__.py @@ -14,6 +14,7 @@ from .spark.binary_to_string import * from .spark.opc_publisher_opcua_json_to_pcdm import * +from .spark.opc_publisher_opcae_json_to_pcdm import * from .spark.fledge_opcua_json_to_pcdm import * from .spark.ssip_pi_binary_file_to_pcdm import * from .spark.ssip_pi_binary_json_to_pcdm import * diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py new file mode 100644 index 000000000..9f82edfc1 --- /dev/null +++ b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py @@ -0,0 +1,176 @@ +# Copyright 2022 RTDIP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from pyspark.sql import DataFrame +from pyspark.sql.functions import ( + from_json, + col, + explode, + to_timestamp, + when, + lit, + coalesce, +) +from pyspark.sql.types import ArrayType, StringType + +from ..interfaces import TransformerInterface +from ..._pipeline_utils.models import Libraries, SystemType +from ..._pipeline_utils.spark import OPC_PUBLISHER_SCHEMA + + +class OPCPublisherOPCUAJsonToPCDMTransformer(TransformerInterface): + """ + Converts a Spark Dataframe column containing a json string created by OPC Publisher for A&E(Alarm &Events) data to the Process Control Data Model. 
+ + Example + -------- + ```python + from rtdip_sdk.pipelines.transformers import OPCPublisherOPCAEJsonToPCDMTransformer + + opc_publisher_opcua_json_to_pcdm_transformer = OPCPublisherOPCAEJsonToPCDMTransformer( + data=df, + souce_column_name="body", + status_null_value="Good", + change_type_value="insert", + timestamp_formats=[ + "yyyy-MM-dd'T'HH:mm:ss.SSSX", + "yyyy-MM-dd'T'HH:mm:ssX" + ], + filter=None + ) + + result = opc_publisher_opcAE_json_to_pcdm_transformer.transform() + ``` + + Parameters: + data (DataFrame): Dataframe containing the column with Json OPC AE data + source_column_name (str): Spark Dataframe column containing the OPC Publisher Json OPC AE data + status_null_value (optional str): If populated, will replace null values in the Status column with the specified value. + change_type_value (optional str): If populated, will replace 'insert' in the ChangeType column with the specified value. + timestamp_formats (optional list[str]): Specifies the timestamp formats to be used for converting the timestamp string to a Timestamp Type. For more information on formats, refer to this [documentation.](https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html) + filter (optional str): Enables providing a filter to the data which can be required in certain scenarios. 
For example, it would be possible to filter on IoT Hub Device Id and Module by providing a filter in SQL format such as `systemProperties.iothub-connection-device-id = "" AND systemProperties.iothub-connection-module-id = ""` + """ + + data: DataFrame + source_column_name: str + tagname_field: str + status_null_value: str + change_type_value: str + timestamp_formats: list + filter: str + + def __init__( + self, + data: DataFrame, + source_column_name: str, + tagname_field: str = "DisplayName", + status_null_value: str = None, + change_type_value: str = "insert", + timestamp_formats: list = [ + "yyyy-MM-dd'T'HH:mm:ss.SSSX", + "yyyy-MM-dd'T'HH:mm:ssX", + ], + filter: str = None, + ) -> None: # NOSONAR + self.data = data + self.source_column_name = source_column_name + self.tagname_field = tagname_field + self.status_null_value = status_null_value + self.change_type_value = change_type_value + self.timestamp_formats = timestamp_formats + self.filter = filter + + @staticmethod + def system_type(): + """ + Attributes: + SystemType (Environment): Requires PYSPARK + """ + return SystemType.PYSPARK + + @staticmethod + def libraries(): + libraries = Libraries() + return libraries + + @staticmethod + def settings() -> dict: + return {} + + def pre_transform_validation(self): + return True + + def post_transform_validation(self): + return True + + def transform(self) -> DataFrame: + """ + Returns: + DataFrame: A dataframe with the specified column converted to PCDM + """ + + df = self.data.withColumn( + self.source_column_name, + from_json(col(self.source_column_name), ArrayType(StringType())), + ).withColumn(self.source_column_name, explode(self.source_column_name)) + + if self.filter != None: + df = df.where(self.filter) + + df = df.withColumn('OPCAE', from_json(col('body'), json_schema)) + + df_data = df_data.select(col("enqueuedTime"),col("OPCAE.NodeId"),col("OPCAE.DisplayName"), col("OPCAE.Value.*")) + df_data = 
df_data.select(col("enqueuedTime"),col("OPCAE.NodeId"),col("OPCAE.DisplayName"), col("OPCAE.Value.ConditionId.SourceTimestamp"), col("OPCAE.Value.*.Value")) + + + df = + + df = ( + df.withColumn( + "OPCUA", from_json(col(self.source_column_name), OPC_PUBLISHER_SCHEMA) + ) + .withColumn("TagName", (col("OPCUA.{}".format(self.tagname_field)))) + .withColumn( + "EventTime", + coalesce( + *[ + to_timestamp(col("OPCUA.Value.SourceTimestamp"), f) + for f in self.timestamp_formats + ] + ), + ) + .withColumn("Value", col("OPCUA.Value.Value")) + .withColumn( + "ValueType", + when(col("Value").cast("float").isNotNull(), "float") + .when(col("Value").cast("float").isNull(), "string") + .otherwise("unknown"), + ) + .withColumn("ChangeType", lit(self.change_type_value)) + ) + + status_col_name = "OPCUA.Value.StatusCode.Symbol" + if self.status_null_value != None: + df = df.withColumn( + "Status", + when(col(status_col_name).isNotNull(), col(status_col_name)).otherwise( + lit(self.status_null_value) + ), + ) + else: + df = df.withColumn("Status", col(status_col_name)) + + return df.select( + "TagName", "EventTime", "Status", "Value", "ValueType", "ChangeType" + ) From dc1ecf3948e4e4d8c452415c89fddef554d6bef8 Mon Sep 17 00:00:00 2001 From: Amber-Rigg Date: Thu, 14 Dec 2023 11:51:12 +0000 Subject: [PATCH 2/8] Update documentation Signed-off-by: Amber-Rigg --- .../spark/opc_publisher_opcae_json_to_pcdm.md | 2 + mkdocs.yml | 1 + .../spark/opc_publisher_opcae_json_to_pcdm.py | 86 ++++++++----------- 3 files changed, 39 insertions(+), 50 deletions(-) create mode 100644 docs/sdk/code-reference/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.md diff --git a/docs/sdk/code-reference/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.md b/docs/sdk/code-reference/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.md new file mode 100644 index 000000000..e317f4112 --- /dev/null +++ 
b/docs/sdk/code-reference/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.md @@ -0,0 +1,2 @@ +# Convert OPC Publisher Json for A&E(Alarm & Events) Data to Process Control Data Model +::: src.sdk.python.rtdip_sdk.pipelines.transformers.spark.opc_publisher_opcae_json_to_pcdm diff --git a/mkdocs.yml b/mkdocs.yml index 4e7c97054..d0d3be966 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -186,6 +186,7 @@ nav: - Spark: - Binary To String: sdk/code-reference/pipelines/transformers/spark/binary_to_string.md - OPC Publisher Json To Process Control Data Model: sdk/code-reference/pipelines/transformers/spark/opc_publisher_opcua_json_to_pcdm.md + - OPC Publisher Json for A&E(Alarm & Events) Data to Process Control Data Model: sdk/code-reference/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.md - Fledge Json To Process Control Data Model: sdk/code-reference/pipelines/transformers/spark/fledge_opcua_json_to_pcdm.md - EdgeX JSON data To Process Control Data Model: sdk/code-reference/pipelines/transformers/spark/edgex_opcua_json_to_pcdm.md - SEM data To Process Control Data Model: sdk/code-reference/pipelines/transformers/spark/sem_json_to_pcdm.md diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py index 9f82edfc1..3f4eff1d0 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py +++ b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py @@ -18,15 +18,13 @@ col, explode, to_timestamp, - when, - lit, coalesce, ) from pyspark.sql.types import ArrayType, StringType from ..interfaces import TransformerInterface from ..._pipeline_utils.models import Libraries, SystemType -from ..._pipeline_utils.spark import OPC_PUBLISHER_SCHEMA +from ..._pipeline_utils.spark import OPC_PUBLISHER_AE_SCHEMA class 
OPCPublisherOPCUAJsonToPCDMTransformer(TransformerInterface): @@ -41,8 +39,6 @@ class OPCPublisherOPCUAJsonToPCDMTransformer(TransformerInterface): opc_publisher_opcua_json_to_pcdm_transformer = OPCPublisherOPCAEJsonToPCDMTransformer( data=df, souce_column_name="body", - status_null_value="Good", - change_type_value="insert", timestamp_formats=[ "yyyy-MM-dd'T'HH:mm:ss.SSSX", "yyyy-MM-dd'T'HH:mm:ssX" @@ -56,17 +52,12 @@ class OPCPublisherOPCUAJsonToPCDMTransformer(TransformerInterface): Parameters: data (DataFrame): Dataframe containing the column with Json OPC AE data source_column_name (str): Spark Dataframe column containing the OPC Publisher Json OPC AE data - status_null_value (optional str): If populated, will replace null values in the Status column with the specified value. - change_type_value (optional str): If populated, will replace 'insert' in the ChangeType column with the specified value. timestamp_formats (optional list[str]): Specifies the timestamp formats to be used for converting the timestamp string to a Timestamp Type. For more information on formats, refer to this [documentation.](https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html) filter (optional str): Enables providing a filter to the data which can be required in certain scenarios. 
For example, it would be possible to filter on IoT Hub Device Id and Module by providing a filter in SQL format such as `systemProperties.iothub-connection-device-id = "" AND systemProperties.iothub-connection-module-id = ""` """ data: DataFrame source_column_name: str - tagname_field: str - status_null_value: str - change_type_value: str timestamp_formats: list filter: str @@ -74,9 +65,6 @@ def __init__( self, data: DataFrame, source_column_name: str, - tagname_field: str = "DisplayName", - status_null_value: str = None, - change_type_value: str = "insert", timestamp_formats: list = [ "yyyy-MM-dd'T'HH:mm:ss.SSSX", "yyyy-MM-dd'T'HH:mm:ssX", @@ -85,9 +73,6 @@ def __init__( ) -> None: # NOSONAR self.data = data self.source_column_name = source_column_name - self.tagname_field = tagname_field - self.status_null_value = status_null_value - self.change_type_value = change_type_value self.timestamp_formats = timestamp_formats self.filter = filter @@ -128,49 +113,50 @@ def transform(self) -> DataFrame: if self.filter != None: df = df.where(self.filter) - df = df.withColumn('OPCAE', from_json(col('body'), json_schema)) - - df_data = df_data.select(col("enqueuedTime"),col("OPCAE.NodeId"),col("OPCAE.DisplayName"), col("OPCAE.Value.*")) - df_data = df_data.select(col("enqueuedTime"),col("OPCAE.NodeId"),col("OPCAE.DisplayName"), col("OPCAE.Value.ConditionId.SourceTimestamp"), col("OPCAE.Value.*.Value")) - - - df = + df = df.withColumn('OPCAE', from_json(col(self.source_column_name), OPC_PUBLISHER_AE_SCHEMA)) + + df = df.select(col("enqueuedTime").alias("EnqueuedTime"),col("OPCAE.NodeId"),col("OPCAE.DisplayName"), + col("OPCAE.Value.ConditionId.Value").alias("ConditionId"), + col("OPCAE.Value.AckedState.Value").alias("AckedState"), + col("OPCAE.Value.AckedState/FalseState.Value").alias("AckedState/FalseState"), + col("OPCAE.Value.AckedState/Id.Value").alias("AckedState/Id"), + col("OPCAE.Value.AckedState/TrueState.Value").alias("AckedState/TrueState"), + 
col("OPCAE.Value.ActiveState.Value").alias("ActiveState"), + col("OPCAE.Value.ActiveState/FalseState.Value").alias("ActiveState/FalseState"), + col("OPCAE.Value.ActiveState/Id.Value").alias("ActiveState/Id"), + col("OPCAE.Value.ActiveState/TrueState.Value").alias("ActiveState/TrueState"), + col("OPCAE.Value.EnabledState.Value").alias("EnabledState"), + col("OPCAE.Value.EnabledState/FalseState.Value").alias("EnabledState/FalseState"), + col("OPCAE.Value.EnabledState/Id.Value").alias("EnabledState/Id"), + col("OPCAE.Value.EnabledState/TrueState.Value").alias("EnabledState/TrueState"), + col("OPCAE.Value.EventId.Value").alias("EventId"), + col("OPCAE.Value.EventType.Value").alias("EventType"), + col("OPCAE.Value.HighHighLimit.Value").alias("HighHighLimit"), + col("OPCAE.Value.HighLimit.Value").alias("HighLimit"), + col("OPCAE.Value.InputNode.Value").alias("InputNode"), + col("OPCAE.Value.LowLimit.Value").alias("LowLimit"), + col("OPCAE.Value.LowLowLimit.Value").alias("LowLowLimit"), + col("OPCAE.Value.Message.Value").alias("Message"), + col("OPCAE.Value.Quality.Value").alias("Quality"), + col("OPCAE.Value.ReceiveTime.Value").alias("ReceiveTime"), + col("OPCAE.Value.Retain.Value").alias("Retain"), + col("OPCAE.Value.Severity.Value").alias("Severity"), + col("OPCAE.Value.SourceName.Value").alias("SourceName"), + col("OPCAE.Value.SourceNode.Value").alias("SourceNode"), + col("OPCAE.Value.Time.Value").alias("EventTime") + ) df = ( df.withColumn( - "OPCUA", from_json(col(self.source_column_name), OPC_PUBLISHER_SCHEMA) - ) - .withColumn("TagName", (col("OPCUA.{}".format(self.tagname_field)))) - .withColumn( "EventTime", coalesce( *[ - to_timestamp(col("OPCUA.Value.SourceTimestamp"), f) + to_timestamp(col("EventTime"), f) for f in self.timestamp_formats ] ), ) - .withColumn("Value", col("OPCUA.Value.Value")) - .withColumn( - "ValueType", - when(col("Value").cast("float").isNotNull(), "float") - .when(col("Value").cast("float").isNull(), "string") - .otherwise("unknown"), - ) 
- .withColumn("ChangeType", lit(self.change_type_value)) + ) - status_col_name = "OPCUA.Value.StatusCode.Symbol" - if self.status_null_value != None: - df = df.withColumn( - "Status", - when(col(status_col_name).isNotNull(), col(status_col_name)).otherwise( - lit(self.status_null_value) - ), - ) - else: - df = df.withColumn("Status", col(status_col_name)) - - return df.select( - "TagName", "EventTime", "Status", "Value", "ValueType", "ChangeType" - ) + return df From b2b13b87b0333d8872b5a04a2e57c62350d644b6 Mon Sep 17 00:00:00 2001 From: Amber-Rigg Date: Fri, 15 Dec 2023 13:36:35 +0000 Subject: [PATCH 3/8] Test and Black Fromatting Signed-off-by: Amber-Rigg --- .../pipelines/_pipeline_utils/spark.py | 414 +++++++++++++----- .../spark/opc_publisher_opcae_json_to_pcdm.py | 59 +-- .../test_opc_publisher_opcae_json_to_pcdm.py | 128 ++++++ 3 files changed, 457 insertions(+), 144 deletions(-) create mode 100644 tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py diff --git a/src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py b/src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py index 93d75e213..a9924aabe 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py +++ b/src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py @@ -161,125 +161,301 @@ def get_dbutils( ] ) -OPC_PUBLISHER_AE_SCHEMA = StructType([ - StructField("NodeId", StringType(), True), - StructField("EndpointUrl", StringType(), True), - StructField("DisplayName", StringType(), True), - StructField("Value", StructType([ - StructField("ConditionId", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("AckedState", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("AckedState/FalseState", StructType([ - StructField("Value", StringType(), True), - 
StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("AckedState/Id", StructType([ - StructField("Value", BooleanType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("AckedState/TrueState", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("ActiveState", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("ActiveState/FalseState", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("ActiveState/Id", StructType([ - StructField("Value", BooleanType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("ActiveState/TrueState", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("EnabledState", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("EnabledState/FalseState", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("EnabledState/Id", StructType([ - StructField("Value", BooleanType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("EnabledState/TrueState", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("EventId", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("EventType", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - 
StructField("HighHighLimit", StructType([ - StructField("Value", DoubleType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("HighLimit", StructType([ - StructField("Value", DoubleType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("InputNode", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("LowLimit", StructType([ - StructField("Value", DoubleType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("LowLowLimit", StructType([ - StructField("Value", DoubleType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("Message", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("Quality", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("ReceiveTime", StructType([ - StructField("Value", TimestampType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("Retain", StructType([ - StructField("Value", BooleanType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("Severity", StructType([ - StructField("Value", DoubleType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("SourceName", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("SourceNode", StructType([ - StructField("Value", StringType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True), - StructField("Time", StructType([ - StructField("Value", TimestampType(), True), - StructField("SourceTimestamp", TimestampType(), True) - ]), True) - ]), 
True) -]) +OPC_PUBLISHER_AE_SCHEMA = StructType( + [ + StructField("NodeId", StringType(), True), + StructField("EndpointUrl", StringType(), True), + StructField("DisplayName", StringType(), True), + StructField( + "Value", + StructType( + [ + StructField( + "ConditionId", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "AckedState", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "AckedState/FalseState", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "AckedState/Id", + StructType( + [ + StructField("Value", BooleanType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "AckedState/TrueState", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "ActiveState", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "ActiveState/FalseState", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "ActiveState/Id", + StructType( + [ + StructField("Value", BooleanType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "ActiveState/TrueState", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "EnabledState", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + 
] + ), + True, + ), + StructField( + "EnabledState/FalseState", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "EnabledState/Id", + StructType( + [ + StructField("Value", BooleanType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "EnabledState/TrueState", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "EventId", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "EventType", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "HighHighLimit", + StructType( + [ + StructField("Value", DoubleType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "HighLimit", + StructType( + [ + StructField("Value", DoubleType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "InputNode", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "LowLimit", + StructType( + [ + StructField("Value", DoubleType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "LowLowLimit", + StructType( + [ + StructField("Value", DoubleType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "Message", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "Quality", + StructType( + [ + 
StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "ReceiveTime", + StructType( + [ + StructField("Value", TimestampType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "Retain", + StructType( + [ + StructField("Value", BooleanType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "Severity", + StructType( + [ + StructField("Value", DoubleType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "SourceName", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "SourceNode", + StructType( + [ + StructField("Value", StringType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + StructField( + "Time", + StructType( + [ + StructField("Value", TimestampType(), True), + StructField("SourceTimestamp", TimestampType(), True), + ] + ), + True, + ), + ] + ), + True, + ), + ] +) PROCESS_DATA_MODEL_SCHEMA = StructType( diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py index 3f4eff1d0..c90338c8f 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py +++ b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py @@ -104,31 +104,46 @@ def transform(self) -> DataFrame: Returns: DataFrame: A dataframe with the specified column converted to PCDM """ - + df = self.data.withColumn( - self.source_column_name, - from_json(col(self.source_column_name), ArrayType(StringType())), - ).withColumn(self.source_column_name, explode(self.source_column_name)) + 
self.source_column_name, + from_json(col(self.source_column_name), ArrayType(StringType())), + ).withColumn(self.source_column_name, explode(self.source_column_name)) if self.filter != None: df = df.where(self.filter) - df = df.withColumn('OPCAE', from_json(col(self.source_column_name), OPC_PUBLISHER_AE_SCHEMA)) + df = df.withColumn( + "OPCAE", from_json(col(self.source_column_name), OPC_PUBLISHER_AE_SCHEMA) + ) - df = df.select(col("enqueuedTime").alias("EnqueuedTime"),col("OPCAE.NodeId"),col("OPCAE.DisplayName"), + df = df.select( + col("enqueuedTime").alias("EnqueuedTime"), + col("OPCAE.NodeId"), + col("OPCAE.DisplayName"), col("OPCAE.Value.ConditionId.Value").alias("ConditionId"), col("OPCAE.Value.AckedState.Value").alias("AckedState"), - col("OPCAE.Value.AckedState/FalseState.Value").alias("AckedState/FalseState"), + col("OPCAE.Value.AckedState/FalseState.Value").alias( + "AckedState/FalseState" + ), col("OPCAE.Value.AckedState/Id.Value").alias("AckedState/Id"), col("OPCAE.Value.AckedState/TrueState.Value").alias("AckedState/TrueState"), col("OPCAE.Value.ActiveState.Value").alias("ActiveState"), - col("OPCAE.Value.ActiveState/FalseState.Value").alias("ActiveState/FalseState"), + col("OPCAE.Value.ActiveState/FalseState.Value").alias( + "ActiveState/FalseState" + ), col("OPCAE.Value.ActiveState/Id.Value").alias("ActiveState/Id"), - col("OPCAE.Value.ActiveState/TrueState.Value").alias("ActiveState/TrueState"), + col("OPCAE.Value.ActiveState/TrueState.Value").alias( + "ActiveState/TrueState" + ), col("OPCAE.Value.EnabledState.Value").alias("EnabledState"), - col("OPCAE.Value.EnabledState/FalseState.Value").alias("EnabledState/FalseState"), + col("OPCAE.Value.EnabledState/FalseState.Value").alias( + "EnabledState/FalseState" + ), col("OPCAE.Value.EnabledState/Id.Value").alias("EnabledState/Id"), - col("OPCAE.Value.EnabledState/TrueState.Value").alias("EnabledState/TrueState"), + col("OPCAE.Value.EnabledState/TrueState.Value").alias( + "EnabledState/TrueState" + ), 
col("OPCAE.Value.EventId.Value").alias("EventId"), col("OPCAE.Value.EventType.Value").alias("EventType"), col("OPCAE.Value.HighHighLimit.Value").alias("HighHighLimit"), @@ -143,20 +158,14 @@ def transform(self) -> DataFrame: col("OPCAE.Value.Severity.Value").alias("Severity"), col("OPCAE.Value.SourceName.Value").alias("SourceName"), col("OPCAE.Value.SourceNode.Value").alias("SourceNode"), - col("OPCAE.Value.Time.Value").alias("EventTime") - ) - - df = ( - df.withColumn( - "EventTime", - coalesce( - *[ - to_timestamp(col("EventTime"), f) - for f in self.timestamp_formats - ] - ), - ) - + col("OPCAE.Value.Time.Value").alias("EventTime"), + ) + + df = df.withColumn( + "EventTime", + coalesce( + *[to_timestamp(col("EventTime"), f) for f in self.timestamp_formats] + ), ) return df diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py new file mode 100644 index 000000000..da33ffa28 --- /dev/null +++ b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py @@ -0,0 +1,128 @@ +# Copyright 2022 RTDIP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys + +sys.path.insert(0, ".") +from src.sdk.python.rtdip_sdk.pipelines.transformers.spark.opc_publisher_opcae_json_to_pcdm import ( + OPCPublisherOPCUAJsonToPCDMTransformer, +) +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( + Libraries, + SystemType, +) +from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.spark import ( + OPC_PUBLISHER_AE_SCHEMA, +) + +from pyspark.sql import SparkSession, DataFrame +from pyspark.sql.types import ( + StructType, + StructField, + StringType, + TimestampType, + DoubleType, + BooleanType, +) +from datetime import datetime + + +def test_opc_publisher_json_to_pcdm(spark_session: SparkSession): + opcua_json_data = '[{"NodeId":"ns=6;s=MyLevel.Alarm","EndpointUrl":"opc.tcp://aewnw00042diot5.europe.shell.com:53530/OPCUA/SimulationServer","DisplayName":"MyLevelAlarm","Value":{"ConditionId":{"Value":"http://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel.Alarm","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState":{"Value":"Unacknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/FalseState":{"Value":"Unacknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/Id":{"Value":false,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/TrueState":{"Value":"Acknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState":{"Value":"Inactive","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/FalseState":{"Value":"Inactive","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/Id":{"Value":false,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/TrueState":{"Value":"Active","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState":{"Value":"Enabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/FalseState":{"Value":"Disabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/Id":{"Value":true,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/TrueState":{"Value":"Enabled","SourceTimestamp":"2023-10-19
T13:08:08.503Z"},"EventId":{"Value":"AAAAAAAAGycAAAAAAAAbJg==","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EventType":{"Value":"i=9482","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"HighHighLimit":{"Value":90,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"HighLimit":{"Value":70,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"InputNode":{"Value":null,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"LowLimit":{"Value":30,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"LowLowLimit":{"Value":10,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Message":{"Value":"Level exceeded","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Quality":{"Value":null,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ReceiveTime":{"Value":"2023-10-19T13:08:08.503Z","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Retain":{"Value":true,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Severity":{"Value":500,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"SourceName":{"Value":"MyLevel","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"SourceNode":{"Value":"http://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Time":{"Value":"2023-10-19T13:08:08.503Z","SourceTimestamp":"2023-10-19T13:08:08.503Z"}}}]' + opcua_df: DataFrame = spark_session.createDataFrame([{"body": opcua_json_data}]) + + expected_schema = StructType( + [ + StructField("EnqueuedTime", TimestampType(), True), + StructField("NodeId", StringType(), True), + StructField("DisplayName", StringType(), True), + StructField("ConditionId", StringType(), True), + StructField("AckedState", StringType(), True), + StructField("AckedState/FalseState", StringType(), True), + StructField("AckedState/Id", BooleanType(), True), + StructField("AckedState/TrueState", StringType(), True), + StructField("ActiveState", StringType(), True), + StructField("ActiveState/FalseState", StringType(), True), + StructField("ActiveState/Id", BooleanType(), True), + StructField("ActiveState/TrueState", StringType(), True), + 
StructField("EnabledState", StringType(), True), + StructField("EnabledState/FalseState", StringType(), True), + StructField("EnabledState/Id", BooleanType(), True), + StructField("EnabledState/TrueState", StringType(), True), + StructField("EventId", StringType(), True), + StructField("EventType", StringType(), True), + StructField("HighHighLimit", DoubleType(), True), + StructField("HighLimit", DoubleType(), True), + StructField("InputNode", StringType(), True), + StructField("LowLimit", DoubleType(), True), + StructField("LowLowLimit", DoubleType(), True), + StructField("Message", StringType(), True), + StructField("Quality", StringType(), True), + StructField("ReceiveTime", TimestampType(), True), + StructField("Retain", BooleanType(), True), + StructField("Severity", DoubleType(), True), + StructField("SourceName", StringType(), True), + StructField("SourceNode", StringType(), True), + StructField("EventTime", TimestampType(), True), + ] + ) + + expected_data = [ + { + "EnqueuedTime": datetime.fromisoformat("2023-10-19T13:08:09.049+00:00"), + "NodeId": "ns=6;s=MyLevel.Alarm", + "DisplayName": "MyLevelAlarm", + "ConditionId": "http://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel.Alarm", + "AckedState": "Unacknowledged", + "AckedState/FalseState": "Unacknowledged", + "AckedState/Id": False, + "AckedState/TrueState": "Acknowledged", + "ActiveState": "Inactive", + "ActiveState/FalseState": "Inactive", + "ActiveState/Id": False, + "ActiveState/TrueState": "Active", + "EnabledState": "Enabled", + "EnabledState/FalseState": "Disabled", + "EnabledState/Id": True, + "EnabledState/TrueState": "Enabled", + "EventId": "AAAAAAAAGycAAAAAAAAbJg==", + "EventType": "i=9482", + "HighHighLimit": 90.0, + "HighLimit": 70.0, + "InputNode": None, + "LowLimit": 30.0, + "LowLowLimit": 10.0, + "Message": "Level exceeded", + "Quality": None, + "ReceiveTime": datetime.fromisoformat("2023-10-19T13:08:08.503+00:00"), + "Retain": True, + "Severity": 500.0, + "SourceName": "MyLevel", 
+ "SourceNode": "http://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel", + "EventTime": datetime.fromisoformat("2023-10-19T13:08:08.503+00:00"), + } + ] + + expected_df: DataFrame = spark_session.createDataFrame( + schema=expected_schema, data=expected_data + ) + eventhub_json_to_opcua_transformer = OPCPublisherOPCUAJsonToPCDMTransformer( + opcua_df, source_column_name="body", status_null_value="Good" + ) + actual_df = eventhub_json_to_opcua_transformer.transform() + + assert eventhub_json_to_opcua_transformer.system_type() == SystemType.PYSPARK + assert isinstance(eventhub_json_to_opcua_transformer.libraries(), Libraries) + assert expected_schema == actual_df.schema + assert expected_df.collect() == actual_df.collect() From 3b7c613a8af78931bd461dc9c87cf6e9975074d5 Mon Sep 17 00:00:00 2001 From: Amber-Rigg Date: Fri, 15 Dec 2023 14:18:16 +0000 Subject: [PATCH 4/8] Test completed Signed-off-by: Amber-Rigg --- .vscode/settings.json | 6 +----- .../spark/opc_publisher_opcae_json_to_pcdm.py | 17 ++++++++--------- .../test_opc_publisher_opcae_json_to_pcdm.py | 14 ++++++-------- 3 files changed, 15 insertions(+), 22 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 2b688f678..334815bf7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,11 +5,7 @@ "azureFunctions.projectRuntime": "~4", "python.testing.pytestArgs": [ - "--cache-clear", - "--cov=.", - "--cov-report=xml:cov.xml", - "tests", - "-vv", + "tests" ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py index c90338c8f..2a6347f40 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py +++ b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py @@ -27,7 +27,7 @@ from 
..._pipeline_utils.spark import OPC_PUBLISHER_AE_SCHEMA -class OPCPublisherOPCUAJsonToPCDMTransformer(TransformerInterface): +class OPCPublisherOPCAEJsonToPCDMTransformer(TransformerInterface): """ Converts a Spark Dataframe column containing a json string created by OPC Publisher for A&E(Alarm &Events) data to the Process Control Data Model. @@ -46,7 +46,7 @@ class OPCPublisherOPCUAJsonToPCDMTransformer(TransformerInterface): filter=None ) - result = opc_publisher_opcAE_json_to_pcdm_transformer.transform() + result = opc_publisher_opcae_json_to_pcdm_transformer.transform() ``` Parameters: @@ -65,15 +65,15 @@ def __init__( self, data: DataFrame, source_column_name: str, - timestamp_formats: list = [ - "yyyy-MM-dd'T'HH:mm:ss.SSSX", - "yyyy-MM-dd'T'HH:mm:ssX", - ], + timestamp_formats=None, filter: str = None, ) -> None: # NOSONAR self.data = data self.source_column_name = source_column_name - self.timestamp_formats = timestamp_formats + self.timestamp_formats = timestamp_formats or [ + "yyyy-MM-dd'T'HH:mm:ss.SSSX", + "yyyy-MM-dd'T'HH:mm:ssX", + ] self.filter = filter @staticmethod @@ -102,7 +102,7 @@ def post_transform_validation(self): def transform(self) -> DataFrame: """ Returns: - DataFrame: A dataframe with the specified column converted to PCDM + DataFrame: A dataframe with the OPC Publisher A&E data converted to the Process Control Data Model """ df = self.data.withColumn( @@ -118,7 +118,6 @@ def transform(self) -> DataFrame: ) df = df.select( - col("enqueuedTime").alias("EnqueuedTime"), col("OPCAE.NodeId"), col("OPCAE.DisplayName"), col("OPCAE.Value.ConditionId.Value").alias("ConditionId"), diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py index da33ffa28..98d28967c 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py +++ 
b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py @@ -16,7 +16,7 @@ sys.path.insert(0, ".") from src.sdk.python.rtdip_sdk.pipelines.transformers.spark.opc_publisher_opcae_json_to_pcdm import ( - OPCPublisherOPCUAJsonToPCDMTransformer, + OPCPublisherOPCAEJsonToPCDMTransformer, ) from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import ( Libraries, @@ -44,7 +44,6 @@ def test_opc_publisher_json_to_pcdm(spark_session: SparkSession): expected_schema = StructType( [ - StructField("EnqueuedTime", TimestampType(), True), StructField("NodeId", StringType(), True), StructField("DisplayName", StringType(), True), StructField("ConditionId", StringType(), True), @@ -80,7 +79,6 @@ def test_opc_publisher_json_to_pcdm(spark_session: SparkSession): expected_data = [ { - "EnqueuedTime": datetime.fromisoformat("2023-10-19T13:08:09.049+00:00"), "NodeId": "ns=6;s=MyLevel.Alarm", "DisplayName": "MyLevelAlarm", "ConditionId": "http://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel.Alarm", @@ -117,12 +115,12 @@ def test_opc_publisher_json_to_pcdm(spark_session: SparkSession): expected_df: DataFrame = spark_session.createDataFrame( schema=expected_schema, data=expected_data ) - eventhub_json_to_opcua_transformer = OPCPublisherOPCUAJsonToPCDMTransformer( - opcua_df, source_column_name="body", status_null_value="Good" + eventhub_json_to_opcae_transformer = OPCPublisherOPCAEJsonToPCDMTransformer( + opcua_df, source_column_name="body" ) - actual_df = eventhub_json_to_opcua_transformer.transform() + actual_df = eventhub_json_to_opcae_transformer.transform() - assert eventhub_json_to_opcua_transformer.system_type() == SystemType.PYSPARK - assert isinstance(eventhub_json_to_opcua_transformer.libraries(), Libraries) + assert eventhub_json_to_opcae_transformer.system_type() == SystemType.PYSPARK + assert isinstance(eventhub_json_to_opcae_transformer.libraries(), Libraries) assert expected_schema == actual_df.schema assert 
 expected_df.collect() == actual_df.collect() From f7c0265097dbdc3e88da7598f6eba2c7ba3721a3 Mon Sep 17 00:00:00 2001 From: Amber-Rigg Date: Fri, 15 Dec 2023 14:24:09 +0000 Subject: [PATCH 5/8] Revert to develop settings Signed-off-by: Amber-Rigg --- .vscode/settings.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 334815bf7..2b688f678 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -5,7 +5,11 @@ "azureFunctions.projectRuntime": "~4", "python.testing.pytestArgs": [ - "tests" + "--cache-clear", + "--cov=.", + "--cov-report=xml:cov.xml", + "tests", + "-vv", ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, From 1869928c839fe10c34f57634aece71f8263f235f Mon Sep 17 00:00:00 2001 From: Amber-Rigg Date: Fri, 15 Dec 2023 14:45:24 +0000 Subject: [PATCH 6/8] Alter http to https Signed-off-by: Amber-Rigg --- .../spark/test_opc_publisher_opcae_json_to_pcdm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py index 98d28967c..4bb4369dc 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py @@ -39,7 +39,7 @@ def test_opc_publisher_json_to_pcdm(spark_session: SparkSession): - opcua_json_data = 
'[{"NodeId":"ns=6;s=MyLevel.Alarm","EndpointUrl":"opc.tcp://aewnw00042diot5.europe.shell.com:53530/OPCUA/SimulationServer","DisplayName":"MyLevelAlarm","Value":{"ConditionId":{"Value":"http://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel.Alarm","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState":{"Value":"Unacknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/FalseState":{"Value":"Unacknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/Id":{"Value":false,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/TrueState":{"Value":"Acknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState":{"Value":"Inactive","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/FalseState":{"Value":"Inactive","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/Id":{"Value":false,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/TrueState":{"Value":"Active","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState":{"Value":"Enabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/FalseState":{"Value":"Disabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/Id":{"Value":true,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/TrueState":{"Value":"Enabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EventId":{"Value":"AAAAAAAAGycAAAAAAAAbJg==","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EventType":{"Value":"i=9482","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"HighHighLimit":{"Value":90,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"HighLimit":{"Value":70,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"InputNode":{"Value":null,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"LowLimit":{"Value":30,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"LowLowLimit":{"Value":10,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Message":{"Value":"Level 
exceeded","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Quality":{"Value":null,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ReceiveTime":{"Value":"2023-10-19T13:08:08.503Z","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Retain":{"Value":true,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Severity":{"Value":500,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"SourceName":{"Value":"MyLevel","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"SourceNode":{"Value":"http://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Time":{"Value":"2023-10-19T13:08:08.503Z","SourceTimestamp":"2023-10-19T13:08:08.503Z"}}}]' + opcua_json_data = '[{"NodeId":"ns=6;s=MyLevel.Alarm","EndpointUrl":"opc.tcp://aewnw00042diot5.europe.shell.com:53530/OPCUA/SimulationServer","DisplayName":"MyLevelAlarm","Value":{"ConditionId":{"Value":"https://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel.Alarm","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState":{"Value":"Unacknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/FalseState":{"Value":"Unacknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/Id":{"Value":false,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/TrueState":{"Value":"Acknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState":{"Value":"Inactive","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/FalseState":{"Value":"Inactive","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/Id":{"Value":false,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/TrueState":{"Value":"Active","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState":{"Value":"Enabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/FalseState":{"Value":"Disabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/Id":{"Value":true,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/TrueState":{"Value":"Enabled","SourceTimestamp":"2023-10-19T13:08:0
8.503Z"},"EventId":{"Value":"AAAAAAAAGycAAAAAAAAbJg==","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EventType":{"Value":"i=9482","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"HighHighLimit":{"Value":90,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"HighLimit":{"Value":70,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"InputNode":{"Value":null,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"LowLimit":{"Value":30,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"LowLowLimit":{"Value":10,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Message":{"Value":"Level exceeded","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Quality":{"Value":null,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ReceiveTime":{"Value":"2023-10-19T13:08:08.503Z","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Retain":{"Value":true,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Severity":{"Value":500,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"SourceName":{"Value":"MyLevel","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"SourceNode":{"Value":"https://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Time":{"Value":"2023-10-19T13:08:08.503Z","SourceTimestamp":"2023-10-19T13:08:08.503Z"}}}]' opcua_df: DataFrame = spark_session.createDataFrame([{"body": opcua_json_data}]) expected_schema = StructType( @@ -81,7 +81,7 @@ def test_opc_publisher_json_to_pcdm(spark_session: SparkSession): { "NodeId": "ns=6;s=MyLevel.Alarm", "DisplayName": "MyLevelAlarm", - "ConditionId": "http://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel.Alarm", + "ConditionId": "https://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel.Alarm", "AckedState": "Unacknowledged", "AckedState/FalseState": "Unacknowledged", "AckedState/Id": False, @@ -107,7 +107,7 @@ def test_opc_publisher_json_to_pcdm(spark_session: SparkSession): "Retain": True, "Severity": 500.0, "SourceName": "MyLevel", - "SourceNode": "http://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel", + "SourceNode": 
 "https://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel", "EventTime": datetime.fromisoformat("2023-10-19T13:08:08.503+00:00"), } ] From ba02d831392227acf7c7ea2b88aba9653872a9c7 Mon Sep 17 00:00:00 2001 From: Amber-Rigg Date: Mon, 18 Dec 2023 14:53:07 +0000 Subject: [PATCH 7/8] Removal of Specific Info Signed-off-by: Amber-Rigg --- .../transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py index 4bb4369dc..b9abc1baa 100644 --- a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py +++ b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_opc_publisher_opcae_json_to_pcdm.py @@ -39,7 +39,7 @@ def test_opc_publisher_json_to_pcdm(spark_session: SparkSession): - opcua_json_data = 
'[{"NodeId":"ns=6;s=MyLevel.Alarm","EndpointUrl":"opc.tcp://aewnw00042diot5.europe.shell.com:53530/OPCUA/SimulationServer","DisplayName":"MyLevelAlarm","Value":{"ConditionId":{"Value":"https://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel.Alarm","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState":{"Value":"Unacknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/FalseState":{"Value":"Unacknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/Id":{"Value":false,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/TrueState":{"Value":"Acknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState":{"Value":"Inactive","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/FalseState":{"Value":"Inactive","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/Id":{"Value":false,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/TrueState":{"Value":"Active","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState":{"Value":"Enabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/FalseState":{"Value":"Disabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/Id":{"Value":true,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/TrueState":{"Value":"Enabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EventId":{"Value":"AAAAAAAAGycAAAAAAAAbJg==","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EventType":{"Value":"i=9482","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"HighHighLimit":{"Value":90,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"HighLimit":{"Value":70,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"InputNode":{"Value":null,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"LowLimit":{"Value":30,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"LowLowLimit":{"Value":10,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Message":{"Value":"Level 
exceeded","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Quality":{"Value":null,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ReceiveTime":{"Value":"2023-10-19T13:08:08.503Z","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Retain":{"Value":true,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Severity":{"Value":500,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"SourceName":{"Value":"MyLevel","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"SourceNode":{"Value":"https://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Time":{"Value":"2023-10-19T13:08:08.503Z","SourceTimestamp":"2023-10-19T13:08:08.503Z"}}}]' + opcua_json_data = '[{"NodeId":"ns=6;s=MyLevel.Alarm","EndpointUrl":"opc.tcp://xxxxxxxxx/OPCUA/SimulationServer","DisplayName":"MyLevelAlarm","Value":{"ConditionId":{"Value":"https://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel.Alarm","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState":{"Value":"Unacknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/FalseState":{"Value":"Unacknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/Id":{"Value":false,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"AckedState/TrueState":{"Value":"Acknowledged","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState":{"Value":"Inactive","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/FalseState":{"Value":"Inactive","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/Id":{"Value":false,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ActiveState/TrueState":{"Value":"Active","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState":{"Value":"Enabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/FalseState":{"Value":"Disabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/Id":{"Value":true,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EnabledState/TrueState":{"Value":"Enabled","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EventId":{"Value":
"AAAAAAAAGycAAAAAAAAbJg==","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"EventType":{"Value":"i=9482","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"HighHighLimit":{"Value":90,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"HighLimit":{"Value":70,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"InputNode":{"Value":null,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"LowLimit":{"Value":30,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"LowLowLimit":{"Value":10,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Message":{"Value":"Level exceeded","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Quality":{"Value":null,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"ReceiveTime":{"Value":"2023-10-19T13:08:08.503Z","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Retain":{"Value":true,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Severity":{"Value":500,"SourceTimestamp":"2023-10-19T13:08:08.503Z"},"SourceName":{"Value":"MyLevel","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"SourceNode":{"Value":"https://www.prosysopc.com/OPCUA/SampleAddressSpace#s=MyLevel","SourceTimestamp":"2023-10-19T13:08:08.503Z"},"Time":{"Value":"2023-10-19T13:08:08.503Z","SourceTimestamp":"2023-10-19T13:08:08.503Z"}}}]' opcua_df: DataFrame = spark_session.createDataFrame([{"body": opcua_json_data}]) expected_schema = StructType( From 15032f4176f4337852d923024532318261fe7d10 Mon Sep 17 00:00:00 2001 From: Amber-Rigg Date: Mon, 18 Dec 2023 15:03:11 +0000 Subject: [PATCH 8/8] Update UA to AE Signed-off-by: Amber-Rigg --- .../transformers/spark/opc_publisher_opcae_json_to_pcdm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py index 2a6347f40..7b334ecf9 100644 --- a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py +++ 
b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/opc_publisher_opcae_json_to_pcdm.py @@ -36,7 +36,7 @@ class OPCPublisherOPCAEJsonToPCDMTransformer(TransformerInterface): ```python from rtdip_sdk.pipelines.transformers import OPCPublisherOPCAEJsonToPCDMTransformer - opc_publisher_opcua_json_to_pcdm_transformer = OPCPublisherOPCAEJsonToPCDMTransformer( + opc_publisher_opcae_json_to_pcdm_transformer = OPCPublisherOPCAEJsonToPCDMTransformer( data=df, souce_column_name="body", timestamp_formats=[