rtdip · GBBBAS · Aug 20, 2023 · Apr 12, 2023 · Aug 3, 2023 · Aug 7, 2023
diff --git a/docs/sdk/code-reference/pipelines/transformers/spark/honeywell_apm_to_pcdm.md b/docs/sdk/code-reference/pipelines/transformers/spark/honeywell_apm_to_pcdm.md
@@ -0,0 +1,2 @@
+# Convert Honeywell APM Json to Process Control Data Model
+::: src.sdk.python.rtdip_sdk.pipelines.transformers.spark.honeywell_apm_to_pcdm
diff --git a/docs/sdk/code-reference/pipelines/transformers/spark/pcdm_to_honeywell_apm.md b/docs/sdk/code-reference/pipelines/transformers/spark/pcdm_to_honeywell_apm.md
@@ -0,0 +1,2 @@
+# Convert Process Control Data Model to Honeywell APM Json
+::: src.sdk.python.rtdip_sdk.pipelines.transformers.spark.pcdm_to_honeywell_apm
diff --git a/mkdocs.yml b/mkdocs.yml
@@ -187,6 +187,8 @@ nav:
                       - Pandas to PySpark DataFrame Conversion: sdk/code-reference/pipelines/transformers/spark/pandas_to_pyspark.md
                       - PySpark to Pandas DataFrame Conversion: sdk/code-reference/pipelines/transformers/spark/pyspark_to_pandas.md
                       - Base Raw To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/base_raw_to_mdm.md
+                      - Honeywell APM To Process Control Data Model: sdk/code-reference/pipelines/transformers/spark/honeywell_apm_to_pcdm.md
+                      - Process Control Data Model To Honeywell APM: sdk/code-reference/pipelines/transformers/spark/pcdm_to_honeywell_apm.md
                       - ISO:
                           - MISO To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/miso_to_mdm.md
                           - PJM To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/pjm_to_mdm.md

diff --git a/src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py b/src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py
@@ -14,7 +14,7 @@
 
 import logging
 from pyspark.sql import SparkSession
-from pyspark.sql.types import StructType, StructField, TimestampType, StringType, BinaryType, LongType, MapType, IntegerType, ArrayType
+from pyspark.sql.types import StructType, StructField, TimestampType, StringType, BinaryType, LongType, MapType, IntegerType, ArrayType, DoubleType
 
 from .models import Libraries
 from ..._sdk_utils.compare_versions import _package_version_meets_minimum
@@ -185,4 +185,22 @@ def get_dbutils(
       StructField('valueType', StringType(), True),
       StructField('value', StringType(), True)]))
   , True)
+])
+
+APM_SCHEMA = StructType([  # schema handed to from_json when parsing a Honeywell APM json message
+    StructField('Id', StringType(), True),
+    StructField('TenantId', StringType(), True),
+    StructField('IdType', StringType(), True),
+    StructField('Samples', ArrayType(  # one array element per sample; the APM to PCDM transformer explodes it
+        StructType([
+            StructField('ItemName', StringType(), True),
+            StructField('Time', StringType(), True),  # read as text; the APM to PCDM transformer applies to_timestamp
+            StructField('Value', StringType(), True),  # read as text; the transformer derives ValueType with a float cast
+            StructField('Unit', StringType(), True),
+            StructField('NormalizedQuality', StringType(), True),
+            StructField('HighValue', DoubleType(), True),
+            StructField('LowValue', DoubleType(), True),
+            StructField('TargetValue', DoubleType(), True),
+        ])
+    ), True),
 ])
diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/__init__.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/__init__.py
@@ -19,3 +19,5 @@
 from .spark.ssip_pi_binary_json_to_pcdm import *
 from .spark.iso import *
 from .spark.edgex_opcua_json_to_pcdm import *
+from .spark.pcdm_to_honeywell_apm import *
+from .spark.honeywell_apm_to_pcdm import *
diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/honeywell_apm_to_pcdm.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/honeywell_apm_to_pcdm.py
@@ -0,0 +1,79 @@
+# Copyright 2022 RTDIP
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyspark.sql import DataFrame
+from pyspark.sql.functions import from_json, col, explode, when, lit , regexp_replace
+
+from ..interfaces import TransformerInterface
+from ..._pipeline_utils.models import Libraries, SystemType
+from ..._pipeline_utils.spark import APM_SCHEMA
+
+class HoneywellAPMJsonToPCDMTransformer(TransformerInterface):
+    '''
+    Converts a Spark Dataframe column containing a json string created by Honeywell APM to the Process Control Data Model
+
+    Args:
+        data (DataFrame): Dataframe containing the column with Honeywell APM json data
+        source_column_name (str): Spark Dataframe column containing the Honeywell APM json string
+        status_null_value (optional str): Value written to the Status column of every row. Defaults to 'Good'.
+        change_type_value (optional str): Value written to the ChangeType column of every row. Defaults to 'insert'.
+    '''
+    data: DataFrame
+    source_column_name: str
+    status_null_value: str
+    change_type_value: str
+
+    def __init__(self, data: DataFrame, source_column_name: str, status_null_value: str = "Good", change_type_value: str = "insert") -> None:
+        self.data = data
+        self.source_column_name = source_column_name
+        self.status_null_value = status_null_value
+        self.change_type_value = change_type_value
+
+    @staticmethod
+    def system_type():
+        '''
+        Attributes:
+            SystemType (Environment): Requires PYSPARK
+        '''
+        return SystemType.PYSPARK
+
+    @staticmethod
+    def libraries():
+        return Libraries()
+
+    @staticmethod
+    def settings() -> dict:
+        return {}
+
+    def pre_transform_validation(self):
+        return True
+
+    def post_transform_validation(self):
+        return True
+
+    def transform(self) -> DataFrame:
+        '''
+        Returns:
+            DataFrame: One row per APM sample with the columns TagName, EventTime, Status, Value, ValueType and ChangeType
+        '''
+        # explode() emits each sample as a struct column named "col"; names below match their declared case so the
+        # lookups still resolve with spark.sql.caseSensitive=true. A failed float cast marks the value as "string".
+        samples_df = self.data.withColumn("body", from_json(self.source_column_name, APM_SCHEMA)).select(explode("body.Samples"))
+        df = (samples_df.selectExpr("*", "to_timestamp(col.Time) as EventTime")
+        .withColumn("TagName", col("col.ItemName"))
+        .withColumn("Status", lit(self.status_null_value))
+        .withColumn("Value", col("col.Value"))
+        .withColumn("ValueType", when(col("Value").cast("float").isNotNull(), "float").when(col("Value").cast("float").isNull(), "string"))
+        .withColumn("ChangeType", lit(self.change_type_value)))
+        return df.select("TagName", "EventTime", "Status", "Value", "ValueType", "ChangeType")
diff --git a/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/pcdm_to_honeywell_apm.py b/src/sdk/python/rtdip_sdk/pipelines/transformers/spark/pcdm_to_honeywell_apm.py
@@ -0,0 +1,101 @@
+# Copyright 2022 RTDIP
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyspark.sql import DataFrame
+from pyspark.sql.functions import to_json, col, struct, create_map, lit, array, monotonically_increasing_id, floor, row_number, collect_list, expr
+from pyspark.sql import Window
+from datetime import datetime
+import pytz
+import uuid
+
+from ..interfaces import TransformerInterface
+from ..._pipeline_utils.models import Libraries, SystemType
+from ..._pipeline_utils.spark import EDGEX_SCHEMA
+
+class PCDMToHoneywellAPMTransformer(TransformerInterface):
+    '''
+    Converts a Spark Dataframe in PCDM format to Honeywell APM format.
+
+    Args:
+        data (Dataframe): Spark Dataframe in PCDM format
+        quality (str): Value for quality inside HistorySamples
+        history_samples_per_message (int): The number of HistorySamples for each row in the DataFrame
+    '''
+    data: DataFrame
+    quality: str
+    history_samples_per_message: int
+
+    def __init__(self, data: DataFrame, quality: str = "Good", history_samples_per_message: int = 1) -> None:
+        self.data = data
+        self.quality = quality
+        self.history_samples_per_message = history_samples_per_message
+
+    @staticmethod
+    def system_type():
+        '''
+        Attributes:
+            SystemType (Environment): Requires PYSPARK
+        '''
+        return SystemType.PYSPARK
+
+    @staticmethod
+    def libraries():
+        return Libraries()
+
+    @staticmethod
+    def settings() -> dict:
+        return {}
+
+    def pre_transform_validation(self):
+        return True
+
+    def post_transform_validation(self):
+        return True
+
+    def transform(self) -> DataFrame:
+        '''
+        Returns:
+            DataFrame: A dataframe with rows in Honeywell APM format
+        '''
+        # Number the rows, then give rows 1..n index 0, rows n+1..2n index 1, ... (n = history_samples_per_message).
+        # NOTE(review): the window has no partitionBy, so the numbering presumably runs in a single partition.
+        pcdm_df = self.data.withColumn("counter", monotonically_increasing_id())
+        message_index = floor((row_number().over(Window.orderBy("counter")) - 1) / self.history_samples_per_message)
+        history_sample = struct(
+            col("TagName").alias("ItemName"),
+            lit(self.quality).alias("Quality"),
+            col("EventTime").alias("Time"),
+            col("Value").alias("Value"))
+        # One output row per index: its samples collected into a list, plus a generated guid reused across the event.
+        indexed_pcdm_df = (pcdm_df.withColumn("index", message_index).withColumn("HistorySamples", history_sample)
+            .groupBy("index").agg(collect_list("HistorySamples").alias("HistorySamples"))
+            .withColumn("guid", expr("uuid()"))
+            .withColumn("value", struct(col("guid").alias("SystemGuid"), col("HistorySamples"))))
+        cloud_platform_event = struct(
+            lit(datetime.now(tz=pytz.UTC)).alias("CreatedTime"),  # computed once per transform() call, shared by all rows
+            expr("uuid()").alias("Id"),
+            col("guid").alias("CreatorId"),
+            lit("CloudPlatformSystem").alias("CreatorType"),
+            lit(None).alias("GeneratorId"),
+            lit("CloudPlatformTenant").alias("GeneratorType"),
+            col("guid").alias("TargetId"),
+            lit("CloudPlatformTenant").alias("TargetType"),
+            lit(None).alias("TargetContext"),
+            struct(lit("TextualBody").alias("type"), to_json(col("value")).alias("value"), lit("application/json").alias("format")).alias("Body"),
+            array(struct(lit("SystemType").alias("Key"), lit("apm-system").alias("Value")),
+                struct(lit("SystemGuid").alias("Key"), col("guid").alias("Value"))).alias("BodyProperties"),
+            lit("DataChange.Update").alias("EventType"))
+        df = (indexed_pcdm_df.withColumn("CloudPlatformEvent", create_map(lit("CloudPlatformEvent"), cloud_platform_event))
+            .withColumn("AnnotationStreamIds", lit(",")))  # NOTE(review): "," follows the sample APM message in the tests; confirm
+        return df.select("CloudPlatformEvent", "AnnotationStreamIds")
diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_honeywell_apm_to_pcdm.py b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_honeywell_apm_to_pcdm.py
@@ -0,0 +1,54 @@
+# Copyright 2022 RTDIP
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+sys.path.insert(0, '.')
+from src.sdk.python.rtdip_sdk.pipelines.transformers.spark.honeywell_apm_to_pcdm import HoneywellAPMJsonToPCDMTransformer
+from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType
+
+from pyspark.sql import SparkSession, DataFrame
+from pyspark.sql.types import StructType, StructField, StringType, TimestampType
+from datetime import datetime, timezone
+
+def test_honeywell_apm_to_pcdm(spark_session: SparkSession):
+    '''Checks that one APM json message holding three samples becomes three PCDM rows with the expected schema.'''
+    # The three sample values are a numeric string, a bare number and free text.
+    apm_message = '{"Id": "testId","TenantId": "testTenantId","IdType": "calculatedpoint","Samples": [{"ItemName": "test.item1", "Time": "2023-07-31T06:53:00+00:00","Value": "5.0","Unit": null,"NormalizedQuality": "good", "HighValue": null,"LowValue": null,"TargetValue": null},{"ItemName": "test_item2","Time": "2023-07-31T06:53:00+00:00","Value": 0.0,"Unit": null,"NormalizedQuality": "good","HighValue": null,"LowValue": null,"TargetValue": null},{"ItemName": "testItem3","Time": "2023-07-31T06:53:00.205+00:00","Value": "test_string","Unit": null,"NormalizedQuality": "good","HighValue": null,"LowValue": null,"TargetValue": null}]}'
+    source_df: DataFrame = spark_session.createDataFrame([{"body": apm_message}])
+    transformer = HoneywellAPMJsonToPCDMTransformer(data=source_df, source_column_name="body")
+
+    pcdm_schema = StructType([
+        StructField(name, data_type, nullable)
+        for name, data_type, nullable in (
+            ("TagName", StringType(), True),
+            ("EventTime", TimestampType(), True),
+            ("Status", StringType(), False),
+            ("Value", StringType(), True),
+            ("ValueType", StringType(), True),
+            ("ChangeType", StringType(), False),
+        )
+    ])
+
+    pcdm_rows = [
+        {"TagName": "test.item1", "EventTime": datetime.fromisoformat("2023-07-31T06:53:00+00:00"), "Status": "Good", "Value": 5.0, "ValueType": "float", "ChangeType": "insert"},
+        {"TagName": "test_item2", "EventTime": datetime.fromisoformat("2023-07-31T06:53:00+00:00"), "Status": "Good", "Value": 0.0, "ValueType": "float", "ChangeType": "insert"},
+        {"TagName": "testItem3", "EventTime": datetime.fromisoformat("2023-07-31T06:53:00.205+00:00"), "Status": "Good", "Value": "test_string", "ValueType": "string", "ChangeType": "insert"},
+    ]
+    expected_df: DataFrame = spark_session.createDataFrame(schema=pcdm_schema, data=pcdm_rows)
+    actual_df = transformer.transform()
+
+    assert transformer.system_type() == SystemType.PYSPARK
+    assert isinstance(transformer.libraries(), Libraries)
+    assert pcdm_schema == actual_df.schema
+    assert expected_df.collect() == actual_df.collect()
diff --git a/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_pcdm_to_honeywell_apm.py b/tests/sdk/python/rtdip_sdk/pipelines/transformers/spark/test_pcdm_to_honeywell_apm.py
@@ -0,0 +1,66 @@
+# Copyright 2022 RTDIP
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
+sys.path.insert(0, '.')
+import os
+os.environ['PYSPARK_PYTHON'] = sys.executable
+os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable
+
+from src.sdk.python.rtdip_sdk.pipelines.transformers.spark.pcdm_to_honeywell_apm import PCDMToHoneywellAPMTransformer
+from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType
+
+from pyspark.sql import SparkSession, DataFrame
+from pytest_mock import MockerFixture
+from pyspark.sql.types import StructType, StructField, StringType, TimestampType
+from pyspark.sql.functions import regexp_replace
+from datetime import datetime
+import json
+
+def test_pcdm_to_honeywell_apm(spark_session: SparkSession, mocker: MockerFixture):
+    '''Checks that two PCDM rows are packed into a single Honeywell APM CloudPlatformEvent message.'''
+
+    pcdm_schema = StructType([
+        StructField("TagName", StringType(), True),
+        StructField("EventTime", TimestampType(), True),
+        StructField("Status", StringType(), False),
+        StructField("Value", StringType(), True),
+        StructField("ValueType", StringType(), False),
+        StructField("ChangeType", StringType(), False),
+    ])
+
+    pcdm_data = [
+        {"TagName":"test.item1", "EventTime": datetime.fromisoformat("2023-07-31T06:53:00+00:00"), "Status":"Good", "Value": 5.0, "ValueType":"float", "ChangeType": "insert"},
+        {"TagName":"Test_item2", "EventTime": datetime.fromisoformat("2023-07-31T06:54:00+00:00"), "Status":"Good", "Value": 1, "ValueType":"float", "ChangeType": "insert"},
+    ]
+
+    pcdm_df: DataFrame = spark_session.createDataFrame(schema=pcdm_schema, data=pcdm_data)
+    # Reference APM message: ids and timestamps are generated at run time, so only its field names are compared.
+    honeywell_json_data = {"CloudPlatformEvent": {"CreatedTime": "2023-08-10T06:53:00+00:00","Id": "2b2a64f6-bfee-49f5-9d1b-04df844e80be","CreatorId": "065a7343-a3b5-4ecd-9bac-19cdff5cf048","CreatorType": "CloudPlatformSystem","GeneratorId": None,"GeneratorType": "CloudPlatformTenant","TargetId": "065a7343-a3b5-4ecd-9bac-19cdff5cf048","TargetType": "CloudPlatformTenant","TargetContext": None,"Body": {"type": "TextualBody","value": "{\"SystemGuid\":\"065a7343-a3b5-4ecd-9bac-19cdff5cf048\",\"HistorySamples\":[{\"ItemName\":\"test.item1\",\"Quality\":\"Good\",\"Time\":\"2023-07-31T06:53:00+00:00\",\"Value\":5},{\"ItemName\":\"Test_item2\",\"Quality\":\"Good\",\"Time\":\"2023-07-31T06:54:00+00:00\",\"Value\":1}]}","format": "application/json"},"BodyProperties":[{"Key": "SystemType","Value": "apm-system"},{"Key": "SystemGuid","Value": "065a7343-a3b5-4ecd-9bac-19cdff5cf048"}],"EventType": "DataChange.Update"},"AnnotationStreamIds": ","}
+    transformer = PCDMToHoneywellAPMTransformer(data=pcdm_df, history_samples_per_message=3)
+
+    rows = transformer.transform().collect()
+    event_map = rows[0]["CloudPlatformEvent"]  # map column holding the single key "CloudPlatformEvent"
+
+    assert transformer.system_type() == SystemType.PYSPARK
+    assert isinstance(transformer.libraries(), Libraries)
+    assert len(rows) == 1  # both samples fit in one message because history_samples_per_message=3
+    assert len(event_map) == 1
+    event = event_map["CloudPlatformEvent"]
+    assert len(event) == 12
+    assert set(event.asDict()) == set(honeywell_json_data["CloudPlatformEvent"])
+    assert len(event["Body"]) == 3
+    assert {sample["ItemName"] for sample in json.loads(event["Body"]["value"])["HistorySamples"]} == {"test.item1", "Test_item2"}
+    assert len(event["BodyProperties"]) == 2
+    assert len(event["BodyProperties"][0]) == 2
+    assert len(event["BodyProperties"][1]) == 2
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Convert Honeywell APM Json to Process Control Data Model
		::: src.sdk.python.rtdip_sdk.pipelines.transformers.spark.honeywell_apm_to_pcdm
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Convert Process Control Data Model to Honeywell APM Json
		::: src.sdk.python.rtdip_sdk.pipelines.transformers.spark.pcdm_to_honeywell_apm