Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Convert Honeywell APM Json to Process Control Data Model
::: src.sdk.python.rtdip_sdk.pipelines.transformers.spark.honeywell_apm_to_pcdm
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Convert Process Control Data Model to Honeywell APM Json
::: src.sdk.python.rtdip_sdk.pipelines.transformers.spark.pcdm_to_honeywell_apm
2 changes: 2 additions & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,8 @@ nav:
- Pandas to PySpark DataFrame Conversion: sdk/code-reference/pipelines/transformers/spark/pandas_to_pyspark.md
- PySpark to Pandas DataFrame Conversion: sdk/code-reference/pipelines/transformers/spark/pyspark_to_pandas.md
- Base Raw To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/base_raw_to_mdm.md
- Honeywell APM To Process Control Data Model: sdk/code-reference/pipelines/transformers/spark/honeywell_apm_to_pcdm.md
- Process Control Data Model To Honeywell APM: sdk/code-reference/pipelines/transformers/spark/pcdm_to_honeywell_apm.md
- ISO:
- MISO To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/miso_to_mdm.md
- PJM To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/pjm_to_mdm.md
Expand Down
20 changes: 19 additions & 1 deletion src/sdk/python/rtdip_sdk/pipelines/_pipeline_utils/spark.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

import logging
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, TimestampType, StringType, BinaryType, LongType, MapType, IntegerType, ArrayType
from pyspark.sql.types import StructType, StructField, TimestampType, StringType, BinaryType, LongType, MapType, IntegerType, ArrayType, DoubleType

from .models import Libraries
from ..._sdk_utils.compare_versions import _package_version_meets_minimum
Expand Down Expand Up @@ -185,4 +185,22 @@ def get_dbutils(
StructField('valueType', StringType(), True),
StructField('value', StringType(), True)]))
, True)
])

# Spark schema used to parse a Honeywell APM json message: three string
# envelope fields plus a "Samples" array of per-item readings. Every field is
# declared nullable.
APM_SCHEMA = (
    StructType()
    .add('Id', StringType(), True)
    .add('TenantId', StringType(), True)
    .add('IdType', StringType(), True)
    .add(
        'Samples',
        ArrayType(
            StructType()
            .add('ItemName', StringType(), True)
            .add('Time', StringType(), True)
            .add('Value', StringType(), True)
            .add('Unit', StringType(), True)
            .add('NormalizedQuality', StringType(), True)
            .add('HighValue', DoubleType(), True)
            .add('LowValue', DoubleType(), True)
            .add('TargetValue', DoubleType(), True)
        ),
        True,
    )
)
2 changes: 2 additions & 0 deletions src/sdk/python/rtdip_sdk/pipelines/transformers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,5 @@
from .spark.ssip_pi_binary_json_to_pcdm import *
from .spark.iso import *
from .spark.edgex_opcua_json_to_pcdm import *
from .spark.pcdm_to_honeywell_apm import *
from .spark.honeywell_apm_to_pcdm import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyspark.sql import DataFrame
from pyspark.sql.functions import from_json, col, explode, when, lit , regexp_replace

from ..interfaces import TransformerInterface
from ..._pipeline_utils.models import Libraries, SystemType
from ..._pipeline_utils.spark import APM_SCHEMA

class HoneywellAPMJsonToPCDMTransformer(TransformerInterface):
    '''
    Converts a Spark Dataframe column containing a json string created by Honeywell APM to the Process Control Data Model

    Args:
        data (DataFrame): Dataframe containing the column with Honeywell APM json data
        source_column_name (str): Spark Dataframe column containing the Honeywell APM json string
        status_null_value (optional str): Value written to the Status column of every output row. Defaults to 'Good'.
        change_type_value (optional str): Value written to the ChangeType column of every output row. Defaults to 'insert'.
    '''
    data: DataFrame
    source_column_name: str
    status_null_value: str
    change_type_value: str

    def __init__(self, data: DataFrame, source_column_name: str, status_null_value: str = "Good", change_type_value: str = "insert") -> None:
        self.data = data
        self.source_column_name = source_column_name
        self.status_null_value = status_null_value
        self.change_type_value = change_type_value

    @staticmethod
    def system_type():
        '''
        Attributes:
            SystemType (Environment): Requires PYSPARK
        '''
        return SystemType.PYSPARK

    @staticmethod
    def libraries():
        '''
        Returns:
            Libraries: A default-constructed Libraries instance; nothing is added to it here
        '''
        libraries = Libraries()
        return libraries

    @staticmethod
    def settings() -> dict:
        '''
        Returns:
            dict: An empty dictionary; this transformer defines no settings
        '''
        return {}

    def pre_transform_validation(self):
        # No validation is performed; always reports success.
        return True

    def post_transform_validation(self):
        # No validation is performed; always reports success.
        return True

    def transform(self) -> DataFrame:
        '''
        Returns:
            DataFrame: A dataframe with the specified column converted to PCDM
        '''
        # Parse the json string with APM_SCHEMA and emit one row per element of
        # the Samples array. explode() is not aliased, so the struct holding
        # each sample is addressed below as "col".
        samples_df = (
            self.data
            .withColumn("body", from_json(self.source_column_name, APM_SCHEMA))
            .select(explode("body.Samples"))
        )

        # Field and column names are spelled exactly as declared (ItemName,
        # Value) so resolution does not rely on Spark's case-insensitive
        # analysis being enabled.
        # NOTE(review): ValueType is written as two when() branches rather than
        # when().otherwise(); switching may change the column's nullability in
        # the output schema - confirm before simplifying.
        df = (
            samples_df
            .selectExpr("*", "to_timestamp(col.Time) as EventTime")
            .withColumn("TagName", col("col.ItemName"))
            .withColumn("Status", lit(self.status_null_value))
            .withColumn("Value", col("col.Value"))
            .withColumn(
                "ValueType",
                when(col("Value").cast("float").isNotNull(), "float")
                .when(col("Value").cast("float").isNull(), "string"),
            )
            .withColumn("ChangeType", lit(self.change_type_value))
        )

        return df.select("TagName", "EventTime", "Status", "Value", "ValueType", "ChangeType")
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyspark.sql import DataFrame
from pyspark.sql.functions import to_json, col, struct, create_map, lit, array, monotonically_increasing_id, floor, row_number, collect_list, expr
from pyspark.sql import Window
from datetime import datetime
import pytz
import uuid

from ..interfaces import TransformerInterface
from ..._pipeline_utils.models import Libraries, SystemType
from ..._pipeline_utils.spark import EDGEX_SCHEMA

class PCDMToHoneywellAPMTransformer(TransformerInterface):
    '''
    Converts a Spark Dataframe in PCDM format to Honeywell APM format.

    Args:
        data (Dataframe): Spark Dataframe in PCDM format
        quality (str): Value for quality inside HistorySamples
        history_samples_per_message (int): The number of HistorySamples for each row in the DataFrame
    '''
    data: DataFrame
    quality: str
    history_samples_per_message: int

    def __init__(self, data: DataFrame, quality: str = "Good", history_samples_per_message: int = 1) -> None:
        self.data = data
        self.quality = quality
        self.history_samples_per_message = history_samples_per_message

    @staticmethod
    def system_type():
        '''
        Attributes:
            SystemType (Environment): Requires PYSPARK
        '''
        return SystemType.PYSPARK

    @staticmethod
    def libraries():
        '''
        Returns:
            Libraries: A default-constructed Libraries instance; nothing is added to it here
        '''
        libraries = Libraries()
        return libraries

    @staticmethod
    def settings() -> dict:
        '''
        Returns:
            dict: An empty dictionary; this transformer defines no settings
        '''
        return {}

    def pre_transform_validation(self):
        # No validation is performed; always reports success.
        return True

    def post_transform_validation(self):
        # No validation is performed; always reports success.
        return True

    def transform(self) -> DataFrame:
        '''
        Returns:
            DataFrame: A dataframe with rows in Honeywell APM format
        '''
        # Give every input row an increasing id so the rows can be numbered.
        pcdm_df = self.data.withColumn("counter", monotonically_increasing_id())

        # NOTE: the window has no partitionBy, so numbering is done over the
        # whole DataFrame as a single ordered sequence.
        w = Window.orderBy("counter")

        # row_number() is 1-based; (n - 1) / batch size, floored, is the
        # 0-based index of the message that the row belongs to.
        batch_index = floor((row_number().over(w) - 1) / self.history_samples_per_message)

        history_sample = struct(
            col("TagName").alias("ItemName"),
            lit(self.quality).alias("Quality"),
            col("EventTime").alias("Time"),
            col("Value").alias("Value"),
        )

        indexed_pcdm_df = (
            pcdm_df
            .withColumn("index", batch_index)
            .withColumn("HistorySamples", history_sample)
            .groupBy("index")
            .agg(collect_list("HistorySamples").alias("HistorySamples"))
            .withColumn("guid", expr("uuid()"))
            .withColumn("value", struct(col("guid").alias("SystemGuid"), col("HistorySamples")))
        )

        # datetime.now() is evaluated here, when transform() is called, so the
        # same CreatedTime literal is embedded in every output row.
        cloud_platform_event = struct(
            lit(datetime.now(tz=pytz.UTC)).alias("CreatedTime"),
            expr("uuid()").alias("Id"),
            col("guid").alias("CreatorId"),
            lit("CloudPlatformSystem").alias("CreatorType"),
            lit(None).alias("GeneratorId"),
            lit("CloudPlatformTenant").alias("GeneratorType"),
            col("guid").alias("TargetId"),
            lit("CloudPlatformTenant").alias("TargetType"),
            lit(None).alias("TargetContext"),
            struct(
                lit("TextualBody").alias("type"),
                to_json(col("value")).alias("value"),
                lit("application/json").alias("format"),
            ).alias("Body"),
            array(
                struct(lit("SystemType").alias("Key"), lit("apm-system").alias("Value")),
                struct(lit("SystemGuid").alias("Key"), col("guid").alias("Value")),
            ).alias("BodyProperties"),
            lit("DataChange.Update").alias("EventType"),
        )

        # AnnotationStreamIds was previously filled with the literal text
        # "self.AnnotationStreamIds" (an attribute reference left inside a
        # string; no such attribute exists on this class). "," is the value
        # used for this field in the reference message of the unit test.
        df = (
            indexed_pcdm_df
            .withColumn(
                "CloudPlatformEvent",
                create_map(lit("CloudPlatformEvent"), cloud_platform_event),
            )
            .withColumn("AnnotationStreamIds", lit(","))
        )

        return df.select("CloudPlatformEvent", "AnnotationStreamIds")
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
sys.path.insert(0, '.')
from src.sdk.python.rtdip_sdk.pipelines.transformers.spark.honeywell_apm_to_pcdm import HoneywellAPMJsonToPCDMTransformer
from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType

from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.types import StructType, StructField, StringType, TimestampType
from datetime import datetime, timezone

def test_honeywell_apm_to_pcdm(spark_session: SparkSession):
    '''
    Transforms one Honeywell APM json message holding three samples and checks
    the resulting schema and rows against the expected PCDM output.
    '''
    apm_payload = '{"Id": "testId","TenantId": "testTenantId","IdType": "calculatedpoint","Samples": [{"ItemName": "test.item1", "Time": "2023-07-31T06:53:00+00:00","Value": "5.0","Unit": null,"NormalizedQuality": "good", "HighValue": null,"LowValue": null,"TargetValue": null},{"ItemName": "test_item2","Time": "2023-07-31T06:53:00+00:00","Value": 0.0,"Unit": null,"NormalizedQuality": "good","HighValue": null,"LowValue": null,"TargetValue": null},{"ItemName": "testItem3","Time": "2023-07-31T06:53:00.205+00:00","Value": "test_string","Unit": null,"NormalizedQuality": "good","HighValue": null,"LowValue": null,"TargetValue": null}]}'
    source_df: DataFrame = spark_session.createDataFrame([{"body": apm_payload}])

    # (column name, Spark type, nullable) for each PCDM output column.
    column_specs = [
        ("TagName", StringType, True),
        ("EventTime", TimestampType, True),
        ("Status", StringType, False),
        ("Value", StringType, True),
        ("ValueType", StringType, True),
        ("ChangeType", StringType, False),
    ]
    expected_schema = StructType([
        StructField(name, spark_type(), nullable)
        for name, spark_type, nullable in column_specs
    ])

    # (tag, ISO timestamp, value, value type) per expected row; Status and
    # ChangeType are the transformer defaults on every row.
    sample_specs = [
        ("test.item1", "2023-07-31T06:53:00+00:00", 5.0, "float"),
        ("test_item2", "2023-07-31T06:53:00+00:00", 0.0, "float"),
        ("testItem3", "2023-07-31T06:53:00.205+00:00", "test_string", "string"),
    ]
    expected_data = [
        {
            "TagName": tag,
            "EventTime": datetime.fromisoformat(timestamp),
            "Status": "Good",
            "Value": value,
            "ValueType": value_type,
            "ChangeType": "insert",
        }
        for tag, timestamp, value, value_type in sample_specs
    ]
    expected_df: DataFrame = spark_session.createDataFrame(
        data=expected_data,
        schema=expected_schema,
    )

    transformer = HoneywellAPMJsonToPCDMTransformer(data=source_df, source_column_name="body")
    actual_df = transformer.transform()

    assert transformer.system_type() == SystemType.PYSPARK
    assert isinstance(transformer.libraries(), Libraries)
    assert expected_schema == actual_df.schema
    assert expected_df.collect() == actual_df.collect()
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
sys.path.insert(0, '.')
import os
os.environ['PYSPARK_PYTHON'] = sys.executable
os.environ['PYSPARK_DRIVER_PYTHON'] = sys.executable

from src.sdk.python.rtdip_sdk.pipelines.transformers.spark.pcdm_to_honeywell_apm import PCDMToHoneywellAPMTransformer
from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import Libraries, SystemType

from pyspark.sql import SparkSession, DataFrame
from pytest_mock import MockerFixture
from pyspark.sql.types import StructType, StructField, StringType, TimestampType
from pyspark.sql.functions import regexp_replace
from datetime import datetime
import json

def test_pcdm_to_honeywell_apm(spark_session: SparkSession, mocker: MockerFixture):
    '''
    Transforms two PCDM rows with a batch size of three and checks the shape of
    the single Honeywell APM message that is produced.
    '''
    pcdm_schema = StructType([
        StructField("TagName", StringType(), True),
        StructField("EventTime", TimestampType(), True),
        StructField("Status", StringType(), False),
        StructField("Value", StringType(), True),
        StructField("ValueType", StringType(), False),
        StructField("ChangeType", StringType(), False),
    ])

    pcdm_data = [
        {"TagName":"test.item1", "EventTime": datetime.fromisoformat("2023-07-31T06:53:00+00:00"), "Status":"Good", "Value": 5.0, "ValueType":"float", "ChangeType": "insert"},
        {"TagName":"Test_item2", "EventTime": datetime.fromisoformat("2023-07-31T06:54:00+00:00"), "Status":"Good", "Value": 1, "ValueType":"float", "ChangeType": "insert"},
    ]

    pcdm_df: DataFrame = spark_session.createDataFrame(
        schema=pcdm_schema,
        data=pcdm_data
    )

    # Reference message. Ids and timestamps are generated at run time by the
    # transformer, so only the field names of this fixture are compared.
    honeywell_json_data = {
        "CloudPlatformEvent": {
            "CreatedTime": "2023-08-10T06:53:00+00:00",
            "Id": "2b2a64f6-bfee-49f5-9d1b-04df844e80be",
            "CreatorId": "065a7343-a3b5-4ecd-9bac-19cdff5cf048",
            "CreatorType": "CloudPlatformSystem",
            "GeneratorId": None,
            "GeneratorType": "CloudPlatformTenant",
            "TargetId": "065a7343-a3b5-4ecd-9bac-19cdff5cf048",
            "TargetType": "CloudPlatformTenant",
            "TargetContext": None,
            "Body": {
                "type": "TextualBody",
                "value": "{\"SystemGuid\":\"065a7343-a3b5-4ecd-9bac-19cdff5cf048\",\"HistorySamples\":[{\"ItemName\":\"test.item1\",\"Quality\":\"Good\",\"Time\":\"2023-07-31T06:53:00+00:00\",\"Value\":5},{\"ItemName\":\"Test_item2\",\"Quality\":\"Good\",\"Time\":\"2023-07-31T06:54:00+00:00\",\"Value\":1}]}",
                "format": "application/json",
            },
            "BodyProperties": [
                {"Key": "SystemType", "Value": "apm-system"},
                {"Key": "SystemGuid", "Value": "065a7343-a3b5-4ecd-9bac-19cdff5cf048"},
            ],
            "EventType": "DataChange.Update",
        },
        "AnnotationStreamIds": ",",
    }
    expected_event = honeywell_json_data["CloudPlatformEvent"]

    PCDM_to_honeywell_eventhub_json_transformer = PCDMToHoneywellAPMTransformer(data=pcdm_df, history_samples_per_message=3)

    actual_df = PCDM_to_honeywell_eventhub_json_transformer.transform()
    actual_rows = actual_df.collect()

    # Two input rows with a batch size of three fit into one message.
    assert len(actual_rows) == 1

    # The CloudPlatformEvent column is a single-entry map keyed by
    # "CloudPlatformEvent"; its value is the event struct.
    event_map = actual_rows[0]['CloudPlatformEvent']
    event = event_map["CloudPlatformEvent"]

    assert PCDM_to_honeywell_eventhub_json_transformer.system_type() == SystemType.PYSPARK
    assert isinstance(PCDM_to_honeywell_eventhub_json_transformer.libraries(), Libraries)
    assert len(event_map) == 1
    assert len(event) == 12
    assert set(event.asDict()) == set(expected_event)
    assert len(event["Body"]) == 3
    assert set(event["Body"].asDict()) == set(expected_event["Body"])
    assert len(event["BodyProperties"]) == 2
    assert len(event["BodyProperties"][0]) == 2
    assert len(event["BodyProperties"][1]) == 2