Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
::: src.sdk.python.rtdip_sdk.pipelines.transformers.spark.iso.ercot_to_mdm
1 change: 1 addition & 0 deletions docs/sdk/pipelines/components.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ Transformers are components that perform transformations on data. These will tar
|[MISO To Meters Data Model](../code-reference/pipelines/transformers/spark/iso/miso_to_mdm.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|
|[Raw Forecast to Weather Data Model](../code-reference/pipelines/transformers/spark/the_weather_company/raw_forecast_to_weather_data_model.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|
|[PJM To Meters Data Model](../code-reference/pipelines/transformers/spark/iso/pjm_to_mdm.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|
|[ERCOT To Meters Data Model](../code-reference/pipelines/transformers/spark/iso/ercot_to_mdm.md)||:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|:heavy_check_mark:|

!!! note "Note"
    This list will dynamically change as the framework is further developed and new components are added.
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ nav:
- ISO:
- MISO To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/miso_to_mdm.md
- PJM To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/pjm_to_mdm.md
- ERCOT To Meters Data Model: sdk/code-reference/pipelines/transformers/spark/iso/ercot_to_mdm.md
- The Weather Company:
- Raw Forecast To Weather Data Model: sdk/code-reference/pipelines/transformers/spark/the_weather_company/raw_forecast_to_weather_data_model.md
- ECMWF:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .ercot_to_mdm import ERCOTToMDMTransformer
from .miso_to_mdm import MISOToMDMTransformer
from .pjm_to_mdm import PJMToMDMTransformer
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from pyspark.sql import DataFrame, SparkSession, functions as F

from ..base_raw_to_mdm import BaseRawToMDMTransformer
from ...._pipeline_utils.iso import ERCOT_SCHEMA, melt
from .....data_models.timeseries import SeriesType, ModelType, ValueType


class ERCOTToMDMTransformer(BaseRawToMDMTransformer):
    """
    Transforms raw ERCOT data into the Meters Data Model.

    The required arguments and the available methods are those of
    BaseRawToMDMTransformer; refer to that class for details. This subclass
    supplies the ERCOT-specific column expressions and a pre-processing step.

    Example
    --------
    ```python
    from rtdip_sdk.pipelines.transformers import ERCOTToMDMTransformer
    from rtdip_sdk.pipelines.utilities import SparkSessionUtility

    # Not required if using Databricks
    spark = SparkSessionUtility(config={}).execute()

    ercot_to_mdm_transformer = ERCOTToMDMTransformer(
        spark=spark,
        data=df,
        output_type="usage",
        name=None,
        description=None,
        value_type=None,
        version=None,
        series_id=None,
        series_parent_id=None
    )

    result = ercot_to_mdm_transformer.transform()
    ```
    """

    spark: SparkSession
    data: DataFrame
    input_schema = ERCOT_SCHEMA

    # Classification of the produced series.
    series_type = SeriesType.Hour
    model_type = ModelType.Default
    value_type = ValueType.Usage

    # Column expressions for the usage output. "variable" and "value" are the
    # columns referenced after the melt() call in _pre_process.
    # NOTE(review): these strings look like Spark SQL expressions consumed by
    # the base class -- confirm in BaseRawToMDMTransformer.
    uid_col = "variable"
    series_id_col = "'series_std_001'"
    timestamp_col = "to_utc_timestamp(StartTime, 'America/Chicago')"
    interval_timestamp_col = "Timestamp + INTERVAL 1 HOURS"
    value_col = "value"

    # Column expressions for the meta output.
    series_parent_id_col = "'series_parent_std_001'"
    name_col = "'ERCOT API'"
    uom_col = "'mwh'"
    description_col = "'ERCOT data pulled from ERCOT ISO API'"
    timestamp_start_col = "StartTime"
    timestamp_end_col = "StartTime + INTERVAL 1 HOURS"
    time_zone_col = "'America/Chicago'"
    version_col = "'1'"
    properties_col = "null"

    def _pre_process(self) -> DataFrame:
        """
        Melt the per-zone load columns and add the ``StartTime`` column.

        Returns:
            DataFrame: The frame returned by the parent ``_pre_process`` after
            it has been passed through ``melt`` and extended with ``StartTime``.
        """
        key_columns = ["Date", "HourEnding", "DstFlag"]
        zone_columns = [
            "Coast",
            "East",
            "FarWest",
            "North",
            "NorthCentral",
            "SouthCentral",
            "Southern",
            "West",
            "SystemTotal",
        ]
        # MAKE_INTERVAL takes (years, months, weeks, days, hours, mins, secs);
        # only the hours slot is filled, using the integer that precedes ':'
        # in HourEnding.
        # NOTE(review): the source column is named HourEnding, yet its hour is
        # added to Date to form StartTime -- confirm this offset is intended.
        # DstFlag is carried through as an id column but not used here.
        start_time_sql = (
            "Date + MAKE_INTERVAL(0,0,0,0,cast(split(HourEnding,':')[0] as integer),0,0)"
        )

        prepared: DataFrame = super()._pre_process()
        melted = melt(prepared, id_vars=key_columns, value_vars=zone_columns)
        return melted.withColumn("StartTime", F.expr(start_time_sql))
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Date,HourEnding,Coast,East,FarWest,North,NorthCentral,SouthCentral,Southern,West,SystemTotal,DstFlag
2023-12-10T00:00:00,1:00,10173.7002,1384.3101,6210.73,1213.63,12670.0996,6350.9702,3160.3601,1292.41,42456.2102,N
2023-12-10T00:00:00,2:00,9950.5098,1389.42,6276.6201,1215.9399,12627.5,6270.8701,3046.97,1288.1801,42066.01,N
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{"Uid":"Coast","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"East","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"FarWest","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"North","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"NorthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"SouthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"Southern","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"West","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"SystemTotal","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T01:00:00","TimestampEnd":"2023-12-10T02:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"Coast","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"East","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"FarWest","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"North","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"NorthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"SouthCentral","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"Southern","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"West","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
{"Uid":"SystemTotal","SeriesId":"series_std_001","SeriesParentId":"series_parent_std_001","Name":"ERCOT API","Uom":"mwh","Description":"ERCOT data pulled from ERCOT ISO API","TimestampStart":"2023-12-10T02:00:00","TimestampEnd":"2023-12-10T03:00:00","Timezone":"America/Chicago","Version":"1","SeriesType":64,"ModelType":1,"ValueType":16}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Date,HourEnding,Coast,East,FarWest,North,NorthCentral,SouthCentral,Southern,West,SystemTotal,DstFlag
2023-12-10T00:00:00,1:00,10173.7002,1384.3101,6210.73,1213.63,12670.0996,6350.9702,3160.3601,1292.41,42456.2102,N
2023-12-10T00:00:00,2:00,9950.5098,1389.42,6276.6201,1215.9399,12627.5,6270.8701,3046.97,1288.1801,42066.01,N
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
Uid,SeriesId,Timestamp,IntervalTimestamp,Value
Coast,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,10173.7002
East,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,1384.3101
FarWest,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,6210.73
North,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,1213.63
NorthCentral,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,12670.0996
SouthCentral,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,6350.9702
Southern,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,3160.3601
West,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,1292.41
SystemTotal,series_std_001,2023-12-10T07:00:00,2023-12-10T08:00:00,42456.2102
Coast,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,9950.5098
East,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,1389.42
FarWest,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,6276.6201
North,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,1215.9399
NorthCentral,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,12627.5
SouthCentral,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,6270.8701
Southern,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,3046.97
West,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,1288.1801
SystemTotal,series_std_001,2023-12-10T08:00:00,2023-12-10T09:00:00,42066.01
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# Copyright 2022 RTDIP
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from pyspark.sql import SparkSession, DataFrame

from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.iso import ERCOT_SCHEMA
from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.mdm import (
MDM_USAGE_SCHEMA,
MDM_META_SCHEMA,
)
from src.sdk.python.rtdip_sdk.pipelines._pipeline_utils.models import (
Libraries,
SystemType,
)
from src.sdk.python.rtdip_sdk.pipelines.transformers import ERCOTToMDMTransformer

# Absolute path of the "test_data" folder that sits next to this test module;
# the individual tests append their own fixture sub-folder to it.
parent_base_path: str = os.path.join(
    os.path.split(os.path.realpath(__file__))[0], "test_data"
)


def ercot_to_mdm_test(
    spark_session: SparkSession,
    output_type: str,
    expected_df: DataFrame,
    base_path: str,
):
    """
    Shared check used by the ERCOT-to-MDM tests in this module.

    Reads ``input.csv`` from ``base_path`` with ERCOT_SCHEMA, runs
    ERCOTToMDMTransformer for the requested ``output_type`` and asserts that
    the collected rows equal those of ``expected_df`` once both frames are
    ordered by the same key columns.

    Args:
        spark_session: Spark session used to read the input fixture.
        output_type: Value forwarded to the transformer's ``output_type``.
        expected_df: Frame holding the expected transformer output.
        base_path: Folder that contains the ``input.csv`` fixture.
    """
    raw_df: DataFrame = spark_session.read.csv(
        f"{base_path}/input.csv", header=True, schema=ERCOT_SCHEMA
    )
    transformer = ERCOTToMDMTransformer(
        spark_session, raw_df, output_type=output_type
    )

    # Component metadata exposed by the transformer.
    assert transformer.system_type() == SystemType.PYSPARK
    assert isinstance(transformer.libraries(), Libraries)
    assert transformer.settings() == {}

    result_df = transformer.transform()

    # Order by whichever of the candidate key columns the expected frame has,
    # so the row-by-row comparison does not depend on the output ordering.
    available = expected_df.columns
    sort_keys = [
        name for name in ("Uid", "Timestamp", "TimestampStart") if name in available
    ]

    actual_rows = result_df.orderBy(sort_keys).collect()
    expected_rows = expected_df.orderBy(sort_keys).collect()
    assert actual_rows == expected_rows


def test_ercot_to_mdm_usage(spark_session: SparkSession):
    """Check the "usage" output against the fixtures in ``ercot_usage``."""
    usage_path: str = os.path.join(parent_base_path, "ercot_usage")

    # The expected rows are stored as CSV and read with the MDM usage schema.
    reader = spark_session.read
    usage_expected_df: DataFrame = reader.csv(
        f"{usage_path}/output.csv", header=True, schema=MDM_USAGE_SCHEMA
    )

    ercot_to_mdm_test(
        spark_session=spark_session,
        output_type="usage",
        expected_df=usage_expected_df,
        base_path=usage_path,
    )


def test_ercot_to_mdm_meta(spark_session: SparkSession):
    """Check the "meta" output against the fixtures in ``ercot_meta``."""
    meta_path: str = os.path.join(parent_base_path, "ercot_meta")

    # The expected rows are stored as JSON lines and read with the MDM meta schema.
    reader = spark_session.read
    meta_expected_df: DataFrame = reader.json(
        f"{meta_path}/output.json", schema=MDM_META_SCHEMA
    )

    ercot_to_mdm_test(
        spark_session=spark_session,
        output_type="meta",
        expected_df=meta_expected_df,
        base_path=meta_path,
    )