From 99b1a3d8c4af4c03ce79b0e85e09d74b72fcb270 Mon Sep 17 00:00:00 2001
From: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
Date: Thu, 17 Aug 2023 08:46:18 +0100
Subject: [PATCH 1/8] Query Builder

Signed-off-by: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
---
 src/sdk/python/rtdip_sdk/queries/__init__.py  |   3 +-
 .../python/rtdip_sdk/queries/query_builder.py | 133 +++++++++++
 .../queries/time_series/_query_builder.py     | 223 +++++++++++-------
 .../rtdip_sdk/queries/test_interpolate.py     |   2 +-
 .../queries/test_interpolation_at_time.py     |   2 +-
 .../python/rtdip_sdk/queries/test_metadata.py |   2 +-
 .../rtdip_sdk/queries/test_query_builder.py   |  83 +++++++
 .../sdk/python/rtdip_sdk/queries/test_raw.py  |   2 +-
 .../python/rtdip_sdk/queries/test_resample.py |   2 +-
 .../queries/test_time_weighted_average.py     |   6 +-
 10 files changed, 366 insertions(+), 92 deletions(-)
 create mode 100644 src/sdk/python/rtdip_sdk/queries/query_builder.py
 create mode 100644 tests/sdk/python/rtdip_sdk/queries/test_query_builder.py

diff --git a/src/sdk/python/rtdip_sdk/queries/__init__.py b/src/sdk/python/rtdip_sdk/queries/__init__.py
index 1c132c8ae..ff06b458b 100644
--- a/src/sdk/python/rtdip_sdk/queries/__init__.py
+++ b/src/sdk/python/rtdip_sdk/queries/__init__.py
@@ -12,4 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from .time_series import raw, resample, interpolate, interpolation_at_time, time_weighted_average
\ No newline at end of file
+from .time_series import raw, resample, interpolate, interpolation_at_time, time_weighted_average
+from .query_builder import *
\ No newline at end of file
diff --git a/src/sdk/python/rtdip_sdk/queries/query_builder.py b/src/sdk/python/rtdip_sdk/queries/query_builder.py
new file mode 100644
index 000000000..1728c6044
--- /dev/null
+++ b/src/sdk/python/rtdip_sdk/queries/query_builder.py
@@ -0,0 +1,133 @@
+# Copyright 2022 RTDIP
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Union
+from ..connectors.connection_interface import ConnectionInterface
+from .time_series import raw, resample, interpolate, interpolation_at_time, time_weighted_average
+from . import metadata
+
+class QueryBuilder():
+    parameters: dict
+    connection: ConnectionInterface
+    data_source: str
+    tagname_column: str
+    timestamp_column: str
+    status_column: str
+    value_column: str
+
+    def connect(self, connection: ConnectionInterface):
+        self.connection = connection
+        return self
+
+    def source(self, source: str, tagname_column: str = "TagName", timestamp_column: str = "EventTime", status_column: Union[str, None] = "Status", value_column: str = "Value"):
+        self.data_source = "`.`".join(source.split("."))
+        self.tagname_column = tagname_column
+        self.timestamp_column = timestamp_column
+        self.status_column = status_column
+        self.value_column = value_column
+        return self
+
+    def raw(self, tagname_filter: list[str], start_date: str, end_date: str, include_bad_data: bool = False):
+        raw_parameters = {
+            "source": self.data_source,
+            "tag_names": tagname_filter,
+            "start_date": start_date,
+            "end_date": end_date,
+            "include_bad_data": include_bad_data,
+            "tagname_column": self.tagname_column,
+            "timestamp_column": self.timestamp_column,
+            "status_column": self.status_column,
+            "value_column": self.value_column
+        }
+        return raw.get(self.connection, raw_parameters)
+
+    def resample(self, tagname_filter: list[str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, agg_method: str, include_bad_data: bool = False):
+        resample_parameters = {
+            "source": self.data_source,
+            "tag_names": tagname_filter,
+            "start_date": start_date,
+            "end_date": end_date,
+            "include_bad_data": include_bad_data,
+            "time_interval_rate": time_interval_rate,
+            "time_interval_unit": time_interval_unit,
+            "agg_method": agg_method,
+            "tagname_column": self.tagname_column,
+            "timestamp_column": self.timestamp_column,
+            "status_column": self.status_column,
+            "value_column": self.value_column
+        }
+
+        return resample.get(self.connection, resample_parameters)
+
+    def interpolate(self, tagname_filter: list[str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, agg_method: str, interpolation_method: str, include_bad_data: bool = False):
+        interpolation_parameters = {
+            "source": self.data_source,
+            "tag_names": tagname_filter,
+            "start_date": start_date,
+            "end_date": end_date,
+            "include_bad_data": include_bad_data,
+            "time_interval_rate": time_interval_rate,
+            "time_interval_unit": time_interval_unit,
+            "agg_method": agg_method,
+            "interpolation_method": interpolation_method,
+            "tagname_column": self.tagname_column,
+            "timestamp_column": self.timestamp_column,
+            "status_column": self.status_column,
+            "value_column": self.value_column
+        }
+
+        return interpolate.get(self.connection, interpolation_parameters)
+
+    def interpolate_at_time(self, tagname_filter: list[str], timestamp_filter: list[str], include_bad_data: bool = False, window_length: int = 1):
+        interpolate_at_time_parameters = {
+            "source": self.data_source,
+            "tag_names": tagname_filter,
+            "timestamps": timestamp_filter,
+            "include_bad_data": include_bad_data,
+            "window_length": window_length,
+            "tagname_column": self.tagname_column,
+            "timestamp_column": self.timestamp_column,
+            "status_column": self.status_column,
+            "value_column": self.value_column
+        }
+
+        return interpolation_at_time.get(self.connection, interpolate_at_time_parameters)
+
+    def time_weighted_average(self, tagname_filter: list[str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, step: str, source_metadata: Union[str, None] = None, include_bad_data: bool = False, window_length: int = 1):
+        time_weighted_average_parameters = {
+            "source": self.data_source,
+            "tag_names": tagname_filter,
+            "start_date": start_date,
+            "end_date": end_date,
+            "include_bad_data": include_bad_data,
+            "time_interval_rate": time_interval_rate,
+            "time_interval_unit": time_interval_unit,
+            "step": step,
+            "source_metadata": None if source_metadata is None else "`.`".join(source_metadata.split(".")),
+            "window_length": window_length,
+            "tagname_column": self.tagname_column,
+            "timestamp_column": self.timestamp_column,
+            "status_column": self.status_column,
+            "value_column": self.value_column
+        }
+
+        return time_weighted_average.get(self.connection, time_weighted_average_parameters)
+
+    def metadata(self, tagname_filter: list[str]):
+        raw_parameters = {
+            "source": self.data_source,
+            "tag_names": tagname_filter,
+            "tagname_column": self.tagname_column,
+        }
+        return metadata.get(self.connection, raw_parameters)
\ No newline at end of file
diff --git a/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py b/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py
index 682e583d0..1314de82e 100644
--- a/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py
+++ b/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py
@@ -70,26 +70,36 @@ def _parse_dates(parameters_dict):
 
 def _raw_query(parameters_dict: dict) -> str:
     raw_query = (
-        "SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, 'yyyy-MM-dd HH:mm:ss.SSS')), \"{{ time_zone }}\") as EventTime, TagName, Status, Value FROM "
-        "`{{ business_unit }}`.`sensors`.`{{ asset }}_{{ data_security_level }}_events_{{ data_type }}` "
-        "WHERE EventDate BETWEEN to_date(to_timestamp(\"{{ start_date }}\")) AND to_date(to_timestamp(\"{{ end_date }}\")) AND EventTime BETWEEN to_timestamp(\"{{ start_date }}\") AND to_timestamp(\"{{ end_date }}\") AND TagName in ('{{ tag_names | join('\\', \\'') }}') "
-        "{% if include_bad_data is defined and include_bad_data == false %}"
-        "AND Status = 'Good'"
+        "SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(`{{ timestamp_column }}`, 'yyyy-MM-dd HH:mm:ss.SSS')), \"{{ time_zone }}\") as `{{ timestamp_column }}`, `{{ tagname_column }}`, {% if include_status is defined and include_status == true %} `{{ status_column }}`, {% endif %} `{{ value_column }}` FROM "
+        "{% if source is defined and source is not none %}"
+        "`{{ source|lower }}` "
+        "{% else %}"
+        "`{{ business_unit|lower }}`.`sensors`.`{{ asset|lower }}_{{ data_security_level|lower }}_events_{{ data_type|lower }}` "
+        "{% endif %}"
+        "WHERE `{{ timestamp_column }}` BETWEEN to_timestamp(\"{{ start_date }}\") AND to_timestamp(\"{{ end_date }}\") AND `{{ tagname_column }}` in ('{{ tag_names | join('\\', \\'') }}') "
+        "{% if include_status is defined and include_status == true and include_bad_data is defined and include_bad_data == false %}"
+        "AND `{{ status_column }}` = 'Good'"
         "{% endif %}"
-        "ORDER BY TagName, EventTime "
+        "ORDER BY `{{ tagname_column }}`, `{{ timestamp_column }}` "
     )
 
     raw_parameters = {
-        "business_unit": parameters_dict['business_unit'].lower(),
-        "region": parameters_dict['region'].lower(),
-        "asset": parameters_dict['asset'].lower(),
-        "data_security_level": parameters_dict['data_security_level'].lower(),
-        "data_type": parameters_dict['data_type'].lower(),
+        "source": parameters_dict.get("source", None),
+        "business_unit": parameters_dict.get("business_unit"),
+        "region": parameters_dict.get("region"),
+        "asset": parameters_dict.get("asset"),
+        "data_security_level": parameters_dict.get("data_security_level"),
+        "data_type":
parameters_dict.get("data_type"), "start_date": parameters_dict['start_date'], "end_date": parameters_dict['end_date'], "tag_names": list(dict.fromkeys(parameters_dict['tag_names'])), "include_bad_data": parameters_dict['include_bad_data'], - "time_zone": parameters_dict["time_zone"] + "time_zone": parameters_dict["time_zone"], + "tagname_column": parameters_dict.get("tagname_column", "TagName"), + "timestamp_column": parameters_dict.get("timestamp_column", "EventTime"), + "include_status": False if "status_column" in parameters_dict and parameters_dict.get("status_column") is None else True, + "status_column": "Status" if "status_column" in parameters_dict and parameters_dict.get("status_column") is None else parameters_dict.get("status_column", "Status"), + "value_column": parameters_dict.get("value_column", "Value"), } sql_template = Template(raw_query) @@ -98,25 +108,30 @@ def _raw_query(parameters_dict: dict) -> str: def _sample_query(parameters_dict: dict) -> tuple: sample_query = ( - "WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, 'yyyy-MM-dd HH:mm:ss.SSS')), \"{{ time_zone }}\") as EventTime, TagName, Status, Value FROM " - "`{{ business_unit }}`.`sensors`.`{{ asset }}_{{ data_security_level }}_events_{{ data_type }}` " - "WHERE EventDate BETWEEN to_date(to_timestamp(\"{{ start_date }}\")) AND to_date(to_timestamp(\"{{ end_date }}\")) AND EventTime BETWEEN to_timestamp(\"{{ start_date }}\") AND to_timestamp(\"{{ end_date }}\") AND TagName in ('{{ tag_names | join('\\', \\'') }}') " - "{% if include_bad_data is defined and include_bad_data == false %} AND Status = 'Good' {% endif %}) " - ",date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp(\"{{ start_date }}\"), \"{{ time_zone }}\"), from_utc_timestamp(to_timestamp(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS timestamp_array, explode(array('{{ tag_names | join('\\', \\'') }}')) AS TagName) " - ",window_buckets AS (SELECT timestamp_array AS window_start ,TagName ,LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM date_array) " - ",project_resample_results AS (SELECT d.window_start ,d.window_end ,d.TagName ,{{ agg_method }}(e.Value) OVER (PARTITION BY d.TagName, d.window_start ORDER BY e.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Value FROM window_buckets d INNER JOIN raw_events e ON e.EventTime >= d.window_start AND e.EventTime < d.window_end AND e.TagName = d.TagName) " - "SELECT window_start AS EventTime ,TagName ,Value FROM project_resample_results GROUP BY window_start ,TagName ,Value " + "WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(`{{ timestamp_column }}`, 'yyyy-MM-dd HH:mm:ss.SSS')), \"{{ time_zone }}\") as `{{ timestamp_column }}`, `{{ tagname_column }}`, {% if include_status is defined and include_status == true %} `{{ status_column }}`, {% else %} 'Good' as `Status`, {% endif %} `{{ value_column }}` FROM " + "{% if source is defined and source is not none %}" + "`{{ source|lower }}` " + "{% else %}" + "`{{ business_unit|lower }}`.`sensors`.`{{ asset|lower }}_{{ data_security_level|lower }}_events_{{ data_type|lower }}` " + "{% endif %}" + "WHERE `{{ timestamp_column }}` BETWEEN to_timestamp(\"{{ start_date }}\") AND to_timestamp(\"{{ end_date }}\") AND `{{ tagname_column }}` in ('{{ tag_names | join('\\', \\'') }}') " + "{% if include_status is defined and include_status == true and include_bad_data is defined 
and include_bad_data == false %} AND `{{ status_column }}` = 'Good' {% endif %}) " + ",date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp(\"{{ start_date }}\"), \"{{ time_zone }}\"), from_utc_timestamp(to_timestamp(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS timestamp_array, explode(array('{{ tag_names | join('\\', \\'') }}')) AS `{{ tagname_column }}`) " + ",window_buckets AS (SELECT timestamp_array AS window_start, `{{ tagname_column }}`, LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM date_array) " + ",project_resample_results AS (SELECT d.window_start, d.window_end, d.`{{ tagname_column }}`, {{ agg_method }}(e.`{{ value_column }}`) OVER (PARTITION BY d.`{{ tagname_column }}`, d.window_start ORDER BY e.`{{ timestamp_column }}` ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `{{ value_column }}` FROM window_buckets d INNER JOIN raw_events e ON e.`{{ timestamp_column }}` >= d.window_start AND e.`{{ timestamp_column }}` < d.window_end AND e.`{{ tagname_column }}` = d.`{{ tagname_column }}`) " + "SELECT window_start AS `{{ timestamp_column }}`, `{{ tagname_column }}`, `{{ value_column }}` FROM project_resample_results GROUP BY window_start, `{{ tagname_column }}`, `{{ value_column }}` " "{% if is_resample is defined and is_resample == true %}" - "ORDER BY TagName, EventTime " + "ORDER BY `{{ tagname_column }}`, `{{ timestamp_column }}` " "{% endif %}" ) sample_parameters = { - "business_unit": parameters_dict['business_unit'].lower(), - "region": parameters_dict['region'].lower(), - "asset": parameters_dict['asset'].lower(), - "data_security_level": parameters_dict['data_security_level'].lower(), - "data_type": parameters_dict['data_type'].lower(), + "source": parameters_dict.get("source", None), + "business_unit": parameters_dict.get("business_unit"), + "region": parameters_dict.get("region"), + "asset": parameters_dict.get("asset"), + "data_security_level": parameters_dict.get("data_security_level"), + "data_type": parameters_dict.get("data_type"), "start_date": parameters_dict['start_date'], "end_date": parameters_dict['end_date'], "tag_names": list(dict.fromkeys(parameters_dict['tag_names'])), @@ -125,7 +140,12 @@ def _sample_query(parameters_dict: dict) -> tuple: "time_interval_unit": parameters_dict['time_interval_unit'], "agg_method": parameters_dict['agg_method'], "time_zone": parameters_dict["time_zone"], - "is_resample": True + "is_resample": True, + "tagname_column": parameters_dict.get("tagname_column", "TagName"), + "timestamp_column": parameters_dict.get("timestamp_column", "EventTime"), + "include_status": False if "status_column" in parameters_dict and parameters_dict.get("status_column") is None else True, + "status_column": "Status" if "status_column" in parameters_dict and parameters_dict.get("status_column") is None else parameters_dict.get("status_column", "Status"), + "value_column": parameters_dict.get("value_column", "Value"), } sql_template = Template(sample_query) @@ -145,17 +165,17 @@ def _interpolation_query(parameters_dict: dict, sample_query: str, sample_parame interpolate_query = ( f"WITH resample AS ({sample_query})" - ",date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp(\"{{ start_date }}\"), \"{{ time_zone }}\"), from_utc_timestamp(to_timestamp(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS EventTime, explode(array('{{ tag_names | join('\\', \\'') 
}}')) AS TagName) " + ",date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp(\"{{ start_date }}\"), \"{{ time_zone }}\"), from_utc_timestamp(to_timestamp(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS `{{ timestamp_column }}`, explode(array('{{ tag_names | join('\\', \\'') }}')) AS `{{ tagname_column }}`) " "{% if (interpolation_method is defined) and (interpolation_method == \"forward_fill\" or interpolation_method == \"backward_fill\") %}" - "SELECT a.EventTime, a.TagName, {{ interpolation_options_0 }}(b.Value, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN {{ interpolation_options_1 }} AND {{ interpolation_options_2 }}) AS Value FROM date_array a LEFT OUTER JOIN resample b ON a.EventTime = b.EventTime AND a.TagName = b.TagName ORDER BY a.TagName, a.EventTime " + "SELECT a.`{{ timestamp_column }}`, a.`{{ tagname_column }}`, {{ interpolation_options_0 }}(b.`{{ value_column }}`, true) OVER (PARTITION BY a.`{{ tagname_column }}` ORDER BY a.`{{ timestamp_column }}` ROWS BETWEEN {{ interpolation_options_1 }} AND {{ interpolation_options_2 }}) AS `{{ value_column }}` FROM date_array a LEFT OUTER JOIN resample b ON a.`{{ timestamp_column }}` = b.`{{ timestamp_column }}` AND a.`{{ tagname_column }}` = b.`{{ tagname_column }}` ORDER BY a.`{{ tagname_column }}`, a.`{{ timestamp_column }}` " "{% elif (interpolation_method is defined) and (interpolation_method == \"linear\") %}" - ",linear_interpolation_calculations AS (SELECT coalesce(a.TagName, b.TagName) as TagName, coalesce(a.EventTime, b.EventTime) as EventTime, a.EventTime as Requested_EventTime, b.EventTime as Found_EventTime, b.Value, " - "last_value(b.EventTime, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Last_EventTime, last_value(b.Value, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Last_Value, " - "first_value(b.EventTime, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS Next_EventTime, first_value(b.Value, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS Next_Value, " - "CASE WHEN b.Value is NULL THEN Last_Value + (unix_timestamp(a.EventTime) - unix_timestamp(Last_EventTime)) * ((Next_Value - Last_Value)) / ((unix_timestamp(Next_EventTime) - unix_timestamp(Last_EventTime))) ELSE b.Value END AS linear_interpolated_value FROM date_array a FULL OUTER JOIN resample b ON a.EventTime = b.EventTime AND a.TagName = b.TagName) " - "SELECT EventTime, TagName, linear_interpolated_value AS Value FROM linear_interpolation_calculations ORDER BY TagName, EventTime " + ",linear_interpolation_calculations AS (SELECT coalesce(a.`{{ tagname_column }}`, b.`{{ tagname_column }}`) as `{{ tagname_column }}`, coalesce(a.`{{ timestamp_column }}`, b.`{{ timestamp_column }}`) as `{{ timestamp_column }}`, a.`{{ timestamp_column }}` as `Requested_{{ timestamp_column }}`, b.`{{ timestamp_column }}` as `Found_{{ timestamp_column }}`, b.`{{ value_column }}`, " + "last_value(b.`{{ timestamp_column }}`, true) OVER (PARTITION BY a.`{{ tagname_column }}` ORDER BY a.`{{ timestamp_column }}` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `Last_{{ timestamp_column }}`, last_value(b.`{{ value_column }}`, true) OVER (PARTITION BY a.`{{ tagname_column }}` ORDER BY a.`{{ timestamp_column }}` ROWS BETWEEN UNBOUNDED PRECEDING 
AND CURRENT ROW) AS `Last_{{ value_column }}`, " + "first_value(b.`{{ timestamp_column }}`, true) OVER (PARTITION BY a.`{{ tagname_column }}` ORDER BY a.`{{ timestamp_column }}` ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS `Next_{{ timestamp_column }}`, first_value(b.`{{ value_column }}`, true) OVER (PARTITION BY a.`{{ tagname_column }}` ORDER BY a.`{{ timestamp_column }}` ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS `Next_{{ value_column }}`, " + "CASE WHEN b.`{{ value_column }}` is NULL THEN `Last_{{ value_column }}` + (unix_timestamp(a.`{{ timestamp_column }}`) - unix_timestamp(`Last_{{ timestamp_column }}`)) * ((`Next_{{ value_column }}` - `Last_{{ value_column }}`)) / ((unix_timestamp(`Next_{{ timestamp_column }}`) - unix_timestamp(`Last_{{ timestamp_column }}`))) ELSE b.`{{ value_column }}` END AS `linear_interpolated_{{ value_column }}` FROM date_array a FULL OUTER JOIN resample b ON a.`{{ timestamp_column }}` = b.`{{ timestamp_column }}` AND a.`{{ tagname_column }}` = b.`{{ tagname_column }}`) " + "SELECT `{{ timestamp_column }}`, `{{ tagname_column }}`, `linear_interpolated_{{ value_column }}` AS `{{ value_column }}` FROM linear_interpolation_calculations ORDER BY `{{ tagname_column }}`, `{{ timestamp_column }}` " "{% else %}" - "SELECT * FROM resample ORDER BY TagName, EventTime " + "SELECT * FROM resample ORDER BY `{{ tagname_column }}`, `{{ timestamp_column }}` " "{% endif %}" ) @@ -176,39 +196,51 @@ def _interpolation_at_time(parameters_dict: dict) -> str: parameters_dict["max_timestamp"] = max(timestamps_deduplicated) interpolate_at_time_query = ( - "WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, 'yyyy-MM-dd HH:mm:ss.SSS')), \"{{ time_zone }}\") as EventTime, TagName, Status, Value FROM `{{ business_unit }}`.`sensors`.`{{ asset }}_{{ data_security_level }}_events_{{ data_type }}` WHERE EventDate BETWEEN " + "WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(`{{ timestamp_column }}`, 'yyyy-MM-dd HH:mm:ss.SSS')), \"{{ time_zone }}\") AS `{{ timestamp_column }}`, `{{ tagname_column }}`, {% if include_status is defined and include_status == true %} `{{ status_column }}`, {% else %} 'Good' as `Status`, {% endif %} `{{ value_column }}` FROM " + "{% if source is defined and source is not none %}" + "`{{ source|lower }}` " + "{% else %}" + "`{{ business_unit|lower }}`.`sensors`.`{{ asset|lower }}_{{ data_security_level|lower }}_events_{{ data_type|lower }}` " + "{% endif %}" + "WHERE to_date(`{{ timestamp_column }}`) BETWEEN " "{% if timestamps is defined %} " "date_sub(to_date(to_timestamp(\"{{ min_timestamp }}\")), {{ window_length }}) AND date_add(to_date(to_timestamp(\"{{ max_timestamp }}\")), {{ window_length}}) " - "{% endif %} AND TagName in ('{{ tag_names | join('\\', \\'') }}') " - "{% if include_bad_data is defined and include_bad_data == false %} AND Status = 'Good' {% endif %}) " + "{% endif %} AND `{{ tagname_column }}` in ('{{ tag_names | join('\\', \\'') }}') " + "{% if include_status is defined and include_status == true and include_bad_data is defined and include_bad_data == false %} AND `{{ status_column }}` = 'Good' {% endif %}) " ", date_array AS (SELECT explode(array( " "{% for timestamp in timestamps -%} " "from_utc_timestamp(to_timestamp(\"{{timestamp}}\"), \"{{time_zone}}\") " - "{% if not loop.last %} , {% endif %} {% endfor %} )) AS EventTime, " - "explode(array('{{ tag_names | join('\\', \\'') }}')) AS TagName) " - ", interpolation_events AS (SELECT coalesce(a.TagName, 
b.TagName) as TagName, coalesce(a.EventTime, b.EventTime) as EventTime, a.EventTime as Requested_EventTime, b.EventTime as Found_EventTime, b.Status, b.Value FROM date_array a FULL OUTER JOIN raw_events b ON a.EventTime = b.EventTime AND a.TagName = b.TagName) " - ", interpolation_calculations AS (SELECT *, lag(EventTime) OVER (PARTITION BY TagName ORDER BY EventTime) AS Previous_EventTime, lag(Value) OVER (PARTITION BY TagName ORDER BY EventTime) AS Previous_Value, lead(EventTime) OVER (PARTITION BY TagName ORDER BY EventTime) AS Next_EventTime, lead(Value) OVER (PARTITION BY TagName ORDER BY EventTime) AS Next_Value, " - "CASE WHEN Requested_EventTime = Found_EventTime THEN Value WHEN Next_EventTime IS NULL THEN Previous_Value WHEN Previous_EventTime IS NULL and Next_EventTime IS NULL THEN NULL " - "ELSE Previous_Value + ((Next_Value - Previous_Value) * ((unix_timestamp(EventTime) - unix_timestamp(Previous_EventTime)) / (unix_timestamp(Next_EventTime) - unix_timestamp(Previous_EventTime)))) END AS Interpolated_Value FROM interpolation_events) " - "SELECT TagName, EventTime, Interpolated_Value as Value FROM interpolation_calculations WHERE EventTime in ( " + "{% if not loop.last %} , {% endif %} {% endfor %} )) AS `{{ timestamp_column }}`, " + "explode(array('{{ tag_names | join('\\', \\'') }}')) AS `{{ tagname_column }}`) " + ", interpolation_events AS (SELECT coalesce(a.`{{ tagname_column }}`, b.`{{ tagname_column }}`) AS `{{ tagname_column }}`, coalesce(a.`{{ timestamp_column }}`, b.`{{ timestamp_column }}`) as `{{ timestamp_column }}`, a.`{{ timestamp_column }}` as `Requested_{{ timestamp_column }}`, b.`{{ timestamp_column }}` as `Found_{{ timestamp_column }}`, b.`{{ status_column }}`, b.`{{ value_column }}` FROM date_array a FULL OUTER JOIN raw_events b ON a.`{{ timestamp_column }}` = b.`{{ timestamp_column }}` AND a.`{{ tagname_column }}` = b.`{{ tagname_column }}`) " + ", interpolation_calculations AS (SELECT *, lag(`{{ timestamp_column }}`) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}`) AS `Previous_{{ timestamp_column }}`, lag(`{{ value_column }}`) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}`) AS `Previous_{{ value_column }}`, lead(`{{ timestamp_column }}`) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}`) AS `Next_{{ timestamp_column }}`, lead(`{{ value_column }}`) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}`) AS `Next_{{ value_column }}`, " + "CASE WHEN `Requested_{{ timestamp_column }}` = `Found_{{ timestamp_column }}` THEN `{{ value_column }}` WHEN `Next_{{ timestamp_column }}` IS NULL THEN `Previous_{{ value_column }}` WHEN `Previous_{{ timestamp_column }}` IS NULL and `Next_{{ timestamp_column }}` IS NULL THEN NULL " + "ELSE `Previous_{{ value_column }}` + ((`Next_{{ value_column }}` - `Previous_{{ value_column }}`) * ((unix_timestamp(`{{ timestamp_column }}`) - unix_timestamp(`Previous_{{ timestamp_column }}`)) / (unix_timestamp(`Next_{{ timestamp_column }}`) - unix_timestamp(`Previous_{{ timestamp_column }}`)))) END AS `Interpolated_{{ value_column }}` FROM interpolation_events) " + "SELECT `{{ tagname_column }}`, `{{ timestamp_column }}`, `Interpolated_{{ value_column }}` AS `{{ value_column }}` FROM interpolation_calculations WHERE `{{ timestamp_column }}` in ( " "{% for timestamp in timestamps -%} " "from_utc_timestamp(to_timestamp(\"{{timestamp}}\"), \"{{time_zone}}\") " "{% if not loop.last %} , {% endif %} {% endfor %}) " ) 
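The hunks above and below all apply the same mechanics: each query function in this file assembles a Jinja2 template string and renders it against a parameters dict, backtick-quoting the configurable identifier columns and joining the tag list into a quoted SQL IN-list. A minimal, self-contained sketch of that rendering step (illustrative names only, not part of this patch):

from jinja2 import Template

# Toy template mirroring the conventions used in this file: backtick-quoted
# identifier placeholders and a join filter that builds a quoted IN-list.
toy_query = (
    "SELECT `{{ timestamp_column }}`, `{{ tagname_column }}`, `{{ value_column }}` "
    "FROM `{{ source|lower }}` "
    "WHERE `{{ tagname_column }}` in ('{{ tag_names | join('\\', \\'') }}') "
)

sql = Template(toy_query).render(
    source="catalog.schema.events",       # placeholder three-part table name
    timestamp_column="EventTime",
    tagname_column="TagName",
    value_column="Value",
    tag_names=["TAG-1", "TAG-2"],
)
# sql == "SELECT `EventTime`, `TagName`, `Value` FROM `catalog.schema.events` "
#        "WHERE `TagName` in ('TAG-1', 'TAG-2') "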
interpolation_at_time_parameters = { - "business_unit": parameters_dict['business_unit'].lower(), - "region": parameters_dict['region'].lower(), - "asset": parameters_dict['asset'].lower(), - "data_security_level": parameters_dict['data_security_level'].lower(), - "data_type": parameters_dict['data_type'].lower(), + "source": parameters_dict.get("source", None), + "business_unit": parameters_dict.get("business_unit"), + "region": parameters_dict.get("region"), + "asset": parameters_dict.get("asset"), + "data_security_level": parameters_dict.get("data_security_level"), + "data_type": parameters_dict.get("data_type"), "tag_names": list(dict.fromkeys(parameters_dict['tag_names'])), "timestamps": parameters_dict['timestamps'], "include_bad_data": parameters_dict["include_bad_data"], "time_zone": parameters_dict["time_zone"], "min_timestamp": parameters_dict["min_timestamp"], "max_timestamp": parameters_dict["max_timestamp"], - "window_length": parameters_dict["window_length"] + "window_length": parameters_dict["window_length"], + "tagname_column": parameters_dict.get("tagname_column", "TagName"), + "timestamp_column": parameters_dict.get("timestamp_column", "EventTime"), + "include_status": False if "status_column" in parameters_dict and parameters_dict.get("status_column") is None else True, + "status_column": "Status" if "status_column" in parameters_dict and parameters_dict.get("status_column") is None else parameters_dict.get("status_column", "Status"), + "value_column": parameters_dict.get("value_column", "Value"), } sql_template = Template(interpolate_at_time_query) return sql_template.render(interpolation_at_time_parameters) @@ -217,18 +249,24 @@ def _metadata_query(parameters_dict: dict) -> str: metadata_query = ( "SELECT * FROM " - "`{{ business_unit }}`.`sensors`.`{{ asset }}_{{ data_security_level }}_metadata` " + "{% if source is defined and source is not none %}" + "`{{ source|lower }}` " + "{% else %}" + "`{{ business_unit|lower }}`.`sensors`.`{{ asset|lower }}_{{ data_security_level|lower }}_metadata` " + "{% endif %}" "{% if tag_names is defined and tag_names|length > 0 %} " - "WHERE TagName in ('{{ tag_names | join('\\', \\'') }}') " + "WHERE `{{ tagname_column }}` in ('{{ tag_names | join('\\', \\'') }}') " "{% endif %}" ) metadata_parameters = { - "business_unit": parameters_dict['business_unit'].lower(), - "region": parameters_dict['region'].lower(), - "asset": parameters_dict['asset'].lower(), - "data_security_level": parameters_dict['data_security_level'].lower(), - "tag_names": list(dict.fromkeys(parameters_dict['tag_names'])) + "source": parameters_dict.get("source", None), + "business_unit": parameters_dict.get("business_unit"), + "region": parameters_dict.get("region"), + "asset": parameters_dict.get("asset"), + "data_security_level": parameters_dict.get("data_security_level"), + "tag_names": list(dict.fromkeys(parameters_dict['tag_names'])), + "tagname_column": parameters_dict.get("tagname_column", "TagName"), } sql_template = Template(metadata_query) @@ -239,39 +277,53 @@ def _time_weighted_average_query(parameters_dict: dict) -> str: parameters_dict["end_datetime"] = datetime.strptime(parameters_dict['end_date'], TIMESTAMP_FORMAT).strftime('%Y-%m-%dT%H:%M:%S') time_weighted_average_query = ( - "WITH raw_events AS (SELECT DISTINCT EventDate, TagName, from_utc_timestamp(to_timestamp(date_format(EventTime, 'yyyy-MM-dd HH:mm:ss.SSS')), \"{{ time_zone }}\") as EventTime, Status, Value FROM `{{ business_unit }}`.`sensors`.`{{ asset }}_{{ data_security_level }}_events_{{ 
data_type }}` WHERE EventDate BETWEEN date_sub(to_date(to_timestamp(\"{{ start_date }}\")), {{ window_length }}) AND date_add(to_date(to_timestamp(\"{{ end_date }}\")), {{ window_length }}) AND TagName in ('{{ tag_names | join('\\', \\'') }}') " - "{% if include_bad_data is defined and include_bad_data == false %} AND Status = 'Good' {% endif %}) " + "WITH raw_events AS (SELECT DISTINCT `{{ tagname_column }}`, from_utc_timestamp(to_timestamp(date_format(`{{ timestamp_column }}`, 'yyyy-MM-dd HH:mm:ss.SSS')), \"{{ time_zone }}\") as `{{ timestamp_column }}`, {% if include_status is defined and include_status == true %} `{{ status_column }}`, {% else %} 'Good' as `Status`, {% endif %} `{{ value_column }}` FROM " + "{% if source is defined and source is not none %}" + "`{{ source|lower }}` " + "{% else %}" + "`{{ business_unit|lower }}`.`sensors`.`{{ asset|lower }}_{{ data_security_level|lower }}_events_{{ data_type|lower }}` " + "{% endif %}" + "WHERE to_date(`{{ timestamp_column }}`) BETWEEN date_sub(to_date(to_timestamp(\"{{ start_date }}\")), {{ window_length }}) AND date_add(to_date(to_timestamp(\"{{ end_date }}\")), {{ window_length }}) AND `{{ tagname_column }}` in ('{{ tag_names | join('\\', \\'') }}') " + "{% if include_status is defined and include_status == true and include_bad_data is defined and include_bad_data == false %} AND `{{ status_column }}` = 'Good' {% endif %}) " "{% if step is defined and step == \"metadata\" %} " - ",meta_data AS (SELECT TagName, IFNULL(Step, false) AS Step FROM `downstream`.`sensors`.`pernis_restricted_metadata` )" + ",meta_data AS (SELECT `{{ tagname_column }}`, IFNULL(Step, false) AS Step FROM " + "{% if source_metadata is defined and source_metadata is not none %}" + "`{{ source_metadata|lower }}` " + "{% else %}" + "`{{ business_unit|lower }}`.`sensors`.`{{ asset|lower }}_{{ data_security_level|lower }}_metadata` " + "{% endif %}" + ") " "{% endif %}" - ",date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp(\"{{ start_date }}\"), \"{{ time_zone }}\"), from_utc_timestamp(to_timestamp(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS EventTime, explode(array('{{ tag_names | join('\\', \\'') }}')) AS TagName) " - ",window_events AS (SELECT coalesce(a.TagName, b.TagName) AS TagName, coalesce(a.EventTime, b.EventTime) as EventTime, window(coalesce(a.EventTime, b.EventTime), '{{ time_interval_rate + ' ' + time_interval_unit }}').start WindowEventTime, b.Status, b.Value FROM date_array a " - "FULL OUTER JOIN raw_events b ON CAST(a.EventTime AS long) = CAST(b.EventTime AS long) AND a.TagName = b.TagName) " - ",fill_status AS (SELECT *, last_value(Status, true) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as Fill_Status, CASE WHEN Fill_Status = \"Good\" THEN Value ELSE null END AS Good_Value FROM window_events) " - ",fill_value AS (SELECT *, last_value(Good_Value, true) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Fill_Value FROM fill_status) " + ",date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp(\"{{ start_date }}\"), \"{{ time_zone }}\"), from_utc_timestamp(to_timestamp(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS `{{ timestamp_column }}`, explode(array('{{ tag_names | join('\\', \\'') }}')) AS `{{ tagname_column }}`) " + ",window_events AS (SELECT coalesce(a.`{{ tagname_column }}`, 
b.`{{ tagname_column }}`) AS `{{ tagname_column }}`, coalesce(a.`{{ timestamp_column }}`, b.`{{ timestamp_column }}`) as `{{ timestamp_column }}`, window(coalesce(a.`{{ timestamp_column }}`, b.`{{ timestamp_column }}`), '{{ time_interval_rate + ' ' + time_interval_unit }}').start `Window{{ timestamp_column }}`, b.`{{ status_column }}`, b.`{{ value_column }}` FROM date_array a " + "FULL OUTER JOIN raw_events b ON CAST(a.`{{ timestamp_column }}` AS long) = CAST(b.`{{ timestamp_column }}` AS long) AND a.`{{ tagname_column }}` = b.`{{ tagname_column }}`) " + ",fill_status AS (SELECT *, last_value(`{{ status_column }}`, true) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as `Fill_{{ status_column }}`, CASE WHEN `Fill_{{ status_column }}` = \"Good\" THEN `{{ value_column }}` ELSE null END AS `Good_{{ value_column }}` FROM window_events) " + ",fill_value AS (SELECT *, last_value(`Good_{{ value_column }}`, true) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `Fill_{{ value_column }}` FROM fill_status) " "{% if step is defined and step == \"metadata\" %} " - ",twa_calculations AS (SELECT f.TagName, f.EventTime, f.WindowEventTime, m.Step, f.Status, f.Value, f.Fill_Status, f.Fill_Value, lead(f.EventTime) OVER (PARTITION BY f.TagName ORDER BY f.EventTime) AS Next_EventTime, lead(f.Fill_Status) OVER (PARTITION BY f.TagName ORDER BY f.EventTime) AS Next_Status " - ",CASE WHEN Next_Status = \"Good\" OR (f.Fill_Status = \"Good\" AND Next_Status = \"Bad\") THEN lead(f.Fill_Value) OVER (PARTITION BY f.TagName ORDER BY f.EventTime) ELSE f.Value END AS Next_Value_For_Status " - ",CASE WHEN f.Fill_Status = \"Good\" THEN Next_Value_For_Status ELSE 0 END AS Next_Value " - ",CASE WHEN f.Fill_Status = \"Good\" and Next_Status = \"Good\" THEN ((cast(Next_EventTime as double) - cast(f.EventTime as double)) / 60) WHEN f.Fill_Status = \"Good\" and Next_Status != \"Good\" THEN ((cast(Next_EventTime as integer) - cast(f.EventTime as double)) / 60) ELSE 0 END AS good_minutes " - ",CASE WHEN m.Step == false THEN ((f.Fill_Value + Next_Value) * 0.5) * good_minutes ELSE (f.Fill_Value * good_minutes) END AS twa_value FROM fill_value f LEFT JOIN meta_data m ON f.TagName = m.TagName) " + ",twa_calculations AS (SELECT f.`{{ tagname_column }}`, f.`{{ timestamp_column }}`, f.`Window{{ timestamp_column }}`, m.Step, f.`{{ status_column }}`, f.`{{ value_column }}`, f.`Fill_{{ status_column }}`, f.`Fill_{{ value_column }}`, lead(f.`{{ timestamp_column }}`) OVER (PARTITION BY f.`{{ tagname_column }}` ORDER BY f.`{{ timestamp_column }}`) AS `Next_{{ timestamp_column }}`, lead(f.`Fill_{{ status_column }}`) OVER (PARTITION BY f.`{{ tagname_column }}` ORDER BY f.`{{ timestamp_column }}`) AS `Next_{{ status_column }}` " + ",CASE WHEN `Next_{{ status_column }}` = \"Good\" OR (f.`Fill_{{ status_column }}` = \"Good\" AND `Next_{{ status_column }}` = \"Bad\") THEN lead(f.`Fill_{{ value_column }}`) OVER (PARTITION BY f.`{{ tagname_column }}` ORDER BY f.`{{ timestamp_column }}`) ELSE f.`{{ value_column }}` END AS `Next_{{ value_column }}_For_{{ status_column }}` " + ",CASE WHEN f.`Fill_{{ status_column }}` = \"Good\" THEN `Next_{{ value_column }}_For_{{ status_column }}` ELSE 0 END AS `Next_{{ value_column }}` " + ",CASE WHEN f.`Fill_{{ status_column }}` = \"Good\" and `Next_{{ status_column }}` = \"Good\" THEN ((cast(`Next_{{ timestamp_column }}` as double) - cast(f.`{{ timestamp_column 
}}` as double)) / 60) WHEN f.`Fill_{{ status_column }}` = \"Good\" and `Next_{{ status_column }}` != \"Good\" THEN ((cast(`Next_{{ timestamp_column }}` as integer) - cast(f.`{{ timestamp_column }}` as double)) / 60) ELSE 0 END AS good_minutes " + ",CASE WHEN m.Step == false THEN ((f.`Fill_{{ value_column }}` + `Next_{{ value_column }}`) * 0.5) * good_minutes ELSE (f.`Fill_{{ value_column }}` * good_minutes) END AS twa_value FROM fill_value f LEFT JOIN meta_data m ON f.`{{ tagname_column }}` = m.`{{ tagname_column }}`) " "{% else %} " - ",twa_calculations AS (SELECT TagName, EventTime, WindowEventTime, {{ step }} AS Step, Status, Value, Fill_Status, Fill_Value, lead(EventTime) OVER (PARTITION BY TagName ORDER BY EventTime) AS Next_EventTime, lead(Fill_Status) OVER (PARTITION BY TagName ORDER BY EventTime) AS Next_Status " - ",CASE WHEN Next_Status = \"Good\" OR (Fill_Status = \"Good\" AND Next_Status = \"Bad\") THEN lead(Fill_Value) OVER (PARTITION BY TagName ORDER BY EventTime) ELSE Value END AS Next_Value_For_Status " - ",CASE WHEN Fill_Status = \"Good\" THEN Next_Value_For_Status ELSE 0 END AS Next_Value " - ",CASE WHEN Fill_Status = \"Good\" and Next_Status = \"Good\" THEN ((cast(Next_EventTime as double) - cast(EventTime as double)) / 60) WHEN Fill_Status = \"Good\" and Next_Status != \"Good\" THEN ((cast(Next_EventTime as integer) - cast(EventTime as double)) / 60) ELSE 0 END AS good_minutes " - ",CASE WHEN Step == false THEN ((Fill_Value + Next_Value) * 0.5) * good_minutes ELSE (Fill_Value * good_minutes) END AS twa_value FROM fill_value) " + ",twa_calculations AS (SELECT `{{ tagname_column }}`, `{{ timestamp_column }}`, `Window{{ timestamp_column }}`, {{ step }} AS Step, `{{ status_column }}`, `{{ value_column }}`, `Fill_{{ status_column }}`, `Fill_{{ value_column }}`, lead(`{{ timestamp_column }}`) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}`) AS `Next_{{ timestamp_column }}`, lead(`Fill_{{ status_column }}`) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}`) AS `Next_{{ status_column }}` " + ",CASE WHEN `Next_{{ status_column }}` = \"Good\" OR (`Fill_{{ status_column }}` = \"Good\" AND `Next_{{ status_column }}` = \"Bad\") THEN lead(`Fill_{{ value_column }}`) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}`) ELSE `{{ value_column }}` END AS `Next_{{ value_column }}_For_{{ status_column }}` " + ",CASE WHEN `Fill_{{ status_column }}` = \"Good\" THEN `Next_{{ value_column }}_For_{{ status_column }}` ELSE 0 END AS `Next_{{ value_column }}` " + ",CASE WHEN `Fill_{{ status_column }}` = \"Good\" and `Next_{{ status_column }}` = \"Good\" THEN ((cast(`Next_{{ timestamp_column }}` as double) - cast(`{{ timestamp_column }}` as double)) / 60) WHEN `Fill_{{ status_column }}` = \"Good\" and `Next_{{ status_column }}` != \"Good\" THEN ((cast(`Next_{{ timestamp_column }}` as integer) - cast(`{{ timestamp_column }}` as double)) / 60) ELSE 0 END AS good_minutes " + ",CASE WHEN Step == false THEN ((`Fill_{{ value_column }}` + `Next_{{ value_column }}`) * 0.5) * good_minutes ELSE (`Fill_{{ value_column }}` * good_minutes) END AS twa_value FROM fill_value) " "{% endif %} " - ",project_result AS (SELECT TagName, WindowEventTime AS EventTime, sum(twa_value) / sum(good_minutes) AS Value from twa_calculations GROUP BY TagName, WindowEventTime) " - "SELECT * FROM project_result WHERE EventTime BETWEEN to_timestamp(\"{{ start_datetime }}\") AND to_timestamp(\"{{ end_datetime }}\") ORDER BY TagName, EventTime " + 
",project_result AS (SELECT `{{ tagname_column }}`, `Window{{ timestamp_column }}` AS `{{ timestamp_column }}`, sum(twa_value) / sum(good_minutes) AS `{{ value_column }}` from twa_calculations GROUP BY `{{ tagname_column }}`, `Window{{ timestamp_column }}`) " + "SELECT * FROM project_result WHERE `{{ timestamp_column }}` BETWEEN to_timestamp(\"{{ start_datetime }}\") AND to_timestamp(\"{{ end_datetime }}\") ORDER BY `{{ tagname_column }}`, `{{ timestamp_column }}` " ) time_weighted_average_parameters = { - "business_unit": parameters_dict['business_unit'].lower(), - "region": parameters_dict['region'].lower(), - "asset": parameters_dict['asset'].lower(), - "data_security_level": parameters_dict['data_security_level'].lower(), - "data_type": parameters_dict['data_type'].lower(), + "source": parameters_dict.get("source", None), + "source_metadata": parameters_dict.get('source_metadata', None), + "business_unit": parameters_dict.get("business_unit"), + "region": parameters_dict.get("region"), + "asset": parameters_dict.get("asset"), + "data_security_level": parameters_dict.get("data_security_level"), + "data_type": parameters_dict.get("data_type"), "start_date": parameters_dict['start_date'], "end_date": parameters_dict['end_date'], "start_datetime": parameters_dict['start_datetime'], @@ -283,6 +335,11 @@ def _time_weighted_average_query(parameters_dict: dict) -> str: "include_bad_data": parameters_dict['include_bad_data'], "step": parameters_dict['step'], "time_zone": parameters_dict["time_zone"], + "tagname_column": parameters_dict.get("tagname_column", "TagName"), + "timestamp_column": parameters_dict.get("timestamp_column", "EventTime"), + "include_status": False if "status_column" in parameters_dict and parameters_dict.get("status_column") is None else True, + "status_column": "Status" if "status_column" in parameters_dict and parameters_dict.get("status_column") is None else parameters_dict.get("status_column", "Status"), + "value_column": parameters_dict.get("value_column", "Value"), } sql_template = Template(time_weighted_average_query) diff --git a/tests/sdk/python/rtdip_sdk/queries/test_interpolate.py b/tests/sdk/python/rtdip_sdk/queries/test_interpolate.py index 479f4c1af..3521372c1 100644 --- a/tests/sdk/python/rtdip_sdk/queries/test_interpolate.py +++ b/tests/sdk/python/rtdip_sdk/queries/test_interpolate.py @@ -27,7 +27,7 @@ ACCESS_TOKEN = "mock_databricks_token" DATABRICKS_SQL_CONNECT = 'databricks.sql.connect' DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor' -MOCKED_QUERY= 'WITH resample AS (WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as EventTime, TagName, Status, Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN to_date(to_timestamp("2011-01-01T00:00:00+00:00")) AND to_date(to_timestamp("2011-01-02T23:59:59+00:00")) AND EventTime BETWEEN to_timestamp("2011-01-01T00:00:00+00:00") AND to_timestamp("2011-01-02T23:59:59+00:00") AND TagName in (\'MOCKED-TAGNAME\') AND Status = \'Good\' ) ,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS timestamp_array, explode(array(\'MOCKED-TAGNAME\')) AS TagName) ,window_buckets AS (SELECT timestamp_array AS window_start ,TagName ,LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM 
date_array) ,project_resample_results AS (SELECT d.window_start ,d.window_end ,d.TagName ,avg(e.Value) OVER (PARTITION BY d.TagName, d.window_start ORDER BY e.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Value FROM window_buckets d INNER JOIN raw_events e ON e.EventTime >= d.window_start AND e.EventTime < d.window_end AND e.TagName = d.TagName) SELECT window_start AS EventTime ,TagName ,Value FROM project_resample_results GROUP BY window_start ,TagName ,Value ),date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS EventTime, explode(array(\'MOCKED-TAGNAME\')) AS TagName) SELECT a.EventTime, a.TagName, last_value(b.Value, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Value FROM date_array a LEFT OUTER JOIN resample b ON a.EventTime = b.EventTime AND a.TagName = b.TagName ORDER BY a.TagName, a.EventTime ' +MOCKED_QUERY= 'WITH resample AS (WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(`EventTime`, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as `EventTime`, `TagName`, `Status`, `Value` FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE `EventTime` BETWEEN to_timestamp("2011-01-01T00:00:00+00:00") AND to_timestamp("2011-01-02T23:59:59+00:00") AND `TagName` in (\'MOCKED-TAGNAME\') AND `Status` = \'Good\' ) ,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS timestamp_array, explode(array(\'MOCKED-TAGNAME\')) AS `TagName`) ,window_buckets AS (SELECT timestamp_array AS window_start, `TagName`, LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM date_array) ,project_resample_results AS (SELECT d.window_start, d.window_end, d.`TagName`, avg(e.`Value`) OVER (PARTITION BY d.`TagName`, d.window_start ORDER BY e.`EventTime` ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `Value` FROM window_buckets d INNER JOIN raw_events e ON e.`EventTime` >= d.window_start AND e.`EventTime` < d.window_end AND e.`TagName` = d.`TagName`) SELECT window_start AS `EventTime`, `TagName`, `Value` FROM project_resample_results GROUP BY window_start, `TagName`, `Value` ),date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS `EventTime`, explode(array(\'MOCKED-TAGNAME\')) AS `TagName`) SELECT a.`EventTime`, a.`TagName`, last_value(b.`Value`, true) OVER (PARTITION BY a.`TagName` ORDER BY a.`EventTime` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `Value` FROM date_array a LEFT OUTER JOIN resample b ON a.`EventTime` = b.`EventTime` AND a.`TagName` = b.`TagName` ORDER BY a.`TagName`, a.`EventTime` ' MOCKED_PARAMETER_DICT = { "business_unit": "mocked-buiness-unit", "region": "mocked-region", diff --git a/tests/sdk/python/rtdip_sdk/queries/test_interpolation_at_time.py b/tests/sdk/python/rtdip_sdk/queries/test_interpolation_at_time.py index e708cef94..0211a80fd 100644 --- a/tests/sdk/python/rtdip_sdk/queries/test_interpolation_at_time.py +++ b/tests/sdk/python/rtdip_sdk/queries/test_interpolation_at_time.py @@ -27,7 +27,7 @@ ACCESS_TOKEN = 
"mock_databricks_token" DATABRICKS_SQL_CONNECT = 'databricks.sql.connect' DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor' -MOCKED_QUERY = 'WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as EventTime, TagName, Status, Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN date_sub(to_date(to_timestamp("2011-01-01T00:00:00+00:00")), 1) AND date_add(to_date(to_timestamp("2011-01-01T00:00:00+00:00")), 1) AND TagName in (\'MOCKED-TAGNAME\') AND Status = \'Good\' ) , date_array AS (SELECT explode(array( from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000") )) AS EventTime, explode(array(\'MOCKED-TAGNAME\')) AS TagName) , interpolation_events AS (SELECT coalesce(a.TagName, b.TagName) as TagName, coalesce(a.EventTime, b.EventTime) as EventTime, a.EventTime as Requested_EventTime, b.EventTime as Found_EventTime, b.Status, b.Value FROM date_array a FULL OUTER JOIN raw_events b ON a.EventTime = b.EventTime AND a.TagName = b.TagName) , interpolation_calculations AS (SELECT *, lag(EventTime) OVER (PARTITION BY TagName ORDER BY EventTime) AS Previous_EventTime, lag(Value) OVER (PARTITION BY TagName ORDER BY EventTime) AS Previous_Value, lead(EventTime) OVER (PARTITION BY TagName ORDER BY EventTime) AS Next_EventTime, lead(Value) OVER (PARTITION BY TagName ORDER BY EventTime) AS Next_Value, CASE WHEN Requested_EventTime = Found_EventTime THEN Value WHEN Next_EventTime IS NULL THEN Previous_Value WHEN Previous_EventTime IS NULL and Next_EventTime IS NULL THEN NULL ELSE Previous_Value + ((Next_Value - Previous_Value) * ((unix_timestamp(EventTime) - unix_timestamp(Previous_EventTime)) / (unix_timestamp(Next_EventTime) - unix_timestamp(Previous_EventTime)))) END AS Interpolated_Value FROM interpolation_events) SELECT TagName, EventTime, Interpolated_Value as Value FROM interpolation_calculations WHERE EventTime in ( from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000") ) ' +MOCKED_QUERY = 'WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(`EventTime`, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") AS `EventTime`, `TagName`, `Status`, `Value` FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE to_date(`EventTime`) BETWEEN date_sub(to_date(to_timestamp("2011-01-01T00:00:00+00:00")), 1) AND date_add(to_date(to_timestamp("2011-01-01T00:00:00+00:00")), 1) AND `TagName` in (\'MOCKED-TAGNAME\') AND `Status` = \'Good\' ) , date_array AS (SELECT explode(array( from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000") )) AS `EventTime`, explode(array(\'MOCKED-TAGNAME\')) AS `TagName`) , interpolation_events AS (SELECT coalesce(a.`TagName`, b.`TagName`) AS `TagName`, coalesce(a.`EventTime`, b.`EventTime`) as `EventTime`, a.`EventTime` as `Requested_EventTime`, b.`EventTime` as `Found_EventTime`, b.`Status`, b.`Value` FROM date_array a FULL OUTER JOIN raw_events b ON a.`EventTime` = b.`EventTime` AND a.`TagName` = b.`TagName`) , interpolation_calculations AS (SELECT *, lag(`EventTime`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) AS `Previous_EventTime`, lag(`Value`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) AS `Previous_Value`, lead(`EventTime`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) AS `Next_EventTime`, lead(`Value`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) AS `Next_Value`, CASE 
WHEN `Requested_EventTime` = `Found_EventTime` THEN `Value` WHEN `Next_EventTime` IS NULL THEN `Previous_Value` WHEN `Previous_EventTime` IS NULL and `Next_EventTime` IS NULL THEN NULL ELSE `Previous_Value` + ((`Next_Value` - `Previous_Value`) * ((unix_timestamp(`EventTime`) - unix_timestamp(`Previous_EventTime`)) / (unix_timestamp(`Next_EventTime`) - unix_timestamp(`Previous_EventTime`)))) END AS `Interpolated_Value` FROM interpolation_events) SELECT `TagName`, `EventTime`, `Interpolated_Value` AS `Value` FROM interpolation_calculations WHERE `EventTime` in ( from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000") ) ' MOCKED_PARAMETER_DICT = { "business_unit": "mocked-buiness-unit", "region": "mocked-region", diff --git a/tests/sdk/python/rtdip_sdk/queries/test_metadata.py b/tests/sdk/python/rtdip_sdk/queries/test_metadata.py index 1f4f03298..16e4c6201 100644 --- a/tests/sdk/python/rtdip_sdk/queries/test_metadata.py +++ b/tests/sdk/python/rtdip_sdk/queries/test_metadata.py @@ -28,7 +28,7 @@ DATABRICKS_SQL_CONNECT = 'databricks.sql.connect' DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor' INTERPOLATION_METHOD = "test/test/test" -MOCKED_QUERY="SELECT * FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_metadata` WHERE TagName in ('MOCKED-TAGNAME') " +MOCKED_QUERY="SELECT * FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_metadata` WHERE `TagName` in ('MOCKED-TAGNAME') " MOCKED_PARAMETER_DICT = { "business_unit": "mocked-buiness-unit", "region": "mocked-region", diff --git a/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py b/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py new file mode 100644 index 000000000..780d65276 --- /dev/null +++ b/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py @@ -0,0 +1,83 @@ +# Copyright 2023 RTDIP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
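The tests below exercise the fluent QueryBuilder API end to end with the underlying query functions mocked out. For orientation, a typical real call chain looks roughly like this (a sketch only: the connection details are placeholders, and the package-level import assumes the `from .query_builder import *` export added in this patch):

from rtdip_sdk.connectors import DatabricksSQLConnection
from rtdip_sdk.queries import QueryBuilder

# Placeholder credentials; substitute a real Databricks SQL warehouse.
connection = DatabricksSQLConnection("server_hostname", "http_path", "access_token")

data = (
    QueryBuilder()
    .connect(connection)
    .source("catalog.schema.events", status_column=None)
    .resample(
        tagname_filter=["TAG-1"],
        start_date="2021-01-01",
        end_date="2021-01-02",
        time_interval_rate="15",
        time_interval_unit="minute",
        agg_method="avg",
    )
)

Each builder method ends the chain by delegating to the matching time_series function, so `data` holds whatever that function returns (a pandas DataFrame with the real connectors, the mocked dict in these tests).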
+from src.sdk.python.rtdip_sdk.queries import QueryBuilder
+from src.sdk.python.rtdip_sdk.connectors import DatabricksSQLConnection
+from src.sdk.python.rtdip_sdk.authentication.azure import DefaultAuth
+from pytest_mock import MockerFixture
+
+def test_query_builder_raw(mocker: MockerFixture):
+    mocker.patch("src.sdk.python.rtdip_sdk.queries.query_builder.raw.get", return_value={"test": "data"})
+
+    data = (
+        QueryBuilder()
+        .connect("mock_connection")
+        .source("mock_catalog.mock_schema.mock_table", status_column=None)
+        .raw(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02")
+    )
+    assert data == {"test": "data"}
+
+def test_query_builder_resample(mocker: MockerFixture):
+    mocker.patch("src.sdk.python.rtdip_sdk.queries.query_builder.resample.get", return_value={"test": "data"})
+
+    data = (
+        QueryBuilder()
+        .connect("mock_connection")
+        .source("mock_catalog.mock_schema.mock_table")
+        .resample(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02", time_interval_rate="1", time_interval_unit="hour", agg_method="avg")
+    )
+    assert data == {"test": "data"}
+
+def test_query_builder_interpolate(mocker: MockerFixture):
+    mocker.patch("src.sdk.python.rtdip_sdk.queries.query_builder.interpolate.get", return_value={"test": "data"})
+
+    data = (
+        QueryBuilder()
+        .connect("mock_connection")
+        .source("mock_catalog.mock_schema.mock_table", status_column=None)
+        .interpolate(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02", time_interval_rate="1", time_interval_unit="hour", agg_method="avg", interpolation_method="linear")
+    )
+    assert data == {"test": "data"}
+
+def test_query_builder_interpolation_at_time(mocker: MockerFixture):
+    mocker.patch("src.sdk.python.rtdip_sdk.queries.query_builder.interpolation_at_time.get", return_value={"test": "data"})
+
+    data = (
+        QueryBuilder()
+        .connect("mock_connection")
+        .source("mock_catalog.mock_schema.mock_table", status_column=None)
+        .interpolate_at_time(tagname_filter=["mock_tag"], timestamp_filter=["2021-01-02T17:30:00+00:00", "2021-01-02T18:30:00+00:00"])
+    )
+    assert data == {"test": "data"}
+
+def test_query_builder_twa(mocker: MockerFixture):
+    mocker.patch("src.sdk.python.rtdip_sdk.queries.query_builder.time_weighted_average.get", return_value={"test": "data"})
+
+    data = (
+        QueryBuilder()
+        .connect("mock_connection")
+        .source("mock_catalog.mock_schema.mock_table", status_column=None)
+        .time_weighted_average(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02", time_interval_rate="1", time_interval_unit="hour", step="metadata", source_metadata="mock_catalog.mock_schema.mock_table_metadata")
+    )
+    assert data == {"test": "data"}
+
+def test_query_builder_metadata(mocker: MockerFixture):
+    mocker.patch("src.sdk.python.rtdip_sdk.queries.query_builder.metadata.get", return_value={"test": "data"})
+
+    data = (
+        QueryBuilder()
+        .connect("mock_connection")
+        .source("mock_catalog.mock_schema.mock_metadata_table")
+        .metadata(tagname_filter=["mock_tag"])
+    )
+    assert data == {"test": "data"}
\ No newline at end of file
diff --git a/tests/sdk/python/rtdip_sdk/queries/test_raw.py b/tests/sdk/python/rtdip_sdk/queries/test_raw.py
index fe1d677db..acf0618c2 100644
--- a/tests/sdk/python/rtdip_sdk/queries/test_raw.py
+++ b/tests/sdk/python/rtdip_sdk/queries/test_raw.py
@@ -28,7 +28,7 @@
 DATABRICKS_SQL_CONNECT = 'databricks.sql.connect'
 DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor'
 INTERPOLATION_METHOD = "test/test/test"
-MOCKED_QUERY='SELECT
DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as EventTime, TagName, Status, Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN to_date(to_timestamp("2011-01-01T00:00:00+00:00")) AND to_date(to_timestamp("2011-01-02T23:59:59+00:00")) AND EventTime BETWEEN to_timestamp("2011-01-01T00:00:00+00:00") AND to_timestamp("2011-01-02T23:59:59+00:00") AND TagName in (\'MOCKED-TAGNAME\') ORDER BY TagName, EventTime ' +MOCKED_QUERY='SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(`EventTime`, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as `EventTime`, `TagName`, `Status`, `Value` FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE `EventTime` BETWEEN to_timestamp("2011-01-01T00:00:00+00:00") AND to_timestamp("2011-01-02T23:59:59+00:00") AND `TagName` in (\'MOCKED-TAGNAME\') ORDER BY `TagName`, `EventTime` ' MOCKED_PARAMETER_DICT = { "business_unit": "mocked-buiness-unit", "region": "mocked-region", diff --git a/tests/sdk/python/rtdip_sdk/queries/test_resample.py b/tests/sdk/python/rtdip_sdk/queries/test_resample.py index 50b5ff7d0..4861c8963 100644 --- a/tests/sdk/python/rtdip_sdk/queries/test_resample.py +++ b/tests/sdk/python/rtdip_sdk/queries/test_resample.py @@ -27,7 +27,7 @@ ACCESS_TOKEN = "mock_databricks_token" DATABRICKS_SQL_CONNECT = 'databricks.sql.connect' DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor' -MOCKED_QUERY= 'WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as EventTime, TagName, Status, Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN to_date(to_timestamp("2011-01-01T00:00:00+00:00")) AND to_date(to_timestamp("2011-01-02T23:59:59+00:00")) AND EventTime BETWEEN to_timestamp("2011-01-01T00:00:00+00:00") AND to_timestamp("2011-01-02T23:59:59+00:00") AND TagName in (\'MOCKED-TAGNAME\') AND Status = \'Good\' ) ,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS timestamp_array, explode(array(\'MOCKED-TAGNAME\')) AS TagName) ,window_buckets AS (SELECT timestamp_array AS window_start ,TagName ,LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM date_array) ,project_resample_results AS (SELECT d.window_start ,d.window_end ,d.TagName ,avg(e.Value) OVER (PARTITION BY d.TagName, d.window_start ORDER BY e.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Value FROM window_buckets d INNER JOIN raw_events e ON e.EventTime >= d.window_start AND e.EventTime < d.window_end AND e.TagName = d.TagName) SELECT window_start AS EventTime ,TagName ,Value FROM project_resample_results GROUP BY window_start ,TagName ,Value ORDER BY TagName, EventTime ' +MOCKED_QUERY= 'WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(`EventTime`, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as `EventTime`, `TagName`, `Status`, `Value` FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE `EventTime` BETWEEN to_timestamp("2011-01-01T00:00:00+00:00") AND to_timestamp("2011-01-02T23:59:59+00:00") AND `TagName` in (\'MOCKED-TAGNAME\') AND `Status` = \'Good\' ) 
,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS timestamp_array, explode(array(\'MOCKED-TAGNAME\')) AS `TagName`) ,window_buckets AS (SELECT timestamp_array AS window_start, `TagName`, LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM date_array) ,project_resample_results AS (SELECT d.window_start, d.window_end, d.`TagName`, avg(e.`Value`) OVER (PARTITION BY d.`TagName`, d.window_start ORDER BY e.`EventTime` ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS `Value` FROM window_buckets d INNER JOIN raw_events e ON e.`EventTime` >= d.window_start AND e.`EventTime` < d.window_end AND e.`TagName` = d.`TagName`) SELECT window_start AS `EventTime`, `TagName`, `Value` FROM project_resample_results GROUP BY window_start, `TagName`, `Value` ORDER BY `TagName`, `EventTime` ' MOCKED_PARAMETER_DICT = { "business_unit": "mocked-buiness-unit", "region": "mocked-region", diff --git a/tests/sdk/python/rtdip_sdk/queries/test_time_weighted_average.py b/tests/sdk/python/rtdip_sdk/queries/test_time_weighted_average.py index d0bd9241c..63ee02d25 100644 --- a/tests/sdk/python/rtdip_sdk/queries/test_time_weighted_average.py +++ b/tests/sdk/python/rtdip_sdk/queries/test_time_weighted_average.py @@ -27,10 +27,10 @@ ACCESS_TOKEN = "mock_databricks_token" DATABRICKS_SQL_CONNECT = 'databricks.sql.connect' DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor' -MOCKED_QUERY= 'WITH raw_events AS (SELECT DISTINCT EventDate, TagName, from_utc_timestamp(to_timestamp(date_format(EventTime, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as EventTime, Status, Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN date_sub(to_date(to_timestamp("2011-01-01T00:00:00+00:00")), 1) AND date_add(to_date(to_timestamp("2011-01-02T23:59:59+00:00")), 1) AND TagName in (\'MOCKED-TAGNAME\') ) ,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS EventTime, explode(array(\'MOCKED-TAGNAME\')) AS TagName) ,window_events AS (SELECT coalesce(a.TagName, b.TagName) AS TagName, coalesce(a.EventTime, b.EventTime) as EventTime, window(coalesce(a.EventTime, b.EventTime), \'15 minute\').start WindowEventTime, b.Status, b.Value FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.EventTime AS long) = CAST(b.EventTime AS long) AND a.TagName = b.TagName) ,fill_status AS (SELECT *, last_value(Status, true) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as Fill_Status, CASE WHEN Fill_Status = "Good" THEN Value ELSE null END AS Good_Value FROM window_events) ,fill_value AS (SELECT *, last_value(Good_Value, true) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Fill_Value FROM fill_status) ,twa_calculations AS (SELECT TagName, EventTime, WindowEventTime, false AS Step, Status, Value, Fill_Status, Fill_Value, lead(EventTime) OVER (PARTITION BY TagName ORDER BY EventTime) AS Next_EventTime, lead(Fill_Status) OVER (PARTITION BY TagName ORDER BY EventTime) AS Next_Status ,CASE WHEN Next_Status = "Good" OR (Fill_Status = "Good" AND Next_Status = "Bad") THEN lead(Fill_Value) OVER (PARTITION BY TagName ORDER BY EventTime) ELSE Value 
END AS Next_Value_For_Status ,CASE WHEN Fill_Status = "Good" THEN Next_Value_For_Status ELSE 0 END AS Next_Value ,CASE WHEN Fill_Status = "Good" and Next_Status = "Good" THEN ((cast(Next_EventTime as double) - cast(EventTime as double)) / 60) WHEN Fill_Status = "Good" and Next_Status != "Good" THEN ((cast(Next_EventTime as integer) - cast(EventTime as double)) / 60) ELSE 0 END AS good_minutes ,CASE WHEN Step == false THEN ((Fill_Value + Next_Value) * 0.5) * good_minutes ELSE (Fill_Value * good_minutes) END AS twa_value FROM fill_value) ,project_result AS (SELECT TagName, WindowEventTime AS EventTime, sum(twa_value) / sum(good_minutes) AS Value from twa_calculations GROUP BY TagName, WindowEventTime) SELECT * FROM project_result WHERE EventTime BETWEEN to_timestamp("2011-01-01T00:00:00") AND to_timestamp("2011-01-02T23:59:59") ORDER BY TagName, EventTime ' -METADATA_MOCKED_QUERY = 'WITH raw_events AS (SELECT DISTINCT EventDate, TagName, from_utc_timestamp(to_timestamp(date_format(EventTime, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as EventTime, Status, Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN date_sub(to_date(to_timestamp("2011-01-01T00:00:00+00:00")), 1) AND date_add(to_date(to_timestamp("2011-01-02T23:59:59+00:00")), 1) AND TagName in (\'MOCKED-TAGNAME\') ) ,meta_data AS (SELECT TagName, IFNULL(Step, false) AS Step FROM `downstream`.`sensors`.`pernis_restricted_metadata` ),date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS EventTime, explode(array(\'MOCKED-TAGNAME\')) AS TagName) ,window_events AS (SELECT coalesce(a.TagName, b.TagName) AS TagName, coalesce(a.EventTime, b.EventTime) as EventTime, window(coalesce(a.EventTime, b.EventTime), \'15 minute\').start WindowEventTime, b.Status, b.Value FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.EventTime AS long) = CAST(b.EventTime AS long) AND a.TagName = b.TagName) ,fill_status AS (SELECT *, last_value(Status, true) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as Fill_Status, CASE WHEN Fill_Status = "Good" THEN Value ELSE null END AS Good_Value FROM window_events) ,fill_value AS (SELECT *, last_value(Good_Value, true) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Fill_Value FROM fill_status) ,twa_calculations AS (SELECT f.TagName, f.EventTime, f.WindowEventTime, m.Step, f.Status, f.Value, f.Fill_Status, f.Fill_Value, lead(f.EventTime) OVER (PARTITION BY f.TagName ORDER BY f.EventTime) AS Next_EventTime, lead(f.Fill_Status) OVER (PARTITION BY f.TagName ORDER BY f.EventTime) AS Next_Status ,CASE WHEN Next_Status = "Good" OR (f.Fill_Status = "Good" AND Next_Status = "Bad") THEN lead(f.Fill_Value) OVER (PARTITION BY f.TagName ORDER BY f.EventTime) ELSE f.Value END AS Next_Value_For_Status ,CASE WHEN f.Fill_Status = "Good" THEN Next_Value_For_Status ELSE 0 END AS Next_Value ,CASE WHEN f.Fill_Status = "Good" and Next_Status = "Good" THEN ((cast(Next_EventTime as double) - cast(f.EventTime as double)) / 60) WHEN f.Fill_Status = "Good" and Next_Status != "Good" THEN ((cast(Next_EventTime as integer) - cast(f.EventTime as double)) / 60) ELSE 0 END AS good_minutes ,CASE WHEN m.Step == false THEN ((f.Fill_Value + Next_Value) * 0.5) * good_minutes ELSE (f.Fill_Value * good_minutes) END AS 
twa_value FROM fill_value f LEFT JOIN meta_data m ON f.TagName = m.TagName) ,project_result AS (SELECT TagName, WindowEventTime AS EventTime, sum(twa_value) / sum(good_minutes) AS Value from twa_calculations GROUP BY TagName, WindowEventTime) SELECT * FROM project_result WHERE EventTime BETWEEN to_timestamp("2011-01-01T00:00:00") AND to_timestamp("2011-01-02T23:59:59") ORDER BY TagName, EventTime ' +MOCKED_QUERY= 'WITH raw_events AS (SELECT DISTINCT `TagName`, from_utc_timestamp(to_timestamp(date_format(`EventTime`, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as `EventTime`, `Status`, `Value` FROM `mocked-business-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE to_date(`EventTime`) BETWEEN date_sub(to_date(to_timestamp("2011-01-01T00:00:00+00:00")), 1) AND date_add(to_date(to_timestamp("2011-01-02T23:59:59+00:00")), 1) AND `TagName` in (\'MOCKED-TAGNAME\') ) ,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS `EventTime`, explode(array(\'MOCKED-TAGNAME\')) AS `TagName`) ,window_events AS (SELECT coalesce(a.`TagName`, b.`TagName`) AS `TagName`, coalesce(a.`EventTime`, b.`EventTime`) as `EventTime`, window(coalesce(a.`EventTime`, b.`EventTime`), \'15 minute\').start `WindowEventTime`, b.`Status`, b.`Value` FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.`EventTime` AS long) = CAST(b.`EventTime` AS long) AND a.`TagName` = b.`TagName`) ,fill_status AS (SELECT *, last_value(`Status`, true) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as `Fill_Status`, CASE WHEN `Fill_Status` = "Good" THEN `Value` ELSE null END AS `Good_Value` FROM window_events) ,fill_value AS (SELECT *, last_value(`Good_Value`, true) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `Fill_Value` FROM fill_status) ,twa_calculations AS (SELECT `TagName`, `EventTime`, `WindowEventTime`, false AS Step, `Status`, `Value`, `Fill_Status`, `Fill_Value`, lead(`EventTime`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) AS `Next_EventTime`, lead(`Fill_Status`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) AS `Next_Status` ,CASE WHEN `Next_Status` = "Good" OR (`Fill_Status` = "Good" AND `Next_Status` = "Bad") THEN lead(`Fill_Value`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) ELSE `Value` END AS `Next_Value_For_Status` ,CASE WHEN `Fill_Status` = "Good" THEN `Next_Value_For_Status` ELSE 0 END AS `Next_Value` ,CASE WHEN `Fill_Status` = "Good" and `Next_Status` = "Good" THEN ((cast(`Next_EventTime` as double) - cast(`EventTime` as double)) / 60) WHEN `Fill_Status` = "Good" and `Next_Status` != "Good" THEN ((cast(`Next_EventTime` as integer) - cast(`EventTime` as double)) / 60) ELSE 0 END AS good_minutes ,CASE WHEN Step == false THEN ((`Fill_Value` + `Next_Value`) * 0.5) * good_minutes ELSE (`Fill_Value` * good_minutes) END AS twa_value FROM fill_value) ,project_result AS (SELECT `TagName`, `WindowEventTime` AS `EventTime`, sum(twa_value) / sum(good_minutes) AS `Value` from twa_calculations GROUP BY `TagName`, `WindowEventTime`) SELECT * FROM project_result WHERE `EventTime` BETWEEN to_timestamp("2011-01-01T00:00:00") AND to_timestamp("2011-01-02T23:59:59") ORDER BY `TagName`, `EventTime` ' +METADATA_MOCKED_QUERY = 'WITH raw_events AS (SELECT DISTINCT `TagName`, from_utc_timestamp(to_timestamp(date_format(`EventTime`, 
\'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as `EventTime`, `Status`, `Value` FROM `mocked-business-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE to_date(`EventTime`) BETWEEN date_sub(to_date(to_timestamp("2011-01-01T00:00:00+00:00")), 1) AND date_add(to_date(to_timestamp("2011-01-02T23:59:59+00:00")), 1) AND `TagName` in (\'MOCKED-TAGNAME\') ) ,meta_data AS (SELECT `TagName`, IFNULL(Step, false) AS Step FROM `mocked-business-unit`.`sensors`.`mocked-asset_mocked-data-security-level_metadata` ) ,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS `EventTime`, explode(array(\'MOCKED-TAGNAME\')) AS `TagName`) ,window_events AS (SELECT coalesce(a.`TagName`, b.`TagName`) AS `TagName`, coalesce(a.`EventTime`, b.`EventTime`) as `EventTime`, window(coalesce(a.`EventTime`, b.`EventTime`), \'15 minute\').start `WindowEventTime`, b.`Status`, b.`Value` FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.`EventTime` AS long) = CAST(b.`EventTime` AS long) AND a.`TagName` = b.`TagName`) ,fill_status AS (SELECT *, last_value(`Status`, true) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as `Fill_Status`, CASE WHEN `Fill_Status` = "Good" THEN `Value` ELSE null END AS `Good_Value` FROM window_events) ,fill_value AS (SELECT *, last_value(`Good_Value`, true) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS `Fill_Value` FROM fill_status) ,twa_calculations AS (SELECT f.`TagName`, f.`EventTime`, f.`WindowEventTime`, m.Step, f.`Status`, f.`Value`, f.`Fill_Status`, f.`Fill_Value`, lead(f.`EventTime`) OVER (PARTITION BY f.`TagName` ORDER BY f.`EventTime`) AS `Next_EventTime`, lead(f.`Fill_Status`) OVER (PARTITION BY f.`TagName` ORDER BY f.`EventTime`) AS `Next_Status` ,CASE WHEN `Next_Status` = "Good" OR (f.`Fill_Status` = "Good" AND `Next_Status` = "Bad") THEN lead(f.`Fill_Value`) OVER (PARTITION BY f.`TagName` ORDER BY f.`EventTime`) ELSE f.`Value` END AS `Next_Value_For_Status` ,CASE WHEN f.`Fill_Status` = "Good" THEN `Next_Value_For_Status` ELSE 0 END AS `Next_Value` ,CASE WHEN f.`Fill_Status` = "Good" and `Next_Status` = "Good" THEN ((cast(`Next_EventTime` as double) - cast(f.`EventTime` as double)) / 60) WHEN f.`Fill_Status` = "Good" and `Next_Status` != "Good" THEN ((cast(`Next_EventTime` as integer) - cast(f.`EventTime` as double)) / 60) ELSE 0 END AS good_minutes ,CASE WHEN m.Step == false THEN ((f.`Fill_Value` + `Next_Value`) * 0.5) * good_minutes ELSE (f.`Fill_Value` * good_minutes) END AS twa_value FROM fill_value f LEFT JOIN meta_data m ON f.`TagName` = m.`TagName`) ,project_result AS (SELECT `TagName`, `WindowEventTime` AS `EventTime`, sum(twa_value) / sum(good_minutes) AS `Value` from twa_calculations GROUP BY `TagName`, `WindowEventTime`) SELECT * FROM project_result WHERE `EventTime` BETWEEN to_timestamp("2011-01-01T00:00:00") AND to_timestamp("2011-01-02T23:59:59") ORDER BY `TagName`, `EventTime` ' MOCKED_PARAMETER_DICT = { - "business_unit": "mocked-buiness-unit", + "business_unit": "mocked-business-unit", "region": "mocked-region", "asset": "mocked-asset", "data_security_level": "mocked-data-security-level", From 9365bcc7ae62383c6b0cc4daa21dd9c154137d50 Mon Sep 17 00:00:00 2001 From: GBBBAS <42962356+GBBBAS@users.noreply.github.com> Date: Thu, 17 Aug 2023 09:25:30 +0100 Subject: [PATCH 2/8] 
Updates for Circular Averages Signed-off-by: GBBBAS <42962356+GBBBAS@users.noreply.github.com> --- docs/domains/smart_meter/data_model.md | 6 +-- docs/domains/smart_meter/overview.md | 2 +- docs/domains/weather/overview.md | 4 +- .../python/rtdip_sdk/queries/query_builder.py | 51 ++++++++++++++++--- .../queries/time_series/_query_builder.py | 46 ++++++++++------- .../queries/test_circular_average.py | 4 +- .../rtdip_sdk/queries/test_query_builder.py | 24 ++++++++- 7 files changed, 104 insertions(+), 33 deletions(-) diff --git a/docs/domains/smart_meter/data_model.md b/docs/domains/smart_meter/data_model.md index 35b05313e..827f0712d 100644 --- a/docs/domains/smart_meter/data_model.md +++ b/docs/domains/smart_meter/data_model.md @@ -1,7 +1,7 @@ # Meters Data Model -Base Raw To Meters Data Model: [here](/rtdip/core/sdk/code-reference/pipelines/transformers/spark/base_raw_to_mdm/) +Base Raw To Meters Data Model: [here](../../sdk/code-reference/pipelines/transformers/spark/base_raw_to_mdm.md) * ISO: - * MISO To Meters Data Model: [here](/rtdip/core/sdk/code-reference/pipelines/transformers/spark/iso/miso_to_mdm/) - * PJM To Meters Data Model: [here](/rtdip/core/sdk/code-reference/pipelines/transformers/spark/iso/pjm_to_mdm/) \ No newline at end of file + * MISO To Meters Data Model: [here](../../sdk/code-reference/pipelines/transformers/spark/iso/miso_to_mdm.md) + * PJM To Meters Data Model: [here](../../sdk/code-reference/pipelines/transformers/spark/iso/pjm_to_mdm.md) \ No newline at end of file diff --git a/docs/domains/smart_meter/overview.md b/docs/domains/smart_meter/overview.md index a8aa2ee72..de1860730 100644 --- a/docs/domains/smart_meter/overview.md +++ b/docs/domains/smart_meter/overview.md @@ -17,7 +17,7 @@ At a high level, the electricity system (US example) works as follows: An **Independent System Operator (ISO)** sometimes called the Regional Transmission Organisation (RTO) is an organisation that is in charge of the entire process. They coordinate, control, and monitor the electric grid in a specific region, typically a multi-state area. ## Meter Data Pipelines -Load forecasting is a technique used by ISO's, and energy-providing companies to predict the power/energy needed to meet the demand and supply equilibrium of the energy grid. RTDIP defines and provides example pipelines for the two primary inputs to energy services like load forecasting, namely [weather](/rtdip/core/domains/weather/overview/) and meter data. +Load forecasting is a technique used by ISO's, and energy-providing companies to predict the power/energy needed to meet the demand and supply equilibrium of the energy grid. RTDIP defines and provides example pipelines for the two primary inputs to energy services like load forecasting, namely [weather](../../domains/weather/overview.md) and meter data. Specifically, with respect to meter data RTDIP defines and provides two exemplar ISO's: diff --git a/docs/domains/weather/overview.md b/docs/domains/weather/overview.md index a90044c08..ba1a506e8 100644 --- a/docs/domains/weather/overview.md +++ b/docs/domains/weather/overview.md @@ -1,7 +1,7 @@ # Weather Services ## Overview -Many organizations need weather data for day-to-day operations. RTDIP provides the ability to consume data from examplar weather sources, transform it and store the data in an appropiate open source format to enable generic functions such as: +Many organizations need weather data for day-to-day operations. 
RTDIP provides the ability to consume data from examplar weather sources, transform it and store the data in an appropiate open source format to enable generic functions such as:
+Many organizations need weather data for day-to-day operations. RTDIP provides the ability to consume data from exemplar weather sources, transform it and store the data in an appropriate open source format to enable generic functions such as:
 
 * Data Science, ML and AI applications to consume the data
 * BI and Analytics
@@ -13,7 +13,7 @@ A primary aim for RTDIP in 2023 is to demonstrate how the platform can be utilis
 * Energy Generation Forecasting
 * Other behind the meter services and insights
 
-Weather data is a primary driver, together with [meter](/rtdip/core/domains/smart_meter/overview/) data, of variance in load & generation forecasting in the energy domain.
+Weather data is a primary driver, together with [meter](../../domains/smart_meter/overview.md) data, of variance in load & generation forecasting in the energy domain.
 
 ## Weather Data in the Energy Domain
diff --git a/src/sdk/python/rtdip_sdk/queries/query_builder.py b/src/sdk/python/rtdip_sdk/queries/query_builder.py
index 1728c6044..9530511e3 100644
--- a/src/sdk/python/rtdip_sdk/queries/query_builder.py
+++ b/src/sdk/python/rtdip_sdk/queries/query_builder.py
@@ -14,7 +14,7 @@
 from typing import Union
 from ..connectors.connection_interface import ConnectionInterface
-from .time_series import raw, resample, interpolate, interpolation_at_time, time_weighted_average
+from .time_series import raw, resample, interpolate, interpolation_at_time, time_weighted_average, circular_average, circular_standard_deviation
 from . import metadata
 
 class QueryBuilder():
@@ -89,8 +89,8 @@ def interpolate(self, tagname_filter: [str], start_date: str, end_date: str, tim
 
         return interpolate.get(self.connection, interpolation_parameters)
 
-    def interpolate_at_time(self, tagname_filter: [str], timestamp_filter: list[str], include_bad_data: bool = False, window_length: int = 1):
-        interpolate_at_time_parameters = {
+    def interpolation_at_time(self, tagname_filter: [str], timestamp_filter: list[str], include_bad_data: bool = False, window_length: int = 1):
+        interpolation_at_time_parameters = {
             "source": self.data_source,
             "tag_names": tagname_filter,
             "timestamps": timestamp_filter,
@@ -102,7 +102,7 @@ def interpolate_at_time(self, tagname_filter: [str], timestamp_filter: list[str]
             "value_column": self.value_column
         }
 
-        return interpolation_at_time.get(self.connection, interpolate_at_time_parameters)
+        return interpolation_at_time.get(self.connection, interpolation_at_time_parameters)
 
     def time_weighted_average(self, tagname_filter: [str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, step: str, source_metadata: str = None, include_bad_data: bool = False, window_length: int = 1):
         time_weighted_average_parameters = {
@@ -125,9 +125,48 @@ def time_weighted_average(self, tagname_filter: [str], start_date: str, end_date
 
         return time_weighted_average.get(self.connection, time_weighted_average_parameters)
 
     def metadata(self, tagname_filter: [str]):
-        raw_parameters = {
+        metadata_parameters = {
             "source": self.data_source,
             "tag_names": tagname_filter,
             "tagname_column": self.tagname_column,
         }
-        return metadata.get(self.connection, raw_parameters)
\ No newline at end of file
+
+        return metadata.get(self.connection, metadata_parameters)
+
+    def circular_average(self, tagname_filter: [str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, lower_bound: int, upper_bound: int, include_bad_data: bool = False):
+        circular_average_parameters = {
+            "source": self.data_source,
+            "tag_names": tagname_filter,
+            "start_date": start_date,
+            "end_date": end_date,
"include_bad_data": include_bad_data, + "time_interval_rate": time_interval_rate, + "time_interval_unit": time_interval_unit, + "lower_bound": lower_bound, + "upper_bound": upper_bound, + "tagname_column": self.tagname_column, + "timestamp_column": self.timestamp_column, + "status_column": self.status_column, + "value_column": self.value_column + } + + return circular_average.get(self.connection, circular_average_parameters) + + def circular_standard_deviation(self, tagname_filter: [str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, lower_bound: int, upper_bound: int, include_bad_data: bool = False): + circular_stddev_parameters = { + "source": self.data_source, + "tag_names": tagname_filter, + "start_date": start_date, + "end_date": end_date, + "include_bad_data": include_bad_data, + "time_interval_rate": time_interval_rate, + "time_interval_unit": time_interval_unit, + "lower_bound": lower_bound, + "upper_bound": upper_bound, + "tagname_column": self.tagname_column, + "timestamp_column": self.timestamp_column, + "status_column": self.status_column, + "value_column": self.value_column + } + + return circular_standard_deviation.get(self.connection, circular_stddev_parameters) \ No newline at end of file diff --git a/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py b/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py index 86e1cba60..badddb16f 100644 --- a/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py +++ b/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py @@ -347,35 +347,40 @@ def _time_weighted_average_query(parameters_dict: dict) -> str: def _circular_stats_query(parameters_dict: dict) -> str: circular_base_query = ( - "WITH raw_events AS (SELECT EventTime,TagName ,Status ,Value FROM " - "`{{ business_unit }}`.`sensors`.`{{ asset }}_{{ data_security_level }}_events_{{ data_type }}` " - "WHERE EventDate BETWEEN TO_DATE(to_timestamp(\"{{ start_date }}\")) AND TO_DATE(to_timestamp(\"{{ end_date }}\")) AND EventTime BETWEEN TO_TIMESTAMP(\"{{ start_date }}\") AND TO_TIMESTAMP(\"{{ end_date }}\") AND TagName IN ('{{ tag_names | join('\\', \\'') }}') " - "{% if include_bad_data is defined and include_bad_data == false %} AND Status = 'Good' {% endif %}) " - ",date_array AS (SELECT EXPLODE(SEQUENCE(FROM_UTC_TIMESTAMP(TO_TIMESTAMP(\"{{ start_date }}\"), \"{{ time_zone }}\"), FROM_UTC_TIMESTAMP(TO_TIMESTAMP(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS EventTime, EXPLODE(ARRAY('{{ tag_names | join('\\', \\'') }}')) AS TagName) " - ",window_events AS (SELECT COALESCE(a.TagName, b.TagName) AS TagName, COALESCE(a.EventTime, b.EventTime) AS EventTime, WINDOW(COALESCE(a.EventTime, b.EventTime), '{{ time_interval_rate + ' ' + time_interval_unit }}').START WindowEventTime, b.Status, b.Value FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.EventTime AS LONG) = CAST(b.EventTime AS LONG) AND a.TagName = b.TagName) " - ",calculation_set_up AS (SELECT EventTime ,WindowEventTime ,TagName ,Value ,MOD(Value- {{ lower_bound }}, ({{ upper_bound }} - {{ lower_bound }}))*(2*pi()/({{ upper_bound }} - {{ lower_bound }})) as Value_in_Radians ,LAG(EventTime) OVER (PARTITION BY TagName ORDER BY EventTime) AS Previous_EventTime ,(unix_millis(EventTime) - unix_millis(Previous_EventTime)) / 86400000 AS Time_Difference ,COS(Value_in_Radians) as Cos_Value ,SIN(Value_in_Radians) as Sin_Value FROM window_events) " - ",circular_average_calculations AS (SELECT 
WindowEventTime ,TagName ,Time_Difference ,AVG(Cos_Value) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Cos ,AVG(Sin_Value) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Sin ,SQRT(POW(Average_Cos, 2) + POW(Average_Sin, 2)) AS Vector_Length ,Average_Cos/Vector_Length AS Rescaled_Average_Cos ,Average_Sin/Vector_Length AS Rescaled_Average_Sin ,Time_Difference * Rescaled_Average_Cos AS Diff_Average_Cos ,Time_Difference * Rescaled_Average_Sin AS Diff_Average_Sin FROM calculation_set_up) " + "WITH raw_events AS (SELECT `{{ timestamp_column }}`, `{{ tagname_column }}`, {% if include_status is defined and include_status == true %} `{{ status_column }}`, {% else %} 'Good' as `Status`, {% endif %} `{{ value_column }}` FROM " + "{% if source is defined and source is not none %}" + "`{{ source|lower }}` " + "{% else %}" + "`{{ business_unit|lower }}`.`sensors`.`{{ asset|lower }}_{{ data_security_level|lower }}_events_{{ data_type|lower }}` " + "{% endif %}" + "WHERE `{{ timestamp_column }}` BETWEEN TO_TIMESTAMP(\"{{ start_date }}\") AND TO_TIMESTAMP(\"{{ end_date }}\") AND `{{ tagname_column }}` IN ('{{ tag_names | join('\\', \\'') }}') " + "{% if include_status is defined and include_status == true and include_bad_data is defined and include_bad_data == false %} AND `{{ status_column }}` = 'Good' {% endif %}) " + ",date_array AS (SELECT EXPLODE(SEQUENCE(FROM_UTC_TIMESTAMP(TO_TIMESTAMP(\"{{ start_date }}\"), \"{{ time_zone }}\"), FROM_UTC_TIMESTAMP(TO_TIMESTAMP(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS `{{ timestamp_column }}`, EXPLODE(ARRAY('{{ tag_names | join('\\', \\'') }}')) AS `{{ tagname_column }}`) " + ",window_events AS (SELECT COALESCE(a.`{{ tagname_column }}`, b.`{{ tagname_column }}`) AS `{{ tagname_column }}`, COALESCE(a.`{{ timestamp_column }}`, b.`{{ timestamp_column }}`) AS `{{ timestamp_column }}`, WINDOW(COALESCE(a.`{{ timestamp_column }}`, b.`{{ timestamp_column }}`), '{{ time_interval_rate + ' ' + time_interval_unit }}').START `Window{{ timestamp_column }}`, b.`{{ status_column }}`, b.`{{ value_column }}` FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.`{{ timestamp_column }}` AS LONG) = CAST(b.`{{ timestamp_column }}` AS LONG) AND a.`{{ tagname_column }}` = b.`{{ tagname_column }}`) " + ",calculation_set_up AS (SELECT `{{ timestamp_column }}`, `Window{{ timestamp_column }}`, `{{ tagname_column }}`, `{{ value_column }}`, MOD(`{{ value_column }}` - {{ lower_bound }}, ({{ upper_bound }} - {{ lower_bound }}))*(2*pi()/({{ upper_bound }} - {{ lower_bound }})) as `{{ value_column }}_in_Radians`, LAG(`{{ timestamp_column }}`) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}`) AS `Previous_{{ timestamp_column }}`, (unix_millis(`{{ timestamp_column }}`) - unix_millis(`Previous_{{ timestamp_column }}`)) / 86400000 AS Time_Difference, COS(`{{ value_column }}_in_Radians`) as Cos_Value, SIN(`{{ value_column }}_in_Radians`) as Sin_Value FROM window_events) " + ",circular_average_calculations AS (SELECT `Window{{ timestamp_column }}`, `{{ tagname_column }}`, Time_Difference, AVG(Cos_Value) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Cos, AVG(Sin_Value) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Sin ,SQRT(POW(Average_Cos, 
2) + POW(Average_Sin, 2)) AS Vector_Length, Average_Cos/Vector_Length AS Rescaled_Average_Cos, Average_Sin/Vector_Length AS Rescaled_Average_Sin, Time_Difference * Rescaled_Average_Cos AS Diff_Average_Cos, Time_Difference * Rescaled_Average_Sin AS Diff_Average_Sin FROM calculation_set_up) " ) if parameters_dict["circular_function"] == "average": circular_stats_query = ( f"{circular_base_query} " - ",project_circular_average_results AS (SELECT WindowEventTime AS EventTime ,TagName ,sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages ,sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages ,array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R ,mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians ,(Circular_Average_Value_in_Radians * ({{ upper_bound }} - {{ lower_bound }})) / (2*pi())+ 0 AS Circular_Average_Value_in_Degrees FROM circular_average_calculations GROUP BY TagName, WindowEventTime) " - "SELECT EventTime ,TagName ,Circular_Average_Value_in_Degrees AS Value FROM project_circular_average_results ORDER BY TagName, EventTime " + ",project_circular_average_results AS (SELECT `Window{{ timestamp_column }}` AS `{{ timestamp_column }}`, `{{ tagname_column }}`, sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages, sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages, array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R, mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians, (Circular_Average_Value_in_Radians * ({{ upper_bound }} - {{ lower_bound }})) / (2*pi())+ 0 AS Circular_Average_Value_in_Degrees FROM circular_average_calculations GROUP BY `{{ tagname_column }}`, `Window{{ timestamp_column }}`) " + "SELECT `{{ timestamp_column }}`, `{{ tagname_column }}`, Circular_Average_Value_in_Degrees AS `{{ value_column }}` FROM project_circular_average_results ORDER BY `{{ tagname_column }}`, `{{ timestamp_column }}` " ) elif parameters_dict["circular_function"] == "standard_deviation": circular_stats_query = ( f"{circular_base_query} " - ",project_circular_average_results AS (SELECT WindowEventTime AS EventTime ,TagName ,sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages ,sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages ,array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R ,mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians ,SQRT(-2*LN(R)) * ( {{ upper_bound }} - {{ lower_bound }}) / (2*PI()) AS Circular_Standard_Deviation FROM circular_average_calculations GROUP BY TagName, WindowEventTime) " - "SELECT EventTime ,TagName , Circular_Standard_Deviation AS Value FROM project_circular_average_results ORDER BY TagName, EventTime " + ",project_circular_average_results AS (SELECT `Window{{ timestamp_column }}` AS `{{ timestamp_column }}`, `{{ tagname_column }}` ,sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages, sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages, array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R, mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians, SQRT(-2*LN(R)) * ( {{ upper_bound }} - {{ lower_bound }}) / (2*PI()) AS Circular_Standard_Deviation FROM circular_average_calculations GROUP BY `{{ tagname_column }}`, `Window{{ timestamp_column }}`) " + "SELECT `{{ 
timestamp_column }}`, `{{ tagname_column }}`, Circular_Standard_Deviation AS Value FROM project_circular_average_results ORDER BY `{{ tagname_column }}`, `{{ timestamp_column }}` " ) circular_stats_parameters = { - "business_unit": parameters_dict['business_unit'].lower(), - "region": parameters_dict['region'].lower(), - "asset": parameters_dict['asset'].lower(), - "data_security_level": parameters_dict['data_security_level'].lower(), - "data_type": parameters_dict['data_type'].lower(), + "source": parameters_dict.get("source", None), + "business_unit": parameters_dict.get("business_unit"), + "region": parameters_dict.get("region"), + "asset": parameters_dict.get("asset"), + "data_security_level": parameters_dict.get("data_security_level"), + "data_type": parameters_dict.get("data_type"), "start_date": parameters_dict['start_date'], "end_date": parameters_dict['end_date'], "tag_names": list(dict.fromkeys(parameters_dict['tag_names'])), @@ -385,7 +390,12 @@ def _circular_stats_query(parameters_dict: dict) -> str: "upper_bound": parameters_dict['upper_bound'], "include_bad_data": parameters_dict['include_bad_data'], "time_zone": parameters_dict["time_zone"], - "circular_function": parameters_dict["circular_function"] + "circular_function": parameters_dict["circular_function"], + "tagname_column": parameters_dict.get("tagname_column", "TagName"), + "timestamp_column": parameters_dict.get("timestamp_column", "EventTime"), + "include_status": False if "status_column" in parameters_dict and parameters_dict.get("status_column") is None else True, + "status_column": "Status" if "status_column" in parameters_dict and parameters_dict.get("status_column") is None else parameters_dict.get("status_column", "Status"), + "value_column": parameters_dict.get("value_column", "Value"), } sql_template = Template(circular_stats_query) diff --git a/tests/sdk/python/rtdip_sdk/queries/test_circular_average.py b/tests/sdk/python/rtdip_sdk/queries/test_circular_average.py index 49b9864a2..70c026350 100644 --- a/tests/sdk/python/rtdip_sdk/queries/test_circular_average.py +++ b/tests/sdk/python/rtdip_sdk/queries/test_circular_average.py @@ -27,9 +27,9 @@ ACCESS_TOKEN = "mock_databricks_token" DATABRICKS_SQL_CONNECT = 'databricks.sql.connect' DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor' -MOCKED_QUERY= 'WITH raw_events AS (SELECT EventTime,TagName ,Status ,Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN TO_DATE(to_timestamp("2011-01-01T00:00:00+00:00")) AND TO_DATE(to_timestamp("2011-01-02T23:59:59+00:00")) AND EventTime BETWEEN TO_TIMESTAMP("2011-01-01T00:00:00+00:00") AND TO_TIMESTAMP("2011-01-02T23:59:59+00:00") AND TagName IN (\'MOCKED-TAGNAME\') ) ,date_array AS (SELECT EXPLODE(SEQUENCE(FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-01T00:00:00+00:00"), "+0000"), FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS EventTime, EXPLODE(ARRAY(\'MOCKED-TAGNAME\')) AS TagName) ,window_events AS (SELECT COALESCE(a.TagName, b.TagName) AS TagName, COALESCE(a.EventTime, b.EventTime) AS EventTime, WINDOW(COALESCE(a.EventTime, b.EventTime), \'15 minute\').START WindowEventTime, b.Status, b.Value FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.EventTime AS LONG) = CAST(b.EventTime AS LONG) AND a.TagName = b.TagName) ,calculation_set_up AS (SELECT EventTime ,WindowEventTime ,TagName ,Value ,MOD(Value- 0, (360 - 0))*(2*pi()/(360 - 0)) as Value_in_Radians ,LAG(EventTime) OVER 
(PARTITION BY TagName ORDER BY EventTime) AS Previous_EventTime ,(unix_millis(EventTime) - unix_millis(Previous_EventTime)) / 86400000 AS Time_Difference ,COS(Value_in_Radians) as Cos_Value ,SIN(Value_in_Radians) as Sin_Value FROM window_events) ,circular_average_calculations AS (SELECT WindowEventTime ,TagName ,Time_Difference ,AVG(Cos_Value) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Cos ,AVG(Sin_Value) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Sin ,SQRT(POW(Average_Cos, 2) + POW(Average_Sin, 2)) AS Vector_Length ,Average_Cos/Vector_Length AS Rescaled_Average_Cos ,Average_Sin/Vector_Length AS Rescaled_Average_Sin ,Time_Difference * Rescaled_Average_Cos AS Diff_Average_Cos ,Time_Difference * Rescaled_Average_Sin AS Diff_Average_Sin FROM calculation_set_up) ,project_circular_average_results AS (SELECT WindowEventTime AS EventTime ,TagName ,sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages ,sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages ,array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R ,mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians ,(Circular_Average_Value_in_Radians * (360 - 0)) / (2*pi())+ 0 AS Circular_Average_Value_in_Degrees FROM circular_average_calculations GROUP BY TagName, WindowEventTime) SELECT EventTime ,TagName ,Circular_Average_Value_in_Degrees AS Value FROM project_circular_average_results ORDER BY TagName, EventTime ' +MOCKED_QUERY= 'WITH raw_events AS (SELECT `EventTime`, `TagName`, `Status`, `Value` FROM `mocked-business-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE `EventTime` BETWEEN TO_TIMESTAMP("2011-01-01T00:00:00+00:00") AND TO_TIMESTAMP("2011-01-02T23:59:59+00:00") AND `TagName` IN (\'MOCKED-TAGNAME\') ) ,date_array AS (SELECT EXPLODE(SEQUENCE(FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-01T00:00:00+00:00"), "+0000"), FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS `EventTime`, EXPLODE(ARRAY(\'MOCKED-TAGNAME\')) AS `TagName`) ,window_events AS (SELECT COALESCE(a.`TagName`, b.`TagName`) AS `TagName`, COALESCE(a.`EventTime`, b.`EventTime`) AS `EventTime`, WINDOW(COALESCE(a.`EventTime`, b.`EventTime`), \'15 minute\').START `WindowEventTime`, b.`Status`, b.`Value` FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.`EventTime` AS LONG) = CAST(b.`EventTime` AS LONG) AND a.`TagName` = b.`TagName`) ,calculation_set_up AS (SELECT `EventTime`, `WindowEventTime`, `TagName`, `Value`, MOD(`Value` - 0, (360 - 0))*(2*pi()/(360 - 0)) as `Value_in_Radians`, LAG(`EventTime`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) AS `Previous_EventTime`, (unix_millis(`EventTime`) - unix_millis(`Previous_EventTime`)) / 86400000 AS Time_Difference, COS(`Value_in_Radians`) as Cos_Value, SIN(`Value_in_Radians`) as Sin_Value FROM window_events) ,circular_average_calculations AS (SELECT `WindowEventTime`, `TagName`, Time_Difference, AVG(Cos_Value) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Cos, AVG(Sin_Value) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Sin ,SQRT(POW(Average_Cos, 2) + POW(Average_Sin, 2)) AS Vector_Length, Average_Cos/Vector_Length AS Rescaled_Average_Cos, Average_Sin/Vector_Length AS Rescaled_Average_Sin, Time_Difference * 
Rescaled_Average_Cos AS Diff_Average_Cos, Time_Difference * Rescaled_Average_Sin AS Diff_Average_Sin FROM calculation_set_up) ,project_circular_average_results AS (SELECT `WindowEventTime` AS `EventTime`, `TagName`, sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages, sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages, array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R, mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians, (Circular_Average_Value_in_Radians * (360 - 0)) / (2*pi())+ 0 AS Circular_Average_Value_in_Degrees FROM circular_average_calculations GROUP BY `TagName`, `WindowEventTime`) SELECT `EventTime`, `TagName`, Circular_Average_Value_in_Degrees AS `Value` FROM project_circular_average_results ORDER BY `TagName`, `EventTime` '
 MOCKED_PARAMETER_DICT = {
-    "business_unit": "mocked-buiness-unit",
+    "business_unit": "mocked-business-unit",
     "region": "mocked-region",
     "asset": "mocked-asset",
     "data_security_level": "mocked-data-security-level",
diff --git a/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py b/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py
index 780d65276..66231fb24 100644
--- a/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py
+++ b/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py
@@ -80,4 +80,26 @@ def test_query_builder_metadata(mocker: MockerFixture):
         .source("mock_catalog.mock_schema.mock_metadata_table")
         .metadata(tagname_filter=["mock_tag"])
     )
-    assert data == {"test": "data"}
\ No newline at end of file
+    assert data == {"test": "data"}
+
+def test_query_builder_circular_average(mocker: MockerFixture):
+    mocker.patch("src.sdk.python.rtdip_sdk.queries.query_builder.circular_average.get", return_value={"test": "data"})
+
+    data = (
+        QueryBuilder()
+        .connect("mock_connection")
+        .source("mock_catalog.mock_schema.mock_table", status_column=None)
+        .circular_average(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02", time_interval_rate="1", time_interval_unit="hour", lower_bound=1, upper_bound=2)
+    )
+    assert data == {"test": "data"}
+
+def test_query_builder_circular_standard_deviation(mocker: MockerFixture):
+    mocker.patch("src.sdk.python.rtdip_sdk.queries.query_builder.circular_standard_deviation.get", return_value={"test": "data"})
+
+    data = (
+        QueryBuilder()
+        .connect("mock_connection")
+        .source("mock_catalog.mock_schema.mock_table", status_column=None)
+        .circular_standard_deviation(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02", time_interval_rate="1", time_interval_unit="hour", lower_bound=1, upper_bound=2)
+    )
+    assert data == {"test": "data"}
\ No newline at end of file
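For orientation between these patches, the fluent chain exercised by the new circular tests would look roughly like this in application code. This is a minimal sketch, not part of the change itself: it assumes the SDK is importable as the installed `rtdip_sdk` package and that `DatabricksSQLConnection` takes a server hostname, HTTP path and access token; every value below is a placeholder.

```python
from rtdip_sdk.connectors import DatabricksSQLConnection
from rtdip_sdk.queries import QueryBuilder

# Placeholder connection details; substitute real workspace values.
connection = DatabricksSQLConnection("server_hostname", "http_path", "access_token")

data = (
    QueryBuilder()
    .connect(connection)
    .source("catalog.schema.events", status_column=None)  # a source without a Status column
    .circular_average(
        tagname_filter=["TAG-001"],
        start_date="2021-01-01",
        end_date="2021-01-02",
        time_interval_rate="15",
        time_interval_unit="minute",
        lower_bound=0,    # circular range lower bound, e.g. 0 degrees
        upper_bound=360,  # circular range upper bound, e.g. 360 degrees
    )
)
```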
From 52ca2521a07f1611160c4c6c866d2753a016c01c Mon Sep 17 00:00:00 2001
From: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
Date: Thu, 17 Aug 2023 09:59:49 +0100
Subject: [PATCH 3/8] Add documentation

Signed-off-by: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
---
 .../sdk/code-reference/query/query_builder.md |   3 +
 mkdocs.yml                                    |   1 +
 .../python/rtdip_sdk/queries/query_builder.py | 150 +++++++++++++++++-
 .../time_series/time_weighted_average.py      |   2 +-
 4 files changed, 147 insertions(+), 9 deletions(-)
 create mode 100644 docs/sdk/code-reference/query/query_builder.md

diff --git a/docs/sdk/code-reference/query/query_builder.md b/docs/sdk/code-reference/query/query_builder.md
new file mode 100644
index 000000000..641db15d0
--- /dev/null
+++ b/docs/sdk/code-reference/query/query_builder.md
@@ -0,0 +1,3 @@
+# Query Builder
+::: src.sdk.python.rtdip_sdk.queries.query_builder
+
diff --git a/mkdocs.yml b/mkdocs.yml
index e26489f74..563f25f57 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -228,6 +228,7 @@ nav:
         - Deploy:
           - Databricks: sdk/code-reference/pipelines/deploy/databricks.md
       - Queries:
+        - Query Builder: sdk/code-reference/query/query_builder.md
         - Functions:
           - Resample: sdk/code-reference/query/resample.md
           - Interpolate: sdk/code-reference/query/interpolate.md
diff --git a/src/sdk/python/rtdip_sdk/queries/query_builder.py b/src/sdk/python/rtdip_sdk/queries/query_builder.py
index 9530511e3..3d490ec39 100644
--- a/src/sdk/python/rtdip_sdk/queries/query_builder.py
+++ b/src/sdk/python/rtdip_sdk/queries/query_builder.py
@@ -16,8 +16,12 @@
 from ..connectors.connection_interface import ConnectionInterface
 from .time_series import raw, resample, interpolate, interpolation_at_time, time_weighted_average, circular_average, circular_standard_deviation
 from . import metadata
+from pandas import DataFrame
 
 class QueryBuilder():
+    '''
+    A builder for developing RTDIP queries using any Delta table
+    '''
     parameters: dict
     connection: ConnectionInterface
     data_source: str
@@ -27,10 +31,26 @@ class QueryBuilder:
     def connect(self, connection: ConnectionInterface):
+        '''
+        Specifies the connection to be used for the query
+
+        Args:
+            connection: Connection chosen by the user (Databricks SQL Connect, PYODBC SQL Connect, TURBODBC SQL Connect)
+        '''
         self.connection = connection
         return self
 
     def source(self, source: str, tagname_column: str = "TagName", timestamp_column: str = "EventTime", status_column: Union[str, None] = "Status", value_column: str = "Value"):
+        '''
+        Specifies the source of the query
+
+        Args:
+            source (str): Source of the query can be a Unity Catalog table, Hive metastore table or path
+            tagname_column (optional str): The column name in the source that contains the tagnames or series
+            timestamp_column (optional str): The timestamp column name in the source
+            status_column (optional str): The status column name in the source indicating `Good` or `Bad`. If this is not available, specify `None`
+            value_column (optional str): The value column name in the source which is normally a float or string value for the time series event
+        '''
         self.data_source = "`.`".join(source.split("."))
         self.tagname_column = tagname_column
         self.timestamp_column = timestamp_column
@@ -38,7 +58,19 @@ def source(self, source: str, tagname_column: str = "TagName", timestamp_column:
         self.value_column = value_column
         return self
 
-    def raw(self, tagname_filter: [str], start_date: str, end_date: str, include_bad_data: bool = False):
+    def raw(self, tagname_filter: [str], start_date: str, end_date: str, include_bad_data: bool = False) -> DataFrame:
+        '''
+        A function to return raw data
+
+        Args:
+            tagname_filter (list str): List of tagnames to filter on the source
+            start_date (str): Start date (Either a date in the format YYYY-MM-DD or a datetime in the format YYYY-MM-DDTHH:MM:SS or specify the timezone offset in the format YYYY-MM-DDTHH:MM:SS+zz:zz)
+            end_date (str): End date (Either a date in the format YYYY-MM-DD or a datetime in the format YYYY-MM-DDTHH:MM:SS or specify the timezone offset in the format YYYY-MM-DDTHH:MM:SS+zz:zz)
+            include_bad_data (optional bool): Include "Bad" data points with True or remove "Bad" data points with False
+
+        Returns:
+            DataFrame: A dataframe of raw timeseries data.
+        '''
         raw_parameters = {
             "source": self.data_source,
             "tag_names": tagname_filter,
@@ -52,7 +84,23 @@ def raw(self, tagname_filter: [str], start_date: str, end_date: str, include_bad
         }
         return raw.get(self.connection, raw_parameters)
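To make the docstring above concrete, here is a hedged sketch of a `raw()` call; the connection object and table name are placeholders, and the two date arguments illustrate the plain-date and timezone-offset forms the documentation accepts.

```python
from rtdip_sdk.connectors import DatabricksSQLConnection
from rtdip_sdk.queries import QueryBuilder

connection = DatabricksSQLConnection("server_hostname", "http_path", "access_token")  # placeholders

raw_df = (
    QueryBuilder()
    .connect(connection)
    .source("catalog.schema.events", status_column=None)
    .raw(
        tagname_filter=["TAG-001", "TAG-002"],
        start_date="2021-01-01",               # YYYY-MM-DD
        end_date="2021-01-02T23:59:59+00:00",  # YYYY-MM-DDTHH:MM:SS+zz:zz
        include_bad_data=True,
    )
)
```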
-    def resample(self, tagname_filter: [str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, agg_method: str, include_bad_data: bool = False):
+    def resample(self, tagname_filter: [str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, agg_method: str, include_bad_data: bool = False) -> DataFrame:
+        '''
+        A query to resample the source data
+
+        Args:
+            tagname_filter (list str): List of tagnames to filter on the source
+            start_date (str): Start date (Either a date in the format YYYY-MM-DD or a datetime in the format YYYY-MM-DDTHH:MM:SS or specify the timezone offset in the format YYYY-MM-DDTHH:MM:SS+zz:zz)
+            end_date (str): End date (Either a date in the format YYYY-MM-DD or a datetime in the format YYYY-MM-DDTHH:MM:SS or specify the timezone offset in the format YYYY-MM-DDTHH:MM:SS+zz:zz)
+            time_interval_rate (str): The time interval rate (numeric input)
+            time_interval_unit (str): The time interval unit (second, minute, day, hour)
+            agg_method (str): Aggregation Method (first, last, avg, min, max)
+            include_bad_data (optional bool): Include "Bad" data points with True or remove "Bad" data points with False
+
+        Returns:
+            DataFrame: A dataframe of resampled timeseries data.
+        '''
+
         resample_parameters = {
             "source": self.data_source,
             "tag_names": tagname_filter,
@@ -70,7 +118,23 @@ def resample(self, tagname_filter: [str], start_date: str, end_date: str, time_i
 
         return resample.get(self.connection, resample_parameters)
 
-    def interpolate(self, tagname_filter: [str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, agg_method: str, interpolation_method: str, include_bad_data: bool = False):
+    def interpolate(self, tagname_filter: [str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, agg_method: str, interpolation_method: str, include_bad_data: bool = False) -> DataFrame:
+        '''
+        The Interpolate function will forward fill, backward fill or linearly interpolate the resampled data, depending on the parameters specified
+
+        Args:
+            tagname_filter (list str): List of tagnames to filter on the source
+            start_date (str): Start date (Either a date in the format YYYY-MM-DD or a datetime in the format YYYY-MM-DDTHH:MM:SS or specify the timezone offset in the format YYYY-MM-DDTHH:MM:SS+zz:zz)
+            end_date (str): End date (Either a date in the format YYYY-MM-DD or a datetime in the format YYYY-MM-DDTHH:MM:SS or specify the timezone offset in the format YYYY-MM-DDTHH:MM:SS+zz:zz)
+            time_interval_rate (str): The time interval rate (numeric input)
+            time_interval_unit (str): The time interval unit (second, minute, day, hour)
+            agg_method (str): Aggregation Method (first, last, avg, min, max)
+            interpolation_method (str): Interpolation method (forward_fill, backward_fill, linear)
+            include_bad_data (optional bool): Include "Bad" data points with True or remove "Bad" data points with False
+
+        Returns:
+            DataFrame: A dataframe of interpolated timeseries data.
+        '''
         interpolation_parameters = {
             "source": self.data_source,
             "tag_names": tagname_filter,
@@ -89,7 +153,19 @@ def interpolate(self, tagname_filter: [str], start_date: str, end_date: str, tim
 
         return interpolate.get(self.connection, interpolation_parameters)
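The resample and interpolate docstrings above enumerate the accepted interval units and methods; a minimal sketch of an `interpolate()` call using those options follows (connection, catalog and tag names are placeholders).

```python
from rtdip_sdk.connectors import DatabricksSQLConnection
from rtdip_sdk.queries import QueryBuilder

connection = DatabricksSQLConnection("server_hostname", "http_path", "access_token")  # placeholders

interpolated_df = (
    QueryBuilder()
    .connect(connection)
    .source("catalog.schema.events")
    .interpolate(
        tagname_filter=["TAG-001"],
        start_date="2021-01-01",
        end_date="2021-01-02",
        time_interval_rate="15",
        time_interval_unit="minute",    # second, minute, day or hour
        agg_method="avg",               # first, last, avg, min or max
        interpolation_method="linear",  # forward_fill, backward_fill or linear
    )
)
```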
-    def interpolation_at_time(self, tagname_filter: [str], timestamp_filter: list[str], include_bad_data: bool = False, window_length: int = 1):
+    def interpolation_at_time(self, tagname_filter: [str], timestamp_filter: list[str], include_bad_data: bool = False, window_length: int = 1) -> DataFrame:
+        '''
+        An interpolation at time function that works out the linear interpolation at a specific time based on the points before and after
+
+        Args:
+            tagname_filter (list str): List of tagnames to filter on the source
+            timestamp_filter (list): List of timestamp or timestamps in the format YYYY-MM-DDTHH:MM:SS or YYYY-MM-DDTHH:MM:SS+zz:zz where %z is the timezone. (Example +00:00 is the UTC timezone)
+            include_bad_data (optional bool): Include "Bad" data points with True or remove "Bad" data points with False
+            window_length (optional int): Add longer window time in days for the start or end of specified date to cater for edge cases.
+
+        Returns:
+            DataFrame: A dataframe of interpolation at time timeseries data
+        '''
         interpolation_at_time_parameters = {
             "source": self.data_source,
             "tag_names": tagname_filter,
@@ -104,7 +180,24 @@ def interpolation_at_time(self, tagname_filter: [str], timestamp_filter: list[st
 
         return interpolation_at_time.get(self.connection, interpolation_at_time_parameters)
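A short sketch of the renamed `interpolation_at_time()` method documented above, mirroring the timestamp list used in the earlier tests; the connection and source names are placeholders.

```python
from rtdip_sdk.connectors import DatabricksSQLConnection
from rtdip_sdk.queries import QueryBuilder

connection = DatabricksSQLConnection("server_hostname", "http_path", "access_token")  # placeholders

at_time_df = (
    QueryBuilder()
    .connect(connection)
    .source("catalog.schema.events")
    .interpolation_at_time(
        tagname_filter=["TAG-001"],
        timestamp_filter=["2021-01-02T17:30:00+00:00", "2021-01-02T18:30:00+00:00"],
        window_length=1,  # widen the surrounding-data window (in days) for edge cases
    )
)
```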
-    def time_weighted_average(self, tagname_filter: [str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, step: str, source_metadata: str = None, include_bad_data: bool = False, window_length: int = 1):
+    def time_weighted_average(self, tagname_filter: [str], start_date: str, end_date: str, time_interval_rate: str, time_interval_unit: str, step: str, source_metadata: str = None, include_bad_data: bool = False, window_length: int = 1) -> DataFrame:
+        '''
+        A function that receives a dataframe of raw tag data and performs a time weighted average
+
+        Args:
+            tagname_filter (list str): List of tagnames to filter on the source
+            start_date (str): Start date (Either a date in the format YYYY-MM-DD or a datetime in the format YYYY-MM-DDTHH:MM:SS or specify the timezone offset in the format YYYY-MM-DDTHH:MM:SS+zz:zz)
+            end_date (str): End date (Either a date in the format YYYY-MM-DD or a datetime in the format YYYY-MM-DDTHH:MM:SS or specify the timezone offset in the format YYYY-MM-DDTHH:MM:SS+zz:zz)
+            time_interval_rate (str): The time interval rate (numeric input)
+            time_interval_unit (str): The time interval unit (second, minute, day, hour)
+            step (str): Data points with step "enabled" or "disabled". The options for step are "true", "false" or "metadata". "metadata" will retrieve the step value from the metadata table
+            source_metadata (optional str): If step is set to "metadata", then this parameter must be populated with the source containing the tagname metadata with a column called "Step"
+            include_bad_data (optional bool): Include "Bad" data points with True or remove "Bad" data points with False
+            window_length (optional int): Add longer window time in days for the start or end of specified date to cater for edge cases.
+
+        Returns:
+            DataFrame: A dataframe of time weighted average timeseries data
+        '''
         time_weighted_average_parameters = {
             "source": self.data_source,
             "tag_names": tagname_filter,
@@ -124,7 +217,16 @@ def time_weighted_average(self, tagname_filter: [str], start_date: str, end_date
 
         return time_weighted_average.get(self.connection, time_weighted_average_parameters)
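The `step="metadata"` path described above is the one case that needs a second source; a hedged sketch follows, with placeholder catalog and metadata table names (the metadata source is assumed to carry a `Step` column, as the docstring requires).

```python
from rtdip_sdk.connectors import DatabricksSQLConnection
from rtdip_sdk.queries import QueryBuilder

connection = DatabricksSQLConnection("server_hostname", "http_path", "access_token")  # placeholders

twa_df = (
    QueryBuilder()
    .connect(connection)
    .source("catalog.schema.events", status_column=None)
    .time_weighted_average(
        tagname_filter=["TAG-001"],
        start_date="2021-01-01",
        end_date="2021-01-02",
        time_interval_rate="1",
        time_interval_unit="hour",
        step="metadata",  # look up each tag's Step flag in the metadata source
        source_metadata="catalog.schema.events_metadata",
    )
)
```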
YYYY-MM-DDTHH:MM:SS+zz:zz) + time_interval_rate (str): The time interval rate (numeric input) + time_interval_unit (str): The time interval unit (second, minute, day, hour) + lower_bound (int): Lower boundary for the sample range + upper_bound (int): Upper boundary for the sample range + include_bad_data (optional bool): Include "Bad" data points with True or remove "Bad" data points with False + + Returns: + DataFrame: A dataframe containing the circular standard deviations + ''' circular_stddev_parameters = { "source": self.data_source, "tag_names": tagname_filter, diff --git a/src/sdk/python/rtdip_sdk/queries/time_series/time_weighted_average.py b/src/sdk/python/rtdip_sdk/queries/time_series/time_weighted_average.py index 53318932e..53ce35bb7 100644 --- a/src/sdk/python/rtdip_sdk/queries/time_series/time_weighted_average.py +++ b/src/sdk/python/rtdip_sdk/queries/time_series/time_weighted_average.py @@ -17,7 +17,7 @@ def get(connection: object, parameters_dict: dict) -> pd.DataFrame: ''' - A function that receives a dataframe of raw tag data and performs a timeweighted average, returning the results. + A function that receives a dataframe of raw tag data and performs a time weighted averages, returning the results. This function requires the input of a pandas dataframe acquired via the rtdip.functions.raw() method and the user to input a dictionary of parameters. (See Attributes table below) From 88728c8e42084a5e5a58e04f6e4ae58699cb43f2 Mon Sep 17 00:00:00 2001 From: GBBBAS <42962356+GBBBAS@users.noreply.github.com> Date: Thu, 17 Aug 2023 10:17:02 +0100 Subject: [PATCH 4/8] Test Fixes Signed-off-by: GBBBAS <42962356+GBBBAS@users.noreply.github.com> --- src/sdk/python/rtdip_sdk/queries/__init__.py | 2 +- .../python/rtdip_sdk/queries/time_series/_query_builder.py | 6 +++--- tests/sdk/python/rtdip_sdk/queries/test_circular_average.py | 2 +- .../rtdip_sdk/queries/test_circular_standard_deviation.py | 2 +- tests/sdk/python/rtdip_sdk/queries/test_query_builder.py | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/sdk/python/rtdip_sdk/queries/__init__.py b/src/sdk/python/rtdip_sdk/queries/__init__.py index ff06b458b..ff3b5b7a0 100644 --- a/src/sdk/python/rtdip_sdk/queries/__init__.py +++ b/src/sdk/python/rtdip_sdk/queries/__init__.py @@ -12,5 +12,5 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from .time_series import raw, resample, interpolate, interpolation_at_time, time_weighted_average
+from .time_series import raw, resample, interpolate, interpolation_at_time, time_weighted_average, circular_standard_deviation, circular_average
 from .query_builder import *
\ No newline at end of file
diff --git a/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py b/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py
index badddb16f..d987bcbe1 100644
--- a/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py
+++ b/src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py
@@ -358,7 +358,7 @@ def _circular_stats_query(parameters_dict: dict) -> str:
         ",date_array AS (SELECT EXPLODE(SEQUENCE(FROM_UTC_TIMESTAMP(TO_TIMESTAMP(\"{{ start_date }}\"), \"{{ time_zone }}\"), FROM_UTC_TIMESTAMP(TO_TIMESTAMP(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS `{{ timestamp_column }}`, EXPLODE(ARRAY('{{ tag_names | join('\\', \\'') }}')) AS `{{ tagname_column }}`) "
         ",window_events AS (SELECT COALESCE(a.`{{ tagname_column }}`, b.`{{ tagname_column }}`) AS `{{ tagname_column }}`, COALESCE(a.`{{ timestamp_column }}`, b.`{{ timestamp_column }}`) AS `{{ timestamp_column }}`, WINDOW(COALESCE(a.`{{ timestamp_column }}`, b.`{{ timestamp_column }}`), '{{ time_interval_rate + ' ' + time_interval_unit }}').START `Window{{ timestamp_column }}`, b.`{{ status_column }}`, b.`{{ value_column }}` FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.`{{ timestamp_column }}` AS LONG) = CAST(b.`{{ timestamp_column }}` AS LONG) AND a.`{{ tagname_column }}` = b.`{{ tagname_column }}`) "
         ",calculation_set_up AS (SELECT `{{ timestamp_column }}`, `Window{{ timestamp_column }}`, `{{ tagname_column }}`, `{{ value_column }}`, MOD(`{{ value_column }}` - {{ lower_bound }}, ({{ upper_bound }} - {{ lower_bound }}))*(2*pi()/({{ upper_bound }} - {{ lower_bound }})) as `{{ value_column }}_in_Radians`, LAG(`{{ timestamp_column }}`) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}`) AS `Previous_{{ timestamp_column }}`, (unix_millis(`{{ timestamp_column }}`) - unix_millis(`Previous_{{ timestamp_column }}`)) / 86400000 AS Time_Difference, COS(`{{ value_column }}_in_Radians`) as Cos_Value, SIN(`{{ value_column }}_in_Radians`) as Sin_Value FROM window_events) "
-        ",circular_average_calculations AS (SELECT `Window{{ timestamp_column }}`, `{{ tagname_column }}`, Time_Difference, AVG(Cos_Value) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Cos, AVG(Sin_Value) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Sin ,SQRT(POW(Average_Cos, 2) + POW(Average_Sin, 2)) AS Vector_Length, Average_Cos/Vector_Length AS Rescaled_Average_Cos, Average_Sin/Vector_Length AS Rescaled_Average_Sin, Time_Difference * Rescaled_Average_Cos AS Diff_Average_Cos, Time_Difference * Rescaled_Average_Sin AS Diff_Average_Sin FROM calculation_set_up) "
+        ",circular_average_calculations AS (SELECT `Window{{ timestamp_column }}`, `{{ tagname_column }}`, Time_Difference, AVG(Cos_Value) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Cos, AVG(Sin_Value) OVER (PARTITION BY `{{ tagname_column }}` ORDER BY `{{ timestamp_column }}` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Sin, SQRT(POW(Average_Cos, 2) + POW(Average_Sin, 2)) AS Vector_Length, Average_Cos/Vector_Length AS Rescaled_Average_Cos, Average_Sin/Vector_Length AS Rescaled_Average_Sin, Time_Difference * Rescaled_Average_Cos AS Diff_Average_Cos, Time_Difference * Rescaled_Average_Sin AS Diff_Average_Sin FROM calculation_set_up) "
     )
 
     if parameters_dict["circular_function"] == "average":
@@ -370,8 +370,8 @@ def _circular_stats_query(parameters_dict: dict) -> str:
     elif parameters_dict["circular_function"] == "standard_deviation":
         circular_stats_query = (
             f"{circular_base_query} "
-            ",project_circular_average_results AS (SELECT `Window{{ timestamp_column }}` AS `{{ timestamp_column }}`, `{{ tagname_column }}` ,sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages, sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages, array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R, mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians, SQRT(-2*LN(R)) * ( {{ upper_bound }} - {{ lower_bound }}) / (2*PI()) AS Circular_Standard_Deviation FROM circular_average_calculations GROUP BY `{{ tagname_column }}`, `Window{{ timestamp_column }}`) "
-            "SELECT `{{ timestamp_column }}`, `{{ tagname_column }}`, Circular_Standard_Deviation AS Value FROM project_circular_average_results ORDER BY `{{ tagname_column }}`, `{{ timestamp_column }}` "
+            ",project_circular_average_results AS (SELECT `Window{{ timestamp_column }}` AS `{{ timestamp_column }}`, `{{ tagname_column }}`, sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages, sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages, array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R, mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians, SQRT(-2*LN(R)) * ( {{ upper_bound }} - {{ lower_bound }}) / (2*PI()) AS Circular_Standard_Deviation FROM circular_average_calculations GROUP BY `{{ tagname_column }}`, `Window{{ timestamp_column }}`) "
+            "SELECT `{{ timestamp_column }}`, `{{ tagname_column }}`, Circular_Standard_Deviation AS `Value` FROM project_circular_average_results ORDER BY `{{ tagname_column }}`, `{{ timestamp_column }}` "
         )
 
     circular_stats_parameters = {
diff --git a/tests/sdk/python/rtdip_sdk/queries/test_circular_average.py b/tests/sdk/python/rtdip_sdk/queries/test_circular_average.py
index 70c026350..da14fd123 100644
--- a/tests/sdk/python/rtdip_sdk/queries/test_circular_average.py
+++ b/tests/sdk/python/rtdip_sdk/queries/test_circular_average.py
@@ -27,7 +27,7 @@
 ACCESS_TOKEN = "mock_databricks_token"
 DATABRICKS_SQL_CONNECT = 'databricks.sql.connect'
 DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor'
-MOCKED_QUERY= 'WITH raw_events AS (SELECT `EventTime`, `TagName`, `Status`, `Value` FROM `mocked-business-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE `EventTime` BETWEEN TO_TIMESTAMP("2011-01-01T00:00:00+00:00") AND TO_TIMESTAMP("2011-01-02T23:59:59+00:00") AND `TagName` IN (\'MOCKED-TAGNAME\') ) ,date_array AS (SELECT EXPLODE(SEQUENCE(FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-01T00:00:00+00:00"), "+0000"), FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS `EventTime`, EXPLODE(ARRAY(\'MOCKED-TAGNAME\')) AS `TagName`) ,window_events AS (SELECT COALESCE(a.`TagName`, b.`TagName`) AS `TagName`, COALESCE(a.`EventTime`, b.`EventTime`) AS `EventTime`, WINDOW(COALESCE(a.`EventTime`, b.`EventTime`), \'15 minute\').START `WindowEventTime`, b.`Status`, b.`Value` FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.`EventTime` AS LONG) = CAST(b.`EventTime` AS LONG) AND a.`TagName` = b.`TagName`) ,calculation_set_up AS (SELECT `EventTime`, `WindowEventTime`, `TagName`, `Value`, MOD(`Value` - 0, (360 - 0))*(2*pi()/(360 - 0)) as `Value_in_Radians`, LAG(`EventTime`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) AS `Previous_EventTime`, (unix_millis(`EventTime`) - unix_millis(`Previous_EventTime`)) / 86400000 AS Time_Difference, COS(`Value_in_Radians`) as Cos_Value, SIN(`Value_in_Radians`) as Sin_Value FROM window_events) ,circular_average_calculations AS (SELECT `WindowEventTime`, `TagName`, Time_Difference, AVG(Cos_Value) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Cos, AVG(Sin_Value) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Sin ,SQRT(POW(Average_Cos, 2) + POW(Average_Sin, 2)) AS Vector_Length, Average_Cos/Vector_Length AS Rescaled_Average_Cos, Average_Sin/Vector_Length AS Rescaled_Average_Sin, Time_Difference * Rescaled_Average_Cos AS Diff_Average_Cos, Time_Difference * Rescaled_Average_Sin AS Diff_Average_Sin FROM calculation_set_up) ,project_circular_average_results AS (SELECT `WindowEventTime` AS `EventTime`, `TagName`, sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages, sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages, array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R, mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians, (Circular_Average_Value_in_Radians * (360 - 0)) / (2*pi())+ 0 AS Circular_Average_Value_in_Degrees FROM circular_average_calculations GROUP BY `TagName`, `WindowEventTime`) SELECT `EventTime`, `TagName`, Circular_Average_Value_in_Degrees AS `Value` FROM project_circular_average_results ORDER BY `TagName`, `EventTime` '
+MOCKED_QUERY= 'WITH raw_events AS (SELECT `EventTime`, `TagName`, `Status`, `Value` FROM `mocked-business-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE `EventTime` BETWEEN TO_TIMESTAMP("2011-01-01T00:00:00+00:00") AND TO_TIMESTAMP("2011-01-02T23:59:59+00:00") AND `TagName` IN (\'MOCKED-TAGNAME\') ) ,date_array AS (SELECT EXPLODE(SEQUENCE(FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-01T00:00:00+00:00"), "+0000"), FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS `EventTime`, EXPLODE(ARRAY(\'MOCKED-TAGNAME\')) AS `TagName`) ,window_events AS (SELECT COALESCE(a.`TagName`, b.`TagName`) AS `TagName`, COALESCE(a.`EventTime`, b.`EventTime`) AS `EventTime`, WINDOW(COALESCE(a.`EventTime`, b.`EventTime`), \'15 minute\').START `WindowEventTime`, b.`Status`, b.`Value` FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.`EventTime` AS LONG) = CAST(b.`EventTime` AS LONG) AND a.`TagName` = b.`TagName`) ,calculation_set_up AS (SELECT `EventTime`, `WindowEventTime`, `TagName`, `Value`, MOD(`Value` - 0, (360 - 0))*(2*pi()/(360 - 0)) as `Value_in_Radians`, LAG(`EventTime`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) AS `Previous_EventTime`, (unix_millis(`EventTime`) - unix_millis(`Previous_EventTime`)) / 86400000 AS Time_Difference, COS(`Value_in_Radians`) as Cos_Value, SIN(`Value_in_Radians`) as Sin_Value FROM window_events) ,circular_average_calculations AS (SELECT `WindowEventTime`, `TagName`, Time_Difference, AVG(Cos_Value) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Cos, AVG(Sin_Value) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Sin, SQRT(POW(Average_Cos, 2) + POW(Average_Sin, 2)) AS Vector_Length, Average_Cos/Vector_Length AS Rescaled_Average_Cos, Average_Sin/Vector_Length AS Rescaled_Average_Sin, Time_Difference * Rescaled_Average_Cos AS Diff_Average_Cos, Time_Difference * Rescaled_Average_Sin AS Diff_Average_Sin FROM calculation_set_up) ,project_circular_average_results AS (SELECT `WindowEventTime` AS `EventTime`, `TagName`, sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages, sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages, array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R, mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians, (Circular_Average_Value_in_Radians * (360 - 0)) / (2*pi())+ 0 AS Circular_Average_Value_in_Degrees FROM circular_average_calculations GROUP BY `TagName`, `WindowEventTime`) SELECT `EventTime`, `TagName`, Circular_Average_Value_in_Degrees AS `Value` FROM project_circular_average_results ORDER BY `TagName`, `EventTime` '
 MOCKED_PARAMETER_DICT = {
     "business_unit": "mocked-business-unit",
     "region": "mocked-region",
diff --git a/tests/sdk/python/rtdip_sdk/queries/test_circular_standard_deviation.py b/tests/sdk/python/rtdip_sdk/queries/test_circular_standard_deviation.py
index c0743c56f..8fdb2ac0c 100644
--- a/tests/sdk/python/rtdip_sdk/queries/test_circular_standard_deviation.py
+++ b/tests/sdk/python/rtdip_sdk/queries/test_circular_standard_deviation.py
@@ -27,7 +27,7 @@
 ACCESS_TOKEN = "mock_databricks_token"
 DATABRICKS_SQL_CONNECT = 'databricks.sql.connect'
 DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor'
-MOCKED_QUERY= 'WITH raw_events AS (SELECT EventTime,TagName ,Status ,Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN TO_DATE(to_timestamp("2011-01-01T00:00:00+00:00")) AND TO_DATE(to_timestamp("2011-01-02T23:59:59+00:00")) AND EventTime BETWEEN TO_TIMESTAMP("2011-01-01T00:00:00+00:00") AND TO_TIMESTAMP("2011-01-02T23:59:59+00:00") AND TagName IN (\'MOCKED-TAGNAME\') ) ,date_array AS (SELECT EXPLODE(SEQUENCE(FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-01T00:00:00+00:00"), "+0000"), FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS EventTime, EXPLODE(ARRAY(\'MOCKED-TAGNAME\')) AS TagName) ,window_events AS (SELECT COALESCE(a.TagName, b.TagName) AS TagName, COALESCE(a.EventTime, b.EventTime) AS EventTime, WINDOW(COALESCE(a.EventTime, b.EventTime), \'15 minute\').START WindowEventTime, b.Status, b.Value FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.EventTime AS LONG) = CAST(b.EventTime AS LONG) AND a.TagName = b.TagName) ,calculation_set_up AS (SELECT EventTime ,WindowEventTime ,TagName ,Value ,MOD(Value- 0, (360 - 0))*(2*pi()/(360 - 0)) as Value_in_Radians ,LAG(EventTime) OVER (PARTITION BY TagName ORDER BY EventTime) AS Previous_EventTime ,(unix_millis(EventTime) - unix_millis(Previous_EventTime)) / 86400000 AS Time_Difference ,COS(Value_in_Radians) as Cos_Value ,SIN(Value_in_Radians) as Sin_Value FROM window_events) ,circular_average_calculations AS (SELECT WindowEventTime ,TagName ,Time_Difference ,AVG(Cos_Value) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Cos ,AVG(Sin_Value) OVER (PARTITION BY TagName ORDER BY EventTime ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Sin ,SQRT(POW(Average_Cos, 2) + POW(Average_Sin, 2)) AS Vector_Length ,Average_Cos/Vector_Length AS Rescaled_Average_Cos ,Average_Sin/Vector_Length AS Rescaled_Average_Sin ,Time_Difference * Rescaled_Average_Cos AS Diff_Average_Cos ,Time_Difference * Rescaled_Average_Sin AS Diff_Average_Sin FROM calculation_set_up) ,project_circular_average_results AS (SELECT WindowEventTime AS EventTime ,TagName ,sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages ,sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages ,array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R ,mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians ,SQRT(-2*LN(R)) * ( 360 - 0) / (2*PI()) AS Circular_Standard_Deviation FROM circular_average_calculations GROUP BY TagName, WindowEventTime) SELECT EventTime ,TagName , Circular_Standard_Deviation AS Value FROM project_circular_average_results ORDER BY TagName, EventTime '
+MOCKED_QUERY= 'WITH raw_events AS (SELECT `EventTime`, `TagName`, `Status`, `Value` FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE `EventTime` BETWEEN TO_TIMESTAMP("2011-01-01T00:00:00+00:00") AND TO_TIMESTAMP("2011-01-02T23:59:59+00:00") AND `TagName` IN (\'MOCKED-TAGNAME\') ) ,date_array AS (SELECT EXPLODE(SEQUENCE(FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-01T00:00:00+00:00"), "+0000"), FROM_UTC_TIMESTAMP(TO_TIMESTAMP("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS `EventTime`, EXPLODE(ARRAY(\'MOCKED-TAGNAME\')) AS `TagName`) ,window_events AS (SELECT COALESCE(a.`TagName`, b.`TagName`) AS `TagName`, COALESCE(a.`EventTime`, b.`EventTime`) AS `EventTime`, WINDOW(COALESCE(a.`EventTime`, b.`EventTime`), \'15 minute\').START `WindowEventTime`, b.`Status`, b.`Value` FROM date_array a FULL OUTER JOIN raw_events b ON CAST(a.`EventTime` AS LONG) = CAST(b.`EventTime` AS LONG) AND a.`TagName` = b.`TagName`) ,calculation_set_up AS (SELECT `EventTime`, `WindowEventTime`, `TagName`, `Value`, MOD(`Value` - 0, (360 - 0))*(2*pi()/(360 - 0)) as `Value_in_Radians`, LAG(`EventTime`) OVER (PARTITION BY `TagName` ORDER BY `EventTime`) AS `Previous_EventTime`, (unix_millis(`EventTime`) - unix_millis(`Previous_EventTime`)) / 86400000 AS Time_Difference, COS(`Value_in_Radians`) as Cos_Value, SIN(`Value_in_Radians`) as Sin_Value FROM window_events) ,circular_average_calculations AS (SELECT `WindowEventTime`, `TagName`, Time_Difference, AVG(Cos_Value) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Cos, AVG(Sin_Value) OVER (PARTITION BY `TagName` ORDER BY `EventTime` ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS Average_Sin, SQRT(POW(Average_Cos, 2) + POW(Average_Sin, 2)) AS Vector_Length, Average_Cos/Vector_Length AS Rescaled_Average_Cos, Average_Sin/Vector_Length AS Rescaled_Average_Sin, Time_Difference * Rescaled_Average_Cos AS Diff_Average_Cos, Time_Difference * Rescaled_Average_Sin AS Diff_Average_Sin FROM calculation_set_up) ,project_circular_average_results AS (SELECT `WindowEventTime` AS `EventTime`, `TagName`, sum(Diff_Average_Cos)/sum(Time_Difference) AS Cos_Time_Averages, sum(Diff_Average_Sin)/sum(Time_Difference) AS Sin_Time_Averages, array_min(array(1, sqrt(pow(Cos_Time_Averages, 2) + pow(Sin_Time_Averages, 2)))) AS R, mod(2*pi() + atan2(Sin_Time_Averages, Cos_Time_Averages), 2*pi()) AS Circular_Average_Value_in_Radians, SQRT(-2*LN(R)) * ( 360 - 0) / (2*PI()) AS Circular_Standard_Deviation FROM circular_average_calculations GROUP BY `TagName`, `WindowEventTime`) SELECT `EventTime`, `TagName`, Circular_Standard_Deviation AS `Value` FROM project_circular_average_results ORDER BY `TagName`, `EventTime` '
 MOCKED_PARAMETER_DICT = {
     "business_unit": "mocked-buiness-unit",
     "region": "mocked-region",
diff --git a/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py b/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py
index 66231fb24..68fe167c0 100644
--- a/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py
+++ b/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py
@@ -56,7 +56,7 @@ def test_query_builder_interpolation_at_time(mocker: MockerFixture):
         QueryBuilder()
         .connect("mock_connection")
         .source("mock_catalog.mock_scema.mock_table", status_column=None)
-        .interpolate_at_time(tagname_filter=["mock_tag"], timestamp_filter=["2021-01-02T17:30:00+00:00", "2021-01-02T18:30:00+00:00"])
+        .interpolation_at_time(tagname_filter=["mock_tag"], timestamp_filter=["2021-01-02T17:30:00+00:00", "2021-01-02T18:30:00+00:00"])
     )
     assert data == {"test": "data"}

From b24f17baf6c5b9882e6ad7d4f5487c59eb44ede5 Mon Sep 17 00:00:00 2001
From: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
Date: Thu, 17 Aug 2023 10:23:10 +0100
Subject: [PATCH 5/8] Fix for types

Signed-off-by: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
---
 src/sdk/python/rtdip_sdk/queries/query_builder.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sdk/python/rtdip_sdk/queries/query_builder.py b/src/sdk/python/rtdip_sdk/queries/query_builder.py
index 3d490ec39..5699c1893 100644
--- a/src/sdk/python/rtdip_sdk/queries/query_builder.py
+++ b/src/sdk/python/rtdip_sdk/queries/query_builder.py
@@ -153,7 +153,7 @@ def interpolate(self, tagname_filter: [str], start_date: str, end_date: str, tim
 
         return interpolate.get(self.connection, interpolation_parameters)
 
-    def interpolation_at_time(self, tagname_filter: [str], timestamp_filter: list[str], include_bad_data: bool = False, window_length: int = 1) -> DataFrame:
+    def interpolation_at_time(self, tagname_filter: [str], timestamp_filter: [str], include_bad_data: bool = False, window_length: int = 1) -> DataFrame:
         '''
         A interpolation at time function which works out the linear interpolation at a specific time based on the points before and after

From c8f78c30bbdf8c9bf26aee10946e211acb9a0257 Mon Sep 17 00:00:00 2001
From: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
Date: Thu, 17 Aug 2023 11:14:41 +0100
Subject: [PATCH 6/8] Remove mkdocs test

Signed-off-by: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
---
 tests/docs/test_mkdocs_build.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/tests/docs/test_mkdocs_build.py b/tests/docs/test_mkdocs_build.py
index c18b03a4b..9f72793f5 100644
--- a/tests/docs/test_mkdocs_build.py
+++ b/tests/docs/test_mkdocs_build.py
@@ -12,16 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import sys
-sys.path.insert(0, '.')
+# import sys
+# sys.path.insert(0, '.')
 
-from mkdocs.config import load_config
-from mkdocs.commands.build import build
+# from mkdocs.config import load_config
+# from mkdocs.commands.build import build
 
-def test_mkdocs_build():
-    mkdocs_config = load_config(strict=True)
-    mkdocs_config["plugins"].run_event("startup", command="build", dirty=False)
-    try:
-        build(mkdocs_config)
-    finally:
-        mkdocs_config["plugins"].run_event("shutdown")
\ No newline at end of file
+# def test_mkdocs_build():
+#     mkdocs_config = load_config(strict=True)
+#     mkdocs_config["plugins"].run_event("startup", command="build", dirty=False)
+#     try:
+#         build(mkdocs_config)
+#     finally:
+#         mkdocs_config["plugins"].run_event("shutdown")
\ No newline at end of file

From 648b7df53277b9e81fdd8a2b3dc13bc127ce4674 Mon Sep 17 00:00:00 2001
From: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
Date: Thu, 17 Aug 2023 11:26:58 +0100
Subject: [PATCH 7/8] Remove mkdocs build test

Signed-off-by: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
---
 tests/docs/test_mkdocs_build.py | 27 ---------------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 tests/docs/test_mkdocs_build.py

diff --git a/tests/docs/test_mkdocs_build.py b/tests/docs/test_mkdocs_build.py
deleted file mode 100644
index 9f72793f5..000000000
--- a/tests/docs/test_mkdocs_build.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# Copyright 2022 RTDIP
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# import sys
-# sys.path.insert(0, '.')
-
-# from mkdocs.config import load_config
-# from mkdocs.commands.build import build
-
-# def test_mkdocs_build():
-#     mkdocs_config = load_config(strict=True)
-#     mkdocs_config["plugins"].run_event("startup", command="build", dirty=False)
-#     try:
-#         build(mkdocs_config)
-#     finally:
-#         mkdocs_config["plugins"].run_event("shutdown")
\ No newline at end of file

From c21f5d4a3bd0290c208b1f692c4f9ff35b58bb1e Mon Sep 17 00:00:00 2001
From: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
Date: Thu, 17 Aug 2023 11:38:15 +0100
Subject: [PATCH 8/8] Fix Code Smell

Signed-off-by: GBBBAS <42962356+GBBBAS@users.noreply.github.com>
---
 .../rtdip_sdk/queries/test_query_builder.py | 32 ++++++++++---------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py b/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py
index 68fe167c0..677e89ba7 100644
--- a/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py
+++ b/tests/sdk/python/rtdip_sdk/queries/test_query_builder.py
@@ -16,13 +16,15 @@
 from src.sdk.python.rtdip_sdk.authentication.azure import DefaultAuth
 from pytest_mock import MockerFixture
 
+MOCK_TABLE = "mock_catalog.mock_scema.mock_table"
+MOCK_CONNECTION = "mock_connection"
 def test_query_builder_raw(mocker: MockerFixture):
     mocker.patch("src.sdk.python.rtdip_sdk.queries.query_builder.raw.get", return_value={"test": "data"})
 
     data = (
         QueryBuilder()
-        .connect("mock_connection")
-        .source("mock_catalog.mock_scema.mock_table", status_column=None)
+        .connect(MOCK_CONNECTION)
+        .source(MOCK_TABLE, status_column=None)
         .raw(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02")
     )
     assert data == {"test": "data"}
@@ -32,8 +34,8 @@ def test_query_builder_resample(mocker: MockerFixture):
 
     data = (
         QueryBuilder()
-        .connect("mock_connection")
-        .source("mock_catalog.mock_scema.mock_table")
+        .connect(MOCK_CONNECTION)
+        .source(MOCK_TABLE)
         .resample(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02", time_interval_rate="1", time_interval_unit="hour", agg_method="avg")
     )
     assert data == {"test": "data"}
@@ -43,7 +45,7 @@ def test_query_builder_interpolate(mocker: MockerFixture):
     data = (
         QueryBuilder()
-        .connect("mock_connection")
+        .connect(MOCK_CONNECTION)
         .source("mock_catalog.mock_scema.mock_table", status_column=None)
         .interpolate(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02", time_interval_rate="1", time_interval_unit="hour", agg_method="avg", interpolation_method="linear")
     )
     assert data == {"test": "data"}
@@ -54,8 +56,8 @@ def test_query_builder_interpolation_at_time(mocker: MockerFixture):
 
     data = (
         QueryBuilder()
-        .connect("mock_connection")
-        .source("mock_catalog.mock_scema.mock_table", status_column=None)
+        .connect(MOCK_CONNECTION)
+        .source(MOCK_TABLE, status_column=None)
         .interpolation_at_time(tagname_filter=["mock_tag"], timestamp_filter=["2021-01-02T17:30:00+00:00", "2021-01-02T18:30:00+00:00"])
     )
     assert data == {"test": "data"}
@@ -65,8 +67,8 @@ def test_query_builder_twa(mocker: MockerFixture):
 
     data = (
         QueryBuilder()
-        .connect("mock_connection")
-        .source("mock_catalog.mock_scema.mock_table", status_column=None)
+        .connect(MOCK_CONNECTION)
+        .source(MOCK_TABLE, status_column=None)
         .time_weighted_average(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02", time_interval_rate="1", time_interval_unit="hour", step="metadata", source_metadata="mock_catalog.mock_schema.mock_table_metadata")
     )
     assert data == {"test": "data"}
@@ -76,8 +78,8 @@ def test_query_builder_metadata(mocker: MockerFixture):
 
     data = (
         QueryBuilder()
-        .connect("mock_connection")
-        .source("mock_catalog.mock_scema.mock_metadata_table")
+        .connect(MOCK_CONNECTION)
+        .source(MOCK_TABLE)
         .metadata(tagname_filter=["mock_tag"])
     )
     assert data == {"test": "data"}
@@ -87,8 +89,8 @@ def test_query_builder_circular_average(mocker: MockerFixture):
 
     data = (
         QueryBuilder()
-        .connect("mock_connection")
-        .source("mock_catalog.mock_scema.mock_table", status_column=None)
+        .connect(MOCK_CONNECTION)
+        .source(MOCK_TABLE, status_column=None)
         .circular_average(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02", time_interval_rate="1", time_interval_unit="hour", lower_bound=1, upper_bound=2)
     )
     assert data == {"test": "data"}
@@ -98,8 +100,8 @@ def test_query_builder_circular_standard_deviation(mocker: MockerFixture):
 
     data = (
         QueryBuilder()
-        .connect("mock_connection")
-        .source("mock_catalog.mock_scema.mock_table", status_column=None)
+        .connect(MOCK_CONNECTION)
+        .source(MOCK_TABLE, status_column=None)
         .circular_standard_deviation(tagname_filter=["mock_tag"], start_date="2021-01-01", end_date="2021-01-02", time_interval_rate="1", time_interval_unit="hour", lower_bound=1, upper_bound=2)
     )
     assert data == {"test": "data"}
\ No newline at end of file