Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/sdk/code-reference/query/interpolate.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ parameters = {
"time_interval_rate": "15", #numeric input
"time_interval_unit": "minute", #options: ["second", "minute", "day", "hour"]
"agg_method": "first", #options: ["first", "last", "avg", "min", "max"]
"interpolation_method": "forward_fill", #options: ["forward_fill", "backward_fill"]
"interpolation_method": "forward_fill", #options: ["forward_fill", "backward_fill", "linear"]
"include_bad_data": True, #options: [True, False]
}
x = interpolate.get(connection, parameters)
Expand Down
4 changes: 2 additions & 2 deletions docs/sdk/queries/databricks/databricks-sql.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ parameters = {
"start_date": "2022-03-08", #start_date can be a date in the format "YYYY-MM-DD" or a datetime in the format "YYYY-MM-DDTHH:MM:SS"
"end_date": "2022-03-10", #end_date can be a date in the format "YYYY-MM-DD" or a datetime in the format "YYYY-MM-DDTHH:MM:SS"
"time_interval_rate": "1", #numeric input
"time_interval_unit": "hour", #options are second, minute, day, hour
"time_interval_unit": "hour", #options are second, minute, day or hour
"agg_method": "first", #options are first, last, avg, min, max
"interpolation_method": "forward_fill", #options are forward_fill or backward_fill
"interpolation_method": "forward_fill", #options are forward_fill, backward_fill or linear
"include_bad_data": True #boolean options are True or False
}

Expand Down
2 changes: 1 addition & 1 deletion src/api/v1/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def __init__(
class InterpolateQueryParams:
def __init__(
self,
interpolation_method: str = Query(..., description="Interpolation Method can be forward_fill or backward_fill", examples=["forward_fill", "backward_fill"]),
interpolation_method: str = Query(..., description="Interpolation Method can be one of the following [forward_fill, backward_fill, linear]", examples=["forward_fill", "backward_fill", "linear"]),
):
self.interpolation_method = interpolation_method

Expand Down
31 changes: 21 additions & 10 deletions src/sdk/python/rtdip_sdk/queries/time_series/_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def _sample_query(parameters_dict: dict) -> tuple:
"{% if include_bad_data is defined and include_bad_data == false %} AND Status = 'Good' {% endif %}) "
",date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp(\"{{ start_date }}\"), \"{{ time_zone }}\"), from_utc_timestamp(to_timestamp(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS timestamp_array, explode(array('{{ tag_names | join('\\', \\'') }}')) AS TagName) "
",window_buckets AS (SELECT timestamp_array AS window_start ,TagName ,LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM date_array) "
",project_resample_results AS (SELECT d.window_start ,d.window_end ,d.TagName ,FIRST(e.Value) OVER (PARTITION BY d.TagName, d.window_start ORDER BY e.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Value FROM window_buckets d INNER JOIN raw_events e ON e.EventTime >= d.window_start AND e.EventTime < d.window_end AND e.TagName = d.TagName) "
",project_resample_results AS (SELECT d.window_start ,d.window_end ,d.TagName ,{{ agg_method }}(e.Value) OVER (PARTITION BY d.TagName, d.window_start ORDER BY e.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Value FROM window_buckets d INNER JOIN raw_events e ON e.EventTime >= d.window_start AND e.EventTime < d.window_end AND e.TagName = d.TagName) "
"SELECT window_start AS EventTime ,TagName ,Value FROM project_resample_results GROUP BY window_start ,TagName ,Value ORDER BY TagName, EventTime "
)

Expand All @@ -131,25 +131,36 @@ def _sample_query(parameters_dict: dict) -> tuple:
def _interpolation_query(parameters_dict: dict, sample_query: str, sample_parameters: dict) -> str:

if parameters_dict["interpolation_method"] == "forward_fill":
interpolation_method = 'last_value/UNBOUNDED PRECEDING/CURRENT ROW'
interpolation_methods = 'last_value/UNBOUNDED PRECEDING/CURRENT ROW'

if parameters_dict["interpolation_method"] == "backward_fill":
interpolation_method = 'first_value/CURRENT ROW/UNBOUNDED FOLLOWING'
interpolation_methods = 'first_value/CURRENT ROW/UNBOUNDED FOLLOWING'

interpolation_options = interpolation_method.split('/')
if parameters_dict["interpolation_method"] == "forward_fill" or parameters_dict["interpolation_method"] == "backward_fill":
interpolation_options = interpolation_methods.split('/')

interpolate_query = (
f"WITH resample AS ({sample_query})"
",date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp(\"{{ start_date }}\"), \"{{ time_zone }}\"), from_utc_timestamp(to_timestamp(\"{{ end_date }}\"), \"{{ time_zone }}\"), INTERVAL '{{ time_interval_rate + ' ' + time_interval_unit }}')) AS EventTime, explode(array('{{ tag_names | join('\\', \\'') }}')) AS TagName) "
"SELECT a.EventTime, a.TagName, {{ interpolation_options_0 }}(b.Value, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN {{ interpolation_options_1 }} AND {{ interpolation_options_2 }}) AS Value FROM date_array a "
"LEFT OUTER JOIN resample b "
"ON a.EventTime = b.EventTime AND a.TagName = b.TagName ORDER BY a.TagName, a.EventTime "
"{% if (interpolation_method is defined) and (interpolation_method == \"forward_fill\" or interpolation_method == \"backward_fill\") %}"
"SELECT a.EventTime, a.TagName, {{ interpolation_options_0 }}(b.Value, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN {{ interpolation_options_1 }} AND {{ interpolation_options_2 }}) AS Value FROM date_array a LEFT OUTER JOIN resample b ON a.EventTime = b.EventTime AND a.TagName = b.TagName ORDER BY a.TagName, a.EventTime "
"{% elif (interpolation_method is defined) and (interpolation_method == \"linear\") %}"
",linear_interpolation_calculations AS (SELECT coalesce(a.TagName, b.TagName) as TagName, coalesce(a.EventTime, b.EventTime) as EventTime, a.EventTime as Requested_EventTime, b.EventTime as Found_EventTime, b.Value, "
"last_value(b.EventTime, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Last_EventTime, last_value(b.Value, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Last_Value, "
"first_value(b.EventTime, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS Next_EventTime, first_value(b.Value, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS Next_Value, "
"CASE WHEN b.Value is NULL THEN Last_Value + (unix_timestamp(a.EventTime) - unix_timestamp(Last_EventTime)) * ((Next_Value - Last_Value)) / ((unix_timestamp(Next_EventTime) - unix_timestamp(Last_EventTime))) ELSE b.Value END AS linear_interpolated_value FROM date_array a FULL OUTER JOIN resample b ON a.EventTime = b.EventTime AND a.TagName = b.TagName ORDER BY a.EventTime, b.TagName) "
"SELECT EventTime, TagName, linear_interpolated_value AS Value FROM linear_interpolation_calculations "
"{% else %}"
"SELECT * FROM resample "
"{% endif %}"
)

interpolate_parameters = sample_parameters.copy()
interpolate_parameters["interpolation_options_0"] = interpolation_options[0]
interpolate_parameters["interpolation_options_1"] = interpolation_options[1]
interpolate_parameters["interpolation_options_2"] = interpolation_options[2]
interpolate_parameters["interpolation_method"] = parameters_dict["interpolation_method"]
if parameters_dict["interpolation_method"] == "forward_fill" or parameters_dict["interpolation_method"] == "backward_fill":
interpolate_parameters["interpolation_options_0"] = interpolation_options[0]
interpolate_parameters["interpolation_options_1"] = interpolation_options[1]
interpolate_parameters["interpolation_options_2"] = interpolation_options[2]

sql_template = Template(interpolate_query)
return sql_template.render(interpolate_parameters)
Expand Down
5 changes: 3 additions & 2 deletions src/sdk/python/rtdip_sdk/queries/time_series/interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,12 @@
import pandas as pd
from ._query_builder import _query_builder


def get(connection: object, parameters_dict: dict) -> pd.DataFrame:
'''
An RTDIP interpolation function that is intertwined with the RTDIP Resampling function.

The Interpolation function will forward fill or backward fill the resampled data depending users specified interpolation method.
The Interpolation function will forward fill, backward fill or linearly interpolate the resampled data depending on the user's specified interpolation method.

This function requires the user to input a dictionary of parameters. (See Attributes table below.)

Expand All @@ -42,7 +43,7 @@ def get(connection: object, parameters_dict: dict) -> pd.DataFrame:
time_interval_rate (str): The time interval rate (numeric input)
time_interval_unit (str): The time interval unit (second, minute, day, hour)
agg_method (str): Aggregation Method (first, last, avg, min, max)
interpolation_method (str): Optional. Interpolation method (forward_fill, backward_fill)
interpolation_method (str): Interpolation method (forward_fill, backward_fill, linear)
include_bad_data (bool): Include "Bad" data points with True or remove "Bad" data points with False

Returns:
Expand Down
2 changes: 1 addition & 1 deletion tests/sdk/python/rtdip_sdk/queries/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
ACCESS_TOKEN = "mock_databricks_token"
DATABRICKS_SQL_CONNECT = 'databricks.sql.connect'
DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor'
MOCKED_QUERY= 'WITH resample AS (WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as EventTime, TagName, Status, Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN to_date(to_timestamp("2011-01-01T00:00:00+00:00")) AND to_date(to_timestamp("2011-01-02T23:59:59+00:00")) AND EventTime BETWEEN to_timestamp("2011-01-01T00:00:00+00:00") AND to_timestamp("2011-01-02T23:59:59+00:00") AND TagName in (\'MOCKED-TAGNAME\') AND Status = \'Good\' ) ,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS timestamp_array, explode(array(\'MOCKED-TAGNAME\')) AS TagName) ,window_buckets AS (SELECT timestamp_array AS window_start ,TagName ,LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM date_array) ,project_resample_results AS (SELECT d.window_start ,d.window_end ,d.TagName ,FIRST(e.Value) OVER (PARTITION BY d.TagName, d.window_start ORDER BY e.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Value FROM window_buckets d INNER JOIN raw_events e ON e.EventTime >= d.window_start AND e.EventTime < d.window_end AND e.TagName = d.TagName) SELECT window_start AS EventTime ,TagName ,Value FROM project_resample_results GROUP BY window_start ,TagName ,Value ORDER BY TagName, EventTime ),date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS EventTime, explode(array(\'MOCKED-TAGNAME\')) AS TagName) SELECT a.EventTime, a.TagName, last_value(b.Value, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Value FROM date_array a 
LEFT OUTER JOIN resample b ON a.EventTime = b.EventTime AND a.TagName = b.TagName ORDER BY a.TagName, a.EventTime '
MOCKED_QUERY= 'WITH resample AS (WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as EventTime, TagName, Status, Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN to_date(to_timestamp("2011-01-01T00:00:00+00:00")) AND to_date(to_timestamp("2011-01-02T23:59:59+00:00")) AND EventTime BETWEEN to_timestamp("2011-01-01T00:00:00+00:00") AND to_timestamp("2011-01-02T23:59:59+00:00") AND TagName in (\'MOCKED-TAGNAME\') AND Status = \'Good\' ) ,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS timestamp_array, explode(array(\'MOCKED-TAGNAME\')) AS TagName) ,window_buckets AS (SELECT timestamp_array AS window_start ,TagName ,LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM date_array) ,project_resample_results AS (SELECT d.window_start ,d.window_end ,d.TagName ,avg(e.Value) OVER (PARTITION BY d.TagName, d.window_start ORDER BY e.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Value FROM window_buckets d INNER JOIN raw_events e ON e.EventTime >= d.window_start AND e.EventTime < d.window_end AND e.TagName = d.TagName) SELECT window_start AS EventTime ,TagName ,Value FROM project_resample_results GROUP BY window_start ,TagName ,Value ORDER BY TagName, EventTime ),date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS EventTime, explode(array(\'MOCKED-TAGNAME\')) AS TagName) SELECT a.EventTime, a.TagName, last_value(b.Value, true) OVER (PARTITION BY a.TagName ORDER BY a.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Value FROM date_array a LEFT 
OUTER JOIN resample b ON a.EventTime = b.EventTime AND a.TagName = b.TagName ORDER BY a.TagName, a.EventTime '
MOCKED_PARAMETER_DICT = {
"business_unit": "mocked-buiness-unit",
"region": "mocked-region",
Expand Down
2 changes: 1 addition & 1 deletion tests/sdk/python/rtdip_sdk/queries/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
ACCESS_TOKEN = "mock_databricks_token"
DATABRICKS_SQL_CONNECT = 'databricks.sql.connect'
DATABRICKS_SQL_CONNECT_CURSOR = 'databricks.sql.connect.cursor'
MOCKED_QUERY= 'WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as EventTime, TagName, Status, Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN to_date(to_timestamp("2011-01-01T00:00:00+00:00")) AND to_date(to_timestamp("2011-01-02T23:59:59+00:00")) AND EventTime BETWEEN to_timestamp("2011-01-01T00:00:00+00:00") AND to_timestamp("2011-01-02T23:59:59+00:00") AND TagName in (\'MOCKED-TAGNAME\') AND Status = \'Good\' ) ,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS timestamp_array, explode(array(\'MOCKED-TAGNAME\')) AS TagName) ,window_buckets AS (SELECT timestamp_array AS window_start ,TagName ,LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM date_array) ,project_resample_results AS (SELECT d.window_start ,d.window_end ,d.TagName ,FIRST(e.Value) OVER (PARTITION BY d.TagName, d.window_start ORDER BY e.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Value FROM window_buckets d INNER JOIN raw_events e ON e.EventTime >= d.window_start AND e.EventTime < d.window_end AND e.TagName = d.TagName) SELECT window_start AS EventTime ,TagName ,Value FROM project_resample_results GROUP BY window_start ,TagName ,Value ORDER BY TagName, EventTime '
MOCKED_QUERY= 'WITH raw_events AS (SELECT DISTINCT from_utc_timestamp(to_timestamp(date_format(EventTime, \'yyyy-MM-dd HH:mm:ss.SSS\')), "+0000") as EventTime, TagName, Status, Value FROM `mocked-buiness-unit`.`sensors`.`mocked-asset_mocked-data-security-level_events_mocked-data-type` WHERE EventDate BETWEEN to_date(to_timestamp("2011-01-01T00:00:00+00:00")) AND to_date(to_timestamp("2011-01-02T23:59:59+00:00")) AND EventTime BETWEEN to_timestamp("2011-01-01T00:00:00+00:00") AND to_timestamp("2011-01-02T23:59:59+00:00") AND TagName in (\'MOCKED-TAGNAME\') AND Status = \'Good\' ) ,date_array AS (SELECT explode(sequence(from_utc_timestamp(to_timestamp("2011-01-01T00:00:00+00:00"), "+0000"), from_utc_timestamp(to_timestamp("2011-01-02T23:59:59+00:00"), "+0000"), INTERVAL \'15 minute\')) AS timestamp_array, explode(array(\'MOCKED-TAGNAME\')) AS TagName) ,window_buckets AS (SELECT timestamp_array AS window_start ,TagName ,LEAD(timestamp_array) OVER (ORDER BY timestamp_array) AS window_end FROM date_array) ,project_resample_results AS (SELECT d.window_start ,d.window_end ,d.TagName ,avg(e.Value) OVER (PARTITION BY d.TagName, d.window_start ORDER BY e.EventTime ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS Value FROM window_buckets d INNER JOIN raw_events e ON e.EventTime >= d.window_start AND e.EventTime < d.window_end AND e.TagName = d.TagName) SELECT window_start AS EventTime ,TagName ,Value FROM project_resample_results GROUP BY window_start ,TagName ,Value ORDER BY TagName, EventTime '
MOCKED_PARAMETER_DICT = {
"business_unit": "mocked-buiness-unit",
"region": "mocked-region",
Expand Down