From 3d81cf0bae8dd28daa97e3d355d8e1753b6143af Mon Sep 17 00:00:00 2001 From: GBBBAS <42962356+GBBBAS@users.noreply.github.com> Date: Tue, 20 Feb 2024 15:27:58 +0000 Subject: [PATCH 1/2] Add SQL Query API Signed-off-by: GBBBAS <42962356+GBBBAS@users.noreply.github.com> --- mkdocs.yml | 2 + src/api/v1/__init__.py | 1 + src/api/v1/common.py | 4 ++ src/api/v1/models.py | 29 +++++++++++ src/api/v1/sql.py | 113 +++++++++++++++++++++++++++++++++++++++++ 5 files changed, 149 insertions(+) create mode 100644 src/api/v1/sql.py diff --git a/mkdocs.yml b/mkdocs.yml index 884cc2bc9..27d5e3649 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -260,6 +260,8 @@ nav: - Weather Data Query Builder: sdk/code-reference/query/functions/weather/weather_query_builder.md - Weather Raw: sdk/code-reference/query/functions/weather/raw.md - Weather Latest: sdk/code-reference/query/functions/weather/latest.md + - SQL: + - SQL Query Builder: sdk/code-reference/query/sql/sql_query.md - Connectors: sdk/queries/connectors.md - Databricks: - Databricks SQL: sdk/queries/databricks/databricks-sql.md diff --git a/src/api/v1/__init__.py b/src/api/v1/__init__.py index 09eb6cbc2..fa38d1eca 100644 --- a/src/api/v1/__init__.py +++ b/src/api/v1/__init__.py @@ -18,6 +18,7 @@ from src.api.v1 import ( metadata, raw, + sql, latest, resample, interpolate, diff --git a/src/api/v1/common.py b/src/api/v1/common.py index 3abef2d92..5fb0367ce 100644 --- a/src/api/v1/common.py +++ b/src/api/v1/common.py @@ -27,6 +27,7 @@ def common_api_setup_tasks( # NOSONAR base_headers: BaseHeaders, # NOSONAR metadata_query_parameters=None, raw_query_parameters=None, + sql_query_parameters=None, tag_query_parameters=None, resample_query_parameters=None, interpolate_query_parameters=None, @@ -83,6 +84,9 @@ def common_api_setup_tasks( # NOSONAR parameters["start_date"] = raw_query_parameters.start_date parameters["end_date"] = raw_query_parameters.end_date + if sql_query_parameters != None: + parameters = dict(parameters, 
**sql_query_parameters.__dict__) + if tag_query_parameters != None: parameters = dict(parameters, **tag_query_parameters.__dict__) parameters["tag_names"] = parameters.pop("tag_name") diff --git a/src/api/v1/models.py b/src/api/v1/models.py index 980d5cd54..18cdc5547 100644 --- a/src/api/v1/models.py +++ b/src/api/v1/models.py @@ -20,6 +20,8 @@ from fastapi import Query, Header, Depends from datetime import date from src.api.auth.azuread import oauth2_scheme +from typing import Generic, TypeVar + EXAMPLE_DATE = "2022-01-01" EXAMPLE_DATETIME = "2022-01-01T15:00:00" @@ -115,6 +117,17 @@ class RawResponse(BaseModel): pagination: Union[PaginationRow, None] +SqlT = TypeVar("SqlT") + + +class SqlResponse(BaseModel, Generic[SqlT]): + field_schema: FieldSchema = Field( + None, alias="schema", serialization_alias="schema" + ) + data: List[SqlT] + pagination: Union[PaginationRow, None] + + class ResampleInterpolateRow(BaseModel): EventTime: datetime TagName: str @@ -244,6 +257,22 @@ def __init__( self.end_date = end_date +class SqlQueryParams: + def __init__( + self, + sql_statement: str = Query( + ..., + description="SQL Statement to be executed", + examples=["select * from 1"], + ), + ): + self.sql_statement = sql_statement + + +class SqlBodyParams(BaseModel): + sql_statement: str + + class TagsQueryParams: def __init__( self, diff --git a/src/api/v1/sql.py b/src/api/v1/sql.py new file mode 100644 index 000000000..d04ba8077 --- /dev/null +++ b/src/api/v1/sql.py @@ -0,0 +1,113 @@ +# Copyright 2022 RTDIP +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
def sql_get(
    base_query_parameters,
    sql_query_parameters,
    limit_offset_parameters,
    base_headers,
):
    """Execute a caller-supplied SQL statement and wrap the result for the API.

    Args:
        base_query_parameters: Common query parameters, forwarded to
            ``common_api_setup_tasks``.
        sql_query_parameters: Object exposing ``sql_statement``; its attributes
            are merged into the parameters dict by ``common_api_setup_tasks``.
        limit_offset_parameters: Object exposing ``limit`` and ``offset``
            (either may be ``None``).
        base_headers: Request headers, forwarded to ``common_api_setup_tasks``.

    Returns:
        SqlResponse: Table schema, the rows as a list of records (NaN replaced
        by ``None``) and, when both ``limit`` and ``offset`` were supplied, a
        ``PaginationRow``.

    Raises:
        HTTPException: With status 400 and the original error text as detail
            when anything in setup, query execution or serialisation fails.
    """
    try:
        (connection, parameters) = src.api.v1.common.common_api_setup_tasks(
            base_query_parameters,
            sql_query_parameters=sql_query_parameters,
            limit_offset_query_parameters=limit_offset_parameters,
            base_headers=base_headers,
        )

        limit = limit_offset_parameters.limit
        offset = limit_offset_parameters.offset

        # Forward limit/offset so the statement itself is paginated; without
        # this the full result set is returned and the "next" pointer computed
        # below does not describe the query that was actually run.
        data = SQLQueryBuilder().get(
            connection,
            parameters["sql_statement"],
            limit=limit,
            offset=offset,
        )

        pagination = None
        if limit is not None and offset is not None:
            # A completely filled page is taken as a hint that more rows exist.
            next_offset = offset + limit if len(data.index) == limit else None
            pagination = PaginationRow(
                limit=limit,
                offset=offset,
                next=next_offset,
            )

        return SqlResponse(
            schema=build_table_schema(data, index=False, primary_key=False),
            data=data.replace({np.nan: None}).to_dict(orient="records"),
            pagination=pagination,
        )
    except Exception as e:
        # logging.exception keeps the same message but also records the
        # traceback, which the API response deliberately does not expose.
        logging.exception(str(e))
        raise HTTPException(status_code=400, detail=str(e)) from e
# Registered under /sql with its own "SQL" tag so that it does not reuse the
# route, name and tag of the raw timeseries endpoint.
@api_v1_router.post(
    path="/sql",
    name="Sql POST",
    description=post_description,
    tags=["SQL"],
    dependencies=[Depends(oauth2_scheme)],
    responses={200: {"model": SqlResponse}, 400: {"model": HTTPError}},
    openapi_extra={
        "externalDocs": {
            "description": "RTDIP SQL Query Documentation",
            # Matches the page published by the mkdocs nav entry
            # sdk/code-reference/query/sql/sql_query.md.
            "url": "https://www.rtdip.io/sdk/code-reference/query/sql/sql_query/",
        }
    },
)
async def raw_post(
    base_query_parameters: BaseQueryParams = Depends(),
    sql_query_parameters: SqlBodyParams = Body(default=...),
    limit_offset_query_parameters: LimitOffsetQueryParams = Depends(),
    base_headers: BaseHeaders = Depends(),
):
    """Handle POST /sql by delegating to ``sql_get``.

    Args:
        base_query_parameters: Common query parameters resolved by FastAPI.
        sql_query_parameters: Request body carrying ``sql_statement``.
        limit_offset_query_parameters: Optional ``limit``/``offset`` values.
        base_headers: Request headers resolved by FastAPI.

    Returns:
        Whatever ``sql_get`` returns for these arguments.
    """
    return sql_get(
        base_query_parameters,
        sql_query_parameters,
        limit_offset_query_parameters,
        base_headers,
    )
data = SQLQueryBuilder().get(connection, parameters["sql_statement"]) pagination = None @@ -79,17 +79,17 @@ def sql_get( post_description = """ -## Raw +## Sql -Retrieval of raw timeseries data via a POST method to enable providing a list of tag names that can exceed url length restrictions via GET Query Parameters. +Retrieval of data via a POST method to enable execution of generic SQL statements. """ @api_v1_router.post( - path="/events/raw", - name="Raw POST", + path="/sql", + name="Sql POST", description=post_description, - tags=["Events"], + tags=["SQL"], dependencies=[Depends(oauth2_scheme)], responses={200: {"model": SqlResponse}, 400: {"model": HTTPError}}, openapi_extra={ diff --git a/src/sdk/python/rtdip_sdk/queries/sql/sql_query.py b/src/sdk/python/rtdip_sdk/queries/sql/sql_query.py index 1bb9f1734..a0891bb1b 100644 --- a/src/sdk/python/rtdip_sdk/queries/sql/sql_query.py +++ b/src/sdk/python/rtdip_sdk/queries/sql/sql_query.py @@ -15,6 +15,7 @@ import logging import pandas as pd from ...connectors.connection_interface import ConnectionInterface +from ..time_series._time_series_query_builder import _query_builder class SQLQueryBuilder: @@ -25,7 +26,9 @@ class SQLQueryBuilder: sql_query: dict connection: ConnectionInterface - def get(self, connection=object, sql_query=str) -> pd.DataFrame: + def get( + self, connection=object, sql_query=str, limit=None, offset=None + ) -> pd.DataFrame: """ A function to return back raw data by querying databricks SQL Warehouse using a connection specified by the user. @@ -38,14 +41,23 @@ def get(self, connection=object, sql_query=str) -> pd.DataFrame: Args: connection (obj): Connection chosen by the user (Databricks SQL Connect, PYODBC SQL Connect, TURBODBC SQL Connect) sql_query (str): A string of the SQL query to be executed. + limit (optional int): Limit the number of rows to be returned + offset (optional int): Offset the start of the rows to be returned Returns: - DataFrame: A dataframe of raw timeseries data. 
def _sql_query(parameters_dict: dict) -> str:
    """Render a user-supplied SQL statement with optional LIMIT/OFFSET clauses.

    Args:
        parameters_dict: Mapping that must contain ``sql_statement`` and may
            contain ``limit`` and/or ``offset``.

    Returns:
        The statement unchanged when neither ``limit`` nor ``offset`` is set;
        otherwise the statement followed by `` LIMIT n`` and/or `` OFFSET n``.

    Raises:
        ValueError: If ``sql_statement`` is missing/``None``, or if ``limit``
            or ``offset`` cannot be converted to an integer.
    """
    sql_statement = parameters_dict.get("sql_statement")
    if sql_statement is None:
        # Rendering None would send the literal text "None" to the warehouse.
        raise ValueError("sql_statement must be provided")

    limit = parameters_dict.get("limit")
    offset = parameters_dict.get("offset")

    # Coerce to int so only numeric values can reach the rendered SQL text.
    if limit is not None:
        limit = int(limit)
    if offset is not None:
        offset = int(offset)

    if limit is not None or offset is not None:
        # A trailing ";" (or whitespace) would make the appended clause invalid.
        sql_statement = str(sql_statement).rstrip("; \t\r\n")

    # The leading space in each clause keeps it separated from the statement;
    # previously "... FROM t" + "LIMIT 10" rendered as "... FROM tLIMIT 10".
    sql_query = (
        "{{ sql_statement }}"
        "{% if limit is defined and limit is not none %}"
        " LIMIT {{ limit }}"
        "{% endif %}"
        "{% if offset is defined and offset is not none %}"
        " OFFSET {{ offset }}"
        "{% endif %}"
    )

    sql_parameters = {
        "sql_statement": sql_statement,
        "limit": limit,
        "offset": offset,
    }

    sql_template = Template(sql_query)
    return sql_template.render(sql_parameters)
tagnames_deduplicated.copy() + if query_type == "sql": + return _sql_query(parameters_dict) + if query_type == "metadata": return _metadata_query(parameters_dict)