2 changes: 1 addition & 1 deletion .github/workflows/develop.yml
@@ -102,7 +102,7 @@ jobs:
shell: python
- name: Build Wheel
run: |
python -m build --wheel
python -m build
env:
RTDIP_SDK_NEXT_VER: ${{ steps.next_ver.outputs.rtdip_sdk_next_ver }}
- name: Upload Python wheel as artifact
4 changes: 2 additions & 2 deletions .github/workflows/release.yml
@@ -33,7 +33,7 @@ jobs:
python -m pip install --upgrade pip
pip install twine
pip install build
python -m build --wheel
python -m build
env:
RTDIP_SDK_NEXT_VER: ${{ github.ref_name }}
- name: Upload Python wheel as artifact
@@ -109,7 +109,7 @@ jobs:
sudo apt install -y libboost-all-dev

- name: Install Conda environment with Micromamba
uses: mamba-org/provision-with-micromamba@main
uses: mamba-org/setup-micromamba@main
with:
environment-file: environment.yml
cache-env: true
8 changes: 4 additions & 4 deletions .github/workflows/sonarcloud_reusable.yml
@@ -47,9 +47,9 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
python-version: ["3.10"]
pyspark: ["3.3.2"]
delta-spark: ["2.3.0"]
python-version: ["3.11"]
pyspark: ["3.4.0"]
delta-spark: ["2.4.0"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
@@ -74,7 +74,7 @@ jobs:
echo $CONDA/bin >> $GITHUB_PATH

- name: Install Conda environment with Micromamba
uses: mamba-org/provision-with-micromamba@main
uses: mamba-org/setup-micromamba@main
with:
environment-file: environment.yml
extra-specs: |
9 changes: 6 additions & 3 deletions .github/workflows/test.yml
@@ -23,17 +23,20 @@ jobs:
run:
shell: bash -l {0}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ["3.8", "3.9", "3.10"]
pyspark: ["3.3.0", "3.3.1", "3.3.2"]
python-version: ["3.8", "3.9", "3.10", "3.11"]
pyspark: ["3.3.0", "3.3.1", "3.3.2", "3.4.0"]
include:
- pyspark: "3.3.0"
delta-spark: "2.2.0"
- pyspark: "3.3.1"
delta-spark: "2.3.0"
- pyspark: "3.3.2"
delta-spark: "2.3.0"
- pyspark: "3.4.0"
delta-spark: "2.4.0"
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
@@ -56,7 +59,7 @@ jobs:
echo $CONDA/bin >> $GITHUB_PATH

- name: Install Conda environment with Micromamba
uses: mamba-org/provision-with-micromamba@main
uses: mamba-org/setup-micromamba@main
with:
environment-file: environment.yml
extra-specs: |
10 changes: 7 additions & 3 deletions .vscode/settings.json
@@ -4,13 +4,13 @@
"azureFunctions.projectLanguage": "Python",
"azureFunctions.projectRuntime": "~4",
"python.linting.enabled": true,
"python.linting.pylintEnabled": true,
"python.linting.pylintEnabled": false,
"python.formatting.autopep8Path": "/opt/conda/bin/autopep8",
"python.formatting.yapfPath": "/opt/conda/bin/yapf",
"python.linting.flake8Path": "/opt/conda/bin/flake8",
"python.linting.pycodestylePath": "/opt/conda/bin/pycodestyle",
"python.linting.pydocstylePath": "/opt/conda/bin/pydocstyle",
"python.linting.pylintPath": "/opt/conda/bin/pylint",
// "python.linting.pylintPath": "/opt/conda/bin/pylint",
"python.testing.pytestArgs": [
"--cov=.",
"--cov-report=xml:cov.xml",
@@ -33,5 +33,9 @@
"git.alwaysSignOff": true,
"githubPullRequests.ignoredPullRequestBranches": [
"develop"
]
],
"[python]": {
"editor.defaultFormatter": "ms-python.black-formatter"
},
"python.formatting.provider": "none"
}
6 changes: 6 additions & 0 deletions README.md
@@ -1,11 +1,17 @@
# Real Time Data Ingestion Platform (RTDIP)

<p align="center"><img src=https://raw.githubusercontent.com/rtdip/core/develop/docs/getting-started/images/rtdip-horizontal-color.png alt="rtdip" width=50% height=50%/></p>

<div align="center">

| Branch | Workflow Status | Code Coverage | Vulnerabilities | Bugs |
|--------|-----------------|---------------|----------|------|
| main | [![Main](https://github.com/rtdip/core/actions/workflows/main.yml/badge.svg?branch=main)](https://github.com/rtdip/core/actions/workflows/main.yml) | [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=rtdip_core&metric=coverage&branch=main)](https://sonarcloud.io/summary/new_code?id=rtdip_core) | [![Vulnerabilities](https://sonarcloud.io/api/project_badges/measure?project=rtdip_core&metric=vulnerabilities&branch=main)](https://sonarcloud.io/summary/new_code?id=rtdip_core) | [![Bugs](https://sonarcloud.io/api/project_badges/measure?project=rtdip_core&metric=bugs&branch=main)](https://sonarcloud.io/summary/new_code?id=rtdip_core) |
| develop | [![Develop](https://github.com/rtdip/core/actions/workflows/develop.yml/badge.svg)](https://github.com/rtdip/core/actions/workflows/develop.yml) | [![Coverage](https://sonarcloud.io/api/project_badges/measure?project=rtdip_core&metric=coverage&branch=develop)](https://sonarcloud.io/summary/new_code?id=rtdip_core) | [![Vulnerabilities](https://sonarcloud.io/api/project_badges/measure?project=rtdip_core&metric=vulnerabilities&branch=develop)](https://sonarcloud.io/summary/new_code?id=rtdip_core) | [![Bugs](https://sonarcloud.io/api/project_badges/measure?project=rtdip_core&metric=bugs&branch=develop)](https://sonarcloud.io/summary/new_code?id=rtdip_core) |
| feature | [![.github/workflows/pr.yml](https://github.com/rtdip/core/actions/workflows/pr.yml/badge.svg)](https://github.com/rtdip/core/actions/workflows/pr.yml) |

</div>

This repository contains Real Time Data Ingestion Platform SDK functions and documentation. This README will be a developer guide to understand the repository.

## What is RTDIP SDK?
21 changes: 14 additions & 7 deletions docs/getting-started/installation.md
@@ -16,7 +16,7 @@ This article provides a guide on how to install the RTDIP SDK. Get started by en

There are a few things to note before using the RTDIP SDK. The following prerequisites will need to be installed on your local machine.

Python version 3.8 >= and < 3.11 should be installed. Check which python version you have with the following command:
Python version >= 3.8 and < 3.12 should be installed. Check which Python version you have with the following command:

python --version

@@ -58,14 +58,21 @@ Installing the RTDIP can be done using a package installer, such as [Pip](https:
### ODBC
To use pyodbc or turbodbc python libraries, ensure that the required ODBC driver is installed as per these [instructions](https://docs.microsoft.com/en-us/azure/databricks/integrations/bi/jdbc-odbc-bi#download-the-odbc-driver).

#### Pyodbc
If you plan to use pyodbc, Microsoft Visual C++ 14.0 or greater is required. Get it from [Microsoft C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/).
=== "Pyodbc"
If you plan to use pyodbc, Microsoft Visual C++ 14.0 or greater is required. Get it from [Microsoft C++ Build Tools](https://visualstudio.microsoft.com/visual-cpp-build-tools/).

#### Turbodbc
To use turbodbc python library, ensure to follow the [Turbodbc Getting Started](https://turbodbc.readthedocs.io/en/latest/pages/getting_started.html) section and ensure that [Boost](https://turbodbc.readthedocs.io/en/latest/pages/getting_started.html) is installed correctly.
=== "Turbodbc"
To use the turbodbc python library, follow the [Turbodbc Getting Started](https://turbodbc.readthedocs.io/en/latest/pages/getting_started.html) section and ensure that [Boost](https://turbodbc.readthedocs.io/en/latest/pages/getting_started.html) is installed correctly.

### Spark Connect

Spark Connect was released in Apache Spark 3.4.0 and enables a decoupled client-server architecture that allows remote connectivity to Spark clusters. The RTDIP SDK supports Spark Connect: configure the Spark Connector with the Spark Connect connection string required to reach your Spark cluster.

Please ensure that you have followed the [instructions](https://spark.apache.org/docs/latest/spark-connect-overview.html#how-to-use-spark-connect) to enable Spark Connect on your Spark cluster and that you are using `pyspark>=3.4.0`. If you are connecting to a Databricks cluster, you may prefer to install the python package `databricks-connect>=13.0.1` instead of `pyspark`.
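
For illustration, a minimal sketch of checking that Spark Connect is reachable with the plain pyspark client is shown below; the hostname is a placeholder and the `sc://` connection string format follows the Spark Connect overview linked above.

```python
# Minimal Spark Connect smoke test (sketch only; replace the placeholder hostname).
# Requires pyspark>=3.4.0, or databricks-connect>=13.0.1 when targeting Databricks.
from pyspark.sql import SparkSession

spark = SparkSession.builder.remote("sc://my-spark-server:443").getOrCreate()

# A trivial query that executes on the remote cluster if the connection works.
print(spark.range(3).collect())
```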

### Java
If you are planning to use the RTDIP Pipelines in your own environment that leverages [pyspark](https://spark.apache.org/docs/latest/api/python/getting_started/install.html) for a component, Java 8 or later is a [prerequisite](https://spark.apache.org/docs/latest/api/python/getting_started/install.html#dependencies). See below for suggestions to install Java in your development environment.

To use RTDIP Pipelines components in your own environment that leverages [pyspark](https://spark.apache.org/docs/latest/api/python/getting_started/install.html) without [Spark Connect](#spark-connect), Java 8 or later is a [prerequisite](https://spark.apache.org/docs/latest/api/python/getting_started/install.html#dependencies). See below for suggestions on installing Java in your development environment.

=== "Conda"
A fairly simple option is to use the conda **openjdk** package to install Java into your python virtual environment. An example of a conda **environment.yml** file to achieve this is below.
@@ -76,7 +83,7 @@ If you are planning to use the RTDIP Pipelines in your own environment that leve
- conda-forge
- defaults
dependencies:
- python==3.10
- python==3.11
- pip==23.0.1
- openjdk==11.0.15
- pip:
2 changes: 1 addition & 1 deletion docs/sdk/code-reference/query/interpolate.md
@@ -30,7 +30,7 @@ x = interpolate.get(connection, parameters)
print(x)
```

This example is using [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built in connection methods are either by [```PYODBCSQLConnection()```](pyodbc-sql-connector.md) or [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md).
This example uses [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built-in connection methods are [```PYODBCSQLConnection()```](pyodbc-sql-connector.md), [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md) or [```SparkConnection()```](spark-connector.md).

!!! note "Note"
</b>```server_hostname``` and ```http_path``` can be found on the [SQL Warehouses Page](../../queries/databricks/sql-warehouses.md). <br />
2 changes: 1 addition & 1 deletion docs/sdk/code-reference/query/interpolation_at_time.md
@@ -24,7 +24,7 @@ x = interpolation_at_time.get(connection, parameters)
print(x)
```

This example is using [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built in connection methods are either by [```PYODBCSQLConnection()```](pyodbc-sql-connector.md) or [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md).
This example uses [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built-in connection methods are [```PYODBCSQLConnection()```](pyodbc-sql-connector.md), [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md) or [```SparkConnection()```](spark-connector.md).

!!! note "Note"
</b>```server_hostname``` and ```http_path``` can be found on the [SQL Warehouses Page](../../queries/databricks/sql-warehouses.md). <br />
2 changes: 1 addition & 1 deletion docs/sdk/code-reference/query/metadata.md
@@ -22,7 +22,7 @@ x = metadata.get(connection, parameters)
print(x)
```

This example is using [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built in connection methods are either by [```PYODBCSQLConnection()```](pyodbc-sql-connector.md) or [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md).
This example uses [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built-in connection methods are [```PYODBCSQLConnection()```](pyodbc-sql-connector.md), [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md) or [```SparkConnection()```](spark-connector.md).

!!! note "Note"
</b>```server_hostname``` and ```http_path``` can be found on the [SQL Warehouses Page](../../queries/databricks/sql-warehouses.md). <br />
2 changes: 1 addition & 1 deletion docs/sdk/code-reference/query/raw.md
@@ -26,7 +26,7 @@ x = raw.get(connection, parameters)
print(x)
```

This example is using [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built in connection methods are either by [```PYODBCSQLConnection()```](pyodbc-sql-connector.md) or [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md).
This example uses [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built-in connection methods are [```PYODBCSQLConnection()```](pyodbc-sql-connector.md), [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md) or [```SparkConnection()```](spark-connector.md).

!!! note "Note"
</b>```server_hostname``` and ```http_path``` can be found on the [SQL Warehouses Page](../../queries/databricks/sql-warehouses.md). <br />
2 changes: 1 addition & 1 deletion docs/sdk/code-reference/query/resample.md
@@ -29,7 +29,7 @@ x = resample.get(connection, parameters)
print(x)
```

This example is using [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built in connection methods are either by [```PYODBCSQLConnection()```](pyodbc-sql-connector.md) or [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md).
This example uses [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built-in connection methods are [```PYODBCSQLConnection()```](pyodbc-sql-connector.md), [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md) or [```SparkConnection()```](spark-connector.md).

!!! note "Note"
</b>```server_hostname``` and ```http_path``` can be found on the [SQL Warehouses Page](../../queries/databricks/sql-warehouses.md). <br />
2 changes: 2 additions & 0 deletions docs/sdk/code-reference/query/spark-connector.md
@@ -0,0 +1,2 @@
# Spark Connector
::: src.sdk.python.rtdip_sdk.connectors.grpc.spark_connector
2 changes: 1 addition & 1 deletion docs/sdk/code-reference/query/time-weighted-average.md
@@ -30,7 +30,7 @@ x = time_weighted_average.get(connection, parameters)
print(x)
```

This example is using [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built in connection methods are either by [```PYODBCSQLConnection()```](pyodbc-sql-connector.md) or [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md).
This example uses [```DefaultAuth()```](../authentication/azure.md) and [```DatabricksSQLConnection()```](db-sql-connector.md) to authenticate and connect. You can find other ways to authenticate [here](../authentication/azure.md). The alternative built-in connection methods are [```PYODBCSQLConnection()```](pyodbc-sql-connector.md), [```TURBODBCSQLConnection()```](turbodbc-sql-connector.md) or [```SparkConnection()```](spark-connector.md).

!!! note "Note"
</b>```server_hostname``` and ```http_path``` can be found on the [SQL Warehouses Page](../../queries/databricks/sql-warehouses.md). <br />
22 changes: 17 additions & 5 deletions docs/sdk/queries/connectors.md
@@ -10,8 +10,6 @@ Enables connectivity to Databricks using the [Databricks SQL Connector](https://

For more information refer to this [documentation](https://docs.databricks.com/dev-tools/python-sql-connector.html) and for the specific implementation within the RTDIP SDK, refer to this [link](../code-reference/query/db-sql-connector.md)

#### Example

```python
from rtdip_sdk.connectors import DatabricksSQLConnection

@@ -30,8 +28,6 @@ Replace **server_hostname**, **http_path** and **access_token** with your own in

View information about how pyodbc is implemented in the RTDIP SDK [here.](../code-reference/query/pyodbc-sql-connector.md)

#### Example

```python
from rtdip_sdk.connectors import PYODBCSQLConnection

@@ -49,7 +45,6 @@ Replace **server_hostname**, **http_path** and **access_token** with your own in

Turbodbc is a powerful python ODBC package that has advanced options for querying performance. Find out more about installing it on your operation system and what Turbodbc can do [here](https://turbodbc.readthedocs.io/en/latest/) and refer to this [documentation](../code-reference/query/turbodbc-sql-connector.md) for more information about how it is implemented in the RTDIP SDK.

#### Example
```python
from rtdip_sdk.connectors import TURBODBCSQLConnection

@@ -62,3 +57,20 @@ connection = TURBODBCSQLConnection(server_hostname, http_path, access_token)

Replace **server_hostname**, **http_path** and **access_token** with your own information.

## Spark

### Spark Connector

The Spark Connector enables querying of data using a Spark Session. This is useful for querying local instances of Spark or Delta. However, its most useful application is to leverage [Spark Connect](https://spark.apache.org/docs/latest/spark-connect-overview.html) to connect to a remote Spark cluster that provides the compute for a query run from a local machine.

```python
from rtdip_sdk.connectors import SparkConnection

spark_server = "spark_server"
access_token = "my_token"

spark_remote = "sc://{}:443;token={}".format(spark_server, access_token)
connection = SparkConnection(spark_remote=spark_remote)
```

Replace **spark_server** and **access_token** with your own information.
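
For illustration, a hedged sketch of passing this connection to one of the SDK query functions follows; the `raw` import path and the parameter values are assumptions based on the query reference pages, not definitions from this page.

```python
# Sketch only: the query module import path and parameter keys are assumptions.
from rtdip_sdk.connectors import SparkConnection
from rtdip_sdk.queries import raw  # assumed import path for the raw query module

spark_server = "spark_server"
access_token = "my_token"

spark_remote = "sc://{}:443;token={}".format(spark_server, access_token)
connection = SparkConnection(spark_remote=spark_remote)

parameters = {
    # Hypothetical placeholder values; see the raw query reference for the full parameter list.
    "tag_names": ["TAG_1"],
    "start_date": "2023-01-01",
    "end_date": "2023-01-02",
}

x = raw.get(connection, parameters)
print(x)
```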
24 changes: 24 additions & 0 deletions docs/sdk/queries/spark/spark-connect.md
@@ -0,0 +1,24 @@
# Spark Connect

[Spark Connect](https://spark.apache.org/docs/latest/spark-connect-overview.html) was released in Apache Spark 3.4.0 to enable a decoupled client-server architecture that allows remote connectivity to Spark clusters using the Spark DataFrame API.

This means any Spark cluster can provide the compute for a Spark job, which enables options such as Spark on Kubernetes, Spark running locally or Databricks Interactive Clusters to be leveraged by the RTDIP SDK to perform time series queries.

## Prerequisites

Please ensure that you have followed the [instructions](https://spark.apache.org/docs/latest/spark-connect-overview.html#how-to-use-spark-connect) to enable Spark Connect on your Spark cluster and that you are using `pyspark>=3.4.0`. If you are connecting to Databricks, install `databricks-connect>=13.0.1` instead of `pyspark`.

## Example

This example creates a `SparkConnection` from a Spark Connect connection string:
```python
from rtdip_sdk.connectors import SparkConnection

spark_server = "sparkserver.com"
access_token = "my_token"

spark_remote = "sc://{}:443;token={}".format(spark_server, access_token)
connection = SparkConnection(spark_remote=spark_remote)
```

Replace **spark_server** and **access_token** with your own information (this assumes an access token is required to authenticate with the remote Spark server).
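
As a further hedged sketch, when the remote cluster is a Databricks Interactive Cluster the connection string typically also carries the cluster id; the `x-databricks-cluster-id` parameter and the workspace and cluster values below are assumptions based on Databricks Connect conventions, not confirmed by this page.

```python
# Sketch for a Databricks Interactive Cluster (assumes databricks-connect>=13.0.1 is
# installed and that the x-databricks-cluster-id parameter is required; verify the
# connection string format against the Databricks Connect documentation).
from rtdip_sdk.connectors import SparkConnection

workspace = "adb-1234567890123456.7.azuredatabricks.net"  # hypothetical workspace hostname
access_token = "my_token"
cluster_id = "0123-456789-abcdefgh"  # hypothetical cluster id

spark_remote = "sc://{}:443/;token={};x-databricks-cluster-id={}".format(
    workspace, access_token, cluster_id
)
connection = SparkConnection(spark_remote=spark_remote)
```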