From dd3cf406d3fa35970a6525ed3db642dbff2fb597 Mon Sep 17 00:00:00 2001 From: Jesse Whitehouse Date: Thu, 7 Jul 2022 15:58:21 -0500 Subject: [PATCH 01/30] Scaffold basic file structure for a new dialect https://github.com/sqlalchemy/sqlalchemy/blob/main/README.dialects.rst Signed-off-by: Jesse Whitehouse --- poetry.lock | 141 +++++++++++++++++++++- pyproject.toml | 7 +- setup.cfg | 4 + src/databricks/sqlalchemy/__init__.py | 1 + src/databricks/sqlalchemy/dialect.py | 45 +++++++ src/databricks/sqlalchemy/requirements.py | 18 +++ tests/sqlalchemy/conftest.py | 7 ++ tests/sqlalchemy/test_suite.py | 2 + 8 files changed, 223 insertions(+), 2 deletions(-) create mode 100644 setup.cfg create mode 100644 src/databricks/sqlalchemy/__init__.py create mode 100644 src/databricks/sqlalchemy/dialect.py create mode 100644 src/databricks/sqlalchemy/requirements.py create mode 100644 tests/sqlalchemy/conftest.py create mode 100644 tests/sqlalchemy/test_suite.py diff --git a/poetry.lock b/poetry.lock index 9bc3ae8ae..21c9c4d68 100644 --- a/poetry.lock +++ b/poetry.lock @@ -63,6 +63,17 @@ category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +[[package]] +name = "greenlet" +version = "1.1.2" +description = "Lightweight in-process concurrent programming" +category = "dev" +optional = false +python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*" + +[package.extras] +docs = ["sphinx"] + [[package]] name = "importlib-metadata" version = "4.11.3" @@ -269,6 +280,39 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +[[package]] +name = "sqlalchemy" +version = "1.4.39" +description = "Database Abstraction Library" +category = "dev" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" + +[package.dependencies] +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +importlib-metadata = {version = "*", markers = "python_version < \"3.8\""} + +[package.extras] +aiomysql = ["greenlet (!=0.4.17)", "aiomysql"] +aiosqlite = ["typing_extensions (!=3.10.0.1)", "greenlet (!=0.4.17)", "aiosqlite"] +asyncio = ["greenlet (!=0.4.17)"] +asyncmy = ["greenlet (!=0.4.17)", "asyncmy (>=0.2.3,!=0.2.4)"] +mariadb_connector = ["mariadb (>=1.0.1)"] +mssql = ["pyodbc"] +mssql_pymssql = ["pymssql"] +mssql_pyodbc = ["pyodbc"] +mypy = ["sqlalchemy2-stubs", "mypy (>=0.910)"] +mysql = ["mysqlclient (>=1.4.0,<2)", "mysqlclient (>=1.4.0)"] +mysql_connector = ["mysql-connector-python"] +oracle = ["cx_oracle (>=7,<8)", "cx_oracle (>=7)"] +postgresql = ["psycopg2 (>=2.7)"] +postgresql_asyncpg = ["greenlet (!=0.4.17)", "asyncpg"] +postgresql_pg8000 = ["pg8000 (>=1.16.6,!=1.29.0)"] +postgresql_psycopg2binary = ["psycopg2-binary"] +postgresql_psycopg2cffi = ["psycopg2cffi"] +pymysql = ["pymysql (<1)", "pymysql"] +sqlcipher = ["sqlcipher3-binary"] + [[package]] name = "thrift" version = "0.13.0" @@ -324,7 +368,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "9a8934a880c7e31bf7dc9673ee9a9eafe4111ec26ef98298cbe20aa2b7533b52" +content-hash = "46fe2288362fc103abfdcd49c9dce356736b9ea6758d57b5d7fed173e2b9ceb5" [metadata.files] atomicwrites = [ @@ -368,6 
+412,63 @@ colorama = [ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, ] +greenlet = [ + {file = "greenlet-1.1.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6"}, + {file = "greenlet-1.1.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a"}, + {file = "greenlet-1.1.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d"}, + {file = "greenlet-1.1.2-cp27-cp27m-win32.whl", hash = "sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713"}, + {file = "greenlet-1.1.2-cp27-cp27m-win_amd64.whl", hash = "sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40"}, + {file = "greenlet-1.1.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d"}, + {file = "greenlet-1.1.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8"}, + {file = "greenlet-1.1.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d"}, + {file = "greenlet-1.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497"}, + {file = "greenlet-1.1.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1"}, + {file = "greenlet-1.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58"}, + {file = "greenlet-1.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b336501a05e13b616ef81ce329c0e09ac5ed8c732d9ba7e3e983fcc1a9e86965"}, + {file = "greenlet-1.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708"}, + {file = "greenlet-1.1.2-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23"}, + {file = "greenlet-1.1.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee"}, + {file = "greenlet-1.1.2-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c"}, + {file = "greenlet-1.1.2-cp35-cp35m-win32.whl", hash = "sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963"}, + {file = "greenlet-1.1.2-cp35-cp35m-win_amd64.whl", hash = "sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e"}, + {file = "greenlet-1.1.2-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073"}, + {file = "greenlet-1.1.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c"}, + {file = "greenlet-1.1.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e"}, + {file = "greenlet-1.1.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce"}, + {file = 
"greenlet-1.1.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08"}, + {file = "greenlet-1.1.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168"}, + {file = "greenlet-1.1.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b8c008de9d0daba7b6666aa5bbfdc23dcd78cafc33997c9b7741ff6353bafb7f"}, + {file = "greenlet-1.1.2-cp36-cp36m-win32.whl", hash = "sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa"}, + {file = "greenlet-1.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d"}, + {file = "greenlet-1.1.2-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4"}, + {file = "greenlet-1.1.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b"}, + {file = "greenlet-1.1.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c"}, + {file = "greenlet-1.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1"}, + {file = "greenlet-1.1.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28"}, + {file = "greenlet-1.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5"}, + {file = "greenlet-1.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c5d5b35f789a030ebb95bff352f1d27a93d81069f2adb3182d99882e095cefe"}, + {file = "greenlet-1.1.2-cp37-cp37m-win32.whl", hash = "sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc"}, + {file = "greenlet-1.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06"}, + {file = "greenlet-1.1.2-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0"}, + {file = "greenlet-1.1.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627"}, + {file = "greenlet-1.1.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478"}, + {file = "greenlet-1.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43"}, + {file = "greenlet-1.1.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711"}, + {file = "greenlet-1.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b"}, + {file = "greenlet-1.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2bde6792f313f4e918caabc46532aa64aa27a0db05d75b20edfc5c6f46479de2"}, + {file = "greenlet-1.1.2-cp38-cp38-win32.whl", hash = "sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd"}, + {file = "greenlet-1.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3"}, + {file = "greenlet-1.1.2-cp39-cp39-macosx_10_14_x86_64.whl", hash = 
"sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67"}, + {file = "greenlet-1.1.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab"}, + {file = "greenlet-1.1.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5"}, + {file = "greenlet-1.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88"}, + {file = "greenlet-1.1.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b"}, + {file = "greenlet-1.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3"}, + {file = "greenlet-1.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0051c6f1f27cb756ffc0ffbac7d2cd48cb0362ac1736871399a739b2885134d3"}, + {file = "greenlet-1.1.2-cp39-cp39-win32.whl", hash = "sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf"}, + {file = "greenlet-1.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd"}, + {file = "greenlet-1.1.2.tar.gz", hash = "sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a"}, +] importlib-metadata = [ {file = "importlib_metadata-4.11.3-py3-none-any.whl", hash = "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6"}, {file = "importlib_metadata-4.11.3.tar.gz", hash = "sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539"}, @@ -532,6 +633,44 @@ six = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, ] +sqlalchemy = [ + {file = "SQLAlchemy-1.4.39-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:4770eb3ba69ec5fa41c681a75e53e0e342ac24c1f9220d883458b5596888e43a"}, + {file = "SQLAlchemy-1.4.39-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:752ef2e8dbaa3c5d419f322e3632f00ba6b1c3230f65bc97c2ff5c5c6c08f441"}, + {file = "SQLAlchemy-1.4.39-cp27-cp27m-win32.whl", hash = "sha256:b30e70f1594ee3c8902978fd71900d7312453922827c4ce0012fa6a8278d6df4"}, + {file = "SQLAlchemy-1.4.39-cp27-cp27m-win_amd64.whl", hash = "sha256:864d4f89f054819cb95e93100b7d251e4d114d1c60bc7576db07b046432af280"}, + {file = "SQLAlchemy-1.4.39-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:8f901be74f00a13bf375241a778455ee864c2c21c79154aad196b7a994e1144f"}, + {file = "SQLAlchemy-1.4.39-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:1745987ada1890b0e7978abdb22c133eca2e89ab98dc17939042240063e1ef21"}, + {file = "SQLAlchemy-1.4.39-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ede13a472caa85a13abe5095e71676af985d7690eaa8461aeac5c74f6600b6c0"}, + {file = "SQLAlchemy-1.4.39-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7f13644b15665f7322f9e0635129e0ef2098409484df67fcd225d954c5861559"}, + {file = "SQLAlchemy-1.4.39-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26146c59576dfe9c546c9f45397a7c7c4a90c25679492ff610a7500afc7d03a6"}, + {file = "SQLAlchemy-1.4.39-cp310-cp310-win32.whl", hash = 
"sha256:91d2b89bb0c302f89e753bea008936acfa4e18c156fb264fe41eb6bbb2bbcdeb"}, + {file = "SQLAlchemy-1.4.39-cp310-cp310-win_amd64.whl", hash = "sha256:50e7569637e2e02253295527ff34666706dbb2bc5f6c61a5a7f44b9610c9bb09"}, + {file = "SQLAlchemy-1.4.39-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:107df519eb33d7f8e0d0d052128af2f25066c1a0f6b648fd1a9612ab66800b86"}, + {file = "SQLAlchemy-1.4.39-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f24d4d6ec301688c59b0c4bb1c1c94c5d0bff4ecad33bb8f5d9efdfb8d8bc925"}, + {file = "SQLAlchemy-1.4.39-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7b2785dd2a0c044a36836857ac27310dc7a99166253551ee8f5408930958cc60"}, + {file = "SQLAlchemy-1.4.39-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6e2c8581c6620136b9530137954a8376efffd57fe19802182c7561b0ab48b48"}, + {file = "SQLAlchemy-1.4.39-cp36-cp36m-win32.whl", hash = "sha256:fbc076f79d830ae4c9d49926180a1140b49fa675d0f0d555b44c9a15b29f4c80"}, + {file = "SQLAlchemy-1.4.39-cp36-cp36m-win_amd64.whl", hash = "sha256:0ec54460475f0c42512895c99c63d90dd2d9cbd0c13491a184182e85074b04c5"}, + {file = "SQLAlchemy-1.4.39-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:6f95706da857e6e79b54c33c1214f5467aab10600aa508ddd1239d5df271986e"}, + {file = "SQLAlchemy-1.4.39-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:621f050e72cc7dfd9ad4594ff0abeaad954d6e4a2891545e8f1a53dcdfbef445"}, + {file = "SQLAlchemy-1.4.39-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05a05771617bfa723ba4cef58d5b25ac028b0d68f28f403edebed5b8243b3a87"}, + {file = "SQLAlchemy-1.4.39-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20bf65bcce65c538e68d5df27402b39341fabeecf01de7e0e72b9d9836c13c52"}, + {file = "SQLAlchemy-1.4.39-cp37-cp37m-win32.whl", hash = "sha256:f2a42acc01568b9701665e85562bbff78ec3e21981c7d51d56717c22e5d3d58b"}, + {file = "SQLAlchemy-1.4.39-cp37-cp37m-win_amd64.whl", hash = "sha256:6d81de54e45f1d756785405c9d06cd17918c2eecc2d4262dc2d276ca612c2f61"}, + {file = "SQLAlchemy-1.4.39-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:5c2d19bfb33262bf987ef0062345efd0f54c4189c2d95159c72995457bf4a359"}, + {file = "SQLAlchemy-1.4.39-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14ea8ff2d33c48f8e6c3c472111d893b9e356284d1482102da9678195e5a8eac"}, + {file = "SQLAlchemy-1.4.39-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ec3985c883d6d217cf2013028afc6e3c82b8907192ba6195d6e49885bfc4b19d"}, + {file = "SQLAlchemy-1.4.39-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1962dfee37b7fb17d3d4889bf84c4ea08b1c36707194c578f61e6e06d12ab90f"}, + {file = "SQLAlchemy-1.4.39-cp38-cp38-win32.whl", hash = "sha256:047ef5ccd8860f6147b8ac6c45a4bc573d4e030267b45d9a1c47b55962ff0e6f"}, + {file = "SQLAlchemy-1.4.39-cp38-cp38-win_amd64.whl", hash = "sha256:b71be98ef6e180217d1797185c75507060a57ab9cd835653e0112db16a710f0d"}, + {file = "SQLAlchemy-1.4.39-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:365b75938049ae31cf2176efd3d598213ddb9eb883fbc82086efa019a5f649df"}, + {file = "SQLAlchemy-1.4.39-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:e7a7667d928ba6ee361a3176e1bef6847c1062b37726b33505cc84136f657e0d"}, + {file = "SQLAlchemy-1.4.39-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c6d00cb9da8d0cbfaba18cad046e94b06de6d4d0ffd9d4095a3ad1838af22528"}, + {file = "SQLAlchemy-1.4.39-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0538b66f959771c56ff996d828081908a6a52a47c5548faed4a3d0a027a5368"}, + {file = "SQLAlchemy-1.4.39-cp39-cp39-win32.whl", hash = "sha256:d1f665e50592caf4cad3caed3ed86f93227bffe0680218ccbb293bd5a6734ca8"}, + {file = "SQLAlchemy-1.4.39-cp39-cp39-win_amd64.whl", hash = "sha256:8b773c9974c272aae0fa7e95b576d98d17ee65f69d8644f9b6ffc90ee96b4d19"}, + {file = "SQLAlchemy-1.4.39.tar.gz", hash = "sha256:8194896038753b46b08a0b0ae89a5d80c897fb601dd51e243ed5720f1f155d27"}, +] thrift = [ {file = "thrift-0.13.0.tar.gz", hash = "sha256:9af1c86bf73433afc6010ed376a6c6aca2b54099cc0d61895f640870a9ae7d89"}, ] diff --git a/pyproject.toml b/pyproject.toml index de9160deb..97790a151 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,10 +14,14 @@ thrift = "^0.13.0" pyarrow = "^5.0.0" pandas = "^1.3.0" +[tool.poetry.plugins."sqlalchemy.dialects"] +"databricks.thrift" = "databricks.sqlalchemy:DatabricksDialect" + [tool.poetry.dev-dependencies] pytest = "^7.1.2" mypy = "^0.950" black = "^22.3.0" +SQLAlchemy = "^1.4.39" [build-system] requires = ["poetry-core>=1.0.0"] @@ -28,4 +32,5 @@ ignore_missing_imports = "true" exclude = ['ttypes\.py$', 'TCLIService\.py$'] [tool.black] -exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/' \ No newline at end of file +exclude = '/(\.eggs|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|\.svn|_build|buck-out|build|dist|thrift_api)/' + diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..c28f7e134 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,4 @@ +[sqla_testing] + +requirement_cls = databricks.sqlalchemy.requirements:Requirements +profile_file = tests/sqlalchemy/profiles.txt \ No newline at end of file diff --git a/src/databricks/sqlalchemy/__init__.py b/src/databricks/sqlalchemy/__init__.py new file mode 100644 index 000000000..22f87c3e5 --- /dev/null +++ b/src/databricks/sqlalchemy/__init__.py @@ -0,0 +1 @@ +from databricks.sqlalchemy.dialect import DatabricksDialect \ No newline at end of file diff --git a/src/databricks/sqlalchemy/dialect.py b/src/databricks/sqlalchemy/dialect.py new file mode 100644 index 000000000..cbe8afa21 --- /dev/null +++ b/src/databricks/sqlalchemy/dialect.py @@ -0,0 +1,45 @@ +from databricks import sql +from typing import AnyStr + +from sqlalchemy import types +from sqlalchemy import util + +from sqlalchemy.engine import default + + +class DatabricksDialect(default.DefaultDialect): + + # Possible attributes are defined here: https://docs.sqlalchemy.org/en/14/core/internals.html#sqlalchemy.engine.Dialect + name: str = "databricks" + driver: str= "thrift" + default_schema_name: str = "default" + + @classmethod + def dbapi(cls): + return sql + + def create_connect_args(self, url): + # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/*** + + kwargs = { + "server_hostname": url.host, + "access_token": url.password, + "http_path": url.query.get("http_path") + } + + return [], kwargs + + def get_table_names(self, *args, **kwargs): + + # TODO: Implement with native driver `.tables()` call + return 
super().get_table_names(*args, **kwargs)
+
+    def get_columns(self, *args, **kwargs):
+
+        # TODO: Implement with native driver `.columns()` call
+
+        return super().get_columns(*args, **kwargs)
+
+    def do_rollback(self, dbapi_connection):
+        # Databricks SQL does not support transactions
+        pass
diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py
new file mode 100644
index 000000000..6ce986887
--- /dev/null
+++ b/src/databricks/sqlalchemy/requirements.py
@@ -0,0 +1,18 @@
+# Following the official SQLAlchemy guide:
+#
+# https://github.com/sqlalchemy/sqlalchemy/blob/main/README.dialects.rst#dialect-layout
+#
+# The full group of requirements is available here:
+#
+# https://github.com/sqlalchemy/sqlalchemy/blob/a453256afc334acabee25ec275de555ef7287144/test/requirements.py
+
+
+from sqlalchemy.testing.requirements import SuiteRequirements
+from sqlalchemy.testing import exclusions
+
+class Requirements(SuiteRequirements):
+
+    @property
+    def two_phase_transactions(self):
+        # Databricks SQL doesn't support transactions
+        return exclusions.closed()
diff --git a/tests/sqlalchemy/conftest.py b/tests/sqlalchemy/conftest.py
new file mode 100644
index 000000000..d95fcd2c8
--- /dev/null
+++ b/tests/sqlalchemy/conftest.py
@@ -0,0 +1,7 @@
+from sqlalchemy.dialects import registry
+import pytest
+
+registry.register("databricks.thrift", "databricks.sqlalchemy", "DatabricksDialect")
+pytest.register_assert_rewrite("sqlalchemy.testing.assertions")
+
+from sqlalchemy.testing.plugin.pytestplugin import *
\ No newline at end of file
diff --git a/tests/sqlalchemy/test_suite.py b/tests/sqlalchemy/test_suite.py
new file mode 100644
index 000000000..e6d642ced
--- /dev/null
+++ b/tests/sqlalchemy/test_suite.py
@@ -0,0 +1,2 @@
+from sqlalchemy.testing.suite import *
+

From e13fb3b0e2d279713f5d36a5881872ef8784f155 Mon Sep 17 00:00:00 2001
From: George Chow
Date: Tue, 12 Jul 2022 17:18:48 -0700
Subject: [PATCH 02/30] bare-bones (& non-working) DatabricksDialect
 implementation

- a bare-bones dialect styled after PyHive and Impyla
- includes minimal stubs for DatabricksIdentifierPreparer,
  DatabricksExecutionContext, DatabricksTypeCompiler, & DatabricksCompiler
---
 poetry.lock                          | 271 ++++++---------------------
 pyproject.toml                       |   1 +
 src/databricks/sqlalchemy/dialect.py | 196 +++++++++++++++++--
 tests/sqlalchemy/test_suite.py       |  51 +++++
 4 files changed, 290 insertions(+), 229 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 21c9c4d68..1ce4fe65c 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -63,6 +63,14 @@ category = "dev"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
 
+[[package]]
+name = "future"
+version = "0.18.2"
+description = "Clean single-source support for Python 3 and 2"
+category = "main"
+optional = false
+python-versions = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*"
+
 [[package]]
 name = "greenlet"
 version = "1.1.2"
 description = "Lightweight in-process concurrent programming"
 category = "dev"
 optional = false
 python-versions = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*"
@@ -220,6 +228,25 @@ python-versions = ">=3.6"
 [package.dependencies]
 numpy = ">=1.16.6"
 
+[[package]]
+name = "pyhive"
+version = "0.6.5"
+description = "Python interface to Hive"
+category = "main"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+future = "*"
+python-dateutil = "*"
+
+[package.extras]
+hive = ["sasl (>=0.2.1)", "thrift (>=0.10.0)", "thrift_sasl (>=0.1.0)"]
+kerberos = ["requests_kerberos (>=0.12.0)"]
+presto = ["requests (>=1.0.0)"]
+sqlalchemy = ["sqlalchemy (>=1.3.0)"]
+trino = ["requests (>=1.0.0)"]
+
 [[package]]
 name = "pyparsing"
 version = "3.0.9"
@@ -368,17 +395,11 @@ testing =
["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "46fe2288362fc103abfdcd49c9dce356736b9ea6758d57b5d7fed173e2b9ceb5" +content-hash = "fd3612b808472d7c5ada95fde014214d9a1a25a7a457f6d2ee933a2ff7398894" [metadata.files] -atomicwrites = [ - {file = "atomicwrites-1.4.0-py2.py3-none-any.whl", hash = "sha256:6d1784dea7c0c8d4a5172b6c620f40b6e4cbfdf96d783691f2e1302a7b88e197"}, - {file = "atomicwrites-1.4.0.tar.gz", hash = "sha256:ae70396ad1a434f9c7046fd2dd196fc04b12f9e91ffb859164193be8b6168a7a"}, -] -attrs = [ - {file = "attrs-21.4.0-py2.py3-none-any.whl", hash = "sha256:2d27e3784d7a565d36ab851fe94887c5eccd6a463168875832a1be79c82828b4"}, - {file = "attrs-21.4.0.tar.gz", hash = "sha256:626ba8234211db98e869df76230a137c4c40a12d72445c45d5f5b716f076e2fd"}, -] +atomicwrites = [] +attrs = [] black = [ {file = "black-22.3.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2497f9c2386572e28921fa8bec7be3e51de6801f7459dffd6e62492531c47e09"}, {file = "black-22.3.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5795a0375eb87bfe902e80e0c8cfaedf8af4d49694d69161e5bd3206c18618bb"}, @@ -404,79 +425,20 @@ black = [ {file = "black-22.3.0-py3-none-any.whl", hash = "sha256:bc58025940a896d7e5356952228b68f793cf5fcb342be703c3a2669a1488cb72"}, {file = "black-22.3.0.tar.gz", hash = "sha256:35020b8886c022ced9282b51b5a875b6d1ab0c387b31a065b84db7c33085ca79"}, ] -click = [ - {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, - {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, -] +click = [] colorama = [ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, ] -greenlet = [ - {file = "greenlet-1.1.2-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:58df5c2a0e293bf665a51f8a100d3e9956febfbf1d9aaf8c0677cf70218910c6"}, - {file = "greenlet-1.1.2-cp27-cp27m-manylinux1_x86_64.whl", hash = "sha256:aec52725173bd3a7b56fe91bc56eccb26fbdff1386ef123abb63c84c5b43b63a"}, - {file = "greenlet-1.1.2-cp27-cp27m-manylinux2010_x86_64.whl", hash = "sha256:833e1551925ed51e6b44c800e71e77dacd7e49181fdc9ac9a0bf3714d515785d"}, - {file = "greenlet-1.1.2-cp27-cp27m-win32.whl", hash = "sha256:aa5b467f15e78b82257319aebc78dd2915e4c1436c3c0d1ad6f53e47ba6e2713"}, - {file = "greenlet-1.1.2-cp27-cp27m-win_amd64.whl", hash = "sha256:40b951f601af999a8bf2ce8c71e8aaa4e8c6f78ff8afae7b808aae2dc50d4c40"}, - {file = "greenlet-1.1.2-cp27-cp27mu-manylinux1_x86_64.whl", hash = "sha256:95e69877983ea39b7303570fa6760f81a3eec23d0e3ab2021b7144b94d06202d"}, - {file = "greenlet-1.1.2-cp27-cp27mu-manylinux2010_x86_64.whl", hash = "sha256:356b3576ad078c89a6107caa9c50cc14e98e3a6c4874a37c3e0273e4baf33de8"}, - {file = "greenlet-1.1.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:8639cadfda96737427330a094476d4c7a56ac03de7265622fcf4cfe57c8ae18d"}, - {file = "greenlet-1.1.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:97e5306482182170ade15c4b0d8386ded995a07d7cc2ca8f27958d34d6736497"}, - {file = "greenlet-1.1.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e6a36bb9474218c7a5b27ae476035497a6990e21d04c279884eb10d9b290f1b1"}, - {file = 
"greenlet-1.1.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:abb7a75ed8b968f3061327c433a0fbd17b729947b400747c334a9c29a9af6c58"}, - {file = "greenlet-1.1.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:b336501a05e13b616ef81ce329c0e09ac5ed8c732d9ba7e3e983fcc1a9e86965"}, - {file = "greenlet-1.1.2-cp310-cp310-win_amd64.whl", hash = "sha256:14d4f3cd4e8b524ae9b8aa567858beed70c392fdec26dbdb0a8a418392e71708"}, - {file = "greenlet-1.1.2-cp35-cp35m-macosx_10_14_x86_64.whl", hash = "sha256:17ff94e7a83aa8671a25bf5b59326ec26da379ace2ebc4411d690d80a7fbcf23"}, - {file = "greenlet-1.1.2-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:9f3cba480d3deb69f6ee2c1825060177a22c7826431458c697df88e6aeb3caee"}, - {file = "greenlet-1.1.2-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:fa877ca7f6b48054f847b61d6fa7bed5cebb663ebc55e018fda12db09dcc664c"}, - {file = "greenlet-1.1.2-cp35-cp35m-win32.whl", hash = "sha256:7cbd7574ce8e138bda9df4efc6bf2ab8572c9aff640d8ecfece1b006b68da963"}, - {file = "greenlet-1.1.2-cp35-cp35m-win_amd64.whl", hash = "sha256:903bbd302a2378f984aef528f76d4c9b1748f318fe1294961c072bdc7f2ffa3e"}, - {file = "greenlet-1.1.2-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:049fe7579230e44daef03a259faa24511d10ebfa44f69411d99e6a184fe68073"}, - {file = "greenlet-1.1.2-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:dd0b1e9e891f69e7675ba5c92e28b90eaa045f6ab134ffe70b52e948aa175b3c"}, - {file = "greenlet-1.1.2-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:7418b6bfc7fe3331541b84bb2141c9baf1ec7132a7ecd9f375912eca810e714e"}, - {file = "greenlet-1.1.2-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f9d29ca8a77117315101425ec7ec2a47a22ccf59f5593378fc4077ac5b754fce"}, - {file = "greenlet-1.1.2-cp36-cp36m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:21915eb821a6b3d9d8eefdaf57d6c345b970ad722f856cd71739493ce003ad08"}, - {file = "greenlet-1.1.2-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eff9d20417ff9dcb0d25e2defc2574d10b491bf2e693b4e491914738b7908168"}, - {file = "greenlet-1.1.2-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:b8c008de9d0daba7b6666aa5bbfdc23dcd78cafc33997c9b7741ff6353bafb7f"}, - {file = "greenlet-1.1.2-cp36-cp36m-win32.whl", hash = "sha256:32ca72bbc673adbcfecb935bb3fb1b74e663d10a4b241aaa2f5a75fe1d1f90aa"}, - {file = "greenlet-1.1.2-cp36-cp36m-win_amd64.whl", hash = "sha256:f0214eb2a23b85528310dad848ad2ac58e735612929c8072f6093f3585fd342d"}, - {file = "greenlet-1.1.2-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:b92e29e58bef6d9cfd340c72b04d74c4b4e9f70c9fa7c78b674d1fec18896dc4"}, - {file = "greenlet-1.1.2-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:fdcec0b8399108577ec290f55551d926d9a1fa6cad45882093a7a07ac5ec147b"}, - {file = "greenlet-1.1.2-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:93f81b134a165cc17123626ab8da2e30c0455441d4ab5576eed73a64c025b25c"}, - {file = "greenlet-1.1.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1e12bdc622676ce47ae9abbf455c189e442afdde8818d9da983085df6312e7a1"}, - {file = "greenlet-1.1.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8c790abda465726cfb8bb08bd4ca9a5d0a7bd77c7ac1ca1b839ad823b948ea28"}, - {file = "greenlet-1.1.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f276df9830dba7a333544bd41070e8175762a7ac20350786b322b714b0e654f5"}, - {file = "greenlet-1.1.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:8c5d5b35f789a030ebb95bff352f1d27a93d81069f2adb3182d99882e095cefe"}, - {file = "greenlet-1.1.2-cp37-cp37m-win32.whl", hash = "sha256:64e6175c2e53195278d7388c454e0b30997573f3f4bd63697f88d855f7a6a1fc"}, - {file = "greenlet-1.1.2-cp37-cp37m-win_amd64.whl", hash = "sha256:b11548073a2213d950c3f671aa88e6f83cda6e2fb97a8b6317b1b5b33d850e06"}, - {file = "greenlet-1.1.2-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:9633b3034d3d901f0a46b7939f8c4d64427dfba6bbc5a36b1a67364cf148a1b0"}, - {file = "greenlet-1.1.2-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:eb6ea6da4c787111adf40f697b4e58732ee0942b5d3bd8f435277643329ba627"}, - {file = "greenlet-1.1.2-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:f3acda1924472472ddd60c29e5b9db0cec629fbe3c5c5accb74d6d6d14773478"}, - {file = "greenlet-1.1.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e859fcb4cbe93504ea18008d1df98dee4f7766db66c435e4882ab35cf70cac43"}, - {file = "greenlet-1.1.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:00e44c8afdbe5467e4f7b5851be223be68adb4272f44696ee71fe46b7036a711"}, - {file = "greenlet-1.1.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec8c433b3ab0419100bd45b47c9c8551248a5aee30ca5e9d399a0b57ac04651b"}, - {file = "greenlet-1.1.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:2bde6792f313f4e918caabc46532aa64aa27a0db05d75b20edfc5c6f46479de2"}, - {file = "greenlet-1.1.2-cp38-cp38-win32.whl", hash = "sha256:288c6a76705dc54fba69fbcb59904ae4ad768b4c768839b8ca5fdadec6dd8cfd"}, - {file = "greenlet-1.1.2-cp38-cp38-win_amd64.whl", hash = "sha256:8d2f1fb53a421b410751887eb4ff21386d119ef9cde3797bf5e7ed49fb51a3b3"}, - {file = "greenlet-1.1.2-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:166eac03e48784a6a6e0e5f041cfebb1ab400b394db188c48b3a84737f505b67"}, - {file = "greenlet-1.1.2-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:572e1787d1460da79590bf44304abbc0a2da944ea64ec549188fa84d89bba7ab"}, - {file = "greenlet-1.1.2-cp39-cp39-manylinux2010_x86_64.whl", hash = "sha256:be5f425ff1f5f4b3c1e33ad64ab994eed12fc284a6ea71c5243fd564502ecbe5"}, - {file = "greenlet-1.1.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1692f7d6bc45e3200844be0dba153612103db241691088626a33ff1f24a0d88"}, - {file = "greenlet-1.1.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7227b47e73dedaa513cdebb98469705ef0d66eb5a1250144468e9c3097d6b59b"}, - {file = "greenlet-1.1.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ff61ff178250f9bb3cd89752df0f1dd0e27316a8bd1465351652b1b4a4cdfd3"}, - {file = "greenlet-1.1.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:0051c6f1f27cb756ffc0ffbac7d2cd48cb0362ac1736871399a739b2885134d3"}, - {file = "greenlet-1.1.2-cp39-cp39-win32.whl", hash = "sha256:f70a9e237bb792c7cc7e44c531fd48f5897961701cdaa06cf22fc14965c496cf"}, - {file = "greenlet-1.1.2-cp39-cp39-win_amd64.whl", hash = "sha256:013d61294b6cd8fe3242932c1c5e36e5d1db2c8afb58606c5a67efce62c1f5fd"}, - {file = "greenlet-1.1.2.tar.gz", hash = "sha256:e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a"}, +future = [ + {file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"}, ] +greenlet = [] importlib-metadata = [ {file = "importlib_metadata-4.11.3-py3-none-any.whl", hash = "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6"}, {file = "importlib_metadata-4.11.3.tar.gz", hash = 
"sha256:ea4c597ebf37142f827b8f39299579e31685c31d3a438b59f469406afd0f2539"}, ] -iniconfig = [ - {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"}, - {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"}, -] +iniconfig = [] mypy = [ {file = "mypy-0.950-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b"}, {file = "mypy-0.950-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0"}, @@ -502,87 +464,14 @@ mypy = [ {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"}, {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"}, ] -mypy-extensions = [ - {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, - {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, -] -numpy = [ - {file = "numpy-1.21.1-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:38e8648f9449a549a7dfe8d8755a5979b45b3538520d1e735637ef28e8c2dc50"}, - {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:fd7d7409fa643a91d0a05c7554dd68aa9c9bb16e186f6ccfe40d6e003156e33a"}, - {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a75b4498b1e93d8b700282dc8e655b8bd559c0904b3910b144646dbbbc03e062"}, - {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1412aa0aec3e00bc23fbb8664d76552b4efde98fb71f60737c83efbac24112f1"}, - {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e46ceaff65609b5399163de5893d8f2a82d3c77d5e56d976c8b5fb01faa6b671"}, - {file = "numpy-1.21.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:c6a2324085dd52f96498419ba95b5777e40b6bcbc20088fddb9e8cbb58885e8e"}, - {file = "numpy-1.21.1-cp37-cp37m-win32.whl", hash = "sha256:73101b2a1fef16602696d133db402a7e7586654682244344b8329cdcbbb82172"}, - {file = "numpy-1.21.1-cp37-cp37m-win_amd64.whl", hash = "sha256:7a708a79c9a9d26904d1cca8d383bf869edf6f8e7650d85dbc77b041e8c5a0f8"}, - {file = "numpy-1.21.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:95b995d0c413f5d0428b3f880e8fe1660ff9396dcd1f9eedbc311f37b5652e16"}, - {file = "numpy-1.21.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:635e6bd31c9fb3d475c8f44a089569070d10a9ef18ed13738b03049280281267"}, - {file = "numpy-1.21.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4a3d5fb89bfe21be2ef47c0614b9c9c707b7362386c9a3ff1feae63e0267ccb6"}, - {file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a326af80e86d0e9ce92bcc1e65c8ff88297de4fa14ee936cb2293d414c9ec63"}, - {file = "numpy-1.21.1-cp38-cp38-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:791492091744b0fe390a6ce85cc1bf5149968ac7d5f0477288f78c89b385d9af"}, - {file = "numpy-1.21.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0318c465786c1f63ac05d7c4dbcecd4d2d7e13f0959b01b534ea1e92202235c5"}, - {file = "numpy-1.21.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a513bd9c1551894ee3d31369f9b07460ef223694098cf27d399513415855b68"}, - {file = 
"numpy-1.21.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:91c6f5fc58df1e0a3cc0c3a717bb3308ff850abdaa6d2d802573ee2b11f674a8"}, - {file = "numpy-1.21.1-cp38-cp38-win32.whl", hash = "sha256:978010b68e17150db8765355d1ccdd450f9fc916824e8c4e35ee620590e234cd"}, - {file = "numpy-1.21.1-cp38-cp38-win_amd64.whl", hash = "sha256:9749a40a5b22333467f02fe11edc98f022133ee1bfa8ab99bda5e5437b831214"}, - {file = "numpy-1.21.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d7a4aeac3b94af92a9373d6e77b37691b86411f9745190d2c351f410ab3a791f"}, - {file = "numpy-1.21.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:d9e7912a56108aba9b31df688a4c4f5cb0d9d3787386b87d504762b6754fbb1b"}, - {file = "numpy-1.21.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:25b40b98ebdd272bc3020935427a4530b7d60dfbe1ab9381a39147834e985eac"}, - {file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:8a92c5aea763d14ba9d6475803fc7904bda7decc2a0a68153f587ad82941fec1"}, - {file = "numpy-1.21.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05a0f648eb28bae4bcb204e6fd14603de2908de982e761a2fc78efe0f19e96e1"}, - {file = "numpy-1.21.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f01f28075a92eede918b965e86e8f0ba7b7797a95aa8d35e1cc8821f5fc3ad6a"}, - {file = "numpy-1.21.1-cp39-cp39-win32.whl", hash = "sha256:88c0b89ad1cc24a5efbb99ff9ab5db0f9a86e9cc50240177a571fbe9c2860ac2"}, - {file = "numpy-1.21.1-cp39-cp39-win_amd64.whl", hash = "sha256:01721eefe70544d548425a07c80be8377096a54118070b8a62476866d5208e33"}, - {file = "numpy-1.21.1-pp37-pypy37_pp73-manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:2d4d1de6e6fb3d28781c73fbde702ac97f03d79e4ffd6598b880b2d95d62ead4"}, - {file = "numpy-1.21.1.zip", hash = "sha256:dff4af63638afcc57a3dfb9e4b26d434a7a602d225b42d746ea7fe2edf1342fd"}, -] -packaging = [ - {file = "packaging-21.3-py3-none-any.whl", hash = "sha256:ef103e05f519cdc783ae24ea4e2e0f508a9c99b2d4969652eed6a2e1ea5bd522"}, - {file = "packaging-21.3.tar.gz", hash = "sha256:dd47c42927d89ab911e606518907cc2d3a1f38bbd026385970643f9c5b8ecfeb"}, -] -pandas = [ - {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:62d5b5ce965bae78f12c1c0df0d387899dd4211ec0bdc52822373f13a3a022b9"}, - {file = "pandas-1.3.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:adfeb11be2d54f275142c8ba9bf67acee771b7186a5745249c7d5a06c670136b"}, - {file = "pandas-1.3.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:60a8c055d58873ad81cae290d974d13dd479b82cbb975c3e1fa2cf1920715296"}, - {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fd541ab09e1f80a2a1760032d665f6e032d8e44055d602d65eeea6e6e85498cb"}, - {file = "pandas-1.3.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2651d75b9a167cc8cc572cf787ab512d16e316ae00ba81874b560586fa1325e0"}, - {file = "pandas-1.3.5-cp310-cp310-win_amd64.whl", hash = "sha256:aaf183a615ad790801fa3cf2fa450e5b6d23a54684fe386f7e3208f8b9bfbef6"}, - {file = "pandas-1.3.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:344295811e67f8200de2390093aeb3c8309f5648951b684d8db7eee7d1c81fb7"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:552020bf83b7f9033b57cbae65589c01e7ef1544416122da0c79140c93288f56"}, - {file = "pandas-1.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5cce0c6bbeb266b0e39e35176ee615ce3585233092f685b6a82362523e59e5b4"}, - 
{file = "pandas-1.3.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d28a3c65463fd0d0ba8bbb7696b23073efee0510783340a44b08f5e96ffce0c"}, - {file = "pandas-1.3.5-cp37-cp37m-win32.whl", hash = "sha256:a62949c626dd0ef7de11de34b44c6475db76995c2064e2d99c6498c3dba7fe58"}, - {file = "pandas-1.3.5-cp37-cp37m-win_amd64.whl", hash = "sha256:8025750767e138320b15ca16d70d5cdc1886e8f9cc56652d89735c016cd8aea6"}, - {file = "pandas-1.3.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fe95bae4e2d579812865db2212bb733144e34d0c6785c0685329e5b60fcb85dd"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5f261553a1e9c65b7a310302b9dbac31cf0049a51695c14ebe04e4bfd4a96f02"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b6dbec5f3e6d5dc80dcfee250e0a2a652b3f28663492f7dab9a24416a48ac39"}, - {file = "pandas-1.3.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3bc49af96cd6285030a64779de5b3688633a07eb75c124b0747134a63f4c05f"}, - {file = "pandas-1.3.5-cp38-cp38-win32.whl", hash = "sha256:b6b87b2fb39e6383ca28e2829cddef1d9fc9e27e55ad91ca9c435572cdba51bf"}, - {file = "pandas-1.3.5-cp38-cp38-win_amd64.whl", hash = "sha256:a395692046fd8ce1edb4c6295c35184ae0c2bbe787ecbe384251da609e27edcb"}, - {file = "pandas-1.3.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bd971a3f08b745a75a86c00b97f3007c2ea175951286cdda6abe543e687e5f2f"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:37f06b59e5bc05711a518aa10beaec10942188dccb48918bb5ae602ccbc9f1a0"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2c21778a688d3712d35710501f8001cdbf96eb70a7c587a3d5613573299fdca6"}, - {file = "pandas-1.3.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3345343206546545bc26a05b4602b6a24385b5ec7c75cb6059599e3d56831da2"}, - {file = "pandas-1.3.5-cp39-cp39-win32.whl", hash = "sha256:c69406a2808ba6cf580c2255bcf260b3f214d2664a3a4197d0e640f573b46fd3"}, - {file = "pandas-1.3.5-cp39-cp39-win_amd64.whl", hash = "sha256:32e1a26d5ade11b547721a72f9bfc4bd113396947606e00d5b4a5b79b3dcb006"}, - {file = "pandas-1.3.5.tar.gz", hash = "sha256:1e4285f5de1012de20ca46b188ccf33521bff61ba5c5ebd78b4fb28e5416a9f1"}, -] -pathspec = [ - {file = "pathspec-0.9.0-py2.py3-none-any.whl", hash = "sha256:7d15c4ddb0b5c802d161efc417ec1a2558ea2653c2e8ad9c19098201dc1c993a"}, - {file = "pathspec-0.9.0.tar.gz", hash = "sha256:e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"}, -] -platformdirs = [ - {file = "platformdirs-2.5.2-py3-none-any.whl", hash = "sha256:027d8e83a2d7de06bbac4e5ef7e023c02b863d7ea5d079477e722bb41ab25788"}, - {file = "platformdirs-2.5.2.tar.gz", hash = "sha256:58c8abb07dcb441e6ee4b11d8df0ac856038f944ab98b7be6b27b2a3c7feef19"}, -] -pluggy = [ - {file = "pluggy-1.0.0-py2.py3-none-any.whl", hash = "sha256:74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3"}, - {file = "pluggy-1.0.0.tar.gz", hash = "sha256:4224373bacce55f955a878bf9cfa763c1e360858e330072059e10bad68531159"}, -] -py = [ - {file = "py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378"}, - {file = "py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719"}, -] +mypy-extensions = [] +numpy = [] +packaging = [] 
+pandas = [] +pathspec = [] +platformdirs = [] +pluggy = [] +py = [] pyarrow = [ {file = "pyarrow-5.0.0-cp36-cp36m-macosx_10_13_x86_64.whl", hash = "sha256:e9ec80f4a77057498cf4c5965389e42e7f6a618b6859e6dd615e57505c9167a6"}, {file = "pyarrow-5.0.0-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:b1453c2411b5062ba6bf6832dbc4df211ad625f678c623a2ee177aee158f199b"}, @@ -613,71 +502,20 @@ pyarrow = [ {file = "pyarrow-5.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:4d8adda1892ef4553c4804af7f67cce484f4d6371564e2d8374b8e2bc85293e2"}, {file = "pyarrow-5.0.0.tar.gz", hash = "sha256:24e64ea33eed07441cc0e80c949e3a1b48211a1add8953268391d250f4d39922"}, ] -pyparsing = [ - {file = "pyparsing-3.0.9-py3-none-any.whl", hash = "sha256:5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc"}, - {file = "pyparsing-3.0.9.tar.gz", hash = "sha256:2b020ecf7d21b687f219b71ecad3631f644a47f01403fa1d1036b0c6416d70fb"}, -] -pytest = [ - {file = "pytest-7.1.2-py3-none-any.whl", hash = "sha256:13d0e3ccfc2b6e26be000cb6568c832ba67ba32e719443bfe725814d3c42433c"}, - {file = "pytest-7.1.2.tar.gz", hash = "sha256:a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"}, +pyhive = [ + {file = "PyHive-0.6.5.tar.gz", hash = "sha256:cae07bd177527d04f6a5c7f96cb1849ba8bd9121750b75bbf5e3d4a3be566909"}, ] +pyparsing = [] +pytest = [] python-dateutil = [ {file = "python-dateutil-2.8.2.tar.gz", hash = "sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86"}, {file = "python_dateutil-2.8.2-py2.py3-none-any.whl", hash = "sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9"}, ] -pytz = [ - {file = "pytz-2022.1-py2.py3-none-any.whl", hash = "sha256:e68985985296d9a66a881eb3193b0906246245294a881e7c8afe623866ac6a5c"}, - {file = "pytz-2022.1.tar.gz", hash = "sha256:1e760e2fe6a8163bc0b3d9a19c4f84342afa0a2affebfaa84b01b978a02ecaa7"}, -] -six = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, -] -sqlalchemy = [ - {file = "SQLAlchemy-1.4.39-cp27-cp27m-macosx_10_14_x86_64.whl", hash = "sha256:4770eb3ba69ec5fa41c681a75e53e0e342ac24c1f9220d883458b5596888e43a"}, - {file = "SQLAlchemy-1.4.39-cp27-cp27m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:752ef2e8dbaa3c5d419f322e3632f00ba6b1c3230f65bc97c2ff5c5c6c08f441"}, - {file = "SQLAlchemy-1.4.39-cp27-cp27m-win32.whl", hash = "sha256:b30e70f1594ee3c8902978fd71900d7312453922827c4ce0012fa6a8278d6df4"}, - {file = "SQLAlchemy-1.4.39-cp27-cp27m-win_amd64.whl", hash = "sha256:864d4f89f054819cb95e93100b7d251e4d114d1c60bc7576db07b046432af280"}, - {file = "SQLAlchemy-1.4.39-cp27-cp27mu-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:8f901be74f00a13bf375241a778455ee864c2c21c79154aad196b7a994e1144f"}, - {file = "SQLAlchemy-1.4.39-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:1745987ada1890b0e7978abdb22c133eca2e89ab98dc17939042240063e1ef21"}, - {file = "SQLAlchemy-1.4.39-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ede13a472caa85a13abe5095e71676af985d7690eaa8461aeac5c74f6600b6c0"}, - {file = "SQLAlchemy-1.4.39-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7f13644b15665f7322f9e0635129e0ef2098409484df67fcd225d954c5861559"}, - {file = 
"SQLAlchemy-1.4.39-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:26146c59576dfe9c546c9f45397a7c7c4a90c25679492ff610a7500afc7d03a6"}, - {file = "SQLAlchemy-1.4.39-cp310-cp310-win32.whl", hash = "sha256:91d2b89bb0c302f89e753bea008936acfa4e18c156fb264fe41eb6bbb2bbcdeb"}, - {file = "SQLAlchemy-1.4.39-cp310-cp310-win_amd64.whl", hash = "sha256:50e7569637e2e02253295527ff34666706dbb2bc5f6c61a5a7f44b9610c9bb09"}, - {file = "SQLAlchemy-1.4.39-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:107df519eb33d7f8e0d0d052128af2f25066c1a0f6b648fd1a9612ab66800b86"}, - {file = "SQLAlchemy-1.4.39-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f24d4d6ec301688c59b0c4bb1c1c94c5d0bff4ecad33bb8f5d9efdfb8d8bc925"}, - {file = "SQLAlchemy-1.4.39-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:7b2785dd2a0c044a36836857ac27310dc7a99166253551ee8f5408930958cc60"}, - {file = "SQLAlchemy-1.4.39-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e6e2c8581c6620136b9530137954a8376efffd57fe19802182c7561b0ab48b48"}, - {file = "SQLAlchemy-1.4.39-cp36-cp36m-win32.whl", hash = "sha256:fbc076f79d830ae4c9d49926180a1140b49fa675d0f0d555b44c9a15b29f4c80"}, - {file = "SQLAlchemy-1.4.39-cp36-cp36m-win_amd64.whl", hash = "sha256:0ec54460475f0c42512895c99c63d90dd2d9cbd0c13491a184182e85074b04c5"}, - {file = "SQLAlchemy-1.4.39-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:6f95706da857e6e79b54c33c1214f5467aab10600aa508ddd1239d5df271986e"}, - {file = "SQLAlchemy-1.4.39-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:621f050e72cc7dfd9ad4594ff0abeaad954d6e4a2891545e8f1a53dcdfbef445"}, - {file = "SQLAlchemy-1.4.39-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:05a05771617bfa723ba4cef58d5b25ac028b0d68f28f403edebed5b8243b3a87"}, - {file = "SQLAlchemy-1.4.39-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20bf65bcce65c538e68d5df27402b39341fabeecf01de7e0e72b9d9836c13c52"}, - {file = "SQLAlchemy-1.4.39-cp37-cp37m-win32.whl", hash = "sha256:f2a42acc01568b9701665e85562bbff78ec3e21981c7d51d56717c22e5d3d58b"}, - {file = "SQLAlchemy-1.4.39-cp37-cp37m-win_amd64.whl", hash = "sha256:6d81de54e45f1d756785405c9d06cd17918c2eecc2d4262dc2d276ca612c2f61"}, - {file = "SQLAlchemy-1.4.39-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:5c2d19bfb33262bf987ef0062345efd0f54c4189c2d95159c72995457bf4a359"}, - {file = "SQLAlchemy-1.4.39-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:14ea8ff2d33c48f8e6c3c472111d893b9e356284d1482102da9678195e5a8eac"}, - {file = "SQLAlchemy-1.4.39-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ec3985c883d6d217cf2013028afc6e3c82b8907192ba6195d6e49885bfc4b19d"}, - {file = "SQLAlchemy-1.4.39-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1962dfee37b7fb17d3d4889bf84c4ea08b1c36707194c578f61e6e06d12ab90f"}, - {file = "SQLAlchemy-1.4.39-cp38-cp38-win32.whl", hash = "sha256:047ef5ccd8860f6147b8ac6c45a4bc573d4e030267b45d9a1c47b55962ff0e6f"}, - {file = "SQLAlchemy-1.4.39-cp38-cp38-win_amd64.whl", hash = "sha256:b71be98ef6e180217d1797185c75507060a57ab9cd835653e0112db16a710f0d"}, - {file = 
"SQLAlchemy-1.4.39-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:365b75938049ae31cf2176efd3d598213ddb9eb883fbc82086efa019a5f649df"}, - {file = "SQLAlchemy-1.4.39-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e7a7667d928ba6ee361a3176e1bef6847c1062b37726b33505cc84136f657e0d"}, - {file = "SQLAlchemy-1.4.39-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:c6d00cb9da8d0cbfaba18cad046e94b06de6d4d0ffd9d4095a3ad1838af22528"}, - {file = "SQLAlchemy-1.4.39-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0538b66f959771c56ff996d828081908a6a52a47c5548faed4a3d0a027a5368"}, - {file = "SQLAlchemy-1.4.39-cp39-cp39-win32.whl", hash = "sha256:d1f665e50592caf4cad3caed3ed86f93227bffe0680218ccbb293bd5a6734ca8"}, - {file = "SQLAlchemy-1.4.39-cp39-cp39-win_amd64.whl", hash = "sha256:8b773c9974c272aae0fa7e95b576d98d17ee65f69d8644f9b6ffc90ee96b4d19"}, - {file = "SQLAlchemy-1.4.39.tar.gz", hash = "sha256:8194896038753b46b08a0b0ae89a5d80c897fb601dd51e243ed5720f1f155d27"}, -] -thrift = [ - {file = "thrift-0.13.0.tar.gz", hash = "sha256:9af1c86bf73433afc6010ed376a6c6aca2b54099cc0d61895f640870a9ae7d89"}, -] -tomli = [ - {file = "tomli-2.0.1-py3-none-any.whl", hash = "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc"}, - {file = "tomli-2.0.1.tar.gz", hash = "sha256:de526c12914f0c550d15924c62d72abc48d6fe7364aa87328337a31007fe8a4f"}, -] +pytz = [] +six = [] +sqlalchemy = [] +thrift = [] +tomli = [] typed-ast = [ {file = "typed_ast-1.5.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ad3b48cf2b487be140072fb86feff36801487d4abb7382bb1929aaac80638ea"}, {file = "typed_ast-1.5.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:542cd732351ba8235f20faa0fc7398946fe1a57f2cdb289e5497e1e7f48cfedb"}, @@ -708,7 +546,4 @@ typing-extensions = [ {file = "typing_extensions-4.2.0-py3-none-any.whl", hash = "sha256:6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708"}, {file = "typing_extensions-4.2.0.tar.gz", hash = "sha256:f1c24655a0da0d1b67f07e17a5e6b2a105894e6824b92096378bb3668ef02376"}, ] -zipp = [ - {file = "zipp-3.8.0-py3-none-any.whl", hash = "sha256:c4f6e5bbf48e74f7a38e7cc5b0480ff42b0ae5178957d564d18932525d5cf099"}, - {file = "zipp-3.8.0.tar.gz", hash = "sha256:56bf8aadb83c24db6c4b577e13de374ccfb67da2078beba1d037c17980bf43ad"}, -] +zipp = [] diff --git a/pyproject.toml b/pyproject.toml index 97790a151..74defa731 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ python = "^3.7.1" thrift = "^0.13.0" pyarrow = "^5.0.0" pandas = "^1.3.0" +PyHive = "^0.6.5" [tool.poetry.plugins."sqlalchemy.dialects"] "databricks.thrift" = "databricks.sqlalchemy:DatabricksDialect" diff --git a/src/databricks/sqlalchemy/dialect.py b/src/databricks/sqlalchemy/dialect.py index cbe8afa21..e504fd985 100644 --- a/src/databricks/sqlalchemy/dialect.py +++ b/src/databricks/sqlalchemy/dialect.py @@ -1,26 +1,134 @@ from databricks import sql -from typing import AnyStr +from databricks import sql as dbsql + +# cribbing from Hive +from pyhive.sqlalchemy_hive import HiveExecutionContext, HiveIdentifierPreparer, HiveCompiler, HiveTypeCompiler +from pyhive.sqlalchemy_hive import _type_map + +import re from sqlalchemy import types from sqlalchemy import util -from sqlalchemy.engine import default +from sqlalchemy.engine import default, interfaces +from sqlalchemy.sql import compiler +from typing import AnyStr -class 
DatabricksDialect(default.DefaultDialect): +class DatabricksIdentifierPreparer(compiler.IdentifierPreparer): + # SparkSQL identifier specification: + # ref: https://spark.apache.org/docs/latest/sql-ref-identifier.html + + legal_characters = re.compile(r'^[A-Z0-9_]+$', re.I) + + def __init__(self, dialect): + super(DatabricksIdentifierPreparer, self).__init__( + dialect, + initial_quote='`', + ) + + +class DatabricksExecutionContext(default.DefaultExecutionContext): + # There doesn't seem to be any override of DefaultExecutionContext required + # but I will nonetheless introduce this class for clarity + + # TODO: revisit server-side cursors + # ref: https://docs.databricks.com/dev-tools/python-sql-connector.html#manage-cursors-and-connections + pass + + +class DatabricksTypeCompiler(compiler.GenericTypeCompiler): + # ref: https://spark.apache.org/docs/latest/sql-ref-datatypes.html + + def visit_TINYINT(self, type_): + return 'TINYINT' + + def visit_SMALLINT(self, type_, **kw): + return "SMALLINT" + + def visit_INTEGER(self, type_, **kw): + return "INT" + + def visit_BIGINT(self, type_, **kw): + return "BIGINT" + + + def visit_FLOAT(self, type_, **kw): + return "FLOAT" + + def visit_DOUBLE(self, type_, **kw): + return "DOUBLE" + + + def visit_DECIMAL(self, type_, **kw): + if type_.precision is None: + return "DECIMAL" + elif type_.scale is None: + return "DECIMAL(%(precision)s)" % {"precision": type_.precision} + else: + return "DECIMAL(%(precision)s, %(scale)s)" % { + "precision": type_.precision, + "scale": type_.scale, + } + + + def visit_DATE(self, type_, **kw): + return "DATE" + + def visit_TIMESTAMP(self, type_, **kw): + return "TIMESTAMP" + + + def visit_BOOLEAN(self, type_, **kw): + return "BOOLEAN" + + def visit_STRING(self, type_, **kw): + return "STRING" + + +class DatabricksCompiler(compiler.SQLCompiler): + # stub + pass + + + +class DatabricksDialect(default.DefaultDialect): # Possible attributes are defined here: https://docs.sqlalchemy.org/en/14/core/internals.html#sqlalchemy.engine.Dialect name: str = "databricks" driver: str= "thrift" default_schema_name: str = "default" + preparer = DatabricksIdentifierPreparer + execution_ctx_cls = DatabricksExecutionContext + statement_compiler = DatabricksCompiler + type_compiler = DatabricksTypeCompiler + + # the following attributes are cribbed from HiveDialect: + supports_views = False + supports_alter = True + supports_pk_autoincrement = False + supports_default_values = False + supports_empty_insert = False + supports_native_decimal = True + supports_native_boolean = True + supports_unicode_statements = True + supports_unicode_binds = True + returns_unicode_strings = True + description_encoding = None + supports_multivalues_insert = True + supports_sane_rowcount = False + + @classmethod def dbapi(cls): return sql - def create_connect_args(self, url): + def create_connect_args(self, url: "URL"): # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/*** + # TODO: add schema in + kwargs = { "server_hostname": url.host, "access_token": url.password, @@ -29,17 +137,83 @@ def create_connect_args(self, url): return [], kwargs - def get_table_names(self, *args, **kwargs): + # def initialize(self, connection) -> None: + # super().initialize(connection) + + def get_schema_names(self, connection, **kw): + connection = dbsql.connect( + server_hostname=kwargs['server_hostname'], + http_path=kwargs['http_path'], + access_token=kwargs['access_token'], + schema='default' + ) + TABLE_SCHEM = 2 + with 
connection.cursor() as cur: + data = cur.schemas(catalog_name='%').fetchall() + _schemas = [i[TABLE_SCHEM] for i in data] + + return _schemas + + def get_table_names(self, connection, schema = None, **kw): + # TODO: can initialize() take care of shared initialization? + connection = dbsql.connect( + server_hostname=kwargs['server_hostname'], + http_path=kwargs['http_path'], + access_token=kwargs['access_token'], + schema='default' + ) + + breakpoint() + + TABLE_NAME = 2 + with connection.cursor() as cur: + data = cur.tables(schema_name='default').fetchall() + _tables = [i[TABLE_NAME] for i in data] - # TODO: Implement with native driver `.tables()` call - return super().get_table_names(*args, **kwargs) + return _tables - def get_columns(self, *args, **kwargs): + def get_columns(self, connection, table_name, schema=None, **kw): # TODO: Implement with native driver `.columns()` call + return self._get_table_columns(connection, table_name, schema) + + + def get_view_names(self, connection, schema=None, **kw): + # no views at present + return [] + + # private method to serve get_columns() and has_tables() + def _get_table_columns(self, connection, table_name, schema): + with connection.cursor() as cur: + data = cur.columns(schema_name='default', table_name=table_name).fetchall() + _tables = [i[COLUMN_NAME] for i in data] + return _tables + + def has_table( + self, + connection, + table_name, + schema = None, + **kw, + ) -> bool: + try: + self._get_table_columns(connection, table_name, schema) + return True + except exc.NoSuchTableError: + return False + + def has_table(self, connection, table_name, schema=None): + # Spark has no foreign keys + return [] + + def has_table(self, connection, table_name, schema=None): + # Spark has no primary keys + return [] - return super().get_columns(*args, **kwargs) + def get_indexes(self, connection, table_name, schema=None, **kw): + # TODO: treat partitions as indices + return [] - def do_rollback(self, dbapi_connection): - # Databricks SQL Does not support transaction + def do_rollback(self, dbapi_connection) -> None: + # Spark/Delta transaction only covers single-table updates... to simplify things, just skip this for now. 
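+ # NB: SQLAlchemy's connection pool still invokes do_rollback() whenever a connection is checked back in, so this needs to stay a silent no-op rather than raise against the autocommit-only DBAPI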
pass diff --git a/tests/sqlalchemy/test_suite.py b/tests/sqlalchemy/test_suite.py index e6d642ced..cd6ac482c 100644 --- a/tests/sqlalchemy/test_suite.py +++ b/tests/sqlalchemy/test_suite.py @@ -1,2 +1,53 @@ from sqlalchemy.testing.suite import * +from sqlalchemy.testing.suite import ( + CastTypeDecoratorTest as _CastTypeDecoratorTest, +) +from sqlalchemy.testing.suite import ( + ComponentReflectionTest as _ComponentReflectionTest, +) +from sqlalchemy.testing.suite import ( + ComponentReflectionTestExtra as _ComponentReflectionTestExtra, +) +from sqlalchemy.testing.suite import DateTimeTest as _DateTimeTest +from sqlalchemy.testing.suite import ( + DifficultParametersTest as _DifficultParametersTest, +) +from sqlalchemy.testing.suite import ExistsTest as _ExistsTest +from sqlalchemy.testing.suite import ( + ExpandingBoundInTest as _ExpandingBoundInTest, +) +from sqlalchemy.testing.suite import ( + FetchLimitOffsetTest as _FetchLimitOffsetTest, +) +from sqlalchemy.testing.suite import InsertBehaviorTest as _InsertBehaviorTest +from sqlalchemy.testing.suite import IntegerTest as _IntegerTest +from sqlalchemy.testing.suite import JoinTest as _JoinTest +from sqlalchemy.testing.suite import LikeFunctionsTest as _LikeFunctionsTest +from sqlalchemy.testing.suite import ( + LongNameBlowoutTest as _LongNameBlowoutTest, +) +from sqlalchemy.testing.suite import NumericTest as _NumericTest +from sqlalchemy.testing.suite import OrderByLabelTest as _OrderByLabelTest +from sqlalchemy.testing.suite import ( + QuotedNameArgumentTest as _QuotedNameArgumentTest, +) +from sqlalchemy.testing.suite import TableDDLTest as _TableDDLTest + + + +class JoinTest(): + def test_inner_join_true(self): + breakpoint() + return + + @testing.skip("databricks") + def test_inner_join_false(self): + return + + @testing.skip("databricks") + def test_outer_join_false(self): + return + + + From 0ee8d268d96905aa2be5718b1b8c8d543c15d182 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 12 Jul 2022 17:40:19 -0700 Subject: [PATCH 03/30] reminder about TABLE_SCHEM --- src/databricks/sqlalchemy/dialect.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/databricks/sqlalchemy/dialect.py b/src/databricks/sqlalchemy/dialect.py index e504fd985..905af9d51 100644 --- a/src/databricks/sqlalchemy/dialect.py +++ b/src/databricks/sqlalchemy/dialect.py @@ -147,6 +147,7 @@ def get_schema_names(self, connection, **kw): access_token=kwargs['access_token'], schema='default' ) + # TODO: look up correct index for TABLE_SCHEM TABLE_SCHEM = 2 with connection.cursor() as cur: data = cur.schemas(catalog_name='%').fetchall() From 2742a9277a88ac6a6e11e1fd1e72135e1777a67e Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 19 Jul 2022 14:29:33 -0700 Subject: [PATCH 04/30] initial checkin with working pytest test suites. 
-bare-bone implementation of dialect -basic test suite -sample app (not working) --- Makefile | 37 ++++ env.template | 10 + src/databricks/sqlalchemy/.gitignore | 2 + src/databricks/sqlalchemy/Makefile | 21 ++ src/databricks/sqlalchemy/create-table.sql | 24 +++ src/databricks/sqlalchemy/describe-table.sql | 6 + src/databricks/sqlalchemy/dialect.py | 150 +++++++++----- src/databricks/sqlalchemy/drop-table.sql | 5 + .../sqlalchemy/sample-app-select.py | 33 ++++ src/databricks/sqlalchemy/select-table.sql | 7 + tests/sqlalchemy/README.md | 21 ++ tests/sqlalchemy/test_suite.py | 185 +++++++++++++----- 12 files changed, 407 insertions(+), 94 deletions(-) create mode 100644 Makefile create mode 100644 env.template create mode 100644 src/databricks/sqlalchemy/.gitignore create mode 100644 src/databricks/sqlalchemy/Makefile create mode 100644 src/databricks/sqlalchemy/create-table.sql create mode 100644 src/databricks/sqlalchemy/describe-table.sql create mode 100644 src/databricks/sqlalchemy/drop-table.sql create mode 100644 src/databricks/sqlalchemy/sample-app-select.py create mode 100644 src/databricks/sqlalchemy/select-table.sql create mode 100644 tests/sqlalchemy/README.md diff --git a/Makefile b/Makefile new file mode 100644 index 000000000..9c6d0ee01 --- /dev/null +++ b/Makefile @@ -0,0 +1,37 @@ +DBSCLI=dbsqlcli +PYTEST=poetry run python3 -m pytest + +SUITE_PATH=tests/sqlalchemy + +SUITE=test_suite.py + +# NB: add noglob when issuing this iteractively in zsh if you have globbing set +#--dburi "databricks+thrift://token:dapie08893e611277fabdd78a186a1331278@e2-dogfood.staging.cloud.databricks.com?http_path=/sql/protocolv1/o/6051921418418893/0819-204509-hill72" + +all: full + +clean: drop_simpletest drop_reflectiontest + +showtables: + $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); show tables;" + + +full: + $(PYTEST) $(SUITE_PATH) \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +reflection: + $(PYTEST) $(SUITE_PATH)/test_query.py::ReflectionTest \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +simple: + $(PYTEST) $(SUITE_PATH)/test_query.py::SimpleTest \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + + +drop_simpletest: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS simpletest_num;" + +drop_reflectiontest: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS reflectiontest;" + diff --git a/env.template b/env.template new file mode 100644 index 000000000..5c43330a3 --- /dev/null +++ b/env.template @@ -0,0 +1,10 @@ + +#!/usr/bin/env bash + +export DATABRICKS_SERVER_HOSTNAME=your-host-name +export DATABRICKS_HTTP_PATH=your-path +export DATABRICKS_TOKEN=your-token +export DATABRICKS_SCHEMA=default-or-something-else + +# hackey dynamic breakpoint; delete the following if you do *not* want the breakpoint +export DATABRICKS_DIALECT_DEBUG=True diff --git a/src/databricks/sqlalchemy/.gitignore b/src/databricks/sqlalchemy/.gitignore new file mode 100644 index 000000000..6c5416e66 --- /dev/null +++ b/src/databricks/sqlalchemy/.gitignore @@ -0,0 +1,2 @@ +*env + diff --git a/src/databricks/sqlalchemy/Makefile b/src/databricks/sqlalchemy/Makefile new file mode 100644 index 000000000..5e82a3af7 --- /dev/null +++ b/src/databricks/sqlalchemy/Makefile @@ -0,0 +1,21 @@ +DBSCLI=dbsqlcli 
+PY=poetry run python3 + +sample1: + $(PY) sample-app-select.py + +sample2: + +clean: drop + +create: + $(DBSCLI) -e create-table.sql + +describe: + $(DBSCLI) -e describe-table.sql + +select: + $(DBSCLI) -e select-table.sql + +drop: + echo y | $(DBSCLI) -e drop-table.sql \ No newline at end of file diff --git a/src/databricks/sqlalchemy/create-table.sql b/src/databricks/sqlalchemy/create-table.sql new file mode 100644 index 000000000..5cf40e61b --- /dev/null +++ b/src/databricks/sqlalchemy/create-table.sql @@ -0,0 +1,24 @@ +/* table/data for sample app */ + +USE george_chow_dbtest; + +CREATE TABLE sample_numtypes + ( + f_byte BYTE, + f_short SHORT, + f_int INT, + f_long LONG, + f_float FLOAT, + f_decimal DECIMAL(10,2), + f_boolean BOOLEAN + ); + +INSERT INTO sample_numtypes VALUES + ( 125, 32700, 2001002003, 9001002003004005006, 1E30, 1.5, TRUE ), + ( -125, -32700, -2001002003, -9001002003004005006, 1E-30, -1.5, FALSE ), + ( 125, 32700, 2001002003, 9001002003004005006, -1E30, 1.5, TRUE ); + +SELECT * FROM sample_numtypes; + +DESCRIBE sample_numtypes; + diff --git a/src/databricks/sqlalchemy/describe-table.sql b/src/databricks/sqlalchemy/describe-table.sql new file mode 100644 index 000000000..ba36ffb1a --- /dev/null +++ b/src/databricks/sqlalchemy/describe-table.sql @@ -0,0 +1,6 @@ +/* table/data for sample app */ + +USE george_chow_dbtest; + +DESCRIBE sample_numtypes; + diff --git a/src/databricks/sqlalchemy/dialect.py b/src/databricks/sqlalchemy/dialect.py index 905af9d51..a9ccc74a5 100644 --- a/src/databricks/sqlalchemy/dialect.py +++ b/src/databricks/sqlalchemy/dialect.py @@ -1,14 +1,16 @@ +import os + from databricks import sql from databricks import sql as dbsql # cribbing from Hive from pyhive.sqlalchemy_hive import HiveExecutionContext, HiveIdentifierPreparer, HiveCompiler, HiveTypeCompiler -from pyhive.sqlalchemy_hive import _type_map import re from sqlalchemy import types from sqlalchemy import util +from sqlalchemy import exc from sqlalchemy.engine import default, interfaces from sqlalchemy.sql import compiler @@ -16,6 +18,10 @@ from typing import AnyStr +# provide a way to break in +debugbreakpoint = os.getenv("DATABRICKS_DIALECT_DEBUG") or False + + class DatabricksIdentifierPreparer(compiler.IdentifierPreparer): # SparkSQL identifier specification: # ref: https://spark.apache.org/docs/latest/sql-ref-identifier.html @@ -93,6 +99,20 @@ class DatabricksCompiler(compiler.SQLCompiler): +# we use DATA_TYPE because TYPE_NAME is instance-specific (e.g. 
DECIMAL is reported as DECIMAL(precision, scale)) +_type_map = { + 5: types.Integer, # small_int + 4: types.Integer, # int + -5: types.BigInteger, # big_int + 6: types.Float, + 3: types.DECIMAL, + 16: types.Boolean, + 'string': types.String, + # 'date': HiveDate, + # 'timestamp': HiveTimestamp, +} + + class DatabricksDialect(default.DefaultDialect): # Possible attributes are defined here: https://docs.sqlalchemy.org/en/14/core/internals.html#sqlalchemy.engine.Dialect name: str = "databricks" @@ -118,21 +138,21 @@ class DatabricksDialect(default.DefaultDialect): description_encoding = None supports_multivalues_insert = True supports_sane_rowcount = False - + + # added based on comments here: https://docs.sqlalchemy.org/en/14/errors.html#error-cprf + supports_statement_cache = False @classmethod def dbapi(cls): return sql def create_connect_args(self, url: "URL"): - # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com?http_path=/sql/*** - - # TODO: add schema in - + # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com/***?http_path=/sql/*** kwargs = { "server_hostname": url.host, "access_token": url.password, - "http_path": url.query.get("http_path") + "http_path": url.query.get("http_path"), + "schema": url.database or "default" } return [], kwargs @@ -140,78 +160,120 @@ def create_connect_args(self, url: "URL"): # def initialize(self, connection) -> None: # super().initialize(connection) - def get_schema_names(self, connection, **kw): - connection = dbsql.connect( - server_hostname=kwargs['server_hostname'], - http_path=kwargs['http_path'], - access_token=kwargs['access_token'], - schema='default' - ) + def get_schema_names(self, connection, **kwargs): + # conn = dbsql.connect( + # server_hostname=kwargs['server_hostname'], + # http_path=kwargs['http_path'], + # access_token=kwargs['access_token'], + # schema=kwargs['schema'] + # ) # TODO: look up correct index for TABLE_SCHEM + TABLE_SCHEM = 2 - with connection.cursor() as cur: + if debugbreakpoint: + breakpoint() + with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: data = cur.schemas(catalog_name='%').fetchall() _schemas = [i[TABLE_SCHEM] for i in data] return _schemas - def get_table_names(self, connection, schema = None, **kw): - # TODO: can initialize() take care of shared initialization? 
- connection = dbsql.connect( - server_hostname=kwargs['server_hostname'], - http_path=kwargs['http_path'], - access_token=kwargs['access_token'], - schema='default' - ) - - breakpoint() + def get_table_names(self, connection, schema = None, **kwargs): + if debugbreakpoint: + breakpoint() TABLE_NAME = 2 - with connection.cursor() as cur: - data = cur.tables(schema_name='default').fetchall() + with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: + data = cur.tables(schema_name=schema).fetchall() _tables = [i[TABLE_NAME] for i in data] return _tables - def get_columns(self, connection, table_name, schema=None, **kw): - # TODO: Implement with native driver `.columns()` call - return self._get_table_columns(connection, table_name, schema) + # Example row + # Row(TABLE_CAT='hive_metastore', TABLE_SCHEM='george_chow_dbtest', TABLE_NAME='all_types', COLUMN_NAME='f_byte', DATA_TYPE=4, + # TYPE_NAME='INT', COLUMN_SIZE=4, BUFFER_LENGTH=None, DECIMAL_DIGITS=0, NUM_PREC_RADIX=10, + # NULLABLE=1, REMARKS='', COLUMN_DEF=None, SQL_DATA_TYPE=None, SQL_DATETIME_SUB=None, + # CHAR_OCTET_LENGTH=None, ORDINAL_POSITION=0, IS_NULLABLE='YES', SCOPE_CATALOG=None, SCOPE_SCHEMA=None, + # SCOPE_TABLE=None, SOURCE_DATA_TYPE=None, IS_AUTO_INCREMENT='NO') + def get_columns(self, connection, table_name, schema=None, **kwargs): + COLUMN_NAME=3 + COLUMN_TYPE_NAME=5 + COLUMN_TYPE=4 # by DATA_TYPE + COLUMN_NULLABLE=17 + COLUMN_COMMENT=11 + COLUMN_AUTOINCREMENT=22 + + result = [] + with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: + if debugbreakpoint: + breakpoint() + data = cur.columns(schema_name=schema, table_name=table_name).fetchall() + for i in data: + if debugbreakpoint: + breakpoint() + + # filled-in according to interfaces.py's class ReflectedColumn(TypedDict): + try: + coltype = _type_map[i[COLUMN_TYPE]] + except KeyError: + util.warn("Did not recognize type '%s'('%s') of column '%s'" % (i[COLUMN_TYPE_NAME], i[COLUMN_TYPE], i[COLUMN_NAME])) + coltype = types.NullType + + try: + nullable = i[COLUMN_NULLABLE] == 'YES'; + except KeyError: + nullable = True; + + try: + autoincrement = i[COLUMN_AUTOINCREMENT] == 'YES'; + except KeyError: + autoincrement = False; + + result.append({ + 'name': i[COLUMN_NAME], + 'type': coltype, + 'nullable': nullable, + 'comment': i[COLUMN_COMMENT], + 'autoincrement': autoincrement, + }) + + return result - def get_view_names(self, connection, schema=None, **kw): + def get_view_names(self, connection, schema=None, **kwargs): # no views at present return [] - # private method to serve get_columns() and has_tables() - def _get_table_columns(self, connection, table_name, schema): - with connection.cursor() as cur: - data = cur.columns(schema_name='default', table_name=table_name).fetchall() - _tables = [i[COLUMN_NAME] for i in data] - return _tables - def has_table( self, connection, table_name, schema = None, - **kw, + **kwargs, ) -> bool: + if debugbreakpoint: + breakpoint() try: - self._get_table_columns(connection, table_name, schema) - return True + COLUMN_NAME=3 + with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: + data = cur.columns(schema_name=schema or 'default', table_name=table_name).fetchmany(1) + # as long as we have some number of columns, the table exists! 
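+ # (NB: this assumes the driver reports a missing table as sqlalchemy.exc.NoSuchTableError; if it surfaces its own error type instead, that exception will propagate rather than return False)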
+ if debugbreakpoint: + breakpoint() + return len(data) > 0 except exc.NoSuchTableError: return False - def has_table(self, connection, table_name, schema=None): + def get_foreign_keys(self, connection, table_name, schema=None, **kwargs): # Spark has no foreign keys return [] - def has_table(self, connection, table_name, schema=None): + def get_pk_constraint(self, connection, table_name, schema=None, **kwargs): # Spark has no primary keys return [] - def get_indexes(self, connection, table_name, schema=None, **kw): + def get_indexes(self, connection, table_name, schema=None, **kwargs): # TODO: treat partitions as indices return [] diff --git a/src/databricks/sqlalchemy/drop-table.sql b/src/databricks/sqlalchemy/drop-table.sql new file mode 100644 index 000000000..0e419e3d4 --- /dev/null +++ b/src/databricks/sqlalchemy/drop-table.sql @@ -0,0 +1,5 @@ +/* table/data for sample app */ + +USE george_chow_dbtest; + +DROP TABLE IF EXISTS sample_numtypes; diff --git a/src/databricks/sqlalchemy/sample-app-select.py b/src/databricks/sqlalchemy/sample-app-select.py new file mode 100644 index 000000000..78cc69ae7 --- /dev/null +++ b/src/databricks/sqlalchemy/sample-app-select.py @@ -0,0 +1,33 @@ +# SELECT statement +import os + +from sqlalchemy import create_engine +from sqlalchemy import MetaData +from sqlalchemy import Table, Column, Integer, BigInteger, Float, Boolean +from sqlalchemy import select + +server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME") +http_path = os.getenv("DATABRICKS_HTTP_PATH") +access_token = os.getenv("DATABRICKS_TOKEN") +default_schema = os.getenv("DATABRICKS_SCHEMA") + +engine = create_engine("databricks+thrift://token:{access_token}@{server_hostname}/{default_schema}?http_path={http_path}", echo=True, future=True) + +metadata_obj = MetaData() +numtypes = Table( + "sample_numtypes", + metadata_obj, + Column('f_byte', Integer), + Column('f_short', Integer), + Column('f_int', Integer), + Column('f_long', BigInteger), + Column('f_float', Float), + Column('f_decimal', Float), + Column('f_boolean', Boolean) +) + +stmt = select(numtypes).where(numtypes.c.f_byte == -125) +print(stmt) +with engine.connect() as conn: + for row in conn.execute(stmt): + print(row) diff --git a/src/databricks/sqlalchemy/select-table.sql b/src/databricks/sqlalchemy/select-table.sql new file mode 100644 index 000000000..01096375c --- /dev/null +++ b/src/databricks/sqlalchemy/select-table.sql @@ -0,0 +1,7 @@ +/* table/data for sample app */ + +USE george_chow_dbtest; + +SELECT * FROM sample_numtypes; + + diff --git a/tests/sqlalchemy/README.md b/tests/sqlalchemy/README.md new file mode 100644 index 000000000..9916b51ed --- /dev/null +++ b/tests/sqlalchemy/README.md @@ -0,0 +1,21 @@ +# Introduction + +This is a work-in-progress SQLAlchemy dialect for Databricks. + +The dialect is embedded within the Databricks SQL Connector. + +## Test Usage + +Initialize your project with Poetry (e.g., `poetry install`) before continuing with the sample tests here. + +A `Makefile` has been set up to reduce typing. + +Configure the test via environment using the sample `env.template`.
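+ +For example (the placeholder values below come straight from `env.template`; substitute your own workspace settings): + +```bash +export DATABRICKS_SERVER_HOSTNAME=your-host-name +export DATABRICKS_HTTP_PATH=your-path +export DATABRICKS_TOKEN=your-token +export DATABRICKS_SCHEMA=default-or-something-else +```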
+ +A sample run would be as follows: + +```bash +$ source yourtestenv +$ make simple +``` + diff --git a/tests/sqlalchemy/test_suite.py b/tests/sqlalchemy/test_suite.py index cd6ac482c..ffe1546ad 100644 --- a/tests/sqlalchemy/test_suite.py +++ b/tests/sqlalchemy/test_suite.py @@ -1,53 +1,138 @@ -from sqlalchemy.testing.suite import * - -from sqlalchemy.testing.suite import ( - CastTypeDecoratorTest as _CastTypeDecoratorTest, -) -from sqlalchemy.testing.suite import ( - ComponentReflectionTest as _ComponentReflectionTest, -) -from sqlalchemy.testing.suite import ( - ComponentReflectionTestExtra as _ComponentReflectionTestExtra, -) -from sqlalchemy.testing.suite import DateTimeTest as _DateTimeTest -from sqlalchemy.testing.suite import ( - DifficultParametersTest as _DifficultParametersTest, -) -from sqlalchemy.testing.suite import ExistsTest as _ExistsTest -from sqlalchemy.testing.suite import ( - ExpandingBoundInTest as _ExpandingBoundInTest, -) -from sqlalchemy.testing.suite import ( - FetchLimitOffsetTest as _FetchLimitOffsetTest, -) -from sqlalchemy.testing.suite import InsertBehaviorTest as _InsertBehaviorTest -from sqlalchemy.testing.suite import IntegerTest as _IntegerTest -from sqlalchemy.testing.suite import JoinTest as _JoinTest -from sqlalchemy.testing.suite import LikeFunctionsTest as _LikeFunctionsTest -from sqlalchemy.testing.suite import ( - LongNameBlowoutTest as _LongNameBlowoutTest, -) -from sqlalchemy.testing.suite import NumericTest as _NumericTest -from sqlalchemy.testing.suite import OrderByLabelTest as _OrderByLabelTest -from sqlalchemy.testing.suite import ( - QuotedNameArgumentTest as _QuotedNameArgumentTest, -) -from sqlalchemy.testing.suite import TableDDLTest as _TableDDLTest - - - -class JoinTest(): - def test_inner_join_true(self): - breakpoint() - return - - @testing.skip("databricks") - def test_inner_join_false(self): - return - - @testing.skip("databricks") - def test_outer_join_false(self): - return +import datetime +from sqlalchemy import BIGINT +from sqlalchemy import BOOLEAN +from sqlalchemy import DATE +from sqlalchemy import DECIMAL +from sqlalchemy import FLOAT +from sqlalchemy import INT +from sqlalchemy import Integer +from sqlalchemy import Interval +from sqlalchemy import SMALLINT +from sqlalchemy import String +from sqlalchemy import TIMESTAMP +from sqlalchemy import Table, Column +from sqlalchemy import and_ +from sqlalchemy import asc +from sqlalchemy import bindparam +from sqlalchemy import cast +from sqlalchemy import desc +from sqlalchemy import exc +from sqlalchemy import except_ +from sqlalchemy import ForeignKey +from sqlalchemy import func +from sqlalchemy import intersect +from sqlalchemy import literal +from sqlalchemy import literal_column +from sqlalchemy import MetaData +from sqlalchemy import not_ +from sqlalchemy import or_ +from sqlalchemy import select +from sqlalchemy import sql +from sqlalchemy import testing +from sqlalchemy import text +from sqlalchemy import tuple_ +from sqlalchemy import TypeDecorator +from sqlalchemy import union +from sqlalchemy import union_all +from sqlalchemy import VARCHAR +from sqlalchemy.engine import default +from sqlalchemy.sql import LABEL_STYLE_TABLENAME_PLUS_COL +from sqlalchemy.sql.selectable import LABEL_STYLE_NONE +from sqlalchemy.testing import assert_raises_message +from sqlalchemy.testing import AssertsExecutionResults +from sqlalchemy.testing import ComparesTables +from sqlalchemy.testing import eq_ +from sqlalchemy.testing import fixtures +from sqlalchemy.testing import is_ +from 
sqlalchemy.testing.schema import Column +from sqlalchemy.testing.schema import Table +from sqlalchemy.testing.util import resolve_lambda + + + +class ReflectionTest(fixtures.TablesTest, ComparesTables): + def test_numtypes(self, metadata, connection): + meta = metadata + + # TODO: switch over to internal golden tables once all types are implemented + all_num_types = Table( + "all_num_types", + meta, + Column("f_short", SMALLINT), + Column("f_int", INT), + Column("f_long", BIGINT), + Column("f_float", FLOAT), + Column("f_decimal", DECIMAL(9,3)), + Column("f_boolean", BOOLEAN), + ) + + meta.create_all(connection) + + meta2 = MetaData() + reflected_types = Table( + "all_num_types", meta2, autoload_with=connection + ) + + self.assert_tables_equal(all_num_types, reflected_types) + + + # TODO: not working yet + def test_strtypes(self, metadata, connection): + meta = metadata + + all_num_types = Table( + "all_str_types", + meta, + Column("f_string", String), + ) + + meta.create_all(connection) + + meta2 = MetaData() + reflected_types = Table( + "all_str_types", meta2, autoload_with=connection + ) + + self.assert_tables_equal(all_str_types, reflected_types) + + + +class SimpleTest(fixtures.TablesTest, ComparesTables, AssertsExecutionResults): + # __only_on__ = "databricks" + + @classmethod + def define_tables(cls, metadata): + Table( + "simpletest_num", + metadata, + Column("f_byte", INT), + Column("f_short", SMALLINT), + Column("f_int", INT), + Column("f_long", BIGINT), + Column("f_float", FLOAT), + Column("f_decimal", DECIMAL), + Column("f_boolean", BOOLEAN), + test_needs_acid=False, + ) + + def test_select_type_byte(self, connection): + simpletest_num = self.tables.simpletest_num + stmt = select([simpletest_num.c.f_byte]) + + connection.execute(stmt) + + def test_select_type_inttype(self, connection): + simpletest_num = self.tables.simpletest_num + stmt = select([simpletest_num.c.f_int]) + + connection.execute(stmt) + + + def test_select_star_with_limit(self, connection): + simpletest_num = self.tables.simpletest_num + stmt = select([simpletest_num.c.f_byte]).limit(10) + + connection.execute(stmt) From c6c1322419ce997e5d9d32ec31467616aae30cc0 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 19 Jul 2022 14:48:30 -0700 Subject: [PATCH 05/30] remove secret in comment (secret revoked) --- Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 9c6d0ee01..29a64ce62 100644 --- a/Makefile +++ b/Makefile @@ -5,9 +5,6 @@ SUITE_PATH=tests/sqlalchemy SUITE=test_suite.py -# NB: add noglob when issuing this iteractively in zsh if you have globbing set -#--dburi "databricks+thrift://token:dapie08893e611277fabdd78a186a1331278@e2-dogfood.staging.cloud.databricks.com?http_path=/sql/protocolv1/o/6051921418418893/0819-204509-hill72" - all: full clean: drop_simpletest drop_reflectiontest @@ -28,10 +25,11 @@ simple: $(PYTEST) $(SUITE_PATH)/test_query.py::SimpleTest \ --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" - +# clean up after failed SimpleTest run drop_simpletest: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS simpletest_num;" +# TODO: if needs be drop_reflectiontest: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS reflectiontest;" From a594e7c0c7b0033688941bdbaca4ec091acaed05 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 19 Jul 2022 16:57:19 -0700 Subject: [PATCH 06/30] minor corrections --- Makefile | 4 
++-- env.template | 3 ++- tests/sqlalchemy/test_suite.py | 9 ++++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 29a64ce62..9117cd2e8 100644 --- a/Makefile +++ b/Makefile @@ -18,11 +18,11 @@ full: --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" reflection: - $(PYTEST) $(SUITE_PATH)/test_query.py::ReflectionTest \ + $(PYTEST) $(SUITE_PATH)/$(SUITE)::ReflectionTest \ --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" simple: - $(PYTEST) $(SUITE_PATH)/test_query.py::SimpleTest \ + $(PYTEST) $(SUITE_PATH)/$(SUITE)::SimpleTest \ --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" # clean up after failed SimpleTest run diff --git a/env.template b/env.template index 5c43330a3..eeaadd848 100644 --- a/env.template +++ b/env.template @@ -6,5 +6,6 @@ export DATABRICKS_HTTP_PATH=your-path export DATABRICKS_TOKEN=your-token export DATABRICKS_SCHEMA=default-or-something-else -# hackey dynamic breakpoint; delete the following if you do *not* want the breakpoint +# hackey dynamic breakpoint; delete the following and/or use unset if you do *not* want the breakpoint export DATABRICKS_DIALECT_DEBUG=True +#unset DATABRICKS_DIALECT_DEBUG \ No newline at end of file diff --git a/tests/sqlalchemy/test_suite.py b/tests/sqlalchemy/test_suite.py index ffe1546ad..4a73cea56 100644 --- a/tests/sqlalchemy/test_suite.py +++ b/tests/sqlalchemy/test_suite.py @@ -59,7 +59,7 @@ def test_numtypes(self, metadata, connection): # TODO: switch over to internal golden tables once all types are implemented all_num_types = Table( - "all_num_types", + "reflectiontest_all_num_types", meta, Column("f_short", SMALLINT), Column("f_int", INT), @@ -73,14 +73,16 @@ def test_numtypes(self, metadata, connection): meta2 = MetaData() reflected_types = Table( - "all_num_types", meta2, autoload_with=connection + "reflectiontest_all_num_types", meta2, autoload_with=connection ) self.assert_tables_equal(all_num_types, reflected_types) + meta.drop_all(connection) + # TODO: not working yet - def test_strtypes(self, metadata, connection): + def off_test_strtypes(self, metadata, connection): meta = metadata all_num_types = Table( @@ -98,6 +100,7 @@ def test_strtypes(self, metadata, connection): self.assert_tables_equal(all_str_types, reflected_types) + meta.drop_all(connection) class SimpleTest(fixtures.TablesTest, ComparesTables, AssertsExecutionResults): From 595178b41bcbed3f83e0159e5f48aa7ba04c19f4 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 19 Jul 2022 17:11:45 -0700 Subject: [PATCH 07/30] add cleanup and notes for self --- Makefile | 6 +++--- src/databricks/sqlalchemy/Makefile | 6 +++--- tests/sqlalchemy/test_suite.py | 2 ++ 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index 9117cd2e8..3628f5a1f 100644 --- a/Makefile +++ b/Makefile @@ -25,11 +25,11 @@ simple: $(PYTEST) $(SUITE_PATH)/$(SUITE)::SimpleTest \ --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" -# clean up after failed SimpleTest run +# clean up after SimpleTest run drop_simpletest: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS simpletest_num;" -# TODO: if needs be +# clean up after ReflectionTest run 
drop_reflectiontest: - echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS reflectiontest;" + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS reflectiontest_all_num_types;" diff --git a/src/databricks/sqlalchemy/Makefile b/src/databricks/sqlalchemy/Makefile index 5e82a3af7..82ab3271d 100644 --- a/src/databricks/sqlalchemy/Makefile +++ b/src/databricks/sqlalchemy/Makefile @@ -12,10 +12,10 @@ create: $(DBSCLI) -e create-table.sql describe: - $(DBSCLI) -e describe-table.sql + $(DBSCLI) -e "DESCRIBE george_chow_dbtest.sample_numtypes;" select: - $(DBSCLI) -e select-table.sql + $(DBSCLI) -e "SELECT * FROM george_chow_dbtest.sample_numtypes LIMIT 10;" drop: - echo y | $(DBSCLI) -e drop-table.sql \ No newline at end of file + echo y | $(DBSCLI) -e "DROP TABLE IF EXISTS george_chow_dbtest.sample_numtypes;" \ No newline at end of file diff --git a/tests/sqlalchemy/test_suite.py b/tests/sqlalchemy/test_suite.py index 4a73cea56..43a7e433c 100644 --- a/tests/sqlalchemy/test_suite.py +++ b/tests/sqlalchemy/test_suite.py @@ -78,6 +78,7 @@ def test_numtypes(self, metadata, connection): self.assert_tables_equal(all_num_types, reflected_types) + # TODO: this drop_all isn't working meta.drop_all(connection) @@ -120,6 +121,7 @@ def define_tables(cls, metadata): Column("f_boolean", BOOLEAN), test_needs_acid=False, ) + # TODO: why is the cleanup of this table not happening? def test_select_type_byte(self, connection): simpletest_num = self.tables.simpletest_num From d7e72a8e9bff534ccff645aacc670ee2243e88c2 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 19 Jul 2022 18:12:39 -0700 Subject: [PATCH 08/30] add sample programs & prelim README --- src/databricks/sqlalchemy/Makefile | 10 ++ src/databricks/sqlalchemy/README.md | 130 ++++++++++++++++++ .../sqlalchemy/create-table-extra.sql | 25 ++++ .../sqlalchemy/create-table-reseq.sql | 24 ++++ .../sqlalchemy/create-table-wrong.sql | 24 ++++ .../sqlalchemy/sample-app-reflection.py | 38 +++++ .../sqlalchemy/sample-app-select.py | 22 ++- src/databricks/sqlalchemy/select-table.sql | 2 +- 8 files changed, 270 insertions(+), 5 deletions(-) create mode 100644 src/databricks/sqlalchemy/README.md create mode 100644 src/databricks/sqlalchemy/create-table-extra.sql create mode 100644 src/databricks/sqlalchemy/create-table-reseq.sql create mode 100644 src/databricks/sqlalchemy/create-table-wrong.sql create mode 100644 src/databricks/sqlalchemy/sample-app-reflection.py diff --git a/src/databricks/sqlalchemy/Makefile b/src/databricks/sqlalchemy/Makefile index 82ab3271d..dcdfb3a19 100644 --- a/src/databricks/sqlalchemy/Makefile +++ b/src/databricks/sqlalchemy/Makefile @@ -5,12 +5,22 @@ sample1: $(PY) sample-app-select.py sample2: + $(PY) sample-app-reflection.py clean: drop create: $(DBSCLI) -e create-table.sql +create-reseq: + $(DBSCLI) -e create-table-reseq.sql + +create-extra: + $(DBSCLI) -e create-table-extra.sql + +create-wrong: + $(DBSCLI) -e create-table-wrong.sql + describe: $(DBSCLI) -e "DESCRIBE george_chow_dbtest.sample_numtypes;" diff --git a/src/databricks/sqlalchemy/README.md b/src/databricks/sqlalchemy/README.md new file mode 100644 index 000000000..9bf377afb --- /dev/null +++ b/src/databricks/sqlalchemy/README.md @@ -0,0 +1,130 @@ +# Introduction + +This is a work-in-progress SQLAlchemy dialect for Databricks. + +The dialect is embedded within the Databricks SQL Connector. + +## Connection String + +Using the dialect requires the following: + +1. SQL Warehouse hostname +2. Endpoint (the warehouse's `http_path`) +3. 
Access token + +The schema `default` is used unless an alternate is specified via _Default-schema_. + +The connection string is constructed as follows: + +`databricks+thrift://token:`_Access-token_`@`_SQL-warehouse-hostname_`/`_Default-schema_`?http_path=`_Endpoint_ + + +## Data Types + +|Databricks type| SQLAlchemy type | Extra| +|:-|:-|:-| + `smallint` | `integer` | + `int` | `integer` | + `bigint` | `biginteger` | + `float` | `float` | + `decimal` | `decimal` | + `boolean` | `boolean` | + `string` | WIP | + `date` | WIP | + `timestamp` | WIP | + + + +## Sample Code + +The focus of this dialect is enabling SQLAlchemy Core (as opposed to SQLAlchemy ORM). + + + +### The Simplest Program + +A program (see [`sample-app-select.py`](https://github.com/overcoil/fork-databricks-sql-python/blob/sqlalchemy-dev/src/databricks/sqlalchemy/sample-app-select.py)) to read from a Databricks table looks roughly as follows: + +```Python +import os + +from sqlalchemy import create_engine +from sqlalchemy import MetaData +from sqlalchemy import Table, Column, Integer, BigInteger, Float, Boolean +from sqlalchemy import select + +# pickup settings from the env +server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME") +http_path = os.getenv("DATABRICKS_HTTP_PATH") +access_token = os.getenv("DATABRICKS_TOKEN") +default_schema = os.getenv("DATABRICKS_SCHEMA") + +# use echo=True for verbose log +engine = create_engine(f"databricks+thrift://token:{access_token}@{server_hostname}/{default_schema}?http_path={http_path}", echo=False, future=True) + +metadata_obj = MetaData() + +# NB: sample_numtypes is a pre-created/populated table +tableName = "sample_numtypes" + +# declare the schema we're expecting +numtypes = Table( + tableName, + metadata_obj, + Column('f_byte', Integer), + Column('f_short', Integer), + Column('f_int', Integer), + Column('f_long', BigInteger), + Column('f_float', Float), + Column('f_decimal', Float), + Column('f_boolean', Boolean) +) + +# SELECT * FROM t WHERE f_byte = -125 +stmt = select(numtypes).where(numtypes.c.f_byte == -125) +print(f"Attempting to execute: {stmt}\n") + +print(f"Rows from table {tableName}") + +with engine.connect() as conn: + for row in conn.execute(stmt): + print(row) +``` + + +### Table definition via reflection +Reflection may be used to recover the schema of a table dynamically via [the `Table` constructor's `autoload_with` parameter](https://docs.sqlalchemy.org/en/14/core/reflection.html). + +```Python +some_table = Table("some_table", metadata_obj, autoload_with=engine) +stmt = select(some_table).where(some_table.c.f_byte == -125) +... +``` + +### INSERT statement + +A minimal sketch (assuming the `numtypes` table and `engine` from above; INSERT support is still being exercised): + +```Python +from sqlalchemy import insert + +# values mirror a row of the pre-populated sample_numtypes table +stmt = insert(numtypes).values(f_byte=125, f_int=2001002003, f_boolean=True) +with engine.connect() as conn: + conn.execute(stmt) + conn.commit() +``` + +### Unmanaged table creation +```Python +# TODO +metadata_obj = MetaData() +user_table = Table( + "user_account", + metadata_obj, + Column('id', Integer, primary_key=True), + Column('name', String(30)), + Column('fullname', String) +) +metadata_obj.create_all(engine) +``` + +### Direct access to Spark SQL +```Python +# TODO: does this work? 
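+# (untested sketch: text() forwards the raw statement through the dialect +# unchanged, so any SparkSQL the warehouse accepts, e.g. Delta's VACUUM, +# should pass through)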
+with engine.connect() as conn: + result = conn.execute(text("VACUUM tablename")) + print(result.all()) +``` + diff --git a/src/databricks/sqlalchemy/create-table-extra.sql b/src/databricks/sqlalchemy/create-table-extra.sql new file mode 100644 index 000000000..beadefee4 --- /dev/null +++ b/src/databricks/sqlalchemy/create-table-extra.sql @@ -0,0 +1,25 @@ +/* alternate table/data for sample app */ + +USE george_chow_dbtest; + +CREATE TABLE sample_numtypes + ( + f_byte BYTE, + f_short SHORT, + f_int INT, + f_long LONG, + f_float FLOAT, + f_decimal DECIMAL(10,2), + f_boolean BOOLEAN, + f_extra INT + ); + +INSERT INTO sample_numtypes VALUES + ( 125, 32700, 2001002003, 9001002003004005006, 1E30, 1.5, TRUE, 17 ), + ( -125, -32700, -2001002003, -9001002003004005006, 1E-30, -1.5, FALSE, 42 ), + ( 125, 32700, 2001002003, 9001002003004005006, -1E30, 1.5, TRUE, 11 ); + +SELECT * FROM sample_numtypes; + +DESCRIBE sample_numtypes; + diff --git a/src/databricks/sqlalchemy/create-table-reseq.sql b/src/databricks/sqlalchemy/create-table-reseq.sql new file mode 100644 index 000000000..9cd62e9b4 --- /dev/null +++ b/src/databricks/sqlalchemy/create-table-reseq.sql @@ -0,0 +1,24 @@ +/* alternate table/data for sample app */ + +USE george_chow_dbtest; + +CREATE TABLE sample_numtypes + ( + f_byte BYTE, + f_boolean BOOLEAN, + f_short SHORT, + f_int INT, + f_long LONG, + f_float FLOAT, + f_decimal DECIMAL(10,2) + ); + +INSERT INTO sample_numtypes VALUES + ( 125, TRUE, 32700, 2001002003, 9001002003004005006, 1E30, 1.5 ), + ( -125, FALSE, -32700, -2001002003, -9001002003004005006, 1E-30, -1.5 ), + ( 125, TRUE, 32700, 2001002003, 9001002003004005006, -1E30, 1.5 ); + +SELECT * FROM sample_numtypes; + +DESCRIBE sample_numtypes; + diff --git a/src/databricks/sqlalchemy/create-table-wrong.sql b/src/databricks/sqlalchemy/create-table-wrong.sql new file mode 100644 index 000000000..9c4931e33 --- /dev/null +++ b/src/databricks/sqlalchemy/create-table-wrong.sql @@ -0,0 +1,24 @@ +/* table/data for sample app */ + +USE george_chow_dbtest; + +CREATE TABLE sample_numtypes + ( + f_byte BOOLEAN, + f_short BOOLEAN, + f_int BOOLEAN, + f_long BOOLEAN, + f_float BOOLEAN, + f_decimal DECIMAL(10,2), + f_boolean INT + ); + +INSERT INTO sample_numtypes VALUES + ( 125, 32700, 2001002003, 9001002003004005006, 1E30, 1.5, TRUE ), + ( -125, -32700, -2001002003, -9001002003004005006, 1E-30, -1.5, FALSE ), + ( 125, 32700, 2001002003, 9001002003004005006, -1E30, 1.5, TRUE ); + +SELECT * FROM sample_numtypes; + +DESCRIBE sample_numtypes; + diff --git a/src/databricks/sqlalchemy/sample-app-reflection.py b/src/databricks/sqlalchemy/sample-app-reflection.py new file mode 100644 index 000000000..9a2267119 --- /dev/null +++ b/src/databricks/sqlalchemy/sample-app-reflection.py @@ -0,0 +1,38 @@ +# sample-app-reflection +# +# Program to demonstrate use of reflection instead of explicit declaration +# + +import os + +from sqlalchemy import create_engine +from sqlalchemy import MetaData +from sqlalchemy import Table, Column, Integer, BigInteger, Float, Boolean +from sqlalchemy import select + +# pickup settings from the env +server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME") +http_path = os.getenv("DATABRICKS_HTTP_PATH") +access_token = os.getenv("DATABRICKS_TOKEN") +default_schema = os.getenv("DATABRICKS_SCHEMA") + +# use echo=True for verbose log +with create_engine(f"databricks+thrift://token:{access_token}@{server_hostname}/{default_schema}?http_path={http_path}", echo=False, future=True).connect() as conn: + metadata_obj = MetaData() + + # NB: 
sample_numtypes is a pre-created/populated table + tableName = "sample_numtypes" + + # use reflection here to discover the schema dynamically + numtypes = Table( + "sample_numtypes", metadata_obj, autoload_with=conn + ) + + # SELECT * FROM t WHERE f_byte = -125 + stmt = select(numtypes).where(numtypes.c.f_byte == -125) + print(f"Attempting to execute: {stmt}\n") + + print(f"Rows from table {tableName}") + + for row in conn.execute(stmt): + print(row) diff --git a/src/databricks/sqlalchemy/sample-app-select.py b/src/databricks/sqlalchemy/sample-app-select.py index 78cc69ae7..d5aa72b5b 100644 --- a/src/databricks/sqlalchemy/sample-app-select.py +++ b/src/databricks/sqlalchemy/sample-app-select.py @@ -1,4 +1,8 @@ -# SELECT statement +# sample-app-select.py +# +# Program to demonstrate the simplest SELECT statement +# + import os from sqlalchemy import create_engine @@ -6,16 +10,22 @@ from sqlalchemy import Table, Column, Integer, BigInteger, Float, Boolean from sqlalchemy import select +# pickup settings from the env server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME") http_path = os.getenv("DATABRICKS_HTTP_PATH") access_token = os.getenv("DATABRICKS_TOKEN") default_schema = os.getenv("DATABRICKS_SCHEMA") -engine = create_engine("databricks+thrift://token:{access_token}@{server_hostname}/{default_schema}?http_path={http_path}", echo=True, future=True) +# use echo=True for verbose log +engine = create_engine(f"databricks+thrift://token:{access_token}@{server_hostname}/{default_schema}?http_path={http_path}", echo=False, future=True) metadata_obj = MetaData() + +# NB: sample_numtypes is a pre-created/populated table +tableName = "sample_numtypes" + numtypes = Table( - "sample_numtypes", + tableName, metadata_obj, Column('f_byte', Integer), Column('f_short', Integer), @@ -26,8 +36,12 @@ Column('f_boolean', Boolean) ) +# SELECT * FROM t WHERE f_byte = -125 stmt = select(numtypes).where(numtypes.c.f_byte == -125) -print(stmt) +print(f"Attempting to execute: {stmt}\n") + +print(f"Rows from table {tableName}") + with engine.connect() as conn: for row in conn.execute(stmt): print(row) diff --git a/src/databricks/sqlalchemy/select-table.sql b/src/databricks/sqlalchemy/select-table.sql index 01096375c..d3773ef63 100644 --- a/src/databricks/sqlalchemy/select-table.sql +++ b/src/databricks/sqlalchemy/select-table.sql @@ -2,6 +2,6 @@ USE george_chow_dbtest; -SELECT * FROM sample_numtypes; +SELECT * FROM sample_numtypes LIMIT 10; From 59bf8b797464a0064999e61a79614cb6331f0476 Mon Sep 17 00:00:00 2001 From: George Chow Date: Thu, 21 Jul 2022 18:16:22 -0700 Subject: [PATCH 09/30] add prelim support for string and derived types --- Makefile | 10 ++ src/databricks/sqlalchemy/Makefile | 20 +++- src/databricks/sqlalchemy/create-table-sa.sql | 48 ++++++++ src/databricks/sqlalchemy/create-table.sql | 30 ++++- src/databricks/sqlalchemy/dialect.py | 109 ++++++++++++------ .../sqlalchemy/sample-app-reflection.py | 2 +- .../sqlalchemy/sample-app-select.py | 40 +++++-- tests/sqlalchemy/test_suite.py | 68 +++++++++-- 8 files changed, 269 insertions(+), 58 deletions(-) create mode 100644 src/databricks/sqlalchemy/create-table-sa.sql diff --git a/Makefile b/Makefile index 3628f5a1f..fe182eeab 100644 --- a/Makefile +++ b/Makefile @@ -21,6 +21,14 @@ reflection: $(PYTEST) $(SUITE_PATH)/$(SUITE)::ReflectionTest \ --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" +num: + $(PYTEST) $(SUITE_PATH)/$(SUITE)::ReflectionTest::test_numtypes 
\ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +str: + $(PYTEST) $(SUITE_PATH)/$(SUITE)::ReflectionTest::test_strtypes \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + simple: $(PYTEST) $(SUITE_PATH)/$(SUITE)::SimpleTest \ --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" @@ -28,8 +36,10 @@ simple: # clean up after SimpleTest run drop_simpletest: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS simpletest_num;" + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS simpletest_str;" # clean up after ReflectionTest run drop_reflectiontest: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS reflectiontest_all_num_types;" + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS reflectiontest_all_str_types;" diff --git a/src/databricks/sqlalchemy/Makefile b/src/databricks/sqlalchemy/Makefile index dcdfb3a19..e47737393 100644 --- a/src/databricks/sqlalchemy/Makefile +++ b/src/databricks/sqlalchemy/Makefile @@ -9,7 +9,9 @@ sample2: clean: drop +# caution: create table in the correct schema! create: + # $(DBSCLI) -e create-table-sa.sql $(DBSCLI) -e create-table.sql create-reseq: @@ -21,11 +23,23 @@ create-extra: create-wrong: $(DBSCLI) -e create-table-wrong.sql +create-db: + $(DBSCLI) -e "CREATE DATABASE george_chow_satest;" + +drop-db: + $(DBSCLI) -e "DROP DATABASE IF EXISTS george_chow_satest;" + +showtables: + $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); show tables;" + describe: - $(DBSCLI) -e "DESCRIBE george_chow_dbtest.sample_numtypes;" + $(DBSCLI) -e "DESCRIBE $(DATABRICKS_SCHEMA).sample_numtypes;" + $(DBSCLI) -e "DESCRIBE $(DATABRICKS_SCHEMA).sample_strtypes;" select: - $(DBSCLI) -e "SELECT * FROM george_chow_dbtest.sample_numtypes LIMIT 10;" + $(DBSCLI) -e "SELECT * FROM $(DATABRICKS_SCHEMA).sample_numtypes LIMIT 10;" + $(DBSCLI) -e "SELECT * FROM $(DATABRICKS_SCHEMA).sample_strtypes LIMIT 10;" drop: - echo y | $(DBSCLI) -e "DROP TABLE IF EXISTS george_chow_dbtest.sample_numtypes;" \ No newline at end of file + echo y | $(DBSCLI) -e "DROP TABLE IF EXISTS $(DATABRICKS_SCHEMA).sample_numtypes;" + echo y | $(DBSCLI) -e "DROP TABLE IF EXISTS $(DATABRICKS_SCHEMA).sample_strtypes;" \ No newline at end of file diff --git a/src/databricks/sqlalchemy/create-table-sa.sql b/src/databricks/sqlalchemy/create-table-sa.sql new file mode 100644 index 000000000..1f1c479ae --- /dev/null +++ b/src/databricks/sqlalchemy/create-table-sa.sql @@ -0,0 +1,48 @@ +/* table/data for sample app */ + +USE george_chow_satest; + +DROP TABLE IF EXISTS sample_numtypes; + +CREATE TABLE sample_numtypes + ( + f_byte BYTE, + f_short SHORT, + f_int INT, + f_long LONG, + f_float FLOAT, + f_decimal DECIMAL(10,2), + f_boolean BOOLEAN + ); + +INSERT INTO sample_numtypes VALUES + ( 125, 32700, 2001002003, 9001002003004005006, 1E30, 1.5, TRUE ), + ( -125, -32700, -2001002003, -9001002003004005006, 1E-30, -1.5, FALSE ), + ( 125, 32700, 2001002003, 9001002003004005006, -1E30, 1.5, TRUE ); + +SELECT * FROM sample_numtypes; + +DESCRIBE sample_numtypes; + + +DROP TABLE IF EXISTS sample_strtypes; + +CREATE TABLE sample_strtypes + ( + f_event STRING, + f_date DATE, + f_timestamp TIMESTAMP, + f_interval INTERVAL DAY TO SECOND + ); + +INSERT INTO sample_strtypes VALUES + ( 'Everest', 
'1953-05-29', '1953-05-29T11:30', '3 0:0:0' ), + ( 'Mariana Trench', '1960-01-23', '1960-01-23T13:06', '0 5:00:00' ), + ( 'Moon landing', '1969-07-20', '1969-07-20T20:17', '0 21:36:0' ); + +SELECT * FROM sample_strtypes; + +DESCRIBE sample_strtypes; + + + diff --git a/src/databricks/sqlalchemy/create-table.sql b/src/databricks/sqlalchemy/create-table.sql index 5cf40e61b..73c900b94 100644 --- a/src/databricks/sqlalchemy/create-table.sql +++ b/src/databricks/sqlalchemy/create-table.sql @@ -2,23 +2,45 @@ USE george_chow_dbtest; -CREATE TABLE sample_numtypes +DROP TABLE IF EXISTS sample_numtypes; + +CREATE TABLE IF NOT EXISTS sample_numtypes ( f_byte BYTE, f_short SHORT, f_int INT, f_long LONG, f_float FLOAT, + f_double DOUBLE, f_decimal DECIMAL(10,2), f_boolean BOOLEAN ); INSERT INTO sample_numtypes VALUES - ( 125, 32700, 2001002003, 9001002003004005006, 1E30, 1.5, TRUE ), - ( -125, -32700, -2001002003, -9001002003004005006, 1E-30, -1.5, FALSE ), - ( 125, 32700, 2001002003, 9001002003004005006, -1E30, 1.5, TRUE ); + ( 125, 32700, 2001002003, 9001002003004005006, 1E30, 1E308, 1.5, TRUE ), + ( -125, -32700, -2001002003, -9001002003004005006, 1E-30, 1E-308, -1.5, FALSE ), + ( 125, 32700, 2001002003, 9001002003004005006, -1E30, -1E308, 1.5, TRUE ); SELECT * FROM sample_numtypes; DESCRIBE sample_numtypes; + +DROP TABLE IF EXISTS sample_strtypes; + +CREATE TABLE sample_strtypes + ( + f_string STRING, + f_date DATE, + f_timestamp TIMESTAMP, + f_interval INTERVAL DAY TO SECOND + ); + +INSERT INTO sample_strtypes VALUES + ( 'Everest', '1953-05-29', '1953-05-29T11:30', '3 0:0:0' ), + ( 'Mariana Trench', '1960-01-23', '1960-01-23T13:06', '0 5:00:00' ), + ( 'Moon landing', '1969-07-20', '1969-07-20T20:17', '0 21:36:0' ); + +SELECT * FROM sample_strtypes; + +DESCRIBE sample_strtypes; diff --git a/src/databricks/sqlalchemy/dialect.py b/src/databricks/sqlalchemy/dialect.py index a9ccc74a5..f23878b61 100644 --- a/src/databricks/sqlalchemy/dialect.py +++ b/src/databricks/sqlalchemy/dialect.py @@ -5,10 +5,15 @@ # cribbing from Hive from pyhive.sqlalchemy_hive import HiveExecutionContext, HiveIdentifierPreparer, HiveCompiler, HiveTypeCompiler +from pyhive.sqlalchemy_hive import HiveDate, HiveTimestamp import re from sqlalchemy import types + +from sqlalchemy.types import Integer, BigInteger, SmallInteger, Float, DECIMAL, Boolean; +from sqlalchemy.types import String; + from sqlalchemy import util from sqlalchemy import exc @@ -47,6 +52,7 @@ class DatabricksExecutionContext(default.DefaultExecutionContext): class DatabricksTypeCompiler(compiler.GenericTypeCompiler): # ref: https://spark.apache.org/docs/latest/sql-ref-datatypes.html + # TODO: we are leveraging MySQL's impl def visit_TINYINT(self, type_): return 'TINYINT' @@ -63,10 +69,10 @@ def visit_BIGINT(self, type_, **kw): def visit_FLOAT(self, type_, **kw): return "FLOAT" + # TODO: we are leveraging MySQL's impl def visit_DOUBLE(self, type_, **kw): return "DOUBLE" - def visit_DECIMAL(self, type_, **kw): if type_.precision is None: return "DECIMAL" @@ -78,13 +84,15 @@ def visit_DECIMAL(self, type_, **kw): "scale": type_.scale, } - def visit_DATE(self, type_, **kw): return "DATE" def visit_TIMESTAMP(self, type_, **kw): return "TIMESTAMP" + def visit_DATETIME(self, type_, **kw): + # TODO: incomplete? + return "INTERVAL" def visit_BOOLEAN(self, type_, **kw): return "BOOLEAN" @@ -92,6 +100,10 @@ def visit_BOOLEAN(self, type_, **kw): def visit_STRING(self, type_, **kw): return "STRING" + # TODO: why is this needed even though there's no occurence of VARCHAR? 
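+    # (likely because SQLAlchemy's generic String type renders as VARCHAR by default, so DDL for String columns lands in this visitor even though no column is declared VARCHAR explicitly)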
+ def visit_VARCHAR(self, type_, **kw): + return "STRING" + class DatabricksCompiler(compiler.SQLCompiler): # stub @@ -99,18 +111,42 @@ class DatabricksCompiler(compiler.SQLCompiler): -# we use DATA_TYPE because TYPE_NAME is instance-specific (e.g. DECIMAL is reported as DECIMAL(precision, scale)) +# This lookup table is by DATA_TYPE and is rather nice since Decimal can be detected directly. +# However, as DATA_TYPE is rather obtuse... and potentially risky going forward, we switched to use COLUMN_TYPE_NAME instead (the table below) +# _type_map = { +# -6: types.Integer, # tiny_int +# 5: types.Integer, # small_int +# 4: types.Integer, # int +# -5: types.BigInteger, # big_int +# 6: types.Float, +# 3: types.DECIMAL, +# 16: types.Boolean, +# 12: types.String, +# 91: HiveDate, # date +# 93: HiveTimestamp, # timestamp +# # TODO: interval +# } + + +# This lookup is by TYPE_NAME which is easier to maintain and likely safer in the long term. +# NB: Decimal is explicitly excluded here as an occurence's TYPE_NAME includes its precision and scale +# See/refer to COLUMN_TYPE_DECIMAL below. + _type_map = { - 5: types.Integer, # small_int - 4: types.Integer, # int - -5: types.BigInteger, # big_int - 6: types.Float, - 3: types.DECIMAL, - 16: types.Boolean, - 'string': types.String, - # 'date': HiveDate, - # 'timestamp': HiveTimestamp, + 'TINYINT': types.Integer, # tiny_int + 'SMALLINT': types.Integer, # small_int + 'INT': types.Integer, # int + 'BIGINT': types.BigInteger, # big_int + 'FLOAT': types.Float, + 'DOUBLE': types.Float, # double fits into a Python float + 'BOOLEAN': types.Boolean, + 'STRING': types.String, + 'DATE': HiveDate, # date + 'TIMESTAMP': HiveTimestamp, # timestamp + # TODO: interval } +# this is used to match decimal's DATA_TYPE; it will map to types.DECIMAL +COLUMN_TYPE_DECIMAL=3 class DatabricksDialect(default.DefaultDialect): @@ -170,8 +206,8 @@ def get_schema_names(self, connection, **kwargs): # TODO: look up correct index for TABLE_SCHEM TABLE_SCHEM = 2 - if debugbreakpoint: - breakpoint() + # if debugbreakpoint: + # breakpoint() with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: data = cur.schemas(catalog_name='%').fetchall() _schemas = [i[TABLE_SCHEM] for i in data] @@ -179,8 +215,8 @@ def get_schema_names(self, connection, **kwargs): return _schemas def get_table_names(self, connection, schema = None, **kwargs): - if debugbreakpoint: - breakpoint() + # if debugbreakpoint: + # breakpoint() TABLE_NAME = 2 with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: @@ -190,34 +226,41 @@ def get_table_names(self, connection, schema = None, **kwargs): return _tables - # Example row - # Row(TABLE_CAT='hive_metastore', TABLE_SCHEM='george_chow_dbtest', TABLE_NAME='all_types', COLUMN_NAME='f_byte', DATA_TYPE=4, - # TYPE_NAME='INT', COLUMN_SIZE=4, BUFFER_LENGTH=None, DECIMAL_DIGITS=0, NUM_PREC_RADIX=10, - # NULLABLE=1, REMARKS='', COLUMN_DEF=None, SQL_DATA_TYPE=None, SQL_DATETIME_SUB=None, - # CHAR_OCTET_LENGTH=None, ORDINAL_POSITION=0, IS_NULLABLE='YES', SCOPE_CATALOG=None, SCOPE_SCHEMA=None, - # SCOPE_TABLE=None, SOURCE_DATA_TYPE=None, IS_AUTO_INCREMENT='NO') def get_columns(self, connection, table_name, schema=None, **kwargs): + # Example row + # Row(TABLE_CAT='hive_metastore', TABLE_SCHEM='george_chow_dbtest', TABLE_NAME='all_types', COLUMN_NAME='f_byte', DATA_TYPE=4, + # TYPE_NAME='INT', COLUMN_SIZE=4, BUFFER_LENGTH=None, DECIMAL_DIGITS=0, NUM_PREC_RADIX=10, + # NULLABLE=1, REMARKS='', 
COLUMN_DEF=None, SQL_DATA_TYPE=None, SQL_DATETIME_SUB=None, + # CHAR_OCTET_LENGTH=None, ORDINAL_POSITION=0, IS_NULLABLE='YES', SCOPE_CATALOG=None, SCOPE_SCHEMA=None, + # SCOPE_TABLE=None, SOURCE_DATA_TYPE=None, IS_AUTO_INCREMENT='NO') COLUMN_NAME=3 COLUMN_TYPE_NAME=5 - COLUMN_TYPE=4 # by DATA_TYPE + COLUMN_TYPE=4 COLUMN_NULLABLE=17 COLUMN_COMMENT=11 COLUMN_AUTOINCREMENT=22 + result = [] with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: - if debugbreakpoint: - breakpoint() + # if debugbreakpoint: + # breakpoint() data = cur.columns(schema_name=schema, table_name=table_name).fetchall() for i in data: - if debugbreakpoint: - breakpoint() + # if debugbreakpoint: + # breakpoint() # filled-in according to interfaces.py's class ReflectedColumn(TypedDict): try: - coltype = _type_map[i[COLUMN_TYPE]] + if (i[COLUMN_TYPE] != COLUMN_TYPE_DECIMAL): + coltype = _type_map[i[COLUMN_TYPE_NAME]] + else: + # special processing needed as DECIMAL's COLUMN_TYPE includes the precision/scale of this occurrence + coltype = types.DECIMAL; + + # coltype = _type_map[i[COLUMN_TYPE]] except KeyError: - util.warn("Did not recognize type '%s'('%s') of column '%s'" % (i[COLUMN_TYPE_NAME], i[COLUMN_TYPE], i[COLUMN_NAME])) + util.warn(f"Did not recognize type '{i[COLUMN_TYPE_NAME]}'({i[COLUMN_TYPE]}) of column '{i[COLUMN_NAME]}'") coltype = types.NullType try: @@ -252,15 +295,13 @@ def has_table( schema = None, **kwargs, ) -> bool: - if debugbreakpoint: - breakpoint() + # if debugbreakpoint: + # breakpoint() try: COLUMN_NAME=3 with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: data = cur.columns(schema_name=schema or 'default', table_name=table_name).fetchmany(1) - # as long as we have some number of columns, the table exists! 
- if debugbreakpoint: - breakpoint() + # the table exists as long as there's a non-zero number of columns return len(data) > 0 except exc.NoSuchTableError: return False diff --git a/src/databricks/sqlalchemy/sample-app-reflection.py b/src/databricks/sqlalchemy/sample-app-reflection.py index 9a2267119..afdec82ce 100644 --- a/src/databricks/sqlalchemy/sample-app-reflection.py +++ b/src/databricks/sqlalchemy/sample-app-reflection.py @@ -1,4 +1,4 @@ -# sample-app-reflection +# sample-app-reflection.py # # Program to demonstrate use of reflection instead of explicit declaration # diff --git a/src/databricks/sqlalchemy/sample-app-select.py b/src/databricks/sqlalchemy/sample-app-select.py index d5aa72b5b..41d63db47 100644 --- a/src/databricks/sqlalchemy/sample-app-select.py +++ b/src/databricks/sqlalchemy/sample-app-select.py @@ -7,8 +7,10 @@ from sqlalchemy import create_engine from sqlalchemy import MetaData -from sqlalchemy import Table, Column, Integer, BigInteger, Float, Boolean -from sqlalchemy import select +from sqlalchemy import select, Table, Column +from sqlalchemy import SMALLINT, Integer, BigInteger, Float, DECIMAL, BOOLEAN +from sqlalchemy.dialects.mysql.types import TINYINT, DOUBLE # borrow MySQL's impls +from sqlalchemy import String, DATE, TIMESTAMP # pickup settings from the env server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME") @@ -22,26 +24,46 @@ metadata_obj = MetaData() # NB: sample_numtypes is a pre-created/populated table -tableName = "sample_numtypes" +t1 = "sample_numtypes" numtypes = Table( - tableName, + t1, metadata_obj, - Column('f_byte', Integer), - Column('f_short', Integer), + Column('f_byte', TINYINT), + Column('f_short', SMALLINT), Column('f_int', Integer), Column('f_long', BigInteger), Column('f_float', Float), - Column('f_decimal', Float), - Column('f_boolean', Boolean) + Column('f_double', DOUBLE), + Column('f_decimal', DECIMAL), + Column('f_boolean', BOOLEAN) ) # SELECT * FROM t WHERE f_byte = -125 stmt = select(numtypes).where(numtypes.c.f_byte == -125) print(f"Attempting to execute: {stmt}\n") -print(f"Rows from table {tableName}") +print(f"Rows from table {t1}") with engine.connect() as conn: for row in conn.execute(stmt): print(row) + + +# NB: sample_strtypes is a pre-created/populated table +t2 = "sample_strtypes" + +with engine.connect() as conn: + strtypes = Table( + t2, + metadata_obj, + autoload_with=conn + ) + + # SELECT * FROM t + stmt = select(strtypes) + print(f"Attempting to execute: {stmt}\n") + + print(f"Rows from table {t2}") + for row in conn.execute(stmt): + print(row) diff --git a/tests/sqlalchemy/test_suite.py b/tests/sqlalchemy/test_suite.py index 43a7e433c..843892a1f 100644 --- a/tests/sqlalchemy/test_suite.py +++ b/tests/sqlalchemy/test_suite.py @@ -1,5 +1,7 @@ import datetime +import os +from sqlalchemy.dialects.mysql.types import TINYINT, DOUBLE # borrow MySQL's impls from sqlalchemy import BIGINT from sqlalchemy import BOOLEAN from sqlalchemy import DATE @@ -52,8 +54,12 @@ from sqlalchemy.testing.util import resolve_lambda +# provide a way to break in +debugbreakpoint = os.getenv("DATABRICKS_DIALECT_DEBUG") or False + class ReflectionTest(fixtures.TablesTest, ComparesTables): + def test_numtypes(self, metadata, connection): meta = metadata @@ -61,10 +67,13 @@ def test_numtypes(self, metadata, connection): all_num_types = Table( "reflectiontest_all_num_types", meta, + # the types below represent dialect-specific implementations that handles serialization + Column("f_byte", TINYINT), Column("f_short", SMALLINT), 
Column("f_int", INT), Column("f_long", BIGINT), Column("f_float", FLOAT), + Column("f_double", DOUBLE), Column("f_decimal", DECIMAL(9,3)), Column("f_boolean", BOOLEAN), ) @@ -83,20 +92,23 @@ def test_numtypes(self, metadata, connection): # TODO: not working yet - def off_test_strtypes(self, metadata, connection): + def test_strtypes(self, metadata, connection): meta = metadata - all_num_types = Table( - "all_str_types", + all_str_types = Table( + "reflectiontest_all_str_types", meta, Column("f_string", String), + Column("f_date", DATE), + Column("f_timestamp", TIMESTAMP), + # Column("f_interval", Interval), ) meta.create_all(connection) meta2 = MetaData() reflected_types = Table( - "all_str_types", meta2, autoload_with=connection + "reflectiontest_all_str_types", meta2, autoload_with=connection ) self.assert_tables_equal(all_str_types, reflected_types) @@ -112,16 +124,27 @@ def define_tables(cls, metadata): Table( "simpletest_num", metadata, - Column("f_byte", INT), + Column("f_byte", TINYINT), Column("f_short", SMALLINT), Column("f_int", INT), Column("f_long", BIGINT), Column("f_float", FLOAT), + Column("f_double", DOUBLE), Column("f_decimal", DECIMAL), Column("f_boolean", BOOLEAN), test_needs_acid=False, ) - # TODO: why is the cleanup of this table not happening? + + Table( + "simpletest_str", + metadata, + Column("f_string", String), + Column("f_date", DATE), + Column("f_timestamp", TIMESTAMP), + test_needs_acid=False, + ) + + # TODO: why is the cleanup of these tables not happening? def test_select_type_byte(self, connection): simpletest_num = self.tables.simpletest_num @@ -129,15 +152,46 @@ def test_select_type_byte(self, connection): connection.execute(stmt) - def test_select_type_inttype(self, connection): + def test_select_type_smallint(self, connection): + simpletest_num = self.tables.simpletest_num + stmt = select([simpletest_num.c.f_short]) + + connection.execute(stmt) + + def test_select_type_int(self, connection): simpletest_num = self.tables.simpletest_num stmt = select([simpletest_num.c.f_int]) connection.execute(stmt) + def test_select_type_bigint(self, connection): + simpletest_num = self.tables.simpletest_num + stmt = select([simpletest_num.c.f_long]) + + connection.execute(stmt) + + def test_select_type_float(self, connection): + simpletest_num = self.tables.simpletest_num + stmt = select([simpletest_num.c.f_float]) + + connection.execute(stmt) + + def test_select_type_double(self, connection): + simpletest_num = self.tables.simpletest_num + stmt = select([simpletest_num.c.f_double]) + + connection.execute(stmt) + def test_select_star_with_limit(self, connection): simpletest_num = self.tables.simpletest_num stmt = select([simpletest_num.c.f_byte]).limit(10) connection.execute(stmt) + + + def test_select_type_string(self, connection): + t = self.tables.simpletest_str + stmt = select([t.c.f_string]).limit(10) + + connection.execute(stmt) From 44a9e32579ac5f95573f4d3f1540a101b670fbd3 Mon Sep 17 00:00:00 2001 From: George Chow Date: Fri, 22 Jul 2022 16:22:30 -0700 Subject: [PATCH 10/30] tidy up for the week; pulled out partial interval support for the while --- Makefile | 13 ++ env.template | 2 +- src/databricks/sqlalchemy/Makefile | 20 ++- src/databricks/sqlalchemy/create-table.sql | 14 +- src/databricks/sqlalchemy/dialect.py | 153 ++++++++++++------ .../sqlalchemy/sample-app-reflection.py | 4 +- .../sqlalchemy/sample-app-select.py | 27 ++-- tests/sqlalchemy/test_suite.py | 45 ++++-- 8 files changed, 191 insertions(+), 87 deletions(-) diff --git a/Makefile b/Makefile index
fe182eeab..81d46f9d0 100644 --- a/Makefile +++ b/Makefile @@ -1,3 +1,13 @@ +# +# Clearinghouse for initiating ad hoc PyTest runs to test the Databricks SQLAlchemy dialect +# +# NB: At present, the database fixtures aren't being cleaned up after each run. +# Use the clean pseudo-targets to remove these after your run. +# +# NB2: the target system (the parameter to --dburi) is specified via environment variables. +# See env.template. + + DBSCLI=dbsqlcli PYTEST=poetry run python3 -m pytest @@ -5,6 +15,9 @@ SUITE_PATH=tests/sqlalchemy SUITE=test_suite.py +.PHONY: all clean showtables full reflection simple str num drop_simpletest drop_reflectiontest + + all: full clean: drop_simpletest drop_reflectiontest diff --git a/env.template b/env.template index eeaadd848..44a7fffa7 100644 --- a/env.template +++ b/env.template @@ -6,6 +6,6 @@ export DATABRICKS_HTTP_PATH=your-path export DATABRICKS_TOKEN=your-token export DATABRICKS_SCHEMA=default-or-something-else -# hacky dynamic breakpoint; delete the following and/or use unset if you do *not* want the breakpoint +# hacky dynamic breakpoint; delete the following and/or unset the var if you do *not* want the breakpoint export DATABRICKS_DIALECT_DEBUG=True #unset DATABRICKS_DIALECT_DEBUG \ No newline at end of file diff --git a/src/databricks/sqlalchemy/Makefile b/src/databricks/sqlalchemy/Makefile index e47737393..5df1970aa 100644 --- a/src/databricks/sqlalchemy/Makefile +++ b/src/databricks/sqlalchemy/Makefile @@ -1,17 +1,33 @@ +# +# Temporary(?) home for sample apps to guide development of the Databricks SQLAlchemy dialect +# +# NB: This really ought to be using the standard Golden tables. For the while, there are a number +# of pre-defined tables that are created. +# +# NB2: the target system is specified via environment variables. The sample apps look this up at run-time. +# See env.template. +# + DBSCLI=dbsqlcli PY=poetry run python3 +.PHONY: sample1 sample2 sample3 clean \ + create create-reseq create-extra create-db drop-db \ + showtables describe select drop + sample1: $(PY) sample-app-select.py sample2: $(PY) sample-app-reflection.py +sample3: + $(PY) sample-app-insert.py + clean: drop -# caution: create table in the correct schema! +# caution: create your table in the correct schema!
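One nuance of the DATABRICKS_DIALECT_DEBUG switch that env.template above describes: the code reads it as os.getenv("DATABRICKS_DIALECT_DEBUG") or False, so any non-empty value, even the string "False", turns the breakpoints on; only unsetting (or deleting) the variable turns them off, which is why the template says to unset it rather than assign a falsey value. A quick self-contained check of that behaviour:

import os

# any non-empty string is truthy, including "False"
os.environ["DATABRICKS_DIALECT_DEBUG"] = "False"
print(bool(os.getenv("DATABRICKS_DIALECT_DEBUG") or False))  # True

# only removing the variable disables the flag
del os.environ["DATABRICKS_DIALECT_DEBUG"]
print(bool(os.getenv("DATABRICKS_DIALECT_DEBUG") or False))  # False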
create: - # $(DBSCLI) -e create-table-sa.sql $(DBSCLI) -e create-table.sql create-reseq: diff --git a/src/databricks/sqlalchemy/create-table.sql b/src/databricks/sqlalchemy/create-table.sql index 73c900b94..76cbdee32 100644 --- a/src/databricks/sqlalchemy/create-table.sql +++ b/src/databricks/sqlalchemy/create-table.sql @@ -32,14 +32,18 @@ CREATE TABLE sample_strtypes ( f_string STRING, f_date DATE, - f_timestamp TIMESTAMP, - f_interval INTERVAL DAY TO SECOND + f_timestamp TIMESTAMP ); + -- f_interval INTERVAL DAY TO SECOND INSERT INTO sample_strtypes VALUES - ( 'Everest', '1953-05-29', '1953-05-29T11:30', '3 0:0:0' ), - ( 'Mariana Trench', '1960-01-23', '1960-01-23T13:06', '0 5:00:00' ), - ( 'Moon landing', '1969-07-20', '1969-07-20T20:17', '0 21:36:0' ); + ( 'Everest', '1953-05-29', '1953-05-29T11:30' ), + ( 'Mariana Trench', '1960-01-23', '1960-01-23T13:06' ), + ( 'Moon landing', '1969-07-20', '1969-07-20T20:17' ); + +-- ( 'Everest', '1953-05-29', '1953-05-29T11:30', '3 0:0:0' ), +-- ( 'Mariana Trench', '1960-01-23', '1960-01-23T13:06', '0 5:00:00' ), +-- ( 'Moon landing', '1969-07-20', '1969-07-20T20:17', '0 21:36:0' ); SELECT * FROM sample_strtypes; diff --git a/src/databricks/sqlalchemy/dialect.py b/src/databricks/sqlalchemy/dialect.py index f23878b61..93555ad7b 100644 --- a/src/databricks/sqlalchemy/dialect.py +++ b/src/databricks/sqlalchemy/dialect.py @@ -3,16 +3,13 @@ from databricks import sql from databricks import sql as dbsql -# cribbing from Hive -from pyhive.sqlalchemy_hive import HiveExecutionContext, HiveIdentifierPreparer, HiveCompiler, HiveTypeCompiler -from pyhive.sqlalchemy_hive import HiveDate, HiveTimestamp - import re from sqlalchemy import types +# we leverage MySQL's implementation of TINYINT and DOUBLE from sqlalchemy.types import Integer, BigInteger, SmallInteger, Float, DECIMAL, Boolean; -from sqlalchemy.types import String; +from sqlalchemy.types import String, DATE, TIMESTAMP; from sqlalchemy import util from sqlalchemy import exc @@ -23,10 +20,75 @@ from typing import AnyStr -# provide a way to break in +# provide a way to debug debugbreakpoint = os.getenv("DATABRICKS_DIALECT_DEBUG") or False +# styled after HiveStringTypeBase; removed process_bind_param(self, value, dialect) +class DatabricksStringTypeBase(types.TypeDecorator): + impl = types.String + +# styled after HiveDate +class DatabricksDate(DatabricksStringTypeBase): + """Translates date strings to date objects""" + impl = types.DATE + + def process_result_value(self, value, dialect): + if debugbreakpoint: + breakpoint() + return processors.str_to_date(value) + + def result_processor(self, dialect, coltype): + def process(value): + if debugbreakpoint: + breakpoint() + + if isinstance(value, datetime.datetime): + return value.date() + elif isinstance(value, datetime.date): + return value + elif value is not None: + return parse(value).date() + else: + return None + + return process + + def adapt(self, impltype, **kwargs): + if debugbreakpoint: + breakpoint() + return self.impl + +# styled after HiveTimestamp +class DatabricksTimestamp(DatabricksStringTypeBase): + """Translates timestamp strings to datetime objects""" + impl = types.TIMESTAMP + + def process_result_value(self, value, dialect): + if debugbreakpoint: + breakpoint() + return processors.str_to_datetime(value) + + def result_processor(self, dialect, coltype): + def process(value): + if debugbreakpoint: + breakpoint() + + if isinstance(value, datetime.datetime): + return value + elif value is not None: + return parse(value) + else: + return
None + + return process + + def adapt(self, impltype, **kwargs): + if debugbreakpoint: + breakpoint() + return self.impl + + class DatabricksIdentifierPreparer(compiler.IdentifierPreparer): # SparkSQL identifier specification: # ref: https://spark.apache.org/docs/latest/sql-ref-identifier.html @@ -49,10 +111,12 @@ class DatabricksExecutionContext(default.DefaultExecutionContext): pass +# +# this class provides visitors that emit the dialect-specific keywords for SQLAlchemy's SQL AST +# class DatabricksTypeCompiler(compiler.GenericTypeCompiler): # ref: https://spark.apache.org/docs/latest/sql-ref-datatypes.html - # TODO: we are leveraging MySQL's impl def visit_TINYINT(self, type_): return 'TINYINT' @@ -65,11 +129,9 @@ def visit_INTEGER(self, type_, **kw): def visit_BIGINT(self, type_, **kw): return "BIGINT" - def visit_FLOAT(self, type_, **kw): return "FLOAT" - # TODO: we are leveraging MySQL's impl def visit_DOUBLE(self, type_, **kw): return "DOUBLE" @@ -90,10 +152,6 @@ def visit_DATE(self, type_, **kw): def visit_TIMESTAMP(self, type_, **kw): return "TIMESTAMP" - def visit_DATETIME(self, type_, **kw): - # TODO: incomplete? - return "INTERVAL" - def visit_BOOLEAN(self, type_, **kw): return "BOOLEAN" @@ -111,42 +169,43 @@ class DatabricksCompiler(compiler.SQLCompiler): -# This lookup table is by DATA_TYPE and is rather nice since Decimal can be detected directly. -# However, as DATA_TYPE is rather obtuse... and potentially risky going forward, we switched to use COLUMN_TYPE_NAME instead (the table below) +# The following lookup table is by DATA_TYPE and is rather nice since Decimal can be detected directly. +# However, as DATA_TYPE is rather obtuse... going forward, we switched to use COLUMN_TYPE_NAME instead (the table below) # _type_map = { # -6: types.Integer, # tiny_int # 5: types.Integer, # small_int # 4: types.Integer, # int # -5: types.BigInteger, # big_int # 6: types.Float, # 3: types.DECIMAL, # 16: types.Boolean, # 12: types.String, -# 91: HiveDate, # date -# 93: HiveTimestamp, # timestamp -# # TODO: interval +# 91: DatabricksDate, # date +# 93: DatabricksTimestamp, # timestamp +# 1111: interval # } # This lookup is by TYPE_NAME which is easier to maintain and likely safer in the long term. -# NB: Decimal is explicitly excluded here as an occurrence's TYPE_NAME includes its precision and scale +# NB: Decimal is explicitly excluded here as each occurrence's TYPE_NAME includes the occurrence's precision and scale # See/refer to COLUMN_TYPE_DECIMAL below. +# this maps SQL type names onto their Python representations; note the deliberate omission of Decimal!
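To make the Decimal exclusion concrete before the map itself: the server reports DECIMAL columns with a parameterized TYPE_NAME such as 'DECIMAL(9,3)', which can never match a fixed dictionary key, whereas the JDBC-style DATA_TYPE code for DECIMAL (3) is stable. A sketch of the resolution that get_columns performs against the map below; resolve_coltype is an illustrative helper, not part of the dialect:

from sqlalchemy import types

COLUMN_TYPE_DECIMAL = 3  # JDBC-style DATA_TYPE code for DECIMAL

def resolve_coltype(type_map, data_type, type_name):
    # DECIMAL first: its TYPE_NAME carries precision/scale, e.g. 'DECIMAL(9,3)'
    if data_type == COLUMN_TYPE_DECIMAL:
        return types.DECIMAL
    try:
        return type_map[type_name]
    except KeyError:
        # mirrors the util.warn fallback in get_columns
        return types.NullType

print(resolve_coltype({'INT': types.Integer}, 4, 'INT'))           # Integer
print(resolve_coltype({'INT': types.Integer}, 3, 'DECIMAL(9,3)'))  # DECIMAL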
_type_map = { - 'TINYINT': types.Integer, # tiny_int - 'SMALLINT': types.Integer, # small_int - 'INT': types.Integer, # int - 'BIGINT': types.BigInteger, # big_int + 'TINYINT': types.Integer, # tiny_int + 'SMALLINT': types.Integer, # small_int + 'INT': types.Integer, # int + 'BIGINT': types.BigInteger, # big_int 'FLOAT': types.Float, - 'DOUBLE': types.Float, # double fits into a Python float + 'DOUBLE': types.Float, # double fits into a Python float 'BOOLEAN': types.Boolean, 'STRING': types.String, - 'DATE': HiveDate, # date - 'TIMESTAMP': HiveTimestamp, # timestamp - # TODO: interval + 'DATE': types.DATE, # date + 'TIMESTAMP': types.TIMESTAMP, # timestamp } # this is used to match decimal's DATA_TYPE; it will map to types.DECIMAL COLUMN_TYPE_DECIMAL=3 +# COLUMN_TYPE_INTERVAL=1111 class DatabricksDialect(default.DefaultDialect): @@ -193,9 +252,7 @@ def create_connect_args(self, url: "URL"): return [], kwargs - # def initialize(self, connection) -> None: - # super().initialize(connection) - + # TODO: uninvoked code to date def get_schema_names(self, connection, **kwargs): # conn = dbsql.connect( # server_hostname=kwargs['server_hostname'], @@ -225,7 +282,6 @@ def get_table_names(self, connection, schema = None, **kwargs): return _tables - def get_columns(self, connection, table_name, schema=None, **kwargs): # Example row # Row(TABLE_CAT='hive_metastore', TABLE_SCHEM='george_chow_dbtest', TABLE_NAME='all_types', COLUMN_NAME='f_byte', DATA_TYPE=4, @@ -234,8 +290,8 @@ def get_columns(self, connection, table_name, schema=None, **kwargs): # CHAR_OCTET_LENGTH=None, ORDINAL_POSITION=0, IS_NULLABLE='YES', SCOPE_CATALOG=None, SCOPE_SCHEMA=None, # SCOPE_TABLE=None, SOURCE_DATA_TYPE=None, IS_AUTO_INCREMENT='NO') COLUMN_NAME=3 - COLUMN_TYPE_NAME=5 COLUMN_TYPE=4 + COLUMN_TYPE_NAME=5 COLUMN_NULLABLE=17 COLUMN_COMMENT=11 COLUMN_AUTOINCREMENT=22 @@ -250,29 +306,26 @@ def get_columns(self, connection, table_name, schema=None, **kwargs): # if debugbreakpoint: # breakpoint() - # filled-in according to interfaces.py's class ReflectedColumn(TypedDict): try: if (i[COLUMN_TYPE] != COLUMN_TYPE_DECIMAL): coltype = _type_map[i[COLUMN_TYPE_NAME]] else: - # special processing needed as DECIMAL's COLUMN_TYPE includes the precision/scale of this occurrence - coltype = types.DECIMAL; - - # coltype = _type_map[i[COLUMN_TYPE]] + coltype = types.DECIMAL except KeyError: util.warn(f"Did not recognize type '{i[COLUMN_TYPE_NAME]}'({i[COLUMN_TYPE]}) of column '{i[COLUMN_NAME]}'") coltype = types.NullType try: - nullable = i[COLUMN_NULLABLE] == 'YES'; + nullable = i[COLUMN_NULLABLE] == 'YES' except KeyError: nullable = True; try: - autoincrement = i[COLUMN_AUTOINCREMENT] == 'YES'; + autoincrement = i[COLUMN_AUTOINCREMENT] == 'YES' except KeyError: - autoincrement = False; + autoincrement = False + # filled-in according to interfaces.py's class ReflectedColumn(TypedDict): result.append({ 'name': i[COLUMN_NAME], 'type': coltype, @@ -284,10 +337,6 @@ def get_columns(self, connection, table_name, schema=None, **kwargs): return result - def get_view_names(self, connection, schema=None, **kwargs): - # no views at present - return [] - def has_table( self, connection, @@ -306,6 +355,10 @@ def has_table( except exc.NoSuchTableError: return False + def get_view_names(self, connection, schema=None, **kwargs): + # Spark has no views + return [] + def get_foreign_keys(self, connection, table_name, schema=None, **kwargs): # Spark has no foreign keys return [] @@ -315,9 +368,9 @@ def get_pk_constraint(self, connection, table_name, 
schema=None, **kwargs): return [] def get_indexes(self, connection, table_name, schema=None, **kwargs): - # TODO: treat partitions as indices + # TODO: expose partition columns as indices? return [] def do_rollback(self, dbapi_connection) -> None: - # Spark/Delta transaction only covers single-table updates... to simplify things, just skip this for now. + # Spark/Delta transactions only support single-table updates... to simplify things, just skip this for now. pass diff --git a/src/databricks/sqlalchemy/sample-app-reflection.py b/src/databricks/sqlalchemy/sample-app-reflection.py index afdec82ce..2072d25e2 100644 --- a/src/databricks/sqlalchemy/sample-app-reflection.py +++ b/src/databricks/sqlalchemy/sample-app-reflection.py @@ -24,12 +24,12 @@ tableName = "sample_numtypes" # use reflection here to discover the schema dynamically - numtypes = Table( + t = Table( "sample_numtypes", metadata_obj, autoload_with=conn ) # SELECT * FROM t WHERE f_byte = -125 - stmt = select(numtypes).where(numtypes.c.f_byte == -125) + stmt = select(t).where(t.c.f_byte == -125) print(f"Attempting to execute: {stmt}\n") print(f"Rows from table {tableName}") diff --git a/src/databricks/sqlalchemy/sample-app-select.py b/src/databricks/sqlalchemy/sample-app-select.py index 41d63db47..3986a02c3 100644 --- a/src/databricks/sqlalchemy/sample-app-select.py +++ b/src/databricks/sqlalchemy/sample-app-select.py @@ -18,16 +18,19 @@ access_token = os.getenv("DATABRICKS_TOKEN") default_schema = os.getenv("DATABRICKS_SCHEMA") +# provide a way to break in +debugbreakpoint = os.getenv("DATABRICKS_DIALECT_DEBUG") or False + # use echo=True for verbose log engine = create_engine(f"databricks+thrift://token:{access_token}@{server_hostname}/{default_schema}?http_path={http_path}", echo=False, future=True) metadata_obj = MetaData() # NB: sample_numtypes is a pre-created/populated table -t1 = "sample_numtypes" +numtypes = "sample_numtypes" -numtypes = Table( - t1, +t1 = Table( + numtypes, metadata_obj, Column('f_byte', TINYINT), Column('f_short', SMALLINT), @@ -40,10 +43,10 @@ ) # SELECT * FROM t WHERE f_byte = -125 -stmt = select(numtypes).where(numtypes.c.f_byte == -125) +stmt = select(t1).where(t1.c.f_byte == -125) print(f"Attempting to execute: {stmt}\n") -print(f"Rows from table {t1}") +print(f"Rows from table {numtypes}") with engine.connect() as conn: for row in conn.execute(stmt): @@ -51,19 +54,23 @@ # NB: sample_strtypes is a pre-created/populated table -t2 = "sample_strtypes" +strtypes = "sample_strtypes" with engine.connect() as conn: - strtypes = Table( - t2, + t2 = Table( + strtypes, metadata_obj, autoload_with=conn ) # SELECT * FROM t - stmt = select(strtypes) + stmt = select(t2) print(f"Attempting to execute: {stmt}\n") - print(f"Rows from table {t2}") + print(f"Rows from table {strtypes}") + if debugbreakpoint: + breakpoint() for row in conn.execute(stmt): + if debugbreakpoint: + breakpoint() print(row) diff --git a/tests/sqlalchemy/test_suite.py b/tests/sqlalchemy/test_suite.py index 843892a1f..4827f43f6 100644 --- a/tests/sqlalchemy/test_suite.py +++ b/tests/sqlalchemy/test_suite.py @@ -1,7 +1,9 @@ import datetime import os +# TODO: fold them into our package from sqlalchemy.dialects.mysql.types import TINYINT, DOUBLE # borrow MySQL's impls + from sqlalchemy import BIGINT from sqlalchemy import BOOLEAN from sqlalchemy import DATE @@ -146,52 +148,61 @@ def define_tables(cls, metadata): # TODO: why is the cleanup of these tables not happening?
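The per-column smoke tests that follow all build their statements with the legacy list form select([...]); under SQLAlchemy 1.4 that spelling is still accepted as the legacy 1.x calling style and compiles to the same SQL as the positional form, e.g.:

from sqlalchemy import Column, Integer, MetaData, Table, select

t = Table("simpletest_num", MetaData(), Column("f_byte", Integer))

stmt_legacy = select([t.c.f_byte]).limit(10)  # legacy 1.x list form, as used below
stmt_modern = select(t.c.f_byte).limit(10)    # 1.4-style positional form

# both compile to: SELECT simpletest_num.f_byte FROM simpletest_num LIMIT :param_1
print(stmt_legacy)
print(stmt_modern)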
+ # simpletest_num + def test_select_type_byte(self, connection): - simpletest_num = self.tables.simpletest_num - stmt = select([simpletest_num.c.f_byte]) + t = self.tables.simpletest_num + stmt = select([t.c.f_byte]) connection.execute(stmt) def test_select_type_smallint(self, connection): - simpletest_num = self.tables.simpletest_num - stmt = select([simpletest_num.c.f_short]) + t = self.tables.simpletest_num + stmt = select([t.c.f_short]) connection.execute(stmt) def test_select_type_int(self, connection): - simpletest_num = self.tables.simpletest_num - stmt = select([simpletest_num.c.f_int]) + t = self.tables.simpletest_num + stmt = select([t.c.f_int]) connection.execute(stmt) def test_select_type_bigint(self, connection): - simpletest_num = self.tables.simpletest_num - stmt = select([simpletest_num.c.f_long]) + t = self.tables.simpletest_num + stmt = select([t.c.f_long]) connection.execute(stmt) def test_select_type_float(self, connection): - simpletest_num = self.tables.simpletest_num - stmt = select([simpletest_num.c.f_float]) + t = self.tables.simpletest_num + stmt = select([t.c.f_float]) connection.execute(stmt) def test_select_type_double(self, connection): - simpletest_num = self.tables.simpletest_num - stmt = select([simpletest_num.c.f_double]) + t = self.tables.simpletest_num + stmt = select([t.c.f_double]) connection.execute(stmt) - def test_select_star_with_limit(self, connection): - simpletest_num = self.tables.simpletest_num - stmt = select([simpletest_num.c.f_byte]).limit(10) + # simpletest_str + + def test_select_type_string(self, connection): + t = self.tables.simpletest_str + stmt = select([t.c.f_string]).limit(10) connection.execute(stmt) + def test_select_type_date(self, connection): + t = self.tables.simpletest_str + stmt = select([t.c.f_date]).limit(10) - def test_select_type_string(self, connection): + connection.execute(stmt) + + def test_select_type_timestamp(self, connection): t = self.tables.simpletest_str - stmt = select([t.c.f_string]).limit(10) + stmt = select([t.c.f_timestamp]).limit(10) connection.execute(stmt) From 6463fef39e5bebea77dd3180bfeabd04cd2bb2e9 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 16:48:32 -0700 Subject: [PATCH 11/30] add a block of test suites from SA --- Makefile | 87 +++++++++++++++++++++++++++- src/databricks/sqlalchemy/dialect.py | 72 ++++++++++++++++++----- tests/sqlalchemy/test_full_sa.py | 42 ++++++++++++++ 3 files changed, 183 insertions(+), 18 deletions(-) create mode 100644 tests/sqlalchemy/test_full_sa.py diff --git a/Makefile b/Makefile index 81d46f9d0..8ecbb6ad3 100644 --- a/Makefile +++ b/Makefile @@ -20,15 +20,62 @@ SUITE=test_suite.py all: full -clean: drop_simpletest drop_reflectiontest +clean: drop_simpletest drop_reflectiontest \ + drop_booleantest drop_datetest drop_datetimetest drop_integertest drop_numerictest drop_stringtest drop_tableddl showtables: $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); show tables;" - + $(DBSCLI) -e "USE test_schema; show tables;" + $(DBSCLI) -e "USE test_schema_2; show tables;" full: $(PYTEST) $(SUITE_PATH) \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" \ + --log-file=~/.pytestlogs/full.log + +sa-bool: drop_booleantest + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::BooleanTest \ + --dburi 
"databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +sa-date: drop_datetest + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::DateTest \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +sa-dt: drop_datetimetest + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::DateTimeTest \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +sa-int: drop_integertest + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::IntegerTest \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +sa-num: drop_numerictest + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::NumericTest \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +sa-str: drop_stringtest + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::StringTest \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +sa-ddl: drop_tableddl + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +sa-ddl1: drop_tableddl + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest:test_create_table \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +sa-ddl2: drop_tableddl + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest:test_create_table_schema \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +sa-ddl3: drop_tableddl + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest:test_drop_table \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + +sa-join: drop_jointest + $(PYTEST) $(SUITE_PATH)/test_full_sa.py::JoinTest \ + --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" reflection: $(PYTEST) $(SUITE_PATH)/$(SUITE)::ReflectionTest \ @@ -56,3 +103,37 @@ drop_reflectiontest: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS reflectiontest_all_num_types;" echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS reflectiontest_all_str_types;" +# clean up after SQLAlchemy test suite + +drop_booleantest: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS boolean_table;" + +drop_datetest: +drop_datetimetest: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS date_table;" + + +drop_integertest: +drop_numerictest: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS t;" + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS tabletest;" + + +drop_stringtest: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS boolean_table;" + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS t;" + +drop_tableddl: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS _test_table;" + echo y | $(DBSCLI) -e "USE 
$(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS test_table;" + +drop_jointest: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS a;" + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS b;" + + +# these two schemas are baked into SQLAlchemy's test suite +satestdb: + $(DBSCLI) -e "CREATE DATABASE test_schema;" + $(DBSCLI) -e "CREATE DATABASE test_schema_2;" + diff --git a/src/databricks/sqlalchemy/dialect.py b/src/databricks/sqlalchemy/dialect.py index 93555ad7b..1f87df6ee 100644 --- a/src/databricks/sqlalchemy/dialect.py +++ b/src/databricks/sqlalchemy/dialect.py @@ -34,15 +34,10 @@ class DatabricksDate(DatabricksStringTypeBase): impl = types.DATE def process_result_value(self, value, dialect): - if debugbreakpoint: - breakpoint() return processors.str_to_date(value) def result_processor(self, dialect, coltype): def process(value): - if debugbreakpoint: - breakpoint() - if isinstance(value, datetime.datetime): return value.date() elif isinstance(value, datetime.date): @@ -55,8 +50,6 @@ def process(value): return process def adapt(self, impltype, **kwargs): - if debugbreakpoint: - breakpoint() return self.impl # styled after HiveTimestamp @@ -65,15 +58,10 @@ class DatabricksTimestamp(DatabricksStringTypeBase): impl = types.TIMESTAMP def process_result_value(self, value, dialect): - if debugbreakpoint: - breakpoint() return processors.str_to_datetime(value) def result_processor(self, dialect, coltype): def process(value): - if debugbreakpoint: - breakpoint() - if isinstance(value, datetime.datetime): return value elif value is not None: @@ -84,8 +72,6 @@ def process(value): return process def adapt(self, impltype, **kwargs): - if debugbreakpoint: - breakpoint() return self.impl @@ -101,6 +87,31 @@ def __init__(self, dialect): initial_quote='`', ) + @util.preload_module("sqlalchemy.sql.naming") + def format_constraint(self, constraint, _alembic_quote=True): + if debugbreakpoint: + breakpoint() + + naming = util.preloaded.sql_naming + + if constraint.name is elements._NONE_NAME: + name = naming._constraint_name_for_table( + constraint, constraint.table + ) + + if name is None: + return None + else: + name = constraint.name + + if constraint.__visit_name__ == "index": + return self.truncate_and_render_index_name( + name, _alembic_quote=_alembic_quote + ) + else: + return self.truncate_and_render_constraint_name( + name, _alembic_quote=_alembic_quote + ) class DatabricksExecutionContext(default.DefaultExecutionContext): # There doesn't seem to be any override of DefaultExecutionContext required @@ -146,9 +157,23 @@ def visit_DECIMAL(self, type_, **kw): "scale": type_.scale, } + def visit_NUMERIC(self, type_, **kw): + if type_.precision is None: + return "DECIMAL" + elif type_.scale is None: + return "DECIMAL(%(precision)s)" % {"precision": type_.precision} + else: + return "DECIMAL(%(precision)s, %(scale)s)" % { + "precision": type_.precision, + "scale": type_.scale, + } + def visit_DATE(self, type_, **kw): return "DATE" + def visit_DATETIME(self, type_, **kw): + return "TIMESTAMP" + def visit_TIMESTAMP(self, type_, **kw): return "TIMESTAMP" @@ -164,10 +189,26 @@ def visit_VARCHAR(self, type_, **kw): class DatabricksCompiler(compiler.SQLCompiler): - # stub pass +class DatabricksDDLCompiler(compiler.DDLCompiler): + + # Spark has no primary key support so ignore whatever constraint there is + def visit_primary_key_constraint(self, constraint, **kw): + return "" + + def visit_foreign_key_constraint(self, constraint, **kw): + return "" + + # stripped down from
DDLCompiler::get_column_specification + # def get_column_specification(self, column, **kwargs): + # colspec = ( + # self.preparer.format_column(column) + # + " " + # ) + # return colspec + # The following lookup table is by DATA_TYPE and is rather nice since Decimal can be detected directly. # However, as DATA_TYPE is rather obtuse... going forward, we switched to use COLUMN_TYPE_NAME instead (the table below) @@ -217,6 +258,7 @@ class DatabricksDialect(default.DefaultDialect): preparer = DatabricksIdentifierPreparer execution_ctx_cls = DatabricksExecutionContext statement_compiler = DatabricksCompiler + ddl_compiler = DatabricksDDLCompiler type_compiler = DatabricksTypeCompiler # the following attributes are cribbed from HiveDialect: diff --git a/tests/sqlalchemy/test_full_sa.py b/tests/sqlalchemy/test_full_sa.py new file mode 100644 index 000000000..e5550fe36 --- /dev/null +++ b/tests/sqlalchemy/test_full_sa.py @@ -0,0 +1,42 @@ +from sqlalchemy.testing.suite import * + +from sqlalchemy.testing.suite import IntegerTest as _IntegerTest +from sqlalchemy.testing.suite import StringTest as _StringTest +from sqlalchemy.testing.suite import NumericTest as _NumericTest +from sqlalchemy.testing.suite import BooleanTest as _BooleanTest +from sqlalchemy.testing.suite import DateTest as _DateTest +from sqlalchemy.testing.suite import DateTimeTest as _DateTimeTest + + +from sqlalchemy.testing.suite import TableDDLTest as _TableDDLTest + +from sqlalchemy.testing.suite import JoinTest as _JoinTest + + + +class BooleanTest(_BooleanTest): + pass + +class DateTest(_DateTest): + pass + +class DateTimeTest(_DateTimeTest): + pass + +class IntegerTest(_IntegerTest): + pass + +class NumericTest(_NumericTest): + pass + +class StringTest(_StringTest): + pass + + +class TableDDLTest(_TableDDLTest): + pass + + +class JoinTest(_JoinTest): + pass + From d3bd5d2cfb99ae1d58f8584b3b803c606455f042 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 16:49:08 -0700 Subject: [PATCH 12/30] Trial add of Github Action for the SQLAlchemy dialect --- .github/workflows/sqlalchemy-dialect.yml | 163 +++++++++++++++++++++++ 1 file changed, 163 insertions(+) create mode 100644 .github/workflows/sqlalchemy-dialect.yml diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml new file mode 100644 index 000000000..0be2d5f93 --- /dev/null +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -0,0 +1,163 @@ +name: SQLAlchemy dialect test +on: + push: + paths: + - src/databricks/sqlalchemy + - tests/sqlalchemy +jobs: + run-tests: + runs-on: ubuntu-latest + steps: + + #---------------------------------------------- + # import secrets from the Github env + #---------------------------------------------- + env: + DATABRICKS_SERVER_HOSTNAME: ${{ secrets.ENVSEC_DATABRICKS_SERVER_HOSTNAME }} + DATABRICKS_HTTP_PATH: ${{ secrets.ENVSEC_DATABRICKS_HTTP_PATH }} + DATABRICKS_TOKEN: ${{ secrets.ENVSEC_DATABRICKS_TOKEN }} + DATABRICKS_SCHEMA: ${{ secrets.ENVSEC_DATABRICKS_SCHEMA }} + + #---------------------------------------------- + # check-out repo and set-up python + #---------------------------------------------- + - name: Check out repository + uses: actions/checkout@v2 + - name: Set up python + id: setup-python + uses: actions/setup-python@v2 + with: + python-version: 3.7 + #---------------------------------------------- + # ----- install & configure poetry ----- + #---------------------------------------------- + - name: Install Poetry + uses: snok/install-poetry@v1 + with: + virtualenvs-create: 
true + virtualenvs-in-project: true + installer-parallel: true + + #---------------------------------------------- + # load cached venv if cache exists + #---------------------------------------------- + - name: Load cached venv + id: cached-poetry-dependencies + uses: actions/cache@v2 + with: + path: .venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} + #---------------------------------------------- + # install dependencies if cache does not exist + #---------------------------------------------- + - name: Install dependencies + if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + run: poetry install --no-interaction --no-root + #---------------------------------------------- + # install your root project, if required + #---------------------------------------------- + - name: Install library + run: poetry install --no-interaction + #---------------------------------------------- + # run test suite + #---------------------------------------------- + - name: Run tests + run: poetry run pytest tests/sqlalchemy/test_full_sa.py::IntegerTest --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + + # check-linting: + # runs-on: ubuntu-latest + # steps: + # #---------------------------------------------- + # # check-out repo and set-up python + # #---------------------------------------------- + # - name: Check out repository + # uses: actions/checkout@v2 + # - name: Set up python + # id: setup-python + # uses: actions/setup-python@v2 + # with: + # python-version: 3.7 + # #---------------------------------------------- + # # ----- install & configure poetry ----- + # #---------------------------------------------- + # - name: Install Poetry + # uses: snok/install-poetry@v1 + # with: + # virtualenvs-create: true + # virtualenvs-in-project: true + # installer-parallel: true + + # #---------------------------------------------- + # # load cached venv if cache exists + # #---------------------------------------------- + # - name: Load cached venv + # id: cached-poetry-dependencies + # uses: actions/cache@v2 + # with: + # path: .venv + # key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} + # #---------------------------------------------- + # # install dependencies if cache does not exist + # #---------------------------------------------- + # - name: Install dependencies + # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + # run: poetry install --no-interaction --no-root + # #---------------------------------------------- + # # install your root project, if required + # #---------------------------------------------- + # - name: Install library + # run: poetry install --no-interaction + # #---------------------------------------------- + # # black the code + # #---------------------------------------------- + # - name: Black + # run: poetry run black --check src + + # check-types: + # runs-on: ubuntu-latest + # steps: + # #---------------------------------------------- + # # check-out repo and set-up python + # #---------------------------------------------- + # - name: Check out repository + # uses: actions/checkout@v2 + # - name: Set up python + # id: setup-python + # uses: actions/setup-python@v2 + # with: + # python-version: 3.7 + # #---------------------------------------------- + 
# # ----- install & configure poetry ----- + # #---------------------------------------------- + # - name: Install Poetry + # uses: snok/install-poetry@v1 + # with: + # virtualenvs-create: true + # virtualenvs-in-project: true + # installer-parallel: true + + # #---------------------------------------------- + # # load cached venv if cache exists + # #---------------------------------------------- + # - name: Load cached venv + # id: cached-poetry-dependencies + # uses: actions/cache@v2 + # with: + # path: .venv + # key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} + # #---------------------------------------------- + # # install dependencies if cache does not exist + # #---------------------------------------------- + # - name: Install dependencies + # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' + # run: poetry install --no-interaction --no-root + # #---------------------------------------------- + # # install your root project, if required + # #---------------------------------------------- + # - name: Install library + # run: poetry install --no-interaction + # #---------------------------------------------- + # # black the code + # #---------------------------------------------- + # - name: Mypy + # run: poetry run mypy src \ No newline at end of file From 8716645985366159830911c18606f8332f92c89a Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 16:52:20 -0700 Subject: [PATCH 13/30] yml error --- .github/workflows/sqlalchemy-dialect.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index 0be2d5f93..3e690187d 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -9,15 +9,6 @@ jobs: runs-on: ubuntu-latest steps: - #---------------------------------------------- - # import secrets from the Github env - #---------------------------------------------- - env: - DATABRICKS_SERVER_HOSTNAME: ${{ secrets.ENVSEC_DATABRICKS_SERVER_HOSTNAME }} - DATABRICKS_HTTP_PATH: ${{ secrets.ENVSEC_DATABRICKS_HTTP_PATH }} - DATABRICKS_TOKEN: ${{ secrets.ENVSEC_DATABRICKS_TOKEN }} - DATABRICKS_SCHEMA: ${{ secrets.ENVSEC_DATABRICKS_SCHEMA }} - #---------------------------------------------- # check-out repo and set-up python #---------------------------------------------- @@ -62,6 +53,15 @@ jobs: # run test suite #---------------------------------------------- - name: Run tests + #---------------------------------------------- + # import secrets from the Github env + #---------------------------------------------- + env: + DATABRICKS_SERVER_HOSTNAME: ${{ secrets.ENVSEC_DATABRICKS_SERVER_HOSTNAME }} + DATABRICKS_HTTP_PATH: ${{ secrets.ENVSEC_DATABRICKS_HTTP_PATH }} + DATABRICKS_TOKEN: ${{ secrets.ENVSEC_DATABRICKS_TOKEN }} + DATABRICKS_SCHEMA: ${{ secrets.ENVSEC_DATABRICKS_SCHEMA }} + run: poetry run pytest tests/sqlalchemy/test_full_sa.py::IntegerTest --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" # check-linting: From d75710a64257f49ef645a3f1d0ea32c10254e445 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 16:59:15 -0700 Subject: [PATCH 14/30] add self-reference to trigger a run when the action is update --- .github/workflows/sqlalchemy-dialect.yml | 1 + 1 file changed, 1 insertion(+) diff --git 
a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index 3e690187d..c9843908f 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -4,6 +4,7 @@ on: paths: - src/databricks/sqlalchemy - tests/sqlalchemy + - .github/workflows/sqlalchemy-dialect.yml jobs: run-tests: runs-on: ubuntu-latest From 55e4448308e6b39dcdf8a4cfc07b8707f57525d0 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 17:04:34 -0700 Subject: [PATCH 15/30] correct usage of env var --- .github/workflows/sqlalchemy-dialect.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index c9843908f..b594b0b33 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -63,7 +63,7 @@ jobs: DATABRICKS_TOKEN: ${{ secrets.ENVSEC_DATABRICKS_TOKEN }} DATABRICKS_SCHEMA: ${{ secrets.ENVSEC_DATABRICKS_SCHEMA }} - run: poetry run pytest tests/sqlalchemy/test_full_sa.py::IntegerTest --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + run: poetry run pytest tests/sqlalchemy/test_full_sa.py::IntegerTest --dburi "databricks+thrift://token:$DATABRICKS_TOKEN@$DATABRICKS_SERVER_HOSTNAME/$DATABRICKS_SCHEMA?http_path=$DATABRICKS_HTTP_PATH" # check-linting: # runs-on: ubuntu-latest From dc70f9010f9132a8d516f8d33f5e409d95b3708c Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 17:17:37 -0700 Subject: [PATCH 16/30] move to repo secrets instead --- .github/workflows/sqlalchemy-dialect.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index b594b0b33..e206a7f30 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -58,10 +58,10 @@ jobs: # import secrets from the Github env #---------------------------------------------- env: - DATABRICKS_SERVER_HOSTNAME: ${{ secrets.ENVSEC_DATABRICKS_SERVER_HOSTNAME }} - DATABRICKS_HTTP_PATH: ${{ secrets.ENVSEC_DATABRICKS_HTTP_PATH }} - DATABRICKS_TOKEN: ${{ secrets.ENVSEC_DATABRICKS_TOKEN }} - DATABRICKS_SCHEMA: ${{ secrets.ENVSEC_DATABRICKS_SCHEMA }} + DATABRICKS_SERVER_HOSTNAME: ${{ secrets.REPOSEC_DATABRICKS_SERVER_HOSTNAME }} + DATABRICKS_HTTP_PATH: ${{ secrets.REPOSEC_DATABRICKS_HTTP_PATH }} + DATABRICKS_TOKEN: ${{ secrets.REPOSEC_DATABRICKS_TOKEN }} + DATABRICKS_SCHEMA: ${{ secrets.REPOSEC_DATABRICKS_SCHEMA }} run: poetry run pytest tests/sqlalchemy/test_full_sa.py::IntegerTest --dburi "databricks+thrift://token:$DATABRICKS_TOKEN@$DATABRICKS_SERVER_HOSTNAME/$DATABRICKS_SCHEMA?http_path=$DATABRICKS_HTTP_PATH" From d6f98dd70fb6653890f7e7608fc2fac7a0d6efd2 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 17:27:45 -0700 Subject: [PATCH 17/30] correct drop pseudo-targets --- Makefile | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 8ecbb6ad3..348c06d72 100644 --- a/Makefile +++ b/Makefile @@ -105,32 +105,47 @@ drop_reflectiontest: # clean up after SQLAlchemy test suite -drop_booleantest: - echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS boolean_table;" +drop_booleantest: drop_boolean_table -drop_datetest: -drop_datetimetest: - echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS 
date_table;" +drop_datetest: drop_date_table + +drop_datetimetest: drop_date_table + +drop_integertest: drop_t drop_tabletest + +drop_numerictest: drop_t drop_tabletest + +drop_stringtest: drop_t drop_boolean_table + +drop_tableddl: drop__test_table drop_test_table +drop_jointest: drop_a drop_b -drop_integertest: -drop_numerictest: + +drop_t: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS t;" - echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS tabletest;" +drop_tabletest: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS tabletest;" -drop_stringtest: +drop_boolean_table: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS boolean_table;" - echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS t;" -drop_tableddl: +drop__test_table: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS _test_table;" + +drop_test_table: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS test_table;" -drop_jointest: +drop_a: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS a;" + +drop_b: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS b;" +drop_date_table: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS date_table;" + # these two schemas are baked into SQLAlchemy's test suite satestdb: From 8e480ad340f433066b4992549a530496c7b0e91f Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 17:31:08 -0700 Subject: [PATCH 18/30] add trigger on the top-level Makefile (convenience) --- .github/workflows/sqlalchemy-dialect.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index e206a7f30..c8694db1e 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -2,6 +2,7 @@ name: SQLAlchemy dialect test on: push: paths: + - Makefile - src/databricks/sqlalchemy - tests/sqlalchemy - .github/workflows/sqlalchemy-dialect.yml From d68ddb54a0c9632b3ac0cae6276e3301e021af91 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 17:57:15 -0700 Subject: [PATCH 19/30] add dbsqlcli for cleanup --- .github/workflows/sqlalchemy-dialect.yml | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index c8694db1e..cccbe6ddb 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -22,6 +22,12 @@ jobs: with: python-version: 3.7 #---------------------------------------------- + # ----- install databricks-sql-cli ----- + #---------------------------------------------- + - name: Install databricks-sql-cli + run: python -m pip install databricks-sql-cli + + #---------------------------------------------- # ----- install & configure poetry ----- #---------------------------------------------- - name: Install Poetry @@ -64,7 +70,10 @@ jobs: DATABRICKS_TOKEN: ${{ secrets.REPOSEC_DATABRICKS_TOKEN }} DATABRICKS_SCHEMA: ${{ secrets.REPOSEC_DATABRICKS_SCHEMA }} - run: poetry run pytest tests/sqlalchemy/test_full_sa.py::IntegerTest --dburi "databricks+thrift://token:$DATABRICKS_TOKEN@$DATABRICKS_SERVER_HOSTNAME/$DATABRICKS_SCHEMA?http_path=$DATABRICKS_HTTP_PATH" + run: | + echo y | dbsqlcli --hostname $DATABRICKS_SERVER_HOSTNAME --http-path $DATABRICKS_HTTP_PATH --access-token $DATABRICKS_TOKEN -e "USE $DATABRICKS_SCHEMA; DROP TABLE IF EXISTS t; DROP 
TABLE IF EXISTS tabletest; DROP TABLE IF EXISTS integer_test;" + poetry run pytest tests/sqlalchemy/test_full_sa.py::IntegerTest --dburi "databricks+thrift://token:$DATABRICKS_TOKEN@$DATABRICKS_SERVER_HOSTNAME/$DATABRICKS_SCHEMA?http_path=$DATABRICKS_HTTP_PATH" + echo y | dbsqlcli --hostname $DATABRICKS_SERVER_HOSTNAME --http-path $DATABRICKS_HTTP_PATH --access-token $DATABRICKS_TOKEN -e "USE $DATABRICKS_SCHEMA; DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS tabletest; DROP TABLE IF EXISTS integer_test;" # check-linting: # runs-on: ubuntu-latest From c6a93862bbdbd34a443dbeb89ce81e76cd77a322 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 18:00:25 -0700 Subject: [PATCH 20/30] add init invocation of dbsqlcli --- .github/workflows/sqlalchemy-dialect.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index cccbe6ddb..6a836988a 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -24,8 +24,10 @@ jobs: #---------------------------------------------- # ----- install databricks-sql-cli ----- #---------------------------------------------- - - name: Install databricks-sql-cli - run: python -m pip install databricks-sql-cli + - name: Install databricks-sql-cli & initialize dbsqlclirc + run: | + python -m pip install databricks-sql-cli + dbsqlcli #---------------------------------------------- # ----- install & configure poetry ----- From 01f5a16961cbf9bb77cd549b9acde0f19861ef8a Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 18:02:44 -0700 Subject: [PATCH 21/30] override the return code --- .github/workflows/sqlalchemy-dialect.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index 6a836988a..527e993b2 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -27,7 +27,7 @@ jobs: - name: Install databricks-sql-cli & initialize dbsqlclirc run: | python -m pip install databricks-sql-cli - dbsqlcli + dbsqlcli && true #---------------------------------------------- # ----- install & configure poetry ----- From d121881ac9a7b7fa56c2be5d2177d87bf2bcd7fa Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 18:05:39 -0700 Subject: [PATCH 22/30] wrong conjunction!
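The one-word fix below is easy to misread, so the shell semantics are worth spelling out: dbsqlcli && true runs true only when dbsqlcli exits zero, so a failing bare invocation (used, it appears, just to write an initial ~/.dbsqlclirc) still fails the CI step; dbsqlcli || true is what actually forces a zero exit status. A minimal check of the two conjunctions, sketched in Python against a POSIX shell:

import subprocess

# `cmd && true` preserves a failure; `cmd || true` masks it
print(subprocess.run("false && true", shell=True).returncode)  # 1
print(subprocess.run("false || true", shell=True).returncode)  # 0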
--- .github/workflows/sqlalchemy-dialect.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index 527e993b2..d91f8905d 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -27,7 +27,7 @@ jobs: - name: Install databricks-sql-cli & initialize dbsqlclirc run: | python -m pip install databricks-sql-cli - dbsqlcli && true + dbsqlcli || true From 55aa104fb262a51f1bf0794c6c4532183f6a5b3a Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 18:09:01 -0700 Subject: [PATCH 23/30] correct table name --- .github/workflows/sqlalchemy-dialect.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index d91f8905d..221334d6e 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -73,9 +73,9 @@ jobs: DATABRICKS_SCHEMA: ${{ secrets.REPOSEC_DATABRICKS_SCHEMA }} run: | - echo y | dbsqlcli --hostname $DATABRICKS_SERVER_HOSTNAME --http-path $DATABRICKS_HTTP_PATH --access-token $DATABRICKS_TOKEN -e "USE $DATABRICKS_SCHEMA; DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS tabletest; DROP TABLE IF EXISTS integer_test;" + echo y | dbsqlcli --hostname $DATABRICKS_SERVER_HOSTNAME --http-path $DATABRICKS_HTTP_PATH --access-token $DATABRICKS_TOKEN -e "USE $DATABRICKS_SCHEMA; DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS tabletest; DROP TABLE IF EXISTS integer_table;" poetry run pytest tests/sqlalchemy/test_full_sa.py::IntegerTest --dburi "databricks+thrift://token:$DATABRICKS_TOKEN@$DATABRICKS_SERVER_HOSTNAME/$DATABRICKS_SCHEMA?http_path=$DATABRICKS_HTTP_PATH" - echo y | dbsqlcli --hostname $DATABRICKS_SERVER_HOSTNAME --http-path $DATABRICKS_HTTP_PATH --access-token $DATABRICKS_TOKEN -e "USE $DATABRICKS_SCHEMA; DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS tabletest; DROP TABLE IF EXISTS integer_test;" + echo y | dbsqlcli --hostname $DATABRICKS_SERVER_HOSTNAME --http-path $DATABRICKS_HTTP_PATH --access-token $DATABRICKS_TOKEN -e "USE $DATABRICKS_SCHEMA; DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS tabletest; DROP TABLE IF EXISTS integer_table;" From 66554733b398557b7b6d79633441522c3d6089c1 Mon Sep 17 00:00:00 2001 From: George Chow Date: Tue, 26 Jul 2022 18:33:05 -0700 Subject: [PATCH 24/30] restrict to the dev branch for the while --- .github/workflows/sqlalchemy-dialect.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml index 221334d6e..3a6aef814 100644 --- a/.github/workflows/sqlalchemy-dialect.yml +++ b/.github/workflows/sqlalchemy-dialect.yml @@ -1,6 +1,8 @@ name: SQLAlchemy dialect test on: push: + branches: + - sqlalchemy-dev paths: - Makefile - src/databricks/sqlalchemy From 629a510e3438a755b4f687f313cfe2ba9a59160c Mon Sep 17 00:00:00 2001 From: George Chow Date: Thu, 4 Aug 2022 16:30:16 -0700 Subject: [PATCH 25/30] various minor items -clean up Makefile for pytest -explore passing in --requirements to help with suite run -exploration of DDLCompiler --- Makefile | 45 ++-- src/databricks/sqlalchemy/dialect.py | 126 ++++++++++- src/databricks/sqlalchemy/requirements.py | 21 ++ .../sqlalchemy/sample-app-insert.py | 77 +++++++ tests/sqlalchemy/test_full_sa.py | 108 ++++++++-
tests/test_dialect.py | 210 ++++++++++++++++++ 6 files changed, 557 insertions(+), 30 deletions(-) create mode 100644 src/databricks/sqlalchemy/sample-app-insert.py create mode 100644 tests/test_dialect.py diff --git a/Makefile b/Makefile index 348c06d72..a3f33a5ed 100644 --- a/Makefile +++ b/Makefile @@ -15,6 +15,9 @@ SUITE_PATH=tests/sqlalchemy SUITE=test_suite.py +REQ=--requirements src.databricks.sqlalchemy.requirements:Requirements +DBURI=--dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + .PHONY=all clean showtables full reflection simple str num drop_simpletest drop_reflectiontest @@ -30,68 +33,69 @@ showtables: full: $(PYTEST) $(SUITE_PATH) \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" \ + $(DBURI) \ --log-file=~/.pytestlogs/full.log -sa-bool: drop_booleantest +sa-bool: drop_booleantest drop_t $(PYTEST) $(SUITE_PATH)/test_full_sa.py::BooleanTest \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) sa-date: drop_datetest $(PYTEST) $(SUITE_PATH)/test_full_sa.py::DateTest \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) sa-dt: drop_datetimetest $(PYTEST) $(SUITE_PATH)/test_full_sa.py::DateTimeTest \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) sa-int: drop_integertest $(PYTEST) $(SUITE_PATH)/test_full_sa.py::IntegerTest \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) sa-num: drop_numerictest $(PYTEST) $(SUITE_PATH)/test_full_sa.py::NumericTest \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) sa-str: drop_stringtest $(PYTEST) $(SUITE_PATH)/test_full_sa.py::StringTest \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) sa-ddl: drop_tableddl $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(REQ) \ + $(DBURI) sa-ddl1: drop_tableddl $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest:test_create_table \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) sa-ddl2: drop_tableddl $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest:test_create_table_schema \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) sa-ddl3: drop_tableddl $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest:test_drop_table \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) sa-join: drop_jointest $(PYTEST) $(SUITE_PATH)/test_full_sa.py::JoinTest \ - --dburi 
"databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) reflection: $(PYTEST) $(SUITE_PATH)/$(SUITE)::ReflectionTest \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) num: $(PYTEST) $(SUITE_PATH)/$(SUITE)::ReflectionTest::test_numtypes \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) str: $(PYTEST) $(SUITE_PATH)/$(SUITE)::ReflectionTest::test_strtypes \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) simple: $(PYTEST) $(SUITE_PATH)/$(SUITE)::SimpleTest \ - --dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" + $(DBURI) # clean up after SimpleTest run drop_simpletest: @@ -111,7 +115,7 @@ drop_datetest: drop_date_table drop_datetimetest: drop_date_table -drop_integertest: drop_t drop_tabletest +drop_integertest: drop_t drop_tabletest drop_integer_table drop_numerictest: drop_t drop_tabletest @@ -121,7 +125,7 @@ drop_tableddl: drop__test_table drop_test_table drop_jointest: drop_a drop_b - + drop_t: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS t;" @@ -146,6 +150,9 @@ drop_b: drop_date_table: echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS date_table;" +drop_integer_table: + echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS integer_table;" + # these two schemas are baked into SQLAlchemy's test suite satestdb: diff --git a/src/databricks/sqlalchemy/dialect.py b/src/databricks/sqlalchemy/dialect.py index 1f87df6ee..b075c85dd 100644 --- a/src/databricks/sqlalchemy/dialect.py +++ b/src/databricks/sqlalchemy/dialect.py @@ -33,6 +33,11 @@ class DatabricksDate(DatabricksStringTypeBase): """Translates date strings to date objects""" impl = types.DATE + # ref: https://docs.sqlalchemy.org/en/14/core/custom_types.html + def process_bind_param(self, value, dialect): + # handle string + return "PREFIX:" + value + def process_result_value(self, value, dialect): return processors.str_to_date(value) @@ -201,14 +206,121 @@ def visit_primary_key_constraint(self, constraint, **kw): def visit_foreign_key_constraint(self, constraint, **kw): return "" - # stripped down from DDLCompiler::get_column_specification - # def get_column_specification(self, column, **kwargs): - # colspec = ( - # self.preparer.format_column(column) - # + " " - # ) - # return colspec + def visit_unique_constraint(self, constraint, **kw): + return "" + # def visit_create_table(self, create, **kw): + # # if debugbreakpoint: + # # breakpoint() + + # table = create.element + # preparer = self.preparer + + # text = "\nCREATE " + # if table._prefixes: + # text += " ".join(table._prefixes) + " " + + # text += "TABLE " + # if create.if_not_exists: + # text += "IF NOT EXISTS " + + # text += preparer.format_table(table) + " " + + # create_table_suffix = self.create_table_suffix(table) + # if create_table_suffix: + # text += create_table_suffix + " " + + # text += "(" + + # separator = "\n" + + # # if only one primary key, specify it along with the column + # first_pk = False + # for create_column in create.columns: + # column = create_column.element + # try: + # processed = self.process( + # 
create_column, first_pk=column.primary_key and not first_pk + # ) + # if processed is not None: + # text += separator + # separator = ", \n" + # text += "\t" + processed + # if column.primary_key: + # first_pk = True + # except exc.CompileError as ce: + # raise exc.CompileError( + # "(in table '%s', column '%s'): %s" + # % (table.description, column.name, ce.args[0]) + # ) from ce + + # const = self.create_table_constraints( + # table, + # _include_foreign_key_constraints=create.include_foreign_key_constraints, # noqa + # ) + # if const: + # text += separator + "\t" + const + + # text += "\n)%s\n\n" % self.post_create_table(table) + # return text + + # def visit_create_column(self, create, first_pk=False, **kw): + # # if debugbreakpoint: + # # breakpoint() + + # column = create.element + + # if column.system: + # return None + + # text = self.get_column_specification(column, first_pk=first_pk) + # const = " ".join( + # self.process(constraint) for constraint in column.constraints + # ) + # if const: + # text += " " + const + + # return text + + # def create_table_constraints( + # self, table, _include_foreign_key_constraints=None, **kw + # ): + # if debugbreakpoint: + # breakpoint() + + # # On some DB order is significant: visit PK first, then the + # # other constraints (engine.ReflectionTest.testbasic failed on FB2) + # constraints = [] + # if table.primary_key: + # constraints.append(table.primary_key) + + # all_fkcs = table.foreign_key_constraints + # if _include_foreign_key_constraints is not None: + # omit_fkcs = all_fkcs.difference(_include_foreign_key_constraints) + # else: + # omit_fkcs = set() + + # constraints.extend( + # [ + # c + # for c in table._sorted_constraints + # if c is not table.primary_key and c not in omit_fkcs + # ] + # ) + + # return ", \n\t".join( + # p + # for p in ( + # self.process(constraint) + # for constraint in constraints + # if (constraint._should_create_for_compiler(self)) + # and ( + # not self.dialect.supports_alter + # or not getattr(constraint, "use_alter", False) + # ) + # ) + # if p is not None + # ) # The following lookup table is by DATA_TYPE and is rather nice since Decimal can be detected directly. # However, as DATA_TYPE is rather obtuse... 
going forward, we switched to use COLUMN_TYPE_NAME instead (the table below) diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py index 6ce986887..8621cbd8d 100644 --- a/src/databricks/sqlalchemy/requirements.py +++ b/src/databricks/sqlalchemy/requirements.py @@ -16,3 +16,24 @@ class Requirements(SuiteRequirements): def two_phase_transactions(self): # Databricks SQL doesn't support transactions return exclusions.closed() + + @property + def table_ddl_if_exists(self): + """target platform supports IF NOT EXISTS / IF EXISTS for tables.""" + + return exclusions.open() + + @property + def foreign_keys(self): + # Databricks SQL doesn't support foreign keys + return exclusions.closed() + + @property + def self_referential_foreign_keys(self): + + return exclusions.closed() + + @property + def foreign_key_ddl(self): + + return exclusions.closed() diff --git a/src/databricks/sqlalchemy/sample-app-insert.py b/src/databricks/sqlalchemy/sample-app-insert.py new file mode 100644 index 000000000..fcb3acc9f --- /dev/null +++ b/src/databricks/sqlalchemy/sample-app-insert.py @@ -0,0 +1,77 @@ +# sample-app-insert.py +# +# Program to demonstrate the simplest INSERT statement +# + +import os +import random + +from sqlalchemy import create_engine +from sqlalchemy import MetaData +from sqlalchemy import select, insert, Table, Column +from sqlalchemy import SMALLINT, Integer, BigInteger, Float, DECIMAL, BOOLEAN + +from sqlalchemy.dialects.mysql.types import TINYINT, DOUBLE # borrow MySQL's impls +from sqlalchemy import String, DATE, TIMESTAMP + +# pickup settings from the env +server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME") +http_path = os.getenv("DATABRICKS_HTTP_PATH") +access_token = os.getenv("DATABRICKS_TOKEN") +default_schema = os.getenv("DATABRICKS_SCHEMA") + +# use echo=True for verbose log +engine = create_engine(f"databricks+thrift://token:{access_token}@{server_hostname}/{default_schema}?http_path={http_path}", echo=False, future=True) + +metadata_obj = MetaData() + +# NB: sample_numtypes is a pre-created/populated table +numtypes = "sample_numtypes" + +t1 = Table( + numtypes, + metadata_obj, + Column('f_byte', TINYINT), + Column('f_short', SMALLINT), + Column('f_int', Integer), + Column('f_long', BigInteger), + Column('f_float', Float), + Column('f_double', DOUBLE), + Column('f_decimal', DECIMAL), + Column('f_boolean', BOOLEAN) +) + +with engine.connect() as conn: + stmt = insert(t1).values(f_byte=42, + f_short=31415, + f_int=random.randint(1,1001002003), + f_long=4001002003004005006, + f_float=1.41, + f_double=1.6666, + f_decimal=2.71828, + f_boolean=False) + + print(f"Attempting to execute: {stmt}\n") + + print(f"Rows from table {numtypes}") + for row in conn.execute(stmt): + print(row) + + +# NB: sample_strtypes is a pre-created/populated table +strtypes = "sample_strtypes" + +with engine.connect() as conn: + t2 = Table( + strtypes, + metadata_obj, + autoload_with=conn + ) + + # stmt = insert(t2).values(f_string='Antarctic expedition', f_date='1911-12-14', f_timestamp='1911-12-14T15:00', f_interval='4 0:00:00' ) + stmt = insert(t2).values(f_string='Antarctic expedition', f_date='1911-12-14', f_timestamp='1911-12-14T15:00') + print(f"Attempting to execute: {stmt}\n") + + print(f"Rows from table {strtypes}") + for row in conn.execute(stmt): + print(row) diff --git a/tests/sqlalchemy/test_full_sa.py b/tests/sqlalchemy/test_full_sa.py index e5550fe36..31358895f 100644 --- a/tests/sqlalchemy/test_full_sa.py +++ b/tests/sqlalchemy/test_full_sa.py @@ 
-4,7 +4,10 @@ from sqlalchemy.testing.suite import StringTest as _StringTest from sqlalchemy.testing.suite import NumericTest as _NumericTest from sqlalchemy.testing.suite import BooleanTest as _BooleanTest + from sqlalchemy.testing.suite import DateTest as _DateTest +# from sqlalchemy.testing.suite import _LiteralRoundTripFixture + from sqlalchemy.testing.suite import DateTimeTest as _DateTimeTest @@ -12,14 +15,113 @@ from sqlalchemy.testing.suite import JoinTest as _JoinTest +# class _MyDateFixture(_LiteralRoundTripFixture, fixtures.TestBase): +# compare = None + +# @classmethod +# def define_tables(cls, metadata): +# # class Decorated(TypeDecorator): +# # impl = cls.datatype +# # cache_ok = True + +# Table( +# "date_table", +# metadata, +# Column( +# "id", Integer, primary_key=True, test_needs_autoincrement=True +# ), +# Column("date_data", cls.datatype), +# # Column("decorated_date_data", Decorated), +# ) + +# @testing.requires.datetime_implicit_bound +# def test_select_direct(self, connection): +# result = connection.scalar(select(literal(self.data))) +# eq_(result, self.data) + +# def test_round_trip(self, connection): +# date_table = self.tables.date_table + +# connection.execute( +# date_table.insert(), {"id": 1, "date_data": self.data} +# ) + +# row = connection.execute(select(date_table.c.date_data)).first() + +# compare = self.compare or self.data +# eq_(row, (compare,)) +# assert isinstance(row[0], type(compare)) + +# def off_test_round_trip_decorated(self, connection): +# date_table = self.tables.date_table + +# connection.execute( +# date_table.insert(), {"id": 1, "decorated_date_data": self.data} +# ) + +# row = connection.execute( +# select(date_table.c.decorated_date_data) +# ).first() + +# compare = self.compare or self.data +# eq_(row, (compare,)) +# assert isinstance(row[0], type(compare)) + +# def test_null(self, connection): +# date_table = self.tables.date_table + +# connection.execute(date_table.insert(), {"id": 1, "date_data": None}) + +# row = connection.execute(select(date_table.c.date_data)).first() +# eq_(row, (None,)) + +# @testing.requires.datetime_literals +# def test_literal(self, literal_round_trip): +# compare = self.compare or self.data + +# literal_round_trip( +# self.datatype, [self.data], [compare], compare=compare +# ) + +# @testing.requires.standalone_null_binds_whereclause +# def test_null_bound_comparison(self): +# # this test is based on an Oracle issue observed in #4886. +# # passing NULL for an expression that needs to be interpreted as +# # a certain type, does the DBAPI have the info it needs to do this. 
+# date_table = self.tables.date_table +# with config.db.begin() as conn: +# result = conn.execute( +# date_table.insert(), {"id": 1, "date_data": self.data} +# ) +# id_ = result.inserted_primary_key[0] +# stmt = select(date_table.c.id).where( +# case( +# ( +# bindparam("foo", type_=self.datatype) != None, +# bindparam("foo", type_=self.datatype), +# ), +# else_=date_table.c.date_data, +# ) +# == date_table.c.date_data +# ) + +# row = conn.execute(stmt, {"foo": None}).first() +# eq_(row[0], id_) -class BooleanTest(_BooleanTest): +class DateTest(_DateTest): pass + # __requires__ = ("date",) + # __backend__ = True + # datatype = Date + # data = datetime.date(2012, 10, 15) -class DateTest(_DateTest): + + +class BooleanTest(_BooleanTest): pass + class DateTimeTest(_DateTimeTest): pass @@ -32,11 +134,9 @@ class NumericTest(_NumericTest): class StringTest(_StringTest): pass - class TableDDLTest(_TableDDLTest): pass - class JoinTest(_JoinTest): pass diff --git a/tests/test_dialect.py b/tests/test_dialect.py new file mode 100644 index 000000000..a4308d57a --- /dev/null +++ b/tests/test_dialect.py @@ -0,0 +1,210 @@ +import unittest +from unittest.mock import Mock + +import pyarrow as pa + +import databricks.sql.client as client +from databricks.sql.utils import ExecuteResponse, ArrowQueue + + +class FetchTests(unittest.TestCase): + """ + Unit tests for checking the fetch logic. + """ + + @staticmethod + def make_arrow_table(batch): + n_cols = len(batch[0]) if batch else 0 + schema = pa.schema({"col%s" % i: pa.uint32() for i in range(n_cols)}) + cols = [[batch[row][col] for row in range(len(batch))] for col in range(n_cols)] + return schema, pa.Table.from_pydict(dict(zip(schema.names, cols)), schema=schema) + + @staticmethod + def make_arrow_queue(batch): + _, table = FetchTests.make_arrow_table(batch) + queue = ArrowQueue(table, len(batch)) + return queue + + @staticmethod + def make_dummy_result_set_from_initial_results(initial_results): + # If the initial results have been set, then we should never try and fetch more + schema, arrow_table = FetchTests.make_arrow_table(initial_results) + arrow_queue = ArrowQueue(arrow_table, len(initial_results), 0) + rs = client.ResultSet( + connection=Mock(), + thrift_backend=None, + execute_response=ExecuteResponse( + status=None, + has_been_closed_server_side=True, + has_more_rows=False, + description=Mock(), + command_handle=None, + arrow_queue=arrow_queue, + arrow_schema_bytes=schema.serialize().to_pybytes())) + num_cols = len(initial_results[0]) if initial_results else 0 + rs.description = [(f'col{col_id}', 'integer', None, None, None, None, None) + for col_id in range(num_cols)] + return rs + + @staticmethod + def make_dummy_result_set_from_batch_list(batch_list): + batch_index = 0 + + def fetch_results(op_handle, max_rows, max_bytes, expected_row_start_offset, + arrow_schema_bytes, description): + nonlocal batch_index + results = FetchTests.make_arrow_queue(batch_list[batch_index]) + batch_index += 1 + + return results, batch_index < len(batch_list) + + mock_thrift_backend = Mock() + mock_thrift_backend.fetch_results = fetch_results + num_cols = len(batch_list[0][0]) if batch_list and batch_list[0] else 0 + + rs = client.ResultSet( + connection=Mock(), + thrift_backend=mock_thrift_backend, + execute_response=ExecuteResponse( + status=None, + has_been_closed_server_side=False, + has_more_rows=True, + description=[(f'col{col_id}', 'integer', None, None, None, None, None) + for col_id in range(num_cols)], + command_handle=None, + arrow_queue=None, + 
arrow_schema_bytes=None)) + return rs + + def assertEqualRowValues(self, actual, expected): + self.assertEqual(len(actual) if actual else 0, len(expected) if expected else 0) + for act, exp in zip(actual, expected): + self.assertSequenceEqual(act, exp) + + def test_fetchmany_with_initial_results(self): + # Fetch all in one go + initial_results_1 = [[1], [2], [3]] # This is a list of rows, each row with 1 col + dummy_result_set = self.make_dummy_result_set_from_initial_results(initial_results_1) + self.assertEqualRowValues(dummy_result_set.fetchmany(3), [[1], [2], [3]]) + + # Fetch in small amounts + initial_results_2 = [[1], [2], [3], [4]] + dummy_result_set = self.make_dummy_result_set_from_initial_results(initial_results_2) + self.assertEqualRowValues(dummy_result_set.fetchmany(1), [[1]]) + self.assertEqualRowValues(dummy_result_set.fetchmany(2), [[2], [3]]) + self.assertEqualRowValues(dummy_result_set.fetchmany(1), [[4]]) + + # Fetch too many + initial_results_3 = [[2], [3]] + dummy_result_set = self.make_dummy_result_set_from_initial_results(initial_results_3) + self.assertEqualRowValues(dummy_result_set.fetchmany(5), [[2], [3]]) + + # Empty results + initial_results_4 = [[]] + dummy_result_set = self.make_dummy_result_set_from_initial_results(initial_results_4) + self.assertEqualRowValues(dummy_result_set.fetchmany(0), []) + + def test_fetch_many_without_initial_results(self): + # Fetch all in one go; single batch + batch_list_1 = [[[1], [2], [3]]] # This is a list of one batch of rows, each row with 1 col + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_1) + self.assertEqualRowValues(dummy_result_set.fetchmany(3), [[1], [2], [3]]) + + # Fetch all in one go; multiple batches + batch_list_2 = [[[1], [2]], [[3]]] # This is a list of two batches of rows + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_2) + self.assertEqualRowValues(dummy_result_set.fetchmany(3), [[1], [2], [3]]) + + # Fetch in small amounts; single batch + batch_list_3 = [[[1], [2], [3]]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_3) + self.assertEqualRowValues(dummy_result_set.fetchmany(1), [[1]]) + self.assertEqualRowValues(dummy_result_set.fetchmany(2), [[2], [3]]) + + # Fetch in small amounts; multiple batches + batch_list_4 = [[[1], [2]], [[3], [4], [5]]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_4) + self.assertEqualRowValues(dummy_result_set.fetchmany(1), [[1]]) + self.assertEqualRowValues(dummy_result_set.fetchmany(3), [[2], [3], [4]]) + self.assertEqualRowValues(dummy_result_set.fetchmany(2), [[5]]) + + # Fetch too many; single batch + batch_list_5 = [[[1], [2], [3], [4]]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_5) + self.assertEqualRowValues(dummy_result_set.fetchmany(6), [[1], [2], [3], [4]]) + + # Fetch too many; multiple batches + batch_list_6 = [[[1]], [[2], [3], [4]], [[5], [6]]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_6) + self.assertEqualRowValues(dummy_result_set.fetchmany(100), [[1], [2], [3], [4], [5], [6]]) + + # Fetch 0; 1 empty batch + batch_list_7 = [[]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_7) + self.assertEqualRowValues(dummy_result_set.fetchmany(0), []) + + # Fetch 0; lots of batches + batch_list_8 = [[[1], [2]], [[3]]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_8) + self.assertEqualRowValues(dummy_result_set.fetchmany(0), []) + + def 
test_fetchall_with_initial_results(self): + initial_results_1 = [[1], [2], [3]] + dummy_result_set = self.make_dummy_result_set_from_initial_results(initial_results_1) + self.assertEqualRowValues(dummy_result_set.fetchall(), [[1], [2], [3]]) + + def test_fetchall_without_initial_results(self): + # Fetch all, single batch + batch_list_1 = [[[1], [2], [3]]] # This is a list of one batch of rows, each row with 1 col + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_1) + self.assertEqualRowValues(dummy_result_set.fetchall(), [[1], [2], [3]]) + + # Fetch all, multiple batches + batch_list_2 = [[[1], [2]], [[3]], [[4], [5], [6]]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_2) + self.assertEqualRowValues(dummy_result_set.fetchall(), [[1], [2], [3], [4], [5], [6]]) + + batch_list_3 = [[]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_3) + self.assertEqualRowValues(dummy_result_set.fetchall(), []) + + def test_fetchmany_fetchall_with_initial_results(self): + initial_results_1 = [[1], [2], [3]] + dummy_result_set = self.make_dummy_result_set_from_initial_results(initial_results_1) + self.assertEqualRowValues(dummy_result_set.fetchmany(2), [[1], [2]]) + self.assertEqualRowValues(dummy_result_set.fetchall(), [[3]]) + + def test_fetchmany_fetchall_without_initial_results(self): + batch_list_1 = [[[1], [2], [3]]] # This is a list of one batch of rows, each row with 1 col + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_1) + self.assertEqualRowValues(dummy_result_set.fetchmany(2), [[1], [2]]) + self.assertEqualRowValues(dummy_result_set.fetchall(), [[3]]) + + batch_list_2 = [[[1], [2]], [[3], [4]], [[5], [6], [7]]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_2) + self.assertEqualRowValues(dummy_result_set.fetchmany(3), [[1], [2], [3]]) + self.assertEqualRowValues(dummy_result_set.fetchall(), [[4], [5], [6], [7]]) + + def test_fetchone_with_initial_results(self): + initial_results_1 = [[1], [2], [3]] + dummy_result_set = self.make_dummy_result_set_from_initial_results(initial_results_1) + self.assertSequenceEqual(dummy_result_set.fetchone(), [1]) + self.assertSequenceEqual(dummy_result_set.fetchone(), [2]) + self.assertSequenceEqual(dummy_result_set.fetchone(), [3]) + self.assertEqual(dummy_result_set.fetchone(), None) + + def test_fetchone_without_initial_results(self): + batch_list_1 = [[[1], [2]], [[3]]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_1) + self.assertSequenceEqual(dummy_result_set.fetchone(), [1]) + self.assertSequenceEqual(dummy_result_set.fetchone(), [2]) + self.assertSequenceEqual(dummy_result_set.fetchone(), [3]) + self.assertEqual(dummy_result_set.fetchone(), None) + + batch_list_2 = [[]] + dummy_result_set = self.make_dummy_result_set_from_batch_list(batch_list_2) + self.assertEqual(dummy_result_set.fetchone(), None) + + +if __name__ == '__main__': + unittest.main() From dd79718393a6cef1fda087744b22b5aab1361dbb Mon Sep 17 00:00:00 2001 From: George Chow Date: Wed, 10 Aug 2022 14:55:19 -0700 Subject: [PATCH 26/30] Cleaned up and reformatted Signed-off-by: George Chow --- pyproject.toml | 3 - src/databricks/sqlalchemy/dialect.py | 398 +++++++-------------------- tests/sqlalchemy/README.md | 21 -- tests/sqlalchemy/conftest.py | 7 - tests/sqlalchemy/integration.py | 72 ++++- tests/sqlalchemy/test_full_sa.py | 142 ---------- tests/sqlalchemy/test_suite.py | 208 -------------- 7 files changed, 157 insertions(+), 694 
deletions(-) delete mode 100644 tests/sqlalchemy/README.md delete mode 100644 tests/sqlalchemy/conftest.py delete mode 100644 tests/sqlalchemy/test_full_sa.py delete mode 100644 tests/sqlalchemy/test_suite.py diff --git a/pyproject.toml b/pyproject.toml index 46a58aeaa..74defa731 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,9 +18,6 @@ PyHive = "^0.6.5" [tool.poetry.plugins."sqlalchemy.dialects"] "databricks.thrift" = "databricks.sqlalchemy:DatabricksDialect" -[tool.poetry.plugins."sqlalchemy.dialects"] -"databricks.thrift" = "databricks.sqlalchemy:DatabricksDialect" - [tool.poetry.dev-dependencies] pytest = "^7.1.2" mypy = "^0.950" diff --git a/src/databricks/sqlalchemy/dialect.py b/src/databricks/sqlalchemy/dialect.py index b075c85dd..07871d51d 100644 --- a/src/databricks/sqlalchemy/dialect.py +++ b/src/databricks/sqlalchemy/dialect.py @@ -8,8 +8,8 @@ from sqlalchemy import types # we leverage MySQL's implementation of TINYINT and DOUBLE -from sqlalchemy.types import Integer, BigInteger, SmallInteger, Float, DECIMAL, Boolean; -from sqlalchemy.types import String, DATE, TIMESTAMP; +from sqlalchemy.types import Integer, BigInteger, SmallInteger, Float, DECIMAL, Boolean +from sqlalchemy.types import String, DATE, TIMESTAMP from sqlalchemy import util from sqlalchemy import exc @@ -20,121 +20,29 @@ from typing import AnyStr -# provide a way to debug +# provide a way to debug debugbreakpoint = os.getenv("DATABRICKS_DIALECT_DEBUG") or False -# styled after HiveStringTypeBase; removed process_bind_param(self, value, dialect) -class DatabricksStringTypeBase(types.TypeDecorator): - impl = types.String - -# styled after HiveDate -class DatabricksDate(DatabricksStringTypeBase): - """Translates date strings to date objects""" - impl = types.DATE - - # ref: https://docs.sqlalchemy.org/en/14/core/custom_types.html - def process_bind_param(self, value, dialect): - # handle string - return "PREFIX:" + value - - def process_result_value(self, value, dialect): - return processors.str_to_date(value) - - def result_processor(self, dialect, coltype): - def process(value): - if isinstance(value, datetime.datetime): - return value.date() - elif isinstance(value, datetime.date): - return value - elif value is not None: - return parse(value).date() - else: - return None - - return process - - def adapt(self, impltype, **kwargs): - return self.impl - -# styled after HiveeTimestamp -class DatabricksTimestamp(DatabricksStringTypeBase): - """Translates timestamp strings to datetime objects""" - impl = types.TIMESTAMP - - def process_result_value(self, value, dialect): - return processors.str_to_datetime(value) - - def result_processor(self, dialect, coltype): - def process(value): - if isinstance(value, datetime.datetime): - return value - elif value is not None: - return parse(value) - else: - return None - - return process - - def adapt(self, impltype, **kwargs): - return self.impl - - class DatabricksIdentifierPreparer(compiler.IdentifierPreparer): # SparkSQL identifier specification: # ref: https://spark.apache.org/docs/latest/sql-ref-identifier.html - legal_characters = re.compile(r'^[A-Z0-9_]+$', re.I) + legal_characters = re.compile(r"^[A-Z0-9_]+$", re.I) def __init__(self, dialect): super(DatabricksIdentifierPreparer, self).__init__( dialect, - initial_quote='`', + initial_quote="`", ) - @util.preload_module("sqlalchemy.sql.naming") - def format_constraint(self, constraint, _alembic_quote=True): - if debugbreakpoint: - breakpoint() - - naming = util.preloaded.sql_naming - - if constraint.name is 
elements._NONE_NAME: - name = naming._constraint_name_for_table( - constraint, constraint.table - ) - - if name is None: - return None - else: - name = constraint.name - - if constraint.__visit_name__ == "index": - return self.truncate_and_render_index_name( - name, _alembic_quote=_alembic_quote - ) - else: - return self.truncate_and_render_constraint_name( - name, _alembic_quote=_alembic_quote - ) -class DatabricksExecutionContext(default.DefaultExecutionContext): - # There doesn't seem to be any override of DefaultExecutionContext required - # but I will nonetheless introduce this class for clarity - - # TODO: revisit server-side cursors - # ref: https://docs.databricks.com/dev-tools/python-sql-connector.html#manage-cursors-and-connections - pass - - -# -# this class provides visitors that emit the dialect-specific keywords for SQLAlchemy's SQL AST -# +# this class provides visitors that emit the dialect-specific keywords for SQLAlchemy's SQL expression parse tree class DatabricksTypeCompiler(compiler.GenericTypeCompiler): # ref: https://spark.apache.org/docs/latest/sql-ref-datatypes.html def visit_TINYINT(self, type_): - return 'TINYINT' + return "TINYINT" def visit_SMALLINT(self, type_, **kw): return "SMALLINT" @@ -188,149 +96,31 @@ def visit_BOOLEAN(self, type_, **kw): def visit_STRING(self, type_, **kw): return "STRING" - # TODO: why is this needed even though there's no occurence of VARCHAR? + # TODO: why is this needed even though there's no occurence of VARCHAR? def visit_VARCHAR(self, type_, **kw): return "STRING" -class DatabricksCompiler(compiler.SQLCompiler): - pass +class DatabricksDDLCompiler(compiler.DDLCompiler): + # Spark doesn't support any table constraint at present so ignore any and all declared constraints + # Once information constraint is complete, this will need to be implemented. 
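A minimal sketch of what the empty `create_table_constraints` below buys, assuming the dialect is importable as `databricks.sqlalchemy`: a table declared with a primary key should compile to column DDL only, since Spark SQL rejects constraint clauses.

```python
from sqlalchemy import Column, Integer, MetaData, Table
from sqlalchemy.schema import CreateTable

from databricks.sqlalchemy import DatabricksDialect

t = Table("demo", MetaData(), Column("id", Integer, primary_key=True))

# With create_table_constraints returning an empty result, the rendered DDL
# should carry no PRIMARY KEY clause; roughly: CREATE TABLE demo (id INTEGER NOT NULL)
print(CreateTable(t).compile(dialect=DatabricksDialect()))
```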
+ # This is needed for Connection.create_all() + def create_table_constraints( + self, table, _include_foreign_key_constraints=None, **kw + ): + return [] -class DatabricksDDLCompiler(compiler.DDLCompiler): - # Spark has no primary key support so ignore whatever constraint there is - def visit_primary_key_constraint(self, constraint, **kw): - return "" - - def visit_foreign_key_constraint(self, constraint, **kw): - return "" - - def visit_unique_constraint(self, constraint, **kw): - return "" - - # def visit_create_table(self, create, **kw): - # # if debugbreakpoint: - # # breakpoint() - - # table = create.element - # preparer = self.preparer - - # text = "\nCREATE " - # if table._prefixes: - # text += " ".join(table._prefixes) + " " - - # text += "TABLE " - # if create.if_not_exists: - # text += "IF NOT EXISTS " - - # text += preparer.format_table(table) + " " - - # create_table_suffix = self.create_table_suffix(table) - # if create_table_suffix: - # text += create_table_suffix + " " - - # text += "(" - - # separator = "\n" - - # # if only one primary key, specify it along with the column - # first_pk = False - # for create_column in create.columns: - # column = create_column.element - # try: - # processed = self.process( - # create_column, first_pk=column.primary_key and not first_pk - # ) - # if processed is not None: - # text += separator - # separator = ", \n" - # text += "\t" + processed - # if column.primary_key: - # first_pk = True - # except exc.CompileError as ce: - # raise exc.CompileError( - # "(in table '%s', column '%s'): %s" - # % (table.description, column.name, ce.args[0]) - # ) from ce - - # const = self.create_table_constraints( - # table, - # _include_foreign_key_constraints=create.include_foreign_key_constraints, # noqa - # ) - # if const: - # text += separator + "\t" + const - - # text += "\n)%s\n\n" % self.post_create_table(table) - # return text - - # def visit_create_column(self, create, first_pk=False, **kw): - # # if debugbreakpoint: - # # breakpoint() - - # column = create.element - - # if column.system: - # return None - - # text = self.get_column_specification(column, first_pk=first_pk) - # const = " ".join( - # self.process(constraint) for constraint in column.constraints - # ) - # if const: - # text += " " + const - - # return text - - # def create_table_constraints( - # self, table, _include_foreign_key_constraints=None, **kw - # ): - # if debugbreakpoint: - # breakpoint() - - # # On some DB order is significant: visit PK first, then the - # # other constraints (engine.ReflectionTest.testbasic failed on FB2) - # constraints = [] - # if table.primary_key: - # constraints.append(table.primary_key) - - # all_fkcs = table.foreign_key_constraints - # if _include_foreign_key_constraints is not None: - # omit_fkcs = all_fkcs.difference(_include_foreign_key_constraints) - # else: - # omit_fkcs = set() - - # constraints.extend( - # [ - # c - # for c in table._sorted_constraints - # if c is not table.primary_key and c not in omit_fkcs - # ] - # ) - - # return ", \n\t".join( - # p - # for p in ( - # self.process(constraint) - # for constraint in constraints - # if (constraint._should_create_for_compiler(self)) - # and ( - # not self.dialect.supports_alter - # or not getattr(constraint, "use_alter", False) - # ) - # ) - # if p is not None - # ) - -# The following lookup table is by DATA_TYPE and is rather nice since Decimal can be detected directly. -# However, as DATA_TYPE is rather obtuse... 
going forward, we switched to use COLUMN_TYPE_NAME instead (the table below) +# I started with the following lookup table (indexed by DATA_TYPE) and it is rather nice since Decimal can be detected directly. +# However, as DATA_TYPE values are rather obtuse, I switched to use COLUMN_TYPE_NAME instead (the table below) # _type_map = { # -6: types.Integer, # tiny_int # 5: types.Integer, # small_int # 4: types.Integer, # int # -5: types.BigInteger, # big_int -# 6: types.Float, -# 3: types.DECIMAL, +# 6: types.Float, +# 3: types.DECIMAL, # 16: types.Boolean, # 12: types.String, # 91: DatabricksDate, # date @@ -339,37 +129,41 @@ def visit_unique_constraint(self, constraint, **kw): # } -# This lookup is by TYPE_NAME which is easier to maintain and likely safer in the long term. +# This lookup is by TYPE_NAME which is easier to maintain and likely safer in the long term. # NB: Decimal is explicitly excluded here as each occurence's TYPE_NAME includes the occurence's precision and scale # See/refer to COLUMN_TYPE_DECIMAL below. # this map SQL types onto Python representation; note the deliberate omission of Decimal! _type_map = { - 'TINYINT': types.Integer, # tiny_int - 'SMALLINT': types.Integer, # small_int - 'INT': types.Integer, # int - 'BIGINT': types.BigInteger, # big_int - 'FLOAT': types.Float, - 'DOUBLE': types.Float, # double fits into a Python float - 'BOOLEAN': types.Boolean, - 'STRING': types.String, - 'DATE': types.DATE, # date - 'TIMESTAMP': types.TIMESTAMP, # timestamp + "TINYINT": types.Integer, # tiny_int + "SMALLINT": types.Integer, # small_int + "INT": types.Integer, # int + "BIGINT": types.BigInteger, # big_int + "FLOAT": types.Float, + "DOUBLE": types.Float, # double fits into a Python float + "BOOLEAN": types.Boolean, + "STRING": types.String, + "DATE": types.DATE, # date + "TIMESTAMP": types.TIMESTAMP, # timestamp } -# this is used to match decimal's DATA_TYPE; it will map to types.DECIMAL -COLUMN_TYPE_DECIMAL=3 +# this is used to match a column's DATA_TYPE for Decimal; it will map to types.DECIMAL +COLUMN_TYPE_DECIMAL = 3 # COLUMN_TYPE_INTERVAL=1111 class DatabricksDialect(default.DefaultDialect): # Possible attributes are defined here: https://docs.sqlalchemy.org/en/14/core/internals.html#sqlalchemy.engine.Dialect name: str = "databricks" - driver: str= "thrift" + driver: str = "thrift" default_schema_name: str = "default" preparer = DatabricksIdentifierPreparer - execution_ctx_cls = DatabricksExecutionContext - statement_compiler = DatabricksCompiler + + # TODO: revisit server-side cursors + # ref: https://docs.databricks.com/dev-tools/python-sql-connector.html#manage-cursors-and-connections + execution_ctx_cls = default.DefaultExecutionContext + + statement_compiler = compiler.SQLCompiler ddl_compiler = DatabricksDDLCompiler type_compiler = DatabricksTypeCompiler @@ -387,7 +181,7 @@ class DatabricksDialect(default.DefaultDialect): description_encoding = None supports_multivalues_insert = True supports_sane_rowcount = False - + # added based on comments here: https://docs.sqlalchemy.org/en/14/errors.html#error-cprf supports_statement_cache = False @@ -396,17 +190,16 @@ def dbapi(cls): return sql def create_connect_args(self, url: "URL"): - # Expected URI format is: databricks+thrift://token:dapi***@***.cloud.databricks.com/***?http_path=/sql/*** + # URI format is: databricks+thrift://token:{access_token}@{server_hostname}/{schema}?http_path={http_path} kwargs = { "server_hostname": url.host, "access_token": url.password, "http_path": url.query.get("http_path"), - "schema": 
url.database or "default" + "schema": url.database or "default", } return [], kwargs - # TODO: uninvoked code to date def get_schema_names(self, connection, **kwargs): # conn = dbsql.connect( # server_hostname=kwargs['server_hostname'], @@ -416,115 +209,122 @@ def get_schema_names(self, connection, **kwargs): # ) # TODO: look up correct index for TABLE_SCHEM + breakpoint() TABLE_SCHEM = 2 - # if debugbreakpoint: - # breakpoint() - with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: - data = cur.schemas(catalog_name='%').fetchall() + with self.get_driver_connection( + connection + )._dbapi_connection.dbapi_connection.cursor() as cur: + data = cur.schemas(catalog_name="%").fetchall() _schemas = [i[TABLE_SCHEM] for i in data] return _schemas - def get_table_names(self, connection, schema = None, **kwargs): - # if debugbreakpoint: - # breakpoint() - + def get_table_names(self, connection, schema=None, **kwargs): + breakpoint() TABLE_NAME = 2 - with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: + with self.get_driver_connection( + connection + )._dbapi_connection.dbapi_connection.cursor() as cur: data = cur.tables(schema_name=schema).fetchall() _tables = [i[TABLE_NAME] for i in data] return _tables + # This is needed for SQLAlchemy reflection def get_columns(self, connection, table_name, schema=None, **kwargs): # Example row - # Row(TABLE_CAT='hive_metastore', TABLE_SCHEM='george_chow_dbtest', TABLE_NAME='all_types', COLUMN_NAME='f_byte', DATA_TYPE=4, - # TYPE_NAME='INT', COLUMN_SIZE=4, BUFFER_LENGTH=None, DECIMAL_DIGITS=0, NUM_PREC_RADIX=10, - # NULLABLE=1, REMARKS='', COLUMN_DEF=None, SQL_DATA_TYPE=None, SQL_DATETIME_SUB=None, - # CHAR_OCTET_LENGTH=None, ORDINAL_POSITION=0, IS_NULLABLE='YES', SCOPE_CATALOG=None, SCOPE_SCHEMA=None, + # Row(TABLE_CAT='hive_metastore', TABLE_SCHEM='george_chow_dbtest', TABLE_NAME='all_types', COLUMN_NAME='f_byte', DATA_TYPE=4, + # TYPE_NAME='INT', COLUMN_SIZE=4, BUFFER_LENGTH=None, DECIMAL_DIGITS=0, NUM_PREC_RADIX=10, + # NULLABLE=1, REMARKS='', COLUMN_DEF=None, SQL_DATA_TYPE=None, SQL_DATETIME_SUB=None, + # CHAR_OCTET_LENGTH=None, ORDINAL_POSITION=0, IS_NULLABLE='YES', SCOPE_CATALOG=None, SCOPE_SCHEMA=None, # SCOPE_TABLE=None, SOURCE_DATA_TYPE=None, IS_AUTO_INCREMENT='NO') - COLUMN_NAME=3 - COLUMN_TYPE=4 - COLUMN_TYPE_NAME=5 - COLUMN_NULLABLE=17 - COLUMN_COMMENT=11 - COLUMN_AUTOINCREMENT=22 - + COLUMN_NAME = 3 + COLUMN_TYPE = 4 + COLUMN_TYPE_NAME = 5 + COLUMN_NULLABLE = 17 + COLUMN_COMMENT = 11 + COLUMN_AUTOINCREMENT = 22 result = [] - with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: - # if debugbreakpoint: - # breakpoint() + with self.get_driver_connection( + connection + )._dbapi_connection.dbapi_connection.cursor() as cur: data = cur.columns(schema_name=schema, table_name=table_name).fetchall() for i in data: - # if debugbreakpoint: - # breakpoint() - try: - if (i[COLUMN_TYPE] != COLUMN_TYPE_DECIMAL): + if i[COLUMN_TYPE] != COLUMN_TYPE_DECIMAL: coltype = _type_map[i[COLUMN_TYPE_NAME]] else: coltype = types.DECIMAL except KeyError: - util.warn(f"Did not recognize type '{i[COLUMN_TYPE_NAME]}'({i[COLUMN_TYPE]}) of column '{i[COLUMN_NAME]}'") + util.warn( + f"Did not recognize type '{i[COLUMN_TYPE_NAME]}'({i[COLUMN_TYPE]}) of column '{i[COLUMN_NAME]}'" + ) coltype = types.NullType try: - nullable = i[COLUMN_NULLABLE] == 'YES' + nullable = i[COLUMN_NULLABLE] == "YES" except KeyError: - nullable = True; + nullable = True try: - 
autoincrement = i[COLUMN_AUTOINCREMENT] == 'YES' + autoincrement = i[COLUMN_AUTOINCREMENT] == "YES" except KeyError: autoincrement = False # filled-in according to interfaces.py's class ReflectedColumn(TypedDict): - result.append({ - 'name': i[COLUMN_NAME], - 'type': coltype, - 'nullable': nullable, - 'comment': i[COLUMN_COMMENT], - 'autoincrement': autoincrement, - }) + result.append( + { + "name": i[COLUMN_NAME], + "type": coltype, + "nullable": nullable, + "comment": i[COLUMN_COMMENT], + "autoincrement": autoincrement, + } + ) return result - + # This is needed to support Connection.create_all() def has_table( self, connection, table_name, - schema = None, + schema=None, **kwargs, ) -> bool: - # if debugbreakpoint: - # breakpoint() try: - COLUMN_NAME=3 - with self.get_driver_connection(connection)._dbapi_connection.dbapi_connection.cursor() as cur: - data = cur.columns(schema_name=schema or 'default', table_name=table_name).fetchmany(1) + COLUMN_NAME = 3 + # TODO: this following expression is circuitous! + with self.get_driver_connection( + connection + )._dbapi_connection.dbapi_connection.cursor() as cur: + data = cur.columns( + schema_name=schema or "default", table_name=table_name + ).fetchmany(1) # the table exists as long as there's a non-zero number of columns return len(data) > 0 except exc.NoSuchTableError: return False - def get_view_names(self, connection, schema=None, **kwargs): - # Spark has no views - return [] - + # This is needed for SQLAlchemy reflection def get_foreign_keys(self, connection, table_name, schema=None, **kwargs): # Spark has no foreign keys return [] + # This is needed for SQLAlchemy reflection def get_pk_constraint(self, connection, table_name, schema=None, **kwargs): # Spark has no primary keys return [] + # This is needed for SQLAlchemy reflection def get_indexes(self, connection, table_name, schema=None, **kwargs): # TODO: expose partition columns as indices? return [] + # DefaultDialect's default impl delegates to the (PySQL) dbapi_connection which currently raises a NotSupportedError. + # Using a pass here is the laziest implementation (which while semantically wrong) provides barebone dialect utility. + # TODO: I suspect this is the cause for the failure to drop tables... SA is likely relying on rollback to undo the CREATE tables def do_rollback(self, dbapi_connection) -> None: # Spark/Delta transaction only support single-table updates... to simplify things, just skip this for now. pass diff --git a/tests/sqlalchemy/README.md b/tests/sqlalchemy/README.md deleted file mode 100644 index 9916b51ed..000000000 --- a/tests/sqlalchemy/README.md +++ /dev/null @@ -1,21 +0,0 @@ -# Introduction - -This is work-in-progress of a SQLAlchemy dialect for Databricks. - -The dialect is embedded within the Databricks SQL Connector. - -## Test Usage - -Initialize your project with Poetry (e.g., `poetry install`) before continuing with the sample tests here. - -A `Makefile` has been setup to reduce typing. - -Configure the test via environment using the sample `env.template`. 
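For reference, a minimal usage sketch of the URI format that `create_connect_args` above unpacks, assuming the same `DATABRICKS_*` environment variables used throughout this series:

```python
import os

import sqlalchemy

engine = sqlalchemy.create_engine(
    "databricks+thrift://token:{}@{}/{}?http_path={}".format(
        os.getenv("DATABRICKS_TOKEN"),
        os.getenv("DATABRICKS_SERVER_HOSTNAME"),
        os.getenv("DATABRICKS_SCHEMA", "default"),
        os.getenv("DATABRICKS_HTTP_PATH"),
    )
)

# engine-level execute, as in tests/sqlalchemy/integration.py
rows = engine.execute("SELECT id FROM RANGE(10)").fetchall()
assert len(rows) == 10
```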
- -To sample run would be as follows: - -```bash -$ source yourtestenv -$ make simpletest -``` - diff --git a/tests/sqlalchemy/conftest.py b/tests/sqlalchemy/conftest.py deleted file mode 100644 index d95fcd2c8..000000000 --- a/tests/sqlalchemy/conftest.py +++ /dev/null @@ -1,7 +0,0 @@ -from sqlalchemy.dialects import registry -import pytest - -registry.register("databricks.thrift", "databricks.sqlalchemy", "DatabricksDialect") -pytest.register_assert_rewrite("sqlalchemy.testing.assertions") - -from sqlalchemy.testing.plugin.pytestplugin import * \ No newline at end of file diff --git a/tests/sqlalchemy/integration.py b/tests/sqlalchemy/integration.py index 9becc8d6e..96e75063d 100644 --- a/tests/sqlalchemy/integration.py +++ b/tests/sqlalchemy/integration.py @@ -7,40 +7,84 @@ def db_engine(): host = os.getenv("host") http_path = os.getenv("http_path") - api_token = os.getenv("api_token") - engine = sqlalchemy.create_engine(f"databricks+thrift://token:{api_token}@{host}?http_path={http_path}") + access_token = os.getenv("access_token") + schema = os.getenv("schema") or "default" + engine = sqlalchemy.create_engine( + f"databricks+thrift://token:{access_token}@{host}/{schema}?http_path={http_path}" + ) return engine -def test_basic_connection(db_engine): - """Make sure we can connect and run basic query + +def test_constraints(db_engine): + """Make sure we can handle any constraints that's passed in during table declaration. + In the immediate term, the dialect silently ignores them. But when information constraint is + complete, constraints need to be handled. """ + mdo = sqlalchemy.MetaData() + this_moment = datetime.datetime.utcnow().strftime("%s") + + tname = f"integration_test_table_{this_moment}" + tname2 = f"integration_test_table_{this_moment}_items" + + t1 = sqlalchemy.Table( + tname, + mdo, + sqlalchemy.Column("f_primary", sqlalchemy.types.Integer, primary_key=True), + sqlalchemy.Column("f_nullable", sqlalchemy.types.Integer, nullable=False), + sqlalchemy.Column("f_unique", sqlalchemy.types.Integer, unique=True), + ) + + t2 = sqlalchemy.Table( + tname2, + mdo, + sqlalchemy.Column( + "f_foreign", + sqlalchemy.types.Integer, + sqlalchemy.ForeignKey(f"{tname}.f_primary"), + nullable=False, + ), + ) + + mdo.create_all(bind=db_engine, checkfirst=True) + + check_it_exists = db_engine.execute(f"DESCRIBE TABLE EXTENDED {tname}") + + mdo.drop_all(db_engine, checkfirst=True) + + +def test_basic_connection(db_engine): + """Make sure we can connect and run basic query""" + curs = db_engine.execute("SELECT id FROM RANGE(100)") result = curs.fetchall() assert len(result) == 100 + def test_create_and_drop_table(db_engine): """Make sure we can automatically create and drop a table defined with SQLAlchemy's MetaData object + while exercising all supported types. 
""" - + mdo = sqlalchemy.MetaData() this_moment = datetime.datetime.utcnow().strftime("%s") - + tname = f"integration_test_table_{this_moment}" t1 = sqlalchemy.Table( tname, mdo, - sqlalchemy.Column('f_short', sqlalchemy.types.SMALLINT), - sqlalchemy.Column('f_int', sqlalchemy.types.Integer), - sqlalchemy.Column('f_long', sqlalchemy.types.BigInteger), - sqlalchemy.Column('f_float', sqlalchemy.types.Float), - sqlalchemy.Column('f_decimal', sqlalchemy.types.DECIMAL), - sqlalchemy.Column('f_boolean', sqlalchemy.types.BOOLEAN) + sqlalchemy.Column("f_short", sqlalchemy.types.SMALLINT), + sqlalchemy.Column("f_int", sqlalchemy.types.Integer), + sqlalchemy.Column("f_long", sqlalchemy.types.BigInteger), + sqlalchemy.Column("f_float", sqlalchemy.types.Float), + sqlalchemy.Column("f_decimal", sqlalchemy.types.DECIMAL), + sqlalchemy.Column("f_boolean", sqlalchemy.types.BOOLEAN), + sqlalchemy.Column("f_str", sqlalchemy.types.String), ) - mdo.create_all(bind=db_engine,checkfirst=True) + mdo.create_all(bind=db_engine, checkfirst=False) check_it_exists = db_engine.execute(f"DESCRIBE TABLE EXTENDED {tname}") - + mdo.drop_all(db_engine, checkfirst=True) diff --git a/tests/sqlalchemy/test_full_sa.py b/tests/sqlalchemy/test_full_sa.py deleted file mode 100644 index 31358895f..000000000 --- a/tests/sqlalchemy/test_full_sa.py +++ /dev/null @@ -1,142 +0,0 @@ -from sqlalchemy.testing.suite import * - -from sqlalchemy.testing.suite import IntegerTest as _IntegerTest -from sqlalchemy.testing.suite import StringTest as _StringTest -from sqlalchemy.testing.suite import NumericTest as _NumericTest -from sqlalchemy.testing.suite import BooleanTest as _BooleanTest - -from sqlalchemy.testing.suite import DateTest as _DateTest -# from sqlalchemy.testing.suite import _LiteralRoundTripFixture - -from sqlalchemy.testing.suite import DateTimeTest as _DateTimeTest - - -from sqlalchemy.testing.suite import TableDDLTest as _TableDDLTest - -from sqlalchemy.testing.suite import JoinTest as _JoinTest - -# class _MyDateFixture(_LiteralRoundTripFixture, fixtures.TestBase): -# compare = None - -# @classmethod -# def define_tables(cls, metadata): -# # class Decorated(TypeDecorator): -# # impl = cls.datatype -# # cache_ok = True - -# Table( -# "date_table", -# metadata, -# Column( -# "id", Integer, primary_key=True, test_needs_autoincrement=True -# ), -# Column("date_data", cls.datatype), -# # Column("decorated_date_data", Decorated), -# ) - -# @testing.requires.datetime_implicit_bound -# def test_select_direct(self, connection): -# result = connection.scalar(select(literal(self.data))) -# eq_(result, self.data) - -# def test_round_trip(self, connection): -# date_table = self.tables.date_table - -# connection.execute( -# date_table.insert(), {"id": 1, "date_data": self.data} -# ) - -# row = connection.execute(select(date_table.c.date_data)).first() - -# compare = self.compare or self.data -# eq_(row, (compare,)) -# assert isinstance(row[0], type(compare)) - -# def off_test_round_trip_decorated(self, connection): -# date_table = self.tables.date_table - -# connection.execute( -# date_table.insert(), {"id": 1, "decorated_date_data": self.data} -# ) - -# row = connection.execute( -# select(date_table.c.decorated_date_data) -# ).first() - -# compare = self.compare or self.data -# eq_(row, (compare,)) -# assert isinstance(row[0], type(compare)) - -# def test_null(self, connection): -# date_table = self.tables.date_table - -# connection.execute(date_table.insert(), {"id": 1, "date_data": None}) - -# row = 
connection.execute(select(date_table.c.date_data)).first() -# eq_(row, (None,)) - -# @testing.requires.datetime_literals -# def test_literal(self, literal_round_trip): -# compare = self.compare or self.data - -# literal_round_trip( -# self.datatype, [self.data], [compare], compare=compare -# ) - -# @testing.requires.standalone_null_binds_whereclause -# def test_null_bound_comparison(self): -# # this test is based on an Oracle issue observed in #4886. -# # passing NULL for an expression that needs to be interpreted as -# # a certain type, does the DBAPI have the info it needs to do this. -# date_table = self.tables.date_table -# with config.db.begin() as conn: -# result = conn.execute( -# date_table.insert(), {"id": 1, "date_data": self.data} -# ) -# id_ = result.inserted_primary_key[0] -# stmt = select(date_table.c.id).where( -# case( -# ( -# bindparam("foo", type_=self.datatype) != None, -# bindparam("foo", type_=self.datatype), -# ), -# else_=date_table.c.date_data, -# ) -# == date_table.c.date_data -# ) - -# row = conn.execute(stmt, {"foo": None}).first() -# eq_(row[0], id_) - - -class DateTest(_DateTest): - pass - # __requires__ = ("date",) - # __backend__ = True - # datatype = Date - # data = datetime.date(2012, 10, 15) - - - -class BooleanTest(_BooleanTest): - pass - - -class DateTimeTest(_DateTimeTest): - pass - -class IntegerTest(_IntegerTest): - pass - -class NumericTest(_NumericTest): - pass - -class StringTest(_StringTest): - pass - -class TableDDLTest(_TableDDLTest): - pass - -class JoinTest(_JoinTest): - pass - diff --git a/tests/sqlalchemy/test_suite.py b/tests/sqlalchemy/test_suite.py deleted file mode 100644 index 4827f43f6..000000000 --- a/tests/sqlalchemy/test_suite.py +++ /dev/null @@ -1,208 +0,0 @@ -import datetime -import os - -# TODO: fold them into our package -from sqlalchemy.dialects.mysql.types import TINYINT, DOUBLE # borrow MySQL's impls - -from sqlalchemy import BIGINT -from sqlalchemy import BOOLEAN -from sqlalchemy import DATE -from sqlalchemy import DECIMAL -from sqlalchemy import FLOAT -from sqlalchemy import INT -from sqlalchemy import Integer -from sqlalchemy import Interval -from sqlalchemy import SMALLINT -from sqlalchemy import String -from sqlalchemy import TIMESTAMP - -from sqlalchemy import Table, Column - -from sqlalchemy import and_ -from sqlalchemy import asc -from sqlalchemy import bindparam -from sqlalchemy import cast -from sqlalchemy import desc -from sqlalchemy import exc -from sqlalchemy import except_ -from sqlalchemy import ForeignKey -from sqlalchemy import func -from sqlalchemy import intersect -from sqlalchemy import literal -from sqlalchemy import literal_column -from sqlalchemy import MetaData -from sqlalchemy import not_ -from sqlalchemy import or_ -from sqlalchemy import select -from sqlalchemy import sql -from sqlalchemy import testing -from sqlalchemy import text -from sqlalchemy import tuple_ -from sqlalchemy import TypeDecorator -from sqlalchemy import union -from sqlalchemy import union_all -from sqlalchemy import VARCHAR -from sqlalchemy.engine import default -from sqlalchemy.sql import LABEL_STYLE_TABLENAME_PLUS_COL -from sqlalchemy.sql.selectable import LABEL_STYLE_NONE -from sqlalchemy.testing import assert_raises_message -from sqlalchemy.testing import AssertsExecutionResults -from sqlalchemy.testing import ComparesTables -from sqlalchemy.testing import eq_ -from sqlalchemy.testing import fixtures -from sqlalchemy.testing import is_ -from sqlalchemy.testing.schema import Column -from sqlalchemy.testing.schema import Table 
-from sqlalchemy.testing.util import resolve_lambda - - -# provide a way to break in -debugbreakpoint = os.getenv("DATABRICKS_DIALECT_DEBUG") or False - - -class ReflectionTest(fixtures.TablesTest, ComparesTables): - - def test_numtypes(self, metadata, connection): - meta = metadata - - # TODO: switch over to internal golden tables once all types are implemented - all_num_types = Table( - "reflectiontest_all_num_types", - meta, - # the types below represent dialect-specific implementations that handles serialization - Column("f_byte", TINYINT), - Column("f_short", SMALLINT), - Column("f_int", INT), - Column("f_long", BIGINT), - Column("f_float", FLOAT), - Column("f_double", DOUBLE), - Column("f_decimal", DECIMAL(9,3)), - Column("f_boolean", BOOLEAN), - ) - - meta.create_all(connection) - - meta2 = MetaData() - reflected_types = Table( - "reflectiontest_all_num_types", meta2, autoload_with=connection - ) - - self.assert_tables_equal(all_num_types, reflected_types) - - # TODO: this drop_all isn't working - meta.drop_all(connection) - - - # TODO: not working yet - def test_strtypes(self, metadata, connection): - meta = metadata - - all_str_types = Table( - "reflectiontest_all_str_types", - meta, - Column("f_string", String), - Column("f_date", DATE), - Column("f_timestamp", TIMESTAMP), - # Column("f_interval", Interval), - ) - - meta.create_all(connection) - - meta2 = MetaData() - reflected_types = Table( - "reflectiontest_all_str_types", meta2, autoload_with=connection - ) - - self.assert_tables_equal(all_str_types, reflected_types) - - meta.drop_all(connection) - - -class SimpleTest(fixtures.TablesTest, ComparesTables, AssertsExecutionResults): - # __only_on__ = "databricks" - - @classmethod - def define_tables(cls, metadata): - Table( - "simpletest_num", - metadata, - Column("f_byte", TINYINT), - Column("f_short", SMALLINT), - Column("f_int", INT), - Column("f_long", BIGINT), - Column("f_float", FLOAT), - Column("f_double", DOUBLE), - Column("f_decimal", DECIMAL), - Column("f_boolean", BOOLEAN), - test_needs_acid=False, - ) - - Table( - "simpletest_str", - metadata, - Column("f_string", String), - Column("f_date", DATE), - Column("f_timestamp", TIMESTAMP), - test_needs_acid=False, - ) - - # TODO: why are the cleanup of these tables not happening? 
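The cleanup TODO above likely traces back to `do_rollback` being a no-op: SQLAlchemy's fixture teardown leans on transaction rollback, which this dialect swallows. A minimal sketch of an explicit cleanup, assuming `MetaData.drop_all` suffices where rollback cannot (the helper name is hypothetical):

```python
from sqlalchemy import MetaData


def drop_fixture_tables(engine, metadata: MetaData) -> None:
    # Drops each table registered on this MetaData, checking existence first,
    # sidestepping the dialect's no-op rollback.
    metadata.drop_all(bind=engine, checkfirst=True)
```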
- - # simpletest_num - - def test_select_type_byte(self, connection): - t = self.tables.simpletest_num - stmt = select([t.c.f_byte]) - - connection.execute(stmt) - - def test_select_type_smallint(self, connection): - t = self.tables.simpletest_num - stmt = select([t.c.f_short]) - - connection.execute(stmt) - - def test_select_type_int(self, connection): - t = self.tables.simpletest_num - stmt = select([t.c.f_int]) - - connection.execute(stmt) - - def test_select_type_bigint(self, connection): - t = self.tables.simpletest_num - stmt = select([t.c.f_long]) - - connection.execute(stmt) - - def test_select_type_float(self, connection): - t = self.tables.simpletest_num - stmt = select([t.c.f_float]) - - connection.execute(stmt) - - def test_select_type_double(self, connection): - t = self.tables.simpletest_num - stmt = select([t.c.f_double]) - - connection.execute(stmt) - - - # simpletest_str - - def test_select_type_string(self, connection): - t = self.tables.simpletest_str - stmt = select([t.c.f_string]).limit(10) - - connection.execute(stmt) - - def test_select_type_date(self, connection): - t = self.tables.simpletest_str - stmt = select([t.c.f_date]).limit(10) - - connection.execute(stmt) - - def test_select_type_timestamp(self, connection): - t = self.tables.simpletest_str - stmt = select([t.c.f_timestamp]).limit(10) - - connection.execute(stmt) From 2a8b36da4a0944620324a72d232d394e507430c9 Mon Sep 17 00:00:00 2001 From: George Chow Date: Wed, 10 Aug 2022 15:06:36 -0700 Subject: [PATCH 27/30] removed unneeded cruft from earlier experiments Signed-off-by: George Chow --- src/databricks/sqlalchemy/Makefile | 61 --------------- .../sqlalchemy/create-table-extra.sql | 25 ------ .../sqlalchemy/create-table-reseq.sql | 24 ------ src/databricks/sqlalchemy/create-table-sa.sql | 48 ------------ .../sqlalchemy/create-table-wrong.sql | 24 ------ src/databricks/sqlalchemy/create-table.sql | 50 ------------ src/databricks/sqlalchemy/describe-table.sql | 6 -- src/databricks/sqlalchemy/drop-table.sql | 5 -- src/databricks/sqlalchemy/requirements.py | 18 ----- .../sqlalchemy/sample-app-insert.py | 77 ------------------- .../sqlalchemy/sample-app-reflection.py | 38 --------- .../sqlalchemy/sample-app-select.py | 76 ------------------ src/databricks/sqlalchemy/select-table.sql | 7 -- 13 files changed, 459 deletions(-) delete mode 100644 src/databricks/sqlalchemy/Makefile delete mode 100644 src/databricks/sqlalchemy/create-table-extra.sql delete mode 100644 src/databricks/sqlalchemy/create-table-reseq.sql delete mode 100644 src/databricks/sqlalchemy/create-table-sa.sql delete mode 100644 src/databricks/sqlalchemy/create-table-wrong.sql delete mode 100644 src/databricks/sqlalchemy/create-table.sql delete mode 100644 src/databricks/sqlalchemy/describe-table.sql delete mode 100644 src/databricks/sqlalchemy/drop-table.sql delete mode 100644 src/databricks/sqlalchemy/requirements.py delete mode 100644 src/databricks/sqlalchemy/sample-app-insert.py delete mode 100644 src/databricks/sqlalchemy/sample-app-reflection.py delete mode 100644 src/databricks/sqlalchemy/sample-app-select.py delete mode 100644 src/databricks/sqlalchemy/select-table.sql diff --git a/src/databricks/sqlalchemy/Makefile b/src/databricks/sqlalchemy/Makefile deleted file mode 100644 index 5df1970aa..000000000 --- a/src/databricks/sqlalchemy/Makefile +++ /dev/null @@ -1,61 +0,0 @@ -# -# Temporary(?) home for sample apps to guide development of the Databricks SQLAlchemy dialect -# -# NB: This really ought to be using the standard Golden tables. 
For now, there are a number
-# of pre-defined tables that are created.
-#
-# NB2: the target system is specified via environment variables. The sample apps look this up at run-time.
-# See env.template.
-#
-
-DBSCLI=dbsqlcli
-PY=poetry run python3
-
-.PHONY=sample1 sample2 sample3 clean \
-	create create-reseq create-extra create-db drop-db \
-	showtables describe select drop
-
-sample1:
-	$(PY) sample-app-select.py
-
-sample2:
-	$(PY) sample-app-reflection.py
-
-sample3:
-	$(PY) sample-app-insert.py
-
-clean: drop
-
-# caution: create your table in the correct schema!
-create:
-	$(DBSCLI) -e create-table.sql
-
-create-reseq:
-	$(DBSCLI) -e create-table-reseq.sql
-
-create-extra:
-	$(DBSCLI) -e create-table-extra.sql
-
-create-wrong:
-	$(DBSCLI) -e create-table-wrong.sql
-
-create-db:
-	$(DBSCLI) -e "CREATE DATABASE george_chow_satest;"
-
-drop-db:
-	$(DBSCLI) -e "DROP DATABASE IF EXISTS george_chow_satest;"
-
-showtables:
-	$(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); show tables;"
-
-describe:
-	$(DBSCLI) -e "DESCRIBE $(DATABRICKS_SCHEMA).sample_numtypes;"
-	$(DBSCLI) -e "DESCRIBE $(DATABRICKS_SCHEMA).sample_strtypes;"
-
-select:
-	$(DBSCLI) -e "SELECT * FROM $(DATABRICKS_SCHEMA).sample_numtypes LIMIT 10;"
-	$(DBSCLI) -e "SELECT * FROM $(DATABRICKS_SCHEMA).sample_strtypes LIMIT 10;"
-
-drop:
-	echo y | $(DBSCLI) -e "DROP TABLE IF EXISTS $(DATABRICKS_SCHEMA).sample_numtypes;"
-	echo y | $(DBSCLI) -e "DROP TABLE IF EXISTS $(DATABRICKS_SCHEMA).sample_strtypes;"
\ No newline at end of file
diff --git a/src/databricks/sqlalchemy/create-table-extra.sql b/src/databricks/sqlalchemy/create-table-extra.sql
deleted file mode 100644
index beadefee4..000000000
--- a/src/databricks/sqlalchemy/create-table-extra.sql
+++ /dev/null
@@ -1,25 +0,0 @@
-/* alternate table/data for sample app */
-
-USE george_chow_dbtest;
-
-CREATE TABLE sample_numtypes
-  (
-    f_byte      BYTE,
-    f_short     SHORT,
-    f_int       INT,
-    f_long      LONG,
-    f_float     FLOAT,
-    f_decimal   DECIMAL(10,2),
-    f_boolean   BOOLEAN,
-    f_extra     INT
-  );
-
-INSERT INTO sample_numtypes VALUES
-  ( 125, 32700, 2001002003, 9001002003004005006, 1E30, 1.5, TRUE, 17 ),
-  ( -125, -32700, -2001002003, -9001002003004005006, 1E-30, -1.5, FALSE, 42 ),
-  ( 125, 32700, 2001002003, 9001002003004005006, -1E30, 1.5, TRUE, 11 );
-
-SELECT * FROM sample_numtypes;
-
-DESCRIBE sample_numtypes;
-
diff --git a/src/databricks/sqlalchemy/create-table-reseq.sql b/src/databricks/sqlalchemy/create-table-reseq.sql
deleted file mode 100644
index 9cd62e9b4..000000000
--- a/src/databricks/sqlalchemy/create-table-reseq.sql
+++ /dev/null
@@ -1,24 +0,0 @@
-/* alternate table/data for sample app */
-
-USE george_chow_dbtest;
-
-CREATE TABLE sample_numtypes
-  (
-    f_byte      BYTE,
-    f_boolean   BOOLEAN,
-    f_short     SHORT,
-    f_int       INT,
-    f_long      LONG,
-    f_float     FLOAT,
-    f_decimal   DECIMAL(10,2)
-  );
-
-INSERT INTO sample_numtypes VALUES
-  ( 125, TRUE, 32700, 2001002003, 9001002003004005006, 1E30, 1.5 ),
-  ( -125, FALSE, -32700, -2001002003, -9001002003004005006, 1E-30, -1.5 ),
-  ( 125, TRUE, 32700, 2001002003, 9001002003004005006, -1E30, 1.5 );
-
-SELECT * FROM sample_numtypes;
-
-DESCRIBE sample_numtypes;
-
diff --git a/src/databricks/sqlalchemy/create-table-sa.sql b/src/databricks/sqlalchemy/create-table-sa.sql
deleted file mode 100644
index 1f1c479ae..000000000
--- a/src/databricks/sqlalchemy/create-table-sa.sql
+++ /dev/null
@@ -1,48 +0,0 @@
-/* table/data for sample app */
-
-USE george_chow_satest;
-
-DROP TABLE IF EXISTS sample_numtypes;
-
-CREATE TABLE sample_numtypes
-  (
-    f_byte      BYTE,
-    f_short     SHORT,
- f_int INT, - f_long LONG, - f_float FLOAT, - f_decimal DECIMAL(10,2), - f_boolean BOOLEAN - ); - -INSERT INTO sample_numtypes VALUES - ( 125, 32700, 2001002003, 9001002003004005006, 1E30, 1.5, TRUE ), - ( -125, -32700, -2001002003, -9001002003004005006, 1E-30, -1.5, FALSE ), - ( 125, 32700, 2001002003, 9001002003004005006, -1E30, 1.5, TRUE ); - -SELECT * FROM sample_numtypes; - -DESCRIBE sample_numtypes; - - -DROP TABLE IF EXISTS sample_strtypes; - -CREATE TABLE sample_strtypes - ( - f_event STRING, - f_date DATE, - f_timestamp TIMESTAMP, - f_interval INTERVAL DAY TO SECOND - ); - -INSERT INTO sample_strtypes VALUES - ( 'Everest', '1953-05-29', '1953-05-29T11:30', '3 0:0:0' ), - ( 'Mariana Trench', '1960-01-23', '1960-01-23T13:06', '0 5:00:00' ), - ( 'Moon landing', '1969-07-20', '1969-07-20T20:17', '0 21:36:0' ); - -SELECT * FROM sample_strtypes; - -DESCRIBE sample_strtypes; - - - diff --git a/src/databricks/sqlalchemy/create-table-wrong.sql b/src/databricks/sqlalchemy/create-table-wrong.sql deleted file mode 100644 index 9c4931e33..000000000 --- a/src/databricks/sqlalchemy/create-table-wrong.sql +++ /dev/null @@ -1,24 +0,0 @@ -/* table/data for sample app */ - -USE george_chow_dbtest; - -CREATE TABLE sample_numtypes - ( - f_byte BOOLEAN, - f_short BOOLEAN, - f_int BOOLEAN, - f_long BOOLEAN, - f_float BOOLEAN, - f_decimal DECIMAL(10,2), - f_boolean INT - ); - -INSERT INTO sample_numtypes VALUES - ( 125, 32700, 2001002003, 9001002003004005006, 1E30, 1.5, TRUE ), - ( -125, -32700, -2001002003, -9001002003004005006, 1E-30, -1.5, FALSE ), - ( 125, 32700, 2001002003, 9001002003004005006, -1E30, 1.5, TRUE ); - -SELECT * FROM sample_numtypes; - -DESCRIBE sample_numtypes; - diff --git a/src/databricks/sqlalchemy/create-table.sql b/src/databricks/sqlalchemy/create-table.sql deleted file mode 100644 index 76cbdee32..000000000 --- a/src/databricks/sqlalchemy/create-table.sql +++ /dev/null @@ -1,50 +0,0 @@ -/* table/data for sample app */ - -USE george_chow_dbtest; - -DROP TABLE IF EXISTS sample_numtypes; - -CREATE TABLE IF NOT EXISTS sample_numtypes - ( - f_byte BYTE, - f_short SHORT, - f_int INT, - f_long LONG, - f_float FLOAT, - f_double DOUBLE, - f_decimal DECIMAL(10,2), - f_boolean BOOLEAN - ); - -INSERT INTO sample_numtypes VALUES - ( 125, 32700, 2001002003, 9001002003004005006, 1E30, 1E308, 1.5, TRUE ), - ( -125, -32700, -2001002003, -9001002003004005006, 1E-30, 1E-308, -1.5, FALSE ), - ( 125, 32700, 2001002003, 9001002003004005006, -1E30, -1E308, 1.5, TRUE ); - -SELECT * FROM sample_numtypes; - -DESCRIBE sample_numtypes; - - -DROP TABLE IF EXISTS sample_strtypes; - -CREATE TABLE sample_strtypes - ( - f_string STRING, - f_date DATE, - f_timestamp TIMESTAMP - ); - -- f_interval INTERVAL DAY TO SECOND - -INSERT INTO sample_strtypes VALUES - ( 'Everest', '1953-05-29', '1953-05-29T11:30' ), - ( 'Mariana Trench', '1960-01-23', '1960-01-23T13:06' ), - ( 'Moon landing', '1969-07-20', '1969-07-20T20:17' ); - --- ( 'Everest', '1953-05-29', '1953-05-29T11:30', '3 0:0:0' ), --- ( 'Mariana Trench', '1960-01-23', '1960-01-23T13:06', '0 5:00:00' ), --- ( 'Moon landing', '1969-07-20', '1969-07-20T20:17', '0 21:36:0' ); - -SELECT * FROM sample_strtypes; - -DESCRIBE sample_strtypes; diff --git a/src/databricks/sqlalchemy/describe-table.sql b/src/databricks/sqlalchemy/describe-table.sql deleted file mode 100644 index ba36ffb1a..000000000 --- a/src/databricks/sqlalchemy/describe-table.sql +++ /dev/null @@ -1,6 +0,0 @@ -/* table/data for sample app */ - -USE george_chow_dbtest; - -DESCRIBE sample_numtypes; - 
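The sample DDL above leans on Databricks SQL's own type names (BYTE, SHORT, LONG, DOUBLE); the sample apps removed further below map those onto SQLAlchemy by borrowing TINYINT and DOUBLE from the MySQL dialect, since SQLAlchemy core has no portable equivalents. A minimal sketch of the mapping this implies is shown here; the DATABRICKS_TYPE_MAP name and the lookup helper are illustrative assumptions for this note, not code from the dialect itself.

# Sketch: resolve Databricks SQL type names (as seen in DESCRIBE TABLE output)
# to SQLAlchemy type constructs. TINYINT and DOUBLE are borrowed from the
# MySQL dialect, exactly as the sample apps below import them.
from sqlalchemy import SMALLINT, Integer, BigInteger, Float, DECIMAL, BOOLEAN
from sqlalchemy.dialects.mysql.types import TINYINT, DOUBLE

DATABRICKS_TYPE_MAP = {
    "byte": TINYINT,      # BYTE / TINYINT: 1-byte signed integer
    "short": SMALLINT,    # SHORT / SMALLINT: 2-byte signed integer
    "int": Integer,
    "long": BigInteger,   # LONG / BIGINT: 8-byte signed integer
    "float": Float,
    "double": DOUBLE,
    "decimal": DECIMAL,
    "boolean": BOOLEAN,
}

def lookup_type(describe_name: str):
    """Resolve a DESCRIBE name, e.g. 'decimal(10,2)' -> DECIMAL (base name only)."""
    return DATABRICKS_TYPE_MAP[describe_name.split("(")[0].strip().lower()]

A parameterized name such as decimal(10,2) resolves through its base name here; precision and scale would be parsed separately during reflection.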
diff --git a/src/databricks/sqlalchemy/drop-table.sql b/src/databricks/sqlalchemy/drop-table.sql deleted file mode 100644 index 0e419e3d4..000000000 --- a/src/databricks/sqlalchemy/drop-table.sql +++ /dev/null @@ -1,5 +0,0 @@ -/* table/data for sample app */ - -USE george_chow_dbtest; - -DROP TABLE IF EXISTS sample_numtypes; diff --git a/src/databricks/sqlalchemy/requirements.py b/src/databricks/sqlalchemy/requirements.py deleted file mode 100644 index 6ce986887..000000000 --- a/src/databricks/sqlalchemy/requirements.py +++ /dev/null @@ -1,18 +0,0 @@ -# Following official SQLAlchemy guide: -# -# https://github.com/sqlalchemy/sqlalchemy/blob/main/README.dialects.rst#dialect-layout -# -# The full group of requirements is available here: -# -# https://github.com/sqlalchemy/sqlalchemy/blob/a453256afc334acabee25ec275de555ef7287144/test/requirements.py - - -from sqlalchemy.testing.requirements import SuiteRequirements -from sqlalchemy.testing import exclusions - -class Requirements(SuiteRequirements): - - @property - def two_phase_transactions(self): - # Databricks SQL doesn't support transactions - return exclusions.closed() diff --git a/src/databricks/sqlalchemy/sample-app-insert.py b/src/databricks/sqlalchemy/sample-app-insert.py deleted file mode 100644 index fcb3acc9f..000000000 --- a/src/databricks/sqlalchemy/sample-app-insert.py +++ /dev/null @@ -1,77 +0,0 @@ -# sample-app-insert.py -# -# Program to demonstrate the simplest INSERT statement -# - -import os -import random - -from sqlalchemy import create_engine -from sqlalchemy import MetaData -from sqlalchemy import select, insert, Table, Column -from sqlalchemy import SMALLINT, Integer, BigInteger, Float, DECIMAL, BOOLEAN - -from sqlalchemy.dialects.mysql.types import TINYINT, DOUBLE # borrow MySQL's impls -from sqlalchemy import String, DATE, TIMESTAMP - -# pickup settings from the env -server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME") -http_path = os.getenv("DATABRICKS_HTTP_PATH") -access_token = os.getenv("DATABRICKS_TOKEN") -default_schema = os.getenv("DATABRICKS_SCHEMA") - -# use echo=True for verbose log -engine = create_engine(f"databricks+thrift://token:{access_token}@{server_hostname}/{default_schema}?http_path={http_path}", echo=False, future=True) - -metadata_obj = MetaData() - -# NB: sample_numtypes is a pre-created/populated table -numtypes = "sample_numtypes" - -t1 = Table( - numtypes, - metadata_obj, - Column('f_byte', TINYINT), - Column('f_short', SMALLINT), - Column('f_int', Integer), - Column('f_long', BigInteger), - Column('f_float', Float), - Column('f_double', DOUBLE), - Column('f_decimal', DECIMAL), - Column('f_boolean', BOOLEAN) -) - -with engine.connect() as conn: - stmt = insert(t1).values(f_byte=42, - f_short=31415, - f_int=random.randint(1,1001002003), - f_long=4001002003004005006, - f_float=1.41, - f_double=1.6666, - f_decimal=2.71828, - f_boolean=False) - - print(f"Attempting to execute: {stmt}\n") - - print(f"Rows from table {numtypes}") - for row in conn.execute(stmt): - print(row) - - -# NB: sample_strtypes is a pre-created/populated table -strtypes = "sample_strtypes" - -with engine.connect() as conn: - t2 = Table( - strtypes, - metadata_obj, - autoload_with=conn - ) - - # stmt = insert(t2).values(f_string='Antarctic expedition', f_date='1911-12-14', f_timestamp='1911-12-14T15:00', f_interval='4 0:00:00' ) - stmt = insert(t2).values(f_string='Antarctic expedition', f_date='1911-12-14', f_timestamp='1911-12-14T15:00') - print(f"Attempting to execute: {stmt}\n") - - print(f"Rows from table 
{strtypes}") - for row in conn.execute(stmt): - print(row) diff --git a/src/databricks/sqlalchemy/sample-app-reflection.py b/src/databricks/sqlalchemy/sample-app-reflection.py deleted file mode 100644 index 2072d25e2..000000000 --- a/src/databricks/sqlalchemy/sample-app-reflection.py +++ /dev/null @@ -1,38 +0,0 @@ -# sample-app-reflection.py -# -# Program to demonstrate use of reflection instead of explicit declaration -# - -import os - -from sqlalchemy import create_engine -from sqlalchemy import MetaData -from sqlalchemy import Table, Column, Integer, BigInteger, Float, Boolean -from sqlalchemy import select - -# pickup settings from the env -server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME") -http_path = os.getenv("DATABRICKS_HTTP_PATH") -access_token = os.getenv("DATABRICKS_TOKEN") -default_schema = os.getenv("DATABRICKS_SCHEMA") - -# use echo=True for verbose log -with create_engine(f"databricks+thrift://token:{access_token}@{server_hostname}/{default_schema}?http_path={http_path}", echo=False, future=True).connect() as conn: - metadata_obj = MetaData() - - # NB: sample_numtypes is a pre-created/populated table - tableName = "sample_numtypes" - - # use reflection here to discover the schema dynamically - t = Table( - "sample_numtypes", metadata_obj, autoload_with=conn - ) - - # SELECT * FROM t WHERE f_byte = -125 - stmt = select(t).where(t.c.f_byte == -125) - print(f"Attempting to execute: {stmt}\n") - - print(f"Rows from table {tableName}") - - for row in conn.execute(stmt): - print(row) diff --git a/src/databricks/sqlalchemy/sample-app-select.py b/src/databricks/sqlalchemy/sample-app-select.py deleted file mode 100644 index 3986a02c3..000000000 --- a/src/databricks/sqlalchemy/sample-app-select.py +++ /dev/null @@ -1,76 +0,0 @@ -# sample-app-select.py -# -# Program to demonstrate the simplest SELECT statement -# - -import os - -from sqlalchemy import create_engine -from sqlalchemy import MetaData -from sqlalchemy import select, Table, Column -from sqlalchemy import SMALLINT, Integer, BigInteger, Float, DECIMAL, BOOLEAN -from sqlalchemy.dialects.mysql.types import TINYINT, DOUBLE # borrow MySQL's impls -from sqlalchemy import String, DATE, TIMESTAMP - -# pickup settings from the env -server_hostname = os.getenv("DATABRICKS_SERVER_HOSTNAME") -http_path = os.getenv("DATABRICKS_HTTP_PATH") -access_token = os.getenv("DATABRICKS_TOKEN") -default_schema = os.getenv("DATABRICKS_SCHEMA") - -# provide a way to break in -debugbreakpoint = os.getenv("DATABRICKS_DIALECT_DEBUG") or False - -# use echo=True for verbose log -engine = create_engine(f"databricks+thrift://token:{access_token}@{server_hostname}/{default_schema}?http_path={http_path}", echo=False, future=True) - -metadata_obj = MetaData() - -# NB: sample_numtypes is a pre-created/populated table -numtypes = "sample_numtypes" - -t1 = Table( - numtypes, - metadata_obj, - Column('f_byte', TINYINT), - Column('f_short', SMALLINT), - Column('f_int', Integer), - Column('f_long', BigInteger), - Column('f_float', Float), - Column('f_double', DOUBLE), - Column('f_decimal', DECIMAL), - Column('f_boolean', BOOLEAN) -) - -# SELECT * FROM t WHERE f_byte = -125 -stmt = select(t1).where(t1.c.f_byte == -125) -print(f"Attempting to execute: {stmt}\n") - -print(f"Rows from table {numtypes}") - -with engine.connect() as conn: - for row in conn.execute(stmt): - print(row) - - -# NB: sample_strtypes is a pre-created/populated table -strtypes = "sample_strtypes" - -with engine.connect() as conn: - t2 = Table( - strtypes, - metadata_obj, - 
autoload_with=conn - ) - - # SELECT * FROM t - stmt = select(t2) - print(f"Attempting to execute: {stmt}\n") - - print(f"Rows from table {strtypes}") - if debugbreakpoint: - breakpoint() - for row in conn.execute(stmt): - if debugbreakpoint: - breakpoint() - print(row) diff --git a/src/databricks/sqlalchemy/select-table.sql b/src/databricks/sqlalchemy/select-table.sql deleted file mode 100644 index d3773ef63..000000000 --- a/src/databricks/sqlalchemy/select-table.sql +++ /dev/null @@ -1,7 +0,0 @@ -/* table/data for sample app */ - -USE george_chow_dbtest; - -SELECT * FROM sample_numtypes LIMIT 10; - - From 1fcdfc24beabf2dc11d97b548886fe4133da6cb4 Mon Sep 17 00:00:00 2001 From: George Chow Date: Wed, 10 Aug 2022 15:25:17 -0700 Subject: [PATCH 28/30] missed the other Makefile Signed-off-by: George Chow --- Makefile | 161 ------------------------------------------------------- 1 file changed, 161 deletions(-) delete mode 100644 Makefile diff --git a/Makefile b/Makefile deleted file mode 100644 index a3f33a5ed..000000000 --- a/Makefile +++ /dev/null @@ -1,161 +0,0 @@ -# -# Clearinghouse for initiating adhoc PyTest runs to test the Databricks SQLAlchemy dialect -# -# NB: At present, the database fixtures aren't being cleaned up after each run. -# Use the clean pseudo-targets to remove these after your run. -# -# NB2: the target system (the parameter to --dburi) is specified via environment variables. -# See env.template. - - -DBSCLI=dbsqlcli -PYTEST=poetry run python3 -m pytest - -SUITE_PATH=tests/sqlalchemy - -SUITE=test_suite.py - -REQ=--requirements src.databricks.sqlalchemy.requirements:Requirements -DBURI=--dburi "databricks+thrift://token:$(DATABRICKS_TOKEN)@$(DATABRICKS_SERVER_HOSTNAME)/$(DATABRICKS_SCHEMA)?http_path=$(DATABRICKS_HTTP_PATH)" - -.PHONY=all clean showtables full reflection simple str num drop_simpletest drop_reflectiontest - - -all: full - -clean: drop_simpletest drop_reflectiontest \ - drop_booleantest drop_datetest drop_datetimetest drop_integertest drop_numerictest drop_stringtest drop_tableddl - -showtables: - $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); show tables;" - $(DBSCLI) -e "USE test_schema; show tables;" - $(DBSCLI) -e "USE test_schema_2; show tables;" - -full: - $(PYTEST) $(SUITE_PATH) \ - $(DBURI) \ - --log-file=~/.pytestlogs/full.log - -sa-bool: drop_booleantest drop_t - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::BooleanTest \ - $(DBURI) - -sa-date: drop_datetest - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::DateTest \ - $(DBURI) - -sa-dt: drop_datetimetest - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::DateTimeTest \ - $(DBURI) - -sa-int: drop_integertest - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::IntegerTest \ - $(DBURI) - -sa-num: drop_numerictest - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::NumericTest \ - $(DBURI) - -sa-str: drop_stringtest - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::StringTest \ - $(DBURI) - -sa-ddl: drop_tableddl - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest \ - $(REQ) \ - $(DBURI) - -sa-ddl1: drop_tableddl - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest:test_create_table \ - $(DBURI) - -sa-ddl2: drop_tableddl - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest:test_create_table_schema \ - $(DBURI) - -sa-ddl3: drop_tableddl - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::TableDDLTest:test_drop_table \ - $(DBURI) - -sa-join: drop_jointest - $(PYTEST) $(SUITE_PATH)/test_full_sa.py::JoinTest \ - $(DBURI) - -reflection: - $(PYTEST) $(SUITE_PATH)/$(SUITE)::ReflectionTest \ - $(DBURI) - -num: - $(PYTEST) 
$(SUITE_PATH)/$(SUITE)::ReflectionTest::test_numtypes \
-		$(DBURI)
-
-str:
-	$(PYTEST) $(SUITE_PATH)/$(SUITE)::ReflectionTest::test_strtypes \
-		$(DBURI)
-
-simple:
-	$(PYTEST) $(SUITE_PATH)/$(SUITE)::SimpleTest \
-		$(DBURI)
-
-# clean up after SimpleTest run
-drop_simpletest:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS simpletest_num;"
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS simpletest_str;"
-
-# clean up after ReflectionTest run
-drop_reflectiontest:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS reflectiontest_all_num_types;"
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS reflectiontest_all_str_types;"
-
-# clean up after SQLAlchemy test suite
-
-drop_booleantest: drop_boolean_table
-
-drop_datetest: drop_date_table
-
-drop_datetimetest: drop_date_table
-
-drop_integertest: drop_t drop_tabletest drop_integer_table
-
-drop_numerictest: drop_t drop_tabletest
-
-drop_stringtest: drop_t drop_boolean_table
-
-drop_tableddl: drop__test_table drop_test_table
-
-drop_jointest: drop_a drop_b
-
-
-drop_t:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS t;"
-
-drop_tabletest:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS tabletest;"
-
-drop_boolean_table:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS boolean_table;"
-
-drop__test_table:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS _test_table;"
-
-drop_test_table:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS test_table;"
-
-drop_a:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS a;"
-
-drop_b:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS b;"
-
-drop_date_table:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS date_table;"
-
-drop_integer_table:
-	echo y | $(DBSCLI) -e "USE $(DATABRICKS_SCHEMA); DROP TABLE IF EXISTS integer_table;"
-
-
-# these two schemas are baked into SQLAlchemy's test suite
-satestdb:
-	$(DBSCLI) -e "CREATE DATABASE test_schema;"
-	$(DBSCLI) -e "CREATE DATABASE test_schema_2;"
-

From 488b94da05546d1bda42de7147bab1aa252e4f6c Mon Sep 17 00:00:00 2001
From: George Chow
Date: Wed, 10 Aug 2022 15:32:00 -0700
Subject: [PATCH 29/30] revert unneeded changes; remove dead template

Signed-off-by: George Chow
---
 env.template   | 11 -----------
 poetry.lock    | 35 +----------------------------------
 pyproject.toml |  1 -
 3 files changed, 1 insertion(+), 46 deletions(-)
 delete mode 100644 env.template

diff --git a/env.template b/env.template
deleted file mode 100644
index 44a7fffa7..000000000
--- a/env.template
+++ /dev/null
@@ -1,11 +0,0 @@
-
-#!/usr/bin/env bash
-
-export DATABRICKS_SERVER_HOSTNAME=your-host-name
-export DATABRICKS_HTTP_PATH=your-path
-export DATABRICKS_TOKEN=your-token
-export DATABRICKS_SCHEMA=default-or-something-else
-
-# hacky dynamic breakpoint; delete the following and/or unset the var if you do *not* want the breakpoint
-export DATABRICKS_DIALECT_DEBUG=True
-#unset DATABRICKS_DIALECT_DEBUG
\ No newline at end of file
diff --git a/poetry.lock b/poetry.lock
index 1ce4fe65c..bc861926f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -63,14 +63,6 @@ category = "dev"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
 
-[[package]]
-name = "future"
-version = "0.18.2"
-description = "Clean single-source support for Python 3 and 2"
-category = "main"
-optional = false
-python-versions
= ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" - [[package]] name = "greenlet" version = "1.1.2" @@ -228,25 +220,6 @@ python-versions = ">=3.6" [package.dependencies] numpy = ">=1.16.6" -[[package]] -name = "pyhive" -version = "0.6.5" -description = "Python interface to Hive" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -future = "*" -python-dateutil = "*" - -[package.extras] -hive = ["sasl (>=0.2.1)", "thrift (>=0.10.0)", "thrift_sasl (>=0.1.0)"] -kerberos = ["requests_kerberos (>=0.12.0)"] -presto = ["requests (>=1.0.0)"] -sqlalchemy = ["sqlalchemy (>=1.3.0)"] -trino = ["requests (>=1.0.0)"] - [[package]] name = "pyparsing" version = "3.0.9" @@ -395,7 +368,7 @@ testing = ["pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest- [metadata] lock-version = "1.1" python-versions = "^3.7.1" -content-hash = "fd3612b808472d7c5ada95fde014214d9a1a25a7a457f6d2ee933a2ff7398894" +content-hash = "46fe2288362fc103abfdcd49c9dce356736b9ea6758d57b5d7fed173e2b9ceb5" [metadata.files] atomicwrites = [] @@ -430,9 +403,6 @@ colorama = [ {file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"}, {file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"}, ] -future = [ - {file = "future-0.18.2.tar.gz", hash = "sha256:b1bead90b70cf6ec3f0710ae53a525360fa360d306a86583adc6bf83a4db537d"}, -] greenlet = [] importlib-metadata = [ {file = "importlib_metadata-4.11.3-py3-none-any.whl", hash = "sha256:1208431ca90a8cca1a6b8af391bb53c1a2db74e5d1cef6ddced95d4b2062edc6"}, @@ -502,9 +472,6 @@ pyarrow = [ {file = "pyarrow-5.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:4d8adda1892ef4553c4804af7f67cce484f4d6371564e2d8374b8e2bc85293e2"}, {file = "pyarrow-5.0.0.tar.gz", hash = "sha256:24e64ea33eed07441cc0e80c949e3a1b48211a1add8953268391d250f4d39922"}, ] -pyhive = [ - {file = "PyHive-0.6.5.tar.gz", hash = "sha256:cae07bd177527d04f6a5c7f96cb1849ba8bd9121750b75bbf5e3d4a3be566909"}, -] pyparsing = [] pytest = [] python-dateutil = [ diff --git a/pyproject.toml b/pyproject.toml index 74defa731..97790a151 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,6 @@ python = "^3.7.1" thrift = "^0.13.0" pyarrow = "^5.0.0" pandas = "^1.3.0" -PyHive = "^0.6.5" [tool.poetry.plugins."sqlalchemy.dialects"] "databricks.thrift" = "databricks.sqlalchemy:DatabricksDialect" From 7ba1f2a9eec0b08545a7853d4c856451a6d3c5f2 Mon Sep 17 00:00:00 2001 From: George Chow Date: Fri, 12 Aug 2022 16:09:59 -0700 Subject: [PATCH 30/30] remove unneeded action; expand decimal test case to cover default and specified scales Signed-off-by: George Chow --- .github/workflows/sqlalchemy-dialect.yml | 178 ----------------------- tests/sqlalchemy/integration.py | 3 +- 2 files changed, 2 insertions(+), 179 deletions(-) delete mode 100644 .github/workflows/sqlalchemy-dialect.yml diff --git a/.github/workflows/sqlalchemy-dialect.yml b/.github/workflows/sqlalchemy-dialect.yml deleted file mode 100644 index 3a6aef814..000000000 --- a/.github/workflows/sqlalchemy-dialect.yml +++ /dev/null @@ -1,178 +0,0 @@ -name: SQLAlchemy dialect test -on: - push: - branches: - - sqlalchemy-dev - paths: - - Makefile - - src/databricks/sqlalchemy - - tests/sqlalchemy - - .github/workflows/sqlalchemy-dialect.yml -jobs: - run-tests: - runs-on: ubuntu-latest - steps: - - #---------------------------------------------- - # check-out repo and set-up python - #---------------------------------------------- - - name: Check out 
repository
-        uses: actions/checkout@v2
-      - name: Set up python
-        id: setup-python
-        uses: actions/setup-python@v2
-        with:
-          python-version: 3.7
-      #----------------------------------------------
-      # ----- install databricks-sql-cli -----
-      #----------------------------------------------
-      - name: Install databricks-sql-cli & initialize dbsqlclirc
-        run: |
-          python -m pip install databricks-sql-cli
-          dbsqlcli || true
-
-      #----------------------------------------------
-      # ----- install & configure poetry -----
-      #----------------------------------------------
-      - name: Install Poetry
-        uses: snok/install-poetry@v1
-        with:
-          virtualenvs-create: true
-          virtualenvs-in-project: true
-          installer-parallel: true
-
-      #----------------------------------------------
-      # load cached venv if cache exists
-      #----------------------------------------------
-      - name: Load cached venv
-        id: cached-poetry-dependencies
-        uses: actions/cache@v2
-        with:
-          path: .venv
-          key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }}
-      #----------------------------------------------
-      # install dependencies if cache does not exist
-      #----------------------------------------------
-      - name: Install dependencies
-        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction --no-root
-      #----------------------------------------------
-      # install your root project, if required
-      #----------------------------------------------
-      - name: Install library
-        run: poetry install --no-interaction
-      #----------------------------------------------
-      # run test suite
-      #----------------------------------------------
-      - name: Run tests
-        #----------------------------------------------
-        # import secrets from the GitHub env
-        #----------------------------------------------
-        env:
-          DATABRICKS_SERVER_HOSTNAME: ${{ secrets.REPOSEC_DATABRICKS_SERVER_HOSTNAME }}
-          DATABRICKS_HTTP_PATH: ${{ secrets.REPOSEC_DATABRICKS_HTTP_PATH }}
-          DATABRICKS_TOKEN: ${{ secrets.REPOSEC_DATABRICKS_TOKEN }}
-          DATABRICKS_SCHEMA: ${{ secrets.REPOSEC_DATABRICKS_SCHEMA }}
-
-        run: |
-          echo y | dbsqlcli --hostname $DATABRICKS_SERVER_HOSTNAME --http-path $DATABRICKS_HTTP_PATH --access-token $DATABRICKS_TOKEN -e "USE $DATABRICKS_SCHEMA; DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS tabletest; DROP TABLE IF EXISTS integer_table;"
-          poetry run pytest tests/sqlalchemy/test_full_sa.py::IntegerTest --dburi "databricks+thrift://token:$DATABRICKS_TOKEN@$DATABRICKS_SERVER_HOSTNAME/$DATABRICKS_SCHEMA?http_path=$DATABRICKS_HTTP_PATH"
-          echo y | dbsqlcli --hostname $DATABRICKS_SERVER_HOSTNAME --http-path $DATABRICKS_HTTP_PATH --access-token $DATABRICKS_TOKEN -e "USE $DATABRICKS_SCHEMA; DROP TABLE IF EXISTS t; DROP TABLE IF EXISTS tabletest; DROP TABLE IF EXISTS integer_table;"
-
-  # check-linting:
-  #   runs-on: ubuntu-latest
-  #   steps:
-  #     #----------------------------------------------
-  #     # check-out repo and set-up python
-  #     #----------------------------------------------
-  #     - name: Check out repository
-  #       uses: actions/checkout@v2
-  #     - name: Set up python
-  #       id: setup-python
-  #       uses: actions/setup-python@v2
-  #       with:
-  #         python-version: 3.7
-  #     #----------------------------------------------
-  #     # ----- install & configure poetry -----
-  #     #----------------------------------------------
-  #     - name: Install Poetry
-  #       uses: snok/install-poetry@v1
-  #       with:
-  #         virtualenvs-create: true
-  #         virtualenvs-in-project: true
-  #         installer-parallel:
true - - # #---------------------------------------------- - # # load cached venv if cache exists - # #---------------------------------------------- - # - name: Load cached venv - # id: cached-poetry-dependencies - # uses: actions/cache@v2 - # with: - # path: .venv - # key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} - # #---------------------------------------------- - # # install dependencies if cache does not exist - # #---------------------------------------------- - # - name: Install dependencies - # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - # run: poetry install --no-interaction --no-root - # #---------------------------------------------- - # # install your root project, if required - # #---------------------------------------------- - # - name: Install library - # run: poetry install --no-interaction - # #---------------------------------------------- - # # black the code - # #---------------------------------------------- - # - name: Black - # run: poetry run black --check src - - # check-types: - # runs-on: ubuntu-latest - # steps: - # #---------------------------------------------- - # # check-out repo and set-up python - # #---------------------------------------------- - # - name: Check out repository - # uses: actions/checkout@v2 - # - name: Set up python - # id: setup-python - # uses: actions/setup-python@v2 - # with: - # python-version: 3.7 - # #---------------------------------------------- - # # ----- install & configure poetry ----- - # #---------------------------------------------- - # - name: Install Poetry - # uses: snok/install-poetry@v1 - # with: - # virtualenvs-create: true - # virtualenvs-in-project: true - # installer-parallel: true - - # #---------------------------------------------- - # # load cached venv if cache exists - # #---------------------------------------------- - # - name: Load cached venv - # id: cached-poetry-dependencies - # uses: actions/cache@v2 - # with: - # path: .venv - # key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ github.event.repository.name }}-${{ hashFiles('**/poetry.lock') }} - # #---------------------------------------------- - # # install dependencies if cache does not exist - # #---------------------------------------------- - # - name: Install dependencies - # if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true' - # run: poetry install --no-interaction --no-root - # #---------------------------------------------- - # # install your root project, if required - # #---------------------------------------------- - # - name: Install library - # run: poetry install --no-interaction - # #---------------------------------------------- - # # black the code - # #---------------------------------------------- - # - name: Mypy - # run: poetry run mypy src \ No newline at end of file diff --git a/tests/sqlalchemy/integration.py b/tests/sqlalchemy/integration.py index 96e75063d..ee8bf6c47 100644 --- a/tests/sqlalchemy/integration.py +++ b/tests/sqlalchemy/integration.py @@ -78,7 +78,8 @@ def test_create_and_drop_table(db_engine): sqlalchemy.Column("f_int", sqlalchemy.types.Integer), sqlalchemy.Column("f_long", sqlalchemy.types.BigInteger), sqlalchemy.Column("f_float", sqlalchemy.types.Float), - sqlalchemy.Column("f_decimal", sqlalchemy.types.DECIMAL), + sqlalchemy.Column("f_decimal_def", sqlalchemy.types.DECIMAL), + sqlalchemy.Column("f_decimal_spec", 
sqlalchemy.types.DECIMAL(precision=10, scale=2)), sqlalchemy.Column("f_boolean", sqlalchemy.types.BOOLEAN), sqlalchemy.Column("f_str", sqlalchemy.types.String), )
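The expanded test distinguishes a DECIMAL left at the server default from one with an explicit precision and scale. A minimal sketch of what that difference looks like in rendered DDL, using SQLAlchemy's generic compiler (the decimal_demo table name is illustrative, not part of the test suite); Databricks SQL applies its own default, DECIMAL(10,0), when no precision is given.

# Sketch: compile DDL for a default DECIMAL vs. an explicit DECIMAL(10, 2),
# mirroring the f_decimal_def / f_decimal_spec columns added above.
import sqlalchemy
from sqlalchemy.schema import CreateTable

metadata = sqlalchemy.MetaData()
demo = sqlalchemy.Table(
    "decimal_demo",  # illustrative name only
    metadata,
    sqlalchemy.Column("f_decimal_def", sqlalchemy.types.DECIMAL),
    sqlalchemy.Column("f_decimal_spec", sqlalchemy.types.DECIMAL(precision=10, scale=2)),
)

print(CreateTable(demo))
# f_decimal_def renders as bare DECIMAL (so the server-side default applies),
# while f_decimal_spec renders as DECIMAL(10, 2).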