diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 9a5eca89..f57f0277 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -28,7 +28,9 @@ jobs: strategy: fail-fast: false - language: [ python ] + matrix: + language: [ python ] + python-version: ['3.13'] steps: - name: Checkout @@ -37,14 +39,16 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: 3.11 - architecture: x64 - cache: 'pip' - cache-dependency-path: | - setup.py + python-version: ${{ matrix.python-version }} - - name: Install uv - uses: yezz123/setup-uv@v4 + - name: Set up uv + uses: astral-sh/setup-uv@v6 + with: + cache-dependency-glob: | + setup.py + cache-suffix: ${{ matrix.python-version }} + enable-cache: true + version: "latest" - name: Initialize CodeQL uses: github/codeql-action/init@v3 @@ -52,13 +56,32 @@ jobs: languages: ${{ matrix.language }} config-file: ./.github/codeql.yml queries: +security-and-quality + # run an 'alert-suppression' query + packs: "codeql/${{ matrix.language }}-queries:AlertSuppression.ql" #- name: Autobuild # uses: github/codeql-action/autobuild@v2 - name: Install project run: | - uv pip install --editable=.[test] + uv pip install --system '.[test]' - name: Perform CodeQL Analysis + id: analyze uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" + # define the output folder for SARIF files + output: sarif-results + + # Unlock inline mechanism to suppress CodeQL warnings. 
+ # https://github.com/github/codeql/issues/11427#issuecomment-1721059096 + - name: Dismiss alerts + # if: github.ref == 'refs/heads/main' + uses: advanced-security/dismiss-alerts@v2 + with: + # specify a 'sarif-id' and 'sarif-file' + sarif-id: ${{ steps.analyze.outputs.sarif-id }} + sarif-file: sarif-results/${{ matrix.language }}.sarif + env: + GITHUB_TOKEN: ${{ github.token }} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 74b1bdd1..95025896 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -14,7 +14,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: ['ubuntu-latest'] + os: ['ubuntu-22.04'] python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] cratedb-version: ['nightly'] @@ -25,15 +25,20 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} + + - name: Set up Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: 'setup.py' - - name: Install uv - uses: yezz123/setup-uv@v4 + - name: Set up uv + uses: astral-sh/setup-uv@v6 + with: + cache-dependency-glob: | + setup.py + cache-suffix: ${{ matrix.python-version }} + enable-cache: true + version: "latest" - name: Invoke tests run: | diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b00c58d7..eb561b91 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,7 +6,12 @@ on: push jobs: pypi: name: Build & publish package to pypi - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: ['ubuntu-latest'] + python-version: ['3.11'] if: startsWith(github.event.ref, 'refs/tags') steps: - uses: actions/checkout@v4 @@ -14,16 +19,20 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version: '3.11' - cache: 'pip' - cache-dependency-path: 'setup.py' + python-version: ${{ matrix.python-version }} - - 
name: Install uv - uses: yezz123/setup-uv@v4 + - name: Set up uv + uses: astral-sh/setup-uv@v6 + with: + cache-dependency-glob: | + setup.py + cache-suffix: ${{ matrix.python-version }} + enable-cache: true + version: "latest" - name: Build package run: | - uv pip install build twine wheel + uv pip install --system build twine wheel python -m build twine check dist/* diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b7b4f964..47cd7414 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -40,15 +40,20 @@ jobs: steps: - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} + + - name: Set up Python uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - cache: 'pip' - cache-dependency-path: setup.py - - name: Install uv - uses: yezz123/setup-uv@v4 + - name: Set up uv + uses: astral-sh/setup-uv@v6 + with: + cache-dependency-glob: | + setup.py + cache-suffix: ${{ matrix.python-version }} + enable-cache: true + version: "latest" - name: Invoke tests run: | @@ -73,7 +78,7 @@ jobs: # https://github.com/codecov/codecov-action - name: Upload coverage results to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: diff --git a/CHANGES.rst b/CHANGES.rst index 64141cc5..e9e73d94 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,35 @@ Changes for crate Unreleased ========== +2025/01/30 2.0.0 +================ + +- Switched JSON encoder to use the `orjson`_ library, to improve JSON + marshalling performance. Thanks, @widmogrod. + + orjson is fast and in some spots even more correct when compared against + Python's stdlib ``json`` module. Contrary to the stdlib variant, orjson + will serialize to ``bytes`` instead of ``str``. When sending data to CrateDB, + ``crate-python`` uses a custom encoder to add support for additional data + types. 
+ + - Python's ``Decimal`` type will be serialized to ``str``. + - Python's ``dt.datetime`` and ``dt.date`` types will be serialized to + ``int`` (``LONG``) after converting to milliseconds since epoch, to + optimally accommodate CrateDB's `TIMESTAMP`_ representation. + - NumPy's data types will be handled by ``orjson`` without any ado. + +.. _orjson: https://github.com/ijl/orjson +.. _TIMESTAMP: https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp + +2024/11/23 1.0.1 +================ + +- Python: Fixed "implicit namespace packages" migration by omitting + ``__init__.py`` from ``crate`` namespace package, see `PEP 420`_ + and `Package Discovery and Namespace Package » Finding namespace packages`_. + + 2024/11/05 1.0.0 ================ @@ -44,6 +73,7 @@ Unreleased .. _Migrate from crate.client to sqlalchemy-cratedb: https://cratedb.com/docs/sqlalchemy-cratedb/migrate-from-crate-client.html +.. _Package Discovery and Namespace Package » Finding namespace packages: https://setuptools.pypa.io/en/latest/userguide/package_discovery.html#namespace-packages .. _PEP 420: https://peps.python.org/pep-0420/ .. _sqlalchemy-cratedb: https://pypi.org/project/sqlalchemy-cratedb/ diff --git a/DEVELOP.rst b/DEVELOP.rst index 85dfb6f7..2f39ede0 100644 --- a/DEVELOP.rst +++ b/DEVELOP.rst @@ -5,7 +5,7 @@ CrateDB Python developer guide Setup ===== -Optionally install Python package and project manager ``uv``, +Optionally install Python package and project manager `uv`_, in order to significantly speed up the package installation:: {apt,brew,pip,zypper} install uv @@ -67,16 +67,11 @@ To inspect the whole list of test cases, run:: bin/test --list-tests -You can run the tests against multiple Python interpreters with `tox`_:: - - tox - -To do this, you will need the respective Python interpreter versions available -on your ``$PATH``. 
- -To run against a single interpreter, you can also invoke:: - - tox -e py37 +The CI setup on GitHub Actions (GHA) provides a full test matrix covering +relevant Python versions. You can invoke the software tests against a specific +Python interpreter or multiple `Python versions`_ on your workstation using +`uv`_, by supplying the ``--python`` command-line option, or by defining the +`UV_PYTHON`_ environment variable prior to invoking ``source bootstrap.sh``. *Note*: Before running the tests, make sure to stop all CrateDB instances which are listening on the default CrateDB transport port to avoid side effects with @@ -168,12 +163,14 @@ nothing special you need to do to get the live docs to update. .. _@crate/docs: https://github.com/orgs/crate/teams/docs .. _buildout: https://pypi.python.org/pypi/zc.buildout .. _PyPI: https://pypi.python.org/pypi +.. _Python versions: https://docs.astral.sh/uv/concepts/python-versions/ .. _Read the Docs: http://readthedocs.org .. _ReStructuredText: http://docutils.sourceforge.net/rst.html .. _Sphinx: http://sphinx-doc.org/ .. _tests/assets/pki/*.pem: https://github.com/crate/crate-python/tree/main/tests/assets/pki -.. _tox: http://testrun.org/tox/latest/ .. _twine: https://pypi.python.org/pypi/twine .. _useful command-line options for zope-testrunner: https://pypi.org/project/zope.testrunner/#some-useful-command-line-options-to-get-you-started +.. _uv: https://docs.astral.sh/uv/ +.. _UV_PYTHON: https://docs.astral.sh/uv/configuration/environment/#uv_python .. _versions hosted on ReadTheDocs: https://readthedocs.org/projects/crate-python/versions/ .. 
_zope.testrunner: https://pypi.org/project/zope.testrunner/ diff --git a/docs/by-example/http.rst b/docs/by-example/http.rst index aacf3481..5afd3dee 100644 --- a/docs/by-example/http.rst +++ b/docs/by-example/http.rst @@ -228,7 +228,7 @@ When connecting to non-CrateDB servers, the HttpClient will raise a ConnectionEr >>> http_client.server_infos(http_client._get_server()) Traceback (most recent call last): ... - crate.client.exceptions.ProgrammingError: Invalid server response of content-type 'text/html; charset=UTF-8': + crate.client.exceptions.ProgrammingError: Invalid server response of content-type 'text/html': ... >>> http_client.close() diff --git a/docs/connect.rst b/docs/connect.rst index 944fe263..774f6746 100644 --- a/docs/connect.rst +++ b/docs/connect.rst @@ -10,7 +10,7 @@ Connect to CrateDB `Python Database API Specification v2.0`_ (PEP 249). For help using the `SQLAlchemy`_ dialect, consult the - :ref:`SQLAlchemy dialect documentation `. + :ref:`SQLAlchemy dialect documentation `. .. SEEALSO:: diff --git a/docs/index-all.rst b/docs/index-all.rst index 85a508e9..5d9244d5 100644 --- a/docs/index-all.rst +++ b/docs/index-all.rst @@ -16,7 +16,6 @@ CrateDB Python Client -- all pages connect query blobs - sqlalchemy data-types by-example/index other-options diff --git a/docs/index.rst b/docs/index.rst index 67415c94..353f8c69 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -99,7 +99,7 @@ please consult the :ref:`data-types` documentation page. Migration Notes =============== -The :ref:`CrateDB dialect ` for `SQLAlchemy`_ is provided +The :ref:`CrateDB dialect ` for `SQLAlchemy`_ is provided by the `sqlalchemy-cratedb`_ package. If you are migrating from previous versions of ``crate[sqlalchemy]<1.0.0``, you diff --git a/docs/query.rst b/docs/query.rst index 00da8170..eb948fc0 100644 --- a/docs/query.rst +++ b/docs/query.rst @@ -10,7 +10,7 @@ Query CrateDB `Python Database API Specification v2.0`_ (PEP 249). 
For help using the `SQLAlchemy`_ dialect, consult - :ref:`the SQLAlchemy dialect documentation `. + :ref:`the SQLAlchemy dialect documentation `. .. SEEALSO:: diff --git a/docs/sqlalchemy.rst b/docs/sqlalchemy.rst deleted file mode 100644 index caf5ca8d..00000000 --- a/docs/sqlalchemy.rst +++ /dev/null @@ -1,17 +0,0 @@ -.. _sqlalchemy-support: -.. _using-sqlalchemy: - -================== -SQLAlchemy support -================== - -`SQLAlchemy`_ is the most popular `Object-Relational Mapping`_ (ORM) library -for Python. - -The `SQLAlchemy`_ CrateDB dialect is provided by the `sqlalchemy-cratedb`_ -package. - - -.. _Object-Relational Mapping: https://en.wikipedia.org/wiki/Object-relational_mapping -.. _SQLAlchemy: https://www.sqlalchemy.org/ -.. _sqlalchemy-cratedb: https://github.com/crate-workbench/sqlalchemy-cratedb diff --git a/pyproject.toml b/pyproject.toml index 31717680..08b0d321 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,13 @@ lint.per-file-ignores."tests/*" = [ "S106", # Possible hardcoded password assigned to argument: "password" "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes ] +lint.per-file-ignores."src/crate/client/{connection.py,http.py}" = [ + "A004", # Import `ConnectionError` is shadowing a Python builtin + "A005", # Module `http` shadows a Python standard-library module +] +lint.per-file-ignores."tests/client/test_http.py" = [ + "A004", # Import `ConnectionError` is shadowing a Python builtin +] # =================== diff --git a/requirements.txt b/requirements.txt index f8de725a..8935d351 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ +setuptools<80.3 +urllib3<2.4 zc.buildout==3.3 zope.interface==6.4.post2 diff --git a/setup.py b/setup.py index ccece82e..386b3c35 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ def read(path): packages=find_namespace_packages("src"), package_dir={"": "src"}, install_requires=[ + "orjson<4", "urllib3", "verlib2", ], @@ -66,15 
+67,14 @@ def read(path): 'backports.zoneinfo<1; python_version<"3.9"', "certifi", "createcoverage>=1,<2", - "mypy<1.14", - "poethepoet<0.30", - "ruff<0.8", + "mypy<1.16", + "poethepoet<1", + "ruff<0.12", "stopit>=1.1.2,<2", - "tox>=3,<5", "pytz", "zc.customdoctests>=1.0.1,<2", "zope.testing>=4,<6", - "zope.testrunner>=5,<7", + "zope.testrunner>=5,<8", ], }, python_requires=">=3.6", diff --git a/src/crate/__init__.py b/src/crate/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/crate/client/__init__.py b/src/crate/client/__init__.py index 35a53d6e..ac58fb77 100644 --- a/src/crate/client/__init__.py +++ b/src/crate/client/__init__.py @@ -29,8 +29,9 @@ # version string read from setup.py using a regex. Take care not to break the # regex! -__version__ = "1.0.0" +__version__ = "2.0.0" +# codeql[py/unused-global-variable] apilevel = "2.0" threadsafety = 1 paramstyle = "qmark" diff --git a/src/crate/client/http.py b/src/crate/client/http.py index d9a0598f..a1251d34 100644 --- a/src/crate/client/http.py +++ b/src/crate/client/http.py @@ -21,22 +21,22 @@ import calendar +import datetime as dt import heapq import io -import json import logging import os import re import socket import ssl import threading +import typing as t from base64 import b64encode -from datetime import date, datetime, timezone from decimal import Decimal from time import time from urllib.parse import urlparse -from uuid import UUID +import orjson import urllib3 from urllib3 import connection_from_url from urllib3.connection import HTTPConnection @@ -86,25 +86,53 @@ def super_len(o): return None -class CrateJsonEncoder(json.JSONEncoder): - epoch_aware = datetime(1970, 1, 1, tzinfo=timezone.utc) - epoch_naive = datetime(1970, 1, 1) - - def default(self, o): - if isinstance(o, (Decimal, UUID)): - return str(o) - if isinstance(o, datetime): - if o.tzinfo is not None: - delta = o - self.epoch_aware - else: - delta = o - self.epoch_naive - return int( - delta.microseconds / 
1000.0 - + (delta.seconds + delta.days * 24 * 3600) * 1000.0 - ) - if isinstance(o, date): - return calendar.timegm(o.timetuple()) * 1000 - return json.JSONEncoder.default(self, o) +epoch_aware = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc) +epoch_naive = dt.datetime(1970, 1, 1) + + +def json_encoder(obj: t.Any) -> t.Union[int, str]: + """ + Encoder function for orjson, with additional type support. + + - Python's `Decimal` type will be serialized to `str`. + - Python's `dt.datetime` and `dt.date` types will be + serialized to `int` after converting to milliseconds + since epoch. + + https://github.com/ijl/orjson#default + https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp + """ + if isinstance(obj, Decimal): + return str(obj) + if isinstance(obj, dt.datetime): + if obj.tzinfo is not None: + delta = obj - epoch_aware + else: + delta = obj - epoch_naive + return int( + delta.microseconds / 1000.0 + + (delta.seconds + delta.days * 24 * 3600) * 1000.0 + ) + if isinstance(obj, dt.date): + return calendar.timegm(obj.timetuple()) * 1000 + raise TypeError + + +def json_dumps(obj: t.Any) -> bytes: + """ + Serialize to JSON format, using `orjson`, with additional type support. 
+ + https://github.com/ijl/orjson + """ + return orjson.dumps( + obj, + default=json_encoder, + option=( + orjson.OPT_PASSTHROUGH_DATETIME + | orjson.OPT_NON_STR_KEYS + | orjson.OPT_SERIALIZE_NUMPY + ), + ) class Server: @@ -180,7 +208,7 @@ def close(self): def _json_from_response(response): try: - return json.loads(response.data.decode("utf-8")) + return orjson.loads(response.data) except ValueError as ex: raise ProgrammingError( "Invalid server response of content-type '{}':\n{}".format( @@ -223,7 +251,7 @@ def _raise_for_status_real(response): if response.status == 503: raise ConnectionError(message) if response.headers.get("content-type", "").startswith("application/json"): - data = json.loads(response.data.decode("utf-8")) + data = orjson.loads(response.data) error = data.get("error", {}) error_trace = data.get("error_trace", None) if "results" in data: @@ -323,7 +351,7 @@ def _update_pool_kwargs_for_ssl_minimum_version(server, kwargs): kwargs["ssl_minimum_version"] = ssl.TLSVersion.MINIMUM_SUPPORTED -def _create_sql_payload(stmt, args, bulk_args): +def _create_sql_payload(stmt, args, bulk_args) -> bytes: if not isinstance(stmt, str): raise ValueError("stmt is not a string") if args and bulk_args: @@ -334,7 +362,7 @@ def _create_sql_payload(stmt, args, bulk_args): data["args"] = args if bulk_args: data["bulk_args"] = bulk_args - return json.dumps(data, cls=CrateJsonEncoder) + return json_dumps(data) def _get_socket_opts( @@ -670,7 +698,7 @@ def _drop_server(self, server, message): # if this is the last server raise exception, otherwise try next if not self._active_servers: raise ConnectionError( - ("No more Servers available, " "exception from last server: %s") + ("No more Servers available, exception from last server: %s") % message ) diff --git a/tests/client/test_http.py b/tests/client/test_http.py index 610197a8..c4c0609e 100644 --- a/tests/client/test_http.py +++ b/tests/client/test_http.py @@ -49,9 +49,9 @@ ) from crate.client.http import ( Client, - 
CrateJsonEncoder, _get_socket_opts, _remove_certs_for_non_https, + json_dumps, ) REQUEST = "crate.client.http.Server.request" @@ -724,10 +724,10 @@ def test_username(self): class TestCrateJsonEncoder(TestCase): def test_naive_datetime(self): data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123") - result = json.dumps(data, cls=CrateJsonEncoder) - self.assertEqual(result, "1687771440123") + result = json_dumps(data) + self.assertEqual(result, b"1687771440123") def test_aware_datetime(self): data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00") - result = json.dumps(data, cls=CrateJsonEncoder) - self.assertEqual(result, "1687764240123") + result = json_dumps(data) + self.assertEqual(result, b"1687764240123") diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 1ea931fa..00000000 --- a/tox.ini +++ /dev/null @@ -1,14 +0,0 @@ -[tox] -envlist = py{py3,35,36,37,38,39}-sa_{1_0,1_1,1_2,1_3,1_4} - -[testenv] -usedevelop = True -passenv = JAVA_HOME -deps = - zope.testrunner - zope.testing - zc.customdoctests - mock - urllib3 -commands = - zope-testrunner -c --path=tests