diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index ac07837e..6d0f4436 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -45,7 +45,7 @@ jobs: setup.py - name: Install uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v5 with: version: "latest" @@ -77,7 +77,7 @@ jobs: # https://github.com/github/codeql/issues/11427#issuecomment-1721059096 - name: Dismiss alerts # if: github.ref == 'refs/heads/main' - uses: advanced-security/dismiss-alerts@v1 + uses: advanced-security/dismiss-alerts@v2 with: # specify a 'sarif-id' and 'sarif-file' sarif-id: ${{ steps.analyze.outputs.sarif-id }} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 1b9b74e2..cd5fa37f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -14,7 +14,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: ['ubuntu-latest'] + os: ['ubuntu-22.04'] python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] cratedb-version: ['nightly'] @@ -33,7 +33,7 @@ jobs: cache-dependency-path: 'setup.py' - name: Install uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v5 with: version: "latest" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index de847c46..eca585dc 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,7 @@ jobs: cache-dependency-path: 'setup.py' - name: Install uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v5 with: version: "latest" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6f9b0029..16ed304a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -48,7 +48,7 @@ jobs: cache-dependency-path: setup.py - name: Install uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v5 with: version: "latest" @@ -75,7 +75,7 @@ jobs: # https://github.com/codecov/codecov-action - name: Upload coverage results to Codecov - uses: 
codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: diff --git a/CHANGES.rst b/CHANGES.rst index 2cf9c9dd..e9e73d94 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,26 @@ Changes for crate Unreleased ========== +2025/01/30 2.0.0 +================ + +- Switched JSON encoder to use the `orjson`_ library, to improve JSON + marshalling performance. Thanks, @widmogrod. + + orjson is fast and in some spots even more correct when compared against + Python's stdlib ``json`` module. Contrary to the stdlib variant, orjson + will serialize to ``bytes`` instead of ``str``. When sending data to CrateDB, + ``crate-python`` uses a custom encoder to add support for additional data + types. + + - Python's ``Decimal`` type will be serialized to ``str``. + - Python's ``dt.datetime`` and ``dt.date`` types will be serialized to + ``int`` (``LONG``) after converting to milliseconds since epoch, to + optimally accommodate CrateDB's `TIMESTAMP`_ representation. + - NumPy's data types will be handled by ``orjson`` without any ado. + +.. _orjson: https://github.com/ijl/orjson +.. _TIMESTAMP: https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp 2024/11/23 1.0.1 ================ diff --git a/DEVELOP.rst b/DEVELOP.rst index 85dfb6f7..2f39ede0 100644 --- a/DEVELOP.rst +++ b/DEVELOP.rst @@ -5,7 +5,7 @@ CrateDB Python developer guide Setup ===== -Optionally install Python package and project manager ``uv``, +Optionally install Python package and project manager `uv`_, in order to significantly speed up the package installation:: {apt,brew,pip,zypper} install uv @@ -67,16 +67,11 @@ To inspect the whole list of test cases, run:: bin/test --list-tests -You can run the tests against multiple Python interpreters with `tox`_:: - - tox - -To do this, you will need the respective Python interpreter versions available -on your ``$PATH``. 
- -To run against a single interpreter, you can also invoke:: - - tox -e py37 +The CI setup on GitHub Actions (GHA) provides a full test matrix covering +relevant Python versions. You can invoke the software tests against a specific +Python interpreter or multiple `Python versions`_ on your workstation using +`uv`_, by supplying the ``--python`` command-line option, or by defining the +`UV_PYTHON`_ environment variable prior to invoking ``source bootstrap.sh``. *Note*: Before running the tests, make sure to stop all CrateDB instances which are listening on the default CrateDB transport port to avoid side effects with @@ -168,12 +163,14 @@ nothing special you need to do to get the live docs to update. .. _@crate/docs: https://github.com/orgs/crate/teams/docs .. _buildout: https://pypi.python.org/pypi/zc.buildout .. _PyPI: https://pypi.python.org/pypi +.. _Python versions: https://docs.astral.sh/uv/concepts/python-versions/ .. _Read the Docs: http://readthedocs.org .. _ReStructuredText: http://docutils.sourceforge.net/rst.html .. _Sphinx: http://sphinx-doc.org/ .. _tests/assets/pki/*.pem: https://github.com/crate/crate-python/tree/main/tests/assets/pki -.. _tox: http://testrun.org/tox/latest/ .. _twine: https://pypi.python.org/pypi/twine .. _useful command-line options for zope-testrunner: https://pypi.org/project/zope.testrunner/#some-useful-command-line-options-to-get-you-started +.. _uv: https://docs.astral.sh/uv/ +.. _UV_PYTHON: https://docs.astral.sh/uv/configuration/environment/#uv_python .. _versions hosted on ReadTheDocs: https://readthedocs.org/projects/crate-python/versions/ .. 
_zope.testrunner: https://pypi.org/project/zope.testrunner/ diff --git a/docs/by-example/http.rst b/docs/by-example/http.rst index aacf3481..5afd3dee 100644 --- a/docs/by-example/http.rst +++ b/docs/by-example/http.rst @@ -228,7 +228,7 @@ When connecting to non-CrateDB servers, the HttpClient will raise a ConnectionEr >>> http_client.server_infos(http_client._get_server()) Traceback (most recent call last): ... - crate.client.exceptions.ProgrammingError: Invalid server response of content-type 'text/html; charset=UTF-8': + crate.client.exceptions.ProgrammingError: Invalid server response of content-type 'text/html': ... >>> http_client.close() diff --git a/pyproject.toml b/pyproject.toml index 31717680..08b0d321 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,13 @@ lint.per-file-ignores."tests/*" = [ "S106", # Possible hardcoded password assigned to argument: "password" "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes ] +lint.per-file-ignores."src/crate/client/{connection.py,http.py}" = [ + "A004", # Import `ConnectionError` is shadowing a Python builtin + "A005", # Module `http` shadows a Python standard-library module +] +lint.per-file-ignores."tests/client/test_http.py" = [ + "A004", # Import `ConnectionError` is shadowing a Python builtin +] # =================== diff --git a/setup.py b/setup.py index b5c87934..15591f88 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ def read(path): packages=find_namespace_packages("src"), package_dir={"": "src"}, install_requires=[ + "orjson<4", "urllib3", "verlib2", ], @@ -66,11 +67,10 @@ def read(path): 'backports.zoneinfo<1; python_version<"3.9"', "certifi", "createcoverage>=1,<2", - "mypy<1.14", - "poethepoet<0.31", - "ruff<0.8", + "mypy<1.15", + "poethepoet<0.33", + "ruff<0.10", "stopit>=1.1.2,<2", - "tox>=3,<5", "pytz", "zc.customdoctests>=1.0.1,<2", "zope.testing>=4,<6", diff --git a/src/crate/client/__init__.py b/src/crate/client/__init__.py index
2fec7abd..ac58fb77 100644 --- a/src/crate/client/__init__.py +++ b/src/crate/client/__init__.py @@ -29,7 +29,7 @@ # version string read from setup.py using a regex. Take care not to break the # regex! -__version__ = "1.0.1" +__version__ = "2.0.0" # codeql[py/unused-global-variable] apilevel = "2.0" diff --git a/src/crate/client/http.py b/src/crate/client/http.py index d9a0598f..a1251d34 100644 --- a/src/crate/client/http.py +++ b/src/crate/client/http.py @@ -21,22 +21,22 @@ import calendar +import datetime as dt import heapq import io -import json import logging import os import re import socket import ssl import threading +import typing as t from base64 import b64encode -from datetime import date, datetime, timezone from decimal import Decimal from time import time from urllib.parse import urlparse -from uuid import UUID +import orjson import urllib3 from urllib3 import connection_from_url from urllib3.connection import HTTPConnection @@ -86,25 +86,53 @@ def super_len(o): return None -class CrateJsonEncoder(json.JSONEncoder): - epoch_aware = datetime(1970, 1, 1, tzinfo=timezone.utc) - epoch_naive = datetime(1970, 1, 1) - - def default(self, o): - if isinstance(o, (Decimal, UUID)): - return str(o) - if isinstance(o, datetime): - if o.tzinfo is not None: - delta = o - self.epoch_aware - else: - delta = o - self.epoch_naive - return int( - delta.microseconds / 1000.0 - + (delta.seconds + delta.days * 24 * 3600) * 1000.0 - ) - if isinstance(o, date): - return calendar.timegm(o.timetuple()) * 1000 - return json.JSONEncoder.default(self, o) +epoch_aware = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc) +epoch_naive = dt.datetime(1970, 1, 1) + + +def json_encoder(obj: t.Any) -> t.Union[int, str]: + """ + Encoder function for orjson, with additional type support. + + - Python's `Decimal` type will be serialized to `str`. + - Python's `dt.datetime` and `dt.date` types will be + serialized to `int` after converting to milliseconds + since epoch. 
+ + https://github.com/ijl/orjson#default + https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp + """ + if isinstance(obj, Decimal): + return str(obj) + if isinstance(obj, dt.datetime): + if obj.tzinfo is not None: + delta = obj - epoch_aware + else: + delta = obj - epoch_naive + return int( + delta.microseconds / 1000.0 + + (delta.seconds + delta.days * 24 * 3600) * 1000.0 + ) + if isinstance(obj, dt.date): + return calendar.timegm(obj.timetuple()) * 1000 + raise TypeError + + +def json_dumps(obj: t.Any) -> bytes: + """ + Serialize to JSON format, using `orjson`, with additional type support. + + https://github.com/ijl/orjson + """ + return orjson.dumps( + obj, + default=json_encoder, + option=( + orjson.OPT_PASSTHROUGH_DATETIME + | orjson.OPT_NON_STR_KEYS + | orjson.OPT_SERIALIZE_NUMPY + ), + ) class Server: @@ -180,7 +208,7 @@ def close(self): def _json_from_response(response): try: - return json.loads(response.data.decode("utf-8")) + return orjson.loads(response.data) except ValueError as ex: raise ProgrammingError( "Invalid server response of content-type '{}':\n{}".format( @@ -223,7 +251,7 @@ def _raise_for_status_real(response): if response.status == 503: raise ConnectionError(message) if response.headers.get("content-type", "").startswith("application/json"): - data = json.loads(response.data.decode("utf-8")) + data = orjson.loads(response.data) error = data.get("error", {}) error_trace = data.get("error_trace", None) if "results" in data: @@ -323,7 +351,7 @@ def _update_pool_kwargs_for_ssl_minimum_version(server, kwargs): kwargs["ssl_minimum_version"] = ssl.TLSVersion.MINIMUM_SUPPORTED -def _create_sql_payload(stmt, args, bulk_args): +def _create_sql_payload(stmt, args, bulk_args) -> bytes: if not isinstance(stmt, str): raise ValueError("stmt is not a string") if args and bulk_args: @@ -334,7 +362,7 @@ def _create_sql_payload(stmt, args, bulk_args): data["args"] = args if bulk_args: data["bulk_args"] = bulk_args - 
return json.dumps(data, cls=CrateJsonEncoder) + return json_dumps(data) def _get_socket_opts( @@ -670,7 +698,7 @@ def _drop_server(self, server, message): # if this is the last server raise exception, otherwise try next if not self._active_servers: raise ConnectionError( - ("No more Servers available, " "exception from last server: %s") + ("No more Servers available, exception from last server: %s") % message ) diff --git a/tests/client/test_http.py b/tests/client/test_http.py index 610197a8..c4c0609e 100644 --- a/tests/client/test_http.py +++ b/tests/client/test_http.py @@ -49,9 +49,9 @@ ) from crate.client.http import ( Client, - CrateJsonEncoder, _get_socket_opts, _remove_certs_for_non_https, + json_dumps, ) REQUEST = "crate.client.http.Server.request" @@ -724,10 +724,10 @@ def test_username(self): class TestCrateJsonEncoder(TestCase): def test_naive_datetime(self): data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123") - result = json.dumps(data, cls=CrateJsonEncoder) - self.assertEqual(result, "1687771440123") + result = json_dumps(data) + self.assertEqual(result, b"1687771440123") def test_aware_datetime(self): data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00") - result = json.dumps(data, cls=CrateJsonEncoder) - self.assertEqual(result, "1687764240123") + result = json_dumps(data) + self.assertEqual(result, b"1687764240123") diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 1ea931fa..00000000 --- a/tox.ini +++ /dev/null @@ -1,14 +0,0 @@ -[tox] -envlist = py{py3,35,36,37,38,39}-sa_{1_0,1_1,1_2,1_3,1_4} - -[testenv] -usedevelop = True -passenv = JAVA_HOME -deps = - zope.testrunner - zope.testing - zc.customdoctests - mock - urllib3 -commands = - zope-testrunner -c --path=tests