From 7665a0af16e6f33ae58f5688dcb95eb3c4419786 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 14:50:24 +0000 Subject: [PATCH 01/15] Update poethepoet requirement from <0.31 to <0.32 Updates the requirements on [poethepoet](https://github.com/nat-n/poethepoet) to permit the latest version. - [Release notes](https://github.com/nat-n/poethepoet/releases) - [Commits](https://github.com/nat-n/poethepoet/compare/v0.1.0...v0.31.0) --- updated-dependencies: - dependency-name: poethepoet dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index b5c87934..05ae6fe8 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ def read(path): "certifi", "createcoverage>=1,<2", "mypy<1.14", - "poethepoet<0.31", + "poethepoet<0.32", "ruff<0.8", "stopit>=1.1.2,<2", "tox>=3,<5", From 0e9873f57b8f84f31f376e9ada2df5f68fdec960 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Dec 2024 14:53:53 +0000 Subject: [PATCH 02/15] Bump codecov/codecov-action from 4 to 5 Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 4 to 5. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v4...v5) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6f9b0029..045ada7a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -75,7 +75,7 @@ jobs: # https://github.com/codecov/codecov-action - name: Upload coverage results to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} with: From d29f0cac3a7329607b8c0556f85588735f95de41 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 1 Dec 2024 14:53:58 +0000 Subject: [PATCH 03/15] Bump astral-sh/setup-uv from 3 to 4 Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 3 to 4. - [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/v3...v4) --- updated-dependencies: - dependency-name: astral-sh/setup-uv dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 2 +- .github/workflows/nightly.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/tests.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index ac07837e..84d277ce 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -45,7 +45,7 @@ jobs: setup.py - name: Install uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: version: "latest" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 1b9b74e2..d852e321 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -33,7 +33,7 @@ jobs: cache-dependency-path: 'setup.py' - name: Install uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: version: "latest" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index de847c46..8db1ab52 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,7 @@ jobs: cache-dependency-path: 'setup.py' - name: Install uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: version: "latest" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 045ada7a..818feba8 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -48,7 +48,7 @@ jobs: cache-dependency-path: setup.py - name: Install uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: version: "latest" From 11a391a9a92228d226295ef94a95b4c6526b32e6 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 13 Dec 2024 18:48:33 +0100 Subject: [PATCH 04/15] CI: Provide Python 3.7. EOL, it was removed from recent GHA runners --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index d852e321..7f9e05b6 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -14,7 +14,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: ['ubuntu-latest'] + os: ['ubuntu-22.04'] python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12', '3.13'] cratedb-version: ['nightly'] From 1b7048b9af553f251ae25249a590e41947b8dd74 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 23 Dec 2024 14:37:38 +0000 Subject: [PATCH 05/15] Chore(deps-dev): Update mypy requirement from <1.14 to <1.15 Updates the requirements on [mypy](https://github.com/python/mypy) to permit the latest version. - [Changelog](https://github.com/python/mypy/blob/master/CHANGELOG.md) - [Commits](https://github.com/python/mypy/compare/v0.1.0...v1.14.0) --- updated-dependencies: - dependency-name: mypy dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 05ae6fe8..27851cab 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,7 @@ def read(path): 'backports.zoneinfo<1; python_version<"3.9"', "certifi", "createcoverage>=1,<2", - "mypy<1.14", + "mypy<1.15", "poethepoet<0.32", "ruff<0.8", "stopit>=1.1.2,<2", From 026f5c34d90a45b34391b2bf5b43dc97f3acea1a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 30 Dec 2024 14:39:00 +0000 Subject: [PATCH 06/15] Chore(deps-dev): Update poethepoet requirement from <0.32 to <0.33 Updates the requirements on [poethepoet](https://github.com/nat-n/poethepoet) to permit the latest version. - [Release notes](https://github.com/nat-n/poethepoet/releases) - [Commits](https://github.com/nat-n/poethepoet/compare/v0.1.0...v0.32.0) --- updated-dependencies: - dependency-name: poethepoet dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 27851cab..e842cc03 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ def read(path): "certifi", "createcoverage>=1,<2", "mypy<1.15", - "poethepoet<0.32", + "poethepoet<0.33", "ruff<0.8", "stopit>=1.1.2,<2", "tox>=3,<5", From b4a2ed4f623e86c7508467c8de61fed7152cabcf Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Jan 2025 14:30:59 +0000 Subject: [PATCH 07/15] Chore(deps): Bump astral-sh/setup-uv from 4 to 5 Bumps [astral-sh/setup-uv](https://github.com/astral-sh/setup-uv) from 4 to 5. - [Release notes](https://github.com/astral-sh/setup-uv/releases) - [Commits](https://github.com/astral-sh/setup-uv/compare/v4...v5) --- updated-dependencies: - dependency-name: astral-sh/setup-uv dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 2 +- .github/workflows/nightly.yml | 2 +- .github/workflows/release.yml | 2 +- .github/workflows/tests.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 84d277ce..da5cb5cc 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -45,7 +45,7 @@ jobs: setup.py - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@v5 with: version: "latest" diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 7f9e05b6..cd5fa37f 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -33,7 +33,7 @@ jobs: cache-dependency-path: 'setup.py' - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@v5 with: version: "latest" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 8db1ab52..eca585dc 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -19,7 +19,7 @@ jobs: cache-dependency-path: 'setup.py' - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@v5 with: version: "latest" diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 818feba8..16ed304a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -48,7 +48,7 @@ jobs: cache-dependency-path: setup.py - name: Install uv - uses: astral-sh/setup-uv@v4 + uses: astral-sh/setup-uv@v5 with: version: "latest" From c6892d5f57104177da1502941cceb73e815fc5ff Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 1 Jan 2025 14:31:02 +0000 Subject: [PATCH 08/15] Chore(deps): Bump advanced-security/dismiss-alerts from 1 to 2 Bumps [advanced-security/dismiss-alerts](https://github.com/advanced-security/dismiss-alerts) from 1 to 2. - [Commits](https://github.com/advanced-security/dismiss-alerts/compare/v1...v2) --- updated-dependencies: - dependency-name: advanced-security/dismiss-alerts dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index da5cb5cc..6d0f4436 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -77,7 +77,7 @@ jobs: # https://github.com/github/codeql/issues/11427#issuecomment-1721059096 - name: Dismiss alerts # if: github.ref == 'refs/heads/main' - uses: advanced-security/dismiss-alerts@v1 + uses: advanced-security/dismiss-alerts@v2 with: # specify a 'sarif-id' and 'sarif-file' sarif-id: ${{ steps.analyze.outputs.sarif-id }} From 4820f8f0696f75500a82667aeb33876276f4f936 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 15 Jan 2025 23:37:35 +0100 Subject: [PATCH 09/15] Chore: Fix test case because https://example.org/ responds differently --- docs/by-example/http.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/by-example/http.rst b/docs/by-example/http.rst index aacf3481..5afd3dee 100644 --- a/docs/by-example/http.rst +++ b/docs/by-example/http.rst @@ -228,7 +228,7 @@ When connecting to non-CrateDB servers, the HttpClient will raise a ConnectionEr >>> http_client.server_infos(http_client._get_server()) Traceback (most recent call last): ... - crate.client.exceptions.ProgrammingError: Invalid server response of content-type 'text/html; charset=UTF-8': + crate.client.exceptions.ProgrammingError: Invalid server response of content-type 'text/html': ... >>> http_client.close() From 4d47a267097517e612f829e6c046116f20e80fdd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 13 Jan 2025 14:47:42 +0000 Subject: [PATCH 10/15] Chore(deps-dev): Update ruff requirement from <0.8 to <0.10 Updates the requirements on [ruff](https://github.com/astral-sh/ruff) to permit the latest version. - [Release notes](https://github.com/astral-sh/ruff/releases) - [Changelog](https://github.com/astral-sh/ruff/blob/main/CHANGELOG.md) - [Commits](https://github.com/astral-sh/ruff/compare/v0.0.18...0.9.1) --- updated-dependencies: - dependency-name: ruff dependency-type: direct:development ... Signed-off-by: dependabot[bot] --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e842cc03..4da8be94 100644 --- a/setup.py +++ b/setup.py @@ -68,7 +68,7 @@ def read(path): "createcoverage>=1,<2", "mypy<1.15", "poethepoet<0.33", - "ruff<0.8", + "ruff<0.10", "stopit>=1.1.2,<2", "tox>=3,<5", "pytz", From a2aae9bcb7c374ea3e54623ff73ddb2e8e3cde53 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 15 Jan 2025 22:54:21 +0100 Subject: [PATCH 11/15] Chore: Format code using Ruff 0.9 --- pyproject.toml | 7 +++++++ src/crate/client/http.py | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 31717680..08b0d321 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,13 @@ lint.per-file-ignores."tests/*" = [ "S106", # Possible hardcoded password assigned to argument: "password" "S311", # Standard pseudo-random generators are not suitable for cryptographic purposes ] +lint.per-file-ignores."src/crate/client/{connection.py,http.py}" = [ + "A004", # Import `ConnectionError` is shadowing a Python builtin + "A005", # Import `ConnectionError` is shadowing a Python builtin +] +lint.per-file-ignores."tests/client/test_http.py" = [ + "A004", # Import `ConnectionError` is shadowing a Python builtin +] # =================== diff --git a/src/crate/client/http.py b/src/crate/client/http.py index d9a0598f..e2c164d9 100644 --- a/src/crate/client/http.py +++ b/src/crate/client/http.py @@ -670,7 +670,7 @@ def _drop_server(self, server, message): # if this is the last server raise exception, otherwise try next if not self._active_servers: raise ConnectionError( - ("No more Servers available, " "exception from last server: %s") + ("No more Servers available, exception from last server: %s") % message ) From b44ca897cde9012bc3433d30ff5bcac5025ca8d6 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 23 Jan 2025 12:59:31 +0100 Subject: [PATCH 12/15] Remove tox, recommend uv tox has been popular in times when it was easier to run a full Python test matrix on your workstation, for example supported by `pyenv`, compared to provisioning and maintaining it on a Jenkins CI runner. Nowadays, it became so easy to run the full-version test matrix on GitHub Actions (GHA), so `tox` became obsolete in such environments that heavily use GHA. Now that recent advancements added `uv` to the table, sandbox environments don't need to feel ousted. --- DEVELOP.rst | 21 +++++++++------------ setup.py | 1 - tox.ini | 14 -------------- 3 files changed, 9 insertions(+), 27 deletions(-) delete mode 100644 tox.ini diff --git a/DEVELOP.rst b/DEVELOP.rst index 85dfb6f7..2f39ede0 100644 --- a/DEVELOP.rst +++ b/DEVELOP.rst @@ -5,7 +5,7 @@ CrateDB Python developer guide Setup ===== -Optionally install Python package and project manager ``uv``, +Optionally install Python package and project manager `uv`_, in order to significantly speed up the package installation:: {apt,brew,pip,zypper} install uv @@ -67,16 +67,11 @@ To inspect the whole list of test cases, run:: bin/test --list-tests -You can run the tests against multiple Python interpreters with `tox`_:: - - tox - -To do this, you will need the respective Python interpreter versions available -on your ``$PATH``. - -To run against a single interpreter, you can also invoke:: - - tox -e py37 +The CI setup on GitHub Actions (GHA) provides a full test matrix covering +relevant Python versions. You can invoke the software tests against a specific +Python interpreter or multiple `Python versions`_ on your workstation using +`uv`_, by supplying the ``--python`` command-line option, or by defining the +`UV_PYTHON`_ environment variable prior to invoking ``source bootstrap.sh``. *Note*: Before running the tests, make sure to stop all CrateDB instances which are listening on the default CrateDB transport port to avoid side effects with @@ -168,12 +163,14 @@ nothing special you need to do to get the live docs to update. .. _@crate/docs: https://github.com/orgs/crate/teams/docs .. _buildout: https://pypi.python.org/pypi/zc.buildout .. _PyPI: https://pypi.python.org/pypi +.. _Python versions: https://docs.astral.sh/uv/concepts/python-versions/ .. _Read the Docs: http://readthedocs.org .. _ReStructuredText: http://docutils.sourceforge.net/rst.html .. _Sphinx: http://sphinx-doc.org/ .. _tests/assets/pki/*.pem: https://github.com/crate/crate-python/tree/main/tests/assets/pki -.. _tox: http://testrun.org/tox/latest/ .. _twine: https://pypi.python.org/pypi/twine .. _useful command-line options for zope-testrunner: https://pypi.org/project/zope.testrunner/#some-useful-command-line-options-to-get-you-started +.. _uv: https://docs.astral.sh/uv/ +.. _UV_PYTHON: https://docs.astral.sh/uv/configuration/environment/#uv_python .. _versions hosted on ReadTheDocs: https://readthedocs.org/projects/crate-python/versions/ .. _zope.testrunner: https://pypi.org/project/zope.testrunner/ diff --git a/setup.py b/setup.py index 4da8be94..0892ed3a 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,6 @@ def read(path): "poethepoet<0.33", "ruff<0.10", "stopit>=1.1.2,<2", - "tox>=3,<5", "pytz", "zc.customdoctests>=1.0.1,<2", "zope.testing>=4,<6", diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 1ea931fa..00000000 --- a/tox.ini +++ /dev/null @@ -1,14 +0,0 @@ -[tox] -envlist = py{py3,35,36,37,38,39}-sa_{1_0,1_1,1_2,1_3,1_4} - -[testenv] -usedevelop = True -passenv = JAVA_HOME -deps = - zope.testrunner - zope.testing - zc.customdoctests - mock - urllib3 -commands = - zope-testrunner -c --path=tests From f53cfe6071ab5052fb305e7e2409bd0843afd769 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 15 Jan 2025 23:57:17 +0100 Subject: [PATCH 13/15] Marshalling: Use `orjson` to improve JSON serialization performance https://github.com/ijl/orjson --- CHANGES.rst | 11 +++++++ setup.py | 1 + src/crate/client/http.py | 60 +++++++++++++++++++++------------------ tests/client/test_http.py | 14 ++++----- 4 files changed, 52 insertions(+), 34 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 2cf9c9dd..25ecde6f 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,17 @@ Changes for crate Unreleased ========== +- Switched JSON encoder to use the `orjson`_ library, to improve JSON + marshalling performance. Thanks, @widmogrod. + orjson is fast and in some spots even more correct when compared against + Python's stdlib ``json`` module. Contrary to the stdlib variant, orjson + will serialize to ``bytes`` instead of ``str``. Please also note it + will not deserialize to dataclasses, UUIDs, decimals, etc., or support + ``object_hook``. Within ``crate-python``, it is applied with an encoder + function for additional type support about Python's ``Decimal`` type and + freezegun's ``FakeDatetime`` type. + +.. _orjson: https://github.com/ijl/orjson 2024/11/23 1.0.1 ================ diff --git a/setup.py b/setup.py index 0892ed3a..15591f88 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ def read(path): packages=find_namespace_packages("src"), package_dir={"": "src"}, install_requires=[ + "orjson<4", "urllib3", "verlib2", ], diff --git a/src/crate/client/http.py b/src/crate/client/http.py index e2c164d9..8d19b9c4 100644 --- a/src/crate/client/http.py +++ b/src/crate/client/http.py @@ -20,23 +20,21 @@ # software solely pursuant to the terms of the relevant commercial agreement. -import calendar import heapq import io -import json import logging import os import re import socket import ssl import threading +import typing as t from base64 import b64encode -from datetime import date, datetime, timezone from decimal import Decimal from time import time from urllib.parse import urlparse -from uuid import UUID +import orjson import urllib3 from urllib3 import connection_from_url from urllib3.connection import HTTPConnection @@ -86,25 +84,33 @@ def super_len(o): return None -class CrateJsonEncoder(json.JSONEncoder): - epoch_aware = datetime(1970, 1, 1, tzinfo=timezone.utc) - epoch_naive = datetime(1970, 1, 1) - - def default(self, o): - if isinstance(o, (Decimal, UUID)): - return str(o) - if isinstance(o, datetime): - if o.tzinfo is not None: - delta = o - self.epoch_aware - else: - delta = o - self.epoch_naive - return int( - delta.microseconds / 1000.0 - + (delta.seconds + delta.days * 24 * 3600) * 1000.0 - ) - if isinstance(o, date): - return calendar.timegm(o.timetuple()) * 1000 - return json.JSONEncoder.default(self, o) +def cratedb_json_encoder(obj: t.Any) -> str: + """ + Encoder function for orjson, with additional type support. + + - Python's `Decimal` type. + - freezegun's `FakeDatetime` type. + + https://github.com/ijl/orjson#default + """ + if isinstance(obj, Decimal): + return str(obj) + elif hasattr(obj, "isoformat"): + return obj.isoformat() + raise TypeError + + +def json_dumps(obj: t.Any) -> bytes: + """ + Serialize to JSON format, using `orjson`, with additional type support. + + https://github.com/ijl/orjson + """ + return orjson.dumps( + obj, + default=cratedb_json_encoder, + option=(orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY), + ) class Server: @@ -180,7 +186,7 @@ def close(self): def _json_from_response(response): try: - return json.loads(response.data.decode("utf-8")) + return orjson.loads(response.data) except ValueError as ex: raise ProgrammingError( "Invalid server response of content-type '{}':\n{}".format( @@ -223,7 +229,7 @@ def _raise_for_status_real(response): if response.status == 503: raise ConnectionError(message) if response.headers.get("content-type", "").startswith("application/json"): - data = json.loads(response.data.decode("utf-8")) + data = orjson.loads(response.data) error = data.get("error", {}) error_trace = data.get("error_trace", None) if "results" in data: @@ -323,7 +329,7 @@ def _update_pool_kwargs_for_ssl_minimum_version(server, kwargs): kwargs["ssl_minimum_version"] = ssl.TLSVersion.MINIMUM_SUPPORTED -def _create_sql_payload(stmt, args, bulk_args): +def _create_sql_payload(stmt, args, bulk_args) -> bytes: if not isinstance(stmt, str): raise ValueError("stmt is not a string") if args and bulk_args: @@ -334,7 +340,7 @@ def _create_sql_payload(stmt, args, bulk_args): data["args"] = args if bulk_args: data["bulk_args"] = bulk_args - return json.dumps(data, cls=CrateJsonEncoder) + return json_dumps(data) def _get_socket_opts( diff --git a/tests/client/test_http.py b/tests/client/test_http.py index 610197a8..554fbe5f 100644 --- a/tests/client/test_http.py +++ b/tests/client/test_http.py @@ -49,9 +49,9 @@ ) from crate.client.http import ( Client, - CrateJsonEncoder, _get_socket_opts, _remove_certs_for_non_https, + json_dumps, ) REQUEST = "crate.client.http.Server.request" @@ -318,7 +318,7 @@ def test_datetime_is_converted_to_ts(self, request): # convert string to dict # because the order of the keys isn't deterministic data = json.loads(request.call_args[1]["data"]) - self.assertEqual(data["args"], [1425108700000]) + self.assertEqual(data["args"], ["2015-02-28T07:31:40"]) client.close() @patch(REQUEST, autospec=True) @@ -329,7 +329,7 @@ def test_date_is_converted_to_ts(self, request): day = dt.date(2016, 4, 21) client.sql("insert into users (dt) values (?)", (day,)) data = json.loads(request.call_args[1]["data"]) - self.assertEqual(data["args"], [1461196800000]) + self.assertEqual(data["args"], ["2016-04-21"]) client.close() def test_socket_options_contain_keepalive(self): @@ -724,10 +724,10 @@ def test_username(self): class TestCrateJsonEncoder(TestCase): def test_naive_datetime(self): data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123") - result = json.dumps(data, cls=CrateJsonEncoder) - self.assertEqual(result, "1687771440123") + result = json_dumps(data) + self.assertEqual(result, b'"2023-06-26T09:24:00.123000"') def test_aware_datetime(self): data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00") - result = json.dumps(data, cls=CrateJsonEncoder) - self.assertEqual(result, "1687764240123") + result = json_dumps(data) + self.assertEqual(result, b'"2023-06-26T09:24:00.123000+02:00"') From 44cad01a26df16a5bccbbb7e5863b24f116d7293 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 17 Jan 2025 20:29:00 +0100 Subject: [PATCH 14/15] Marshalling: Restore CrateDB standard encoder --- CHANGES.rst | 16 +++++++++++----- src/crate/client/http.py | 36 +++++++++++++++++++++++++++++------- tests/client/test_http.py | 8 ++++---- 3 files changed, 44 insertions(+), 16 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 25ecde6f..50da0e85 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,15 +7,21 @@ Unreleased - Switched JSON encoder to use the `orjson`_ library, to improve JSON marshalling performance. Thanks, @widmogrod. + orjson is fast and in some spots even more correct when compared against Python's stdlib ``json`` module. Contrary to the stdlib variant, orjson - will serialize to ``bytes`` instead of ``str``. Please also note it - will not deserialize to dataclasses, UUIDs, decimals, etc., or support - ``object_hook``. Within ``crate-python``, it is applied with an encoder - function for additional type support about Python's ``Decimal`` type and - freezegun's ``FakeDatetime`` type. + will serialize to ``bytes`` instead of ``str``. When sending data to CrateDB, + ``crate-python`` uses a custom encoder to add support for additional data + types. + + - Python's ``Decimal`` type will be serialized to ``str``. + - Python's ``dt.datetime`` and ``dt.date`` types will be serialized to + ``int`` (``LONG``) after converting to milliseconds since epoch, to + optimally accommodate CrateDB's `TIMESTAMP`_ representation. + - NumPy's data types will be handled by ``orjson`` without any ado. .. _orjson: https://github.com/ijl/orjson +.. _TIMESTAMP: https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp 2024/11/23 1.0.1 ================ diff --git a/src/crate/client/http.py b/src/crate/client/http.py index 8d19b9c4..a1251d34 100644 --- a/src/crate/client/http.py +++ b/src/crate/client/http.py @@ -20,6 +20,8 @@ # software solely pursuant to the terms of the relevant commercial agreement. +import calendar +import datetime as dt import heapq import io import logging @@ -84,19 +86,35 @@ def super_len(o): return None -def cratedb_json_encoder(obj: t.Any) -> str: +epoch_aware = dt.datetime(1970, 1, 1, tzinfo=dt.timezone.utc) +epoch_naive = dt.datetime(1970, 1, 1) + + +def json_encoder(obj: t.Any) -> t.Union[int, str]: """ Encoder function for orjson, with additional type support. - - Python's `Decimal` type. - - freezegun's `FakeDatetime` type. + - Python's `Decimal` type will be serialized to `str`. + - Python's `dt.datetime` and `dt.date` types will be + serialized to `int` after converting to milliseconds + since epoch. https://github.com/ijl/orjson#default + https://cratedb.com/docs/crate/reference/en/latest/general/ddl/data-types.html#type-timestamp """ if isinstance(obj, Decimal): return str(obj) - elif hasattr(obj, "isoformat"): - return obj.isoformat() + if isinstance(obj, dt.datetime): + if obj.tzinfo is not None: + delta = obj - epoch_aware + else: + delta = obj - epoch_naive + return int( + delta.microseconds / 1000.0 + + (delta.seconds + delta.days * 24 * 3600) * 1000.0 + ) + if isinstance(obj, dt.date): + return calendar.timegm(obj.timetuple()) * 1000 raise TypeError @@ -108,8 +126,12 @@ def json_dumps(obj: t.Any) -> bytes: """ return orjson.dumps( obj, - default=cratedb_json_encoder, - option=(orjson.OPT_NON_STR_KEYS | orjson.OPT_SERIALIZE_NUMPY), + default=json_encoder, + option=( + orjson.OPT_PASSTHROUGH_DATETIME + | orjson.OPT_NON_STR_KEYS + | orjson.OPT_SERIALIZE_NUMPY + ), ) diff --git a/tests/client/test_http.py b/tests/client/test_http.py index 554fbe5f..c4c0609e 100644 --- a/tests/client/test_http.py +++ b/tests/client/test_http.py @@ -318,7 +318,7 @@ def test_datetime_is_converted_to_ts(self, request): # convert string to dict # because the order of the keys isn't deterministic data = json.loads(request.call_args[1]["data"]) - self.assertEqual(data["args"], ["2015-02-28T07:31:40"]) + self.assertEqual(data["args"], [1425108700000]) client.close() @patch(REQUEST, autospec=True) @@ -329,7 +329,7 @@ def test_date_is_converted_to_ts(self, request): day = dt.date(2016, 4, 21) client.sql("insert into users (dt) values (?)", (day,)) data = json.loads(request.call_args[1]["data"]) - self.assertEqual(data["args"], ["2016-04-21"]) + self.assertEqual(data["args"], [1461196800000]) client.close() def test_socket_options_contain_keepalive(self): @@ -725,9 +725,9 @@ class TestCrateJsonEncoder(TestCase): def test_naive_datetime(self): data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123") result = json_dumps(data) - self.assertEqual(result, b'"2023-06-26T09:24:00.123000"') + self.assertEqual(result, b"1687771440123") def test_aware_datetime(self): data = dt.datetime.fromisoformat("2023-06-26T09:24:00.123+02:00") result = json_dumps(data) - self.assertEqual(result, b'"2023-06-26T09:24:00.123000+02:00"') + self.assertEqual(result, b"1687764240123") From 6ec9995f3ede0f392c24ef712d7d2dadd8d99093 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 30 Jan 2025 23:42:48 +0100 Subject: [PATCH 15/15] Release 2.0.0 --- CHANGES.rst | 3 +++ src/crate/client/__init__.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 50da0e85..e9e73d94 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,9 @@ Changes for crate Unreleased ========== +2025/01/30 2.0.0 +================ + - Switched JSON encoder to use the `orjson`_ library, to improve JSON marshalling performance. Thanks, @widmogrod. diff --git a/src/crate/client/__init__.py b/src/crate/client/__init__.py index 2fec7abd..ac58fb77 100644 --- a/src/crate/client/__init__.py +++ b/src/crate/client/__init__.py @@ -29,7 +29,7 @@ # version string read from setup.py using a regex. Take care not to break the # regex! -__version__ = "1.0.1" +__version__ = "2.0.0" # codeql[py/unused-global-variable] apilevel = "2.0"