diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ec98107e1..f8f794b50 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,7 +23,7 @@ permissions:
env:
ZLIB_VERSION: "1.3.1"
LIBICONV_VERSION: "1.18"
- LIBXML2_VERSION: "2.14.5"
+ LIBXML2_VERSION: "2.14.6"
LIBXSLT_VERSION: "1.1.43"
@@ -195,7 +195,7 @@ jobs:
fetch-depth: 1
- name: Setup Python
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: ${{ matrix.python-version }}
@@ -273,7 +273,7 @@ jobs:
key: ${{ runner.os }}-benchmarks-${{ env.LIBXML2_VERSION }}-${{ env.LIBXSLT_VERSION }}
- name: Setup Python
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: |
3.12
@@ -283,7 +283,7 @@ jobs:
run: |
# Run benchmarks in all Python versions.
for PYTHON in python3.14 python3.12 ; do
- ${PYTHON} -m pip install setuptools "Cython>=3.1.2"
+ ${PYTHON} -m pip install setuptools "Cython>=3.1.4"
# Compare against arbitrary 6.0-pre baseline revision (compatible with Cython 3.1) and current master.
${PYTHON} benchmark/run_benchmarks.py 0eb4f0029497957e58a9f15280b3529bdb18d117 origin/master HEAD
done
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
index 220919449..e45aecba6 100644
--- a/.github/workflows/wheels.yml
+++ b/.github/workflows/wheels.yml
@@ -32,12 +32,16 @@ on:
- build*
workflow_dispatch:
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
+ cancel-in-progress: true
+
permissions: {}
env:
ZLIB_VERSION: "1.3.1"
LIBICONV_VERSION: "1.18"
- LIBXML2_VERSION: "2.14.5"
+ LIBXML2_VERSION: "2.14.6"
LIBXSLT_VERSION: "1.1.43"
@@ -83,7 +87,7 @@ jobs:
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Set up Python
- uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
+ uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: "3.x"
@@ -170,15 +174,17 @@ jobs:
platforms: all
- name: Build wheels
- uses: pypa/cibuildwheel@v3.1.3
+ uses: pypa/cibuildwheel@v3.1.4
env:
GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
only: ${{ matrix.only }}
- name: Build old Linux wheels
- if: contains(matrix.only, '-manylinux_') && (contains(matrix.only, 'i686') || contains(matrix.only, 'x86_64') || contains(matrix.only, 'aarch64'))
- uses: pypa/cibuildwheel@v3.1.3
+ if: >-
+ contains(matrix.only, '-manylinux_') &&
+ (contains(matrix.only, 'i686') || contains(matrix.only, 'x86_64') || contains(matrix.only, 'aarch64'))
+ uses: pypa/cibuildwheel@v3.1.4
env:
CIBW_MANYLINUX_i686_IMAGE: manylinux2014
CIBW_MANYLINUX_X86_64_IMAGE: manylinux2014
@@ -188,8 +194,8 @@ jobs:
- name: Build faster Linux wheels
# also build wheels with the most recent manylinux images and gcc
- if: runner.os == 'Linux' && !contains(matrix.only, 'i686')
- uses: pypa/cibuildwheel@v3.1.3
+ if: runner.os == 'Linux' && (contains(matrix.only, 'x86_64') || contains(matrix.only, 'aarch64'))
+ uses: pypa/cibuildwheel@v3.1.4
env:
CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28
CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28
diff --git a/CHANGES.txt b/CHANGES.txt
index bc0c1c40c..5fc501ce0 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,32 @@
lxml changelog
==============
+6.0.2 (2025-09-21)
+==================
+
+Bugs fixed
+----------
+
+* LP#2125278: Compilation with libxml2 2.15.0 failed.
+ Original patch by Xi Ruoyao.
+
+* Setting ``decompress=True`` in the parser had no effect in libxml2 2.15.
+
+* Binary wheels on Linux and macOS use the library version libxml2 2.14.6.
+ See https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.14.6
+
+* Test failures in libxml2 2.15.0 were fixed.
+
+Other changes
+-------------
+
+* Binary wheels for Py3.9-3.11 on the ``riscv64`` architecture were added.
+
+* Error constants were updated to match libxml2 2.15.0.
+
+* Built using Cython 3.1.4.
+
+
6.0.1 (2025-08-22)
==================
@@ -23,6 +49,7 @@ Bugs fixed
Patch by Miro Hrončok.
* Binary wheels use the library version libxml2 2.14.5.
+ See https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.14.5
* Windows binary wheels continue to use a security patched library version libxml2 2.11.9.
diff --git a/Makefile b/Makefile
index 711eee5c7..4560f07a9 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ PYTHON_WITH_CYTHON?=$(shell $(PYTHON) -c 'import Cython.Build.Dependencies' >/d
CYTHON_WITH_COVERAGE?=$(shell $(PYTHON) -c 'import Cython.Coverage; import sys; assert not hasattr(sys, "pypy_version_info")' >/dev/null 2>/dev/null && echo " --coverage" || true)
PYTHON_BUILD_VERSION ?= *
-MANYLINUX_LIBXML2_VERSION=2.14.5
+MANYLINUX_LIBXML2_VERSION=2.14.6
MANYLINUX_LIBXSLT_VERSION=1.1.43
MANYLINUX_CFLAGS=-O3 -g1 -pipe -fPIC -flto
MANYLINUX_LDFLAGS=-flto
diff --git a/doc/main.txt b/doc/main.txt
index b68c8e2d8..45f55241f 100644
--- a/doc/main.txt
+++ b/doc/main.txt
@@ -160,8 +160,8 @@ Index `_ (PyPI). It has the source
that compiles on various platforms. The source distribution is signed
with `this key `_.
-The latest version is `lxml 6.0.1`_, released 2025-08-22
-(`changes for 6.0.1`_). `Older versions <#old-versions>`_
+The latest version is `lxml 6.0.2`_, released 2025-09-21
+(`changes for 6.0.2`_). `Older versions <#old-versions>`_
are listed below.
Please take a look at the
@@ -262,7 +262,9 @@ See the websites of lxml
..
and the `latest in-development version `_.
-.. _`PDF documentation`: lxmldoc-6.0.0.pdf
+.. _`PDF documentation`: lxmldoc-6.0.2.pdf
+
+* `lxml 6.0.2`_, released 2025-09-21 (`changes for 6.0.2`_)
* `lxml 6.0.1`_, released 2025-08-22 (`changes for 6.0.1`_)
@@ -278,6 +280,7 @@ See the websites of lxml
* `older releases `_
+.. _`lxml 6.0.2`: /files/lxml-6.0.2.tgz
.. _`lxml 6.0.1`: /files/lxml-6.0.1.tgz
.. _`lxml 6.0.0`: /files/lxml-6.0.0.tgz
.. _`lxml 5.4.0`: /files/lxml-5.4.0.tgz
@@ -285,6 +288,7 @@ See the websites of lxml
.. _`lxml 5.3.1`: /files/lxml-5.3.1.tgz
.. _`lxml 5.3.0`: /files/lxml-5.3.0.tgz
+.. _`changes for 6.0.2`: /changes-6.0.2.html
.. _`changes for 6.0.1`: /changes-6.0.1.html
.. _`changes for 6.0.0`: /changes-6.0.0.html
.. _`changes for 5.4.0`: /changes-5.4.0.html
diff --git a/pyproject.toml b/pyproject.toml
index 42adeaf02..7935c5d5e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,9 +1,9 @@
[build-system]
-requires = ["Cython>=3.1.2", "setuptools"]
+requires = ["Cython>=3.1.4", "setuptools"]
[tool.cibuildwheel]
build-verbosity = 1
-environment = {STATIC_DEPS="true", ZLIB_VERSION = "1.3.1", LIBICONV_VERSION = "1.18", LIBXML2_VERSION = "2.14.5", LIBXSLT_VERSION = "1.1.43"}
+environment = {STATIC_DEPS="true", ZLIB_VERSION = "1.3.1", LIBICONV_VERSION = "1.18", LIBXML2_VERSION = "2.14.6", LIBXSLT_VERSION = "1.1.43"}
enable = ["pypy", "cpython-prerelease"]
# "pypy"
# "cpython-prerelease"
@@ -19,33 +19,18 @@ skip = [
# Py3.8 wheel for macos is not universal: https://bugs.launchpad.net/lxml/+bug/2055404
"cp38-macosx_universal2",
# Reduce job load and HTTP hit rate on library servers.
- "cp38-manylinux_aarch64",
- "cp38-musllinux_aarch64",
- "cp38-manylinux_armv7l",
- "cp38-musllinux_armv7l",
+ "cp38-*_aarch64",
+ "cp38-*_armv7l",
- "cp38-manylinux_ppc64le",
- "cp39-manylinux_ppc64le",
- "cp310-manylinux_ppc64le",
- "cp311-manylinux_ppc64le",
- "cp38-musllinux_ppc64le",
- "cp39-musllinux_ppc64le",
- "cp310-musllinux_ppc64le",
- "cp311-musllinux_ppc64le",
+ "cp38-*_ppc64le",
+ "cp39-*_ppc64le",
+ "cp310-*_ppc64le",
+ "cp311-*_ppc64le",
- "cp38-manylinux_riscv64",
- "cp39-manylinux_riscv64",
- "cp310-manylinux_riscv64",
- "cp311-manylinux_riscv64",
- "cp38-musllinux_riscv64",
- "cp39-musllinux_riscv64",
- "cp310-musllinux_riscv64",
- "cp311-musllinux_riscv64",
+ "cp38-*_riscv64",
- "cp38-manylinux_s390x",
- "cp39-manylinux_s390x",
- "cp38-musllinux_s390x",
- "cp39-musllinux_s390x",
+ "cp38-*_s390x",
+ "cp39-*_s390x",
]
#test-command = "python {package}/test.py -vv"
@@ -54,15 +39,15 @@ skip = [
archs = ["x86_64", "aarch64", "i686", "ppc64le", "armv7l", "riscv64"]
repair-wheel-command = "auditwheel repair --strip -w {dest_dir} {wheel}"
-[tool.cibuildwheel.linux.environment]
-CFLAGS = "-O3 -g1 -pipe -fPIC -flto"
-AR = "gcc-ar"
-NM = "gcc-nm"
-RANLIB = "gcc-ranlib"
-LDFLAGS = "-fPIC -flto"
-STATIC_DEPS = "true"
-LIBXML2_VERSION = "2.14.5"
-LIBXSLT_VERSION = "1.1.43"
+[[tool.cibuildwheel.overrides]]
+select = "*linux_*"
+inherit.environment = "append"
+environment.CFLAGS = "-O3 -g1 -pipe -fPIC -flto"
+environment.AR = "gcc-ar"
+environment.NM = "gcc-nm"
+environment.RANLIB = "gcc-ranlib"
+environment.LDFLAGS = "-fPIC -flto"
+environment.STATIC_DEPS = "true"
[[tool.cibuildwheel.overrides]]
select = "*linux_i686"
@@ -75,7 +60,7 @@ inherit.environment = "append"
environment.CFLAGS = "-O3 -g1 -pipe -fPIC -flto -march=core2 -mtune=generic"
[[tool.cibuildwheel.overrides]]
-select = "*aarch64"
+select = "*linux_aarch64"
inherit.environment = "append"
environment.CFLAGS = "-O3 -g1 -pipe -fPIC -flto -march=armv8-a -mtune=cortex-a72"
diff --git a/requirements.txt b/requirements.txt
index 7be3f9cf0..45ada6d7b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1 @@
-Cython>=3.1.2
+Cython>=3.1.4
diff --git a/setup.py b/setup.py
index 432b80ab0..100c92741 100644
--- a/setup.py
+++ b/setup.py
@@ -230,7 +230,6 @@ def build_packages(files):
versioninfo.dev_status(),
'Intended Audience :: Developers',
'Intended Audience :: Information Technology',
- 'License :: OSI Approved :: BSD License',
'Programming Language :: Cython',
# NOTE: keep in sync with 'python_requires' list above.
'Programming Language :: Python :: 3',
diff --git a/src/lxml/__init__.py b/src/lxml/__init__.py
index aceb0ca38..58c2133db 100644
--- a/src/lxml/__init__.py
+++ b/src/lxml/__init__.py
@@ -1,6 +1,6 @@
# this is a package
-__version__ = "6.0.1"
+__version__ = "6.0.2"
def get_include():
diff --git a/src/lxml/includes/etree_defs.h b/src/lxml/includes/etree_defs.h
index d0182bcad..0cb6eb3a5 100644
--- a/src/lxml/includes/etree_defs.h
+++ b/src/lxml/includes/etree_defs.h
@@ -149,7 +149,18 @@ static PyObject* PyBytes_FromFormat(const char* format, ...) {
# define HTML_PARSE_NODEFDTD 4
#endif
#if LIBXML_VERSION < 20900
-# define XML_PARSE_BIG_LINES 4194304
+# define XML_PARSE_BIG_LINES 0x400000
+#endif
+#if LIBXML_VERSION < 21300
+# define XML_PARSE_NO_XXE 0x800000
+#endif
+#if LIBXML_VERSION < 21400
+# define XML_PARSE_UNZIP 0x1000000
+# define XML_PARSE_NO_SYS_CATALOG 0x2000000
+# define XML_PARSE_CATALOG_PI 0x4000000
+#endif
+#if LIBXML_VERSION < 21500
+# define XML_PARSE_SKIP_IDS 0x8000000
#endif
#include "libxml/tree.h"
diff --git a/src/lxml/includes/xmlerror.pxd b/src/lxml/includes/xmlerror.pxd
index 589e38eab..0249a45e2 100644
--- a/src/lxml/includes/xmlerror.pxd
+++ b/src/lxml/includes/xmlerror.pxd
@@ -5,23 +5,23 @@
cdef extern from "libxml/xmlerror.h":
ctypedef enum xmlErrorLevel:
- XML_ERR_NONE = 0
- XML_ERR_WARNING = 1 # A simple warning
- XML_ERR_ERROR = 2 # A recoverable error
+ XML_ERR_NONE = 0 # Success
+ XML_ERR_WARNING = 1 # A warning
+ XML_ERR_ERROR = 2 # An error
XML_ERR_FATAL = 3 # A fatal error
ctypedef enum xmlErrorDomain:
- XML_FROM_NONE = 0
+ XML_FROM_NONE = 0 # Unknown
XML_FROM_PARSER = 1 # The XML parser
- XML_FROM_TREE = 2 # The tree module
+ XML_FROM_TREE = 2 # The tree module (unused)
XML_FROM_NAMESPACE = 3 # The XML Namespace module
- XML_FROM_DTD = 4 # The XML DTD validation with parser contex
+ XML_FROM_DTD = 4 # The XML DTD validation with parser context
XML_FROM_HTML = 5 # The HTML parser
- XML_FROM_MEMORY = 6 # The memory allocator
+ XML_FROM_MEMORY = 6 # The memory allocator (unused)
XML_FROM_OUTPUT = 7 # The serialization code
XML_FROM_IO = 8 # The Input/Output stack
- XML_FROM_FTP = 9 # The FTP module
- XML_FROM_HTTP = 10 # The HTTP module
+ XML_FROM_FTP = 9 # The FTP module (unused)
+ XML_FROM_HTTP = 10 # The HTTP module (unused)
XML_FROM_XINCLUDE = 11 # The XInclude processing
XML_FROM_XPATH = 12 # The XPath module
XML_FROM_XPOINTER = 13 # The XPointer module
@@ -33,20 +33,20 @@ cdef extern from "libxml/xmlerror.h":
XML_FROM_RELAXNGV = 19 # The Relax-NG validator module
XML_FROM_CATALOG = 20 # The Catalog module
XML_FROM_C14N = 21 # The Canonicalization module
- XML_FROM_XSLT = 22 # The XSLT engine from libxslt
+ XML_FROM_XSLT = 22 # The XSLT engine from libxslt (unused)
XML_FROM_VALID = 23 # The XML DTD validation with valid context
- XML_FROM_CHECK = 24 # The error checking module
+ XML_FROM_CHECK = 24 # The error checking module (unused)
XML_FROM_WRITER = 25 # The xmlwriter module
- XML_FROM_MODULE = 26 # The dynamically loaded module modul
- XML_FROM_I18N = 27 # The module handling character conversion
+ XML_FROM_MODULE = 26 # The dynamically loaded module module (unused)
+ XML_FROM_I18N = 27 # The module handling character conversion (unused)
XML_FROM_SCHEMATRONV = 28 # The Schematron validator module
- XML_FROM_BUFFER = 29 # The buffers module
- XML_FROM_URI = 30 # The URI module
+ XML_FROM_BUFFER = 29 # The buffers module (unused)
+ XML_FROM_URI = 30 # The URI module (unused)
ctypedef enum xmlParserErrors:
- XML_ERR_OK = 0
- XML_ERR_INTERNAL_ERROR = 1
- XML_ERR_NO_MEMORY = 2
+ XML_ERR_OK = 0 # Success
+ XML_ERR_INTERNAL_ERROR = 1 # Internal assertion failure
+ XML_ERR_NO_MEMORY = 2 # Out of memory
XML_ERR_DOCUMENT_START = 3
XML_ERR_DOCUMENT_EMPTY = 4
XML_ERR_DOCUMENT_END = 5
@@ -76,7 +76,7 @@ cdef extern from "libxml/xmlerror.h":
XML_ERR_ENTITY_IS_EXTERNAL = 29
XML_ERR_ENTITY_IS_PARAMETER = 30
XML_ERR_UNKNOWN_ENCODING = 31
- XML_ERR_UNSUPPORTED_ENCODING = 32
+ XML_ERR_UNSUPPORTED_ENCODING = 32 # Unsupported character encoding
XML_ERR_STRING_NOT_STARTED = 33
XML_ERR_STRING_NOT_CLOSED = 34
XML_ERR_NS_DECL_ERROR = 35
@@ -157,6 +157,12 @@ cdef extern from "libxml/xmlerror.h":
XML_ERR_NAME_TOO_LONG = 110
XML_ERR_USER_STOP = 111
XML_ERR_COMMENT_ABRUPTLY_ENDED = 112
+ XML_WAR_ENCODING_MISMATCH = 113
+ XML_ERR_RESOURCE_LIMIT = 114 # Internal resource limit like maximum amplification factor exceeded
+ XML_ERR_ARGUMENT = 115 # Invalid argument
+ XML_ERR_SYSTEM = 116 # Unexpected error from the OS or an external library
+ XML_ERR_REDECL_PREDEF_ENTITY = 117
+ XML_ERR_INT_SUBSET_NOT_FINISHED = 118
XML_NS_ERR_XML_NAMESPACE = 200
XML_NS_ERR_UNDEFINED_NAMESPACE = 201
XML_NS_ERR_QNAME = 202
@@ -207,6 +213,7 @@ cdef extern from "libxml/xmlerror.h":
XML_DTD_DUP_TOKEN = 541
XML_HTML_STRUCURE_ERROR = 800
XML_HTML_UNKNOWN_TAG = 801
+ XML_HTML_INCORRECTLY_OPENED_COMMENT = 802
XML_RNGP_ANYNAME_ATTR_ANCESTOR = 1000
XML_RNGP_ATTR_CONFLICT = 1001
XML_RNGP_ATTRIBUTE_CHILDREN = 1002
@@ -385,7 +392,7 @@ cdef extern from "libxml/xmlerror.h":
XML_IO_ENAMETOOLONG = 1521
XML_IO_ENFILE = 1522
XML_IO_ENODEV = 1523
- XML_IO_ENOENT = 1524
+ XML_IO_ENOENT = 1524 # File not found
XML_IO_ENOEXEC = 1525
XML_IO_ENOLCK = 1526
XML_IO_ENOMEM = 1527
@@ -418,6 +425,7 @@ cdef extern from "libxml/xmlerror.h":
XML_IO_EADDRINUSE = 1554
XML_IO_EALREADY = 1555
XML_IO_EAFNOSUPPORT = 1556
+ XML_IO_UNSUPPORTED_PROTOCOL = 1557
XML_XINCLUDE_RECURSION = 1600
XML_XINCLUDE_PARSE_VALUE = 1601
XML_XINCLUDE_ENTITY_DEF_MISMATCH = 1602
@@ -708,8 +716,8 @@ cdef extern from "libxml/xmlerror.h":
XML_SCHEMAP_SRC_IMPORT_2 = 3066
XML_SCHEMAP_SRC_IMPORT_2_1 = 3067
XML_SCHEMAP_SRC_IMPORT_2_2 = 3068
- XML_SCHEMAP_INTERNAL = 3069 # 3069 non-W3C
- XML_SCHEMAP_NOT_DETERMINISTIC = 3070 # 3070 non-W3C
+ XML_SCHEMAP_INTERNAL = 3069
+ XML_SCHEMAP_NOT_DETERMINISTIC = 3070
XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_1 = 3071
XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_2 = 3072
XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_3 = 3073
@@ -725,12 +733,12 @@ cdef extern from "libxml/xmlerror.h":
XML_SCHEMAP_WARN_SKIP_SCHEMA = 3083
XML_SCHEMAP_WARN_UNLOCATED_SCHEMA = 3084
XML_SCHEMAP_WARN_ATTR_REDECL_PROH = 3085
- XML_SCHEMAP_WARN_ATTR_POINTLESS_PROH = 3086 # 3085
- XML_SCHEMAP_AG_PROPS_CORRECT = 3087 # 3086
- XML_SCHEMAP_COS_CT_EXTENDS_1_2 = 3088 # 3087
- XML_SCHEMAP_AU_PROPS_CORRECT = 3089 # 3088
- XML_SCHEMAP_A_PROPS_CORRECT_3 = 3090 # 3089
- XML_SCHEMAP_COS_ALL_LIMITED = 3091 # 3090
+ XML_SCHEMAP_WARN_ATTR_POINTLESS_PROH = 3086
+ XML_SCHEMAP_AG_PROPS_CORRECT = 3087
+ XML_SCHEMAP_COS_CT_EXTENDS_1_2 = 3088
+ XML_SCHEMAP_AU_PROPS_CORRECT = 3089
+ XML_SCHEMAP_A_PROPS_CORRECT_3 = 3090
+ XML_SCHEMAP_COS_ALL_LIMITED = 3091
XML_SCHEMATRONV_ASSERT = 4000
XML_SCHEMATRONV_REPORT = 4001
XML_MODULE_OPEN = 4900
diff --git a/src/lxml/includes/xmlparser.pxd b/src/lxml/includes/xmlparser.pxd
index 3a721c1dc..e0ef221af 100644
--- a/src/lxml/includes/xmlparser.pxd
+++ b/src/lxml/includes/xmlparser.pxd
@@ -195,34 +195,42 @@ cdef extern from "libxml/parser.h" nogil:
xmlParserInput* inputTab[]
ctypedef enum xmlParserOption:
- XML_PARSE_RECOVER = 1 # recover on errors
- XML_PARSE_NOENT = 2 # substitute entities
- XML_PARSE_DTDLOAD = 4 # load the external subset
- XML_PARSE_DTDATTR = 8 # default DTD attributes
- XML_PARSE_DTDVALID = 16 # validate with the DTD
- XML_PARSE_NOERROR = 32 # suppress error reports
- XML_PARSE_NOWARNING = 64 # suppress warning reports
- XML_PARSE_PEDANTIC = 128 # pedantic error reporting
- XML_PARSE_NOBLANKS = 256 # remove blank nodes
- XML_PARSE_SAX1 = 512 # use the SAX1 interface internally
- XML_PARSE_XINCLUDE = 1024 # Implement XInclude substitution
- XML_PARSE_NONET = 2048 # Forbid network access
- XML_PARSE_NODICT = 4096 # Do not reuse the context dictionary
- XML_PARSE_NSCLEAN = 8192 # remove redundant namespaces declarations
- XML_PARSE_NOCDATA = 16384 # merge CDATA as text nodes
- XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes
+ XML_PARSE_RECOVER = 0x1 # recover on errors
+ XML_PARSE_NOENT = 0x2 # substitute entities
+ XML_PARSE_DTDLOAD = 0x4 # load the external subset
+ XML_PARSE_DTDATTR = 0x8 # default DTD attributes
+ XML_PARSE_DTDVALID = 0x10 # validate with the DTD
+ XML_PARSE_NOERROR = 0x20 # suppress error reports
+ XML_PARSE_NOWARNING = 0x40 # suppress warning reports
+ XML_PARSE_PEDANTIC = 0x80 # pedantic error reporting
+ XML_PARSE_NOBLANKS = 0x100 # remove blank nodes
+ XML_PARSE_SAX1 = 0x200 # use the SAX1 interface internally
+ XML_PARSE_XINCLUDE = 0x400 # Implement XInclude substitution
+ XML_PARSE_NONET = 0x800 # Forbid network access
+ XML_PARSE_NODICT = 0x1000 # Do not reuse the context dictionary
+ XML_PARSE_NSCLEAN = 0x2000 # remove redundant namespaces declarations
+ XML_PARSE_NOCDATA = 0x4000 # merge CDATA as text nodes
+ XML_PARSE_NOXINCNODE = 0x8000 # do not generate XINCLUDE START/END nodes
# libxml2 2.6.21+ only:
- XML_PARSE_COMPACT = 65536 # compact small text nodes
+ XML_PARSE_COMPACT = 0x1_0000 # compact small text nodes
# libxml2 2.7.0+ only:
- XML_PARSE_OLD10 = 131072 # parse using XML-1.0 before update 5
- XML_PARSE_NOBASEFIX = 262144 # do not fixup XINCLUDE xml:base uris
- XML_PARSE_HUGE = 524288 # relax any hardcoded limit from the parser
+ XML_PARSE_OLD10 = 0x2_0000 # parse using XML-1.0 before update 5
+ XML_PARSE_NOBASEFIX = 0x4_0000 # do not fixup XINCLUDE xml:base uris
+ XML_PARSE_HUGE = 0x8_0000 # relax any hardcoded limit from the parser
# libxml2 2.7.3+ only:
- XML_PARSE_OLDSAX = 1048576 # parse using SAX2 interface before 2.7.0
+ XML_PARSE_OLDSAX = 0x10_0000 # parse using SAX2 interface before 2.7.0
# libxml2 2.8.0+ only:
- XML_PARSE_IGNORE_ENC = 2097152 # ignore internal document encoding hint
+ XML_PARSE_IGNORE_ENC = 0x20_0000 # ignore internal document encoding hint
# libxml2 2.9.0+ only:
- XML_PARSE_BIG_LINES = 4194304 # Store big lines numbers in text PSVI field
+ XML_PARSE_BIG_LINES = 0x40_0000 # Store big lines numbers in text PSVI field
+ # libxml2 2.13.0+ only:
+ XML_PARSE_NO_XXE = 0x80_0000 # Disable loading of external DTDs or entities
+ # libxml2 2.14.0+ only:
+ XML_PARSE_UNZIP = 0x100_0000 # Enable input decompression (and potential gzip bombs)
+ XML_PARSE_NO_SYS_CATALOG = 0x200_0000 # Disable the global system XML catalog
+ XML_PARSE_CATALOG_PI = 0x400_0000 # Enable XML catalog processing instructions
+ # libxml2 2.15.0+ only:
+ XML_PARSE_SKIP_IDS = 0x800_0000 # Force the parser to ignore IDs
cdef void xmlInitParser()
cdef void xmlCleanupParser()
diff --git a/src/lxml/parser.pxi b/src/lxml/parser.pxi
index a3fbef399..3106e6102 100644
--- a/src/lxml/parser.pxi
+++ b/src/lxml/parser.pxi
@@ -859,6 +859,9 @@ cdef class _BaseParser:
if not isinstance(self, (XMLParser, HTMLParser)):
raise TypeError, "This class cannot be instantiated"
+ if not collect_ids and tree.LIBXML_VERSION >= 21500:
+ parse_options |= xmlparser.XML_PARSE_SKIP_IDS
+
self._parse_options = parse_options
self.target = target
self._for_html = for_html
@@ -1669,6 +1672,8 @@ cdef class XMLParser(_FeedParser):
resolve_external = False
if not strip_cdata:
parse_options = parse_options ^ xmlparser.XML_PARSE_NOCDATA
+ if decompress:
+ parse_options |= xmlparser.XML_PARSE_UNZIP
_BaseParser.__init__(self, parse_options, False, schema,
remove_comments, remove_pis, strip_cdata,
@@ -1847,6 +1852,8 @@ cdef class HTMLParser(_FeedParser):
parse_options = parse_options ^ htmlparser.HTML_PARSE_NODEFDTD
if huge_tree:
parse_options = parse_options | xmlparser.XML_PARSE_HUGE
+ if decompress:
+ parse_options |= xmlparser.XML_PARSE_UNZIP
if strip_cdata is not _UNUSED:
import warnings
diff --git a/src/lxml/proxy.pxi b/src/lxml/proxy.pxi
index f7b47a73a..0e6cf19ef 100644
--- a/src/lxml/proxy.pxi
+++ b/src/lxml/proxy.pxi
@@ -574,7 +574,10 @@ cdef void fixThreadDictNamesForDtd(tree.xmlDtd* c_dtd,
_fixThreadDictPtr(&c_element.content.prefix, c_src_dict, c_dict)
c_attribute = c_element.attributes
while c_attribute:
- _fixThreadDictPtr(&c_attribute.defaultValue, c_src_dict, c_dict)
+ if tree.LIBXML_VERSION < 21500:
+ # libxml2 2.15 no longer stores default values in the dict.
+ # See https://gitlab.gnome.org/GNOME/libxml2/-/commit/24628f25
+ _fixThreadDictPtr(&c_attribute.defaultValue, c_src_dict, c_dict)
_fixThreadDictPtr(&c_attribute.name, c_src_dict, c_dict)
_fixThreadDictPtr(&c_attribute.prefix, c_src_dict, c_dict)
_fixThreadDictPtr(&c_attribute.elem, c_src_dict, c_dict)
diff --git a/src/lxml/tests/test_etree.py b/src/lxml/tests/test_etree.py
index 03f387454..7a8402575 100644
--- a/src/lxml/tests/test_etree.py
+++ b/src/lxml/tests/test_etree.py
@@ -5624,9 +5624,10 @@ def test_write_file_gzip_pathlike(self):
@needs_feature("zlib")
def test_write_file_gzip_parse(self):
tree = self.parse(b''+b''*200+b'')
+ parser = etree.XMLParser(decompress=True)
with tmpfile() as filename:
tree.write(filename, compression=9)
- data = etree.tostring(etree.parse(filename))
+ data = etree.tostring(etree.parse(filename, parser))
self.assertEqual(b''+b''*200+b'',
data)
@@ -5836,7 +5837,18 @@ def test_suite():
suite.addTests(doctest.DocTestSuite(selftest2))
# add doctests
- suite.addTests(doctest.DocTestSuite(etree))
+ doctest_stubs = {}
+ if 'schematron' not in etree.LIBXML_COMPILED_FEATURES:
+ # See doctest of class "lxml.etree.Schematron".
+ class FakeSchematron:
+ def __init__(self, schema):
+ self._results = iter([0, 1])
+ def validate(self, xml):
+ return next(self._results)
+
+ doctest_stubs['Schematron'] = FakeSchematron
+
+ suite.addTests(doctest.DocTestSuite(etree, extraglobs=doctest_stubs))
suite.addTests(
[make_doctest('tutorial.txt')])
suite.addTests(
diff --git a/src/lxml/xmlerror.pxi b/src/lxml/xmlerror.pxi
index 79442a8b4..3be24d212 100644
--- a/src/lxml/xmlerror.pxi
+++ b/src/lxml/xmlerror.pxi
@@ -983,6 +983,12 @@ ERR_VERSION_MISMATCH=109
ERR_NAME_TOO_LONG=110
ERR_USER_STOP=111
ERR_COMMENT_ABRUPTLY_ENDED=112
+WAR_ENCODING_MISMATCH=113
+ERR_RESOURCE_LIMIT=114
+ERR_ARGUMENT=115
+ERR_SYSTEM=116
+ERR_REDECL_PREDEF_ENTITY=117
+ERR_INT_SUBSET_NOT_FINISHED=118
NS_ERR_XML_NAMESPACE=200
NS_ERR_UNDEFINED_NAMESPACE=201
NS_ERR_QNAME=202
@@ -1033,6 +1039,7 @@ DTD_XMLID_TYPE=540
DTD_DUP_TOKEN=541
HTML_STRUCURE_ERROR=800
HTML_UNKNOWN_TAG=801
+HTML_INCORRECTLY_OPENED_COMMENT=802
RNGP_ANYNAME_ATTR_ANCESTOR=1000
RNGP_ATTR_CONFLICT=1001
RNGP_ATTRIBUTE_CHILDREN=1002
@@ -1244,6 +1251,7 @@ IO_ENETUNREACH=1553
IO_EADDRINUSE=1554
IO_EALREADY=1555
IO_EAFNOSUPPORT=1556
+IO_UNSUPPORTED_PROTOCOL=1557
XINCLUDE_RECURSION=1600
XINCLUDE_PARSE_VALUE=1601
XINCLUDE_ENTITY_DEF_MISMATCH=1602
diff --git a/update-error-constants.py b/update-error-constants.py
index d3b09fb86..8520af5f6 100644
--- a/update-error-constants.py
+++ b/update-error-constants.py
@@ -1,7 +1,8 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
import operator
import os.path
+import pathlib
import sys
import xml.etree.ElementTree as ET
@@ -71,8 +72,8 @@ def regenerate_file(filename, result):
return True
-def parse_enums(doc_dir, api_filename, enum_dict):
- tree = ET.parse(os.path.join(doc_dir, api_filename))
+def parse_from_api_xml(api_xml_path, enum_dict):
+ tree = ET.parse(str(api_xml_path))
for enum in tree.iterfind('symbols/enum'):
enum_type = enum.get('type')
if enum_type not in ENUM_MAP:
@@ -88,15 +89,33 @@ def parse_enums(doc_dir, api_filename, enum_dict):
))
-def main(doc_dir):
- enum_dict = {}
- parse_enums(doc_dir, 'libxml2-api.xml', enum_dict)
- #parse_enums(doc_dir, 'libxml-xmlerror.html', enum_dict)
- #parse_enums(doc_dir, 'libxml-xpath.html', enum_dict)
- #parse_enums(doc_dir, 'libxml-xmlschemas.html', enum_dict)
- #parse_enums(doc_dir, 'libxml-relaxng.html', enum_dict)
-
- # regenerate source files
+def parse_from_doxygen_xml(doxygen_xml_path, enum_dict):
+    """Collect (name, value, description) entries for the ENUM_MAP enums from Doxygen '*_8h.xml' files into enum_dict."""
+    for xml_file in doxygen_xml_path.glob("*_8h.xml"):
+        for _, compound in ET.iterparse(xml_file):
+            if compound.tag != 'compounddef':
+                continue
+            if not compound.findtext('compoundname', '').endswith('.h'):
+                break
+            for memberdef in compound.iterfind('sectiondef[@kind = "enum"]/memberdef'):
+                enum_type = memberdef.findtext('name')
+                if enum_type not in ENUM_MAP:
+                    continue
+                entries = enum_dict.get(enum_type)
+                if not entries:
+                    print("Found enum", enum_type)
+                    entries = enum_dict[enum_type] = []
+                # C semantics: an enumerator without initializer is previous + 1, and the first one is 0.
+                enum_value = -1
+                for enum in memberdef.iterfind('enumvalue'):
+                    initializer = enum.findtext('initializer', '').lstrip('= ')
+                    # Base 0 also accepts hexadecimal initializers such as "0x10".
+                    enum_value = int(initializer, 0) if initializer else enum_value + 1
+                    description = enum.findtext('briefdescription/para', '').rstrip('. ').strip()
+                    entries.append((enum.findtext('name'), enum_value, description))
+
+
+def generate_source_files(enum_dict):
pxi_result = []
append_pxi = pxi_result.append
pxd_result = []
@@ -148,6 +167,23 @@ def main(doc_dir):
print("Done")
+def main(doc_dir):
+    """Parse the libxml2 API XML in doc_dir ('libxml2-api.xml' or the Doxygen 'xml' directory) and pass the enums to generate_source_files()."""
+    doc_path = pathlib.Path(doc_dir)
+    api_xml_path = doc_path / 'libxml2-api.xml'
+    doxygen_xml_path = doc_path / 'xml'
+
+    enum_dict = {}
+    if api_xml_path.exists():
+        parse_from_api_xml(api_xml_path, enum_dict)
+    elif doxygen_xml_path.exists():
+        parse_from_doxygen_xml(doxygen_xml_path, enum_dict)
+    else:
+        # Report on stderr with a non-zero exit status so that callers notice the failure.
+        sys.exit(f"XML files for libxml2 API not found - did you generate the libxml2 documentation in {doc_dir}?")
+    generate_source_files(enum_dict)
+
+
if __name__ == "__main__":
if len(sys.argv) < 2 or sys.argv[1].lower() in ('-h', '--help'):
print("This script generates the constants in file %s" % BUILD_SOURCE_FILE)