diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 000000000..51d77a4e4
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,152 @@
+name: CI
+
+on: [push, pull_request]
+
+jobs:
+ ci:
+ strategy:
+ # Allows for matrix sub-jobs to fail without canceling the rest
+ fail-fast: false
+
+ # MATRIX:
+ # =======
+ # Required parameters:
+ # os the os to run on
+ # python-version the python version to use
+ # backend the backend to use
+ # env any additional env variables. Set to '{}' for none
+ # Optional parameters:
+ # allowed_failure whether the job is allowed to fail
+ # extra_hash extra hash str to differentiate from other caches with similar name (must always start with '-')
+ matrix:
+ # Tests [amd64]
+ #
+ os: [ubuntu-18.04, macos-10.15]
+ python-version:
+ - 2.7
+ - 3.5
+ - 3.6
+ - 3.7
+ - 3.8
+ - 3.9
+ - "3.10" # quotes to avoid being interpreted as the number 3.1
+ - "3.11-dev"
+ # - "3.12-dev"
+ env: [{ STATIC_DEPS: true }, { STATIC_DEPS: false }]
+
+ include:
+ # Temporary - Allow failure on all 3.11-dev jobs until beta comes out.
+ - os: ubuntu-18.04
+ python-version: 3.11-dev
+ allowed_failure: true
+ - os: ubuntu-18.04
+ python-version: 3.11-dev
+ env: {STATIC_DEPS: true, WITH_REFNANNY: true}
+ extra_hash: "-refnanny"
+ allowed_failure: true
+ # Coverage setup
+ - os: ubuntu-18.04
+ python-version: 3.9
+ env: { COVERAGE: true }
+ extra_hash: "-coverage"
+ allowed_failure: true # shouldn't fail but currently does...
+ - os: ubuntu-18.04
+ python-version: 3.9
+ env: { STATIC_DEPS: false, EXTRA_DEPS: "docutils pygments sphinx sphinx-rtd-theme" }
+ extra_hash: "-docs"
+ allowed_failure: true # shouldn't fail but currently does...
+ # Old library setup with minimum version requirements
+ - os: ubuntu-18.04
+ python-version: 3.9
+ env: {
+ STATIC_DEPS: true,
+ LIBXML2_VERSION: 2.9.2,
+ LIBXSLT_VERSION: 1.1.27,
+ }
+ extra_hash: "-oldlibs"
+ allowed_failure: true # shouldn't fail but currently does...
+ # Ubuntu sub-jobs:
+ # ================
+ # Pypy
+ - os: ubuntu-18.04
+ python-version: pypy-2.7
+ env: { STATIC_DEPS: false }
+ allowed_failure: true
+ - os: ubuntu-18.04
+ python-version: pypy-3.7
+ env: { STATIC_DEPS: false }
+ allowed_failure: true
+
+ # MacOS sub-jobs
+ # ==============
+ - os: macos-10.15
+ allowed_failure: true # Unicode parsing fails in Py3
+
+ # This defaults to 360 minutes (6h) which is way too long and if a test gets stuck, it can block other pipelines.
+ # From testing, the runs tend to take ~3 minutes, so a limit of 20 minutes should be enough. This can always be
+ # changed in the future if needed.
+ timeout-minutes: 20
+ runs-on: ${{ matrix.os }}
+
+ env:
+ OS_NAME: ${{ matrix.os }}
+ PYTHON_VERSION: ${{ matrix.python-version }}
+ MACOSX_DEPLOYMENT_TARGET: 10.15
+ LIBXML2_VERSION: 2.9.14
+ LIBXSLT_VERSION: 1.1.35
+ COVERAGE: false
+ GCC_VERSION: 8
+ USE_CCACHE: 1
+ CCACHE_SLOPPINESS: "pch_defines,time_macros"
+ CCACHE_COMPRESS: 1
+ CCACHE_MAXSIZE: "100M"
+
+ steps:
+ - name: Checkout repo
+ uses: actions/checkout@v2
+ with:
+ fetch-depth: 1
+
+ - name: Setup python
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Cache [ccache]
+ uses: pat-s/always-upload-cache@v2.1.3
+ if: startsWith(runner.os, 'Linux')
+ with:
+ path: ~/.ccache
+ key: ${{ runner.os }}-ccache${{ matrix.extra_hash }}-${{ matrix.python-version }}-${{ hashFiles('.github/workflows/ci.yml', 'tools/ci-run.sh') }}
+
+ - name: Run CI
+ continue-on-error: ${{ matrix.allowed_failure || false }}
+ env: ${{ matrix.env }}
+ run: bash ./tools/ci-run.sh
+
+ - name: Build docs
+ if: contains(matrix.env.EXTRA_DEPS, 'sphinx')  # EXTRA_DEPS exists only in matrix.env; the step-level env of "Run CI" is not visible here
+ run: make html
+
+ - name: Upload docs
+ uses: actions/upload-artifact@v2
+ if: ${{ matrix.extra_hash == '-docs' }}
+ with:
+ name: website_html
+ path: doc/html
+ if-no-files-found: ignore
+
+ - name: Upload Coverage Report
+ uses: actions/upload-artifact@v2
+ with:
+ name: pycoverage_html
+ path: coverage*
+ if-no-files-found: ignore
+
+ - name: Upload Wheel
+ uses: actions/upload-artifact@v2
+ if: ${{ matrix.env.STATIC_DEPS == true && env.COVERAGE == 'false' }}  # STATIC_DEPS is a YAML boolean in the matrix; env.* values are strings
+ with:
+ name: wheels-${{ runner.os }}
+ path: dist/*.whl
+ if-no-files-found: ignore
diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml
new file mode 100644
index 000000000..09dc7c9d7
--- /dev/null
+++ b/.github/workflows/wheels.yml
@@ -0,0 +1,172 @@
+name: Wheel build
+
+on:
+ release:
+ types: [created]
+
+jobs:
+ sdist:
+ runs-on: ubuntu-20.04
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Set up Python
+ uses: actions/setup-python@v2  # same major version as the other jobs in this workflow
+ with:
+ python-version: "3.9"
+
+ - name: Install lib dependencies
+ run: sudo apt-get update -y -q && sudo apt-get install -y -q "libxml2=2.9.10*" "libxml2-dev=2.9.10*" libxslt1.1 libxslt1-dev
+
+ - name: Install Python dependencies
+ run: python -m pip install -U pip setuptools && python -m pip install -U docutils pygments sphinx sphinx-rtd-theme -r requirements.txt
+
+ - name: Build docs and sdist
+ run: make html sdist
+ env: { STATIC_DEPS: false }
+
+ - name: Release
+ uses: softprops/action-gh-release@v1
+ if: startsWith(github.ref, 'refs/tags/')
+ with:
+ files: dist/*.tar.gz
+
+ - name: Upload sdist
+ uses: actions/upload-artifact@v2
+ with:
+ name: sdist
+ path: dist/*.tar.gz
+
+ - name: Upload website
+ uses: actions/upload-artifact@v2
+ with:
+ name: website
+ path: doc/html
+
+ Linux:
+ runs-on: ubuntu-latest
+
+ strategy:
+ # Allows for matrix sub-jobs to fail without canceling the rest
+ fail-fast: false
+
+ matrix:
+ image:
+ - manylinux1_x86_64
+ - manylinux1_i686
+ #- manylinux2010_x86_64
+ #- manylinux2010_i686
+ - manylinux_2_24_x86_64
+ - manylinux_2_24_i686
+ - manylinux_2_24_aarch64
+ - musllinux_1_1_x86_64
+ - musllinux_1_1_aarch64
+ #- manylinux_2_24_ppc64le
+ #- manylinux_2_24_ppc64le
+ #- manylinux_2_24_s390x
+ pyversion: ["*"]
+
+ exclude:
+ - image: manylinux_2_24_aarch64
+ pyversion: "*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "*"
+ include:
+ - image: manylinux2014_aarch64
+ pyversion: "cp36*"
+ - image: manylinux_2_24_aarch64
+ pyversion: "cp37*"
+ - image: manylinux_2_24_aarch64
+ pyversion: "cp38*"
+ - image: manylinux_2_24_aarch64
+ pyversion: "cp39*"
+ - image: manylinux_2_24_aarch64
+ pyversion: "cp310*"
+
+ - image: musllinux_1_1_aarch64
+ pyversion: "cp36*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "cp37*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "cp38*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "cp39*"
+ - image: musllinux_1_1_aarch64
+ pyversion: "cp310*"
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: 3.8
+
+ - name: Install dependencies
+ run: python -m pip install -r requirements.txt
+
+ - name: Build Linux wheels
+ run: make sdist wheel_${{ matrix.image }}
+ env: { STATIC_DEPS: true, PYTHON_BUILD_VERSION: "${{ matrix.pyversion }}" }
+
+ - name: Release
+ uses: softprops/action-gh-release@v1
+ if: startsWith(github.ref, 'refs/tags/')
+ with:
+ files: wheelhouse/*/*-m*linux*.whl # manylinux / musllinux
+
+ - name: Upload wheels
+ uses: actions/upload-artifact@v2
+ with:
+ name: wheels-${{ matrix.image }}
+ path: wheelhouse/*/*-m*linux*.whl # manylinux / musllinux
+ if-no-files-found: ignore
+
+ non-Linux:
+ strategy:
+ # Allows for matrix sub-jobs to fail without canceling the rest
+ fail-fast: false
+
+ matrix:
+ #os: [macos-10.15, windows-latest]
+ #os: [macos-10.15, macOS-M1]
+ os: [macos-10.15]
+ python_version: ["2.7", "3.6", "3.7", "3.8", "3.9", "3.10", "pypy-3.7-v7.3.3", "pypy-3.8-v7.3.7"]
+
+ runs-on: ${{ matrix.os }}
+ env: { LIBXML2_VERSION: 2.9.14, LIBXSLT_VERSION: 1.1.35, MACOSX_DEPLOYMENT_TARGET: 10.15 }
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: Set up Python
+ uses: actions/setup-python@v2
+ with:
+ python-version: ${{ matrix.python_version }}
+
+ - name: Install MacOS dependencies
+ if: startsWith(matrix.os, 'mac')
+ run: |
+ brew install automake libtool
+ ln -s /usr/local/bin/glibtoolize /usr/local/bin/libtoolize
+
+ - name: Install dependencies
+ run: python -m pip install setuptools wheel -r requirements.txt
+
+ - name: Build wheels
+ run: make sdist wheel
+ env: { STATIC_DEPS: true, RUN_TESTS: true }
+
+ - name: Release
+ uses: softprops/action-gh-release@v1
+ if: startsWith(github.ref, 'refs/tags/')
+ with:
+ files: dist/lxml-*.whl
+
+ - name: Upload wheels
+ uses: actions/upload-artifact@v2
+ with:
+ name: wheels-${{ matrix.os }}
+ path: dist/lxml-*.whl
+ if-no-files-found: ignore
diff --git a/.gitignore b/.gitignore
index 8f4bad9dc..66a48a6e4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,7 @@
*.pyc
.tox
.idea
+.vscode
build
dist
wheelhouse
@@ -19,6 +20,7 @@ MANIFEST
doc/api/lxml*.rst
doc/api/_build/
doc/s5/lxml-ep2008.html
+src/lxml/includes/*/
src/lxml/includes/lxml-version.h
src/lxml/*.html
src/lxml/html/*.c
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 13ec41be7..000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,81 +0,0 @@
-os: linux
-language: python
-
-cache:
- pip: true
- directories:
- - $HOME/.ccache
- - libs
-
-python:
- - 3.9
- - 2.7
- - 3.8
- - 3.7
- - 3.6
- - 3.5
-
-env:
- global:
- - USE_CCACHE=1
- - CCACHE_SLOPPINESS=pch_defines,time_macros
- - CCACHE_COMPRESS=1
- - CCACHE_MAXSIZE=70M
- - PATH="/usr/lib/ccache:$PATH"
- - LIBXML2_VERSION=2.9.10
- - LIBXSLT_VERSION=1.1.34
- matrix:
- - STATIC_DEPS=false
- - STATIC_DEPS=true
-
-matrix:
- include:
- - python: 3.8
- env:
- - STATIC_DEPS=false
- - EXTRA_DEPS="docutils pygments sphinx sphinx-rtd-theme"
- script: make html
- - python: 3.8
- env:
- - STATIC_DEPS=false
- - EXTRA_DEPS="coverage<5"
- - python: 3.8
- env:
- - STATIC_DEPS=true
- - LIBXML2_VERSION=2.9.2 # minimum version requirements
- - LIBXSLT_VERSION=1.1.27
- - python: pypy
- env: STATIC_DEPS=false
- - python: pypy3
- env: STATIC_DEPS=false
- - python: 3.8
- env: STATIC_DEPS=false
- arch: arm64
- - python: 3.8
- env: STATIC_DEPS=true
- arch: arm64
- - python: 3.8
- env: STATIC_DEPS=false
- arch: ppc64le
- - python: 3.8
- env: STATIC_DEPS=true
- arch: ppc64le
- allow_failures:
- - python: pypy
- - python: pypy3
-
-install:
- - pip install -U pip wheel
- - if [ -z "${TRAVIS_PYTHON_VERSION##*-dev}" ];
- then pip install --install-option=--no-cython-compile https://github.com/cython/cython/archive/master.zip;
- else pip install -r requirements.txt;
- fi
- - pip install -U beautifulsoup4 cssselect html5lib rnc2rng ${EXTRA_DEPS}
-
-script:
- - CFLAGS="-O0 -g -fPIC" python -u setup.py build_ext --inplace
- $(if [ -n "${TRAVIS_PYTHON_VERSION##2.*}" -a -n "${TRAVIS_PYTHON_VERSION##3.[34]*}" ]; then echo -n " -j7 "; fi )
- $(if [ -n "$EXTRA_DEPS" -a -z "${EXTRA_DEPS##*coverage*}" ]; then echo -n "--with-coverage"; fi )
- - ccache -s || true
- - CFLAGS="-O0 -g -fPIC" PYTHONUNBUFFERED=x make test
- - ccache -s || true
diff --git a/CHANGES.txt b/CHANGES.txt
index 22f4d450b..64bba1c22 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,125 @@
lxml changelog
==============
+4.9.1 (2022-07-01)
+==================
+
+Bugs fixed
+----------
+
+* A crash was resolved when using ``iterwalk()`` (or ``canonicalize()``)
+ after parsing certain incorrect input. Note that ``iterwalk()`` can crash
+ on *valid* input parsed with the same parser *after* failing to parse the
+ incorrect input.
+
+
+4.9.0 (2022-06-01)
+==================
+
+Bugs fixed
+----------
+
+* GH#341: The mixin inheritance order in ``lxml.html`` was corrected.
+ Patch by xmo-odoo.
+
+Other changes
+-------------
+
+* Built with Cython 0.29.30 to adapt to changes in Python 3.11 and 3.12.
+
+* Wheels include zlib 1.2.12, libxml2 2.9.14 and libxslt 1.1.35
+ (libxml2 2.9.12+ and libxslt 1.1.34 on Windows).
+
+* GH#343: Windows-AArch64 build support in Visual Studio.
+ Patch by Steve Dower.
+
+
+4.8.0 (2022-02-17)
+==================
+
+Features added
+--------------
+
+* GH#337: Path-like objects are now supported throughout the API instead of just strings.
+ Patch by Henning Janssen.
+
+* The ``ElementMaker`` now supports ``QName`` values as tags, which always override
+ the default namespace of the factory.
+
+Bugs fixed
+----------
+
+* GH#338: In lxml.objectify, the XSI float annotation "nan" and "inf" were spelled in
+ lower case, whereas XML Schema datatypes define them as "NaN" and "INF" respectively.
+ Patch by Tobias Deiminger.
+
+Other changes
+-------------
+
+* Built with Cython 0.29.28.
+
+
+4.7.1 (2021-12-13)
+==================
+
+Features added
+--------------
+
+* Chunked Unicode string parsing via ``parser.feed()`` now encodes the input data
+ to the native UTF-8 encoding directly, instead of going through ``Py_UNICODE`` /
+ ``wchar_t`` encoding first, which previously required duplicate recoding in most cases.
+
+Bugs fixed
+----------
+
+* The standard namespace prefixes were mishandled during "C14N2" serialisation on Python 3.
+ See https://mail.python.org/archives/list/lxml@python.org/thread/6ZFBHFOVHOS5GFDOAMPCT6HM5HZPWQ4Q/
+
+* ``lxml.objectify`` previously accepted non-XML numbers with underscores (like "1_000")
+ as integers or float values in Python 3.6 and later. It now adheres to the number
+ format of the XML spec again.
+
+* LP#1939031: Static wheels of lxml now contain the header files of zlib and libiconv
+ (in addition to the already provided headers of libxml2/libxslt/libexslt).
+
+Other changes
+-------------
+
+* Wheels include libxml2 2.9.12+ and libxslt 1.1.34 (also on Windows).
+
+
+4.7.0 (2021-12-13)
+==================
+
+* Release retracted due to missing files in lxml/includes/.
+
+
+4.6.5 (2021-12-12)
+==================
+
+Bugs fixed
+----------
+
+* A vulnerability (GHSL-2021-1038) in the HTML cleaner allowed sneaking script
+ content through SVG images (CVE-2021-43818).
+
+* A vulnerability (GHSL-2021-1037) in the HTML cleaner allowed sneaking script
+ content through CSS imports and other crafted constructs (CVE-2021-43818).
+
+
+4.6.4 (2021-11-01)
+==================
+
+Features added
+--------------
+
+* GH#317: A new property ``system_url`` was added to DTD entities.
+ Patch by Thirdegree.
+
+* GH#314: The ``STATIC_*`` variables in ``setup.py`` can now be passed via env vars.
+ Patch by Isaac Jurado.
+
+
4.6.3 (2021-03-21)
==================
diff --git a/Makefile b/Makefile
index a8c9de829..1e0a9119a 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ PYTHON3?=python3
TESTFLAGS=-p -v
TESTOPTS=
SETUPFLAGS=
-LXMLVERSION:=$(shell sed -ne '/__version__/s|.*__version__\s*=\s*"\([^"]*\)".*|\1|p' src/lxml/__init__.py)
+LXMLVERSION:=$(shell $(PYTHON3) -c 'import re; print(re.findall(r"__version__\s*=\s*\"([^\"]+)\"", open("src/lxml/__init__.py").read())[0])' )
PARALLEL?=$(shell $(PYTHON) -c 'import sys; print("-j7" if sys.version_info >= (3, 5) else "")' )
PARALLEL3?=$(shell $(PYTHON3) -c 'import sys; print("-j7" if sys.version_info >= (3, 5) else "")' )
@@ -12,17 +12,23 @@ PY3_WITH_CYTHON?=$(shell $(PYTHON3) -c 'import Cython.Build.Dependencies' >/dev/
CYTHON_WITH_COVERAGE?=$(shell $(PYTHON) -c 'import Cython.Coverage; import sys; assert not hasattr(sys, "pypy_version_info")' >/dev/null 2>/dev/null && echo " --coverage" || true)
CYTHON3_WITH_COVERAGE?=$(shell $(PYTHON3) -c 'import Cython.Coverage; import sys; assert not hasattr(sys, "pypy_version_info")' >/dev/null 2>/dev/null && echo " --coverage" || true)
-MANYLINUX_LIBXML2_VERSION=2.9.10
-MANYLINUX_LIBXSLT_VERSION=1.1.34
+PYTHON_BUILD_VERSION ?= *
+MANYLINUX_LIBXML2_VERSION=2.9.14
+MANYLINUX_LIBXSLT_VERSION=1.1.35
MANYLINUX_CFLAGS=-O3 -g1 -pipe -fPIC -flto
MANYLINUX_LDFLAGS=-flto
-MANYLINUX_IMAGE_X86_64=quay.io/pypa/manylinux1_x86_64
-MANYLINUX_IMAGE_686=quay.io/pypa/manylinux1_i686
-MANYLINUX_IMAGE_AARCH64=quay.io/pypa/manylinux2014_aarch64
-AARCH64_ENV=-e AR="/opt/rh/devtoolset-9/root/usr/bin/gcc-ar" \
- -e NM="/opt/rh/devtoolset-9/root/usr/bin/gcc-nm" \
- -e RANLIB="/opt/rh/devtoolset-9/root/usr/bin/gcc-ranlib"
+MANYLINUX_IMAGES= \
+ manylinux1_x86_64 \
+ manylinux1_i686 \
+ manylinux_2_24_x86_64 \
+ manylinux_2_24_i686 \
+ manylinux2014_aarch64 \
+ manylinux_2_24_aarch64 \
+ manylinux_2_24_ppc64le \
+ manylinux_2_24_s390x \
+ musllinux_1_1_x86_64 \
+ musllinux_1_1_aarch64
.PHONY: all inplace inplace3 rebuild-sdist sdist build require-cython wheel_manylinux wheel
@@ -55,19 +61,22 @@ require-cython:
qemu-user-static:
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
-wheel_manylinux: wheel_manylinux64 wheel_manylinux32 wheel_manylinuxaarch64
-wheel_manylinuxaarch64: qemu-user-static
+wheel_manylinux: $(addprefix wheel_,$(MANYLINUX_IMAGES))
+$(addprefix wheel_,$(filter-out %_x86_64, $(filter-out %_i686, $(MANYLINUX_IMAGES)))): qemu-user-static
-wheel_manylinux32 wheel_manylinux64 wheel_manylinuxaarch64: dist/lxml-$(LXMLVERSION).tar.gz
+wheel_%: dist/lxml-$(LXMLVERSION).tar.gz
time docker run --rm -t \
-v $(shell pwd):/io \
- $(if $(patsubst %aarch64,,$@),,$(AARCH64_ENV)) \
+ -e AR=gcc-ar \
+ -e NM=gcc-nm \
+ -e RANLIB=gcc-ranlib \
-e CFLAGS="$(MANYLINUX_CFLAGS) $(if $(patsubst %aarch64,,$@),-march=core2,-march=armv8-a -mtune=cortex-a72)" \
-e LDFLAGS="$(MANYLINUX_LDFLAGS)" \
-e LIBXML2_VERSION="$(MANYLINUX_LIBXML2_VERSION)" \
-e LIBXSLT_VERSION="$(MANYLINUX_LIBXSLT_VERSION)" \
- -e WHEELHOUSE=wheelhouse_$(subst wheel_,,$@) \
- $(if $(filter $@,wheel_manylinuxaarch64),$(MANYLINUX_IMAGE_AARCH64),$(if $(patsubst %32,,$@),$(MANYLINUX_IMAGE_X86_64),$(MANYLINUX_IMAGE_686))) \
+ -e PYTHON_BUILD_VERSION="$(PYTHON_BUILD_VERSION)" \
+ -e WHEELHOUSE=$(subst wheel_,wheelhouse/,$@) \
+ quay.io/pypa/$(subst wheel_,,$@) \
bash /io/tools/manylinux/build-wheels.sh /io/$<
wheel:
@@ -89,6 +98,15 @@ valgrind_test_inplace: inplace
valgrind --tool=memcheck --leak-check=full --num-callers=30 --suppressions=valgrind-python.supp \
$(PYTHON) test.py
+fuzz: clean
+ $(MAKE) \
+ CC="/usr/bin/clang" \
+ CFLAGS="$$CFLAGS -fsanitize=fuzzer-no-link -g2" \
+ CXX="/usr/bin/clang++" \
+ CXXFLAGS="-fsanitize=fuzzer-no-link" \
+ inplace3
+ $(PYTHON3) src/lxml/tests/fuzz_xml_parse.py
+
gdb_test_inplace: inplace
@echo "file $(PYTHON)\nrun test.py" > .gdb.command
gdb -x .gdb.command -d src -d src/lxml
@@ -105,7 +123,7 @@ ftest_build: build
ftest_inplace: inplace
$(PYTHON) test.py -f $(TESTFLAGS) $(TESTOPTS)
-apidoc: apidocclean
+apidoc: apidocclean inplace3
@[ -x "`which sphinx-apidoc`" ] \
&& (echo "Generating API docs ..." && \
PYTHONPATH=src:$(PYTHONPATH) sphinx-apidoc -e -P -T -o doc/api src/lxml \
diff --git a/README.rst b/README.rst
index 3ad1ba177..a0434b379 100644
--- a/README.rst
+++ b/README.rst
@@ -50,6 +50,11 @@ for other ways to support the lxml project,
as well as commercial consulting, customisations and trainings on lxml and
fast Python XML processing.
+Note that we are not accepting donations in crypto currencies.
+Much of the development and hosting for lxml is done in a carbon-neutral way
+or with compensated and very low emissions.
+Crypto currencies do not fit into that ambition.
+
.. |Donate| image:: https://lxml.de/paypal_btn_donateCC_LG.png
:width: 160
:height: 47
@@ -58,7 +63,7 @@ fast Python XML processing.
.. _`doc/main.txt`: https://github.com/lxml/lxml/blob/master/doc/main.txt
.. _`INSTALL.txt`: http://lxml.de/installation.html
-`Travis-CI `_ and `AppVeyor `_
+`AppVeyor <https://ci.appveyor.com/project/lxml/lxml>`_ and `GitHub Actions <https://github.com/lxml/lxml/actions>`_
support the lxml project with their build and CI servers.
Jetbrains supports the lxml project by donating free licenses of their
`PyCharm IDE `_.
@@ -69,6 +74,18 @@ Another supporter of the lxml project is
Project income report
---------------------
+* Total project income in 2021: EUR 4890.37 (407.53 € / month)
+
+ - Tidelift: EUR 4066.66
+ - Paypal: EUR 223.71
+ - other: EUR 600.00
+
+* Total project income in 2020: EUR 6065.86 (505.49 € / month)
+
+ - Tidelift: EUR 4064.77
+ - Paypal: EUR 1401.09
+ - other: EUR 600.00
+
* Total project income in 2019: EUR 717.52 (59.79 € / month)
- Tidelift: EUR 360.30
diff --git a/appveyor.yml b/appveyor.yml
index b8d7a72db..344019035 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,11 +1,16 @@
version: 1.0.{build}
+image: Visual Studio 2019
environment:
matrix:
+ - python: 310
+ - python: 310-x64
- python: 39
- python: 39-x64
- python: 27
+ APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2013
- python: 27-x64
+ APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2013
- python: 38
- python: 38-x64
- python: 37
@@ -14,6 +19,9 @@ environment:
- python: 36-x64
- python: 35
- python: 35-x64
+ - python: 310
+ arch: arm64
+ env: STATIC_DEPS=true
- python: 39
arch: arm64
env: STATIC_DEPS=true
diff --git a/benchmark/bench_etree.py b/benchmark/bench_etree.py
index 0f66db8e9..69ac5208e 100644
--- a/benchmark/bench_etree.py
+++ b/benchmark/bench_etree.py
@@ -1,9 +1,10 @@
import copy
+from io import BytesIO
from itertools import *
import benchbase
from benchbase import (with_attributes, with_text, onlylib,
- serialized, children, nochange, BytesIO)
+ serialized, children, nochange)
TEXT = "some ASCII text"
UTEXT = u"some klingon: \F8D2"
diff --git a/benchmark/benchbase.py b/benchmark/benchbase.py
index e34e61036..a9f9ad857 100644
--- a/benchmark/benchbase.py
+++ b/benchmark/benchbase.py
@@ -1,4 +1,4 @@
-import sys, re, string, time, copy, gc
+import sys, re, string, copy, gc
from itertools import *
import time
@@ -474,6 +474,8 @@ def main(benchmark_class):
if import_lxml:
from lxml import etree
_etrees.append(etree)
+ print("Using lxml %s (with libxml2 %s)" % (
+ etree.__version__, '.'.join(map(str, etree.LIBXML_VERSION))))
try:
sys.argv.remove('-fel')
@@ -521,6 +523,8 @@ def main(benchmark_class):
print("No library to test. Exiting.")
sys.exit(1)
+ print("Running benchmarks in Python %s" % (sys.version_info,))
+
print("Preparing test suites and trees ...")
selected = set( sys.argv[1:] )
benchmark_suites, benchmarks = \
diff --git a/buildlibxml.py b/buildlibxml.py
index f45c86086..e0c558fad 100644
--- a/buildlibxml.py
+++ b/buildlibxml.py
@@ -1,7 +1,7 @@
-import os, re, sys, subprocess
+import os, re, sys, subprocess, platform
import tarfile
from distutils import log, version
-from contextlib import closing
+from contextlib import closing, contextmanager
from ftplib import FTP
try:
@@ -26,7 +26,7 @@
# use pre-built libraries on Windows
def download_and_extract_windows_binaries(destdir):
- url = "https://github.com/mhils/libxml2-win-binaries/releases"
+ url = "https://github.com/lxml/libxml2-win-binaries/releases"
filenames = list(_list_dir_urllib(url))
release_path = "/download/%s/" % find_max_version(
@@ -38,7 +38,15 @@ def download_and_extract_windows_binaries(destdir):
if release_path in filename
]
- arch = "win64" if sys.maxsize > 2**32 else "win32"
+ # Check for native ARM64 build or the environment variable that is set by
+ # Visual Studio for cross-compilation (same variable as setuptools uses)
+ if platform.machine() == 'ARM64' or os.getenv('VSCMD_ARG_TGT_ARCH') == 'arm64':
+ arch = "win-arm64"
+ elif sys.maxsize > 2**32:
+ arch = "win64"
+ else:
+ arch = "win32"
+
if sys.version_info < (3, 5):
arch = 'vs2008.' + arch
@@ -114,7 +122,8 @@ def get_prebuilt_libxml2xslt(download_dir, static_include_dirs, static_library_d
## Routines to download and build libxml2/xslt from sources:
-LIBXML2_LOCATION = 'http://xmlsoft.org/sources/'
+LIBXML2_LOCATION = 'https://download.gnome.org/sources/libxml2/'
+LIBXSLT_LOCATION = 'https://download.gnome.org/sources/libxslt/'
LIBICONV_LOCATION = 'https://ftp.gnu.org/pub/gnu/libiconv/'
ZLIB_LOCATION = 'https://zlib.net/'
match_libfile_version = re.compile('^[^-]*-([.0-9-]+)[.].*').match
@@ -169,6 +178,21 @@ def _list_dir_urllib(url):
return files
+def http_find_latest_version_directory(url):
+ with closing(urlopen(url)) as res:
+ charset = _find_content_encoding(res)
+ data = res.read()
+ # e.g. <a href="2.9/"> -- collect (major, minor) from links whose target is "MAJOR.MINOR" or "MAJOR.MINOR/"
+ directories = [
+ (int(v[0]), int(v[1]))
+ for v in re.findall(r' href=["\']([0-9]+)\.([0-9]+)/?["\']', data.decode(charset))
+ ]
+ if not directories:
+ return url  # no "MAJOR.MINOR" links found in the listing: fall back to the URL as given
+ latest_dir = "%s.%s" % max(directories)  # max() compares the integer tuples, so (2, 10) > (2, 9)
+ return urljoin(url, latest_dir) + "/"
+
+
def http_listfiles(url, re_pattern):
with closing(urlopen(url)) as res:
charset = _find_content_encoding(res)
@@ -188,7 +212,7 @@ def parse_text_ftplist(s):
def parse_html_filelist(s):
re_href = re.compile(
- r']*\s+)?href=["\']([^;?"\']+?)[;?"\']',
+ r''']*\shref=["']([^;?"']+?)[;?"']''',
re.I|re.M)
links = set(re_href.findall(s))
for link in links:
@@ -203,21 +227,40 @@ def tryint(s):
return s
+@contextmanager
+def py2_tarxz(filename):
+ import tempfile
+ with tempfile.TemporaryFile() as tmp:
+ subprocess.check_call(["xz", "-dc", filename], stdout=tmp.fileno())
+ tmp.seek(0)
+ with closing(tarfile.TarFile(fileobj=tmp)) as tf:
+ yield tf
+
+
def download_libxml2(dest_dir, version=None):
"""Downloads libxml2, returning the filename where the library was downloaded"""
#version_re = re.compile(r'LATEST_LIBXML2_IS_([0-9.]+[0-9](?:-[abrc0-9]+)?)')
- version_re = re.compile(r'libxml2-([0-9.]+[0-9]).tar.gz')
- filename = 'libxml2-%s.tar.gz'
- return download_library(dest_dir, LIBXML2_LOCATION, 'libxml2',
+ version_re = re.compile(r'libxml2-([0-9.]+[0-9]).tar.xz')
+ filename = 'libxml2-%s.tar.xz'
+
+ if version == "2.9.12":
+ # Temporarily using the latest master (2.9.12+) until there is a release that supports lxml again.
+ from_location = "https://gitlab.gnome.org/GNOME/libxml2/-/archive/dea91c97debeac7c1aaf9c19f79029809e23a353/"
+ version = "dea91c97debeac7c1aaf9c19f79029809e23a353"
+ else:
+ from_location = http_find_latest_version_directory(LIBXML2_LOCATION)
+
+ return download_library(dest_dir, from_location, 'libxml2',
version_re, filename, version=version)
def download_libxslt(dest_dir, version=None):
"""Downloads libxslt, returning the filename where the library was downloaded"""
#version_re = re.compile(r'LATEST_LIBXSLT_IS_([0-9.]+[0-9](?:-[abrc0-9]+)?)')
- version_re = re.compile(r'libxslt-([0-9.]+[0-9]).tar.gz')
- filename = 'libxslt-%s.tar.gz'
- return download_library(dest_dir, LIBXML2_LOCATION, 'libxslt',
+ version_re = re.compile(r'libxslt-([0-9.]+[0-9]).tar.xz')
+ filename = 'libxslt-%s.tar.xz'
+ from_location = http_find_latest_version_directory(LIBXSLT_LOCATION)
+ return download_library(dest_dir, from_location, 'libxslt',
version_re, filename, version=version)
@@ -263,6 +306,7 @@ def download_library(dest_dir, location, name, version_re, filename, version=Non
if location.startswith('ftp://'):
fns = remote_listdir(location)
else:
+ print(location)
fns = http_listfiles(location, '(%s)' % filename.replace('%s', '(?:[0-9.]+[0-9])'))
version = find_max_version(name, fns, version_re)
except IOError:
@@ -297,16 +341,21 @@ def download_library(dest_dir, location, name, version_re, filename, version=Non
def unpack_tarball(tar_filename, dest):
print('Unpacking %s into %s' % (os.path.basename(tar_filename), dest))
- tar = tarfile.open(tar_filename)
+ if sys.version_info[0] < 3 and tar_filename.endswith('.xz'):
+ # Py 2.7 lacks lzma support
+ tar_cm = py2_tarxz(tar_filename)
+ else:
+ tar_cm = closing(tarfile.open(tar_filename))
+
base_dir = None
- for member in tar:
- base_name = member.name.split('/')[0]
- if base_dir is None:
- base_dir = base_name
- elif base_dir != base_name:
- print('Unexpected path in %s: %s' % (tar_filename, base_name))
- tar.extractall(dest)
- tar.close()
+ with tar_cm as tar:
+ for member in tar:
+ base_name = member.name.split('/')[0]
+ if base_dir is None:
+ base_dir = base_name
+ elif base_dir != base_name:
+ print('Unexpected path in %s: %s' % (tar_filename, base_name))
+ tar.extractall(dest)
return os.path.join(dest, base_dir)
@@ -435,8 +484,20 @@ def has_current_lib(name, build_dir, _build_all_following=[False]):
except Exception:
pass # this isn't required, so ignore any errors
if not has_current_lib("libxml2", libxml2_dir):
+ if not os.path.exists(os.path.join(libxml2_dir, "configure")):
+ # Allow building from git sources by running autoconf etc.
+ libxml2_configure_cmd[0] = "./autogen.sh"
cmmi(libxml2_configure_cmd, libxml2_dir, multicore, **call_setup)
+ # Fix up libxslt configure script (needed up to and including 1.1.34)
+ # https://gitlab.gnome.org/GNOME/libxslt/-/commit/90c34c8bb90e095a8a8fe8b2ce368bd9ff1837cc
+ with open(os.path.join(libxslt_dir, "configure"), 'rb') as f:
+ config_script = f.read()
+ if b' --libs print ' in config_script:
+ config_script = config_script.replace(b' --libs print ', b' --libs ')
+ with open(os.path.join(libxslt_dir, "configure"), 'wb') as f:
+ f.write(config_script)
+
# build libxslt
libxslt_configure_cmd = configure_cmd + [
'--without-python',
diff --git a/doc/FAQ.txt b/doc/FAQ.txt
index 24ec8c42e..caf6edf81 100644
--- a/doc/FAQ.txt
+++ b/doc/FAQ.txt
@@ -117,11 +117,11 @@ wrote a nice article about high-performance aspects when `parsing
large files with lxml`_.
.. _`lxml.etree Tutorial`: tutorial.html
-.. _`tutorial for ElementTree`: https://effbot.org/zone/element.htm
+.. _`tutorial for ElementTree`: https://web.archive.org/web/20200720191942/https://effbot.org/zone/element.htm
.. _`extended etree API`: api.html
.. _`objectify documentation`: objectify.html
-.. _`Python XML processing with lxml`: http://www.nmt.edu/tcc/help/pubs/pylxml/
-.. _`element library`: https://effbot.org/zone/element-lib.htm
+.. _`Python XML processing with lxml`: https://web.archive.org/web/20190522191656/http://infohost.nmt.edu/tcc/help/pubs/pylxml/web/index.html
+.. _`element library`: https://web.archive.org/web/20200703234431/http://www.effbot.org/zone/element-lib.htm
.. _`parsing large files with lxml`: http://www.ibm.com/developerworks/xml/library/x-hiperfparse/
@@ -143,7 +143,7 @@ web page`_.
The `generated API documentation`_ is a comprehensive API reference
for the lxml package.
-.. _`ElementTree API`: https://effbot.org/zone/element-index.htm
+.. _`ElementTree API`: https://web.archive.org/web/20200703191710/http://www.effbot.org/zone/element-index.htm
.. _`the web page`: https://lxml.de/#documentation
.. _`generated API documentation`: api/index.html
@@ -431,10 +431,10 @@ Which version of libxml2 and libxslt should I use or require?
It really depends on your application, but the rule of thumb is: more recent
versions contain less bugs and provide more features.
-* Do not use libxml2 2.6.27 if you want to use XPath (including XSLT). You
- will get crashes when XPath errors occur during the evaluation (e.g. for
- unknown functions). This happens inside the evaluation call to libxml2, so
- there is nothing that lxml can do about it.
+* Do not use the stock libxml2 versions 2.9.11 or 2.9.12. They are incompatible
+ with lxml and lead to excess output on serialisation. For static builds
+ against 2.9.12, lxml automatically downloads a post-release version that
+ contains a work-around.
* Try to use versions of both libraries that were released together. At least
the libxml2 version should not be older than the libxslt version.
@@ -446,10 +446,8 @@ versions contain less bugs and provide more features.
leaks were fixed over time. If you encounter crashes or memory leaks in
XPath applications, try a more recent version of libxml2.
-* For parsing and fixing broken HTML, lxml requires at least libxml2 2.6.21.
-
* For the normal tree handling, however, any libxml2 version starting with
- 2.6.20 should do.
+ 2.7.x should do.
Read the `release notes of libxml2`_ and the `release notes of libxslt`_ to
see when (or if) a specific bug has been fixed.
@@ -683,7 +681,7 @@ Since as a user of lxml you are likely a programmer, you might find
`this article on bug reports`_ an interesting read.
.. _`bug tracker`: https://bugs.launchpad.net/lxml/
-.. _`mailing list`: http://lxml.de/mailinglist/
+.. _`mailing list`: https://lxml.de/mailinglist/
.. _`this article on bug reports`: http://www.chiark.greenend.org.uk/~sgtatham/bugs.html
@@ -862,7 +860,7 @@ for possible approaches to solve your specific problem:
Remember that lxml is fast anyway, so concurrency may not even be worth it.
* look out for fancy XSLT stuff like foreign document access or
- passing in subtrees trough XSLT variables. This might or might not
+ passing in subtrees through XSLT variables. This might or might not
work, depending on your specific usage. Again, later versions of
lxml and libxslt provide safer support here.
@@ -1239,8 +1237,8 @@ Element. Its children will then inherit this prefix for serialization.
How can I specify a default namespace for XPath expressions?
------------------------------------------------------------
-You can't. In XPath, there is no such thing as a default namespace. Just use
-an arbitrary prefix and let the namespace dictionary of the XPath evaluators
+You can't. In XPath 1.0, there is no such thing as a default namespace. Just
+use an arbitrary prefix and let the namespace dictionary of the XPath evaluators
map it to your namespace. See also the question above.
diff --git a/doc/api/conf.py b/doc/api/conf.py
index 75aa2817d..7c5f134d2 100644
--- a/doc/api/conf.py
+++ b/doc/api/conf.py
@@ -46,6 +46,7 @@
autodoc_default_options = {
'ignore-module-all': True,
'private-members': True,
+ 'inherited-members': True,
}
autodoc_member_order = 'groupwise'
diff --git a/doc/build.txt b/doc/build.txt
index 8d375f7f5..33ab0455f 100644
--- a/doc/build.txt
+++ b/doc/build.txt
@@ -47,9 +47,8 @@ working Cython installation. You can use pip_ to install it::
https://github.com/lxml/lxml/blob/master/requirements.txt
-lxml currently requires at least Cython 0.26.1, later release versions
-should work as well. For Python 3.7 support, at least Cython 0.29 is
-required.
+lxml currently requires at least Cython 0.29. Later release versions
+are generally preferred.
Github, git and hg
@@ -179,7 +178,7 @@ like to know. Please contact us on the `mailing list`_, and please specify
the version of lxml, libxml2, libxslt and Python you were using, as well as
your operating system type (Linux, Windows, MacOS-X, ...).
-.. _`mailing list`: http://lxml.de/mailinglist/
+.. _`mailing list`: https://lxml.de/mailinglist/
Building an egg or wheel
@@ -266,8 +265,8 @@ subdirectory ``libs`` in the lxml distribution, and call ``setup.py``
with the desired target versions like this::
python setup.py build --static-deps \
- --libxml2-version=2.9.1 \
- --libxslt-version=1.1.28 \
+ --libxml2-version=2.9.12 \
+ --libxslt-version=1.1.34 \
sudo python setup.py install
diff --git a/doc/capi.txt b/doc/capi.txt
index 0167a5a4e..0471d811e 100644
--- a/doc/capi.txt
+++ b/doc/capi.txt
@@ -7,11 +7,10 @@ C extensions to efficiently access public functions and classes of lxml,
without going through the Python API.
The API is described in the file `etreepublic.pxd`_, which is directly
-c-importable by extension modules implemented in Pyrex_ or Cython_.
+c-importable by extension modules implemented in Cython_.
.. _`etreepublic.pxd`: https://github.com/lxml/lxml/blob/master/src/lxml/includes/etreepublic.pxd
-.. _Cython: http://cython.org
-.. _Pyrex: http://www.cosc.canterbury.ac.nz/~greg/python/Pyrex/
+.. _Cython: https://cython.org
.. contents::
..
@@ -45,7 +44,7 @@ Writing external modules in Cython
----------------------------------
This is the easiest way of extending lxml at the C level. A Cython_
-(or Pyrex_) module should start like this::
+module should start like this::
# My Cython extension
diff --git a/doc/compatibility.txt b/doc/compatibility.txt
index e23d18171..654cb7c4e 100644
--- a/doc/compatibility.txt
+++ b/doc/compatibility.txt
@@ -146,11 +146,11 @@ ElementTree. Nonetheless, some differences and incompatibilities exist:
not. This means that a comment text "text" that ElementTree serializes as
"<!-- text -->" will become "<!--text-->" in lxml.
-* When the string '*' is used as tag filter in the ``Element.getiterator()``
- method, ElementTree returns all elements in the tree, including comments and
- processing instructions. lxml.etree only returns real Elements, i.e. tree
- nodes that have a string tag name. Without a filter, both libraries iterate
- over all nodes.
+* When the string ``'*'`` is used as tag filter in the ``Element.iter()`` and
+ ``.find*()`` methods, ElementTree returns all elements in the tree, including
+ comments and processing instructions. lxml.etree only returns real Elements,
+ i.e. tree nodes that have a string tag name. Without a filter, both libraries
+ iterate over all nodes.
Note that currently only lxml.etree supports passing the ``Element`` factory
function as filter to select only Elements. Both libraries support passing
diff --git a/doc/element_classes.txt b/doc/element_classes.txt
index 4b1e72e8e..759ad7d51 100644
--- a/doc/element_classes.txt
+++ b/doc/element_classes.txt
@@ -600,6 +600,8 @@ a name (or ``None``) as argument and can then be used as decorator.
If the class has the same name as the tag, you can also leave out the call
and use the blank decorator instead:
+.. sourcecode:: pycon
+
>>> @honk_elements
... class honkel(HonkNSElement):
... @property
diff --git a/doc/html/flattr-badge-large.png b/doc/html/flattr-badge-large.png
deleted file mode 100644
index 110530585..000000000
Binary files a/doc/html/flattr-badge-large.png and /dev/null differ
diff --git a/doc/html/style.css b/doc/html/style.css
index 4cc454aac..7d1b0e675 100644
--- a/doc/html/style.css
+++ b/doc/html/style.css
@@ -164,7 +164,7 @@ div.banner {
border: 2px solid darkred;
color: darkgreen;
line-height: 1em;
- margin: 1ex;
+ margin: 3ex 1ex 1ex;
padding: 3pt;
}
@@ -321,6 +321,18 @@ html > .pagequote {
position: fixed;
}
+div.admonition {
+ border: solid 1px;
+ border-radius: 1ex;
+ margin: 0.5ex;
+ padding: 0.5ex 1.5ex 0.5ex 1.5ex;
+ background: lightyellow;
+}
+
+div.admonition > .admonition-title {
+ background: yellow;
+}
+
code {
color: Black;
background-color: #f0f0f0;
diff --git a/doc/licenses/ZopePublicLicense.txt b/doc/licenses/ZopePublicLicense.txt
deleted file mode 100644
index 44e0648b3..000000000
--- a/doc/licenses/ZopePublicLicense.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-Zope Public License (ZPL) Version 2.0
------------------------------------------------
-
-This software is Copyright (c) Zope Corporation (tm) and
-Contributors. All rights reserved.
-
-This license has been certified as open source. It has also
-been designated as GPL compatible by the Free Software
-Foundation (FSF).
-
-Redistribution and use in source and binary forms, with or
-without modification, are permitted provided that the
-following conditions are met:
-
-1. Redistributions in source code must retain the above
- copyright notice, this list of conditions, and the following
- disclaimer.
-
-2. Redistributions in binary form must reproduce the above
- copyright notice, this list of conditions, and the following
- disclaimer in the documentation and/or other materials
- provided with the distribution.
-
-3. The name Zope Corporation (tm) must not be used to
- endorse or promote products derived from this software
- without prior written permission from Zope Corporation.
-
-4. The right to distribute this software or to use it for
- any purpose does not give you the right to use Servicemarks
- (sm) or Trademarks (tm) of Zope Corporation. Use of them is
- covered in a separate agreement (see
- http://www.zope.com/Marks).
-
-5. If any files are modified, you must cause the modified
- files to carry prominent notices stating that you changed
- the files and the date of any change.
-
-Disclaimer
-
- THIS SOFTWARE IS PROVIDED BY ZOPE CORPORATION ``AS IS''
- AND ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT
- NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
- AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
- NO EVENT SHALL ZOPE CORPORATION OR ITS CONTRIBUTORS BE
- LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
- DAMAGE.
-
-
-This software consists of contributions made by Zope
-Corporation and many individuals on behalf of Zope
-Corporation. Specific attributions are listed in the
-accompanying credits file.
diff --git a/doc/lxml-source-howto.txt b/doc/lxml-source-howto.txt
index 327eae8c7..9cef1f7ba 100644
--- a/doc/lxml-source-howto.txt
+++ b/doc/lxml-source-howto.txt
@@ -13,7 +13,7 @@ This document describes how to read the source code of lxml_ and how
to start working on it. You might also be interested in the companion
document that describes `how to build lxml from sources`_.
-.. _lxml: http://lxml.de/
+.. _lxml: https://lxml.de/
.. _`how to build lxml from sources`: build.html
.. _`ReStructured Text`: http://docutils.sourceforge.net/rst.html
.. _epydoc: http://epydoc.sourceforge.net/
diff --git a/doc/lxmlhtml.txt b/doc/lxmlhtml.txt
index 9827ed9f2..3c7393be6 100644
--- a/doc/lxmlhtml.txt
+++ b/doc/lxmlhtml.txt
@@ -489,8 +489,13 @@ The module ``lxml.html.clean`` provides a ``Cleaner`` class for cleaning up
HTML pages. It supports removing embedded or script content, special tags,
CSS style annotations and much more.
-Say, you have an evil web page from an untrusted source that contains lots of
-content that upsets browsers and tries to run evil code on the client side:
+Note: the HTML Cleaner in ``lxml.html.clean`` is **not** considered
+appropriate **for security sensitive environments**.
+See e.g. `bleach <https://pypi.org/project/bleach/>`_ for an alternative.
+
+Say, you have an overburdened web page from a hideous source which contains
+lots of content that upsets browsers and tries to run unnecessary code on the
+client side:
.. sourcecode:: pycon
@@ -521,7 +526,7 @@ content that upsets browsers and tries to run evil code on the client side:
...