From 0fdf179391c5d926dc75a7dec2604cd3021c798f Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 29 Mar 2024 08:46:55 +0100 Subject: [PATCH 001/279] Build: Increase minimum CPU architecture of Linux wheels from core2 to sandybridge (launched 2011, adds e.g. SSE4.2, AVX, PCLMUL, POPCNT). --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e7f6bb66c..ba413dabd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ archs = ["x86_64", "aarch64", "i686"] repair-wheel-command = "auditwheel repair --strip -w {dest_dir} {wheel}" [tool.cibuildwheel.linux.environment] -CFLAGS = "-O3 -g1 -pipe -fPIC -flto -march=core2" +CFLAGS = "-O3 -g1 -pipe -fPIC -flto -march=sandybridge" AR = "gcc-ar" NM = "gcc-nm" RANLIB = "gcc-ranlib" From a37fac99f93447f0096e917c7286c1dad2c829b5 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 29 Mar 2024 08:57:38 +0100 Subject: [PATCH 002/279] Build: Add a build step for additional Linux wheels with manylinux 2.28 and gcc 12. --- .github/workflows/wheels.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 53e08d08d..1a8ea234f 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -143,6 +143,23 @@ jobs: with: only: ${{ matrix.only }} + - name: Build faster Linux wheels + # also build wheels with the most recent manylinux images and gcc + if: runner.os == 'Linux' + uses: pypa/cibuildwheel@v2.16.5 + env: + CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_I686_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_PYPY_X86_64_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_PYPY_AARCH64_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_PYPY_I686_IMAGE: manylinux_2_28 + CIBW_MUSLLINUX_X86_64_IMAGE: musllinux_1_2 + CIBW_MUSLLINUX_I686_IMAGE: musllinux_1_2 + CIBW_MUSLLINUX_AARCH64_IMAGE: musllinux_1_2 + with: + only: ${{ matrix.only }} + - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 with: path: ./wheelhouse/*.whl From 221e19e9fef0c4171202eb0226e21b49aef89958 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 29 Mar 2024 08:57:57 +0100 Subject: [PATCH 003/279] Build: Fix directory name. --- .github/workflows/wheels.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 1a8ea234f..1d0e5a489 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -181,7 +181,7 @@ jobs: merge-multiple: true - name: List downloaded artifacts - run: ls -la ~/downloads + run: ls -la ./wheel_upload - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 with: From c99b253ed45250ed8d17cc6360b4af9a5b364008 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 29 Mar 2024 09:14:05 +0100 Subject: [PATCH 004/279] Build: Release sdist and wheels together. --- .github/workflows/wheels.yml | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 1d0e5a489..5402842af 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -59,12 +59,6 @@ jobs: run: make html sdist env: { STATIC_DEPS: false; CFLAGS="-Og" } # it's run-once, so build more quickly - - name: Release - uses: softprops/action-gh-release@v2 - if: github.ref_type == 'tag' - with: - files: dist/*.tar.gz - - name: Upload sdist uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 with: @@ -167,29 +161,31 @@ jobs: upload_release_assets: name: Upload Release Assets - needs: [ build_wheels ] + needs: [ sdist, build_wheels ] runs-on: ubuntu-latest permissions: contents: write steps: - - name: Download bdist files + - name: Download artifacts uses: actions/download-artifact@eaceaf801fd36c7dee90939fad912460b18a1ffe # v4.1.2 with: - path: ./wheel_upload + path: ./release_upload merge-multiple: true - name: List downloaded artifacts - run: ls -la ./wheel_upload + run: ls -la ./release_upload - uses: actions/upload-artifact@5d5d22a31266ced268874388b861e4b58bb5c2f3 # v4.3.1 with: - path: ./wheel_upload/*.whl + path: ./release_upload/*.whl name: all_wheels - name: Release uses: softprops/action-gh-release@v2 if: github.ref_type == 'tag' with: - files: ./wheel_upload/*.whl + files: | + ./release_upload/*.whl + ./release_upload/*.tar.gz From 1acc1b95d73a41f071cb1516fbf3c620d2a50293 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 29 Mar 2024 09:18:46 +0100 Subject: [PATCH 005/279] Build: See if a more recent cibuildwheel allows easier manylinux image selection. --- .github/workflows/wheels.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 5402842af..c2591673f 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -133,14 +133,14 @@ jobs: platforms: all - name: Build wheels - uses: pypa/cibuildwheel@v2.16.5 + uses: pypa/cibuildwheel@v2.17.0 with: only: ${{ matrix.only }} - name: Build faster Linux wheels # also build wheels with the most recent manylinux images and gcc if: runner.os == 'Linux' - uses: pypa/cibuildwheel@v2.16.5 + uses: pypa/cibuildwheel@v2.17.0 env: CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 CIBW_MANYLINUX_I686_IMAGE: manylinux_2_28 From f2d880d3d5c652e6d19950881dd08dbe0e1b7024 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 29 Mar 2024 11:08:28 +0100 Subject: [PATCH 006/279] Build: manylinux_2_28 has no i686 images. --- .github/workflows/wheels.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index c2591673f..eef8abfb1 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -139,15 +139,15 @@ jobs: - name: Build faster Linux wheels # also build wheels with the most recent manylinux images and gcc - if: runner.os == 'Linux' + if: runner.os == 'Linux' && !contains(matrix.only, 'i686') uses: pypa/cibuildwheel@v2.17.0 env: CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 CIBW_MANYLINUX_I686_IMAGE: manylinux_2_28 - CIBW_MANYLINUX_PYPY_X86_64_IMAGE: manylinux_2_28 CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28 - CIBW_MANYLINUX_PYPY_AARCH64_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_PYPY_X86_64_IMAGE: manylinux_2_28 CIBW_MANYLINUX_PYPY_I686_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_PYPY_AARCH64_IMAGE: manylinux_2_28 CIBW_MUSLLINUX_X86_64_IMAGE: musllinux_1_2 CIBW_MUSLLINUX_I686_IMAGE: musllinux_1_2 CIBW_MUSLLINUX_AARCH64_IMAGE: musllinux_1_2 From eee57ec2cebe20c95b01d699d48733e85f2d87af Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 29 Mar 2024 11:16:30 +0100 Subject: [PATCH 007/279] Build: Add more architectures to wheel build. --- .github/workflows/wheels.yml | 7 ++++--- pyproject.toml | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index eef8abfb1..6a052afac 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -143,14 +143,15 @@ jobs: uses: pypa/cibuildwheel@v2.17.0 env: CIBW_MANYLINUX_X86_64_IMAGE: manylinux_2_28 - CIBW_MANYLINUX_I686_IMAGE: manylinux_2_28 CIBW_MANYLINUX_AARCH64_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_PPC64LE_IMAGE: manylinux_2_28 + CIBW_MANYLINUX_S390X_IMAGE: manylinux_2_28 CIBW_MANYLINUX_PYPY_X86_64_IMAGE: manylinux_2_28 - CIBW_MANYLINUX_PYPY_I686_IMAGE: manylinux_2_28 CIBW_MANYLINUX_PYPY_AARCH64_IMAGE: manylinux_2_28 CIBW_MUSLLINUX_X86_64_IMAGE: musllinux_1_2 - CIBW_MUSLLINUX_I686_IMAGE: musllinux_1_2 CIBW_MUSLLINUX_AARCH64_IMAGE: musllinux_1_2 + CIBW_MUSLLINUX_PPC64LE_IMAGE: manylinux_2_28 + CIBW_MUSLLINUX_S390X_IMAGE: manylinux_2_28 with: only: ${{ matrix.only }} diff --git a/pyproject.toml b/pyproject.toml index ba413dabd..b7a831350 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,11 +4,11 @@ requires = ["Cython>=3.0.9", "setuptools", "wheel"] [tool.cibuildwheel] build-verbosity = 2 environment = {STATIC_DEPS="true", LIBXML2_VERSION = "2.12.6", LIBXSLT_VERSION = "1.1.39"} -skip = ["pp*-manylinux_aarch64", "pp*-manylinux_i686", "*-musllinux_i686"] +skip = ["pp*-manylinux_i686", "*-musllinux_i686"] # test-command = "python {package}/test.py -p -v" [tool.cibuildwheel.linux] -archs = ["x86_64", "aarch64", "i686"] +archs = ["x86_64", "aarch64", "i686", "ppc64le", "s390x"] repair-wheel-command = "auditwheel repair --strip -w {dest_dir} {wheel}" [tool.cibuildwheel.linux.environment] From 89cb64ed50b7b2d34378e32f06ac99b48a20945c Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 29 Mar 2024 11:59:33 +0100 Subject: [PATCH 008/279] Build: Fix non-x86 CFLAGS. --- pyproject.toml | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index b7a831350..08c579945 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ archs = ["x86_64", "aarch64", "i686", "ppc64le", "s390x"] repair-wheel-command = "auditwheel repair --strip -w {dest_dir} {wheel}" [tool.cibuildwheel.linux.environment] -CFLAGS = "-O3 -g1 -pipe -fPIC -flto -march=sandybridge" +CFLAGS = "-O3 -g1 -pipe -fPIC -flto -mtune=generic" AR = "gcc-ar" NM = "gcc-nm" RANLIB = "gcc-ranlib" @@ -21,9 +21,20 @@ STATIC_DEPS = "true" LIBXML2_VERSION = "2.12.6" LIBXSLT_VERSION = "1.1.39" +[[tool.cibuildwheel.overrides]] +select = "*linux_i686" +inherit.environment = "append" +environment.CFLAGS="-O3 -g1 -pipe -fPIC -flto -march=sandybridge" + +[[tool.cibuildwheel.overrides]] +select = "*linux_x86_64" +inherit.environment = "append" +environment.CFLAGS="-O3 -g1 -pipe -fPIC -flto -march=sandybridge" + [[tool.cibuildwheel.overrides]] select = "*aarch64" -environment = {CFLAGS = "-O3 -g1 -pipe -fPIC -flto -march=armv8-a -mtune=cortex-a72", AR = "gcc-ar", NM = "gcc-nm", RANLIB = "gcc-ranlib", LDFLAGS = "-flto", STATIC_DEPS = "true", LIBXML2_VERSION = "2.12.6", LIBXSLT_VERSION = "1.1.39" } +inherit.environment = "append" +environment.CFLAGS = "-O3 -g1 -pipe -fPIC -flto -march=armv8-a -mtune=cortex-a72" [tool.cibuildwheel.windows] archs = ["AMD64", "x86"] From 89f2e6a01fe752c3385077c2167166c1ffcbdfb5 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 29 Mar 2024 16:39:47 +0100 Subject: [PATCH 009/279] Build: Remove platforms that fail to build and apparently are not trivial to build. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 08c579945..55b5555ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ skip = ["pp*-manylinux_i686", "*-musllinux_i686"] # test-command = "python {package}/test.py -p -v" [tool.cibuildwheel.linux] -archs = ["x86_64", "aarch64", "i686", "ppc64le", "s390x"] +archs = ["x86_64", "aarch64", "i686"] # , "ppc64le", "s390x"] repair-wheel-command = "auditwheel repair --strip -w {dest_dir} {wheel}" [tool.cibuildwheel.linux.environment] From 10bf9b7a609ffa1ca916bf778774d845efb4ea1b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 29 Mar 2024 16:42:41 +0100 Subject: [PATCH 010/279] Build(deps): Bump the github-actions group with 2 updates (GH-415) Bumps the github-actions group with 2 updates: [actions/cache](https://github.com/actions/cache) and [actions/download-artifact](https://github.com/actions/download-artifact). Updates `actions/cache` from 4.0.0 to 4.0.2 - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/13aacd865c20de90d75de3b17ebe84f7a17d57d2...0c45773b623bea8c8e75f6c82b208c3cf94ea4f9) Updates `actions/download-artifact` from 4.1.2 to 4.1.4 - [Release notes](https://github.com/actions/download-artifact/releases) - [Commits](https://github.com/actions/download-artifact/compare/eaceaf801fd36c7dee90939fad912460b18a1ffe...c850b930e6ba138125429b7e5c93fc707a7f8427) --- .github/workflows/ci.yml | 4 ++-- .github/workflows/wheels.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 78a9801ed..15314a4fc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -174,7 +174,7 @@ jobs: key: ${{ runner.os }}-ccache${{ matrix.extra_hash }}-${{ matrix.python-version }}-${{ hashFiles('.github/workflows/ci.yml', 'tools/ci-run.sh') }} - name: Cache [libs] - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 if: matrix.env.STATIC_DEPS with: path: | @@ -221,7 +221,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Collect wheels - uses: actions/download-artifact@eaceaf801fd36c7dee90939fad912460b18a1ffe # v4.1.2 + uses: actions/download-artifact@c850b930e6ba138125429b7e5c93fc707a7f8427 # v4.1.4 with: path: ~/downloads merge-multiple: true diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index 6a052afac..a12b47f32 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -118,7 +118,7 @@ jobs: uses: actions/checkout@v4 - name: Cache [libs] - uses: actions/cache@13aacd865c20de90d75de3b17ebe84f7a17d57d2 # v4.0.0 + uses: actions/cache@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 with: path: | libs/*.xz @@ -170,7 +170,7 @@ jobs: steps: - name: Download artifacts - uses: actions/download-artifact@eaceaf801fd36c7dee90939fad912460b18a1ffe # v4.1.2 + uses: actions/download-artifact@c850b930e6ba138125429b7e5c93fc707a7f8427 # v4.1.4 with: path: ./release_upload merge-multiple: true From 06b70c3e047e405a94e82f95bc9ff8ed03d00892 Mon Sep 17 00:00:00 2001 From: Stefan Behnel Date: Fri, 29 Mar 2024 20:55:45 +0100 Subject: [PATCH 011/279] Set master version to 5.2.0a0. --- src/lxml/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lxml/__init__.py b/src/lxml/__init__.py index 4c91c5b72..6c08eacf0 100644 --- a/src/lxml/__init__.py +++ b/src/lxml/__init__.py @@ -1,6 +1,6 @@ # this is a package -__version__ = "5.1.1" +__version__ = "5.2.0a0" def get_include(): From 73778681f14359fe6d16644e69aaca276eba525a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lum=C3=ADr=20=27Frenzy=27=20Balhar?= Date: Fri, 29 Mar 2024 21:01:38 +0100 Subject: [PATCH 012/279] Use html.clean from external project and provide "html_clean" extra dependency (GH-406) Following the discussion in https://bugs.launchpad.net/lxml/+bug/1958539, "lxml.html.clean" is now extracted into a separate project "lxml_html_clean": * Github: https://github.com/fedora-python/lxml_html_clean * PyPI: https://pypi.org/project/lxml-html-clean/ * Documentation: https://lxml-html-clean.readthedocs.io/en/latest/ The module is available as an "extra" setuptools dependency "lxml[html_clean]", so that: * Projects that use lxml without lxml.html.clean will continue to use it without any difference. Except they won't have potentially vulnerable code installed. * Projects that need lxml.html.clean will need to switch their requirements from lxml to lxml[html_clean]. The new package is added as a test dependency to continue to test the compatibility between the two projects. Closes https://bugs.launchpad.net/lxml/+bug/1958539 --- setup.py | 1 + setupinfo.py | 1 - src/lxml/html/clean.py | 787 +---------------------- src/lxml/html/tests/test_autolink.py | 10 - src/lxml/html/tests/test_autolink.txt | 79 --- src/lxml/html/tests/test_basic.txt | 1 - src/lxml/html/tests/test_clean.py | 313 --------- src/lxml/html/tests/test_clean.txt | 221 ------- src/lxml/html/tests/test_clean_embed.txt | 39 -- tools/ci-run.sh | 2 +- tox.ini | 2 + 11 files changed, 22 insertions(+), 1434 deletions(-) delete mode 100644 src/lxml/html/tests/test_autolink.py delete mode 100644 src/lxml/html/tests/test_autolink.txt delete mode 100644 src/lxml/html/tests/test_clean.py delete mode 100644 src/lxml/html/tests/test_clean.txt delete mode 100644 src/lxml/html/tests/test_clean_embed.txt diff --git a/setup.py b/setup.py index 6d7bd18df..c440c10a3 100644 --- a/setup.py +++ b/setup.py @@ -78,6 +78,7 @@ def static_env_list(name, separator=None): 'cssselect': 'cssselect>=0.7', 'html5': 'html5lib', 'htmlsoup': 'BeautifulSoup4', + 'html_clean': 'lxml_html_clean', } extra_options.update(setupinfo.extra_setup_args()) diff --git a/setupinfo.py b/setupinfo.py index 43e283fcc..97e339909 100644 --- a/setupinfo.py +++ b/setupinfo.py @@ -20,7 +20,6 @@ "lxml.builder", "lxml._elementpath", "lxml.html.diff", - "lxml.html.clean", "lxml.sax", ] HEADER_FILES = ['etree.h', 'etree_api.h'] diff --git a/src/lxml/html/clean.py b/src/lxml/html/clean.py index fdc96ab4c..d4b9e96d8 100644 --- a/src/lxml/html/clean.py +++ b/src/lxml/html/clean.py @@ -1,772 +1,21 @@ # cython: language_level=3str -"""A cleanup tool for HTML. - -Removes unwanted tags and content. See the `Cleaner` class for -details. -""" - -import copy -import re -from urllib.parse import urlsplit, unquote_plus - -from lxml import etree -from lxml.html import defs -from lxml.html import fromstring, XHTML_NAMESPACE -from lxml.html import xhtml_to_html, _transform_result - - -__all__ = ['clean_html', 'clean', 'Cleaner', 'autolink', 'autolink_html', - 'word_break', 'word_break_html'] - -# Look at http://code.sixapart.com/trac/livejournal/browser/trunk/cgi-bin/cleanhtml.pl -# Particularly the CSS cleaning; most of the tag cleaning is integrated now -# I have multiple kinds of schemes searched; but should schemes be -# whitelisted instead? -# max height? -# remove images? Also in CSS? background attribute? -# Some way to whitelist object, iframe, etc (e.g., if you want to -# allow *just* embedded YouTube movies) -# Log what was deleted and why? -# style="behavior: ..." might be bad in IE? -# Should we have something for just ? That's the worst of the -# metas. -# UTF-7 detections? Example: -# +ADw-SCRIPT+AD4-alert('XSS');+ADw-/SCRIPT+AD4- -# you don't always have to have the charset set, if the page has no charset -# and there's UTF7-like code in it. -# Look at these tests: http://htmlpurifier.org/live/smoketests/xssAttacks.php - - -# This is an IE-specific construct you can have in a stylesheet to -# run some Javascript: -_replace_css_javascript = re.compile( - r'expression\s*\(.*?\)', re.S|re.I).sub - -# Do I have to worry about @\nimport? -_replace_css_import = re.compile( - r'@\s*import', re.I).sub - -_looks_like_tag_content = re.compile( - r' safe_image_urls - -_substitute_whitespace = re.compile(r'[\s\x00-\x08\x0B\x0C\x0E-\x19]+').sub - -# FIXME: check against: http://msdn2.microsoft.com/en-us/library/ms537512.aspx -_conditional_comment_re = re.compile( - r'\[if[\s\n\r]+.*?][\s\n\r]*>', re.I|re.S) - -_find_styled_elements = etree.XPath( - "descendant-or-self::*[@style]") - -_find_external_links = etree.XPath( - ("descendant-or-self::a [normalize-space(@href) and substring(normalize-space(@href),1,1) != '#'] |" - "descendant-or-self::x:a[normalize-space(@href) and substring(normalize-space(@href),1,1) != '#']"), - namespaces={'x':XHTML_NAMESPACE}) - - -class Cleaner: - """ - Instances cleans the document of each of the possible offending - elements. The cleaning is controlled by attributes; you can - override attributes in a subclass, or set them in the constructor. - - ``scripts``: - Removes any ``