diff --git a/CHANGELOG.md b/CHANGELOG.md
index cab54816..27330072 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
# html-validate changelog
+## 9.4.2-rc.1 (2025-03-01)
+
+### Performance Improvements
+
+- rewrite and optimize `HtmlElement.matches()`, improving performance on large documents ([9175e43](https://gitlab.com/html-validate/html-validate/commit/9175e43110b7273c9d8eca3131d9de1087c60e04)), closes [#45](https://gitlab.com/html-validate/html-validate/issues/45)
+
## 9.4.1 (2025-02-27)
### Bug Fixes
diff --git a/etc/browser.api.md b/etc/browser.api.md
index e58147dd..4989c359 100644
--- a/etc/browser.api.md
+++ b/etc/browser.api.md
@@ -510,7 +510,7 @@ export class HtmlElement extends DOMNode {
is(tagName: string): boolean;
get lastElementChild(): HtmlElement | null;
loadMeta(meta: MetaElement): void;
- matches(selector: string): boolean;
+ matches(selectorList: string): boolean;
// (undocumented)
get meta(): MetaElement | null;
// (undocumented)
diff --git a/etc/index.api.md b/etc/index.api.md
index 6dabf89f..332a4870 100644
--- a/etc/index.api.md
+++ b/etc/index.api.md
@@ -601,7 +601,7 @@ export class HtmlElement extends DOMNode {
is(tagName: string): boolean;
get lastElementChild(): HtmlElement | null;
loadMeta(meta: MetaElement): void;
- matches(selector: string): boolean;
+ matches(selectorList: string): boolean;
// (undocumented)
get meta(): MetaElement | null;
// (undocumented)
diff --git a/package-lock.json b/package-lock.json
index 9ca1c302..c1f5a1c5 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "html-validate",
- "version": "9.4.1",
+ "version": "9.4.2-rc.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "html-validate",
- "version": "9.4.1",
+ "version": "9.4.2-rc.1",
"funding": [
{
"type": "github",
diff --git a/package.json b/package.json
index cb998ef9..70a8ec4d 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "html-validate",
- "version": "9.4.1",
+ "version": "9.4.2-rc.1",
"description": "Offline html5 validator",
"keywords": [
"html",
diff --git a/src/dom/htmlelement.ts b/src/dom/htmlelement.ts
index 5537b995..b51897ee 100644
--- a/src/dom/htmlelement.ts
+++ b/src/dom/htmlelement.ts
@@ -375,25 +375,11 @@ export class HtmlElement extends DOMNode {
*
* Implementation of DOM specification of Element.matches(selectors).
*/
- public matches(selector: string): boolean {
- /* find root element */
- /* eslint-disable-next-line @typescript-eslint/no-this-alias -- false positive */
- let root: HtmlElement = this;
- while (root.parent) {
- root = root.parent;
- }
-
- /* a bit slow implementation as it finds all candidates for the selector and
- * then tests if any of them are the current element. A better
- * implementation would be to walk the selector right-to-left and test
- * ancestors. */
- for (const match of root.querySelectorAll(selector)) {
- if (match.unique === this.unique) {
- return true;
- }
- }
-
- return false;
+	public matches(selectorList: string): boolean {
+		const parts = selectorList.split(","); /* one entry per comma-separated selector */
+		return parts.some((part) => {
+			return new Selector(part.trim()).matchElement(this);
+		});
 	}
public get meta(): MetaElement | null {
diff --git a/src/dom/pseudoclass/scope.ts b/src/dom/pseudoclass/scope.ts
index 89371d0f..34f41800 100644
--- a/src/dom/pseudoclass/scope.ts
+++ b/src/dom/pseudoclass/scope.ts
@@ -2,5 +2,5 @@ import { type HtmlElement } from "../htmlelement";
import { type SelectorContext } from "../selector";
export function scope(this: SelectorContext, node: HtmlElement): boolean {
- return node.isSameNode(this.scope);
+ return Boolean(this.scope && node.isSameNode(this.scope));
}
diff --git a/src/dom/selector/match-element.spec.ts b/src/dom/selector/match-element.spec.ts
new file mode 100644
index 00000000..e7a3eb1b
--- /dev/null
+++ b/src/dom/selector/match-element.spec.ts
@@ -0,0 +1,100 @@
+import { Config } from "../../config";
+import { Parser } from "../../parser";
+import { Selector } from "./selector";
+
+let parser: Parser; /* assigned once in beforeAll and used by every test below */
+
+beforeAll(async () => {
+	const resolvedConfig = await Config.empty().resolve();
+	parser = new Parser(resolvedConfig);
+});
+
+it("should match simple selector", () => {
+	expect.assertions(8);
+	const markup = /* HTML */ ` <p class="foo">lorem <em>ipsum</em></p> `;
+	const document = parser.parseHtml(markup);
+	const p = document.querySelector("p")!; /* carries class "foo" */
+	const em = document.querySelector("em")!; /* nested inside the <p>, no class */
+	expect(new Selector("p").matchElement(p)).toBeTruthy();
+	expect(new Selector("p").matchElement(em)).toBeFalsy();
+	expect(new Selector("em").matchElement(p)).toBeFalsy();
+	expect(new Selector("em").matchElement(em)).toBeTruthy();
+	expect(new Selector("div").matchElement(p)).toBeFalsy();
+	expect(new Selector("div").matchElement(em)).toBeFalsy();
+	expect(new Selector(".foo").matchElement(p)).toBeTruthy();
+	expect(new Selector(".foo").matchElement(em)).toBeFalsy();
+});
+
+it("should match simple selectors with descendant combinator", () => {
+	expect.assertions(5);
+	const markup = /* HTML */ `
+		<div>
+			<h1>lorem ipsum</h1>
+			<p>lorem <em>ipsum</em></p>
+			<h2>lorem ipsum</h2>
+		</div>
+	`;
+	const document = parser.parseHtml(markup);
+	const em = document.querySelector("p > em")!; /* ancestors: <p>, then <div> */
+	expect(new Selector("p em").matchElement(em)).toBeTruthy();
+	expect(new Selector("div em").matchElement(em)).toBeTruthy();
+	expect(new Selector("div p em").matchElement(em)).toBeTruthy();
+	expect(new Selector("h1 em").matchElement(em)).toBeFalsy();
+	expect(new Selector("h2 em").matchElement(em)).toBeFalsy();
+});
+
+it("should match simple selectors with child combinator", () => {
+	expect.assertions(5);
+	const markup = /* HTML */ `
+		<div>
+			<h1>lorem ipsum</h1>
+			<p>lorem <em>ipsum</em></p>
+			<h2>lorem ipsum</h2>
+		</div>
+	`;
+	const document = parser.parseHtml(markup);
+	const em = document.querySelector("p > em")!; /* direct child of <p>, grandchild of <div> */
+	expect(new Selector("p > em").matchElement(em)).toBeTruthy();
+	expect(new Selector("div > em").matchElement(em)).toBeFalsy();
+	expect(new Selector("div > p > em").matchElement(em)).toBeTruthy();
+	expect(new Selector("h1 > em").matchElement(em)).toBeFalsy();
+	expect(new Selector("h2 > em").matchElement(em)).toBeFalsy();
+});
+
+it("should match simple selectors with adjacent sibling combinator", () => {
+	expect.assertions(5);
+	const markup = /* HTML */ `
+		<div>
+			<h1>lorem ipsum</h1>
+			<p>lorem <em>ipsum</em></p>
+			<h2>lorem ipsum</h2>
+		</div>
+	`;
+	const document = parser.parseHtml(markup);
+	const p = document.querySelector("p")!; /* immediately preceded by <h1> */
+	const h2 = document.querySelector("h2")!; /* immediately preceded by <p> */
+	expect(new Selector("h1 + p").matchElement(p)).toBeTruthy();
+	expect(new Selector("p + h2").matchElement(h2)).toBeTruthy();
+	expect(new Selector("h2 + p").matchElement(p)).toBeFalsy();
+	expect(new Selector("h1 + h2").matchElement(h2)).toBeFalsy();
+	expect(new Selector("div + p").matchElement(p)).toBeFalsy();
+});
+
+it("should match simple selectors with general sibling combinator", () => {
+	expect.assertions(5);
+	const markup = /* HTML */ `
+		<div>
+			<h1>lorem ipsum</h1>
+			<p>lorem <em>ipsum</em></p>
+			<h2>lorem ipsum</h2>
+		</div>
+	`;
+	const document = parser.parseHtml(markup);
+	const p = document.querySelector("p")!; /* preceded by <h1> */
+	const h2 = document.querySelector("h2")!; /* preceded by <h1> and <p> */
+	expect(new Selector("h1 ~ p").matchElement(p)).toBeTruthy();
+	expect(new Selector("p ~ h2").matchElement(h2)).toBeTruthy();
+	expect(new Selector("h2 ~ p").matchElement(p)).toBeFalsy();
+	expect(new Selector("h1 ~ h2").matchElement(h2)).toBeTruthy();
+	expect(new Selector("div ~ p").matchElement(p)).toBeFalsy();
+});
diff --git a/src/dom/selector/match-element.ts b/src/dom/selector/match-element.ts
new file mode 100644
index 00000000..7a2b74ac
--- /dev/null
+++ b/src/dom/selector/match-element.ts
@@ -0,0 +1,128 @@
+import { type HtmlElement } from "../htmlelement";
+import { Combinator } from "./combinator";
+import { type Compound } from "./compound";
+import { type SelectorContext } from "./selector-context";
+
+/**
+ * Yields the ancestors of `element`, nearest first, stopping before the root
+ * element.
+ */
+function* ancestors(element: HtmlElement): Generator<HtmlElement, void> {
+	let current = element.parent;
+	while (current && !current.isRootElement()) {
+		yield current;
+		current = current.parent;
+	}
+}
+
+/**
+ * Yields the parent of `element`, unless there is none or it is the root
+ * element.
+ */
+function* parent(element: HtmlElement): Generator<HtmlElement, void> {
+	const parent = element.parent;
+	if (parent && !parent.isRootElement()) {
+		yield parent;
+	}
+}
+
+/**
+ * Yields the previous sibling of `element`, if there is one.
+ */
+function* adjacentSibling(element: HtmlElement): Generator<HtmlElement, void> {
+	const sibling = element.previousSibling;
+	if (sibling) {
+		yield sibling;
+	}
+}
+
+/**
+ * Yields the siblings listed before `element` in `element.siblings`.
+ */
+function* generalSibling(element: HtmlElement): Generator<HtmlElement, void> {
+	const siblings = element.siblings;
+	const index = siblings.findIndex((it) => it.isSameNode(element));
+	for (let i = 0; i < index; i++) {
+		yield siblings[i];
+	}
+}
+
+/**
+ * Yields `element` itself, the only candidate for the scope combinator.
+ */
+function* scope(element: HtmlElement): Generator<HtmlElement, void> {
+	yield element;
+}
+
+/**
+ * Returns the elements the preceding compound is allowed to match, given the
+ * combinator attached to the compound that matched `element`.
+ */
+function candidatesFromCombinator(
+	element: HtmlElement,
+	combinator: Combinator,
+): Generator<HtmlElement, void> {
+	switch (combinator) {
+		case Combinator.DESCENDANT:
+			return ancestors(element);
+		case Combinator.CHILD:
+			return parent(element);
+		case Combinator.ADJACENT_SIBLING:
+			return adjacentSibling(element);
+		case Combinator.GENERAL_SIBLING:
+			return generalSibling(element);
+		case Combinator.SCOPE:
+			return scope(element);
+	}
+}
+
+/**
+ * Tests `compounds[index]` against `element` and, when it matches, the
+ * compounds before it against the candidates chosen by its combinator.
+ *
+ * Walks the list by index so no array copy is made per recursion step.
+ */
+function matchAt(
+	element: HtmlElement,
+	compounds: Compound[],
+	index: number,
+	context: SelectorContext,
+): boolean {
+	const compound = compounds[index];
+	if (!compound.match(element, context)) {
+		return false;
+	}
+
+	/* leftmost compound matched: the whole selector matched */
+	if (index === 0) {
+		return true;
+	}
+
+	const candidates = candidatesFromCombinator(element, compound.combinator);
+	for (const candidate of candidates) {
+		if (matchAt(candidate, compounds, index - 1, context)) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/**
+ * Tests whether `element` matches the selector given as `compounds`. The
+ * compounds are evaluated right-to-left: the last one is tested against
+ * `element` and each earlier one against the candidates selected by the
+ * combinator of the compound after it. An empty list matches any element.
+ *
+ * @internal
+ */
+export function matchElement(
+	element: HtmlElement,
+	compounds: Compound[],
+	context: SelectorContext,
+): boolean {
+	if (compounds.length === 0) {
+		return true;
+	}
+	return matchAt(element, compounds, compounds.length - 1, context);
+}
diff --git a/src/dom/selector/selector-context.ts b/src/dom/selector/selector-context.ts
index 650518f5..b72fc043 100644
--- a/src/dom/selector/selector-context.ts
+++ b/src/dom/selector/selector-context.ts
@@ -5,5 +5,5 @@ import { type HtmlElement } from "../htmlelement";
*/
export interface SelectorContext {
/** Scope element */
- scope: HtmlElement;
+ scope: HtmlElement | null;
}
diff --git a/src/dom/selector/selector.ts b/src/dom/selector/selector.ts
index 9654c680..be25ec02 100644
--- a/src/dom/selector/selector.ts
+++ b/src/dom/selector/selector.ts
@@ -2,6 +2,7 @@ import { type DynamicValue } from "../dynamic-value";
import { type HtmlElement } from "../htmlelement";
import { Combinator } from "./combinator";
import { Compound } from "./compound";
+import { matchElement } from "./match-element";
import { type SelectorContext } from "./selector-context";
import { splitSelectorElements } from "./split-selector-elements";
@@ -70,6 +71,14 @@ export class Selector {
yield* this.matchInternal(root, 0, context);
}
+	/**
+	 * Returns `true` if `element` matches this selector; the match runs with a context whose `scope` is `null` (no scope element).
+	 */
+	public matchElement(element: HtmlElement): boolean {
+		const context: SelectorContext = { scope: null };
+		return matchElement(element, this.pattern, context);
+	}
+
private *matchInternal(
root: HtmlElement,
level: number,