diff --git a/CHANGELOG.md b/CHANGELOG.md index cab54816..27330072 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # html-validate changelog +## 9.4.2-rc.1 (2025-03-01) + +### Performance Improvements + +- rewrite and optimize `HtmlElement.matches()` increasing performance of large documents ([9175e43](https://gitlab.com/html-validate/html-validate/commit/9175e43110b7273c9d8eca3131d9de1087c60e04)), closes [#45](https://gitlab.com/html-validate/html-validate/issues/45) + ## 9.4.1 (2025-02-27) ### Bug Fixes diff --git a/etc/browser.api.md b/etc/browser.api.md index e58147dd..4989c359 100644 --- a/etc/browser.api.md +++ b/etc/browser.api.md @@ -510,7 +510,7 @@ export class HtmlElement extends DOMNode { is(tagName: string): boolean; get lastElementChild(): HtmlElement | null; loadMeta(meta: MetaElement): void; - matches(selector: string): boolean; + matches(selectorList: string): boolean; // (undocumented) get meta(): MetaElement | null; // (undocumented) diff --git a/etc/index.api.md b/etc/index.api.md index 6dabf89f..332a4870 100644 --- a/etc/index.api.md +++ b/etc/index.api.md @@ -601,7 +601,7 @@ export class HtmlElement extends DOMNode { is(tagName: string): boolean; get lastElementChild(): HtmlElement | null; loadMeta(meta: MetaElement): void; - matches(selector: string): boolean; + matches(selectorList: string): boolean; // (undocumented) get meta(): MetaElement | null; // (undocumented) diff --git a/package-lock.json b/package-lock.json index 9ca1c302..c1f5a1c5 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "html-validate", - "version": "9.4.1", + "version": "9.4.2-rc.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "html-validate", - "version": "9.4.1", + "version": "9.4.2-rc.1", "funding": [ { "type": "github", diff --git a/package.json b/package.json index cb998ef9..70a8ec4d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "html-validate", - "version": "9.4.1", + "version": "9.4.2-rc.1", "description": "Offline html5 validator", "keywords": [ "html", diff --git a/src/dom/htmlelement.ts b/src/dom/htmlelement.ts index 5537b995..b51897ee 100644 --- a/src/dom/htmlelement.ts +++ b/src/dom/htmlelement.ts @@ -375,25 +375,11 @@ export class HtmlElement extends DOMNode { * * Implementation of DOM specification of Element.matches(selectors). */ - public matches(selector: string): boolean { - /* find root element */ - /* eslint-disable-next-line @typescript-eslint/no-this-alias -- false positive */ - let root: HtmlElement = this; - while (root.parent) { - root = root.parent; - } - - /* a bit slow implementation as it finds all candidates for the selector and - * then tests if any of them are the current element. A better - * implementation would be to walk the selector right-to-left and test - * ancestors. */ - for (const match of root.querySelectorAll(selector)) { - if (match.unique === this.unique) { - return true; - } - } - - return false; + public matches(selectorList: string): boolean { + return selectorList.split(",").some((it) => { + const selector = new Selector(it.trim()); + return selector.matchElement(this); + }); } public get meta(): MetaElement | null { diff --git a/src/dom/pseudoclass/scope.ts b/src/dom/pseudoclass/scope.ts index 89371d0f..34f41800 100644 --- a/src/dom/pseudoclass/scope.ts +++ b/src/dom/pseudoclass/scope.ts @@ -2,5 +2,5 @@ import { type HtmlElement } from "../htmlelement"; import { type SelectorContext } from "../selector"; export function scope(this: SelectorContext, node: HtmlElement): boolean { - return node.isSameNode(this.scope); + return Boolean(this.scope && node.isSameNode(this.scope)); } diff --git a/src/dom/selector/match-element.spec.ts b/src/dom/selector/match-element.spec.ts new file mode 100644 index 00000000..e7a3eb1b --- /dev/null +++ b/src/dom/selector/match-element.spec.ts @@ -0,0 +1,100 @@ +import { Config } from "../../config"; +import { Parser } from "../../parser"; +import { Selector } from "./selector"; + +let parser: Parser; + +beforeAll(async () => { + const resolvedConfig = await Config.empty().resolve(); + parser = new Parser(resolvedConfig); +}); + +it("should match simple selector", () => { + expect.assertions(8); + const markup = /* HTML */ `

lorem ipsum

`; + const document = parser.parseHtml(markup); + const p = document.querySelector("p")!; + const em = document.querySelector("em")!; + expect(new Selector("p").matchElement(p)).toBeTruthy(); + expect(new Selector("p").matchElement(em)).toBeFalsy(); + expect(new Selector("em").matchElement(p)).toBeFalsy(); + expect(new Selector("em").matchElement(em)).toBeTruthy(); + expect(new Selector("div").matchElement(p)).toBeFalsy(); + expect(new Selector("div").matchElement(em)).toBeFalsy(); + expect(new Selector(".foo").matchElement(p)).toBeTruthy(); + expect(new Selector(".foo").matchElement(em)).toBeFalsy(); +}); + +it("should match simple selectors with descendant combinator", () => { + expect.assertions(5); + const markup = /* HTML */ ` +
+

lorem ipsum

+

lorem ipsum

+

lorem ipsum

+
+ `; + const document = parser.parseHtml(markup); + const em = document.querySelector("p > em")!; + expect(new Selector("p em").matchElement(em)).toBeTruthy(); + expect(new Selector("div em").matchElement(em)).toBeTruthy(); + expect(new Selector("div p em").matchElement(em)).toBeTruthy(); + expect(new Selector("h1 em").matchElement(em)).toBeFalsy(); + expect(new Selector("h2 em").matchElement(em)).toBeFalsy(); +}); + +it("should match simple selectors with child combinator", () => { + expect.assertions(5); + const markup = /* HTML */ ` +
+

lorem ipsum

+

lorem ipsum

+

lorem ipsum

+
+ `; + const document = parser.parseHtml(markup); + const em = document.querySelector("p > em")!; + expect(new Selector("p > em").matchElement(em)).toBeTruthy(); + expect(new Selector("div > em").matchElement(em)).toBeFalsy(); + expect(new Selector("div > p > em").matchElement(em)).toBeTruthy(); + expect(new Selector("h1 > em").matchElement(em)).toBeFalsy(); + expect(new Selector("h2 > em").matchElement(em)).toBeFalsy(); +}); + +it("should match simple selectors with adjacent sibling combinator", () => { + expect.assertions(5); + const markup = /* HTML */ ` +
+

lorem ipsum

+

lorem ipsum

+

lorem ipsum

+
+ `; + const document = parser.parseHtml(markup); + const p = document.querySelector("p")!; + const h2 = document.querySelector("h2")!; + expect(new Selector("h1 + p").matchElement(p)).toBeTruthy(); + expect(new Selector("p + h2").matchElement(h2)).toBeTruthy(); + expect(new Selector("h2 + p").matchElement(p)).toBeFalsy(); + expect(new Selector("h1 + h2").matchElement(h2)).toBeFalsy(); + expect(new Selector("div + p").matchElement(p)).toBeFalsy(); +}); + +it("should match simple selectors with general sibling combinator", () => { + expect.assertions(5); + const markup = /* HTML */ ` +
+

lorem ipsum

+

lorem ipsum

+

lorem ipsum

+
+ `; + const document = parser.parseHtml(markup); + const p = document.querySelector("p")!; + const h2 = document.querySelector("h2")!; + expect(new Selector("h1 ~ p").matchElement(p)).toBeTruthy(); + expect(new Selector("p ~ h2").matchElement(h2)).toBeTruthy(); + expect(new Selector("h2 ~ p").matchElement(p)).toBeFalsy(); + expect(new Selector("h1 ~ h2").matchElement(h2)).toBeTruthy(); + expect(new Selector("div ~ p").matchElement(p)).toBeFalsy(); +}); diff --git a/src/dom/selector/match-element.ts b/src/dom/selector/match-element.ts new file mode 100644 index 00000000..7a2b74ac --- /dev/null +++ b/src/dom/selector/match-element.ts @@ -0,0 +1,87 @@ +import { type HtmlElement } from "../htmlelement"; +import { Combinator } from "./combinator"; +import { type Compound } from "./compound"; +import { type SelectorContext } from "./selector-context"; + +function* ancestors(element: HtmlElement): Generator { + let current = element.parent; + while (current && !current.isRootElement()) { + yield current; + current = current.parent; + } +} + +function* parent(element: HtmlElement): Generator { + const parent = element.parent; + if (parent && !parent.isRootElement()) { + yield parent; + } +} + +function* adjacentSibling(element: HtmlElement): Generator { + const sibling = element.previousSibling; + if (sibling) { + yield sibling; + } +} + +function* generalSibling(element: HtmlElement): Generator { + const siblings = element.siblings; + const index = siblings.findIndex((it) => it.isSameNode(element)); + for (let i = 0; i < index; i++) { + yield siblings[i]; + } +} + +function* scope(element: HtmlElement): Generator { + yield element; +} + +function candidatesFromCombinator( + element: HtmlElement, + combinator: Combinator, +): Generator { + switch (combinator) { + case Combinator.DESCENDANT: + return ancestors(element); + case Combinator.CHILD: + return parent(element); + case Combinator.ADJACENT_SIBLING: + return adjacentSibling(element); + case Combinator.GENERAL_SIBLING: + return generalSibling(element); + case Combinator.SCOPE: + return scope(element); + } +} + +/** + * @internal + */ +export function matchElement( + element: HtmlElement, + compounds: Compound[], + context: SelectorContext, +): boolean { + if (compounds.length === 0) { + return true; + } + const last = compounds[compounds.length - 1]; + if (!last.match(element, context)) { + return false; + } + + const remainder = compounds.slice(0, -1); + if (remainder.length === 0) { + return true; + } + + const candidates = candidatesFromCombinator(element, last.combinator); + for (const candidate of candidates) { + if (matchElement(candidate, remainder, context)) { + return true; + } + } + + return false; +} diff --git a/src/dom/selector/selector-context.ts b/src/dom/selector/selector-context.ts index 650518f5..b72fc043 100644 --- a/src/dom/selector/selector-context.ts +++ b/src/dom/selector/selector-context.ts @@ -5,5 +5,5 @@ import { type HtmlElement } from "../htmlelement"; */ export interface SelectorContext { /** Scope element */ - scope: HtmlElement; + scope: HtmlElement | null; } diff --git a/src/dom/selector/selector.ts b/src/dom/selector/selector.ts index 9654c680..be25ec02 100644 --- a/src/dom/selector/selector.ts +++ b/src/dom/selector/selector.ts @@ -2,6 +2,7 @@ import { type DynamicValue } from "../dynamic-value"; import { type HtmlElement } from "../htmlelement"; import { Combinator } from "./combinator"; import { Compound } from "./compound"; +import { matchElement } from "./match-element"; import { type SelectorContext } from "./selector-context"; import { splitSelectorElements } from "./split-selector-elements"; @@ -70,6 +71,14 @@ export class Selector { yield* this.matchInternal(root, 0, context); } + /** + * Returns `true` if the element matches this selector. + */ + public matchElement(element: HtmlElement): boolean { + const context: SelectorContext = { scope: null }; + return matchElement(element, this.pattern, context); + } + private *matchInternal( root: HtmlElement, level: number,