From 377c17768d194186599249129fd59a26db65cae9 Mon Sep 17 00:00:00 2001 From: ota-meshi Date: Sat, 2 Dec 2023 18:06:56 +0900 Subject: [PATCH] chore: refactor parseAttributes --- src/parser/html.ts | 255 +++++++++++++++++++-------------------------- 1 file changed, 110 insertions(+), 145 deletions(-) diff --git a/src/parser/html.ts b/src/parser/html.ts index 09330f29..25438573 100644 --- a/src/parser/html.ts +++ b/src/parser/html.ts @@ -14,7 +14,52 @@ export type AttributeValueToken = { end: number; }; -const spacePattern = /\s/; +const RE_IS_SPACE = /^\s$/u; + +class State { + public readonly code: string; + + public index: number; + + public curr: string | null = null; + + public constructor(code: string, index: number) { + this.code = code; + this.index = index; + this.curr = code[index] || null; + } + + public skipSpaces() { + while (this.currIsSpace()) { + this.index++; + if (this.eof()) break; + } + } + + public currIsSpace() { + return RE_IS_SPACE.test(this.curr || ""); + } + + public currIs(expect: string): any { + return this.code.startsWith(expect, this.index); + } + + public eof(): boolean { + return this.index >= this.code.length; + } + + public eat(expect: E) { + if (!this.currIs(expect)) { + return null; + } + this.index += expect.length; + return expect; + } + + public advance() { + return (this.curr = this.code[++this.index] || null); + } +} /** Parse HTML attributes */ export function parseAttributes( @@ -23,182 +68,102 @@ export function parseAttributes( ): { attributes: AttributeToken[]; index: number } { const attributes: AttributeToken[] = []; - let index = startIndex; - while (index < code.length) { - const char = code[index]; - if (spacePattern.test(char)) { - index++; - continue; - } - if (char === ">" || (char === "/" && code[index + 1] === ">")) break; - const attrData = parseAttribute(code, index); - attributes.push(attrData.attribute); - index = attrData.index; + const state = new State(code, startIndex); + + while (!state.eof()) { + state.skipSpaces(); + if (state.currIs(">") || state.currIs("/>") || state.eof()) break; + attributes.push(parseAttribute(state)); } - return { attributes, index }; + return { attributes, index: state.index }; } /** Parse HTML attribute */ -function parseAttribute( - code: string, - startIndex: number, -): { attribute: AttributeToken; index: number } { +function parseAttribute(state: State): AttributeToken { // parse key - const keyData = parseAttributeKey(code, startIndex); - const key = keyData.key; - let index = keyData.index; - if (code[index] !== "=") { + const key = parseAttributeKey(state); + state.skipSpaces(); + if (!state.eat("=")) { return { - attribute: { - key, - value: null, - }, - index, + key, + value: null, }; } - - index++; - - // skip spaces - while (index < code.length) { - const char = code[index]; - if (spacePattern.test(char)) { - index++; - continue; - } - break; + state.skipSpaces(); + if (state.eof()) { + return { + key, + value: null, + }; } - // parse value - const valueData = parseAttributeValue(code, index); - + const value = parseAttributeValue(state); return { - attribute: { - key, - value: valueData.value, - }, - index: valueData.index, + key, + value, }; } /** Parse HTML attribute key */ -function parseAttributeKey( - code: string, - startIndex: number, -): { key: AttributeKeyToken; index: number } { - const key: AttributeKeyToken = { - name: code[startIndex], - start: startIndex, - end: startIndex + 1, - }; - let index = key.end; - while (index < code.length) { - const char = code[index]; +function parseAttributeKey(state: State): AttributeKeyToken { + const start = state.index; + while (state.advance()) { if ( - char === "=" || - char === ">" || - (char === "/" && code[index + 1] === ">") + state.currIs("=") || + state.currIs(">") || + state.currIs("/>") || + state.currIsSpace() ) { break; } - if (spacePattern.test(char)) { - for (let i = index; i < code.length; i++) { - const c = code[i]; - if (c === "=") { - return { - key, - index: i, - }; - } - if (spacePattern.test(c)) { - continue; - } - return { - key, - index, - }; - } - break; - } - key.name += char; - index++; - key.end = index; } + const end = state.index; return { - key, - index, + name: state.code.slice(start, end), + start, + end, }; } /** Parse HTML attribute value */ -function parseAttributeValue( - code: string, - startIndex: number, -): { value: AttributeValueToken | null; index: number } { - let index = startIndex; - const maybeQuote = code[index]; - if (maybeQuote == null) { - return { - value: null, - index, - }; - } - const quote = maybeQuote === '"' || maybeQuote === "'" ? maybeQuote : null; +function parseAttributeValue(state: State): AttributeValueToken { + const start = state.index; + const quote = state.eat('"') || state.eat("'"); if (quote) { - index++; - } - const valueFirstChar = code[index]; - if (valueFirstChar == null) { - return { - value: { - value: maybeQuote, + if (state.eof()) { + return { + value: quote, quote: null, - start: startIndex, - end: index, - }, - index, - }; - } - if (valueFirstChar === quote) { - return { - value: { - value: "", - quote, - start: startIndex, - end: index + 1, - }, - index: index + 1, - }; - } - const value: AttributeValueToken = { - value: valueFirstChar, - quote, - start: startIndex, - end: index + 1, - }; - index = value.end; - while (index < code.length) { - const char = code[index]; - if (quote) { - if (quote === char) { - index++; - value.end = index; + start, + end: state.index, + }; + } + let c: string | null; + while ((c = state.curr)) { + state.advance(); + if (c === quote) { + const end = state.index; + return { + value: state.code.slice(start + 1, end - 1), + quote, + start, + end, + }; + } + } + } else { + while (state.advance()) { + if (state.currIsSpace() || state.currIs(">") || state.currIs("/>")) { break; } - } else if ( - spacePattern.test(char) || - char === ">" || - (char === "/" && code[index + 1] === ">") - ) { - break; } - value.value += char; - index++; - value.end = index; } + const end = state.index; return { - value, - index, + value: state.code.slice(start, end), + quote: null, + start, + end, }; }