Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 692ade5

Browse files
committed
Add HTMLElements.getElementLC(String) as a slightly optimized variant of getElement(String) for code paths where the element name is already known to be lower case.
1 parent d92517a commit 692ade5

File tree

2 files changed

+48
-27
lines changed

2 files changed

+48
-27
lines changed

src/main/java/org/htmlunit/cyberneko/HTMLElements.java

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,7 +620,7 @@ public final Element getElement(final short code) {
620620
/**
621621
* @return the element information for the specified element name.
622622
*
623-
* @param ename The element name.
623+
* @param ename the element name.
624624
*/
625625
public final Element getElement(final String ename) {
626626
Element element = getElement(ename, NO_SUCH_ELEMENT);
@@ -636,6 +636,25 @@ public final Element getElement(final String ename) {
636636
return element;
637637
}
638638

639+
/**
640+
* @return the element information for the specified element name.
641+
*
642+
* @param enameLC the element name in lower case.
643+
*/
644+
public final Element getElementLC(final String enameLC) {
645+
Element element = getElementLC(enameLC, NO_SUCH_ELEMENT);
646+
if (element == NO_SUCH_ELEMENT) {
647+
element = new Element(UNKNOWN,
648+
enameLC.toUpperCase(Locale.ROOT),
649+
NO_SUCH_ELEMENT.flags,
650+
NO_SUCH_ELEMENT.parentCodes_,
651+
NO_SUCH_ELEMENT.bounds,
652+
NO_SUCH_ELEMENT.closes);
653+
element.parent = NO_SUCH_ELEMENT.parent;
654+
}
655+
return element;
656+
}
657+
639658
/**
640659
* @return the element information for the specified element name.
641660
*
@@ -673,7 +692,7 @@ public final Element getElement(final String ename, final Element elementIfNotFo
673692
/**
674693
* @return the element information for the specified element name.
675694
*
676-
* @param ename the element name in lower case
695+
* @param enameLC the element name in lower case
677696
* @param elementIfNotFound the default element to return if not found.
678697
*/
679698
public final Element getElementLC(final String enameLC, final Element elementIfNotFound) {

src/main/java/org/htmlunit/cyberneko/HTMLScanner.java

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2874,41 +2874,43 @@ protected String scanStartElement(final boolean[] empty) throws IOException {
28742874
fBeginColumnNumber = beginColumnNumber;
28752875
fBeginCharacterOffset = beginCharacterOffset;
28762876
if (fElementDepth == -1) {
2877-
if (fByteStream != null && !fIgnoreSpecifiedCharset_ && "META".equalsIgnoreCase(ename)) {
2878-
if (DEBUG_CHARSET) {
2879-
System.out.println("+++ <META>");
2880-
}
2881-
final String httpEquiv = getValue(attributes_, "http-equiv");
2882-
if ("content-type".equalsIgnoreCase(httpEquiv)) {
2877+
if (fByteStream != null) {
2878+
final String enameLC = ename.toLowerCase(Locale.ROOT);
2879+
2880+
if ( !fIgnoreSpecifiedCharset_ && "meta".equals(enameLC)) {
28832881
if (DEBUG_CHARSET) {
2884-
System.out.println("+++ @content-type: \"" + httpEquiv + '"');
2882+
System.out.println("+++ <META>");
28852883
}
2886-
String content = getValue(attributes_, "content");
2887-
if (content != null) {
2888-
content = removeSpaces(content);
2889-
final int index1 = content.toLowerCase(Locale.ROOT).indexOf("charset=");
2890-
if (index1 != -1) {
2891-
final int index2 = content.indexOf(';', index1);
2892-
final String charset = index2 != -1 ? content.substring(index1 + 8, index2)
2893-
: content.substring(index1 + 8);
2894-
changeEncoding(charset);
2884+
final String httpEquiv = getValue(attributes_, "http-equiv");
2885+
if ("content-type".equalsIgnoreCase(httpEquiv)) {
2886+
if (DEBUG_CHARSET) {
2887+
System.out.println("+++ @content-type: \"" + httpEquiv + '"');
2888+
}
2889+
String content = getValue(attributes_, "content");
2890+
if (content != null) {
2891+
content = removeSpaces(content);
2892+
final int index1 = content.toLowerCase(Locale.ROOT).indexOf("charset=");
2893+
if (index1 != -1) {
2894+
final int index2 = content.indexOf(';', index1);
2895+
final String charset = index2 != -1 ? content.substring(index1 + 8, index2)
2896+
: content.substring(index1 + 8);
2897+
changeEncoding(charset);
2898+
}
28952899
}
28962900
}
2897-
}
2898-
else {
2899-
final String metaCharset = getValue(attributes_, "charset");
2900-
if (metaCharset != null) {
2901-
changeEncoding(metaCharset);
2901+
else {
2902+
final String metaCharset = getValue(attributes_, "charset");
2903+
if (metaCharset != null) {
2904+
changeEncoding(metaCharset);
2905+
}
29022906
}
29032907
}
2904-
}
2905-
else if (fByteStream != null) {
2906-
if ("BODY".equalsIgnoreCase(ename)) {
2908+
else if ("body".equals(enameLC)) {
29072909
fByteStream.clear();
29082910
fByteStream = null;
29092911
}
29102912
else {
2911-
final HTMLElements.Element element = htmlConfiguration_.getHtmlElements().getElement(ename);
2913+
final HTMLElements.Element element = htmlConfiguration_.getHtmlElements().getElementLC(enameLC);
29122914
if (element.parent != null
29132915
&& element.parent.length > 0
29142916
&& element.parent[0].code == HTMLElements.BODY) {

0 commit comments

Comments
 (0)