Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 37df9c2

Browse files
authored
Merge pull request scala-js#5198 from sjrd/regex-more-unicode-case-insensitive-tests
Add more tests for Unicode case-insensitivity in regexes.
2 parents c0b6ce2 + 5696a1a commit 37df9c2

File tree

1 file changed

+51
-6
lines changed

1 file changed

+51
-6
lines changed

test-suite/shared/src/test/scala/org/scalajs/testsuite/javalib/util/regex/RegexEngineTest.scala

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,11 +54,12 @@ class RegexEngineTest {
5454

5555
private def debugEscape(pattern: String): String = {
5656
pattern.flatMap {
57-
case '\t' => "`t"
58-
case '\n' => "`n"
59-
case '\r' => "`r"
60-
case c if c < ' ' => "`x%02X".format(c.toInt)
61-
case c => c.toString()
57+
case '\t' => "`t"
58+
case '\n' => "`n"
59+
case '\r' => "`r"
60+
case c if c < 0x10 => "`x0" + c.toInt.toHexString
61+
case c if c < ' ' => "`x" + c.toInt.toHexString
62+
case c => c.toString()
6263
}
6364
}
6465

@@ -1976,8 +1977,52 @@ class RegexEngineTest {
19761977
val s = compile("s", CaseInsensitive | UnicodeCase)
19771978
assertMatches(s, "s")
19781979
assertMatches(s, "S")
1979-
assertMatches(s, "\u017F") // ſ LATIN SMALL LETTER LONG S
1980+
assertMatches(s, "\u017F") // ſ LATIN SMALL LETTER LONG S; 017F folds to 's'
19801981
assertNotMatches(s, "t")
1982+
1983+
val ranges = compile("[g-l\uFB00\u0175-\u0182\u0540-\u0550\u1F68-\u1F8E\u1FAA-\u1FAF\u2126]",
1984+
CaseInsensitive | UnicodeCase)
1985+
// g-l
1986+
assertMatches(ranges, "H")
1987+
assertMatches(ranges, "\u212A") // K KELVIN SIGN, folds to 'k'
1988+
// FB00
1989+
assertMatches(ranges, "\uFB00") // ff LATIN SMALL LIGATURE FF
1990+
// 0175-0182 (contains 017F which folds to 's')
1991+
if (!executingInJVM) {
1992+
// https://bugs.openjdk.org/browse/JDK-8360459
1993+
assertMatches(ranges, "s")
1994+
assertMatches(ranges, "S")
1995+
}
1996+
assertMatches(ranges, "\u017F")
1997+
assertMatches(ranges, "\u0180") // in range; does not participate in case folding
1998+
// 0540-0550
1999+
assertMatches(ranges, "\u0547") // in range
2000+
assertMatches(ranges, "\u0577") // 0547 folds to 0577
2001+
// 1F68-1F8E
2002+
assertMatches(ranges, "\u1F65") // 1F6D folds to 1F65
2003+
assertMatches(ranges, "\u1F6D") // in range
2004+
assertMatches(ranges, "\u1F82") // 1F8A folds to 1F82, and 1F82 is also in range
2005+
// 1FAA-1FAF
2006+
assertMatches(ranges, "\u1FA4") // 1FAC folds to 1FA4 only in simple case folding
2007+
// 2126
2008+
assertMatches(ranges, "\u2126") // in the set
2009+
assertMatches(ranges, "\u03C9") // 2126 folds to 03C9
2010+
assertMatches(ranges, "\u03A9") // 03A9 also folds to 03C9
2011+
// No matches
2012+
assertNotMatches(ranges, "t")
2013+
assertNotMatches(ranges, "ff") // ff FB00 would only match with full case folding
2014+
2015+
// Demonstrate that the JVM recognizes 017F as folding to 's' if the range is ASCII
2016+
val rangeWithASCII_S = compile("[P-U]", CaseInsensitive | UnicodeCase)
2017+
assertMatches(rangeWithASCII_S, "s")
2018+
assertMatches(rangeWithASCII_S, "S")
2019+
assertMatches(rangeWithASCII_S, "\u017F")
2020+
2021+
// Demonstrate that the JVM recognizes 017F as folding to 's' if it is not a range
2022+
val nonRangeWith_017F = compile("[\u017F\u0184]", CaseInsensitive | UnicodeCase)
2023+
assertMatches(nonRangeWith_017F, "s")
2024+
assertMatches(nonRangeWith_017F, "S")
2025+
assertMatches(nonRangeWith_017F, "\u017F")
19812026
}
19822027

19832028
@Test def wordBoundary(): Unit = {

0 commit comments

Comments
 (0)