@@ -54,11 +54,12 @@ class RegexEngineTest {
54
54
55
55
/** Returns a copy of `pattern` in which control characters are made visible.
 *
 *  - tab, line feed and carriage return become "`t", "`n" and "`r";
 *  - any other character below U+0020 becomes "`x" followed by its code
 *    point as two lowercase hexadecimal digits;
 *  - every other character is copied unchanged.
 *
 *  @param pattern the text to escape
 *  @return the escaped text
 */
private def debugEscape(pattern: String): String = {
  pattern.flatMap {
    case '\t' => "`t"
    case '\n' => "`n"
    case '\r' => "`r"
    // toHexString does not pad, so single-digit codes get an explicit
    // leading zero to keep the escape exactly two digits wide.
    case c if c < 0x10 => "`x0" + c.toInt.toHexString
    case c if c < ' ' => "`x" + c.toInt.toHexString
    case c => c.toString()
  }
}
64
65
@@ -1976,8 +1977,52 @@ class RegexEngineTest {
1976
1977
val s = compile(" s" , CaseInsensitive | UnicodeCase )
1977
1978
assertMatches(s, " s" )
1978
1979
assertMatches(s, " S" )
1979
- assertMatches(s, " \u017F " ) // ſ LATIN SMALL LETTER LONG S
1980
+ assertMatches(s, " \u017F " ) // ſ LATIN SMALL LETTER LONG S; 017F folds to 's'
1980
1981
assertNotMatches(s, " t" )
1982
+
1983
+ val ranges = compile(" [g-l\uFB00\u0175 -\u0182\u0540 -\u0550\u1F68 -\u1F8E\u1FAA -\u1FAF\u2126 ]" ,
1984
+ CaseInsensitive | UnicodeCase )
1985
+ // g-l
1986
+ assertMatches(ranges, " H" )
1987
+ assertMatches(ranges, " \u212A " ) // K KELVIN SIGN, folds to 'k'
1988
+ // FB00
1989
+ assertMatches(ranges, " \uFB00 " ) // ff LATIN SMALL LIGATURE FF
1990
+ // 0175-0182 (contains 017F which folds to 's')
1991
+ if (! executingInJVM) {
1992
+ // https://bugs.openjdk.org/browse/JDK-8360459
1993
+ assertMatches(ranges, " s" )
1994
+ assertMatches(ranges, " S" )
1995
+ }
1996
+ assertMatches(ranges, " \u017F " )
1997
+ assertMatches(ranges, " \u0180 " ) // in range; does not participate in case folding
1998
+ // 0540-0550
1999
+ assertMatches(ranges, " \u0547 " ) // in range
2000
+ assertMatches(ranges, " \u0577 " ) // 0547 folds to 0577
2001
+ // 1F68-1F8E
2002
+ assertMatches(ranges, " \u1F65 " ) // 1F6D folds to 1F65
2003
+ assertMatches(ranges, " \u1F6D " ) // in range
2004
+ assertMatches(ranges, " \u1F82 " ) // 1F8A folds to 1F82, and 1F82 is also in range
2005
+ // 1FAA-1FAF
2006
+ assertMatches(ranges, " \u1FA4 " ) // 1FAC folds to 1FA4 only in simple case folding
2007
+ // 2126
2008
+ assertMatches(ranges, " \u2126 " ) // in the set
2009
+ assertMatches(ranges, " \u03C9 " ) // 2126 folds to 03C9
2010
+ assertMatches(ranges, " \u03A9 " ) // 03A9 also folds to 03C9
2011
+ // No matches
2012
+ assertNotMatches(ranges, " t" )
2013
+ assertNotMatches(ranges, " ff" ) // ff FB00 would only match with full case folding
2014
+
2015
+ // Demonstrate that the JVM recognizes 017F as folding to 's' if the range is ASCII
2016
+ val rangeWithASCII_S = compile(" [P-U]" , CaseInsensitive | UnicodeCase )
2017
+ assertMatches(rangeWithASCII_S, " s" )
2018
+ assertMatches(rangeWithASCII_S, " S" )
2019
+ assertMatches(rangeWithASCII_S, " \u017F " )
2020
+
2021
+ // Demonstrate that the JVM recognizes 017F as folding to 's' if it is not a range
2022
+ val nonRangeWith_017F = compile(" [\u017F\u0184 ]" , CaseInsensitive | UnicodeCase )
2023
+ assertMatches(nonRangeWith_017F, " s" )
2024
+ assertMatches(nonRangeWith_017F, " S" )
2025
+ assertMatches(nonRangeWith_017F, " \u017F " )
1981
2026
}
1982
2027
1983
2028
@ Test def wordBoundary (): Unit = {
0 commit comments