diff --git a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
index 5f585ba7d343..3da864922acd 100644
--- a/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
+++ b/src/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
@@ -72,6 +72,7 @@ internal abstract class RegexCompiler
private LocalBuilder _tempV;
private LocalBuilder _temp2V;
private LocalBuilder _temp3V;
+ private LocalBuilder _cultureV; // culture used by CallToLower (current or invariant, see InitLocalCultureInfo); cached in a local to avoid repeated thread-local CultureInfo.CurrentCulture accesses
protected RegexCode _code; // the RegexCode object (used for debugging only)
protected int[] _codes; // the RegexCodes being translated
@@ -961,14 +962,20 @@ private void Advance()
_ilg.Emit(OpCodes.Br, AdvanceLabel());
}
- private void CallToLower()
+ private void InitLocalCultureInfo() // selects the invariant or current culture based on _options and stores it in _cultureV
{
if ((_options & RegexOptions.CultureInvariant) != 0)
Call(s_getInvariantCulture);
else
Call(s_getCurrentCulture);
- Call(s_chartolowerM);
+ Stloc(_cultureV); // keep the culture obtained above in the local so it is fetched only once
+ }
+
+ private void CallToLower() // passes the culture cached in _cultureV to s_chartolowerM (presumably char.ToLower(char, CultureInfo) - confirm)
+ {
+ Ldloc(_cultureV); // read the cached culture instead of calling the culture getter again
+ Call(s_chartolowerM);
}
/*
@@ -1089,6 +1096,9 @@ protected void GenerateFindFirstChar()
_textV = DeclareString();
_tempV = DeclareInt();
_temp2V = DeclareInt();
+ _cultureV = DeclareCultureInfo(); // local that holds the culture read by CallToLower
+
+ InitLocalCultureInfo(); // fill _cultureV once up front; CallToLower loads it from the local afterwards
if (0 != (_anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.End)))
{
@@ -1552,6 +1562,14 @@ private LocalBuilder DeclareInt()
return _ilg.DeclareLocal(typeof(int));
}
+ /*
+ * Declares a local of type CultureInfo on _ilg and returns its LocalBuilder
+ */
+ private LocalBuilder DeclareCultureInfo()
+ {
+ return _ilg.DeclareLocal(typeof(CultureInfo));
+ }
+
/*
* Declares a local int array
*/
@@ -1587,6 +1605,7 @@ protected void GenerateGo()
_textbegV = DeclareInt();
_textendV = DeclareInt();
_textstartV = DeclareInt();
+ _cultureV = DeclareCultureInfo(); // local that holds the culture read by CallToLower
// clear some tables
@@ -1600,6 +1619,9 @@ protected void GenerateGo()
// emit the code!
+ // store the culture (invariant or current) in _cultureV once, so CallToLower reads a local instead of repeatedly accessing thread local storage
+ InitLocalCultureInfo();
+
GenerateForwardSection();
GenerateMiddleSection();
GenerateBacktrackSection();
diff --git a/src/System.Text.RegularExpressions/tests/RegexCultureTests.cs b/src/System.Text.RegularExpressions/tests/RegexCultureTests.cs
new file mode 100644
index 000000000000..9097c2f02e7f
--- /dev/null
+++ b/src/System.Text.RegularExpressions/tests/RegexCultureTests.cs
@@ -0,0 +1,65 @@
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Text;
+using Xunit;
+
+namespace System.Text.RegularExpressions.Tests
+{
+    public class RegexCultureTests
+    {
+        /// <summary>
+        /// Checks culture-sensitive IgnoreCase matching; see https://en.wikipedia.org/wiki/Dotted_and_dotless_I
+        /// </summary>
+        [Fact]
+        public void TurkishI_Is_Differently_LowerUpperCased_In_Turkish_Culture()
+        {
+            var turkish = new CultureInfo("tr-TR");
+            string input = "Iıİi";
+
+            Regex[] cultInvariantRegex = Create(input, CultureInfo.InvariantCulture, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
+            Regex[] turkishRegex = Create(input, turkish, RegexOptions.IgnoreCase);
+
+            // The same string is used as pattern and as input, so the culture-invariant regexes match it.
+            Assert.All(cultInvariantRegex, rex => Assert.True(rex.IsMatch(input)));
+
+            // When the regex is created under the Turkish culture, the Turkish lower-cased version no longer
+            // matches an input string that contains both upper and lower case dotted/dotless i,
+            // hence even the original input string does not match.
+            Assert.All(turkishRegex, rex => Assert.False(rex.IsMatch(input)));
+
+            // Now the tricky part: the result of lower-casing the input depends on the culture used in ToLower,
+            // hence a regex only matches input that was lower-cased with the same culture the regex was created under.
+            Assert.All(cultInvariantRegex, rex => Assert.True(rex.IsMatch(input.ToLowerInvariant())));
+            Assert.All(cultInvariantRegex, rex => Assert.False(rex.IsMatch(input.ToLower(turkish))));
+
+            Assert.All(turkishRegex, rex => Assert.False(rex.IsMatch(input.ToLowerInvariant())));
+            Assert.All(turkishRegex, rex => Assert.True(rex.IsMatch(input.ToLower(turkish))));
+        }
+
+        /// <summary>
+        /// Creates the regular expression once interpreted and once compiled to check that both behave the same.
+        /// </summary>
+        /// <param name="input">Input regex string</param>
+        /// <param name="info">Thread culture to use while creating the regex; the previous culture is restored afterwards</param>
+        /// <param name="additional">Additional regex options</param>
+        /// <returns>Two regexes: index 0 uses <paramref name="additional"/> only, index 1 adds RegexOptions.Compiled</returns>
+        private static Regex[] Create(string input, CultureInfo info, RegexOptions additional)
+        {
+            CultureInfo current = CultureInfo.CurrentCulture;
+            try
+            {
+                CultureInfo.CurrentCulture = info;
+
+                // When RegexOptions.IgnoreCase is supplied the current thread culture is used to lowercase the input string.
+                // Unless RegexOptions.CultureInvariant is also specified, this can have culture-dependent effects on the generated code or state machine.
+                var localizedRegex = new Regex[] { new Regex(input, additional), new Regex(input, RegexOptions.Compiled | additional) };
+                return localizedRegex;
+            }
+            finally
+            {
+                CultureInfo.CurrentCulture = current;
+            }
+        }
+    }
+}
diff --git a/src/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj b/src/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj
index f51615f1c434..9ff04485d62d 100644
--- a/src/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj
+++ b/src/System.Text.RegularExpressions/tests/System.Text.RegularExpressions.Tests.csproj
@@ -25,6 +25,7 @@
+
System\Text\RegularExpressions\RegexParseError.cs