From 171a23af17f95991851cd0a33e12acc0922a0781 Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Fri, 16 Jun 2017 03:13:24 -0700 Subject: [PATCH 01/16] Optimizing some int32 parsers and clean up --- .../System/Text/Parsing/InvariantSigned.cs | 192 ++++-------------- .../System/Text/Parsing/Signed.cs | 181 ++++++++++------- .../System/Text/TextEncoder.cs | 71 ++++++- .../System/Text/TextFormat.cs | 23 ++- .../Parsing/PrimitiveParserInt32PerfTests.cs | 155 ++++++++++++++ .../Parsing/PrimitiveParserIntegerTests.cs | 7 + .../Parsing/PrimitiveParserPerfTests.cs | 28 +++ 7 files changed, 420 insertions(+), 237 deletions(-) create mode 100644 tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs diff --git a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs index d951371a655..8350328bac1 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs @@ -9,6 +9,9 @@ // NOTE: This file is generated via a T4 template. Please do not edit this file directly. Any changes should be made // in InvariantSigned.tt. + +using System.Runtime.CompilerServices; + namespace System.Text { public static partial class PrimitiveParser @@ -899,179 +902,64 @@ public unsafe static bool TryParseInt32(byte* text, int length, out int value, o public static bool TryParseInt32(ReadOnlySpan text, out int value) { - if (text.Length < 1) - { - value = default(int); - return false; - } - - int indexOfFirstDigit = 0; - int sign = 1; - if (text[0] == '-') - { - indexOfFirstDigit = 1; - sign = -1; - } - else if (text[0] == '+') - { - indexOfFirstDigit = 1; - } - - int overflowLength = Int32OverflowLength + indexOfFirstDigit; - - // Parse the first digit separately. If invalid here, we need to return false. 
- int firstDigit = text[indexOfFirstDigit] - 48; // '0' - if (firstDigit < 0 || firstDigit > 9) - { - value = default(int); - return false; - } - int parsedValue = firstDigit; - - if (text.Length < overflowLength) - { - // Length is less than Int32OverflowLength; overflow is not possible - for (int index = indexOfFirstDigit + 1; index < text.Length; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - value = parsedValue * sign; - return true; - } - parsedValue = parsedValue * 10 + nextDigit; - } - } - else - { - // Length is greater than Int32OverflowLength; overflow is only possible after Int32OverflowLength - // digits. There may be no overflow after Int32OverflowLength if there are leading zeroes. - for (int index = indexOfFirstDigit + 1; index < overflowLength - 1; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - value = parsedValue * sign; - return true; - } - parsedValue = parsedValue * 10 + nextDigit; - } - for (int index = overflowLength - 1; index < text.Length; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - value = parsedValue * sign; - return true; - } - // If parsedValue > (int.MaxValue / 10), any more appended digits will cause overflow. - // if parsedValue == (int.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. 
- bool positive = sign > 0; - bool nextDigitTooLarge = nextDigit > 8 || (positive && nextDigit > 7); - if (parsedValue > int.MaxValue / 10 || parsedValue == int.MaxValue / 10 && nextDigitTooLarge) - { - value = default(int); - return false; - } - parsedValue = parsedValue * 10 + nextDigit; - } - } - - value = parsedValue * sign; - return true; + return TryParseInt32(text, out value, out int bytesConsumed); } public static bool TryParseInt32(ReadOnlySpan text, out int value, out int bytesConsumed) { - if (text.Length < 1) - { - bytesConsumed = 0; - value = default(int); - return false; - } + ref byte textByte = ref text.DangerousGetPinnableReference(); - int indexOfFirstDigit = 0; int sign = 1; - if (text[0] == '-') + int index = 0; + if (textByte == '-') { - indexOfFirstDigit = 1; sign = -1; + index++; } - else if (text[0] == '+') + else if (textByte == '+') { - indexOfFirstDigit = 1; + index++; } - int overflowLength = Int32OverflowLength + indexOfFirstDigit; + int textLength = text.Length; + int overflowLength = Int32OverflowLength + index; + if (textLength > overflowLength) textLength = overflowLength; - // Parse the first digit separately. If invalid here, we need to return false. 
- int firstDigit = text[indexOfFirstDigit] - 48; // '0' - if (firstDigit < 0 || firstDigit > 9) + int answer = 0; + int num = 0; + bool containsDigitsAsPrefix = false; + while (index < textLength - 1) { - bytesConsumed = 0; - value = default(int); - return false; - } - int parsedValue = firstDigit; - - if (text.Length < overflowLength) - { - // Length is less than Int32OverflowLength; overflow is not possible - for (int index = indexOfFirstDigit + 1; index < text.Length; index++) + num = Unsafe.Add(ref textByte, index) - 48; // '0' + if (!IsDigit(num)) { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - bytesConsumed = index; - value = parsedValue * sign; - return true; - } - parsedValue = parsedValue * 10 + nextDigit; + goto Done; } + answer = answer * 10 + num; + containsDigitsAsPrefix = true; + index++; } - else + + num = Unsafe.Add(ref textByte, textLength - 1) - 48; // '0' + if (IsDigit(num)) { - // Length is greater than Int32OverflowLength; overflow is only possible after Int32OverflowLength - // digits. There may be no overflow after Int32OverflowLength if there are leading zeroes. - for (int index = indexOfFirstDigit + 1; index < overflowLength - 1; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - bytesConsumed = index; - value = parsedValue * sign; - return true; - } - parsedValue = parsedValue * 10 + nextDigit; - } - for (int index = overflowLength - 1; index < text.Length; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - bytesConsumed = index; - value = parsedValue * sign; - return true; - } - // If parsedValue > (int.MaxValue / 10), any more appended digits will cause overflow. - // if parsedValue == (int.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. 
- bool positive = sign > 0; - bool nextDigitTooLarge = nextDigit > 8 || (positive && nextDigit > 7); - if (parsedValue > int.MaxValue / 10 || parsedValue == int.MaxValue / 10 && nextDigitTooLarge) - { - bytesConsumed = 0; - value = default(int); - return false; - } - parsedValue = parsedValue * 10 + nextDigit; - } + if (WillOverFlow(answer, num, sign)) goto FalseExit; + containsDigitsAsPrefix = true; + answer = answer * 10 + num; + index++; } - bytesConsumed = text.Length; - value = parsedValue * sign; + Done: + if (!containsDigitsAsPrefix) goto FalseExit; + bytesConsumed = index; + value = answer * sign; return true; - } + FalseExit: + bytesConsumed = 0; + value = default; + return false; + } #endregion #region Int64 @@ -2844,4 +2732,4 @@ public static bool TryParseInt64(ReadOnlySpan text, out long value, out in } } -} \ No newline at end of file +} diff --git a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs index 049315431b4..dee5579811c 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs @@ -4,10 +4,69 @@ // NOTE: This file is generated via a T4 template. Please do not edit this file directly. Any changes should be made // in Signed.tt. +using System.Runtime.CompilerServices; + namespace System.Text { public static partial class PrimitiveParser { + #region Helpers + + private const int maxValueDiv10 = int.MaxValue / 10; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsDigit(int i) + { + return i >= 0 && i <= 9; + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool IsInvalid(uint i) + { + return i > (uint)TextEncoder.Symbol.D9 || i < (uint)TextEncoder.Symbol.D0; + } + + // If parsedValue > (sbyte.MaxValue / 10), any more appended digits will cause overflow. 
+        // if parsedValue == (sbyte.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static bool WillOverFlow(sbyte value, int nextDigit, int sign)
+        {
+            sbyte maxValueDiv10 = sbyte.MaxValue / 10;
+            bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7);
+            return (value > maxValueDiv10 || value == maxValueDiv10 && nextDigitTooLarge);
+        }
+
+        // If parsedValue > (short.MaxValue / 10), any more appended digits will cause overflow.
+        // if parsedValue == (short.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static bool WillOverFlow(short value, int nextDigit, int sign)
+        {
+            short maxValueDiv10 = short.MaxValue / 10;
+            bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7);
+            return (value > maxValueDiv10 || value == maxValueDiv10 && nextDigitTooLarge);
+        }
+
+        // If parsedValue > (int.MaxValue / 10), appending any further digit overflows.
+        // If parsedValue == (int.MaxValue / 10), only a nextDigit greater than 7 (sign > 0) or 8 (sign < 0) overflows; smaller values never overflow.
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        private static bool WillOverFlow(int value, int nextDigit, int sign)
+        {
+            bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7);
+            return (value > maxValueDiv10 || (value == maxValueDiv10 && nextDigitTooLarge)); // digit bound applies only at exactly int.MaxValue / 10
+        }
+
+        // If parsedValue > (long.MaxValue / 10), any more appended digits will cause overflow.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static bool WillOverFlow(long value, int nextDigit, int sign) + { + long maxValueDiv10 = long.MaxValue / 10; + bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7); + return (value > maxValueDiv10 || value == maxValueDiv10 && nextDigitTooLarge); + } + + #endregion + public static bool TryParseSByte(ReadOnlySpan text, out sbyte value, out int bytesConsumed, TextFormat format = default(TextFormat), TextEncoder encoder = null) { encoder = encoder == null ? TextEncoder.Utf8 : encoder; @@ -240,119 +299,99 @@ public static partial class PrimitiveParser public static bool TryParseInt32(ReadOnlySpan text, out int value, out int bytesConsumed, TextFormat format = default(TextFormat), TextEncoder encoder = null) { - encoder = encoder == null ? TextEncoder.Utf8 : encoder; + bool isDefault = format.IsDefault; - if (!format.IsDefault && format.HasPrecision) + if (!isDefault && format.HasPrecision) { throw new NotImplementedException("Format with precision not supported."); } + bool isHex = format.IsHexadecimal; + + encoder = encoder ?? TextEncoder.Utf8; + if (encoder.IsInvariantUtf8) { - if (format.IsHexadecimal) - { - return InvariantUtf8.Hex.TryParseInt32(text, out value, out bytesConsumed); - } - else - { - return InvariantUtf8.TryParseInt32(text, out value, out bytesConsumed); - } + return isHex ? InvariantUtf8.Hex.TryParseInt32(text, out value, out bytesConsumed) : + InvariantUtf8.TryParseInt32(text, out value, out bytesConsumed); } else if (encoder.IsInvariantUtf16) { + /*return isHex ? 
InvariantUtf16.Hex.TryParseInt32(text, out value, out bytesConsumed) : + InvariantUtf16.TryParseInt32(text, out value, out bytesConsumed);*/ ReadOnlySpan utf16Text = text.NonPortableCast(); - int charsConsumed; - bool result; - if (format.IsHexadecimal) - { - result = InvariantUtf16.Hex.TryParseInt32(utf16Text, out value, out charsConsumed); - } - else - { - result = InvariantUtf16.TryParseInt32(utf16Text, out value, out charsConsumed); - } + bool result = isHex ? InvariantUtf16.Hex.TryParseInt32(utf16Text, out value, out int charsConsumed) : + InvariantUtf16.TryParseInt32(utf16Text, out value, out charsConsumed); bytesConsumed = charsConsumed * sizeof(char); return result; } - if (format.IsHexadecimal) + if (isHex) { throw new NotImplementedException("The only supported encodings for hexadecimal parsing are InvariantUtf8 and InvariantUtf16."); } - if (!(format.IsDefault || format.Symbol == 'G' || format.Symbol == 'g')) + if (!(isDefault || format.Symbol == 'G' || format.Symbol == 'g')) { throw new NotImplementedException(String.Format("Format '{0}' not supported.", format.Symbol)); } - uint nextSymbol; - int thisSymbolConsumed; - if (!encoder.TryParseSymbol(text, out nextSymbol, out thisSymbolConsumed)) + ref byte textByte = ref text.DangerousGetPinnableReference(); + if (!encoder.TryParseSymbol(ref textByte, out uint symbol, out int consumed)) { - value = default(int); - bytesConsumed = 0; - return false; + goto FalseExit; } + int sign = 1; - if ((TextEncoder.Symbol)nextSymbol == TextEncoder.Symbol.MinusSign) + int index = 0; + if (symbol == (uint)TextEncoder.Symbol.MinusSign) { sign = -1; + index += consumed; } - - int signConsumed = 0; - if ((TextEncoder.Symbol)nextSymbol == TextEncoder.Symbol.PlusSign || (TextEncoder.Symbol)nextSymbol == TextEncoder.Symbol.MinusSign) + else if (symbol == (uint)TextEncoder.Symbol.PlusSign) { - signConsumed = thisSymbolConsumed; - if (!encoder.TryParseSymbol(text.Slice(signConsumed), out nextSymbol, out thisSymbolConsumed)) - { 
- value = default(int); - bytesConsumed = 0; - return false; - } - } - - if (nextSymbol > 9) - { - value = default(int); - bytesConsumed = 0; - return false; + index += consumed; } - int parsedValue = (int)nextSymbol; - int index = signConsumed + thisSymbolConsumed; + int textLength = text.Length; + int overflowLength = Int32OverflowLength + index; + if (textLength > overflowLength) textLength = overflowLength; - while (index < text.Length) + int answer = 0; + bool containsDigitsAsPrefix = false; + while (index < textLength - 1) { - bool success = encoder.TryParseSymbol(text.Slice(index), out nextSymbol, out thisSymbolConsumed); - if (!success || nextSymbol > 9) + if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed) || IsInvalid(symbol)) { - bytesConsumed = index; - value = (int)(parsedValue * sign); - return true; - } - - // If parsedValue > (int.MaxValue / 10), any more appended digits will cause overflow. - // if parsedValue == (int.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. 
- bool positive = sign > 0; - bool nextDigitTooLarge = nextSymbol > 8 || (positive && nextSymbol > 7); - if (parsedValue > int.MaxValue / 10 || (parsedValue == int.MaxValue / 10 && nextDigitTooLarge)) - { - bytesConsumed = 0; - value = default(int); - return false; + goto Done; } + answer = answer * 10 + (int)symbol; + containsDigitsAsPrefix = true; + index += consumed; + } - index += thisSymbolConsumed; - parsedValue = parsedValue * 10 + (int)nextSymbol; + if (encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, textLength - 1), out symbol, out consumed) && IsInvalid(symbol)) + { + if (WillOverFlow(answer, (int)symbol, sign)) goto FalseExit; + containsDigitsAsPrefix = true; + answer = answer * 10 + (int)symbol; + index += consumed; } - bytesConsumed = text.Length; - value = (int)(parsedValue * sign); + Done: + if (!containsDigitsAsPrefix) goto FalseExit; + bytesConsumed = index; + value = answer * sign; return true; - } - + FalseExit: + bytesConsumed = 0; + value = default; + return false; + } + public static bool TryParseInt64(ReadOnlySpan text, out long value, out int bytesConsumed, TextFormat format = default(TextFormat), TextEncoder encoder = null) { encoder = encoder == null ? 
TextEncoder.Utf8 : encoder; diff --git a/src/System.Text.Primitives/System/Text/TextEncoder.cs b/src/System.Text.Primitives/System/Text/TextEncoder.cs index c87cc8cbb99..1b755514178 100644 --- a/src/System.Text.Primitives/System/Text/TextEncoder.cs +++ b/src/System.Text.Primitives/System/Text/TextEncoder.cs @@ -3,6 +3,7 @@ using System.Text.Utf8; using System.Text.Utf16; +using System.Runtime.CompilerServices; namespace System.Text { @@ -261,7 +262,7 @@ public virtual unsafe bool TryComputeEncodedBytes(string text, out int bytesNeed #region Symbol Parsing / Formatting - public bool TryParseSymbol(ReadOnlySpan encodedBytes, out Symbol symbol, out int bytesConsumed) + public bool TryParseSymbol(ref byte encodedBytes, out Symbol symbol, out int bytesConsumed) { int trieIndex = 0; int bufferIndex = 0; @@ -272,7 +273,7 @@ public bool TryParseSymbol(ReadOnlySpan encodedBytes, out Symbol symbol, o if (node.ValueOrNumChildren == 0) // if numChildren == 0, we're on a leaf & we've found our value { symbol = (Symbol)node.IndexOrSymbol; - if (VerifySuffix(encodedBytes, bufferIndex, symbol)) + if (VerifySuffix(ref encodedBytes, bufferIndex, symbol)) { bytesConsumed = _symbols[node.IndexOrSymbol].Length - bufferIndex; return true; @@ -286,7 +287,7 @@ public bool TryParseSymbol(ReadOnlySpan encodedBytes, out Symbol symbol, o } else { - int search = BinarySearch(trieIndex, bufferIndex, encodedBytes[0]); // we search the _parsingTrie for the nextByte + int search = BinarySearch(trieIndex, bufferIndex, encodedBytes); // we search the _parsingTrie for the nextByte if (search > 0) // if we found a node { @@ -380,6 +381,54 @@ internal bool TryParseSymbol(ReadOnlySpan encodedBytes, out uint symbol, o } } + /// + /// Parse the next byte in a byte array. 
Will return either a DigitOrSymbol value, an InvalidCharacter, or a Continue + /// + /// The next byte to be parsed + /// The total number of bytes parsed (will be zero until a code unit is deciphered) + /// + internal bool TryParseSymbol(ref byte encodedBytes, out uint symbol, out int consumed) + { + int trieIndex = 0; + int codeUnitIndex = 0; + consumed = 0; + while (true) + { + if (_parsingTrie[trieIndex].ValueOrNumChildren == 0) // if numChildren == 0, we're on a leaf & we've found our value and completed the code unit + { + symbol = (uint)_parsingTrie[trieIndex].IndexOrSymbol; // return the parsed value + if (VerifySuffix(ref encodedBytes, codeUnitIndex, (Symbol)symbol)) + { + consumed = _symbols[(int)symbol].Length; + return true; + } + else + { + symbol = 0; + consumed = 0; + return false; + } + } + else + { + int search = BinarySearch(trieIndex, codeUnitIndex, Unsafe.Add(ref encodedBytes, codeUnitIndex)); // we search the _parsingTrie for the nextByte + + if (search > 0) // if we found a node + { + trieIndex = _parsingTrie[search].IndexOrSymbol; + consumed++; + codeUnitIndex++; + } + else + { + symbol = 0; + consumed = 0; + return false; + } + } + } + } + #endregion Symbol Parsing / Formatting #region Private helpers @@ -441,6 +490,22 @@ private bool VerifySuffix(ReadOnlySpan buffer, int codeUnitIndex, Symbol s return true; } + private bool VerifySuffix(ref byte buffer, int codeUnitIndex, Symbol symbol) + { + int codeUnitLength = _symbols[(int)symbol].Length; + if (codeUnitIndex == codeUnitLength - 1) + return true; + + for (int i = 0; i < codeUnitLength - codeUnitIndex; i++) + { + int index = i + codeUnitIndex; + if (Unsafe.Add(ref buffer, index) != _symbols[(int)symbol][index]) + return false; + } + + return true; + } + #endregion Private helpers #region Static factory methods diff --git a/src/System.Text.Primitives/System/Text/TextFormat.cs b/src/System.Text.Primitives/System/Text/TextFormat.cs index 38fee5e63b4..dfba90f6299 100644 --- 
a/src/System.Text.Primitives/System/Text/TextFormat.cs +++ b/src/System.Text.Primitives/System/Text/TextFormat.cs @@ -33,15 +33,16 @@ public TextFormat(char symbol, byte precision = NoPrecision) public static TextFormat Parse(ReadOnlySpan format) { - if (format.Length == 0) + int formatLength = format.Length; + if (formatLength == 0) { - return default(TextFormat); + return default; } uint precision = NoPrecision; - if (format.Length > 1) + if (formatLength > 1) { - var span = format.Slice(1, format.Length - 1); + var span = format.Slice(1, formatLength - 1); if (!PrimitiveParser.InvariantUtf16.TryParseUInt32(span, out precision)) { @@ -65,22 +66,22 @@ public static TextFormat Parse(ReadOnlySpan format) // once we have a non allocating conversion from string to ReadOnlySpan, we can remove this overload public static TextFormat Parse(string format) { - if (format == null) return default(TextFormat); + if (format == null) return default; return Parse(format.AsSpan()); } - public bool IsHexadecimal => Symbol == 'X' || Symbol == 'x'; + public bool IsHexadecimal => _format == 'X' || _format == 'x'; - public bool HasPrecision => Precision != NoPrecision; + public bool HasPrecision => _precision != NoPrecision; public bool IsDefault => _format == 0 && _precision == 0; public override string ToString() { - return string.Format("{0}:{1}", Symbol, Precision); + return string.Format("{0}:{1}", _format, _precision); } - public static bool operator==(TextFormat left, TextFormat right) => left.Equals(right); + public static bool operator ==(TextFormat left, TextFormat right) => left.Equals(right); public static bool operator !=(TextFormat left, TextFormat right) => !left.Equals(right); [EditorBrowsable(EditorBrowsableState.Never)] @@ -93,13 +94,13 @@ public override bool Equals(object obj) [EditorBrowsable(EditorBrowsableState.Never)] public bool Equals(TextFormat other) { - return Symbol.Equals(other.Symbol) && Precision.Equals(other.Precision); + return _format == 
other._format && _precision == other._precision; } [EditorBrowsable(EditorBrowsableState.Never)] public override int GetHashCode() { - return CombineHashCodes(Symbol.GetHashCode(), Precision.GetHashCode()); + return CombineHashCodes(_format.GetHashCode(), _precision.GetHashCode()); } static int CombineHashCodes(int h1, int h2) diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs new file mode 100644 index 00000000000..77ea50158ef --- /dev/null +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs @@ -0,0 +1,155 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using Xunit; +using Microsoft.Xunit.Performance; +using System.Runtime.CompilerServices; + +namespace System.Text.Primitives.Tests +{ + public partial class PrimitiveParserPerfTests + { + private const int InnerCount = 100000; + + private static readonly string[] s_Int32TextArray = new string[20] + { + "214748364", + "2", + "21474836", + "-21474", + "21474", + "-21", + "-2", + "214", + "-21474836", + "-214748364", + "2147", + "-2147", + "-214748", + "-2147483", + "214748", + "-2147483648", + "2147483647", + "21", + "2147483", + "-214" + }; + + [Benchmark(InnerIterationCount = InnerCount)] + [InlineData("107374182")] // standard parse + [InlineData("2147483647")] // max value + [InlineData("0")] + [InlineData("-2147483648")] // min value + private static void PrimitiveParserByteSpanToInt32(string text) + { + byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); + var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); + + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for(int i = 0; i < Benchmark.InnerIterationCount; i++) + { + 
PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan, out int value); + DoNotIgnore(value, 0); + } + } + } + } + + [Benchmark(InnerIterationCount = InnerCount)] + private static void PrimitiveParserByteSpanToInt32_VariableLength() + { + int textLength = s_Int32TextArray.Length; + byte[][] utf8ByteArray = (byte[][])Array.CreateInstance(typeof(byte[]), textLength); + for (var i = 0; i < textLength; i++) + { + utf8ByteArray[i] = Text.Encoding.UTF8.GetBytes(s_Int32TextArray[i]); + } + + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; + PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan, out int value); + DoNotIgnore(value, 0); + } + } + } + } + + [Benchmark(InnerIterationCount = InnerCount)] + [InlineData("107374182")] // standard parse + [InlineData("2147483647")] // max value + [InlineData("0")] + [InlineData("-2147483648")] // min value + private static void PrimitiveParserByteSpanToInt32_BytesConsumed(string text) + { + byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); + var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); + + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan, out int value, out int bytesConsumed); + DoNotIgnore(value, bytesConsumed); + } + } + } + } + + [Benchmark(InnerIterationCount = InnerCount)] + private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength() + { + int textLength = s_Int32TextArray.Length; + byte[][] utf8ByteArray = (byte[][])Array.CreateInstance(typeof(byte[]), textLength); + for (var i = 0; i < textLength; i++) + { + utf8ByteArray[i] = Text.Encoding.UTF8.GetBytes(s_Int32TextArray[i]); + } + + foreach (var iteration in Benchmark.Iterations) + { + using 
(iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; + PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan, out int value, out int bytesConsumed); + DoNotIgnore(value, bytesConsumed); + } + } + } + } + + [Benchmark(InnerIterationCount = InnerCount)] + [InlineData("๑๐๗๓๗๔๑๘๒")] // standard parse + [InlineData("๒๑๔๗๔๘๓๖๔๗")] // max value + [InlineData("๐")] + [InlineData("๑๐๗")] + [InlineData("ลบ๒๑๔๗๔๘๓๖๔๘")] // min value + public unsafe void ParseInt32Thai(string text) + { + ReadOnlySpan utf8Span = UtfEncode(text, false); + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + PrimitiveParser.TryParseInt32(utf8Span, out int value, out int bytesConsumed, 'G', s_thaiEncoder); + DoNotIgnore(value, bytesConsumed); + } + } + } + } + } +} diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs index 075a41d90b2..86cb5151bf4 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs @@ -1145,6 +1145,13 @@ public unsafe void ParseInt16Hex(string text, bool expectSuccess, short expected #region int [Theory] + [InlineData("a1", false, 0, 0)] + [InlineData("1", true, 1, 1)] + [InlineData("-1", true, -1, 2)] + [InlineData("11", true, 11, 2)] + [InlineData("-11", true, -11, 3)] + [InlineData("00a0", true, 0, 2)] + [InlineData("00a", true, 0, 2)] [InlineData("111", true, 111, 3)] [InlineData("492206507abcdefg", true, 492206507, 9)] [InlineData("2147483647", true, 2147483647, 10)] // max diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs index 
5d2202fef61..b0bc0756a66 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs @@ -20,6 +20,11 @@ private static void DoNotIgnore(ulong value, int consumed) { } + [MethodImpl(MethodImplOptions.NoInlining)] + private static void DoNotIgnore(int value, int consumed) + { + } + private static void PrintTestName(string testString, [CallerMemberName] string testName = "") { if (testString != null) @@ -31,5 +36,28 @@ private static void PrintTestName(string testString, [CallerMemberName] string t Console.WriteLine("{0} called with no test string.", testName); } } + + static byte[][] s_thaiUtf8DigitsAndSymbols = new byte[][] + { + new byte[] { 0xe0, 0xb9, 0x90 }, new byte[] { 0xe0, 0xb9, 0x91 }, new byte[] { 0xe0, 0xb9, 0x92 }, + new byte[] { 0xe0, 0xb9, 0x93 }, new byte[] { 0xe0, 0xb9, 0x94 }, new byte[] { 0xe0, 0xb9, 0x95 }, new byte[] { 0xe0, 0xb9, 0x96 }, + new byte[] { 0xe0, 0xb9, 0x97 }, new byte[] { 0xe0, 0xb9, 0x98 }, new byte[] { 0xe0, 0xb9, 0x99 }, new byte[] { 0xE0, 0xB8, 0x88, 0xE0, 0xB8, 0x94 }, null, + new byte[] { 0xE0, 0xB8, 0xAA, 0xE0, 0xB8, 0xB4, 0xE0, 0xB9, 0x88, 0xE0, 0xB8, 0x87, 0xE0, 0xB8, 0x97, 0xE0, 0xB8, 0xB5, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x83, + 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0x8D, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x82, 0xE0, 0xB8, 0x95, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0xA5, 0xE0, + 0xB8, 0xB7, 0xE0, 0xB8, 0xAD, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0x81, 0xE0, 0xB8, 0xB4, 0xE0, 0xB8, 0x99 }, + new byte[] { 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x9A }, new byte[] { 43 }, new byte[] { 0xE0, 0xB9, 0x84, 0xE0, 0xB8, 0xA1, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, + 0x83, 0xE0, 0xB8, 0x8A, 0xE0, 0xB9, 0x88, 0xE0, 0xB8, 0x95, 0xE0, 0xB8, 0xB1, 0xE0, 0xB8, 0xA7, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x82 }, + new byte[] { 69 }, new byte[] { 101 }, + }; + + static TextEncoder s_thaiEncoder = TextEncoder.CreateUtf8Encoder(s_thaiUtf8DigitsAndSymbols); + + 
private byte[] UtfEncode(string s, bool utf16) + { + if (utf16) + return Text.Encoding.Unicode.GetBytes(s); + else + return Text.Encoding.UTF8.GetBytes(s); + } } } From befd080c50876d64bf8712554b68c2ab7b96756f Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Mon, 19 Jun 2017 15:33:41 -0700 Subject: [PATCH 02/16] wip - adding tests and fixing impl bugs --- .../System/Text/Parsing/InvariantSigned.cs | 260 ++++++++++++++++- .../System/Text/Parsing/Signed.cs | 18 +- .../Parsing/PrimitiveParserInt32PerfTests.cs | 276 +++++++++++++++++- .../Parsing/PrimitiveParserIntegerTests.cs | 135 ++++++--- 4 files changed, 620 insertions(+), 69 deletions(-) diff --git a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs index 8350328bac1..84dbfe1b619 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs @@ -905,18 +905,169 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value) return TryParseInt32(text, out value, out int bytesConsumed); } + public static bool TryParseInt32_CUR(ReadOnlySpan text, out int value, out int bytesConsumed) + { + int textLength = text.Length; + if (textLength < 1) goto FalseExit; + + sbyte sign = 1; + int index = 0; + byte num = text[index]; + if (num == 45) + { + sign = -1; + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; + } + else if (num == 43) + { + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; + } + + bool containsDigitsAsPrefix = false; + int answer = 0; + while (num == 48) + { + index++; + if (index >= textLength) + { + bytesConsumed = index; + value = 0; + return true; + } + num = text[index]; + containsDigitsAsPrefix = true; + } + + int overflowLength = Int32OverflowLength + index; + if (textLength - index > Int32OverflowLength) textLength = overflowLength; + + while (num > 47 && num < 58) + { + 
containsDigitsAsPrefix = true; + answer = answer * 10 + num - 48; + index++; + if (index >= textLength - 1) break; + num = text[index]; + } + + if (index >= textLength) goto Done; + num = text[index]; + if (num > 47 && num < 58) + { + num -= 48; + if (WillOverFlow(answer, num, sign)) + { + bytesConsumed = index; + value = answer * sign; + return true; + } + containsDigitsAsPrefix = true; + answer = answer * 10 + num; + index++; + } + + Done: + if (!containsDigitsAsPrefix) goto FalseExit; + bytesConsumed = index; + value = answer * sign; + return true; + + FalseExit: + bytesConsumed = 0; + value = 0; + return false; + } + public static bool TryParseInt32(ReadOnlySpan text, out int value, out int bytesConsumed) { - ref byte textByte = ref text.DangerousGetPinnableReference(); + int textLength = text.Length; + if (textLength < 1) goto FalseExit; + + sbyte sign = 1; + int index = textLength; + byte num = text[textLength - index]; + if (num == 45) + { + sign = -1; + index--; + if (index <= 0) goto FalseExit; + num = text[textLength - index]; + } + else if (num == 43) + { + index--; + if (index <= 0) goto FalseExit; + num = text[textLength - index]; + } + + bool containsDigitsAsPrefix = false; + int answer = 0; + while (num == 48) + { + index--; + if (index <= 0) + { + bytesConsumed = textLength - index; + value = 0; + return true; + } + num = text[textLength - index]; + containsDigitsAsPrefix = true; + } + + if (textLength - index > Int32OverflowLength) textLength = Int32OverflowLength; + + while (num > 47 && num < 58) + { + containsDigitsAsPrefix = true; + answer = answer * 10 + num - 48; + index--; + if (index <= 1) break; + num = text[textLength - index]; + } + + if (index <= 0) goto Done; + num = text[textLength - index]; + if (num > 47 && num < 58) + { + num -= 48; + if (WillOverFlow(answer, num, sign)) + { + bytesConsumed = textLength - index; + value = answer * sign; + return true; + } + containsDigitsAsPrefix = true; + answer = answer * 10 + num; + index--; + } 
+ + Done: + if (!containsDigitsAsPrefix) goto FalseExit; + bytesConsumed = textLength - index; + value = answer * sign; + return true; + FalseExit: + bytesConsumed = 0; + value = 0; + return false; + } + + public static bool TryParseInt32_PREV(ReadOnlySpan text, out int value, out int bytesConsumed) + { int sign = 1; int index = 0; - if (textByte == '-') + if (text[0] == '-') { sign = -1; index++; } - else if (textByte == '+') + else if (text[0] == '+') { index++; } @@ -930,7 +1081,7 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int bool containsDigitsAsPrefix = false; while (index < textLength - 1) { - num = Unsafe.Add(ref textByte, index) - 48; // '0' + num = text[index] - 48; // '0' if (!IsDigit(num)) { goto Done; @@ -939,8 +1090,8 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int containsDigitsAsPrefix = true; index++; } - - num = Unsafe.Add(ref textByte, textLength - 1) - 48; // '0' + + num = text[textLength - 1] - 48; // '0' if (IsDigit(num)) { if (WillOverFlow(answer, num, sign)) goto FalseExit; @@ -960,6 +1111,103 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int value = default; return false; } + + public static bool TryParseInt32_OLD(ReadOnlySpan text, out int value, out int bytesConsumed) + { + if (text.Length < 1) + { + bytesConsumed = 0; + value = default(int); + return false; + } + + int indexOfFirstDigit = 0; + int sign = 1; + if (text[0] == '-') + { + indexOfFirstDigit = 1; + sign = -1; + } + else if (text[0] == '+') + { + indexOfFirstDigit = 1; + } + + int overflowLength = Int32OverflowLength + indexOfFirstDigit; + + // Parse the first digit separately. If invalid here, we need to return false. 
+ if (indexOfFirstDigit >= text.Length) + { + bytesConsumed = 0; + value = default(int); + return false; + } + int firstDigit = text[indexOfFirstDigit] - 48; // '0' + if (firstDigit < 0 || firstDigit > 9) + { + bytesConsumed = 0; + value = default(int); + return false; + } + int parsedValue = firstDigit; + + if (text.Length < overflowLength) + { + // Length is less than Int32OverflowLength; overflow is not possible + for (int index = indexOfFirstDigit + 1; index < text.Length; index++) + { + int nextDigit = text[index] - 48; // '0' + if (nextDigit < 0 || nextDigit > 9) + { + bytesConsumed = index; + value = parsedValue * sign; + return true; + } + parsedValue = parsedValue * 10 + nextDigit; + } + } + else + { + // Length is greater than Int32OverflowLength; overflow is only possible after Int32OverflowLength + // digits. There may be no overflow after Int32OverflowLength if there are leading zeroes. + for (int index = indexOfFirstDigit + 1; index < overflowLength - 1; index++) + { + int nextDigit = text[index] - 48; // '0' + if (nextDigit < 0 || nextDigit > 9) + { + bytesConsumed = index; + value = parsedValue * sign; + return true; + } + parsedValue = parsedValue * 10 + nextDigit; + } + for (int index = overflowLength - 1; index < text.Length; index++) + { + int nextDigit = text[index] - 48; // '0' + if (nextDigit < 0 || nextDigit > 9) + { + bytesConsumed = index; + value = parsedValue * sign; + return true; + } + // If parsedValue > (int.MaxValue / 10), any more appended digits will cause overflow. + // if parsedValue == (int.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. 
+ bool positive = sign > 0; + bool nextDigitTooLarge = nextDigit > 8 || (positive && nextDigit > 7); + if (parsedValue > int.MaxValue / 10 || parsedValue == int.MaxValue / 10 && nextDigitTooLarge) + { + bytesConsumed = index; + value = parsedValue * sign; + return true; + } + parsedValue = parsedValue * 10 + nextDigit; + } + } + + bytesConsumed = text.Length; + value = parsedValue * sign; + return true; + } #endregion #region Int64 diff --git a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs index dee5579811c..5cba4755169 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs @@ -12,7 +12,10 @@ public static partial class PrimitiveParser { #region Helpers - private const int maxValueDiv10 = int.MaxValue / 10; + private const sbyte maxValueSbyteDiv10 = sbyte.MaxValue / 10; + private const short maxValueShortDiv10 = short.MaxValue / 10; + private const int maxValueIntDiv10 = int.MaxValue / 10; + private const long maxValueLongDiv10 = long.MaxValue / 10; [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsDigit(int i) @@ -31,9 +34,8 @@ private static bool IsInvalid(uint i) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool WillOverFlow(sbyte value, int nextDigit, int sign) { - sbyte maxValueDiv10 = sbyte.MaxValue / 10; bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7); - return (value > maxValueDiv10 || value == maxValueDiv10 && nextDigitTooLarge); + return (value > maxValueSbyteDiv10 || nextDigitTooLarge); } // If parsedValue > (short.MaxValue / 10), any more appended digits will cause overflow. 
@@ -41,18 +43,17 @@ private static bool WillOverFlow(sbyte value, int nextDigit, int sign) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool WillOverFlow(short value, int nextDigit, int sign) { - short maxValueDiv10 = short.MaxValue / 10; bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7); - return (value > maxValueDiv10 || value == maxValueDiv10 && nextDigitTooLarge); + return (value > maxValueShortDiv10 || nextDigitTooLarge); } // If parsedValue > (int.MaxValue / 10), any more appended digits will cause overflow. // if parsedValue == (int.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool WillOverFlow(int value, int nextDigit, int sign) + private static bool WillOverFlow(int value, byte nextDigit, sbyte sign) { bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7); - return (value > maxValueDiv10 || nextDigitTooLarge); + return (value > maxValueIntDiv10 || nextDigitTooLarge); } // If parsedValue > (long.MaxValue / 10), any more appended digits will cause overflow. 
@@ -60,9 +61,8 @@ private static bool WillOverFlow(int value, int nextDigit, int sign) [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool WillOverFlow(long value, int nextDigit, int sign) { - long maxValueDiv10 = long.MaxValue / 10; bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7); - return (value > maxValueDiv10 || value == maxValueDiv10 && nextDigitTooLarge); + return (value > maxValueLongDiv10 || nextDigitTooLarge); } #endregion diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs index 77ea50158ef..9696bd0473c 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs @@ -8,9 +8,9 @@ namespace System.Text.Primitives.Tests { - public partial class PrimitiveParserPerfTests + public class ParserPerfTests { - private const int InnerCount = 100000; + private const int InnerCount = 10000; private static readonly string[] s_Int32TextArray = new string[20] { @@ -36,7 +36,7 @@ public partial class PrimitiveParserPerfTests "-214" }; - [Benchmark(InnerIterationCount = InnerCount)] + //[Benchmark(InnerIterationCount = InnerCount)] [InlineData("107374182")] // standard parse [InlineData("2147483647")] // max value [InlineData("0")] @@ -59,7 +59,7 @@ private static void PrimitiveParserByteSpanToInt32(string text) } } - [Benchmark(InnerIterationCount = InnerCount)] + //[Benchmark(InnerIterationCount = InnerCount)] private static void PrimitiveParserByteSpanToInt32_VariableLength() { int textLength = s_Int32TextArray.Length; @@ -83,11 +83,44 @@ private static void PrimitiveParserByteSpanToInt32_VariableLength() } } - [Benchmark(InnerIterationCount = InnerCount)] + [MethodImpl(MethodImplOptions.NoInlining)] + private static void DoNotIgnore(int value, int consumed) + { + } + + //[Benchmark(InnerIterationCount = 
InnerCount)] [InlineData("107374182")] // standard parse [InlineData("2147483647")] // max value [InlineData("0")] [InlineData("-2147483648")] // min value + [InlineData("214748364")] + [InlineData("2")] + [InlineData("21474836")] + [InlineData("-21474")] + [InlineData("21474")] + [InlineData("-21")] + [InlineData("-2")] + [InlineData("214")] + [InlineData("-21474836")] + [InlineData("-214748364")] + [InlineData("2147")] + [InlineData("-2147")] + [InlineData("-214748")] + [InlineData("-2147483")] + [InlineData("214748")] + [InlineData("21")] + [InlineData("2147483")] + [InlineData("-214")] + [InlineData("+21474")] + [InlineData("+21")] + [InlineData("+2")] + [InlineData("+21474836")] + [InlineData("+214748364")] + [InlineData("+2147")] + [InlineData("+214748")] + [InlineData("+2147483")] + [InlineData("+2147483648")] + [InlineData("+214")] private static void PrimitiveParserByteSpanToInt32_BytesConsumed(string text) { byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); @@ -107,6 +140,86 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed(string text) } [Benchmark(InnerIterationCount = InnerCount)] + [InlineData(10)] + [InlineData(100)] + [InlineData(1000)] + private static void ParseTestNew(int count) + { + string text = GenerateRandomDigitString(count); + byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); + var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); + + int final = 0; + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + int totalConsumed = 0; + while (totalConsumed < utf8ByteSpan.Length) + { + PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan.Slice(totalConsumed), out int value, out int bytesConsumed); + totalConsumed += bytesConsumed; + final |= value; + } + } + } + } + Assert.Equal(-1, final); + } + + [Benchmark(InnerIterationCount = InnerCount)] + [InlineData(10)] + [InlineData(100)] + [InlineData(1000)] + 
private static void ParseTestNew_OLD(int count) + { + string text = GenerateRandomDigitString(count); + byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); + var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); + + int final = 0; + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + int totalConsumed = 0; + while (totalConsumed < utf8ByteSpan.Length) + { + PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(utf8ByteSpan.Slice(totalConsumed), out int value, out int bytesConsumed); + totalConsumed += bytesConsumed; + final |= value; + } + } + } + } + Assert.Equal(-1, final); + } + + private static string GenerateRandomDigitString(int count = 1000) + { + Random rnd = new Random(count); + var builder = new StringBuilder(); + + for (int j = 0; j < count; j++) + { + int sign = rnd.Next(0, 3); + if (sign == 1) builder.Append("+"); + if (sign == 2) builder.Append("-"); + var length = rnd.Next(1, 14); + for (int i = 0; i < length; i++) + { + int digit = rnd.Next(0, 10); + builder.Append(digit.ToString()); + } + } + return builder.ToString(); + } + + //[Benchmark(InnerIterationCount = InnerCount)] private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength() { int textLength = s_Int32TextArray.Length; @@ -130,7 +243,156 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength( } } - [Benchmark(InnerIterationCount = InnerCount)] + //[Benchmark(InnerIterationCount = InnerCount)] + [InlineData("107374182")] // standard parse + [InlineData("2147483647")] // max value + [InlineData("0")] + [InlineData("-2147483648")] // min value + [InlineData("214748364")] + [InlineData("2")] + [InlineData("21474836")] + [InlineData("-21474")] + [InlineData("21474")] + [InlineData("-21")] + [InlineData("-2")] + [InlineData("214")] + [InlineData("-21474836")] + [InlineData("-214748364")] + [InlineData("2147")] + [InlineData("-2147")] + 
[InlineData("-214748")] + [InlineData("-2147483")] + [InlineData("214748")] + [InlineData("21")] + [InlineData("2147483")] + [InlineData("-214")] + [InlineData("+21474")] + [InlineData("+21")] + [InlineData("+2")] + [InlineData("+21474836")] + [InlineData("+214748364")] + [InlineData("+2147")] + [InlineData("+214748")] + [InlineData("+2147483")] + [InlineData("+2147483648")] + [InlineData("+214")] + private static void PrimitiveParserByteSpanToInt32_BytesConsumed_BASE(string text) + { + byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); + var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); + + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + int.TryParse(text, out int value); + DoNotIgnore(value, 0); + } + } + } + } + + //[Benchmark(InnerIterationCount = InnerCount)] + private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_BASE() + { + int textLength = s_Int32TextArray.Length; + byte[][] utf8ByteArray = (byte[][])Array.CreateInstance(typeof(byte[]), textLength); + for (var i = 0; i < textLength; i++) + { + utf8ByteArray[i] = Text.Encoding.UTF8.GetBytes(s_Int32TextArray[i]); + } + + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + int.TryParse(s_Int32TextArray[i % textLength], out int value); + DoNotIgnore(value, 0); + } + } + } + } + + //[Benchmark(InnerIterationCount = InnerCount)] + [InlineData("107374182")] // standard parse + [InlineData("2147483647")] // max value + [InlineData("0")] + [InlineData("-2147483648")] // min value + [InlineData("214748364")] + [InlineData("2")] + [InlineData("21474836")] + [InlineData("-21474")] + [InlineData("21474")] + [InlineData("-21")] + [InlineData("-2")] + [InlineData("214")] + [InlineData("-21474836")] + [InlineData("-214748364")] + [InlineData("2147")] + [InlineData("-2147")] 
+ [InlineData("-214748")] + [InlineData("-2147483")] + [InlineData("214748")] + [InlineData("21")] + [InlineData("2147483")] + [InlineData("-214")] + [InlineData("+21474")] + [InlineData("+21")] + [InlineData("+2")] + [InlineData("+21474836")] + [InlineData("+214748364")] + [InlineData("+2147")] + [InlineData("+214748")] + [InlineData("+2147483")] + [InlineData("+2147483648")] + [InlineData("+214")] + private static void PrimitiveParserByteSpanToInt32_BytesConsumed_OLD(string text) + { + byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); + var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); + + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(utf8ByteSpan, out int value, out int bytesConsumed); + DoNotIgnore(value, bytesConsumed); + } + } + } + } + + //[Benchmark(InnerIterationCount = InnerCount)] + private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_OLD() + { + int textLength = s_Int32TextArray.Length; + byte[][] utf8ByteArray = (byte[][])Array.CreateInstance(typeof(byte[]), textLength); + for (var i = 0; i < textLength; i++) + { + utf8ByteArray[i] = Text.Encoding.UTF8.GetBytes(s_Int32TextArray[i]); + } + + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; + PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(utf8ByteSpan, out int value, out int bytesConsumed); + DoNotIgnore(value, bytesConsumed); + } + } + } + } + + /*[Benchmark(InnerIterationCount = InnerCount)] [InlineData("๑๐๗๓๗๔๑๘๒")] // standard parse [InlineData("๒๑๔๗๔๘๓๖๔๗")] // max value [InlineData("๐")] @@ -150,6 +412,6 @@ public unsafe void ParseInt32Thai(string text) } } } - } + }*/ } } diff --git 
a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs index 86cb5151bf4..a47a2eca926 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs @@ -1145,6 +1145,40 @@ public unsafe void ParseInt16Hex(string text, bool expectSuccess, short expected #region int [Theory] + /*[InlineData("a1", true, 0, 0)] + [InlineData("1", true, 1, 1)] + [InlineData("-1", true, -1, 2)] + [InlineData("11", true, 11, 2)] + [InlineData("-11", true, -11, 3)] + [InlineData("00a0", true, 0, 2)] + [InlineData("00a", true, 0, 2)] + [InlineData("111", true, 111, 3)] + [InlineData("492206507abcdefg", true, 492206507, 9)] + [InlineData("2147483647", true, 2147483647, 10)] // max + [InlineData("-2147483648", true, -2147483648, 11)] // min + [InlineData("-A", true, 0, 0)] // invalid character after a sign + [InlineData("I am 1", true, 0, 0)] // invalid character test + [InlineData(" !", true, 0, 0)] // invalid character test w/ char < '0' + [InlineData("2147483648", true, 214748364, 9)] // positive overflow test + [InlineData("-2147483649", true, -214748364, 10)] // negative overflow test + [InlineData("0", true, 0, 1)] + [InlineData("+1", true, 1, 2)] + [InlineData("+2147483647", true, 2147483647, 11)] + [InlineData("as3gf31t`2c", true, 0, 0)] + [InlineData("agbagbagb5", true, 0, 0)] + [InlineData("1faag", true, 1, 1)] + [InlineData("-1sdg", true, -1, 2)] + [InlineData("-afsagsag4", true, 0, 0)] + [InlineData("+a", true, 0, 0)] + [InlineData("-000012345abcdefg1", true, -12345, 10)] + [InlineData("+000012345abcdefg1", true, 12345, 10)] + [InlineData("000012345abcdefg1", true, 12345, 9)] + [InlineData("0000001234145abcdefg1", true, 1234145, 13)] + [InlineData("+", true, 0, 0)] + [InlineData("-", true, 0, 0)] + [InlineData("", true, 0, 0)] + [InlineData("5", true, 5, 1)] + [InlineData("^", true, 
0, 0)]*/ [InlineData("a1", false, 0, 0)] [InlineData("1", true, 1, 1)] [InlineData("-1", true, -1, 2)] @@ -1159,8 +1193,27 @@ public unsafe void ParseInt16Hex(string text, bool expectSuccess, short expected [InlineData("-A", false, 0, 0)] // invalid character after a sign [InlineData("I am 1", false, 0, 0)] // invalid character test [InlineData(" !", false, 0, 0)] // invalid character test w/ char < '0' - [InlineData("2147483648", false, 0, 0)] // positive overflow test - [InlineData("-2147483649", false, 0, 0)] // negative overflow test + [InlineData("2147483648", true, 214748364, 9)] // positive overflow test + [InlineData("-2147483649", true, -214748364, 10)] // negative overflow test + [InlineData("0", true, 0, 1)] + [InlineData("+1", true, 1, 2)] + [InlineData("+2147483647", true, 2147483647, 11)] + [InlineData("as3gf31t`2c", false, 0, 0)] + [InlineData("agbagbagb5", false, 0, 0)] + [InlineData("1faag", true, 1, 1)] + [InlineData("-1sdg", true, -1, 2)] + [InlineData("-afsagsag4", false, 0, 0)] + [InlineData("+a", false, 0, 0)] + [InlineData("-000012345abcdefg1", true, -12345, 10)] + [InlineData("+000012345abcdefg1", true, 12345, 10)] + [InlineData("000012345abcdefg1", true, 12345, 9)] + [InlineData("0000001234145abcdefg1", true, 1234145, 13)] + [InlineData("+", false, 0, 0)] + [InlineData("-", false, 0, 0)] + [InlineData("", false, 0, 0)] + [InlineData("5", true, 5, 1)] + [InlineData("^", false, 0, 0)] + [InlineData("41474836482145", true, 414748364, 9)] public unsafe void ParseInt32Dec(string text, bool expectSuccess, int expectedValue, int expectedConsumed) { int parsedValue; @@ -1171,66 +1224,54 @@ public unsafe void ParseInt32Dec(string text, bool expectSuccess, int expectedVa byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); bool result; - - result = PrimitiveParser.TryParseInt32(utf8Span, out parsedValue, out consumed, 'G', TextEncoder.Utf8); - Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, 
parsedValue); - Assert.Equal(expectedConsumed, consumed); - - result = PrimitiveParser.TryParseInt32(utf8Span, out parsedValue, out consumed); - Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, parsedValue); - Assert.Equal(expectedConsumed, consumed); - - result = PrimitiveParser.InvariantUtf8.TryParseInt32(utf8Span, out parsedValue); - Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, parsedValue); + result = PrimitiveParser.InvariantUtf8.TryParseInt32(utf8Span, out parsedValue, out consumed); Assert.Equal(expectSuccess, result); Assert.Equal(expectedValue, parsedValue); Assert.Equal(expectedConsumed, consumed); + } - fixed (byte* arrayPointer = textBytes) - { - result = PrimitiveParser.InvariantUtf8.TryParseInt32(arrayPointer, textBytes.Length, out parsedValue); - Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, parsedValue); + [Fact] + private static void ParseTestNew() + { + string text = GenerateRandomDigitString(); + byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); + var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); - result = PrimitiveParser.InvariantUtf8.TryParseInt32(arrayPointer, textBytes.Length, out parsedValue, out consumed); - Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, parsedValue); - Assert.Equal(expectedConsumed, consumed); + int final = 0; + int totalConsumed = 0; + while (totalConsumed < utf8ByteSpan.Length) + { + PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(utf8ByteSpan.Slice(totalConsumed), out int value, out int bytesConsumed); + totalConsumed += bytesConsumed; + final |= value; } + Assert.Equal(-1, final); + } - result = PrimitiveParser.TryParseInt32(utf16ByteSpan, out parsedValue, out consumed, 'G', TextEncoder.Utf16); - Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, parsedValue); - Assert.Equal(expectedConsumed * sizeof(char), consumed); - - result = PrimitiveParser.InvariantUtf16.TryParseInt32(utf16CharSpan, out parsedValue); - 
Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, parsedValue); - - result = PrimitiveParser.InvariantUtf16.TryParseInt32(utf16CharSpan, out parsedValue, out consumed); - Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, parsedValue); - Assert.Equal(expectedConsumed, consumed); + private static string GenerateRandomDigitString(int count = 1000) + { + Random rnd = new Random(42); + var builder = new StringBuilder(); - fixed (char* arrayPointer = textChars) + for (int j = 0; j < count; j++) { - result = PrimitiveParser.InvariantUtf16.TryParseInt32(arrayPointer, textBytes.Length, out parsedValue); - Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, parsedValue); - - result = PrimitiveParser.InvariantUtf16.TryParseInt32(arrayPointer, textBytes.Length, out parsedValue, out consumed); - Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, parsedValue); - Assert.Equal(expectedConsumed, consumed); + int sign = rnd.Next(0, 3); + if (sign == 1) builder.Append("+"); + if (sign == 2) builder.Append("-"); + var length = rnd.Next(1, 14); + for (int i = 0; i < length; i++) + { + int digit = rnd.Next(0, 10); + builder.Append(digit.ToString()); + } } + return builder.ToString(); } + [Theory] [InlineData("๑๑๑", true, 0, 111, 9)] [InlineData("เรื่องเหลวไหล๒๗", true, 39, 27, 6)] From 174e2f80033100304aab3c440f2f36341d269af9 Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Tue, 20 Jun 2017 14:44:13 -0700 Subject: [PATCH 03/16] WIP - new implementation and tests to compare with previous --- .../System/Text/Parsing/InvariantSigned.cs | 394 ++++++++++++++++-- .../Parsing/PrimitiveParserInt32PerfTests.cs | 103 ++++- .../Parsing/PrimitiveParserIntegerTests.cs | 129 +++++- 3 files changed, 582 insertions(+), 44 deletions(-) diff --git a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs index 84dbfe1b619..1735092aad7 100644 --- 
a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs @@ -988,67 +988,398 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int if (textLength < 1) goto FalseExit; sbyte sign = 1; - int index = textLength; - byte num = text[textLength - index]; + int index = 0; + byte num = text[index]; + if (num == '-') + { + sign = -1; + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; + } + else if (num == '+') + { + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; + } + + int answer = -1; + + if (num >= '0' && num <= '9') + { + answer = num - '0'; + index++; + if (index >= textLength) goto Done; + num = text[index]; + int overflow = Int32OverflowLength + index - 1; + if (textLength < overflow) + { + while (num >= '0' && num <= '9') + { + answer = answer * 10 + num - '0'; + index++; + if (index >= textLength) goto Done; + } + } + else + { + while (num >= '0' && num <= '9') + { + answer = answer * 10 + num - '0'; + index++; + if (index >= overflow) goto Done; + if (index == overflow - 1) + { + num = text[index]; + if (num >= '0' && num <= '9') + { + num -= (byte)'0'; + if (WillOverFlow(answer, num, sign)) goto FalseExit; + answer = answer * 10 + num; + index++; + } + goto Done; + } + num = text[index]; + } + } + goto Done; + } + + FalseExit: + bytesConsumed = 0; + value = 0; + return false; + + Done: + bytesConsumed = index; + value = answer * sign; + return true; + } + + public static bool TryParseInt32_FINAL2(ReadOnlySpan text, out int value, out int bytesConsumed) + { + int textLength = text.Length; + if (textLength < 1) goto FalseExit; + + sbyte sign = 1; + int index = 0; + byte num = text[index]; + if (num == 45) + { + sign = -1; + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; + } + else if (num == 43) + { + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; + } + + int 
answer = -1; + if (num == 48) + { + answer = 0; + index++; + if (index >= textLength) + { + bytesConsumed = index; + value = 0; + return true; + } + num = text[index]; + + while (num == 48) + { + index++; + if (index >= textLength) + { + bytesConsumed = index; + value = 0; + return true; + } + num = text[index]; + } + } + + textLength = Math.Min(Int32OverflowLength + index, textLength); + + if (num > 47 && num < 58) + { + answer = num - 48; + index++; + if (index >= textLength - 1) goto What; + num = text[index]; + + while (num > 47 && num < 58) + { + answer = answer * 10 + num - 48; + index++; + if (index == textLength - 1) break; + num = text[index]; + } + } + + What: + if (index >= textLength) goto Done; + num = text[index]; + if (num > 47 && num < 58) + { + num -= 48; + if (WillOverFlow(answer, num, sign)) + { + goto FalseExit; + } + answer = answer * 10 + num; + index++; + } + + Done: + if (answer == -1) goto FalseExit; + bytesConsumed = index; + value = answer * sign; + return true; + + FalseExit: + bytesConsumed = 0; + value = 0; + return false; + } + + public static bool TryParseInt32_C(ReadOnlySpan text, out int value, out int bytesConsumed) + { + int textLength = text.Length; + if (textLength < 1) goto FalseExit; + + sbyte sign = 1; + int index = 0; + byte num = text[index]; + if (num == 45) + { + sign = -1; + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; + } + else if (num == 43) + { + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; + } + + int answer = -1; + if (num == 48) + { + answer = 0; + index++; + if (index >= textLength) + { + bytesConsumed = index; + value = 0; + return true; + } + num = text[index]; + + while (num == 48) + { + index++; + if (index >= textLength) + { + bytesConsumed = index; + value = 0; + return true; + } + num = text[index]; + } + } + + textLength = Math.Min(Int32OverflowLength + index, textLength); + + if (num > 47 && num < 58) + { + answer = num - 48; + index++; + if (index 
>= textLength - 1) goto What; + num = text[index]; + + while (num > 47 && num < 58) + { + answer = answer * 10 + num - 48; + index++; + if (index >= textLength - 1) break; + num = text[index]; + } + } + + What: + if (index >= textLength) goto Done; + num = text[index]; + if (num > 47 && num < 58) + { + num -= 48; + if (WillOverFlow(answer, num, sign)) + { + goto FalseExit; + } + answer = answer * 10 + num; + index++; + } + + Done: + if (answer == -1) goto FalseExit; + bytesConsumed = index; + value = answer * sign; + return true; + + FalseExit: + bytesConsumed = 0; + value = 0; + return false; + } + + public static bool TryParseInt32_A(ReadOnlySpan text, out int value, out int bytesConsumed) + { + int textLength = text.Length; + if (textLength < 1) goto FalseExit; + + sbyte sign = 1; + int index = 0; + byte num = text[index]; if (num == 45) { sign = -1; - index--; - if (index <= 0) goto FalseExit; - num = text[textLength - index]; + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; } else if (num == 43) { - index--; - if (index <= 0) goto FalseExit; - num = text[textLength - index]; + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; } bool containsDigitsAsPrefix = false; int answer = 0; while (num == 48) { - index--; - if (index <= 0) + containsDigitsAsPrefix = true; + index++; + if (index >= textLength) goto Done; + num = text[index]; + } + + textLength = Math.Min(Int32OverflowLength + index, textLength); + + if (num > 47 && num < 58) + { + containsDigitsAsPrefix = true; + answer = answer * 10 + num - 48; + index++; + if (index >= textLength - 1) goto What; + num = text[index]; + } + + while (num > 47 && num < 58) + { + answer = answer * 10 + num - 48; + index++; + if (index >= textLength - 1) break; + num = text[index]; + } + + What: + if (index >= textLength) goto Done; + num = text[index]; + if (num > 47 && num < 58) + { + num -= 48; + if (WillOverFlow(answer, num, sign)) { - bytesConsumed = textLength - index; - 
value = 0; - return true; + goto FalseExit; } - num = text[textLength - index]; containsDigitsAsPrefix = true; + answer = answer * 10 + num; + index++; + } + + Done: + if (!containsDigitsAsPrefix) goto FalseExit; + bytesConsumed = index; + value = answer * sign; + return true; + + FalseExit: + bytesConsumed = 0; + value = 0; + return false; + } + + public static bool TryParseInt32_BACKUP(ReadOnlySpan text, out int value, out int bytesConsumed) + { + int textLength = text.Length; + if (textLength < 1) goto FalseExit; + + sbyte sign = 1; + int index = 0; + byte num = text[index]; + if (num == 45) + { + sign = -1; + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; + } + else if (num == 43) + { + index++; + if (index >= textLength) goto FalseExit; + num = text[index]; + } + + bool containsDigitsAsPrefix = false; + int answer = 0; + while (num == 48) + { + containsDigitsAsPrefix = true; + index++; + if (index >= textLength) goto Done; + num = text[index]; } - if (textLength - index > Int32OverflowLength) textLength = Int32OverflowLength; + textLength = Math.Min(Int32OverflowLength + index, textLength); while (num > 47 && num < 58) { containsDigitsAsPrefix = true; answer = answer * 10 + num - 48; - index--; - if (index <= 1) break; - num = text[textLength - index]; + index++; + if (index >= textLength - 1) break; + num = text[index]; } - if (index <= 0) goto Done; - num = text[textLength - index]; + if (index >= textLength) goto Done; + num = text[index]; if (num > 47 && num < 58) { num -= 48; if (WillOverFlow(answer, num, sign)) { - bytesConsumed = textLength - index; - value = answer * sign; - return true; + goto FalseExit; } containsDigitsAsPrefix = true; answer = answer * 10 + num; - index--; + index++; } Done: if (!containsDigitsAsPrefix) goto FalseExit; - bytesConsumed = textLength - index; + bytesConsumed = index; value = answer * sign; return true; @@ -1133,15 +1464,16 @@ public static bool TryParseInt32_OLD(ReadOnlySpan text, out int 
value, out indexOfFirstDigit = 1; } - int overflowLength = Int32OverflowLength + indexOfFirstDigit; - - // Parse the first digit separately. If invalid here, we need to return false. if (indexOfFirstDigit >= text.Length) { bytesConsumed = 0; value = default(int); return false; } + + int overflowLength = Int32OverflowLength + indexOfFirstDigit; + + // Parse the first digit separately. If invalid here, we need to return false. int firstDigit = text[indexOfFirstDigit] - 48; // '0' if (firstDigit < 0 || firstDigit > 9) { @@ -1196,9 +1528,9 @@ public static bool TryParseInt32_OLD(ReadOnlySpan text, out int value, out bool nextDigitTooLarge = nextDigit > 8 || (positive && nextDigit > 7); if (parsedValue > int.MaxValue / 10 || parsedValue == int.MaxValue / 10 && nextDigitTooLarge) { - bytesConsumed = index; - value = parsedValue * sign; - return true; + bytesConsumed = 0; + value = 0; + return false; } parsedValue = parsedValue * 10 + nextDigit; } diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs index 9696bd0473c..c4cc7e22a5f 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs @@ -88,7 +88,7 @@ private static void DoNotIgnore(int value, int consumed) { } - //[Benchmark(InnerIterationCount = InnerCount)] + [Benchmark(InnerIterationCount = InnerCount)] [InlineData("107374182")] // standard parse [InlineData("2147483647")] // max value [InlineData("0")] @@ -121,6 +121,35 @@ private static void DoNotIgnore(int value, int consumed) [InlineData("+2147483")] [InlineData("+2147483648")] [InlineData("+214")] + [InlineData("000000000000000000001235abcdfg")] + [InlineData("214748364abcdefghijklmnop")] + [InlineData("2abcdefghijklmnop")] + [InlineData("21474836abcdefghijklmnop")] + [InlineData("-21474abcdefghijklmnop")] + 
[InlineData("21474abcdefghijklmnop")] + [InlineData("-21abcdefghijklmnop")] + [InlineData("-2abcdefghijklmnop")] + [InlineData("214abcdefghijklmnop")] + [InlineData("-21474836abcdefghijklmnop")] + [InlineData("-214748364abcdefghijklmnop")] + [InlineData("2147abcdefghijklmnop")] + [InlineData("-2147abcdefghijklmnop")] + [InlineData("-214748abcdefghijklmnop")] + [InlineData("-2147483abcdefghijklmnop")] + [InlineData("214748abcdefghijklmnop")] + [InlineData("21abcdefghijklmnop")] + [InlineData("2147483abcdefghijklmnop")] + [InlineData("-214abcdefghijklmnop")] + [InlineData("+21474abcdefghijklmnop")] + [InlineData("+21abcdefghijklmnop")] + [InlineData("+2abcdefghijklmnop")] + [InlineData("+21474836abcdefghijklmnop")] + [InlineData("+214748364abcdefghijklmnop")] + [InlineData("+2147abcdefghijklmnop")] + [InlineData("+214748abcdefghijklmnop")] + [InlineData("+2147483abcdefghijklmnop")] + [InlineData("+2147483648abcdefghijklmnop")] + [InlineData("+214abcdefghijklmnop")] private static void PrimitiveParserByteSpanToInt32_BytesConsumed(string text) { byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); @@ -139,7 +168,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed(string text) } } - [Benchmark(InnerIterationCount = InnerCount)] + //[Benchmark(InnerIterationCount = InnerCount)] [InlineData(10)] [InlineData(100)] [InlineData(1000)] @@ -169,7 +198,7 @@ private static void ParseTestNew(int count) Assert.Equal(-1, final); } - [Benchmark(InnerIterationCount = InnerCount)] + //[Benchmark(InnerIterationCount = InnerCount)] [InlineData(10)] [InlineData(100)] [InlineData(1000)] @@ -219,7 +248,7 @@ private static string GenerateRandomDigitString(int count = 1000) return builder.ToString(); } - //[Benchmark(InnerIterationCount = InnerCount)] + [Benchmark(InnerIterationCount = InnerCount)] private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength() { int textLength = s_Int32TextArray.Length; @@ -243,7 +272,7 @@ private static void 
PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength( } } - //[Benchmark(InnerIterationCount = InnerCount)] + [Benchmark(InnerIterationCount = InnerCount)] [InlineData("107374182")] // standard parse [InlineData("2147483647")] // max value [InlineData("0")] @@ -276,6 +305,35 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength( [InlineData("+2147483")] [InlineData("+2147483648")] [InlineData("+214")] + [InlineData("000000000000000000001235abcdfg")] + [InlineData("214748364abcdefghijklmnop")] + [InlineData("2abcdefghijklmnop")] + [InlineData("21474836abcdefghijklmnop")] + [InlineData("-21474abcdefghijklmnop")] + [InlineData("21474abcdefghijklmnop")] + [InlineData("-21abcdefghijklmnop")] + [InlineData("-2abcdefghijklmnop")] + [InlineData("214abcdefghijklmnop")] + [InlineData("-21474836abcdefghijklmnop")] + [InlineData("-214748364abcdefghijklmnop")] + [InlineData("2147abcdefghijklmnop")] + [InlineData("-2147abcdefghijklmnop")] + [InlineData("-214748abcdefghijklmnop")] + [InlineData("-2147483abcdefghijklmnop")] + [InlineData("214748abcdefghijklmnop")] + [InlineData("21abcdefghijklmnop")] + [InlineData("2147483abcdefghijklmnop")] + [InlineData("-214abcdefghijklmnop")] + [InlineData("+21474abcdefghijklmnop")] + [InlineData("+21abcdefghijklmnop")] + [InlineData("+2abcdefghijklmnop")] + [InlineData("+21474836abcdefghijklmnop")] + [InlineData("+214748364abcdefghijklmnop")] + [InlineData("+2147abcdefghijklmnop")] + [InlineData("+214748abcdefghijklmnop")] + [InlineData("+2147483abcdefghijklmnop")] + [InlineData("+2147483648abcdefghijklmnop")] + [InlineData("+214abcdefghijklmnop")] private static void PrimitiveParserByteSpanToInt32_BytesConsumed_BASE(string text) { byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); @@ -294,7 +352,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_BASE(string tex } } - //[Benchmark(InnerIterationCount = InnerCount)] + [Benchmark(InnerIterationCount = InnerCount)] private static void 
PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_BASE() { int textLength = s_Int32TextArray.Length; @@ -317,7 +375,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_ } } - //[Benchmark(InnerIterationCount = InnerCount)] + [Benchmark(InnerIterationCount = InnerCount)] [InlineData("107374182")] // standard parse [InlineData("2147483647")] // max value [InlineData("0")] @@ -350,6 +408,35 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_ [InlineData("+2147483")] [InlineData("+2147483648")] [InlineData("+214")] + [InlineData("000000000000000000001235abcdfg")] + [InlineData("214748364abcdefghijklmnop")] + [InlineData("2abcdefghijklmnop")] + [InlineData("21474836abcdefghijklmnop")] + [InlineData("-21474abcdefghijklmnop")] + [InlineData("21474abcdefghijklmnop")] + [InlineData("-21abcdefghijklmnop")] + [InlineData("-2abcdefghijklmnop")] + [InlineData("214abcdefghijklmnop")] + [InlineData("-21474836abcdefghijklmnop")] + [InlineData("-214748364abcdefghijklmnop")] + [InlineData("2147abcdefghijklmnop")] + [InlineData("-2147abcdefghijklmnop")] + [InlineData("-214748abcdefghijklmnop")] + [InlineData("-2147483abcdefghijklmnop")] + [InlineData("214748abcdefghijklmnop")] + [InlineData("21abcdefghijklmnop")] + [InlineData("2147483abcdefghijklmnop")] + [InlineData("-214abcdefghijklmnop")] + [InlineData("+21474abcdefghijklmnop")] + [InlineData("+21abcdefghijklmnop")] + [InlineData("+2abcdefghijklmnop")] + [InlineData("+21474836abcdefghijklmnop")] + [InlineData("+214748364abcdefghijklmnop")] + [InlineData("+2147abcdefghijklmnop")] + [InlineData("+214748abcdefghijklmnop")] + [InlineData("+2147483abcdefghijklmnop")] + [InlineData("+2147483648abcdefghijklmnop")] + [InlineData("+214abcdefghijklmnop")] private static void PrimitiveParserByteSpanToInt32_BytesConsumed_OLD(string text) { byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); @@ -368,7 +455,7 @@ private static void 
PrimitiveParserByteSpanToInt32_BytesConsumed_OLD(string text } } - //[Benchmark(InnerIterationCount = InnerCount)] + [Benchmark(InnerIterationCount = InnerCount)] private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_OLD() { int textLength = s_Int32TextArray.Length; diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs index a47a2eca926..ef1b30d6b0d 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs @@ -12,6 +12,10 @@ // NOTE: This file is generated via a T4 template. Please do not edit this file directly. Any changes should be made // in PrimitiveParserIntegerTests.tt. + +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Threading; using Xunit; namespace System.Text.Primitives.Tests @@ -1193,8 +1197,8 @@ public unsafe void ParseInt16Hex(string text, bool expectSuccess, short expected [InlineData("-A", false, 0, 0)] // invalid character after a sign [InlineData("I am 1", false, 0, 0)] // invalid character test [InlineData(" !", false, 0, 0)] // invalid character test w/ char < '0' - [InlineData("2147483648", true, 214748364, 9)] // positive overflow test - [InlineData("-2147483649", true, -214748364, 10)] // negative overflow test + [InlineData("2147483648", false, 0, 0)] // positive overflow test + [InlineData("-2147483649", false, 0, 0)] // negative overflow test [InlineData("0", true, 0, 1)] [InlineData("+1", true, 1, 2)] [InlineData("+2147483647", true, 2147483647, 11)] @@ -1213,7 +1217,7 @@ public unsafe void ParseInt16Hex(string text, bool expectSuccess, short expected [InlineData("", false, 0, 0)] [InlineData("5", true, 5, 1)] [InlineData("^", false, 0, 0)] - [InlineData("41474836482145", true, 414748364, 9)] + [InlineData("41474836482145", false, 0, 0)] public unsafe 
void ParseInt32Dec(string text, bool expectSuccess, int expectedValue, int expectedConsumed) { int parsedValue; @@ -1224,14 +1228,89 @@ public unsafe void ParseInt32Dec(string text, bool expectSuccess, int expectedVa byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); bool result; - - result = PrimitiveParser.InvariantUtf8.TryParseInt32(utf8Span, out parsedValue, out consumed); + + result = PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(utf8Span, out parsedValue, out consumed); Assert.Equal(expectSuccess, result); Assert.Equal(expectedValue, parsedValue); Assert.Equal(expectedConsumed, consumed); } + private static readonly string[] s_Int32TextArray = new string[20] + { + "214748364", + "2", + "21474836", + "-21474", + "21474", + "-21", + "-2", + "214", + "-21474836", + "-214748364", + "2147", + "-2147", + "-214748", + "-2147483", + "214748", + "-2147483648", + "2147483647", + "21", + "2147483", + "-214" + }; + + [Fact] + private static void ParseInt32DecVariableLength() + { + int textLength = s_Int32TextArray.Length; + byte[][] utf8ByteArray = (byte[][])Array.CreateInstance(typeof(byte[]), textLength); + for (var i = 0; i < textLength; i++) + { + utf8ByteArray[i] = Text.Encoding.UTF8.GetBytes(s_Int32TextArray[i]); + } + + for (int i = 0; i < 100; i++) + { + ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; + PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan, out int value, out int bytesConsumed); + } + } + + [Fact] + public unsafe void ParseInt32Dec2() + { + const int TwoGiB = int.MaxValue; + + unsafe + { + if (!AllocationHelper.TryAllocNative((IntPtr)TwoGiB, out IntPtr memBlock)) + return; // It's not implausible to believe that a 2gb allocation will fail - if so, skip this test to avoid unnecessary test flakiness. 
+ + try + { + ref byte memory = ref Unsafe.AsRef(memBlock.ToPointer()); + var span = new Span(memBlock.ToPointer(), TwoGiB); + + for (int i = 0; i < TwoGiB - 2; i++) + { + span[i] = 48; + } + span[TwoGiB - 1] = 49; + span[TwoGiB - 2] = 49; + + bool result = PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(span, out int parsedValue, out int consumed); + Assert.Equal(true, result); + Assert.Equal(11, parsedValue); + Assert.Equal(TwoGiB, consumed); + } + finally + { + AllocationHelper.ReleaseNative(ref memBlock); + } + } + } + [Fact] private static void ParseTestNew() @@ -1561,4 +1640,44 @@ public unsafe void ParseInt64Hex(string text, bool expectSuccess, long expectedV } + + static class AllocationHelper + { + private static readonly Mutex MemoryLock = new Mutex(); + private static readonly TimeSpan WaitTimeout = TimeSpan.FromSeconds(120); + + public static bool TryAllocNative(IntPtr size, out IntPtr memory) + { + memory = IntPtr.Zero; + + if (!MemoryLock.WaitOne(WaitTimeout)) + return false; + + try + { + memory = Marshal.AllocHGlobal(size); + } + catch (OutOfMemoryException) + { + memory = IntPtr.Zero; + MemoryLock.ReleaseMutex(); + } + + return memory != IntPtr.Zero; + + } + + public static void ReleaseNative(ref IntPtr memory) + { + try + { + Marshal.FreeHGlobal(memory); + memory = IntPtr.Zero; + } + finally + { + MemoryLock.ReleaseMutex(); + } + } + } } From 568e736e7c14d26ca0375acf52805bc13a0b2fdf Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Tue, 20 Jun 2017 22:19:42 -0700 Subject: [PATCH 04/16] WIP - fixing non-invariant parser bugs and adding tests --- .../System/Text/Parsing/InvariantSigned.cs | 581 +----------------- .../System/Text/Parsing/Signed.cs | 215 ++++++- .../Parsing/PrimitiveParserInt32PerfTests.cs | 183 +++++- .../Parsing/PrimitiveParserIntegerTests.cs | 387 ++++++++---- 4 files changed, 636 insertions(+), 730 deletions(-) diff --git a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs 
b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs index 1735092aad7..69af9d79674 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs @@ -905,7 +905,7 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value) return TryParseInt32(text, out value, out int bytesConsumed); } - public static bool TryParseInt32_CUR(ReadOnlySpan text, out int value, out int bytesConsumed) + public static bool TryParseInt32(ReadOnlySpan text, out int value, out int bytesConsumed) { int textLength = text.Length; if (textLength < 1) goto FalseExit; @@ -913,123 +913,53 @@ public static bool TryParseInt32_CUR(ReadOnlySpan text, out int value, out sbyte sign = 1; int index = 0; byte num = text[index]; - if (num == 45) + if (num == '-') { sign = -1; index++; if (index >= textLength) goto FalseExit; num = text[index]; } - else if (num == 43) + else if (num == '+') { index++; if (index >= textLength) goto FalseExit; num = text[index]; } - bool containsDigitsAsPrefix = false; int answer = 0; - while (num == 48) - { - index++; - if (index >= textLength) - { - bytesConsumed = index; - value = 0; - return true; - } - num = text[index]; - containsDigitsAsPrefix = true; - } - - int overflowLength = Int32OverflowLength + index; - if (textLength - index > Int32OverflowLength) textLength = overflowLength; - - while (num > 47 && num < 58) - { - containsDigitsAsPrefix = true; - answer = answer * 10 + num - 48; - index++; - if (index >= textLength - 1) break; - num = text[index]; - } - if (index >= textLength) goto Done; - num = text[index]; - if (num > 47 && num < 58) + if (num >= '0' && num <= '9') { - num -= 48; - if (WillOverFlow(answer, num, sign)) + if (num == '0') { - bytesConsumed = index; - value = answer * sign; - return true; + do + { + index++; + if (index >= textLength) goto Done; + num = text[index]; + } while (num == '0') ; + if (num < '0' || num > '9') goto Done; } - 
containsDigitsAsPrefix = true; - answer = answer * 10 + num; - index++; - } - Done: - if (!containsDigitsAsPrefix) goto FalseExit; - bytesConsumed = index; - value = answer * sign; - return true; - - FalseExit: - bytesConsumed = 0; - value = 0; - return false; - } - - public static bool TryParseInt32(ReadOnlySpan text, out int value, out int bytesConsumed) - { - int textLength = text.Length; - if (textLength < 1) goto FalseExit; - - sbyte sign = 1; - int index = 0; - byte num = text[index]; - if (num == '-') - { - sign = -1; - index++; - if (index >= textLength) goto FalseExit; - num = text[index]; - } - else if (num == '+') - { - index++; - if (index >= textLength) goto FalseExit; - num = text[index]; - } - - int answer = -1; - - if (num >= '0' && num <= '9') - { - answer = num - '0'; - index++; - if (index >= textLength) goto Done; - num = text[index]; - int overflow = Int32OverflowLength + index - 1; - if (textLength < overflow) + int firstNonZeroDigitIndex = index; + if (textLength - firstNonZeroDigitIndex < Int32OverflowLength) { - while (num >= '0' && num <= '9') + do { answer = answer * 10 + num - '0'; index++; if (index >= textLength) goto Done; - } + num = text[index]; + } while (num >= '0' && num <= '9'); } else { - while (num >= '0' && num <= '9') + do { answer = answer * 10 + num - '0'; index++; - if (index >= overflow) goto Done; - if (index == overflow - 1) + if (index - firstNonZeroDigitIndex == Int32OverflowLength - 1) { num = text[index]; if (num >= '0' && num <= '9') @@ -1042,7 +972,7 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int goto Done; } num = text[index]; - } + } while (num >= '0' && num <= '9'); } goto Done; } @@ -1058,391 +988,6 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int return true; } - public static bool TryParseInt32_FINAL2(ReadOnlySpan text, out int value, out int bytesConsumed) - { - int textLength = text.Length; - if (textLength < 1) goto FalseExit; - - sbyte sign = 
1; - int index = 0; - byte num = text[index]; - if (num == 45) - { - sign = -1; - index++; - if (index >= textLength) goto FalseExit; - num = text[index]; - } - else if (num == 43) - { - index++; - if (index >= textLength) goto FalseExit; - num = text[index]; - } - - int answer = -1; - if (num == 48) - { - answer = 0; - index++; - if (index >= textLength) - { - bytesConsumed = index; - value = 0; - return true; - } - num = text[index]; - - while (num == 48) - { - index++; - if (index >= textLength) - { - bytesConsumed = index; - value = 0; - return true; - } - num = text[index]; - } - } - - textLength = Math.Min(Int32OverflowLength + index, textLength); - - if (num > 47 && num < 58) - { - answer = num - 48; - index++; - if (index >= textLength - 1) goto What; - num = text[index]; - - while (num > 47 && num < 58) - { - answer = answer * 10 + num - 48; - index++; - if (index == textLength - 1) break; - num = text[index]; - } - } - - What: - if (index >= textLength) goto Done; - num = text[index]; - if (num > 47 && num < 58) - { - num -= 48; - if (WillOverFlow(answer, num, sign)) - { - goto FalseExit; - } - answer = answer * 10 + num; - index++; - } - - Done: - if (answer == -1) goto FalseExit; - bytesConsumed = index; - value = answer * sign; - return true; - - FalseExit: - bytesConsumed = 0; - value = 0; - return false; - } - - public static bool TryParseInt32_C(ReadOnlySpan text, out int value, out int bytesConsumed) - { - int textLength = text.Length; - if (textLength < 1) goto FalseExit; - - sbyte sign = 1; - int index = 0; - byte num = text[index]; - if (num == 45) - { - sign = -1; - index++; - if (index >= textLength) goto FalseExit; - num = text[index]; - } - else if (num == 43) - { - index++; - if (index >= textLength) goto FalseExit; - num = text[index]; - } - - int answer = -1; - if (num == 48) - { - answer = 0; - index++; - if (index >= textLength) - { - bytesConsumed = index; - value = 0; - return true; - } - num = text[index]; - - while (num == 48) - { - 
index++; - if (index >= textLength) - { - bytesConsumed = index; - value = 0; - return true; - } - num = text[index]; - } - } - - textLength = Math.Min(Int32OverflowLength + index, textLength); - - if (num > 47 && num < 58) - { - answer = num - 48; - index++; - if (index >= textLength - 1) goto What; - num = text[index]; - - while (num > 47 && num < 58) - { - answer = answer * 10 + num - 48; - index++; - if (index >= textLength - 1) break; - num = text[index]; - } - } - - What: - if (index >= textLength) goto Done; - num = text[index]; - if (num > 47 && num < 58) - { - num -= 48; - if (WillOverFlow(answer, num, sign)) - { - goto FalseExit; - } - answer = answer * 10 + num; - index++; - } - - Done: - if (answer == -1) goto FalseExit; - bytesConsumed = index; - value = answer * sign; - return true; - - FalseExit: - bytesConsumed = 0; - value = 0; - return false; - } - - public static bool TryParseInt32_A(ReadOnlySpan text, out int value, out int bytesConsumed) - { - int textLength = text.Length; - if (textLength < 1) goto FalseExit; - - sbyte sign = 1; - int index = 0; - byte num = text[index]; - if (num == 45) - { - sign = -1; - index++; - if (index >= textLength) goto FalseExit; - num = text[index]; - } - else if (num == 43) - { - index++; - if (index >= textLength) goto FalseExit; - num = text[index]; - } - - bool containsDigitsAsPrefix = false; - int answer = 0; - while (num == 48) - { - containsDigitsAsPrefix = true; - index++; - if (index >= textLength) goto Done; - num = text[index]; - } - - textLength = Math.Min(Int32OverflowLength + index, textLength); - - if (num > 47 && num < 58) - { - containsDigitsAsPrefix = true; - answer = answer * 10 + num - 48; - index++; - if (index >= textLength - 1) goto What; - num = text[index]; - } - - while (num > 47 && num < 58) - { - answer = answer * 10 + num - 48; - index++; - if (index >= textLength - 1) break; - num = text[index]; - } - - What: - if (index >= textLength) goto Done; - num = text[index]; - if (num > 47 && 
num < 58) - { - num -= 48; - if (WillOverFlow(answer, num, sign)) - { - goto FalseExit; - } - containsDigitsAsPrefix = true; - answer = answer * 10 + num; - index++; - } - - Done: - if (!containsDigitsAsPrefix) goto FalseExit; - bytesConsumed = index; - value = answer * sign; - return true; - - FalseExit: - bytesConsumed = 0; - value = 0; - return false; - } - - public static bool TryParseInt32_BACKUP(ReadOnlySpan text, out int value, out int bytesConsumed) - { - int textLength = text.Length; - if (textLength < 1) goto FalseExit; - - sbyte sign = 1; - int index = 0; - byte num = text[index]; - if (num == 45) - { - sign = -1; - index++; - if (index >= textLength) goto FalseExit; - num = text[index]; - } - else if (num == 43) - { - index++; - if (index >= textLength) goto FalseExit; - num = text[index]; - } - - bool containsDigitsAsPrefix = false; - int answer = 0; - while (num == 48) - { - containsDigitsAsPrefix = true; - index++; - if (index >= textLength) goto Done; - num = text[index]; - } - - textLength = Math.Min(Int32OverflowLength + index, textLength); - - while (num > 47 && num < 58) - { - containsDigitsAsPrefix = true; - answer = answer * 10 + num - 48; - index++; - if (index >= textLength - 1) break; - num = text[index]; - } - - if (index >= textLength) goto Done; - num = text[index]; - if (num > 47 && num < 58) - { - num -= 48; - if (WillOverFlow(answer, num, sign)) - { - goto FalseExit; - } - containsDigitsAsPrefix = true; - answer = answer * 10 + num; - index++; - } - - Done: - if (!containsDigitsAsPrefix) goto FalseExit; - bytesConsumed = index; - value = answer * sign; - return true; - - FalseExit: - bytesConsumed = 0; - value = 0; - return false; - } - - public static bool TryParseInt32_PREV(ReadOnlySpan text, out int value, out int bytesConsumed) - { - int sign = 1; - int index = 0; - if (text[0] == '-') - { - sign = -1; - index++; - } - else if (text[0] == '+') - { - index++; - } - - int textLength = text.Length; - int overflowLength = 
Int32OverflowLength + index; - if (textLength > overflowLength) textLength = overflowLength; - - int answer = 0; - int num = 0; - bool containsDigitsAsPrefix = false; - while (index < textLength - 1) - { - num = text[index] - 48; // '0' - if (!IsDigit(num)) - { - goto Done; - } - answer = answer * 10 + num; - containsDigitsAsPrefix = true; - index++; - } - - num = text[textLength - 1] - 48; // '0' - if (IsDigit(num)) - { - if (WillOverFlow(answer, num, sign)) goto FalseExit; - containsDigitsAsPrefix = true; - answer = answer * 10 + num; - index++; - } - - Done: - if (!containsDigitsAsPrefix) goto FalseExit; - bytesConsumed = index; - value = answer * sign; - return true; - - FalseExit: - bytesConsumed = 0; - value = default; - return false; - } - public static bool TryParseInt32_OLD(ReadOnlySpan text, out int value, out int bytesConsumed) { if (text.Length < 1) @@ -2781,9 +2326,15 @@ public unsafe static bool TryParseInt32(char* text, int length, out int value, o } public static bool TryParseInt32(ReadOnlySpan text, out int value) + { + return TryParseInt32(text, out value, out int charsConsumed); + } + + public static bool TryParseInt32(ReadOnlySpan text, out int value, out int charsConsumed) { if (text.Length < 1) { + charsConsumed = 0; value = default(int); return false; } @@ -2800,91 +2351,13 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value) indexOfFirstDigit = 1; } - int overflowLength = Int32OverflowLength + indexOfFirstDigit; - - // Parse the first digit separately. If invalid here, we need to return false. 
- int firstDigit = text[indexOfFirstDigit] - 48; // '0' - if (firstDigit < 0 || firstDigit > 9) - { - value = default(int); - return false; - } - int parsedValue = firstDigit; - - if (text.Length < overflowLength) - { - // Length is less than Int32OverflowLength; overflow is not possible - for (int index = indexOfFirstDigit + 1; index < text.Length; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - value = parsedValue * sign; - return true; - } - parsedValue = parsedValue * 10 + nextDigit; - } - } - else - { - // Length is greater than Int32OverflowLength; overflow is only possible after Int32OverflowLength - // digits. There may be no overflow after Int32OverflowLength if there are leading zeroes. - for (int index = indexOfFirstDigit + 1; index < overflowLength - 1; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - value = parsedValue * sign; - return true; - } - parsedValue = parsedValue * 10 + nextDigit; - } - for (int index = overflowLength - 1; index < text.Length; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - value = parsedValue * sign; - return true; - } - // If parsedValue > (int.MaxValue / 10), any more appended digits will cause overflow. - // if parsedValue == (int.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. 
- bool positive = sign > 0; - bool nextDigitTooLarge = nextDigit > 8 || (positive && nextDigit > 7); - if (parsedValue > int.MaxValue / 10 || parsedValue == int.MaxValue / 10 && nextDigitTooLarge) - { - value = default(int); - return false; - } - parsedValue = parsedValue * 10 + nextDigit; - } - } - - value = parsedValue * sign; - return true; - } - - public static bool TryParseInt32(ReadOnlySpan text, out int value, out int charsConsumed) - { - if (text.Length < 1) + if (indexOfFirstDigit >= text.Length) { charsConsumed = 0; value = default(int); return false; } - int indexOfFirstDigit = 0; - int sign = 1; - if (text[0] == '-') - { - indexOfFirstDigit = 1; - sign = -1; - } - else if (text[0] == '+') - { - indexOfFirstDigit = 1; - } - int overflowLength = Int32OverflowLength + indexOfFirstDigit; // Parse the first digit separately. If invalid here, we need to return false. diff --git a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs index 5cba4755169..cc4607ed69e 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs @@ -55,7 +55,7 @@ private static bool WillOverFlow(int value, byte nextDigit, sbyte sign) bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7); return (value > maxValueIntDiv10 || nextDigitTooLarge); } - + // If parsedValue > (long.MaxValue / 10), any more appended digits will cause overflow. // if parsedValue == (long.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. 
[MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -336,60 +336,215 @@ private static bool WillOverFlow(long value, int nextDigit, int sign) throw new NotImplementedException(String.Format("Format '{0}' not supported.", format.Symbol)); } - ref byte textByte = ref text.DangerousGetPinnableReference(); - if (!encoder.TryParseSymbol(ref textByte, out uint symbol, out int consumed)) - { - goto FalseExit; - } + int textLength = text.Length; + if (textLength < 1) goto FalseExit; - int sign = 1; + ref byte textByte = ref text.DangerousGetPinnableReference(); + if (!encoder.TryParseSymbol(ref textByte, out uint symbol, out int consumed)) goto FalseExit; + + sbyte sign = 1; int index = 0; if (symbol == (uint)TextEncoder.Symbol.MinusSign) { sign = -1; index += consumed; + if (index >= textLength) goto FalseExit; + if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed)) goto FalseExit; } else if (symbol == (uint)TextEncoder.Symbol.PlusSign) { index += consumed; + if (index >= textLength) goto FalseExit; + if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed)) goto FalseExit; } - int textLength = text.Length; - int overflowLength = Int32OverflowLength + index; - if (textLength > overflowLength) textLength = overflowLength; - int answer = 0; - bool containsDigitsAsPrefix = false; - while (index < textLength - 1) + if (symbol <= (uint)TextEncoder.Symbol.D9) { - if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed) || IsInvalid(symbol)) + int numBytes = consumed; + if (symbol == (uint)TextEncoder.Symbol.D0) { - goto Done; + do + { + index += consumed; + if (index >= textLength) goto Done; + if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed)) goto Done; + } while (symbol == (uint)TextEncoder.Symbol.D0); + if (IsInvalid(symbol)) goto Done; } - answer = answer * 10 + (int)symbol; - containsDigitsAsPrefix = true; - index += 
consumed; - } - if (encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, textLength - 1), out symbol, out consumed) && IsInvalid(symbol)) - { - if (WillOverFlow(answer, (int)symbol, sign)) goto FalseExit; - containsDigitsAsPrefix = true; - answer = answer * 10 + (int)symbol; - index += consumed; + int firstNonZeroDigitIndex = index; + if (textLength - firstNonZeroDigitIndex < Int32OverflowLength * numBytes) + { + do + { + answer = answer * 10 + (int)symbol - (int)TextEncoder.Symbol.D0; + index += consumed; + if (index >= textLength) goto Done; + if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed)) goto Done; + } while (symbol <= (uint)TextEncoder.Symbol.D9); + } + else + { + do + { + answer = answer * 10 + (int)symbol - (int)TextEncoder.Symbol.D0; + index += consumed; + if (index - firstNonZeroDigitIndex == Int32OverflowLength * numBytes - 1) + { + if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed)) goto Done; + if (symbol <= (uint)TextEncoder.Symbol.D9) + { + symbol -= (int)TextEncoder.Symbol.D0; + if (WillOverFlow(answer, (int)symbol, sign)) goto FalseExit; + answer = answer * 10 + (int)symbol; + index += consumed; + } + goto Done; + } + if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed)) goto Done; + } while (symbol <= (uint)TextEncoder.Symbol.D9); + } + goto Done; } + FalseExit: + bytesConsumed = 0; + value = 0; + return false; + Done: - if (!containsDigitsAsPrefix) goto FalseExit; bytesConsumed = index; value = answer * sign; return true; + } - FalseExit: - bytesConsumed = 0; - value = default; - return false; + public static bool TryParseInt32_OLD(ReadOnlySpan text, out int value, out int bytesConsumed, TextFormat format = default(TextFormat), TextEncoder encoder = null) + { + encoder = encoder == null ? 
TextEncoder.Utf8 : encoder; + + if (!format.IsDefault && format.HasPrecision) + { + throw new NotImplementedException("Format with precision not supported."); + } + + if (encoder.IsInvariantUtf8) + { + if (format.IsHexadecimal) + { + return InvariantUtf8.Hex.TryParseInt32(text, out value, out bytesConsumed); + } + else + { + return InvariantUtf8.TryParseInt32(text, out value, out bytesConsumed); + } + } + else if (encoder.IsInvariantUtf16) + { + ReadOnlySpan utf16Text = text.NonPortableCast(); + int charsConsumed; + bool result; + if (format.IsHexadecimal) + { + result = InvariantUtf16.Hex.TryParseInt32(utf16Text, out value, out charsConsumed); + } + else + { + result = InvariantUtf16.TryParseInt32(utf16Text, out value, out charsConsumed); + } + bytesConsumed = charsConsumed * sizeof(char); + return result; + } + + if (format.IsHexadecimal) + { + throw new NotImplementedException("The only supported encodings for hexadecimal parsing are InvariantUtf8 and InvariantUtf16."); + } + + if (!(format.IsDefault || format.Symbol == 'G' || format.Symbol == 'g')) + { + throw new NotImplementedException(String.Format("Format '{0}' not supported.", format.Symbol)); + } + + if (text.Length < 1) + { + bytesConsumed = 0; + value = default(int); + return false; + } + + uint nextSymbol; + int thisSymbolConsumed; + if (!encoder.TryParseSymbol(text, out nextSymbol, out thisSymbolConsumed)) + { + value = default(int); + bytesConsumed = 0; + return false; + } + + int sign = 1; + if ((TextEncoder.Symbol)nextSymbol == TextEncoder.Symbol.MinusSign) + { + sign = -1; + } + + int signConsumed = 0; + if ((TextEncoder.Symbol)nextSymbol == TextEncoder.Symbol.PlusSign || (TextEncoder.Symbol)nextSymbol == TextEncoder.Symbol.MinusSign) + { + signConsumed = thisSymbolConsumed; + if (signConsumed >= text.Length) + { + bytesConsumed = 0; + value = default(int); + return false; + } + if (!encoder.TryParseSymbol(text.Slice(signConsumed), out nextSymbol, out thisSymbolConsumed)) + { + value = 
default(int); + bytesConsumed = 0; + return false; + } + } + + if (nextSymbol > 9) + { + value = default(int); + bytesConsumed = 0; + return false; + } + + int parsedValue = (int)nextSymbol; + int index = signConsumed + thisSymbolConsumed; + + while (index < text.Length) + { + bool success = encoder.TryParseSymbol(text.Slice(index), out nextSymbol, out thisSymbolConsumed); + if (!success || nextSymbol > 9) + { + bytesConsumed = index; + value = (int)(parsedValue * sign); + return true; + } + + // If parsedValue > (int.MaxValue / 10), any more appended digits will cause overflow. + // if parsedValue == (int.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. + bool positive = sign > 0; + bool nextDigitTooLarge = nextSymbol > 8 || (positive && nextSymbol > 7); + if (parsedValue > int.MaxValue / 10 || (parsedValue == int.MaxValue / 10 && nextDigitTooLarge)) + { + bytesConsumed = 0; + value = default(int); + return false; + } + + index += thisSymbolConsumed; + parsedValue = parsedValue * 10 + (int)nextSymbol; + } + + bytesConsumed = text.Length; + value = (int)(parsedValue * sign); + return true; } public static bool TryParseInt64(ReadOnlySpan text, out long value, out int bytesConsumed, TextFormat format = default(TextFormat), TextEncoder encoder = null) diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs index c4cc7e22a5f..cb662cb9d33 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs @@ -119,7 +119,7 @@ private static void DoNotIgnore(int value, int consumed) [InlineData("+2147")] [InlineData("+214748")] [InlineData("+2147483")] - [InlineData("+2147483648")] + [InlineData("+2147483647")] [InlineData("+214")] [InlineData("000000000000000000001235abcdfg")] [InlineData("214748364abcdefghijklmnop")] @@ 
-148,7 +148,7 @@ private static void DoNotIgnore(int value, int consumed) [InlineData("+2147abcdefghijklmnop")] [InlineData("+214748abcdefghijklmnop")] [InlineData("+2147483abcdefghijklmnop")] - [InlineData("+2147483648abcdefghijklmnop")] + [InlineData("+2147483647abcdefghijklmnop")] [InlineData("+214abcdefghijklmnop")] private static void PrimitiveParserByteSpanToInt32_BytesConsumed(string text) { @@ -303,7 +303,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength( [InlineData("+2147")] [InlineData("+214748")] [InlineData("+2147483")] - [InlineData("+2147483648")] + [InlineData("+2147483647")] [InlineData("+214")] [InlineData("000000000000000000001235abcdfg")] [InlineData("214748364abcdefghijklmnop")] @@ -332,7 +332,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength( [InlineData("+2147abcdefghijklmnop")] [InlineData("+214748abcdefghijklmnop")] [InlineData("+2147483abcdefghijklmnop")] - [InlineData("+2147483648abcdefghijklmnop")] + [InlineData("+2147483647abcdefghijklmnop")] [InlineData("+214abcdefghijklmnop")] private static void PrimitiveParserByteSpanToInt32_BytesConsumed_BASE(string text) { @@ -379,7 +379,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_ [InlineData("107374182")] // standard parse [InlineData("2147483647")] // max value [InlineData("0")] - [InlineData("-2147483648")] // min value + [InlineData("-2147483647")] // min value [InlineData("214748364")] [InlineData("2")] [InlineData("21474836")] @@ -406,7 +406,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_ [InlineData("+2147")] [InlineData("+214748")] [InlineData("+2147483")] - [InlineData("+2147483648")] + [InlineData("+2147483647")] [InlineData("+214")] [InlineData("000000000000000000001235abcdfg")] [InlineData("214748364abcdefghijklmnop")] @@ -435,7 +435,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_ 
[InlineData("+2147abcdefghijklmnop")] [InlineData("+214748abcdefghijklmnop")] [InlineData("+2147483abcdefghijklmnop")] - [InlineData("+2147483648abcdefghijklmnop")] + [InlineData("+2147483647abcdefghijklmnop")] [InlineData("+214abcdefghijklmnop")] private static void PrimitiveParserByteSpanToInt32_BytesConsumed_OLD(string text) { @@ -479,12 +479,68 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_ } } - /*[Benchmark(InnerIterationCount = InnerCount)] - [InlineData("๑๐๗๓๗๔๑๘๒")] // standard parse - [InlineData("๒๑๔๗๔๘๓๖๔๗")] // max value + //[Benchmark(InnerIterationCount = InnerCount)] + [InlineData("๑๐๗๓๗๔๑๘๒")] + [InlineData("๒๑๔๗๔๘๓๖๔๗")] [InlineData("๐")] - [InlineData("๑๐๗")] - [InlineData("ลบ๒๑๔๗๔๘๓๖๔๘")] // min value + [InlineData("ลบ๒๑๔๗๔๘๓๖๔๘")] + [InlineData("๒๑๔๗๔๘๓๖๔")] + [InlineData("๒")] + [InlineData("๒๑๔๗๔๘๓๖")] + [InlineData("ลบ๒๑๔๗๔")] + [InlineData("๒๑๔๗๔")] + [InlineData("ลบ๒๑")] + [InlineData("ลบ๒")] + [InlineData("๒๑๔")] + [InlineData("ลบ๒๑๔๗๔๘๓๖")] + [InlineData("ลบ๒๑๔๗๔๘๓๖๔")] + [InlineData("๒๑๔๗")] + [InlineData("ลบ๒๑๔๗")] + [InlineData("ลบ๒๑๔๗๔๘")] + [InlineData("ลบ๒๑๔๗๔๘๓")] + [InlineData("๒๑๔๗๔๘")] + [InlineData("๒๑")] + [InlineData("๒๑๔๗๔๘๓")] + [InlineData("ลบ๒๑๔")] + [InlineData("+๒๑๔๗๔")] + [InlineData("+๒๑")] + [InlineData("+๒")] + [InlineData("+๒๑๔๗๔๘๓๖")] + [InlineData("+๒๑๔๗๔๘๓๖๔")] + [InlineData("+๒๑๔๗")] + [InlineData("+๒๑๔๗๔๘")] + [InlineData("+๒๑๔๗๔๘๓")] + [InlineData("+๒๑๔๗๔๘๓๖๔๗")] + [InlineData("+๒๑๔")] + [InlineData("๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๑๒๓๕abcdfg")] + [InlineData("๒๑๔๗๔๘๓๖๔abcdefghijklmnop")] + [InlineData("๒abcdefghijklmnop")] + [InlineData("๒๑๔๗๔๘๓๖abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗๔abcdefghijklmnop")] + [InlineData("๒๑๔๗๔abcdefghijklmnop")] + [InlineData("ลบ๒๑abcdefghijklmnop")] + [InlineData("ลบ๒abcdefghijklmnop")] + [InlineData("๒๑๔abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗๔๘๓๖abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗๔๘๓๖๔abcdefghijklmnop")] + 
[InlineData("๒๑๔๗abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗๔๘abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗๔๘๓abcdefghijklmnop")] + [InlineData("๒๑๔๗๔๘abcdefghijklmnop")] + [InlineData("๒๑abcdefghijklmnop")] + [InlineData("๒๑๔๗๔๘๓abcdefghijklmnop")] + [InlineData("ลบ๒๑๔abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔abcdefghijklmnop")] + [InlineData("+๒๑abcdefghijklmnop")] + [InlineData("+๒abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔๘๓๖abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔๘๓๖๔abcdefghijklmnop")] + [InlineData("+๒๑๔๗abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔๘abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔๘๓abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔๘๓๖๔๗abcdefghijklmnop")] + [InlineData("+๒๑๔abcdefghijklmnop")] public unsafe void ParseInt32Thai(string text) { ReadOnlySpan utf8Span = UtfEncode(text, false); @@ -499,6 +555,107 @@ public unsafe void ParseInt32Thai(string text) } } } - }*/ + } + + //[Benchmark(InnerIterationCount = InnerCount)] + [InlineData("๑๐๗๓๗๔๑๘๒")] + [InlineData("๒๑๔๗๔๘๓๖๔๗")] + [InlineData("๐")] + [InlineData("ลบ๒๑๔๗๔๘๓๖๔๘")] + [InlineData("๒๑๔๗๔๘๓๖๔")] + [InlineData("๒")] + [InlineData("๒๑๔๗๔๘๓๖")] + [InlineData("ลบ๒๑๔๗๔")] + [InlineData("๒๑๔๗๔")] + [InlineData("ลบ๒๑")] + [InlineData("ลบ๒")] + [InlineData("๒๑๔")] + [InlineData("ลบ๒๑๔๗๔๘๓๖")] + [InlineData("ลบ๒๑๔๗๔๘๓๖๔")] + [InlineData("๒๑๔๗")] + [InlineData("ลบ๒๑๔๗")] + [InlineData("ลบ๒๑๔๗๔๘")] + [InlineData("ลบ๒๑๔๗๔๘๓")] + [InlineData("๒๑๔๗๔๘")] + [InlineData("๒๑")] + [InlineData("๒๑๔๗๔๘๓")] + [InlineData("ลบ๒๑๔")] + [InlineData("+๒๑๔๗๔")] + [InlineData("+๒๑")] + [InlineData("+๒")] + [InlineData("+๒๑๔๗๔๘๓๖")] + [InlineData("+๒๑๔๗๔๘๓๖๔")] + [InlineData("+๒๑๔๗")] + [InlineData("+๒๑๔๗๔๘")] + [InlineData("+๒๑๔๗๔๘๓")] + [InlineData("+๒๑๔๗๔๘๓๖๔๗")] + [InlineData("+๒๑๔")] + [InlineData("๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๑๒๓๕abcdfg")] + [InlineData("๒๑๔๗๔๘๓๖๔abcdefghijklmnop")] + [InlineData("๒abcdefghijklmnop")] + [InlineData("๒๑๔๗๔๘๓๖abcdefghijklmnop")] + 
[InlineData("ลบ๒๑๔๗๔abcdefghijklmnop")] + [InlineData("๒๑๔๗๔abcdefghijklmnop")] + [InlineData("ลบ๒๑abcdefghijklmnop")] + [InlineData("ลบ๒abcdefghijklmnop")] + [InlineData("๒๑๔abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗๔๘๓๖abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗๔๘๓๖๔abcdefghijklmnop")] + [InlineData("๒๑๔๗abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗๔๘abcdefghijklmnop")] + [InlineData("ลบ๒๑๔๗๔๘๓abcdefghijklmnop")] + [InlineData("๒๑๔๗๔๘abcdefghijklmnop")] + [InlineData("๒๑abcdefghijklmnop")] + [InlineData("๒๑๔๗๔๘๓abcdefghijklmnop")] + [InlineData("ลบ๒๑๔abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔abcdefghijklmnop")] + [InlineData("+๒๑abcdefghijklmnop")] + [InlineData("+๒abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔๘๓๖abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔๘๓๖๔abcdefghijklmnop")] + [InlineData("+๒๑๔๗abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔๘abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔๘๓abcdefghijklmnop")] + [InlineData("+๒๑๔๗๔๘๓๖๔๗abcdefghijklmnop")] + [InlineData("+๒๑๔abcdefghijklmnop")] + public unsafe void ParseInt32Thai_OLD(string text) + { + ReadOnlySpan utf8Span = UtfEncode(text, false); + foreach (var iteration in Benchmark.Iterations) + { + using (iteration.StartMeasurement()) + { + for (int i = 0; i < Benchmark.InnerIterationCount; i++) + { + PrimitiveParser.TryParseInt32_OLD(utf8Span, out int value, out int bytesConsumed, 'G', s_thaiEncoder); + DoNotIgnore(value, bytesConsumed); + } + } + } + } + + static byte[][] s_thaiUtf8DigitsAndSymbols = new byte[][] +{ + new byte[] { 0xe0, 0xb9, 0x90 }, new byte[] { 0xe0, 0xb9, 0x91 }, new byte[] { 0xe0, 0xb9, 0x92 }, + new byte[] { 0xe0, 0xb9, 0x93 }, new byte[] { 0xe0, 0xb9, 0x94 }, new byte[] { 0xe0, 0xb9, 0x95 }, new byte[] { 0xe0, 0xb9, 0x96 }, + new byte[] { 0xe0, 0xb9, 0x97 }, new byte[] { 0xe0, 0xb9, 0x98 }, new byte[] { 0xe0, 0xb9, 0x99 }, new byte[] { 0xE0, 0xB8, 0x88, 0xE0, 0xB8, 0x94 }, null, + new byte[] { 0xE0, 0xB8, 0xAA, 0xE0, 0xB8, 0xB4, 0xE0, 0xB9, 0x88, 0xE0, 0xB8, 0x87, 
0xE0, 0xB8, 0x97, 0xE0, 0xB8, 0xB5, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x83, + 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0x8D, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x82, 0xE0, 0xB8, 0x95, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0xA5, 0xE0, + 0xB8, 0xB7, 0xE0, 0xB8, 0xAD, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0x81, 0xE0, 0xB8, 0xB4, 0xE0, 0xB8, 0x99 }, + new byte[] { 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x9A }, new byte[] { 43 }, new byte[] { 0xE0, 0xB9, 0x84, 0xE0, 0xB8, 0xA1, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, + 0x83, 0xE0, 0xB8, 0x8A, 0xE0, 0xB9, 0x88, 0xE0, 0xB8, 0x95, 0xE0, 0xB8, 0xB1, 0xE0, 0xB8, 0xA7, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x82 }, + new byte[] { 69 }, new byte[] { 101 }, +}; + + static TextEncoder s_thaiEncoder = TextEncoder.CreateUtf8Encoder(s_thaiUtf8DigitsAndSymbols); + + private byte[] UtfEncode(string s, bool utf16) + { + if (utf16) + return Text.Encoding.Unicode.GetBytes(s); + else + return Text.Encoding.UTF8.GetBytes(s); + } } } diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs index ef1b30d6b0d..44e82fe3f1a 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs @@ -1149,40 +1149,6 @@ public unsafe void ParseInt16Hex(string text, bool expectSuccess, short expected #region int [Theory] - /*[InlineData("a1", true, 0, 0)] - [InlineData("1", true, 1, 1)] - [InlineData("-1", true, -1, 2)] - [InlineData("11", true, 11, 2)] - [InlineData("-11", true, -11, 3)] - [InlineData("00a0", true, 0, 2)] - [InlineData("00a", true, 0, 2)] - [InlineData("111", true, 111, 3)] - [InlineData("492206507abcdefg", true, 492206507, 9)] - [InlineData("2147483647", true, 2147483647, 10)] // max - [InlineData("-2147483648", true, -2147483648, 11)] // min - [InlineData("-A", true, 0, 0)] // invalid character after a sign - [InlineData("I am 1", true, 0, 0)] // invalid 
character test - [InlineData(" !", true, 0, 0)] // invalid character test w/ char < '0' - [InlineData("2147483648", true, 214748364, 9)] // positive overflow test - [InlineData("-2147483649", true, -214748364, 10)] // negative overflow test - [InlineData("0", true, 0, 1)] - [InlineData("+1", true, 1, 2)] - [InlineData("+2147483647", true, 2147483647, 11)] - [InlineData("as3gf31t`2c", true, 0, 0)] - [InlineData("agbagbagb5", true, 0, 0)] - [InlineData("1faag", true, 1, 1)] - [InlineData("-1sdg", true, -1, 2)] - [InlineData("-afsagsag4", true, 0, 0)] - [InlineData("+a", true, 0, 0)] - [InlineData("-000012345abcdefg1", true, -12345, 10)] - [InlineData("+000012345abcdefg1", true, 12345, 10)] - [InlineData("000012345abcdefg1", true, 12345, 9)] - [InlineData("0000001234145abcdefg1", true, 1234145, 13)] - [InlineData("+", true, 0, 0)] - [InlineData("-", true, 0, 0)] - [InlineData("", true, 0, 0)] - [InlineData("5", true, 5, 1)] - [InlineData("^", true, 0, 0)]*/ [InlineData("a1", false, 0, 0)] [InlineData("1", true, 1, 1)] [InlineData("-1", true, -1, 2)] @@ -1218,6 +1184,10 @@ public unsafe void ParseInt16Hex(string text, bool expectSuccess, short expected [InlineData("5", true, 5, 1)] [InlineData("^", false, 0, 0)] [InlineData("41474836482145", false, 0, 0)] + [InlineData("02147483647", true, 2147483647, 11)] // max + [InlineData("-02147483648", true, -2147483648, 12)] // min + [InlineData("02147483648", false, 0, 0)] // positive overflow test + [InlineData("-02147483649", false, 0, 0)] // negative overflow test public unsafe void ParseInt32Dec(string text, bool expectSuccess, int expectedValue, int expectedConsumed) { int parsedValue; @@ -1229,57 +1199,122 @@ public unsafe void ParseInt32Dec(string text, bool expectSuccess, int expectedVa char[] textChars = utf16CharSpan.ToArray(); bool result; + result = PrimitiveParser.TryParseInt32(utf8Span, out parsedValue, out consumed, 'G', TextEncoder.Utf8); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, 
parsedValue); + Assert.Equal(expectedConsumed, consumed); - result = PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(utf8Span, out parsedValue, out consumed); + result = PrimitiveParser.TryParseInt32(utf8Span, out parsedValue, out consumed); Assert.Equal(expectSuccess, result); Assert.Equal(expectedValue, parsedValue); Assert.Equal(expectedConsumed, consumed); - } - private static readonly string[] s_Int32TextArray = new string[20] - { - "214748364", - "2", - "21474836", - "-21474", - "21474", - "-21", - "-2", - "214", - "-21474836", - "-214748364", - "2147", - "-2147", - "-214748", - "-2147483", - "214748", - "-2147483648", - "2147483647", - "21", - "2147483", - "-214" - }; + result = PrimitiveParser.InvariantUtf8.TryParseInt32(utf8Span, out parsedValue); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); - [Fact] - private static void ParseInt32DecVariableLength() - { - int textLength = s_Int32TextArray.Length; - byte[][] utf8ByteArray = (byte[][])Array.CreateInstance(typeof(byte[]), textLength); - for (var i = 0; i < textLength; i++) + result = PrimitiveParser.InvariantUtf8.TryParseInt32(utf8Span, out parsedValue, out consumed); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + Assert.Equal(expectedConsumed, consumed); + + fixed (byte* arrayPointer = textBytes) { - utf8ByteArray[i] = Text.Encoding.UTF8.GetBytes(s_Int32TextArray[i]); + result = PrimitiveParser.InvariantUtf8.TryParseInt32(arrayPointer, textBytes.Length, out parsedValue); + + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + + result = PrimitiveParser.InvariantUtf8.TryParseInt32(arrayPointer, textBytes.Length, out parsedValue, out consumed); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + Assert.Equal(expectedConsumed, consumed); } - for (int i = 0; i < 100; i++) + result = PrimitiveParser.TryParseInt32(utf16ByteSpan, out parsedValue, out consumed, 'G', 
TextEncoder.Utf16); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + Assert.Equal(expectedConsumed * sizeof(char), consumed); + + result = PrimitiveParser.InvariantUtf16.TryParseInt32(utf16CharSpan, out parsedValue); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + + result = PrimitiveParser.InvariantUtf16.TryParseInt32(utf16CharSpan, out parsedValue, out consumed); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + Assert.Equal(expectedConsumed, consumed); + + fixed (char* arrayPointer = textChars) { - ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; - PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan, out int value, out int bytesConsumed); + result = PrimitiveParser.InvariantUtf16.TryParseInt32(arrayPointer, textBytes.Length, out parsedValue); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + + result = PrimitiveParser.InvariantUtf16.TryParseInt32(arrayPointer, textBytes.Length, out parsedValue, out consumed); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + Assert.Equal(expectedConsumed, consumed); } } + + [Theory] + [InlineData("2", true, 2, 1)] + [InlineData("21", true, 21, 2)] + [InlineData("214", true, 214, 3)] + [InlineData("2147", true, 2147, 4)] + [InlineData("21474", true, 21474, 5)] + [InlineData("214748", true, 214748, 6)] + [InlineData("2147483", true, 2147483, 7)] + [InlineData("21474836", true, 21474836, 8)] + [InlineData("214748364", true, 214748364, 9)] + [InlineData("2147483647", true, 2147483647, 10)] + [InlineData("+2", true, 2, 2)] + [InlineData("+21", true, 21, 3)] + [InlineData("+214", true, 214, 4)] + [InlineData("+2147", true, 2147, 5)] + [InlineData("+21474", true, 21474, 6)] + [InlineData("+214748", true, 214748, 7)] + [InlineData("+2147483", true, 2147483, 8)] + [InlineData("+21474836", true, 21474836, 9)] + [InlineData("+214748364", true, 
214748364, 10)] + [InlineData("+2147483647", true, 2147483647, 11)] + [InlineData("-2", true, -2, 2)] + [InlineData("-21", true, -21, 3)] + [InlineData("-214", true, -214, 4)] + [InlineData("-2147", true, -2147, 5)] + [InlineData("-21474", true, -21474, 6)] + [InlineData("-214748", true, -214748, 7)] + [InlineData("-2147483", true, -2147483, 8)] + [InlineData("-21474836", true, -21474836, 9)] + [InlineData("-214748364", true, -214748364, 10)] + [InlineData("-2147483647", true, -2147483647, 11)] + private void ParseInt32VariableLength(string text, bool expectSuccess, int expectedValue, int expectedConsumed) + { + ReadOnlySpan utf8Span = UtfEncode(text, false); + bool result = PrimitiveParser.InvariantUtf8.TryParseInt32(utf8Span, out int parsedValue, out int consumed); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + Assert.Equal(expectedConsumed, consumed); + } - [Fact] - public unsafe void ParseInt32Dec2() + //[Theory] + [InlineData("0", true, 0, int.MaxValue)] + [InlineData("2", true, 2, int.MaxValue)] + [InlineData("21", true, 21, int.MaxValue)] + [InlineData("+2", true, 2, int.MaxValue)] + [InlineData("-2", true, -2, int.MaxValue)] + [InlineData("2147483647", true, 2147483647, int.MaxValue)] // max + [InlineData("-2147483648", true, -2147483648, int.MaxValue)] // min + [InlineData("2147483648", false, 0, 0)] // positive overflow test + [InlineData("-2147483649", false, 0, 0)] // negative overflow test + [InlineData("12345abcdefg1", true, 12345, int.MaxValue - 8)] + [InlineData("1234145abcdefg1", true, 1234145, int.MaxValue - 8)] + [InlineData("abcdefghijklmnop1", true, 0, int.MaxValue - 17)] + public unsafe void ParseInt32OverflowCheck(string text, bool expectSuccess, int expectedValue, int expectedConsumed) { + ReadOnlySpan utf8Span = UtfEncode(text, false); + const int TwoGiB = int.MaxValue; unsafe @@ -1291,18 +1326,20 @@ public unsafe void ParseInt32Dec2() { ref byte memory = ref Unsafe.AsRef(memBlock.ToPointer()); var span 
= new Span(memBlock.ToPointer(), TwoGiB); + span.Fill(48); - for (int i = 0; i < TwoGiB - 2; i++) + byte sign = utf8Span[0]; + if (sign == '-' || sign == '+') { - span[i] = 48; + span[0] = sign; + utf8Span = utf8Span.Slice(1); } - span[TwoGiB - 1] = 49; - span[TwoGiB - 2] = 49; + utf8Span.CopyTo(span.Slice(TwoGiB - utf8Span.Length)); - bool result = PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(span, out int parsedValue, out int consumed); - Assert.Equal(true, result); - Assert.Equal(11, parsedValue); - Assert.Equal(TwoGiB, consumed); + bool result = PrimitiveParser.InvariantUtf8.TryParseInt32(span, out int parsedValue, out int consumed); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + Assert.Equal(expectedConsumed, consumed); } finally { @@ -1311,70 +1348,154 @@ public unsafe void ParseInt32Dec2() } } - - [Fact] - private static void ParseTestNew() + [Theory] + [InlineData("๑๑๑", true, 111, 9)] + [InlineData("๕๖กขฃคฅฆง", true, 56, 6)] + [InlineData("๑๒๗", true, 127, 9)] // max + [InlineData("ลบ๑๒๘", true, -128, 15)] // min + [InlineData("ลบA", false, 0, 0)] // invalid character after a sign + [InlineData("I am ๑", false, 0, 0)] // invalid character test + [InlineData(" !", false, 0, 0)] // invalid character test w/ char < '0' + [InlineData("ลป๑", false, 0, 0)] + [InlineData("๑๐๗๓๗๔๑๘๒", true, 107374182, 9 * 3)] + [InlineData("๒๑๔๗๔๘๓๖๔๗", true, 2147483647, 10 * 3)] + [InlineData("๐๒๑๔๗๔๘๓๖๔๗", true, 2147483647, 11 * 3)] + [InlineData("๐", true, 0, 1 * 3)] + [InlineData("ลบ๒๑๔๗๔๘๓๖๔๘", true, -2147483648, 10 * 3 + 6)] + [InlineData("ลบ๐๒๑๔๗๔๘๓๖๔๘", true, -2147483648, 11 * 3 + 6)] + [InlineData("๒๑๔๗๔๘๓๖๔", true, 214748364, 9 * 3)] + [InlineData("๒", true, 2, 1 * 3)] + [InlineData("๒๑๔๗๔๘๓๖", true, 21474836, 8 * 3)] + [InlineData("ลบ๒๑๔๗๔", true, -21474, 5 * 3 + 6)] + [InlineData("๒๑๔๗๔", true, 21474, 5 * 3)] + [InlineData("ลบ๒๑", true, -21, 2 * 3 + 6)] + [InlineData("ลบ๒", true, -2, 1 * 3 + 6)] + [InlineData("๒๑๔", true, 214, 3 
* 3)] + [InlineData("ลบ๒๑๔๗๔๘๓๖", true, -21474836, 8 * 3 + 6)] + [InlineData("ลบ๒๑๔๗๔๘๓๖๔", true, -214748364, 9 * 3 + 6)] + [InlineData("๒๑๔๗", true, 2147, 4 * 3)] + [InlineData("ลบ๒๑๔๗", true, -2147, 4 * 3 + 6)] + [InlineData("ลบ๒๑๔๗๔๘", true, -214748, 6 * 3 + 6)] + [InlineData("ลบ๒๑๔๗๔๘๓", true, -2147483, 7 * 3 + 6)] + [InlineData("๒๑๔๗๔๘", true, 214748, 6 * 3)] + [InlineData("๒๑", true, 21, 2 * 3)] + [InlineData("๒๑๔๗๔๘๓", true, 2147483, 7 * 3)] + [InlineData("ลบ๒๑๔", true, -214, 3 * 3 + 6)] + [InlineData("+๒๑๔๗๔", true, 21474, 5 * 3 + 1)] + [InlineData("+๒๑", true, 21, 2 * 3 + 1)] + [InlineData("+๒", true, 2, 1 * 3 + 1)] + [InlineData("+๒๑๔๗๔๘๓๖", true, 21474836, 8 * 3 + 1)] + [InlineData("+๒๑๔๗๔๘๓๖๔", true, 214748364, 9 * 3 + 1)] + [InlineData("+๒๑๔๗", true, 2147, 4 * 3 + 1)] + [InlineData("+๒๑๔๗๔๘", true, 214748, 6 * 3 + 1)] + [InlineData("+๒๑๔๗๔๘๓", true, 2147483, 7 * 3 + 1)] + [InlineData("+๒๑๔๗๔๘๓๖๔๗", true, 2147483647, 10 * 3 + 1)] + [InlineData("+๒๑๔", true, 214, 3 * 3 + 1)] + [InlineData("๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๑๒๓๕abcdfg", true, 1235, 24 * 3)] + [InlineData("๒๑๔๗๔๘๓๖๔abcdefghijklmnop", true, 214748364, 9 * 3)] + [InlineData("๒abcdefghijklmnop", true, 2, 1 * 3)] + [InlineData("๒๑๔๗๔๘๓๖abcdefghijklmnop", true, 21474836, 8 * 3)] + [InlineData("ลบ๒๑๔๗๔abcdefghijklmnop", true, -21474, 5 * 3 + 6)] + [InlineData("๒๑๔๗๔abcdefghijklmnop", true, 21474, 5 * 3)] + [InlineData("ลบ๒๑abcdefghijklmnop", true, -21, 2 * 3 + 6)] + [InlineData("ลบ๒abcdefghijklmnop", true, -2, 1 * 3 + 6)] + [InlineData("๒๑๔abcdefghijklmnop", true, 214, 3 * 3)] + [InlineData("ลบ๒๑๔๗๔๘๓๖abcdefghijklmnop", true, -21474836, 8 * 3 + 6)] + [InlineData("ลบ๒๑๔๗๔๘๓๖๔abcdefghijklmnop", true, -214748364, 9 * 3 + 6)] + [InlineData("๒๑๔๗abcdefghijklmnop", true, 2147, 4 * 3)] + [InlineData("ลบ๒๑๔๗abcdefghijklmnop", true, -2147, 4 * 3 + 6)] + [InlineData("ลบ๒๑๔๗๔๘abcdefghijklmnop", true, -214748, 6 * 3 + 6)] + [InlineData("ลบ๒๑๔๗๔๘๓abcdefghijklmnop", true, -2147483, 7 * 3 + 6)] + 
[InlineData("๒๑๔๗๔๘abcdefghijklmnop", true, 214748, 6 * 3)] + [InlineData("๒๑abcdefghijklmnop", true, 21, 2 * 3)] + [InlineData("๒๑๔๗๔๘๓abcdefghijklmnop", true, 2147483, 7 * 3)] + [InlineData("ลบ๒๑๔abcdefghijklmnop", true, -214, 3 * 3 + 6)] + [InlineData("+๒๑๔๗๔abcdefghijklmnop", true, 21474, 5 * 3 + 1)] + [InlineData("+๒๑abcdefghijklmnop", true, 21, 2 * 3 + 1)] + [InlineData("+๒abcdefghijklmnop", true, 2, 1 * 3 + 1)] + [InlineData("+๒๑๔๗๔๘๓๖abcdefghijklmnop", true, 21474836, 8 * 3 + 1)] + [InlineData("+๒๑๔๗๔๘๓๖๔abcdefghijklmnop", true, 214748364, 9 * 3 + 1)] + [InlineData("+๒๑๔๗abcdefghijklmnop", true, 2147, 4 * 3 + 1)] + [InlineData("+๒๑๔๗๔๘abcdefghijklmnop", true, 214748, 6 * 3 + 1)] + [InlineData("+๒๑๔๗๔๘๓abcdefghijklmnop", true, 2147483, 7 * 3 + 1)] + [InlineData("+๒๑๔๗๔๘๓๖๔๗abcdefghijklmnop", true, 2147483647, 10 * 3 + 1)] + [InlineData("+๒๑๔abcdefghijklmnop", true, 214, 3 * 3 + 1)] + [InlineData("๐๐a๐", true, 0, 6)] + [InlineData("๐๐a", true, 0, 6)] + [InlineData("", false, 0, 0)] + [InlineData("+", false, 0, 0)] + [InlineData("ลบ", false, 0, 0)] + public unsafe void ParseInt32Thai(string text, bool expectSuccess, int expectedValue, int expectedConsumed) { - string text = GenerateRandomDigitString(); - byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); - var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); + ReadOnlySpan utf8Span = UtfEncode(text, false); + bool result; - int final = 0; - int totalConsumed = 0; - while (totalConsumed < utf8ByteSpan.Length) - { - PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(utf8ByteSpan.Slice(totalConsumed), out int value, out int bytesConsumed); - totalConsumed += bytesConsumed; - final |= value; - } - Assert.Equal(-1, final); + result = PrimitiveParser.TryParseInt32(utf8Span, out int parsedValue, out int consumed, 'G', s_thaiEncoder); + + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + Assert.Equal(expectedConsumed, consumed); } - private static string 
GenerateRandomDigitString(int count = 1000) + //[Theory] + [InlineData("๐", true, 0, int.MaxValue)] + [InlineData("๒", true, 2, int.MaxValue)] + [InlineData("๒๑", true, 21, int.MaxValue)] + [InlineData("+๒", true, 2, int.MaxValue)] + [InlineData("ลบ๒", true, -2, int.MaxValue)] + [InlineData("๒๑๔๗๔๘๓๖๔๗", true, 2147483647, int.MaxValue)] // max + [InlineData("ลบ๒๑๔๗๔๘๓๖๔๘", true, -2147483648, int.MaxValue)] // min + [InlineData("๒๑๔๗๔๘๓๖๔๘", false, 0, 0)] // positive overflow test + [InlineData("ลบ๒๑๔๗๔๘๓๖๔๙", false, 0, 0)] // negative overflow test + [InlineData("๑๒๓๔๕abcdefg๑", true, 12345, int.MaxValue - 8)] + [InlineData("๑๒๓๔๑๔๕abcdefg๑", true, 1234145, int.MaxValue - 8)] + [InlineData("abcdefghijklmnop๑", true, 0, int.MaxValue - 17)] + public unsafe void ParseInt32ThaiOverflowCheck(string text, bool expectSuccess, int expectedValue, int expectedConsumed) { - Random rnd = new Random(42); - var builder = new StringBuilder(); + ReadOnlySpan utf8Span = UtfEncode(text, false); + + const int TwoGiB = int.MaxValue; - for (int j = 0; j < count; j++) + unsafe { - int sign = rnd.Next(0, 3); - if (sign == 1) builder.Append("+"); - if (sign == 2) builder.Append("-"); - var length = rnd.Next(1, 14); - for (int i = 0; i < length; i++) - { - int digit = rnd.Next(0, 10); - builder.Append(digit.ToString()); - } - } - return builder.ToString(); - } + if (!AllocationHelper.TryAllocNative((IntPtr)TwoGiB, out IntPtr memBlock)) + return; // It's not implausible to believe that a 2gb allocation will fail - if so, skip this test to avoid unnecessary test flakiness. 
+ try + { + ref byte memory = ref Unsafe.AsRef(memBlock.ToPointer()); + var span = new Span(memBlock.ToPointer(), TwoGiB); + for (int i = 0; i < TwoGiB / 3; i ++) + { + span[i * 3] = 0xe0; + span[i * 3 + 1] = 0xb9; + span[i * 3 + 2] = 0x90; + } - [Theory] - [InlineData("๑๑๑", true, 0, 111, 9)] - [InlineData("เรื่องเหลวไหล๒๗", true, 39, 27, 6)] - [InlineData("๕๖กขฃคฅฆง", true, 0, 56, 6)] - [InlineData("ที่เล็กที่สุดของประเภทนี้คือลบ๑๒๘.", true, 84, -128, 15)] - [InlineData("ปล่อยให้พวกเขา ลบ๒๘ กินเค้ก", true, 43, -28, 12)] - [InlineData("๑๒๗", true, 0, 127, 9)] // max - [InlineData("ลบ๑๒๘", true, 0, -128, 15)] // min - [InlineData("ลบA", false, 0, 0, 0)] // invalid character after a sign - [InlineData("I am ๑", false, 0, 0, 0)] // invalid character test - [InlineData(" !", false, 0, 0, 0)] // invalid character test w/ char < '0' - [InlineData("ลป๑", false, 0, 0, 0)] // - public unsafe void ParseInt32Thai(string text, bool expectSuccess, int index, int expectedValue, int expectedConsumed) - { - int parsedValue; - int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - bool result; + byte sign = utf8Span[0]; + Span minusSpan = new byte[] { sign, 0xb8, 0xa5, 0xe0, 0xb8, 0x9a }; + if (sign == '+') + { + span[0] = sign; + utf8Span = utf8Span.Slice(1); + } + else if (span.StartsWith(minusSpan)) + { + utf8Span = utf8Span.Slice(6); + } - result = PrimitiveParser.TryParseInt32(utf8Span.Slice(index), out parsedValue, out consumed, 'G', s_thaiEncoder); + utf8Span.CopyTo(span.Slice(TwoGiB - utf8Span.Length)); - Assert.Equal(expectSuccess, result); - Assert.Equal(expectedValue, parsedValue); - Assert.Equal(expectedConsumed, consumed); + bool result = PrimitiveParser.TryParseInt32(span, out int parsedValue, out int consumed, 'G', s_thaiEncoder); + Assert.Equal(expectSuccess, result); + Assert.Equal(expectedValue, parsedValue); + Assert.Equal(expectedConsumed, consumed); + } + finally + { + AllocationHelper.ReleaseNative(ref memBlock); + } + } } [Theory] @@ -1547,7 
+1668,7 @@ public unsafe void ParseInt64Dec(string text, bool expectSuccess, long expectedV [InlineData("ลบA", false, 0, 0, 0)] // invalid character after a sign [InlineData("I am ๑", false, 0, 0, 0)] // invalid character test [InlineData(" !", false, 0, 0, 0)] // invalid character test w/ char < '0' - [InlineData("ลป๑", false, 0, 0, 0)] // + [InlineData("ลป๑", false, 0, 0, 0)] public unsafe void ParseInt64Thai(string text, bool expectSuccess, int index, long expectedValue, int expectedConsumed) { long parsedValue; From c8f775d2c7260487efc652ad2bedc5f40bf1b92a Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Tue, 20 Jun 2017 22:41:17 -0700 Subject: [PATCH 05/16] Cleaning up functional and performance tests and removing old code. --- .../System/Text/Parsing/InvariantSigned.cs | 97 ------ .../System/Text/Parsing/Signed.cs | 143 +-------- .../Parsing/PrimitiveParserInt32PerfTests.cs | 300 +----------------- .../Parsing/PrimitiveParserIntegerTests.cs | 56 +--- .../TestHelper.cs | 40 +++ 5 files changed, 60 insertions(+), 576 deletions(-) diff --git a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs index 69af9d79674..d656b775771 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs @@ -988,103 +988,6 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int return true; } - public static bool TryParseInt32_OLD(ReadOnlySpan text, out int value, out int bytesConsumed) - { - if (text.Length < 1) - { - bytesConsumed = 0; - value = default(int); - return false; - } - - int indexOfFirstDigit = 0; - int sign = 1; - if (text[0] == '-') - { - indexOfFirstDigit = 1; - sign = -1; - } - else if (text[0] == '+') - { - indexOfFirstDigit = 1; - } - - if (indexOfFirstDigit >= text.Length) - { - bytesConsumed = 0; - value = default(int); - return false; - } - - int overflowLength = 
Int32OverflowLength + indexOfFirstDigit; - - // Parse the first digit separately. If invalid here, we need to return false. - int firstDigit = text[indexOfFirstDigit] - 48; // '0' - if (firstDigit < 0 || firstDigit > 9) - { - bytesConsumed = 0; - value = default(int); - return false; - } - int parsedValue = firstDigit; - - if (text.Length < overflowLength) - { - // Length is less than Int32OverflowLength; overflow is not possible - for (int index = indexOfFirstDigit + 1; index < text.Length; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - bytesConsumed = index; - value = parsedValue * sign; - return true; - } - parsedValue = parsedValue * 10 + nextDigit; - } - } - else - { - // Length is greater than Int32OverflowLength; overflow is only possible after Int32OverflowLength - // digits. There may be no overflow after Int32OverflowLength if there are leading zeroes. - for (int index = indexOfFirstDigit + 1; index < overflowLength - 1; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - bytesConsumed = index; - value = parsedValue * sign; - return true; - } - parsedValue = parsedValue * 10 + nextDigit; - } - for (int index = overflowLength - 1; index < text.Length; index++) - { - int nextDigit = text[index] - 48; // '0' - if (nextDigit < 0 || nextDigit > 9) - { - bytesConsumed = index; - value = parsedValue * sign; - return true; - } - // If parsedValue > (int.MaxValue / 10), any more appended digits will cause overflow. - // if parsedValue == (int.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. 
- bool positive = sign > 0; - bool nextDigitTooLarge = nextDigit > 8 || (positive && nextDigit > 7); - if (parsedValue > int.MaxValue / 10 || parsedValue == int.MaxValue / 10 && nextDigitTooLarge) - { - bytesConsumed = 0; - value = 0; - return false; - } - parsedValue = parsedValue * 10 + nextDigit; - } - } - - bytesConsumed = text.Length; - value = parsedValue * sign; - return true; - } #endregion #region Int64 diff --git a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs index cc4607ed69e..cfa57fed12d 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs @@ -24,9 +24,9 @@ private static bool IsDigit(int i) } [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool IsInvalid(uint i) + private static bool IsValid(uint i) { - return i > (uint)TextEncoder.Symbol.D9 || i < (uint)TextEncoder.Symbol.D0; + return i <= (uint)TextEncoder.Symbol.D9; } // If parsedValue > (sbyte.MaxValue / 10), any more appended digits will cause overflow. @@ -181,7 +181,6 @@ private static bool WillOverFlow(long value, int nextDigit, int sign) return true; } - public static bool TryParseInt16(ReadOnlySpan text, out short value, out int bytesConsumed, TextFormat format = default(TextFormat), TextEncoder encoder = null) { encoder = encoder == null ? 
TextEncoder.Utf8 : encoder; @@ -296,7 +295,6 @@ private static bool WillOverFlow(long value, int nextDigit, int sign) return true; } - public static bool TryParseInt32(ReadOnlySpan text, out int value, out int bytesConsumed, TextFormat format = default(TextFormat), TextEncoder encoder = null) { bool isDefault = format.IsDefault; @@ -360,7 +358,7 @@ private static bool WillOverFlow(long value, int nextDigit, int sign) } int answer = 0; - if (symbol <= (uint)TextEncoder.Symbol.D9) + if (IsValid(symbol)) { int numBytes = consumed; if (symbol == (uint)TextEncoder.Symbol.D0) @@ -371,7 +369,7 @@ private static bool WillOverFlow(long value, int nextDigit, int sign) if (index >= textLength) goto Done; if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed)) goto Done; } while (symbol == (uint)TextEncoder.Symbol.D0); - if (IsInvalid(symbol)) goto Done; + if (!IsValid(symbol)) goto Done; } int firstNonZeroDigitIndex = index; @@ -383,7 +381,7 @@ private static bool WillOverFlow(long value, int nextDigit, int sign) index += consumed; if (index >= textLength) goto Done; if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed)) goto Done; - } while (symbol <= (uint)TextEncoder.Symbol.D9); + } while (IsValid(symbol)); } else { @@ -394,7 +392,7 @@ private static bool WillOverFlow(long value, int nextDigit, int sign) if (index - firstNonZeroDigitIndex == Int32OverflowLength * numBytes - 1) { if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed)) goto Done; - if (symbol <= (uint)TextEncoder.Symbol.D9) + if (IsValid(symbol)) { symbol -= (int)TextEncoder.Symbol.D0; if (WillOverFlow(answer, (int)symbol, sign)) goto FalseExit; @@ -404,7 +402,7 @@ private static bool WillOverFlow(long value, int nextDigit, int sign) goto Done; } if (!encoder.TryParseSymbol(ref Unsafe.Add(ref textByte, index), out symbol, out consumed)) goto Done; - } while (symbol <= (uint)TextEncoder.Symbol.D9); + } 
while (IsValid(symbol)); } goto Done; } @@ -420,133 +418,6 @@ private static bool WillOverFlow(long value, int nextDigit, int sign) return true; } - public static bool TryParseInt32_OLD(ReadOnlySpan text, out int value, out int bytesConsumed, TextFormat format = default(TextFormat), TextEncoder encoder = null) - { - encoder = encoder == null ? TextEncoder.Utf8 : encoder; - - if (!format.IsDefault && format.HasPrecision) - { - throw new NotImplementedException("Format with precision not supported."); - } - - if (encoder.IsInvariantUtf8) - { - if (format.IsHexadecimal) - { - return InvariantUtf8.Hex.TryParseInt32(text, out value, out bytesConsumed); - } - else - { - return InvariantUtf8.TryParseInt32(text, out value, out bytesConsumed); - } - } - else if (encoder.IsInvariantUtf16) - { - ReadOnlySpan utf16Text = text.NonPortableCast(); - int charsConsumed; - bool result; - if (format.IsHexadecimal) - { - result = InvariantUtf16.Hex.TryParseInt32(utf16Text, out value, out charsConsumed); - } - else - { - result = InvariantUtf16.TryParseInt32(utf16Text, out value, out charsConsumed); - } - bytesConsumed = charsConsumed * sizeof(char); - return result; - } - - if (format.IsHexadecimal) - { - throw new NotImplementedException("The only supported encodings for hexadecimal parsing are InvariantUtf8 and InvariantUtf16."); - } - - if (!(format.IsDefault || format.Symbol == 'G' || format.Symbol == 'g')) - { - throw new NotImplementedException(String.Format("Format '{0}' not supported.", format.Symbol)); - } - - if (text.Length < 1) - { - bytesConsumed = 0; - value = default(int); - return false; - } - - uint nextSymbol; - int thisSymbolConsumed; - if (!encoder.TryParseSymbol(text, out nextSymbol, out thisSymbolConsumed)) - { - value = default(int); - bytesConsumed = 0; - return false; - } - - int sign = 1; - if ((TextEncoder.Symbol)nextSymbol == TextEncoder.Symbol.MinusSign) - { - sign = -1; - } - - int signConsumed = 0; - if ((TextEncoder.Symbol)nextSymbol == 
TextEncoder.Symbol.PlusSign || (TextEncoder.Symbol)nextSymbol == TextEncoder.Symbol.MinusSign) - { - signConsumed = thisSymbolConsumed; - if (signConsumed >= text.Length) - { - bytesConsumed = 0; - value = default(int); - return false; - } - if (!encoder.TryParseSymbol(text.Slice(signConsumed), out nextSymbol, out thisSymbolConsumed)) - { - value = default(int); - bytesConsumed = 0; - return false; - } - } - - if (nextSymbol > 9) - { - value = default(int); - bytesConsumed = 0; - return false; - } - - int parsedValue = (int)nextSymbol; - int index = signConsumed + thisSymbolConsumed; - - while (index < text.Length) - { - bool success = encoder.TryParseSymbol(text.Slice(index), out nextSymbol, out thisSymbolConsumed); - if (!success || nextSymbol > 9) - { - bytesConsumed = index; - value = (int)(parsedValue * sign); - return true; - } - - // If parsedValue > (int.MaxValue / 10), any more appended digits will cause overflow. - // if parsedValue == (int.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. - bool positive = sign > 0; - bool nextDigitTooLarge = nextSymbol > 8 || (positive && nextSymbol > 7); - if (parsedValue > int.MaxValue / 10 || (parsedValue == int.MaxValue / 10 && nextDigitTooLarge)) - { - bytesConsumed = 0; - value = default(int); - return false; - } - - index += thisSymbolConsumed; - parsedValue = parsedValue * 10 + (int)nextSymbol; - } - - bytesConsumed = text.Length; - value = (int)(parsedValue * sign); - return true; - } - public static bool TryParseInt64(ReadOnlySpan text, out long value, out int bytesConsumed, TextFormat format = default(TextFormat), TextEncoder encoder = null) { encoder = encoder == null ? 
TextEncoder.Utf8 : encoder; diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs index cb662cb9d33..0d78a2c3153 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs @@ -8,7 +8,7 @@ namespace System.Text.Primitives.Tests { - public class ParserPerfTests + public partial class PrimitiveParserPerfTests { private const int InnerCount = 10000; @@ -36,7 +36,7 @@ public class ParserPerfTests "-214" }; - //[Benchmark(InnerIterationCount = InnerCount)] + [Benchmark(InnerIterationCount = InnerCount)] [InlineData("107374182")] // standard parse [InlineData("2147483647")] // max value [InlineData("0")] @@ -59,7 +59,7 @@ private static void PrimitiveParserByteSpanToInt32(string text) } } - //[Benchmark(InnerIterationCount = InnerCount)] + [Benchmark(InnerIterationCount = InnerCount)] private static void PrimitiveParserByteSpanToInt32_VariableLength() { int textLength = s_Int32TextArray.Length; @@ -83,11 +83,6 @@ private static void PrimitiveParserByteSpanToInt32_VariableLength() } } - [MethodImpl(MethodImplOptions.NoInlining)] - private static void DoNotIgnore(int value, int consumed) - { - } - [Benchmark(InnerIterationCount = InnerCount)] [InlineData("107374182")] // standard parse [InlineData("2147483647")] // max value @@ -168,86 +163,6 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed(string text) } } - //[Benchmark(InnerIterationCount = InnerCount)] - [InlineData(10)] - [InlineData(100)] - [InlineData(1000)] - private static void ParseTestNew(int count) - { - string text = GenerateRandomDigitString(count); - byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); - var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); - - int final = 0; - foreach (var iteration in Benchmark.Iterations) - { - using (iteration.StartMeasurement()) 
- { - for (int i = 0; i < Benchmark.InnerIterationCount; i++) - { - int totalConsumed = 0; - while (totalConsumed < utf8ByteSpan.Length) - { - PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan.Slice(totalConsumed), out int value, out int bytesConsumed); - totalConsumed += bytesConsumed; - final |= value; - } - } - } - } - Assert.Equal(-1, final); - } - - //[Benchmark(InnerIterationCount = InnerCount)] - [InlineData(10)] - [InlineData(100)] - [InlineData(1000)] - private static void ParseTestNew_OLD(int count) - { - string text = GenerateRandomDigitString(count); - byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); - var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); - - int final = 0; - foreach (var iteration in Benchmark.Iterations) - { - using (iteration.StartMeasurement()) - { - for (int i = 0; i < Benchmark.InnerIterationCount; i++) - { - int totalConsumed = 0; - while (totalConsumed < utf8ByteSpan.Length) - { - PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(utf8ByteSpan.Slice(totalConsumed), out int value, out int bytesConsumed); - totalConsumed += bytesConsumed; - final |= value; - } - } - } - } - Assert.Equal(-1, final); - } - - private static string GenerateRandomDigitString(int count = 1000) - { - Random rnd = new Random(count); - var builder = new StringBuilder(); - - for (int j = 0; j < count; j++) - { - int sign = rnd.Next(0, 3); - if (sign == 1) builder.Append("+"); - if (sign == 2) builder.Append("-"); - var length = rnd.Next(1, 14); - for (int i = 0; i < length; i++) - { - int digit = rnd.Next(0, 10); - builder.Append(digit.ToString()); - } - } - return builder.ToString(); - } - [Benchmark(InnerIterationCount = InnerCount)] private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength() { @@ -334,7 +249,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength( [InlineData("+2147483abcdefghijklmnop")] [InlineData("+2147483647abcdefghijklmnop")] [InlineData("+214abcdefghijklmnop")] - private 
static void PrimitiveParserByteSpanToInt32_BytesConsumed_BASE(string text) + private static void PrimitiveParserByteSpanToInt32_BytesConsumed_Baseline(string text) { byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); @@ -353,7 +268,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_BASE(string tex } [Benchmark(InnerIterationCount = InnerCount)] - private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_BASE() + private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_Baseline() { int textLength = s_Int32TextArray.Length; byte[][] utf8ByteArray = (byte[][])Array.CreateInstance(typeof(byte[]), textLength); @@ -376,110 +291,6 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_ } [Benchmark(InnerIterationCount = InnerCount)] - [InlineData("107374182")] // standard parse - [InlineData("2147483647")] // max value - [InlineData("0")] - [InlineData("-2147483647")] // min value - [InlineData("214748364")] - [InlineData("2")] - [InlineData("21474836")] - [InlineData("-21474")] - [InlineData("21474")] - [InlineData("-21")] - [InlineData("-2")] - [InlineData("214")] - [InlineData("-21474836")] - [InlineData("-214748364")] - [InlineData("2147")] - [InlineData("-2147")] - [InlineData("-214748")] - [InlineData("-2147483")] - [InlineData("214748")] - [InlineData("21")] - [InlineData("2147483")] - [InlineData("-214")] - [InlineData("+21474")] - [InlineData("+21")] - [InlineData("+2")] - [InlineData("+21474836")] - [InlineData("+214748364")] - [InlineData("+2147")] - [InlineData("+214748")] - [InlineData("+2147483")] - [InlineData("+2147483647")] - [InlineData("+214")] - [InlineData("000000000000000000001235abcdfg")] - [InlineData("214748364abcdefghijklmnop")] - [InlineData("2abcdefghijklmnop")] - [InlineData("21474836abcdefghijklmnop")] - [InlineData("-21474abcdefghijklmnop")] - [InlineData("21474abcdefghijklmnop")] - 
[InlineData("-21abcdefghijklmnop")] - [InlineData("-2abcdefghijklmnop")] - [InlineData("214abcdefghijklmnop")] - [InlineData("-21474836abcdefghijklmnop")] - [InlineData("-214748364abcdefghijklmnop")] - [InlineData("2147abcdefghijklmnop")] - [InlineData("-2147abcdefghijklmnop")] - [InlineData("-214748abcdefghijklmnop")] - [InlineData("-2147483abcdefghijklmnop")] - [InlineData("214748abcdefghijklmnop")] - [InlineData("21abcdefghijklmnop")] - [InlineData("2147483abcdefghijklmnop")] - [InlineData("-214abcdefghijklmnop")] - [InlineData("+21474abcdefghijklmnop")] - [InlineData("+21abcdefghijklmnop")] - [InlineData("+2abcdefghijklmnop")] - [InlineData("+21474836abcdefghijklmnop")] - [InlineData("+214748364abcdefghijklmnop")] - [InlineData("+2147abcdefghijklmnop")] - [InlineData("+214748abcdefghijklmnop")] - [InlineData("+2147483abcdefghijklmnop")] - [InlineData("+2147483647abcdefghijklmnop")] - [InlineData("+214abcdefghijklmnop")] - private static void PrimitiveParserByteSpanToInt32_BytesConsumed_OLD(string text) - { - byte[] utf8ByteArray = Text.Encoding.UTF8.GetBytes(text); - var utf8ByteSpan = new ReadOnlySpan(utf8ByteArray); - - foreach (var iteration in Benchmark.Iterations) - { - using (iteration.StartMeasurement()) - { - for (int i = 0; i < Benchmark.InnerIterationCount; i++) - { - PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(utf8ByteSpan, out int value, out int bytesConsumed); - DoNotIgnore(value, bytesConsumed); - } - } - } - } - - [Benchmark(InnerIterationCount = InnerCount)] - private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_OLD() - { - int textLength = s_Int32TextArray.Length; - byte[][] utf8ByteArray = (byte[][])Array.CreateInstance(typeof(byte[]), textLength); - for (var i = 0; i < textLength; i++) - { - utf8ByteArray[i] = Text.Encoding.UTF8.GetBytes(s_Int32TextArray[i]); - } - - foreach (var iteration in Benchmark.Iterations) - { - using (iteration.StartMeasurement()) - { - for (int i = 0; i < Benchmark.InnerIterationCount; 
i++) - { - ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; - PrimitiveParser.InvariantUtf8.TryParseInt32_OLD(utf8ByteSpan, out int value, out int bytesConsumed); - DoNotIgnore(value, bytesConsumed); - } - } - } - } - - //[Benchmark(InnerIterationCount = InnerCount)] [InlineData("๑๐๗๓๗๔๑๘๒")] [InlineData("๒๑๔๗๔๘๓๖๔๗")] [InlineData("๐")] @@ -556,106 +367,5 @@ public unsafe void ParseInt32Thai(string text) } } } - - //[Benchmark(InnerIterationCount = InnerCount)] - [InlineData("๑๐๗๓๗๔๑๘๒")] - [InlineData("๒๑๔๗๔๘๓๖๔๗")] - [InlineData("๐")] - [InlineData("ลบ๒๑๔๗๔๘๓๖๔๘")] - [InlineData("๒๑๔๗๔๘๓๖๔")] - [InlineData("๒")] - [InlineData("๒๑๔๗๔๘๓๖")] - [InlineData("ลบ๒๑๔๗๔")] - [InlineData("๒๑๔๗๔")] - [InlineData("ลบ๒๑")] - [InlineData("ลบ๒")] - [InlineData("๒๑๔")] - [InlineData("ลบ๒๑๔๗๔๘๓๖")] - [InlineData("ลบ๒๑๔๗๔๘๓๖๔")] - [InlineData("๒๑๔๗")] - [InlineData("ลบ๒๑๔๗")] - [InlineData("ลบ๒๑๔๗๔๘")] - [InlineData("ลบ๒๑๔๗๔๘๓")] - [InlineData("๒๑๔๗๔๘")] - [InlineData("๒๑")] - [InlineData("๒๑๔๗๔๘๓")] - [InlineData("ลบ๒๑๔")] - [InlineData("+๒๑๔๗๔")] - [InlineData("+๒๑")] - [InlineData("+๒")] - [InlineData("+๒๑๔๗๔๘๓๖")] - [InlineData("+๒๑๔๗๔๘๓๖๔")] - [InlineData("+๒๑๔๗")] - [InlineData("+๒๑๔๗๔๘")] - [InlineData("+๒๑๔๗๔๘๓")] - [InlineData("+๒๑๔๗๔๘๓๖๔๗")] - [InlineData("+๒๑๔")] - [InlineData("๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๐๑๒๓๕abcdfg")] - [InlineData("๒๑๔๗๔๘๓๖๔abcdefghijklmnop")] - [InlineData("๒abcdefghijklmnop")] - [InlineData("๒๑๔๗๔๘๓๖abcdefghijklmnop")] - [InlineData("ลบ๒๑๔๗๔abcdefghijklmnop")] - [InlineData("๒๑๔๗๔abcdefghijklmnop")] - [InlineData("ลบ๒๑abcdefghijklmnop")] - [InlineData("ลบ๒abcdefghijklmnop")] - [InlineData("๒๑๔abcdefghijklmnop")] - [InlineData("ลบ๒๑๔๗๔๘๓๖abcdefghijklmnop")] - [InlineData("ลบ๒๑๔๗๔๘๓๖๔abcdefghijklmnop")] - [InlineData("๒๑๔๗abcdefghijklmnop")] - [InlineData("ลบ๒๑๔๗abcdefghijklmnop")] - [InlineData("ลบ๒๑๔๗๔๘abcdefghijklmnop")] - [InlineData("ลบ๒๑๔๗๔๘๓abcdefghijklmnop")] - [InlineData("๒๑๔๗๔๘abcdefghijklmnop")] - [InlineData("๒๑abcdefghijklmnop")] - 
[InlineData("๒๑๔๗๔๘๓abcdefghijklmnop")] - [InlineData("ลบ๒๑๔abcdefghijklmnop")] - [InlineData("+๒๑๔๗๔abcdefghijklmnop")] - [InlineData("+๒๑abcdefghijklmnop")] - [InlineData("+๒abcdefghijklmnop")] - [InlineData("+๒๑๔๗๔๘๓๖abcdefghijklmnop")] - [InlineData("+๒๑๔๗๔๘๓๖๔abcdefghijklmnop")] - [InlineData("+๒๑๔๗abcdefghijklmnop")] - [InlineData("+๒๑๔๗๔๘abcdefghijklmnop")] - [InlineData("+๒๑๔๗๔๘๓abcdefghijklmnop")] - [InlineData("+๒๑๔๗๔๘๓๖๔๗abcdefghijklmnop")] - [InlineData("+๒๑๔abcdefghijklmnop")] - public unsafe void ParseInt32Thai_OLD(string text) - { - ReadOnlySpan utf8Span = UtfEncode(text, false); - foreach (var iteration in Benchmark.Iterations) - { - using (iteration.StartMeasurement()) - { - for (int i = 0; i < Benchmark.InnerIterationCount; i++) - { - PrimitiveParser.TryParseInt32_OLD(utf8Span, out int value, out int bytesConsumed, 'G', s_thaiEncoder); - DoNotIgnore(value, bytesConsumed); - } - } - } - } - - static byte[][] s_thaiUtf8DigitsAndSymbols = new byte[][] -{ - new byte[] { 0xe0, 0xb9, 0x90 }, new byte[] { 0xe0, 0xb9, 0x91 }, new byte[] { 0xe0, 0xb9, 0x92 }, - new byte[] { 0xe0, 0xb9, 0x93 }, new byte[] { 0xe0, 0xb9, 0x94 }, new byte[] { 0xe0, 0xb9, 0x95 }, new byte[] { 0xe0, 0xb9, 0x96 }, - new byte[] { 0xe0, 0xb9, 0x97 }, new byte[] { 0xe0, 0xb9, 0x98 }, new byte[] { 0xe0, 0xb9, 0x99 }, new byte[] { 0xE0, 0xB8, 0x88, 0xE0, 0xB8, 0x94 }, null, - new byte[] { 0xE0, 0xB8, 0xAA, 0xE0, 0xB8, 0xB4, 0xE0, 0xB9, 0x88, 0xE0, 0xB8, 0x87, 0xE0, 0xB8, 0x97, 0xE0, 0xB8, 0xB5, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x83, - 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0x8D, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x82, 0xE0, 0xB8, 0x95, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0xA5, 0xE0, - 0xB8, 0xB7, 0xE0, 0xB8, 0xAD, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0x81, 0xE0, 0xB8, 0xB4, 0xE0, 0xB8, 0x99 }, - new byte[] { 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x9A }, new byte[] { 43 }, new byte[] { 0xE0, 0xB9, 0x84, 0xE0, 0xB8, 0xA1, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, - 0x83, 0xE0, 0xB8, 0x8A, 0xE0, 0xB9, 0x88, 0xE0, 
0xB8, 0x95, 0xE0, 0xB8, 0xB1, 0xE0, 0xB8, 0xA7, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x82 }, - new byte[] { 69 }, new byte[] { 101 }, -}; - - static TextEncoder s_thaiEncoder = TextEncoder.CreateUtf8Encoder(s_thaiUtf8DigitsAndSymbols); - - private byte[] UtfEncode(string s, bool utf16) - { - if (utf16) - return Text.Encoding.Unicode.GetBytes(s); - else - return Text.Encoding.UTF8.GetBytes(s); - } } } diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs index 44e82fe3f1a..3f65671ea32 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs @@ -14,8 +14,6 @@ using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Threading; using Xunit; namespace System.Text.Primitives.Tests @@ -133,6 +131,8 @@ private byte[] UtfEncode(string s, bool utf16) new byte[] { 101, }, // e }; + // TODO: Fix Thai + symbol and adjust tests. + // Change from new byte[] { 43 }, i.e. '+' to new byte[] { 0xE0, 0xB8, 0x9A, 0xE0, 0xB8, 0xA7, 0xE0, 0xB8, 0x81 }, i.e. 
'บวก' static byte[][] s_thaiUtf8DigitsAndSymbols = new byte[][] { new byte[] { 0xe0, 0xb9, 0x90 }, new byte[] { 0xe0, 0xb9, 0x91 }, new byte[] { 0xe0, 0xb9, 0x92 }, @@ -1298,7 +1298,7 @@ private void ParseInt32VariableLength(string text, bool expectSuccess, int expec Assert.Equal(expectedConsumed, consumed); } - //[Theory] + [Theory] [InlineData("0", true, 0, int.MaxValue)] [InlineData("2", true, 2, int.MaxValue)] [InlineData("21", true, 21, int.MaxValue)] @@ -1319,7 +1319,7 @@ public unsafe void ParseInt32OverflowCheck(string text, bool expectSuccess, int unsafe { - if (!AllocationHelper.TryAllocNative((IntPtr)TwoGiB, out IntPtr memBlock)) + if (!TestHelper.TryAllocNative((IntPtr)TwoGiB, out IntPtr memBlock)) return; // It's not implausible to believe that a 2gb allocation will fail - if so, skip this test to avoid unnecessary test flakiness. try @@ -1343,7 +1343,7 @@ public unsafe void ParseInt32OverflowCheck(string text, bool expectSuccess, int } finally { - AllocationHelper.ReleaseNative(ref memBlock); + TestHelper.ReleaseNative(ref memBlock); } } } @@ -1437,7 +1437,7 @@ public unsafe void ParseInt32Thai(string text, bool expectSuccess, int expectedV Assert.Equal(expectedConsumed, consumed); } - //[Theory] + //[Theory] // TODO: Test is too slow, only enable for "outerloop" [InlineData("๐", true, 0, int.MaxValue)] [InlineData("๒", true, 2, int.MaxValue)] [InlineData("๒๑", true, 21, int.MaxValue)] @@ -1458,7 +1458,7 @@ public unsafe void ParseInt32ThaiOverflowCheck(string text, bool expectSuccess, unsafe { - if (!AllocationHelper.TryAllocNative((IntPtr)TwoGiB, out IntPtr memBlock)) + if (!TestHelper.TryAllocNative((IntPtr)TwoGiB, out IntPtr memBlock)) return; // It's not implausible to believe that a 2gb allocation will fail - if so, skip this test to avoid unnecessary test flakiness. 
try @@ -1493,7 +1493,7 @@ public unsafe void ParseInt32ThaiOverflowCheck(string text, bool expectSuccess, } finally { - AllocationHelper.ReleaseNative(ref memBlock); + TestHelper.ReleaseNative(ref memBlock); } } } @@ -1761,44 +1761,4 @@ public unsafe void ParseInt64Hex(string text, bool expectSuccess, long expectedV } - - static class AllocationHelper - { - private static readonly Mutex MemoryLock = new Mutex(); - private static readonly TimeSpan WaitTimeout = TimeSpan.FromSeconds(120); - - public static bool TryAllocNative(IntPtr size, out IntPtr memory) - { - memory = IntPtr.Zero; - - if (!MemoryLock.WaitOne(WaitTimeout)) - return false; - - try - { - memory = Marshal.AllocHGlobal(size); - } - catch (OutOfMemoryException) - { - memory = IntPtr.Zero; - MemoryLock.ReleaseMutex(); - } - - return memory != IntPtr.Zero; - - } - - public static void ReleaseNative(ref IntPtr memory) - { - try - { - Marshal.FreeHGlobal(memory); - memory = IntPtr.Zero; - } - finally - { - MemoryLock.ReleaseMutex(); - } - } - } } diff --git a/tests/System.Text.Primitives.Tests/TestHelper.cs b/tests/System.Text.Primitives.Tests/TestHelper.cs index 08b13f8718a..e34ca7fcc08 100644 --- a/tests/System.Text.Primitives.Tests/TestHelper.cs +++ b/tests/System.Text.Primitives.Tests/TestHelper.cs @@ -3,6 +3,8 @@ // See the LICENSE file in the project root for more information. 
using Xunit; +using System.Runtime.InteropServices; +using System.Threading; namespace System.Text.Primitives.Tests { @@ -15,5 +17,43 @@ public static string SpanToString(Span span, TextEncoder encoder = null) Assert.True(encoder.TryDecode(span, out string text, out int consumed)); return text; } + + // Borrowed from https://github.com/dotnet/corefx/blob/master/src/System.Memory/tests/AllocationHelper.cs + + private static readonly Mutex MemoryLock = new Mutex(); + private static readonly TimeSpan WaitTimeout = TimeSpan.FromSeconds(120); + + public static bool TryAllocNative(IntPtr size, out IntPtr memory) + { + memory = IntPtr.Zero; + + if (!MemoryLock.WaitOne(WaitTimeout)) + return false; + + try + { + memory = Marshal.AllocHGlobal(size); + } + catch (OutOfMemoryException) + { + memory = IntPtr.Zero; + MemoryLock.ReleaseMutex(); + } + + return memory != IntPtr.Zero; + } + + public static void ReleaseNative(ref IntPtr memory) + { + try + { + Marshal.FreeHGlobal(memory); + memory = IntPtr.Zero; + } + finally + { + MemoryLock.ReleaseMutex(); + } + } } } From e5c0cac134644be6f8b208442a3e8089f522b515 Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Tue, 20 Jun 2017 22:50:29 -0700 Subject: [PATCH 06/16] Removing unnecessary using directive --- .../System/Text/Parsing/InvariantSigned.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs index d656b775771..a642f8b4ee1 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs @@ -9,9 +9,6 @@ // NOTE: This file is generated via a T4 template. Please do not edit this file directly. Any changes should be made // in InvariantSigned.tt. 
- -using System.Runtime.CompilerServices; - namespace System.Text { public static partial class PrimitiveParser From 09cd24c00ba0264d6f929a59eb9eb70854a62d87 Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Fri, 23 Jun 2017 01:09:57 -0700 Subject: [PATCH 07/16] Addressing PR comments and adding more tests. --- .../System/Text/Parsing/InvariantSigned.cs | 53 ++++++++++++----- .../System/Text/Parsing/Signed.cs | 12 ++-- .../Parsing/PrimitiveParserIntegerTests.cs | 58 ++++++++++++++++++- 3 files changed, 100 insertions(+), 23 deletions(-) diff --git a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs index a642f8b4ee1..0d2d0d395a5 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs @@ -907,9 +907,9 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int int textLength = text.Length; if (textLength < 1) goto FalseExit; - sbyte sign = 1; - int index = 0; - byte num = text[index]; + int sign = 1; + int index = default; + int num = text[index]; if (num == '-') { sign = -1; @@ -924,9 +924,9 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int num = text[index]; } - int answer = 0; + int answer = default; - if (num >= '0' && num <= '9') + if (IsDigit(num)) { if (num == '0') { @@ -935,12 +935,11 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int index++; if (index >= textLength) goto Done; num = text[index]; - } while (num == '0') ; - if (num < '0' || num > '9') goto Done; + } while (num == '0'); + if (!IsDigit(num)) goto Done; } - int firstNonZeroDigitIndex = index; - if (textLength - firstNonZeroDigitIndex < Int32OverflowLength) + if (textLength - index < Int32OverflowLength) { do { @@ -948,10 +947,33 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int index++; if (index >= textLength) goto Done; 
num = text[index]; - } while (num >= '0' && num <= '9'); + } while (IsDigit(num)); + } + else if (textLength - index == Int32OverflowLength) + { + int firstNonZeroDigitIndex = index; + do + { + answer = answer * 10 + num - '0'; + index++; + if (index - firstNonZeroDigitIndex == Int32OverflowLength - 1) + { + num = text[index]; + if (IsDigit(num)) + { + num -= '0'; + if (WillOverFlow(answer, num, sign)) goto FalseExit; + answer = answer * 10 + num; + index++; + } + goto Done; + } + num = text[index]; + } while (IsDigit(num)); } else { + int firstNonZeroDigitIndex = index; do { answer = answer * 10 + num - '0'; @@ -959,24 +981,25 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int if (index - firstNonZeroDigitIndex == Int32OverflowLength - 1) { num = text[index]; - if (num >= '0' && num <= '9') + if (IsDigit(num)) { - num -= (byte)'0'; + num -= '0'; if (WillOverFlow(answer, num, sign)) goto FalseExit; answer = answer * 10 + num; index++; } + if (IsDigit(text[index])) goto FalseExit; goto Done; } num = text[index]; - } while (num >= '0' && num <= '9'); + } while (IsDigit(num)); } goto Done; } FalseExit: - bytesConsumed = 0; - value = 0; + bytesConsumed = default; + value = default; return false; Done: diff --git a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs index cfa57fed12d..f003a891705 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs @@ -20,7 +20,7 @@ public static partial class PrimitiveParser [MethodImpl(MethodImplOptions.AggressiveInlining)] private static bool IsDigit(int i) { - return i >= 0 && i <= 9; + return (uint)(i - '0') <= ('9' - '0'); } [MethodImpl(MethodImplOptions.AggressiveInlining)] @@ -35,7 +35,7 @@ private static bool IsValid(uint i) private static bool WillOverFlow(sbyte value, int nextDigit, int sign) { bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit 
> 7); - return (value > maxValueSbyteDiv10 || nextDigitTooLarge); + return (value > maxValueSbyteDiv10 || (value == maxValueSbyteDiv10 && nextDigitTooLarge)); } // If parsedValue > (short.MaxValue / 10), any more appended digits will cause overflow. @@ -44,16 +44,16 @@ private static bool WillOverFlow(sbyte value, int nextDigit, int sign) private static bool WillOverFlow(short value, int nextDigit, int sign) { bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7); - return (value > maxValueShortDiv10 || nextDigitTooLarge); + return (value > maxValueShortDiv10 || (value == maxValueShortDiv10 && nextDigitTooLarge)); } // If parsedValue > (int.MaxValue / 10), any more appended digits will cause overflow. // if parsedValue == (int.MaxValue / 10), any nextDigit greater than 7 or 8 (depending on sign) implies overflow. [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static bool WillOverFlow(int value, byte nextDigit, sbyte sign) + private static bool WillOverFlow(int value, int nextDigit, int sign) { bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7); - return (value > maxValueIntDiv10 || nextDigitTooLarge); + return (value > maxValueIntDiv10 || (value == maxValueIntDiv10 && nextDigitTooLarge)); } // If parsedValue > (long.MaxValue / 10), any more appended digits will cause overflow. 
@@ -62,7 +62,7 @@ private static bool WillOverFlow(int value, byte nextDigit, sbyte sign) private static bool WillOverFlow(long value, int nextDigit, int sign) { bool nextDigitTooLarge = nextDigit > 8 || (sign > 0 && nextDigit > 7); - return (value > maxValueLongDiv10 || nextDigitTooLarge); + return (value > maxValueLongDiv10 || (value == maxValueLongDiv10 && nextDigitTooLarge)); } #endregion diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs index 3f65671ea32..c122546eb7b 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs @@ -1162,7 +1162,7 @@ public unsafe void ParseInt16Hex(string text, bool expectSuccess, short expected [InlineData("-2147483648", true, -2147483648, 11)] // min [InlineData("-A", false, 0, 0)] // invalid character after a sign [InlineData("I am 1", false, 0, 0)] // invalid character test - [InlineData(" !", false, 0, 0)] // invalid character test w/ char < '0' + [InlineData("123!", true, 123, 3)] // invalid character test w/ char < '0' // TODO: Fix test case elsewhere [InlineData("2147483648", false, 0, 0)] // positive overflow test [InlineData("-2147483649", false, 0, 0)] // negative overflow test [InlineData("0", true, 0, 1)] @@ -1178,6 +1178,15 @@ public unsafe void ParseInt16Hex(string text, bool expectSuccess, short expected [InlineData("+000012345abcdefg1", true, 12345, 10)] [InlineData("000012345abcdefg1", true, 12345, 9)] [InlineData("0000001234145abcdefg1", true, 1234145, 13)] + [InlineData("00000000000000abcdefghijklmnop", true, 0, 14)] + [InlineData("000000a", true, 0, 6)] + [InlineData("00000000000000!", true, 0, 14)] + [InlineData("00000000000000", true, 0, 14)] + [InlineData("1147483648", true, 1147483648, 10)] + [InlineData("-1147483649", true, -1147483649, 11)] + [InlineData("12345!6", true, 12345, 5)] + 
[InlineData("12345!abc", true, 12345, 5)] + [InlineData("!!", false, 0, 0)] [InlineData("+", false, 0, 0)] [InlineData("-", false, 0, 0)] [InlineData("", false, 0, 0)] @@ -1257,7 +1266,7 @@ public unsafe void ParseInt32Dec(string text, bool expectSuccess, int expectedVa Assert.Equal(expectedConsumed, consumed); } } - + [Theory] [InlineData("2", true, 2, 1)] [InlineData("21", true, 21, 2)] @@ -1298,6 +1307,49 @@ private void ParseInt32VariableLength(string text, bool expectSuccess, int expec Assert.Equal(expectedConsumed, consumed); } + [Theory] + [InlineData("3147483647")] + [InlineData("4147483647")] + [InlineData("5147483647")] + [InlineData("6147483647")] + [InlineData("7147483647")] + [InlineData("8147483647")] + [InlineData("9147483647")] + [InlineData("2147483648")] + [InlineData("3147483648")] + [InlineData("4147483648")] + [InlineData("5147483648")] + [InlineData("6147483648")] + [InlineData("7147483648")] + [InlineData("8147483648")] + [InlineData("9147483648")] + [InlineData("11474836471")] + [InlineData("21474836471")] + [InlineData("31474836471")] + [InlineData("41474836471")] + [InlineData("51474836471")] + [InlineData("61474836471")] + [InlineData("71474836471")] + [InlineData("81474836471")] + [InlineData("91474836471")] + [InlineData("11474836481")] + [InlineData("21474836481")] + [InlineData("31474836481")] + [InlineData("41474836481")] + [InlineData("51474836481")] + [InlineData("61474836481")] + [InlineData("71474836481")] + [InlineData("81474836481")] + [InlineData("91474836481")] + private void ParseInt32VariableOverflowTests(string text) + { + ReadOnlySpan utf8Span = UtfEncode(text, false); + bool result = PrimitiveParser.InvariantUtf8.TryParseInt32(utf8Span, out int parsedValue, out int consumed); + Assert.Equal(false, result); + Assert.Equal(0, parsedValue); + Assert.Equal(0, consumed); + } + [Theory] [InlineData("0", true, 0, int.MaxValue)] [InlineData("2", true, 2, int.MaxValue)] @@ -1311,6 +1363,8 @@ private void 
ParseInt32VariableLength(string text, bool expectSuccess, int expec [InlineData("12345abcdefg1", true, 12345, int.MaxValue - 8)] [InlineData("1234145abcdefg1", true, 1234145, int.MaxValue - 8)] [InlineData("abcdefghijklmnop1", true, 0, int.MaxValue - 17)] + [InlineData("1147483648", true, 1147483648, int.MaxValue)] + [InlineData("-1147483649", true, -1147483649, int.MaxValue)] public unsafe void ParseInt32OverflowCheck(string text, bool expectSuccess, int expectedValue, int expectedConsumed) { ReadOnlySpan utf8Span = UtfEncode(text, false); From 67183fefc4298896eb39f74903d33109d81f1cc5 Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Fri, 23 Jun 2017 15:47:25 -0700 Subject: [PATCH 08/16] Addressing PR comments and adding loop unrolling. --- .../System/Text/Parsing/InvariantSigned.cs | 125 ++++++++++-------- 1 file changed, 67 insertions(+), 58 deletions(-) diff --git a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs index b3919a493d6..9ec129717dd 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs @@ -908,7 +908,7 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int if (textLength < 1) goto FalseExit; int sign = 1; - int index = default; + int index = 0; int num = text[index]; if (num == '-') { @@ -924,7 +924,7 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int num = text[index]; } - int answer = default; + int answer = 0; if (IsDigit(num)) { @@ -939,62 +939,71 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int if (!IsDigit(num)) goto Done; } - if (textLength - index < Int32OverflowLength) - { - do - { - answer = answer * 10 + num - '0'; - index++; - if (index >= textLength) goto Done; - num = text[index]; - } while (IsDigit(num)); - } - else if (textLength - index == Int32OverflowLength) - { - int 
firstNonZeroDigitIndex = index; - do - { - answer = answer * 10 + num - '0'; - index++; - if (index - firstNonZeroDigitIndex == Int32OverflowLength - 1) - { - num = text[index]; - if (IsDigit(num)) - { - num -= '0'; - if (WillOverFlow(answer, num, sign)) goto FalseExit; - answer = answer * 10 + num; - index++; - } - goto Done; - } - num = text[index]; - } while (IsDigit(num)); - } - else - { - int firstNonZeroDigitIndex = index; - do - { - answer = answer * 10 + num - '0'; - index++; - if (index - firstNonZeroDigitIndex == Int32OverflowLength - 1) - { - num = text[index]; - if (IsDigit(num)) - { - num -= '0'; - if (WillOverFlow(answer, num, sign)) goto FalseExit; - answer = answer * 10 + num; - index++; - } - if (IsDigit(text[index])) goto FalseExit; - goto Done; - } - num = text[index]; - } while (IsDigit(num)); - } - goto Done; + answer = num - '0'; + index++; + + if (index >= textLength) goto Done; + num = text[index]; + if (!IsDigit(num)) goto Done; + index++; + answer = 10 * answer + num - '0'; + + if (index >= textLength) goto Done; + num = text[index]; + if (!IsDigit(num)) goto Done; + index++; + answer = 10 * answer + num - '0'; + + if (index >= textLength) goto Done; + num = text[index]; + if (!IsDigit(num)) goto Done; + index++; + answer = 10 * answer + num - '0'; + + if (index >= textLength) goto Done; + num = text[index]; + if (!IsDigit(num)) goto Done; + index++; + answer = 10 * answer + num - '0'; + + if (index >= textLength) goto Done; + num = text[index]; + if (!IsDigit(num)) goto Done; + index++; + answer = 10 * answer + num - '0'; + + if (index >= textLength) goto Done; + num = text[index]; + if (!IsDigit(num)) goto Done; + index++; + answer = 10 * answer + num - '0'; + + if (index >= textLength) goto Done; + num = text[index]; + if (!IsDigit(num)) goto Done; + index++; + answer = 10 * answer + num - '0'; + + if (index >= textLength) goto Done; + num = text[index]; + if (!IsDigit(num)) goto Done; + index++; + answer = 10 * answer + num - '0'; + + 
// Potential overflow + if (index >= textLength) goto Done; + num = text[index]; + if (!IsDigit(num)) goto Done; + long lAnswer = (long)answer * 10 + num - '0'; + + if (lAnswer > (long)Int32.MaxValue + (-1 * sign + 1) / 2) goto FalseExit; + answer = (int)lAnswer; + index++; + if (index >= textLength) goto Done; + if (!IsDigit(text[index])) goto Done; + + // Guaranteed overflow + goto FalseExit; } FalseExit: From 91a6191196ae6ffbd4faeb3b3d269fca3109b2f2 Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Fri, 23 Jun 2017 16:24:42 -0700 Subject: [PATCH 09/16] Fixing issues missed after merge --- .../Parsing/PrimitiveParserInt32PerfTests.cs | 4 +- .../Parsing/PrimitiveParserIntegerTests.cs | 131 ++++++------------ .../Parsing/PrimitiveParserPerfTests.cs | 23 --- .../TestHelper.cs | 44 +++++- 4 files changed, 90 insertions(+), 112 deletions(-) diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs index 0d78a2c3153..06c4736f317 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserInt32PerfTests.cs @@ -354,14 +354,14 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_ [InlineData("+๒๑๔abcdefghijklmnop")] public unsafe void ParseInt32Thai(string text) { - ReadOnlySpan utf8Span = UtfEncode(text, false); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); foreach (var iteration in Benchmark.Iterations) { using (iteration.StartMeasurement()) { for (int i = 0; i < Benchmark.InnerIterationCount; i++) { - PrimitiveParser.TryParseInt32(utf8Span, out int value, out int bytesConsumed, 'G', s_thaiEncoder); + PrimitiveParser.TryParseInt32(utf8Span, out int value, out int bytesConsumed, 'G', TestHelper.ThaiTable); DoNotIgnore(value, bytesConsumed); } } diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs 
b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs index a7d4c6497b6..fe6026c6325 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserIntegerTests.cs @@ -10,47 +10,6 @@ namespace System.Text.Primitives.Tests { public partial class PrimitiveParserTests { - private byte[] UtfEncode(string s, bool utf16) - { - if (utf16) - return Text.Encoding.Unicode.GetBytes(s); - else - return Text.Encoding.UTF8.GetBytes(s); - } - - // TODO: Fix Thai + symbol and adjust tests. - // Change from new byte[] { 43 }, i.e. '+' to new byte[] { 0xE0, 0xB8, 0x9A, 0xE0, 0xB8, 0xA7, 0xE0, 0xB8, 0x81 }, i.e. 'บวก' - static byte[][] s_thaiUtf8DigitsAndSymbols = new byte[][] - { - new byte[] { 0xe0, 0xb9, 0x90 }, new byte[] { 0xe0, 0xb9, 0x91 }, new byte[] { 0xe0, 0xb9, 0x92 }, - new byte[] { 0xe0, 0xb9, 0x93 }, new byte[] { 0xe0, 0xb9, 0x94 }, new byte[] { 0xe0, 0xb9, 0x95 }, new byte[] { 0xe0, 0xb9, 0x96 }, - new byte[] { 0xe0, 0xb9, 0x97 }, new byte[] { 0xe0, 0xb9, 0x98 }, new byte[] { 0xe0, 0xb9, 0x99 }, new byte[] { 0xE0, 0xB8, 0x88, 0xE0, 0xB8, 0x94 }, null, - new byte[] { 0xE0, 0xB8, 0xAA, 0xE0, 0xB8, 0xB4, 0xE0, 0xB9, 0x88, 0xE0, 0xB8, 0x87, 0xE0, 0xB8, 0x97, 0xE0, 0xB8, 0xB5, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x83, - 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0x8D, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x82, 0xE0, 0xB8, 0x95, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0xA5, 0xE0, - 0xB8, 0xB7, 0xE0, 0xB8, 0xAD, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0x81, 0xE0, 0xB8, 0xB4, 0xE0, 0xB8, 0x99 }, - new byte[] { 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x9A }, new byte[] { 43 }, new byte[] { 0xE0, 0xB9, 0x84, 0xE0, 0xB8, 0xA1, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, - 0x83, 0xE0, 0xB8, 0x8A, 0xE0, 0xB9, 0x88, 0xE0, 0xB8, 0x95, 0xE0, 0xB8, 0xB1, 0xE0, 0xB8, 0xA7, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x82 }, - new byte[] { 69 }, new byte[] { 101 }, - }; - - public class ThaiSymbolTable : SymbolTable - { - 
public ThaiSymbolTable() : base(s_thaiUtf8DigitsAndSymbols) {} - - public override bool TryEncode(byte utf8, Span destination, out int bytesWritten) - => SymbolTable.InvariantUtf8.TryEncode(utf8, destination, out bytesWritten); - - public override bool TryEncode(ReadOnlySpan utf8, Span destination, out int bytesConsumed, out int bytesWritten) - => SymbolTable.InvariantUtf8.TryEncode(utf8, destination, out bytesConsumed, out bytesWritten); - - public override bool TryParse(ReadOnlySpan source, out byte utf8, out int bytesConsumed) - => SymbolTable.InvariantUtf8.TryParse(source, out utf8, out bytesConsumed); - - public override bool TryParse(ReadOnlySpan source, Span utf8, out int bytesConsumed, out int bytesWritten) - => SymbolTable.InvariantUtf8.TryParse(source, utf8, out bytesConsumed, out bytesWritten); - } - - static SymbolTable s_thaiTable = new ThaiSymbolTable(); #region byte @@ -66,8 +25,8 @@ public unsafe void ParseByteDec(string text, bool expectSuccess, byte expectedVa { byte parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -144,8 +103,8 @@ public unsafe void ParseByteHex(string text, bool expectSuccess, Byte expectedVa { byte parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -222,8 +181,8 @@ public unsafe void ParseUInt16Dec(string text, bool expectSuccess, 
ushort expect { ushort parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -300,8 +259,8 @@ public unsafe void ParseUInt16Hex(string text, bool expectSuccess, UInt16 expect { ushort parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -378,8 +337,8 @@ public unsafe void ParseUInt32Dec(string text, bool expectSuccess, uint expected { uint parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -456,8 +415,8 @@ public unsafe void ParseUInt32Hex(string text, bool expectSuccess, UInt32 expect { uint parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -534,8 +493,8 @@ public unsafe void 
ParseUInt64Dec(string text, bool expectSuccess, ulong expecte { ulong parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -612,8 +571,8 @@ public unsafe void ParseUInt64Hex(string text, bool expectSuccess, UInt64 expect { ulong parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -693,8 +652,8 @@ public unsafe void ParseSByteDec(string text, bool expectSuccess, sbyte expected { sbyte parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -775,10 +734,10 @@ public unsafe void ParseSByteThai(string text, bool expectSuccess, int index, sb { sbyte parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); bool result; - result = PrimitiveParser.TryParseSByte(utf8Span.Slice(index), out parsedValue, out consumed, 'G', s_thaiTable); + result = PrimitiveParser.TryParseSByte(utf8Span.Slice(index), out parsedValue, out consumed, 'G', TestHelper.ThaiTable); 
Assert.Equal(expectSuccess, result); Assert.Equal(expectedValue, parsedValue); @@ -798,8 +757,8 @@ public unsafe void ParseSByteHex(string text, bool expectSuccess, sbyte expected { sbyte parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -878,8 +837,8 @@ public unsafe void ParseInt16Dec(string text, bool expectSuccess, short expected { short parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -960,10 +919,10 @@ public unsafe void ParseInt16Thai(string text, bool expectSuccess, int index, sh { short parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); bool result; - result = PrimitiveParser.TryParseInt16(utf8Span.Slice(index), out parsedValue, out consumed, 'G', s_thaiTable); + result = PrimitiveParser.TryParseInt16(utf8Span.Slice(index), out parsedValue, out consumed, 'G', TestHelper.ThaiTable); Assert.Equal(expectSuccess, result); Assert.Equal(expectedValue, parsedValue); @@ -983,8 +942,8 @@ public unsafe void ParseInt16Hex(string text, bool expectSuccess, short expected { short parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan 
utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -1102,8 +1061,8 @@ public unsafe void ParseInt32Dec(string text, bool expectSuccess, int expectedVa { int parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -1201,7 +1160,7 @@ public unsafe void ParseInt32Dec(string text, bool expectSuccess, int expectedVa [InlineData("-2147483647", true, -2147483647, 11)] private void ParseInt32VariableLength(string text, bool expectSuccess, int expectedValue, int expectedConsumed) { - ReadOnlySpan utf8Span = UtfEncode(text, false); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); bool result = PrimitiveParser.InvariantUtf8.TryParseInt32(utf8Span, out int parsedValue, out int consumed); Assert.Equal(expectSuccess, result); Assert.Equal(expectedValue, parsedValue); @@ -1244,7 +1203,7 @@ private void ParseInt32VariableLength(string text, bool expectSuccess, int expec [InlineData("91474836481")] private void ParseInt32VariableOverflowTests(string text) { - ReadOnlySpan utf8Span = UtfEncode(text, false); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); bool result = PrimitiveParser.InvariantUtf8.TryParseInt32(utf8Span, out int parsedValue, out int consumed); Assert.Equal(false, result); Assert.Equal(0, parsedValue); @@ -1268,7 +1227,7 @@ private void ParseInt32VariableOverflowTests(string text) [InlineData("-1147483649", true, -1147483649, int.MaxValue)] public unsafe void ParseInt32OverflowCheck(string text, bool expectSuccess, int expectedValue, int expectedConsumed) 
{ - ReadOnlySpan utf8Span = UtfEncode(text, false); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); const int TwoGiB = int.MaxValue; @@ -1382,8 +1341,8 @@ public unsafe void ParseInt32OverflowCheck(string text, bool expectSuccess, int [InlineData("ลบ", false, 0, 0)] public unsafe void ParseInt32Thai(string text, bool expectSuccess, int expectedValue, int expectedConsumed) { - ReadOnlySpan utf8Span = UtfEncode(text, false); - bool result = PrimitiveParser.TryParseInt32(utf8Span, out int parsedValue, out int consumed, 'G', s_thaiTable); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + bool result = PrimitiveParser.TryParseInt32(utf8Span, out int parsedValue, out int consumed, 'G', TestHelper.ThaiTable); Assert.Equal(expectSuccess, result); Assert.Equal(expectedValue, parsedValue); @@ -1405,7 +1364,7 @@ public unsafe void ParseInt32Thai(string text, bool expectSuccess, int expectedV [InlineData("abcdefghijklmnop๑", true, 0, int.MaxValue - 17)] public unsafe void ParseInt32ThaiOverflowCheck(string text, bool expectSuccess, int expectedValue, int expectedConsumed) { - ReadOnlySpan utf8Span = UtfEncode(text, false); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); const int TwoGiB = int.MaxValue; @@ -1439,7 +1398,7 @@ public unsafe void ParseInt32ThaiOverflowCheck(string text, bool expectSuccess, utf8Span.CopyTo(span.Slice(TwoGiB - utf8Span.Length)); - bool result = PrimitiveParser.TryParseInt32(span, out int parsedValue, out int consumed, 'G', s_thaiTable); + bool result = PrimitiveParser.TryParseInt32(span, out int parsedValue, out int consumed, 'G', TestHelper.ThaiTable); Assert.Equal(expectSuccess, result); Assert.Equal(expectedValue, parsedValue); Assert.Equal(expectedConsumed, consumed); @@ -1464,8 +1423,8 @@ public unsafe void ParseInt32Hex(string text, bool expectSuccess, int expectedVa { int parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + 
ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -1544,8 +1503,8 @@ public unsafe void ParseInt64Dec(string text, bool expectSuccess, long expectedV { long parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); @@ -1626,10 +1585,10 @@ public unsafe void ParseInt64Thai(string text, bool expectSuccess, int index, lo { long parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); bool result; - result = PrimitiveParser.TryParseInt64(utf8Span.Slice(index), out parsedValue, out consumed, 'G', s_thaiTable); + result = PrimitiveParser.TryParseInt64(utf8Span.Slice(index), out parsedValue, out consumed, 'G', TestHelper.ThaiTable); Assert.Equal(expectSuccess, result); Assert.Equal(expectedValue, parsedValue); @@ -1649,8 +1608,8 @@ public unsafe void ParseInt64Hex(string text, bool expectSuccess, long expectedV { long parsedValue; int consumed; - ReadOnlySpan utf8Span = UtfEncode(text, false); - ReadOnlySpan utf16ByteSpan = UtfEncode(text, true); + ReadOnlySpan utf8Span = TestHelper.UtfEncode(text, false); + ReadOnlySpan utf16ByteSpan = TestHelper.UtfEncode(text, true); ReadOnlySpan utf16CharSpan = utf16ByteSpan.NonPortableCast(); byte[] textBytes = utf8Span.ToArray(); char[] textChars = utf16CharSpan.ToArray(); diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs 
b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs index b0bc0756a66..f8779d9fef6 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs @@ -36,28 +36,5 @@ private static void PrintTestName(string testString, [CallerMemberName] string t Console.WriteLine("{0} called with no test string.", testName); } } - - static byte[][] s_thaiUtf8DigitsAndSymbols = new byte[][] - { - new byte[] { 0xe0, 0xb9, 0x90 }, new byte[] { 0xe0, 0xb9, 0x91 }, new byte[] { 0xe0, 0xb9, 0x92 }, - new byte[] { 0xe0, 0xb9, 0x93 }, new byte[] { 0xe0, 0xb9, 0x94 }, new byte[] { 0xe0, 0xb9, 0x95 }, new byte[] { 0xe0, 0xb9, 0x96 }, - new byte[] { 0xe0, 0xb9, 0x97 }, new byte[] { 0xe0, 0xb9, 0x98 }, new byte[] { 0xe0, 0xb9, 0x99 }, new byte[] { 0xE0, 0xB8, 0x88, 0xE0, 0xB8, 0x94 }, null, - new byte[] { 0xE0, 0xB8, 0xAA, 0xE0, 0xB8, 0xB4, 0xE0, 0xB9, 0x88, 0xE0, 0xB8, 0x87, 0xE0, 0xB8, 0x97, 0xE0, 0xB8, 0xB5, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x83, - 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0x8D, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x82, 0xE0, 0xB8, 0x95, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0xA5, 0xE0, - 0xB8, 0xB7, 0xE0, 0xB8, 0xAD, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0x81, 0xE0, 0xB8, 0xB4, 0xE0, 0xB8, 0x99 }, - new byte[] { 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x9A }, new byte[] { 43 }, new byte[] { 0xE0, 0xB9, 0x84, 0xE0, 0xB8, 0xA1, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, - 0x83, 0xE0, 0xB8, 0x8A, 0xE0, 0xB9, 0x88, 0xE0, 0xB8, 0x95, 0xE0, 0xB8, 0xB1, 0xE0, 0xB8, 0xA7, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x82 }, - new byte[] { 69 }, new byte[] { 101 }, - }; - - static TextEncoder s_thaiEncoder = TextEncoder.CreateUtf8Encoder(s_thaiUtf8DigitsAndSymbols); - - private byte[] UtfEncode(string s, bool utf16) - { - if (utf16) - return Text.Encoding.Unicode.GetBytes(s); - else - return Text.Encoding.UTF8.GetBytes(s); - } } } diff --git a/tests/System.Text.Primitives.Tests/TestHelper.cs 
b/tests/System.Text.Primitives.Tests/TestHelper.cs index 2286139790a..0f2e3a103b3 100644 --- a/tests/System.Text.Primitives.Tests/TestHelper.cs +++ b/tests/System.Text.Primitives.Tests/TestHelper.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. @@ -65,5 +65,47 @@ public static void ReleaseNative(ref IntPtr memory) MemoryLock.ReleaseMutex(); } } + + public static byte[] UtfEncode(string s, bool utf16) + { + if (utf16) + return Text.Encoding.Unicode.GetBytes(s); + else + return Text.Encoding.UTF8.GetBytes(s); + } + + // TODO: Fix Thai + symbol and adjust tests. + // Change from new byte[] { 43 }, i.e. '+' to new byte[] { 0xE0, 0xB8, 0x9A, 0xE0, 0xB8, 0xA7, 0xE0, 0xB8, 0x81 }, i.e. 'บวก' + static byte[][] s_thaiUtf8DigitsAndSymbols = new byte[][] + { + new byte[] { 0xe0, 0xb9, 0x90 }, new byte[] { 0xe0, 0xb9, 0x91 }, new byte[] { 0xe0, 0xb9, 0x92 }, + new byte[] { 0xe0, 0xb9, 0x93 }, new byte[] { 0xe0, 0xb9, 0x94 }, new byte[] { 0xe0, 0xb9, 0x95 }, new byte[] { 0xe0, 0xb9, 0x96 }, + new byte[] { 0xe0, 0xb9, 0x97 }, new byte[] { 0xe0, 0xb9, 0x98 }, new byte[] { 0xe0, 0xb9, 0x99 }, new byte[] { 0xE0, 0xB8, 0x88, 0xE0, 0xB8, 0x94 }, null, + new byte[] { 0xE0, 0xB8, 0xAA, 0xE0, 0xB8, 0xB4, 0xE0, 0xB9, 0x88, 0xE0, 0xB8, 0x87, 0xE0, 0xB8, 0x97, 0xE0, 0xB8, 0xB5, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x83, + 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0x8D, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, 0x82, 0xE0, 0xB8, 0x95, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xAB, 0xE0, 0xB8, 0xA5, 0xE0, + 0xB8, 0xB7, 0xE0, 0xB8, 0xAD, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0x81, 0xE0, 0xB8, 0xB4, 0xE0, 0xB8, 0x99 }, + new byte[] { 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x9A }, new byte[] { 43 }, new byte[] { 0xE0, 0xB9, 0x84, 0xE0, 0xB8, 0xA1, 0xE0, 0xB9, 0x88, 0xE0, 0xB9, + 0x83, 0xE0, 0xB8, 0x8A, 0xE0, 0xB9, 
0x88, 0xE0, 0xB8, 0x95, 0xE0, 0xB8, 0xB1, 0xE0, 0xB8, 0xA7, 0xE0, 0xB9, 0x80, 0xE0, 0xB8, 0xA5, 0xE0, 0xB8, 0x82 }, + new byte[] { 69 }, new byte[] { 101 }, + }; + + public class ThaiSymbolTable : SymbolTable + { + public ThaiSymbolTable() : base(s_thaiUtf8DigitsAndSymbols) { } + + public override bool TryEncode(byte utf8, Span destination, out int bytesWritten) + => SymbolTable.InvariantUtf8.TryEncode(utf8, destination, out bytesWritten); + + public override bool TryEncode(ReadOnlySpan utf8, Span destination, out int bytesConsumed, out int bytesWritten) + => SymbolTable.InvariantUtf8.TryEncode(utf8, destination, out bytesConsumed, out bytesWritten); + + public override bool TryParse(ReadOnlySpan source, out byte utf8, out int bytesConsumed) + => SymbolTable.InvariantUtf8.TryParse(source, out utf8, out bytesConsumed); + + public override bool TryParse(ReadOnlySpan source, Span utf8, out int bytesConsumed, out int bytesWritten) + => SymbolTable.InvariantUtf8.TryParse(source, utf8, out bytesConsumed, out bytesWritten); + } + + public static SymbolTable ThaiTable = new ThaiSymbolTable(); } } From 65b8221702a94b75481f73ec6a9cd258017769d8 Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Fri, 23 Jun 2017 18:47:28 -0700 Subject: [PATCH 10/16] Fixing non-invariant int32 parser --- .../LowAllocationWebServerLibrary/OwnedBuffer.cs | 2 +- src/System.Text.Primitives/System/Text/Parsing/Signed.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/LowAllocationWebServer/LowAllocationWebServerLibrary/OwnedBuffer.cs b/samples/LowAllocationWebServer/LowAllocationWebServerLibrary/OwnedBuffer.cs index c072236fc04..2cf98cfc3e4 100644 --- a/samples/LowAllocationWebServer/LowAllocationWebServerLibrary/OwnedBuffer.cs +++ b/samples/LowAllocationWebServer/LowAllocationWebServerLibrary/OwnedBuffer.cs @@ -136,7 +136,7 @@ public override BufferHandle Pin(int index = 0) { Retain(); var handle = GCHandle.Alloc(_array, GCHandleType.Pinned); - var pointer = 
Add(handle.AddrOfPinnedObject(), index); + var pointer = Add((void*)handle.AddrOfPinnedObject(), index); return new BufferHandle(this, pointer, handle); } } diff --git a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs index 2a7b52aa5e3..ccc4023acb3 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs @@ -384,7 +384,7 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int { answer = answer * 10 + (int)symbol - (int)SymbolTable.Symbol.D0; index += consumed; - if (index - firstNonZeroDigitIndex == Int32OverflowLength * numBytes - 1) + if (index - firstNonZeroDigitIndex == (Int32OverflowLength - 1) * numBytes) { if (!symbolTable.TryParse(text.Slice(index), out symbol, out consumed)) goto Done; if (IsValid(symbol)) From cfa230fe5825e8ba8b592e6227722e9c7efa805e Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Fri, 23 Jun 2017 18:53:30 -0700 Subject: [PATCH 11/16] Addressing PR comment, removing unused DangerousGetPinnableReference --- src/System.Text.Primitives/System/Text/Parsing/Signed.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs index ccc4023acb3..3c6919ea9f5 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs @@ -333,7 +333,6 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int int textLength = text.Length; if (textLength < 1) goto FalseExit; - ref byte textByte = ref text.DangerousGetPinnableReference(); if (!symbolTable.TryParse(text, out SymbolTable.Symbol symbol, out int consumed)) goto FalseExit; sbyte sign = 1; From 86f4969b2fa43e5c2d308f4638d93ae2aa974a99 Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Fri, 23 Jun 2017 21:34:23 -0700 Subject: [PATCH 12/16] Cleanup and 
updating test helpers --- .../PerformanceTests.cs | 9 +- .../PrimitiveFormattingTests.cs | 124 +++++++++--------- .../Parsing/PrimitiveParserBoolPerfTests.cs | 6 +- .../Parsing/PrimitiveParserInt32PerfTests.cs | 15 +-- .../Parsing/PrimitiveParserPerfTests.cs | 40 ------ .../Parsing/PrimitiveParserUInt32PerfTests.cs | 88 ++++++------- .../Parsing/PrimitiveParserUInt64PerfTests.cs | 44 +++---- .../TestHelper.cs | 30 +++++ 8 files changed, 174 insertions(+), 182 deletions(-) delete mode 100644 tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs diff --git a/tests/System.Text.Formatting.Tests/PerformanceTests.cs b/tests/System.Text.Formatting.Tests/PerformanceTests.cs index e162467e33d..b9b120eeed1 100644 --- a/tests/System.Text.Formatting.Tests/PerformanceTests.cs +++ b/tests/System.Text.Formatting.Tests/PerformanceTests.cs @@ -34,7 +34,7 @@ private void InvariantFormatIntDec() StringFormatter sb = new StringFormatter(numbersToWrite, pool); for (int i = 0; i < numbersToWrite; i++) { - sb.Append(i % 10); + sb.Append(((int)(i % 10))); } var text = sb.ToString(); if (text.Length != numbersToWrite) @@ -54,7 +54,7 @@ private void InvariantFormatIntDecClr() StringBuilder sb = new StringBuilder(numbersToWrite); for (int i = 0; i < numbersToWrite; i++) { - sb.Append(i % 10); + sb.Append(((int)(i % 10))); } var text = sb.ToString(); if (text.Length != numbersToWrite) @@ -74,7 +74,7 @@ private void InvariantFormatIntHex() StringFormatter sb = new StringFormatter(numbersToWrite, pool); for (int i = 0; i < numbersToWrite; i++) { - sb.Append(i % 10, TextFormat.HexUppercase); + sb.Append(((int)(i % 10)), TextFormat.HexUppercase); } var text = sb.ToString(); if (text.Length != numbersToWrite) @@ -94,7 +94,7 @@ private void InvariantFormatIntHexClr() StringBuilder sb = new StringBuilder(numbersToWrite); for (int i = 0; i < numbersToWrite; i++) { - sb.Append((i % 10).ToString("X")); + sb.Append(((int)(i % 10)).ToString("X")); } var text = sb.ToString(); if 
(text.Length != numbersToWrite) @@ -279,4 +279,3 @@ static byte[] GetBytesUtf16(string text) } } } - diff --git a/tests/System.Text.Formatting.Tests/PrimitiveFormattingTests.cs b/tests/System.Text.Formatting.Tests/PrimitiveFormattingTests.cs index 9fb2a94b8c3..18b4e98f9d5 100644 --- a/tests/System.Text.Formatting.Tests/PrimitiveFormattingTests.cs +++ b/tests/System.Text.Formatting.Tests/PrimitiveFormattingTests.cs @@ -46,7 +46,7 @@ public void BasicStringFormatter() sb.Append("hi"); sb.Append(1); sb.Append("hello"); - sb.Append(-20); + sb.Append((sbyte)-20); Assert.Equal("hi1hello-20", sb.ToString()); } @@ -140,9 +140,9 @@ public void Int64BasicTests() CheckInt64(long.MaxValue, "x", "7fffffffffffffff", formatter); CheckInt64(long.MinValue, "X", "8000000000000000", formatter); - CheckInt64(-10, "X", "FFFFFFFFFFFFFFF6", formatter); - CheckInt64(-1, "X", "FFFFFFFFFFFFFFFF", formatter); - CheckInt64(0, "X", "0", formatter); + CheckInt64(-10, "X", "FFFFFFFFFFFFFFF6", formatter); + CheckInt64(-1, "X", "FFFFFFFFFFFFFFFF", formatter); + CheckInt64(0, "X", "0", formatter); CheckInt64(1, "X", "1", formatter); CheckInt64(10, "X", "A", formatter); CheckInt64(long.MaxValue, "X", "7FFFFFFFFFFFFFFF", formatter); @@ -198,14 +198,14 @@ public void FormatDefault() { var sb = new StringFormatter(); sb.Append('C'); - sb.Append(-10); - sb.Append(99); - sb.Append(-10); - sb.Append(99); - sb.Append(-10); - sb.Append(99); - sb.Append(-10); - sb.Append(99); + sb.Append((sbyte)-10); + sb.Append((byte)99); + sb.Append((short)-10); + sb.Append((ushort)99); + sb.Append((int)-10); + sb.Append((uint)99); + sb.Append((long)-10); + sb.Append((ulong)99); var result = sb.ToString(); Assert.Equal("C-1099-1099-1099-1099", result); } @@ -215,14 +215,14 @@ public void FormatD() { var format = TextFormat.Parse("D"); var sb = new StringFormatter(); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, format); - sb.Append(99, format); - 
sb.Append(-10, format); - sb.Append(99, format); + sb.Append((sbyte)-10, format); + sb.Append((byte)99, format); + sb.Append((short)-10, format); + sb.Append((ushort)99, format); + sb.Append((int)-10, format); + sb.Append((uint)99, format); + sb.Append((long)-10, format); + sb.Append((ulong)99, format); var result = sb.ToString(); Assert.Equal("-1099-1099-1099-1099", result); } @@ -232,14 +232,14 @@ public void FormatDPrecision() { var format = TextFormat.Parse("D3"); var sb = new StringFormatter(); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, format); - sb.Append(99, format); + sb.Append((sbyte)-10, format); + sb.Append((byte)99, format); + sb.Append((short)-10, format); + sb.Append((ushort)99, format); + sb.Append((int)-10, format); + sb.Append((uint)99, format); + sb.Append((long)-10, format); + sb.Append((ulong)99, format); var result = sb.ToString(); Assert.Equal("-010099-010099-010099-010099", result); } @@ -249,14 +249,14 @@ public void FormatG() { var format = TextFormat.Parse("G"); var sb = new StringFormatter(); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, format); - sb.Append(99, format); + sb.Append((sbyte)-10, format); + sb.Append((byte)99, format); + sb.Append((short)-10, format); + sb.Append((ushort)99, format); + sb.Append((int)-10, format); + sb.Append((uint)99, format); + sb.Append((long)-10, format); + sb.Append((ulong)99, format); var result = sb.ToString(); Assert.Equal("-1099-1099-1099-1099", result); } @@ -266,14 +266,14 @@ public void FormatNPrecision() { var format = TextFormat.Parse("N1"); var sb = new StringFormatter(); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, format); - sb.Append(99, format); - sb.Append(-10, 
format); - sb.Append(99, format); + sb.Append((sbyte)-10, format); + sb.Append((byte)99, format); + sb.Append((short)-10, format); + sb.Append((ushort)99, format); + sb.Append((int)-10, format); + sb.Append((uint)99, format); + sb.Append((long)-10, format); + sb.Append((ulong)99, format); var result = sb.ToString(); Assert.Equal("-10.099.0-10.099.0-10.099.0-10.099.0", result); } @@ -285,16 +285,16 @@ public void FormatX() var X = TextFormat.Parse("X"); var sb = new StringFormatter(); - sb.Append(255, x); - sb.Append(255, X); + sb.Append((ulong)255, x); + sb.Append((uint)255, X); Assert.Equal("ffFF", sb.ToString()); sb.Clear(); - sb.Append(-1, X); + sb.Append((int)-1, X); Assert.Equal("FFFFFFFF", sb.ToString()); sb.Clear(); - sb.Append(-2, X); + sb.Append((int)-2, X); Assert.Equal("FFFFFFFE", sb.ToString()); } @@ -305,17 +305,17 @@ public void FormatXUtf8() var X = TextFormat.Parse("X"); var sb = new ArrayFormatter(256, SymbolTable.InvariantUtf8); - sb.Append(255, x); - sb.Append(255, X); + sb.Append((ulong)255, x); + sb.Append((uint)255, X); Assert.Equal("ffFF", new Utf8String(sb.Formatted.AsSpan()).ToString()); sb.Clear(); - sb.Append(-1, X); + sb.Append((int)-1, X); Assert.Equal("FFFFFFFF", new Utf8String(sb.Formatted.AsSpan()).ToString()); sb.Clear(); - sb.Append(-2, X); + sb.Append((int)-2, X); Assert.Equal("FFFFFFFE", new Utf8String(sb.Formatted.AsSpan()).ToString()); } @@ -326,16 +326,16 @@ public void FormatXPrecision() var X = TextFormat.Parse("X10"); var sb = new StringFormatter(); - sb.Append(255, x); - sb.Append(255, X); + sb.Append((ulong)255, x); + sb.Append((uint)255, X); Assert.Equal("00000000ff00000000FF", sb.ToString()); sb.Clear(); - sb.Append(-1, X); + sb.Append((int)-1, X); Assert.Equal("00FFFFFFFF", sb.ToString()); sb.Clear(); - sb.Append(-2, X); + sb.Append((int)-2, X); Assert.Equal("00FFFFFFFE", sb.ToString()); } @@ -345,7 +345,8 @@ public void Int32ToStreamUtf8() var buffer = new byte[1024]; MemoryStream stream = new MemoryStream(buffer); - 
using(var writer = new StreamFormatter(stream, SymbolTable.InvariantUtf8, pool)) { + using (var writer = new StreamFormatter(stream, SymbolTable.InvariantUtf8, pool)) + { writer.Append(100); writer.Append(-100); writer.Append('h'); @@ -360,7 +361,8 @@ public void FormatString() var buffer = new byte[1024]; MemoryStream stream = new MemoryStream(buffer); - using(var utf8Writer = new StreamFormatter(stream, SymbolTable.InvariantUtf8, pool)) { + using (var utf8Writer = new StreamFormatter(stream, SymbolTable.InvariantUtf8, pool)) + { utf8Writer.Append("Hello"); utf8Writer.Append(" "); utf8Writer.Append("World!"); @@ -371,7 +373,8 @@ public void FormatString() } stream.Position = 0; - using(var utf16Writer = new StreamFormatter(stream, SymbolTable.InvariantUtf16, pool)) { + using (var utf16Writer = new StreamFormatter(stream, SymbolTable.InvariantUtf16, pool)) + { utf16Writer.Append("Hello"); utf16Writer.Append(" "); utf16Writer.Append("World!"); @@ -390,7 +393,8 @@ public void FormatLongStringToUtf8() string data = new string('#', length); formatter.Append(data); Assert.Equal(length, formatter.CommitedByteCount); - for(int i=0; i utf8ByteSpan = utf8ByteArray[i % textLength]; PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan, out int value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -157,7 +156,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed(string text) for (int i = 0; i < Benchmark.InnerIterationCount; i++) { PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan, out int value, out int bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -181,7 +180,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength( { ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; PrimitiveParser.InvariantUtf8.TryParseInt32(utf8ByteSpan, out int value, out int bytesConsumed); - DoNotIgnore(value, bytesConsumed); + 
TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -261,7 +260,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_Baseline(string for (int i = 0; i < Benchmark.InnerIterationCount; i++) { int.TryParse(text, out int value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -284,7 +283,7 @@ private static void PrimitiveParserByteSpanToInt32_BytesConsumed_VariableLength_ for (int i = 0; i < Benchmark.InnerIterationCount; i++) { int.TryParse(s_Int32TextArray[i % textLength], out int value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -362,7 +361,7 @@ public unsafe void ParseInt32Thai(string text) for (int i = 0; i < Benchmark.InnerIterationCount; i++) { PrimitiveParser.TryParseInt32(utf8Span, out int value, out int bytesConsumed, 'G', TestHelper.ThaiTable); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs deleted file mode 100644 index f8779d9fef6..00000000000 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserPerfTests.cs +++ /dev/null @@ -1,40 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. 
- -using System.Runtime.CompilerServices; - -namespace System.Text.Primitives.Tests -{ - public partial class PrimitiveParserPerfTests - { - private const int LoadIterations = 30000; - - [MethodImpl(MethodImplOptions.NoInlining)] - private static void DoNotIgnore(uint value, int consumed) - { - } - - [MethodImpl(MethodImplOptions.NoInlining)] - private static void DoNotIgnore(ulong value, int consumed) - { - } - - [MethodImpl(MethodImplOptions.NoInlining)] - private static void DoNotIgnore(int value, int consumed) - { - } - - private static void PrintTestName(string testString, [CallerMemberName] string testName = "") - { - if (testString != null) - { - Console.WriteLine("{0} called with test string \"{1}\".", testName, testString); - } - else - { - Console.WriteLine("{0} called with no test string.", testName); - } - } - } -} diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserUInt32PerfTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserUInt32PerfTests.cs index e3f5dd6c941..a49950f0a7e 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserUInt32PerfTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserUInt32PerfTests.cs @@ -47,11 +47,11 @@ private static void BaselineSimpleByteStarToUInt32(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; uint.TryParse(text, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -64,11 +64,11 @@ private static void BaselineSimpleByteStarToUInt32_VariableLength() { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; uint.TryParse(s_UInt32TextArray[i % 10], out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -84,11 +84,11 @@ private static void BaselineByteStarToUInt32(string text) { using 
(iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; uint.TryParse(text, NumberStyles.None, CultureInfo.InvariantCulture, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -101,11 +101,11 @@ private static void BaselineByteStarToUInt32_VariableLength() { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; uint.TryParse(s_UInt32TextArray[i % 10], NumberStyles.None, CultureInfo.InvariantCulture, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -121,11 +121,11 @@ private static void BaselineByteStarToUInt32Hex(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; uint.TryParse(text, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -138,11 +138,11 @@ private static void BaselineByteStarToUInt32Hex_VariableLength() { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; uint.TryParse(s_UInt32TextArrayHex[i % 8], NumberStyles.HexNumber, CultureInfo.InvariantCulture, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -162,11 +162,11 @@ private unsafe static void PrimitiveParserByteStarToUInt32(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; PrimitiveParser.InvariantUtf8.TryParseUInt32(utf8ByteStar, length, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -186,14 +186,14 @@ private unsafe static void PrimitiveParserByteStarToUInt32_VariableLength() { using 
(iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { byte[] utf8ByteArray = byteArrayList[i % 10]; fixed (byte* utf8ByteStar = utf8ByteArray) { uint value; PrimitiveParser.InvariantUtf8.TryParseUInt32(utf8ByteStar, utf8ByteArray.Length, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -214,12 +214,12 @@ private unsafe static void PrimitiveParserByteStarToUInt32_BytesConsumed(string { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; int bytesConsumed; PrimitiveParser.InvariantUtf8.TryParseUInt32(utf8ByteStar, length, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -239,7 +239,7 @@ private unsafe static void PrimitiveParserByteStarToUInt32_BytesConsumed_Variabl { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { byte[] utf8ByteArray = byteArrayList[i % 10]; fixed (byte* utf8ByteStar = utf8ByteArray) @@ -247,7 +247,7 @@ private unsafe static void PrimitiveParserByteStarToUInt32_BytesConsumed_Variabl uint value; int bytesConsumed; PrimitiveParser.InvariantUtf8.TryParseUInt32(utf8ByteStar, utf8ByteArray.Length, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -266,11 +266,11 @@ private unsafe static void PrimitiveParserByteSpanToUInt32(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; PrimitiveParser.InvariantUtf8.TryParseUInt32(utf8ByteSpan, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -290,12 +290,12 @@ private unsafe static void 
PrimitiveParserByteSpanToUInt32_VariableLength() { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; uint value; PrimitiveParser.InvariantUtf8.TryParseUInt32(utf8ByteSpan, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -313,12 +313,12 @@ private unsafe static void PrimitiveParserByteSpanToUInt32_BytesConsumed(string { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; int bytesConsumed; PrimitiveParser.InvariantUtf8.TryParseUInt32(utf8ByteSpan, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -337,13 +337,13 @@ private unsafe static void PrimitiveParserByteSpanToUInt32_BytesConsumed_Variabl { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; uint value; int bytesConsumed; PrimitiveParser.InvariantUtf8.TryParseUInt32(utf8ByteSpan, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -363,11 +363,11 @@ private unsafe static void PrimitiveParserByteStarToUInt32Hex(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt32(utf8ByteStar, length, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -387,14 +387,14 @@ private unsafe static void PrimitiveParserByteStarToUInt32Hex_VariableLength() { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; 
i++) { byte[] utf8ByteArray = byteArrayList[i % 8]; fixed (byte* utf8ByteStar = utf8ByteArray) { uint value; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt32(utf8ByteStar, utf8ByteArray.Length, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -415,12 +415,12 @@ private unsafe static void PrimitiveParserByteStarToUInt32Hex_BytesConsumed(stri { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; int bytesConsumed; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt32(utf8ByteStar, length, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -440,7 +440,7 @@ private unsafe static void PrimitiveParserByteStarToUInt32Hex_BytesConsumed_Vari { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { byte[] utf8ByteArray = byteArrayList[i % 8]; fixed (byte* utf8ByteStar = utf8ByteArray) @@ -448,7 +448,7 @@ private unsafe static void PrimitiveParserByteStarToUInt32Hex_BytesConsumed_Vari uint value; int bytesConsumed; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt32(utf8ByteStar, utf8ByteArray.Length, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -467,11 +467,11 @@ private unsafe static void PrimitiveParserByteSpanToUInt32Hex(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt32(utf8ByteSpan, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -490,12 +490,12 @@ private unsafe static void PrimitiveParserByteSpanToUInt32Hex_VariableLength() { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 
0; i < TestHelper.LoadIterations; i++) { ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; uint value; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt32(utf8ByteSpan, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -513,12 +513,12 @@ private unsafe static void PrimitiveParserByteSpanToUInt32Hex_BytesConsumed(stri { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { uint value; int bytesConsumed; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt32(utf8ByteSpan, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -537,13 +537,13 @@ private unsafe static void PrimitiveParserByteSpanToUInt32Hex_BytesConsumed_Vari { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ReadOnlySpan utf8ByteSpan = utf8ByteArray[i % textLength]; uint value; int bytesConsumed; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt32(utf8ByteSpan, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } diff --git a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserUInt64PerfTests.cs b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserUInt64PerfTests.cs index e5e93a51114..04c3a9d2661 100644 --- a/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserUInt64PerfTests.cs +++ b/tests/System.Text.Primitives.Tests/Parsing/PrimitiveParserUInt64PerfTests.cs @@ -20,11 +20,11 @@ private static void BaselineSimpleByteStarToUInt64(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; ulong.TryParse(text, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -40,11 +40,11 @@ private static void 
BaselineByteStarToUInt64(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; ulong.TryParse(text, NumberStyles.None, CultureInfo.InvariantCulture, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -60,11 +60,11 @@ private static void BaselineByteStarToUInt64Hex(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; ulong.TryParse(text, NumberStyles.HexNumber, CultureInfo.InvariantCulture, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -84,11 +84,11 @@ private unsafe static void PrimitiveParserByteStarToUInt64(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; PrimitiveParser.InvariantUtf8.TryParseUInt64(utf8ByteStar, length, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -109,12 +109,12 @@ private unsafe static void PrimitiveParserByteStarToUInt64_BytesConsumed(string { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; int bytesConsumed; PrimitiveParser.InvariantUtf8.TryParseUInt64(utf8ByteStar, length, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -133,11 +133,11 @@ private unsafe static void PrimitiveParserByteSpanToUInt64(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; PrimitiveParser.InvariantUtf8.TryParseUInt64(utf8ByteSpan, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -155,12 +155,12 @@ private 
unsafe static void PrimitiveParserByteSpanToUInt64_BytesConsumed(string { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; int bytesConsumed; PrimitiveParser.InvariantUtf8.TryParseUInt64(utf8ByteSpan, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -180,11 +180,11 @@ private unsafe static void PrimitiveParserByteStarToUInt64Hex(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt64(utf8ByteStar, length, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -205,12 +205,12 @@ private unsafe static void PrimitiveParserByteStarToUInt64Hex_BytesConsumed(stri { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; int bytesConsumed; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt64(utf8ByteStar, length, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } @@ -229,11 +229,11 @@ private unsafe static void PrimitiveParserByteSpanToUInt64Hex(string text) { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; PrimitiveParser.InvariantUtf8.Hex.TryParseUInt64(utf8ByteSpan, out value); - DoNotIgnore(value, 0); + TestHelper.DoNotIgnore(value, 0); } } } @@ -251,12 +251,12 @@ private unsafe static void PrimitiveParserByteSpanToUInt64Hex_BytesConsumed(stri { using (iteration.StartMeasurement()) { - for (int i = 0; i < LoadIterations; i++) + for (int i = 0; i < TestHelper.LoadIterations; i++) { ulong value; int bytesConsumed; 
PrimitiveParser.InvariantUtf8.Hex.TryParseUInt64(utf8ByteSpan, out value, out bytesConsumed); - DoNotIgnore(value, bytesConsumed); + TestHelper.DoNotIgnore(value, bytesConsumed); } } } diff --git a/tests/System.Text.Primitives.Tests/TestHelper.cs b/tests/System.Text.Primitives.Tests/TestHelper.cs index 0f2e3a103b3..d53e504247b 100644 --- a/tests/System.Text.Primitives.Tests/TestHelper.cs +++ b/tests/System.Text.Primitives.Tests/TestHelper.cs @@ -6,11 +6,41 @@ using Xunit; using System.Runtime.InteropServices; using System.Threading; +using System.Runtime.CompilerServices; namespace System.Text.Primitives.Tests { public static class TestHelper { + public const int LoadIterations = 30000; + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void DoNotIgnore(uint value, int consumed) + { + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void DoNotIgnore(ulong value, int consumed) + { + } + + [MethodImpl(MethodImplOptions.NoInlining)] + public static void DoNotIgnore(int value, int consumed) + { + } + + public static void PrintTestName(string testString, [CallerMemberName] string testName = "") + { + if (testString != null) + { + Console.WriteLine("{0} called with test string \"{1}\".", testName, testString); + } + else + { + Console.WriteLine("{0} called with no test string.", testName); + } + } + public static string SpanToString(Span span, SymbolTable symbolTable = null) { if (symbolTable == null || symbolTable == SymbolTable.InvariantUtf8) From e89a809eadc75430eac7445fdf5a597da30c4bcd Mon Sep 17 00:00:00 2001 From: Robert Anderson Date: Mon, 26 Jun 2017 14:08:02 -0700 Subject: [PATCH 13/16] Patch to dotnet install scripts for downloading via new blob URL. 
(#1632) --- scripts/install-dotnet.ps1 | 2 +- scripts/install-dotnet.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/install-dotnet.ps1 b/scripts/install-dotnet.ps1 index 15cfb72fcca..92f1e58ea8c 100644 --- a/scripts/install-dotnet.ps1 +++ b/scripts/install-dotnet.ps1 @@ -253,7 +253,7 @@ function Get-Download-Link([string]$AzureFeed, [string]$Channel, [string]$Specif $PayloadURL = "$AzureFeed/Runtime/$SpecificVersion/dotnet-runtime-$SpecificVersion-win-$CLIArchitecture.zip" } else { - $PayloadURL = "$AzureFeed/Sdk/$SpecificVersion/dotnet-dev-$SpecificVersion-win-$CLIArchitecture.zip" + $PayloadURL = "$AzureFeed/Sdk/$SpecificVersion/dotnet-sdk-$SpecificVersion-win-$CLIArchitecture.zip" } Say-Verbose "Constructed primary payload URL: $PayloadURL" diff --git a/scripts/install-dotnet.sh b/scripts/install-dotnet.sh index 99857d7d726..15108013d8d 100755 --- a/scripts/install-dotnet.sh +++ b/scripts/install-dotnet.sh @@ -400,7 +400,7 @@ construct_download_link() { if [ "$shared_runtime" = true ]; then download_link="$azure_feed/Runtime/$specific_version/dotnet-runtime-$specific_version-$osname-$normalized_architecture.tar.gz" else - download_link="$azure_feed/Sdk/$specific_version/dotnet-dev-$specific_version-$osname-$normalized_architecture.tar.gz" + download_link="$azure_feed/Sdk/$specific_version/dotnet-sdk-$specific_version-$osname-$normalized_architecture.tar.gz" fi echo "$download_link" From f4c8e8315f3fab567f256828dcc8a06ffd7b004f Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Mon, 26 Jun 2017 14:16:48 -0700 Subject: [PATCH 14/16] Removing text.Length cache and changing to unsigned comparison --- .../System/Text/Parsing/InvariantSigned.cs | 37 ++++++++++--------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs index 70abfdee0c2..14cbb0e4fe3 100644 --- 
a/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/InvariantSigned.cs @@ -901,8 +901,7 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value) public static bool TryParseInt32(ReadOnlySpan text, out int value, out int bytesConsumed) { - int textLength = text.Length; - if (textLength < 1) goto FalseExit; + if (text.Length < 1) goto FalseExit; int sign = 1; int index = 0; @@ -911,13 +910,13 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int { sign = -1; index++; - if (index >= textLength) goto FalseExit; + if ((uint)index >= (uint)text.Length) goto FalseExit; num = text[index]; } else if (num == '+') { index++; - if (index >= textLength) goto FalseExit; + if ((uint)index >= (uint)text.Length) goto FalseExit; num = text[index]; } @@ -930,7 +929,7 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int do { index++; - if (index >= textLength) goto Done; + if ((uint)index >= (uint)text.Length) goto Done; num = text[index]; } while (num == '0'); if (!IsDigit(num)) goto Done; @@ -939,64 +938,66 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int answer = num - '0'; index++; - if (index >= textLength) goto Done; + if ((uint)index >= (uint)text.Length) goto Done; num = text[index]; if (!IsDigit(num)) goto Done; index++; answer = 10 * answer + num - '0'; - if (index >= textLength) goto Done; + if ((uint)index >= (uint)text.Length) goto Done; num = text[index]; if (!IsDigit(num)) goto Done; index++; answer = 10 * answer + num - '0'; - if (index >= textLength) goto Done; + if ((uint)index >= (uint)text.Length) goto Done; num = text[index]; if (!IsDigit(num)) goto Done; index++; answer = 10 * answer + num - '0'; - if (index >= textLength) goto Done; + if ((uint)index >= (uint)text.Length) goto Done; num = text[index]; if (!IsDigit(num)) goto Done; index++; answer = 10 * answer + num - '0'; - if (index >= textLength) goto 
Done; + if ((uint)index >= (uint)text.Length) goto Done; num = text[index]; if (!IsDigit(num)) goto Done; index++; answer = 10 * answer + num - '0'; - if (index >= textLength) goto Done; + if ((uint)index >= (uint)text.Length) goto Done; num = text[index]; if (!IsDigit(num)) goto Done; index++; answer = 10 * answer + num - '0'; - if (index >= textLength) goto Done; + if ((uint)index >= (uint)text.Length) goto Done; num = text[index]; if (!IsDigit(num)) goto Done; index++; answer = 10 * answer + num - '0'; - if (index >= textLength) goto Done; + if ((uint)index >= (uint)text.Length) goto Done; num = text[index]; if (!IsDigit(num)) goto Done; index++; answer = 10 * answer + num - '0'; // Potential overflow - if (index >= textLength) goto Done; + if ((uint)index >= (uint)text.Length) goto Done; num = text[index]; if (!IsDigit(num)) goto Done; - long lAnswer = (long)answer * 10 + num - '0'; + if (answer > Int32.MaxValue / 10 + 1) goto FalseExit; // Overflow + answer = answer * 10 + num - '0'; - if (lAnswer > (long)Int32.MaxValue + (-1 * sign + 1) / 2) goto FalseExit; - answer = (int)lAnswer; + // if sign < 0, (-1 * sign + 1) / 2 = 1 + // else, (-1 * sign + 1) / 2 = 0 + if ((uint)answer > (uint)Int32.MaxValue + (-1 * sign + 1) / 2) goto FalseExit; // Overflow index++; - if (index >= textLength) goto Done; + if ((uint)index >= (uint)text.Length) goto Done; if (!IsDigit(text[index])) goto Done; // Guaranteed overflow From b7ce27518dccb6146eb5caf21d218bf942895ee8 Mon Sep 17 00:00:00 2001 From: ahsonkhan Date: Mon, 26 Jun 2017 16:51:54 -0700 Subject: [PATCH 15/16] Adding comment and removing unnecessary math operations using 0 (D0) --- .../System/Text/Encoding/SymbolTable.Symbol.cs | 1 + src/System.Text.Primitives/System/Text/Parsing/Signed.cs | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/System.Text.Primitives/System/Text/Encoding/SymbolTable.Symbol.cs b/src/System.Text.Primitives/System/Text/Encoding/SymbolTable.Symbol.cs index 
0511c8bb59c..7e6e317469c 100644 --- a/src/System.Text.Primitives/System/Text/Encoding/SymbolTable.Symbol.cs +++ b/src/System.Text.Primitives/System/Text/Encoding/SymbolTable.Symbol.cs @@ -5,6 +5,7 @@ namespace System.Text { public partial class SymbolTable { + // Do not change the specific enum values without careful consideration of the impacts to the parsers. public enum Symbol : ushort { D0 = (ushort)0, D1 = (ushort)1, diff --git a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs index 3c6919ea9f5..67040c462cf 100644 --- a/src/System.Text.Primitives/System/Text/Parsing/Signed.cs +++ b/src/System.Text.Primitives/System/Text/Parsing/Signed.cs @@ -371,7 +371,7 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int { do { - answer = answer * 10 + (int)symbol - (int)SymbolTable.Symbol.D0; + answer = answer * 10 + (int)symbol; index += consumed; if (index >= textLength) goto Done; if (!symbolTable.TryParse(text.Slice(index), out symbol, out consumed)) goto Done; @@ -381,14 +381,13 @@ public static bool TryParseInt32(ReadOnlySpan text, out int value, out int { do { - answer = answer * 10 + (int)symbol - (int)SymbolTable.Symbol.D0; + answer = answer * 10 + (int)symbol; index += consumed; if (index - firstNonZeroDigitIndex == (Int32OverflowLength - 1) * numBytes) { if (!symbolTable.TryParse(text.Slice(index), out symbol, out consumed)) goto Done; if (IsValid(symbol)) { - symbol -= (int)SymbolTable.Symbol.D0; if (WillOverFlow(answer, (int)symbol, sign)) goto FalseExit; answer = answer * 10 + (int)symbol; index += consumed; From 1a329f4ca844036dfc144d5fd5693586100101b9 Mon Sep 17 00:00:00 2001 From: Denys Tsomenko Date: Tue, 27 Jun 2017 03:00:36 +0300 Subject: [PATCH 16/16] Api brotli changes (#1621) * CaterburyPerf * flush/compress * huge changes * space * small * flush/compress * huge changes * space * small * unsaved chagnes * test fix depends on APIchanges * resolve issues * Less 
alocation, change State * issue * remove unnecessary * resolve * clean up changes, change state * resolve * guidelines * fix bug * resolve issues * change access * small issues --- .../Resources/BrotliEx.Designer.cs | 9 + .../Resources/BrotliEx.resx | 3 + .../System/IO/Compression/Brotli.cs | 214 +++++++++++++++--- .../System/IO/Compression/BrotliNative.cs | 25 +- .../System/IO/Compression/BrotliStream.cs | 214 +++++------------- .../System/Interop/Interop.Brotli.cs | 8 - ...anceTests.cs => BrotliPerformanceTests.cs} | 14 +- .../BrotliPrimitivesTests.cs | 36 +-- .../BrotliStreamTests.cs | 9 +- 9 files changed, 295 insertions(+), 237 deletions(-) rename tests/System.IO.Compression.Tests/{BrotliPerfomanceTests.cs => BrotliPerformanceTests.cs} (92%) diff --git a/src/System.IO.Compression.Brotli/Resources/BrotliEx.Designer.cs b/src/System.IO.Compression.Brotli/Resources/BrotliEx.Designer.cs index 71486d0f038..dd7271e7315 100644 --- a/src/System.IO.Compression.Brotli/Resources/BrotliEx.Designer.cs +++ b/src/System.IO.Compression.Brotli/Resources/BrotliEx.Designer.cs @@ -97,6 +97,15 @@ internal static string InvalidArgument { } } + /// + /// Looks up a localized string similar to Mode change is not permitted . + /// + internal static string InvalidModeChange { + get { + return ResourceManager.GetString("InvalidModeChange", resourceCulture); + } + } + /// /// Looks up a localized string similar to Quality and WindowSize is ambitious for Decompress mode. 
/// diff --git a/src/System.IO.Compression.Brotli/Resources/BrotliEx.resx b/src/System.IO.Compression.Brotli/Resources/BrotliEx.resx index 6026a452418..d7cdc32491a 100644 --- a/src/System.IO.Compression.Brotli/Resources/BrotliEx.resx +++ b/src/System.IO.Compression.Brotli/Resources/BrotliEx.resx @@ -159,4 +159,7 @@ Error WriteTimeout exceeded + + Mode change is not permitted + \ No newline at end of file diff --git a/src/System.IO.Compression.Brotli/System/IO/Compression/Brotli.cs b/src/System.IO.Compression.Brotli/System/IO/Compression/Brotli.cs index 3ae06a26c44..8e3c48f1816 100644 --- a/src/System.IO.Compression.Brotli/System/IO/Compression/Brotli.cs +++ b/src/System.IO.Compression.Brotli/System/IO/Compression/Brotli.cs @@ -4,15 +4,116 @@ #if BIT64 using nuint = System.UInt64; #else -using nuint = System.UInt32; + using nuint = System.UInt32; #endif namespace System.IO.Compression { public static class Brotli { - const int DefaultQuality = 11; - const int DefaultWindowSize = 24; + private const int MinWindowBits = 10; + private const int MaxWindowBits = 24; + private const int MinQuality = 0; + private const int MaxQuality = 11; + + public struct State : IDisposable + { + internal IntPtr BrotliNativeState { get; private set; } + internal BrotliDecoderResult LastDecoderResult; + public bool CompressMode { get; private set; } + + public void Dispose() + { + if (CompressMode) + { + BrotliNative.BrotliEncoderDestroyInstance(BrotliNativeState); + } + else + { + BrotliNative.BrotliDecoderDestroyInstance(BrotliNativeState); + } + } + + internal void InitializeDecoder() + { + BrotliNativeState = BrotliNative.BrotliDecoderCreateInstance(); + LastDecoderResult = BrotliDecoderResult.NeedsMoreInput; + if (BrotliNativeState == IntPtr.Zero) + { + throw new System.Exception(BrotliEx.DecoderInstanceCreate); + } + CompressMode = false; + } + + internal void InitializeEncoder() + { + + BrotliNativeState = BrotliNative.BrotliEncoderCreateInstance(); + if (BrotliNativeState == 
IntPtr.Zero) + { + throw new System.Exception(BrotliEx.EncoderInstanceCreate); + } + CompressMode = true; + } + + public void SetQuality(uint quality) + { + if (BrotliNativeState == IntPtr.Zero) + { + InitializeEncoder(); + } + if (quality > MaxQuality) + { + throw new ArgumentException(BrotliEx.WrongQuality); + } + BrotliNative.BrotliEncoderSetParameter(BrotliNativeState, BrotliEncoderParameter.Quality, quality); + } + + public void SetQuality() + { + SetQuality(MaxQuality); + } + + public void SetWindow(uint window) + { + if (BrotliNativeState == IntPtr.Zero) + { + InitializeEncoder(); + } + if (window - MinWindowBits > MaxWindowBits - MinWindowBits) + { + throw new ArgumentException(BrotliEx.WrongWindowSize); + } + BrotliNative.BrotliEncoderSetParameter(BrotliNativeState, BrotliEncoderParameter.LGWin, window); + } + + public void SetWindow() + { + SetWindow(MaxWindowBits); + } + } + + internal static void EnsureInitialized(ref State state, bool compress) + { + if (state.BrotliNativeState != IntPtr.Zero) + { + if (state.CompressMode != compress) + { + throw new System.Exception((BrotliEx.InvalidModeChange)); + } + return; + } + if (compress) + { + state.SetQuality(); + state.SetWindow(); + } + else + { + state.InitializeDecoder(); + state.LastDecoderResult = BrotliDecoderResult.NeedsMoreInput; + } + } public static int GetMaximumCompressedSize(int inputSize) { @@ -40,16 +141,14 @@ private static TransformationStatus GetTransformationStatusFromBrotliDecoderResu return TransformationStatus.InvalidData; } - public static TransformationStatus Compress(ReadOnlySpan source, Span destination, out int bytesConsumed, out int bytesWritten, CompressionLevel quality = (CompressionLevel)DefaultQuality, int windowSize = DefaultWindowSize, BrotliEncoderMode encMode = BrotliEncoderMode.Generic) + public static TransformationStatus FlushEncoder(ReadOnlySpan source, Span destination, out int bytesConsumed, out int bytesWritten, ref State state, bool isFinished = true) { - return 
Compress(source, destination, out bytesConsumed, out bytesWritten, GetQualityFromCompressionLevel(quality), windowSize, encMode); - } - - internal static TransformationStatus Compress(ReadOnlySpan source, Span destination, out int bytesConsumed, out int bytesWritten, int quality = DefaultQuality, int windowSize = DefaultWindowSize, BrotliEncoderMode encMode = BrotliEncoderMode.Generic) - { - if (quality > DefaultQuality || quality < 0) throw new System.ArgumentOutOfRangeException(BrotliEx.WrongQuality); - if (windowSize > DefaultWindowSize || windowSize <= 0) throw new System.ArgumentOutOfRangeException(BrotliEx.WrongWindowSize); - bytesConsumed = bytesWritten = 0; + EnsureInitialized(ref state, true); + BrotliEncoderOperation operation = isFinished ? BrotliEncoderOperation.Finish : BrotliEncoderOperation.Flush; + bytesWritten = destination.Length; + bytesConsumed = 0; + if (state.BrotliNativeState == IntPtr.Zero) return TransformationStatus.InvalidData; + if (BrotliNative.BrotliEncoderIsFinished(state.BrotliNativeState)) return TransformationStatus.Done; unsafe { IntPtr bufIn, bufOut; @@ -58,22 +157,64 @@ internal static TransformationStatus Compress(ReadOnlySpan source, Span 0) + { + if (BrotliNative.BrotliEncoderIsFinished(state.BrotliNativeState)) return TransformationStatus.Done; + else return TransformationStatus.DestinationTooSmall; + } + } + return TransformationStatus.Done; + } + + public static TransformationStatus Compress(ReadOnlySpan source, Span destination, out int bytesConsumed, out int bytesWritten, ref State state) + { + EnsureInitialized(ref state, true); + bytesWritten = destination.Length; + bytesConsumed = source.Length; + unsafe + { + IntPtr bufIn, bufOut; + while (bytesConsumed > 0) + { + fixed (byte* inBytes = &source.DangerousGetPinnableReference()) + fixed (byte* outBytes = &destination.DangerousGetPinnableReference()) + { + bufIn = new IntPtr(inBytes); + bufOut = new IntPtr(outBytes); + nuint availableOutput = (nuint)bytesWritten; + 
nuint consumed = (nuint)bytesConsumed; + if (!BrotliNative.BrotliEncoderCompressStream(state.BrotliNativeState, BrotliEncoderOperation.Process, ref consumed, ref bufIn, ref availableOutput, ref bufOut, out nuint totalOut)) + { + return TransformationStatus.InvalidData; + }; + bytesConsumed = (int)consumed; + bytesWritten = destination.Length - (int)availableOutput; + if (availableOutput != (nuint)destination.Length) + { + return TransformationStatus.DestinationTooSmall; + } + } } + return TransformationStatus.Done; } } - public static TransformationStatus Decompress(ReadOnlySpan source, Span destination, out int bytesConsumed, out int bytesWritten) + public static TransformationStatus Decompress(ReadOnlySpan source, Span destination, out int bytesConsumed, out int bytesWritten, ref State state) { - bytesConsumed = bytesWritten = 0; + EnsureInitialized(ref state, false); + bytesConsumed = source.Length; + bytesWritten = destination.Length; + if (BrotliNative.BrotliDecoderIsFinished(state.BrotliNativeState)) return TransformationStatus.Done; unsafe { IntPtr bufIn, bufOut; @@ -82,17 +223,34 @@ public static TransformationStatus Decompress(ReadOnlySpan source, Span - /// Generic - Default (Compressor does not know anything in advance properties of the input) - /// Text - For UTF-8 formatted text input - /// Font - Mode used in WOFF 2.0 - /// - public enum BrotliEncoderMode - { - Generic, - Text, - Font - }; - /// /// This class provides declaration for constants and PInvokes as well as some basic tools for exposing the /// native Brotli library to managed code. 
@@ -78,12 +66,6 @@ internal class BrotliNative #region Encoder - public static bool BrotliEncoderCompress(int quality, int windowSize, BrotliEncoderMode mode, nuint inputSize, - IntPtr inputBuffer, ref nuint encodedSize, IntPtr encodedBuffer) - { - return Interop.Brotli.BrotliEncoderCompress(quality, windowSize, mode, inputSize, inputBuffer, ref encodedSize, encodedBuffer); - } - public static IntPtr BrotliEncoderCreateInstance() { return Interop.Brotli.BrotliEncoderCreateInstance(IntPtr.Zero, IntPtr.Zero, IntPtr.Zero); @@ -126,11 +108,6 @@ public static UInt32 BrotliEncoderVersion() #region Decoder - public static BrotliDecoderResult BrotliDecoderDecompress(ref nuint availableIn, IntPtr nextIn, ref nuint availableOut, IntPtr nextOut) - { - return Interop.Brotli.BrotliDecoderDecompress(ref availableIn, nextIn, ref availableOut, nextOut); - } - public static IntPtr BrotliDecoderCreateInstance() { return Interop.Brotli.BrotliDecoderCreateInstance(IntPtr.Zero, IntPtr.Zero, IntPtr.Zero); diff --git a/src/System.IO.Compression.Brotli/System/IO/Compression/BrotliStream.cs b/src/System.IO.Compression.Brotli/System/IO/Compression/BrotliStream.cs index ddbf1987179..56f03c9b416 100644 --- a/src/System.IO.Compression.Brotli/System/IO/Compression/BrotliStream.cs +++ b/src/System.IO.Compression.Brotli/System/IO/Compression/BrotliStream.cs @@ -1,34 +1,34 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; +using System.Buffers; +using System.ComponentModel; +using System.Diagnostics; +using System.IO; using System.IO.Compression.Resources; -using System.Runtime.InteropServices; #if BIT64 using nuint = System.UInt64; #else -using nuint = System.UInt32; + using nuint = System.UInt32; #endif namespace System.IO.Compression { public partial class BrotliStream : Stream { - private const int DefaultBufferSize = (1 << 16) - 1; + private const int DefaultBufferSize = (1 << 16) - 16; //65520 private int _bufferSize; private Stream _stream; private CompressionMode _mode; - private nuint _availableOutput; - private IntPtr _nextOutput = IntPtr.Zero; - private nuint _availableInput; - private IntPtr _nextInput = IntPtr.Zero; - private IntPtr _bufferInput; - private IntPtr _bufferOutput; + private int _availableOutput; + private int _availableInput; + private byte[] _buffer; private bool _leaveOpen; private int totalWrote; - private int _readOffset = 0; - Decoder _decoder; - Encoder _encoder; + private Brotli.State _state; + private TransformationStatus _transformationResult; public override bool CanTimeout => true; @@ -38,26 +38,18 @@ public partial class BrotliStream : Stream public BrotliStream(Stream baseStream, CompressionMode mode, bool leaveOpen, int bufferSize, CompressionLevel quality) : this(baseStream, mode, leaveOpen, bufferSize) { - if (_mode == CompressionMode.Decompress) - { - throw new System.IO.IOException(BrotliEx.QualityAndWinSize); - } - else + if (_mode == CompressionMode.Compress) { - _encoder.SetQuality((uint)Brotli.GetQualityFromCompressionLevel(quality)); + _state.SetQuality((uint)Brotli.GetQualityFromCompressionLevel(quality)); } } public BrotliStream(Stream baseStream, CompressionMode mode, bool leaveOpen, int bufferSize, CompressionLevel quality, uint windowSize) : this(baseStream, mode, leaveOpen, bufferSize) { - if (_mode == CompressionMode.Decompress) - { - throw new System.IO.IOException(BrotliEx.QualityAndWinSize); - } - else + if 
(_mode == CompressionMode.Compress) { - _encoder.SetQuality((uint)Brotli.GetQualityFromCompressionLevel(quality)); - _encoder.SetWindow(windowSize); + _state.SetQuality((uint)Brotli.GetQualityFromCompressionLevel(quality)); + _state.SetWindow(windowSize); } } @@ -67,27 +59,24 @@ public BrotliStream(Stream baseStream, CompressionMode mode, bool leaveOpen = fa { throw new ArgumentNullException("baseStream"); } + _bufferSize = bufferSize; _mode = mode; _stream = baseStream; _leaveOpen = leaveOpen; + _state = new Brotli.State(); if (_mode == CompressionMode.Compress) { - _encoder = new Encoder(); - _encoder.SetQuality(); - _encoder.SetWindow(); + _state.SetQuality(); + _state.SetWindow(); WriteTimeout = 0; } else { - _decoder = new Decoder(); ReadTimeout = 0; } - _bufferSize = bufferSize; - _bufferInput = Marshal.AllocHGlobal(_bufferSize); - _bufferOutput = Marshal.AllocHGlobal(_bufferSize); - _nextInput = _bufferInput; - _nextOutput = _bufferOutput; - _availableOutput = (nuint)_bufferSize; + _buffer = new byte[_bufferSize]; + _transformationResult = TransformationStatus.NeedMoreSourceData; + _availableOutput = _bufferSize; } public override bool CanRead @@ -134,26 +123,19 @@ public override long Position protected virtual void FlushEncoder(bool finished) { - if (_encoder.State == IntPtr.Zero) return; - if (BrotliNative.BrotliEncoderIsFinished(_encoder.State)) return; - BrotliEncoderOperation op = finished ? 
BrotliEncoderOperation.Finish : BrotliEncoderOperation.Flush; - nuint totalOut = 0; - while (true) + if (_state.BrotliNativeState == IntPtr.Zero) return; + if (BrotliNative.BrotliEncoderIsFinished(_state.BrotliNativeState)) return; + TransformationStatus flushStatus = TransformationStatus.DestinationTooSmall; + while (flushStatus == TransformationStatus.DestinationTooSmall) { - if (!BrotliNative.BrotliEncoderCompressStream(_encoder.State, op, ref _availableInput, ref _nextInput, ref _availableOutput, ref _nextOutput, out totalOut)) - throw new System.IO.IOException(BrotliEx.unableEncode); - var extraData = (nuint)_availableOutput != (nuint)_bufferSize; - if (extraData) + flushStatus = Brotli.FlushEncoder(Array.Empty(), _buffer, out _availableInput, out _availableOutput, ref _state, finished); + _stream.Write(_buffer, 0, _availableOutput); + _availableOutput = _bufferSize; + + if (BrotliNative.BrotliEncoderIsFinished(_state.BrotliNativeState)) { - var bytesWrote = (int)((nuint)_bufferSize - (nuint)_availableOutput); - Byte[] buf = new Byte[bytesWrote]; - Marshal.Copy(_bufferOutput, buf, 0, bytesWrote); - _stream.Write(buf, 0, bytesWrote); - _availableOutput = (nuint)_bufferSize; - _nextOutput = _bufferOutput; + break; } - if (BrotliNative.BrotliEncoderIsFinished(_encoder.State)) break; - if (!extraData) break; } } @@ -165,28 +147,14 @@ protected override void Dispose(bool disposing) } try { - if (_bufferInput != IntPtr.Zero) Marshal.FreeHGlobal(_bufferInput); - if (_bufferOutput != IntPtr.Zero) Marshal.FreeHGlobal(_bufferOutput); - _bufferInput = IntPtr.Zero; - _bufferOutput = IntPtr.Zero; if (disposing && !_leaveOpen) _stream?.Dispose(); } finally { _stream = null; - try - { - _decoder?.Dispose(); - _encoder?.Dispose(); - } - finally - { - _encoder = null; - _decoder = null; - } + _state.Dispose(); base.Dispose(disposing); } - } public override void Flush() @@ -231,82 +199,31 @@ public override int Read(byte[] buffer, int offset, int count) 
ValidateParameters(buffer, offset, count); EnsureNotDisposed(); DateTime begin = DateTime.Now; - int bytesRead = (int)(_decoder.BufferStream.Length - _readOffset); - nuint totalCount = 0; - bool endOfStream = false; - bool errorDetected = false; - Byte[] buf = new Byte[_bufferSize]; - while (bytesRead < count) + _availableOutput = 0; + TimeSpan ExecutionTime = DateTime.Now - begin; + if (ReadTimeout > 0 && ExecutionTime.TotalMilliseconds >= ReadTimeout) { - TimeSpan ExecutionTime = DateTime.Now - begin; - if (ReadTimeout > 0 && ExecutionTime.TotalMilliseconds >= ReadTimeout) - { - throw new TimeoutException(BrotliEx.TimeoutRead); - } - while (true) + throw new TimeoutException(BrotliEx.TimeoutRead); + } + while (true) + { + if (_transformationResult == TransformationStatus.NeedMoreSourceData) { - if (_decoder.LastDecoderResult == BrotliDecoderResult.NeedsMoreInput) - { - _availableInput = (nuint)_stream.Read(buf, 0, (int)_bufferSize); - _nextInput = _bufferInput; - if ((int)_availableInput <= 0) - { - endOfStream = true; - break; - } - Marshal.Copy(buf, 0, _bufferInput, (int)_availableInput); - } - else if (_decoder.LastDecoderResult == BrotliDecoderResult.NeedsMoreOutput) - { - Marshal.Copy(_bufferOutput, buf, 0, _bufferSize); - _decoder.BufferStream.Write(buf, 0, _bufferSize); - bytesRead += _bufferSize; - _availableOutput = (nuint)_bufferSize; - _nextOutput = _bufferOutput; - } - else + _availableInput = _stream.Read(_buffer, 0, _bufferSize); + if ((int)_availableInput <= 0) { - //Error or OK - endOfStream = true; break; } - _decoder.LastDecoderResult = BrotliNative.BrotliDecoderDecompressStream(_decoder.State, ref _availableInput, ref _nextInput, - ref _availableOutput, ref _nextOutput, out totalCount); - if (bytesRead >= count) break; } - if (endOfStream && !BrotliNative.BrotliDecoderIsFinished(_decoder.State)) + else if (_transformationResult != TransformationStatus.DestinationTooSmall) { - errorDetected = true; + break; } - if (_decoder.LastDecoderResult == 
BrotliDecoderResult.Error || errorDetected) + _transformationResult = Brotli.Decompress(_buffer, buffer, out _availableInput, out _availableOutput, ref _state); + if (_availableOutput != 0) { - var error = BrotliNative.BrotliDecoderGetErrorCode(_decoder.State); - var text = BrotliNative.BrotliDecoderErrorString(error); - throw new System.IO.IOException(text + BrotliEx.unableDecode); + return _availableOutput; } - if (endOfStream && !BrotliNative.BrotliDecoderIsFinished(_decoder.State) && _decoder.LastDecoderResult == BrotliDecoderResult.NeedsMoreInput) - { - throw new System.IO.IOException(BrotliEx.FinishDecompress); - } - if (endOfStream && _nextOutput != _bufferOutput) - { - int remainBytes = (int)(_nextOutput.ToInt64() - _bufferOutput.ToInt64()); - bytesRead += remainBytes; - Marshal.Copy(_bufferOutput, buf, 0, remainBytes); - _decoder.BufferStream.Write(buf, 0, remainBytes); - _nextOutput = _bufferOutput; - } - if (endOfStream) break; - } - if (_decoder.BufferStream.Length - _readOffset >= count || endOfStream) - { - _decoder.BufferStream.Seek(_readOffset, SeekOrigin.Begin); - var bytesToRead = (int)(_decoder.BufferStream.Length - _readOffset); - if (bytesToRead > count) bytesToRead = count; - _decoder.BufferStream.Read(buffer, offset, bytesToRead); - _decoder.RemoveBytes(_readOffset + bytesToRead); - _readOffset = 0; - return bytesToRead; } return 0; } @@ -330,7 +247,6 @@ public override void Write(byte[] buffer, int offset, int count) if (_mode != CompressionMode.Compress) totalWrote += count; DateTime begin = DateTime.Now; - nuint totalOut = 0; int bytesRemain = count; int currentOffset = offset; int copyLen; @@ -342,27 +258,19 @@ public override void Write(byte[] buffer, int offset, int count) throw new TimeoutException(BrotliEx.TimeoutWrite); } copyLen = bytesRemain > _bufferSize ? 
_bufferSize : bytesRemain; - Marshal.Copy(buffer, currentOffset, _bufferInput, copyLen); - bytesRemain -= copyLen; - currentOffset += copyLen; - _availableInput = (nuint)copyLen; - _nextInput = _bufferInput; - while ((int)_availableInput > 0) + Span bufferInput = new Span(buffer, currentOffset, copyLen); + _transformationResult = TransformationStatus.DestinationTooSmall; + _transformationResult = Brotli.Compress(bufferInput, _buffer, out _availableInput, out _availableOutput, ref _state); + if (_transformationResult == TransformationStatus.InvalidData) { - if (!BrotliNative.BrotliEncoderCompressStream(_encoder.State, BrotliEncoderOperation.Process, ref _availableInput, ref _nextInput, ref _availableOutput, - ref _nextOutput, out totalOut)) throw new System.IO.IOException(BrotliEx.unableEncode); - - if (_availableOutput != (nuint)_bufferSize) - { - var bytesWrote = (int)((nuint)_bufferSize - _availableOutput); - Byte[] buf = new Byte[bytesWrote]; - Marshal.Copy(_bufferOutput, buf, 0, bytesWrote); - _stream.Write(buf, 0, bytesWrote); - _availableOutput = (nuint)_bufferSize; - _nextOutput = _bufferOutput; - } + throw new System.Exception(BrotliEx.unableEncode); + } + if (_transformationResult == TransformationStatus.DestinationTooSmall) + { + _stream.Write(_buffer, 0, _availableOutput); } - if (BrotliNative.BrotliEncoderIsFinished(_encoder.State)) break; + bytesRemain -= copyLen; + currentOffset += copyLen; } } } diff --git a/src/System.IO.Compression.Brotli/System/Interop/Interop.Brotli.cs b/src/System.IO.Compression.Brotli/System/Interop/Interop.Brotli.cs index cee29ccfdd7..77ef5581290 100644 --- a/src/System.IO.Compression.Brotli/System/Interop/Interop.Brotli.cs +++ b/src/System.IO.Compression.Brotli/System/Interop/Interop.Brotli.cs @@ -20,10 +20,6 @@ internal static partial class Brotli #region Encoder - [DllImport(LibNameEncoder, CallingConvention = CallingConvention.Cdecl)] - internal static extern bool BrotliEncoderCompress(int quality, int lgwin, 
BrotliEncoderMode mode, nuint input_size, - IntPtr input_buffer, ref nuint encoded_size, IntPtr encoded_buffer); - [DllImport(LibNameEncoder, CallingConvention = CallingConvention.Cdecl)] internal static extern IntPtr BrotliEncoderCreateInstance(IntPtr allocFunc, IntPtr freeFunc, IntPtr opaque); @@ -51,10 +47,6 @@ internal static extern bool BrotliEncoderCompressStream( #region Decoder - [DllImport(LibNameDecoder, CallingConvention = CallingConvention.Cdecl)] - internal static extern BrotliDecoderResult BrotliDecoderDecompress(ref nuint availableIn, IntPtr nextIn, - ref nuint availableOut, IntPtr nextOut); - [DllImport(LibNameDecoder, CallingConvention = CallingConvention.Cdecl)] internal static extern IntPtr BrotliDecoderCreateInstance(IntPtr allocFunc, IntPtr freeFunc, IntPtr opaque); diff --git a/tests/System.IO.Compression.Tests/BrotliPerfomanceTests.cs b/tests/System.IO.Compression.Tests/BrotliPerformanceTests.cs similarity index 92% rename from tests/System.IO.Compression.Tests/BrotliPerfomanceTests.cs rename to tests/System.IO.Compression.Tests/BrotliPerformanceTests.cs index 36293b781d4..e328c24f581 100644 --- a/tests/System.IO.Compression.Tests/BrotliPerfomanceTests.cs +++ b/tests/System.IO.Compression.Tests/BrotliPerformanceTests.cs @@ -6,7 +6,7 @@ namespace System.IO.Compression.Tests { - public class BrotliPerfomanceTests + public class BrotliPerformanceTests { private const int Iter = 1000; @@ -71,9 +71,16 @@ public void Decompress(Util.CompressionType type) byte[] data = File.ReadAllBytes(testFilePath); var bytes = new byte[bufferSize]; foreach (var iteration in Benchmark.Iterations) + { using (iteration.StartMeasurement()) + { for (int i = 0; i < Benchmark.InnerIterationCount; i++) - Brotli.Decompress(data, bytes, out int consumed, out int written); + { + Brotli.State state = new Brotli.State(); + Brotli.Decompress(data, bytes, out int consumed, out int written, ref state); + } + } + } File.Delete(testFilePath); } @@ -90,7 +97,8 @@ public void 
Compress(Util.CompressionType type) byte[] compressed = new byte[bytes.Length]; using (iteration.StartMeasurement()) { - Brotli.Compress(bytes, compressed, out int consumed, out int writen); + Brotli.State state = new Brotli.State(); + Brotli.Compress(bytes, compressed, out int consumed, out int writen, ref state); } } } diff --git a/tests/System.IO.Compression.Tests/BrotliPrimitivesTests.cs b/tests/System.IO.Compression.Tests/BrotliPrimitivesTests.cs index 45e1fa0ab59..a6cce082957 100644 --- a/tests/System.IO.Compression.Tests/BrotliPrimitivesTests.cs +++ b/tests/System.IO.Compression.Tests/BrotliPrimitivesTests.cs @@ -10,16 +10,6 @@ public class BrotliTests { static string brTestFile(string fileName) => Path.Combine("BrotliTestData", fileName); - [Theory] - [InlineData(25, 1)] - [InlineData(-1, 1)] - [InlineData(24, 0)] - [InlineData(24, 12)] - public void TestMethodCompressEx(CompressionLevel quality, int lgWinSize) - { - Assert.Throws(() => Brotli.Compress(new byte[1], new byte[1], out int consumed, out int written, quality, lgWinSize)); - } - [Theory(Skip = "Fails in VS - System.BadImageFormatException : An attempt was made to load a program with an incorrect format.")] [InlineData(1)] [InlineData(5)] @@ -29,19 +19,31 @@ public void RoundtripCompressDecompress(int totalSize) { byte[] data = new byte[totalSize]; new Random(42).NextBytes(data); - Span compressed = new byte[Brotli.GetMaximumCompressedSize(totalSize)]; - TransformationStatus result = Brotli.Compress(data, compressed, out int consumed, out int written); + byte[] compressed = new byte[Brotli.GetMaximumCompressedSize(totalSize)]; + Assert.NotEqual(compressed.Length, 0); + Brotli.State state = new Brotli.State(); + TransformationStatus result = Brotli.Compress(data, compressed, out int consumed, out int written, ref state); + while (consumed != 0 || result != TransformationStatus.Done) + { + result = Brotli.Compress(data, compressed, out consumed, out written, ref state); + } + byte[] flush = new 
byte[0]; + result = Brotli.FlushEncoder(flush, compressed, out consumed, out written, ref state); Assert.Equal(TransformationStatus.Done, result); - Assert.Equal(totalSize, consumed); - compressed = compressed.Slice(0, written); - ValidateCompressedData(compressed, data); + Assert.Equal(consumed, 0); + byte[] resultCompressed = new byte[written]; + Array.Copy(compressed, resultCompressed, written); + ValidateCompressedData(resultCompressed, data); } - private void ValidateCompressedData(Span data, byte[] expected) + private void ValidateCompressedData(byte[] data, byte[] expected) { byte[] decompressed = new byte[expected.Length]; - TransformationStatus result = Brotli.Decompress(data, decompressed, out int consumed, out int written); + Brotli.State state = new Brotli.State(); + TransformationStatus result = Brotli.Decompress(data, decompressed, out int consumed, out int written, ref state); Assert.Equal(TransformationStatus.Done, result); + Assert.Equal(expected.Length, written); + Assert.Equal(consumed, 0); Assert.Equal(expected, decompressed); } diff --git a/tests/System.IO.Compression.Tests/BrotliStreamTests.cs b/tests/System.IO.Compression.Tests/BrotliStreamTests.cs index 29ca824d693..5df15c0cbb5 100644 --- a/tests/System.IO.Compression.Tests/BrotliStreamTests.cs +++ b/tests/System.IO.Compression.Tests/BrotliStreamTests.cs @@ -214,19 +214,20 @@ public void RoundtripCompressDecompress(int chunkSize, int totalSize) { compressor.Write(data, i, chunkSize); } + compressor.Dispose(); } compressed.Position = 0; - ValidateCompressedData(chunkSize, compressed, data); + ValidateCompressedData(chunkSize, compressed.ToArray(), data); compressed.Dispose(); } - private void ValidateCompressedData(int chunkSize, MemoryStream compressed, byte[] expected) + private void ValidateCompressedData(int chunkSize, byte[] compressedData, byte[] expected) { + MemoryStream compressed = new MemoryStream(compressedData); using (MemoryStream decompressed = new MemoryStream()) using (var 
decompressor = new BrotliStream(compressed, CompressionMode.Decompress, true)) { - decompressor.CopyTo(decompressed, chunkSize); - + decompressor.CopyTo(decompressed, expected.Length); Assert.Equal(expected, decompressed.ToArray()); } }