Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Next Next commit
Use UTF8 encoding on Tar string fields
  • Loading branch information
jozkee authored and github-actions committed Sep 28, 2022
commit abe6dafcc6ab03ea81928f436bb9e0ffa3b5e0ed
Original file line number Diff line number Diff line change
Expand Up @@ -517,8 +517,8 @@ private void ReadVersionAttribute(Span<byte> buffer)
private void ReadPosixAndGnuSharedAttributes(Span<byte> buffer)
{
// Convert the byte arrays
_uName = TarHelpers.GetTrimmedAsciiString(buffer.Slice(FieldLocations.UName, FieldLengths.UName));
_gName = TarHelpers.GetTrimmedAsciiString(buffer.Slice(FieldLocations.GName, FieldLengths.GName));
_uName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.UName, FieldLengths.UName));
_gName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.GName, FieldLengths.GName));

// DevMajor and DevMinor only have values with character devices and block devices.
// For all other typeflags, the values in these fields are irrelevant.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,28 +358,41 @@ private void WriteAsPaxSharedInternal(Span<byte> buffer, out long actualLength)
_checksum = WriteChecksum(tmpChecksum, buffer);
}

// All formats save in the name byte array only the ASCII bytes that fit.
// All formats save in the name byte array only the UTF8 bytes that fit.
private int WriteName(Span<byte> buffer)
{
ReadOnlySpan<char> src = _name.AsSpan(0, Math.Min(_name.Length, FieldLengths.Name));
Span<byte> dest = buffer.Slice(FieldLocations.Name, FieldLengths.Name);
int encoded = Encoding.ASCII.GetBytes(src, dest);
return Checksum(dest.Slice(0, encoded));
ReadOnlySpan<char> name = _name;
int utf16NameTruncatedLength = GetUtf16TruncatedTextLength(name, FieldLengths.Name);

ReadOnlySpan<char> truncatedName = name.Slice(0, utf16NameTruncatedLength);
Span<byte> destination = buffer.Slice(FieldLocations.Name, FieldLengths.Name);
Encoding.UTF8.GetBytes(truncatedName, destination);

return Checksum(destination);
}

// Ustar and PAX save in the name byte array only the ASCII bytes that fit, and the rest of that string is saved in the prefix field.
// Ustar and PAX save in the name byte array only the UTF8 bytes that fit, and the rest of that string is saved in the prefix field.
private int WritePosixName(Span<byte> buffer)
{
int checksum = WriteName(buffer);
ReadOnlySpan<char> name = _name;
int utf16NameTruncatedLength = GetUtf16TruncatedTextLength(name, FieldLengths.Name);

if (_name.Length > FieldLengths.Name)
ReadOnlySpan<char> truncatedName = name.Slice(0, utf16NameTruncatedLength);
Span<byte> destination = buffer.Slice(FieldLocations.Name, FieldLengths.Name);
Encoding.UTF8.GetBytes(truncatedName, destination);

int checksum = Checksum(destination);

if (utf16NameTruncatedLength < name.Length)
{
int prefixBytesLength = Math.Min(_name.Length - FieldLengths.Name, FieldLengths.Prefix);
Span<byte> remaining = stackalloc byte[prefixBytesLength];
int encoded = Encoding.ASCII.GetBytes(_name.AsSpan(FieldLengths.Name, prefixBytesLength), remaining);
Debug.Assert(encoded == remaining.Length);
ReadOnlySpan<char> prefix = name.Slice(utf16NameTruncatedLength);
int utf16PrefixTruncatedLength = GetUtf16TruncatedTextLength(prefix, FieldLengths.Prefix);

destination = buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix);
ReadOnlySpan<char> truncatedPrefix = prefix.Slice(0, utf16PrefixTruncatedLength);
Encoding.UTF8.GetBytes(truncatedPrefix, destination);

checksum += WriteLeftAlignedBytesAndGetChecksum(remaining, buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix));
checksum += Checksum(destination);
}

return checksum;
Expand Down Expand Up @@ -423,7 +436,7 @@ private int WriteCommonFields(Span<byte> buffer, long actualLength, TarEntryType

if (!string.IsNullOrEmpty(_linkName))
{
checksum += WriteAsAsciiString(_linkName, buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName));
checksum += WriteAsUtf8String(_linkName, buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName));
}

return checksum;
Expand Down Expand Up @@ -467,12 +480,12 @@ private int WritePosixAndGnuSharedFields(Span<byte> buffer)

if (!string.IsNullOrEmpty(_uName))
{
checksum += WriteAsAsciiString(_uName, buffer.Slice(FieldLocations.UName, FieldLengths.UName));
checksum += WriteAsUtf8String(_uName, buffer.Slice(FieldLocations.UName, FieldLengths.UName));
}

if (!string.IsNullOrEmpty(_gName))
{
checksum += WriteAsAsciiString(_gName, buffer.Slice(FieldLocations.GName, FieldLengths.GName));
checksum += WriteAsUtf8String(_gName, buffer.Slice(FieldLocations.GName, FieldLengths.GName));
}

if (_devMajor > 0)
Expand Down Expand Up @@ -766,10 +779,10 @@ private static int WriteAsTimestamp(DateTimeOffset timestamp, Span<byte> destina
return FormatOctal(unixTimeSeconds, destination);
}

// Writes the specified text as an ASCII string aligned to the left, and returns its checksum.
private static int WriteAsAsciiString(string str, Span<byte> buffer)
// Writes the specified text as an UTF8 string aligned to the left, and returns its checksum.
private static int WriteAsUtf8String(string str, Span<byte> buffer)
{
byte[] bytes = Encoding.ASCII.GetBytes(str);
byte[] bytes = Encoding.UTF8.GetBytes(str);
return WriteLeftAlignedBytesAndGetChecksum(bytes.AsSpan(), buffer);
}

Expand Down Expand Up @@ -819,5 +832,27 @@ private static string GenerateGlobalExtendedAttributeName(int globalExtendedAttr

return result;
}

// Returns the text's utf16 length truncated at the specified max length.
private static int GetUtf16TruncatedTextLength(ReadOnlySpan<char> text, int maxLength)
{
int utf8Length = 0;
int utf16TruncatedLength = 0;

foreach (Rune rune in text.EnumerateRunes())
{
utf8Length += rune.Utf8SequenceLength;
if (utf8Length <= maxLength)
{
utf16TruncatedLength += rune.Utf16SequenceLength;
}
else
{
break;
}
}

return utf16TruncatedLength;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
<Compile Include="TarTestsBase.Posix.cs" />
<Compile Include="TarTestsBase.Ustar.cs" />
<Compile Include="TarTestsBase.V7.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntry.Entry.Roundtrip.Tests.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntryAsync.File.Tests.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntry.Base.cs" />
<Compile Include="TarWriter\TarWriter.WriteEntryAsync.Tests.cs" />
Expand Down
114 changes: 113 additions & 1 deletion src/libraries/System.Formats.Tar/tests/TarTestsBase.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Licensed to the .NET Foundation under one or more agreements.
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Collections.Generic;
Expand Down Expand Up @@ -89,6 +89,11 @@ public abstract partial class TarTestsBase : FileCleanupTestBase
protected const string PaxEaSize = "size";
protected const string PaxEaDevMajor = "devmajor";
protected const string PaxEaDevMinor = "devminor";
internal const char OneByteCharacter = 'a';
internal const char TwoBytesCharacter = 'ö';
internal const string FourBytesCharacter = "😒";
internal const char Separator = '/';
internal const int MaxPathComponent = 255;

private static readonly string[] V7TestCaseNames = new[]
{
Expand Down Expand Up @@ -622,5 +627,112 @@ public static IEnumerable<object[]> GetPaxAndGnuTestCaseNames()
yield return new object[] { name };
}
}

private static List<string> GetPrefixes()
{
List<string> prefixes = new() { "", "/", "./", "../" };

if (OperatingSystem.IsWindows())
prefixes.Add("C:/");

return prefixes;
}

internal static IEnumerable<string> GetNamesPrefixedTestData(NameCapabilities max)
{
Assert.True(Enum.IsDefined(max));
List<string> prefixes = GetPrefixes();

foreach (string prefix in prefixes)
{
int nameLength = 100 - prefix.Length;
yield return prefix + Repeat(OneByteCharacter, nameLength);
yield return prefix + Repeat(OneByteCharacter, nameLength - 2) + Repeat(TwoBytesCharacter, 1);
yield return prefix + Repeat(OneByteCharacter, nameLength - 4) + Repeat(FourBytesCharacter, 1);
}

if (max == NameCapabilities.Name)
yield break;

// test maxed out prefix.
foreach (string prefix in prefixes)
{
int directoryLength = 155 - prefix.Length;
yield return prefix + Repeat(OneByteCharacter, directoryLength) + Separator + Repeat(OneByteCharacter, 100);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 2) + Repeat(TwoBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, 100);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 4) + Repeat(FourBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, 100);
}

if (max == NameCapabilities.NameAndPrefix)
yield break;

foreach (string prefix in prefixes)
{
int directoryLength = MaxPathComponent - prefix.Length;
yield return prefix + Repeat(OneByteCharacter, directoryLength) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 2) + Repeat(TwoBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
yield return prefix + Repeat(OneByteCharacter, directoryLength - 4) + Repeat(FourBytesCharacter, 1) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
}
}

internal static IEnumerable<string> GetNamesNonAsciiTestData(NameCapabilities max)
{
Assert.True(Enum.IsDefined(max));

yield return Repeat(OneByteCharacter, 100);
yield return Repeat(TwoBytesCharacter, 100 / 2);
yield return Repeat(OneByteCharacter, 2) + Repeat(TwoBytesCharacter, 49);

yield return Repeat(FourBytesCharacter, 100 / 4);
yield return Repeat(OneByteCharacter, 4) + Repeat(FourBytesCharacter, 24);

if (max == NameCapabilities.Name)
yield break;

// prefix + name
// this is 256 but is supported because prefix is not required to end in separator.
yield return Repeat(OneByteCharacter, 155) + Separator + Repeat(OneByteCharacter, 100);

// non-ascii prefix + name
yield return Repeat(TwoBytesCharacter, 155 / 2) + Separator + Repeat(OneByteCharacter, 100);
yield return Repeat(FourBytesCharacter, 155 / 4) + Separator + Repeat(OneByteCharacter, 100);

// prefix + non-ascii name
yield return Repeat(OneByteCharacter, 155) + Separator + Repeat(TwoBytesCharacter, 100 / 2);
yield return Repeat(OneByteCharacter, 155) + Separator + Repeat(FourBytesCharacter, 100 / 4);

// non-ascii prefix + non-ascii name
yield return Repeat(TwoBytesCharacter, 155 / 2) + Separator + Repeat(TwoBytesCharacter, 100 / 2);
yield return Repeat(FourBytesCharacter, 155 / 4) + Separator + Repeat(FourBytesCharacter, 100 / 4);

if (max == NameCapabilities.NameAndPrefix)
yield break;

// Pax and Gnu support unlimited paths.
yield return Repeat(OneByteCharacter, MaxPathComponent);
yield return Repeat(TwoBytesCharacter, MaxPathComponent / 2);
yield return Repeat(FourBytesCharacter, MaxPathComponent / 4);

yield return Repeat(OneByteCharacter, MaxPathComponent) + Separator + Repeat(OneByteCharacter, MaxPathComponent);
yield return Repeat(TwoBytesCharacter, MaxPathComponent / 2) + Separator + Repeat(TwoBytesCharacter, MaxPathComponent / 2);
yield return Repeat(FourBytesCharacter, MaxPathComponent / 4) + Separator + Repeat(FourBytesCharacter, MaxPathComponent / 4);
}

internal static string Repeat(char c, int count)
{
return new string(c, count);
}

internal static string Repeat(string c, int count)
{
return string.Concat(Enumerable.Repeat(c, count));
}

internal enum NameCapabilities
{
Name,
NameAndPrefix,
Unlimited
}
}
}
Loading