Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Apache.Arrow/Apache.Arrow.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<DefineConstants>$(DefineConstants);UNSAFE_BYTEBUFFER;BYTEBUFFER_NO_BOUNDS_CHECK;ENABLE_SPAN_T</DefineConstants>
<DefineConstants>$(DefineConstants);UNSAFE_BYTEBUFFER;ENABLE_SPAN_T</DefineConstants>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just checking we're happy with leaving UNSAFE_BYTEBUFFER enabled? As far as I understand, bounds checks should prevent any invalid reads, so this should be safe in theory, but it means bugs in the Flatbuffers implementation (like missing bounds checks) could still cause memory safety issues. It sounds like this can increase performance significantly, though, so it is worth keeping enabled (https://flatbuffers.dev/languages/c_sharp/#conditional-compilation-symbols).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't try benchmarking without UNSAFE_BYTEBUFFER but I did audit all the code and confirmed that there was only one missing bounds check.


<Description>Apache Arrow is a cross-language development platform for in-memory data. It specifies a standardized language-independent columnar memory format for flat and hierarchical data, organized for efficient analytic operations on modern hardware.</Description>
</PropertyGroup>
Expand Down
1 change: 1 addition & 0 deletions src/Apache.Arrow/Flatbuf/FlatBuffers/ByteBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -693,6 +693,7 @@ public byte Get(int index)
#if ENABLE_SPAN_T && UNSAFE_BYTEBUFFER
public unsafe string GetStringUTF8(int startPos, int len)
{
AssertOffsetAndLength(startPos, len);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For reference, this has been added in the upstream Flatbuffers project but not yet released: google/flatbuffers#8673

So if we upgrade the bundled Flatbuffers code, we should either do so after that change is released or manually add the check back in. The new test should catch this, though.

fixed (byte* buffer = &MemoryMarshal.GetReference(_buffer.ReadOnlySpan.Slice(startPos)))
{
return Encoding.UTF8.GetString(buffer, len);
Expand Down
4 changes: 2 additions & 2 deletions test/Apache.Arrow.Benchmarks/ArrowWriterBenchmark.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ namespace Apache.Arrow.Benchmarks
[MemoryDiagnoser]
public class ArrowWriterBenchmark
{
[Params(10_000, 1_000_000)]
[Params(10_000, 300_000)]
public int BatchLength { get; set; }

//Max column set count is 15 before reaching 2gb limit of memory stream
//Max column set count is 14 before reaching 2gb limit of memory stream
[Params(10, 14)]
public int ColumnSetCount { get; set; }

Expand Down
36 changes: 35 additions & 1 deletion test/Apache.Arrow.Tests/ArrowStreamReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
using System.Threading.Tasks;
using Apache.Arrow.Ipc;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;
using Xunit;

namespace Apache.Arrow.Tests
Expand Down Expand Up @@ -280,6 +281,39 @@ public override Task<int> ReadAsync(byte[] buffer, int offset, int length, Cance
}
#endif
}

[Fact]
public unsafe void MalformedColumnNameLength()
{
    // Position of the 32-bit value that is overwritten below; the test first
    // asserts that it holds 5, the character count of the field name "index".
    const int FieldNameLengthOffset = 108;
    // Replacement length, far larger than the name that was actually written.
    const int FakeFieldNameLength = 165535;

    // Serialize a stream that carries only a schema with one non-nullable Int32 field.
    byte[] serialized;
    using (var output = new MemoryStream())
    {
        var schema = new Schema(
            [new Field("index", Int32Type.Default, nullable: false)],
            metadata: []);
        using (var streamWriter = new ArrowStreamWriter(output, schema, leaveOpen: true))
        {
            streamWriter.WriteStart();
            streamWriter.WriteEnd();
        }
        serialized = output.ToArray();
    }

    // Reinterpret the four bytes at the offset as an int, verify the expected
    // original value, then corrupt it in place.
    Span<int> nameLength = serialized.AsSpan(FieldNameLengthOffset, sizeof(int)).CastTo<int>();
    Assert.Equal(5, nameLength[0]);
    nameLength[0] = FakeFieldNameLength;

    // Reading the tampered bytes is expected to be rejected with an
    // ArgumentOutOfRangeException.
    Assert.Throws<ArgumentOutOfRangeException>(() =>
    {
        using var input = new MemoryStream(serialized);
        using var streamReader = new ArrowStreamReader(input);
        streamReader.ReadNextRecordBatch();
    });
}
}
}

13 changes: 9 additions & 4 deletions test/Apache.Arrow.Tests/TestData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

using System;
using System.Collections.Generic;
using System.Data.SqlTypes;
using System.Linq;
using Apache.Arrow.Arrays;
using Apache.Arrow.Scalars;
Expand Down Expand Up @@ -201,7 +202,8 @@ public void Visit(Decimal32Type type)

for (var i = 0; i < Length; i++)
{
builder.Append((decimal)i / Length);
SqlDecimal value = SqlDecimal.ConvertToPrecScale((decimal)i / Length, type.Precision, type.Scale);
builder.Append((decimal)value);
}

Array = builder.Build();
Expand All @@ -213,7 +215,8 @@ public void Visit(Decimal64Type type)

for (var i = 0; i < Length; i++)
{
builder.Append((decimal)i / Length);
SqlDecimal value = SqlDecimal.ConvertToPrecScale((decimal)i / Length, type.Precision, type.Scale);
builder.Append((decimal)value);
}

Array = builder.Build();
Expand All @@ -225,7 +228,8 @@ public void Visit(Decimal128Type type)

for (var i = 0; i < Length; i++)
{
builder.Append((decimal)i / Length);
SqlDecimal value = SqlDecimal.ConvertToPrecScale((decimal)i / Length, type.Precision, type.Scale);
builder.Append((decimal)value);
}

Array = builder.Build();
Expand All @@ -237,7 +241,8 @@ public void Visit(Decimal256Type type)

for (var i = 0; i < Length; i++)
{
builder.Append((decimal)i / Length);
SqlDecimal value = SqlDecimal.ConvertToPrecScale((decimal)i / Length, type.Precision, type.Scale);
builder.Append((decimal)value);
}

Array = builder.Build();
Expand Down
Loading