Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Conversation

EgorBo
Copy link
Member

@EgorBo EgorBo commented Mar 16, 2024

No description provided.

@ghost ghost added the area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI label Mar 16, 2024
@dotnet dotnet deleted a comment from EgorBot Mar 16, 2024
@dotnet dotnet deleted a comment from EgorBot Mar 17, 2024
@EgorBo EgorBo closed this Mar 17, 2024
@EgorBo EgorBo reopened this Mar 17, 2024
@EgorBo EgorBo closed this Mar 17, 2024
@github-actions github-actions bot locked and limited conversation to collaborators Apr 17, 2024
@EgorBo EgorBo reopened this May 3, 2024
@dotnet dotnet unlocked this conversation May 3, 2024
@EgorBo EgorBo closed this May 11, 2024
@EgorBo EgorBo reopened this May 18, 2024
@EgorBo EgorBo closed this May 19, 2024
@dotnet dotnet deleted a comment from EgorBot May 19, 2024
@dotnet dotnet deleted a comment from EgorBot May 19, 2024
@dotnet dotnet deleted a comment from EgorBot May 19, 2024
@dotnet dotnet deleted a comment from EgorBot May 19, 2024
@dotnet dotnet deleted a comment from EgorBot May 19, 2024
@dotnet dotnet deleted a comment from EgorBot May 19, 2024
@dotnet dotnet deleted a comment from EgorBot May 19, 2024
@dotnet dotnet deleted a comment from EgorBot May 19, 2024
@EgorBo EgorBo reopened this May 20, 2024
@dotnet dotnet deleted a comment from EgorBot May 20, 2024
@dotnet dotnet deleted a comment from EgorBot May 20, 2024
@dotnet dotnet deleted a comment from EgorBot May 20, 2024
@dotnet dotnet deleted a comment from EgorBot May 20, 2024
@dotnet dotnet deleted a comment from EgorBot May 20, 2024
@dotnet dotnet deleted a comment from EgorBot May 20, 2024
@dotnet dotnet deleted a comment from EgorBot May 20, 2024
@dotnet dotnet deleted a comment from EgorBot Jun 28, 2024
@dotnet dotnet deleted a comment from EgorBot Jun 28, 2024
@EgorBo
Copy link
Member Author

EgorBo commented Jun 28, 2024

@EgorBot -arm64 -amd -intel

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

BenchmarkRunner.Run<Utf8Bench>(args: args);

/// <summary>
/// Compares <c>Buffer.MemoryCopy</c> with libc <c>memcpy</c> when copying between two
/// native buffers, for the byte counts produced by <see cref="Sizes"/>.
/// </summary>
public unsafe class Utf8Bench
{
    // Size of each native allocation in bytes (300 MiB).
    const int BufferSize = 300 * 1024 * 1024;

    // Alignment requested from the allocator, in bytes.
    const int Alignment = 64;

    // Offset applied to the working pointers, so they sit 8 bytes past a 64-byte boundary.
    // NOTE(review): presumably intentional, to measure copies from/to non-64-byte-aligned
    // addresses - confirm.
    const int Misalignment = 8;

    // Pointers exactly as returned by AlignedAlloc. Only these may be handed back to
    // AlignedFree; the offset working pointers below must never be freed.
    byte* _base1 = null;
    byte* _base2 = null;

    // Working pointers used by the benchmarks (base + Misalignment).
    byte* _ptr1 = null;
    byte* _ptr2 = null;

    /// <summary>Allocates both native buffers and derives the offset working pointers.</summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _base1 = (byte*)NativeMemory.AlignedAlloc(BufferSize, Alignment);
        _base2 = (byte*)NativeMemory.AlignedAlloc(BufferSize, Alignment);

        _ptr1 = _base1 + Misalignment;
        _ptr2 = _base2 + Misalignment;
    }

    /// <summary>Releases both native buffers.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        // Memory from AlignedAlloc must be released with AlignedFree, and with the original
        // (un-offset) pointer. AlignedFree is a no-op for null.
        NativeMemory.AlignedFree(_base1);
        NativeMemory.AlignedFree(_base2);

        _base1 = _base2 = null;
        _ptr1 = _ptr2 = null;
    }

    /// <summary>Byte counts to benchmark, from 1,000 to 100,000,000.</summary>
    public static IEnumerable<long> Sizes()
    {
        yield return       1_000;
        yield return      10_000;
        yield return     100_000;
        yield return   1_000_000;
        yield return  10_000_000;
        yield return 100_000_000;
    }

    /// <summary>Copies <paramref name="size"/> bytes from <c>_ptr1</c> to <c>_ptr2</c> via <c>Buffer.MemoryCopy</c>.</summary>
    /// <param name="size">Number of bytes to copy; also passed as the destination capacity.</param>
    [Benchmark]
    [ArgumentsSource(nameof(Sizes))]
    public void Managed(long size) => Buffer.MemoryCopy(_ptr1, _ptr2, size, size);

    /// <summary>Copies <paramref name="size"/> bytes from <c>_ptr1</c> to <c>_ptr2</c> via libc <c>memcpy</c>.</summary>
    /// <param name="size">Number of bytes to copy.</param>
    [Benchmark]
    [ArgumentsSource(nameof(Sizes))]
    // memcpy takes (dest, src) while Buffer.MemoryCopy takes (source, destination);
    // the arguments are ordered so both benchmarks copy in the same direction.
    public void Native(long size) => memcpy(_ptr2, _ptr1, (nuint)size);

    // P/Invoke into the C library's memcpy. The "libc" library name is as written in the
    // original snippet.
    [DllImport("libc")]
    static extern nuint memcpy(byte* dest, byte* src, nuint size);
}

@dotnet dotnet deleted a comment from EgorBot Jun 28, 2024
@dotnet dotnet deleted a comment from EgorBot Jun 28, 2024
@dotnet dotnet deleted a comment from EgorBot Jun 28, 2024
@EgorBo
Copy link
Member Author

EgorBo commented Jun 28, 2024

@EgorBot -intel

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

BenchmarkRunner.Run<Utf8Bench>(args: args);

/// <summary>
/// Benchmarks <c>Buffer.MemoryCopy</c> for a 100,000-byte copy between two native buffers
/// that are each allocated with 64-byte alignment.
/// </summary>
public unsafe class Utf8Bench
{
    byte* _ptr1 = null;
    byte* _ptr2 = null;

    /// <summary>Allocates two 300 MiB native buffers, each aligned to 64 bytes.</summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _ptr1 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
        _ptr2 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
    }

    /// <summary>Releases both native buffers.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        // Memory obtained from AlignedAlloc must be released with AlignedFree, not Free.
        NativeMemory.AlignedFree(_ptr1);
        NativeMemory.AlignedFree(_ptr2);
    }

    /// <summary>Byte counts to benchmark (a single value: 100,000).</summary>
    public static IEnumerable<long> Sizes()
    {
        yield return     100_000;
    }

    /// <summary>Copies <paramref name="size"/> bytes from <c>_ptr1</c> to <c>_ptr2</c>.</summary>
    /// <param name="size">Number of bytes to copy; also passed as the destination capacity.</param>
    [Benchmark]
    [ArgumentsSource(nameof(Sizes))]
    public void Managed(long size) => Buffer.MemoryCopy(_ptr1, _ptr2, size, size);
}

4 similar comments
@EgorBo
Copy link
Member Author

EgorBo commented Jun 28, 2024

@EgorBot -intel

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

BenchmarkRunner.Run<Utf8Bench>(args: args);

/// <summary>
/// Benchmarks <c>Buffer.MemoryCopy</c> for a 100,000-byte copy between two native buffers
/// that are each allocated with 64-byte alignment.
/// </summary>
public unsafe class Utf8Bench
{
    byte* _ptr1 = null;
    byte* _ptr2 = null;

    /// <summary>Allocates two 300 MiB native buffers, each aligned to 64 bytes.</summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _ptr1 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
        _ptr2 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
    }

    /// <summary>Releases both native buffers.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        // Memory obtained from AlignedAlloc must be released with AlignedFree, not Free.
        NativeMemory.AlignedFree(_ptr1);
        NativeMemory.AlignedFree(_ptr2);
    }

    /// <summary>Byte counts to benchmark (a single value: 100,000).</summary>
    public static IEnumerable<long> Sizes()
    {
        yield return     100_000;
    }

    /// <summary>Copies <paramref name="size"/> bytes from <c>_ptr1</c> to <c>_ptr2</c>.</summary>
    /// <param name="size">Number of bytes to copy; also passed as the destination capacity.</param>
    [Benchmark]
    [ArgumentsSource(nameof(Sizes))]
    public void Managed(long size) => Buffer.MemoryCopy(_ptr1, _ptr2, size, size);
}

@EgorBo
Copy link
Member Author

EgorBo commented Jun 28, 2024

@EgorBot -intel

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

BenchmarkRunner.Run<Utf8Bench>(args: args);

/// <summary>
/// Benchmarks <c>Buffer.MemoryCopy</c> for a 100,000-byte copy between two native buffers
/// that are each allocated with 64-byte alignment.
/// </summary>
public unsafe class Utf8Bench
{
    byte* _ptr1 = null;
    byte* _ptr2 = null;

    /// <summary>Allocates two 300 MiB native buffers, each aligned to 64 bytes.</summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _ptr1 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
        _ptr2 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
    }

    /// <summary>Releases both native buffers.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        // Memory obtained from AlignedAlloc must be released with AlignedFree, not Free.
        NativeMemory.AlignedFree(_ptr1);
        NativeMemory.AlignedFree(_ptr2);
    }

    /// <summary>Byte counts to benchmark (a single value: 100,000).</summary>
    public static IEnumerable<long> Sizes()
    {
        yield return     100_000;
    }

    /// <summary>Copies <paramref name="size"/> bytes from <c>_ptr1</c> to <c>_ptr2</c>.</summary>
    /// <param name="size">Number of bytes to copy; also passed as the destination capacity.</param>
    [Benchmark]
    [ArgumentsSource(nameof(Sizes))]
    public void Managed(long size) => Buffer.MemoryCopy(_ptr1, _ptr2, size, size);
}

@EgorBo
Copy link
Member Author

EgorBo commented Jun 28, 2024

@EgorBot -intel

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

BenchmarkRunner.Run<Utf8Bench>(args: args);

/// <summary>
/// Benchmarks <c>Buffer.MemoryCopy</c> for a 100,000-byte copy between two native buffers
/// that are each allocated with 64-byte alignment.
/// </summary>
public unsafe class Utf8Bench
{
    byte* _ptr1 = null;
    byte* _ptr2 = null;

    /// <summary>Allocates two 300 MiB native buffers, each aligned to 64 bytes.</summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _ptr1 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
        _ptr2 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
    }

    /// <summary>Releases both native buffers.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        // Memory obtained from AlignedAlloc must be released with AlignedFree, not Free.
        NativeMemory.AlignedFree(_ptr1);
        NativeMemory.AlignedFree(_ptr2);
    }

    /// <summary>Byte counts to benchmark (a single value: 100,000).</summary>
    public static IEnumerable<long> Sizes()
    {
        yield return     100_000;
    }

    /// <summary>Copies <paramref name="size"/> bytes from <c>_ptr1</c> to <c>_ptr2</c>.</summary>
    /// <param name="size">Number of bytes to copy; also passed as the destination capacity.</param>
    [Benchmark]
    [ArgumentsSource(nameof(Sizes))]
    public void Managed(long size) => Buffer.MemoryCopy(_ptr1, _ptr2, size, size);
}

@EgorBo
Copy link
Member Author

EgorBo commented Jun 28, 2024

@EgorBot -intel

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

BenchmarkRunner.Run<Utf8Bench>(args: args);

/// <summary>
/// Benchmarks <c>Buffer.MemoryCopy</c> for a 100,000-byte copy between two native buffers
/// that are each allocated with 64-byte alignment.
/// </summary>
public unsafe class Utf8Bench
{
    byte* _ptr1 = null;
    byte* _ptr2 = null;

    /// <summary>Allocates two 300 MiB native buffers, each aligned to 64 bytes.</summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _ptr1 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
        _ptr2 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
    }

    /// <summary>Releases both native buffers.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        // Memory obtained from AlignedAlloc must be released with AlignedFree, not Free.
        NativeMemory.AlignedFree(_ptr1);
        NativeMemory.AlignedFree(_ptr2);
    }

    /// <summary>Byte counts to benchmark (a single value: 100,000).</summary>
    public static IEnumerable<long> Sizes()
    {
        yield return     100_000;
    }

    /// <summary>Copies <paramref name="size"/> bytes from <c>_ptr1</c> to <c>_ptr2</c>.</summary>
    /// <param name="size">Number of bytes to copy; also passed as the destination capacity.</param>
    [Benchmark]
    [ArgumentsSource(nameof(Sizes))]
    public void Managed(long size) => Buffer.MemoryCopy(_ptr1, _ptr2, size, size);
}

@EgorBot
Copy link

EgorBot commented Jun 28, 2024

❌ Benchmark failed on Intel
publishing results failed

@EgorBo
Copy link
Member Author

EgorBo commented Jun 28, 2024

@EgorBot -intel

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

BenchmarkRunner.Run<Utf8Bench>(args: args);

/// <summary>
/// Benchmarks <c>Buffer.MemoryCopy</c> for a 100,000-byte copy between two native buffers
/// that are each allocated with 64-byte alignment.
/// </summary>
public unsafe class Utf8Bench
{
    byte* _ptr1 = null;
    byte* _ptr2 = null;

    /// <summary>Allocates two 300 MiB native buffers, each aligned to 64 bytes.</summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _ptr1 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
        _ptr2 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
    }

    /// <summary>Releases both native buffers.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        // Memory obtained from AlignedAlloc must be released with AlignedFree, not Free.
        NativeMemory.AlignedFree(_ptr1);
        NativeMemory.AlignedFree(_ptr2);
    }

    /// <summary>Byte counts to benchmark (a single value: 100,000).</summary>
    public static IEnumerable<long> Sizes()
    {
        yield return     100_000;
    }

    /// <summary>Copies <paramref name="size"/> bytes from <c>_ptr1</c> to <c>_ptr2</c>.</summary>
    /// <param name="size">Number of bytes to copy; also passed as the destination capacity.</param>
    [Benchmark]
    [ArgumentsSource(nameof(Sizes))]
    public void Managed(long size) => Buffer.MemoryCopy(_ptr1, _ptr2, size, size);
}

@EgorBot
Copy link

EgorBot commented Jun 28, 2024

❌ Benchmark failed on Intel
publishing results failed

@EgorBo
Copy link
Member Author

EgorBo commented Jun 28, 2024

@EgorBot -intel

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

BenchmarkRunner.Run<Utf8Bench>(args: args);

/// <summary>
/// Benchmarks <c>Buffer.MemoryCopy</c> for a 100,000-byte copy between two native buffers
/// that are each allocated with 64-byte alignment.
/// </summary>
public unsafe class Utf8Bench
{
    byte* _ptr1 = null;
    byte* _ptr2 = null;

    /// <summary>Allocates two 300 MiB native buffers, each aligned to 64 bytes.</summary>
    [GlobalSetup]
    public void GlobalSetup()
    {
        _ptr1 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
        _ptr2 = (byte*)NativeMemory.AlignedAlloc(300 * 1024 * 1024, 64);
    }

    /// <summary>Releases both native buffers.</summary>
    [GlobalCleanup]
    public void GlobalCleanup()
    {
        // Memory obtained from AlignedAlloc must be released with AlignedFree, not Free.
        NativeMemory.AlignedFree(_ptr1);
        NativeMemory.AlignedFree(_ptr2);
    }

    /// <summary>Byte counts to benchmark (a single value: 100,000).</summary>
    public static IEnumerable<long> Sizes()
    {
        yield return     100_000;
    }

    /// <summary>Copies <paramref name="size"/> bytes from <c>_ptr1</c> to <c>_ptr2</c>.</summary>
    /// <param name="size">Number of bytes to copy; also passed as the destination capacity.</param>
    [Benchmark]
    [ArgumentsSource(nameof(Sizes))]
    public void Managed(long size) => Buffer.MemoryCopy(_ptr1, _ptr2, size, size);
}

@EgorBot
Copy link

EgorBot commented Jun 28, 2024

Benchmark results on Intel
BenchmarkDotNet v0.13.12, Ubuntu 22.04.4 LTS (Jammy Jellyfish)
Intel Xeon Platinum 8370C CPU 2.80GHz, 1 CPU, 8 logical and 4 physical cores
  Job-FRYCZO : .NET 9.0.0 (42.42.42.42424), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI
  Job-VHWOZG : .NET 9.0.0 (42.42.42.42424), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI
Method Toolchain size Mean Error Ratio
Managed Main 100000 2.167 μs 0.0004 μs 1.00
Managed PR 100000 2.265 μs 0.0002 μs 1.05

BDN_Artifacts.zip

@EgorBo EgorBo closed this Jun 29, 2024
@github-actions github-actions bot locked and limited conversation to collaborators Jul 31, 2024
@EgorBo
Copy link
Member Author

EgorBo commented Aug 13, 2024

@EgorBot -intel

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

BenchmarkRunner.Run<MyBench>(args: args); // args must be forwarded to BDN!

/// <summary>
/// Measures an ordinal, case-insensitive comparison of an instance string field
/// against a string literal.
/// </summary>
public class MyBench
{
    private string _str = "dotnet/RUNTIME";

    /// <summary>Compares the field with the literal, ignoring case.</summary>
    /// <returns>The result of the <c>string.Equals</c> call.</returns>
    [Benchmark]
    public bool Test()
    {
        // JIT is expected to unroll this with SIMD:
        return _str.Equals("dotnet/runtime", StringComparison.OrdinalIgnoreCase);
    }
}

@dotnet dotnet unlocked this conversation Aug 13, 2024
@EgorBo
Copy link
Member Author

EgorBo commented Aug 13, 2024

@EgorBot -intel

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

BenchmarkRunner.Run<MyBench>(args: args); // args must be forwarded to BDN!

/// <summary>
/// Measures an ordinal, case-insensitive comparison of an instance string field
/// against a string literal.
/// </summary>
public class MyBench
{
    private string _str = "dotnet/RUNTIME";

    /// <summary>Compares the field with the literal, ignoring case.</summary>
    /// <returns>The result of the <c>string.Equals</c> call.</returns>
    [Benchmark]
    public bool Test()
    {
        // JIT is expected to unroll this with SIMD:
        return _str.Equals("dotnet/runtime", StringComparison.OrdinalIgnoreCase);
    }
}

@EgorBot
Copy link

EgorBot commented Aug 13, 2024

Benchmark results on Intel
BenchmarkDotNet v0.14.0, Ubuntu 22.04.4 LTS (Jammy Jellyfish)
Intel Xeon Platinum 8370C CPU 2.80GHz, 1 CPU, 16 logical and 8 physical cores
  Job-PYUHGT : .NET 9.0.0 (42.42.42.42424), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI
  Job-TVEMWZ : .NET 9.0.0 (42.42.42.42424), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI
StdDev=0.0002 ns
Method Toolchain Mean Error Ratio
Test Main 0.5734 ns 0.0002 ns 1.00
Test PR 0.5738 ns 0.0002 ns 1.00

BDN_Artifacts.zip

@EgorBo
Copy link
Member Author

EgorBo commented Aug 13, 2024

@EgorBot -arm64 -profile

using System.Runtime.CompilerServices;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

BenchmarkRunner.Run<Bench>(args: args);

/// <summary>
/// Repeatedly stores a string reference into a <c>MyStruct</c> field of this object
/// through a <c>ref</c> parameter of a non-inlined helper.
/// </summary>
public class Bench
{
    // Number of stores performed per benchmark invocation.
    private const long Iterations = 200000000;

    // struct in heap
    private MyStruct MS = new();

    /// <summary>Runs the store loop against the struct held in this instance's field.</summary>
    [Benchmark]
    public void CheckedWB_InHeap()
    {
        string value = new string('x', 1);
        for (long i = 0; i < Iterations; i++)
        {
            CheckedWriteBarrier(ref MS, value);
        }
    }

    // Kept out-of-line via NoInlining; writes the reference into the struct passed by ref.
    // NOTE(review): going by the name, this is meant to exercise the checked write barrier - confirm.
    [MethodImpl(MethodImplOptions.NoInlining)]
    private void CheckedWriteBarrier(ref MyStruct ms, string str)
    {
        ms.Str = str;
    }
}

// Value type with a single string reference property (Str); Bench assigns to Str through a ref.
public record struct MyStruct(string Str);

// NOTE(review): not referenced by the benchmark code in this snippet; looks like a leftover - confirm before removing.
internal class Foo
{
    // Volatile reference to another Foo instance.
    public volatile Foo x;
}

@EgorBot
Copy link

EgorBot commented Aug 13, 2024

Benchmark results on Arm64
BenchmarkDotNet v0.14.0, Ubuntu 22.04.4 LTS (Jammy Jellyfish)
Unknown processor
  Job-ISALJM : .NET 9.0.0 (42.42.42.42424), Arm64 RyuJIT AdvSIMD
  Job-BBFFUH : .NET 9.0.0 (42.42.42.42424), Arm64 RyuJIT AdvSIMD
Method Toolchain Mean Error Ratio
CheckedWB_InHeap Main 801.7 ms 0.42 ms 1.00
CheckedWB_InHeap PR 801.6 ms 0.48 ms 1.00

BDN_Artifacts.zip

Flame graphs: Main vs PR 🔥
Hot asm: Main vs PR
Hot functions: Main vs PR

For clean perf results, make sure you have just one [Benchmark] in your app.

@github-actions github-actions bot locked and limited conversation to collaborators Sep 14, 2024
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants