Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Conversation

stephentoub
Copy link
Member

When there's no ThenBy, we can take a more optimized path that uses the TKey's comparer directly. We already have a fast path for this case that converts the O(n log n) operation into O(n), but it employs a comparer that's much more complicated, and as that comparer is used in the inner loop, it makes a meaningful difference.

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

// Run the benchmarks declared in the assembly that contains Tests, forwarding the command-line arguments.
var benchmarkAssembly = typeof(Tests).Assembly;
BenchmarkSwitcher.FromAssembly(benchmarkAssembly).Run(args);

/// <summary>
/// Benchmarks for ordering a sequence and then taking only its first or last element,
/// over double, int and string inputs.
/// </summary>
[MemoryDiagnoser(false)]
public class Tests
{
    /// <summary>
    /// Half the number of elements in each data set: every set is built from
    /// <c>Enumerable.Range(-Count, Count * 2)</c>.
    /// </summary>
    [Params(8, 8000)]
    public int Count { get; set; }

    private List<double> _doubles;
    private List<int> _ints;
    private string[] _strings;

    /// <summary>
    /// Builds the three data sets and shuffles each one with a fixed seed so that
    /// every run sees the same randomized order.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _doubles = new(Enumerable.Range(-Count, Count * 2).Select(x => (double)x));
        new Random(42).Shuffle(CollectionsMarshal.AsSpan(_doubles));

        _ints = new(Enumerable.Range(-Count, Count * 2));
        // Shuffle the ints themselves; otherwise the Int32 benchmarks would run
        // against already-sorted input, unlike the double and string benchmarks.
        new Random(42).Shuffle(CollectionsMarshal.AsSpan(_ints));

        _strings = Enumerable.Range(-Count, Count * 2).Select(x => x.ToString()).ToArray();
        new Random(42).Shuffle(_strings);
    }

    [Benchmark] public double OrderByFirst_Double() => _doubles.OrderBy(x => x).First();
    [Benchmark] public double OrderLast_Double() => _doubles.Order().Last();

    [Benchmark] public int OrderByFirst_Int32() => _ints.OrderBy(x => x).First();
    [Benchmark] public int OrderLast_Int32() => _ints.Order().Last();

    [Benchmark] public string OrderByFirst_String() => _strings.OrderBy(x => x).First();
    [Benchmark] public string OrderLast_String() => _strings.Order().Last();
}
Method Toolchain Count Mean Ratio
OrderByFirst_Double \main\corerun.exe 8 113.16 ns 1.00
OrderByFirst_Double \pr\corerun.exe 8 63.40 ns 0.56
OrderLast_Double \main\corerun.exe 8 120.98 ns 1.00
OrderLast_Double \pr\corerun.exe 8 58.75 ns 0.49
OrderByFirst_Int32 \main\corerun.exe 8 112.58 ns 1.00
OrderByFirst_Int32 \pr\corerun.exe 8 57.93 ns 0.51
OrderLast_Int32 \main\corerun.exe 8 106.44 ns 1.00
OrderLast_Int32 \pr\corerun.exe 8 21.46 ns 0.20
OrderByFirst_String \main\corerun.exe 8 768.86 ns 1.00
OrderByFirst_String \pr\corerun.exe 8 710.81 ns 0.93
OrderLast_String \main\corerun.exe 8 684.53 ns 1.00
OrderLast_String \pr\corerun.exe 8 682.01 ns 1.01
OrderByFirst_Double \main\corerun.exe 8000 77,141.19 ns 1.00
OrderByFirst_Double \pr\corerun.exe 8000 31,144.22 ns 0.40
OrderLast_Double \main\corerun.exe 8000 71,845.74 ns 1.00
OrderLast_Double \pr\corerun.exe 8000 31,260.30 ns 0.44
OrderByFirst_Int32 \main\corerun.exe 8000 66,197.03 ns 1.00
OrderByFirst_Int32 \pr\corerun.exe 8000 34,464.89 ns 0.52
OrderLast_Int32 \main\corerun.exe 8000 55,260.46 ns 1.00
OrderLast_Int32 \pr\corerun.exe 8000 598.10 ns 0.01
OrderByFirst_String \main\corerun.exe 8000 835,626.65 ns 1.00
OrderByFirst_String \pr\corerun.exe 8000 824,369.56 ns 0.99
OrderLast_String \main\corerun.exe 8000 714,980.82 ns 1.00
OrderLast_String \pr\corerun.exe 8000 676,364.24 ns 0.95

Fixes #87921

When there's no ThenBy, we can take a more optimized path that uses the TKey's comparer directly.  We already have a fast path for this case that converts the O(n log n) operation into O(n), but it employs a comparer that's much more complicated, and as that comparer is used in the inner loop, it makes a meaningful difference.
@stephentoub stephentoub added area-System.Linq tenet-performance Performance related issue labels Jan 24, 2024
@ghost
Copy link

ghost commented Jan 24, 2024

Tagging subscribers to this area: @dotnet/area-system-linq
See info in area-owners.md if you want to be subscribed.

Issue Details

When there's no ThenBy, we can take a more optimized path that uses the TKey's comparer directly. We already have a fast path for this case that converts the O(n log n) operation into O(n), but it employs a comparer that's much more complicated, and as that comparer is used in the inner loop, it makes a meaningful difference.

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

// Run the benchmarks declared in the assembly that contains Tests, forwarding the command-line arguments.
var benchmarkAssembly = typeof(Tests).Assembly;
BenchmarkSwitcher.FromAssembly(benchmarkAssembly).Run(args);

/// <summary>
/// Benchmarks for ordering a sequence and then taking only its first or last element,
/// over double, int and string inputs.
/// </summary>
[MemoryDiagnoser(false)]
public class Tests
{
    /// <summary>
    /// Half the number of elements in each data set: every set is built from
    /// <c>Enumerable.Range(-Count, Count * 2)</c>.
    /// </summary>
    [Params(8, 8000)]
    public int Count { get; set; }

    private List<double> _doubles;
    private List<int> _ints;
    private string[] _strings;

    /// <summary>
    /// Builds the three data sets and shuffles each one with a fixed seed so that
    /// every run sees the same randomized order.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _doubles = new(Enumerable.Range(-Count, Count * 2).Select(x => (double)x));
        new Random(42).Shuffle(CollectionsMarshal.AsSpan(_doubles));

        _ints = new(Enumerable.Range(-Count, Count * 2));
        // Shuffle the ints themselves; otherwise the Int32 benchmarks would run
        // against already-sorted input, unlike the double and string benchmarks.
        new Random(42).Shuffle(CollectionsMarshal.AsSpan(_ints));

        _strings = Enumerable.Range(-Count, Count * 2).Select(x => x.ToString()).ToArray();
        new Random(42).Shuffle(_strings);
    }

    [Benchmark] public double OrderByFirst_Double() => _doubles.OrderBy(x => x).First();
    [Benchmark] public double OrderLast_Double() => _doubles.Order().Last();

    [Benchmark] public int OrderByFirst_Int32() => _ints.OrderBy(x => x).First();
    [Benchmark] public int OrderLast_Int32() => _ints.Order().Last();

    [Benchmark] public string OrderByFirst_String() => _strings.OrderBy(x => x).First();
    [Benchmark] public string OrderLast_String() => _strings.Order().Last();
}
Method Toolchain Count Mean Ratio
OrderByFirst_Double \main\corerun.exe 8 113.16 ns 1.00
OrderByFirst_Double \pr\corerun.exe 8 63.40 ns 0.56
OrderLast_Double \main\corerun.exe 8 120.98 ns 1.00
OrderLast_Double \pr\corerun.exe 8 58.75 ns 0.49
OrderByFirst_Int32 \main\corerun.exe 8 112.58 ns 1.00
OrderByFirst_Int32 \pr\corerun.exe 8 57.93 ns 0.51
OrderLast_Int32 \main\corerun.exe 8 106.44 ns 1.00
OrderLast_Int32 \pr\corerun.exe 8 21.46 ns 0.20
OrderByFirst_String \main\corerun.exe 8 768.86 ns 1.00
OrderByFirst_String \pr\corerun.exe 8 710.81 ns 0.93
OrderLast_String \main\corerun.exe 8 684.53 ns 1.00
OrderLast_String \pr\corerun.exe 8 682.01 ns 1.01
OrderByFirst_Double \main\corerun.exe 8000 77,141.19 ns 1.00
OrderByFirst_Double \pr\corerun.exe 8000 31,144.22 ns 0.40
OrderLast_Double \main\corerun.exe 8000 71,845.74 ns 1.00
OrderLast_Double \pr\corerun.exe 8000 31,260.30 ns 0.44
OrderByFirst_Int32 \main\corerun.exe 8000 66,197.03 ns 1.00
OrderByFirst_Int32 \pr\corerun.exe 8000 34,464.89 ns 0.52
OrderLast_Int32 \main\corerun.exe 8000 55,260.46 ns 1.00
OrderLast_Int32 \pr\corerun.exe 8000 598.10 ns 0.01
OrderByFirst_String \main\corerun.exe 8000 835,626.65 ns 1.00
OrderByFirst_String \pr\corerun.exe 8000 824,369.56 ns 0.99
OrderLast_String \main\corerun.exe 8000 714,980.82 ns 1.00
OrderLast_String \pr\corerun.exe 8000 676,364.24 ns 0.95

Fixes #87921

Author: stephentoub
Assignees: -
Labels:

area-System.Linq, tenet-performance

Milestone: -

internal sealed partial class OrderedEnumerable<TElement, TKey> : OrderedEnumerable<TElement>
{
// For complicated cases, rely on the base implementation that's more comprehensive.
// For the simple case of OrderBy(...).First() or OrderByDescending(...).First() (i.e. where
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't it be even faster if MinBy or MaxBy were used in this case?

Copy link
Member Author

@stephentoub stephentoub Jan 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They're not the same thing. It would work for OrderBy(...).First but not OrderBy(...).Last, because of stability. Consider this:

object[] values = [9, 1, 2, 3, 4, 5, 6, 7, 8, 9];

object result1 = values.OrderBy(x => x).Last();
object result2 = values.MaxBy(x => x)!;

Console.WriteLine(result1);
Console.WriteLine(result2);
Console.WriteLine(ReferenceEquals(result1, result2));

That prints:

9
9
false

because the OrderBy(...).Last will find the last boxed 9 and the MaxBy(...) will find the first boxed 9.

It would also require refactoring MinBy/MaxBy to separate out the workhorse, as they might throw for empty where these implementations might not if they're being used as part of FirstOrDefault/LastOrDefault.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair enough. As you're pointing out, though, the above result changes if I use values.OrderByDescending(x => x).First() instead. Arguably then it's just an artifact of the particular workaround used to encode MaxBy.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arguably then it's just an artifact of the particular workaround used to encode MaxBy.

I don't understand. The issue is very specifically that MaxBy uses >:

if (nextKey != null && comparer.Compare(nextKey, key) > 0)

and the OrderBy.Last semantics need it to be >=.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is using Last common enough in such cases? grep.app seems to suggest that OrderByDescending.First is more common than OrderBy.Last (although the total number of hits doesn't appear to be large enough for this to be conclusive).

My earlier point was mostly an assertion that in most cases, what chain does get picked is accidental rather than intentional use of the stability guarantees of ordered enumerables, and that in most cases these patterns survive from when MaxBy and MinBy weren't available -- so perhaps it might be best to guide folks to just use that (e.g. via an analyzer). You're right that these are no substitute for chaining with FirstOrDefault though.

Copy link
Member

@eiriktsarpalis eiriktsarpalis left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do the benchmarks compare to using MinBy or MaxBy?

@stephentoub
Copy link
Member Author

How do the benchmarks compare to using MinBy or MaxBy?

using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;
using System.Runtime.InteropServices;

// Run the benchmarks declared in the assembly that contains Tests, forwarding the command-line arguments.
var benchmarkAssembly = typeof(Tests).Assembly;
BenchmarkSwitcher.FromAssembly(benchmarkAssembly).Run(args);

/// <summary>
/// Benchmarks comparing "order, then take the last element" against <c>MaxBy</c>
/// over double, int and string inputs.
/// </summary>
[MemoryDiagnoser(false)]
public class Tests
{
    /// <summary>
    /// Half the number of elements in each data set: every set is built from
    /// <c>Enumerable.Range(-Count, Count * 2)</c>.
    /// </summary>
    [Params(1000)]
    public int Count { get; set; }

    private List<double> _doubles;
    private List<int> _ints;
    private string[] _strings;

    /// <summary>
    /// Builds the three data sets and shuffles each one with a fixed seed so that
    /// every run sees the same randomized order.
    /// </summary>
    [GlobalSetup]
    public void Setup()
    {
        _doubles = new(Enumerable.Range(-Count, Count * 2).Select(x => (double)x));
        new Random(42).Shuffle(CollectionsMarshal.AsSpan(_doubles));

        _ints = new(Enumerable.Range(-Count, Count * 2));
        // Shuffle the ints themselves; otherwise the Int32 benchmarks would run
        // against already-sorted input, unlike the double and string benchmarks.
        new Random(42).Shuffle(CollectionsMarshal.AsSpan(_ints));

        _strings = Enumerable.Range(-Count, Count * 2).Select(x => x.ToString()).ToArray();
        new Random(42).Shuffle(_strings);
    }

    [Benchmark] public double OrderByLast_Double() => _doubles.OrderBy(x => x).Last();
    [Benchmark] public double OrderLast_Double() => _doubles.Order().Last();

    [Benchmark] public int OrderByLast_Int32() => _ints.OrderBy(x => x).Last();
    [Benchmark] public int OrderLast_Int32() => _ints.Order().Last();

    [Benchmark] public string OrderByLast_String() => _strings.OrderBy(x => x).Last();
    [Benchmark] public string OrderLast_String() => _strings.Order().Last();

    [Benchmark] public double MaxBy_Double() => _doubles.MaxBy(x => x);
    [Benchmark] public string MaxBy_String() => _strings.MaxBy(x => x);
    [Benchmark] public int MaxBy_Int32() => _ints.MaxBy(x => x);
}
Method Count Mean Error StdDev Allocated
OrderByLast_Double 1000 3,908.00 ns 43.877 ns 36.639 ns 96 B
OrderLast_Double 1000 3,873.96 ns 33.805 ns 29.968 ns 96 B
OrderByLast_Int32 1000 3,115.45 ns 7.384 ns 6.907 ns 96 B
OrderLast_Int32 1000 88.72 ns 0.331 ns 0.309 ns 32 B
OrderByLast_String 1000 81,043.47 ns 430.201 ns 402.410 ns 88 B
OrderLast_String 1000 81,615.21 ns 804.978 ns 752.977 ns 88 B
MaxBy_Double 1000 3,187.21 ns 3.413 ns 3.025 ns 40 B
MaxBy_String 1000 85,013.36 ns 1,665.653 ns 1,558.053 ns 32 B
MaxBy_Int32 1000 3,805.06 ns 13.573 ns 12.032 ns 40 B

Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
area-System.Linq tenet-performance Performance related issue
Projects
None yet
Development

Successfully merging this pull request may close these issues.

Analyzer suggestion : replace .OrderBy().First() with MinBy()
2 participants