-
Notifications
You must be signed in to change notification settings - Fork 1.6k
vectorize min/max_element using SSE4.1 for floats
#3928
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
43 commits
Select commit
Hold shift + click to select a range
cd7420a
vectorize `min/max_element` using SSE4.1/AVX for floats
AlexGuteniev 80b561c
`const`
AlexGuteniev 86b3100
reverse init
AlexGuteniev 55c5add
format
AlexGuteniev c88fe9a
more interesting values
AlexGuteniev 8f70406
fix x68 build
AlexGuteniev 52c8191
format
AlexGuteniev af9c9b5
coverage
AlexGuteniev ff97c5b
ouch
AlexGuteniev 90b7999
format
AlexGuteniev 568a793
include `/fp:strict` / `/fp:precise`
AlexGuteniev 7f2a635
-extra casts
AlexGuteniev f6b24a9
copypaste error
AlexGuteniev 91ed8a3
more interesting input
AlexGuteniev 46baf22
Unsupport 80-bit long double
AlexGuteniev 83d208f
+benchmark
AlexGuteniev 1be219f
include order
AlexGuteniev 99b1746
-copy
AlexGuteniev 58fc6b9
simplify benchmark
AlexGuteniev eb388ad
fix build
AlexGuteniev 5efca15
Merge branch 'main' into guess_whos_back
StephanTLavavej 6e90c6c
Merge remote-tracking branch 'upstream/main' into guess_whos_back
AlexGuteniev 8249aa5
load noexcept
AlexGuteniev 7404970
fix copypasta during merge
AlexGuteniev b8c61d2
Merge branch 'main' into guess_whos_back
StephanTLavavej e6cf685
Merge remote-tracking branch 'upstream/main' into guess_whos_back
AlexGuteniev 33834c3
Merge branch 'guess_whos_back' of https://github.com/AlexGuteniev/STL…
AlexGuteniev b0867f8
ADL-wary
AlexGuteniev ce7cdc1
ADL-wary
AlexGuteniev 9e87e91
ADL-wary
AlexGuteniev 99fb9c7
Merge branch 'main' into guess_whos_back
StephanTLavavej 43f7d92
Use `_Is_any_of_v`.
StephanTLavavej 83035c0
Comment nitpicks.
StephanTLavavej 30b8748
Fix `#error` message, use "must imply" phrasing.
StephanTLavavej d2b3320
Style: Unnamed `const bool` => `bool`
StephanTLavavej eef60ce
Style: Add newline.
StephanTLavavej 00ba973
`test_min_max_element_f` => `test_min_max_element_floating`
StephanTLavavej e9a76e4
Test ordinary negative values too.
StephanTLavavej 3e17d05
Drop `static_cast<T>` as `input_of_input` is `vector<T>`.
StephanTLavavej 40ca00b
Enable warnings when building the benchmarks.
StephanTLavavej af4df71
Fix truncation warnings in benchmarks.
StephanTLavavej bf79787
Fix x86 size_t truncation warnings in the vector.bool benchmarks.
StephanTLavavej 358dd22
Revert enabling warnings for benchmarks.
StephanTLavavej File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,86 @@ | ||
| // Copyright (c) Microsoft Corporation. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
|
|
||
| #include <algorithm> | ||
| #include <benchmark/benchmark.h> | ||
| #include <cstddef> | ||
| #include <cstdint> | ||
| #include <random> | ||
| #include <ranges> | ||
| #include <type_traits> | ||
|
|
||
| // Compile-time selector for the algorithm that a bm<> instantiation measures. | ||
| enum class Op { | ||
| Min, // bm calls ranges::min_element | ||
| Max, // bm calls ranges::max_element | ||
| Both, // bm calls ranges::minmax_element | ||
| }; | ||
|
|
||
| using namespace std; | ||
|
|
||
| // Benchmark body: fills a local array of Size elements of type T with pseudo-random values, | ||
| // then repeatedly runs the ranges algorithm selected at compile time by Operation. | ||
| // state - the Google Benchmark state object that drives the measured loop. | ||
| template <class T, size_t Size, Op Operation> | ||
| void bm(benchmark::State& state) { | ||
| T data[Size]; | ||
|
|
||
| // Constant seed, so the generator always starts from the same state. | ||
| mt19937 rng(84710); | ||
|
|
||
| if constexpr (!is_floating_point_v<T>) { | ||
| // Integers: uniform values in [1, 20]. Single-byte T is drawn as int and narrowed by the cast | ||
| // (presumably because uniform_int_distribution does not accept char-sized types - confirm). | ||
| using dist_value_t = conditional_t<sizeof(T) != 1, T, int>; | ||
| uniform_int_distribution<dist_value_t> dist(1, 20); | ||
| ranges::generate(data, [&] { return static_cast<T>(dist(rng)); }); | ||
| } else { | ||
| // Floating-point: normally distributed values, mean 0 and standard deviation 10000. | ||
| normal_distribution<T> dist(0, 10000.0); | ||
| ranges::generate(data, [&] { return dist(rng); }); | ||
| } | ||
|
|
||
| // Measured loop: DoNotOptimize consumes each result so the call is not eliminated as dead code. | ||
| for (auto _ : state) { | ||
| if constexpr (Operation == Op::Both) { | ||
| benchmark::DoNotOptimize(ranges::minmax_element(data)); | ||
| } else if constexpr (Operation == Op::Max) { | ||
| benchmark::DoNotOptimize(ranges::max_element(data)); | ||
| } else if constexpr (Operation == Op::Min) { | ||
| benchmark::DoNotOptimize(ranges::min_element(data)); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // Register bm for every element type and every Op, always with 8021-element arrays. | ||
| // Unsigned integer element types. | ||
| BENCHMARK(bm<uint8_t, 8021, Op::Min>); | ||
| BENCHMARK(bm<uint8_t, 8021, Op::Max>); | ||
| BENCHMARK(bm<uint8_t, 8021, Op::Both>); | ||
|
|
||
| BENCHMARK(bm<uint16_t, 8021, Op::Min>); | ||
| BENCHMARK(bm<uint16_t, 8021, Op::Max>); | ||
| BENCHMARK(bm<uint16_t, 8021, Op::Both>); | ||
|
|
||
| BENCHMARK(bm<uint32_t, 8021, Op::Min>); | ||
| BENCHMARK(bm<uint32_t, 8021, Op::Max>); | ||
| BENCHMARK(bm<uint32_t, 8021, Op::Both>); | ||
|
|
||
| BENCHMARK(bm<uint64_t, 8021, Op::Min>); | ||
| BENCHMARK(bm<uint64_t, 8021, Op::Max>); | ||
| BENCHMARK(bm<uint64_t, 8021, Op::Both>); | ||
|
|
||
| // Signed integer element types. | ||
| BENCHMARK(bm<int8_t, 8021, Op::Min>); | ||
| BENCHMARK(bm<int8_t, 8021, Op::Max>); | ||
| BENCHMARK(bm<int8_t, 8021, Op::Both>); | ||
|
|
||
| BENCHMARK(bm<int16_t, 8021, Op::Min>); | ||
| BENCHMARK(bm<int16_t, 8021, Op::Max>); | ||
| BENCHMARK(bm<int16_t, 8021, Op::Both>); | ||
|
|
||
| BENCHMARK(bm<int32_t, 8021, Op::Min>); | ||
| BENCHMARK(bm<int32_t, 8021, Op::Max>); | ||
| BENCHMARK(bm<int32_t, 8021, Op::Both>); | ||
|
|
||
| BENCHMARK(bm<int64_t, 8021, Op::Min>); | ||
| BENCHMARK(bm<int64_t, 8021, Op::Max>); | ||
| BENCHMARK(bm<int64_t, 8021, Op::Both>); | ||
|
|
||
| // Floating-point element types. | ||
| BENCHMARK(bm<float, 8021, Op::Min>); | ||
| BENCHMARK(bm<float, 8021, Op::Max>); | ||
| BENCHMARK(bm<float, 8021, Op::Both>); | ||
|
|
||
| BENCHMARK(bm<double, 8021, Op::Min>); | ||
| BENCHMARK(bm<double, 8021, Op::Max>); | ||
| BENCHMARK(bm<double, 8021, Op::Both>); | ||
|
|
||
|
|
||
| // Google Benchmark macro; presumably supplies the program's main() - see the library docs. | ||
| BENCHMARK_MAIN(); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.