diff --git a/libcxx/docs/ReleaseNotes/21.rst b/libcxx/docs/ReleaseNotes/21.rst index 6cbc0baf29487..349c37ae625f1 100644 --- a/libcxx/docs/ReleaseNotes/21.rst +++ b/libcxx/docs/ReleaseNotes/21.rst @@ -70,8 +70,9 @@ Improvements and New Features - The segmented iterator optimization for ``std::for_each`` has been backported to C++11. Previously it was only available in C++23 and later. -- The ``std::for_each_n`` algorithm has been optimized for segmented iterators, resulting in a performance improvement of - up to 17.7x for ``std::deque`` iterators, and up to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators. +- The ``std::for_each_n``, ``std::ranges::for_each`` and ``std::ranges::for_each_n`` algorithms have been optimized for + segmented iterators, resulting in a performance improvement of up to 17.7x for ``std::deque`` iterators, and up + to 13.9x for ``std::join_view<vector<vector<short>>>`` iterators. - The ``bitset::to_string`` function has been optimized, resulting in a performance improvement of up to 8.3x for bitsets with uniformly distributed zeros and ones, and up to 13.5x and 16.1x for sparse and dense bitsets, respectively. 
diff --git a/libcxx/include/__algorithm/for_each.h b/libcxx/include/__algorithm/for_each.h index b6c2c7c056edd..4167eec3506e4 100644 --- a/libcxx/include/__algorithm/for_each.h +++ b/libcxx/include/__algorithm/for_each.h @@ -12,41 +12,54 @@ #include <__algorithm/for_each_segment.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> +#include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + _LIBCPP_BEGIN_NAMESPACE_STD -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void __for_each(_InputIterator __first, _Sent __last, _Func& __f) { +template +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator +__for_each(_InputIterator __first, _Sent __last, _Func& __f, _Proj& __proj) { for (; __first != __last; ++__first) - __f(*__first); + std::__invoke(__f, std::__invoke(__proj, *__first)); + return __first; } #ifndef _LIBCPP_CXX03_LANG template ::value, int> = 0> -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 void -__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Function& __func) { +_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator +__for_each(_SegmentedIterator __first, _SegmentedIterator __last, _Func& __func, _Proj& __proj) { using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; std::__for_each_segment(__first, __last, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { - std::__for_each(__lfirst, __llast, __func); + std::__for_each(__lfirst, __llast, __func, __proj); }); + return __last; } #endif // !_LIBCPP_CXX03_LANG -template -_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Function -for_each(_InputIterator __first, _InputIterator __last, _Function __f) { - std::__for_each(__first, __last, __f); +template 
+_LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _Func +for_each(_InputIterator __first, _InputIterator __last, _Func __f) { + __identity __proj; + std::__for_each(__first, __last, __f, __proj); return __f; } _LIBCPP_END_NAMESPACE_STD +_LIBCPP_POP_MACROS + #endif // _LIBCPP___ALGORITHM_FOR_EACH_H diff --git a/libcxx/include/__algorithm/for_each_n.h b/libcxx/include/__algorithm/for_each_n.h index 29351ec39f4e7..9a6c6bb5175d6 100644 --- a/libcxx/include/__algorithm/for_each_n.h +++ b/libcxx/include/__algorithm/for_each_n.h @@ -13,10 +13,12 @@ #include <__algorithm/for_each.h> #include <__algorithm/for_each_n_segment.h> #include <__config> +#include <__functional/identity.h> #include <__iterator/iterator_traits.h> #include <__iterator/segmented_iterator.h> #include <__type_traits/disjunction.h> #include <__type_traits/enable_if.h> +#include <__type_traits/invoke.h> #include <__type_traits/negation.h> #include <__utility/convert_to_integral.h> #include <__utility/move.h> @@ -33,16 +35,17 @@ _LIBCPP_BEGIN_NAMESPACE_STD template ::value && _Or< _Not<__is_segmented_iterator<_InputIterator> >, _Not<__has_random_access_local_iterator<_InputIterator> > >::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator -__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) { +__for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) { typedef decltype(std::__convert_to_integral(__orig_n)) _IntegralSize; _IntegralSize __n = __orig_n; while (__n > 0) { - __f(*__first); + std::__invoke(__f, std::__invoke(__proj, *__first)); ++__first; --__n; } @@ -52,39 +55,42 @@ __for_each_n(_InputIterator __first, _Size __orig_n, _Func& __f) { template ::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _RandIter -__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f) { +__for_each_n(_RandIter __first, _Size __orig_n, _Func& __f, _Proj& __proj) { typename std::iterator_traits<_RandIter>::difference_type __n = 
__orig_n; auto __last = __first + __n; - std::__for_each(__first, __last, __f); - return std::move(__last); + std::__for_each(__first, __last, __f, __proj); + return __last; } #ifndef _LIBCPP_CXX03_LANG template ::value && __is_segmented_iterator<_SegmentedIterator>::value && __has_random_access_iterator_category< typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator>::value, int> = 0> _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _SegmentedIterator -__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f) { +__for_each_n(_SegmentedIterator __first, _Size __orig_n, _Func& __f, _Proj& __proj) { using __local_iterator_t = typename __segmented_iterator_traits<_SegmentedIterator>::__local_iterator; return std::__for_each_n_segment(__first, __orig_n, [&](__local_iterator_t __lfirst, __local_iterator_t __llast) { - std::__for_each(__lfirst, __llast, __f); + std::__for_each(__lfirst, __llast, __f, __proj); }); } #endif // !_LIBCPP_CXX03_LANG #if _LIBCPP_STD_VER >= 17 -template +template inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 _InputIterator -for_each_n(_InputIterator __first, _Size __orig_n, _Function __f) { - return std::__for_each_n(__first, __orig_n, __f); +for_each_n(_InputIterator __first, _Size __orig_n, _Func __f) { + __identity __proj; + return std::__for_each_n(__first, __orig_n, __f, __proj); } #endif // _LIBCPP_STD_VER >= 17 diff --git a/libcxx/include/__algorithm/ranges_for_each.h b/libcxx/include/__algorithm/ranges_for_each.h index de39bc5522753..e9c84e8583f87 100644 --- a/libcxx/include/__algorithm/ranges_for_each.h +++ b/libcxx/include/__algorithm/ranges_for_each.h @@ -9,10 +9,12 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H #define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_H +#include <__algorithm/for_each.h> +#include <__algorithm/for_each_n.h> #include <__algorithm/in_fun_result.h> +#include <__concepts/assignable.h> #include <__config> #include <__functional/identity.h> -#include 
<__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/projected.h> #include <__ranges/access.h> @@ -41,9 +43,17 @@ struct __for_each { template _LIBCPP_HIDE_FROM_ABI constexpr static for_each_result<_Iter, _Func> __for_each_impl(_Iter __first, _Sent __last, _Func& __func, _Proj& __proj) { - for (; __first != __last; ++__first) - std::invoke(__func, std::invoke(__proj, *__first)); - return {std::move(__first), std::move(__func)}; + // In the case where we have different iterator and sentinel types, the segmented iterator optimization + // in std::for_each will not kick in. Therefore, we prefer std::for_each_n in that case (whenever we can + // obtain the `n`). + if constexpr (!std::assignable_from<_Iter&, _Sent> && std::sized_sentinel_for<_Sent, _Iter>) { + auto __n = __last - __first; + auto __end = std::__for_each_n(std::move(__first), __n, __func, __proj); + return {std::move(__end), std::move(__func)}; + } else { + auto __end = std::__for_each(std::move(__first), std::move(__last), __func, __proj); + return {std::move(__end), std::move(__func)}; + } } public: diff --git a/libcxx/include/__algorithm/ranges_for_each_n.h b/libcxx/include/__algorithm/ranges_for_each_n.h index 603cb723233c8..3aab1b79c10a1 100644 --- a/libcxx/include/__algorithm/ranges_for_each_n.h +++ b/libcxx/include/__algorithm/ranges_for_each_n.h @@ -9,10 +9,10 @@ #ifndef _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H #define _LIBCPP___ALGORITHM_RANGES_FOR_EACH_N_H +#include <__algorithm/for_each_n.h> #include <__algorithm/in_fun_result.h> #include <__config> #include <__functional/identity.h> -#include <__functional/invoke.h> #include <__iterator/concepts.h> #include <__iterator/incrementable_traits.h> #include <__iterator/iterator_traits.h> @@ -40,11 +40,8 @@ struct __for_each_n { template > _Func> _LIBCPP_HIDE_FROM_ABI constexpr for_each_n_result<_Iter, _Func> operator()(_Iter __first, iter_difference_t<_Iter> __count, _Func __func, _Proj __proj = {}) const { - while (__count-- 
> 0) { - std::invoke(__func, std::invoke(__proj, *__first)); - ++__first; - } - return {std::move(__first), std::move(__func)}; + auto __last = std::__for_each_n(std::move(__first), __count, __func, __proj); + return {std::move(__last), std::move(__func)}; } }; diff --git a/libcxx/include/experimental/iterator b/libcxx/include/experimental/iterator index d92613845a662..565bb83903ac3 100644 --- a/libcxx/include/experimental/iterator +++ b/libcxx/include/experimental/iterator @@ -127,6 +127,7 @@ _LIBCPP_POP_MACROS # if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 # include # include +# include # include # endif #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) diff --git a/libcxx/include/mutex b/libcxx/include/mutex index e058b3113073e..f616bad3ac171 100644 --- a/libcxx/include/mutex +++ b/libcxx/include/mutex @@ -504,6 +504,7 @@ _LIBCPP_POP_MACROS # include # include # include +# include # include # include # include diff --git a/libcxx/include/shared_mutex b/libcxx/include/shared_mutex index e6759e413dfef..6469c02ca5874 100644 --- a/libcxx/include/shared_mutex +++ b/libcxx/include/shared_mutex @@ -457,6 +457,7 @@ _LIBCPP_POP_MACROS # endif // _LIBCPP_HAS_THREADS # if !defined(_LIBCPP_REMOVE_TRANSITIVE_INCLUDES) && _LIBCPP_STD_VER <= 20 +# include # include # endif #endif // __cplusplus < 201103L && defined(_LIBCPP_USE_FROZEN_CXX03_HEADERS) diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp index 760accbe4d929..f58f336f8b892 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each.bench.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -23,6 +24,7 @@ int main(int argc, char** argv) { // {std,ranges}::for_each { auto bm = [](std::string name, auto for_each) { + using ElemType = typename Container::value_type; 
benchmark::RegisterBenchmark( name, [for_each](auto& st) { @@ -33,15 +35,14 @@ int main(int argc, char** argv) { for ([[maybe_unused]] auto _ : st) { benchmark::DoNotOptimize(c); - auto result = for_each(first, last, [](int& x) { x = std::clamp(x, 10, 100); }); + auto result = for_each(first, last, [](ElemType& x) { x = std::clamp(x, 10, 100); }); benchmark::DoNotOptimize(result); } }) ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two - ->Arg(8192) - ->Arg(1 << 20); + ->Arg(8192); }; bm.operator()>("std::for_each(vector)", std_for_each); bm.operator()>("std::for_each(deque)", std_for_each); @@ -51,6 +52,42 @@ int main(int argc, char** argv) { bm.operator()>("rng::for_each(list)", std::ranges::for_each); } + // {std,ranges}::for_each for join_view + { + auto bm = [](std::string name, auto for_each) { + using C1 = typename Container::value_type; + using ElemType = typename C1::value_type; + + benchmark::RegisterBenchmark( + name, + [for_each](auto& st) { + std::size_t const size = st.range(0); + std::size_t const seg_size = 256; + std::size_t const segments = (size + seg_size - 1) / seg_size; + Container c(segments); + for (std::size_t i = 0, n = size; i < segments; ++i, n -= seg_size) { + c[i].resize(std::min(seg_size, n), ElemType(1)); + } + + auto view = c | std::views::join; + auto first = view.begin(); + auto last = view.end(); + + for ([[maybe_unused]] auto _ : st) { + benchmark::DoNotOptimize(c); + auto result = for_each(first, last, [](ElemType& x) { x = std::clamp(x, 10, 100); }); + benchmark::DoNotOptimize(result); + } + }) + ->Arg(8) + ->Arg(32) + ->Arg(50) // non power-of-two + ->Arg(8192); + }; + bm.operator()>>("std::for_each(join_view(vector>))", std_for_each); + bm.operator()>>("rng::for_each(join_view(vector>)", std::ranges::for_each); + } + benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); benchmark::Shutdown(); diff --git a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp 
b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp index 784708c7e01eb..e643e647722cb 100644 --- a/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp +++ b/libcxx/test/benchmarks/algorithms/nonmodifying/for_each_n.bench.cpp @@ -21,7 +21,7 @@ int main(int argc, char** argv) { auto std_for_each_n = [](auto first, auto n, auto f) { return std::for_each_n(first, n, f); }; - // std::for_each_n + // {std,ranges}::for_each_n { auto bm = [](std::string name, auto for_each_n) { using ElemType = typename Container::value_type; @@ -41,19 +41,17 @@ int main(int argc, char** argv) { ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); + ->Arg(8192); }; bm.operator()>("std::for_each_n(vector)", std_for_each_n); bm.operator()>("std::for_each_n(deque)", std_for_each_n); bm.operator()>("std::for_each_n(list)", std_for_each_n); + bm.operator()>("rng::for_each_n(vector)", std::ranges::for_each_n); + bm.operator()>("rng::for_each_n(deque)", std::ranges::for_each_n); + bm.operator()>("rng::for_each_n(list)", std::ranges::for_each_n); } - // std::for_each_n for join_view + // {std,ranges}::for_each_n for join_view { auto bm = [](std::string name, auto for_each_n) { using C1 = typename Container::value_type; @@ -81,14 +79,11 @@ int main(int argc, char** argv) { ->Arg(8) ->Arg(32) ->Arg(50) // non power-of-two - ->Arg(1024) - ->Arg(4096) - ->Arg(8192) - ->Arg(1 << 14) - ->Arg(1 << 16) - ->Arg(1 << 18); + ->Arg(8192); }; bm.operator()>>("std::for_each_n(join_view(vector>))", std_for_each_n); + bm.operator()>>( + "rng::for_each_n(join_view(vector>)", std::ranges::for_each_n); } benchmark::Initialize(&argc, argv); diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp index 8b9b6e82cbcb2..a6d0afde3186a 100644 --- 
a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each.pass.cpp @@ -20,7 +20,10 @@ #include #include +#include +#include #include +#include #include "almost_satisfies_types.h" #include "test_iterators.h" @@ -30,7 +33,7 @@ struct Callable { }; template -concept HasForEachIt = requires (Iter iter, Sent sent) { std::ranges::for_each(iter, sent, Callable{}); }; +concept HasForEachIt = requires(Iter iter, Sent sent) { std::ranges::for_each(iter, sent, Callable{}); }; static_assert(HasForEachIt); static_assert(!HasForEachIt); @@ -47,7 +50,7 @@ static_assert(!HasForEachItFunc); static_assert(!HasForEachItFunc); template -concept HasForEachR = requires (Range range) { std::ranges::for_each(range, Callable{}); }; +concept HasForEachR = requires(Range range) { std::ranges::for_each(range, Callable{}); }; static_assert(HasForEachR>); static_assert(!HasForEachR); @@ -68,7 +71,7 @@ constexpr void test_iterator() { { // simple test { auto func = [i = 0](int& a) mutable { a += i++; }; - int a[] = {1, 6, 3, 4}; + int a[] = {1, 6, 3, 4}; std::same_as> decltype(auto) ret = std::ranges::for_each(Iter(a), Sent(Iter(a + 4)), func); assert(a[0] == 1); @@ -81,8 +84,8 @@ constexpr void test_iterator() { assert(i == 4); } { - auto func = [i = 0](int& a) mutable { a += i++; }; - int a[] = {1, 6, 3, 4}; + auto func = [i = 0](int& a) mutable { a += i++; }; + int a[] = {1, 6, 3, 4}; auto range = std::ranges::subrange(Iter(a), Sent(Iter(a + 4))); std::same_as> decltype(auto) ret = std::ranges::for_each(range, func); @@ -110,6 +113,30 @@ constexpr void test_iterator() { } } +struct deque_test { + std::deque* d_; + int* i_; + + deque_test(std::deque& d, int& i) : d_(&d), i_(&i) {} + + void operator()(int& v) { + assert(&(*d_)[*i_] == &v); + ++*i_; + } +}; + +/*TEST_CONSTEXPR_CXX26*/ +void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr + // 
check that segmented deque iterators work properly + int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; + for (const int size : sizes) { + std::deque d(size); + int index = 0; + + std::ranges::for_each(d, deque_test(d, index)); + } +} + constexpr bool test() { test_iterator, sentinel_wrapper>>(); test_iterator, sentinel_wrapper>>(); @@ -146,6 +173,15 @@ constexpr bool test() { } } + if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr + test_segmented_deque_iterator(); + + { + std::vector> vec = {{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}, {10}, {11, 12, 13}}; + auto v = vec | std::views::join; + std::ranges::for_each(v, [i = 0](int x) mutable { assert(x == 2 * i++); }, [](int x) { return 2 * x; }); + } + return true; } diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp index d4b2d053d08ce..1578763694231 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.foreach/ranges.for_each_n.pass.cpp @@ -17,7 +17,12 @@ #include #include +#include +#include +#include #include +#include +#include #include "almost_satisfies_types.h" #include "test_iterators.h" @@ -27,7 +32,7 @@ struct Callable { }; template -concept HasForEachN = requires (Iter iter) { std::ranges::for_each_n(iter, 0, Callable{}); }; +concept HasForEachN = requires(Iter iter) { std::ranges::for_each_n(iter, 0, Callable{}); }; static_assert(HasForEachN); static_assert(!HasForEachN); @@ -45,7 +50,7 @@ template constexpr void test_iterator() { { // simple test auto func = [i = 0](int& a) mutable { a += i++; }; - int a[] = {1, 6, 3, 4}; + int a[] = {1, 6, 3, 4}; std::same_as> auto ret = std::ranges::for_each_n(Iter(a), 4, func); assert(a[0] == 1); @@ -64,6 +69,30 @@ constexpr void test_iterator() { } } +struct deque_test 
{ + std::deque* d_; + int* i_; + + deque_test(std::deque& d, int& i) : d_(&d), i_(&i) {} + + void operator()(int& v) { + assert(&(*d_)[*i_] == &v); + ++*i_; + } +}; + +/*TEST_CONSTEXPR_CXX26*/ +void test_segmented_deque_iterator() { // TODO: Mark as TEST_CONSTEXPR_CXX26 once std::deque is constexpr + // check that segmented deque iterators work properly + int sizes[] = {0, 1, 2, 1023, 1024, 1025, 2047, 2048, 2049}; + for (const int size : sizes) { + std::deque d(size); + int index = 0; + + std::ranges::for_each_n(d.begin(), d.size(), deque_test(d, index)); + } +} + constexpr bool test() { test_iterator>(); test_iterator>(); @@ -89,6 +118,19 @@ constexpr bool test() { assert(a[2].other == 6); } + if (!TEST_IS_CONSTANT_EVALUATED) // TODO: Use TEST_STD_AT_LEAST_26_OR_RUNTIME_EVALUATED when std::deque is made constexpr + test_segmented_deque_iterator(); + + { + std::vector> vec = {{0}, {1, 2}, {3, 4, 5}, {6, 7, 8, 9}, {10}, {11, 12, 13}}; + auto v = vec | std::views::join; + std::ranges::for_each_n( + v.begin(), + std::ranges::distance(v), + [i = 0](int x) mutable { assert(x == 2 * i++); }, + [](int x) { return 2 * x; }); + } + return true; }