|
1 | 1 | #include <algorithm> |
2 | 2 | #include <functional> |
3 | 3 | #include <iostream> |
| 4 | +#include <numeric> |
4 | 5 |
|
5 | 6 | /// N is divisible by common VFs. Used to test vector code path without the |
6 | 7 | /// scalar epilogue. |
@@ -73,6 +74,68 @@ checkVectorFunction(TestFnTy<Ty> ScalarFn, TestFnTy<Ty> VectorFn, |
73 | 74 | } |
74 | 75 | } |
75 | 76 |
|
| 77 | + |
| 78 | +// InitFnStTy initializes two arrays: the first is used for stores, the |
| 79 | +// second for loop control. TestFnStTy takes that initializer, returns a Ty. |
| 80 | +template <typename Ty> |
| 81 | +using InitFnStTy = |
| 82 | + std::function<void(Ty *, Ty *)>; |
| 83 | +template <typename Ty> |
| 84 | +using TestFnStTy = |
| 85 | + std::function<Ty(InitFnStTy<Ty>)>; |
| 86 | + |
/// Check that the four vectorized/interleaved variants of a test function
/// return the same value as its scalar version.
///
/// Prints "Checking <Name>" to std::cout. Then, for every IdxToFind in [0, N),
/// it calls ScalarFn with two initializers (Init1 and Init2 below) to obtain
/// reference values, invokes VectorFn, ForcedVectorFn, InterleavedFn and
/// InterleavedOnlyFn with the same initializers through callThroughOptnone,
/// and compares each result to the reference with !=. The first mismatch is
/// reported on std::cerr and the process exits with status 1.
///
/// N and callThroughOptnone are not defined in this block.
/// NOTE(review): the loop binding `Name` shadows the `Name` parameter, so the
/// miscompare message prints the variant label, not the test name -- confirm
/// that is intended.
/// NOTE(review): std::array and exit are used here, but <array> and <cstdlib>
/// are not among the includes visible at the top of this view -- confirm they
/// are reachable.
| 87 | +template <typename Ty> |
| 88 | +static void |
| 89 | +checkVectorWithStoresFunction(TestFnStTy<Ty> ScalarFn, TestFnStTy<Ty> VectorFn, |
| 90 | + TestFnStTy<Ty> ForcedVectorFn, TestFnStTy<Ty> InterleavedFn, |
| 91 | + TestFnStTy<Ty> InterleavedOnlyFn, const char *Name) { |
| 92 | + std::cout << "Checking " << Name << "\n"; |
| 93 | + |
| 94 | + std::array Tests = {std::make_pair(VectorFn, "autovec"), |
| 95 | + std::make_pair(ForcedVectorFn, "vector-forced"), |
| 96 | + std::make_pair(InterleavedFn, "interleave-forced"), |
| 97 | + std::make_pair(InterleavedOnlyFn, "interleave-only")}; |
| 98 | + |
| 99 | + // Try every position in [0, N) for the zero element of Pred. |
| 100 | + for (unsigned IdxToFind = 0; IdxToFind < N; ++IdxToFind) { |
| 101 | + // Initializer: Pred is all ones except Pred[IdxToFind], which is zero; |
| 102 | + // Data is zero-filled. |
| 103 | + auto Init1 = [IdxToFind](Ty *Data, Ty *Pred) { |
| 104 | + std::fill_n(Pred, N, 1); |
| 105 | + Pred[IdxToFind] = 0; |
| 106 | + std::fill_n(Data, N, 0); |
| 107 | + }; |
| 108 | + |
| 109 | + // Initializer: same as Init1, but Pred[IdxToFind + 3] is also set to zero |
| 110 | + // when that index is below N. |
| 111 | + auto Init2 = [IdxToFind](Ty *Data, Ty *Pred) { |
| 112 | + std::fill_n(Pred, N, 1); |
| 113 | + Pred[IdxToFind] = 0; |
| 114 | + if (IdxToFind + 3 < N) |
| 115 | + Pred[IdxToFind + 3] = 0; |
| 116 | + std::fill_n(Data, N, 0); |
| 117 | + }; |
| 118 | + |
| 119 | + auto Reference1 = ScalarFn(Init1); |
| 120 | + auto Reference2 = ScalarFn(Init2); |
| 121 | + // Run each variant with both initializers and compare to the scalar result. |
| 122 | + for (const auto &[Fn, Name] : Tests) { |
| 123 | + auto ToCheck1 = callThroughOptnone(Fn, Init1); |
| 124 | + if (Reference1 != ToCheck1) { |
| 125 | + std::cerr << "Miscompare for " << Name << ": " << Reference1 |
| 126 | + << " != " << ToCheck1 << "\n"; |
| 127 | + exit(1); |
| 128 | + } |
| 129 | + auto ToCheck2 = callThroughOptnone(Fn, Init2); |
| 130 | + if (Reference2 != ToCheck2) { |
| 131 | + std::cerr << "Miscompare for " << Name << ": " << Reference2 |
| 132 | + << " != " << ToCheck2 << "\n"; |
| 133 | + exit(1); |
| 134 | + } |
| 135 | + } |
| 136 | + } |
| 137 | +} |
| 138 | + |
76 | 139 | // Test function type for three arrays |
77 | 140 | template <typename Ty> |
78 | 141 | using TestFnTy3 = |
@@ -248,6 +311,34 @@ checkVectorFunctionMulti(TestFnTy3<Ty> ScalarFn, TestFnTy3<Ty> VectorFn, |
248 | 311 | _Pragma("clang loop vectorize_width(1) interleave_count(4)") Loop \ |
249 | 312 | }; |
250 | 313 |
|
| 314 | +/// Define test functions for single early exits with stores. |
/// Declares five lambdas (ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn,
/// InterleavedOnlyFn), each taking an InitFnStTy<Ty> named II and returning Ty.
/// Every lambda expands Init, calls II(Data, Pred), and then expands Loop
/// directly after a `clang loop` pragma; only the pragma differs:
///   ScalarFn           vectorize(disable) interleave_count(1)
///   VectorFn           vectorize(enable)
///   ForcedVectorFn     vectorize_width(8) interleave_count(1)
///   InterleavedFn      vectorize(enable) interleave_count(4)
///   InterleavedOnlyFn  vectorize_width(1) interleave_count(4)
/// Loop is pasted right after the pragma and must also supply the lambda's
/// return statement, since the macro adds none.
| 315 | +#define DEFINE_EARLY_EXIT_WITH_STORES(Ty, Init, Data, Pred, Loop) \ |
| 316 | + auto ScalarFn = [](InitFnStTy<Ty> II) -> Ty { \ |
| 317 | + Init; \ |
| 318 | + II(Data, Pred); \ |
| 319 | + _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \ |
| 320 | + }; \ |
| 321 | + auto VectorFn = [](InitFnStTy<Ty> II) -> Ty { \ |
| 322 | + Init; \ |
| 323 | + II(Data, Pred); \ |
| 324 | + _Pragma("clang loop vectorize(enable)") Loop \ |
| 325 | + }; \ |
| 326 | + auto ForcedVectorFn = [](InitFnStTy<Ty> II) -> Ty { \ |
| 327 | + Init; \ |
| 328 | + II(Data, Pred); \ |
| 329 | + _Pragma("clang loop vectorize_width(8) interleave_count(1)") Loop \ |
| 330 | + }; \ |
| 331 | + auto InterleavedFn = [](InitFnStTy<Ty> II) -> Ty { \ |
| 332 | + Init; \ |
| 333 | + II(Data, Pred); \ |
| 334 | + _Pragma("clang loop vectorize(enable) interleave_count(4)") Loop \ |
| 335 | + }; \ |
| 336 | + auto InterleavedOnlyFn = [](InitFnStTy<Ty> II) -> Ty { \ |
| 337 | + Init; \ |
| 338 | + II(Data, Pred); \ |
| 339 | + _Pragma("clang loop vectorize_width(1) interleave_count(4)") Loop \ |
| 340 | + }; |
| 341 | + |
251 | 342 | int main(void) { |
252 | 343 | { |
253 | 344 | DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT( |
@@ -454,5 +545,85 @@ int main(void) { |
454 | 545 | "two_early_exits_small_different_values"); |
455 | 546 | } |
456 | 547 |
|
| 548 | + { |
// exit_after_store: Data[I] is stored before the exit check, so the element
// at the exit index is written as well. The result is the sum of Data.
| 549 | + DEFINE_EARLY_EXIT_WITH_STORES( |
| 550 | + int, |
| 551 | + int Data[N]; int Pred[N]; |
| 552 | + , Data, Pred, for (unsigned I = 0; I < N; I++) { |
| 553 | + Data[I] = I; |
| 554 | + if (Pred[I] == 0) |
| 555 | + break; |
| 556 | + } return std::reduce(Data, Data + N);); |
| 557 | + checkVectorWithStoresFunction<int>(ScalarFn, VectorFn, ForcedVectorFn, |
| 558 | + InterleavedFn, InterleavedOnlyFn, "exit_after_store"); |
| 559 | + } |
| 560 | + |
| 561 | + { |
// exit_before_store: the exit check comes first, so the loop never stores to
// Data at the exit index. The result is the sum of Data.
| 562 | + DEFINE_EARLY_EXIT_WITH_STORES( |
| 563 | + int, |
| 564 | + int Data[N]; int Pred[N]; |
| 565 | + , Data, Pred, for (unsigned I = 0; I < N; I++) { |
| 566 | + if (Pred[I] == 0) |
| 567 | + break; |
| 568 | + Data[I] = I; |
| 569 | + } return std::reduce(Data, Data + N);); |
| 570 | + checkVectorWithStoresFunction<int>(ScalarFn, VectorFn, ForcedVectorFn, |
| 571 | + InterleavedFn, InterleavedOnlyFn, "exit_before_store"); |
| 572 | + } |
| 573 | + |
| 574 | + { |
// exit_between_stores: Data[I] is stored, the exit is checked, and Data[I] is
// then overwritten with I + 3. At the exit index only the first store runs.
| 575 | + DEFINE_EARLY_EXIT_WITH_STORES( |
| 576 | + int, |
| 577 | + int Data[N]; int Pred[N]; |
| 578 | + , Data, Pred, for (unsigned I = 0; I < N; I++) { |
| 579 | + Data[I] = I; |
| 580 | + if (Pred[I] == 0) |
| 581 | + break; |
| 582 | + Data[I] = I + 3; |
| 583 | + } return std::reduce(Data, Data + N);); |
| 584 | + checkVectorWithStoresFunction<int>(ScalarFn, VectorFn, ForcedVectorFn, |
| 585 | + InterleavedFn, InterleavedOnlyFn, "exit_between_stores"); |
| 586 | + } |
| 587 | + |
| 588 | + { |
// exit_after_store_live_out: I is declared in Init, outside the loop, so its
// value after the loop is still available and is added to the sum of Data.
| 589 | + DEFINE_EARLY_EXIT_WITH_STORES( |
| 590 | + int, |
| 591 | + int Data[N]; int Pred[N]; unsigned I; |
| 592 | + , Data, Pred, for (I = 0; I < N; I++) { |
| 593 | + Data[I] = I; |
| 594 | + if (Pred[I] == 0) |
| 595 | + break; |
| 596 | + } return I + std::reduce(Data, Data + N);); |
| 597 | + checkVectorWithStoresFunction<int>(ScalarFn, VectorFn, ForcedVectorFn, |
| 598 | + InterleavedFn, InterleavedOnlyFn, |
| 599 | + "exit_after_store_live_out"); |
| 600 | + } |
| 601 | + |
| 602 | + { |
// exit_after_float_store: same store-then-check loop shape with float arrays;
// the exit condition compares Pred[I] against 0.0f.
| 603 | + DEFINE_EARLY_EXIT_WITH_STORES( |
| 604 | + float, |
| 605 | + float Data[N]; float Pred[N]; |
| 606 | + , Data, Pred, for (unsigned I = 0; I < N; I++) { |
| 607 | + Data[I] = I; |
| 608 | + if (Pred[I] == 0.0f) |
| 609 | + break; |
| 610 | + } return std::reduce(Data, Data + N);); |
| 611 | + checkVectorWithStoresFunction<float>(ScalarFn, VectorFn, ForcedVectorFn, |
| 612 | + InterleavedFn, InterleavedOnlyFn, "exit_after_float_store"); |
| 613 | + } |
| 614 | + |
| 615 | + { |
// exit_after_load_update_store: Data[I] is read, updated to Data[I] * 3 + 5
// and written back before the exit check. I (declared in Init) is added to
// the sum of Data in the result.
| 616 | + DEFINE_EARLY_EXIT_WITH_STORES( |
| 617 | + int, int Data[N]; int Pred[N]; unsigned I; |
| 618 | + , Data, Pred, for (I = 0; I < N; I++) { |
| 619 | + Data[I] = Data[I] * 3 + 5; |
| 620 | + if (Pred[I] == 0) |
| 621 | + break; |
| 622 | + } return I + std::reduce(Data, Data + N);); |
| 623 | + checkVectorWithStoresFunction<int>(ScalarFn, VectorFn, ForcedVectorFn, |
| 624 | + InterleavedFn, InterleavedOnlyFn, |
| 625 | + "exit_after_load_update_store"); |
| 626 | + } |
| 627 | + |
457 | 628 | return 0; |
458 | 629 | } |
0 commit comments