diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a97f27093..ae8f287a8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,9 +10,9 @@ jobs: sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" sudo wget https://apt.llvm.org/llvm.sh sudo chmod +x llvm.sh - sudo ./llvm.sh 16 all + sudo ./llvm.sh 17 all sudo apt install g++-12 libgtest-dev ninja-build pkg-config cmake gcovr - sudo ln -s $(which opt-16) /usr/local/bin/opt + sudo ln -s $(which opt-17) /usr/local/bin/opt - run: cmake -G Ninja -S test -B build/test -DUSE_SANITIZER='Undefined' -DENABLE_TEST_COVERAGE=1 -DCMAKE_BUILD_TYPE=Debug -DENABLE_LLD=OFF env: CXX: g++-12 @@ -22,7 +22,7 @@ jobs: CTEST_OUTPUT_ON_FAILURE: 1 - run: cmake -G Ninja -S test -B builddirclang/test -DCMAKE_BUILD_TYPE=RelWithDebInfo -DUSE_SANITIZER='Undefined' -DCMAKE_PREFIX_PATH=/usr/local -DENABLE_LLD=OFF env: - CXX: clang++-16 + CXX: clang++-17 - run: cmake --build builddirclang/test - run: cmake --build builddirclang/test --target test env: @@ -37,7 +37,7 @@ jobs: # - uses: actions/checkout@v3 # - run: sudo xcode-select --switch /Library/Developer/CommandLineTools # - run: echo $(pkgutil --pkg-info=com.apple.pkg.CLTools_Executables) - # - run: brew install llvm@16 ninja pkg-config cmake gcovr # gcc + # - run: brew install llvm@17 ninja pkg-config cmake gcovr # gcc # - run: echo "/usr/local/opt/llvm/bin" >> $GITHUB_PATH # - run: echo $(which clang++) # - run: cmake -G Ninja -S test -B build/test -DUSE_SANITIZER='Undefined' -DCMAKE_PREFIX_PATH=/usr/local -DCMAKE_BUILD_TYPE=Debug -DENABLE_LLD=OFF diff --git a/.gitignore b/.gitignore index 5461dd8e0..f5a066a46 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,5 @@ latex/ html/ coverage.* coverage-final.json -Testing \ No newline at end of file +Testing +compile_commands.json diff --git a/CMakeLists.txt b/CMakeLists.txt index bf5ec5a04..663dacf44 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,7 @@ 
CPMAddPackage("gh:TheLartians/PackageProject.cmake@1.8.0") # clang;clang-tools-extra;lld;lldb;polly;pstl" "LLVM_ENABLE_RUNTIMES all" "LLVM_ENABLE_RTTI OFF" # "BUILD_SHARED_LIBS OFF" "LLVM_CCACHE_BUILD ON" "LLVM_OPTIMIZED_TABLEGEN ON" "LLVM_ENABLE_LTO ON" # "LLVM_ENABLE_Z3_SOLVER ON" ) -find_package(LLVM 16 REQUIRED CONFIG) +find_package(LLVM 17 REQUIRED CONFIG) list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR}) # include(AddLLVM) include(${LLVM_DIR}/AddLLVM.cmake) @@ -85,7 +85,7 @@ add_library(${PROJECT_NAME} MODULE ${headers} ${sources}) set(CXX_STANDARD_REQUIRED ON) set_target_properties( ${PROJECT_NAME} - PROPERTIES CXX_STANDARD 20 + PROPERTIES CXX_STANDARD 23 CXX_VISIBILITY_PRESET hidden VISIBILITY_INLINES_HIDDEN ON ) @@ -186,5 +186,5 @@ packageProject( INCLUDE_DESTINATION include/${PROJECT_NAME}-${PROJECT_VERSION} VERSION_HEADER "${VERSION_HEADER_LOCATION}" COMPATIBILITY SameMinorVersion - DEPENDENCIES "LLVM 15.0.6" + DEPENDENCIES "LLVM 17.0.1" ) diff --git a/Doxyfile b/Doxyfile index cbabce903..709946500 100644 --- a/Doxyfile +++ b/Doxyfile @@ -86,7 +86,7 @@ CREATE_SUBDIRS = NO # level increment doubles the number of directories, resulting in 4096 # directories at level 8 which is the default and also the maximum value. The # sub-directories are organized in 2 levels, the first level always has a fixed -# numer of 16 directories. +# number of 16 directories. # Minimum value: 0, maximum value: 8, default value: 8. # This tag requires that the tag CREATE_SUBDIRS is set to YES. 
diff --git a/LICENSE b/LICENSE index 94472c35c..2c88274e8 100644 --- a/LICENSE +++ b/LICENSE @@ -1,222 +1,26 @@ -============================================================================== -The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: -============================================================================== +MIT License - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ +Copyright (c) 2021-2024: Chris Elrod, Yingbo Ma, JuliaHub, and other contributors: https://github.com/LoopModels/LoopModels/graphs/contributors - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: - 1. Definitions. +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
+end of terms and conditions - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of 
the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. 
Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - ----- LLVM Exceptions to the Apache 2.0 License ---- - -As an exception, if, as a result of your compiling your source code, portions -of this Software are embedded into an Object form of such source code, you -may redistribute such embedded portions in such Object form without complying -with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
- -In addition, if you combine or link compiled forms of this Software with -software that is licensed under the GPLv2 ("Combined Software") and if a -court of competent jurisdiction determines that the patent provision (Section -3), the indemnity provision (Section 9) or other Section of the License -conflicts with the conditions of the GPLv2, you may retroactively and -prospectively choose to deem waived or otherwise exclude such Section(s) of -the License, but only in their entirety and only with respect to the Combined -Software. +Please see [THIRDPARTY.md](./THIRDPARTY.md) for license information for other software used in this project. diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 5b707b3bc..13ffd9b66 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.23) project(LoopModelsBenchmarks LANGUAGES C CXX) option(ENABLE_NATIVE_COMPILATION "Compile with -march=native" ON) +option(ENABLE_OPENMP "Use OpenMP for a multithreading benchmark" OFF) # --- Import tools ---- @@ -33,10 +34,17 @@ CPMAddPackage( GIT_TAG fnoexceptions SYSTEM TRUE ) +FetchContent_Declare( + Math + GIT_REPOSITORY https://github.com/LoopModels/Math.git + GIT_TAG origin/main +) +FetchContent_MakeAvailable(Math) + # file(GLOB_RECURSE headers CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/include/*.hpp) file(GLOB benchmarks CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/*.cpp) -find_package(LLVM 16 REQUIRED CONFIG) +find_package(LLVM 17 REQUIRED CONFIG) list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR}) include(${LLVM_DIR}/AddLLVM.cmake) # message(STATUS "headers: ${headers}") add_executable(${PROJECT_NAME} ${headers} ${benchmarks}) @@ -50,10 +58,12 @@ target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE ${LLVM_INCLUDE_DIRS}) target_include_directories( ${PROJECT_NAME} PRIVATE ${PROJECT_SOURCE_DIR}/../include ${PROJECT_SOURCE_DIR}/include ) -find_package(OpenMP) +if(ENABLE_OPENMP) + find_package(OpenMP) + 
target_link_libraries(${PROJECT_NAME} PRIVATE OpenMP::OpenMP_CXX) +endif() target_link_libraries( - ${PROJECT_NAME} PRIVATE benchmark::benchmark LLVM unordered_dense::unordered_dense - OpenMP::OpenMP_CXX + ${PROJECT_NAME} PRIVATE benchmark::benchmark LLVM unordered_dense::unordered_dense Math ) if((CMAKE_CXX_COMPILER_ID MATCHES "Clang") OR (CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM")) @@ -78,7 +88,7 @@ if(ENABLE_NATIVE_COMPILATION) endif() set_target_properties( ${PROJECT_NAME} - PROPERTIES CXX_STANDARD 20 + PROPERTIES CXX_STANDARD 23 CXX_VISIBILITY_PRESET hidden VISIBILITY_INLINES_HIDDEN ON ) @@ -103,10 +113,12 @@ target_compile_options( -Wextra -save-temps ) -if(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM") - target_compile_options(${PROJECT_NAME} PRIVATE -fiopenmp) -else() - target_compile_options(${PROJECT_NAME} PRIVATE -fopenmp) +if(ENABLE_OPENMP) + if(CMAKE_CXX_COMPILER_ID MATCHES "IntelLLVM") + target_compile_options(${PROJECT_NAME} PRIVATE -fiopenmp) + else() + target_compile_options(${PROJECT_NAME} PRIVATE -fopenmp) + endif() endif() if(CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)") target_compile_options(${PROJECT_NAME} PRIVATE -masm=intel) diff --git a/benchmark/include/constraint_pruning_benchmark.hpp b/benchmark/include/constraint_pruning_benchmark.hpp index 06210a395..e2a2a0493 100644 --- a/benchmark/include/constraint_pruning_benchmark.hpp +++ b/benchmark/include/constraint_pruning_benchmark.hpp @@ -1,63 +1,66 @@ #pragma once -#include "Math/NormalForm.hpp" -#include "Math/Orthogonalize.hpp" -#include "MatrixStringParse.hpp" +#include +#include +#include #include #include #include #include +using poly::math::Vector, poly::math::IntMatrix, poly::math::Row, + poly::math::Col, poly::math::DenseDims, poly::math::_, + poly::utils::operator""_mat; static void BM_NullSpace(benchmark::State &state) { - IntMatrix B(DenseDims{Row{6}, Col{3}}); - B(0, 0) = 1; - B(1, 0) = 0; - B(2, 0) = -3; - B(3, 0) = 0; - B(4, 0) = 2; - B(5, 0) = -8; + IntMatrix<> 
B(poly::math::DenseDims{Row<>{6}, Col<>{3}}); + B[0, 0] = 1; + B[1, 0] = 0; + B[2, 0] = -3; + B[3, 0] = 0; + B[4, 0] = 2; + B[5, 0] = -8; - B(0, 1) = 0; - B(1, 1) = 1; - B(2, 1) = 5; - B(3, 1) = 0; - B(4, 1) = -1; - B(5, 1) = 4; + B[0, 1] = 0; + B[1, 1] = 1; + B[2, 1] = 5; + B[3, 1] = 0; + B[4, 1] = -1; + B[5, 1] = 4; - B(0, 2) = 0; - B(1, 2) = 0; - B(2, 2) = 0; - B(3, 2) = 1; - B(4, 2) = 7; - B(5, 2) = -9; + B[0, 2] = 0; + B[1, 2] = 0; + B[2, 2] = 0; + B[3, 2] = 1; + B[4, 2] = 7; + B[5, 2] = -9; // fourth row is 0 // std::cout << "B=\n" << B << "\nnullSpace(B) =\n" << // NormalForm::nullSpace(B) << std::endl; - IntMatrix A; - for (auto b : state) A = NormalForm::nullSpace(B); + poly::math::IntMatrix<> A; + for (auto b : state) A = poly::math::NormalForm::nullSpace(B); } // Register the function as a benchmark BENCHMARK(BM_NullSpace); static void BM_NullSpace2000(benchmark::State &state) { const size_t N = 20; - IntMatrix A(DenseDims{Row{N}, Col{N}}); + IntMatrix<> A(DenseDims{Row<>{N}, Col<>{N}}); A << 0; - A(0, 0) = 2; + A[0, 0] = 2; for (size_t i = 1; i < N; ++i) { - A(i - 1, i) = -1; - A(i, i) = 2; - A(i, i - 1) = -1; + A[i - 1, i] = -1; + A[i, i] = 2; + A[i, i - 1] = -1; } for (size_t j = 0; j < N; j += 8) { - A(j, _) << 0; - for (size_t i = 0; i < N; i += 7) A(j, _) += ((i & 1) ? 1 : -1) * A(i, _); + A[j, _] << 0; + for (size_t i = 0; i < N; i += 7) A[j, _] += ((i & 1) ? 
1 : -1) * A[i, _]; } // fourth row is 0 - IntMatrix NS; - for (auto b : state) NS = NormalForm::nullSpace(A); + IntMatrix<> NS; + for (auto b : state) NS = poly::math::NormalForm::nullSpace(A); // std::cout << "NS.size() = (" << NS.numRow() << ", " << NS.numCol() << ")" // << std::endl; } @@ -65,39 +68,38 @@ static void BM_NullSpace2000(benchmark::State &state) { BENCHMARK(BM_NullSpace2000); static void BM_Orthogonalize(benchmark::State &state) { - IntMatrix A = + IntMatrix<> A = "[-2 2 0 1 1 1 2; 3 -3 2 3 2 3 2; -3 0 2 3 -2 0 1; 2 1 0 -1 3 -1 1; 1 -3 -3 -2 2 -2 2; 0 0 1 2 -3 -2 -2; 0 -3 -2 -1 1 0 1]"_mat; - IntMatrix B; + IntMatrix<> B; for (auto b : state) B = orthogonalize(A); } BENCHMARK(BM_Orthogonalize); static void BM_Bareiss2000(benchmark::State &state) { const size_t N = 20; - IntMatrix A(DenseDims{Row{N}, Col{N}}); + IntMatrix<> A(DenseDims{Row<>{N}, Col<>{N}}); A << 0; - A(0, 0) = 2; + A[0, 0] = 2; for (size_t i = 1; i < N; ++i) { - A(i - 1, i) = -1; - A(i, i) = 2; - A(i, i - 1) = -1; + A[i - 1, i] = -1; + A[i, i] = 2; + A[i, i - 1] = -1; } for (size_t j = 0; j < N; j += 8) { - // A(j,:) - for (size_t i = 0; i < N; ++i) A(j, i) = 0; + A[j, _] << 0; for (size_t i = 0; i < N; i += 7) { int64_t s = (i & 1) ? 
1 : -1; - for (size_t k = 0; k < N; ++k) A(j, k) += s * A(i, k); + A[j, _] += s * A[i, _]; } } // std::cout << A << std::endl; // fourth row is 0 - Vector pivots(N); - IntMatrix B; + Vector pivots(N); + IntMatrix<> B; for (auto b : state) { B = A; - NormalForm::bareiss(B, pivots); + poly::math::NormalForm::bareiss(B, pivots); } // std::cout << "NS.size() = (" << NS.numRow() << ", " << NS.numCol() << ")" // << std::endl; diff --git a/benchmark/include/map_benchmark.hpp b/benchmark/include/map_benchmark.hpp index b1cca726f..b385e4edb 100644 --- a/benchmark/include/map_benchmark.hpp +++ b/benchmark/include/map_benchmark.hpp @@ -1,181 +1,222 @@ #pragma once -#include "Containers/BumpMapSet.hpp" +#include "Alloc/Arena.hpp" +#include "Dicts/BumpMapSet.hpp" #include "Dicts/BumpVector.hpp" -#include "Utilities/Allocators.hpp" +#include "Dicts/Trie.hpp" #include #include -#include #include -#include #include #include #include +template struct TrieWrap { + D d; + poly::alloc::Arena<> *alloc; + + template auto operator[](const K &k) -> auto & { + return d[alloc, k]; + }; + template void erase(const K &k) { d.erase(k); } +}; + +inline auto randvp(std::mt19937_64 &rng, uint64_t mask) { + return reinterpret_cast((rng() & mask) | 8); +} + template -void InsertLookup2(std::mt19937_64 &mt, D &map, uint64_t mask) { +void InsertLookup2(std::mt19937_64 &rng, D &map, uint64_t mask) { for (uint64_t i = 0; i < 256; ++i) { - map[reinterpret_cast(mt() & mask)] += - i + map[reinterpret_cast(mt() & mask)]; + void *p0 = randvp(rng, mask); + void *p1 = randvp(rng, mask); + map[p0] += i + map[p1]; } } template -void InsertErase(std::mt19937_64 &mt, D &map, uint64_t mask) { +void InsertErase(std::mt19937_64 &rng, D &map, uint64_t mask) { for (uint64_t i = 0; i < 256; ++i) { - map[reinterpret_cast(mt() & mask)] = i; - map.erase(reinterpret_cast(mt() & mask)); + void *p0 = randvp(rng, mask); + void *p1 = randvp(rng, mask); + map[p0] = i; + map.erase(p1); } } template -void 
InsertLookup3(std::mt19937_64 &mt, D &map, uint64_t mask) { +void InsertLookup3(std::mt19937_64 &rng, D &map, uint64_t mask) { for (uint64_t i = 0; i < 256; ++i) { - map[reinterpret_cast(mt() & mask)] += - map[reinterpret_cast(mt() & mask)] + - map[reinterpret_cast(mt() & mask)]; + void *p0 = randvp(rng, mask); + void *p1 = randvp(rng, mask); + void *p2 = randvp(rng, mask); + map[p0] += map[p1] + map[p2]; } } static void BM_llvmDenseMapInsertErase(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng{}; for (auto b : state) { llvm::DenseMap map{}; - InsertErase(mt, map, mask); + InsertErase(rng, map, mask); } } BENCHMARK(BM_llvmDenseMapInsertErase)->DenseRange(2, 8, 1); static void BM_llvmSmallDenseMapInsertErase(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { llvm::SmallDenseMap map{}; - InsertErase(mt, map, mask); + InsertErase(rng, map, mask); } } BENCHMARK(BM_llvmSmallDenseMapInsertErase)->DenseRange(2, 8, 1); static void BM_BumpMapInsertErase(benchmark::State &state) { - OwningArena<> alloc; - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + poly::alloc::OwningArena<> alloc; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { - amap map{alloc}; - InsertErase(mt, map, mask); + poly::dict::amap map{&alloc}; + InsertErase(rng, map, mask); alloc.reset(); } } BENCHMARK(BM_BumpMapInsertErase)->DenseRange(2, 8, 1); +static void BM_TrieInsertErase(benchmark::State &state) { + poly::alloc::OwningArena<> alloc; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; + for (auto b : state) { + TrieWrap> map{{}, &alloc}; + InsertErase(rng, map, mask); + alloc.reset(); + } +} 
+BENCHMARK(BM_TrieInsertErase)->DenseRange(2, 8, 1); + +static void BM_InlineTrieInsertErase(benchmark::State &state) { + poly::alloc::OwningArena<> alloc; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; + for (auto b : state) { + TrieWrap> map{{}, &alloc}; + InsertErase(rng, map, mask); + alloc.reset(); + } +} +BENCHMARK(BM_InlineTrieInsertErase)->DenseRange(2, 8, 1); + static void BM_ankerlMapInsertErase(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { ankerl::unordered_dense::map map; - InsertErase(mt, map, mask); + InsertErase(rng, map, mask); } } BENCHMARK(BM_ankerlMapInsertErase)->DenseRange(2, 8, 1); static void BM_stdUnorderedMapInsertErase(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { std::unordered_map map; - InsertErase(mt, map, mask); + InsertErase(rng, map, mask); } } BENCHMARK(BM_stdUnorderedMapInsertErase)->DenseRange(2, 8, 1); static void BM_llvmDenseMapInsertLookup(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { llvm::DenseMap map{}; - InsertLookup2(mt, map, mask); + InsertLookup2(rng, map, mask); } } BENCHMARK(BM_llvmDenseMapInsertLookup)->DenseRange(2, 8, 1); static void BM_llvmSmallDenseMapInsertLookup(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { llvm::SmallDenseMap map{}; - InsertLookup2(mt, map, mask); + InsertLookup2(rng, map, mask); } } 
BENCHMARK(BM_llvmSmallDenseMapInsertLookup)->DenseRange(2, 8, 1); static void BM_BumpMapInsertLookup(benchmark::State &state) { - OwningArena<> alloc; - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + poly::alloc::OwningArena<> alloc; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { - amap map{alloc}; - InsertLookup2(mt, map, mask); + poly::dict::amap map{&alloc}; + InsertLookup2(rng, map, mask); alloc.reset(); } } BENCHMARK(BM_BumpMapInsertLookup)->DenseRange(2, 8, 1); static void BM_ankerlMapInsertLookup(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { ankerl::unordered_dense::map map; - InsertLookup2(mt, map, mask); + InsertLookup2(rng, map, mask); } } BENCHMARK(BM_ankerlMapInsertLookup)->DenseRange(2, 8, 1); static void BM_stdUnorderedMapInsertLookup(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { std::unordered_map map; - InsertLookup2(mt, map, mask); + InsertLookup2(rng, map, mask); } } BENCHMARK(BM_stdUnorderedMapInsertLookup)->DenseRange(2, 8, 1); static void BM_llvmDenseMapInsertLookup3(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { llvm::DenseMap map{}; - InsertLookup3(mt, map, mask); + InsertLookup3(rng, map, mask); } } BENCHMARK(BM_llvmDenseMapInsertLookup3)->DenseRange(2, 8, 1); static void BM_llvmSmallDenseMapInsertLookup3(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + 
std::mt19937_64 rng; for (auto b : state) { llvm::SmallDenseMap map{}; - InsertLookup3(mt, map, mask); + InsertLookup3(rng, map, mask); } } BENCHMARK(BM_llvmSmallDenseMapInsertLookup3)->DenseRange(2, 8, 1); static void BM_BumpMapInsertLookup3(benchmark::State &state) { - OwningArena<> alloc; - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + poly::alloc::OwningArena<> alloc; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { - amap map{alloc}; - InsertLookup3(mt, map, mask); + poly::dict::amap map{&alloc}; + InsertLookup3(rng, map, mask); alloc.reset(); } } BENCHMARK(BM_BumpMapInsertLookup3)->DenseRange(2, 8, 1); static void BM_ankerlMapInsertLookup3(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { ankerl::unordered_dense::map map; - InsertLookup3(mt, map, mask); + InsertLookup3(rng, map, mask); } } BENCHMARK(BM_ankerlMapInsertLookup3)->DenseRange(2, 8, 1); static void BM_stdUnorderedMapInsertLookup3(benchmark::State &state) { - uint64_t mask = ((1ull << state.range(0)) - 1) << 3ull; - std::mt19937_64 mt; + uint64_t mask = ((1ULL << state.range(0)) - 1) << 4ULL; + std::mt19937_64 rng; for (auto b : state) { std::unordered_map map; - InsertLookup3(mt, map, mask); + InsertLookup3(rng, map, mask); } } BENCHMARK(BM_stdUnorderedMapInsertLookup3)->DenseRange(2, 8, 1); @@ -205,9 +246,9 @@ static void BM_llvmSmallDenseMapSeq(benchmark::State &state) { BENCHMARK(BM_llvmSmallDenseMapSeq)->RangeMultiplier(2)->Range(1 << 2, 1 << 10); static void BM_BumpMapSeq(benchmark::State &state) { - OwningArena<> alloc; + poly::alloc::OwningArena<> alloc; for (auto b : state) { - amap map{alloc}; + poly::dict::amap map{&alloc}; for (uint64_t i = 1; i <= uint64_t(state.range(0)); ++i) map[reinterpret_cast(8 * i)] = i; for (uint64_t i = 1; i <= 
uint64_t(state.range(0)); ++i) diff --git a/benchmark/include/matrix_exp.hpp b/benchmark/include/matrix_exp.hpp index 213499ad8..84ada1e17 100644 --- a/benchmark/include/matrix_exp.hpp +++ b/benchmark/include/matrix_exp.hpp @@ -1,11 +1,12 @@ #pragma once -#include "Containers/TinyVector.hpp" -#include "Math/Array.hpp" -#include "Math/LinearAlgebra.hpp" -#include "Math/Matrix.hpp" -#include "Math/StaticArrays.hpp" -#include "Utilities/Invariant.hpp" +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -13,154 +14,28 @@ #include #include -template class Dual { - T val{}; - SVector partials{}; +using poly::math::Dual, poly::math::Vector, poly::containers::TinyVector, + poly::math::SquareMatrix, poly::math::AbstractMatrix, poly::math::SquareDims, + poly::math::I, poly::utils::eltype_t, poly::utils::invariant; -public: - using val_type = T; - static constexpr size_t num_partials = N; - constexpr Dual() = default; - constexpr Dual(T v) : val(v) {} - constexpr Dual(T v, size_t n) : val(v) { partials[n] = T{1}; } - constexpr Dual(T v, SVector g) : val(v), partials(g) {} - constexpr Dual(std::integral auto v) : val(v) {} - constexpr Dual(std::floating_point auto v) : val(v) {} - constexpr auto value() -> T & { return val; } - constexpr auto gradient() -> SVector & { return partials; } - [[nodiscard]] constexpr auto value() const -> const T & { return val; } - [[nodiscard]] constexpr auto gradient() const -> const SVector & { - return partials; - } - // constexpr auto operator[](size_t i) const -> T { return grad[i]; } - // constexpr auto operator[](size_t i) -> T & { return grad[i]; } - constexpr auto operator-() const -> Dual { return Dual(-val, -partials); } - constexpr auto operator+(const Dual &other) const -> Dual { - return {val + other.val, partials + other.partials}; - } - constexpr auto operator-(const Dual &other) const -> Dual { - return {val - other.val, partials - other.partials}; - } - constexpr auto 
operator*(const Dual &other) const -> Dual { - return {val * other.val, val * other.partials + other.val * partials}; - } - constexpr auto operator/(const Dual &other) const -> Dual { - return {val / other.val, (other.val * partials - val * other.partials) / - (other.val * other.val)}; - } - constexpr auto operator+=(const Dual &other) -> Dual & { - val += other.val; - partials += other.partials; - return *this; - } - constexpr auto operator-=(const Dual &other) -> Dual & { - val -= other.val; - partials -= other.partials; - return *this; - } - constexpr auto operator*=(const Dual &other) -> Dual & { - val *= other.val; - partials = val * other.partials + other.val * partials; - return *this; - } - constexpr auto operator/=(const Dual &other) -> Dual & { - val /= other.val; - partials = - (other.val * partials - val * other.partials) / (other.val * other.val); - return *this; - } - constexpr auto operator+(double other) const -> Dual { - return {val + other, partials}; - } - constexpr auto operator-(double other) const -> Dual { - return {val - other, partials}; - } - constexpr auto operator*(double other) const -> Dual { - return {val * other, other * partials}; - } - constexpr auto operator/(double other) const -> Dual { - return {val / other, partials / other}; - } - constexpr auto operator+=(double other) -> Dual & { - val += other; - return *this; - } - constexpr auto operator-=(double other) -> Dual & { - val -= other; - return *this; - } - constexpr auto operator*=(double other) -> Dual & { - val *= other; - partials *= other; - return *this; - } - constexpr auto operator/=(double other) -> Dual & { - val /= other; - partials /= other; - return *this; - } - constexpr auto operator==(const Dual &other) const -> bool { - return val == other.val; // && grad == other.grad; - } - constexpr auto operator!=(const Dual &other) const -> bool { - return val != other.val; // || grad != other.grad; - } - constexpr auto operator==(double other) const -> bool { return val 
== other; } - constexpr auto operator!=(double other) const -> bool { return val != other; } - constexpr auto operator<(double other) const -> bool { return val < other; } - constexpr auto operator>(double other) const -> bool { return val > other; } - constexpr auto operator<=(double other) const -> bool { return val <= other; } - constexpr auto operator>=(double other) const -> bool { return val >= other; } - constexpr auto operator<(const Dual &other) const -> bool { - return val < other.val; - } - constexpr auto operator>(const Dual &other) const -> bool { - return val > other.val; - } - constexpr auto operator<=(const Dual &other) const -> bool { - return val <= other.val; - } - constexpr auto operator>=(const Dual &other) const -> bool { - return val >= other.val; - } -}; -template Dual(T, SVector) -> Dual; - -template -constexpr auto operator+(double other, Dual x) -> Dual { - return {x.value() + other, x.gradient()}; -} -template -constexpr auto operator-(double other, Dual x) -> Dual { - return {x.value() - other, -x.gradient()}; -} -template -constexpr auto operator*(double other, Dual x) -> Dual { - return {x.value() * other, other * x.gradient()}; -} -template -constexpr auto operator/(double other, Dual x) -> Dual { - return {other / x.value(), -other * x.gradient() / (x.value() * x.value())}; -} -static_assert(ElementOf, 2>>>); // auto x = Dual, 2>{1.0}; // auto y = x * 3.4; static_assert(std::convertible_to>); static_assert(std::convertible_to, 2>>); -template struct URand { - using T = typename D::val_type; - static constexpr size_t N = D::num_partials; - auto operator()(std::mt19937_64 &mt) -> D { - Dual x{URand{}(mt)}; - for (size_t i = 0; i < N; ++i) x.gradient()[i] = URand{}(mt); +template struct URand {}; + +template struct URand> { + auto operator()(std::mt19937_64 &rng) -> Dual { + Dual x{URand{}(rng)}; + for (size_t i = 0; i < N; ++i) x.gradient()[i] = URand{}(rng); return x; } }; template <> struct URand { - auto operator()(std::mt19937_64 
&mt) -> double { - return std::uniform_real_distribution(-2, 2)(mt); + auto operator()(std::mt19937_64 &rng) -> double { + return std::uniform_real_distribution(-2, 2)(rng); } }; @@ -207,10 +82,10 @@ template constexpr auto opnorm1(const T &A) { v.resizeForOverwrite(n); invariant(A.numRow() > 0); for (size_t j = 0; j < n; ++j) - v[j] = std::abs(extractDualValRecurse(A(0, j))); + v[j] = std::abs(extractDualValRecurse(A[0, j])); for (size_t i = 1; i < n; ++i) for (size_t j = 0; j < n; ++j) - v[j] += std::abs(extractDualValRecurse(A(i, j))); + v[j] += std::abs(extractDualValRecurse(A[i, j])); return *std::max_element(v.begin(), v.end()); } @@ -262,7 +137,7 @@ template constexpr auto expm(const T &A) { *v += *u; } // return (V - U) \ (V + U); - LU::fact(std::move(A2)).ldiv(MutPtrMatrix(V)); + poly::math::LU::fact(std::move(A2)).ldiv(poly::math::MutPtrMatrix(V)); for (; s--;) { U = V * V; std::swap(U, V); @@ -286,28 +161,28 @@ void expbench(const auto &A) { static void BM_expm(benchmark::State &state) { unsigned dim = state.range(0); - std::mt19937_64 mt(0); + std::mt19937_64 rng0; SquareMatrix A{SquareDims{dim}}; - for (auto &a : A) a = URand{}(mt); + for (auto &a : A) a = URand{}(rng0); for (auto b : state) expbench(A); } BENCHMARK(BM_expm)->DenseRange(2, 10, 1); static void BM_expm_dual4(benchmark::State &state) { unsigned dim = state.range(0); - std::mt19937_64 mt(0); + std::mt19937_64 rng0; using D = Dual; SquareMatrix A{SquareDims{dim}}; - for (auto &a : A) a = URand{}(mt); + for (auto &a : A) a = URand{}(rng0); for (auto b : state) expbench(A); } BENCHMARK(BM_expm_dual4)->DenseRange(2, 10, 1); static void BM_expm_dual4x2(benchmark::State &state) { unsigned dim = state.range(0); - std::mt19937_64 mt(0); + std::mt19937_64 rng0; using D = Dual, 2>; SquareMatrix A{SquareDims{dim}}; - for (auto &a : A) a = URand{}(mt); + for (auto &a : A) a = URand{}(rng0); for (auto b : state) expbench(A); } BENCHMARK(BM_expm_dual4x2)->DenseRange(2, 10, 1); @@ -315,19 +190,19 @@ 
BENCHMARK(BM_expm_dual4x2)->DenseRange(2, 10, 1); using D4D2 = Dual, 2>; using SMDD = SquareMatrix; #ifdef __INTEL_LLVM_COMPILER -using SMDD0 = math::ManagedArray; +using SMDD0 = poly::math::ManagedArray; #else -using SMDD0 = math::ManagedArray; +using SMDD0 = poly::math::ManagedArray; #endif #pragma omp declare reduction(+ : SMDD0 : omp_out += omp_in) \ initializer(omp_priv = SMDD0{omp_orig.dim(), D4D2{}}) static void BM_expm_dual4x2_threads(benchmark::State &state) { unsigned dim = state.range(0); - std::mt19937_64 mt(0); + std::mt19937_64 rng0; using D = Dual, 2>; SquareMatrix A{SquareDims{dim}}; - for (auto &a : A) a = URand{}(mt); + for (auto &a : A) a = URand{}(rng0); for (auto bch : state) { SMDD0 B{SquareDims{dim}}; B.fill(D{0}); diff --git a/benchmark/include/simplex_benchmark.hpp b/benchmark/include/simplex_benchmark.hpp index 798ce6e94..a9da9843c 100644 --- a/benchmark/include/simplex_benchmark.hpp +++ b/benchmark/include/simplex_benchmark.hpp @@ -1,10 +1,12 @@ #pragma once -#include "Math/Simplex.hpp" -#include "MatrixStringParse.hpp" +#include +#include #include +using poly::utils::operator""_mat, poly::math::_; + static void BM_Simplex0(benchmark::State &state) { - math::DenseMatrix tableau{ + poly::math::DenseMatrix tableau{ "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 " @@ -871,15 +873,16 @@ static void BM_Simplex0(benchmark::State &state) { "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 " "1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ]"_mat}; - tableau(0, _) << -5859553999884210514; - OwningArena<> alloc; + tableau[0, _] << -5859553999884210514; + poly::alloc::OwningArena<> alloc; unsigned numCon = unsigned(tableau.numRow()) - 1; unsigned numVar = unsigned(tableau.numCol()) - 1; - NotNull 
simpBackup{Simplex::create(alloc, numCon, numVar, 0)}; + poly::utils::Valid simpBackup{ + poly::math::Simplex::create(&alloc, numCon, numVar)}; simpBackup->getTableau() << tableau; // Simplex simpBackup{tableau}; - NotNull simp{Simplex::create(alloc, simpBackup->getNumCons(), - simpBackup->getNumVars(), 0)}; + poly::utils::Valid simp{poly::math::Simplex::create( + &alloc, simpBackup->getNumCons(), simpBackup->getNumVars())}; // Vector sol(37); for (auto b : state) { *simp << *simpBackup; @@ -892,7 +895,7 @@ static void BM_Simplex0(benchmark::State &state) { BENCHMARK(BM_Simplex0); static void BM_Simplex1(benchmark::State &state) { - IntMatrix tableau{ + poly::math::IntMatrix<> tableau{ "[0 0 0 1 0 -1 0 0 0 0 0 0 0 0 0 -1 0 0 0 0 0 0 1 0 -1 0 0 725849473193 " "94205055327856 11 11 11 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 -1 0 0 0 0 0 " "0 0 0 0 0 0 0 0 0 0 0 1 0 -1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 " @@ -1111,13 +1114,14 @@ static void BM_Simplex1(benchmark::State &state) { "0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 " "0 0 0 0 0 0 0 0 0 ]"_mat}; - OwningArena<> alloc; + poly::alloc::OwningArena<> alloc; unsigned numCon = unsigned(tableau.numRow()) - 1; unsigned numVar = unsigned(tableau.numCol()) - 1; - NotNull simpBackup{Simplex::create(alloc, numCon, numVar, 0)}; + poly::utils::Valid simpBackup{ + poly::math::Simplex::create(&alloc, numCon, numVar, 0)}; simpBackup->getTableau() << tableau; - NotNull simp{Simplex::create(alloc, simpBackup->getNumCons(), - simpBackup->getNumVars(), 0)}; + poly::utils::Valid simp{poly::math::Simplex::create( + &alloc, simpBackup->getNumCons(), simpBackup->getNumVars(), 0)}; for (auto b : state) { *simp << *simpBackup; bool fail = simp->initiateFeasible(); diff --git a/benchmark/include/vector.hpp b/benchmark/include/vector.hpp index ce94f2185..4337dfb86 100644 --- a/benchmark/include/vector.hpp +++ b/benchmark/include/vector.hpp @@ -1,6 +1,5 @@ #pragma once #include "Math/Array.hpp" -#include 
"Math/Vector.hpp" #include #include #include diff --git a/compile_commands.json b/compile_commands.json deleted file mode 100644 index 59630f355..000000000 --- a/compile_commands.json +++ /dev/null @@ -1,637 +0,0 @@ -[ -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -fpch-instantiate-templates -Xclang -emit-pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -x c++-header -o CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.cxx", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.cxx", - "output": "CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test 
-I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -o CMakeFiles/LoopModelsTests.dir/bumpmap_test.cpp.o -c /home/chriselrod/Documents/progwork/cxx/LoopModels/test/bumpmap_test.cpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/test/bumpmap_test.cpp", - "output": "CMakeFiles/LoopModelsTests.dir/bumpmap_test.cpp.o" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 
-fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -o CMakeFiles/LoopModelsTests.dir/comparator_test.cpp.o -c /home/chriselrod/Documents/progwork/cxx/LoopModels/test/comparator_test.cpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/test/comparator_test.cpp", - "output": "CMakeFiles/LoopModelsTests.dir/comparator_test.cpp.o" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -o CMakeFiles/LoopModelsTests.dir/compat_test.cpp.o -c 
/home/chriselrod/Documents/progwork/cxx/LoopModels/test/compat_test.cpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/test/compat_test.cpp", - "output": "CMakeFiles/LoopModelsTests.dir/compat_test.cpp.o" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -o CMakeFiles/LoopModelsTests.dir/dependence_test.cpp.o -c /home/chriselrod/Documents/progwork/cxx/LoopModels/test/dependence_test.cpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/test/dependence_test.cpp", - "output": "CMakeFiles/LoopModelsTests.dir/dependence_test.cpp.o" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test 
-I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -o CMakeFiles/LoopModelsTests.dir/graph_test.cpp.o -c /home/chriselrod/Documents/progwork/cxx/LoopModels/test/graph_test.cpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/test/graph_test.cpp", - "output": "CMakeFiles/LoopModelsTests.dir/graph_test.cpp.o" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions 
-fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -o CMakeFiles/LoopModelsTests.dir/orthogonalize_test.cpp.o -c /home/chriselrod/Documents/progwork/cxx/LoopModels/test/orthogonalize_test.cpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/test/orthogonalize_test.cpp", - "output": "CMakeFiles/LoopModelsTests.dir/orthogonalize_test.cpp.o" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -o CMakeFiles/LoopModelsTests.dir/remarks_test.cpp.o -c 
/home/chriselrod/Documents/progwork/cxx/LoopModels/test/remarks_test.cpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/test/remarks_test.cpp", - "output": "CMakeFiles/LoopModelsTests.dir/remarks_test.cpp.o" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -o CMakeFiles/gtest.dir/src/gtest-all.cc.o -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-all.cc", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-all.cc", - "output": "_deps/googletest-build/googletest/CMakeFiles/gtest.dir/src/gtest-all.cc.o" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -o CMakeFiles/gtest_main.dir/src/gtest_main.cc.o -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest_main.cc", - 
"file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest_main.cc", - "output": "_deps/googletest-build/googletest/CMakeFiles/gtest_main.dir/src/gtest_main.cc.o" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -fpch-instantiate-templates -Xclang -emit-pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -x c++-header -o CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.cxx", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.cxx", - "output": "LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude 
-I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -o CMakeFiles/LoopModels.dir/lib/TurboLoop.cpp.o -c /home/chriselrod/Documents/progwork/cxx/LoopModels/lib/TurboLoop.cpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/lib/TurboLoop.cpp", - "output": "LoopModels/CMakeFiles/LoopModels.dir/lib/TurboLoop.cpp.o" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow 
-D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Dicts/BumpMapSet.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Dicts/BumpMapSet.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Dicts/BumpVector.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Dicts/BumpVector.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ 
-I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Array.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Array.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem 
/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Containers/Storage.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Containers/Storage.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c 
/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/ArrayOps.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/ArrayOps.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Indexing.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Indexing.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test 
-I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/AxisTypes.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/AxisTypes.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 
-fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/Invariant.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/Invariant.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Iterators.hpp", - "file": 
"/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Iterators.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/MatrixDimensions.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/MatrixDimensions.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include 
-I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Matrix.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Matrix.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang 
/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/TypePromotion.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/TypePromotion.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/UniformScaling.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/UniformScaling.hpp" -}, - -{ - "directory": 
"/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Vector.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Vector.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem 
/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Rational.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Rational.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang 
/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/GreatestCommonDivisor.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/GreatestCommonDivisor.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/Allocators.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/Allocators.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ 
-I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/Valid.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/Valid.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem 
/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/Optional.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/Optional.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c 
/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include/ankerl/unordered_dense.h", - "file": "/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include/ankerl/unordered_dense.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/Comparators.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/Comparators.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include 
-I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Constraints.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Constraints.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang 
-include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Containers/BitSets.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Containers/BitSets.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Comparisons.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Comparisons.hpp" -}, - -{ - "directory": 
"/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/EmptyArrays.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/EmptyArrays.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem 
/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Math.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Math.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang 
/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/NormalForm.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/NormalForm.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Constructors.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Constructors.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include 
-I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/VectorGreatestCommonDivisor.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/VectorGreatestCommonDivisor.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem 
/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Simplex.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Simplex.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c 
/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/MatrixStringParse.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/MatrixStringParse.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/Loops.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/Loops.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include 
-I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/Polyhedra.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/Polyhedra.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang 
/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/RemarkAnalysis.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/RemarkAnalysis.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/test/TestUtilities.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/test/TestUtilities.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include 
-I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/test/ArrayReference.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/test/ArrayReference.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics 
-Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Address.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Address.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/InstructionCost.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/InstructionCost.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - 
"command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Node.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Node.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow 
-fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Containers/UnrolledList.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Containers/UnrolledList.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Users.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Users.hpp" -}, 
- -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/ListRanges.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Utilities/ListRanges.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem 
/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Support/OStream.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Support/OStream.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan", - "command": "/usr/bin/clang++ -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test/../include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/test -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -isystem /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -std=gnu++20 -fno-exceptions -fno-rtti -ferror-limit=8 -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx.pch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/CMakeFiles/LoopModelsTests.dir/cmake_pch.hxx -c 
/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Orthogonalize.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/Orthogonalize.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-assertion-result.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-assertion-result.h" -}, - 
-{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-message.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-message.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-port.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-port.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ 
-I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/custom/gtest-port.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/custom/gtest-port.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-port-arch.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-port-arch.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g 
-fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-death-test.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-death-test.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-death-test-internal.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-death-test-internal.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter 
-Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-matchers.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-matchers.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-printers.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-printers.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-internal.h", - "file": 
"/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-internal.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-filepath.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-filepath.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-string.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-string.h" -}, - -{ - "directory": 
"/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-type-util.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-type-util.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/custom/gtest-printers.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/custom/gtest-printers.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ 
-I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-param-test.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-param-test.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-param-util.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/gtest-param-util.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden 
-fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-test-part.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-test-part.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-typed-test.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-typed-test.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c 
/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest_pred_impl.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest_pred_impl.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest_prod.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest_prod.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-assertion-result.cc", - "file": 
"/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-assertion-result.cc" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-death-test.cc", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-death-test.cc" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/custom/gtest.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/internal/custom/gtest.h" -}, - -{ - "directory": 
"/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-internal-inl.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-internal-inl.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-spi.h", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include/gtest/gtest-spi.h" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include 
-I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-filepath.cc", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-filepath.cc" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-matchers.cc", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-matchers.cc" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch 
-Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-port.cc", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-port.cc" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-printers.cc", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-printers.cc" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-test-part.cc", - "file": 
"/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-test-part.cc" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-typed-test.cc", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest-typed-test.cc" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/googletest-build/googletest", - "command": "/usr/bin/clang++ -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/include -I/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest -g -fvisibility=hidden -fvisibility-inlines-hidden -Wall -Wshadow -Wconversion -DGTEST_HAS_PTHREAD=1 -fexceptions -W -Wpointer-arith -Wreturn-type -Wcast-qual -Wwrite-strings -Wswitch -Wunused-parameter -Wcast-align -Wchar-subscripts -Winline -Wredundant-decls -c /home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest.cc", - "file": "/home/chriselrod/.cache/CPM/googletest/96129d89f45386492ae46d6bb8c027bc3df5f949/googletest/src/gtest.cc" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS 
-I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/TurboLoop.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/TurboLoop.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic 
-Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Cache.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Cache.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/BBPredPath.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/BBPredPath.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ 
-DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Dicts/MapVector.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Dicts/MapVector.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer 
-fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Predicate.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Predicate.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Containers/TinyVector.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Containers/TinyVector.hpp" -}, - -{ - "directory": 
"/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Instruction.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/Instruction.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden 
-fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/CostModeling.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/IR/CostModeling.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Graphs/Graphs.hpp", - "file": 
"/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Graphs/Graphs.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/LinearProgramming/LoopBlock.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/LinearProgramming/LoopBlock.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include 
-I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/LinearProgramming/ScheduledNode.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/LinearProgramming/ScheduledNode.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang 
/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/Dependence.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/Dependence.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/DependencyPolyhedra.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/DependencyPolyhedra.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude 
-I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/Schedule.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/include/Polyhedra/Schedule.hpp" -}, - -{ - "directory": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels", - "command": "/usr/bin/clang++ -DLoopModels_EXPORTS -I/home/chriselrod/Documents/progwork/cxx/LoopModels/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/PackageProjectInclude -I/home/chriselrod/.cache/CPM/unordered_dense/01b84887a44155645ad5caf7efee7b8e4141e179/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include -I/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-build/PackageProjectInclude -g -std=gnu++20 -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -fno-exceptions -fno-rtti -fstrict-aliasing -fno-plt -fstrict-overflow -fno-omit-frame-pointer -fcolor-diagnostics -Wall -Wpedantic -Wextra -Wshadow -D_GLIBCXX_ASSERTIONS -Winvalid-pch -Xclang -include-pch -Xclang 
/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx.gch -Xclang -include -Xclang /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/LoopModels/CMakeFiles/LoopModels.dir/cmake_pch.hxx -c /home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/StaticArrays.hpp", - "file": "/home/chriselrod/Documents/progwork/cxx/LoopModels/buildclang/nosan/_deps/math-src/include/Math/StaticArrays.hpp" -} -] diff --git a/include/Dicts/BumpMapSet.hpp b/include/Dicts/BumpMapSet.hpp index 0a2b836e9..e88247d5e 100644 --- a/include/Dicts/BumpMapSet.hpp +++ b/include/Dicts/BumpMapSet.hpp @@ -1,6 +1,7 @@ #pragma once +#include #include -#include +#include #include namespace poly::dict { @@ -10,7 +11,7 @@ template using map = ankerl::unordered_dense::map; template -struct amap +struct amap // NOLINT(readability-identifier-naming) : ankerl::unordered_dense::map, std::equal_to, math::BumpPtrVector>> { @@ -21,7 +22,7 @@ struct amap amap(Arena<> *alloc) : Base{WArena>(alloc)} {} }; template -struct aset +struct aset // NOLINT(readability-identifier-naming) : ankerl::unordered_dense::set, std::equal_to, math::BumpPtrVector> { using Base = @@ -29,4 +30,10 @@ struct aset std::equal_to, math::BumpPtrVector>; aset(Arena<> *alloc) : Base{WArena(alloc)} {} }; + +static_assert(std::same_as::value_container_type, + math::BumpPtrVector>>); +static_assert(std::same_as::allocator_type, + alloc::WArena, 16384, true>>); + } // namespace poly::dict diff --git a/include/Dicts/BumpVector.hpp b/include/Dicts/BumpVector.hpp index bcc22821f..c34807f26 100644 --- a/include/Dicts/BumpVector.hpp +++ b/include/Dicts/BumpVector.hpp @@ -1,13 +1,13 @@ #pragma once +#include #include #include -#include #include // In include/Dicts, as it primarily serves to support amap/aset namespace poly { -using utils::WArena, utils::Arena; +using alloc::WArena, alloc::Arena; } // namespace poly namespace poly::math 
{ @@ -31,7 +31,7 @@ template struct BumpPtrVector { [[no_unique_address]] T *mem; [[no_unique_address]] unsigned Size; [[no_unique_address]] unsigned Capacity; - [[no_unique_address]] NotNull> Alloc; + [[no_unique_address]] Valid> Alloc; constexpr BumpPtrVector(WArena a) : mem(a.allocate(InitialCapacity)), Size(0), Capacity(InitialCapacity), @@ -90,19 +90,19 @@ template struct BumpPtrVector { return mem[canonicalize(i, Size)]; } [[nodiscard]] constexpr auto front() -> T & { - assert(Size > 0); + invariant(Size > 0); return mem[0]; } [[nodiscard]] constexpr auto back() -> T & { - assert(Size > 0); + invariant(Size > 0); return mem[Size - 1]; } [[nodiscard]] constexpr auto front() const -> const T & { - assert(Size > 0); + invariant(Size > 0); return mem[0]; } [[nodiscard]] constexpr auto back() const -> const T & { - assert(Size > 0); + invariant(Size > 0); return mem[Size - 1]; } [[nodiscard]] constexpr auto isEmpty() const -> bool { return Size == 0; } @@ -114,13 +114,13 @@ template struct BumpPtrVector { // : mem(x.data()), N(x.size()) {} // constexpr MutPtrVector(T *pt, size_t NN) : mem(pt), N(NN) {} constexpr auto operator[](Range i) -> MutPtrVector { - assert(i.b <= i.e); - assert(i.e <= Size); + invariant(i.b <= i.e); + invariant(i.e <= Size); return MutPtrVector{mem + i.b, i.e - i.b}; } constexpr auto operator[](Range i) const -> PtrVector { - assert(i.b <= i.e); - assert(i.e <= Size); + invariant(i.b <= i.e); + invariant(i.e <= Size); return PtrVector{mem + i.b, i.e - i.b}; } template @@ -228,7 +228,7 @@ template struct BumpPtrVector { Capacity = N; } constexpr void truncate(size_t N) { - assert(N <= Capacity); + invariant(N <= Capacity); Size = N; } constexpr void resize(size_t N) { @@ -267,7 +267,7 @@ template struct BumpPtrVector { [[nodiscard]] constexpr auto empty() const -> bool { return Size == 0; } constexpr void pop_back() { --Size; } constexpr void erase(T *x) { - assert(x >= mem && x < mem + Size); + invariant(x >= mem && x < mem + Size); 
std::destroy_at(x); std::copy_n(x + 1, Size, x); --Size; diff --git a/include/Dicts/MapVector.hpp b/include/Dicts/MapVector.hpp index 4dc6eec27..4b375e19d 100644 --- a/include/Dicts/MapVector.hpp +++ b/include/Dicts/MapVector.hpp @@ -7,8 +7,8 @@ namespace poly::dict { template class OrderedMap { amap map; - // math::BumpPtrVector> vector; - math::ResizeableView, unsigned> vector; + // math::BumpPtrVector> vector; + math::ResizeableView, unsigned> vector; public: constexpr OrderedMap(Arena<> *alloc) : map(alloc), vector() {} @@ -64,10 +64,10 @@ template class OrderedMap { } constexpr void grow(unsigned i) { if (i == vector.getCapacity()) - vector.reserve(*(map.get_allocator().get_allocator()), + vector.reserve((map.get_allocator().get_allocator()), std::max(8, 2 * i)); } - constexpr void insert(std::pair &&value) { + constexpr void insert(containers::Pair &&value) { insert(std::move(value.first), std::move(value.second)); } constexpr void clear() { diff --git a/include/Dicts/Trie.hpp b/include/Dicts/Trie.hpp new file mode 100644 index 000000000..8c3f2e1f8 --- /dev/null +++ b/include/Dicts/Trie.hpp @@ -0,0 +1,229 @@ +#pragma once +#include "Containers/Pair.hpp" +#include +#include +#include +#include +#include +#include + +namespace poly::dict { +using utils::invariant, containers::Pair; + +template constexpr auto fastHash(const T &x) -> uint64_t { + return ankerl::unordered_dense::hash{}(x); +} +template constexpr auto fastHash(T *x) -> uint64_t { + return reinterpret_cast(x) >> + std::countr_zero(alignof(std::max_align_t)); +} + +// Idea from from https://nullprogram.com/blog/2023/09/30/ +template struct TrieMapNode { + K first; + V second{}; + std::array *, 4> children{}; + + constexpr auto find(const K &k) -> TrieMapNode * { + return findChild(k).child; + } + +protected: + struct Child { + TrieMapNode *child; + TrieMapNode *parent; + uint64_t index; // child == parent->children[index]; + }; + constexpr auto isLeaf() -> bool { + return first && 
!std::ranges::any_of(children); + } + constexpr auto getLeaf() -> Child { + if (!first) return {nullptr, nullptr, 0}; + for (size_t i = 0; i < std::size(children); ++i) + if (TrieMapNode *child = children[i]) + if (Child leaf = child->getLeaf(); leaf.child) + return leaf.parent ? leaf : Child{leaf.child, this, i}; + return {this, nullptr, 0}; + } + constexpr auto getSubLeaf() -> Child { + Child c = getLeaf(); + return c.child != this ? c : Child{nullptr, nullptr, 0}; + } + auto findChild(const K &k) -> Child { + if (k == first) return {this, nullptr, 0}; + TrieMapNode *p = this, *c = nullptr; + for (uint64_t h = fastHash(k);; h >>= 2) { + c = p->children[h & 3]; + if (!c || (c->first == k)) return {c, p, h & 3}; + p = c; + } + } + // Returns the removed node + auto eraseImpl(const K &k) -> TrieMapNode * { + Child child = findChild(k); + if (!child.child) return nullptr; + // we're erasing `child` + Child l = child.child->getSubLeaf(); + if (l.child) { + l.parent->children[l.index] = nullptr; // leaf is moved up + std::swap(l.child->children, child.child->children); + } + child.parent->children[child.index] = l.child; // leaf replaces deleted + child.child->second = {}; + return child.child; + } +}; + +// If `EfficientErase = true`, it stores a list of erased nodes. +// Future allocations will allocate from this list if possible. +// Thus, whenever using a pattern that involves interleaving erase and +// insertions, it is worth setting `EfficientErase = true`. It is common enough +// not to do this, that the option for `false` also exists. Don't pay for what +// you don't use. 
+template +struct TrieMap : TrieMapNode { + using NodeT = TrieMapNode; + NodeT *list{nullptr}; + // TODO: implement using `list` to avoid allocs + void erase(const K &k) { + if (NodeT *erased = this->eraseImpl(k)) + erased->children[0] = std::exchange(list, erased); + } + auto operator[](utils::Valid> alloc, const K &k) -> V & { + typename NodeT::Child c = this->findChild(k); + if (c.child) return c.child->second; + invariant(c.parent != nullptr); + invariant(c.index < 4); + NodeT *&res = c.parent->children[c.index]; + invariant(res == nullptr); + if (list) { + res = list; + list = std::exchange(list->children[0], nullptr); + res->second = {}; + } else { + res = alloc->create(); + invariant(res->second == V{}); + } + res->first = k; + return res->second; + } +}; + +template struct TrieMap : TrieMapNode { + using NodeT = TrieMapNode; + void erase(const K &k) { this->eraseImpl(k); } + auto operator[](utils::Valid> alloc, const K &k) -> V & { + typename NodeT::Child c = findChild(k); + if (c.child) return c.child->second; + invariant(c.parent != nullptr); + invariant(c.index < 4); + invariant(c.parent->children[c.index] == nullptr); + TrieMapNode res = c.parent->children[c.index] = alloc->create(); + res->first = k; + return res->second; + } +}; + +static_assert(sizeof(TrieMap) == + sizeof(TrieMapNode)); +static_assert(sizeof(TrieMap) == + sizeof(TrieMapNode) + sizeof(TrieMapNode *)); + +// Optional can be specialized for types to add dead-values without requiring +// extra space. E.g., `sizeof(utils::Optional) == sizeof(T*)`, as `nullptr` +// indicates empty. +template struct InlineTrie { + InlineTrie *children[4]{}; + utils::Optional keys[4]{}; + V values[4]{}; + + // Returns an optional pointer to the value. + constexpr auto find(const K &k) -> utils::Optional { + auto [node, index] = findChild(this, k); + return node ? 
utils::Optional{node->values[index]} : std::nullopt; + } + + auto operator[](utils::Valid> alloc, const K &k) -> V & { + Child c = findChild(this, k); + if (c.subIndex) { + c.node = c.node->children[*c.subIndex] = + alloc->create>(); + c.node->keys[c.index] = k; + } + return c.node->values[c.index]; + } + + void erase(const K &k) { + auto [child, index] = findChild(this, k); + if (!child) return; // was not found + // We now find a leaf key/value pair, and move them here. + if (InlineTrie *descendent = child->children[index]) { + auto [lc, li] = descendent->findLeaf(); + if (lc) { + child->keys[index] = std::move(lc->keys[li]); + child->values[index] = std::move(lc->values[li]); + child = lc; + index = li; + } + } + child->keys[index] = {}; // set to null + child->values[index] = {}; + } + +private: + auto isLeaf(int i) -> bool { + if (!keys[i]) return false; + if (!children[i]) return true; + for (int j = 0; j < 4; ++j) + if (!children[i]->isLeaf(j)) return false; + return true; + } + // A leaf is a key without any child keys. + // A leaf may have children without keys. + auto findLeaf() -> Pair { + InlineTrie *leaf = this; + bool descend[4]{false, false, false, false}; + for (ptrdiff_t i = 0; i < std::ssize(children); ++i) { + if (!leaf->keys[i]) continue; // need key to be leaf + if (!leaf->children[i]) return {leaf, i}; // no children, no child keys + descend[i] = true; + } + for (ptrdiff_t i = 0; i < std::ssize(children); ++i) { + if (!descend[i]) continue; + auto ret = leaf->children[i]->findLeaf(); + return ret.first ? 
ret : Pair{this, i}; + }; + return {nullptr, 0}; + } + struct Child { + InlineTrie *node; + size_t index; + utils::Optional subIndex; + }; + + template + static constexpr auto findChild(InlineTrie *node, const K &k) { + for (uint64_t h = fastHash(k);;) { + uint64_t ind = h & 3; + bool noKey = !node->keys[ind]; + if constexpr (Insert) { + if (noKey) node->keys[ind] = k; + if (noKey || (*node->keys[ind] == k)) return Child{node, ind, {}}; + } else { + if (noKey) return Pair{nullptr, ind}; + if (*node->keys[ind] == k) + return Pair{node, ind}; + } + h >>= 2; + if (!node->children[ind]) { + if constexpr (Insert) return Child{node, h & 3, ind}; + else return Pair{nullptr, ind}; + } + node = node->children[ind]; + } + }; +}; + +// static_assert(sizeof(std::array*,0 >)==1); + +} // namespace poly::dict diff --git a/include/Graphs/Bipartite.hpp b/include/Graphs/Bipartite.hpp index 059b8c9f6..8cdeaba6d 100644 --- a/include/Graphs/Bipartite.hpp +++ b/include/Graphs/Bipartite.hpp @@ -31,7 +31,7 @@ inline auto bipartiteMatch(Matrix &bpGraph, int u, /// Returns maximum number /// of matching from M to N inline auto maxBipartiteMatch(Matrix &bpGraph) - -> std::pair> { + -> containers::Pair> { // An array to keep track of the // applicants assigned to jobs. // The value of matchR[i] is the @@ -39,7 +39,7 @@ inline auto maxBipartiteMatch(Matrix &bpGraph) // the value -1 indicates nobody is // assigned. 
auto [N, M] = bpGraph.size(); - std::pair> res{0, {unsigned(N), -1}}; + containers::Pair> res{0, {unsigned(N), -1}}; size_t &result = res.first; Vector &matchR{res.second}; if (M) { diff --git a/include/Graphs/Graphs.hpp b/include/Graphs/Graphs.hpp index 32f621b8a..afdb1a33f 100644 --- a/include/Graphs/Graphs.hpp +++ b/include/Graphs/Graphs.hpp @@ -1,6 +1,6 @@ #pragma once +#include #include -#include namespace poly::graph { // Currently, only implements top sort, and Tarjan's strongly connected @@ -34,13 +34,9 @@ namespace poly::graph { // template concept AbstractPtrGraph = requires(G g, typename G::VertexType *v) { - { - *(g.getVertices(v).begin()) - } -> std::template same_as; + { *(g.getVertices(v).begin()) } -> std::same_as; { g.getVertices(v) } -> std::ranges::forward_range; - { - *(g.outNeighbors(v).begin()) - } -> std::template same_as; + { *(g.outNeighbors(v).begin()) } -> std::same_as; { g.outNeighbors(v) } -> std::ranges::forward_range; { v->index() } -> std::assignable_from; { v->lowLink() } -> std::assignable_from; @@ -50,10 +46,10 @@ concept AbstractPtrGraph = requires(G g, typename G::VertexType *v) { { v->visited() } -> std::same_as; { v->visit() }; { v->unVisit() }; - { v->setNext(v) } -> std::template same_as; - { v->getNext() } -> std::template same_as; - { v->setNextComponent(v) } -> std::template same_as; - { v->getNextComponent() } -> std::template same_as; + { v->setNext(v) } -> std::same_as; + { v->getNext() } -> std::same_as; + { v->setNextComponent(v) } -> std::same_as; + { v->getNextComponent() } -> std::same_as; }; template struct State { @@ -99,8 +95,8 @@ template inline auto stronglyConnectedComponents(G g, vertex_t *seed) -> vertex_t * { using N = vertex_t; - State state{}; - for (auto *v : g->getVertices(seed)) + State state{}; + for (auto *v : g.getVertices(seed)) if (!v->wasVisited()) state = strongConnect(g, state, v); return state.components; } diff --git a/include/IR/Address.hpp b/include/IR/Address.hpp index 
6a07806a1..862c49068 100644 --- a/include/IR/Address.hpp +++ b/include/IR/Address.hpp @@ -2,35 +2,39 @@ #include "IR/InstructionCost.hpp" #include "IR/Node.hpp" +#include "IR/OrthogonalAxes.hpp" #include "IR/Users.hpp" #include "Polyhedra/Loops.hpp" #include "Support/OStream.hpp" #include "Utilities/ListRanges.hpp" +#include #include #include #include #include -#include #include #include #include #include +#include #include #include #include +#include namespace poly { namespace lp { class ScheduledNode; } // namespace lp namespace poly { -class Dependence; +struct Dependence; class Dependencies; } // namespace poly namespace IR { using math::PtrVector, math::MutPtrVector, math::DensePtrMatrix, math::MutDensePtrMatrix, math::SquarePtrMatrix, math::_, math::DenseDims, - math::PtrMatrix, math::end, poly::Dependence, poly::Dependencies; + math::PtrMatrix, math::end, poly::Dependence, poly::Dependencies, + utils::ListRange; /// Represents a memory access that has been rotated according to some affine /// transform. @@ -78,14 +82,22 @@ class Addr : public Instruction { int32_t edgeIn{-1}; int32_t edgeOut{-1}; lp::ScheduledNode *node; - NotNull basePointer; + Valid basePointer; poly::Loop *loop{nullptr}; llvm::Instruction *instr; int64_t *offSym{nullptr}; const llvm::SCEV **syms; Value *predicate{nullptr}; Addr *origNext{nullptr}; - unsigned numDim{0}, numDynSym{0}; + /// We find reductionns during `IROptimizer` initialization + /// after sorting edges and removing redundant `Addr` + /// this is because we may have multiple repeat stores to the the same + /// location, and a reduction would be the closest pair. Thus, we want to have + /// an ordering. 
+ Addr *reassociableReduction{nullptr}; // if reduction, corresponding addr + uint16_t numDim{0}, numDynSym{0}; + int32_t topologicalPosition; + OrthogonalAxes axes; #if !defined(__clang__) && defined(__GNUC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" @@ -107,12 +119,6 @@ class Addr : public Instruction { numLoops, natDepth, maxNumLoops), basePointer(arrayPtr), instr(user), offSym(offsym), syms(s), numDim(dimOff[0]), numDynSym(dimOff[1]){}; - explicit Addr(const llvm::SCEVUnknown *arrayPtr, llvm::Instruction *user, - unsigned numLoops) - : Instruction(llvm::isa(user) ? VK_Stow : VK_Load, - numLoops), - basePointer(arrayPtr), instr(user){}; - /// Constructor for 0 dimensional memory access [[nodiscard]] constexpr auto getIntMemory() -> int64_t * { return mem; } [[nodiscard]] constexpr auto getIntMemory() const -> int64_t * { @@ -127,15 +133,63 @@ class Addr : public Instruction { [[nodiscard]] constexpr auto indMatPtr() const -> int64_t * { return getIntMemory() + 1 + getArrayDim(); } - [[nodiscard]] auto getSymbolicOffsets() -> MutPtrVector { + [[nodiscard]] constexpr auto getSymbolicOffsets() + -> MutPtrVector { return {syms + numDim, numDynSym}; } [[nodiscard]] constexpr auto offsetMatrix() -> MutDensePtrMatrix { - return {offSym, DenseDims{getArrayDim(), numDynSym}}; + return {offSym, DenseDims<>{{getArrayDim()}, {numDynSym}}}; } + /// recursive reassociability search public: - constexpr void rotate(NotNull explicitLoop, + [[nodiscard]] constexpr auto getOrthAxes() const -> OrthogonalAxes { + return axes; + } + constexpr auto calcOrthAxes(ptrdiff_t depth) -> OrthogonalAxes { + invariant((depth <= 24) && (depth >= 0)); + invariant(depth >= naturalDepth); + invariant(currentDepth >= depth); + currentDepth = depth; + bool indepAxes = true; + uint32_t contig{0}, indep{(uint32_t(1) << depth) - 1}; + /// indexMatrix() -> arrayDim() x getNumLoops() + DensePtrMatrix inds{indexMatrix()}; + for (ptrdiff_t l = 0; l < inds.numCol(); ++l) { + 
if (!inds[0, l]) continue; + contig |= uint32_t(1) << l; + indep &= ~(uint32_t(1) << l); + } + for (ptrdiff_t d = 1; d < inds.numRow(); ++d) { + for (ptrdiff_t l = 0; l < inds.numCol(); ++l) { + if (!inds[d, l]) continue; + if (!(indep & (uint32_t(1) << l))) indepAxes = false; + indep &= ~(uint32_t(1) << l); + } + } + axes = {indepAxes, contig, indep}; + return axes; + } + [[nodiscard]] constexpr auto isDropped() const -> bool { + return (getNext() == nullptr) && (getPrev() == nullptr); + } + constexpr void setTopPosition(int32_t pos) { topologicalPosition = pos; } + [[nodiscard]] constexpr auto getTopPosition() const -> int32_t { + return topologicalPosition; + } + + /// Constructor for 0 dimensional memory access + /// public for use with `std::construct_at` + /// Perhaps it should use a passkey? + explicit Addr(const llvm::SCEVUnknown *arrayPtr, llvm::Instruction *user, + unsigned numLoops) + : Instruction(llvm::isa(user) ? VK_Stow : VK_Load, + numLoops), + basePointer(arrayPtr), instr(user){}; + + /// This gets called to rotate so that we can make direct comparisons down the + /// road without needing rotations. + constexpr void rotate(Valid explicitLoop, SquarePtrMatrix Pinv, int64_t denom, PtrVector omega, int64_t *offsets) { loop = explicitLoop; @@ -143,7 +197,7 @@ class Addr : public Instruction { unsigned oldNatDepth = getNaturalDepth(); DensePtrMatrix M{indexMatrix()}; // aD x nLma MutPtrVector offsetOmega{getOffsetOmega()}; - unsigned depth = this->naturalDepth = uint8_t(Pinv.numCol()); + unsigned depth = uint8_t(ptrdiff_t(Pinv.numCol())); MutDensePtrMatrix mStar{indexMatrix()}; // M is implicitly padded with zeros, newNumLoops >= oldNumLoops invariant(maxDepth >= naturalDepth); @@ -159,32 +213,32 @@ class Addr : public Instruction { // as a temporary, to avoid the aliasing problem. 
// // Use `M` before updating it, to update `offsetOmega` - if (offsets) offsetOmega -= M * PtrVector{offsets, oldNatDepth}; + if (offsets) + offsetOmega -= PtrVector{offsets, oldNatDepth} * M.t(); // update `M` into `mStar` // mStar << M * Pinv(_(0, oldNumLoops), _); MutPtrVector buff{getFusionOmega()[_(0, math::last)]}; - invariant(buff.size(), unsigned(depth)); + invariant(buff.size(), ptrdiff_t(depth)); unsigned newNatDepth = 0; for (ptrdiff_t d = getArrayDim(); d--;) { buff << 0; - for (ptrdiff_t k = 0; k < oldNatDepth; ++k) buff += M(d, k) * Pinv(k, _); - mStar(d, _) << buff; + for (ptrdiff_t k = 0; k < oldNatDepth; ++k) buff += M[d, k] * Pinv[k, _]; + mStar[d, _] << buff; if (newNatDepth == depth) continue; - // find last + // find last, as buf goes outer<->inner auto range = std::ranges::reverse_view{buff[_(newNatDepth, depth)]}; auto m = std::ranges::find_if(range, [](int64_t i) { return i != 0; }); if (m == range.end()) continue; newNatDepth = depth - std::distance(range.begin(), m); } // use `mStar` to update offsetOmega` - offsetOmega -= mStar * omega; + offsetOmega -= omega * mStar.t(); + this->naturalDepth = newNatDepth; if (newNatDepth == depth) return; invariant(newNatDepth < depth); - this->naturalDepth = newNatDepth; MutDensePtrMatrix indMat{this->indexMatrix()}; for (ptrdiff_t d = 1; d < getArrayDim(); ++d) - indMat(d, _) << mStar(d, _(0, newNatDepth)); - this->naturalDepth = newNatDepth; + indMat[d, _] << mStar[d, _(0, newNatDepth)]; } // NOTE: this requires `nodeOrDepth` to be set to innmost loop depth [[nodiscard]] constexpr auto indexedByInnermostLoop() -> bool { @@ -193,7 +247,7 @@ class Addr : public Instruction { return ret; } [[nodiscard]] constexpr auto eachAddr() { - return utils::ListRange{this, [](Addr *a) { return a->getNextAddr(); }}; + return ListRange{this, [](Addr *a) -> Addr * { return a->getNextAddr(); }}; } constexpr auto getNextAddr() -> Addr * { return origNext; } [[nodiscard]] constexpr auto getNextAddr() const -> const 
Addr * { @@ -208,6 +262,17 @@ class Addr : public Instruction { origNext = a; return this; } + // Called from IROptimizer + // In a reduction, `in` must be a load and `out` a store + // This should only be called once, between nearest load/store pair + // as it doesn't store detecting invalidity. + // It checks for invalidity, in which case it doesn't set the reassociable + // reduction. + constexpr inline void maybeReassociableReduction(const Dependencies &); + constexpr auto reassociableReductionPair() -> Addr * { + return reassociableReduction; + } + [[nodiscard]] static constexpr auto intMemNeeded(size_t numLoops, size_t dim) -> size_t { // d = dim, l = numLoops @@ -243,26 +308,37 @@ class Addr : public Instruction { return node; } constexpr void setNode(lp::ScheduledNode *n) { node = n; } - [[nodiscard]] inline auto inputAddrs(Dependencies) const; - [[nodiscard]] inline auto outputAddrs(Dependencies) const; - [[nodiscard]] inline auto inputAddrs(Dependencies, unsigned depth) const; - [[nodiscard]] inline auto outputAddrs(Dependencies, unsigned depth) const; - [[nodiscard]] inline auto inputEdges(Dependencies) const; - [[nodiscard]] inline auto outputEdges(Dependencies) const; - [[nodiscard]] inline auto inputEdges(Dependencies, unsigned depth) const; - [[nodiscard]] inline auto outputEdges(Dependencies, unsigned depth) const; - [[nodiscard]] inline auto inputEdgeIDs(Dependencies) const; - [[nodiscard]] inline auto outputEdgeIDs(Dependencies) const; - [[nodiscard]] inline auto inputEdgeIDs(Dependencies, unsigned depth) const; - [[nodiscard]] inline auto outputEdgeIDs(Dependencies, unsigned depth) const; + [[nodiscard]] inline auto inputAddrs(const Dependencies &) const; + [[nodiscard]] inline auto outputAddrs(const Dependencies &) const; + [[nodiscard]] inline auto inputAddrs(const Dependencies &, int depth) const; + [[nodiscard]] inline auto outputAddrs(const Dependencies &, int depth) const; + [[nodiscard]] inline auto inputEdges(const Dependencies &) const; 
+ [[nodiscard]] inline auto outputEdges(const Dependencies &) const; + [[nodiscard]] inline auto inputEdges(const Dependencies &, int depth) const; + [[nodiscard]] inline auto outputEdges(const Dependencies &, int depth) const; + [[nodiscard]] inline auto inputEdgeIDs(const Dependencies &) const + -> utils::VForwardRange; + [[nodiscard]] inline auto outputEdgeIDs(const Dependencies &) const + -> utils::VForwardRange; + [[nodiscard]] inline auto inputEdgeIDs(const Dependencies &, int depth) const; + [[nodiscard]] inline auto outputEdgeIDs(const Dependencies &, + int depth) const; + [[nodiscard]] inline auto unhoistableOutputs(const Dependencies &, + int depth) const; + [[nodiscard]] static auto zeroDim(Arena<> *alloc, + llvm::SCEVUnknown const *arrayPtr, + llvm::Instruction *loadOrStore, + unsigned numLoops) { + return alloc->create(arrayPtr, loadOrStore, numLoops); + } /// Constructor for regular indexing [[nodiscard]] static auto construct(Arena<> *alloc, const llvm::SCEVUnknown *arrayPtr, llvm::Instruction *user, PtrMatrix indMat, std::array, 2> szOff, PtrVector coffsets, int64_t *offsets, unsigned numLoops, - unsigned maxNumLoops) -> NotNull { + unsigned maxNumLoops) -> Valid { // we don't want to hold any other pointers that may need freeing unsigned arrayDim = szOff[0].size(), nOff = szOff[1].size(); size_t memNeeded = intMemNeeded(maxNumLoops, arrayDim); @@ -272,24 +348,24 @@ class Addr : public Instruction { alloc->allocate(arrayDim + nOff + numLoops - 1); unsigned natDepth = numLoops; for (; natDepth; --natDepth) - if (math::anyNEZero(indMat(_, natDepth - 1))) break; + if (math::anyNEZero(indMat[_, natDepth - 1])) break; auto *ma = new (mem) Addr(arrayPtr, user, offsets, syms, std::array{arrayDim, nOff}, numLoops, natDepth, maxNumLoops); std::copy_n(szOff[0].begin(), arrayDim, syms); std::copy_n(szOff[1].begin(), nOff, syms + arrayDim); - ma->indexMatrix() << indMat(_, _(0, natDepth)); // naturalDepth + ma->indexMatrix() << indMat[_, _(0, natDepth)]; // 
naturalDepth ma->getOffsetOmega() << coffsets; return ma; } /// copies `o` and decrements the last element /// it decrements, as we iterate in reverse order constexpr void setFusionOmega(MutPtrVector o) { - invariant(o.size(), getCurrentDepth() + 1); + invariant(o.size(), ptrdiff_t(getCurrentDepth()) + 1); std::copy_n(o.begin(), getCurrentDepth(), getFusionOmega().begin()); getFusionOmega().back() = o.back()--; } - [[nodiscard]] auto reload(Arena<> *alloc) -> NotNull { + [[nodiscard]] auto reload(Arena<> *alloc) -> Valid { size_t memNeeded = intMemNeeded(maxDepth, numDim); void *p = alloc->allocate(sizeof(Addr) + memNeeded * sizeof(int64_t)); *static_cast(p) = VK_Load; @@ -305,10 +381,11 @@ class Addr : public Instruction { r->edgeOut = -1; return r; } - [[nodiscard]] auto getSizes() const -> PtrVector { + [[nodiscard]] constexpr auto getSizes() const + -> PtrVector { return {syms, numDim}; } - [[nodiscard]] auto getSymbolicOffsets() const + [[nodiscard]] constexpr auto getSymbolicOffsets() const -> PtrVector { return {syms + numDim, numDynSym}; } @@ -316,7 +393,7 @@ class Addr : public Instruction { return v->getKind() <= VK_Stow; } [[nodiscard]] constexpr auto getArrayPointer() const - -> NotNull { + -> Valid { return basePointer; } [[nodiscard]] auto getType() const -> llvm::Type * { @@ -324,12 +401,16 @@ class Addr : public Instruction { } [[nodiscard]] constexpr auto dependsOnIndVars(size_t d) -> bool { for (size_t i = 0, D = getArrayDim(); i < D; ++i) - if (anyNEZero(indexMatrix()(i, _(d, end)))) return true; + if (anyNEZero(indexMatrix()[i, _(d, end)])) return true; return false; } - [[nodiscard]] constexpr auto getAffLoop() const -> NotNull { + [[nodiscard]] constexpr auto getAffLoop() const -> Valid { return loop; } + /// Get the value stored by this instruction. + /// invariant: this instruction must only be called if `Addr` is a store! + /// For a load, use `getUsers()` to get a range of the users. + /// Returns the parent (other than predicates). 
[[nodiscard]] constexpr auto getStoredVal() const -> Value * { invariant(isStore()); return users.getVal(); @@ -351,6 +432,10 @@ class Addr : public Instruction { invariant(Value::classof(n)); predicate = static_cast(n); } + /// Get the users of this load. + /// invariant: this instruction must only be called if `Addr` is a load! + /// For a store, use `getStoredVal()` to get the stored value. + /// Returns the children. [[nodiscard]] constexpr auto getUsers() -> Users & { invariant(isLoad()); return users; @@ -375,22 +460,22 @@ class Addr : public Instruction { MutPtrVector sym{getSymbolicOffsets()}; offSym = alloc->allocate(size_t(numDynSym) * numDim); MutDensePtrMatrix offsMat{offsetMatrix()}; - if (dynSymInd) offsMat(_, _(0, dynSymInd)) << oldOffsMat; + if (dynSymInd) offsMat[_, _(0, dynSymInd)] << oldOffsMat; llvm::Loop *L = loop->getLLVMLoop(); for (unsigned d = loop->getNumLoops() - numToPeel; d--;) L = L->getParentLoop(); for (size_t i = numToPeel; i;) { L = L->getParentLoop(); - if (allZero(Rt(_, --i))) continue; + if (allZero(Rt[_, --i])) continue; // push the SCEV auto *iTyp = L->getInductionVariable(*SE)->getType(); const llvm::SCEV *S = SE->getAddRecExpr( SE->getZero(iTyp), SE->getOne(iTyp), L, llvm::SCEV::NoWrapMask); if (const llvm::SCEV **j = std::ranges::find(sym, S); j != sym.end()) { --numDynSym; - offsMat(_, std::distance(sym.begin(), j)) += Rt(_, i); + offsMat[_, std::distance(sym.begin(), j)] += Rt[_, i]; } else { - offsMat(_, dynSymInd) << Rt(_, i); + offsMat[_, dynSymInd] << Rt[_, i]; sym[dynSymInd++] = S; } } @@ -454,12 +539,14 @@ class Addr : public Instruction { return {getIntMemory() + 1, getArrayDim()}; } /// indexMatrix() -> arrayDim() x getNumLoops() + /// First dimension is contiguous [[nodiscard]] constexpr auto indexMatrix() -> MutDensePtrMatrix { - return {indMatPtr(), DenseDims{getArrayDim(), getNaturalDepth()}}; + return {indMatPtr(), DenseDims<>{{getArrayDim()}, {getNaturalDepth()}}}; } /// indexMatrix() -> arrayDim() x 
getNumLoops() + /// First dimension is contiguous [[nodiscard]] constexpr auto indexMatrix() const -> DensePtrMatrix { - return {indMatPtr(), DenseDims{getArrayDim(), getNaturalDepth()}}; + return {indMatPtr(), DenseDims<>{{getArrayDim()}, {getNaturalDepth()}}}; } [[nodiscard]] constexpr auto getFusionOmega() -> MutPtrVector { unsigned L = getCurrentDepth() + 1; @@ -475,15 +562,17 @@ class Addr : public Instruction { } [[nodiscard]] constexpr auto offsetMatrix() const -> DensePtrMatrix { invariant(offSym != nullptr || numDynSym == 0); - return {offSym, DenseDims{getArrayDim(), numDynSym}}; + return {offSym, DenseDims<>{{getArrayDim()}, {numDynSym}}}; } - [[nodiscard]] constexpr auto getLoop() -> NotNull { return loop; } - [[nodiscard]] constexpr auto sizesMatch(NotNull x) const -> bool { + [[nodiscard]] constexpr auto getAffineLoop() -> Valid { + return loop; + } + [[nodiscard]] constexpr auto sizesMatch(Valid x) const -> bool { auto thisSizes = getSizes(), xSizes = x->getSizes(); return std::equal(thisSizes.begin(), thisSizes.end(), xSizes.begin(), xSizes.end()); } - auto calculateCostContiguousLoadStore(llvm::TargetTransformInfo &TTI, + auto calculateCostContiguousLoadStore(const llvm::TargetTransformInfo &TTI, unsigned int vectorWidth) -> cost::RecipThroughputLatency { constexpr unsigned int addrSpace = 0; @@ -507,11 +596,61 @@ class Addr : public Instruction { llvm::TargetTransformInfo::TCK_Latency)}; } - auto getCost(llvm::TargetTransformInfo &TTI, cost::VectorWidth W) - -> cost::RecipThroughputLatency { - // TODO: cache? 
- return calculateCostContiguousLoadStore(TTI, W.getWidth()); - } + /// RecipThroughput + struct Costs { + double contiguous; + double discontiguous; + double scalar; + constexpr auto operator+=(Costs c) -> Costs & { + contiguous += c.contiguous; + discontiguous += c.discontiguous; + scalar += c.scalar; + return *this; + } + }; + auto calcCostContigDiscontig(const llvm::TargetTransformInfo &TTI, + unsigned int vectorWidth) -> Costs { + constexpr unsigned int addrSpace = 0; + llvm::Type *T = cost::getType(getType(), vectorWidth); + llvm::Align alignment = getAlign(); + + llvm::Intrinsic::ID id = + isLoad() ? llvm::Instruction::Load : llvm::Instruction::Store; + + llvm::InstructionCost gsc{TTI.getGatherScatterOpCost( + id, T, basePointer->getValue(), predicate, alignment, + llvm::TargetTransformInfo::TCK_RecipThroughput)}, + contig, scalar; + + if (!predicate) { + contig = + TTI.getMemoryOpCost(id, T, alignment, addrSpace, + llvm::TargetTransformInfo::TCK_RecipThroughput); + scalar = + TTI.getMemoryOpCost(id, T, alignment, addrSpace, + llvm::TargetTransformInfo::TCK_RecipThroughput); + } else { + llvm::Intrinsic::ID mid = + isLoad() ? llvm::Intrinsic::masked_load : llvm::Intrinsic::masked_store; + contig = TTI.getMaskedMemoryOpCost( + mid, T, alignment, addrSpace, + llvm::TargetTransformInfo::TCK_RecipThroughput); + scalar = TTI.getMaskedMemoryOpCost( + mid, T, alignment, addrSpace, + llvm::TargetTransformInfo::TCK_RecipThroughput); + } + double dc{NAN}, dd{NAN}, ds{NAN}; + if (std::optional o = contig.getValue()) dc = *o; + if (std::optional o = gsc.getValue()) dd = *o; + if (std::optional o = scalar.getValue()) ds = *o; + return {dc, dd, ds}; + } + inline auto reductionLatency(const llvm::TargetTransformInfo &TTI, + unsigned vectorWidth) + -> llvm::InstructionCost::CostType; + + /// drop `this` and remove it from `Dependencies` + inline void drop(Dependencies &); void printDotName(llvm::raw_ostream &os) const { if (isLoad()) os << "... 
= "; @@ -524,7 +663,7 @@ class Addr : public Instruction { if (i) os << ", "; bool printPlus = false; for (ptrdiff_t j = 0; j < numLoops; ++j) { - if (int64_t Aji = A(i, j)) { + if (int64_t Aji = A[i, j]) { if (printPlus) { if (Aji <= 0) { Aji *= -1; @@ -537,7 +676,7 @@ class Addr : public Instruction { } } for (ptrdiff_t j = 0; j < B.numCol(); ++j) { - if (int64_t offij = j ? B(i, j) : b[i]) { + if (int64_t offij = j ? B[i, j] : b[i]) { if (printPlus) { if (offij <= 0) { offij *= -1; @@ -579,7 +718,7 @@ inline auto operator<<(llvm::raw_ostream &os, const Addr &m) if (i) os << ", "; bool printPlus = false; for (ptrdiff_t j = 0; j < numLoops; ++j) { - if (int64_t Aji = A(i, j)) { + if (int64_t Aji = A[i, j]) { if (printPlus) { if (Aji <= 0) { Aji *= -1; @@ -592,7 +731,7 @@ inline auto operator<<(llvm::raw_ostream &os, const Addr &m) } } for (ptrdiff_t j = 0; j < offs.numCol(); ++j) { - if (int64_t offij = offs(i, j)) { + if (int64_t offij = offs[i, j]) { if (printPlus) { if (offij <= 0) { offij *= -1; @@ -640,7 +779,7 @@ class AddrWrapper { return addr == other.addr; } [[nodiscard]] constexpr auto getLoop() const -> poly::Loop * { - return addr->getLoop(); + return addr->getAffineLoop(); } constexpr operator Addr *() { return addr; } }; @@ -652,7 +791,7 @@ class Load : public AddrWrapper { Load(Node *a) : AddrWrapper(a->getKind() == Node::VK_Load ? static_cast(a) : nullptr) {} - [[nodiscard]] constexpr auto getInstruction() const -> llvm::Instruction * { + [[nodiscard]] auto getInstruction() const -> llvm::Instruction * { // could be load or store return llvm::cast(this->addr->getInstruction()); } @@ -664,7 +803,7 @@ class Stow : public AddrWrapper { Stow(Node *a) : AddrWrapper(a->getKind() == Node::VK_Stow ? 
static_cast(a) : nullptr) {} - [[nodiscard]] constexpr auto getInstruction() const -> llvm::StoreInst * { + [[nodiscard]] auto getInstruction() const -> llvm::StoreInst * { // must be store return llvm::cast(this->addr->getInstruction()); } diff --git a/include/IR/BBPredPath.hpp b/include/IR/BBPredPath.hpp index d0121a5d7..22ca821f1 100644 --- a/include/IR/BBPredPath.hpp +++ b/include/IR/BBPredPath.hpp @@ -55,7 +55,7 @@ class Map { [[nodiscard]] auto operator[](llvm::Instruction *inst) -> std::optional { return (*this)[inst->getParent()]; } - void insert(std::pair &&pair) { + void insert(containers::Pair &&pair) { map.insert(std::move(pair)); } [[nodiscard]] auto contains(llvm::BasicBlock *BB) const -> bool { diff --git a/include/IR/Cache.hpp b/include/IR/Cache.hpp index 4ca7d45c2..508db3f6e 100644 --- a/include/IR/Cache.hpp +++ b/include/IR/Cache.hpp @@ -44,8 +44,8 @@ struct AddrChain { [[nodiscard]] constexpr auto getStores() const { Addr *S = (addr && addr->isStore()) ? addr : nullptr; return utils::ListRange(S, [](Addr *A) -> Addr * { - Addr *S = A->getNextAddr(); - if (S && S->isStore()) return S; + Addr *W = A->getNextAddr(); + if (W && W->isStore()) return W; return nullptr; }); } @@ -145,7 +145,7 @@ struct TreeResult { [[nodiscard]] constexpr auto getAddr() const { return addr.getAddr(); } [[nodiscard]] constexpr auto getLoads() const { return addr.getLoads(); } [[nodiscard]] constexpr auto getStores() const { return addr.getStores(); } - void setLoopNest(NotNull L) const { + void setLoopNest(Valid L) const { for (Addr *A : getAddr()) A->setLoopNest(L); } constexpr auto operator*=(TreeResult tr) -> TreeResult & { @@ -156,7 +156,7 @@ struct TreeResult { } [[nodiscard]] constexpr auto getLoop() const -> poly::Loop * { - return (addr.addr) ? addr.addr->getLoop() : nullptr; + return (addr.addr) ? 
addr.addr->getAffineLoop() : nullptr; } [[nodiscard]] constexpr auto getMaxDepth() const -> unsigned { return maxDepth - rejectDepth; @@ -176,7 +176,7 @@ class Cache { map llvmToInternalMap; map instCSEMap; map constMap; - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; llvm::LoopInfo *LI; llvm::ScalarEvolution *SE; Compute *freeInstList{nullptr}; // positive numOps/complete, but empty @@ -196,7 +196,7 @@ class Cache { auto getCSE(Compute *I) -> Compute *& { return instCSEMap[InstByValue{I}]; } // NOLINTNEXTLINE(misc-no-recursion) auto createValue(llvm::Value *v, Predicate::Map *M, TreeResult tr, Value *&n) - -> std::pair { + -> containers::Pair { if (auto *i = llvm::dyn_cast(v)) return createInstruction(i, M, tr, n); if (auto *c = llvm::dyn_cast(v)) @@ -205,6 +205,12 @@ class Cache { return {createConstant(c, n), tr}; return {createConstantVal(v, n), tr}; } + /// void replaceUsesByUsers(Value *oldNode, Value *newNode) + /// The name is confusing. This iterates through oldNode's users + /// (i.e. things using oldNode), and swaps the `oldNode` for `newNode`. + /// It checks if those users are `newNode` itself, if so, it does not modify. + /// This allows replacing `x` with `f(x)`, for example. That feature is used + /// for control flow merging. 
// NOLINTNEXTLINE(misc-no-recursion) constexpr void replaceUsesByUsers(Value *oldNode, Value *newNode) { invariant(oldNode->getKind() == Node::VK_Load || @@ -333,12 +339,12 @@ class Cache { return blackList | blackListAllDependentLoops(S, numPeeled); } static void extendDensePtrMatCols(Arena<> *alloc, - MutDensePtrMatrix &A, math::Row R, - math::Col C) { + MutDensePtrMatrix &A, + math::Row<> R, math::Col<> C) { MutDensePtrMatrix B{matrix(alloc, A.numRow(), C)}; for (ptrdiff_t j = 0; j < R; ++j) { - B(j, _(0, A.numCol())) << A(j, _); - B(j, _(A.numCol(), end)) << 0; + B[j, _(0, A.numCol())] << A[j, _]; + B[j, _(A.numCol(), end)] << 0; } std::swap(A, B); } @@ -359,7 +365,7 @@ class Cache { /// complete the operands // NOLINTNEXTLINE(misc-no-recursion) auto complete(Compute *I, Predicate::Map *M, TreeResult tr) - -> std::pair { + -> containers::Pair { auto *i = I->getLLVMInstruction(); unsigned nOps = I->numCompleteOps(); auto ops = I->getOperands(); @@ -374,7 +380,7 @@ class Cache { } // update list of incomplets inline auto completeInstructions(Predicate::Map *M, TreeResult tr) - -> std::pair { + -> containers::Pair { Compute *completed = nullptr; for (Compute *I = tr.incomplete; I; I = static_cast(I->getNext())) { @@ -393,7 +399,7 @@ class Cache { /// try to remove `I` as a duplicate /// this travels downstream; /// if `I` is eliminated, all users of `I` - /// get updated, making them CSE-candiates. + /// get updated, making them CSE-candidates. /// In this manner, we travel downstream through users. 
// NOLINTNEXTLINE(misc-no-recursion) auto cse(Compute *I) -> Compute * { @@ -410,7 +416,7 @@ class Cache { /// updating the operands of all users of `oldNode` /// and the `users` of all operands of `oldNode` // NOLINTNEXTLINE(misc-no-recursion) - constexpr void replaceAllUsesWith(Instruction *oldNode, Value *newNode) { + void replaceAllUsesWith(Instruction *oldNode, Value *newNode) { invariant(oldNode->getKind() == Node::VK_Load || oldNode->getKind() >= Node::VK_Func); replaceUsesByUsers(oldNode, newNode); @@ -433,21 +439,21 @@ class Cache { /// `nullptr`, then all operands will be left incomplete. // NOLINTNEXTLINE(misc-no-recursion) auto getValue(llvm::Value *v, Predicate::Map *M, TreeResult tr) - -> std::pair { + -> containers::Pair { Value *&n = llvmToInternalMap[v]; if (n) return {n, tr}; // by reference, so we can update in creation return createValue(v, M, tr, n); } auto getValue(llvm::Instruction *I, Predicate::Map *M, TreeResult tr) - -> std::pair { + -> containers::Pair { auto [v, tret] = getValue(static_cast(I), M, tr); return {llvm::cast(v), tret}; } // NOLINTNEXTLINE(misc-no-recursion) auto createInstruction(llvm::Instruction *I, Predicate::Map *M, TreeResult tr, - Value *&t) -> std::pair { + Value *&t) -> containers::Pair { auto *load = llvm::dyn_cast(I); auto *store = llvm::dyn_cast(I); if (!load && !store) return createCompute(I, M, tr, t); @@ -467,7 +473,7 @@ class Cache { // NOLINTNEXTLINE(misc-no-recursion) auto createCompute(llvm::Instruction *I, Predicate::Map *M, TreeResult tr, - Value *&t) -> std::pair { + Value *&t) -> containers::Pair { auto [id, kind] = Compute::getIDKind(I); int numOps = int(I->getNumOperands()); Compute *n = std::construct_at(allocateInst(numOps), kind, I, id, -numOps); @@ -483,12 +489,12 @@ class Cache { auto zeroDimRef(llvm::Instruction *loadOrStore, llvm::SCEVUnknown const *arrayPtr, unsigned numLoops) -> Addr * { - return Addr::construct(&alloc, arrayPtr, loadOrStore, numLoops); + return Addr::zeroDim(&alloc, 
arrayPtr, loadOrStore, numLoops); } // create Addr auto getArrayRef(llvm::Instruction *loadOrStore, llvm::Loop *L, llvm::Value *ptr, TreeResult tr) - -> std::pair { + -> containers::Pair { Value *&n = llvmToInternalMap[loadOrStore]; if (n) return {n, tr}; auto ret = createArrayRef(loadOrStore, L, ptr, tr); @@ -497,14 +503,14 @@ class Cache { } // create Addr auto createArrayRef(llvm::Instruction *loadOrStore, llvm::Value *ptr, - TreeResult tr) -> std::pair { + TreeResult tr) -> containers::Pair { llvm::Loop *L = LI->getLoopFor(loadOrStore->getParent()); return createArrayRef(loadOrStore, L, ptr, tr); } // create Addr auto createArrayRef(llvm::Instruction *loadOrStore, llvm::Loop *L, llvm::Value *ptr, TreeResult tr) - -> std::pair { + -> containers::Pair { const auto *elSz = SE->getElementSize(loadOrStore); const llvm::SCEV *accessFn = SE->getSCEVAtScope(ptr, L); unsigned numLoops = L->getLoopDepth(); @@ -517,7 +523,7 @@ class Cache { auto createArrayRef(llvm::Instruction *loadOrStore, const llvm::SCEV *accessFn, unsigned numLoops, const llvm::SCEV *elSz, TreeResult tr) - -> std::pair { + -> containers::Pair { // https://llvm.org/doxygen/Delinearization_8cpp_source.html#l00582 const llvm::SCEV *pb = SE->getPointerBase(accessFn); @@ -535,27 +541,28 @@ class Cache { if (numDims == 0) return {zeroDimRef(loadOrStore, arrayPtr, 0), tr}; unsigned numPeeled = tr.rejectDepth; numLoops -= numPeeled; - math::IntMatrix Rt{math::StridedDims{numDims, numLoops}, 0}; + math::IntMatrix> Rt{ + math::StridedDims<>{{numDims}, {numLoops}}, 0}; llvm::SmallVector symbolicOffsets; uint64_t blackList{0}; math::Vector coffsets{unsigned(numDims), 0}; - MutDensePtrMatrix offsMat{nullptr, DenseDims{numDims, 0}}; + MutDensePtrMatrix offsMat{nullptr, DenseDims<>{{numDims}, {0}}}; { math::Vector offsets; for (ptrdiff_t i = 0; i < numDims; ++i) { offsets << 0; blackList |= - fillAffineIndices(Rt(i, _), &coffsets[i], offsets, symbolicOffsets, + fillAffineIndices(Rt[i, _], &coffsets[i], offsets, 
symbolicOffsets, subscripts[i], 1, numPeeled); if (offsets.size() > offsMat.numCol()) - extendDensePtrMatCols(&alloc, offsMat, math::Row{i}, - math::Col{offsets.size()}); - offsMat(i, _) << offsets; + extendDensePtrMatCols(&alloc, offsMat, math::Row<>{i}, + math::Col<>{offsets.size()}); + offsMat[i, _] << offsets; } } size_t numExtraLoopsToPeel = 64 - std::countl_zero(blackList); Addr *op = Addr::construct(&alloc, arrayPtr, loadOrStore, - Rt(_, _(numExtraLoopsToPeel, end)), + Rt[_, _(numExtraLoopsToPeel, end)], {std::move(sizes), std::move(symbolicOffsets)}, coffsets, offsMat.data(), numLoops, tr.maxDepth); tr.addAddr(op); @@ -601,7 +608,7 @@ class Cache { return B; } auto similarCompute(Compute *A, PtrVector ops) -> Compute * { - invariant(A->getNumOperands(), ops.size()); + invariant(ptrdiff_t(A->getNumOperands()), ops.size()); return createCompute(A->getOpId(), A->getKind(), ops, A->getType(), A->getFastMathFlags()); } diff --git a/include/IR/ControlFlowMerging.hpp b/include/IR/ControlFlowMerging.hpp index a0f8e73de..a228756a0 100644 --- a/include/IR/ControlFlowMerging.hpp +++ b/include/IR/ControlFlowMerging.hpp @@ -1,12 +1,13 @@ #pragma once +#include "Alloc/Arena.hpp" #include "Dicts/BumpMapSet.hpp" #include "IR/BBPredPath.hpp" #include "IR/Cache.hpp" #include "IR/Instruction.hpp" #include "IR/Predicate.hpp" -#include "Utilities/Allocators.hpp" #include +#include #include #include #include @@ -57,7 +58,7 @@ struct MergingCost { // that is, if we're fusing c and d, we can make each point toward // what the other one was pointing to, in order to link the chains. 
amap mergeMap; - math::BumpPtrVector> mergeList; + math::BumpPtrVector> mergeList; amap *> ancestorMap; llvm::InstructionCost cost; @@ -146,8 +147,8 @@ struct MergingCost { H = mergeMap[H]; } } - static constexpr auto popBit(uint8_t x) -> std::pair { - return {x & 1, x >> 1}; + static constexpr auto popBit(uint8_t x) -> containers::Pair { + return {bool(x & 1), uint8_t(x >> 1)}; } struct Allocate { @@ -215,7 +216,7 @@ struct MergingCost { // select(p, f(a,b), f(c,d)) => f(select(p, a, c), select(p, b, d)) // but we can often do better, e.g. we may have // select(p, f(a,b), f(c,b)) => f(select(p, a, c), b) - // additionally, we can check `I->associativeOperandsFlag()` + // additionally, we can check `I->commutativeOperandsFlag()` // select(p, f(a,b), f(c,a)) => f(a, select(p, b, c)) // we need to figure out which operands we're merging with which, // @@ -223,26 +224,26 @@ struct MergingCost { // arguments are merged, as this may be common when two // control flow branches have relatively similar pieces. // E.g., if b and c are already merged, - // and if `f`'s ops are associative, then we'd get + // and if `f`'s ops are commutative, then we'd get // select(p, f(a,b), f(c,a)) => f(a, b) // so we need to check if any operand pairs are merged with each other. // note `isMerged(a,a) == true`, so that's the one query we need to use. auto selector = init(selects, A, B); MutPtrVector operandsA = A->getOperands(); MutPtrVector operandsB = B->getOperands(); - size_t numOperands = operandsA.size(); + ptrdiff_t numOperands = operandsA.size(); assert(numOperands == operandsB.size()); /// associate ops means `f(a, b) == f(b, a)` - uint8_t associativeOpsFlag = B->associativeOperandsFlag(); + uint8_t commutativeOpsFlag = B->commutativeOperandsFlag(); // For example, // we keep track of which operands we've already merged, // f(a, b), f(b, b) // we can't merge b twice! 
- for (size_t i = 0; i < numOperands; ++i) { + for (ptrdiff_t i = 0; i < numOperands; ++i) { auto *opA = A->getOperand(i); auto *opB = B->getOperand(i); - auto [assoc, assocFlag] = popBit(associativeOpsFlag); - associativeOpsFlag = assocFlag; + auto [assoc, assocFlag] = popBit(commutativeOpsFlag); + commutativeOpsFlag = assocFlag; if (opA == opB) continue; // if both operands were merged, we can ignore it's associativity if (isMerged(opB, opA)) { @@ -251,7 +252,7 @@ struct MergingCost { continue; } if (!((assoc) && (assocFlag))) { - // this op isn't associative with any remaining + // this op isn't commutative with any remaining selector.select(i, opA, opB); continue; } @@ -284,7 +285,7 @@ struct MergingCost { return unsigned(selector); } - void merge(Arena<> *alloc, llvm::TargetTransformInfo &TTI, + void merge(Arena<> *alloc, const llvm::TargetTransformInfo &TTI, unsigned int vectorBits, Instruction *A, Instruction *B) { mergeList.emplace_back(A, B); auto *aA = ancestorMap.find(B); @@ -369,8 +370,8 @@ struct MergingCost { // NOLINTNEXTLINE(misc-no-recursion) inline void mergeInstructions( Arena<> *alloc, IR::Cache &cache, Predicate::Map &predMap, - llvm::TargetTransformInfo &TTI, unsigned int vectorBits, - amap> + const llvm::TargetTransformInfo &TTI, unsigned int vectorBits, + amap> opMap, amap &valToPred, llvm::SmallVectorImpl &mergingCosts, Instruction *J, @@ -428,7 +429,7 @@ inline void mergeInstructions( } // descendants aren't legal merge candidates, so push after merging if (vec.getCapacity() <= vec.size()) - vec.reserve(alloc, std::max(unsigned(8), 2 * vec.size())); + vec.reserve(alloc, std::max(ptrdiff_t(8), 2 * vec.size())); vec.push_back(J); valToPred[J] = preds; // TODO: prune bad candidates from mergingCosts @@ -445,20 +446,19 @@ inline void mergeInstructions( /// merging as it allocates a lot of memory that it can free when it is done. /// TODO: this algorithm is exponential in time and memory. /// Odds are that there's way smarter things we can do. 
-[[nodiscard]] inline auto mergeInstructions(IR::Cache &cache, - Predicate::Map &predMap, - llvm::TargetTransformInfo &TTI, - Arena<> tAlloc, unsigned vectorBits, - TreeResult tr) -> TreeResult { +[[nodiscard]] inline auto +mergeInstructions(IR::Cache &cache, Predicate::Map &predMap, + const llvm::TargetTransformInfo &TTI, Arena<> tAlloc, + unsigned vectorBits, TreeResult tr) -> TreeResult { auto [completed, trret] = cache.completeInstructions(&predMap, tr); tr = trret; if (!predMap.isDivergent()) return tr; // there is a divergence in the control flow that we can ideally merge - amap> + amap> opMap{&tAlloc}; amap valToPred{&tAlloc}; llvm::SmallVector mergingCosts; - mergingCosts.emplace_back(tAlloc); + mergingCosts.emplace_back(&tAlloc); // We search through incomplete instructions inside the predMap // this should yield all merge candidates.L for (auto *C = completed; C; C = static_cast(C->getNext())) { diff --git a/include/IR/CostModeling.hpp b/include/IR/CostModeling.hpp deleted file mode 100644 index 7f38643b7..000000000 --- a/include/IR/CostModeling.hpp +++ /dev/null @@ -1,658 +0,0 @@ -#pragma once - -// #include "./ControlFlowMerging.hpp" -#include "Graphs/Graphs.hpp" -#include "IR/Address.hpp" -#include "LinearProgramming/LoopBlock.hpp" -#include "LinearProgramming/ScheduledNode.hpp" -#include "Polyhedra/Dependence.hpp" -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -namespace poly::CostModeling { - -class CPURegisterFile { - [[no_unique_address]] uint8_t maximumVectorWidth; - [[no_unique_address]] uint8_t numVectorRegisters; - [[no_unique_address]] uint8_t numGeneralPurposeRegisters; - [[no_unique_address]] uint8_t numPredicateRegisters; - - // hacky check for has AVX512 - static inline auto hasAVX512(llvm::LLVMContext &C, - const 
llvm::TargetTransformInfo &TTI) -> bool { - return TTI.isLegalMaskedExpandLoad( - llvm::FixedVectorType::get(llvm::Type::getDoubleTy(C), 8)); - } - - static auto estimateNumPredicateRegisters( - llvm::LLVMContext &C, const llvm::TargetTransformInfo &TTI) -> uint8_t { - if (TTI.supportsScalableVectors()) return 8; - // hacky check for AVX512 - if (hasAVX512(C, TTI)) return 7; // 7, because k0 is reserved for unmasked - return 0; - } - // returns vector width in bits, ignoring mprefer-vector-width - static auto estimateMaximumVectorWidth(llvm::LLVMContext &C, - const llvm::TargetTransformInfo &TTI) - -> uint8_t { - uint8_t twiceMaxVectorWidth = 2; - auto *f32 = llvm::Type::getFloatTy(C); - llvm::InstructionCost prevCost = TTI.getArithmeticInstrCost( - llvm::Instruction::FAdd, - llvm::FixedVectorType::get(f32, twiceMaxVectorWidth)); - while (true) { - llvm::InstructionCost nextCost = TTI.getArithmeticInstrCost( - llvm::Instruction::FAdd, - llvm::FixedVectorType::get(f32, twiceMaxVectorWidth *= 2)); - if (nextCost > prevCost) break; - prevCost = nextCost; - } - return 16 * twiceMaxVectorWidth; - } - -public: - CPURegisterFile(llvm::LLVMContext &C, const llvm::TargetTransformInfo &TTI) { - maximumVectorWidth = estimateMaximumVectorWidth(C, TTI); - numVectorRegisters = TTI.getNumberOfRegisters(true); - numGeneralPurposeRegisters = TTI.getNumberOfRegisters(false); - numPredicateRegisters = estimateNumPredicateRegisters(C, TTI); - } - [[nodiscard]] constexpr auto getNumVectorBits() const -> uint8_t { - return maximumVectorWidth; - } - [[nodiscard]] constexpr auto getNumVector() const -> uint8_t { - return numVectorRegisters; - } - [[nodiscard]] constexpr auto getNumScalar() const -> uint8_t { - return numGeneralPurposeRegisters; - } - [[nodiscard]] constexpr auto getNumPredicate() const -> uint8_t { - return numPredicateRegisters; - } -}; -// struct CPUExecutionModel {}; - -// Plan for cost modeling: -// 1. Build Instruction graph -// 2. 
Iterate over all PredicatedChains, merging instructions across branches -// where possible -// 3. Create a loop tree structure for optimization -// 4. Create InstructionBlocks at each level. - -// void pushBlock(llvm::SmallPtrSet &trackInstr, -// llvm::SmallPtrSet &chainBBs, -// Predicates &pred, llvm::BasicBlock *BB) { -// assert(chainBBs.contains(block)); -// chainBBs.erase(BB); -// // we only want to extract relevant instructions, i.e. parents of -// stores for (llvm::Instruction &instr : *BB) { -// if (trackInstr.contains(&instr)) -// instructions.emplace_back(pred, instr); -// } -// llvm::Instruction *term = BB->getTerminator(); -// if (!term) -// return; -// switch (term->getNumSuccessors()) { -// case 0: -// return; -// case 1: -// BB = term->getSuccessor(0); -// if (chainBBs.contains(BB)) -// pushBlock(trackInstr, chainBBs, pred, BB); -// return; -// case 2: -// break; -// default: -// assert(false); -// } -// auto succ0 = term->getSuccessor(0); -// auto succ1 = term->getSuccessor(1); -// if (chainBBs.contains(succ0) && chainBBs.contains(succ1)) { -// // TODO: we need to fuse these blocks. - -// } else if (chainBBs.contains(succ0)) { -// pushBlock(trackInstr, chainBBs, pred, succ0); -// } else if (chainBBs.contains(succ1)) { -// pushBlock(trackInstr, chainBBs, pred, succ1); -// } -// } -template using Vec = math::ResizeableView; - -// TODO: instead of this, update in-place and ensure all Addr are over-allocated -// to correspond with max depth? -// Because we parse in reverse order, we have max possible depth of -// `ScheduledNode`s using it at time we create. - -/// LoopTree -/// A tree of loops, with an indexable vector of IR::Loop*s, to facilitate -/// construction of the IR::Loop graph, from the fusion omegas -class LoopTree { - // The root of this subtree - NotNull loop; - LoopTree *parent{nullptr}; // do we need this? 
- Vec children{}; - unsigned depth{0}; - // We do not need to know the previous loop, as dependencies between - // the `Addr`s and instructions will determine the ordering. - constexpr LoopTree(Arena<> *lalloc, LoopTree *parent_) - : loop{lalloc->create(parent_->depth + 1)}, parent(parent_), - depth(parent_->depth + 1) { - // allocate the root node, and connect it to parent's node, as well as - // previous loop of the same level. - loop->setParent(parent_->loop); - } - constexpr LoopTree(Arena<> *lalloc) : loop{lalloc->create(0)} {} - -public: - static auto root(Arena<> *salloc, Arena<> *lalloc) -> LoopTree * { - return new (salloc) LoopTree(lalloc); - } - // salloc: Short lived allocator, for the indexable `Vec`s - // Longer lived allocator, for the IR::Loop nodes - // NOLINTNEXTLINE(misc-no-recursion) - void addNode(Arena<> *salloc, Arena<> *lalloc, lp::ScheduledNode *node) { - if (node->getNumLoops() == depth) { - // Then it belongs here, and we add loop's dependencies. - // We only need to add deps to support SCC/top sort now. - // We also apply the rotation here. - // For dependencies in SCC iteration, only indvar deps get iterated. 
- auto [Pinv, denom] = math::NormalForm::scaledInv(node->getPhi()); - NotNull affloop = - node->getLoopNest()->rotate(lalloc, Pinv, node->getOffset()); - for (IR::Addr *m : node->localAddr()) { - m->rotate(affloop, Pinv, denom, node->getOffsetOmega(), - node->getOffset()); - loop->insertAfter(m); - } - return; - } - // we need to find the sub-loop tree to which we add `node` - ptrdiff_t idx = node->getFusionOmega(depth); - invariant(idx >= 0); - ptrdiff_t numChildren = children.size(); - if (idx >= children.size()) { - if (idx >= children.getCapacity()) { - // allocate extra capacity - children.reserve(salloc, 2 * (idx + 1)); - } - // allocate new nodes and resize - children.resize(idx + 1); - for (ptrdiff_t i = numChildren; i < idx + 1; ++i) - children[i] = new (salloc) LoopTree{lalloc, this}; - numChildren = idx + 1; - } - children[idx]->addNode(salloc, lalloc, node); - } - constexpr auto getChildren() -> Vec { return children; } - constexpr auto getLoop() -> IR::Loop * { return loop; } -}; - -struct LoopDepSummary { - IR::Node *afterExit{nullptr}; - IR::Addr *indexedByLoop{nullptr}; - IR::Addr *notIndexedByLoop{nullptr}; -}; -struct LoopIndependent { - LoopDepSummary summary; - bool independent; - constexpr auto operator*=(LoopIndependent other) -> LoopIndependent & { - summary = other.summary; - independent = independent && other.independent; - return *this; - } -}; -// - -// searches `N` and it's users for loop-independent users -// this exits early if it finds a dependent user; we search everything -// anyway, so we'll revist later anyway. -// We return a `IR::Node *, bool` pair, where the `bool` is true if -// `N` was loop independent. -// We do this rather than something like returning a `nullptr`, as -// we may have descended into instructions, found some users that are -// but then also found some that are not; we need to return `false` -// in this case, but we of course want to still return those we found. 
-// NOLINTNEXTLINE(misc-no-recursion) -inline auto searchLoopIndependentUsers(IR::Dependencies deps, IR::Loop *L, - IR::Node *N, uint8_t depth, - LoopDepSummary summary) - -> LoopIndependent { - if (N->dependsOnParentLoop()) return {summary, false}; - if (llvm::isa(N)) return {summary, false}; - if (IR::Loop *P = N->getLoop(); P != L) - return {summary, !(P && L->contains(P))}; - LoopIndependent ret{summary, true}; - auto *a = llvm::dyn_cast(N); - if (a) { - a->removeFromList(); - if (a->indexedByInnermostLoop()) { - a->insertAfter(ret.summary.indexedByLoop); - ret.summary.indexedByLoop = a; - return {summary, false}; - } - a->insertAfter(ret.summary.notIndexedByLoop); - ret.summary.notIndexedByLoop = a; - for (IR::Addr *m : a->outputAddrs(deps, depth)) { - ret *= searchLoopIndependentUsers(deps, L, m, depth, summary); - if (ret.independent) continue; - a->setDependsOnParentLoop(); - return ret; - } - } - // if it isn't a Loop, must be an `Instruction` - IR::Value *I = llvm::cast(N); - for (IR::Node *U : I->getUsers()) { - ret *= searchLoopIndependentUsers(deps, L, U, depth, summary); - if (ret.independent) continue; - I->setDependsOnParentLoop(); - return ret; - } - // then we can push it to the front of the list, meaning it is hoisted out - if (a) { - if (ret.summary.notIndexedByLoop == a) - ret.summary.notIndexedByLoop = llvm::cast_or_null(a->getNext()); - } - I->removeFromList(); - I->insertAfter(ret.summary.afterExit); - ret.summary.afterExit = I; - I->visit(depth); - return ret; -} -// NOLINTNEXTLINE(misc-no-recursion) -inline auto visitLoopDependent(IR::Dependencies deps, IR::Loop *L, IR::Node *N, - uint8_t depth, IR::Node *body) -> IR::Node * { - invariant(N->getVisitDepth() != 254); - // N may have been visited as a dependent of an inner loop, which is why - // `visited` accepts a depth argument - if (N->wasVisited(depth) || !(L->contains(N))) return body; -#ifndef NDEBUG - // Our goal here is to check for cycles in debug mode. 
- // Each level of our graph is acyclic, meaning that there are no cycles at - // that level when traversing only edges active at that given level. However, - // when considering edges active at level `I`, we may have cycles at level `J` - // if `J>I`. In otherwords, here we are travering all edges active at - // `I=depth`. Within subloops, which necessarilly have depth `J>I`, we may - // have cycles. - // - // Thus, we need to prevent getting stuck in a cycle for these deeper loops by - // setting `N->visit(depth)` here, so `wasVisited` will allow them to - // immediately return. But, in debug mode, we'll set nodes of the same depth - // to `254` to check for cycles. - if (N->getLoop() == L) N->visit(254); - else N->visit(depth); -#else - N->visit(depth); -#endif - // iterate over users - if (auto *A = llvm::dyn_cast(N)) { - for (IR::Addr *m : A->outputAddrs(deps, depth)) { - if (m->wasVisited(depth)) continue; - body = visitLoopDependent(deps, L, m, depth, body); - } - } - if (auto *I = llvm::dyn_cast(N)) { - for (IR::Node *U : I->getUsers()) { - if (U->wasVisited(depth)) continue; - body = visitLoopDependent(deps, L, U, depth, body); - } - } else if (auto *S = llvm::dyn_cast(N)) { - for (IR::Node *U : S->getChild()->nodes()) { - if (U->wasVisited(depth)) continue; - body = visitLoopDependent(deps, L, U, depth, body); - } - } -#ifndef NDEBUG - if (N->getLoop() == L) N->visit(depth); -#endif - if (N->getLoop() == L) body = N->setNext(body); - return body; -} -inline auto topologicalSort(IR::Dependencies deps, IR::Loop *root, - unsigned depth) -> IR::Node * { - // basic plan for the top sort: - // We iterate across all users, once all of node's users have been added, - // we push it to the front of the list. Thus, we get a top-sorted list. - // We're careful about the order, so that this top sort should LICM all the - // addresses that it can. - // - // We must push the exit before the root (as the exit depends on the loop, and - // we iterate users). 
- // The exit doesn't use any in this block, so we begin by trying to push any - // instructions that don't depend on the loop. If we fail to push them (i.e., - // because they have uses that do depend on the loop), then they get added to - // a revisit queue. Any instructions we are able to push-front before we push - // the exit, implicitly happen after the exit, i.e. they have been LICMed into - // the exit block. We unvisit the revisit-queue, and add them back to the main - // worklist. Then, we proceed with a depth-first topological sort normally - // (iterating over uses, pushing to the front), starting with the loop root, - // so that it gets pushed to the front as soon as possible. That is, so that - // it happens as late as possible Any instructions that get pushed to the - // front afterwards have been LICMed into the loop pre-header. - // - // In this first pass, we iterate over all nodes, pushing those - // that can be hoisted after the exit block. - IR::Node *C = root->getChild(); - LoopDepSummary summary; - for (IR::Node *N : C->nodes()) - summary = searchLoopIndependentUsers(deps, root, N, depth, summary).summary; - // summary.afterExit will be hoisted out; every member has been marked as - // `visited` So, now we search all of root's users, i.e. 
every addr that - // depends on it - IR::Node *body; - for (IR::Node *N : summary.indexedByLoop->nodes()) - body = visitLoopDependent(deps, root, N, depth, body); - body = root->setNext(body); // now we can place the loop - for (IR::Node *N : summary.notIndexedByLoop->nodes()) - body = visitLoopDependent(deps, root, N, depth, body); - // and any remaining edges - return body; -} -// NOLINTNEXTLINE(misc-no-recursion) -inline auto buildGraph(IR::Dependencies deps, IR::Loop *root, unsigned depth) - -> IR::Node * { - // We build the instruction graph, via traversing the tree, and then - // top sorting as we recurse out - for (IR::Loop *child : root->subLoops()) buildGraph(deps, child, depth + 1); - return topologicalSort(deps, root, depth); -} - -inline auto addAddrToGraph(Arena<> *salloc, Arena<> *lalloc, - lp::ScheduledNode *nodes) -> IR::Loop * { - auto s = salloc->scope(); - LoopTree *root = LoopTree::root(salloc, lalloc); - for (lp::ScheduledNode *node : nodes->getAllVertices()) - root->addNode(salloc, lalloc, node); - return root->getLoop(); -} -inline void eliminateAddr(IR::Addr *a, IR::Addr *b) { - if (a->indexMatrix() != b->indexMatrix()) return; - /// are there any addr between them? - if (a->isStore()) { - if (b->isStore()) { // Write->Write - // Are there reads in between? If so, we must keep-- - // --unless we're storing the same value twice (???) - // without other intervening store-edges. - // Without reads in between, it's safe. - } else { // Write->Read - // Can we replace the read with using the written value? - if (a->getLoop() != b->getLoop()) return; - } - } else if (b->isLoad()) { // Read->Read - // If they don't have the same parent, either... - // They're in different branches of loops, and load can't live - // in between them - // for (i : I){ - // for (j : J){ - // A[i,j]; - // } - // for (j : J){ - // A[i,j]; - // } - // } - // or it is a subloop, but dependencies prevented us from hoisting. 
- if (a->getLoop() != b->getLoop()) return; - // Any writes in between them? - } // Read->Write, can't delete either -} -// plan: SCC? Iterate over nodes in program order? -// then we can iterate in order. -// What to do about depth? -// We may have -// for (i : I){ -// for (j : J){ -// A[j] = x; // store -// y = A[j]; // load -// } -// } -// In this case, we do have a cycle: -// A[j]^s_i -> A[j]^l_i -// A[j]^l_i -> A[j]^s_{i+1} -// However, this cycle does not prohibit deleting the load, -// replacing it with `y = x`. -// This still holds true if the load were a second store: -// for (i : I){ -// for (j : J){ -// A[j] = x; // store -// A[j] = y; // load -// } -// } -// We could stick with the single `y` store. -// Thus, for eliminating memory operations at a depth of 2, -// we are only concerned with dependencies still valid at a depth of 2. -// for (int i = 0 : i < I; ++i){ -// x[i] /= U[i,i]; -// for (int j = i+1; j < I; ++j){ -// x[j] -= x[i]*U[i,j]; -// } -// } -// Maybe just do the dumb thing? -// Walk the graph for addr costs, and at the same time, -// check the addr for eliminability, checking against what we've stored thus -// far. -// We currently do not store load-load edges, which is why only checking -// edge relationships is not ideal. -// We may store load-load edges in the future, as these could be used as -// part of the cost function of the linear program, i.e. we'd want to -// minimize the distance between loads (but allow reordering them). -// -// I think a reasonable approach is: -// Have a map from array pointer to Addr. Addrs form a chain. -// as we walk the graph, add each newly encountered addr to the front of the -// chain and check if we can eliminate it, or any of its predecessors. 
-// -// Note (bracketed means we might be able to eliminate): -// Read->[Read] could eliminate read -// Read->Write no change -// Write->[Read] can forward written value -// [Write]->Write can eliminate first write -// Thus, we can fuse this pass with our address cost calculation. -// We check if we can eliminate before calculating the new cost. -// The only case where we may remove an old value, write->write, -// we could just take the old cost and assign it to the new write. -// TODO: if we have only writes to a non-escaping array, we should -// be able to eliminate these writes too, and then also potentially -// remove that array temporary (e.g., if it were malloc'd). -// E.g. check if the array is a `llvm::isNonEscapingLocalObject` and allocated -// by `llvm::isRemovableAlloc`. -inline void removeRedundantAddr(IR::Dependencies deps, IR::Addr *addr) { - for (IR::Addr *a : addr->eachAddr()) { - for (poly::Dependence *d = a->getEdgeOut(); d; d = d->getNextOutput()) { - IR::Addr *b = d->output(); - eliminateAddr(a, b); - } - } -} -// -// Considering reordering legality, example -// for (int i = 0: i < I; ++i){ -// for (int j = 0 : j < i; ++j){ -// x[i] -= x[j]*U[j,i]; -// } -// x[i] /= U[i,i]; -// } -// We have an edge from the store `x[i] = x[i] / U[i,i]=` to the load of -// `x[j]`, when `j = ` the current `i`, on some future iteration. 
-// We want to unroll; -// for (int i = 0: i < I-3; i += 4){ -// for (int j = 0 : j < i; ++j){ -// x[i] -= x[j]*U[j,i]; -// x[i+1] -= x[j]*U[j,i+1]; -// x[i+2] -= x[j]*U[j,i+2]; -// x[i+3] -= x[j]*U[j,i+3]; -// } -// x[i] /= U[i,i]; // store 0 -// { // perform unrolled j = i iter -// int j = i; // these all depend on store 0 -// x[i+1] -= x[j]*U[j,i+1]; -// x[i+2] -= x[j]*U[j,i+2]; -// x[i+3] -= x[j]*U[j,i+3]; -// } -// x[i+1] /= U[i+1,i+1]; // store 1 -// { // perform unrolled j = i + 1 iter -// int j = i+1; // these all depend on store 1 -// x[i+2] -= x[j]*U[j,i+2]; -// x[i+3] -= x[j]*U[j,i+3]; -// } -// x[i+2] /= U[i+2,i+2]; // store 2 -// { // perform unrolled j = i + 2 iter -// int j = i+2; // this depends on store 2 -// x[i+3] -= x[j]*U[j,i+3]; -// } -// x[i+3] /= U[i+3,i+3]; -// } -// The key to legality here is that we peel off the dependence polyhedra -// from the loop's iteration space. -// We can then perform the dependent iterations in order. -// With masking, the above code can be vectorized in this manner. -// The basic approach is that we have the dependence polyhedra: -// -// 0 <= i_s < I -// 0 <= i_l < I -// 0 <= j_l < i_l -// i_s = j_l // dependence, yields same address in `x` -// -// Note that our schedule sets -// i_s = i_l -// Which gives: -// i_l = i_s = j_l < i_l -// a contradiction, meaning that the dependency is -// conditionally (on our schedule) independent. -// Excluding the `i_s = i_l` constraint from the -// polyhedra gives us the region of overlap. 
-// -// When unrolling by `U`, we get using `U=4` as an example: -// i^0_s + 1 = i^1_s -// i^0_s + 2 = i^2_s -// i^0_s + 3 = i^3_s -// 0 <= i^0_s < I -// 0 <= i^1_s < I -// 0 <= i^2_s < I -// 0 <= i^3_s < I -// 0 <= i^0_l < I -// 0 <= i^1_l < I -// 0 <= i^2_l < I -// 0 <= i^3_l < I -// 0 <= j_l < i^0_l -// 0 <= j_l < i^1_l -// 0 <= j_l < i^2_l -// 0 <= j_l < i^3_l -// i^0_s = j_l || i^1_s = j_l || i^2_s = j_l || i^3_s = j_l -// where the final union can be replaced with -// i^0_s = j_l || i^0_s+1 = j_l || i^0_s+2 = j_l || i^0_s+3 = j_l -// i^0_s <= j_1 <= i^0_s+3 -// -// Similarly, we can compress the other inequalities... -// 0 <= i^0_s < I - 3 -// 0 <= i^0_l < I - 3 -// 0 <= j_l < i^0_l -// i^0_s <= j_1 <= i^0_s+3 // dependence region -// -// So, the parallel region is the union -// i^0_s > j_1 || j_1 > i^0_s+3 -// -// In this example, note that the region `j_1 > i^0_s+3` is empty -// so we have one parallel region, and then one serial region. -// -/// -/// Optimize the schedule -inline void optimize(IR::Cache &instr, Arena<> *lalloc, - lp::LoopBlock::OptimizationResult res) { - /// we must build the IR::Loop - /// Initially, to help, we use a nested vector, so that we can index into it - /// using the fusion omegas. We allocate it with the longer lived `instr` - /// alloc, so we can checkpoint it here, and use alloc for other IR nodes. 
- Arena<> *salloc = instr.getAllocator(); - - IR::Node *N = buildGraph(addAddrToGraph(salloc, lalloc, res.nodes), 0); - // `N` is the head of the topologically sorted graph - // We now try to remove redundant memory operations - - removeRedundantAddr(res.addr.addr); -} - -/* -// NOLINTNEXTLINE(misc-no-recursion) -inline auto printSubDotFile(Arena<> *alloc, llvm::raw_ostream &out, - map &names, - llvm::SmallVectorImpl &addrNames, - unsigned addrIndOffset, poly::Loop *lret) --> poly::Loop * { -poly::Loop *loop{nullptr}; -size_t j = 0; -for (auto *addr : header.getAddr()) loop = addr->getAffLoop(); -for (auto &subTree : subTrees) { - // `names` might realloc, relocating `names[this]` - if (getDepth()) - names[subTree.subTree] = names[this] + "SubLoop#" + std::to_string(j++); - else names[subTree.subTree] = "LoopNest#" + std::to_string(j++); - if (loop == nullptr) - for (auto *addr : subTree.exit.getAddr()) loop = addr->getAffLoop(); - loop = subTree.subTree->printSubDotFile(alloc, out, names, addrNames, - addrIndOffset, loop); -} -const std::string &name = names[this]; -out << "\"" << name - << "\" [shape=plain\nlabel = <\n"; -size_t i = header.printDotNodes(out, 0, addrNames, addrIndOffset, name); -j = 0; -std::string loopEdges; -for (auto &subTree : subTrees) { - std::string label = "f" + std::to_string(++i); - out << " \n"; - loopEdges += "\"" + name + "\":f" + std::to_string(i) + " -> \"" + - names[subTree.subTree] + "\":f0 [color=\"#ff0000\"];\n"; - i = subTree.exit.printDotNodes(out, i, addrNames, addrIndOffset, name); -} -out << "
"; -// assert(depth == 0 || (loop != nullptr)); -if (loop && (getDepth() > 0)) { - for (size_t i = loop->getNumLoops(), k = getDepth(); i > k;) - loop = loop->removeLoop(alloc, --i); - loop->pruneBounds(alloc); - loop->printBounds(out); -} else out << "Top Level"; -out << "
SubLoop#" << j++ - << "
>];\n" << loopEdges; -if (lret) return lret; -if ((loop == nullptr) || (getDepth() <= 1)) return nullptr; -return loop->removeLoop(alloc, getDepth() - 1); -} - -inline void printDotFile(Arena<> *alloc, llvm::raw_ostream &out) { -map names; -llvm::SmallVector addrNames(numAddr_); -names[this] = "toplevel"; -out << "digraph LoopNest {\n"; -auto p = alloc.scope(); -printSubDotFile(alloc, out, names, addrNames, subTrees.size(), nullptr); -printDotEdges(out, addrNames); -out << "}\n"; -} -*/ -// class LoopForestSchedule : LoopTreeSchedule { -// [[no_unique_address]] Arena<> *allocator; -// }; -} // namespace poly::CostModeling diff --git a/include/IR/Hash.hpp b/include/IR/Hash.hpp index 7e384075e..b5039fccf 100644 --- a/include/IR/Hash.hpp +++ b/include/IR/Hash.hpp @@ -1,6 +1,6 @@ #pragma once -#include "IR/Instruction.hpp" #include "IR/Node.hpp" +#include #include #include @@ -31,15 +31,17 @@ template <> struct ankerl::unordered_dense::hash { case poly::IR::Node::VK_Bint: return combineHash(seed, llvm::hash_value(*x.payload.ci)); default: - poly::invariant(x.kind == poly::IR::Node::VK_Bint); + poly::utils::invariant(x.kind == poly::IR::Node::VK_Bint); return combineHash(seed, llvm::hash_value(*x.payload.cf)); } } }; -template <> struct ankerl::unordered_dense::hash { +template <> +struct ankerl::unordered_dense::hash { using is_avalanching = void; - [[nodiscard]] auto operator()(poly::IR::Identifier const &x) const noexcept + [[nodiscard]] auto + operator()(poly::IR::Instruction::Identifier const &x) const noexcept -> uint64_t { using poly::Hash::combineHash, poly::Hash::getHash; uint64_t seed = getHash(x.kind); @@ -52,27 +54,6 @@ template <> struct ankerl::unordered_dense::hash { /// template <> struct ankerl::unordered_dense::hash { using is_avalanching = void; - [[nodiscard]] auto operator()(poly::IR::InstByValue const &x) const noexcept - -> uint64_t { - using poly::Hash::combineHash, poly::Hash::getHash, poly::containers::UList, - poly::IR::Value; - uint64_t seed 
= getHash(x.inst->getKind()); - seed = combineHash(seed, getHash(x.inst->getType())); - seed = combineHash(seed, getHash(x.inst->getOpId())); - if (x.inst->isIncomplete()) - return combineHash(seed, getHash(x.inst->getLLVMInstruction())); - uint8_t assocFlag = x.inst->associativeOperandsFlag(); - // combine all operands - size_t offset = 0; - poly::PtrVector operands = x.inst->getOperands(); - if (assocFlag) { - poly::invariant(assocFlag, uint8_t(3)); - // we combine hashes in a commutative way - seed = combineHash(seed, getHash(operands[0]) + getHash(operands[1])); - offset = 2; - } - for (auto B = operands.begin() + offset, E = operands.end(); B != E; ++B) - seed = combineHash(seed, getHash(*B)); - return seed; - } + [[nodiscard]] inline auto + operator()(poly::IR::InstByValue const &x) const noexcept -> uint64_t; }; diff --git a/include/IR/Instruction.hpp b/include/IR/Instruction.hpp index d23c78e21..cdf991a49 100644 --- a/include/IR/Instruction.hpp +++ b/include/IR/Instruction.hpp @@ -5,14 +5,12 @@ #include "IR/InstructionCost.hpp" #include "IR/Node.hpp" #include "IR/Predicate.hpp" +#include #include #include -#include #include -#include #include #include -#include #include #include #include @@ -29,11 +27,10 @@ #include #include #include -#include namespace poly { -using math::PtrVector, math::MutPtrVector, utils::Arena, utils::invariant, - utils::NotNull; +using math::PtrVector, math::MutPtrVector, alloc::Arena, utils::invariant, + utils::Valid; }; // namespace poly namespace poly::IR { @@ -75,9 +72,10 @@ class Compute : public Instruction { llvm::Instruction *inst{nullptr}; llvm::Type *type; llvm::Intrinsic::ID opId; // unsigned - int numOperands; // negative means incomplete llvm::FastMathFlags fastMathFlags; // holds unsigned VectorizationCosts costs; + uint32_t loopIndepFlag; + int numOperands; // negative means incomplete #if !defined(__clang__) && defined(__GNUC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpedantic" @@ -92,10 +90,22 
@@ class Compute : public Instruction { #pragma clang diagnostic pop #endif + static constexpr auto diffMask(ptrdiff_t smaller, ptrdiff_t larger) + -> uint32_t { + invariant(smaller <= larger); + invariant(larger < 32); + // return ((uint32_t(1) << (larger - smaller)) - 1) << smaller; + uint32_t umask = ((uint32_t(1) << larger) - 1), + lmask = ((uint32_t(1) << smaller) - 1); + return umask ^ lmask; + } + static constexpr auto diffMask(Value *v, ptrdiff_t depth) -> uint32_t { + return diffMask(v->getCurrentDepth(), depth); + } + public: Compute(const Compute &) = delete; - constexpr Compute(ValKind k, llvm::Instruction *i, llvm::Intrinsic::ID id, - int numOps) + Compute(ValKind k, llvm::Instruction *i, llvm::Intrinsic::ID id, int numOps) : Instruction(k), inst(i), type(i->getType()), opId(id), numOperands(numOps), fastMathFlags(i->getFastMathFlags()) {} constexpr Compute(ValKind k, llvm::Intrinsic::ID id, int numOps, @@ -121,7 +131,7 @@ class Compute : public Instruction { return inst; } static auto getIDKind(llvm::Instruction *I) - -> std::pair { + -> containers::Pair { if (auto *c = llvm::dyn_cast(I)) { if (auto *J = llvm::dyn_cast(c)) return {J->getIntrinsicID(), VK_Call}; @@ -150,9 +160,24 @@ class Compute : public Instruction { constexpr auto getOperands() -> MutPtrVector { return {operands, numOperands}; } + [[nodiscard]] constexpr auto getLoopIndepFlag() const { + return loopIndepFlag; + } + constexpr auto calcLoopDepFlag(ptrdiff_t depth) -> uint32_t { + this->currentDepth = depth; + loopIndepFlag = (1 << depth) - 1; + for (auto *op : getOperands()) + if (auto *C = llvm::dyn_cast(op)) + loopIndepFlag &= C->getLoopIndepFlag() | diffMask(C, depth); + else if (auto *A = llvm::dyn_cast(op)) + loopIndepFlag &= A->getOrthAxes().indep | diffMask(C, depth); + return loopIndepFlag; + } + /// Get the arguments to this function [[nodiscard]] constexpr auto getOperands() const -> PtrVector { return {const_cast(operands), unsigned(numOperands)}; } + /// Get the `i`th 
argument of this function [[nodiscard]] constexpr auto getOperand(size_t i) const -> Value * { return operands[i]; } @@ -167,6 +192,10 @@ class Compute : public Instruction { [[nodiscard]] auto allowsContract() const -> bool { return fastMathFlags.allowContract(); } + [[nodiscard]] auto reassociableArgs() const -> uint32_t { + if (!fastMathFlags.allowReassoc()) return 0; + return isMulAdd() ? 0x4 : ((0x1 << numOperands) - 1); + } // Incomplete stores the correct number of ops it was allocated with as a // negative number. The primary reason for being able to check // completeness is for `==` checks and hashing. @@ -181,7 +210,12 @@ class Compute : public Instruction { return (getKind() == VK_Call) && ((opId == llvm::Intrinsic::fmuladd) || (opId == llvm::Intrinsic::fma)); } - [[nodiscard]] auto associativeOperandsFlag() const -> uint8_t { + // Bitmask indicating which args are commutative + // E.g. `muladd(a, b, c)` returns `0x3` + // where the bitpattern is 11000000 + // indicating that the first two arguments are commutative. + // That is, `muladd(a, b, c) == muladd(b, a, c)`. + [[nodiscard]] auto commuatativeOperandsFlag() const -> uint8_t { switch (getKind()) { case VK_Call: return (isMulAdd() || isCommutativeCall()) ? 
0x3 : 0; case VK_Oprn: @@ -210,7 +244,7 @@ class Compute : public Instruction { size_t offset = 0; auto opst = getOperands(); auto opso = other.getOperands(); - if (uint8_t flag = associativeOperandsFlag()) { + if (uint8_t flag = commuatativeOperandsFlag()) { invariant(flag, uint8_t(3)); auto *ot0 = opst[0]; auto *oo0 = opso[0]; @@ -225,15 +259,13 @@ class Compute : public Instruction { return true; } - /// fall back in case we need value operand - // [[nodiscard]] auto isValue() const -> bool { return id.isValue(); } - auto getCost(llvm::TargetTransformInfo &TTI, VectorWidth W) + auto getCost(const llvm::TargetTransformInfo &TTI, VectorWidth W) -> RecipThroughputLatency { RecipThroughputLatency c = costs[W]; if (c.notYetComputed()) costs[W] = c = calcCost(TTI, W.getWidth()); return c; } - [[nodiscard]] inline auto calcCost(llvm::TargetTransformInfo &TTI, + [[nodiscard]] inline auto calcCost(const llvm::TargetTransformInfo &TTI, unsigned vectorWidth) -> RecipThroughputLatency; [[nodiscard]] auto getType(unsigned int vectorWidth) const -> llvm::Type * { @@ -260,14 +292,12 @@ class Compute : public Instruction { // which case it is free [[nodiscard]] inline auto allUsersAdditiveContract() const -> bool; -}; // class Inst +}; // class Compute -struct InstByValue { - Compute *inst; - auto operator==(InstByValue const &other) const -> bool { - return *inst == *other.inst; - } -}; +inline auto InstByValue::operator==(InstByValue const &other) const -> bool { + if (inst == other.inst) return true; + return *inst == *other.inst; +} // some opaque function class OpaqueFunc { @@ -284,8 +314,8 @@ class OpaqueFunc { auto getFunction() -> llvm::Function * { return ins->getLLVMInstruction()->getFunction(); } - auto calcCallCost(llvm::TargetTransformInfo &TTI, unsigned int vectorWidth) - -> RecipThroughputLatency { + auto calcCallCost(const llvm::TargetTransformInfo &TTI, + unsigned int vectorWidth) -> RecipThroughputLatency { llvm::Type *T = ins->getType(vectorWidth); 
llvm::SmallVector argTypes; for (auto *op : getOperands()) argTypes.push_back(op->getType(vectorWidth)); @@ -296,7 +326,7 @@ class OpaqueFunc { TTI.getCallInstrCost(getFunction(), T, argTypes, llvm::TargetTransformInfo::TCK_Latency)}; } - auto calcCallCost(llvm::TargetTransformInfo &TTI, llvm::Function *F, + auto calcCallCost(const llvm::TargetTransformInfo &TTI, llvm::Function *F, unsigned int vectorWidth) -> RecipThroughputLatency { llvm::Type *T = ins->getType(vectorWidth); llvm::SmallVector argTypes; @@ -335,8 +365,7 @@ class Operation { [[nodiscard]] constexpr auto getNumOperands() const -> unsigned { return ins->getNumOperands(); } - [[nodiscard]] constexpr auto isInstruction(llvm::Intrinsic::ID opCode) const - -> bool { + [[nodiscard]] auto isInstruction(llvm::Intrinsic::ID opCode) const -> bool { return getOpCode() == opCode; } static auto isFMul(Node *n) -> bool { @@ -440,8 +469,9 @@ class Operation { [[nodiscard]] auto getType(unsigned w) const -> llvm::Type * { return ins->getType(w); } - auto calcUnaryArithmeticCost(llvm::TargetTransformInfo &TTI, - unsigned int vectorWidth) const + [[nodiscard]] auto + calcUnaryArithmeticCost(const llvm::TargetTransformInfo &TTI, + unsigned int vectorWidth) const -> RecipThroughputLatency { auto op0info = ins->getOperandInfo(0); llvm::Type *T = getType(vectorWidth); @@ -454,8 +484,9 @@ class Operation { [[nodiscard]] auto getInstruction() const -> llvm::Instruction * { return ins->getLLVMInstruction(); } - auto calcBinaryArithmeticCost(llvm::TargetTransformInfo &TTI, - unsigned int vectorWidth) const + [[nodiscard]] auto + calcBinaryArithmeticCost(const llvm::TargetTransformInfo &TTI, + unsigned int vectorWidth) const -> RecipThroughputLatency { auto op0info = ins->getOperandInfo(0); auto op1info = ins->getOperandInfo(1); @@ -477,8 +508,8 @@ class Operation { return isFcmp() ? 
llvm::CmpInst::BAD_FCMP_PREDICATE : llvm::CmpInst::BAD_ICMP_PREDICATE; } - auto calcCmpSelectCost(llvm::TargetTransformInfo &TTI, - unsigned int vectorWidth) const + [[nodiscard]] auto calcCmpSelectCost(const llvm::TargetTransformInfo &TTI, + unsigned int vectorWidth) const -> RecipThroughputLatency { llvm::Type *T = getType(vectorWidth), *cmpT = llvm::CmpInst::makeCmpResultType(T); @@ -493,11 +524,12 @@ class Operation { /// for calculating the cost of a select when merging this instruction with /// another one. - auto selectCost(llvm::TargetTransformInfo &TTI, - unsigned int vectorWidth) const -> llvm::InstructionCost { + [[nodiscard]] auto selectCost(const llvm::TargetTransformInfo &TTI, + unsigned int vectorWidth) const + -> llvm::InstructionCost { return selectCost(TTI, getType(vectorWidth)); } - static auto selectCost(llvm::TargetTransformInfo &TTI, llvm::Type *T) + static auto selectCost(const llvm::TargetTransformInfo &TTI, llvm::Type *T) -> llvm::InstructionCost { llvm::Type *cmpT = llvm::CmpInst::makeCmpResultType(T); // llvm::CmpInst::Predicate pred = @@ -512,7 +544,8 @@ class Operation { llvm::Instruction::Select, T, cmpT, pred, llvm::TargetTransformInfo::TCK_RecipThroughput); } - auto getCastContext(llvm::TargetTransformInfo & /*TTI*/) const + [[nodiscard]] auto + getCastContext(const llvm::TargetTransformInfo & /*TTI*/) const -> llvm::TargetTransformInfo::CastContextHint { if (ins->operandIsLoad() || ins->userIsStore()) return llvm::TargetTransformInfo::CastContextHint::Normal; @@ -521,8 +554,9 @@ class Operation { // TODO: check for whether mask, interleave, or reversed is likely. 
return llvm::TargetTransformInfo::CastContextHint::None; } - auto calcCastCost(llvm::TargetTransformInfo &TTI, - unsigned int vectorWidth) const -> RecipThroughputLatency { + [[nodiscard]] auto calcCastCost(const llvm::TargetTransformInfo &TTI, + unsigned int vectorWidth) const + -> RecipThroughputLatency { llvm::Type *srcT = cost::getType(getOperand(0)->getType(), vectorWidth), *dstT = getType(vectorWidth); llvm::TargetTransformInfo::CastContextHint ctx = getCastContext(TTI); @@ -533,8 +567,8 @@ class Operation { TTI.getCastInstrCost(idt, dstT, srcT, ctx, llvm::TargetTransformInfo::TCK_Latency)}; } - auto calculateCostFAddFSub(llvm::TargetTransformInfo &TTI, - unsigned int vectorWidth) const + [[nodiscard]] auto calculateCostFAddFSub(const llvm::TargetTransformInfo &TTI, + unsigned int vectorWidth) const -> RecipThroughputLatency { // TODO: allow not assuming hardware FMA support if ((isFMulOrFNegOfFMul(getOperand(0)) || @@ -543,15 +577,15 @@ class Operation { return {}; return calcBinaryArithmeticCost(TTI, vectorWidth); } - auto calculateFNegCost(llvm::TargetTransformInfo &TTI, - unsigned int vectorWidth) const + [[nodiscard]] auto calculateFNegCost(const llvm::TargetTransformInfo &TTI, + unsigned int vectorWidth) const -> RecipThroughputLatency { if (isFMul(getOperand(0)) && ins->allUsersAdditiveContract()) return {}; return calcUnaryArithmeticCost(TTI, vectorWidth); } - [[nodiscard]] auto calcCost(llvm::TargetTransformInfo &TTI, + [[nodiscard]] auto calcCost(const llvm::TargetTransformInfo &TTI, unsigned int vectorWidth) const -> RecipThroughputLatency { switch (getOpCode()) { @@ -647,8 +681,8 @@ class Call { [[nodiscard]] auto getNumOperands() const -> size_t { return ins->getNumOperands(); } - auto calcCallCost(llvm::TargetTransformInfo &TTI, unsigned int vectorWidth) - -> RecipThroughputLatency { + auto calcCallCost(const llvm::TargetTransformInfo &TTI, + unsigned int vectorWidth) -> RecipThroughputLatency { llvm::Type *T = ins->getType(vectorWidth); 
llvm::SmallVector argTypes; for (auto *op : ins->getOperands()) @@ -663,7 +697,8 @@ class Call { } }; -inline auto Value::getCost(llvm::TargetTransformInfo &TTI, cost::VectorWidth W) +inline auto Value::getCost(const llvm::TargetTransformInfo &TTI, + cost::VectorWidth W) -> cost::RecipThroughputLatency { if (auto *a = llvm::dyn_cast(this)) return a->getCost(TTI, W); invariant(getKind() >= VK_Func); @@ -702,8 +737,8 @@ inline auto Value::getType() const -> llvm::Type * { inline auto Value::getType(unsigned w) const -> llvm::Type * { return cost::getType(getType(), w); } -[[nodiscard]] inline auto Compute::calcCost(llvm::TargetTransformInfo &TTI, - unsigned vectorWidth) +[[nodiscard]] inline auto +Compute::calcCost(const llvm::TargetTransformInfo &TTI, unsigned vectorWidth) -> RecipThroughputLatency { if (auto op = Operation(this)) return op.calcCost(TTI, vectorWidth); if (auto call = Call(this)) return call.calcCallCost(TTI, vectorWidth); @@ -750,9 +785,9 @@ inline auto Value::getType(unsigned w) const -> llvm::Type * { if (const auto *I = llvm::dyn_cast(this)) return I->getNumOperands(); return getKind() == VK_Stow; } -[[nodiscard]] inline auto Value::associativeOperandsFlag() const -> uint8_t { +[[nodiscard]] inline auto Value::commutativeOperandsFlag() const -> uint8_t { if (const auto *I = llvm::dyn_cast(this)) - return I->associativeOperandsFlag(); + return I->commuatativeOperandsFlag(); return 0; } [[nodiscard]] inline auto Value::getNumScalarBits() const -> unsigned int { @@ -770,7 +805,7 @@ inline auto Value::getType(unsigned w) const -> llvm::Type * { if (auto *I = getInstruction()) return I->getParent(); return nullptr; } -[[nodiscard]] constexpr auto Instruction::getIdentifier() const +[[nodiscard]] inline auto Instruction::getIdentifier() const -> Instruction::Identifier { llvm::Intrinsic::ID id; if (const auto *I = llvm::dyn_cast(this)) id = I->getOpId(); @@ -804,4 +839,89 @@ inline void Instruction::setOperands(Arena<> *alloc, // 
llvm::Intrinsic::IndependentIntrinsics x = llvm::Intrinsic::sqrt; // llvm::Intrinsic::IndependentIntrinsics y = llvm::Intrinsic::sin; +constexpr auto findComp(Addr *src, Compute *dst) -> bool; +// NOLINTNEXTLINE misc-no-recursion +constexpr auto find(Addr *src, Value *op) { + auto *c = llvm::dyn_cast(op); + return c && findComp(src, c); +} + +/// Defined here, because we're using `Compute` +// NOLINTNEXTLINE misc-no-recursion +constexpr auto findComp(Addr *src, Compute *dst) -> bool { + return std::ranges::any_of(dst->getOperands(), [=](Value *op) -> bool { + if (op != src && !find(src, op)) return false; + static_cast(op)->linkReductionDst(dst); + return true; + }); +} +// from dst, search through operands for `src` +// TODO: accumulate latency as we go! +// Maybe store visited, to avoid potentially revisiting? +// NOLINTNEXTLINE misc-no-recursion +constexpr auto findThroughReassociable(Addr *src, Compute *dst) -> unsigned { + invariant(src->isLoad()); + uint32_t reassociable = dst->reassociableArgs(); + // foundflag&1 == found reassociable + // foundflag&2 == found non-reassociable + unsigned foundflag = 0; + for (Value *op : dst->getOperands()) { + auto *c = llvm::dyn_cast(op); + bool found{false}; + if (reassociable & 1) { + if (op == src) { + foundflag |= 1; + found = true; + } else if (c) { + unsigned f = findThroughReassociable(src, c); + if (!f) continue; + foundflag |= f; + found = true; + } + } else if ((op == src) || (c && findComp(src, c))) { + found = true; + foundflag = 0x2; + } + if (found) static_cast(op)->linkReductionDst(dst); + if (foundflag & 2) return 0x2; + reassociable >>= 1; + } + return foundflag; +} + +inline auto Addr::reductionLatency(const llvm::TargetTransformInfo &TTI, + unsigned vectorWidth) + -> llvm::InstructionCost::CostType { + llvm::InstructionCost::CostType latency{0}; + for (Instruction *d = getReductionDst(); d; d = d->getReductionDst()) + if (Compute *c = llvm::dyn_cast(d)) + latency += c->calcCost(TTI, vectorWidth).latency; 
+ return latency; +} + } // namespace poly::IR + +[[nodiscard]] inline auto +ankerl::unordered_dense::hash::operator()( + poly::IR::InstByValue const &x) const noexcept -> uint64_t { + using poly::Hash::combineHash, poly::Hash::getHash, poly::containers::UList, + poly::IR::Value; + uint64_t seed = getHash(x.inst->getKind()); + seed = combineHash(seed, getHash(x.inst->getType())); + seed = combineHash(seed, getHash(x.inst->getOpId())); + if (x.inst->isIncomplete()) + return combineHash(seed, getHash(x.inst->getLLVMInstruction())); + uint8_t assocFlag = x.inst->commuatativeOperandsFlag(); + // combine all operands + size_t offset = 0; + poly::PtrVector operands = x.inst->getOperands(); + if (assocFlag) { + poly::invariant(assocFlag, uint8_t(3)); + // we combine hashes in a commutative way + seed = combineHash(seed, getHash(operands[0]) + getHash(operands[1])); + offset = 2; + } + for (auto B = operands.begin() + offset, E = operands.end(); B != E; ++B) + seed = combineHash(seed, getHash(*B)); + return seed; +} diff --git a/include/IR/InstructionCost.hpp b/include/IR/InstructionCost.hpp index 8198ab7fa..2ce8a9b80 100644 --- a/include/IR/InstructionCost.hpp +++ b/include/IR/InstructionCost.hpp @@ -1,7 +1,8 @@ #pragma once #include #include -#include +#include +#include #include namespace poly::IR::cost { diff --git a/include/IR/Node.hpp b/include/IR/Node.hpp index c824b5f55..2427a9182 100644 --- a/include/IR/Node.hpp +++ b/include/IR/Node.hpp @@ -1,10 +1,12 @@ #pragma once +#include "Alloc/Arena.hpp" #include "Containers/UnrolledList.hpp" #include "IR/InstructionCost.hpp" #include "IR/Users.hpp" +#include "Optimize/Legality.hpp" #include "Polyhedra/Loops.hpp" -#include "Utilities/Allocators.hpp" +#include "Support/Iterators.hpp" #include "Utilities/ListRanges.hpp" #include #include @@ -16,10 +18,12 @@ #include #include #include -#include +namespace poly::poly { +class Dependencies; +} // namespace poly::poly namespace poly::IR { -using utils::NotNull, utils::invariant, 
utils::Arena, containers::UList; +using utils::Valid, utils::invariant, alloc::Arena, containers::UList; class Loop; /// We take an approach similar to LLVM's RTTI /// however, we want to take advantage of FAMs while having a "hieararchy" @@ -76,7 +80,7 @@ class Loop; /// while (C){ /// // do stuff with `C` /// C = C->getNext() -/// C = (C || llvm::isa(C)) ? C : C->getChild(); +/// C = (!C || llvm::isa(C)) ? C : C->getChild(); /// } /// ``` /// IR types: Loop, Block, Addr, Instr, Consts @@ -87,6 +91,7 @@ class Node { VK_Load, VK_Stow, // used for ordered comparisons; all `Addr` types <= Stow VK_Loop, + VK_Exit, VK_CVal, VK_Cint, VK_Bint, @@ -98,7 +103,7 @@ class Node { }; // we have a private pointer so different types can share - // in manner not exacctly congruent with type hiearchy + // in manner not exactly congruent with type hierarchy // in particular, `Inst` and `Load` want `User` lists // while `Stow`s do not. // `Addr` is the common load/store subtype @@ -106,19 +111,20 @@ class Node { // but only load to inherit 'hasUsers' and only store to inherit the operand. // `Inst` would also inherit 'hasUsers', but would want a different operands // type. - // Addr has a FAM, so multiple inheritence isn't an option for `Load`/`Stow`, + // Addr has a FAM, so multiple inheritance isn't an option for `Load`/`Stow`, // and we want a common base that we can query to avoid monomorphization. protected: const ValKind kind; + /// The current position, `0` means top level, 1 inside a single loop uint8_t currentDepth{0}; // current depth + /// For an `Addr`, this is the "natural depth" where it would be + /// placed in a loop without dependencies, i.e., the innermost index + /// `0` means top level, `1` inside a single loop, etc uint8_t naturalDepth{0}; // original, or, for Addr, `indMat.numCol()` uint8_t visitDepth{255}; uint8_t maxDepth; // memory allocated to support up to this depth bool dependsOnParentLoop_{false}; - // 7 bytes; we have 1 left!
- // uint16_t index_; - // uint16_t lowLink_; - // uint16_t bitfield; + uint16_t topologicalIndex{0}; constexpr Node(ValKind kind_) : kind(kind_) {} constexpr Node(ValKind kind_, unsigned depth) @@ -142,6 +148,7 @@ class Node { return visitDepth; } constexpr void clearVisited() { visitDepth = 255; } + /// bool wasVisited(uint8_t d) { return visitDepth == d; } [[nodiscard]] constexpr auto wasVisited(uint8_t d) const -> bool { return visitDepth == d; } @@ -152,7 +159,14 @@ class Node { [[nodiscard]] constexpr auto sameBlock(const Node *other) const -> bool { return other && other->parent == parent && other->child == child; } - + constexpr void setTopIndex(uint16_t idx) { topologicalIndex = idx; } + constexpr auto getTopIndex() const -> uint16_t { return topologicalIndex; } + constexpr auto isAfter(Node *v) const -> bool { + return topologicalIndex > v->getTopIndex(); + } + constexpr auto isBefore(Node *v) const -> bool { + return topologicalIndex < v->getTopIndex(); + } // [[nodiscard]] constexpr auto wasVisited() const -> bool { // return bitfield & 0x1; // } @@ -171,10 +185,10 @@ class Node { // [[nodiscard]] constexpr auto getIndex() const -> unsigned { return index_; // } constexpr void setIndex(unsigned i) { index_ = i; } [[nodiscard]] constexpr auto getKind() const -> ValKind { return kind; } - [[nodiscard]] constexpr auto getCurrentDepth() const -> unsigned { + [[nodiscard]] constexpr auto getCurrentDepth() const -> int { return currentDepth; } - [[nodiscard]] constexpr auto getNaturalDepth() const -> unsigned { + [[nodiscard]] constexpr auto getNaturalDepth() const -> int { return naturalDepth; } @@ -202,7 +216,8 @@ class Node { if (n) n->child = this; return this; } - constexpr void setCurrentDepth(unsigned d) { + constexpr void setCurrentDepth(int d) { + invariant(d >= 0); invariant(d <= std::numeric_limits::max()); currentDepth = d; } @@ -259,6 +274,7 @@ class Node { if (llvm::isa(v)) return VK_Bflt; return VK_CVal; } + /// Iterate through all instructions 
[[nodiscard]] constexpr auto nodes() noexcept -> utils::ListRange { return utils::ListRange{this, utils::GetNext{}}; @@ -274,21 +290,34 @@ static_assert(sizeof(Node) == 4 * sizeof(Node *) + 8); /// Loop /// parent: outer loop /// child: inner (sub) loop +/// last is the last instruction in the body /// exit is the associated exit block class Loop : public Node { poly::Loop *affineLoop{nullptr}; + Node *last{nullptr}; + /// IDs are in topologically sorted order. + CostModeling::Legality legality{}; + int32_t edgeId{-1}; // edge cycle id + // while `child` points to the first contained instruction, + // `last` points to the last contained instruction, + // and can be used for backwards iteration over the graph. public: - constexpr Loop(unsigned d) : Node(VK_Loop, d) {} + /// Get the IDs for the Dependencies carried by this loop + [[nodiscard]] constexpr auto edges(poly::PtrVector edges) const + -> utils::VForwardRange { + return utils::VForwardRange{edges, edgeId}; + } + constexpr Loop(unsigned d) : Node{VK_Loop, d} {} constexpr Loop(unsigned d, poly::Loop *AL) - : Node(VK_Loop, d), affineLoop(AL) {} + : Node{VK_Loop, d}, affineLoop{AL} {} static constexpr auto classof(const Node *v) -> bool { return v->getKind() == VK_Loop; } /// Get the first subloop. [[nodiscard]] constexpr auto getSubLoop() const -> Loop * { Node *C = getChild(); - C = (C || llvm::isa(C)) ? C : C->getChild(); + C = (!C || llvm::isa(C)) ? C : C->getChild(); return static_cast(C); } /// Return the enclosing, parent loop. @@ -304,12 +333,13 @@ class Loop : public Node { } [[nodiscard]] constexpr auto subLoops() const { return utils::ListRange{getSubLoop(), - [](Loop *L) { return L->getNextLoop(); }}; - } - static constexpr auto create(Arena<> *alloc, poly::Loop *AL, size_t depth) - -> Loop * { - return alloc->create(depth, AL); + [](Loop *L) -> Loop * { return L->getNextLoop(); }}; } + /// getLast() + /// Get the last node in the loop. + /// Useful for iterating backwards. 
+ [[nodiscard]] constexpr auto getLast() const -> Node * { return last; } + constexpr void setLast(Node *n) { last = n; } [[nodiscard]] constexpr auto getLLVMLoop() const -> llvm::Loop * { return affineLoop->getLLVMLoop(); } @@ -323,22 +353,47 @@ class Loop : public Node { } // get the outermost subloop of `this` to which `N` belongs [[nodiscard]] constexpr auto getSubloop(IR::Node *N) -> Loop * { - Loop *L = N->getLoop(); + Loop *L = N->getLoop(), *O; if (L == this) return this; - for (; L;) { - Loop *O = L->getOuterLoop(); + for (; L; L = O) { + O = L->getOuterLoop(); if (O == this) return L; - L = O; } return nullptr; } + [[nodiscard]] constexpr auto getEdge() const -> int32_t { return edgeId; } + constexpr void addEdge(math::MutPtrVector deps, int32_t d) { + invariant(d >= 0); + // [ -1, -1, -1, -1, -1 ] // d = 2, edgeId = -1 + // [ 2, -1, -1, -1, -1 ] // d = 0, edgeId = 2 + // [ 2, -1, -1, -1, 0 ] // d = 4, edgeId = 0 + // now edgeId = 4, and we can follow path 4->0->2 + deps[d] = std::exchange(edgeId, d); + } + constexpr auto getLoopAtDepth(uint8_t d) -> Loop * { + Loop *L = this; + for (uint8_t currDepth = this->currentDepth; currDepth > d; --currDepth) + L = L->getOuterLoop(); + return L; + } + constexpr auto getLegality() -> CostModeling::Legality { return legality; } + inline void setLegality(CostModeling::LoopDepSatisfaction &deps); }; + [[nodiscard]] inline constexpr auto Node::getLoop() const noexcept -> Loop * { - if (!parent) return nullptr; - if (parent->kind != VK_Loop) return nullptr; + if (!parent || (parent->kind != VK_Loop)) return nullptr; return static_cast(parent); } +/// This is used for convenience in top sort, but our canonical IR +/// does not actually contain Exit nodes! 
+struct Exit : Node { + Exit() : Node(VK_Exit) {} + static constexpr auto classof(const Node *v) -> bool { + return v->getKind() == VK_Exit; + } +}; + class Instruction; class Value : public Node { @@ -381,30 +436,6 @@ class Value : public Node { users.push_back(alloc, I); } constexpr void removeFromUsers(Instruction *I) { users.remove(I); } - // unionPtr methods - // [[nodiscard]] constexpr auto getUsers() const - // -> const UList * { - // invariant(kind == VK_Load || kind >= VK_Func); - // return unionPtr.users; - // } - // [[nodiscard]] constexpr auto getUsers() -> UList * { - // invariant(kind == VK_Load || kind >= VK_Func); - // return unionPtr.users; - // } - // constexpr void setUsers(UList *users) { - // invariant(kind == VK_Load || kind >= VK_Func); - // unionPtr.users = users; - // } - // constexpr void addUser(Arena<> *alloc, Instruction *n) { - // invariant(kind == VK_Load || kind >= VK_Func); - // if (!unionPtr.users) - // unionPtr.users = alloc->create>(n); - // else unionPtr.users = unionPtr.users->pushUnique(alloc, n); - // } - // constexpr void removeFromUsers(Instruction *n) { - // invariant(kind == VK_Load || kind >= VK_Func); - // unionPtr.users->eraseUnordered(n); - // } /// isStore() is true if the address is a store, false if it is a load /// If the memory access is a store, this can still be a reload @@ -417,7 +448,7 @@ class Value : public Node { [[nodiscard]] inline auto getFastMathFlags() const -> llvm::FastMathFlags; /// these methods are overloaded for specific subtypes - inline auto getCost(llvm::TargetTransformInfo &TTI, cost::VectorWidth W) + inline auto getCost(const llvm::TargetTransformInfo &TTI, cost::VectorWidth W) -> cost::RecipThroughputLatency; [[nodiscard]] inline auto getValue() -> llvm::Value *; [[nodiscard]] inline auto getValue() const -> const llvm::Value *; @@ -430,7 +461,7 @@ class Value : public Node { [[nodiscard]] inline auto getNumOperands() const -> unsigned; [[nodiscard]] inline auto getOperand(unsigned) -> 
Value *; [[nodiscard]] inline auto getOperand(unsigned) const -> const Value *; - [[nodiscard]] inline auto associativeOperandsFlag() const -> uint8_t; + [[nodiscard]] inline auto commutativeOperandsFlag() const -> uint8_t; [[nodiscard]] inline auto getNumScalarBits() const -> unsigned; [[nodiscard]] inline auto getNumScalarBytes() const -> unsigned; [[nodiscard]] inline auto getBasicBlock() -> llvm::BasicBlock *; @@ -449,6 +480,7 @@ class Instruction : public Value { constexpr Instruction(ValKind kind_, unsigned curDepth, unsigned natDepth, unsigned maxDepth_) : Value(kind_, curDepth, natDepth, maxDepth_) {} + Instruction *reductionDst{nullptr}; public: static constexpr auto classof(const Node *v) -> bool { @@ -458,11 +490,17 @@ class Instruction : public Value { llvm::Intrinsic::ID ID; Node::ValKind kind; llvm::Type *type; + constexpr auto operator==(const Identifier &other) const -> bool = default; }; // declarations - [[nodiscard]] constexpr auto getIdentifier() const -> Identifier; + [[nodiscard]] auto getIdentifier() const -> Identifier; inline void setOperands(Arena<> *alloc, math::PtrVector); + constexpr void linkReductionDst(Instruction *op) { reductionDst = op; } + constexpr auto getReductionDst() const -> Instruction * { + return reductionDst; + } }; +static_assert(std::is_copy_assignable_v); /// CVal /// A constant value w/ respect to the loopnest. 
@@ -487,7 +525,7 @@ class Cnst : public Value { llvm::Type *typ; protected: - constexpr Cnst(ValKind kind, llvm::Type *t) : Value(kind) { typ = t; } + constexpr Cnst(ValKind knd, llvm::Type *t) : Value(knd) { typ = t; } public: static constexpr auto classof(const Node *v) -> bool { @@ -565,7 +603,7 @@ class Bint : public Cnst { const llvm::APInt &val; public: - constexpr Bint(llvm::ConstantInt *v, llvm::Type *t) + Bint(llvm::ConstantInt *v, llvm::Type *t) : Cnst(VK_Bint, t), val(v->getValue()) {} static constexpr auto create(Arena<> *alloc, llvm::ConstantInt *v, llvm::Type *t) -> Bint * { @@ -585,7 +623,7 @@ class Bflt : public Cnst { const llvm::APFloat &val; public: - constexpr Bflt(llvm::ConstantFP *v, llvm::Type *t) + Bflt(llvm::ConstantFP *v, llvm::Type *t) : Cnst(VK_Bflt, t), val(v->getValue()) {} static constexpr auto create(Arena<> *alloc, llvm::ConstantFP *v, llvm::Type *t) -> Bflt * { @@ -606,4 +644,10 @@ class Bflt : public Cnst { return false; } +class Compute; +struct InstByValue { + Compute *inst; + inline auto operator==(InstByValue const &other) const -> bool; +}; + } // namespace poly::IR diff --git a/include/IR/OrthogonalAxes.hpp b/include/IR/OrthogonalAxes.hpp new file mode 100644 index 000000000..297d498f1 --- /dev/null +++ b/include/IR/OrthogonalAxes.hpp @@ -0,0 +1,23 @@ +#pragma once +#ifndef OrthogonalAxes_hpp_INCLUDED +#define OrthogonalAxes_hpp_INCLUDED + +#include +#include + +/// `indep` must be `0` for any `invunrolls` it doesn't depend on +struct OrthogonalAxes { + /// Boolean: Are the axes independent? + uint32_t indep_axes : 1; + /// Bit mask: are the axes contiguous? 
+ uint32_t contig : 31; // max number of dims of 31 + /// Flag indicating whether the axis is independent of loops + /// `1` per independent loops + uint32_t indep; // max loop depth of 32 +}; +static_assert(sizeof(OrthogonalAxes) == 8); +constexpr auto operator==(OrthogonalAxes a, OrthogonalAxes b) -> bool { + return std::bit_cast(a) == std::bit_cast(b); +} + +#endif // OrthogonalAxes_hpp_INCLUDED diff --git a/include/IR/Predicate.hpp b/include/IR/Predicate.hpp index 2c176e657..d290bc9a6 100644 --- a/include/IR/Predicate.hpp +++ b/include/IR/Predicate.hpp @@ -1,9 +1,11 @@ #pragma once +#include "Containers/UnrolledList.hpp" #include "Dicts/BumpVector.hpp" +#include #include -#include #include +#include #include #include #include @@ -15,7 +17,6 @@ #include #include #include -#include namespace poly::IR { @@ -49,18 +50,18 @@ struct Intersection { constexpr Intersection(size_t index, Relation value) : predicates(static_cast(value) << (2 * index)) {} constexpr auto operator[](size_t index) const -> Relation { - assert(index < 32); + invariant(index < 32); return static_cast((predicates >> (2 * (index))) & 3); } void set(size_t index, Relation value) { - assert(index < 32); + invariant(index < 32); index += index; uint64_t maskedOff = predicates & ~(3ULL << (index)); predicates = maskedOff | static_cast(value) << (index); } [[nodiscard]] auto intersect(size_t index, Relation value) const -> Intersection { - assert(index < 32); + invariant(index < 32); index += index; return {predicates | static_cast(value) << (index)}; } @@ -153,7 +154,7 @@ struct Intersection { uint64_t mask = emptyMask(bitUnion); if (std::popcount(mask) == 1) { // a single b & !b case uint64_t remUnionMask = - ~(mask | (mask << 1)); // 0s `b`, meaning b can be either. + ~(mask | (mask << 1)); // 0s `b`, meaning b can be either. 
uint64_t w = remUnionMask & x; uint64_t z = remUnionMask & y; if (w == z) return {Intersection{w}}; @@ -281,7 +282,7 @@ struct Set { } else { allocated = true; intersectUnion.intersects = - alloc.create>(); + alloc->create>(); if (u.size() == 2) { intersectUnion.intersects->pushHasCapacity(u[0]); intersectUnion.intersects->pushHasCapacity(u[1]); @@ -377,7 +378,7 @@ struct Set { [[nodiscard]] auto operator&=(Set &pred) -> Set & { if (!pred.allocated) return *this &= pred.intersectUnion.intersect; pred.intersectUnion.intersects->forEach( - [&](Intersection pred) { *this &= pred; }); + [&](Intersection prd) { *this &= prd; }); return *this; } auto copy(Arena<> *alloc) const -> Set { diff --git a/include/IR/Users.hpp b/include/IR/Users.hpp index ab67c9cf1..4f542f431 100644 --- a/include/IR/Users.hpp +++ b/include/IR/Users.hpp @@ -1,12 +1,11 @@ #pragma once #include "Utilities/Invariant.hpp" -#include -#include +#include #include namespace poly::IR { -using utils::Arena, utils::invariant; +using alloc::Arena, utils::invariant; class Value; class Instruction; class Addr; diff --git a/include/LinearProgramming/LoopBlock.hpp b/include/LinearProgramming/LoopBlock.hpp index 3dc29e5d1..6910b4322 100644 --- a/include/LinearProgramming/LoopBlock.hpp +++ b/include/LinearProgramming/LoopBlock.hpp @@ -9,6 +9,7 @@ #include "Polyhedra/DependencyPolyhedra.hpp" #include "Polyhedra/Loops.hpp" #include "Polyhedra/Schedule.hpp" +#include #include #include #include @@ -17,12 +18,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include #include @@ -138,8 +137,8 @@ class LoopBlock { // dict::map userToMem{}; // dict::set visited{}; // llvm::LoopInfo *LI; - IR::Dependencies deps; - utils::OwningArena<> allocator{}; + IR::Dependencies &deps; + alloc::Arena<> &allocator; // we may turn off edges because we've exceeded its loop depth // or because the dependence has already been satisfied at an // earlier level. 
@@ -154,6 +153,9 @@ class LoopBlock { }; public: + constexpr LoopBlock(IR::Dependencies &deps_, alloc::Arena<> &allocator_) + : deps(deps_), allocator(allocator_) {} + struct OptimizationResult { IR::AddrChain addr; ScheduledNode *nodes; @@ -166,7 +168,6 @@ class LoopBlock { } }; - constexpr LoopBlock() = default; [[nodiscard]] auto optimize(IR::Cache &cache, IR::TreeResult tr) -> OptimizationResult { // first, we peel loops for which affine repr failed @@ -194,8 +195,20 @@ class LoopBlock { [[nodiscard]] constexpr auto getAllocator() -> Arena<> * { return &allocator; } + [[nodiscard]] constexpr auto getDependencies() -> IR::Dependencies & { + return deps; + } + [[nodiscard]] constexpr auto getDependencies() const + -> const IR::Dependencies & { + return deps; + } private: + struct LoadSummary { + Value *store; + poly::Loop *deepestLoop; + IR::AddrChain ac; + }; auto addScheduledNode(IR::Cache &cache, IR::Stow stow, IR::AddrChain addr) -> OptimizationResult { // how are we going to handle load duplication? 
@@ -256,8 +269,7 @@ class LoopBlock { /// // NOLINTNEXTLINE(misc-no-recursion) auto searchOperandsForLoads(IR::Cache &cache, IR::Stow stow, Value *val, - IR::AddrChain addr) - -> std::tuple { + IR::AddrChain addr) -> LoadSummary { auto *inst = llvm::dyn_cast(val); if (!inst) return {val, nullptr, addr}; // we use parent/child relationships here instead of next/prev @@ -267,7 +279,7 @@ class LoopBlock { if (load.getParent() != nullptr) { Arena<> *alloc = cache.getAllocator(); IR::Addr *reload = ((Addr *)load)->reload(alloc); - deps.copyDependencies(alloc, load, reload); + deps.copyDependencies(load, reload); invariant(reload->isLoad()); load = reload; addr.addAddr(reload); @@ -288,7 +300,7 @@ class LoopBlock { Addr *load = deps.reload(&allocator, store); stow.insertAfter(load); // insert load after stow addr.addAddr(load); - return {load, load->getLoop(), addr}; + return {load, load->getAffineLoop(), addr}; } auto *C = llvm::cast(inst); // could not find a load, so now we recurse, searching operands @@ -311,6 +323,9 @@ class LoopBlock { return {val, maxLoop, addr}; } + // We canonicalize offsets from `x[i - 1]` to `x[i]`, but being omega-shifted + // The LP minimizes omegas, which is intended to reduce distances. Thus, we + // want the distances to be reflected in the omegas. void shiftOmega(ScheduledNode *node) { unsigned nLoops = node->getNumLoops(); if (nLoops == 0) return; @@ -319,7 +334,8 @@ class LoopBlock { auto p1 = allocator.checkpoint(); MutSquarePtrMatrix A = math::matrix(&allocator, nLoops + 1); - // BumpPtrVector> omegaOffsets{allocator}; + // BumpPtrVector> + // omegaOffsets{allocator}; // // we check all memory accesses in the node, to see if applying the same // omega offsets can zero dependence offsets. If so, we apply the shift. // we look for offsets, then try and validate that the shift @@ -342,18 +358,18 @@ class LoopBlock { // input and output, no relative shift of shared loops possible // but indices may of course differ. 
for (ptrdiff_t d = 0; d < E.numRow(); ++d) { - MutPtrVector x = A(rank, _); - x[last] = E(d, 0); + MutPtrVector x = A[rank, _]; + x[last] = E[d, 0]; foundNonZeroOffset |= x[last] != 0; ptrdiff_t j = 0; for (; j < depCommon; ++j) - x[L - j] = E(d, j + numSyms) + E(d, j + numSyms + dep0); + x[L - j] = E[d, j + numSyms] + E[d, j + numSyms + dep0]; if (dep0 != dep1) { ptrdiff_t offset = dep0 > dep1 ? numSyms : numSyms + dep0; - for (; j < depMax; ++j) x[L - j] = E(d, j + offset); + for (; j < depMax; ++j) x[L - j] = E[d, j + offset]; } for (; j < nLoops; ++j) x[L - j] = 0; - rank = math::NormalForm::updateForNewRow(A(_(0, rank + 1), _)); + rank = math::NormalForm::updateForNewRow(A[_(0, rank + 1), _]); } } else { // dep between nodes @@ -361,13 +377,13 @@ class LoopBlock { unsigned offset = dep.isForward() ? numSyms + dep0 : numSyms, numDep = dep.isForward() ? dep1 : dep0; for (ptrdiff_t d = 0; d < E.numRow(); ++d) { - MutPtrVector x = A(rank, _); - x[last] = E(d, 0); + MutPtrVector x = A[rank, _]; + x[last] = E[d, 0]; foundNonZeroOffset |= x[last] != 0; ptrdiff_t j = 0; - for (; j < numDep; ++j) x[L - j] = E(d, j + offset); + for (; j < numDep; ++j) x[L - j] = E[d, j + offset]; for (; j < nLoops; ++j) x[L - j] = 0; - rank = math::NormalForm::updateForNewRow(A(_(0, rank + 1), _)); + rank = math::NormalForm::updateForNewRow(A[_(0, rank + 1), _]); } } } @@ -381,13 +397,13 @@ class LoopBlock { unsigned offset = dep.isForward() ? numSyms : numSyms + dep0, numDep = dep.isForward() ? 
dep0 : dep1; for (ptrdiff_t d = 0; d < E.numRow(); ++d) { - MutPtrVector x = A(rank, _); - x[last] = E(d, 0); + MutPtrVector x = A[rank, _]; + x[last] = E[d, 0]; foundNonZeroOffset |= x[last] != 0; ptrdiff_t j = 0; - for (; j < numDep; ++j) x[L - j] = E(d, j + offset); + for (; j < numDep; ++j) x[L - j] = E[d, j + offset]; for (; j < nLoops; ++j) x[L - j] = 0; - rank = math::NormalForm::updateForNewRow(A(_(0, rank + 1), _)); + rank = math::NormalForm::updateForNewRow(A[_(0, rank + 1), _]); } } } @@ -397,14 +413,14 @@ class LoopBlock { // matrix A is reasonably diagonalized, should indicate ptrdiff_t c = 0; for (ptrdiff_t r = 0; r < rank; ++r) { - int64_t off = A(r, last); + int64_t off = A[r, last]; if (off == 0) continue; for (; c < nLoops; ++c) { - if (A(r, c) != 0) break; + if (A[r, c] != 0) break; offs[L - c] = 0; } if (c == nLoops) return; - int64_t Arc = A(r, c), x = off / Arc; + int64_t Arc = A[r, c], x = off / Arc; if (x * Arc != off) continue; offs[L - c++] = x; // decrement loop `L-c` by `x` nonZero = true; @@ -433,8 +449,8 @@ class LoopBlock { for (ptrdiff_t l = 0; l < numDep; ++l) { int64_t mlt = offs[l]; if (mlt == 0) continue; - satL(0, _) -= mlt * satL(offset + l, _); - bndL(0, _) -= mlt * bndL(offset + l, _); + satL[0, _] -= mlt * satL[offset + l, _]; + bndL[0, _] -= mlt * bndL[offset + l, _]; } if (!repeat) break; repeat = false; @@ -454,8 +470,8 @@ class LoopBlock { for (size_t l = 0; l < numDep; ++l) { int64_t mlt = offs[l]; if (mlt == 0) continue; - satL(0, _) -= mlt * satL(offset + l, _); - bndL(0, _) -= mlt * bndL(offset + l, _); + satL[0, _] -= mlt * satL[offset + l, _]; + bndL[0, _] -= mlt * bndL[offset + l, _]; } } } @@ -478,7 +494,7 @@ class LoopBlock { continue; ptrdiff_t r = math::NormalForm::rank(indMat); if (r == edge.getInCurrentDepth()) continue; - // TODO handle linearly dependent acceses, filtering them out + // TODO handle linearly dependent accesses, filtering them out if (r != ptrdiff_t(indMat.numRow())) continue; 
node->schedulePhi(indMat, r); tryOrth = true; @@ -496,25 +512,25 @@ class LoopBlock { return math::SVector{edge.getNumLambda(), edge.getDynSymDim(), edge.getNumConstraints(), 1}; } - static constexpr auto countAuxParamsAndConstraints(IR::Dependencies deps, - ScheduledNode *nodes, - unsigned depth) + static constexpr auto + countAuxParamsAndConstraints(const IR::Dependencies &deps, + ScheduledNode *nodes, int depth) -> math::SVector { math::SVector params{}; assert(allZero(params)); for (ScheduledNode *node : nodes->getVertices()) for (Dependence d : node->inputEdges(deps)) - if (d.isActive(depth)) params += numParams(d); + if (!d.isSat(depth)) params += numParams(d); return params; } - using BackupSchedule = - math::ResizeableView, - unsigned>; - using BackupSat = math::ResizeableView, unsigned>; - using Backup = std::pair; + using BackupSchedule = math::ResizeableView< + containers::Pair, ptrdiff_t>; + using BackupSat = math::ResizeableView, ptrdiff_t>; + using Backup = containers::Pair; - static constexpr auto - setScheduleMemoryOffsets(Dependencies deps, ScheduledNode *nodes, unsigned d) + static constexpr auto setScheduleMemoryOffsets(const Dependencies &deps, + ScheduledNode *nodes, + unsigned d) -> std::array { // C, lambdas, omegas, Phis unsigned numOmegaCoefs = 0, numPhiCoefs = 0, numSlack = 0; @@ -531,8 +547,8 @@ class LoopBlock { } return {numOmegaCoefs, numPhiCoefs, numSlack}; } - static constexpr auto calcCoefs(Dependencies deps, ScheduledNode *nodes, - unsigned d) -> CoefCounts { + static constexpr auto calcCoefs(const Dependencies &deps, + ScheduledNode *nodes, int d) -> CoefCounts { auto [numOmegaCoefs, numPhiCoefs, numSlack] = setScheduleMemoryOffsets(deps, nodes, d); auto [numLambda, numBounding, numConstraints, numActiveEdges] = @@ -542,11 +558,11 @@ class LoopBlock { } // NOLINTNEXTLINE(misc-no-recursion) - [[nodiscard]] auto optimize(ScheduledNode *nodes, unsigned d, - unsigned maxDepth) -> Result { + [[nodiscard]] auto optimize(ScheduledNode 
*nodes, int d, int maxDepth) + -> Result { if (d >= maxDepth) return Result::independent(); if (Result r = solveGraph(nodes, maxDepth, false)) { - unsigned descend = d + 1; + int descend = d + 1; if (descend == maxDepth) return r; if (Result n = optimize(nodes, descend, maxDepth)) { if ((r == Result::dependent()) && @@ -557,17 +573,17 @@ class LoopBlock { } return breakGraph(nodes, d); } - /// solveGraph(ScheduledNode *nodes, unsigned depth, bool satisfyDeps) + /// solveGraph(ScheduledNode *nodes, int depth, bool satisfyDeps) /// solve the `nodes` graph at depth `d` /// if `satisfyDeps` is true, then we are trying to satisfy dependencies at /// this level /// - [[nodiscard]] auto solveGraph(ScheduledNode *nodes, unsigned depth, + [[nodiscard]] auto solveGraph(ScheduledNode *nodes, int depth, bool satisfyDeps) -> Result { CoefCounts counts{calcCoefs(deps, nodes, depth)}; return solveGraph(nodes, depth, satisfyDeps, counts); } - [[nodiscard]] auto solveGraph(ScheduledNode *nodes, unsigned depth, + [[nodiscard]] auto solveGraph(ScheduledNode *nodes, int depth, bool satisfyDeps, CoefCounts counts) -> Result { if (counts.numLambda == 0) { setSchedulesIndependent(nodes, depth); @@ -587,7 +603,7 @@ class LoopBlock { nodes, depth, counts, sol[_(counts.numPhiCoefs + counts.numOmegaCoefs, end)]); } - void setSchedulesIndependent(ScheduledNode *nodes, unsigned depth) { + void setSchedulesIndependent(ScheduledNode *nodes, int depth) { // IntMatrix A, N; for (ScheduledNode *node : nodes->getVertices()) { if ((depth >= node->getNumLoops()) || node->phiIsScheduled(depth)) @@ -596,7 +612,7 @@ class LoopBlock { setDepFreeSchedule(node, depth); } } - static void setDepFreeSchedule(ScheduledNode *node, unsigned depth) { + static void setDepFreeSchedule(ScheduledNode *node, int depth) { node->getOffsetOmega(depth) = 0; if (node->phiIsScheduled(depth)) return; // we'll check the null space of the phi's so far @@ -610,38 +626,39 @@ class LoopBlock { } // auto s = allocator->scope(); // 
TODO: use bumpalloc DenseMatrix nullSpace; // d x lfull - DenseMatrix A{node->getPhi()(_(0, depth), _).transpose()}; + DenseMatrix A{node->getPhi()[_(0, depth), _].t()}; math::NormalForm::nullSpace11(nullSpace, A); - invariant(unsigned(nullSpace.numRow()), node->getNumLoops() - depth); + invariant(ptrdiff_t(nullSpace.numRow()), + ptrdiff_t(node->getNumLoops()) - depth); // Now, we search index matrices for schedules not in the null space of // existing phi. This is because we're looking to orthogonalize a // memory access if possible, rather than setting a schedule // arbitrarily. // Here, we collect candidates for the next schedule DenseMatrix candidates{ - math::DenseDims{0, node->getNumLoops() + 1}}; + math::DenseDims<>{{0}, {node->getNumLoops() + 1}}}; Vector indv; indv.resizeForOverwrite(node->getNumLoops()); for (Addr *mem : node->localAddr()) { PtrMatrix indMat = mem->indexMatrix(); // lsub x d A.resizeForOverwrite( math::DenseDims{nullSpace.numRow(), indMat.numCol()}); - A = nullSpace(_, _(0, indMat.numRow())) * indMat; + A = nullSpace[_, _(0, indMat.numRow())] * indMat; // we search A for rows that aren't all zero for (ptrdiff_t d = 0; d < A.numCol(); ++d) { - if (allZero(A(_, d))) continue; - indv << indMat(_, d); + if (allZero(A[_, d])) continue; + indv << indMat[_, d]; bool found = false; for (ptrdiff_t j = 0; j < candidates.numRow(); ++j) { - if (candidates(j, _(0, last)) != indv) continue; + if (candidates[j, _(0, last)] != indv) continue; found = true; - ++candidates(j, 0); + ++candidates[j, 0]; break; } if (!found) { - candidates.resize(candidates.numRow() + 1); - assert(candidates(last, 0) == 0); - candidates(last, _(1, end)) << indv; + candidates.resize(++auto{candidates.numRow()}); + assert((candidates[last, 0]) == 0); + candidates[last, _(1, end)] << indv; } } } @@ -650,21 +667,21 @@ class LoopBlock { // number of repetitions (which were placed in first index) ptrdiff_t i = 0; for (ptrdiff_t j = 1; j < candidates.numRow(); ++j) - if 
(candidates(j, _) > candidates(i, _)) i = j; - node->getSchedule(depth) << candidates(i, _(1, end)); + if (candidates[j, _] > candidates[i, _]) i = j; + node->getSchedule(depth) << candidates[i, _(1, end)]; return; } // do we want to pick the outermost original loop, // or do we want to pick the outermost lex null space? node->getSchedule(depth) << 0; for (ptrdiff_t c = 0; c < nullSpace.numCol(); ++c) { - if (allZero(nullSpace(_, c))) continue; + if (allZero(nullSpace[_, c])) continue; node->getSchedule(depth)[c] = 1; return; } invariant(false); } - void updateSchedules(ScheduledNode *nodes, unsigned depth, CoefCounts counts, + void updateSchedules(ScheduledNode *nodes, int depth, CoefCounts counts, Simplex::Solution sol) { #ifndef NDEBUG if (counts.numPhiCoefs > 0) @@ -705,14 +722,14 @@ class LoopBlock { if (!node->phiIsScheduled(depth)) { int64_t l = sol[node->getPhiOffsetRange() + o].denomLCM(); for (ptrdiff_t i = 0; i < node->getPhi().numCol(); ++i) - assert(node->getPhi()(depth, i) == + assert((node->getPhi()[depth, i]) == sol[node->getPhiOffsetRange() + o][i] * l); } #endif } } - [[nodiscard]] auto deactivateSatisfiedEdges(ScheduledNode *nodes, - unsigned depth, CoefCounts counts, + [[nodiscard]] auto deactivateSatisfiedEdges(ScheduledNode *nodes, int depth, + CoefCounts counts, Simplex::Solution sol) -> Result { if (allZero(sol[_(begin, counts.numBounding + counts.numActiveEdges)])) return checkEmptySatEdges(nodes, depth); @@ -734,15 +751,15 @@ class LoopBlock { for (ScheduledNode *outNode : nodes->getVertices()) { for (Dependence edge : outNode->inputEdges(deps)) { if (edge.isInactive(depth)) continue; - Col uu = u + edge.getNumDynamicBoundingVar(); + ptrdiff_t uu = u + edge.getNumDynamicBoundingVar(); if ((sol[w++] != 0) || (anyNEZero(sol[_(u, uu)]))) { edge.setSatLevelLP(depth); result = Result::dependent(); } else { ScheduledNode *inNode = edge.input()->getNode(); - DensePtrMatrix inPhi = inNode->getPhi()(_(0, depth + 1), _), + DensePtrMatrix inPhi = 
inNode->getPhi()[_(0, depth + 1), _], outPhi = - outNode->getPhi()(_(0, depth + 1), _); + outNode->getPhi()[_(0, depth + 1), _]; edge.checkEmptySat(&allocator, inNode->getLoopNest(), inNode->getOffset(), inPhi, outNode->getLoopNest(), outNode->getOffset(), outPhi); @@ -752,14 +769,14 @@ class LoopBlock { } return result; } - auto checkEmptySatEdges(ScheduledNode *nodes, unsigned depth) -> Result { + auto checkEmptySatEdges(ScheduledNode *nodes, int depth) -> Result { for (ScheduledNode *outNode : nodes->getVertices()) { for (Dependence edge : outNode->inputEdges(deps)) { if (edge.isSat(depth)) continue; ScheduledNode *inNode = edge.input()->getNode(); invariant(edge.output()->getNode(), outNode); - DensePtrMatrix inPhi = inNode->getPhi()(_(0, depth + 1), _), - outPhi = outNode->getPhi()(_(0, depth + 1), _); + DensePtrMatrix inPhi = inNode->getPhi()[_(0, depth + 1), _], + outPhi = outNode->getPhi()[_(0, depth + 1), _]; edge.checkEmptySat(&allocator, inNode->getLoopNest(), inNode->getOffset(), inPhi, outNode->getLoopNest(), outNode->getOffset(), outPhi); @@ -810,8 +827,8 @@ class LoopBlock { deps.satLevelPair(Dependence::ID{dID}) = sat[i++]; } // NOLINTNEXTLINE(misc-no-recursion) - [[nodiscard]] auto optimizeSatDep(ScheduledNode *nodes, unsigned depth, - unsigned maxDepth, Result backupResult) + [[nodiscard]] auto optimizeSatDep(ScheduledNode *nodes, int depth, + int maxDepth, Result backupResult) -> Result { // if we're here, there are satisfied deps in both // depSatLevel and depSatNest @@ -830,7 +847,7 @@ class LoopBlock { return backupResult; } // NOLINTNEXTLINE(misc-no-recursion) - auto tryFuse(ScheduledNode *n0, ScheduledNode *n1, unsigned depth) -> Result { + auto tryFuse(ScheduledNode *n0, ScheduledNode *n1, int depth) -> Result { auto s = allocator.scope(); auto old0 = stashFit(n0); // FIXME: stash dep sat level auto old1 = stashFit(n1); // FIXME: stash dep sat level @@ -842,7 +859,7 @@ class LoopBlock { popStash(old1); return Result::failure(); } - auto 
satisfySplitEdges(ScheduledNode *nodes, unsigned depth) -> Result { + auto satisfySplitEdges(ScheduledNode *nodes, int depth) -> Result { auto s = allocator.scope(); dict::aset graph{&allocator}; for (ScheduledNode *node : nodes->getVertices()) graph.insert(node); @@ -857,18 +874,18 @@ class LoopBlock { } return (found) ? Result::dependent() : Result::independent(); } - auto solveSplitGraph(ScheduledNode *nodes, unsigned depth) -> Result { + auto solveSplitGraph(ScheduledNode *nodes, int depth) -> Result { Result sat = satisfySplitEdges(nodes, depth); Result opt = solveGraph(nodes, depth, false, calcCoefs(deps, nodes, depth)); if (!opt) return opt; return opt & sat; } // NOLINTNEXTLINE(misc-no-recursion) - [[nodiscard]] auto breakGraph(ScheduledNode *node, unsigned d) -> Result { + [[nodiscard]] auto breakGraph(ScheduledNode *node, int d) -> Result { // Get a top sorting of SCC's; because we couldn't solve the graph // with these dependencies fused, we'll try splitting them. ScheduledNode *components = - graph::stronglyConnectedComponents(ScheduleGraph(d), node); + graph::stronglyConnectedComponents(ScheduleGraph(deps, d), node); if (components->getNextComponent() == nullptr) return {}; // components are sorted in topological order. 
// We split all of them, solve independently, @@ -917,9 +934,8 @@ class LoopBlock { /// Phis: scheduling rotations /// w: bounding offsets, independent of symbolic variables /// u: bounding offsets, dependent on symbolic variables - auto instantiateOmniSimplex(ScheduledNode *nodes, unsigned d, - bool satisfyDeps, CoefCounts counts) - -> std::unique_ptr { + auto instantiateOmniSimplex(ScheduledNode *nodes, int d, bool satisfyDeps, + CoefCounts counts) -> std::unique_ptr { auto [numOmegaCoefs, numPhiCoefs, numSlack, numLambda, numBounding, numConstraints, numActiveEdges] = counts; auto omniSimplex = Simplex::create( @@ -933,9 +949,9 @@ class LoopBlock { // rows give constraints; each edge gets its own // numBounding = num u // numActiveEdges = num w - Row c = 0; - Col l = 1, o = 1 + numLambda + numSlack, p = o + numOmegaCoefs, - w = p + numPhiCoefs, u = w + numActiveEdges; + ptrdiff_t c = 0; + ptrdiff_t l = 1, o = 1 + numLambda + numSlack, p = o + numOmegaCoefs, + w = p + numPhiCoefs, u = w + numActiveEdges; for (ScheduledNode *inNode : nodes->getVertices()) { for (Dependence edge : inNode->outputEdges(deps, d)) { ScheduledNode *outNode = edge.output()->getNode(); @@ -948,26 +964,27 @@ class LoopBlock { bndO{edge.getBndOmegaCoefs()}, bndWU{edge.getBndCoefs()}; const ptrdiff_t numSatConstraints = satC.size(), numBndConstraints = bndC.size(); - const Col nPc = satPc.numCol(), nPp = satPp.numCol(); - invariant(nPc, bndPc.numCol()); - invariant(nPp, bndPp.numCol()); - Row cc = c + numSatConstraints; - Row ccc = cc + numBndConstraints; + const ptrdiff_t nPc = ptrdiff_t(satPc.numCol()), + nPp = ptrdiff_t(satPp.numCol()); + invariant(nPc, ptrdiff_t(bndPc.numCol())); + invariant(nPp, ptrdiff_t(bndPp.numCol())); + ptrdiff_t cc = c + numSatConstraints; + ptrdiff_t ccc = cc + numBndConstraints; - Col ll = l + satL.numCol(); - Col lll = ll + bndL.numCol(); - C(_(c, cc), _(l, ll)) << satL; - C(_(cc, ccc), _(ll, lll)) << bndL; + ptrdiff_t ll = l + ptrdiff_t(satL.numCol()); + 
ptrdiff_t lll = ll + ptrdiff_t(bndL.numCol()); + C[_(c, cc), _(l, ll)] << satL; + C[_(cc, ccc), _(ll, lll)] << bndL; l = lll; // bounding - C(_(cc, ccc), w++) << bndWU(_, 0); - Col uu = u + bndWU.numCol() - 1; - C(_(cc, ccc), _(u, uu)) << bndWU(_, _(1, end)); + C[_(cc, ccc), w++] << bndWU[_, 0]; + ptrdiff_t uu = u + ptrdiff_t(bndWU.numCol()) - 1; + C[_(cc, ccc), _(u, uu)] << bndWU[_, _(1, end)]; u = uu; if (!satisfyDeps || !edge.stashedPreventsReordering(d)) - C(_(c, cc), 0) << satC; - else C(_(c, cc), 0) << satC + satW; - C(_(cc, ccc), 0) << bndC; + C[_(c, cc), 0] << satC; + else C[_(c, cc), 0] << satC + satW; + C[_(cc, ccc), 0] << bndC; // now, handle Phi and Omega // phis are not constrained to be 0 if (outNode == inNode) { @@ -976,17 +993,17 @@ class LoopBlock { if (outNode->phiIsScheduled(d)) { // add it constants auto sch = outNode->getSchedule(d); - C(_(c, cc), 0) -= - satPc * sch[_(0, nPc)] + satPp * sch[_(0, nPp)]; - C(_(cc, ccc), 0) -= - bndPc * sch[_(0, nPc)] + bndPp * sch[_(0, nPp)]; + C[_(c, cc), 0] -= + satPc * sch[_(0, nPc)].t() + satPp * sch[_(0, nPp)].t(); + C[_(cc, ccc), 0] -= + bndPc * sch[_(0, nPc)].t() + bndPp * sch[_(0, nPp)].t(); } else { // FIXME: phiChild = [14:18), 4 cols // while Dependence seems to indicate 2 // loops why the disagreement? 
auto po = outNode->getPhiOffset() + p; - C(_(c, cc), _(po, po + nPc)) << satPc + satPp; - C(_(cc, ccc), _(po, po + nPc)) << bndPc + bndPp; + C[_(c, cc), _(po, po + nPc)] << satPc + satPp; + C[_(cc, ccc), _(po, po + nPc)] << bndPc + bndPp; } } else if (outNode->phiIsScheduled(d)) { // add it constants @@ -994,30 +1011,30 @@ class LoopBlock { // inner -> outer // so we need to drop inner most if one has less auto sch = outNode->getSchedule(d); - auto schP = sch[_(0, nPp)]; - auto schC = sch[_(0, nPc)]; - C(_(c, cc), 0) -= satPc * schC + satPp * schP; - C(_(cc, ccc), 0) -= bndPc * schC + bndPp * schP; + auto schP = sch[_(0, nPp)].t(); + auto schC = sch[_(0, nPc)].t(); + C[_(c, cc), 0] -= satPc * schC + satPp * schP; + C[_(cc, ccc), 0] -= bndPc * schC + bndPp * schP; } else if (nPc < nPp) { // Pp has more cols, so outer/leftmost overlap auto po = outNode->getPhiOffset() + p, poc = po + nPc, pop = po + nPp; - C(_(c, cc), _(po, poc)) << satPc + satPp(_, _(0, nPc)); - C(_(cc, ccc), _(po, poc)) << bndPc + bndPp(_, _(0, nPc)); - C(_(c, cc), _(poc, pop)) << satPp(_, _(nPc, end)); - C(_(cc, ccc), _(poc, pop)) << bndPp(_, _(nPc, end)); + C[_(c, cc), _(po, poc)] << satPc + satPp[_, _(0, nPc)]; + C[_(cc, ccc), _(po, poc)] << bndPc + bndPp[_, _(0, nPc)]; + C[_(c, cc), _(poc, pop)] << satPp[_, _(nPc, end)]; + C[_(cc, ccc), _(poc, pop)] << bndPp[_, _(nPc, end)]; } else /* if (nPc > nPp) */ { auto po = outNode->getPhiOffset() + p, poc = po + nPc, pop = po + nPp; - C(_(c, cc), _(po, pop)) << satPc(_, _(0, nPp)) + satPp; - C(_(cc, ccc), _(po, pop)) << bndPc(_, _(0, nPp)) + bndPp; - C(_(c, cc), _(pop, poc)) << satPc(_, _(nPp, end)); - C(_(cc, ccc), _(pop, poc)) << bndPc(_, _(nPp, end)); + C[_(c, cc), _(po, pop)] << satPc[_, _(0, nPp)] + satPp; + C[_(cc, ccc), _(po, pop)] << bndPc[_, _(0, nPp)] + bndPp; + C[_(c, cc), _(pop, poc)] << satPc[_, _(nPp, end)]; + C[_(cc, ccc), _(pop, poc)] << bndPc[_, _(nPp, end)]; } - C(_(c, cc), outNode->getOmegaOffset() + o) - << satO(_, 0) + satO(_, 1); - 
C(_(cc, ccc), outNode->getOmegaOffset() + o) - << bndO(_, 0) + bndO(_, 1); + C[_(c, cc), outNode->getOmegaOffset() + o] + << satO[_, 0] + satO[_, 1]; + C[_(cc, ccc), outNode->getOmegaOffset() + o] + << bndO[_, 0] + bndO[_, 1]; } } else { if (d < edge.getOutCurrentDepth()) @@ -1033,16 +1050,16 @@ class LoopBlock { if (d < edge.getOutCurrentDepth()) { if (d < edge.getInCurrentDepth()) invariant(inNode->getOmegaOffset() != outNode->getOmegaOffset()); - C(_(c, cc), outNode->getOmegaOffset() + o) - << satO(_, edge.isForward()); - C(_(cc, ccc), outNode->getOmegaOffset() + o) - << bndO(_, edge.isForward()); + C[_(c, cc), outNode->getOmegaOffset() + o] + << satO[_, edge.isForward()]; + C[_(cc, ccc), outNode->getOmegaOffset() + o] + << bndO[_, edge.isForward()]; } if (d < edge.getInCurrentDepth()) { - C(_(c, cc), inNode->getOmegaOffset() + o) - << satO(_, !edge.isForward()); - C(_(cc, ccc), inNode->getOmegaOffset() + o) - << bndO(_, !edge.isForward()); + C[_(c, cc), inNode->getOmegaOffset() + o] + << satO[_, !edge.isForward()]; + C[_(cc, ccc), inNode->getOmegaOffset() + o] + << bndO[_, !edge.isForward()]; } } c = ccc; @@ -1056,25 +1073,26 @@ class LoopBlock { static void updateConstraints(MutPtrMatrix C, const ScheduledNode *node, PtrMatrix sat, PtrMatrix bnd, - unsigned d, Row c, Row cc, Row ccc, Col p) { + unsigned d, ptrdiff_t c, ptrdiff_t cc, + ptrdiff_t ccc, ptrdiff_t p) { invariant(sat.numCol(), bnd.numCol()); if (node->phiIsScheduled(d)) { // add it constants - auto sch = node->getSchedule(d)[_(0, sat.numCol())]; + auto sch = node->getSchedule(d)[_(0, sat.numCol())].t(); // order is inner <-> outer // so we need the end of schedule if it is larger - C(_(c, cc), 0) -= sat * sch; - C(_(cc, ccc), 0) -= bnd * sch; + C[_(c, cc), 0] -= sat * sch; + C[_(cc, ccc), 0] -= bnd * sch; } else { // add it to C auto po = node->getPhiOffset() + p; - C(_(c, cc), _(po, po + sat.numCol())) << sat; - C(_(cc, ccc), _(po, po + bnd.numCol())) << bnd; + C[_(c, cc), _(po, po + 
ptrdiff_t(sat.numCol()))] << sat; + C[_(cc, ccc), _(po, po + ptrdiff_t(bnd.numCol()))] << bnd; } } - void addIndependentSolutionConstraints(NotNull omniSimplex, - const ScheduledNode *nodes, unsigned d, - CoefCounts counts) { + void addIndependentSolutionConstraints(Valid omniSimplex, + const ScheduledNode *nodes, + ptrdiff_t d, CoefCounts counts) { // omniSimplex->setNumCons(omniSimplex->getNumCons() + // memory.size()); // omniSimplex->reserveExtraRows(memory.size()); @@ -1086,9 +1104,9 @@ class LoopBlock { for (const ScheduledNode *node : nodes->getVertices()) { if (node->phiIsScheduled(d) || (!node->hasActiveEdges(deps, d))) continue; - C(i, 0) = 1; - C(i, node->getPhiOffsetRange() + o) << 1; - C(i++, ++s) = -1; // for >= + C[i, 0] = 1; + C[i, node->getPhiOffsetRange() + o] << 1; + C[i++, ++s] = -1; // for >= } } else { DenseMatrix A, N; @@ -1096,21 +1114,22 @@ class LoopBlock { if (node->phiIsScheduled(d) || (d >= node->getNumLoops()) || (!node->hasActiveEdges(deps, d))) continue; - A.resizeForOverwrite(Row{ptrdiff_t(node->getPhi().numCol())}, Col{d}); - A << node->getPhi()(_(0, d), _).transpose(); + A.resizeForOverwrite(Row<>{ptrdiff_t(node->getPhi().numCol())}, + Col<>{d}); + A << node->getPhi()[_(0, d), _].t(); math::NormalForm::nullSpace11(N, A); // we add sum(NullSpace,dims=1) >= 1 // via 1 = sum(NullSpace,dims=1) - s, s >= 0 - C(i, 0) = 1; - MutPtrVector cc{C(i, node->getPhiOffsetRange() + o)}; + C[i, 0] = 1; + MutPtrVector cc{C[i, node->getPhiOffsetRange() + o]}; // sum(N,dims=1) >= 1 after flipping row signs to be lex > 0 for (ptrdiff_t m = 0; m < N.numRow(); ++m) - cc += N(m, _) * lexSign(N(m, _)); - C(i++, ++s) = -1; // for >= + cc += N[m, _] * lexSign(N[m, _]); + C[i++, ++s] = -1; // for >= } } invariant(ptrdiff_t(omniSimplex->getNumCons()), i); - assert(!allZero(omniSimplex->getConstraints()(last, _))); + assert(!allZero(omniSimplex->getConstraints()[last, _])); } [[nodiscard]] static constexpr auto lexSign(PtrVector x) -> int64_t { for (auto a : 
x) @@ -1118,7 +1137,6 @@ class LoopBlock { invariant(false); return 0; } - // // // @@ -1143,10 +1161,11 @@ class LoopBlock { return os; } }; -inline auto operator<<(llvm::raw_ostream &os, - std::pair nodesdeps) +inline auto +operator<<(llvm::raw_ostream &os, + containers::Pair nodesdeps) -> llvm::raw_ostream & { - auto [nodes, deps] = nodesdeps; + const auto &[nodes, deps] = nodesdeps; os << "\nLoopBlock graph:\n"; size_t i = 0; for (ScheduledNode *v : nodes->getVertices()) { @@ -1158,7 +1177,7 @@ inline auto operator<<(llvm::raw_ostream &os, os << "\nLoopBlock Edges:"; for (ScheduledNode *inNode : nodes->getVertices()) { poly::AffineSchedule sin = inNode->getSchedule(); - for (Dependence edge : nodes->outputEdges(deps)) { + for (Dependence edge : nodes->outputEdges(*deps)) { os << "\n\n\tEdge = " << edge; ScheduledNode *outNode = edge.output()->getNode(); os << "Schedule In: s.getPhi() =" << sin.getPhi() diff --git a/include/LinearProgramming/ScheduledNode.hpp b/include/LinearProgramming/ScheduledNode.hpp index 9f89d9c63..c851f0d24 100644 --- a/include/LinearProgramming/ScheduledNode.hpp +++ b/include/LinearProgramming/ScheduledNode.hpp @@ -8,6 +8,7 @@ #include "Utilities/ListRanges.hpp" #include #include +#include #include #include #include @@ -19,7 +20,7 @@ using math::PtrVector, math::MutPtrVector, math::DensePtrMatrix, math::MutDensePtrMatrix, math::SquarePtrMatrix, math::MutSquarePtrMatrix, math::end, math::last, math::_, math::Simplex; using poly::Dependence, poly::DepPoly; -using utils::NotNull, utils::invariant, utils::Optional, utils::Arena; +using utils::Valid, utils::invariant, utils::Optional, alloc::Arena; /// ScheduledNode /// Represents a set of memory accesses that are optimized together in the LP. 
@@ -32,8 +33,8 @@ using utils::NotNull, utils::invariant, utils::Optional, utils::Arena; /// class ScheduledNode { - NotNull store; // linked list to loads, iterate over getChild - NotNull loopNest; + Valid store; // linked list to loads, iterate over getChild + Valid loopNest; ScheduledNode *next{nullptr}; ScheduledNode *component{nullptr}; // SCC cycle, or last node in a chain // Dependence *dep{nullptr}; // input edges (points to parents) @@ -62,8 +63,8 @@ class ScheduledNode { auto L = getNumLoops(); return L * L; } - constexpr ScheduledNode(Addr *store, poly::Loop *L) - : store(store), loopNest(L) { + constexpr ScheduledNode(Addr *write, poly::Loop *L) + : store(write), loopNest(L) { mem[0] = L->getNumLoops(); getFusionOmega() << 0; } @@ -122,10 +123,10 @@ class ScheduledNode { } constexpr void setOffsets(int64_t *o) { offsets = o; } struct NextAddr { - constexpr auto operator()(Addr *a) const -> Addr * { + auto operator()(Addr *a) const -> Addr * { return llvm::cast_or_null(a->getNext()); } - constexpr auto operator()(const Addr *a) const -> const Addr * { + auto operator()(const Addr *a) const -> const Addr * { return llvm::cast_or_null(a->getNext()); } }; @@ -192,12 +193,12 @@ class ScheduledNode { } }; template struct Deps { - poly::Dependencies dep; + const poly::Dependencies *dep; constexpr auto operator()(int32_t id) const { if constexpr (Out) - return dep.outputEdgeIDs(id) | std::views::transform(OutNode{dep}); - else return dep.inputEdgeIDs(id) | std::views::transform(InNode{dep}); + return dep->outputEdgeIDs(id) | std::views::transform(OutNode{dep}); + else return dep->inputEdgeIDs(id) | std::views::transform(InNode{dep}); } constexpr auto operator()(IR::Addr *a) const { if constexpr (Out) return (*this)(a->getEdgeOut()); @@ -205,11 +206,11 @@ class ScheduledNode { } }; template struct DepIDs { - poly::Dependencies dep; + const poly::Dependencies *dep; constexpr auto operator()(int32_t id) const { - if constexpr (Out) return dep.outputEdgeIDs(id); - 
else return dep.inputEdgeIDs(id); + if constexpr (Out) return dep->outputEdgeIDs(id); + else return dep->inputEdgeIDs(id); } constexpr auto operator()(IR::Addr *a) const { if constexpr (Out) return (*this)(a->getEdgeOut()); @@ -217,15 +218,15 @@ class ScheduledNode { } }; template struct DepFilter { - poly::Dependencies dep; + const poly::Dependencies *dep; unsigned depth; constexpr auto operator()(int32_t id) const { if constexpr (Out) - return dep.outputEdgeIDs(id) | dep.activeFilter(depth) | + return dep->outputEdgeIDs(id) | dep->activeFilter(depth) | std::views::transform(OutNode{dep}); else - return dep.inputEdgeIDs(id) | dep.activeFilter(depth) | + return dep->inputEdgeIDs(id) | dep->activeFilter(depth) | std::views::transform(InNode{dep}); } constexpr auto operator()(IR::Addr *a) const { @@ -236,31 +237,31 @@ class ScheduledNode { // all nodes that are memory inputs to this one; i.e. all parents // NOTE: we may reach each node multiple times - [[nodiscard]] inline auto inNeighbors(IR::Dependencies dep) { + [[nodiscard]] inline auto inNeighbors(const IR::Dependencies &dep) { return utils::NestedList{utils::ListRange{store, NextAddr{}}, - Deps{dep}}; + Deps{&dep}}; } // all nodes that are memory inputs to this one; i.e. all parents // NOTE: we may reach each node multiple times // all nodes that are memory outputs of this one; i.e. 
all children // NOTE: we may reach each node multiple times - [[nodiscard]] inline auto outNeighbors(IR::Dependencies dep) { + [[nodiscard]] inline auto outNeighbors(const IR::Dependencies &dep) { return utils::NestedList{utils::ListRange{store, NextAddr{}}, - Deps{dep}}; + Deps{&dep}}; } - [[nodiscard]] inline auto inputEdgeIds(IR::Dependencies dep) const { + [[nodiscard]] inline auto inputEdgeIds(const IR::Dependencies &dep) const { return utils::NestedList{utils::ListRange{store, NextAddr{}}, - DepIDs{dep}}; + DepIDs{&dep}}; } - [[nodiscard]] inline auto outputEdgeIds(IR::Dependencies dep) const { + [[nodiscard]] inline auto outputEdgeIds(const IR::Dependencies &dep) const { return utils::NestedList{utils::ListRange{store, NextAddr{}}, - DepIDs{dep}}; + DepIDs{&dep}}; } - [[nodiscard]] inline auto inputEdgeIds(IR::Dependencies dep, - unsigned depth) const { + [[nodiscard]] inline auto inputEdgeIds(const IR::Dependencies &dep, + int depth) const { static_assert(std::forward_iterator< - decltype(DepIDs{dep}((IR::Addr *)nullptr).begin())>); + decltype(DepIDs{&dep}((IR::Addr *)nullptr).begin())>); static_assert(std::forward_iterator); static_assert(std::forward_iterator); @@ -268,90 +269,89 @@ class ScheduledNode { return inputEdgeIds(dep) | dep.activeFilter(depth); } [[nodiscard]] inline auto outputEdgeIds(IR::Dependencies dep, - unsigned depth) const { + int depth) const { static_assert(std::forward_iterator); static_assert(std::ranges::range); return outputEdgeIds(dep) | dep.activeFilter(depth); } - [[nodiscard]] inline auto inputEdges(IR::Dependencies dep) { + [[nodiscard]] inline auto inputEdges(const IR::Dependencies &dep) { return utils::NestedList{ utils::ListRange{store, NextAddr{}, [](Addr *a) -> int32_t { return a->getEdgeIn(); }}, - [=](int32_t id) { + [&](int32_t id) { return dep.inputEdgeIDs(id) | - std::views::transform([=](int32_t i) -> Dependence { - IR::Dependencies d2 = dep; - return d2.get(Dependence::ID{i}); + std::views::transform([&](int32_t 
i) -> Dependence { + return dep.get(Dependence::ID{i}); }); }}; } - [[nodiscard]] inline auto outputEdges(IR::Dependencies dep) { + [[nodiscard]] inline auto outputEdges(const IR::Dependencies &dep) { return utils::NestedList{ utils::ListRange{store, NextAddr{}, [](Addr *a) -> int32_t { return a->getEdgeOut(); }}, - [=](int32_t id) { + [&](int32_t id) { return dep.outputEdgeIDs(id) | - std::views::transform([=](int32_t i) -> Dependence { - IR::Dependencies d2 = dep; - return d2.get(Dependence::ID{i}); + std::views::transform([&](int32_t i) -> Dependence { + return dep.get(Dependence::ID{i}); }); }}; } - [[nodiscard]] inline auto inputEdges(IR::Dependencies dep, unsigned depth) { + [[nodiscard]] inline auto inputEdges(const IR::Dependencies &dep, int depth) { return utils::NestedList{ utils::ListRange{store, NextAddr{}, [](Addr *a) -> int32_t { return a->getEdgeIn(); }}, - [=](int32_t id) { + [&](int32_t id) { return dep.inputEdgeIDs(id) | dep.activeFilter(depth) | - std::views::transform([=](int32_t i) -> Dependence { - IR::Dependencies d2 = dep; - return d2.get(Dependence::ID{i}); + std::views::transform([&](int32_t i) -> Dependence { + return dep.get(Dependence::ID{i}); }); }}; } - [[nodiscard]] inline auto outputEdges(IR::Dependencies dep, unsigned depth) { + [[nodiscard]] inline auto outputEdges(const IR::Dependencies &dep, + int depth) { return utils::NestedList{ utils::ListRange{store, NextAddr{}, [](Addr *a) -> int32_t { return a->getEdgeOut(); }}, - [=](int32_t id) { + [&](int32_t id) { return dep.outputEdgeIDs(id) | dep.activeFilter(depth) | - std::views::transform([=](int32_t i) -> Dependence { - IR::Dependencies d2 = dep; - return d2.get(Dependence::ID{i}); + std::views::transform([&](int32_t i) -> Dependence { + return dep.get(Dependence::ID{i}); }); }}; } struct InNode { - poly::Dependencies dep; - constexpr auto operator()(int32_t id) -> ScheduledNode * { - return dep.input(Dependence::ID{id})->getNode(); + const poly::Dependencies *dep; + constexpr 
auto operator()(int32_t id) const -> ScheduledNode * { + return dep->input(Dependence::ID{id})->getNode(); } }; struct OutNode { - poly::Dependencies dep; - constexpr auto operator()(int32_t id) -> ScheduledNode * { - return dep.output(Dependence::ID{id})->getNode(); + const poly::Dependencies *dep; + constexpr auto operator()(int32_t id) const -> ScheduledNode * { + return dep->output(Dependence::ID{id})->getNode(); } }; - [[nodiscard]] inline auto outNeighbors(IR::Dependencies dep, unsigned depth) { + [[nodiscard]] inline auto outNeighbors(const IR::Dependencies &dep, + unsigned depth) { return utils::NestedList{ utils::ListRange{store, NextAddr{}, GetEdge{}}, - DepFilter{dep, depth}}; + DepFilter{&dep, depth}}; } - [[nodiscard]] inline auto inNeighbors(IR::Dependencies dep, unsigned depth) { + [[nodiscard]] inline auto inNeighbors(const IR::Dependencies &dep, + unsigned depth) { return utils::NestedList{ utils::ListRange{store, NextAddr{}, GetEdge{}}, - DepFilter{dep, depth}}; + DepFilter{&dep, depth}}; } - [[nodiscard]] inline auto hasActiveEdges(IR::Dependencies dep, + [[nodiscard]] inline auto hasActiveEdges(const IR::Dependencies &dep, unsigned depth) const -> bool { - const auto f = [=](int32_t d) { + const auto f = [&](int32_t d) { return !dep.isSat(Dependence::ID{d}, depth); }; return std::ranges::any_of(inputEdgeIds(dep), f) || @@ -361,8 +361,7 @@ class ScheduledNode { [[nodiscard]] constexpr auto getSchedule() -> poly::AffineSchedule { return {mem}; } - [[nodiscard]] constexpr auto getLoopNest() const - -> NotNull { + [[nodiscard]] constexpr auto getLoopNest() const -> poly::Loop * { return loopNest; } @@ -373,8 +372,8 @@ class ScheduledNode { // [[nodiscard]] constexpr auto wasVisited2() const -> bool { return visited2; // } constexpr void visit2() { visited2 = true; } constexpr void unVisit2() { // visited2 = false; } - [[nodiscard]] constexpr auto getNumLoops() const -> unsigned { - return unsigned(mem[0]); + [[nodiscard]] constexpr auto 
getNumLoops() const -> ptrdiff_t { + return mem[0]; } // 'phiIsScheduled()` means that `phi`'s schedule has been // set for the outer `rank` loops. @@ -397,21 +396,23 @@ class ScheduledNode { -> math::Range { return _(phiOffset, phiOffset + getNumLoops()); } + /// numLoops x numLoops // NOLINTNEXTLINE(readability-make-member-function-const) [[nodiscard]] constexpr auto getPhi() -> MutSquarePtrMatrix { - return {mem + 1, math::SquareDims{unsigned(getNumLoops())}}; + return {mem + 1, math::SquareDims<>{unsigned(getNumLoops())}}; } + /// numLoops x numLoops [[nodiscard]] constexpr auto getPhi() const -> SquarePtrMatrix { - return {const_cast(mem) + 1, math::SquareDims{getNumLoops()}}; + return {const_cast(mem) + 1, math::SquareDims<>{getNumLoops()}}; } /// getSchedule, loops are always indexed from outer to inner [[nodiscard]] constexpr auto getSchedule(ptrdiff_t d) const -> PtrVector { - return getPhi()(d, _); + return getPhi()[d, _]; } [[nodiscard]] constexpr auto getSchedule(ptrdiff_t d) -> MutPtrVector { - return getPhi()(d, _); + return getPhi()[d, _]; } [[nodiscard]] constexpr auto getFusionOmega(ptrdiff_t i) const -> int64_t { return (mem + 1)[getNumLoopsSquared() + i]; @@ -453,8 +454,8 @@ class ScheduledNode { MutSquarePtrMatrix phi = getPhi(); ptrdiff_t indR = ptrdiff_t(indMat.numCol()); for (ptrdiff_t i = 0; i < r; ++i) { - phi(i, _(0, indR)) << indMat(i, _); - phi(i, _(indR, end)) << 0; + phi[i, _(0, indR)] << indMat[i, _]; + phi[i, _(indR, end)] << 0; } rank = r; } @@ -463,10 +464,10 @@ class ScheduledNode { return omegaOffset; } void resetPhiOffset() { phiOffset = std::numeric_limits::max(); } - [[nodiscard]] constexpr auto calcGraphMaxDepth() const -> unsigned { - unsigned maxDepth = 0; + [[nodiscard]] constexpr auto calcGraphMaxDepth() const -> int { + int maxDepth = 0; for (const ScheduledNode *n : getVertices()) - maxDepth = std::max(maxDepth, n->getNumLoops()); + maxDepth = std::max(maxDepth, int(n->getNumLoops())); return maxDepth; } friend inline 
auto operator<<(llvm::raw_ostream &os, @@ -481,12 +482,13 @@ static_assert(std::is_trivially_destructible_v); static_assert(sizeof(ScheduledNode) <= 64); // fits in cache line class ScheduleGraph { - IR::Dependencies deps; - unsigned depth; + const IR::Dependencies &deps; + unsigned depth_; public: using VertexType = ScheduledNode; - constexpr ScheduleGraph(unsigned depth) : depth(depth) {} + constexpr ScheduleGraph(const IR::Dependencies &deps_, unsigned depth) + : deps(deps_), depth_(depth) {} [[nodiscard]] static constexpr auto getVertices(ScheduledNode *nodes) -> utils::ListRange { @@ -496,11 +498,11 @@ class ScheduleGraph { -> utils::ListRange { return static_cast(nodes)->getVertices(); } - [[nodiscard]] constexpr auto outNeighbors(ScheduledNode *v) const { - return v->outNeighbors(deps, depth); + [[nodiscard]] auto outNeighbors(ScheduledNode *v) const { + return v->outNeighbors(deps, depth_); } - [[nodiscard]] constexpr auto inNeighbors(ScheduledNode *v) const { - return v->inNeighbors(deps, depth); + [[nodiscard]] auto inNeighbors(ScheduledNode *v) const { + return v->inNeighbors(deps, depth_); } }; @@ -508,10 +510,14 @@ class ScheduleGraph { namespace graph { // static_assert(AbstractPtrGraph); -static_assert(std::forward_iterator< - decltype(lp::ScheduleGraph{0}.outNeighbors(nullptr).begin())>); -static_assert(std::forward_iterator< - decltype(lp::ScheduleGraph{0}.inNeighbors(nullptr).begin())>); +static_assert(std::forward_iterator(), 0} + .outNeighbors(nullptr) + .begin())>); +static_assert(std::forward_iterator(), 0} + .inNeighbors(nullptr) + .begin())>); static_assert(AbstractPtrGraph); } // namespace graph } // namespace poly diff --git a/include/Optimize/CostFunction.hpp b/include/Optimize/CostFunction.hpp new file mode 100644 index 000000000..7891ab6be --- /dev/null +++ b/include/Optimize/CostFunction.hpp @@ -0,0 +1,798 @@ +#pragma once + +#include "IR/Address.hpp" +#include "IR/Instruction.hpp" +#include "IR/Node.hpp" +#include 
"IR/OrthogonalAxes.hpp" +#include "Optimize/CostModeling.hpp" +#include "Optimize/Legality.hpp" +#include "Optimize/RegisterFile.hpp" +#include "Polyhedra/Dependence.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace poly::CostModeling { +using containers::Pair; +using math::AbstractVector, math::AbstractMatrix, math::DensePtrMatrix, math::_; +using utils::Optional; + +/// POD. Gives counts for the different kinds of costs. +/// Fields: +/// `int16_t trip_count`- we're unlikely to change decisions for >32k +/// negative indicates compile-time known size. +/// `uint16_t memory` number of mem sets. +/// `bool exit` loop exit/entry. +/// `uint31_t compute` number of compute sets. +/// These give us info for iterating over the costs associated with a loop. +/// for (i : I){ +/// for (j : J){ +/// for (k : K){ // leaf +/// ... +/// } +/// for (k : K){ // leaf +/// ... +/// } +/// } +/// for (j : J){ // leaf +/// ... +/// } +/// } +/// For leaves, we compute latency as well as register cost. +/// Note that we compute all costs at the header for a given depth, +/// thus we only need headers and num-pops. 
+struct LoopCostCounts { + uint16_t known_trip : 1; + uint16_t trip_count : 15; + uint16_t compute; + uint16_t omemory; + uint8_t cmemory; + uint8_t exit : 5; /// how many blocks we exit after this + uint8_t l2vectorWidth : 3 {0}; // 1<<7 == 128 +}; +static_assert(sizeof(LoopCostCounts) == 8); + +/// Order is outermost -> innermost +struct VectorizationFactor { + uint32_t l2factor; + // trailing bit is outermost loop, so if iterating by shifting, + // we go outer->inner + uint32_t indexMask; + constexpr operator IR::VectorWidth() const { + return IR::VectorWidth{unsigned(1) << l2factor, l2factor}; + } +}; + +/// TODO: maybe two `uint8_t`s + `uint16_t` +/// We only get up to 16 dimensions, but that is already excessive +/// One `uint8_t` gives contig axis, the other the index into +/// the memory cost kind. Thus, the struct could differentiate +/// loads vs stores by itself, while also differentiating +/// between eltypes. +/// Another option is to store individual `MemoryCosts`, +/// so that we can aggregate/sum up. +struct MemCostSummary { + IR::Addr::Costs memcost; + OrthogonalAxes orth; + // [[nodiscard]] constexpr auto contigAxis() const -> uint32_t { + // return data & 0xff; + // } + // mask containing `0` for dependent axes, 1s for independent + // should contain `0` for all non-existent loops, e.g. + // for (i = I, j = J, k = K, l = L) { + // A[j,l] + // for (a = A, b = B){ .... } + // } + // The mask should equal (1<<0) | (1<<2) (for the i and k). + // Only loops it is nested in that it doesn't depend on count. 
+ // [[nodiscard]] constexpr auto indepAxes() const -> uint32_t { + // return data >> 8; + // }; +}; +constexpr auto operator&(MemCostSummary a, MemCostSummary b) -> uint32_t { + return a.orth.indep & b.orth.indep; +} +/// Basic idea is that costs are divided by loops they do not depend on +/// +constexpr auto cost(const AbstractMatrix auto &invunrolls, uint32_t indepAxes) + -> utils::eltype_t { + if (!indepAxes) return 1; + uint32_t tz = std::countr_zero(indepAxes); + utils::eltype_t c{invunrolls[0, tz++]}; + for (uint32_t d = indepAxes >> tz, i = tz; d; d >>= tz, i += tz) { + tz = std::countr_zero(d); + c *= invunrolls[0, i + tz++]; + } + return c; +} +// costs is an array of length two. +// memory costs, unnormalized by `prod(unrolls)` +// `invunrolls` is a matrix, row-0 are the inverse unrolls, row-1 unrolls. +constexpr auto cost(const AbstractMatrix auto &invunrolls, MemCostSummary mcs, + VectorizationFactor vfi) + -> utils::eltype_t { + auto [mc, orth] = mcs; + using T = utils::eltype_t; + T c{cost(invunrolls, orth.indep)}; + if (!(orth.indep & vfi.indexMask)) { + // depends on vectorized index + if (vfi.indexMask & orth.contig) { + c *= mc.contiguous; + } else if (!orth.contig) { + c *= mc.discontiguous; + } else { + // Discontiguous vector load. + // We consider two alternatives: + // 1. gather/scatter (discontiguous) + // 2. contiguous load for each vectorization factor of length equal to + // unroll, followed by shuffles. + // E.g., unroll contig by 4, another dim is vectorized by 8: + // we'd have 8 vloads (max(4/8,1) * 8), followed by 4*log2(8) shuffles. 
+ // w_0 = [0, 8, 16, 24] + // w_1 = [1, 9, 17, 25] + // w_2 = [2, 10, 18, 26] + // w_3 = [3, 11, 19, 27] + // w_4 = [4, 12, 20, 28] + // w_5 = [5, 13, 21, 29] + // w_6 = [6, 14, 22, 30] + // w_7 = [7, 15, 23, 31] + // + // x_0 = [0, 8, 16, 24, 4, 12, 20, 28] + // x_1 = [1, 9, 17, 25, 5, 13, 21, 29] + // x_2 = [2, 10, 18, 26, 6, 14, 22, 30] + // x_3 = [3, 11, 19, 27, 7, 15, 23, 31] + // + // y_0 = [0, 1, 16, 17, 4, 5, 20, 21] + // y_1 = [8, 9, 24, 25, 12, 13, 28, 29] + // y_2 = [2, 3, 18, 19, 6, 7, 22, 23] + // y_3 = [10, 11, 26, 27, 14, 15, 30, 31] + // + // z_0 = [0, 1, 2, 3, 4, 5, 6, 7] + // z_1 = [8, 9, 10, 11, 12, 13, 14, 15] + // z_2 = [16, 17, 18, 19, 20, 21, 22, 23] + // z_3 = [24, 25, 26, 27, 28, 29, 30, 31] + // + // Or, if we unroll contig by 8, and another dim is vectorzeed by 2, we'd + // have 8 = (max(8/2,1) * 2) vloads, 8*log2(2) + // shuffles. + // w_0_0 = [0, 2] + // w_0_1 = [4, 6] + // w_0_2 = [8, 10] + // w_0_3 = [12, 14] + // w_1_0 = [1, 3] + // w_1_1 = [5, 7] + // w_1_2 = [9, 11] + // w_1_3 = [13, 15] + // + // z_0 = [0, 1] + // z_1 = [2, 3] + // z_2 = [4, 5] + // z_3 = [6, 7] + // z_4 = [8, 9] + // z_5 = [10, 11] + // z_6 = [12, 13] + // z_7 = [14, 15] + // Earlier, I had another term, `4*log2(max(8/4,1)) `8*log2(max(2/8,1))` + // i.e. u*log2(max(v/u,1)) + // but I think we can avoid this by always working with vectors that are + // the larger of `u` and `v`, inserting at the start or extracting at the + // end, whichever is necessary. + // We divide by `u[contig]`, as it is now accounted for + // So we have + // v*max(u/v, 1) + u*log2(v) + T iu{invunrolls[0, orth.contig]}, u{invunrolls[1, orth.contig]}; + utils::invariant(iu == 1 / u); + // FIXME: memory and shuffle cost should be separate? 
+ c *= math::smin(mc.contiguous * math::smax(u, (1 << vfi.l2factor) * iu) + + u * vfi.l2factor, + mc.discontiguous); + } + } else c *= mc.scalar; + return c; +} +/// General fallback method for those without easy to represent structure +/// inds is an `IR::Address->indexMatrix()`, thus it is `arrayDim() x +/// getNumLoops()` +/// Non-standard structure here means that we have at least one loop with +/// more than one array dimension. +/// For these, we use the incorrect formula: +/// +constexpr auto cost(const AbstractMatrix auto &invunrolls, MemCostSummary orth, + VectorizationFactor vfi, DensePtrMatrix inds) + -> utils::eltype_t { + using T = utils::eltype_t; + T c{1}; + auto [arrayDim, numLoops] = shape(inds); + utils::invariant(numLoops > 0); + utils::invariant(arrayDim > 0); + utils::invariant(arrayDim <= 64); + utils::invariant(invunrolls.numCol(), inds.numCol()); + for (ptrdiff_t d = 0; d < arrayDim; ++d) { + int64_t g = 0; + containers::BitSet64 bs; + T uprod; + for (ptrdiff_t l = 0; l < numLoops; ++l) { + if ((uint32_t(1) << l) == vfi.indexMask) continue; + int64_t a = inds[d, l]; + if (!a) continue; + bool docontinue{false}; + // We only + for (ptrdiff_t k = 0; k < arrayDim; ++k) { + if ((k == d) || (!inds[k, l])) continue; + docontinue = (inds[d, _] != inds[k, _]) || (d > k); + if (docontinue) break; + } + if (docontinue) continue; + if (bs.empty()) { + g = a; + uprod = invunrolls[0, l]; + } else { + g = math::gcd(g, a); + uprod *= invunrolls[0, l]; + } + bs.insert(l); + }; + if (bs.size() < 2) continue; + T prod{1}; + for (ptrdiff_t l : bs) { + if ((uint32_t(1) << l) == vfi.indexMask) continue; + int64_t a = inds[d, l]; + if (!a) continue; + prod *= (1 - (a / g) * (uprod / invunrolls[0, l])); + } + c *= (1 - prod); + } + // c is a scaling factor; now we proceed to calculate cost similaly to the + // orth-axis implementation above. + return c * cost(invunrolls, orth, vfi); +} + +// We need to define an unroll ordering. 
+struct RegisterUseByUnroll { +  math::Vector> masks{}; // coef, mask pairs +  unsigned register_count; // includes constant offset +  [[nodiscard]] constexpr auto begin() const +    -> const std::array * { +    return masks.begin(); +  } +  [[nodiscard]] constexpr auto end() const -> const std::array * { +    return masks.end(); +  } +}; +// TODO: define function to implement register_count +constexpr auto registerPressure(const AbstractMatrix auto &invunrolls, +                                const RegisterUseByUnroll &r) +  -> utils::eltype_t { +  utils::eltype_t acc{0}; +  for (auto [c, m] : r) { +    utils::eltype_t t{1}; +    containers::BitSet64 bs{std::array{m}}; +    for (ptrdiff_t i : bs) t *= invunrolls[1, i]; +    acc += c * t; +  } +  // note the softplus(8x)/4, so 2x scaling on penalty representing +  // the stack load+store combination. +  return 0.25 * math::softplus(8.0 * (acc - r.register_count)); +} + +inline auto registerUse(const llvm::TargetTransformInfo &TTI, +                        LoopDepSatisfaction deps, IR::Loop *L) +  -> RegisterUseByUnroll { +  RegisterUseByUnroll u; +  // Ideally, we'd have the transitive closure of dependencies, or better yet +  // top-sorted IDs for quick checks on relative order w/ respect to the current +  // top-sorting. +  // E.g. ID_x < ID_y proves it is legal for ID_x to be first, but does not +  // prove the opposite is illegal. The weak proof may often be enough. "better +  // yet" is because the check is very efficient, not because it is powerful. +  // For a somewhat-efficient check of the former variety, we'd probably want +  // to use `BitSet`s + canonical ID-values (not position-based) for each +  // `Value`, which is something we could do if switching to a more +  // data-oriented design. +  // The simple top-index check is enough for checking if something is in front, +  // behind, or within a loop. +  // +  // We scan dependencies, looking for reduction latencies +  // can we use `TTI.getRegUsageForType()`?
+ // TargetTransformInfoImplBase defaults to `1`, and some backends like x86 + // do not override it, so it is not something we can rely on. + // + // d.input()->reductionLatency() > 0 indicates a dependence is live across the + // loop otherwise, we only consider instructions within the loop? + // ...or...perhaps we only need to consider the loop leaf's instructions; we + // can see what is written and referenced. + // For now, will try and follow the latter approach. + for (IR::Value *v : L->nodes()) { + // use `v` + } + return u; +} + +inline auto +memcosts(const AbstractMatrix auto &invunrolls, VectorizationFactor vf, + math::PtrVector> orth_axes) { + utils::eltype_t ic{}; + for (auto [oa, mc] : orth_axes) ic += cost(invunrolls, oa, mc, vf); + return ic; +} +inline auto +memcosts(const AbstractMatrix auto &invunrolls, VectorizationFactor vf, + math::PtrVector>> + orth_axes) { + utils::eltype_t ic{}; + for (auto [oa, inds] : orth_axes) ic += cost(invunrolls, oa, vf, inds); + return ic; +} +inline auto compcosts(const AbstractMatrix auto &invunrolls, + math::PtrVector> compindep) { + utils::eltype_t cc{}; + for (auto [oa, sf] : compindep) cc += cost(invunrolls, oa) * sf; + return cc; +} + +// We then additionally need a throughput vs latency estimator, and code for +// handling the tail. +// Standard throughput is fairly trivial/should be a vector sum, +// although we may have some operations not dependent on all loops, +// in which case unrolling the loops they don't depend on will help. +// Thus, it would probably be best to handle these with code +// similar to the memory cost-fun above, ideally we can abstract away the core. 
+// +/// memcost = I*J*(Ui*Uj*C_{Al} + Uj*C_{yl}) / (Ui*Uj) + +/// I*(C_{xl}*Ui + C_{xs}*Ui) / Ui +/// cthroughput = I*J*(Ui*Uj*C_{t,fma}) / (Ui*Uj) + I*(Ui*C_{t,add}*(Uj-1)) / Ui +/// Ui clatency = I*J*C_{l,fma}/smin(Ui*Uj, C_{l,fma}/C_{t,fma}) + +/// I*C_{l,add}*log2(Uj) +/// +/// Here, we define a cost fn that can be optimized to produce +/// +/// vectorization and unrolling factors. +/// We assemble all addrs into a vector, sorted by depth first traversal order +/// of the loop tree, e.g. +/// A(0) --> B(1) --> C(2) --> D(3) +/// \-> E(5) --> F(6) \-> G(4) +/// \-> H(7) --> I(8) --> J(9) +/// Focusing only on memory addresses initially... +/// The cost of a particular read/write can be looked up from LLVM +/// as a function of scalar/gather/scatter/broadcast/contiguous. +/// Then this can be adjusted by the product of all unroll factors of loops +/// it depends on, divided by the product of all unroll factors of all +/// containing loops. +/// To optimize, we can branch and bound. Unrolling factors lead to a natural +/// relaxation that plays well, but less so for binary variables like which +/// loop is vectorized. Additionally, patterns such as replacing +/// gather/scatters with shuffle sequences need special handling, that +/// restricts the branch and bound to powers of 2. To be able to build such a +/// cost model, we need to estimate the number of live variables as a result +/// of unroll factors, in order to impose constraints. +/// +/// We use soft constraints for register pressuring, representing the +/// store/reload pair of a spill. +/// +/// Furthermore, we also need to consider the possibility of dependency +/// chains. Consider, for example +/// ``` +/// for (ptrdiff_t i = 0; i < I; ++i){ +/// eltype_t xi = x[i]; +/// for (ptrdiff_t j = 0; j < J; ++j) +/// xi += A[i][j] * y[j]; +/// x[i] = xi; +/// } +/// ``` +/// The `j` loop itself has a dependency chain. +/// Two options for addressing this: +/// 1. 
unrolling `j`, cloning the accumulation registers, and reducing at the +/// end. +/// 2. unrolling the `i` loop. +/// The second option is better, but may not be possible, e.g. if there is no +/// `i` loop or it carries some dependency. Thus, we want our model to unroll +/// `i` when legal, and unroll `j` otherwise. +/// Assuming a throughput of 2 fma/cycle and a latency of 4 cycles, an +/// estimate of the cost as a function of I, J, Ui, and Uj is (ignoring +/// vectorization): 4*I*J/min(Ui*Uj, 2*4) + 4*I*log2(Uj) The first term is +/// latency per fma (because of the dependency chain) * the number of +/// iterations, divided by however many unrolling allows us to have inflight. +/// The second term is for the reduction of the cloned `Uj` accumulators. Each +/// step in the reduction has a latency of 4 cycles, and we need to do +/// `log2(Uj)` steps. +/// +/// Note, `y-softplus(l*(y-x))/l` is a good smooth minimum function, +/// monotonic in `x` and differentiable everywhere. `l` controls +/// sharpness. Likewise, `y+softplus(l*(x-y))/l` for `max`. +/// +/// Thus, a cost function for the above gemv could be something like +/// memcost = I*J*(Ui*Uj*C_{Al} + Uj*C_{yl}) / (Ui*Uj) + +/// I*(C_{xl}*Ui + C_{xs}*Ui) / Ui +/// cthroughput = I*J*(Ui*Uj*C_{t,fma}) / (Ui*Uj) + I*(C_{t,add}*(Uj-1)) / +/// Ui clatency = I*J*C_{l,fma}/smin(Ui*Uj, C_{l,fma}/C_{t,fma}) + +/// I*C_{l,add}*log2(Uj) +/// cost = memcost + smax(cthroughput, clatency) +/// or, if it is easier to solve: +/// cost = memcost + cthroughput + clatency +/// +/// We may initially want to add a small cost for loop increment and +/// cmp/branch, to encourage unrolling more generally, plus a cost for +/// unrolling to discourage any excess unrolling when it doesn't provide +/// meaningful benefits (representing the general cost of code size/ filling +/// uop cache -- we definitely want loops to fit in the uop cache of any CPU +/// sporting one!!! ).
+/// +/// +/// +/// Note that if we had +/// ``` +/// for (ptrdiff_t i = 0; i < I; ++i){ +/// eltype_t yi = y[i]; +/// for (ptrdiff_t j = 0; j < J; ++j) +/// x[j] += A[i][j] * yi; +/// } +/// ``` +/// then unrolling the `i` loop doesn't increase OOO (Out Of Order execution), +/// but we can assume that as successive `j` iterations are independent/do not +/// have a dependency chain, this isn't an issue. That is, we only consider +/// reductions across the inner-most loop as requiring cloning of accumulators. +/// +/// On throughput modeling, LLVM seems to generally give a recip throughput of +/// 1 for pipelined instructions, regardless of number of ports. This is +/// actually what we want, as this allows RTs to be additive (e.g., we may +/// have a fma that is able to run on 2 ports (e.g. p0 or p5) and a permute +/// that can only execute on one (e.g. p5); when mixing these instructions, +/// they have the same effective cost -- they use a port -- and the more +/// limited port choices of one isn't a problem so long as others can use what +/// remains. For our purposes, it isn't worth getting too fancy here. It is +/// worth noting that the baseline model presented here +/// https://arxiv.org/pdf/2107.14210.pdf +/// performed respectively well when compared to vastly more sophisticated +/// tools; for example, it performed similarly well as llvm-mca on most tested +/// architectures! +/// The baseline model used above for loops was +/// max(1, (n-1)/i, m_r/m, m_w/w) +/// where +/// n - the number of instructions in the benchmark (-1 because of assumption +/// that the cmp and branch are macro-fused, meaning the last two instructions +/// count as 1) +/// m_r - number of memory reads +/// m_w - number of memory writes +/// i - the issue width, e.g. 4 for Intel Skylake CPUs. +/// m - number of reads the CPU can do per cycle (2 for all in the article) +/// w - number of writes the CPU can do per cycle (e.g. 
2 for Ice Lake and +/// newer, 1 for older) Unfortunately, we cannot get the CPU-specific +/// information (`i`,`m`,or`w`) from LLVM. However, these are largely a matter +/// of scale, and are generally correlated. E.g., Intel's Alderlake's values +/// would be 6, 3, and 2, vs the older Skylake's 4, 2, and 1. While not all +/// the ratios are equal (`w`'s is 2 instead of 1.5), it is unlikely that many +/// optimization decisions are going to be made differently between them. +/// A possible exception is that we may wish to unroll more for CPUs with more +/// out of order execution abilities. `getMaxInterleaveFactor` is an indicator +/// of whether the pipeline might be very narrow. +/// +/// +/// Given `x[a*i + b*j]`, where neither `i` or `j` are vectorized (and `a` and +/// `b` are compile time constants), we use: +/// (a_g*U_i + b_g*U_j - a_g*b_g) / (U_i*U_j) +/// = a_g/U_j + b_g/U_i - a_g*b_g / (U_i*U_j) +/// = 1 - (1 - a_g/U_j ) * (1 - b_g/U_i) +/// as the cost, where `a_g = abs(a/gcd(a,b))` and `b_g = abs(b/gcd(a,b))`. +/// +/// For more, we generalize this pattern +/// = 1 - \prod_{d}^{D}\left(1 - \frac{coef_{g,d}U_d}{\prod_{i}^{D}U_i}\right) +/// +/// In the `D=3` case, this expands to +/// 1 - (1 - a_g/(U_j*U_k))(1 - b_g/(U_i*U_k))(1 - c_g/(U_i*U_j)) +/// = 1 - (1 - c_g/(U_i*U_j))* +/// (1 - a_g/(U_j*U_k) - b_g/(U_i*U_k)) + a_g*b_g/(U_i*U_j*U_k^2)) +/// = a_g/(U_j*U_k) + b_g/(U_i*U_k)) + c_g/(U_i*U_j) - a_g*b_g/(U_i*U_j*U_k^2)) +/// - a_g*c_g/(U_i*U_j^2*U_k) - b_g*c_g/(U_i^2*U_j*U_k)) +/// + a_g*b_g*c_g/(U_i^2*U_j^2*U_k^2)) +/// +/// TODO: check the degree of correctness... +/// I kind of just made something up that looks sort of right. +/// +/// For register consumption, we +/// 1. Determine an ordering of unroll factors for each inner most loop. +/// 2. Define a registers used as a function of these unroll factors. 
+/// +/// Loads from inner unrolls that don't depend on any outer-unrolls must have +/// lifetimes spanning all outer-unrolls, if they're re-used by an op +/// depending on that outer. Our heuristic for ordering unrolls is based on +/// the twin observations: +/// 1. Inner unrolls are likely to consume more registers for longer. +/// 2. More ops with overlapping lifetimes dependent on one particular loop +/// require more registers. +/// +/// As the ordering of unrolls influences register pressure, we sort them +/// first by register cost per unroll (placing those with the highest register +/// cost outside), and then by memory op cost within these categories, placing +/// the highest costs innermost (higher memory cost means lower unroll +/// relative to the lower cost, so that we get more reuse on the higher cost +/// operations; lower unroll means we place inside, reducing the cost of these +/// unrolls). +/// +/// So, how do we define register cost per unroll in an unroll-order +/// independent manner, so that we can use this for determining the order? +/// ``` +/// for (int m=0; m*--> (Cmn +=) +/// B[k,n] -/ +/// +/// Register Costs: +/// Amk_rc = U_m * U_k // live until use +/// Bkn_rc = U_k * U_n // live until use +/// Cmn_rc = U_m * U_n // live until end of loop +/// Memory Op Costs, m-vectorized (assuming column-major): +/// Amk_rc = L_c * U_m * U_k +/// Bkn_rc = L_b * U_k * U_n +/// Cmn_rc = 0 * U_m * U_n +/// L_c > L_b, so A-contiguous load should be interior to B-broadcast load. +/// +/// As the cost function is evaluated many times, we try and move as much work +/// to the setup as possible. Loop cost is thus divided into some structured +/// components, and much of the interpreting work hoisted to a step defining a +/// parameterization. +/// Ideally, we would avoid repeating this work for different vectorization +/// decisions. However, vectorization decisions may impact unroll ordering +/// decisions. 
+/// +/// +/// +/// /// +class LoopTreeCostFn { + // counts per loop, indicating how many of each of the following three fields + math::Vector cost_counts{}; + // orthogonal axes and costs + math::Vector orth_axes{}; + // non-orthogonal axes and costs + math::Vector>> conv_axes{}; + // compute cost summary + math::Vector> compute_independence{}; + // for leaves, we need latency information + llvm::SmallVector>> + leafs{}; + unsigned maxVectorWidth; + ptrdiff_t max_depth{}; + + constexpr void clear() { + cost_counts.clear(); + orth_axes.clear(); + conv_axes.clear(); + compute_independence.clear(); + leafs.clear(); + max_depth = 0; + } + + // should only have to `init` once per `root`, with `VectorizationFactor` + // being adjustable. + // Note: we are dependent upon scanning in top order, so that operands' + // `calcLoopDepFlag()` are calculated before we get. + // TODO: vec factor should be a tree-flag + // Iteration order: + // We fully iterate over a loop before descending + // for (i : I){ + // // block 0 + // for (j : J){ + // // block 1 + // } + // // block 2 + // for (j : J){ + // // block 3 + // } + // // block 4 + // } + // we'd iterate 0, 2, 4, 1, 3. + // This way we can store once we hit the end. + // If there are no subloops to iterate to after, then we store the exit count. + // If there are, then the exit-count is 0, forward '1+exit' count to the last + // sub-loop, and `1` to all previous sub-loops. + // It's thus natural to implement recursively. 
+ // NOLINTNEXTLINE(misc-no-recursion) + void initLoop(LoopDepSatisfaction deps, IR::Loop *L, unsigned maxl2VF, + const llvm::TargetTransformInfo &TTI, ptrdiff_t depth, + unsigned exitCount) { + invariant(depth > 0); + ptrdiff_t compute = compute_independence.size(), omemory = orth_axes.size(), + cmemory = conv_axes.size(); + unsigned maxVF = 1 << maxl2VF; + // Loop and push throughput costs + for (IR::Node *N = L->getChild(); N; N = N->getNext()) { + if (auto *A = llvm::dyn_cast(N)) { + OrthogonalAxes oa = A->calcOrthAxes(depth); + IR::Addr::Costs rtl = A->calcCostContigDiscontig(TTI, maxVF); + if (oa.indep_axes) { + // check for duplicate + bool found = false; + for (ptrdiff_t i = omemory; i < orth_axes.size(); ++i) { + if (orth_axes[i].orth != oa) continue; + found = true; + orth_axes[i].memcost += rtl; + break; + } + if (!found) orth_axes.emplace_back(rtl, oa); + } else { + bool found = false; + for (ptrdiff_t i = cmemory; i < conv_axes.size(); ++i) { + if (conv_axes[i].first.orth != oa) continue; + if (conv_axes[i].second != A->indexMatrix()) continue; + found = true; + conv_axes[i].first.memcost += rtl; + break; + } + if (!found) + conv_axes.emplace_back(MemCostSummary{rtl, oa}, A->indexMatrix()); + } + } else if (auto *C = llvm::dyn_cast(N)) { + bool found = false; + uint32_t indep = C->calcLoopDepFlag(depth); + float cc{float( + C->getCost(TTI, IR::VectorWidth{maxVF, maxl2VF}).recipThroughput)}; + for (ptrdiff_t i = compute; i < compute_independence.size(); ++i) { + if (compute_independence[i].second != indep) continue; + found = true; + compute_independence[i].first += cc; + break; + } + if (!found) compute_independence.emplace_back(cc, indep); + } // else if (auto *S = llvm::dyn_cast(N)) { + } + auto [known_trip, trip_count] = L->getAffineLoop()->tripCount(depth); + uint16_t compcnt = compute_independence.size() - compute, + omemcnt = orth_axes.size() - omemory, + cmemcnt = conv_axes.size() - cmemory; + IR::Loop *SL = L->getSubLoop(); + 
cost_counts.emplace_back(known_trip, trip_count, compcnt, omemcnt, cmemcnt, +                             SL ? 0 : exitCount); +    if (SL) iterLoopLevel(deps, SL, maxl2VF, TTI, ++depth, exitCount); +    else leafCosts(deps, L, maxl2VF, TTI); +  } +  void leafCosts(LoopDepSatisfaction deps, IR::Loop *L, unsigned maxl2VF, +                 const llvm::TargetTransformInfo &TTI) { +    // TODO: if (!SL) we're in a leaf, and need compute latency +    // We use the `IROptimizer::loopDepSats` to check the dependencies held at +    // the loop. We check these for those that look like reductions that are +    // legal to reassociate (we check this earlier and set +    // `in->reassociableReductionPair()==out`), e.g. integer add chains or +    // floating point with the reassociate FMF set. +    // When we have reductions, we have src->dst chains stored through +    // `linkReductionDst()` that can be used for accumulating latencies. +    // FIXME (maybe): Current implementation only allows each instruction to be +    // a part of 1 chain. +    // for (j in J){                  // arbitrary number of outer +    // loops +    //   %w = %array[j...]; +    //   %x = foo(%w); +    //   for (i in I){ // inner loop(s) +    //     %y = bar(%x); +    //   } +    //   %z = quz(%y); +    //   %array[j...] = %z; +    // } +    // Rather than using PhiNodes, we represent dependencies through addresses. +    // we can get legality from the loop. +    // The trickier thing to compute here is register pressure +    llvm::InstructionCost::CostType latency{0}; +    for (poly::Dependence d : deps.depencencies(L)) { +      // instruction latency can be a function of vector width +      latency = +        std::max(latency, d.input()->reductionLatency(TTI, maxVectorWidth)); +    } +    CostModeling::Legality legality = L->getLegality(); +    uint16_t l = std::numeric_limits::max(); +    if (l > latency) l = latency; +    // for reg use, let's add register dep flag +    // what kind of traversal would minimize width? +    // breadth-first lets us retire early, but can increase +    // live count? +    // Note, every reduction must add register contribution.
+ leafs.emplace_back(registerUse(TTI, deps, L), + Pair{l, legality.numReductions()}); + // for (IR::Node *N = L->getChild(); N; N = N->getNext()) {} + return; + }; + + // NOLINTNEXTLINE(misc-no-recursion) + void iterLoopLevel(LoopDepSatisfaction deps, IR::Loop *L, unsigned maxl2VF, + const llvm::TargetTransformInfo &TTI, ptrdiff_t depth, + unsigned exitCount) { + do { + IR::Loop *N = L->getNextLoop(); + unsigned ec = N ? ++exitCount : 1; + initLoop(deps, N, maxl2VF, TTI, depth, ec); + L = N; + } while (L); + } + +public: + // this is a vector fun, where indexing may do non-trivial computation + // also, mapping from this vector to loop position isn't trivial either + // hence, we use a 2 x max_depth matrix that we copy into as we descend + // (and pop from as we ascend). Row `0` is for inverse values, + // and row `1` for direct values. + // Inverses are favored as our costs fns use them more often. + constexpr auto operator()(alloc::Arena<> alloc, + const AbstractVector auto &x) const { + using T = utils::eltype_t; + utils::invariant(max_depth < 16); + // row 0: inverse unrolls + // row 1: unrolls + // row 2: cumprod invunroll + math::MutArray> invunrolls{ + math::matrix(alloc, math::Row<3>{}, math::Col<>{max_depth})}; + ptrdiff_t i = 0, depth = 0, mi = 0, mc = 0, ci = 0, li = 0; + double tripcounts[16]; + VectorizationFactor vf{}; + // we evaluate every iteration + T c{}; + for (auto [comptimetrip, trip_count, compute, omem, cmem, exit, l2vw] : + cost_counts) { + if (l2vw) { + invariant(vf.l2factor == 0); + invariant(vf.indexMask == 0); + vf.l2factor = l2vw; + vf.indexMask = uint32_t(1) << depth; + } + invunrolls[1, depth] = x[i++]; + invunrolls[2, depth] = invunrolls[0, depth] = 1 / invunrolls[1, depth]; + if (depth) invunrolls[2, depth] *= invunrolls[2, depth - 1]; + tripcounts[depth] = + (depth ? 
tripcounts[depth - 1] * trip_count : trip_count); + T cc{compcosts(invunrolls, compute_independence[_(0, compute) + ci])}; + ci += compute; + if (exit) { + auto [reguse, lt] = leafs[li++]; + auto [l, numreduct] = lt; + // we're now in a leaf, meaning we must consider register costs, + // as well as reduction costs and latency of reduction chains. + cc = smax(cc, l * invunrolls[2, depth]); + cc += registerPressure(invunrolls, reguse); + if (numreduct) { + cc += + compcost(invunrolls, compute_independence[_(0, numreduct) + ci]) * + log2(invunrolls[1, depth]) / trip_count; + ci += numreduct; + } + } + cc += memcosts(invunrolls, vf, orth_axes[_(0, omem) + mi]); + mi += omem; + cc += memcosts(invunrolls, vf, conv_axes[_(0, cmem) + mc]); + mc += cmem; + c += tripcounts[depth] * cc; + // Decrement depth by `exit - 1`; the `-1` corresponds + // to descending into this header, while we exit `exit` loops afterwards. + depth -= exit - 1; // don't fuse `-1` to keep `exit` unsigned + if (depth <= std::countr_zero(vf.indexMask)) { + vf.l2factor = 0; + vf.indexMask = 0; + } + } + return c; + } + void init(LoopDepSatisfaction deps, IR::Loop *root, unsigned maxl2VF, + llvm::LLVMContext &C, const llvm::TargetTransformInfo &TTI) { + clear(); // max_depth = 0; + maxVectorWidth = RegisterFile::estimateMaximumVectorWidth(C, TTI); + iterLoopLevel(deps, root->getSubLoop(), maxl2VF, TTI, 0, 0); + } + LoopTreeCostFn(LoopDepSatisfaction deps, IR::Loop *root, unsigned maxVF, + llvm::LLVMContext &C, const llvm::TargetTransformInfo &TTI) + : maxVectorWidth{unsigned(1) << maxVF} { + init(deps, root, maxVF, C, TTI); + } +}; + +} // namespace poly::CostModeling diff --git a/include/Optimize/CostModeling.hpp b/include/Optimize/CostModeling.hpp new file mode 100644 index 000000000..a8176ea4c --- /dev/null +++ b/include/Optimize/CostModeling.hpp @@ -0,0 +1,948 @@ +#pragma once + +#include "Dicts/BumpMapSet.hpp" +#include "Graphs/Graphs.hpp" +#include "IR/Address.hpp" +#include 
"LinearProgramming/LoopBlock.hpp" +#include "LinearProgramming/ScheduledNode.hpp" +#include "Optimize/Legality.hpp" +#include "Polyhedra/Dependence.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +namespace poly { +namespace IR { +/// If this is a store of a reassociable reduction, this sets the +/// `reassociableReduction` field to the corresponding load, and that field of +/// the load to `this` store. +/// It requires `Addr` to have been sorted, so we check the first output edge of +/// this store. If that edge is a load within the same loop, and has a time +/// dependence, we check for a reassociable chain of compute operations +/// connecting them. If such a chain, without any non-reassociable chains, +/// exists, then we mark them as reassociable. +/// Note, with sorting +/// for (int i = 0; i < I; ++i) +///   for (int j = 0; j < J; ++j) +///     x[i] = x[i] + A[j,i] * y[j]; +/// x[i] = acc; +/// +/// we have the store `x[i]` is the source for the `x[i]` load on a future +/// `j` iteration. +/// However, our IR would be optimized into: +/// +/// for (int i = 0; i < I; ++i){ +///   acc = x[i]; +///   for (int j = 0; j < J; ++j) +///     acc += A[j,i] * y[j]; +///   x[i] = acc; +/// } +/// +/// The same thing applies: `j` is the loop that satisfies the dependency, +/// but we hoisted the load/store pair out. +/// This must be called after `sortEdges`, so that output edges of the store +/// `x[i] = acc` are top sorted. The load `acc = x[i]` should be the very +/// first output topologically -- after all, it occurs before the store!! +/// TODO: does `Addr` hoisting handle this??
+/// Consider also the example: +/// int64_t x[1]{}; +/// for (ptrdiff_t n = 0; n < N; ++n){ +/// x[0] = x[0] + y[n]; +/// z[n] = x[0]; +/// } +/// this is harder to understand than, but behaves the same as +/// z[0] = y[n]; +/// for (ptrdiff_t n = 1; n < N; ++n){ +/// z[n] = z[n-1] + y[n]; +/// } +/// int64_t x[1]{z[N-1]}; +/// which does not have any reductions. +/// This should be handled because, if we had a loop like +/// int64_t x[1]{}; +/// for (ptrdiff_t n = 0; n < N; ++n) x[0] = x[0] + y[n]; +/// it should be optimized into +/// int64_t x[1]{}; +/// auto xv = x[0]; +/// for (ptrdiff_t n = 0; n < N; ++n) xv = xv + y[n]; +/// x[0] = xv; +/// However, the assignment `z[n]` should block the hoisting of the load/store +/// and we can check that failure to hoist for verifying legality. +constexpr inline void +Addr::maybeReassociableReduction(const Dependencies &deps) { + // we only run for `this->isStore() && dst->isLoad()` + if (isLoad()) return; + // we should have a store whose first output edge is the load for + // the following iteration. This iter is the reverse-time edge. + auto edges{outputEdgeIDs(deps, getCurrentDepth())}; + auto B = edges.begin(); + if (B == edges.end()) return; + poly::Dependence::ID id{*B}; + if (deps.revTimeEdge(id) < 0) return; + IR::Addr *dst = deps.output(id); + if (dst->isStore() || (getLoop() != dst->getLoop())) return; + // if we failed to hoist the `Addr` out of time-dims, then we cannot optimize. 
+ if (getCurrentDepth() > deps.satLevel(id)) return; + if (reassociableReduction == dst) return; // multiple time dims, already found + auto *c = llvm::dyn_cast(getStoredVal()); + if (!c) return; + if (findThroughReassociable(dst, c) != 1) return; + reassociableReduction = dst; + dst->reassociableReduction = this; +} + +} // namespace IR +namespace CostModeling { +using poly::Dependence; +// struct CPUExecutionModel {}; + +template using Vec = math::ResizeableView; + +// TODO: instead of this, update in-place and ensure all Addr are +// over-allocated to correspond with max depth? Because we parse in reverse +// order, we have max possible depth of `ScheduledNode`s using it at time we +// create. + +/// LoopTree +/// A tree of loops, with an indexable vector of IR::Loop*s, to facilitate +/// construction of the IR::Loop graph, from the fusion omegas +class LoopTree { + // The root of this subtree + Valid loop; + // LoopTree *parent{nullptr}; // do we need this? + Vec children{}; + unsigned depth{0}; + // We do not need to know the previous loop, as dependencies between + // the `Addr`s and instructions will determine the ordering. + constexpr LoopTree(Arena<> *lalloc, poly::Loop *L, LoopTree *parent_) + : loop{lalloc->create(parent_->depth + 1, L)}, + depth(parent_->depth + 1) { + // allocate the root node, and connect it to parent's node, as well as + // previous loop of the same level. + loop->setParent(parent_->loop); + } + constexpr LoopTree(Arena<> *lalloc) : loop{lalloc->create(0)} {} + +public: + static auto root(Arena<> *salloc, Arena<> *lalloc) -> LoopTree * { + return new (salloc) LoopTree(lalloc); + } + // salloc: Short lived allocator, for the indexable `Vec`s + // Longer lived allocator, for the IR::Loop nodes + // NOLINTNEXTLINE(misc-no-recursion) + void addNode(Arena<> *salloc, Arena<> *lalloc, lp::ScheduledNode *node) { + if (node->getNumLoops() == depth) { + // Then it belongs here, and we add loop's dependencies. 
+ // We only need to add deps to support SCC/top sort now. + // We also apply the rotation here. + // For dependencies in SCC iteration, only indvar deps get iterated. + auto [Pinv, denom] = math::NormalForm::scaledInv(node->getPhi()); + Valid explicitLoop = + node->getLoopNest()->rotate(lalloc, Pinv, node->getOffset()); + for (IR::Addr *m : node->localAddr()) { + m->rotate(explicitLoop, Pinv, denom, node->getOffsetOmega(), + node->getOffset()); + loop->insertAfter(m); + } + return; + } + // we need to find the sub-loop tree to which we add `node` + ptrdiff_t idx = node->getFusionOmega(depth); + invariant(idx >= 0); + ptrdiff_t numChildren = children.size(); + if (idx >= children.size()) { + if (idx >= children.getCapacity()) { + // allocate extra capacity + children.reserve(salloc, 2 * (idx + 1)); + } + // allocate new nodes and resize + children.resize(idx + 1); + for (ptrdiff_t i = numChildren; i < idx + 1; ++i) + children[i] = new (salloc) LoopTree{lalloc, node->getLoopNest(), this}; + numChildren = idx + 1; + } + children[idx]->addNode(salloc, lalloc, node); + } + constexpr auto getChildren() -> Vec { return children; } + constexpr auto getLoop() -> IR::Loop * { return loop; } +}; + +inline void hoist(IR::Node *N, IR::Loop *P, int depth) { + N->setParent(P); + N->setCurrentDepth(depth); +} + +struct LoopDepSummary { + IR::Node *afterExit{nullptr}; + IR::Addr *indexedByLoop{nullptr}; + IR::Addr *notIndexedByLoop{nullptr}; +}; +struct LoopIndependent { + LoopDepSummary summary; + bool independent; + constexpr auto operator*=(LoopIndependent other) -> LoopIndependent & { + summary = other.summary; + independent = independent && other.independent; + return *this; + } +}; +/// inline auto searchLoopIndependentUsers(IR::Dependencies deps, IR::Loop *L, +/// IR::Node *N, uint8_t depth, +/// LoopDepSummary summary) +/// +/// Searches `N` and it's users for loop-independent users, and returns them +/// as a list to process. 
+/// This exits early if it finds a dependent user, meaning it will only return +/// a partial list in this case. We search the entire graph eventually, meaning +/// the remainder will be processed later. +/// We return a `LoopDepSummary, bool` pair, where the `bool` is true if `N` +/// was +/// loop independent. We use the `bool` rather than a `nullptr` or optional so +/// that we can still return those results we did find on failure. +/// NOLINTNEXTLINE(misc-no-recursion) +inline auto searchLoopIndependentUsers(const IR::Dependencies &deps, + IR::Loop *L, IR::Node *N, int depth, + LoopDepSummary summary) + -> LoopIndependent { + if (N->dependsOnParentLoop()) return {summary, false}; + if (llvm::isa(N)) return {summary, false}; + if (IR::Loop *P = N->getLoop(); P != L) + return {summary, !(P && L->contains(P))}; + LoopIndependent ret{summary, true}; + auto *a = llvm::dyn_cast(N); + if (a) { + a->removeFromList(); + if (a->indexedByInnermostLoop()) { + a->insertAfter(ret.summary.indexedByLoop); + ret.summary.indexedByLoop = a; + return {summary, false}; + } + a->insertAfter(ret.summary.notIndexedByLoop); + ret.summary.notIndexedByLoop = a; + for (IR::Addr *m : a->unhoistableOutputs(deps, depth - 1)) { + ret *= searchLoopIndependentUsers(deps, L, m, depth, summary); + if (ret.independent) continue; + a->setDependsOnParentLoop(); + return ret; + } + } + // if it isn't a Loop or Addr, must be an `Instruction` + IR::Value *I = llvm::cast(N); + for (IR::Node *U : I->getUsers()) { + ret *= searchLoopIndependentUsers(deps, L, U, depth, summary); + if (ret.independent) continue; + I->setDependsOnParentLoop(); + return ret; + } + // then we can push it to the front of the list, meaning it is hoisted out + if (a && (ret.summary.notIndexedByLoop == a)) + ret.summary.notIndexedByLoop = llvm::cast_or_null(a->getNext()); + I->removeFromList(); + I->insertAfter(ret.summary.afterExit); + ret.summary.afterExit = I; + I->visit(depth); + return ret; +} +/// `R`: remove from loop, if 
not `nullptr`, set the parent of `N` to `R` +/// `R` is applied recursively, forwarded to all calls. +// NOLINTNEXTLINE(misc-no-recursion) +inline auto visitLoopDependent(const IR::Dependencies &deps, IR::Loop *L, +                               IR::Node *N, int depth, IR::Node *body, +                               IR::Loop *R = nullptr) -> IR::Node * { +  invariant(N->getVisitDepth() != 254); +  // N may have been visited as a dependent of an inner loop, which is why +  // `visited` accepts a depth argument +  if (N->wasVisited(depth) || !(L->contains(N))) return body; +#ifndef NDEBUG +  // Our goal here is to check for cycles in debug mode. +  // Each level of our graph is acyclic, meaning that there are no cycles at +  // that level when traversing only edges active at that given level. +  // However, when considering edges active at level `I`, we may have cycles +  // at level `J` if `J>I`. In other words, here we are traversing all edges +  // active at `I=depth`. Within subloops, which necessarily have depth +  // `J>I`, we may have cycles. +  // +  // Thus, we need to prevent getting stuck in a cycle for these deeper loops +  // by setting `N->visit(depth)` here, so `wasVisited` will allow them to +  // immediately return. But, in debug mode, we'll set nodes of the same depth +  // to `254` to check for cycles. +  if (N->getLoop() == L) N->visit(254); +  else N->visit(depth); +#else +  N->visit(depth); +#endif +  // iterate over users +  if (auto *A = llvm::dyn_cast(N)) { +    // Note that `topologicalSort` calls `searchLoopIndependentUsers` which +    // checks whether an `Addr` is `indexedByInnermostLoop`. +    // +    // Note that here `depth` is `0` for top-level, 1 for the outer most loop, +    // etc. That is, loops are effectively 1-indexed here, while `satLevel` +    // is effectively 0-indexed by loop. +    // Example 1: +    // for (ptrdiff_t m = 0; m < M; ++m) +    //   for (ptrdiff_t n = 0; n < N; ++n) +    //     for (ptrdiff_t k = 0; k < K; ++k) C[m,n] = C[m,n] + A[m,k]*B[k,n]; +    // we have cyclic dependencies between the load from/store to `C[m,n]`.
+ // The `C[m,n]` load -> `C[m,n]` store was not satisfied by any loop, so + // the sat level is 255. + // The `C[m,n]` store -> `C[m,n]` load has satLevel = 2. + // Example 2: + // for (ptrdiff_t m = 0; m < M; ++m) + // for (ptrdiff_t n = 1; n < N; ++n) C[m,n] = C[m,n] + C[m,n-1]; + // we again have a cyple, from the load `C[m,n-1]` to the store `C[m,n]`, + // and from the store `C[m,n]` to the load `C[m,n-1]` on the following + // iteration. + // The former has a sat level of 255, while the latter has a sat level of + // `1`. + // + // isActive(depth) == satLevel() > depth + // + // a. load->store is not satisfied by any loop, instead handled by sorting + // of instructions in the innermost loop, i.e. sat is depth=3. + // b. store->load is carried by the `k` loop, i.e. sat is depth=2. + // Because `2 > (3-1) == false`, we do not add it here, + // its sorting isn't positional! + // + // TODO: + // - [ ] I think the current algorithm may illegally hoist certain + // dependencies carried on this loop. Specifically, we can hoist + // addresses that (a) are not indexed by this loop, but need to be + // repeated anyway because of some other address operation, while that + // combination can't be moved to registers, e.g. because their index + // matrices are not equal. + // We need to distinguish between order within the loop, for the + // purpose of this topsort, and placement with respect to the loop. + // Simply, we perhaps should simply avoid hoisting when we carry + // a dependence that doesn't meet the criteria of `unhoistableOutputs` + // - [ ] Incorporate the legality setting here? 
+ for (IR::Addr *m : A->unhoistableOutputs(deps, depth - 1)) { + if (m->wasVisited(depth)) continue; + body = visitLoopDependent(deps, L, m, depth, body, R); + } + } + if (auto *I = llvm::dyn_cast(N)) { + for (IR::Node *U : I->getUsers()) { + if (U->wasVisited(depth)) continue; + body = visitLoopDependent(deps, L, U, depth, body, R); + } + } else if (auto *S = llvm::dyn_cast(N)) { + for (IR::Node *U : S->getChild()->nodes()) { + if (U->wasVisited(depth)) continue; + body = visitLoopDependent(deps, L, U, depth, body, R); + } + } +#ifndef NDEBUG + if (N->getLoop() == L) N->visit(depth); +#endif + if (N->getLoop() == L) body = N->setNext(body); + if (R) hoist(N, R, depth - 1); + return body; +} +inline void addBody(const IR::Dependencies &deps, IR::Loop *root, int depth, + IR::Node *nodes) { + IR::Exit exit{}; // use to capture last node + IR::Node *body{&exit}; + for (IR::Node *N : nodes->nodes()) + body = visitLoopDependent(deps, root, N, depth, body); + body = root->setChild(body); // now we can place the loop + IR::Node *last = exit.getPrev(); + if (last) last->setNext(nullptr); + root->setLast(last); +} +inline void topologicalSort(const IR::Dependencies &deps, IR::Loop *root, + int depth) { + // basic plan for the top sort: + // We iterate across all users, once all of node's users have been added, + // we push it to the front of the list. Thus, we get a top-sorted list. + // We're careful about the order, so that this top sort should LICM all the + // addresses that it can. + // + // We must push the exit before the root (as the exit depends on the loop, + // and we iterate users). The exit doesn't use any in this block, so we + // begin by trying to push any instructions that don't depend on the loop. + // If we fail to push them (i.e., because they have uses that do depend on + // the loop), then they get added to a revisit queue. Any instructions we + // are able to push-front before we push the exit, implicitly happen after + // the exit, i.e. 
they have been LICMed into the exit block. We unvisit the + // revisit-queue, and add them back to the main worklist. Then, we proceed + // with a depth-first topological sort normally (iterating over uses, + // pushing to the front), starting with the loop root, so that it gets + // pushed to the front as soon as possible. That is, so that it happens as + // late as possible Any instructions that get pushed to the front afterwards + // have been LICMed into the loop pre-header. + // + // In this first pass, we iterate over all nodes, pushing those + // that can be hoisted after the exit block. + // + IR::Node *C = root->getChild(); + LoopDepSummary summary{}; + for (IR::Node *N : C->nodes()) + summary = searchLoopIndependentUsers(deps, root, N, depth, summary).summary; + // summary.afterExit will be hoisted out; every member has been marked as + // `visited` So, now we search all of root's users, i.e. every addr that + // depends on it + root->setNext(summary.afterExit); + IR::Loop *P = root->getLoop(); + for (IR::Node *N : summary.afterExit->nodes()) hoist(N, P, depth - 1); + addBody(deps, root, depth, summary.indexedByLoop); + IR::Node *body{root}; + for (IR::Node *N : summary.notIndexedByLoop->nodes()) + body = visitLoopDependent(deps, root, N, depth, body, P); +} +// NOLINTNEXTLINE(misc-no-recursion) +inline void buildSubGraph(const IR::Dependencies &deps, IR::Loop *root, + int depth) { + // We build the instruction graph, via traversing the tree, and then + // top sorting as we recurse out + for (IR::Loop *child : root->subLoops()) + buildSubGraph(deps, child, depth + 1); + // The very outer `root` needs to have all instr constituents + // we also need to add the last instruction of each loop as `last` + topologicalSort(deps, root, depth); +} +inline void buildGraph(const IR::Dependencies &deps, IR::Loop *root) { + // We build the instruction graph, via traversing the tree, and then + // top sorting as we recurse out + for (IR::Loop *child : root->subLoops()) 
buildSubGraph(deps, child, 1); + + // The very outer `root` needs to have all instr constituents + // we also need to add the last instruction of each loop as `last` + addBody(deps, root, 0, root->getChild()); + // Add top sort idx + uint32_t idx = 0; // we use ++idx, so only `const` have idx==0 + for (IR::Node *n : root->allNodes()) n->setTopIndex(++idx); +} + +inline auto addAddrToGraph(Arena<> *salloc, Arena<> *lalloc, + lp::ScheduledNode *nodes) -> IR::Loop * { + auto s = salloc->scope(); + // `root` is top level loop + LoopTree *root = LoopTree::root(salloc, lalloc); + for (lp::ScheduledNode *node : nodes->getAllVertices()) + root->addNode(salloc, lalloc, node); + return root->getLoop(); +} +// NOLINTNEXTLINE(misc-no-recursion) +inline auto hasFutureReadsCore(dict::aset &successors, + llvm::Instruction *I) -> bool { + for (auto *U : I->users()) { + auto *UI = llvm::dyn_cast(U); + if (!UI) continue; + if (UI->mayReadFromMemory() && successors.count(UI->getParent())) + return true; + if (llvm::isa(UI) && + hasFutureReadsCore(successors, UI)) + return true; + // TODO: don't just give up if we cast to int? 
+ if (llvm::isa(UI) || llvm::isa(UI)) + return true; + } + return false; +} +inline auto hasFutureReads(Arena<> *alloc, dict::set &LBBs, + llvm::Instruction *I) -> bool { + auto s = alloc->scope(); + dict::aset successors{alloc}; + for (llvm::BasicBlock *S : llvm::successors(I->getParent())) + if (!LBBs.count(S)) successors.insert(S); + return hasFutureReadsCore(successors, I); +} + +struct LoopDepSatisfaction { + IR::Dependencies &deps; + MutPtrVector loopDeps; + + constexpr auto dependencyIDs(IR::Loop *L) { + return utils::VForwardRange{loopDeps.begin(), L->getEdge()}; + } + constexpr auto depencencies(IR::Loop *L) { + return dependencyIDs(L) | deps.getEdgeTransform(); + } +}; +inline Legality::Legality(LoopDepSatisfaction &deps, IR::Loop *L) { + for (int32_t did : deps.dependencyIDs(L)) + if (!update(deps.deps, L, did)) break; +} +inline auto Legality::update(poly::Dependencies &deps, IR::Loop *L, int32_t did) + -> bool { + // note: the dependence hasn't been rotated + Dependence d{deps.get(Dependence::ID{did})}; + IR::Addr *in = d.out, *out = d.in; + utils::Optional peel = deps.determinePeelDepth(L, did); + if (peel) peelFlag |= (1 << (*peel)); + + if (d.revTimeEdge()) { + bool reassociable = in->reassociableReductionPair() != out; + if (reassociable) ++unordered_reduction_count; + if (!reassociable) ++ordered_reduction_count; + return reorderable = reassociable || peel; + } + return reorderable = peel.hasValue(); +}; + +class IROptimizer { + IR::Dependencies &deps; + IR::Cache &instructions; + dict::set &LBBs; + dict::set &eraseCandidates; + IR::Loop *root_; + MutPtrVector loopDeps; + Arena<> *lalloc_; + llvm::TargetLibraryInfo *TLI; + + /// `loopDepSats` places the dependencies at the correct loop level so that + /// we can more easily check all dependencies carried by a particular loop. + /// We use these for checks w/ respect to unrolling and vectorization + /// legality. 
+ /// The returned vector is an integer vector, giving a mapping of loops + /// to depencencies handled at that level. + /// We can use these dependencies for searching reductions for + /// trying to prove legality. + static auto loopDepSats(Arena<> *alloc, IR::Dependencies &deps, + lp::LoopBlock::OptimizationResult res) + -> MutPtrVector { + MutPtrVector loopDeps{math::vector(alloc, deps.size())}; + // place deps at sat level for loops + for (IR::Addr *a : res.addr.getAddr()) { + IR::Loop *L = a->getLoop(); + for (int32_t id : a->inputEdgeIDs(deps)) { + uint8_t lvl = deps.satLevel(IR::Dependence::ID{id}); + L->getLoopAtDepth(lvl)->addEdge(loopDeps, id); + } + } + return loopDeps; + } + [[nodiscard]] constexpr auto getLoopDeps() const -> LoopDepSatisfaction { + return {deps, loopDeps}; + } + // this compares `a` with each of its active outputs. + inline void eliminateAddr(IR::Addr *a) { + for (int32_t id : a->outputEdgeIDs(deps, a->getCurrentDepth())) { + IR::Addr *b = deps.output(Dependence::ID{id}); + // TODO: also check loop extants + if (a->indexMatrix() != b->indexMatrix() || + a->getOffsetOmega() != b->getOffsetOmega()) + return; + if (a->isStore()) { + // On a Write->Write, we remove the first write. + if (b->isStore()) return a->drop(deps); + // Write->Load, we will remove the load if it's in the same block as the + // write, and we can forward the stored value. + if (a->getLoop() != b->getLoop()) return; + instructions.replaceAllUsesWith(b, a->getStoredVal()); + b->drop(deps); + } else if (b->isLoad()) { // Read->Read + // If they're not in the same loop, we need to reload anyway + if (a->getLoop() != b->getLoop()) return; + // If they're in the same loop, we can delete the second read + instructions.replaceAllUsesWith(b, a); + b->drop(deps); + } else return; // Read->Write, can't delete either + } + } + // we eliminate temporaries that meet these conditions: + // 1. 
are only ever stored to (this can be achieved via + // load-elimination/stored-val forwarding in `removeRedundantAddr`) + // 2. are non-escaping, i.e. `llvm::isNonEscapingLocalObject` + // 3. returned by `llvm::isRemovableAlloc` + inline auto eliminateTemporaries(IR::AddrChain addr) -> unsigned { + auto s = lalloc_->scope(); + dict::aset loaded{lalloc_}; + for (IR::Addr *a : addr.getAddr()) + if (a->isLoad()) loaded.insert(a); + unsigned remaining = 0; + for (IR::Addr *a : addr.getAddr()) { + if (a->isDropped()) continue; + ++remaining; + if (loaded.contains(a)) continue; + const llvm::SCEVUnknown *ptr = a->getArrayPointer(); + auto *call = llvm::dyn_cast(ptr->getValue()); + if (!call) continue; + if (!llvm::isNonEscapingLocalObject(call, nullptr)) continue; + if (!llvm::isRemovableAlloc(call, TLI)) continue; + if (hasFutureReads(lalloc_, LBBs, call)) continue; + a->drop(deps); + // we later check if any uses remain other than the associated free + // if not, we can delete them. + // We may want to go ahead and do this here. We don't for now, + // because we have live `llvm::Instruction`s that we haven't removed yet. + // TODO: revisit when handling code generation (and deleting old code) + eraseCandidates.insert(call); + --remaining; + } + return remaining; + } + + // plan: SCC? Iterate over nodes in program order? + // then we can iterate in order. + // What to do about depth? + // We may have + // for (i : I){ + // for (j : J){ + // A[j] = x; // store + // y = A[j]; // load + // } + // } + // In this case, we do have a cycle: + // A[j]^s_i -> A[j]^l_i + // A[j]^l_i -> A[j]^s_{i+1} + // However, this cycle does not prohibit deleting the load, + // replacing it with `y = x`. + // This still holds true if the load were a second store: + // for (i : I){ + // for (j : J){ + // A[j] = x; // store + // A[j] = y; // load + // } + // } + // We could stick with the single `y` store. 
+ // Thus, for eliminating memory operations at a depth of 2, + // we are only concerned with dependencies still valid at a depth of 2. + // for (int i = 0 : i < I; ++i){ + // x[i] /= U[i,i]; + // for (int j = i+1; j < I; ++j){ + // x[j] -= x[i]*U[i,j]; + // } + // } + // Maybe just do the dumb thing? + // Walk the graph for addr costs, and at the same time, + // check the addr for eliminability, checking against what we've stored thus + // far. + // We currently do not store load-load edges, which is why only checking + // edge relationships is not ideal. + // We may store load-load edges in the future, as these could be used as + // part of the cost function of the linear program, i.e. we'd want to + // minimize the distance between loads (but allow reordering them). + // + // I think a reasonable approach is: + // Have a map from array pointer to Addr. Addrs form a chain. + // as we walk the graph, add each newly encountered addr to the front of the + // chain and check if we can eliminate it, or any of its predecessors. + // + // Note (bracketed means we might be able to eliminate): + // Read->[Read] could eliminate read + // Read->Write no change + // Write->[Read] can forward written value + // [Write]->Write can eliminate first write + // Thus, we can fuse this pass with our address cost calculation. + // We check if we can eliminate before calculating the new cost. + // The only case where we may remove an old value, write->write, + // we could just take the old cost and assign it to the new write. + // TODO: if we have only writes to a non-escaping array, we should + // be able to eliminate these writes too, and then also potentially + // remove that array temporary (e.g., if it were malloc'd). + // E.g. check if the array is a `llvm::isNonEscapingLocalObject` and + // allocated by `llvm::isRemovableAlloc`. + void removeRedundantAddr(IR::AddrChain addr) { + // outputEdges are sorted topologically from first to last. 
+ // Example: + // for (int i = 0; i < I; ++i){ + // acc = x[i]; // Statement: 0 + // for (int j = 0; j < i; ++j){ + // acc -= x[j]*U[j,i]; // Statement: 1 + // } + // x[i] = acc; // Statement: 2 + // x[i] = x[i] / U[i,i]; // Statement: 3 + // } + // Here, we have a lot of redundant edges connecting the various `x[i]`s. + // We also have output edges between the `x[i]` and the `x[j]` load in + // statement 1. It is, however, satisfied at `x[i]`'s depth, and ignored. + // So, what would happen here: + // S0R->S2W, no change; break. + // S2W->S3R, replace read with stored value forwarding. + // S2W->S3W, remove S2W as it is shadowed by S3W. + // NOTE: we rely on the `ListRange` iterator supporting safely removing the + // current iter from the list. + for (IR::Addr *a : addr.getAddr()) eliminateAddr(a); + } + /// `sortEdges` sorts each `Addr`'s output edges + /// So that each `Addr`'s output edges are sorted based on the + /// topological ordering of the outputs. + /// The approach to sorting edges is to iterate through nodes backwards + /// whenever we encounter an `Addr`, we push it to the front of each + /// output edge list to which it belongs. + /// We also assigning each `Addr` an order by decrementing an integer each + /// time we encounter one. This is also necessary for Addr elimination, as we + /// want to find the first topologically greater Addr. 
+ // NOLINTNEXTLINE(misc-no-recursion) + auto sortEdges(IR::Loop *R, int32_t pos) -> int32_t { + for (IR::Node *n = R->getLast(); n != R; n = n->getPrev()) { + if (auto *L = llvm::dyn_cast(n)) { + pos = sortEdges(L, pos); + continue; + } + auto *a = llvm::dyn_cast(n); + if (!a) continue; + a->setTopPosition(pos--); + // for each input edge, we push `a` to the front of the output list + for (int32_t id : a->inputEdgeIDs(deps)) { + if (deps.prevOut(Dependence::ID{id}) < 0) continue; + deps.removeOutEdge(id); + IR::Addr *b = deps.input(Dependence::ID{id}); + int32_t oldFirst = b->getEdgeOut(); + deps.prevOut(Dependence::ID{oldFirst}) = id; + deps.prevOut(Dependence::ID{id}) = -1; + deps.nextOut(Dependence::ID{id}) = oldFirst; + b->setEdgeOut(id); + } + } + return pos; + } + void findReductions(IR::AddrChain addr) { + for (IR::Addr *a : addr.getAddr()) a->maybeReassociableReduction(deps); + }; + +public: + IROptimizer(IR::Dependencies &deps, IR::Cache &instr, + dict::set &loopBBs, + dict::set &eraseCandidates_, IR::Loop *root, + Arena<> *lalloc, lp::LoopBlock::OptimizationResult res) + : deps{deps}, instructions{instr}, LBBs{loopBBs}, + eraseCandidates{eraseCandidates_}, root_{root}, lalloc_{lalloc} { + sortEdges(root_, 0); + removeRedundantAddr(res.addr); + unsigned numAddr = eliminateTemporaries(res.addr); + findReductions(res.addr); + loopDeps = loopDepSats(lalloc, deps, res); + /// TODO: legality check + // plan now is to have a `BitArray` big enough to hold `numLoops` entries + // and `numAddr` rows; final axis is contiguous vs non-contiguous + // Additionally, we will have a vector of unroll strategies to consider + // LoopDependencies *ld = LoopDependencies::create(lalloc_, numLoops, + // numAddr); + } +}; + +// +// Considering reordering legality, example +// for (int i = 0: i < I; ++i){ +// for (int j = 0 : j < i; ++j){ +// x[i] -= x[j]*U[j,i]; +// } +// x[i] /= U[i,i]; +// } +// We have an edge from the store `x[i] = x[i] / U[i,i]` to the load of +// `x[j]`, 
when `j = ` the current `i`, on some future iteration. +// We want to unroll; +// for (int i = 0: i < I-3; i += 4){ +// for (int j = 0 : j < i; ++j){ +// x[i] -= x[j]*U[j,i]; +// x[i+1] -= x[j]*U[j,i+1]; +// x[i+2] -= x[j]*U[j,i+2]; +// x[i+3] -= x[j]*U[j,i+3]; +// } +// x[i] /= U[i,i]; // store 0 +// { // perform unrolled j = i iter +// int j = i; // these all depend on store 0 +// x[i+1] -= x[j]*U[j,i+1]; +// x[i+2] -= x[j]*U[j,i+2]; +// x[i+3] -= x[j]*U[j,i+3]; +// } +// // j+1 iteration for i=i iter goes here (but doesn't happen) +// x[i+1] /= U[i+1,i+1]; // store 1 +// { // perform unrolled j = i + 1 iter +// int j = i+1; // these all depend on store 1 +// x[i+2] -= x[j]*U[j,i+2]; +// x[i+3] -= x[j]*U[j,i+3]; +// } +// // j+2 iteration for i=i iter goes here (but doesn't happen) +// // j+2 iteration for i=i+1 iter goes here (but doesn't happen) +// x[i+2] /= U[i+2,i+2]; // store 2 +// { // perform unrolled j = i + 2 iter +// int j = i+2; // this depends on store 2 +// x[i+3] -= x[j]*U[j,i+3]; +// } +// // j+3 iteration for i=i iter goes here (but doesn't happen) +// // j+3 iteration for i=i+1 iter goes here (but doesn't happen) +// // j+3 iteration for i=i+2 iter goes here (but doesn't happen) +// x[i+3] /= U[i+3,i+3]; +// } +// The key to legality here is that we peel off the dependence polyhedra +// from the loop's iteration space. +// We can then perform the dependent iterations in order. +// With masking, the above code can be vectorized in this manner. +// The basic approach is that we have the dependence polyhedra: +// +// 0 <= i_s < I +// 0 <= i_l < I +// 0 <= j_l < i_l +// i_s = j_l // dependence, yields same address in `x` +// +// Note that our schedule sets +// i_s = i_l +// Which gives: +// i_l = i_s = j_l < i_l +// a contradiction, meaning that the dependency is +// conditionally (on our schedule) satisfied. +// Excluding the `i_s = i_l` constraint from the +// polyhedra gives us the region of overlap. 
+// +// When unrolling by `U`, we get using `U=4` as an example: +// i^0_s + 1 = i^1_s +// i^0_s + 2 = i^2_s +// i^0_s + 3 = i^3_s +// 0 <= i^0_s < I +// 0 <= i^1_s < I +// 0 <= i^2_s < I +// 0 <= i^3_s < I +// 0 <= i^0_l < I +// 0 <= i^1_l < I +// 0 <= i^2_l < I +// 0 <= i^3_l < I +// 0 <= j_l < i^0_l +// 0 <= j_l < i^1_l +// 0 <= j_l < i^2_l +// 0 <= j_l < i^3_l +// i^0_s = j_l || i^1_s = j_l || i^2_s = j_l || i^3_s = j_l +// where the final union can be replaced with +// i^0_s = j_l || i^0_s+1 = j_l || i^0_s+2 = j_l || i^0_s+3 = j_l +// i^0_s <= j_1 <= i^0_s+3 +// +// Similarly, we can compress the other inequalities... +// 0 <= i^0_s < I - 3 +// 0 <= i^0_l < I - 3 +// 0 <= j_l < i^0_l +// i^0_s <= j_1 <= i^0_s+3 // dependence region +// +// So, the parallel region is the union +// i^0_s > j_1 || j_1 > i^0_s+3 +// +// In this example, note that the region `j_1 > i^0_s+3` is empty +// so we have one parallel region, and then one serial region. +// +// Lets consider simpler checks. We have +// [ 1 0 ] : x[i] -= +// [ 0 1 ] : x[j] +// [ 1 ] : x[i] /= +// we have a dependency when `i == j`. `i` carries the dependency, but we can +// peel off the independent iters from `j`, and unroll `i` for these. +// +// How to identify: +// [ 1 -1 ] +// vs, if we had two `x[i]` or two `x[j]` +// [ 0, 0 ] +// An idea: look for non-zero so we can peel? +// Or should we look specifically for `x[i] == x[j]` type pattern? +// E.g., if we had +// [ i, j, k, l ] +// [ 2, -1, 2, -1 ] +// we'd need a splitting algorithm. +// E.g., split on the 2nd loop, so we get `j == 2*i + 2*k - l` +// With this, we'd split iterations into groups +// j < 2*i + 2*k - l +// j == 2*i + 2*k - l +// j > 2*i + 2*k - l +// Subsetting the `k` and `l` iteration spaces may be a little annoying, +// so we may initially want to restrict ourselves to peeling the innermost loop. 
+/// +/// Optimize the schedule +inline void optimize(IR::Dependencies deps, IR::Cache &instr, + dict::set &loopBBs, + dict::set &eraseCandidates, + Arena<> *lalloc, lp::LoopBlock::OptimizationResult res) { + // we must build the IR::Loop + // Initially, to help, we use a nested vector, so that we can index into it + // using the fusion omegas. We allocate it with the longer lived `instr` + // alloc, so we can checkpoint it here, and use alloc for other IR nodes. + // The `instr` allocator is more generally the longer lived allocator, + // as it allocates the actual nodes. + + IR::Loop *root = addAddrToGraph(instr.getAllocator(), lalloc, res.nodes); + buildGraph(deps, root); + // `N` is the head of the topologically sorted graph + // We now try to remove redundant memory operations + + IROptimizer(deps, instr, loopBBs, eraseCandidates, root, lalloc, res); +} + +/* +// NOLINTNEXTLINE(misc-no-recursion) +inline auto printSubDotFile(Arena<> *alloc, llvm::raw_ostream &out, + map &names, + llvm::SmallVectorImpl &addrNames, + unsigned addrIndOffset, poly::Loop *lret) +-> poly::Loop * { +poly::Loop *loop{nullptr}; +size_t j = 0; +for (auto *addr : header.getAddr()) loop = addr->getAffLoop(); +for (auto &subTree : subTrees) { + // `names` might realloc, relocating `names[this]` + if (getDepth()) + names[subTree.subTree] = names[this] + "SubLoop#" + std::to_string(j++); + else names[subTree.subTree] = "LoopNest#" + std::to_string(j++); + if (loop == nullptr) + for (auto *addr : subTree.exit.getAddr()) loop = addr->getAffLoop(); + loop = subTree.subTree->printSubDotFile(alloc, out, names, addrNames, + addrIndOffset, loop); +} +const std::string &name = names[this]; +out << "\"" << name + << "\" [shape=plain\nlabel = <\n"; +size_t i = header.printDotNodes(out, 0, addrNames, addrIndOffset, name); +j = 0; +std::string loopEdges; +for (auto &subTree : subTrees) { + std::string label = "f" + std::to_string(++i); + out << " \n"; + loopEdges += "\"" + name + "\":f" + 
std::to_string(i) + " -> \"" + + names[subTree.subTree] + "\":f0 [color=\"#ff0000\"];\n"; + i = subTree.exit.printDotNodes(out, i, addrNames, addrIndOffset, name); +} +out << "
"; +// assert(depth == 0 || (loop != nullptr)); +if (loop && (getDepth() > 0)) { + for (size_t i = loop->getNumLoops(), k = getDepth(); i > k;) + loop = loop->removeLoop(alloc, --i); + loop->pruneBounds(alloc); + loop->printBounds(out); +} else out << "Top Level"; +out << "
SubLoop#" << j++ + << "
>];\n" << loopEdges; +if (lret) return lret; +if ((loop == nullptr) || (getDepth() <= 1)) return nullptr; +return loop->removeLoop(alloc, getDepth() - 1); +} + +inline void printDotFile(Arena<> *alloc, llvm::raw_ostream &out) { +map names; +llvm::SmallVector addrNames(numAddr_); +names[this] = "toplevel"; +out << "digraph LoopNest {\n"; +auto p = alloc.scope(); +printSubDotFile(alloc, out, names, addrNames, subTrees.size(), nullptr); +printDotEdges(out, addrNames); +out << "}\n"; +} +*/ +// class LoopForestSchedule : LoopTreeSchedule { +// [[no_unique_address]] Arena<> *allocator; +// }; +} // namespace CostModeling + +namespace IR { + +inline void Loop::setLegality(CostModeling::LoopDepSatisfaction &deps) { + for (int32_t did : deps.dependencyIDs(this)) + if (!legality.update(deps.deps, this, did)) break; +} + +} // namespace IR +} // namespace poly diff --git a/include/Optimize/Legality.hpp b/include/Optimize/Legality.hpp new file mode 100644 index 000000000..b84b8c0d7 --- /dev/null +++ b/include/Optimize/Legality.hpp @@ -0,0 +1,196 @@ +#pragma once +#ifndef POLY_LEGALITY_HPP_INCLUDED +#define POLY_LEGALITY_HPP_INCLUDED + +#include + +namespace poly { +namespace IR { +class Loop; +class Addr; +}; // namespace IR +namespace poly { +struct Dependence; +class Dependencies; +}; // namespace poly +namespace CostModeling { + +struct LoopDepSatisfaction; + +// If a loop doesn't carry a dependency, it is legal +// If a loop does carry a dependency, we can still consider +// unrolling and vectorization if at least one of: +// - that depenedncy is a reassociable reduction +// - the overlap is for a bounded number of iters, in which case we can peel +// Contains: +// - `getReduction()` enum indicating +// none vs unordered vs ordered +// - `minDistance()`, indicates the minimum distance +// between dependent loop iterations. 
+// for (ptrdiff_t i; i::max()}; + // uint8_t maxdistance{0}; + uint16_t ordered_reduction_count{0}; + uint16_t unordered_reduction_count{0}; + bool reorderable{true}; + // uint8_t illegalFlag{0}; + +public: + // [[nodiscard]] constexpr auto minDistance() const -> uint16_t { + // return mindistance; + // } + // [[nodiscard]] constexpr auto maxDistance() const -> uint16_t { + // return maxdistance; + // } + // [[nodiscard]] constexpr auto noUnroll() const -> bool { + // return illegalFlag & uint8_t(Illegal::Unroll); + // } + // [[nodiscard]] constexpr auto canUnroll() const -> bool { return + // !noUnroll(); } + constexpr auto operator&=(Legality other) -> Legality & { + ordered_reduction_count += other.ordered_reduction_count; + unordered_reduction_count += other.unordered_reduction_count; + // mindistance = std::min(mindistance, other.mindistance); + // maxdistance = std::max(maxdistance, other.maxdistance); + peelFlag |= other.peelFlag; + // illegalFlag |= other.illegalFlag; + return *this; + } + constexpr auto operator=(const Legality &) -> Legality & = default; + [[nodiscard]] constexpr auto operator&(Legality other) const -> Legality { + Legality l{*this}; + return l &= other; + } + constexpr Legality() = default; + constexpr Legality(const Legality &) = default; + Legality(LoopDepSatisfaction &deps, IR::Loop *L); + // deeperAccess(const poly::Dependencies &deps, IR::Loop *L, IR::Addr *in) + // are any of the outputs of `in` in a subloop of `L` + // static auto deeperAccess(const poly::Dependencies &deps, IR::Loop *L, + // IR::Addr *in) -> bool { + // return std::ranges::any_of(in->outputEdgeIDs(deps), + // [&](int32_t id) -> bool { + // IR::Addr *a = + // deps.output(Dependence::ID{id}); return + // (a->getLoop() != L) && L->contains(a); + // }); + // } + // inline auto anyInteriorDependents(IR::Loop *L, IR::Addr *out) -> bool { + // return std::ranges::any_of(out->outputEdgeIDs(*this), + // [&](int32_t i) -> bool { + // IR::Addr *a = 
output(Dependence::ID{i}); + // return (a->getLoop() != L) && + // L->contains(a); + // }); + // } + + // inline auto anyInteriorDependencies(IR::Loop *L, IR::Addr *in) -> bool { + + // return std::ranges::any_of(in->inputEdgeIDs(*this), [&](int32_t i) -> + // bool { + // IR::Addr *a = input(Dependence::ID{i}); + // return (a->getLoop() != L) && L->contains(a); + // }); + // } + auto update(poly::Dependencies &deps, IR::Loop *L, int32_t did) -> bool; + constexpr auto numReductions() const -> uint16_t { + uint16_t numReduct; + if (__builtin_add_overflow(ordered_reduction_count, + unordered_reduction_count, &numReduct)) + return std::numeric_limits::max(); + return numReduct; + } +}; +static_assert(sizeof(Legality) == 8); +} // namespace CostModeling +} // namespace poly +#endif // POLY_LEGALITY_HPP_INCLUDED diff --git a/include/Optimize/RegisterFile.hpp b/include/Optimize/RegisterFile.hpp new file mode 100644 index 000000000..3ede8b7d6 --- /dev/null +++ b/include/Optimize/RegisterFile.hpp @@ -0,0 +1,81 @@ +#pragma once +#ifndef RegisterFile_hpp_INCLUDED +#define RegisterFile_hpp_INCLUDED + +#include +#include + +namespace poly::RegisterFile { +// returns vector width in bytes, ignoring mprefer-vector-width +inline auto estimateMaximumVectorWidth(llvm::LLVMContext &C, + const llvm::TargetTransformInfo &TTI) + -> uint8_t { + uint8_t twiceMaxVectorWidth = 2; + auto *f32 = llvm::Type::getFloatTy(C); + llvm::InstructionCost prevCost = TTI.getArithmeticInstrCost( + llvm::Instruction::FAdd, + llvm::FixedVectorType::get(f32, twiceMaxVectorWidth)); + while (true) { + llvm::InstructionCost nextCost = TTI.getArithmeticInstrCost( + llvm::Instruction::FAdd, + llvm::FixedVectorType::get(f32, twiceMaxVectorWidth *= 2)); + if (nextCost > prevCost) break; + prevCost = nextCost; + } + return 2 * twiceMaxVectorWidth; +} + +class CPURegisterFile { + uint8_t maximumVectorWidth; + uint8_t numVectorRegisters; + uint8_t numGeneralPurposeRegisters; + uint8_t numPredicateRegisters; + +#if 
defined(__x86_64__) + // hacky check for has AVX512 + static inline auto hasAVX512(llvm::LLVMContext &C, + const llvm::TargetTransformInfo &TTI) -> bool { + return TTI.isLegalMaskedExpandLoad( + llvm::FixedVectorType::get(llvm::Type::getDoubleTy(C), 8)); + } +#else + // assume we're not cross-compiling to x64 from some other arch to reduce the + // risk of false positives + static constexpr hasAVX512(llvm::LLVMContext &, + const llvm::TargetTransformInfo &) + ->bool { + return false; + } +#endif + + static auto estimateNumPredicateRegisters( + llvm::LLVMContext &C, const llvm::TargetTransformInfo &TTI) -> uint8_t { + if (TTI.supportsScalableVectors()) return 8; + // hacky check for AVX512 + if (hasAVX512(C, TTI)) return 7; // 7, because k0 is reserved for unmasked + return 0; + } + +public: + CPURegisterFile(llvm::LLVMContext &C, const llvm::TargetTransformInfo &TTI) { + maximumVectorWidth = estimateMaximumVectorWidth(C, TTI); + numVectorRegisters = TTI.getNumberOfRegisters(true); + numGeneralPurposeRegisters = TTI.getNumberOfRegisters(false); + numPredicateRegisters = estimateNumPredicateRegisters(C, TTI); + } + [[nodiscard]] constexpr auto getNumVectorBits() const -> uint8_t { + return maximumVectorWidth; + } + [[nodiscard]] constexpr auto getNumVector() const -> uint8_t { + return numVectorRegisters; + } + [[nodiscard]] constexpr auto getNumScalar() const -> uint8_t { + return numGeneralPurposeRegisters; + } + [[nodiscard]] constexpr auto getNumPredicate() const -> uint8_t { + return numPredicateRegisters; + } +}; + +} // namespace poly::RegisterFile +#endif // RegisterFile_hpp_INCLUDED diff --git a/include/Polyhedra/Comparators.hpp b/include/Polyhedra/Comparators.hpp index 829de427e..306d64033 100644 --- a/include/Polyhedra/Comparators.hpp +++ b/include/Polyhedra/Comparators.hpp @@ -1,6 +1,8 @@ #pragma once #include "Utilities/Optional.hpp" +#include +#include #include #include #include @@ -9,13 +11,10 @@ #include #include #include -#include #include #include 
-#include #include #include -#include namespace poly::comparator { using math::PtrVector, math::MutPtrVector, math::Vector, math::_, math::Row, @@ -24,7 +23,7 @@ using math::PtrVector, math::MutPtrVector, math::Vector, math::_, math::Row, math::NormalForm::simplifySystemsImpl, math::NormalForm::solveSystem, math::StridedVector, math::vector, math::matrix, math::identity, math::Simplex, math::DenseDims, math::DenseMatrix; -using utils::invariant, utils::Arena, utils::Optional; +using utils::invariant, alloc::Arena, utils::Optional; // For `== 0` constraints struct EmptyComparator { static constexpr auto getNumConstTerms() -> ptrdiff_t { return 0; } @@ -126,9 +125,9 @@ template struct BaseComparator { PtrVector y) const -> bool { const ptrdiff_t N = getNumConstTerms(); - assert(delta.size() >= N); - assert(x.size() >= N); - assert(y.size() >= N); + invariant(delta.size() >= N); + invariant(x.size() >= N); + invariant(y.size() >= N); for (ptrdiff_t n = 0; n < N; ++n) delta[n] = x[n] - y[n]; return static_cast(this)->greaterEqual(delta); } @@ -145,8 +144,8 @@ template struct BaseComparator { [[nodiscard]] constexpr auto greater(PtrVector x, PtrVector y) const -> bool { const ptrdiff_t N = getNumConstTerms(); - assert(N <= x.size()); - assert(N <= y.size()); + invariant(N <= x.size()); + invariant(N <= y.size()); Vector delta(N); for (ptrdiff_t n = 0; n < N; ++n) delta[n] = x[n] - y[n]; --delta[0]; @@ -170,7 +169,7 @@ template struct BaseComparator { [[nodiscard]] constexpr auto lessEqual(MutPtrVector x) const -> bool { const ptrdiff_t N = getNumConstTerms(); - assert(N <= x.size()); + invariant(N <= x.size()); for (ptrdiff_t n = 0; n < N; ++n) x[n] *= -1; bool ret = static_cast(this)->greaterEqual(x); for (ptrdiff_t n = 0; n < N; ++n) x[n] *= -1; @@ -178,7 +177,7 @@ template struct BaseComparator { } [[nodiscard]] constexpr auto lessEqual(PtrVector x) const -> bool { const ptrdiff_t N = getNumConstTerms(); - assert(N <= x.size()); + invariant(N <= x.size()); Vector 
y{x[_(0, N)]}; return lessEqual(y); } @@ -193,13 +192,13 @@ template struct BaseComparator { [[nodiscard]] constexpr auto lessEqual(PtrVector x, int64_t y) const -> bool { const ptrdiff_t N = getNumConstTerms(); - assert(N <= x.size()); + invariant(N <= x.size()); Vector z{x[_(0, N)]}; return lessEqual(z, y); } [[nodiscard]] constexpr auto less(MutPtrVector x) const -> bool { const ptrdiff_t N = getNumConstTerms(); - assert(N <= x.size()); + invariant(N <= x.size()); int64_t x0 = x[0]; x[0] = -x0 - 1; for (ptrdiff_t i = 1; i < N; ++i) x[i] *= -1; @@ -210,7 +209,7 @@ template struct BaseComparator { } [[nodiscard]] constexpr auto less(PtrVector x) const -> bool { const ptrdiff_t N = getNumConstTerms(); - assert(N <= x.size()); + invariant(N <= x.size()); Vector y{x[_(0, N)]}; return less(y); } @@ -223,7 +222,7 @@ template struct BaseComparator { [[nodiscard]] constexpr auto greater(PtrVector x) const -> bool { // TODO: avoid this needless memcopy and (possible) allocation? const ptrdiff_t N = getNumConstTerms(); - assert(N <= x.size()); + invariant(N <= x.size()); Vector xm{x[_(0, N)]}; return greater(math::view(xm)); } @@ -237,8 +236,8 @@ template struct BaseComparator { PtrVector y) const -> bool { const ptrdiff_t N = getNumConstTerms(); - assert(x.size() >= N); - assert(y.size() >= N); + invariant(x.size() >= N); + invariant(y.size() >= N); if (x[_(0, N)] == y[_(0, N)]) return true; Vector delta{x[_(0, N)] - y[_(0, N)]}; return equal(delta); @@ -264,8 +263,8 @@ concept Comparator = requires(T t, PtrVector x, int64_t y) { template struct BaseSymbolicComparator : BaseComparator> { - [[no_unique_address]] unsigned int numVar{0}; - [[no_unique_address]] unsigned int numEquations{0}; + [[no_unique_address]] ptrdiff_t numVar{0}; + [[no_unique_address]] ptrdiff_t numEquations{0}; using ThisT = BaseSymbolicComparator; using BaseT = BaseComparator; using BaseT::greaterEqual; @@ -291,16 +290,16 @@ struct BaseSymbolicComparator : BaseComparator> { [[nodiscard]] constexpr 
auto getD() const -> PtrVector { return static_cast(this)->getDImpl(); } - constexpr auto getV(Row r, Col c) -> MutDensePtrMatrix { + constexpr auto getV(Row<> r, Col<> c) -> MutDensePtrMatrix { return static_cast(this)->getVImpl(r, c); } - constexpr auto getU(Row r, Col c) -> MutDensePtrMatrix { + constexpr auto getU(Row<> r, Col<> c) -> MutDensePtrMatrix { return static_cast(this)->getUImpl(r, c); } - constexpr auto getD(Row n) -> MutPtrVector { + constexpr auto getD(Row<> n) -> MutPtrVector { return static_cast(this)->getDImpl(n); } - constexpr void setURank(Row r) { static_cast(this)->setURankImpl(r); } + constexpr void setURank(Row<> r) { static_cast(this)->setURankImpl(r); } [[nodiscard]] constexpr auto getURank() const -> ptrdiff_t { return static_cast(this)->getURankImpl(); } @@ -320,14 +319,14 @@ struct BaseSymbolicComparator : BaseComparator> { const ptrdiff_t numConExplicit = ptrdiff_t(A.numRow()) + 1; const ptrdiff_t numConTotal = numConExplicit + numNonNegative; numVar = ptrdiff_t(A.numCol()); - Row rowV = Row{numVar + numConTotal}; - Col colV = Col{2 * numConTotal}; + Row rowV = Row<>{numVar + numConTotal}; + Col colV = Col<>{2 * numConTotal}; /// B.size() == (A.numCol() + A.numRow() + 1 + numNonNegative) x /// (2 * (A.numRow() + 1 + numNonNegative)) /// auto B = getV(rowV, colV); - std::fill_n(B.begin(), B.numRow() * B.numCol(), 0); - B(0, 0) = 1; + std::fill_n(B.begin(), ptrdiff_t(B.numRow()) * ptrdiff_t(B.numCol()), 0); + B[0, 0] = 1; // B = [ A_0 A_1 // 0 I ] // V = [B' 0 @@ -335,12 +334,12 @@ struct BaseSymbolicComparator : BaseComparator> { // V = [A_0' 0 0 // A_1' I 0 // S_0 S_1 I] - B(_(begin, numVar), _(1, numConExplicit)) << A.transpose(); + B[_(begin, numVar), _(1, numConExplicit)] << A.t(); for (ptrdiff_t j = 0; j < numNonNegative; ++j) - B(j + numVar - numNonNegative, numConExplicit + j) = 1; + B[j + numVar - numNonNegative, numConExplicit + j] = 1; for (ptrdiff_t j = 0; j < numConTotal; ++j) { - B(j + numVar, j) = -1; - B(j + numVar, j + 
numConTotal) = 1; + B[j + numVar, j] = -1; + B[j + numVar, j + numConTotal] = 1; } numEquations = numConTotal; initCore(alloc); @@ -354,11 +353,11 @@ struct BaseSymbolicComparator : BaseComparator> { const ptrdiff_t numInEqConTotal = numInEqConExplicit + numNonNegative; const ptrdiff_t numEqCon = ptrdiff_t(E.numRow()); numVar = ptrdiff_t(A.numCol()); - Row rowV = Row{numVar + numInEqConTotal}; - Col colV = Col{2 * numInEqConTotal + numEqCon}; + Row rowV = Row<>{numVar + numInEqConTotal}; + Col colV = Col<>{2 * numInEqConTotal + numEqCon}; auto B = getV(rowV, colV); - std::fill_n(B.begin(), B.numRow() * B.numCol(), 0); - B(0, 0) = 1; + std::fill_n(B.begin(), ptrdiff_t(B.numRow()) * ptrdiff_t(B.numCol()), 0); + B[0, 0] = 1; // B is `A` augmented with the implicit non-negative constraints // B = [ A_0 A_1 // 0 I ] @@ -368,17 +367,17 @@ struct BaseSymbolicComparator : BaseComparator> { // A_1' I E_1' 0 // S_0 S_1 0 I] numEquations = numInEqConTotal + numEqCon; - B(_(begin, numVar), _(1, numInEqConExplicit)) << A.transpose(); - B(_(begin, numVar), _(numInEqConTotal, numInEqConTotal + numEqCon)) - << E.transpose(); + B[_(begin, numVar), _(1, numInEqConExplicit)] << A.t(); + B[_(begin, numVar), _(numInEqConTotal, numInEqConTotal + numEqCon)] + << E.t(); if (numNonNegative) - B(_(numVar - numNonNegative, numVar), - _(numInEqConExplicit, numInEqConExplicit + numNonNegative)) + B[_(numVar - numNonNegative, numVar), + _(numInEqConExplicit, numInEqConExplicit + numNonNegative)] .diag() << 1; for (ptrdiff_t j = 0; j < numInEqConTotal; ++j) { - B(j + numVar, j) = -1; - B(j + numVar, j + numEquations) = 1; + B[j + numVar, j] = -1; + B[j + numVar, j + numEquations] = 1; } initCore(alloc); } @@ -386,10 +385,11 @@ struct BaseSymbolicComparator : BaseComparator> { [[nodiscard]] static constexpr auto memoryNeededNonNegative(PtrMatrix A, EmptyMatrix, ptrdiff_t numNonNegative) -> ptrdiff_t { - return memoryNeededImpl(A.numRow(), A.numCol(), Row{0}, ++numNonNegative); + return 
memoryNeededImpl(A.numRow(), A.numCol(), Row<>{0}, ++numNonNegative); } [[nodiscard]] inline static constexpr auto - memoryNeededImpl(Row Ar, Col Ac, Row Er, ptrdiff_t numPos) -> ptrdiff_t { + memoryNeededImpl(Row<> Ar, Col<> Ac, Row<> Er, ptrdiff_t numPos) + -> ptrdiff_t { // alternative: ptrdiff_t numInEqConTotal = ptrdiff_t(Ar) + numPos; ptrdiff_t colV = (numInEqConTotal << 1) + ptrdiff_t(Er); @@ -399,7 +399,7 @@ struct BaseSymbolicComparator : BaseComparator> { [[nodiscard]] static constexpr auto memoryNeededNonNegative(PtrMatrix A, ptrdiff_t numNonNegative) -> ptrdiff_t { - return memoryNeededImpl(A.numRow(), A.numCol(), Row{0}, ++numNonNegative); + return memoryNeededImpl(A.numRow(), A.numCol(), Row<>{0}, ++numNonNegative); } [[nodiscard]] static constexpr auto memoryNeededNonNegative(PtrMatrix A, PtrMatrix E, @@ -410,11 +410,11 @@ struct BaseSymbolicComparator : BaseComparator> { [[nodiscard]] static constexpr auto memoryNeeded(PtrMatrix A, EmptyMatrix, bool pos0) -> ptrdiff_t { - return memoryNeededImpl(A.numRow(), A.numCol(), Row{0}, pos0); + return memoryNeededImpl(A.numRow(), A.numCol(), Row<>{0}, pos0); } [[nodiscard]] static constexpr auto memoryNeeded(PtrMatrix A, bool pos0) -> ptrdiff_t { - return memoryNeededImpl(A.numRow(), A.numCol(), Row{0}, pos0); + return memoryNeededImpl(A.numRow(), A.numCol(), Row<>{0}, pos0); } [[nodiscard]] static constexpr auto memoryNeeded(PtrMatrix A, PtrMatrix E, @@ -425,17 +425,17 @@ struct BaseSymbolicComparator : BaseComparator> { bool pos0) { const ptrdiff_t numCon = ptrdiff_t(A.numRow()) + pos0; numVar = ptrdiff_t(A.numCol()); - Row rowV = numVar + numCon; - Col colV = 2 * numCon; + Row<> rowV = {numVar + numCon}; + Col<> colV = {2 * numCon}; auto B = getV(rowV, colV); - std::fill_n(B.begin(), B.numRow() * B.numCol(), 0); - B(0, 0) = pos0; + std::fill_n(B.begin(), ptrdiff_t(B.numRow()) * ptrdiff_t(B.numCol()), 0); + B[0, 0] = pos0; // V = [A' 0 // S I] - B(_(begin, numVar), _(pos0, numCon)) << A.transpose(); + 
B[_(begin, numVar), _(pos0, numCon)] << A.t(); for (ptrdiff_t j = 0; j < numCon; ++j) { - B(j + numVar, j) = -1; - B(j + numVar, j + numCon) = 1; + B[j + numVar, j] = -1; + B[j + numVar, j + numCon] = 1; } numEquations = numCon; initCore(alloc); @@ -449,21 +449,21 @@ struct BaseSymbolicComparator : BaseComparator> { const ptrdiff_t numInEqCon = ptrdiff_t(A.numRow()) + pos0; numVar = ptrdiff_t(A.numCol()); const ptrdiff_t numEqCon = ptrdiff_t(E.numRow()); - Row rowV = Row{numVar + numInEqCon}; - Col colV = Col{2 * numInEqCon + numEqCon}; + Row rowV = Row<>{numVar + numInEqCon}; + Col colV = Col<>{2 * numInEqCon + numEqCon}; auto B = getV(rowV, colV); B << 0; // V = [A' E' 0 // S 0 I] - B(0, 0) = pos0; - B(_(begin, numVar), _(pos0, numInEqCon)) << A.transpose(); - // A(_, _(pos0, end)).transpose(); - B(_(begin, numVar), _(numInEqCon, numInEqCon + numEqCon)) << E.transpose(); + B[0, 0] = pos0; + B[_(begin, numVar), _(pos0, numInEqCon)] << A.t(); + // A(_, _(pos0, end)).t(); + B[_(begin, numVar), _(numInEqCon, numInEqCon + numEqCon)] << E.t(); numEquations = numInEqCon + numEqCon; for (ptrdiff_t j = 0; j < numInEqCon; ++j) { - B(j + numVar, j) = -1; - B(j + numVar, j + numEquations) = 1; + B[j + numVar, j] = -1; + B[j + numVar, j + numEquations] = 1; } initCore(alloc); } @@ -477,7 +477,7 @@ struct BaseSymbolicComparator : BaseComparator> { U.diag() << 1; // We will have query of the form Ax = q; simplifySystemsImpl({B, U}); - while ((R) && allZero(B(R - 1, _))) --R; + while ((R) && allZero(B[ptrdiff_t(R) - 1, _])) --R; setURank(R); ptrdiff_t numColB = ptrdiff_t(B.numCol()); // upper bounded by numVar + numInEq x numVar + numInEq @@ -490,15 +490,15 @@ struct BaseSymbolicComparator : BaseComparator> { auto Vt{identity(alloc, numColB)}; // Ht.numRow() > Ht.numCol() = R // (2*numInEq + numEq) x R - auto Ht = matrix(alloc, Row{numColB}, Col{ptrdiff_t(R)}); - Ht << B(_(0, R), _).transpose(); + auto Ht = matrix(alloc, Row<>{numColB}, Col<>{ptrdiff_t(R)}); + Ht << B[_(0, R), 
_].t(); solveSystem(Ht, Vt); // upper bounded by numVar + numInEq // rows/cols, but of rank R // smaller based on rank getD(R) << Ht.diag(); // d.size() == R // upper bounded by 2*numInEq + numEq x 2*numInEq + numEq - getV() << Vt.transpose(); + getV() << Vt.t(); } // Note that this is only valid when the comparator was constructed @@ -507,26 +507,21 @@ struct BaseSymbolicComparator : BaseComparator> { auto V = getV(); auto U = getU(); auto d = getD(); - StridedVector b{U(_, 0)}; + StridedVector b{U[_, 0]}; if (d.empty()) { if (!allZero(b[_(V.numRow(), end)])) return false; Col oldn = V.numCol(); - auto H{matrix(&alloc, V.numRow(), oldn + 1)}; + auto H{matrix(&alloc, V.numRow(), ++auto{oldn})}; // IntMatrix H{V.numRow(), oldn + 1}; - H(_, _(0, oldn)) << V; - H(_, oldn) << -b; + H[_, _(0, oldn)] << V; + H[_, oldn] << -b; solveSystem(H); - bool ret = true; for (ptrdiff_t i = numEquations; i < H.numRow(); ++i) - if (auto rhs = H(i, oldn)) - if ((rhs > 0) != (H(i, i) > 0)) { - ret = false; - break; - } - return ret; + if ((H[i, oldn] > 0) != (H[i, i] > 0)) return false; + return true; } // Column rank deficient case - Row numSlack = V.numRow() - numEquations; + Row numSlack = Row<>{ptrdiff_t(V.numRow()) - numEquations}; // Vector dinv = d; // copy // We represent D martix as a vector, and multiply the lcm to the // linear equation to avoid store D^(-1) as rational type @@ -535,33 +530,33 @@ struct BaseSymbolicComparator : BaseComparator> { b2 << -b * lcmD / d; // Vector b2 = -b * Dlcm / d; ptrdiff_t numRowTrunc = ptrdiff_t(U.numRow()); - auto c{vector(&alloc, ptrdiff_t(V.numRow() - numEquations))}; - c << V(_(numEquations, end), _(begin, numRowTrunc)) * b2; + auto c{vector(&alloc, ptrdiff_t(V.numRow()) - numEquations)}; + c << b2 * V[_(numEquations, end), _(begin, numRowTrunc)].t(); // Vector c = V(_(numEquations, end), _(begin, numRowTrunc)) * // b2; - auto dimNS = V.numCol() - numRowTrunc; + ptrdiff_t dimNS = ptrdiff_t(V.numCol()) - numRowTrunc; // expand W stores [c 
-JV2 JV2] // we use simplex to solve [-JV2 JV2][y2+ y2-]' <= JV1D^(-1)Uq // where y2 = y2+ - y2- - auto expandW{matrix(&alloc, Row{numSlack}, Col{dimNS * 2 + 1})}; + auto expandW{matrix(&alloc, numSlack, Col<>{dimNS * 2 + 1})}; for (ptrdiff_t i = 0; i < numSlack; ++i) { - expandW(i, 0) = c[i]; + expandW[i, 0] = c[i]; // expandW(i, 0) *= Dlcm; for (ptrdiff_t j = 0; j < dimNS; ++j) { - auto val = V(i + numEquations, numRowTrunc + j) * lcmD; - expandW(i, j + 1) = -val; - expandW(i, dimNS + 1 + j) = val; + auto val = V[i + numEquations, numRowTrunc + j] * lcmD; + expandW[i, j + 1] = -val; + expandW[i, dimNS + 1 + j] = val; } } return Simplex::positiveVariables(&alloc, expandW).hasValue(); } [[nodiscard]] constexpr auto isEmpty() const -> bool { - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; return isEmpty(alloc); } [[nodiscard]] constexpr auto greaterEqual(PtrVector query) const -> bool { - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; return greaterEqual(alloc, query); } [[nodiscard]] constexpr auto greaterEqualFullRank(Arena<> *alloc, @@ -569,16 +564,15 @@ struct BaseSymbolicComparator : BaseComparator> { -> bool { auto V = getV(); if (!allZero(b[_(V.numRow(), end)])) return false; - auto H = matrix(alloc, V.numRow(), V.numCol() + 1); + auto H = matrix(alloc, V.numRow(), ++auto{V.numCol()}); Col oldn = V.numCol(); - H(_, _(0, oldn)) << V; + H[_, _(0, oldn)] << V; // H.numRow() == b.size(), because we're only here if dimD == 0, // in which case V.numRow() == U.numRow() == b.size() - H(_, oldn) << b; + H[_, oldn] << b; solveSystem(H); for (ptrdiff_t i = numEquations; i < H.numRow(); ++i) - if (auto rhs = H(i, oldn)) - if ((rhs > 0) != (H(i, i) > 0)) return false; + if ((H[i, oldn] > 0) != (H[i, i] > 0)) return false; return true; } [[nodiscard]] constexpr auto @@ -586,7 +580,7 @@ struct BaseSymbolicComparator : BaseComparator> { -> bool { auto V = getV(); auto d = getD(); - Row numSlack = V.numRow() - numEquations; + Row numSlack = 
Row<>{ptrdiff_t(V.numRow()) - numEquations}; auto dinv = vector(alloc, d.size()); dinv << d; // copy // We represent D martix as a vector, and multiply the lcm to the @@ -598,20 +592,20 @@ struct BaseSymbolicComparator : BaseComparator> { b[i] *= x; } ptrdiff_t numRowTrunc = getURank(); - auto c = vector(alloc, unsigned(V.numRow() - numEquations)); - c << V(_(numEquations, end), _(begin, numRowTrunc)) * b; - auto dimNS = V.numCol() - numRowTrunc; + auto c = vector(alloc, ptrdiff_t(V.numRow()) - numEquations); + c << b * V[_(numEquations, end), _(begin, numRowTrunc)].t(); + auto dimNS = ptrdiff_t(V.numCol()) - numRowTrunc; // expand W stores [c -JV2 JV2] // we use simplex to solve [-JV2 JV2][y2+ y2-]' <= JV1D^(-1)Uq // where y2 = y2+ - y2- - auto expandW = matrix(alloc, numSlack, dimNS * 2 + 1); + auto expandW = matrix(alloc, numSlack, Col<>{dimNS * 2 + 1}); for (ptrdiff_t i = 0; i < numSlack; ++i) { - expandW(i, 0) = c[i]; + expandW[i, 0] = c[i]; // expandW(i, 0) *= Dlcm; for (ptrdiff_t j = 0; j < dimNS;) { - auto val = V(i + numEquations, numRowTrunc + j++) * lcmD; - expandW(i, j) = -val; - expandW(i, dimNS + j) = val; + auto val = V[i + numEquations, numRowTrunc + j++] * lcmD; + expandW[i, j] = -val; + expandW[i, dimNS + j] = val; } } Optional optS{Simplex::positiveVariables(alloc, expandW)}; @@ -621,8 +615,8 @@ struct BaseSymbolicComparator : BaseComparator> { PtrVector query) const -> bool { auto U = getU(); - auto b = vector(&alloc, unsigned(U.numRow())); - b << U(_, _(begin, query.size())) * query; + auto b = vector(&alloc, ptrdiff_t(U.numRow())); + b << query * U[_, _(begin, query.size())].t(); return getD().size() ? 
greaterEqualRankDeficient(&alloc, b) : greaterEqualFullRank(&alloc, b); } @@ -631,7 +625,7 @@ struct LinearSymbolicComparator : public BaseSymbolicComparator { using Base = BaseSymbolicComparator; using Base::init; - using Matrix = math::ManagedArray; + using Matrix = math::ManagedArray>; [[no_unique_address]] Matrix U; [[no_unique_address]] Matrix V; [[no_unique_address]] Vector d; @@ -648,7 +642,7 @@ struct LinearSymbolicComparator return d; } - constexpr void setURankImpl(Row r) { + constexpr void setURankImpl(Row<> r) { V.truncate(r); U.truncate(r); } @@ -661,18 +655,18 @@ struct LinearSymbolicComparator [[nodiscard]] constexpr auto getURankImpl() const -> ptrdiff_t { return ptrdiff_t(U.numRow()); } - constexpr auto getUImpl(Row r, Col c) -> MutDensePtrMatrix { + constexpr auto getUImpl(Row<> r, Col<> c) -> MutDensePtrMatrix { U.resizeForOverwrite(r, c); return U; } - constexpr auto getVImpl(Row r, Col c) -> MutDensePtrMatrix { + constexpr auto getVImpl(Row<> r, Col<> c) -> MutDensePtrMatrix { V.setSize(r, c); - U.setSize(r, Col{ptrdiff_t(r)}); + U.setSize(r, Col<>{ptrdiff_t(r)}); return V; } - constexpr auto getDImpl(Row N) -> MutPtrVector { + constexpr auto getDImpl(Row<> N) -> MutPtrVector { d.resizeForOverwrite(ptrdiff_t(N)); - V.resizeForOverwrite(Row{ptrdiff_t{V.numCol()}}); + V.resizeForOverwrite(Row<>{ptrdiff_t{V.numCol()}}); return d; } static constexpr auto construct(PtrMatrix Ap, EmptyMatrix, @@ -682,14 +676,14 @@ struct LinearSymbolicComparator static constexpr auto construct(PtrMatrix Ap, bool pos0) -> LinearSymbolicComparator { LinearSymbolicComparator cmp; - std::allocator alloc{}; + alloc::Mallocator alloc{}; cmp.init(alloc, Ap, pos0); return cmp; }; static constexpr auto construct(PtrMatrix Ap, PtrMatrix Ep, bool pos0) -> LinearSymbolicComparator { LinearSymbolicComparator cmp; - std::allocator alloc{}; + alloc::Mallocator alloc{}; cmp.init(alloc, Ap, Ep, pos0); return cmp; }; @@ -703,7 +697,7 @@ struct LinearSymbolicComparator ptrdiff_t 
numNonNeg) -> LinearSymbolicComparator { LinearSymbolicComparator cmp; - std::allocator alloc{}; + alloc::Mallocator alloc{}; cmp.initNonNegative(alloc, Ap, numNonNeg); return cmp; }; @@ -712,7 +706,7 @@ struct LinearSymbolicComparator ptrdiff_t numNonNeg) -> LinearSymbolicComparator { LinearSymbolicComparator cmp; - std::allocator alloc{}; + alloc::Mallocator alloc{}; cmp.initNonNegative(alloc, Ap, Ep, numNonNeg); return cmp; }; @@ -722,15 +716,12 @@ struct PtrSymbolicComparator using Base = BaseSymbolicComparator; using Base::init; int64_t *mem; - // unsigned int numVar; - // unsigned int numInEq; - // unsigned int numEq; - unsigned int rankU{0}; - unsigned int colU{0}; - unsigned int dimV{0}; - unsigned int dimD{0}; + ptrdiff_t rankU{0}; + ptrdiff_t colU{0}; + ptrdiff_t dimV{0}; + ptrdiff_t dimD{0}; - constexpr void setURankImpl(Row r) { rankU = unsigned(r); } + constexpr void setURankImpl(Row<> r) { rankU = ptrdiff_t(r); } [[nodiscard]] constexpr auto getURankImpl() const -> ptrdiff_t { return rankU; } @@ -747,18 +738,15 @@ struct PtrSymbolicComparator // } // NOLINTNEXTLINE(readability-make-member-function-const) constexpr auto getUImpl() -> MutDensePtrMatrix { - return {mem, DenseDims{rankU, colU}}; + return {mem, DenseDims<>{{rankU}, {colU}}}; } // A = V // H = A // H.truncate(Row()); // size is H.numCol() * H.numCol() - [[nodiscard]] constexpr auto numVRows() const -> unsigned { - return dimD ? 
dimV : rankU; - } // offset by (numVar + numInEq)*(numVar + numInEq) constexpr auto getVImpl() -> MutDensePtrMatrix { - return {getUImpl().end(), DenseDims{numVRows(), dimV}}; + return {getUImpl().end(), DenseDims<>{numVRows(), Col<>{dimV}}}; } // size D constexpr auto getDImpl() -> MutPtrVector { @@ -766,24 +754,25 @@ struct PtrSymbolicComparator return {getVImpl().end(), dimD}; } [[nodiscard]] constexpr auto getUImpl() const -> DensePtrMatrix { - return {mem, DenseDims{rankU, colU}}; + return {mem, DenseDims<>{Row<>{rankU}, Col<>{colU}}}; } [[nodiscard]] constexpr auto getVImpl() const -> DensePtrMatrix { - return {mem + ptrdiff_t(rankU) * colU, DenseDims{numVRows(), dimV}}; + return {mem + ptrdiff_t(rankU) * colU, + DenseDims<>{numVRows(), Col<>{dimV}}}; } [[nodiscard]] constexpr auto getDImpl() const -> PtrVector { return {mem + ptrdiff_t(rankU) * colU + ptrdiff_t(numVRows()) * dimV, dimD}; } // constexpr auto getUImpl(Row r, Col c) -> MutPtrMatrix {} - constexpr auto getVImpl(Row r, Col c) -> MutDensePtrMatrix { - colU = rankU = unsigned(r); - dimV = unsigned(c); + constexpr auto getVImpl(Row<> r, Col<> c) -> MutDensePtrMatrix { + colU = rankU = ptrdiff_t(r); + dimV = ptrdiff_t(c); getUImpl() << 0; dimD = 0; return getVImpl(); } - constexpr auto getDImpl(Row r) -> MutPtrVector { - dimD = unsigned(r); + constexpr auto getDImpl(Row<> r) -> MutPtrVector { + dimD = ptrdiff_t(r); invariant(dimD > 0); return getDImpl(); } @@ -831,6 +820,10 @@ struct PtrSymbolicComparator }; private: + [[nodiscard]] constexpr auto numVRows() const -> Row<> { + return {ptrdiff_t(dimD ? 
dimV : rankU)}; + } + constexpr PtrSymbolicComparator(int64_t *p) : mem(p) {} }; @@ -839,24 +832,24 @@ static_assert(Comparator); constexpr void moveEqualities(DenseMatrix &, EmptyMatrix, const Comparator auto &) {} -constexpr void moveEqualities(DenseMatrix &A, math::IntMatrix &E, +constexpr void moveEqualities(DenseMatrix &A, math::IntMatrix<> &E, const Comparator auto &C) { const ptrdiff_t numVar = ptrdiff_t(E.numCol()); - assert(A.numCol() == numVar); + invariant(A.numCol() == numVar); if (A.numRow() <= 1) return; for (ptrdiff_t o = ptrdiff_t(A.numRow()) - 1; o > 0;) { for (ptrdiff_t i = o--; i < A.numRow(); ++i) { bool isNeg = true; for (ptrdiff_t v = 0; v < numVar; ++v) { - if (A(i, v) != -A(o, v)) { + if (A[i, v] != -A[o, v]) { isNeg = false; break; } } - if (isNeg && C.equalNegative(A(i, _), A(o, _))) { + if (isNeg && C.equalNegative(A[i, _], A[o, _])) { ptrdiff_t e = ptrdiff_t(E.numRow()); - E.resize(e + 1, numVar); - for (ptrdiff_t v = 0; v < numVar; ++v) E(e, v) = A(i, v); + E.resize(Row<>{e + 1}, Col<>{numVar}); + for (ptrdiff_t v = 0; v < numVar; ++v) E[e, v] = A[i, v]; eraseConstraint(A, i, o); break; } @@ -865,7 +858,7 @@ constexpr void moveEqualities(DenseMatrix &A, math::IntMatrix &E, } // NOLINTNEXTLINE(performance-unnecessary-value-param) -constexpr auto linear(std::allocator, PtrMatrix A, +constexpr auto linear(alloc::Mallocator, PtrMatrix A, EmptyMatrix, bool pos0) { return LinearSymbolicComparator::construct(A, pos0); } @@ -874,7 +867,7 @@ constexpr auto linear(Arena<> *alloc, PtrMatrix A, return PtrSymbolicComparator::construct(alloc, A, pos0); } // NOLINTNEXTLINE(performance-unnecessary-value-param) -constexpr auto linear(std::allocator, PtrMatrix A, +constexpr auto linear(alloc::Mallocator, PtrMatrix A, PtrMatrix E, bool pos0) { return LinearSymbolicComparator::construct(A, E, pos0); } @@ -884,8 +877,9 @@ constexpr auto linear(Arena<> *alloc, PtrMatrix A, } // NOLINTNEXTLINE(performance-unnecessary-value-param) -constexpr auto 
linearNonNegative(std::allocator, PtrMatrix A, - EmptyMatrix, ptrdiff_t numNonNeg) { +constexpr auto linearNonNegative(alloc::Mallocator, + PtrMatrix A, EmptyMatrix, + ptrdiff_t numNonNeg) { return LinearSymbolicComparator::constructNonNeg(A, numNonNeg); } constexpr auto linearNonNegative(Arena<> *alloc, PtrMatrix A, @@ -893,8 +887,9 @@ constexpr auto linearNonNegative(Arena<> *alloc, PtrMatrix A, return PtrSymbolicComparator::constructNonNeg(alloc, A, numNonNeg); } // NOLINTNEXTLINE(performance-unnecessary-value-param) -constexpr auto linearNonNegative(std::allocator, PtrMatrix A, - PtrMatrix E, ptrdiff_t numNonNeg) { +constexpr auto linearNonNegative(alloc::Mallocator, + PtrMatrix A, PtrMatrix E, + ptrdiff_t numNonNeg) { return LinearSymbolicComparator::constructNonNeg(A, E, numNonNeg); } constexpr auto linearNonNegative(Arena<> *alloc, PtrMatrix A, diff --git a/include/Polyhedra/Dependence.hpp b/include/Polyhedra/Dependence.hpp index 626b2aeb7..059747c48 100644 --- a/include/Polyhedra/Dependence.hpp +++ b/include/Polyhedra/Dependence.hpp @@ -1,11 +1,16 @@ #pragma once #include "IR/Address.hpp" +#include "IR/Node.hpp" +#include "Math/Array.hpp" +#include "Math/Simplex.hpp" #include "Polyhedra/DependencyPolyhedra.hpp" #include "Polyhedra/Loops.hpp" #include "Polyhedra/Schedule.hpp" #include "Support/Iterators.hpp" +#include +#include #include -#include +#include #include #include #include @@ -17,20 +22,29 @@ namespace poly { /// Represents a dependence relationship between two memory accesses. /// It contains simplices representing constraints that affine schedules /// are allowed to take. -class Dependence { -public: +struct Dependence { + + // public: struct ID { int32_t id; + [[nodiscard]] constexpr explicit operator bool() const { return id >= 0; } + }; + // TODO: revert to `bool` flag for `Forward`? 
+ enum MetaFlags : uint8_t { + Forward = 1, + FreeOfDeeperDeps = 2, + Reassociable = 4, + NotReassociable = 8 }; -private: + // private: // // - NotNull depPoly; - NotNull dependenceSatisfaction; - NotNull dependenceBounding; - NotNull in; - NotNull out; + Valid depPoly; + Valid dependenceSatisfaction; + Valid dependenceBounding; + Valid in; + Valid out; // Dependence *nextInput{nullptr}; // all share same `in` // Dependence *nextOutput{nullptr}; // // all share same `out` @@ -39,14 +53,24 @@ class Dependence { // // was because of offsets when solving the linear program (value = // // 1). // std::array satLvl{255, 255, 255, 255, 255, 255, 255}; + ID revTimeEdge_{-1}; std::array satLvl; - bool forward; + uint8_t meta{0}; + uint8_t peel{255}; // sentinal value for cannot peel + + // template [[nodiscard]] auto get() const -> const auto & { + // if constexpr (I == 0) return depPoly; + // else if constexpr (I==1) return dependenceSatisfaction; + // else if constexpr (I==1) return dependenceBounding; + // } - constexpr auto getSimplexPair() -> std::array, 2> { + constexpr auto getSimplexPair() -> std::array, 2> { return {dependenceSatisfaction, dependenceBounding}; } + [[nodiscard]] constexpr auto getMeta() const -> uint8_t { return meta; } + [[nodiscard]] constexpr auto getPeel() const -> uint8_t { return peel; } -public: + // public: friend class Dependencies; // constexpr auto getNextInput() -> Dependence * { return nextInput; } // [[nodiscard]] constexpr auto getNextInput() const -> const Dependence * { @@ -58,32 +82,30 @@ class Dependence { // [[nodiscard]] constexpr auto getNextOutput() const -> const Dependence * { // return nextOutput; // } - [[nodiscard]] constexpr auto input() -> NotNull { return in; } - [[nodiscard]] constexpr auto output() -> NotNull { return out; } - [[nodiscard]] constexpr auto input() const -> NotNull { - return in; - } - [[nodiscard]] constexpr auto output() const -> NotNull { - return out; + [[nodiscard]] constexpr auto input() const 
-> Valid { return in; } + [[nodiscard]] constexpr auto output() const -> Valid { return out; } + [[nodiscard]] constexpr auto revTimeEdge() const -> ID { + return revTimeEdge_; } + [[nodiscard]] constexpr auto peelable() const -> bool { return peel != 255; } // constexpr auto setNextInput(Dependence *n) -> Dependence * { // return nextInput = n; // } // constexpr auto setNextOutput(Dependence *n) -> Dependence * { // return nextOutput = n; // } - constexpr Dependence(NotNull poly, - std::array, 2> depSatBound, - NotNull i, NotNull o, bool fwd) - : depPoly(poly), dependenceSatisfaction(depSatBound[0]), - dependenceBounding(depSatBound[1]), in(i), out(o), forward(fwd) {} - constexpr Dependence(NotNull poly, - std::array, 2> depSatBound, - NotNull i, NotNull o, - std::array sL, bool fwd) - : depPoly(poly), dependenceSatisfaction(depSatBound[0]), - dependenceBounding(depSatBound[1]), in(i), out(o), satLvl(sL), - forward(fwd) {} + // constexpr Dependence(Valid poly, + // std::array, 2> depSatBound, + // Valid i, Valid o, bool fwd) + // : depPoly(poly), dependenceSatisfaction(depSatBound[0]), + // dependenceBounding(depSatBound[1]), in(i), out(o), forward(fwd) {} + // constexpr Dependence(Valid poly, + // std::array, 2> depSatBound, + // Valid i, Valid o, + // std::array sL, bool fwd) + // : depPoly(poly), dependenceSatisfaction(depSatBound[0]), + // dependenceBounding(depSatBound[1]), in(i), out(o), satLvl(sL), + // forward(fwd) {} /// stashSatLevel() -> Dependence & /// This is used to track sat levels in the LP recursion. @@ -113,14 +135,19 @@ class Dependence { static constexpr auto satLevelMask(uint8_t slvl) -> uint8_t { return slvl & uint8_t(127); // NOTE: deduces to `int` } + // note that sat levels start at `0`, `0` meaning the outer most loop + // satisfies it. Thus, `satLevel() == 0` means the `depth == 1` loop satisfied + // it. 
[[nodiscard]] constexpr auto satLevel() const -> uint8_t { return satLevelMask(satLvl[0]); } - [[nodiscard]] constexpr auto isSat(unsigned depth) const -> bool { + /// `isSat` returns `true` on the level that satisfies it + [[nodiscard]] constexpr auto isSat(int depth) const -> bool { invariant(depth <= 127); return satLevel() <= depth; } - [[nodiscard]] constexpr auto isActive(unsigned depth) const -> bool { + /// `isActive` returns `false` on the level that satisfies it + [[nodiscard]] constexpr auto isActive(int depth) const -> bool { invariant(depth <= 127); return satLevel() > depth; } @@ -147,7 +174,9 @@ class Dependence { return in->getArrayPointer(); } /// indicates whether forward is non-empty - [[nodiscard]] constexpr auto isForward() const -> bool { return forward; } + /// Direction in simplex [x,y]: Forward ? x -> y : y -> x + /// i.e., is the simplex `[in, out]` (forward) or `[out, in]` (!forward) + [[nodiscard]] constexpr auto isForward() const -> bool { return meta & 1; } [[nodiscard]] constexpr auto nodeIn() const -> const lp::ScheduledNode * { return in->getNode(); } @@ -166,9 +195,9 @@ class Dependence { return in->indexMatrix(); } // satisfies dep if it is empty when conditioning on inPhi and outPhi - void checkEmptySat(Arena<> *alloc, NotNull inLoop, + void checkEmptySat(Arena<> *alloc, Valid inLoop, const int64_t *inOff, DensePtrMatrix inPhi, - NotNull outLoop, const int64_t *outOff, + Valid outLoop, const int64_t *outOff, DensePtrMatrix outPhi) { if (!isForward()) { std::swap(inLoop, outLoop); @@ -178,7 +207,7 @@ class Dependence { invariant(inPhi.numRow(), outPhi.numRow()); if (depPoly->checkSat(*alloc, inLoop, inOff, inPhi, outLoop, outOff, outPhi)) - satLvl[0] = uint8_t(inPhi.numRow() - 1); + satLvl[0] = uint8_t(ptrdiff_t(inPhi.numRow()) - 1); } constexpr void copySimplices(Arena<> *alloc) { dependenceSatisfaction = dependenceSatisfaction->copy(alloc); @@ -211,7 +240,8 @@ class Dependence { return out->getNaturalDepth(); } [[nodiscard]] 
constexpr auto isInactive(size_t depth) const -> bool { - return (depth >= std::min(out->getCurrentDepth(), in->getCurrentDepth())); + return (depth >= + size_t(std::min(out->getCurrentDepth(), in->getCurrentDepth()))); } [[nodiscard]] constexpr auto getNumLambda() const -> unsigned { return depPoly->getNumLambda() << 1; @@ -241,14 +271,14 @@ class Dependence { // 2 == 1 for const offset + 1 for w assert(2 + depPoly->getNumLambda() + getNumPhiCoefficients() + getNumOmegaCoefficients() == - size_t(dependenceSatisfaction->getConstraints().numCol())); - } - [[nodiscard]] constexpr auto getDepPoly() -> NotNull { - return depPoly; + ptrdiff_t(dependenceSatisfaction->getConstraints().numCol())); } - [[nodiscard]] constexpr auto getDepPoly() const -> NotNull { + [[nodiscard]] constexpr auto getDepPoly() const -> Valid { return depPoly; } + // [[nodiscard]] constexpr auto getDepPoly() const -> Valid { + // return depPoly; + // } [[nodiscard]] constexpr auto getNumConstraints() const -> unsigned { return dependenceBounding->getNumCons() + dependenceSatisfaction->getNumCons(); @@ -266,52 +296,52 @@ class Dependence { return dependenceBounding->getConstraints(); } [[nodiscard]] auto getSatLambda() const -> PtrMatrix { - return getSatConstraints()(_, _(1, 1 + depPoly->getNumLambda())); + return getSatConstraints()[_, _(1, 1 + depPoly->getNumLambda())]; } [[nodiscard]] auto getBndLambda() const -> PtrMatrix { - return getBndConstraints()(_, _(1, 1 + depPoly->getNumLambda())); + return getBndConstraints()[_, _(1, 1 + depPoly->getNumLambda())]; } [[nodiscard]] auto getSatPhiCoefs() const -> PtrMatrix { auto l = 3 + depPoly->getNumLambda(); - return getSatConstraints()(_, _(l, l + getNumPhiCoefficients())); + return getSatConstraints()[_, _(l, l + getNumPhiCoefficients())]; } [[nodiscard]] auto getSatPhi0Coefs() const -> PtrMatrix { auto l = 3 + depPoly->getNumLambda(); - return getSatConstraints()(_, _(l, l + depPoly->getDim0())); + return getSatConstraints()[_, _(l, l + 
depPoly->getDim0())]; } [[nodiscard]] auto getSatPhi1Coefs() const -> PtrMatrix { auto l = 3 + depPoly->getNumLambda() + depPoly->getDim0(); - return getSatConstraints()(_, _(l, l + depPoly->getDim1())); + return getSatConstraints()[_, _(l, l + depPoly->getDim1())]; } [[nodiscard]] auto getBndPhiCoefs() const -> PtrMatrix { auto l = 3 + depPoly->getNumLambda(); - return getBndConstraints()(_, _(l, l + getNumPhiCoefficients())); + return getBndConstraints()[_, _(l, l + getNumPhiCoefficients())]; } [[nodiscard]] auto getBndPhi0Coefs() const -> PtrMatrix { auto l = 3 + depPoly->getNumLambda(); - return getBndConstraints()(_, _(l, l + depPoly->getDim0())); + return getBndConstraints()[_, _(l, l + depPoly->getDim0())]; } [[nodiscard]] auto getBndPhi1Coefs() const -> PtrMatrix { auto l = 3 + depPoly->getNumLambda() + depPoly->getDim0(); - return getBndConstraints()(_, _(l, l + depPoly->getDim1())); + return getBndConstraints()[_, _(l, l + depPoly->getDim1())]; } [[nodiscard]] auto getSatOmegaCoefs() const -> PtrMatrix { auto l = 1 + depPoly->getNumLambda(); - return getSatConstraints()(_, _(l, l + getNumOmegaCoefficients())); + return getSatConstraints()[_, _(l, l + getNumOmegaCoefficients())]; } [[nodiscard]] auto getBndOmegaCoefs() const -> PtrMatrix { auto l = 1 + depPoly->getNumLambda(); - return getBndConstraints()(_, _(l, l + getNumOmegaCoefficients())); + return getBndConstraints()[_, _(l, l + getNumOmegaCoefficients())]; } [[nodiscard]] auto getSatW() const -> math::StridedVector { - return getSatConstraints()(_, 1 + depPoly->getNumLambda() + + return getSatConstraints()[_, 1 + depPoly->getNumLambda() + getNumPhiCoefficients() + - getNumOmegaCoefficients()); + getNumOmegaCoefficients()]; } [[nodiscard]] auto getBndCoefs() const -> PtrMatrix { size_t lb = 1 + depPoly->getNumLambda() + getNumPhiCoefficients() + getNumOmegaCoefficients(); - return getBndConstraints()(_, _(lb, end)); + return getBndConstraints()[_, _(lb, end)]; } [[nodiscard]] auto satPhiCoefs() 
const -> std::array, 2> { PtrMatrix phiCoefsIn = getSatPhi1Coefs(), @@ -326,14 +356,14 @@ class Dependence { return {phiCoefsIn, phiCoefsOut}; } [[nodiscard]] auto isSatisfied(Arena<> alloc, - NotNull schIn, - NotNull schOut) const + Valid schIn, + Valid schOut) const -> bool { - unsigned numLoopsIn = in->getCurrentDepth(), - numLoopsOut = out->getCurrentDepth(), - numLoopsCommon = std::min(numLoopsIn, numLoopsOut), - numLoopsTotal = numLoopsIn + numLoopsOut, - numVar = numLoopsIn + numLoopsOut + 2; + ptrdiff_t numLoopsIn = in->getCurrentDepth(), + numLoopsOut = out->getCurrentDepth(), + numLoopsCommon = std::min(numLoopsIn, numLoopsOut), + numLoopsTotal = numLoopsIn + numLoopsOut, + numVar = numLoopsIn + numLoopsOut + 2; invariant(dependenceSatisfaction->getNumVars(), numVar); auto schv = vector(&alloc, numVar, int64_t(0)); const SquarePtrMatrix inPhi = schIn->getPhi(); @@ -362,8 +392,8 @@ class Dependence { // forward means offset is 2nd - 1st schv[0] = outOffOmega[i]; schv[1] = inOffOmega[i]; - schv[_(2, 2 + numLoopsIn)] << inPhi(last - i, _); - schv[_(2 + numLoopsIn, 2 + numLoopsTotal)] << outPhi(last - i, _); + schv[_(2, 2 + numLoopsIn)] << inPhi[last - i, _]; + schv[_(2 + numLoopsIn, 2 + numLoopsTotal)] << outPhi[last - i, _]; // dependenceSatisfaction is phi_t - phi_s >= 0 // dependenceBounding is w + u'N - (phi_t - phi_s) >= 0 // we implicitly 0-out `w` and `u` here, @@ -379,16 +409,16 @@ class Dependence { [[nodiscard]] auto isSatisfied(Arena<> alloc, PtrVector inFusOmega, PtrVector outFusOmega) const -> bool { - unsigned numLoopsIn = in->getCurrentDepth(), - numLoopsOut = out->getCurrentDepth(), - numLoopsCommon = std::min(numLoopsIn, numLoopsOut), - numVar = numLoopsIn + numLoopsOut + 2; + ptrdiff_t numLoopsIn = in->getCurrentDepth(), + numLoopsOut = out->getCurrentDepth(), + numLoopsCommon = std::min(numLoopsIn, numLoopsOut), + numVar = numLoopsIn + numLoopsOut + 2; invariant(dependenceSatisfaction->getNumVars(), numVar); auto schv = vector(&alloc, 
numVar, int64_t(0)); // Vector schv(dependenceSatisfaction->getNumVars(),int64_t(0)); const unsigned numLambda = getNumLambda(); // when i == numLoopsCommon, we've passed the last loop - for (size_t i = 0; i <= numLoopsCommon; ++i) { + for (ptrdiff_t i = 0; i <= numLoopsCommon; ++i) { if (int64_t o2idiff = outFusOmega[i] - inFusOmega[i]) return (o2idiff > 0); // we should not be able to reach `numLoopsCommon` @@ -420,10 +450,9 @@ class Dependence { } return true; } - [[nodiscard]] auto isSatisfied(Arena<> alloc, - NotNull sx, - NotNull sy, - size_t d) const -> bool { + [[nodiscard]] auto isSatisfied(Arena<> alloc, Valid sx, + Valid sy, size_t d) const + -> bool { unsigned numLambda = depPoly->getNumLambda(), nLoopX = depPoly->getDim0(), nLoopY = depPoly->getDim1(), numLoopsTotal = nLoopX + nLoopY; MutPtrVector sch{math::vector(&alloc, numLoopsTotal + 2)}; @@ -435,9 +464,9 @@ class Dependence { return dependenceSatisfaction->satisfiable(alloc, sch, numLambda); } [[nodiscard]] auto isSatisfied(Arena<> alloc, size_t d) const -> bool { - unsigned numLambda = depPoly->getNumLambda(), - numLoopsX = depPoly->getDim0(), - numLoopsTotal = numLoopsX + depPoly->getDim1(); + ptrdiff_t numLambda = depPoly->getNumLambda(), + numLoopsX = depPoly->getDim0(), + numLoopsTotal = numLoopsX + depPoly->getDim1(); MutPtrVector sch{math::vector(&alloc, numLoopsTotal + 2)}; sch << 0; invariant(sch.size(), numLoopsTotal + 2); @@ -446,18 +475,6 @@ class Dependence { return dependenceSatisfaction->satisfiable(alloc, sch, numLambda); } - struct Active { - unsigned depth; - constexpr Active(const Active &) noexcept = default; - constexpr Active(Active &&) noexcept = default; - constexpr Active() noexcept = default; - constexpr auto operator=(const Active &) noexcept -> Active & = default; - constexpr Active(unsigned depth) : depth(depth) {} - constexpr auto operator()(const Dependence *d) const -> bool { - return d->isActive(depth); - } - }; - friend inline auto operator<<(llvm::raw_ostream 
&os, const Dependence &d) -> llvm::raw_ostream & { os << "Dependence Poly "; @@ -548,72 +565,134 @@ static_assert(sizeof(Dependence) <= 64); // i_0 = i_1 // j_0 = j_1 - k_1 class Dependencies { - char *data{nullptr}; - int32_t numData{0}; - // int32_t tombstone{-1}; + using Tuple = + containers::Tuple, 2>, DepPoly *, int32_t, + int32_t, int32_t, int32_t, int32_t, + std::array, uint8_t, uint8_t>; + + math::ManagedSOA datadeps; + + static constexpr size_t OutI = 0; + static constexpr size_t InI = 1; + static constexpr size_t SimplexPairI = 2; + static constexpr size_t DepPolyI = 3; + static constexpr size_t NextEdgeOutI = 4; + static constexpr size_t PrevEdgeOutI = 5; + static constexpr size_t NextEdgeInI = 6; + static constexpr size_t PrevEdgeInI = 7; + static constexpr size_t RevTimeEdgeI = 8; + static constexpr size_t SatLevelI = 9; + static constexpr size_t GetMetaI = 10; + static constexpr size_t GetPeelI = 11; public: using ID = Dependence::ID; - constexpr Dependencies() noexcept = default; - constexpr Dependencies(Arena<> *alloc) - : data(alloc->allocate(memNeeded(64))) {} - constexpr Dependencies(const Dependencies &) noexcept = default; // or delete? - constexpr Dependencies(Dependencies &&) noexcept = default; // or delete? 
- constexpr auto operator=(Dependencies &&other) noexcept - -> Dependencies & = default; - constexpr auto operator=(const Dependencies &other) noexcept - -> Dependencies & = default; + Dependencies(ptrdiff_t len) : datadeps(len) {} + Dependencies(const Dependencies &) noexcept = delete; + constexpr Dependencies(Dependencies &&) noexcept = default; + constexpr auto operator=(Dependencies &&other) noexcept -> Dependencies & { + datadeps = std::move(other.datadeps); + return *this; + }; - [[nodiscard]] constexpr auto size() const noexcept -> int32_t { - return numData; + [[nodiscard]] constexpr auto size() const noexcept -> ptrdiff_t { + return datadeps.size(); } private: - void addEdge(Arena<> *alloc, Dependence d) { - int32_t id = size(); - push_pack(alloc, d); - d.input()->setEdgeOut(id); - d.output()->setEdgeIn(id); - } - static constexpr auto memNeeded(size_t N) -> size_t { - constexpr size_t memPer = sizeof(int32_t) * 2 + sizeof(DepPoly *) + - sizeof(math::Simplex *) * 2 + sizeof(bool) + - sizeof(uint8_t); - return N * memPer; + constexpr auto tup(Dependence d, int32_t i) -> Tuple { + IR::Addr *out = d.output(), *in = d.input(); + if (out->getEdgeOut() >= 0) prevOut(ID{out->getEdgeOut()}) = i; + if (in->getEdgeIn() >= 0) prevIn(ID{in->getEdgeIn()}) = i; + in->setEdgeOut(i); + out->setEdgeIn(i); + return Tuple{out, + in, + d.getSimplexPair(), + d.getDepPoly(), + out->getEdgeOut(), + -1, + in->getEdgeIn(), + -1, + d.revTimeEdge().id, + d.satLvl, + d.getMeta(), + d.getPeel()}; } - void timelessCheck(Arena<> *alloc, NotNull dxy, NotNull x, - NotNull y, - std::array, 2> pair, bool isFwd) { - const size_t numLambda = dxy->getNumLambda(); - invariant(dxy->getTimeDim(), unsigned(0)); + /// set(ID i, Dependence d) + /// stores `d` at index `i` + /// Dependence `d` is pushed to the fronts of the edgeOut and edgeIn chains. 
+ constexpr void set(int32_t i, Dependence d) { datadeps[i] = tup(d, i); } + constexpr void set(ID i, Dependence d) { set(i.id, d); } + auto addEdge(Dependence d) -> ID { + int32_t id{int32_t(datadeps.size())}; + invariant(id >= 0); + datadeps.push_back(tup(d, id)); + return {int32_t(id)}; + } + + void addOrdered(Valid dxy, Valid x, + Valid y, std::array, 2> pair, + bool isFwd) { + ptrdiff_t numLambda = dxy->getNumLambda(); if (!isFwd) { std::swap(pair[0], pair[1]); std::swap(x, y); } pair[0]->truncateVars(1 + numLambda + dxy->getNumScheduleCoef()); - addEdge(alloc, Dependence{dxy, pair, x, y, isFwd}); - } - void timelessCheck(Arena<> *alloc, NotNull dxy, NotNull x, - NotNull y, - std::array, 2> pair) { - return timelessCheck(alloc, dxy, x, y, pair, - checkDirection(*alloc, pair, x, y, dxy->getNumLambda(), - dxy->getNumVar() + 1)); + addEdge(Dependence{.depPoly = dxy, + .dependenceSatisfaction = pair[0], + .dependenceBounding = pair[1], + .in = x, + .out = y, + .meta = isFwd}); + } + void timelessCheck(Arena<> *alloc, Valid dxy, Valid x, + Valid y, + std::array, 2> pair) { + invariant(dxy->getTimeDim(), unsigned(0)); + return addOrdered(dxy, x, y, pair, + checkDirection(*alloc, pair, x, y, dxy->getNumLambda(), + Col<>{dxy->getNumVar() + 1})); } // emplaces dependencies with repeat accesses to the same memory across // time - void timeCheck(Arena<> *alloc, NotNull dxy, NotNull x, - NotNull y, - std::array, 2> pair) { - bool isFwd = checkDirection(*alloc, pair, x, y, dxy->getNumLambda(), - dxy->getA().numCol() - dxy->getTimeDim()); + void timeCheck(Arena<> *alloc, Valid dxy, Valid x, + Valid y, std::array, 2> pair) { + bool isFwd = checkDirection( + *alloc, pair, x, y, dxy->getNumLambda(), + Col<>{ptrdiff_t(dxy->getA().numCol()) - dxy->getTimeDim()}); timeCheck(alloc, dxy, x, y, pair, isFwd); } - void timeCheck(Arena<> *alloc, NotNull dxy, NotNull x, - NotNull y, - std::array, 2> pair, bool isFwd) { + static void timeStep(Valid dxy, MutPtrMatrix fE, + MutPtrMatrix 
sE, + ptrdiff_t numInequalityConstraintsOld, + ptrdiff_t numEqualityConstraintsOld, ptrdiff_t ineqEnd, + ptrdiff_t posEqEnd, ptrdiff_t v, ptrdiff_t step) { + for (ptrdiff_t c = 0; c < numInequalityConstraintsOld; ++c) { + int64_t Acv = dxy->getA(Row<>{c}, Col<>{v}); + if (!Acv) continue; + Acv *= step; + fE[0, c + 1] -= Acv; // *1 + sE[0, c + 1] -= Acv; // *1 + } + for (ptrdiff_t c = 0; c < numEqualityConstraintsOld; ++c) { + // each of these actually represents 2 inds + int64_t Ecv = dxy->getE(Row<>{c}, Col<>{v}); + if (!Ecv) continue; + Ecv *= step; + fE[0, c + ineqEnd] -= Ecv; + fE[0, c + posEqEnd] += Ecv; + sE[0, c + ineqEnd] -= Ecv; + sE[0, c + posEqEnd] += Ecv; + } + } + void timeCheck(Arena<> *alloc, Valid dxy, Valid x, + Valid y, std::array, 2> pair, + bool isFwd) { const unsigned numInequalityConstraintsOld = dxy->getNumInequalityConstraints(), numEqualityConstraintsOld = dxy->getNumEqualityConstraints(), @@ -623,9 +702,9 @@ class Dependencies { numScheduleCoefs = dxy->getNumScheduleCoef(); invariant(numLambda, dxy->getNumLambda()); // copy backup - std::array, 2> farkasBackups{pair[0]->copy(alloc), - pair[1]->copy(alloc)}; - NotNull in = x, out = y; + std::array, 2> farkasBackups{pair[0]->copy(alloc), + pair[1]->copy(alloc)}; + Valid in = x, out = y; if (isFwd) { std::swap(farkasBackups[0], farkasBackups[1]); } else { @@ -633,10 +712,15 @@ class Dependencies { std::swap(pair[0], pair[1]); } pair[0]->truncateVars(1 + numLambda + numScheduleCoefs); - auto dep0 = Dependence{dxy->copy(alloc), pair, in, out, isFwd}; - invariant(out->getCurrentDepth() + in->getCurrentDepth(), - dep0.getNumPhiCoefficients()); - addEdge(alloc, dep0); + Dependence dep0{.depPoly = dxy->copy(alloc), + .dependenceSatisfaction = pair[0], + .dependenceBounding = pair[1], + .in = in, + .out = out, + .meta = isFwd}; + invariant(ptrdiff_t(out->getCurrentDepth()) + in->getCurrentDepth(), + ptrdiff_t(dep0.getNumPhiCoefficients())); + ID d0ID{addEdge(dep0)}, prevID = d0ID; // pair is invalid 
const ptrdiff_t timeDim = dxy->getTimeDim(), numVar = 1 + dxy->getNumVar() - timeDim; @@ -646,90 +730,63 @@ class Dependencies { // dep0.depPoly->truncateVars(numVar); // dep0.depPoly->setTimeDim(0); - invariant(out->getCurrentDepth() + in->getCurrentDepth(), - dep0.getNumPhiCoefficients()); + invariant(ptrdiff_t(out->getCurrentDepth()) + in->getCurrentDepth(), + ptrdiff_t(dep0.getNumPhiCoefficients())); // now we need to check the time direction for all times - // anything approaching 16 time dimensions would be absolutely - // insane - math::Vector timeDirection(timeDim); - ptrdiff_t t = 0; - auto fE{farkasBackups[0]->getConstraints()(_, _(1, end))}; - auto sE{farkasBackups[1]->getConstraints()(_, _(1, end))}; - do { + // anything approaching 16 time dimensions would be insane + for (ptrdiff_t t = 0;;) { // set `t`th timeDim to +1/-1 // basically, what we do here is set it to `step` and pretend it was // a constant. so a value of c = a'x + t*step -> c - t*step = a'x so // we update the constant `c` via `c -= t*step`. // we have the problem that. 
int64_t step = dxy->getNullStep(t); - ptrdiff_t v = numVar + t, i = 0; - while (true) { - for (ptrdiff_t c = 0; c < numInequalityConstraintsOld; ++c) { - int64_t Acv = dxy->getA(c, v); - if (!Acv) continue; - Acv *= step; - fE(0, c + 1) -= Acv; // *1 - sE(0, c + 1) -= Acv; // *1 - } - for (ptrdiff_t c = 0; c < numEqualityConstraintsOld; ++c) { - // each of these actually represents 2 inds - int64_t Ecv = dxy->getE(c, v); - if (!Ecv) continue; - Ecv *= step; - fE(0, c + ineqEnd) -= Ecv; - fE(0, c + posEqEnd) += Ecv; - sE(0, c + ineqEnd) -= Ecv; - sE(0, c + posEqEnd) += Ecv; - } - if (i++ != 0) break; // break after undoing - timeDirection[t] = - checkDirection(*alloc, farkasBackups, *out, *in, numLambda, - dxy->getA().numCol() - dxy->getTimeDim()); - step *= -1; // flip to undo, then break - } - } while (++t < timeDim); - t = 0; - do { - // checkDirection(farkasBackups, x, y, numLambda) == false - // correct time direction would make it return true - // thus sign = timeDirection[t] ? 1 : -1 - int64_t step = (2 * timeDirection[t] - 1) * dxy->getNullStep(t); ptrdiff_t v = numVar + t; - for (ptrdiff_t c = 0; c < numInequalityConstraintsOld; ++c) { - int64_t Acv = dxy->getA(c, v); - if (!Acv) continue; - Acv *= step; - dxy->getA(c, 0) -= Acv; - fE(0, c + 1) -= Acv; // *1 - sE(0, c + 1) -= Acv; // *-1 - } - for (ptrdiff_t c = 0; c < numEqualityConstraintsOld; ++c) { - // each of these actually represents 2 inds - int64_t Ecv = dxy->getE(c, v); - if (!Ecv) continue; - Ecv *= step; - dxy->getE(c, 0) -= Ecv; - fE(0, c + ineqEnd) -= Ecv; - fE(0, c + posEqEnd) += Ecv; - sE(0, c + ineqEnd) -= Ecv; - sE(0, c + posEqEnd) += Ecv; + bool repeat = (++t < timeDim); + std::array, 2> fp{farkasBackups}; + if (repeat) { + fp[0] = fp[0]->copy(alloc); + fp[1] = fp[1]->copy(alloc); } - } while (++t < timeDim); - // dxy->truncateVars(numVar); - // dxy->setTimeDim(0); - farkasBackups[0]->truncateVars(1 + numLambda + numScheduleCoefs); - auto dep1 = Dependence{dxy, farkasBackups, out, in, 
!isFwd}; - invariant(out->getCurrentDepth() + in->getCurrentDepth(), - dep1.getNumPhiCoefficients()); - addEdge(alloc, dep1); + // set (or unset) for this timedim + auto fE{fp[0]->getConstraints()[_, _(1, end)]}; + auto sE{fp[1]->getConstraints()[_, _(1, end)]}; + timeStep(dxy, fE, sE, numInequalityConstraintsOld, + numEqualityConstraintsOld, ineqEnd, posEqEnd, v, step); + // checkDirection should be `true`, so if `false` we flip the sign + // this is because `isFwd = checkDirection` of the original + // `if (isFwd)`, we swapped farkasBackups args, making the result + // `false`; for our timeDim to capture the opposite movement + // through time, we thus need to flip it back to `true`. + // `if (!isFwd)`, i.e. the `else` branch above, we don't flip the + // args, so it'd still return `false` and a flip would still mean `true`. + if (!checkDirection( + *alloc, fp, *out, *in, numLambda, + Col<>{ptrdiff_t(dxy->getA().numCol()) - dxy->getTimeDim()})) + timeStep(dxy, fE, sE, numInequalityConstraintsOld, + numEqualityConstraintsOld, ineqEnd, posEqEnd, v, -2 * step); + + fp[0]->truncateVars(1 + numLambda + numScheduleCoefs); + Dependence dep1{.depPoly = dxy, + .dependenceSatisfaction = farkasBackups[0], + .dependenceBounding = farkasBackups[1], + .in = out, + .out = in, + .revTimeEdge_ = prevID, + .meta = !isFwd}; + invariant(ptrdiff_t(out->getCurrentDepth()) + in->getCurrentDepth(), + ptrdiff_t(dep1.getNumPhiCoefficients())); + prevID = addEdge(dep1); + if (!repeat) break; + } + revTimeEdge(d0ID) = prevID.id; } static auto checkDirection(Arena<> alloc, - const std::array, 2> &p, - NotNull x, - NotNull y, - NotNull xSchedule, - NotNull ySchedule, - unsigned numLambda, Col nonTimeDim) -> bool { + const std::array, 2> &p, + Valid x, Valid y, + Valid xSchedule, + Valid ySchedule, + ptrdiff_t numLambda, Col<> nonTimeDim) -> bool { const auto &[fxy, fyx] = p; unsigned numLoopsX = x->getCurrentDepth(), numLoopsY = y->getCurrentDepth(), numLoopsTotal = numLoopsX + numLoopsY; @@ 
-760,16 +817,16 @@ class Dependencies { assert(i != numLoopsCommon); sch[0] = xOffOmega[i]; sch[1] = yOffOmega[i]; - sch[_(2, 2 + numLoopsX)] << xPhi(last - i, _); - sch[_(2 + numLoopsX, 2 + numLoopsTotal)] << yPhi(last - i, _); + sch[_(2, 2 + numLoopsX)] << xPhi[last - i, _]; + sch[_(2 + numLoopsX, 2 + numLoopsTotal)] << yPhi[last - i, _]; if (fxy->unSatisfiableZeroRem(alloc, sch, numLambda, - unsigned(nonTimeDim))) { + ptrdiff_t(nonTimeDim))) { assert(!fyx->unSatisfiableZeroRem(alloc, sch, numLambda, - unsigned(nonTimeDim))); + ptrdiff_t(nonTimeDim))); return false; } if (fyx->unSatisfiableZeroRem(alloc, sch, numLambda, - unsigned(nonTimeDim))) + ptrdiff_t(nonTimeDim))) return true; } // assert(false); @@ -777,14 +834,14 @@ class Dependencies { } // returns `true` if forward, x->y static auto checkDirection(Arena<> alloc, - const std::array, 2> &p, - NotNull x, - NotNull y, unsigned numLambda, - Col nonTimeDim) -> bool { + const std::array, 2> &p, + Valid x, Valid y, + ptrdiff_t numLambda, Col<> nonTimeDim) -> bool { const auto &[fxy, fyx] = p; - unsigned numLoopsX = x->getCurrentDepth(), nTD = unsigned(nonTimeDim); + unsigned numLoopsX = x->getCurrentDepth(), nTD = ptrdiff_t(nonTimeDim); #ifndef NDEBUG - const unsigned numLoopsCommon = std::min(numLoopsX, y->getCurrentDepth()); + ptrdiff_t numLoopsCommon = + std::min(ptrdiff_t(numLoopsX), ptrdiff_t(y->getCurrentDepth())); #endif PtrVector xFusOmega = x->getFusionOmega(); PtrVector yFusOmega = y->getFusionOmega(); @@ -813,231 +870,190 @@ class Dependencies { invariant(false); return false; } - constexpr auto get(ID i, IR::Addr *in, IR::Addr *out) -> Dependence { - return Dependence{depPoly(i), depSatBnd(i), in, out, - satLevelPair(i), isForward(i) + constexpr auto get(ID i, IR::Addr *in, IR::Addr *out) const -> Dependence { + auto [depSat, depBnd] = depSatBnd(i); + return Dependence{.depPoly = depPoly(i), + .dependenceSatisfaction = depSat, + .dependenceBounding = depBnd, + .in = in, + .out = out, + .satLvl = 
satLevelPair(i), + .meta = getMeta(i) }; } - - constexpr void set(ID i, Dependence d) { - auto out = d.output(); - auto in = d.input(); - output(i) = out; - nextOut(i) = out->getEdgeOut(); - input(i) = in; - nextIn(i) = in->getEdgeIn(); - depSatBnd(i) = d.getSimplexPair(); - depPoly(i) = d.getDepPoly(); - satLevelPair(i) = d.satLvl; - isForward(i) = d.isForward(); - } - - auto push_pack(Arena<> *alloc, Dependence d) -> void * { - void *ret = nullptr; - if (numData == getCapacity()) { - auto newCapacity = getCapacity() * 2; - auto *newData = alloc->allocate(memNeeded(newCapacity)); - std::memcpy(newData, data, memNeeded(numData)); - ret = std::exchange(data, newData); + static auto innermostNonZero(PtrMatrix A, ptrdiff_t skip) + -> ptrdiff_t { + for (ptrdiff_t i = ptrdiff_t(A.numCol()); --i;) { + if (i == skip) continue; + if (!math::allZero(A[_, i])) return i; } - set(ID{numData++}, d); - return ret; - } - [[nodiscard]] constexpr auto getCapacity() const noexcept -> int32_t { - return int32_t(std::bit_ceil(uint32_t(numData))); - } - - constexpr auto outAddrPtr() -> IR::Addr ** { - void *p = data; - return static_cast(p); - } - [[nodiscard]] constexpr auto outAddrPtr() const -> IR::Addr *const * { - const void *p = data; - return static_cast(p); - } - constexpr auto inAddrPtr() -> IR::Addr ** { - void *p = data + sizeof(IR::Addr *) * getCapacity(); - return static_cast(p); - } - [[nodiscard]] constexpr auto inAddrPtr() const -> IR::Addr *const * { - const void *p = data + sizeof(IR::Addr *) * getCapacity(); - return static_cast(p); - } - constexpr auto outEdgePtr() -> int32_t * { - unsigned cap = getCapacity(); - void *p = data + sizeof(IR::Addr *) * 2 * cap; - return static_cast(p); - } - [[nodiscard]] constexpr auto outEdgePtr() const -> const int32_t * { - unsigned cap = getCapacity(); - const void *p = data + sizeof(IR::Addr *) * 2 * cap; - return static_cast(p); - } - constexpr auto inEdgePtr() -> int32_t * { - unsigned cap = getCapacity(); - void *p = data + 
(sizeof(IR::Addr *) * 2 + sizeof(int32_t)) * cap; - return static_cast(p); - } - [[nodiscard]] constexpr auto inEdgePtr() const -> const int32_t * { - unsigned cap = getCapacity(); - const void *p = data + (sizeof(IR::Addr *) * 2 + sizeof(int32_t)) * cap; - return static_cast(p); - } - constexpr auto satLevelsPtr() -> std::array * { - unsigned cap = getCapacity(); - void *p = - data + - ((sizeof(IR::Addr *) + sizeof(int32_t) + sizeof(math::Simplex *)) * 2 + - sizeof(DepPoly *)) * - cap; - return static_cast *>(p); - } - [[nodiscard]] constexpr auto satLevelsPtr() const - -> const std::array * { - unsigned cap = getCapacity(); - const void *p = - data + - ((sizeof(IR::Addr *) + sizeof(int32_t) + sizeof(math::Simplex *)) * 2 + - sizeof(DepPoly *)) * - cap; - return static_cast *>(p); + return -1; } public: - // field order: - // AddrOut - // AddrIn - // nextOut - // nextIn - // dependenceSatisfaction - // dependenceBounding - // depPoly - // satLevel - // isForward - constexpr auto get(ID i) -> Dependence { return get(i, input(i), output(i)); } - constexpr auto outAddrs() -> MutPtrVector { - return {outAddrPtr(), numData}; - } - constexpr auto inAddrs() -> MutPtrVector { - return {inAddrPtr(), numData}; - } + constexpr void removeEdge(ID id) { + removeOutEdge(id.id); + removeInEdge(id.id); + /// TODO: remove revTimeEdge? 
+ } + constexpr void removeOutEdge(int32_t id) { + int32_t prev = prevOut(poly::Dependence::ID{id}); + int32_t next = nextOut(poly::Dependence::ID{id}); + if (prev >= 0) nextOut(poly::Dependence::ID{prev}) = next; + if (next >= 0) prevOut(poly::Dependence::ID{next}) = prev; + } + constexpr void removeInEdge(int32_t id) { + int32_t prev = prevIn(poly::Dependence::ID{id}); + int32_t next = nextIn(poly::Dependence::ID{id}); + if (prev >= 0) nextIn(poly::Dependence::ID{prev}) = next; + if (next >= 0) prevIn(poly::Dependence::ID{next}) = prev; + } + [[nodiscard]] constexpr auto get(ID i) const -> Dependence { + return get(i, input(i), output(i)); + } + // constexpr auto outAddrs() -> MutPtrVector { + // return {outAddrPtr(), numData}; + // } + // constexpr auto inAddrs() -> MutPtrVector { + // return {inAddrPtr(), numData}; + // } constexpr auto outEdges() -> MutPtrVector { - return {outEdgePtr(), numData}; + return datadeps.template get(); } constexpr auto inEdges() -> MutPtrVector { - return {inEdgePtr(), numData}; + return datadeps.template get(); } [[nodiscard]] constexpr auto outEdges() const -> PtrVector { - return {outEdgePtr(), unsigned(numData)}; + return datadeps.template get(); } [[nodiscard]] constexpr auto inEdges() const -> PtrVector { - return {inEdgePtr(), unsigned(numData)}; - } - constexpr auto satLevels() -> MutPtrVector> { - return {satLevelsPtr(), numData}; + return datadeps.template get(); } + // [[nodiscard]] constexpr auto outEdges() const -> PtrVector { + // return {outEdgePtr(), unsigned(numData)}; + // } + // [[nodiscard]] constexpr auto inEdges() const -> PtrVector { + // return {inEdgePtr(), unsigned(numData)}; + // } + // constexpr auto satLevels() -> MutPtrVector> { + // return {satLevelsPtr(), numData}; + // } [[nodiscard]] constexpr auto output(ID i) -> IR::Addr *& { - return outAddrPtr()[i.id]; + return datadeps.template get(i.id); } - [[nodiscard]] constexpr auto output(ID i) const -> const IR::Addr * { - return outAddrPtr()[i.id]; + 
[[nodiscard]] constexpr auto output(ID i) const -> IR::Addr * { + return datadeps.template get(i.id); } [[nodiscard]] constexpr auto input(ID i) -> IR::Addr *& { - return inAddrPtr()[i.id]; + return datadeps.template get(i.id); } - [[nodiscard]] constexpr auto input(ID i) const -> const IR::Addr * { - return inAddrPtr()[i.id]; + [[nodiscard]] constexpr auto input(ID i) const -> IR::Addr * { + return datadeps.template get(i.id); } constexpr auto nextOut(ID i) -> int32_t & { - unsigned cap = getCapacity(); - void *p = data + sizeof(int32_t) * i.id + sizeof(IR::Addr *) * 2 * cap; - return *static_cast(p); + return datadeps.template get(i.id); + } + constexpr auto prevOut(ID i) -> int32_t & { + return datadeps.template get(i.id); } constexpr auto nextIn(ID i) -> int32_t & { - unsigned cap = getCapacity(); - void *p = data + sizeof(int32_t) * i.id + - (sizeof(IR::Addr *) * 2 + sizeof(int32_t)) * cap; - return *static_cast(p); + return datadeps.template get(i.id); + } + constexpr auto prevIn(ID i) -> int32_t & { + return datadeps.template get(i.id); + } + constexpr auto depSatBnd(ID i) -> std::array, 2> & { + return datadeps.template get(i.id); } - constexpr auto depSatBnd(ID i) -> std::array, 2> & { - unsigned cap = getCapacity(); - void *p = data + 2 * sizeof(math::Simplex *) * i.id + - (sizeof(IR::Addr *) + sizeof(int32_t)) * 2 * cap; - return *static_cast, 2> *>(p); + constexpr auto revTimeEdge(ID i) -> int32_t & { + return datadeps.template get(i.id); + } + [[nodiscard]] constexpr auto revTimeEdge(ID i) const -> int32_t { + return datadeps.template get(i.id); } constexpr auto depPoly(ID i) -> DepPoly *& { - unsigned cap = getCapacity(); - void *p = data + sizeof(DepPoly *) * i.id + - (sizeof(IR::Addr *) + sizeof(int32_t) + sizeof(math::Simplex *)) * - 2 * cap; - return *static_cast(p); + return datadeps.template get(i.id); + } + [[nodiscard]] constexpr auto depSatBnd(ID i) const + -> std::array, 2> { + return datadeps.template get(i.id); + } + [[nodiscard]] 
constexpr auto depPoly(ID i) const -> DepPoly * { + return datadeps.template get(i.id); } constexpr auto satLevelPair(ID i) -> std::array & { - return satLevelsPtr()[i.id]; + return datadeps.template get(i.id); } [[nodiscard]] constexpr auto satLevelPair(ID i) const - -> const std::array & { - return satLevelsPtr()[i.id]; + -> std::array { + return datadeps.template get(i.id); } - constexpr auto satLevel(ID i) -> uint8_t { - auto pair = satLevelPair(i); - return Dependence::satLevelMask(pair[0]); + [[nodiscard]] constexpr auto satLevel(ID i) const -> uint8_t { + return Dependence::satLevelMask(satLevelPair(i)[0]); } [[nodiscard]] constexpr auto isSat(ID i, unsigned depth) const -> uint8_t { - auto pair = satLevelPair(i); - return Dependence::satLevelMask(pair[0]) <= depth; + return Dependence::satLevelMask(satLevelPair(i)[0]) <= depth; + } + [[nodiscard]] constexpr auto isActive(ID i, unsigned depth) const -> uint8_t { + return Dependence::satLevelMask(satLevelPair(i)[0]) > depth; } - [[nodiscard]] constexpr auto isForward(ID i) const noexcept -> bool & { - unsigned cap = getCapacity(); - void *p = - data + sizeof(bool) * i.id + - ((sizeof(IR::Addr *) + sizeof(int32_t) + sizeof(math::Simplex *)) * 2 + - sizeof(std::array) + sizeof(DepPoly *)) * - cap; - return *static_cast(p); + [[nodiscard]] constexpr auto getMeta(ID i) noexcept -> uint8_t & { + return datadeps.template get(i.id); + } + [[nodiscard]] constexpr auto getMeta(ID i) const noexcept -> uint8_t { + return datadeps.template get(i.id); + } + [[nodiscard]] constexpr auto getPeel(ID i) noexcept -> uint8_t & { + return datadeps.template get(i.id); + } + [[nodiscard]] constexpr auto getPeel(ID i) const noexcept -> uint8_t { + return datadeps.template get(i.id); + } + [[nodiscard]] constexpr auto isForward(ID i) const noexcept -> bool { + return getMeta(i) & 1; } class Ref { - Dependencies *deps; - ID i; + Dependencies *deps_; + ID i_; public: - Ref(Dependencies *deps, ID i) : deps(deps), i(i) {} - operator 
Dependence() const { return deps->get(i); } + constexpr Ref(Dependencies *deps, ID i) : deps_(deps), i_(i) {} + operator Dependence() const { return deps_->get(i_); } auto operator=(Dependence d) -> Ref & { - deps->set(i, d); + deps_->set(i_, d); return *this; } }; - void check(Arena<> *alloc, NotNull x, NotNull y) { + void check(Arena<> *alloc, Valid x, Valid y) { // TODO: implement gcd test // if (x.gcdKnownIndependent(y)) return {}; DepPoly *dxy{DepPoly::dependence(alloc, x, y)}; if (!dxy) return; - invariant(x->getCurrentDepth(), dxy->getDim0()); - invariant(y->getCurrentDepth(), dxy->getDim1()); - invariant(x->getCurrentDepth() + y->getCurrentDepth(), - dxy->getNumPhiCoef()); + invariant(x->getCurrentDepth() == ptrdiff_t(dxy->getDim0())); + invariant(y->getCurrentDepth() == ptrdiff_t(dxy->getDim1())); + invariant(x->getCurrentDepth() + y->getCurrentDepth() == + ptrdiff_t(dxy->getNumPhiCoef())); // note that we set boundAbove=true, so we reverse the // dependence direction for the dependency we seek, we'll // discard the program variables x then y - std::array, 2> pair(dxy->farkasPair(alloc)); + std::array, 2> pair(dxy->farkasPair(alloc)); if (dxy->getTimeDim()) timeCheck(alloc, dxy, x, y, pair); else timelessCheck(alloc, dxy, x, y, pair); } - inline void copyDependencies(Arena<> *alloc, IR::Addr *src, IR::Addr *dst); + inline void copyDependencies(IR::Addr *src, IR::Addr *dst); // reload store `x` - auto reload(Arena<> *alloc, NotNull store) -> NotNull { - NotNull dxy{DepPoly::self(alloc, store)}; - std::array, 2> pair(dxy->farkasPair(alloc)); - NotNull load = store->reload(alloc); - copyDependencies(alloc, store, load); + auto reload(Arena<> *alloc, Valid store) -> Valid { + Valid dxy{DepPoly::self(alloc, store)}; + std::array, 2> pair(dxy->farkasPair(alloc)); + Valid load = store->reload(alloc); + copyDependencies(store, load); if (dxy->getTimeDim()) timeCheck(alloc, dxy, store, load, pair, true); - else timelessCheck(alloc, dxy, store, load, pair, true); +
else addOrdered(dxy, store, load, pair, true); return load; } [[nodiscard]] constexpr auto inputEdgeIDs(int32_t id) const { @@ -1046,96 +1062,210 @@ class Dependencies { [[nodiscard]] constexpr auto outputEdgeIDs(int32_t id) const { return utils::VForwardRange{outEdges(), id}; } + [[nodiscard]] constexpr auto getEdgeTransform() const { + auto f = [=, this](int32_t id) { return get(Dependence::ID{id}); }; + return std::views::transform(f); + } [[nodiscard]] constexpr auto inputEdges(int32_t id) const { - auto f = [this](int32_t id) { - Dependencies d = *this; - return d.get(ID{id}); - }; - return inputEdgeIDs(id) | std::views::transform(f); + return inputEdgeIDs(id) | getEdgeTransform(); } [[nodiscard]] constexpr auto outputEdges(int32_t id) const { - auto f = [this](int32_t id) { - Dependencies d = *this; - return d.get(Dependence::ID{id}); - }; - return outputEdgeIDs(id) | std::views::transform(f); + return outputEdgeIDs(id) | getEdgeTransform(); } - [[nodiscard]] constexpr auto activeFilter(unsigned depth) const { - auto f = [=](int32_t id) -> bool { - return !isSat(Dependence::ID{id}, depth); + [[nodiscard]] constexpr auto activeFilter(int depth) const { + auto f = [=, this](int32_t id) -> bool { + return isActive(Dependence::ID{id}, depth); }; return std::views::filter(f); } [[nodiscard]] constexpr auto inputAddrTransform() { - auto f = [=](int32_t id) { return input(Dependence::ID{id}); }; + auto f = [=, this](int32_t id) { return input(Dependence::ID{id}); }; return std::views::transform(f); } [[nodiscard]] constexpr auto outputAddrTransform() { - auto f = [=](int32_t id) { return output(Dependence::ID{id}); }; + auto f = [=, this](int32_t id) { return output(Dependence::ID{id}); }; return std::views::transform(f); } + [[nodiscard]] constexpr auto inputAddrTransform() const { + auto f = [=, this](int32_t id) { return input(Dependence::ID{id}); }; + return std::views::transform(f); + } + [[nodiscard]] constexpr auto outputAddrTransform() const { + auto f = [=, 
this](int32_t id) { return output(Dependence::ID{id}); }; + return std::views::transform(f); + } + /// this function essentially indicates that this dependency does not prevent + /// the hoisting of a memory access out of a loop, because a memory->register + /// transform is possible. + /// The requirements are that the `indexMatrix` match + [[nodiscard]] constexpr auto registerEligible(ID id) const -> bool { + /// If no repeated accesses across time, it can't be hoisted out + if (revTimeEdge(id) < 0) return false; + DensePtrMatrix inMat{input(id)->indexMatrix()}, + outMat{output(id)->indexMatrix()}; + ptrdiff_t numLoopsIn = ptrdiff_t(inMat.numCol()), + numLoopsOut = ptrdiff_t(outMat.numCol()), + numLoops = std::min(numLoopsIn, numLoopsOut); + if ((numLoopsIn != numLoopsOut) && + math::anyNEZero(numLoopsIn > numLoopsOut + ? inMat[_, _(numLoopsOut, numLoopsIn)] + : outMat[_, _(numLoopsIn, numLoopsOut)])) + return false; + return inMat[_, _(0, numLoops)] == outMat[_, _(0, numLoops)]; + } + [[nodiscard]] constexpr auto registerEligibleFilter() const { + auto f = [=, this](int32_t id) -> bool { + return registerEligible(Dependence::ID{id}); + }; + return std::views::filter(f); + } + /// NOTE: this method uses `in` and `out` to check for reorderability, as + /// these get rotated after the simplex solve, while the stored `DepPoly` and + /// simplices do not. 
+ inline auto determinePeelDepth(IR::Loop *, int32_t) + -> utils::Optional; }; -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_destructible_v); } // namespace poly namespace IR { using poly::Dependencies; -inline auto Addr::inputEdges(Dependencies deps) const { +inline auto Addr::inputEdges(const Dependencies &deps) const { return deps.inputEdges(getEdgeIn()); } -inline auto Addr::outputEdges(Dependencies deps) const { +inline auto Addr::outputEdges(const Dependencies &deps) const { return deps.outputEdges(getEdgeOut()); } -inline auto Addr::inputEdgeIDs(Dependencies deps) const { +inline auto Addr::inputEdgeIDs(const Dependencies &deps) const + -> utils::VForwardRange { return deps.inputEdgeIDs(getEdgeIn()); } -inline auto Addr::outputEdgeIDs(Dependencies deps) const { +inline auto Addr::outputEdgeIDs(const Dependencies &deps) const + -> utils::VForwardRange { return deps.outputEdgeIDs(getEdgeOut()); } +inline auto Addr::inputEdgeIDs(const Dependencies &deps, int depth) const { + return inputEdgeIDs(deps) | deps.activeFilter(depth); +} +inline auto Addr::outputEdgeIDs(const Dependencies &deps, int depth) const { + return outputEdgeIDs(deps) | deps.activeFilter(depth); +} -inline auto IR::Addr::inputAddrs(Dependencies deps) const { +inline auto IR::Addr::inputAddrs(const Dependencies &deps) const { return inputEdgeIDs(deps) | deps.inputAddrTransform(); } -inline auto IR::Addr::outputAddrs(Dependencies deps) const { +inline auto IR::Addr::outputAddrs(const Dependencies &deps) const { return outputEdgeIDs(deps) | deps.outputAddrTransform(); } - -inline auto Addr::inputEdges(Dependencies deps, unsigned depth) const { - return inputEdgeIDs(deps) | deps.activeFilter(depth); +inline auto Addr::inputEdges(const Dependencies &deps, int depth) const { + return inputEdgeIDs(deps) | deps.activeFilter(depth) | + deps.getEdgeTransform(); } -inline auto Addr::outputEdges(Dependencies deps, unsigned depth) const { - return outputEdgeIDs(deps) | 
deps.activeFilter(depth); +inline auto Addr::outputEdges(const Dependencies &deps, int depth) const { + return outputEdgeIDs(deps) | deps.activeFilter(depth) | + deps.getEdgeTransform(); } - -inline auto IR::Addr::inputAddrs(Dependencies deps, unsigned depth) const { - return inputEdges(deps, depth) | deps.inputAddrTransform(); +inline auto IR::Addr::inputAddrs(const Dependencies &deps, int depth) const { + return inputEdgeIDs(deps, depth) | deps.inputAddrTransform(); } -inline auto IR::Addr::outputAddrs(Dependencies deps, unsigned depth) const { - return outputEdges(deps, depth) | deps.outputAddrTransform(); +inline auto IR::Addr::outputAddrs(const Dependencies &deps, int depth) const { + return outputEdgeIDs(deps, depth) | deps.outputAddrTransform(); +} +inline auto IR::Addr::unhoistableOutputs(const Dependencies &deps, + int depth) const { + return outputEdgeIDs(deps, depth) | deps.registerEligibleFilter() | + deps.outputAddrTransform(); +} + +/// Addr::operator->(const Dependencies& deps) +/// drop `this` from the graph, and remove it from `deps` +inline void IR::Addr::drop(Dependencies &deps) { + // NOTE: this doesn't get removed from the `origAddr` list/the addrChain + if (IR::Loop *L = getLoop(); L->getChild() == this) L->setChild(getNext()); + removeFromList(); + for (int32_t id : inputEdgeIDs(deps)) deps.removeEdge(Dependence::ID{id}); + for (int32_t id : outputEdgeIDs(deps)) deps.removeEdge(Dependence::ID{id}); } +using math::StridedVector; } // namespace IR namespace poly { -inline void Dependencies::copyDependencies(Arena<> *alloc, IR::Addr *src, - IR::Addr *dst) { +inline void Dependencies::copyDependencies(IR::Addr *src, IR::Addr *dst) { for (int32_t id : src->inputEdgeIDs(*this)) { IR::Addr *input = this->input(Dependence::ID{id}); if (input->isLoad()) continue; Dependence d = get(Dependence::ID{id}, input, dst); - addEdge(alloc, d); + addEdge(d); } for (int32_t id : src->outputEdgeIDs(*this)) { IR::Addr *output = this->output(Dependence::ID{id}); if 
(output->isLoad()) continue; Dependence d = get(Dependence::ID{id}, dst, output); - addEdge(alloc, d); + addEdge(d); } } +// returns `true` if this dependence can be reordered due to peeling, `false` +// otherwise. Note that the associated loop itself may need scalarization, but +// subloop evaluations could be reorderable. How would we capture +// dependencies/uses like +// int64_t x = 0; +// for (ptrdiff_t m = 0; m < M; ++m){ +// x += a[m]; +// b[m] = x; +// } +// we have `x +=` as a reassociable self-dependence, but the fact it is stored +// into `b[m]` means that we can't really reassociate, as each nominal +// intermediate value of `x` must be realized! +// We must check that there are no other reads. Note that this is represented as +// int64_t x[1]{}; +// for (ptrdiff_t m = 0; m < M; ++m){ +// x[0] = x[0] + a[m]; +// b[m] = x[0]; +// } +// So we have write->read dependence for the store `x[0] =` to the read in +// `b[m] = x[0]`. The key observation here is that `x[0]` has a time component; +// the violation occurs because we store in another location, providing a +// non-reassociable component. +inline auto Dependencies::determinePeelDepth(IR::Loop *L, int32_t id) + -> utils::Optional { + auto id_ = Dependence::ID{id}; + IR::Addr *in = input(id_), *out = output(id_); + // clang-format off + // If we have a dependency nested inside `L`, we won't be able to reorder if either + // a) that dependency's output is `in` + // b) that dependency's input is `out` + // as we'd then have to maintain the order of this loop level's evaluations with respect + // to the subloop. + // Otherwise, we check + // 1. If this dependency may be peeled. For this, it must + // a) be indexed by both `L` and a subloop of `L`. + // b) have an equality relation, so that it occurs for a single iteration of the subloop. + // Then, we can split the subloop across this value, scalarizing around it. + // 2. Is this dependency reassociable?
E.g., if it's connected by reassociable adds + // (such as integer adds, or floating point with the reassociable FMF), then mark it as such. + // clang-format on + // + // if (anyInteriorDependencies(L, in) || anyInteriorDependents(L, out)) + // return false; + // no inner dependence + PtrMatrix inInd = in->indexMatrix(), outInd = out->indexMatrix(); + invariant(inInd.numRow(), outInd.numRow()); + ptrdiff_t d = L->getCurrentDepth(); + invariant(inInd.numRow() >= d); + bool noInIndAtDepth = math::allZero(inInd[_, d]), + noOutIndAtDepth = math::allZero(outInd[_, d]); + if (noInIndAtDepth == noOutIndAtDepth) return -1; + // now, we want to find a loop that `in` depends on but `out` does not + // so that we can split over this loop. + // For now, to simplify codegen, we only accept the innermost non-zero + ptrdiff_t i = innermostNonZero(noInIndAtDepth ? inInd : outInd, d); + if (i >= 0) getPeel(id_) = i; + return i >= 0 ? utils::Optional{size_t(i)} + : utils::Optional{}; +} } // namespace poly } // namespace poly diff --git a/include/Polyhedra/DependencyPolyhedra.hpp b/include/Polyhedra/DependencyPolyhedra.hpp index df4c6f26d..1acdec1ab 100644 --- a/include/Polyhedra/DependencyPolyhedra.hpp +++ b/include/Polyhedra/DependencyPolyhedra.hpp @@ -1,24 +1,22 @@ - #pragma once #include "IR/Address.hpp" #include "Polyhedra/Loops.hpp" #include "Polyhedra/Polyhedra.hpp" #include "Support/OStream.hpp" +#include #include #include #include #include #include #include -#include #include #include #include #include #include #include -#include #include #include #include @@ -27,20 +25,21 @@ #include namespace poly::poly { +using math::shape; /// prints in current permutation order. /// TODO: decide if we want to make poly::Loop a `SymbolicPolyhedra` /// in which case, we have to remove `currentToOriginalPerm`, -/// which menas either change printing, or move prints `<<` into +/// which means either change printing, or move prints `<<` into /// the derived classes. 
inline auto printConstraints(std::ostream &os, DensePtrMatrix A, llvm::ArrayRef syms, bool inequality = true) -> std::ostream & { - const Row numConstraints = A.numRow(); - const unsigned numSyms = syms.size() + 1; - for (Row c = 0; c < numConstraints; ++c) { - printConstraint(os, A(c, _), numSyms, inequality); + Row numConstraints = A.numRow(); + unsigned numSyms = syms.size() + 1; + for (ptrdiff_t c = 0; c < numConstraints; ++c) { + printConstraint(os, A[c, _], numSyms, inequality); for (ptrdiff_t v = 1; v < numSyms; ++v) { - if (int64_t Acv = A(c, v)) { + if (int64_t Acv = A[c, v]) { os << (Acv > 0 ? " + " : " - "); Acv = math::constexpr_abs(Acv); if (Acv != 1) os << Acv << "*"; @@ -181,12 +180,12 @@ class DepPoly : public BasePolyhedra { constexpr void decrementNumConstraints() { invariant(numCon-- > 0); } constexpr auto getA() -> MutDensePtrMatrix { void *p = memory; - return {(int64_t *)p, math::DenseDims{numCon, getNumVar() + 1}}; + return {(int64_t *)p, math::DenseDims<>{{numCon}, {getNumVar() + 1}}}; } constexpr auto getE() -> MutDensePtrMatrix { void *p = memory; return {(int64_t *)p + size_t(conCapacity) * (getNumVar() + 1), - math::DenseDims{numEqCon, getNumVar() + 1}}; + math::DenseDims<>{{numEqCon}, {getNumVar() + 1}}}; } constexpr auto getNullStep() -> math::MutPtrVector { void *p = memory; @@ -211,28 +210,28 @@ class DepPoly : public BasePolyhedra { [[nodiscard]] auto getA() const -> DensePtrMatrix { const char *p = memory; return {const_cast(reinterpret_cast(p)), - math::DenseDims{numCon, getNumVar() + 1}}; + math::DenseDims<>{{numCon}, {getNumVar() + 1}}}; } - [[nodiscard]] auto getA(Row r, Col c) -> int64_t & { + [[nodiscard]] auto getA(Row<> r, Col<> c) -> int64_t & { auto *p = reinterpret_cast(memory); - return p[size_t(r) * (getNumVar() + 1) + size_t(c)]; + return p[ptrdiff_t(r) * (getNumVar() + 1) + ptrdiff_t(c)]; } - [[nodiscard]] auto getA(Row r, Col c) const -> int64_t { + [[nodiscard]] auto getA(Row<> r, Col<> c) const -> int64_t { const 
auto *p = reinterpret_cast(memory); - return p[size_t(r) * (getNumVar() + 1) + size_t(c)]; + return p[ptrdiff_t(r) * (getNumVar() + 1) + ptrdiff_t(c)]; } [[nodiscard]] auto getE() const -> DensePtrMatrix { const auto *p = reinterpret_cast(memory); return {const_cast(p + size_t(conCapacity) * (getNumVar() + 1)), - math::DenseDims{numEqCon, getNumVar() + 1}}; + math::DenseDims<>{numEqCon, getNumVar() + 1}}; } - [[nodiscard]] auto getE(Row r, Col c) -> int64_t & { + [[nodiscard]] auto getE(Row<> r, Col<> c) -> int64_t & { auto *p = reinterpret_cast(memory); - return p[(conCapacity + size_t(r)) * (getNumVar() + 1) + size_t(c)]; + return p[(conCapacity + ptrdiff_t(r)) * (getNumVar() + 1) + ptrdiff_t(c)]; } - [[nodiscard]] auto getE(Row r, Col c) const -> int64_t { + [[nodiscard]] auto getE(Row<> r, Col<> c) const -> int64_t { const auto *p = reinterpret_cast(memory); - return p[(conCapacity + size_t(r)) * (getNumVar() + 1) + size_t(c)]; + return p[(conCapacity + ptrdiff_t(r)) * (getNumVar() + 1) + ptrdiff_t(c)]; } [[nodiscard]] auto getNullStep() const -> PtrVector { const auto *p = reinterpret_cast(memory); @@ -249,56 +248,56 @@ class DepPoly : public BasePolyhedra { numDynSym}; } auto getSymbols(ptrdiff_t i) -> math::MutPtrVector { - return getA()(i, _(math::begin, getNumSymbols())); + return getA()[i, _(math::begin, getNumSymbols())]; } [[nodiscard]] auto getInEqSymbols(ptrdiff_t i) const -> PtrVector { - return getA()(i, _(math::begin, getNumSymbols())); + return getA()[i, _(math::begin, getNumSymbols())]; } [[nodiscard]] auto getEqSymbols(ptrdiff_t i) const -> PtrVector { - return getE()(i, _(math::begin, getNumSymbols())); + return getE()[i, _(math::begin, getNumSymbols())]; } [[nodiscard]] auto getCompTimeInEqOffset(ptrdiff_t i) const -> std::optional { - if (!allZero(getA()(i, _(1, getNumSymbols())))) return {}; - return getA()(i, 0); + if (!allZero(getA()[i, _(1, getNumSymbols())])) return {}; + return getA()[i, 0]; } [[nodiscard]] auto 
getCompTimeEqOffset(ptrdiff_t i) const -> std::optional { - if (!allZero(getE()(i, _(1, getNumSymbols())))) return {}; - return getE()(i, 0); + if (!allZero(getE()[i, _(1, getNumSymbols())])) return {}; + return getE()[i, 0]; } static constexpr auto findFirstNonEqual(PtrVector x, PtrVector y) -> ptrdiff_t { return std::distance( x.begin(), std::mismatch(x.begin(), x.end(), y.begin(), y.end()).first); } - static auto nullSpace(NotNull x, NotNull y) + static auto nullSpace(Valid x, Valid y) -> math::DenseMatrix { unsigned numLoopsCommon = findFirstNonEqual(x->getFusionOmega(), y->getFusionOmega()), xDim = x->getArrayDim(), yDim = y->getArrayDim(); - math::DenseMatrix A(math::DenseDims{numLoopsCommon, xDim + yDim}); + math::DenseMatrix A( + math::DenseDims<>{numLoopsCommon, xDim + yDim}); if (!numLoopsCommon) return A; // indMats cols are [outerMostLoop,...,innerMostLoop] PtrMatrix indMatX = x->indexMatrix(), indMatY = y->indexMatrix(); unsigned indDepth = std::min(x->getNaturalDepth(), y->getNaturalDepth()); for (ptrdiff_t i = 0; i < std::min(numLoopsCommon, indDepth); ++i) { - A(i, _(0, xDim)) << indMatX(_, i); - A(i, _(xDim, end)) << indMatY(_, i); + A[i, _(0, xDim)] << indMatX[_, i]; + A[i, _(xDim, end)] << indMatY[_, i]; } - for (ptrdiff_t i = indDepth; i < numLoopsCommon; ++i) A(i, _) << 0; + for (ptrdiff_t i = indDepth; i < numLoopsCommon; ++i) A[i, _] << 0; // returns rank x num loops return orthogonalNullSpace(std::move(A)); } - static auto nullSpace(NotNull x) - -> math::DenseMatrix { + static auto nullSpace(Valid x) -> math::DenseMatrix { unsigned numLoopsCommon = x->getCurrentDepth(), dim = x->getArrayDim(), natDepth = x->getNaturalDepth(); - math::DenseMatrix A(math::DenseDims{numLoopsCommon, dim}); + math::DenseMatrix A(math::DenseDims<>{numLoopsCommon, dim}); if (!numLoopsCommon) return A; // indMats cols are [outerMostLoop,...,innerMostLoop] - A(_(0, natDepth), _) << x->indexMatrix().transpose(); - if (natDepth < numLoopsCommon) A(_(natDepth, end), _) 
<< 0; + A[_(0, natDepth), _] << x->indexMatrix().t(); + if (natDepth < numLoopsCommon) A[_(natDepth, end), _] << 0; // returns rank x num loops return orthogonalNullSpace(std::move(A)); } @@ -340,19 +339,19 @@ class DepPoly : public BasePolyhedra { ((conCapacity + eqConCapacity) * (getNumVar() + 1) + timeDim) + sizeof(const llvm::SCEV *) * numDynSym; } - auto copy(Arena<> *alloc) const -> NotNull { + auto copy(Arena<> *alloc) const -> Valid { auto *p = alloc->template allocate(neededBytes()); std::memcpy(p, this, neededBytes()); - return NotNull{p}; + return Valid{p}; } - static auto dependence(Arena<> *alloc, NotNull aix, - NotNull aiy) -> DepPoly * { + static auto dependence(Arena<> *alloc, Valid aix, + Valid aiy) -> DepPoly * { assert(aix->sizesMatch(aiy)); unsigned numDep0Var = aix->getCurrentDepth(), numDep1Var = aiy->getCurrentDepth(), numVar = numDep0Var + numDep1Var; - NotNull loopx = aix->getAffLoop(); - NotNull loopy = aiy->getAffLoop(); + Valid loopx = aix->getAffLoop(); + Valid loopy = aiy->getAffLoop(); PtrMatrix Ax{loopx->getOuterA(numDep0Var)}, Ay{loopy->getOuterA(numDep1Var)}; auto Sx{loopx->getSyms()}, Sy{loopy->getSyms()}; @@ -362,18 +361,18 @@ class DepPoly : public BasePolyhedra { invariant(Cx.numRow(), Cy.numRow()); invariant(Cx.numCol() <= numDep0Var); invariant(Cy.numCol() <= numDep1Var); - auto [nc0, nv0] = Ax.size(); - auto [nc1, nv1] = Ay.size(); + auto [nc0, nv0] = shape(Ax); + auto [nc1, nv1] = shape(Ay); math::Vector map; unsigned numDynSym = mergeMap(map, Sx, Sy); invariant(ptrdiff_t(map.size()), ptrdiff_t(Sy.size())); unsigned numSym = numDynSym + 1; math::DenseMatrix NS{nullSpace(aix, aiy)}; - unsigned timeDim = unsigned{NS.numRow()}, - numCols = numVar + timeDim + numDynSym + 1, - conCapacity = unsigned(Ax.numRow() + Ay.numRow()) + numVar, - eqConCapacity = unsigned(Cx.numRow()) + timeDim; + ptrdiff_t timeDim = ptrdiff_t{NS.numRow()}, + numCols = numVar + timeDim + numDynSym + 1, + conCapacity = ptrdiff_t(Ax.numRow() + Ay.numRow()) 
+ numVar, + eqConCapacity = ptrdiff_t(Cx.numRow()) + timeDim; size_t memNeeded = sizeof(int64_t) * ((conCapacity + eqConCapacity) * numCols + timeDim) + @@ -385,10 +384,10 @@ class DepPoly : public BasePolyhedra { timeDim, conCapacity, eqConCapacity); // numDep1Var = nv1; - Row nc = nc0 + nc1; + ptrdiff_t nc = nc0 + nc1; unsigned indexDim{aix->getArrayDim()}; auto nullStep{dp->getNullStep()}; - for (ptrdiff_t i = 0; i < timeDim; ++i) nullStep[i] = selfDot(NS(i, _)); + for (ptrdiff_t i = 0; i < timeDim; ++i) nullStep[i] = norm2(NS[i, _]); // column meansing in in order // const size_t numSymbols = getNumSymbols(); auto A{dp->getA()}; @@ -399,38 +398,38 @@ class DepPoly : public BasePolyhedra { // E.resize(indexDim + nullDim, A.numCol()); // ma0 loop for (ptrdiff_t i = 0; i < nc0; ++i) { - A(i, _(0, 1 + Sx.size())) << Ax(i, _(0, 1 + Sx.size())); - A(i, _(numSym, numSym + numDep0Var)) - << Ax(i, _(1 + Sx.size(), 1 + Sx.size() + numDep0Var)); + A[i, _(0, 1 + Sx.size())] << Ax[i, _(0, 1 + Sx.size())]; + A[i, _(numSym, numSym + numDep0Var)] + << Ax[i, _(1 + Sx.size(), 1 + Sx.size() + numDep0Var)]; } for (ptrdiff_t i = 0; i < nc1; ++i) { - A(nc0 + i, 0) = Ay(i, 0); + A[nc0 + i, 0] = Ay[i, 0]; for (ptrdiff_t j = 0; j < map.size(); ++j) - A(nc0 + i, 1 + map[j]) = Ay(i, 1 + j); + A[nc0 + i, 1 + map[j]] = Ay[i, 1 + j]; for (ptrdiff_t j = 0; j < numDep1Var; ++j) - A(nc0 + i, j + numSym + numDep0Var) = Ay(i, j + 1 + Sy.size()); + A[nc0 + i, j + numSym + numDep0Var] = Ay[i, j + 1 + Sy.size()]; } - A(_(nc, end), _(numSym, numSym + numVar)).diag() << 1; + A[_(nc, end), _(numSym, numSym + numVar)].diag() << 1; // indMats are [outerMostLoop, ..., innerMostLoop] x arrayDim // offsetMats are arrayDim x numSymbols // E(i,:)* indVars = q[i] // e.g. 
i_0 + j_0 + off_0 = i_1 + j_1 + off_1 // i_0 + j_0 - i_1 - j_1 = off_1 - off_0 for (ptrdiff_t i = 0; i < indexDim; ++i) { - E(i, _(0, Ox.numCol())) << Ox(i, _); - E(i, _(0, Cx.numCol()) + numSym) << Cx(i, _); - E(i, 0) -= Oy(i, 0); - for (ptrdiff_t j = 0; j < Oy.numCol() - 1; ++j) - E(i, 1 + map[j]) -= Oy(i, 1 + j); - E(i, _(0, Cy.numCol()) + numSym + numDep0Var) << -Cy(i, _); + E[i, _(0, Ox.numCol())] << Ox[i, _]; + E[i, _(0, Cx.numCol()) + numSym] << Cx[i, _]; + E[i, 0] -= Oy[i, 0]; + for (ptrdiff_t j = 0, J = ptrdiff_t(Oy.numCol()) - 1; j < J; ++j) + E[i, 1 + map[j]] -= Oy[i, 1 + j]; + E[i, _(0, Cy.numCol()) + numSym + numDep0Var] << -Cy[i, _]; } for (ptrdiff_t i = 0; i < timeDim; ++i) { for (ptrdiff_t j = 0; j < NS.numCol(); ++j) { - int64_t nsij = NS(i, j); - E(indexDim + i, j + numSym) = nsij; - E(indexDim + i, j + numSym + numDep0Var) = -nsij; + int64_t nsij = NS[i, j]; + E[indexDim + i, j + numSym] = nsij; + E[indexDim + i, j + numSym + numDep0Var] = -nsij; } - E(indexDim + i, numSym + numVar + i) = 1; + E[indexDim + i, numSym + numVar + i] = 1; } dp->pruneBounds(*alloc); if (dp->getNumCon()) return dp; @@ -438,22 +437,21 @@ class DepPoly : public BasePolyhedra { return nullptr; } // self dependence - static auto self(Arena<> *alloc, NotNull ai) - -> NotNull { - NotNull loop = ai->getAffLoop(); + static auto self(Arena<> *alloc, Valid ai) -> Valid { + Valid loop = ai->getAffLoop(); unsigned numDepVar = ai->getCurrentDepth(), numVar = numDepVar + numDepVar; PtrMatrix B{loop->getOuterA(numDepVar)}; auto S{loop->getSyms()}; // numLoops x numDim PtrMatrix C{ai->indexMatrix()}, O{ai->offsetMatrix()}; - auto [nco, nv] = B.size(); + auto [nco, nv] = shape(B); math::DenseMatrix NS{nullSpace(ai)}; - unsigned numDynSym = S.size(), numSym = numDynSym + 1, - timeDim = unsigned{NS.numRow()}, - numCols = numVar + timeDim + numDynSym + 1, - conCapacity = unsigned(2 * B.numRow()) + numVar, - eqConCapacity = unsigned(C.numRow()) + timeDim; + ptrdiff_t numDynSym = 
ptrdiff_t(S.size()), numSym = numDynSym + 1, + timeDim = ptrdiff_t{NS.numRow()}, + numCols = numVar + timeDim + numDynSym + 1, + conCapacity = 2 * ptrdiff_t(B.numRow()) + numVar, + eqConCapacity = ptrdiff_t(C.numRow()) + timeDim; size_t memNeeded = sizeof(int64_t) * ((conCapacity + eqConCapacity) * numCols + timeDim) + @@ -464,10 +462,10 @@ class DepPoly : public BasePolyhedra { conCapacity, eqConCapacity); // numDep1Var = nv1; - Row nc = nco + nco; + ptrdiff_t nc = nco + nco; unsigned indexDim{ai->getArrayDim()}; auto nullStep{dp->getNullStep()}; - for (ptrdiff_t i = 0; i < timeDim; ++i) nullStep[i] = selfDot(NS(i, _)); + for (ptrdiff_t i = 0; i < timeDim; ++i) nullStep[i] = norm2(NS[i, _]); // column meansing in in order // const size_t numSymbols = getNumSymbols(); auto A{dp->getA()}; @@ -478,12 +476,12 @@ class DepPoly : public BasePolyhedra { // E.resize(indexDim + nullDim, A.numCol()); // ma0 loop for (ptrdiff_t i = 0; i < nco; ++i) { - for (ptrdiff_t j = 0; j < numSym; ++j) A(i + nco, j) = A(i, j) = B(i, j); + for (ptrdiff_t j = 0; j < numSym; ++j) A[i + nco, j] = A[i, j] = B[i, j]; for (ptrdiff_t j = 0; j < numDepVar; ++j) - A(i + nco, j + numSym + numDepVar) = A(i, j + numSym) = - B(i, j + numSym); + A[i + nco, j + numSym + numDepVar] = A[i, j + numSym] = + B[i, j + numSym]; } - A(_(nc, end), _(numSym, numSym + numVar)).diag() << 1; + A[_(nc, end), _(numSym, numSym + numVar)].diag() << 1; // L254: Assertion `col < numCol()` failed // indMats are [innerMostLoop, ..., outerMostLoop] x arrayDim // offsetMats are arrayDim x numSymbols @@ -492,18 +490,18 @@ class DepPoly : public BasePolyhedra { // i_0 + j_0 - i_1 - j_1 = off_1 - off_0 for (ptrdiff_t i = 0; i < indexDim; ++i) { for (ptrdiff_t j = 0; j < C.numCol(); ++j) { - int64_t Cji = C(i, j); - E(i, j + numSym) = Cji; - E(i, j + numSym + numDepVar) = -Cji; + int64_t Cji = C[i, j]; + E[i, j + numSym] = Cji; + E[i, j + numSym + numDepVar] = -Cji; } } for (ptrdiff_t i = 0; i < timeDim; ++i) { for (ptrdiff_t j 
= 0; j < NS.numCol(); ++j) { - int64_t nsij = NS(i, j); - E(indexDim + i, j + numSym) = nsij; - E(indexDim + i, j + numSym + numDepVar) = -nsij; + int64_t nsij = NS[i, j]; + E[indexDim + i, j + numSym] = nsij; + E[indexDim + i, j + numSym + numDepVar] = -nsij; } - E(indexDim + i, numSym + numVar + i) = 1; + E[indexDim + i, numSym + numVar + i] = 1; } dp->pruneBounds(*alloc); invariant(dp->getNumCon() > 0); @@ -524,7 +522,7 @@ class DepPoly : public BasePolyhedra { // // Time parameters are carried over into farkas polys [[nodiscard]] auto farkasPair(Arena<> *alloc) const - -> std::array, 2> { + -> std::array, 2> { auto A{getA()}, E{getE()}; const ptrdiff_t numEqualityConstraintsOld = ptrdiff_t(E.numRow()); @@ -551,28 +549,28 @@ class DepPoly : public BasePolyhedra { const ptrdiff_t numLambda = posEqEnd + numEqualityConstraintsOld; const ptrdiff_t numVarNew = numVarInterest + numLambda; invariant(ptrdiff_t(getNumLambda()), numLambda); - // std::array, 2> pair; - NotNull fw = + // std::array, 2> pair; + Valid fw = math::Simplex::create(alloc, numConstraintsNew, numVarNew, 0); // Simplex &fw(pair[0]); // fw.resize(numConstraintsNew, numVarNew + 1); auto fCF{fw->getConstraints()}; fCF << 0; - math::MutPtrMatrix fC{fCF(_, _(1, end))}; + math::MutPtrMatrix fC{fCF[_, _(1, end)]}; // fC(_, 0) << 0; - fC(0, 0) = 1; // lambda_0 - fC(_, _(1, 1 + numInequalityConstraintsOld)) - << A(_, _(math::begin, numConstraintsNew)).transpose(); - // fC(_, _(ineqEnd, posEqEnd)) = E.transpose(); - // fC(_, _(posEqEnd, numVarNew)) = -E.transpose(); + fC[0, 0] = 1; // lambda_0 + fC[_, _(1, 1 + numInequalityConstraintsOld)] + << A[_, _(math::begin, numConstraintsNew)].t(); + // fC(_, _(ineqEnd, posEqEnd)) = E.t(); + // fC(_, _(posEqEnd, numVarNew)) = -E.t(); // loading from `E` is expensive // NOTE: if optimizing expression templates, should also // go through and optimize loops like this for (ptrdiff_t j = 0; j < numConstraintsNew; ++j) { for (ptrdiff_t i = 0; i < numEqualityConstraintsOld; 
++i) { - int64_t Eji = E(i, j); - fC(j, i + ineqEnd) = Eji; - fC(j, i + posEqEnd) = -Eji; + int64_t Eji = E[i, j]; + fC[j, i + ineqEnd] = Eji; + fC[j, i + posEqEnd] = -Eji; } } // schedule @@ -595,16 +593,16 @@ class DepPoly : public BasePolyhedra { // ... == w + u'*N + psi // -1 as we flip sign for (ptrdiff_t i = 0; i < numBoundingCoefs; ++i) - fC(i, i + numScheduleCoefs + numLambda) = -1; + fC[i, i + numScheduleCoefs + numLambda] = -1; // so far, both have been identical - NotNull bw = + Valid bw = math::Simplex::create(alloc, numConstraintsNew, numVarNew, 0); auto bCF{bw->getConstraints()}; bCF << fCF; // bCF(_, _(0, numVarNew + 1)) << fCF(_, _(0, numVarNew + 1)); - math::MutPtrMatrix bC{bCF(_, _(1, end))}; + math::MutPtrMatrix bC{bCF[_, _(1, end)]}; // equality constraints get expanded into two inequalities // a == 0 -> @@ -617,14 +615,14 @@ class DepPoly : public BasePolyhedra { // so that the ILP rLexMin on coefficients // will tend to preserve the initial order (which is // better than tending to reverse the initial order). 
- fC(0, numLambda) = 1; - fC(0, 1 + numLambda) = -1; - bC(0, numLambda) = -1; - bC(0, 1 + numLambda) = 1; + fC[0, numLambda] = 1; + fC[0, 1 + numLambda] = -1; + bC[0, numLambda] = -1; + bC[0, 1 + numLambda] = 1; for (ptrdiff_t i = 0; i < numPhiCoefs; ++i) { int64_t s = (2 * (i < numDep0Var) - 1); - fC(i + numBoundingCoefs, i + numLambda + 2) = s; - bC(i + numBoundingCoefs, i + numLambda + 2) = -s; + fC[i + numBoundingCoefs, i + numLambda + 2] = s; + bC[i + numBoundingCoefs, i + numLambda + 2] = -s; } // note that delta/constant coef is handled as last `s` return {fw, bw}; @@ -632,9 +630,9 @@ class DepPoly : public BasePolyhedra { /// returns `true` if the array accesses are guaranteed independent /// conditioning on partial schedules xPhi and yPhi - [[nodiscard]] auto checkSat(Arena<> alloc, NotNull xLoop, + [[nodiscard]] auto checkSat(Arena<> alloc, Valid xLoop, const int64_t *xOff, DensePtrMatrix xPhi, - NotNull yLoop, + Valid yLoop, const int64_t *yOff, DensePtrMatrix yPhi) -> bool { // we take in loops because we might be moving deeper inside the loopnest @@ -642,42 +640,42 @@ class DepPoly : public BasePolyhedra { Row numPhi = xPhi.numRow(); invariant(yPhi.numRow(), numPhi); DensePtrMatrix E{getE()}; - unsigned xNumLoops = unsigned(xPhi.numCol()), - yNumLoops = unsigned(yPhi.numCol()); - if ((numDep0Var == xNumLoops) || allZero(xPhi(_, _(numDep0Var, end)))) + ptrdiff_t xNumLoops = ptrdiff_t(xPhi.numCol()), + yNumLoops = ptrdiff_t(yPhi.numCol()); + if ((numDep0Var == xNumLoops) || allZero(xPhi[_, _(numDep0Var, end)])) xNumLoops = numDep0Var; else invariant(numDep0Var < xNumLoops); - if ((numDep1Var == yNumLoops) || allZero(yPhi(_, _(numDep1Var, end)))) + if ((numDep1Var == yNumLoops) || allZero(yPhi[_, _(numDep1Var, end)])) yNumLoops = numDep1Var; else invariant(numDep1Var < yNumLoops); unsigned numSym = getNumSymbols(), numSymX = numSym + xNumLoops, numSymD0 = numSym + numDep0Var, nCol = numSymX + yNumLoops; MutDensePtrMatrix B{ - matrix(&alloc, numEqCon + 
numPhi, nCol)}; + matrix(&alloc, numEqCon + ptrdiff_t(numPhi), nCol)}; bool extend = (numDep0Var != xNumLoops) || (numDep1Var != yNumLoops); // we truncate time dim if (extend || timeDim) { for (ptrdiff_t r = 0; r < numEqCon; ++r) { - B(r, _(0, numSymD0)) << E(r, _(0, numSymD0)); - B(r, _(numDep0Var, xNumLoops) + numSym) << 0; - B(r, _(0, numDep1Var) + numSymX) << E(r, _(0, numDep1Var) + numSymD0); - B(r, _(numDep1Var, yNumLoops) + numSymX) << 0; + B[r, _(0, numSymD0)] << E[r, _(0, numSymD0)]; + B[r, _(numDep0Var, xNumLoops) + numSym] << 0; + B[r, _(0, numDep1Var) + numSymX] << E[r, _(0, numDep1Var) + numSymD0]; + B[r, _(numDep1Var, yNumLoops) + numSymX] << 0; } } else std::copy_n(E.begin(), E.numRow() * E.numCol(), B.begin()); if (xOff) for (ptrdiff_t c = 0; c < numDep0Var; ++c) if (int64_t mlt = xOff[c]) - B(_(0, numEqCon), 0) -= mlt * B(_(0, numEqCon), numSym + c); + B[_(0, numEqCon), 0] -= mlt * B[_(0, numEqCon), numSym + c]; if (yOff) for (ptrdiff_t c = 0; c < numDep1Var; ++c) if (int64_t mlt = yOff[c]) - B(_(0, numEqCon), 0) -= mlt * B(_(0, numEqCon), numSymX + c); + B[_(0, numEqCon), 0] -= mlt * B[_(0, numEqCon), numSymX + c]; for (ptrdiff_t r = 0; r < numPhi; ++r) { - B(r + numEqCon, _(0, numSym)) << 0; - B(r + numEqCon, _(0, xNumLoops) + numSym) << xPhi(r, _(0, xNumLoops)); - B(r + numEqCon, _(0, yNumLoops) + numSymX) << -yPhi(r, _(0, yNumLoops)); + B[r + numEqCon, _(0, numSym)] << 0; + B[r + numEqCon, _(0, xNumLoops) + numSym] << xPhi[r, _(0, xNumLoops)]; + B[r + numEqCon, _(0, yNumLoops) + numSymX] << -yPhi[r, _(0, yNumLoops)]; } - unsigned rank = unsigned(math::NormalForm::simplifySystemImpl(B)); + unsigned rank = ptrdiff_t(math::NormalForm::simplifySystemImpl(B)); if (rank <= numEqCon) return false; unsigned numConstraints = extend ? 
(xLoop->getNumCon() + xNumLoops + yLoop->getNumCon() + yNumLoops) @@ -699,29 +697,29 @@ class DepPoly : public BasePolyhedra { // numSyms should be the same; we aren't pruning symbols invariant(numSym, 1 + nDS); for (ptrdiff_t r = 0; r < xCon; ++r) { - A(r, _(0, xNumSym)) << Ax(r, _(0, xNumSym)); - A(r, _(xNumSym, numSym)) << 0; - A(r, _(0, xNumLoops) + numSym) << Ax(r, _(0, xNumLoops) + xNumSym); - A(r, _(0, yNumLoops) + numSymX) << 0; + A[r, _(0, xNumSym)] << Ax[r, _(0, xNumSym)]; + A[r, _(xNumSym, numSym)] << 0; + A[r, _(0, xNumLoops) + numSym] << Ax[r, _(0, xNumLoops) + xNumSym]; + A[r, _(0, yNumLoops) + numSymX] << 0; } for (ptrdiff_t r = 0; r < yCon; ++r) { - A(r + xCon, _(0, numSym)) << 0; + A[r + xCon, _(0, numSym)] << 0; for (ptrdiff_t j = 0; j < map.size(); ++j) - A(r + xCon, 1 + map[j]) = Ay(r, 1 + j); - A(r + xCon, _(0, xNumLoops) + numSym) << 0; - A(r + xCon, _(0, yNumLoops) + numSymX) - << Ay(r, _(0, yNumLoops) + yNumSym); + A[r + xCon, 1 + map[j]] = Ay[r, 1 + j]; + A[r + xCon, _(0, xNumLoops) + numSym] << 0; + A[r + xCon, _(0, yNumLoops) + numSymX] + << Ay[r, _(0, yNumLoops) + yNumSym]; } std::fill(A.begin() + size_t(xCon + yCon) * nCol, A.end(), 0); - A(_(0, nLoop) + (xCon + yCon), _(0, nLoop) + numSym).diag() << 1; - } else dp->getA() << getA()(_, _(0, nCol)); // truncate time + A[_(0, nLoop) + (xCon + yCon), _(0, nLoop) + numSym].diag() << 1; + } else dp->getA() << getA()[_, _(0, nCol)]; // truncate time if (xOff) for (ptrdiff_t c = 0; c < xNumLoops; ++c) - if (int64_t mlt = xOff[c]) A(_, 0) -= mlt * A(_, numSym + c); + if (int64_t mlt = xOff[c]) A[_, 0] -= mlt * A[_, numSym + c]; if (yOff) for (ptrdiff_t c = 0; c < yNumLoops; ++c) - if (int64_t mlt = yOff[c]) A(_, 0) -= mlt * A(_, numSymX + c); - dp->getE() << B(_(0, rank), _); + if (int64_t mlt = yOff[c]) A[_, 0] -= mlt * A[_, numSymX + c]; + dp->getE() << B[_(0, rank), _]; dp->pruneBounds(alloc); return dp->getNumCon() == 0; } diff --git a/include/Polyhedra/Loops.hpp 
b/include/Polyhedra/Loops.hpp index 970d90c1f..7c681d9a6 100644 --- a/include/Polyhedra/Loops.hpp +++ b/include/Polyhedra/Loops.hpp @@ -1,20 +1,20 @@ #pragma once +#include "Containers/Pair.hpp" #include "Polyhedra/Comparators.hpp" #include "Polyhedra/Polyhedra.hpp" #include "RemarkAnalysis.hpp" +#include #include #include #include #include #include #include -#include #include #include #include #include -#include #include #include #include @@ -36,7 +36,7 @@ namespace poly::poly { using math::IntMatrix, math::PtrVector, math::PtrMatrix, math::MutPtrMatrix; -using utils::Optional, utils::NotNull, utils::invariant; +using utils::Optional, utils::Valid, utils::invariant; inline auto isKnownOne(llvm::ScalarEvolution &SE, llvm::Value *v) -> bool { return v && SE.getSCEV(v)->isOne(); } @@ -113,26 +113,26 @@ findSymbolicIndex(llvm::ArrayRef symbols, [[nodiscard]] inline auto getMinMaxValueSCEV(llvm::ScalarEvolution &SE, const llvm::SCEVAddRecExpr *S) - -> std::pair { + -> containers::Pair { // if (!SE.containsAddRecurrence(S)) // return S; - if ((!S) || (!(S->isAffine()))) return std::make_pair(S, S); + if ((!S) || (!(S->isAffine()))) return {S, S}; const auto *opStart = S->getStart(); const auto *opStep = S->getStepRecurrence(SE); const auto *opFinal = SE.getSCEVAtScope(S, nullptr); // auto opFinal = SE.getSCEVAtScope(S, S->getLoop()->getParentLoop()); // FIXME: what if there are more AddRecs nested inside? 
- if (SE.isKnownNonNegative(opStep)) return std::make_pair(opStart, opFinal); - if (SE.isKnownNonPositive(opStep)) return std::make_pair(opFinal, opStart); - return std::make_pair(S, S); + if (SE.isKnownNonNegative(opStep)) return {opStart, opFinal}; + if (SE.isKnownNonPositive(opStep)) return {opFinal, opStart}; + return {S, S}; } // TODO: strengthen through recursion [[nodiscard]] inline auto getMinMaxValueSCEV(llvm::ScalarEvolution &SE, const llvm::SCEV *S) - -> std::pair { + -> containers::Pair { if (const auto *T = llvm::dyn_cast(S)) return getMinMaxValueSCEV(SE, T); - return std::make_pair(S, S); + return {S, S}; } [[nodiscard]] inline auto simplifyMinMax(llvm::ScalarEvolution &SE, const llvm::SCEVMinMaxExpr *S) @@ -168,22 +168,22 @@ namespace loopNestCtor { /// we try to break down value `v`, so that adding /// N, N - 1, N - 3 only adds the variable `N`, and adds the constant /// offsets -inline void addSymbol(IntMatrix &A, +inline void addSymbol(IntMatrix> &A, llvm::SmallVectorImpl &symbols, const llvm::SCEV *v, math::Range lu, int64_t mlt) { assert(lu.size()); symbols.push_back(v); - A.resize(A.numCol() + 1); - A(lu, symbols.size()) << mlt; + A.resize(++auto{A.numCol()}); + A[lu, symbols.size()] << mlt; } inline auto addRecMatchesLoop(const llvm::SCEV *S, llvm::Loop *L) -> bool { if (const auto *x = llvm::dyn_cast(S)) return x->getLoop() == L; return false; } -[[nodiscard]] inline auto -addSymbol(std::array &AB, // NOLINT(misc-no-recursion) +[[nodiscard]] inline auto // NOLINTNEXTLINE(misc-no-recursion) +addSymbol(std::array>, 2> &AB, llvm::SmallVectorImpl &symbols, llvm::Loop *L, const llvm::SCEV *v, llvm::ScalarEvolution &SE, math::Range lu, int64_t mlt, ptrdiff_t minDepth) @@ -191,11 +191,11 @@ addSymbol(std::array &AB, // NOLINT(misc-no-recursion) auto &[A, B] = AB; // first, we check if `v` in `Symbols` if (ptrdiff_t i = findSymbolicIndex(symbols, v)) { - A(lu, i) += mlt; + A[lu, i] += mlt; return minDepth; } if (std::optional c = getConstantInt(v)) { 
- A(lu, 0) += mlt * (*c); + A[lu, 0] += mlt * (*c); return minDepth; } if (const auto *ar = llvm::dyn_cast(v)) { @@ -221,8 +221,7 @@ addSymbol(std::array &AB, // NOLINT(misc-no-recursion) minDepth = addSymbol(AB, symbols, L, x->getOperand(0), SE, lu, mlt, minDepth); if (auto opc = getConstantInt(x->getOperand(1))) { - // swap order vs recDepth to go inner<->outer - B(lu, recDepth - 1) << mlt * (*opc); + B[lu, recDepth - 1] << mlt * (*opc); return minDepth; } v = SE.getAddRecExpr(SE.getZero(x->getOperand(0)->getType()), @@ -242,11 +241,11 @@ addSymbol(std::array &AB, // NOLINT(misc-no-recursion) const llvm::SCEV *op1 = mm->getOperand(1); if (isMin ^ (mlt < 0)) { // we can represent this as additional constraints Row M = A.numRow(); - Row Mp = M + std::ssize(lu); + Row Mp = Row<>{ptrdiff_t(M) + std::ssize(lu)}; A.resize(Mp); B.resize(Mp); - A(_(M, Mp), _) = A(lu, _); - B(_(M, Mp), _) = B(lu, _); + A[_(M, Mp), _] = A[lu, _]; + B[_(M, Mp), _] = B[lu, _]; minDepth = addSymbol(AB, symbols, L, op0, SE, lu, mlt, minDepth); minDepth = addSymbol(AB, symbols, L, op1, SE, _(M, Mp), mlt, minDepth); } else if (addRecMatchesLoop(op0, L)) { @@ -260,29 +259,29 @@ addSymbol(std::array &AB, // NOLINT(misc-no-recursion) return minDepth; } inline auto -areSymbolsLoopInvariant(IntMatrix &A, +areSymbolsLoopInvariant(IntMatrix> &A, llvm::SmallVectorImpl &symbols, llvm::Loop *L, llvm::ScalarEvolution &SE) -> bool { for (ptrdiff_t i = 0; i < std::ssize(symbols); ++i) - if ((!allZero(A(_, i + 1))) && (!SE.isLoopInvariant(symbols[i], L))) + if ((!allZero(A[_, i + 1])) && (!SE.isLoopInvariant(symbols[i], L))) return false; return true; } inline auto // NOLINTNEXTLINE(misc-no-recursion) -addBackedgeTakenCount(std::array &AB, +addBackedgeTakenCount(std::array>, 2> &AB, llvm::SmallVectorImpl &symbols, llvm::Loop *L, const llvm::SCEV *BT, llvm::ScalarEvolution &SE, ptrdiff_t minDepth, llvm::OptimizationRemarkEmitter *ORE) -> ptrdiff_t { // A contains syms auto &[A, B] = AB; - Row M = A.numRow(); - 
A.resize(M + 1); - B.resize(M + 1); - minDepth = addSymbol(AB, symbols, L, BT, SE, _(M, M + 1), 1, minDepth); + Row M = A.numRow(), MM = M; + A.resize(++MM); + B.resize(MM); + minDepth = addSymbol(AB, symbols, L, BT, SE, _(M, MM), 1, minDepth); assert(A.numRow() == B.numRow()); ptrdiff_t depth = L->getLoopDepth() - 1; - for (auto m = ptrdiff_t(M); m < A.numRow(); ++m) B(m, depth) = -1; // indvar + for (auto m = ptrdiff_t(M); m < A.numRow(); ++m) B[m, depth] = -1; // indvar // recurse, if possible to add an outer layer if (llvm::Loop *P = L->getParentLoop()) { if (areSymbolsLoopInvariant(A, symbols, P, SE)) { @@ -360,19 +359,19 @@ class Loop : public BasePolyhedra { static inline auto construct(Arena<> *alloc, llvm::Loop *L, const llvm::SCEV *BT, llvm::ScalarEvolution &SE, llvm::OptimizationRemarkEmitter *ORE = nullptr) - -> NotNull { + -> Valid { // A holds symbols // B holds loop bounds // they're separate so we can grow them independently - std::array AB; + std::array>, 2> AB; auto &[A, B] = AB; // once we're done assembling these, we'll concatenate A and B unsigned maxDepth = L->getLoopDepth(); invariant(maxDepth > 0); // ptrdiff_t maxNumSymbols = BT->getExpressionSize(); A.resizeForOverwrite( - math::StridedDims{0, 1, unsigned(1) + BT->getExpressionSize()}); - B.resizeForOverwrite(math::StridedDims{0, maxDepth, maxDepth}); + math::StridedDims<>{{0}, {1}, {ptrdiff_t(1) + BT->getExpressionSize()}}); + B.resizeForOverwrite(math::StridedDims<>{{0}, {maxDepth}, {maxDepth}}); llvm::SmallVector symbols; ptrdiff_t minDepth = loopNestCtor::addBackedgeTakenCount(AB, symbols, L, BT, SE, 0, ORE); @@ -387,7 +386,7 @@ class Loop : public BasePolyhedra { // search B(_,d) for references for (ptrdiff_t i = 0; i < B.numRow(); ++i) { // TODO; confirm `last` vs `end` - if (int64_t Bid = B(i, d)) { + if (int64_t Bid = B[i, d]) { if (!P) { // find P P = L; for (ptrdiff_t r = d + 1; r < maxDepth; ++r) P = P->getParentLoop(); @@ -404,20 +403,45 @@ class Loop : public BasePolyhedra { } 
invariant(1 + std::ssize(symbols), ptrdiff_t(A.numCol())); ptrdiff_t depth = maxDepth - minDepth; - unsigned numConstraints = unsigned(A.numRow()), N = unsigned(A.numCol()); - NotNull aln{ + ptrdiff_t numConstraints = ptrdiff_t(A.numRow()), N = ptrdiff_t(A.numCol()); + Valid aln{ Loop::allocate(alloc, L, numConstraints, depth, symbols, maxDepth)}; - aln->getA()(_, _(0, N)) << A; + aln->getA()[_, _(0, N)] << A; // copy the included loops from B // we use outer <-> inner order, so we skip unsupported outer loops. - aln->getA()(_, _(N, N + depth)) << B(_, _(end - depth, end)); + aln->getA()[_, _(N, N + depth)] << B[_, _(end - depth, end)]; return aln; // addZeroLowerBounds(); // NOTE: pruneBounds() is not legal here if we wish to use // removeInnerMost later. // pruneBounds(); } - + /// Gives a very rough trip count estimate (second return value) + /// with a boolean fist arg indicating whether it is exact or estimated. + /// The estimation approach here can be seriously improved. + /// Currently, if not exact, it simply returns 128. + [[nodiscard]] auto tripCount(ptrdiff_t depth) const + -> std::array { + auto A{getA()}; + // `i` is position of depth's indvar + ptrdiff_t i = 1 + numDynSymbols + depth, j = -1, k = -1; + // `A * loopindvars >= 0` + // Aci >= 0 is a lower bound + // Aci <= 0 is an upper bound + for (ptrdiff_t c = 0; c < A.numRow(); ++c) { + int64_t Aci = A[c, i]; + if (Aci > 0) { + if ((j >= 0) || (!math::allZero(A[c, _(1, i)]))) return {0, 128}; + j = c; + } else if (Aci < 0) { + if ((k >= 0) || (!math::allZero(A[c, _(1, i)]))) return {0, 128}; + k = c; + } + } + invariant(j >= 0); // must have lower bound + invariant(k >= 0); // must have upper bound + return {1, std::min(0xffff, A[k, 0] - A[j, 0])}; + } auto findIndex(const llvm::SCEV *v) const -> ptrdiff_t { return findSymbolicIndex(getSyms(), v); } @@ -431,9 +455,8 @@ class Loop : public BasePolyhedra { /// offset the loops by `offsets`, e.g. 
if we have /// offsets[0] = 2, then the first loop is shifted by 2. /// this shifting is applied before rotation. - [[nodiscard]] constexpr auto rotate(Arena<> *alloc, DensePtrMatrix R, - const int64_t *offsets) const - -> NotNull { + [[nodiscard]] auto rotate(Arena<> *alloc, DensePtrMatrix R, + const int64_t *offsets) const -> Valid { // if offsets is not null, we have the equivalent of // A * O * [I 0; 0 R] // where O = I - [0 0; offsets 0], @@ -442,21 +465,21 @@ class Loop : public BasePolyhedra { bool thisNonNeg = isNonNegative(), nonNeg = thisNonNeg && allGEZero(R), addExtra = thisNonNeg != nonNeg; if (addExtra) numExtraVar = getNumLoops(); - invariant(unsigned(R.numCol()), getNumLoops()); - invariant(unsigned(R.numRow()), getNumLoops()); + invariant(ptrdiff_t(R.numCol()), getNumLoops()); + invariant(ptrdiff_t(R.numRow()), getNumLoops()); auto A{getA()}; - const auto [M, N] = A.size(); + const auto [M, N] = shape(A); auto syms{getSyms()}; - NotNull aln{Loop::allocate(alloc, L, ptrdiff_t(M) + numExtraVar, - numLoops, syms, nonNeg)}; + Valid aln{Loop::allocate(alloc, L, ptrdiff_t(M) + numExtraVar, + numLoops, syms, nonNeg)}; auto B{aln->getA()}; - invariant(B.numRow(), M + numExtraVar); - invariant(B.numCol(), N); - B(_(0, M), _(0, numConst)) << A(_, _(0, numConst)); - B(_(0, M), _(numConst, end)) << A(_, _(numConst, end)) * R; + invariant(B.numRow() == M + numExtraVar); + invariant(B.numCol() == N); + B[_(0, M), _(0, numConst)] << A[_, _(0, numConst)]; + B[_(0, M), _(numConst, end)] << A[_, _(numConst, end)] * R; if (addExtra) { - B(_(M, end), _(0, numConst)) << 0; - B(_(M, end), _(numConst, end)) << R; + B[_(M, end), _(0, numConst)] << 0; + B[_(M, end), _(numConst, end)] << R; } // A * O * [I 0; 0 R] = A * [I 0; 0 R] - A * [0 0; offs 0] * [I 0; 0 R] // above, we computed `A * [I 0; 0 R]`, now if offsets != nullptr, @@ -470,8 +493,8 @@ class Loop : public BasePolyhedra { if (offsets) { for (ptrdiff_t l = 0, D = getNumLoops(); l < D; ++l) { if (int64_t mlt = 
offsets[l]) { - B(_(0, M), 0) -= mlt * A(_, numConst + l); - if (addExtra) B(M + l, 0) = -mlt; + B[_(0, M), 0] -= mlt * A[_, numConst + l]; + if (addExtra) B[M + l, 0] = -mlt; } } } @@ -479,30 +502,30 @@ class Loop : public BasePolyhedra { return aln; } [[nodiscard]] constexpr auto rotate(Arena<> *alloc, DensePtrMatrix R, - const int64_t *offsets) -> NotNull { + const int64_t *offsets) -> Valid { if (R == math::I) return this; return ((const Loop *)this)->rotate(alloc, R, offsets); } - [[nodiscard]] auto removeInnerMost(Arena<> *alloc) const -> NotNull { + [[nodiscard]] auto removeInnerMost(Arena<> *alloc) const -> Valid { // order is outer<->inner auto A{getA()}; - auto ret = Loop::allocate(alloc, L->getParentLoop(), unsigned(A.numRow()), + auto ret = Loop::allocate(alloc, L->getParentLoop(), ptrdiff_t(A.numRow()), getNumLoops() - 1, getSyms(), isNonNegative()); MutPtrMatrix B{ret->getA()}; - B << A(_, _(0, last)); + B << A[_, _(0, last)]; // no loop may be conditioned on the innermost loop, so we should be able to // safely remove all constraints that reference it for (Row m = B.numRow(); m--;) { - if (A(m, last)) { - if (m != B.numRow() - 1) B(m, _) << B(last, _); - B.truncate(B.numRow() - 1); + if (A[m, last]) { + if (m != --auto{B.numRow()}) B[m, _] << B[last, _]; + B.truncate(--B.numRow()); } } - ret->truncateConstraints(unsigned(B.numRow())); + ret->truncateConstraints(ptrdiff_t(B.numRow())); return ret; } - constexpr void truncateConstraints(unsigned newNumConstraints) { + constexpr void truncateConstraints(ptrdiff_t newNumConstraints) { assert(newNumConstraints <= numConstraints); numConstraints = newNumConstraints; } @@ -549,34 +572,33 @@ class Loop : public BasePolyhedra { ptrdiff_t M = numConstraints; numConstraints += numLoops; auto A{getA()}; - A(_(M, end), _) << 0; - for (ptrdiff_t i = 0; i < numLoops; ++i) A(M + i, end - numLoops + i) = 1; + A[_(M, end), _] << 0; + for (ptrdiff_t i = 0; i < numLoops; ++i) A[M + i, end - numLoops + i] = 1; // 
this->pruneBounds(alloc); } [[nodiscard]] constexpr auto getProgVars(ptrdiff_t j) const -> PtrVector { - return getA()(j, _(0, getNumSymbols())); + return getA()[j, _(0, getNumSymbols())]; } - [[nodiscard]] constexpr auto copy(Arena<> *alloc) const -> NotNull { + [[nodiscard]] auto copy(Arena<> *alloc) const -> Valid { auto ret = Loop::allocate(alloc, L, numConstraints, numLoops, getSyms(), isNonNegative()); ret->getA() << getA(); return ret; } - [[nodiscard]] constexpr auto removeLoop(Arena<> *alloc, ptrdiff_t v) const - -> Loop * { + [[nodiscard]] auto removeLoop(Arena<> *alloc, ptrdiff_t v) const -> Loop * { auto A{getA()}; v += getNumSymbols(); - auto zeroNegPos = indsZeroNegPos(A(_, v)); + auto zeroNegPos = indsZeroNegPos(A[_, v]); auto &[zer, neg, pos] = zeroNegPos; - unsigned numCon = - unsigned(A.numRow()) - pos.size() + neg.size() * pos.size(); + ptrdiff_t numCon = + ptrdiff_t(A.numRow()) - pos.size() + neg.size() * pos.size(); if (!isNonNegative()) numCon -= neg.size(); auto p = checkpoint(alloc); auto ret = Loop::allocate(alloc, nullptr, numCon, numLoops - 1, getSyms(), isNonNegative()); - ret->numConstraints = unsigned( + ret->numConstraints = ptrdiff_t( isNonNegative() ? 
fourierMotzkinCore(ret->getA(), getA(), v, zeroNegPos) : fourierMotzkinCore(ret->getA(), getA(), v, zeroNegPos)); @@ -590,7 +612,7 @@ class Loop : public BasePolyhedra { return ret; } constexpr void eraseConstraint(ptrdiff_t c) { - eraseConstraintImpl(getA(), c); + eraseConstraintImpl(getA(), Row<>{c}); --numConstraints; } [[nodiscard]] auto zeroExtraItersUponExtending(Arena<> alloc, ptrdiff_t _i, @@ -610,23 +632,23 @@ class Loop : public BasePolyhedra { const ptrdiff_t numConst = getNumSymbols(); auto A{tmp->getA()}; for (ptrdiff_t n = 0; n < A.numRow(); ++n) - if ((A(n, numConst) != 0) && (A(n, 1 + numConst) != 0)) indep = false; + if ((A[n, numConst] != 0) && (A[n, 1 + numConst] != 0)) indep = false; if (indep) return false; Loop *margi = tmp->removeLoop(&alloc, 1), *tmp2; - invariant(margi->getNumLoops(), unsigned(1)); - invariant(tmp->getNumLoops(), unsigned(2)); - invariant(margi->getA().numCol() + 1, tmp->getA().numCol()); + invariant(margi->getNumLoops(), ptrdiff_t(1)); + invariant(tmp->getNumLoops(), ptrdiff_t(2)); + invariant(++auto{margi->getA().numCol()}, tmp->getA().numCol()); // margi contains extrema for `_i` // we can substitute extended for value of `_i` // in `tmp` auto p2 = alloc.checkpoint(); int64_t sign = 2 * extendLower - 1; // extendLower ? 
1 : -1 for (ptrdiff_t c = 0; c < margi->getNumInequalityConstraints(); ++c) { - int64_t b = sign * margi->getA()(c, numConst); + int64_t b = sign * margi->getA()[c, numConst]; if (b <= 0) continue; alloc.rollback(p2); tmp2 = tmp->copy(&alloc); - invariant(tmp2->getNumLoops(), unsigned(2)); + invariant(tmp2->getNumLoops(), ptrdiff_t(2)); invariant(margi->getNumLoops() + 1, tmp2->getNumLoops()); // increment to increase bound // this is correct for both extending lower and extending upper @@ -634,18 +656,18 @@ class Loop : public BasePolyhedra { // upper: a'x - i + b >= 0 -> i <= a'x + b // to decrease the lower bound or increase the upper, we increment // `b` - ++(margi->getA())(c, 0); + ++(margi->getA())[c, 0]; // our approach here is to set `_i` equal to the extended bound // and then check if the resulting polyhedra is empty. // if not, then we may have >0 iterations. for (ptrdiff_t cc = 0; cc < tmp2->getNumCon(); ++cc) { - if (int64_t d = tmp2->getA()(cc, numConst)) { - tmp2->getA()(cc, _(0, last)) << b * tmp2->getA()(cc, _(0, last)) - - (d * sign) * margi->getA()(c, _); + if (int64_t d = tmp2->getA()[cc, numConst]) { + tmp2->getA()[cc, _(0, last)] << b * tmp2->getA()[cc, _(0, last)] - + (d * sign) * margi->getA()[c, _]; } } for (auto cc = ptrdiff_t(tmp2->getNumCon()); cc;) - if (tmp2->getA()(--cc, 1 + numConst) == 0) tmp2->eraseConstraint(cc); + if (tmp2->getA()[--cc, 1 + numConst] == 0) tmp2->eraseConstraint(cc); if (!(tmp2->calcIsEmpty(alloc))) return false; } if (isNonNegative()) { @@ -659,16 +681,16 @@ class Loop : public BasePolyhedra { // extended bound and then check if the resulting polyhedra is // empty. if not, then we may have >0 iterations. 
for (ptrdiff_t cc = 0; cc < tmp->getNumCon(); ++cc) { - if (int64_t d = tmp->getA()(cc, numConst)) { + if (int64_t d = tmp->getA()[cc, numConst]) { // lower bound is i >= 0 // so setting equal to the extended lower bound now // means that i = -1 so we decrement `d` from the column - tmp->getA()(cc, 0) -= d; - tmp->getA()(cc, numConst) = 0; + tmp->getA()[cc, 0] -= d; + tmp->getA()[cc, numConst] = 0; } } for (auto cc = ptrdiff_t(tmp->getNumCon()); cc;) - if (tmp->getA()(--cc, 1 + numConst) == 0) tmp->eraseConstraint(cc); + if (tmp->getA()[--cc, 1 + numConst] == 0) tmp->eraseConstraint(cc); if (!(tmp->calcIsEmpty(alloc))) return false; } } @@ -706,7 +728,7 @@ class Loop : public BasePolyhedra { DensePtrMatrix A{getA()}; bool printed = printSymbol(os, b, -sign); for (ptrdiff_t k = 0; k < numVarMinus1; ++k) { - if (int64_t lakj = A(j, k + numConst)) { + if (int64_t lakj = A[j, k + numConst]) { if (lakj * sign > 0) os << " - "; else if (printed) os << " + "; lakj = math::constexpr_abs(lakj); @@ -730,7 +752,7 @@ class Loop : public BasePolyhedra { if (numRow > 1) os << (isUpper ? "min(" : "max("); DensePtrMatrix A{getA()}; for (ptrdiff_t j = 0, k = 0; j < A.numRow(); ++j) { - if (A(j, last) * sign <= 0) continue; + if (A[j, last] * sign <= 0) continue; if (k++) os << ", "; printBound(os, sign, numVarMinus1, numConst, j); } @@ -753,7 +775,7 @@ class Loop : public BasePolyhedra { ptrdiff_t numRow = 0; int64_t allAj = 0; for (ptrdiff_t j = 0; j < A.numRow(); ++j) { - int64_t Ajr = A(j, last), Aj = Ajr * sign; + int64_t Ajr = A[j, last], Aj = Ajr * sign; if (Aj <= 0) continue; if (allAj) allAj = allAj == Aj ? 
allAj : -1; else allAj = Aj; @@ -769,7 +791,7 @@ class Loop : public BasePolyhedra { if (allAj > 0) return printBoundShort(os, sign, numVarM1, numConst, allAj, numRow, true); for (ptrdiff_t j = 0; j < A.numRow(); ++j) { - int64_t Ajr = A(j, end - 1), Aj = Ajr * sign; + int64_t Ajr = A[j, end - 1], Aj = Ajr * sign; if (Aj <= 0) continue; if (hasPrintedLine) for (ptrdiff_t k = 0; k < 21; ++k) os << ' '; @@ -790,7 +812,7 @@ class Loop : public BasePolyhedra { int64_t allAj = 0; ptrdiff_t numPos = 0, numNeg = 0; for (ptrdiff_t j = 0; j < A.numRow(); ++j) { - int64_t Ajr = A(j, last); + int64_t Ajr = A[j, last]; if (Ajr == 0) continue; numPos += Ajr > 0; numNeg += Ajr < 0; @@ -823,45 +845,52 @@ class Loop : public BasePolyhedra { // We pop off the outer most loop on every iteration. friend inline auto operator<<(llvm::raw_ostream &os, const Loop &aln) -> llvm::raw_ostream & { - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; aln.dump(os, &alloc); return os; } #ifndef NDEBUG [[gnu::used]] void dump() const { llvm::errs() << *this; } #endif - [[nodiscard]] constexpr auto getNumCon() const -> unsigned { + [[nodiscard]] constexpr auto getNumCon() const -> ptrdiff_t { return numConstraints; } [[nodiscard]] constexpr auto getA() -> MutDensePtrMatrix { const void *ptr = memory + sizeof(const llvm::SCEV *const *) * numDynSymbols; auto *p = (int64_t *)const_cast(ptr); - return {p, math::DenseDims{numConstraints, numLoops + numDynSymbols + 1}}; + return { + p, math::DenseDims<>{{numConstraints}, {numLoops + numDynSymbols + 1}}}; }; + /// returns the `A` where `A * i >= 0`, `i` are loop indvars + /// Number of rows indicate number of constraints, columns are + /// /// returns the `A` where `A * i >= 0`, `i` are loop indvars + /// Number of rows indicate number of constraints, columns are + /// 1 (constant) + numDynSymbols + number of loops [[nodiscard]] constexpr auto getA() const -> DensePtrMatrix { const void *ptr = memory + sizeof(const llvm::SCEV *const *) * 
numDynSymbols; auto *p = (int64_t *)const_cast(ptr); - return {p, math::DenseDims{numConstraints, numLoops + numDynSymbols + 1}}; + return { + p, math::DenseDims<>{{numConstraints}, {numLoops + numDynSymbols + 1}}}; }; - [[nodiscard]] constexpr auto getOuterA(unsigned subLoop) + [[nodiscard]] constexpr auto getOuterA(ptrdiff_t subLoop) -> MutPtrMatrix { const void *ptr = memory + sizeof(const llvm::SCEV *const *) * numDynSymbols; auto *p = (int64_t *)const_cast(ptr); - unsigned numSym = numDynSymbols + 1; - return {p, math::StridedDims{numConstraints, subLoop + numSym, - numLoops + numSym}}; + ptrdiff_t numSym = numDynSymbols + 1; + return {p, math::StridedDims<>{ + {numConstraints}, {subLoop + numSym}, {numLoops + numSym}}}; }; - [[nodiscard]] constexpr auto getOuterA(unsigned subLoop) const + [[nodiscard]] constexpr auto getOuterA(ptrdiff_t subLoop) const -> PtrMatrix { const void *ptr = memory + sizeof(const llvm::SCEV *const *) * numDynSymbols; auto *p = (int64_t *)const_cast(ptr); - unsigned numSym = numDynSymbols + 1; - return {p, math::StridedDims{numConstraints, subLoop + numSym, - numLoops + numSym}}; + ptrdiff_t numSym = numDynSymbols + 1; + return {p, math::StridedDims<>{ + {numConstraints}, {subLoop + numSym}, {numLoops + numSym}}}; }; [[nodiscard]] auto getSyms() -> llvm::MutableArrayRef { void *ptr = memory; @@ -871,31 +900,31 @@ class Loop : public BasePolyhedra { const void *ptr = memory; return {(const llvm::SCEV *const *)ptr, numDynSymbols}; } - [[nodiscard]] constexpr auto getNumLoops() const -> unsigned { + [[nodiscard]] constexpr auto getNumLoops() const -> ptrdiff_t { return numLoops; } - [[nodiscard]] constexpr auto getNumSymbols() const -> unsigned { + [[nodiscard]] constexpr auto getNumSymbols() const -> ptrdiff_t { return numDynSymbols + 1; } - constexpr void truncNumInEqCon(Row r) { + constexpr void truncNumInEqCon(Row<> r) { invariant(r < numConstraints); - numConstraints = unsigned(r); + numConstraints = ptrdiff_t(r); } [[nodiscard]] 
static auto construct(Arena<> *alloc, llvm::Loop *L, PtrMatrix A, llvm::ArrayRef syms, bool nonNeg) -> Loop * { - unsigned numLoops = unsigned(A.numCol()) - 1 - syms.size(); + ptrdiff_t numLoops = ptrdiff_t(A.numCol()) - 1 - syms.size(); Loop *aln = - allocate(alloc, L, unsigned(A.numRow()), numLoops, syms, nonNeg); + allocate(alloc, L, ptrdiff_t(A.numRow()), numLoops, syms, nonNeg); aln->getA() << A; return aln; } [[nodiscard]] static auto allocate(Arena<> *alloc, llvm::Loop *L, unsigned numCon, unsigned numLoops, llvm::ArrayRef syms, - bool nonNegative) -> NotNull { + bool nonNegative) -> Valid { unsigned numDynSym = syms.size(); unsigned N = numLoops + numDynSym + 1; // extra capacity for adding 0 lower bounds later, see @@ -908,7 +937,7 @@ class Loop : public BasePolyhedra { auto *mem = (Loop *)alloc->allocate(sizeof(Loop) + memNeeded); auto *aln = std::construct_at(mem, L, numCon, numLoops, numDynSym, M); std::copy_n(syms.begin(), numDynSym, aln->getSyms().begin()); - return NotNull{aln}; + return Valid{aln}; } explicit constexpr Loop(llvm::Loop *loop, unsigned _numConstraints, unsigned _numLoops, unsigned _numDynSymbols, diff --git a/include/Polyhedra/Polyhedra.hpp b/include/Polyhedra/Polyhedra.hpp index c187671e3..6c440c838 100644 --- a/include/Polyhedra/Polyhedra.hpp +++ b/include/Polyhedra/Polyhedra.hpp @@ -1,26 +1,24 @@ #pragma once #include "Polyhedra/Comparators.hpp" +#include #include #include +#include #include #include #include -#include -#include #include #include -#include #include -#include #ifndef NDEBUG #include #endif namespace poly::poly { +using alloc::Arena; using math::DensePtrMatrix, math::MutDensePtrMatrix, math::EmptyMatrix, math::Row, math::Col, math::vector, math::matrix, math::_, math::end, - math::last; -using utils::Arena; + math::last, math::operator<<; inline auto printPositive(std::ostream &os, ptrdiff_t stop) -> std::ostream & { for (ptrdiff_t i = 0; i < stop; ++i) os << "v_" << i << " >= 0\n"; return os; @@ -82,14 +80,14 @@ 
struct BasePolyhedra { if constexpr (HasEqualities) return static_cast(this)->getE(); else return EmptyMatrix(); } - constexpr void truncNumInEqCon(Row r) { + constexpr void truncNumInEqCon(Row<> r) { static_cast

(this)->truncNumInEqCon(r); } - constexpr void truncNumEqCon(Row r) { + constexpr void truncNumEqCon(Row<> r) { if constexpr (HasEqualities) static_cast

(this)->truncNumEqCon(r); } [[nodiscard]] constexpr auto - initializeComparator(std::allocator alloc = + initializeComparator(alloc::Mallocator alloc = {}) // NOLINT(performance-unnecessary-value-param) -> comparator::LinearSymbolicComparator { if constexpr (MaybeNonNeg) @@ -133,24 +131,24 @@ struct BasePolyhedra { pruneBoundsCore(&alloc); } constexpr void pruneBounds() { - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; pruneBounds(alloc); } constexpr void eraseConstraint(ptrdiff_t constraint) { - eraseConstraintImpl(getA(), constraint); + eraseConstraintImpl(getA(), Row<>{constraint}); decrementNumConstraints(); } template constexpr void pruneBoundsCore(Arena<> *alloc) { - auto diff = vector(alloc, unsigned(getA().numCol())); + auto diff = vector(alloc, ptrdiff_t(getA().numCol())); auto p = checkpoint(alloc); const ptrdiff_t dyn = getNumDynamic(); if constexpr (HasEqualities) { auto [ar, er] = removeRedundantRows(getA(), getE()); - setNumConstraints(unsigned(ar)); - setNumEqConstraints(unsigned(er)); + setNumConstraints(ptrdiff_t(ar)); + setNumEqConstraints(ptrdiff_t(er)); for (ptrdiff_t i = 0; i < getNumEqualityConstraints(); ++i) { - auto l = gcd(getE()(i, _)); - if (l != 1) getE()(i, _) /= l; + auto l = gcd(getE()[i, _]); + if (l != 1) getE()[i, _] /= l; } } auto C = initializeComparator(alloc); @@ -165,7 +163,7 @@ struct BasePolyhedra { bool broke = false; for (auto i = --j; i;) { if (getNumCon() <= 1) return; - diff << getA()(--i, _) - getA()(j, _); + diff << getA()[--i, _] - getA()[j, _]; if (C.greaterEqual(*alloc, diff)) { eraseConstraint(i); rollback(alloc, p); @@ -182,7 +180,7 @@ struct BasePolyhedra { if constexpr (MaybeNonNeg) { if (isNonNegative() && !broke) { for (ptrdiff_t i = 0; i < dyn; ++i) { - diff << getA()(j, _); + diff << getA()[j, _]; --diff[last - i]; if (C.greaterEqual(*alloc, diff)) { eraseConstraint(j); @@ -229,13 +227,13 @@ struct BasePolyhedra { dropEmptyConstraints(getA()); if constexpr (HasEqualities) 
dropEmptyConstraints(getE()); } - friend inline auto operator<<(llvm::raw_ostream &os, const BasePolyhedra &p) - -> llvm::raw_ostream & { - auto &&os2 = printConstraints(os << "\n", p.getA()); + friend inline auto operator<<(std::ostream &os, const BasePolyhedra &p) + -> std::ostream & { + printConstraints(os << "\n", p.getA()); if constexpr (MaybeNonNeg) - if (p.isNonNegative()) printPositive(os2, p.getNumDynamic()); - if constexpr (HasEqualities) return printConstraints(os2, p.getE(), false); - return os2; + if (p.isNonNegative()) printPositive(os, p.getNumDynamic()); + if constexpr (HasEqualities) return printConstraints(os, p.getE(), false); + return os; } #ifndef NDEBUG [[gnu::used]] void dump() const { @@ -252,8 +250,8 @@ struct BasePolyhedra { // return false; } void truncateVars(ptrdiff_t numVar) { - if constexpr (HasEqualities) getE().truncate(Col{numVar}); - getA().truncate(Col{numVar}); + if constexpr (HasEqualities) getE().truncate(Col<>{numVar}); + getA().truncate(Col<>{numVar}); } }; } // namespace poly::poly diff --git a/include/Polyhedra/Schedule.hpp b/include/Polyhedra/Schedule.hpp index bf50d6ba1..ee2949977 100644 --- a/include/Polyhedra/Schedule.hpp +++ b/include/Polyhedra/Schedule.hpp @@ -1,16 +1,14 @@ #pragma once +#include "Alloc/Arena.hpp" #include "Math/Array.hpp" -#include "Utilities/Allocators.hpp" #include #include #include -#include #include #include #include #include -#include namespace poly::poly { using math::_, math::PtrVector, math::MutPtrVector, math::SquarePtrMatrix, @@ -46,11 +44,11 @@ struct AffineSchedule { constexpr AffineSchedule() : mem(nullptr) {} constexpr AffineSchedule(int64_t *m) : mem(m) {} - constexpr AffineSchedule(utils::Arena<> *alloc, unsigned nL) + constexpr AffineSchedule(alloc::Arena<> *alloc, unsigned nL) : mem(alloc->allocate(requiredScheduleStorage(nL))) { mem[0] = nL; } - constexpr auto copy(utils::Arena<> *alloc) const -> AffineSchedule { + constexpr auto copy(alloc::Arena<> *alloc) const -> AffineSchedule 
{ size_t reqMem = requiredScheduleStorage(getNumLoops()); AffineSchedule res{alloc->allocate(reqMem)}; std::copy_n(mem, reqMem, res.mem); @@ -73,18 +71,18 @@ struct AffineSchedule { } // NOLINTNEXTLINE(readability-make-member-function-const) [[nodiscard]] constexpr auto getPhi() -> MutSquarePtrMatrix { - return {data(), math::SquareDims{unsigned(getNumLoops())}}; + return {data(), math::SquareDims<>{getNumLoops()}}; } [[nodiscard]] constexpr auto getPhi() const -> SquarePtrMatrix { - return {data(), math::SquareDims{getNumLoops()}}; // + return {data(), math::SquareDims<>{getNumLoops()}}; // } /// getSchedule, loops are always indexed from outer to inner [[nodiscard]] constexpr auto getSchedule(size_t d) const -> math::PtrVector { - return getPhi()(d, _); + return getPhi()[d, _]; } [[nodiscard]] constexpr auto getSchedule(size_t d) -> MutPtrVector { - return getPhi()(d, _); + return getPhi()[d, _]; } [[nodiscard]] constexpr auto getFusionOmega(size_t i) const -> int64_t { return data()[getNumLoopsSquared() + i]; diff --git a/include/Support/Iterators.hpp b/include/Support/Iterators.hpp index f4d9b4878..9d1063a52 100644 --- a/include/Support/Iterators.hpp +++ b/include/Support/Iterators.hpp @@ -1,8 +1,6 @@ #pragma once #include #include -#include -#include #include #include @@ -18,8 +16,8 @@ class VCycleIterator { public: using value_type = int32_t; constexpr VCycleIterator() noexcept = default; - constexpr VCycleIterator(const int32_t *data, int32_t start) noexcept - : data(data), state(start), start(start), dobreak(start < 0) {} + constexpr VCycleIterator(const int32_t *data_, int32_t start_) noexcept + : data(data_), state(start_), start(start_), dobreak(start_ < 0) {} constexpr auto operator*() const noexcept -> int32_t { return state; } constexpr auto operator++() noexcept -> VCycleIterator & { state = data[state]; @@ -64,10 +62,10 @@ class VCycleRange : public std::ranges::view_interface { int32_t start; public: - constexpr VCycleRange(math::PtrVector data, 
int32_t start) noexcept - : data(data.begin()), start(start) {} - constexpr VCycleRange(const int32_t *data, int32_t start) noexcept - : data(data), start(start) {} + constexpr VCycleRange(math::PtrVector data_, int32_t start_) noexcept + : data(data_.begin()), start(start_) {} + constexpr VCycleRange(const int32_t *data_, int32_t start_) noexcept + : data(data_), start(start_) {} [[nodiscard]] constexpr auto begin() const noexcept -> VCycleIterator { return {data, start}; @@ -76,19 +74,24 @@ class VCycleRange : public std::ranges::view_interface { }; static_assert(std::ranges::forward_range); +/// VForwardIterator is safe with respect to removing the current iteration from +/// the list. However, behavior is undefined if you remove or move the next +/// element. class VForwardIterator { const int32_t *data{nullptr}; int32_t state{-1}; + int32_t next{-1}; public: using value_type = int32_t; constexpr VForwardIterator() noexcept = default; - constexpr VForwardIterator(const int32_t *data, int32_t start) noexcept - : data(data), state(start) {} + constexpr VForwardIterator(const int32_t *data_, int32_t start_) noexcept + : data{data_}, state{start_}, next{start_ < 0 ? 
start_ : data_[start_]} {} constexpr auto operator*() const noexcept -> int32_t { return state; } constexpr auto operator++() noexcept -> VForwardIterator & { - state = data[state]; + state = next; + if (next >= 0) next = data[next]; return *this; } constexpr auto operator++(int) noexcept -> VForwardIterator { @@ -129,10 +132,11 @@ class VForwardRange : public std::ranges::view_interface { int32_t start; public: - constexpr VForwardRange(math::PtrVector data, int32_t start) noexcept - : data(data.begin()), start(start) {} - constexpr VForwardRange(const int32_t *data, int32_t start) noexcept - : data(data), start(start) {} + constexpr VForwardRange(math::PtrVector data_, + int32_t start_) noexcept + : data(data_.begin()), start(start_) {} + constexpr VForwardRange(const int32_t *data_, int32_t start_) noexcept + : data(data_), start(start_) {} [[nodiscard]] constexpr auto begin() const noexcept -> VForwardIterator { return {data, start}; diff --git a/include/Support/OStream.hpp b/include/Support/OStream.hpp index 1671b0830..e004194a3 100644 --- a/include/Support/OStream.hpp +++ b/include/Support/OStream.hpp @@ -13,7 +13,8 @@ template inline auto operator<<(llvm::raw_ostream &os, PtrVector const &A) -> llvm::raw_ostream & { std::ostringstream sos; - return os << printVector(sos, A).str(); + printVector(sos, A); + return os << sos.str(); } inline auto operator<<(llvm::raw_ostream &os, const AbstractVector auto &A) -> llvm::raw_ostream & { @@ -25,17 +26,18 @@ template inline auto operator<<(llvm::raw_ostream &os, PtrMatrix A) -> llvm::raw_ostream & { std::ostringstream sos; - return os << printMatrix(sos, A).str(); + printMatrix(sos, A); + return os << sos.str(); } template -inline auto operator<<(llvm::raw_ostream &os, Array A) - -> std::ostream & { - return printMatrix(os, PtrMatrix{A}); +inline auto operator<<(llvm::raw_ostream &os, Array> A) + -> llvm::raw_ostream & { + return os << PtrMatrix{A}; } template -inline auto operator<<(llvm::raw_ostream &os, Array A) - 
-> std::ostream & { - return printMatrix(os, PtrMatrix{A}); +inline auto operator<<(llvm::raw_ostream &os, Array> A) + -> llvm::raw_ostream & { + return os << PtrMatrix{A}; } } // namespace math namespace utils { diff --git a/include/TurboLoop.hpp b/include/TurboLoop.hpp index 9ca9ed33f..af15d79f2 100644 --- a/include/TurboLoop.hpp +++ b/include/TurboLoop.hpp @@ -67,14 +67,15 @@ concept LoadOrStoreInst = std::same_as>; class TurboLoop { - dict::map loopMap; - // const llvm::TargetLibraryInfo *TLI; + const llvm::TargetLibraryInfo *TLI; const llvm::TargetTransformInfo *TTI; llvm::LoopInfo *LI; llvm::ScalarEvolution *SE; llvm::OptimizationRemarkEmitter *ORE; lp::LoopBlock loopBlock{}; IR::Cache instructions{}; + dict::set loopBBs; + dict::set eraseCandidates; CostModeling::CPURegisterFile registers; // this is an allocator that it is safe to reset completely when @@ -163,6 +164,7 @@ class TurboLoop { instructions.addPredicate(A, P, &(*predMapAbridged)); A->setLoopNest(AL); } + loopBBs.insert(BB); } return IR::mergeInstructions(instructions, *predMapAbridged, *TTI, *shortAllocator(), @@ -181,8 +183,7 @@ class TurboLoop { // we'd have to make sure none of the allocated instructions // can be referenced again (e.g., through the free list) // auto p = lalloc.checkpoint(); - NotNull AL = - poly::Loop::construct(lalloc, L, nwr.visit(BT), *SE); + Valid AL = poly::Loop::construct(lalloc, L, nwr.visit(BT), *SE); IR::TreeResult tr = parseExitBlocks(L); tr.rejectDepth = std::max(tr.rejectDepth, omega.size() - AL->getNumLoops()); omega.push_back(0); // we start with 0 at the end, walking backwards @@ -248,7 +249,7 @@ class TurboLoop { /// large nest. /// /// If any of the subloops fail, or we fail to draw the connection, then we - /// can optimize the continuous succesful block we've produced, and return a + /// can optimize the continuous successful block we've produced, and return a /// failure up the tree. 
/// /// @@ -331,9 +332,14 @@ class TurboLoop { void optimize(IR::TreeResult tr) { // now we build the LinearProgram - lp::OptimizationResult lpor = loopBlock.optimize(instructions, tr); + lp::LoopBlock::OptimizationResult lpor = + loopBlock.optimize(instructions, tr); if (!lpor.nodes) return; - CostModeling::optimize(instructions, loopBlock.getAllocator(), lpor); + for (IR::Addr *addr : lpor.addr.getAddr()) + loopBBs.insert(addr->getBasicBlock()); + CostModeling::optimize(loopBlock.getDependencies(), instructions, loopBBs, + eraseCandidates, loopBlock.getAllocator(), lpor); + loopBBs.clear(); } /* auto isLoopPreHeader(const llvm::BasicBlock *BB) const -> bool { @@ -367,7 +373,8 @@ class TurboLoop { // } public: TurboLoop(llvm::Function &F, llvm::FunctionAnalysisManager &FAM) - : TTI{&FAM.getResult(F)}, + : TLI{&FAM.getResult(F)}, + TTI{&FAM.getResult(F)}, LI{&FAM.getResult(F)}, SE{&FAM.getResult(F)}, ORE{&FAM.getResult(F)}, diff --git a/test/ArrayReference.hpp b/test/ArrayReference.hpp index 33ec953fd..f33b917b1 100644 --- a/test/ArrayReference.hpp +++ b/test/ArrayReference.hpp @@ -1,14 +1,15 @@ #pragma once #include "IR/Address.hpp" #include "Math/Math.hpp" +#include "Math/MatrixDimensions.hpp" #include "Polyhedra/Loops.hpp" #include #include namespace poly { -using math::DenseMatrix, math::PtrMatrix, math::MutPtrMatrix, utils::Arena, - math::PtrVector, utils::NotNull; +using math::DenseMatrix, math::PtrMatrix, math::MutPtrMatrix, alloc::Arena, + math::PtrVector, math::DenseDims, utils::Valid; struct ArrayReference { const llvm::SCEVUnknown *basePointer; @@ -16,7 +17,7 @@ struct ArrayReference { DenseMatrix indMat; DenseMatrix offMat; llvm::SmallVector sizes; - ArrayReference(const llvm::SCEVUnknown *p, poly::Loop *l, size_t dim) + ArrayReference(const llvm::SCEVUnknown *p, poly::Loop *l, unsigned dim) : basePointer(p), loop(l), indMat(DenseDims{loop->getNumLoops(), dim}), offMat(DenseDims{dim, 1}), sizes(dim) { indexMatrix() << 0; @@ -32,12 +33,13 @@ struct 
ArrayReference { return ptrdiff_t(offMat.numRow()); } }; -inline auto createMemAccess(Arena<> *alloc, ArrayReference &ar, - llvm::Instruction *IC, PtrVector omegas) - -> NotNull { +// inline auto createMemAccess(Arena<> *alloc, ArrayReference &ar, +// llvm::Instruction *IC, PtrVector +// omegas) +// -> Valid { - IntMatrix indMatT(ar.indMat.transpose()); - return IR::Addr::construct(alloc, ar.basePointer, *ar.loop, IC, indMatT, - {ar.sizes, {}}, ar.offsetMatrix(), omegas); -} +// math::IntMatrix indMatT{ar.indMat.t()}; +// return IR::Addr::construct(alloc, ar.basePointer, *ar.loop, IC, indMatT, +// {ar.sizes, {}}, ar.offsetMatrix(), omegas); +// } } // namespace poly diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bf694dbe6..12581edd0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -7,6 +7,7 @@ project(LoopModelsTests LANGUAGES CXX) option(ENABLE_TEST_COVERAGE "Enable test coverage" OFF) option(TEST_INSTALLED_VERSION "Test the version found by find_package" OFF) option(ENABLE_LLD "Use lld for linking" ON) +option(TEST_LOOPMODELS "Test LoopModels" OFF) # ON FIXME # --- Import tools ---- @@ -19,6 +20,12 @@ include(../cmake/CPM.cmake) # ---- compile_commands.json ---- set(CMAKE_EXPORT_COMPILE_COMMANDS ON) +if(((USE_SANITIZER MATCHES "([Aa]ddress)") OR (USE_SANITIZER MATCHES "([Aa]ddress);([Uu]ndefined)")) + AND (CMAKE_CXX_COMPILER_ID MATCHES "Clang") +) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lunwind -Wno-unused-command-line-argument") +endif() + # CPMAddPackage("gh:onqtam/doctest@2.4.9") CPMAddPackage("gh:TheLartians/Format.cmake@1.7.3") CPMAddPackage( @@ -40,10 +47,12 @@ FetchContent_Declare( ) FetchContent_MakeAvailable(Math) -if(TEST_INSTALLED_VERSION) - find_package(LoopModels REQUIRED) -else() - add_subdirectory(.. LoopModels) +if(TEST_LOOPMODELS) + if(TEST_INSTALLED_VERSION) + find_package(LoopModels REQUIRED) + else() + add_subdirectory(.. 
LoopModels) + endif() endif() # ---- Create binary ---- @@ -53,25 +62,21 @@ file( GLOB tests CONFIGURE_DEPENDS - ${CMAKE_CURRENT_SOURCE_DIR}/bitset_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/bumpmap_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/comparator_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/compat_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/dependence_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/graph_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/linear_algebra_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/linear_diophantine_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/matrix_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/normal_form_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/orthogonalize_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/remarks_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/simplex_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/string_to_intmat_test.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/unimodularization_test.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dict_test.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/bitset_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/bumpmap_test.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/comparator_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/compat_test.cpp # + # ${CMAKE_CURRENT_SOURCE_DIR}/dependence_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/graph_test.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/linear_algebra_test.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/linear_diophantine_test.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/matrix_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/normal_form_test.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/orthogonalize_test.cpp ${CMAKE_CURRENT_SOURCE_DIR}/remarks_test.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/simplex_test.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/string_to_intmat_test.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/unimodularization_test.cpp ) # list(FILTER tests EXCLUDE REGEX "remarks.*") for remarks test -find_package(LLVM 16 REQUIRED CONFIG) +find_package(LLVM 17 REQUIRED CONFIG) list(APPEND CMAKE_MODULE_PATH ${LLVM_CMAKE_DIR}) # include(AddLLVM) include(${LLVM_DIR}/AddLLVM.cmake) @@ -88,7 +93,6 @@ target_precompile_headers( ${PROJECT_NAME} PRIVATE - @@ 
-137,7 +141,7 @@ target_link_libraries( ${PROJECT_NAME} PRIVATE GTest::gtest_main LLVM unordered_dense::unordered_dense Math ) set(CXX_STANDARD_REQUIRED ON) -set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 20) +set_target_properties(${PROJECT_NAME} PROPERTIES CXX_STANDARD 23) set_target_properties( ${PROJECT_NAME} PROPERTIES ENVIRONMENT WORKING_DIRECTORY=${PROJECT_BINARY_DIR} ) @@ -176,26 +180,35 @@ endif() if(NOT TEST_INSTALLED_VERSION) if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID MATCHES "GNU") # -Werror? - target_compile_options(LoopModels PUBLIC -Wall -Wpedantic -Wextra -Wshadow) + if(TEST_LOOPMODELS) + target_compile_options(LoopModels PUBLIC -Wall -Wpedantic -Wextra -Wshadow) + endif() target_compile_options(${PROJECT_NAME} PUBLIC -Wall -Wpedantic -Wextra -Wshadow) - elseif(MSVC) + elseif(MSVC AND TEST_LOOPMODELS) target_compile_options(LoopModels PUBLIC /W4 /WX) endif() endif() # target_compile_options(LoopModels PRIVATE -D_GLIBCXX_DEBUG) target_compile_options(${PROJECT_NAME} # PRIVATE -D_GLIBCXX_DEBUG) -target_compile_options(LoopModels PRIVATE -D_GLIBCXX_ASSERTIONS) + +if(TEST_LOOPMODELS) + target_compile_options(LoopModels PRIVATE -D_GLIBCXX_ASSERTIONS) +endif() target_compile_options(${PROJECT_NAME} PRIVATE -D_GLIBCXX_ASSERTIONS) if(ENABLE_LLD) target_link_options(${PROJECT_NAME} PRIVATE -fuse-ld=lld) - target_link_options(LoopModels PRIVATE -fuse-ld=lld) + if(TEST_LOOPMODELS) + target_link_options(LoopModels PRIVATE -fuse-ld=lld) + endif() endif() # ---- code coverage ---- message(STATUS "ENABLE_TEST_COVERAGE: ${ENABLE_TEST_COVERAGE}") if(ENABLE_TEST_COVERAGE) - target_compile_options(LoopModels PUBLIC -O0 -g --coverage) - target_link_options(LoopModels PUBLIC --coverage) + if(TEST_LOOPMODELS) + target_compile_options(LoopModels PUBLIC -O0 -g --coverage) + target_link_options(LoopModels PUBLIC --coverage) + endif() target_compile_options(${PROJECT_NAME} PUBLIC -O0 -g --coverage) target_link_options(${PROJECT_NAME}
PUBLIC --coverage) add_custom_target( diff --git a/test/TestUtilities.hpp b/test/TestUtilities.hpp index 1de5f585a..b52ca71f4 100644 --- a/test/TestUtilities.hpp +++ b/test/TestUtilities.hpp @@ -1,10 +1,9 @@ #pragma once #include "Polyhedra/Loops.hpp" -#include +#include #include #include #include -#include #include #include #include @@ -24,7 +23,7 @@ namespace poly { using math::PtrMatrix; class TestLoopFunction { - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; llvm::LLVMContext ctx; llvm::Module *mod; llvm::LoopInfo LI{}; @@ -33,7 +32,7 @@ class TestLoopFunction { llvm::Function *F; llvm::DataLayout dl; llvm::TargetTransformInfo TTI; - llvm::Triple targetTripple{}; + llvm::Triple targetTriple{}; llvm::TargetLibraryInfo TLI; llvm::AssumptionCache AC; llvm::ScalarEvolution SE; @@ -45,7 +44,7 @@ class TestLoopFunction { size_t ptrIntOffset{0}; public: - auto getAlloc() -> utils::Arena<> * { return &alloc; } + auto getAlloc() -> alloc::Arena<> * { return &alloc; } auto getLoopNest(size_t i) -> poly::Loop * { return alns[i]; } auto getNumLoopNests() -> size_t { return alns.size(); } void addLoop(PtrMatrix A, size_t numLoops) { @@ -93,7 +92,7 @@ class TestLoopFunction { llvm::SmallVector(), false)}, F{llvm::Function::Create( FT, llvm::GlobalValue::LinkageTypes::ExternalLinkage, "foo", mod)}, - dl{mod}, TTI{dl}, TLI{llvm::TargetLibraryInfoImpl{targetTripple}, F}, + dl{mod}, TTI{dl}, TLI{llvm::TargetLibraryInfoImpl{targetTriple}, F}, AC{*F, &TTI}, SE{*F, TLI, AC, DT, LI}, BB{llvm::BasicBlock::Create(ctx, "entry", F)}, builder{llvm::IRBuilder(BB)} { diff --git a/test/bumpmap_test.cpp b/test/bumpmap_test.cpp index 326e4a928..44b0be6af 100644 --- a/test/bumpmap_test.cpp +++ b/test/bumpmap_test.cpp @@ -1,4 +1,5 @@ #include "Dicts/BumpMapSet.hpp" +#include #include // // NOLINTNEXTLINE(modernize-use-trailing-return-type) @@ -13,9 +14,22 @@ // } // NOLINTNEXTLINE(modernize-use-trailing-return-type) TEST(BumpDownMapTest, BasicAssertions) { - OwningArena<> alloc; + 
using M = poly::dict::amap; + static_assert( + std::same_as< + M::value_container_type, + poly::math::BumpPtrVector>>); + static_assert( + std::same_as< + M::allocator_type, + poly::alloc::WArena, 16384, true>>); + + poly::alloc::OwningArena<> alloc; + M::allocator_type walloc{&alloc}; + M::value_container_type mvals{walloc}; + // poly::math::BumpPtrVector vec{&alloc}; for (int i = 0; i < 100; ++i) { - poly::dict::amap map{&alloc}; + M map{&alloc}; for (int j = 0; j < 100; ++j) map.insert({j, j}); for (int j = 0; j < 100; ++j) EXPECT_EQ(map.find(j)->second, j); alloc.reset(); diff --git a/test/comparator_test.cpp b/test/comparator_test.cpp index 4f42507c5..63fe63821 100644 --- a/test/comparator_test.cpp +++ b/test/comparator_test.cpp @@ -23,7 +23,7 @@ TEST(BasicCompare, BasicAssertions) { // 0 0 -1 1 0 // 0 0 -1 0 1 ] IntMatrix A = "[-1 0 1 0 0; 0 -1 1 0 0; 0 0 -1 1 0; 0 0 -1 0 1]"_mat; - auto comp = poly::comparator::linear(std::allocator{}, A, + auto comp = poly::comparator::linear(alloc::Mallocator{}, A, EmptyMatrix{}, false); Vector query{{-1, 0, 0, 1, 0}}; @@ -98,7 +98,7 @@ TEST(V2Matrix, BasicAssertions) { // 0 0 0 1 0 0 -1 0; 0 0 0 0 0 0 0 0 1 0 0 1; 0 0 0 0 0 0 0 0 0 1 0 0]"_mat; auto comp = poly::comparator::LinearSymbolicComparator::construct(A, false); auto [H, U] = NormalForm::hermite(std::move(A)); - IntMatrix Ht = H.transpose(); + IntMatrix Ht = H.t(); // llvm::errs() << "Ht matrix:" << Ht << "\n"; auto Vt = IntMatrix::identity(Ht.numRow()); auto NS = NormalForm::nullSpace(Ht); diff --git a/test/compat_test.cpp b/test/compat_test.cpp index 2cbbca17c..b3b1813d3 100644 --- a/test/compat_test.cpp +++ b/test/compat_test.cpp @@ -2,6 +2,7 @@ #include "Polyhedra/Loops.hpp" #include "Support/OStream.hpp" #include "TestUtilities.hpp" +#include "Utilities/Valid.hpp" #include #include #include @@ -14,7 +15,7 @@ namespace poly { -using math::IntMatrix, utils::operator""_mat; +using math::IntMatrix, math::DenseMatrix, utils::operator""_mat; // 
NOLINTNEXTLINE(modernize-use-trailing-return-type) TEST(TrivialPruneBounds0, BasicAssertions) { @@ -94,7 +95,7 @@ TEST(LessTrivialPruneBounds, BasicAssertions) { poly::Loop &aff = *tlf.getLoopNest(0); aff.pruneBounds(); - llvm::errs() << "LessTrival test Bounds pruned:\n"; + llvm::errs() << "LessTrivial test Bounds pruned:\n"; #ifndef NDEBUG aff.dump(); #endif @@ -148,9 +149,9 @@ TEST(AffineTest0, BasicAssertions) { #endif llvm::errs() << "About to run first set of bounds tests\n"; llvm::errs() << "\nPermuting loops 1 and 2\n"; - utils::OwningArena<> allocator; - utils::NotNull affp021ptr{ - aff.rotate(allocator, "[1 0 0; 0 0 1; 0 1 0]"_mat, nullptr)}; + alloc::OwningArena<> allocator; + utils::Valid affp021ptr{ + aff.rotate(&allocator, "[1 0 0; 0 0 1; 0 1 0]"_mat, nullptr)}; poly::Loop &affp021 = *affp021ptr; // Now that we've swapped loops 1 and 2, we should have // for m in 0:M-1, k in 1:N-1, n in 0:k-1 @@ -164,9 +165,9 @@ TEST(AffineTest0, BasicAssertions) { << "\n"; llvm::errs() << "Constructed affine obj\n"; llvm::errs() << "About to run first compat test\n"; - EXPECT_FALSE(affp021.zeroExtraItersUponExtending(tlf.getAlloc(), 1, false)); + EXPECT_FALSE(affp021.zeroExtraItersUponExtending(*tlf.getAlloc(), 1, false)); llvm::errs() << "About to run second compat test\n"; - EXPECT_TRUE(affp021.zeroExtraItersUponExtending(tlf.getAlloc(), 1, true)); + EXPECT_TRUE(affp021.zeroExtraItersUponExtending(*tlf.getAlloc(), 1, true)); // affp021.zeroExtraIterationsUponExtending(poset, 1, ) } @@ -198,8 +199,9 @@ TEST(NonUnimodularExperiment, BasicAssertions) { tlf.addLoop(std::move(B), 2); poly::Loop &aff2 = *tlf.getLoopNest(tlf.getNumLoopNests() - 1); EXPECT_FALSE(aff2.isEmpty()); - OwningArena<> allocator; - NotNull affp10{aff2.rotate(allocator, "[0 1; 1 0]"_mat, nullptr)}; + alloc::OwningArena<> allocator; + utils::Valid affp10{ + aff2.rotate(&allocator, "[0 1; 1 0]"_mat, nullptr)}; llvm::errs() << "Swapped order:\n"; #ifndef NDEBUG diff --git a/test/dependence_test.cpp 
b/test/dependence_test.cpp index e738f487e..b6d8fbaed 100644 --- a/test/dependence_test.cpp +++ b/test/dependence_test.cpp @@ -129,7 +129,7 @@ TEST(DependenceTest, BasicAssertions) { Vector schLoad0(3, 0); Vector schStore(3, 0); schStore[2] = 2; - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; IR::Addr *msrc{createMemAccess(&alloc, srcA, storeA11, schStore)}; IR::Addr *mtgt01{createMemAccess(&alloc, tgtA01, loadA01, schLoad0)}; poly::DepPoly *dep0{poly::DepPoly::dependence(&alloc, *msrc, *mtgt01)}; @@ -256,7 +256,7 @@ TEST(SymmetricIndependentTest, BasicAssertions) { Vector schLoad(3, 0); Vector schStore(3, 0); schStore[2] = 1; - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; IR::Addr *msrc{createMemAccess(&alloc, srcA, storeAij, schStore)}; IR::Addr *mtgt{createMemAccess(&alloc, tgtA, loadAji, schLoad)}; poly::DepPoly *dep{poly::DepPoly::dependence(&alloc, *msrc, *mtgt)}; @@ -341,7 +341,7 @@ TEST(RankDeficientLoad, BasicAssertions) { Vector schLoad(2 + 1, 0); Vector schStore(2 + 1, 0); schStore[2] = 1; - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; IR::Addr *msrc{createMemAccess(&alloc, srcA, storeAij, schStore)}; IR::Addr *mtgt{createMemAccess(&alloc, tgtA, loadAii, schLoad)}; @@ -437,7 +437,7 @@ TEST(TimeHidingInRankDeficiency, BasicAssertions) { Vector schLoad(3 + 1, 0); Vector schStore(3 + 1, 0); schStore[3] = 1; - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; IR::Addr *msrc{createMemAccess(&alloc, refA, storeA, schStore)}; IR::Addr *mtgt{createMemAccess(&alloc, refA, loadA, schLoad)}; @@ -555,12 +555,12 @@ TEST(TriangularExampleTest, BasicAssertions) { // badly written triangular solve: // for (m = 0; m < M; ++m){ // for (n = 0; n < N; ++n){ - // A(n,m) = B(n,m); + // A[n,m] = B[n,m]; // } // for (n = 0; n < N; ++n){ - // A(n,m) = A(n,m) / U(n,n); + // A[n,m] = A[n,m] / U[n,n]; // for (k = n+1; k < N; ++k){ - // A(k,m) = A(k,m) - A(n,m)*U(k,n); + // A[k,m] = A[k,m] - U[k,n]*A[n,m]; // } // } // } @@ -652,7 +652,7 
@@ TEST(TriangularExampleTest, BasicAssertions) { IR::AddrChain addr; Vector sch2t0t0(2 + 1, 0); Vector sch2t0t1{sch2t0t0}; - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; // A(n,m) = -> B(n,m) <- IR::Addr *mSch2t0t0(createMemAccess(&alloc, indBmn, loadB, sch2t0t0)); addr.addAddr(mSch2t0t0); @@ -1140,7 +1140,7 @@ TEST(MeanStDevTest0, BasicAssertions) { lp::LoopBlock iOuterLoopNest; llvm::SmallVector iOuterMem; - utils::OwningArena<> alloc; + alloc::OwningArena<> alloc; iOuterMem.emplace_back(createMemAccess(&alloc, xInd1, storeX0, sch0t0)); // 0 iOuterMem.emplace_back( @@ -1446,7 +1446,7 @@ TEST(DoubleDependenceTest, BasicAssertions) { EXPECT_TRUE(loopBlock.optimize().has_value()); EXPECT_EQ(loopBlock.numEdges(), 2); map memAccessIds; - for (size_t jj = 0; jj < loopBlock.numIR::Addres(); ++jj) + for (size_t jj = 0; jj < loopBlock.numIR::Address(); ++jj) memAccessIds[loopBlock.getIR::Addr(jj)] = jj; for (auto &e : loopBlock.getEdges()) { auto [in, out] = e.getInOutPair(); diff --git a/test/dict_test.cpp b/test/dict_test.cpp new file mode 100644 index 000000000..f08324c8d --- /dev/null +++ b/test/dict_test.cpp @@ -0,0 +1,122 @@ +#include "Dicts/Trie.hpp" +#include +#include +#include +#include + +using poly::dict::TrieMap, poly::dict::InlineTrie; + +// NOLINTNEXTLINE(modernize-use-trailing-return-type) +TEST(TrieTest, BasicAssertions) { + std::mt19937_64 rng; + poly::alloc::OwningArena<> alloc{}; + + TrieMap d; + EXPECT_FALSE(d.find(3)); + d[&alloc, 3] = 11; + EXPECT_EQ(d.find(3)->second, 11); + d[&alloc, 3] += 11; + EXPECT_EQ(d.find(3)->second, 22); + + InlineTrie t; + EXPECT_FALSE(t.find(7)); + t[alloc, 7] = 13; + EXPECT_TRUE(t.find(7)); + EXPECT_EQ(*t.find(7), 13); + t[alloc, 7] += 14; + EXPECT_EQ(*t.find(7), 27); + //// More thorough test: + TrieMap tm; + InlineTrie it; + ankerl::unordered_dense::map m; + + // uint64_t mask = ((1ULL << 5) - 1) << 4ULL; + uint64_t mask = ((1ULL << 10) - 1) << 4ULL; + bool found = false; + // static constexpr auto 
debugval = 0xc38; + static constexpr auto debugval = 0x3c00; + // static constexpr auto debugval = 0x1358; + // static constexpr auto debugval = 0x12e8; + for (uint64_t i = 0; i < 512;) { + void *x = reinterpret_cast(rng() & mask); + if (!x) continue; + void *y = reinterpret_cast(rng() & mask); + if (!y) continue; + if (reinterpret_cast(x) == debugval) { + found = true; + auto *tmf = tm.find(y); + auto itf = it.find(y); + auto *tmfx = tm.find(x); + auto itfx = it.find(x); + std::cout << "i = " << i + 1 << "; m[y] = " << m[y] << "\n" + << "tm.find(y) = " << (tmf ? tmf->second : 0) + << "\nit.find(y) = " << (itf ? *itf : 0) + << "\ntm.find(x) = " << (tmfx ? tmfx->second : -1) + << "\nit.find(x) = " << (itfx ? *itfx : -1) + << "\ntm[a, x] = " << tm[&alloc, x] + << "\nit[a, x] = " << it[&alloc, x] + << "\ntm.find(x) = " << tm.find(x)->second + << "\nit.find(x) = " << *it.find(x) << "\n"; + } + if (found) { + void *p = reinterpret_cast(debugval); + EXPECT_EQ(m[p], tm.find(p)->second); + EXPECT_EQ(m[p], *it.find(p)); + ASSERT(m[p] == tm.find(p)->second); + ASSERT(m[p] == *it.find(p)); + } + m[x] += (++i) + m[y]; + tm[&alloc, x] += i + tm[&alloc, y]; + it[&alloc, x] += i + it[&alloc, y]; + if (reinterpret_cast(x) == debugval) { + auto *tmf = tm.find(x); + auto itf = it.find(x); + std::cout << "i = " << i << "; m[x] = " << m[x] << "\n" + << "tm.find(x) = " << (tmf ? tmf->second : -1) + << "\nit.find(x) = " << (itf ? 
*itf : -1) << "\n"; + } + EXPECT_TRUE(tm.find(x)); + EXPECT_TRUE(it.find(x)); + if (tm.find(x)->second != m[x]) std::cout << "x = " << x << "\n"; + if (*it.find(x) != m[x]) std::cout << "x = " << x << "\n"; + EXPECT_EQ(tm.find(x)->second, m[x]); + EXPECT_EQ(*it.find(x), m[x]); + void *z = reinterpret_cast(rng() & mask); + if (!z) continue; + // std::cout << "i = " << i << "\n"; + if (found) { + void *p = reinterpret_cast(debugval); + EXPECT_EQ(m[p], tm.find(p)->second); + EXPECT_EQ(m[p], *it.find(p)); + ASSERT(m[p] == tm.find(p)->second); + ASSERT(m[p] == *it.find(p)); + } + if (void *p = reinterpret_cast(debugval); p == z) { + auto *tmf = tm.find(z); + auto itf = it.find(z); + std::cout << "i = " << i << "; m[z] = " << m[z] << "\n" + << "tm.find(z) = " << (tmf ? tmf->second : -1) + << "\nit.find(z) = " << (itf ? *itf : -1) << "\n"; + } + m.erase(z); + tm.erase(z); + it.erase(z); + EXPECT_FALSE(tm.find(z)); + EXPECT_FALSE(it.find(z)); + if (reinterpret_cast(debugval) == z) found = false; + if (found) { + void *p = reinterpret_cast(debugval); + EXPECT_EQ(m[p], tm.find(p)->second); + EXPECT_EQ(m[p], *it.find(p)); + ASSERT(m[p] == tm.find(p)->second); + ASSERT(m[p] == *it.find(p)); + } + } + for (auto [k, v] : m) { + // std::cout << "k = " << k << "; v = " << v << "\n"; + EXPECT_TRUE(tm.find(k)); + EXPECT_TRUE(it.find(k)); + EXPECT_EQ(tm.find(k)->second, v); + EXPECT_EQ(*it.find(k), v); + } +} diff --git a/test/orthogonalize_test.cpp b/test/orthogonalize_test.cpp index 91efde138..a2254a7f3 100644 --- a/test/orthogonalize_test.cpp +++ b/test/orthogonalize_test.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -30,10 +29,10 @@ using math::DenseMatrix, math::DenseDims, math::PtrMatrix, math::MutPtrMatrix, math::Col, math::end, math::_, utils::operator""_mat; namespace { -auto orthogonalize(utils::Arena<> *alloc, +auto orthogonalize(alloc::Arena<> *alloc, llvm::SmallVectorImpl const &ai) -> std::optional< - std::pair>> { + 
containers::Pair>> { // need to construct matrix `A` of relationship // B*L = I @@ -66,14 +65,13 @@ auto orthogonalize(utils::Arena<> *alloc, // now, we have (A = alnp.aln->A, r = alnp.aln->r) // (A*K')*J <= r DenseMatrix AK{alnp.getA()}; - AK(_, _(numSymbols, end)) - << alnp.getA()(_, _(numSymbols, end)) * K.transpose(); + AK(_, _(numSymbols, end)) << alnp.getA()(_, _(numSymbols, end)) * K.t(); auto *alnNew = poly::Loop::construct(alloc, nullptr, std::move(AK), alnp.getSyms(), true); alnNew->pruneBounds(); math::IntMatrix KS{K * S}; - std::pair> ret{ + containers::Pair> ret{ std::make_pair(alnNew, llvm::SmallVector())}; llvm::SmallVector &newArrayRefs = ret.second; newArrayRefs.reserve(numRow); @@ -116,7 +114,7 @@ TEST(OrthogonalizeTest, BasicAssertions) { const llvm::SCEVUnknown *scevB = tlf.getSCEVUnknown(tlf.createArray()); // we have three array refs // W[i+m, j+n] - // llvm::SmallVector> + // llvm::SmallVector> ArrayReference War{scevW, aln, 2}; { MutPtrMatrix indMat = War.indexMatrix(); @@ -159,7 +157,8 @@ TEST(OrthogonalizeTest, BasicAssertions) { llvm::SmallVector ai{ allArrayRefs.data(), allArrayRefs.data() + 1, allArrayRefs.data() + 2}; - std::optional>> + std::optional< + containers::Pair>> orth(orthogonalize(tlf.getAlloc(), ai)); EXPECT_TRUE(orth.has_value()); @@ -281,7 +280,8 @@ TEST(BadMul, BasicAssertions) { llvm::SmallVector ai{ allArrayRefs.data(), allArrayRefs.data() + 1, allArrayRefs.data() + 2}; - std::optional>> + std::optional< + containers::Pair>> orth{orthogonalize(tlf.getAlloc(), ai)}; EXPECT_TRUE(orth.has_value()); @@ -328,7 +328,7 @@ TEST(OrthogonalizeMatricesTest, BasicAssertions) { // llvm::errs() << "Orthogonal A =\n" << A << "\n"; // note, A'A is not diagonal // but AA' is - B = A * A.transpose(); + B = A * A.t(); // llvm::errs() << "A'A =\n" << B << "\n"; #if !defined(__clang__) && defined(__GNUC__) #pragma GCC diagnostic push diff --git a/tools/prettyprinters.py b/tools/prettyprinters.py index 9d041f8e9..50eb504cb 100644 --- 
a/tools/prettyprinters.py +++ b/tools/prettyprinters.py @@ -99,21 +99,21 @@ def __init__(self, val): ) pp = gdb.printing.RegexpCollectionPrettyPrinter("LoopModels") -pp.add_printer("poly::math::Array", "^poly::math::Array<.*, unsigned int>$", VectorPrinter) -pp.add_printer("poly::math::::ManagedArray", "^poly::math::ManagedArray<.*, unsigned int, .*, std::allocator<.*>, .*>$", VectorPrinter) +pp.add_printer("poly::math::Array", "^poly::math::Array<.*, ptrdiff_t>$", VectorPrinter) +pp.add_printer("poly::math::::ManagedArray", "^poly::math::ManagedArray<.*, ptrdiff_t, .*, alloc::Mallocator<.*>, .*>$", VectorPrinter) pp.add_printer( "poly::math::::Array", - "^poly::math::::Array<.*, poly::math::::SquareDims>$", + "^poly::math::::Array<.*, poly::math::::SquareDims<>>$", SquareMatrixPrinter, ) pp.add_printer( "poly::math::::Array", - "^poly::math::::Array<.*, poly::math::::DenseDims>$", + "^poly::math::::Array<.*, poly::math::::DenseDims<>>$", DenseMatrixPrinter, ) pp.add_printer( "poly::math::::Array", - "^poly::math::::Array<.*, poly::math::::StridedDims>$", + "^poly::math::::Array<.*, poly::math::::StridedDims<>>$", StridedMatrixPrinter, ) gdb.printing.register_pretty_printer(gdb.current_objfile(), pp)