From f10ee0f82e9f05ff8329dd7b5000e75b63c03954 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 25 Oct 2024 15:06:26 +0200 Subject: [PATCH 001/466] Disable temporarily failing CI job with ICX compiler Signed-off-by: Lukasz Dorau --- .github/workflows/basic.yml | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/basic.yml b/.github/workflows/basic.yml index 7c3d2ebc9..bf4f31f90 100644 --- a/.github/workflows/basic.yml +++ b/.github/workflows/basic.yml @@ -58,12 +58,15 @@ jobs: level_zero_provider: 'OFF' install_tbb: 'ON' # test icx compiler - - os: 'ubuntu-22.04' - build_type: Release - compiler: {c: icx, cxx: icpx} - shared_library: 'ON' - level_zero_provider: 'ON' - install_tbb: 'ON' + # - os: 'ubuntu-22.04' + # build_type: Release + # compiler: {c: icx, cxx: icpx} + # shared_library: 'ON' + # level_zero_provider: 'ON' + # cuda_provider: 'ON' + # install_tbb: 'ON' + # disable_hwloc: 'OFF' + # link_hwloc_statically: 'OFF' # test without installing TBB - os: 'ubuntu-22.04' build_type: Release From ef1e1cd56ad726c8a3d1e1b46cd044ebf98981c5 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 31 Oct 2024 15:57:52 +0100 Subject: [PATCH 002/466] fix for the apply of the HWLOC security patch --- CMakeLists.txt | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb4045776..f4f6b8c2e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,12 +131,6 @@ elseif(WINDOWS AND NOT UMF_DISABLE_HWLOC) set(HWLOC_ENABLE_TESTING OFF) set(HWLOC_SKIP_LSTOPO ON) set(HWLOC_SKIP_TOOLS ON) - set(HWLOC_PATCH - git - apply - ${PROJECT_SOURCE_DIR}/cmake/fix_coverity_issues.patch - || - (exit 0)) message(STATUS "Will fetch hwloc from ${UMF_HWLOC_REPO}") @@ -144,8 +138,7 @@ elseif(WINDOWS AND NOT UMF_DISABLE_HWLOC) hwloc_targ GIT_REPOSITORY ${UMF_HWLOC_REPO} GIT_TAG ${UMF_HWLOC_TAG} - PATCH_COMMAND ${HWLOC_PATCH} SOURCE_SUBDIR contrib/windows-cmake/ - FIND_PACKAGE_ARGS) + SOURCE_SUBDIR contrib/windows-cmake/ FIND_PACKAGE_ARGS) FetchContent_GetProperties(hwloc_targ) if(NOT hwloc_targ_POPULATED) @@ -162,20 +155,13 @@ elseif(WINDOWS AND NOT UMF_DISABLE_HWLOC) message(STATUS " LIBHWLOC_LIBRARY_DIRS = ${LIBHWLOC_LIBRARY_DIRS}") elseif(NOT UMF_DISABLE_HWLOC) include(FetchContent) - set(HWLOC_PATCH - git - apply - ${PROJECT_SOURCE_DIR}/cmake/fix_coverity_issues.patch - || - (exit 0)) message(STATUS "Will fetch hwloc from ${UMF_HWLOC_REPO}") FetchContent_Declare( hwloc_targ GIT_REPOSITORY ${UMF_HWLOC_REPO} - GIT_TAG ${UMF_HWLOC_TAG} - PATCH_COMMAND ${HWLOC_PATCH}) + GIT_TAG ${UMF_HWLOC_TAG}) FetchContent_GetProperties(hwloc_targ) if(NOT hwloc_targ_POPULATED) @@ -222,6 +208,22 @@ elseif(NOT UMF_DISABLE_HWLOC) message(STATUS " LIBHWLOC_LIBRARY_DIRS = ${LIBHWLOC_LIBRARY_DIRS}") endif() +if(hwloc_targ_SOURCE_DIR) + # apply security patch for HWLOC + execute_process( + COMMAND git apply ${PROJECT_SOURCE_DIR}/cmake/fix_coverity_issues.patch + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT_VARIABLE UMF_HWLOC_PATCH_OUTPUT + ERROR_VARIABLE UMF_HWLOC_PATCH_ERROR) + + if(UMF_HWLOC_PATCH_OUTPUT) + message(STATUS "HWLOC patch command output:\n${UMF_HWLOC_PATCH_OUTPUT}") + endif() + if(UMF_HWLOC_PATCH_ERROR) + message(WARNING "HWLOC patch command output:\n${UMF_HWLOC_PATCH_ERROR}") + endif() +endif() + # This build type check is not possible on Windows when CMAKE_BUILD_TYPE is not # set, because in this case the build type is determined after a CMake # configuration is done (at the build time) 
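Note on the patch step above: `git apply` is now run unconditionally once the hwloc sources are fetched, so a second CMake configure on an already-patched tree makes that command fail and only the WARNING branch reports it. A minimal guard is sketched below (illustrative only, not part of this patch series; the UMF_HWLOC_PATCH_APPLIED variable name is made up) that first probes whether the patch is already applied:

# Illustrative sketch, not part of the original patch series.
if(hwloc_targ_SOURCE_DIR)
    # `git apply --reverse --check` succeeds only when the patch is already
    # present in the tree; apply it only when that probe fails.
    execute_process(
        COMMAND git apply --reverse --check
                ${PROJECT_SOURCE_DIR}/cmake/fix_coverity_issues.patch
        WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR}
        RESULT_VARIABLE UMF_HWLOC_PATCH_APPLIED
        OUTPUT_QUIET ERROR_QUIET)
    if(NOT UMF_HWLOC_PATCH_APPLIED EQUAL 0)
        execute_process(
            COMMAND git apply
                    ${PROJECT_SOURCE_DIR}/cmake/fix_coverity_issues.patch
            WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR})
    endif()
endif()
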
From 8923281e0f9b1dc905f7fb0f2c4ca155dfd49a3d Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Thu, 3 Oct 2024 16:11:36 +0200 Subject: [PATCH 003/466] Fix disabling of pci support in hwloc The flag that hwloc recognize is --disable-pci, not --disable-pciaccess. --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb4045776..0fe20025a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -189,7 +189,7 @@ elseif(NOT UMF_DISABLE_HWLOC) add_custom_command( COMMAND ./configure --prefix=${hwloc_targ_BINARY_DIR} --enable-static=yes - --enable-shared=no --disable-libxml2 --disable-pciaccess + --enable-shared=no --disable-libxml2 --disable-pci --disable-levelzero --disable-opencl --disable-cuda --disable-nvml CFLAGS=-fPIC CXXFLAGS=-fPIC WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} From d107b05f379414e5c90ac903efd29dfbab6497d9 Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Fri, 8 Nov 2024 10:14:50 +0100 Subject: [PATCH 004/466] Add a RUNPATH to installed libraries --- src/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 0ebd1160f..76479926c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -153,6 +153,9 @@ elseif(MACOSX) endif() if(UMF_BUILD_SHARED_LIBRARY) + # Set the runtime search path to the directory containing hwloc library + set(CMAKE_INSTALL_RPATH "\$ORIGIN") + if(NOT UMF_DISABLE_HWLOC) set(HWLOC_LIB ${UMF_HWLOC_NAME}) endif() From 23298fe3c4a643880bd5aac52ee3364c4f94a636 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Tue, 12 Nov 2024 10:57:04 +0100 Subject: [PATCH 005/466] 0.9.1 release --- .github/workflows/basic.yml | 2 +- ChangeLog | 6 ++++++ scripts/docs_config/conf.py | 2 +- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/basic.yml b/.github/workflows/basic.yml index bf4f31f90..4652a7a1d 100644 --- a/.github/workflows/basic.yml +++ b/.github/workflows/basic.yml @@ -8,7 +8,7 @@ permissions: env: # for installation testing - it should match with version set in CMake - UMF_VERSION: 0.9.0 + UMF_VERSION: 0.9.1 BUILD_DIR : "${{github.workspace}}/build" INSTL_DIR : "${{github.workspace}}/../install-dir" diff --git a/ChangeLog b/ChangeLog index 867e59f0f..2b41d7d9b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Tue Nov 12 2024 Łukasz Stolarczuk + + * Version 0.9.1 + + This patch release contains only 3 small fixes in build system of UMF. 
+ Thu Sep 12 2024 Łukasz Stolarczuk * Version 0.9.0 diff --git a/scripts/docs_config/conf.py b/scripts/docs_config/conf.py index b93d7d977..13a975983 100644 --- a/scripts/docs_config/conf.py +++ b/scripts/docs_config/conf.py @@ -22,7 +22,7 @@ author = "Intel" # The full version, including alpha/beta/rc tags -release = "0.9.0" +release = "0.9.1" # -- General configuration --------------------------------------------------- From 0d118afc5be9393ecadf42f68c2720153a5bf623 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 29 Nov 2024 11:26:19 +0100 Subject: [PATCH 006/466] Clean up target_*() calls in test/CMakeLists.txt Signed-off-by: Lukasz Dorau --- test/CMakeLists.txt | 64 ++++++++++++++++++--------------------------- 1 file changed, 26 insertions(+), 38 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bf9884dc9..71a9a46e2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -40,12 +40,26 @@ function(build_umf_test) set(LIB_DIRS ${LIB_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) - if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) + if(UMF_BUILD_CUDA_PROVIDER) + set(INC_DIRS ${INC_DIRS} ${CUDA_INCLUDE_DIRS}) + set(LIB_DIRS ${LIB_DIRS} ${CUDA_LIBRARY_DIRS}) + endif() + + if(UMF_BUILD_LEVEL_ZERO_PROVIDER) + set(INC_DIRS ${INC_DIRS} ${LEVEL_ZERO_INCLUDE_DIRS}) + endif() + + if(UMF_POOL_JEMALLOC_ENABLED) set(LIB_DIRS ${LIB_DIRS} ${JEMALLOC_LIBRARY_DIRS}) + set(CPL_DEFS ${CPL_DEFS} UMF_POOL_JEMALLOC_ENABLED=1) endif() - if(UMF_BUILD_CUDA_PROVIDER) - set(LIB_DIRS ${LIB_DIRS} ${CUDA_LIBRARY_DIRS}) + if(UMF_POOL_SCALABLE_ENABLED) + set(CPL_DEFS ${CPL_DEFS} UMF_POOL_SCALABLE_ENABLED=1) + endif() + + if(UMF_BUILD_LIBUMF_POOL_DISJOINT) + set(CPL_DEFS ${CPL_DEFS} UMF_POOL_DISJOINT_ENABLED=1) endif() set(TEST_LIBS @@ -60,15 +74,7 @@ function(build_umf_test) SRCS ${ARG_SRCS} LIBS ${TEST_LIBS}) - if(UMF_POOL_JEMALLOC_ENABLED) - target_compile_definitions(${TEST_TARGET_NAME} - PRIVATE UMF_POOL_JEMALLOC_ENABLED=1) - endif() - - if(UMF_POOL_SCALABLE_ENABLED) - target_compile_definitions(${TEST_TARGET_NAME} - PRIVATE UMF_POOL_SCALABLE_ENABLED=1) - endif() + target_compile_definitions(${TEST_TARGET_NAME} PRIVATE ${CPL_DEFS}) if(NOT MSVC) # Suppress 'cast discards const qualifier' warnings. 
Parametrized GTEST @@ -80,6 +86,7 @@ function(build_umf_test) target_compile_options(${TEST_TARGET_NAME} PRIVATE -Werror) endif() endif() + target_link_directories(${TEST_TARGET_NAME} PRIVATE ${LIB_DIRS}) target_include_directories( @@ -89,7 +96,8 @@ function(build_umf_test) ${UMF_CMAKE_SOURCE_DIR}/src/base_alloc ${UMF_CMAKE_SOURCE_DIR}/src/utils ${UMF_TEST_DIR}/common - ${UMF_TEST_DIR}) + ${UMF_TEST_DIR} + ${INC_DIRS}) endfunction() function(add_umf_test) @@ -157,6 +165,10 @@ if(UMF_POOL_JEMALLOC_ENABLED) set(LIB_JEMALLOC_POOL jemalloc_pool) endif() +if(UMF_BUILD_LIBUMF_POOL_DISJOINT) + set(LIB_DISJOINT_POOL disjoint_pool) +endif() + if(UMF_BUILD_SHARED_LIBRARY) # if build as shared library, ba symbols won't be visible in tests set(BA_SOURCES_FOR_TEST ${BA_SOURCES}) @@ -245,13 +257,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_os_memory SRCS provider_os_memory.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL}) - if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - target_compile_definitions(umf_test-provider_os_memory - PRIVATE UMF_POOL_DISJOINT_ENABLED=1) - target_link_libraries(umf_test-provider_os_memory PRIVATE disjoint_pool) - endif() - + LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL} ${LIB_DISJOINT_POOL}) add_umf_test( NAME provider_os_memory_multiple_numa_nodes SRCS provider_os_memory_multiple_numa_nodes.cpp @@ -365,8 +371,6 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) SRCS providers/provider_level_zero.cpp providers/level_zero_helpers.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST} ze_loader) - target_include_directories(umf_test-provider_level_zero - PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) add_umf_test( NAME provider_level_zero_dlopen @@ -375,8 +379,6 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) LIBS ${UMF_UTILS_FOR_TEST}) target_compile_definitions(umf_test-provider_level_zero_dlopen PUBLIC USE_DLOPEN=1) - target_include_directories(umf_test-provider_level_zero_dlopen - PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) endif() if(NOT UMF_BUILD_LEVEL_ZERO_PROVIDER) @@ -396,10 +398,6 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) SRCS providers/provider_cuda.cpp providers/cuda_helpers.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST} cuda) - target_include_directories(umf_test-provider_cuda - PRIVATE ${CUDA_INCLUDE_DIRS}) - target_link_directories(umf_test-provider_cuda PRIVATE - ${CUDA_LIBRARY_DIRS}) add_umf_test( NAME provider_cuda_dlopen @@ -408,8 +406,6 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) LIBS ${UMF_UTILS_FOR_TEST}) target_compile_definitions(umf_test-provider_cuda_dlopen PUBLIC USE_DLOPEN=1) - target_include_directories(umf_test-provider_cuda_dlopen - PRIVATE ${CUDA_INCLUDE_DIRS}) else() message( STATUS @@ -601,10 +597,6 @@ if(LINUX) ze_loader disjoint_pool ${UMF_UTILS_FOR_TEST}) - target_include_directories(umf_test-ipc_level_zero_prov_producer - PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) - target_include_directories(umf_test-ipc_level_zero_prov_consumer - PRIVATE ${LEVEL_ZERO_INCLUDE_DIRS}) add_umf_ipc_test(TEST ipc_level_zero_prov SRC_DIR providers) endif() @@ -635,10 +627,6 @@ if(LINUX) cuda disjoint_pool ${UMF_UTILS_FOR_TEST}) - target_include_directories(umf_test-ipc_cuda_prov_producer - PRIVATE ${CUDA_INCLUDE_DIRS}) - target_include_directories(umf_test-ipc_cuda_prov_consumer - PRIVATE ${CUDA_INCLUDE_DIRS}) add_umf_ipc_test(TEST ipc_cuda_prov SRC_DIR providers) endif() else() From 2d28e062793c681f5523000923eb0a1bd91db4ce Mon Sep 17 00:00:00 2001 
From: Lukasz Dorau Date: Mon, 25 Nov 2024 11:21:10 +0100 Subject: [PATCH 007/466] Add the coarse library Add the coarse library that will replace the coarse provider. Signed-off-by: Lukasz Dorau --- cmake/helpers.cmake | 3 +- src/CMakeLists.txt | 11 +- src/coarse/CMakeLists.txt | 26 + src/coarse/coarse.c | 1351 +++++++++++++++++++++++++++++++++++++ src/coarse/coarse.h | 112 +++ 5 files changed, 1498 insertions(+), 5 deletions(-) create mode 100644 src/coarse/CMakeLists.txt create mode 100644 src/coarse/coarse.c create mode 100644 src/coarse/coarse.h diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 2544a1518..0a165bc3a 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -387,7 +387,8 @@ function(add_umf_library) ${ARG_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/include ${UMF_CMAKE_SOURCE_DIR}/src/utils - ${UMF_CMAKE_SOURCE_DIR}/src/base_alloc) + ${UMF_CMAKE_SOURCE_DIR}/src/base_alloc + ${UMF_CMAKE_SOURCE_DIR}/src/coarse) add_umf_target_compile_options(${ARG_NAME}) add_umf_target_link_options(${ARG_NAME}) endfunction() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b4736ed0f..ffd928f7c 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -16,15 +16,16 @@ set(UMF_CUDA_INCLUDE_DIR # TODO: Cleanup the compile definitions across all the CMake files set(UMF_COMMON_COMPILE_DEFINITIONS UMF_VERSION=${UMF_VERSION}) -add_subdirectory(utils) - -set(UMF_LIBS $) - set(BA_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/base_alloc/base_alloc.c ${CMAKE_CURRENT_SOURCE_DIR}/base_alloc/base_alloc_linear.c ${CMAKE_CURRENT_SOURCE_DIR}/base_alloc/base_alloc_global.c) +add_subdirectory(utils) +add_subdirectory(coarse) + +set(UMF_LIBS $ $) + if(LINUX) set(BA_SOURCES ${BA_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/base_alloc/base_alloc_linux.c) @@ -145,6 +146,8 @@ else() LIBS ${UMF_LIBS}) endif() +add_dependencies(umf coarse) + if(UMF_LINK_HWLOC_STATICALLY) add_dependencies(umf ${UMF_HWLOC_NAME}) endif() diff --git a/src/coarse/CMakeLists.txt b/src/coarse/CMakeLists.txt new file mode 100644 index 000000000..8806b6b55 --- /dev/null +++ b/src/coarse/CMakeLists.txt @@ -0,0 +1,26 @@ +# Copyright (C) 2024 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +include(${UMF_CMAKE_SOURCE_DIR}/cmake/helpers.cmake) + +set(COARSE_SOURCES coarse.c ../ravl/ravl.c) + +if(UMF_BUILD_SHARED_LIBRARY) + set(COARSE_EXTRA_SRCS ${BA_SOURCES}) + set(COARSE_EXTRA_LIBS $) +endif() + +add_umf_library( + NAME coarse + TYPE STATIC + SRCS ${COARSE_SOURCES} ${COARSE_EXTRA_SRCS} + LIBS ${COARSE_EXTRA_LIBS}) + +target_include_directories( + coarse + PRIVATE $ + $ + $) + +add_library(${PROJECT_NAME}::coarse ALIAS coarse) diff --git a/src/coarse/coarse.c b/src/coarse/coarse.c new file mode 100644 index 000000000..729480154 --- /dev/null +++ b/src/coarse/coarse.c @@ -0,0 +1,1351 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include +#include +#include + +#include + +#include "base_alloc_global.h" +#include "coarse.h" +#include "libumf.h" +#include "ravl.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" + +#ifdef _WIN32 +UTIL_ONCE_FLAG Log_initialized = UTIL_ONCE_FLAG_INIT; +#else +void __attribute__((constructor)) coarse_init(void) { utils_log_init(); } +void __attribute__((destructor)) coarse_destroy(void) {} +#endif /* _WIN32 */ + +typedef struct coarse_t { + // handle of the memory provider + void *provider; + + // coarse callbacks + coarse_callbacks_t cb; + + // memory allocation strategy + coarse_strategy_t allocation_strategy; + + // page size of the memory provider + size_t page_size; + + // all_blocks - tree of all blocks - sorted by an address of data + struct ravl *all_blocks; + + // free_blocks - tree of free blocks - sorted by a size of data, + // each node contains a pointer (ravl_free_blocks_head_t) + // to the head of the list of free blocks of the same size + struct ravl *free_blocks; + + struct utils_mutex_t lock; + + // statistics + size_t used_size; + size_t alloc_size; +} coarse_t; + +typedef struct ravl_node ravl_node_t; + +typedef enum check_free_blocks_t { + CHECK_ONLY_THE_FIRST_BLOCK = 0, + CHECK_ALL_BLOCKS_OF_SIZE, +} check_free_blocks_t; + +typedef struct block_t { + size_t size; + unsigned char *data; + bool used; + + // Node in the list of free blocks of the same size pointing to this block. + // The list is located in the (coarse->free_blocks) RAVL tree. + struct ravl_free_blocks_elem_t *free_list_ptr; +} block_t; + +// A general node in a RAVL tree. +// 1) coarse->all_blocks RAVL tree (tree of all blocks - sorted by an address of data): +// key - pointer (block_t->data) to the beginning of the block data +// value - pointer (block_t) to the block of the allocation +// 2) coarse->free_blocks RAVL tree (tree of free blocks - sorted by a size of data): +// key - size of the allocation (block_t->size) +// value - pointer (ravl_free_blocks_head_t) to the head of the list of free blocks of the same size +typedef struct ravl_data_t { + uintptr_t key; + void *value; +} ravl_data_t; + +// The head of the list of free blocks of the same size. 
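+// Overall layout of the free-block bookkeeping (as defined by the types below):
+//
+//   coarse->free_blocks (RAVL tree, key = block size)
+//       '-> ravl_free_blocks_head_t (one head per distinct block size)
+//             '-> head -> ravl_free_blocks_elem_t <-> ... (same-size free blocks)
+//                            '-> block (block_t with used == false)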
+typedef struct ravl_free_blocks_head_t { + struct ravl_free_blocks_elem_t *head; +} ravl_free_blocks_head_t; + +// The node of the list of free blocks of the same size +typedef struct ravl_free_blocks_elem_t { + struct block_t *block; + struct ravl_free_blocks_elem_t *next; + struct ravl_free_blocks_elem_t *prev; +} ravl_free_blocks_elem_t; + +// The compare function of a RAVL tree +static int coarse_ravl_comp(const void *lhs, const void *rhs) { + const ravl_data_t *lhs_ravl = (const ravl_data_t *)lhs; + const ravl_data_t *rhs_ravl = (const ravl_data_t *)rhs; + + if (lhs_ravl->key < rhs_ravl->key) { + return -1; + } + + if (lhs_ravl->key > rhs_ravl->key) { + return 1; + } + + // lhs_ravl->key == rhs_ravl->key + return 0; +} + +static inline block_t *get_node_block(ravl_node_t *node) { + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + assert(node_data->value); + return node_data->value; +} + +static inline ravl_node_t *get_node_prev(ravl_node_t *node) { + return ravl_node_predecessor(node); +} + +static inline ravl_node_t *get_node_next(ravl_node_t *node) { + return ravl_node_successor(node); +} + +#ifndef NDEBUG +static block_t *get_block_prev(ravl_node_t *node) { + ravl_node_t *ravl_prev = ravl_node_predecessor(node); + if (!ravl_prev) { + return NULL; + } + + return get_node_block(ravl_prev); +} + +static block_t *get_block_next(ravl_node_t *node) { + ravl_node_t *ravl_next = ravl_node_successor(node); + if (!ravl_next) { + return NULL; + } + + return get_node_block(ravl_next); +} +#endif /* NDEBUG */ + +// The functions "coarse_ravl_*" handles the coarse->all_blocks list of blocks +// sorted by a pointer (block_t->data) to the beginning of the block data. +// +// coarse_ravl_add_new - allocate and add a new block to the tree +// and link this block to the next and the previous one. +static block_t *coarse_ravl_add_new(struct ravl *rtree, unsigned char *data, + size_t size, ravl_node_t **node) { + assert(rtree); + assert(data); + assert(size); + + // TODO add valgrind annotations + block_t *block = umf_ba_global_alloc(sizeof(*block)); + if (block == NULL) { + return NULL; + } + + block->data = data; + block->size = size; + block->free_list_ptr = NULL; + + ravl_data_t rdata = {(uintptr_t)block->data, block}; + assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL)); + int ret = ravl_emplace_copy(rtree, &rdata); + if (ret) { + umf_ba_global_free(block); + return NULL; + } + + ravl_node_t *new_node = ravl_find(rtree, &rdata, RAVL_PREDICATE_EQUAL); + assert(NULL != new_node); + + if (node) { + *node = new_node; + } + + return block; +} + +// coarse_ravl_find_node - find the node in the tree +static ravl_node_t *coarse_ravl_find_node(struct ravl *rtree, void *ptr) { + ravl_data_t data = {(uintptr_t)ptr, NULL}; + return ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL); +} + +// coarse_ravl_rm - remove the block from the tree +static block_t *coarse_ravl_rm(struct ravl *rtree, void *ptr) { + ravl_data_t data = {(uintptr_t)ptr, NULL}; + ravl_node_t *node; + node = ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL); + if (node) { + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + block_t *block = node_data->value; + assert(block); + ravl_remove(rtree, node); + assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL)); + return block; + } + return NULL; +} + +// The functions "node_list_*" handle lists of free blocks of the same size. +// The heads (ravl_free_blocks_head_t) of those lists are stored in nodes of +// the coarse->free_blocks RAVL tree. 
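+// Each of these lists is doubly-linked, and every free block_t keeps a
+// back-pointer to its list element (block->free_list_ptr), so a block can be
+// unlinked from its list in O(1) without searching.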
+// +// node_list_add - add a free block to the list of free blocks of the same size +static ravl_free_blocks_elem_t * +node_list_add(ravl_free_blocks_head_t *head_node, struct block_t *block) { + assert(head_node); + assert(block); + + ravl_free_blocks_elem_t *node = umf_ba_global_alloc(sizeof(*node)); + if (node == NULL) { + return NULL; + } + + if (head_node->head) { + head_node->head->prev = node; + } + + node->block = block; + node->next = head_node->head; + node->prev = NULL; + head_node->head = node; + + return node; +} + +// node_list_rm - remove the given free block from the list of free blocks of the same size +static block_t *node_list_rm(ravl_free_blocks_head_t *head_node, + ravl_free_blocks_elem_t *node) { + assert(head_node); + assert(node); + assert(head_node->head); + + if (node == head_node->head) { + assert(node->prev == NULL); + head_node->head = node->next; + } + + ravl_free_blocks_elem_t *node_next = node->next; + ravl_free_blocks_elem_t *node_prev = node->prev; + if (node_next) { + node_next->prev = node_prev; + } + + if (node_prev) { + node_prev->next = node_next; + } + + struct block_t *block = node->block; + block->free_list_ptr = NULL; + umf_ba_global_free(node); + + return block; +} + +// node_list_rm_first - remove the first free block from the list of free blocks of the same size only if it can be properly aligned +static block_t *node_list_rm_first(ravl_free_blocks_head_t *head_node, + size_t alignment) { + assert(head_node); + assert(head_node->head); + + ravl_free_blocks_elem_t *node = head_node->head; + assert(node->prev == NULL); + struct block_t *block = node->block; + + if (IS_NOT_ALIGNED(block->size, alignment)) { + return NULL; + } + + if (node->next) { + node->next->prev = NULL; + } + + head_node->head = node->next; + block->free_list_ptr = NULL; + umf_ba_global_free(node); + + return block; +} + +// node_list_rm_with_alignment - remove the first free block with the correct alignment from the list of free blocks of the same size +static block_t *node_list_rm_with_alignment(ravl_free_blocks_head_t *head_node, + size_t alignment) { + assert(head_node); + assert(head_node->head); + + assert(((ravl_free_blocks_elem_t *)head_node->head)->prev == NULL); + + ravl_free_blocks_elem_t *node; + for (node = head_node->head; node != NULL; node = node->next) { + if (IS_ALIGNED(node->block->size, alignment)) { + return node_list_rm(head_node, node); + } + } + + return NULL; +} + +// The functions "free_blocks_*" handle the coarse->free_blocks RAVL tree +// sorted by a size of the allocation (block_t->size). +// This is a tree of heads (ravl_free_blocks_head_t) of lists of free blocks of the same size. 
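+// During allocation, free_blocks_rm_ge() searches this tree with
+// RAVL_PREDICATE_GREATER_EQUAL, i.e. it reuses a free block whose size is at
+// least the requested size.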
+// +// free_blocks_add - add a free block to the list of free blocks of the same size +static int free_blocks_add(struct ravl *free_blocks, block_t *block) { + ravl_free_blocks_head_t *head_node = NULL; + int rv; + + ravl_data_t head_node_data = {(uintptr_t)block->size, NULL}; + ravl_node_t *node; + node = ravl_find(free_blocks, &head_node_data, RAVL_PREDICATE_EQUAL); + if (node) { + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + head_node = node_data->value; + assert(head_node); + } else { // no head_node + head_node = umf_ba_global_alloc(sizeof(*head_node)); + if (!head_node) { + return -1; + } + + head_node->head = NULL; + + ravl_data_t data = {(uintptr_t)block->size, head_node}; + rv = ravl_emplace_copy(free_blocks, &data); + if (rv) { + umf_ba_global_free(head_node); + return -1; + } + } + + block->free_list_ptr = node_list_add(head_node, block); + if (!block->free_list_ptr) { + return -1; // out of memory + } + + assert(block->free_list_ptr->block->size == block->size); + + return 0; +} + +// free_blocks_rm_ge - remove the first free block of a size greater or equal to the given size only if it can be properly aligned +// If it was the last block, the head node is freed and removed from the tree. +// It is used during memory allocation (looking for a free block). +static block_t *free_blocks_rm_ge(struct ravl *free_blocks, size_t size, + size_t alignment, + check_free_blocks_t check_blocks) { + ravl_data_t data = {(uintptr_t)size, NULL}; + ravl_node_t *node; + node = ravl_find(free_blocks, &data, RAVL_PREDICATE_GREATER_EQUAL); + if (!node) { + return NULL; + } + + ravl_data_t *node_data = ravl_data(node); + assert(node_data); + assert(node_data->key >= size); + + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + + block_t *block = NULL; + switch (check_blocks) { + case CHECK_ONLY_THE_FIRST_BLOCK: + block = node_list_rm_first(head_node, alignment); + break; + case CHECK_ALL_BLOCKS_OF_SIZE: + block = node_list_rm_with_alignment(head_node, alignment); + break; + } + + if (head_node->head == NULL) { + umf_ba_global_free(head_node); + ravl_remove(free_blocks, node); + } + + return block; +} + +// free_blocks_rm_node - remove the free block pointed by the given node. +// If it was the last block, the head node is freed and removed from the tree. +// It is used during merging free blocks and destroying the coarse->free_blocks tree. 
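+// Unlike free_blocks_rm_ge(), the block is addressed directly through its
+// list element instead of being looked up by size.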
+static block_t *free_blocks_rm_node(struct ravl *free_blocks, + ravl_free_blocks_elem_t *node) { + assert(free_blocks); + assert(node); + size_t size = node->block->size; + ravl_data_t data = {(uintptr_t)size, NULL}; + ravl_node_t *ravl_node; + ravl_node = ravl_find(free_blocks, &data, RAVL_PREDICATE_EQUAL); + assert(ravl_node); + + ravl_data_t *node_data = ravl_data(ravl_node); + assert(node_data); + assert(node_data->key == size); + + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + + block_t *block = node_list_rm(head_node, node); + + if (head_node->head == NULL) { + umf_ba_global_free(head_node); + ravl_remove(free_blocks, ravl_node); + } + + return block; +} + +// user_block_merge - merge two blocks from one of two lists of user blocks: all_blocks or free_blocks +static umf_result_t user_block_merge(coarse_t *coarse, ravl_node_t *node1, + ravl_node_t *node2, bool used, + ravl_node_t **merged_node) { + assert(node1); + assert(node2); + assert(node1 == get_node_prev(node2)); + assert(node2 == get_node_next(node1)); + assert(merged_node); + + *merged_node = NULL; + + struct ravl *all_blocks = coarse->all_blocks; + struct ravl *free_blocks = coarse->free_blocks; + + block_t *block1 = get_node_block(node1); + block_t *block2 = get_node_block(node2); + assert(block1->data < block2->data); + + bool same_used = ((block1->used == used) && (block2->used == used)); + bool contignous_data = (block1->data + block1->size == block2->data); + + // check if blocks can be merged + if (!same_used || !contignous_data) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // check if blocks can be merged + umf_result_t umf_result = + coarse->cb.merge(coarse->provider, block1->data, block2->data, + block1->size + block2->size); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("coarse_merge_cb(lowPtr=%p, highPtr=%p, totalSize=%zu) failed", + (void *)block1->data, (void *)block2->data, + block1->size + block2->size); + return umf_result; + } + + if (block1->free_list_ptr) { + free_blocks_rm_node(free_blocks, block1->free_list_ptr); + block1->free_list_ptr = NULL; + } + + if (block2->free_list_ptr) { + free_blocks_rm_node(free_blocks, block2->free_list_ptr); + block2->free_list_ptr = NULL; + } + + // update the size + block1->size += block2->size; + + block_t *block_rm = coarse_ravl_rm(all_blocks, block2->data); + assert(block_rm == block2); + (void)block_rm; // WA for unused variable error + umf_ba_global_free(block2); + + *merged_node = node1; + + return UMF_RESULT_SUCCESS; +} + +// free_block_merge_with_prev - merge the given free block +// with the previous one if both are unused and have continuous data. +// Remove the merged block from the tree of free blocks. +static ravl_node_t *free_block_merge_with_prev(coarse_t *coarse, + ravl_node_t *node) { + ravl_node_t *node_prev = get_node_prev(node); + if (!node_prev) { + return node; + } + + ravl_node_t *merged_node = NULL; + umf_result_t umf_result = + user_block_merge(coarse, node_prev, node, false, &merged_node); + if (umf_result != UMF_RESULT_SUCCESS) { + return node; + } + + assert(merged_node != NULL); + + return merged_node; +} + +// free_block_merge_with_next - merge the given free block +// with the next one if both are unused and have continuous data. +// Remove the merged block from the tree of free blocks. 
+static ravl_node_t *free_block_merge_with_next(coarse_t *coarse, + ravl_node_t *node) { + ravl_node_t *node_next = get_node_next(node); + if (!node_next) { + return node; + } + + ravl_node_t *merged_node = NULL; + umf_result_t umf_result = + user_block_merge(coarse, node, node_next, false, &merged_node); + if (umf_result != UMF_RESULT_SUCCESS) { + return node; + } + + assert(merged_node != NULL); + + return merged_node; +} + +#ifndef NDEBUG // begin of DEBUG code + +typedef struct debug_cb_args_t { + coarse_t *provider; + size_t sum_used; + size_t sum_blocks_size; + size_t num_all_blocks; + size_t num_free_blocks; +} debug_cb_args_t; + +static void debug_verify_all_blocks_cb(void *data, void *arg) { + assert(data); + assert(arg); + + ravl_data_t *node_data = data; + block_t *block = node_data->value; + assert(block); + + debug_cb_args_t *cb_args = (debug_cb_args_t *)arg; + coarse_t *provider = cb_args->provider; + + ravl_node_t *node = + ravl_find(provider->all_blocks, data, RAVL_PREDICATE_EQUAL); + assert(node); + + block_t *block_next = get_block_next(node); + block_t *block_prev = get_block_prev(node); + + cb_args->num_all_blocks++; + if (!block->used) { + cb_args->num_free_blocks++; + } + + assert(block->data); + assert(block->size > 0); + + // data addresses in the list are in ascending order + if (block_prev) { + assert(block_prev->data < block->data); + } + + if (block_next) { + assert(block->data < block_next->data); + } + + // two block's data should not overlap + if (block_next) { + assert((block->data + block->size) <= block_next->data); + } + + cb_args->sum_blocks_size += block->size; + if (block->used) { + cb_args->sum_used += block->size; + } +} + +static umf_result_t coarse_get_stats_no_lock(coarse_t *coarse, + coarse_stats_t *stats); + +static bool debug_check(coarse_t *provider) { + assert(provider); + + coarse_stats_t stats = {0}; + coarse_get_stats_no_lock(provider, &stats); + + debug_cb_args_t cb_args = {0}; + cb_args.provider = provider; + + // verify the all_blocks list + ravl_foreach(provider->all_blocks, debug_verify_all_blocks_cb, &cb_args); + + assert(cb_args.num_all_blocks == stats.num_all_blocks); + assert(cb_args.num_free_blocks == stats.num_free_blocks); + assert(cb_args.sum_used == provider->used_size); + assert(cb_args.sum_blocks_size == provider->alloc_size); + assert(provider->alloc_size >= provider->used_size); + + return true; +} +#endif /* NDEBUG */ // end of DEBUG code + +static umf_result_t coarse_add_used_block(coarse_t *coarse, void *addr, + size_t size) { + block_t *new_block = + coarse_ravl_add_new(coarse->all_blocks, addr, size, NULL); + if (new_block == NULL) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + new_block->used = true; + coarse->alloc_size += size; + coarse->used_size += size; + + return UMF_RESULT_SUCCESS; +} + +static void coarse_ravl_cb_rm_all_blocks_node(void *data, void *arg) { + assert(data); + assert(arg); + + coarse_t *coarse = (struct coarse_t *)arg; + ravl_data_t *node_data = data; + block_t *block = node_data->value; + assert(block); + + if (block->used) { +#ifndef NDEBUG + LOG_WARN("not freed block (addr: %p, size: %zu)", (void *)block->data, + block->size); +#endif + assert(coarse->used_size >= block->size); + coarse->used_size -= block->size; + } + + if (block->free_list_ptr) { + free_blocks_rm_node(coarse->free_blocks, block->free_list_ptr); + } + + if (coarse->cb.free) { + coarse->cb.free(coarse->provider, block->data, block->size); + } + + assert(coarse->alloc_size >= block->size); + coarse->alloc_size -= 
block->size; + + umf_ba_global_free(block); +} + +static umf_result_t can_provider_split(coarse_t *coarse, void *ptr, + size_t totalSize, size_t firstSize) { + // check if the block can be split + umf_result_t umf_result = + coarse->cb.split(coarse->provider, ptr, totalSize, firstSize); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR( + "coarse_split_cb->(ptr=%p, totalSize = %zu = (%zu + %zu)) failed", + ptr, totalSize, firstSize, totalSize - firstSize); + } + + return umf_result; +} + +static umf_result_t create_aligned_block(coarse_t *coarse, size_t orig_size, + size_t alignment, block_t **current) { + (void)orig_size; // unused in the Release version + int rv; + + block_t *curr = *current; + + // In case of non-zero alignment create an aligned block what would be further used. + uintptr_t orig_data = (uintptr_t)curr->data; + uintptr_t aligned_data = ALIGN_UP(orig_data, alignment); + size_t padding = aligned_data - orig_data; + if (alignment > 0 && padding > 0) { + // check if block can be split by the upstream provider + umf_result_t umf_result = + can_provider_split(coarse, curr->data, curr->size, padding); + if (umf_result != UMF_RESULT_SUCCESS) { + return umf_result; + } + + block_t *aligned_block = + coarse_ravl_add_new(coarse->all_blocks, curr->data + padding, + curr->size - padding, NULL); + if (aligned_block == NULL) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + curr->used = false; + curr->size = padding; + + rv = free_blocks_add(coarse->free_blocks, curr); + if (rv) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + // use aligned block + *current = aligned_block; + assert((*current)->size >= orig_size); + } + + return UMF_RESULT_SUCCESS; +} + +// Split the current block and put the new block after the one that we use. +static umf_result_t split_current_block(coarse_t *coarse, block_t *curr, + size_t size) { + + // check if block can be split by the upstream provider + umf_result_t umf_result = + can_provider_split(coarse, curr->data, curr->size, size); + if (umf_result != UMF_RESULT_SUCCESS) { + return umf_result; + } + + ravl_node_t *new_node = NULL; + + block_t *new_block = coarse_ravl_add_new( + coarse->all_blocks, curr->data + size, curr->size - size, &new_node); + if (new_block == NULL) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + new_block->used = false; + + int rv = free_blocks_add(coarse->free_blocks, get_node_block(new_node)); + if (rv) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return UMF_RESULT_SUCCESS; +} + +static block_t *find_free_block(struct ravl *free_blocks, size_t size, + size_t alignment, + coarse_strategy_t allocation_strategy) { + block_t *block; + + switch (allocation_strategy) { + case UMF_COARSE_MEMORY_STRATEGY_FASTEST: + // Always allocate a free block of the (size + alignment) size + // and later cut out the properly aligned part leaving two remaining parts. + return free_blocks_rm_ge(free_blocks, size + alignment, 0, + CHECK_ONLY_THE_FIRST_BLOCK); + + case UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE: + // First check if the first free block of the 'size' size has the correct alignment. + block = free_blocks_rm_ge(free_blocks, size, alignment, + CHECK_ONLY_THE_FIRST_BLOCK); + if (block) { + return block; + } + + // If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. 
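+        // (i.e. pick a free block of at least (size + alignment) bytes and let
+        // create_aligned_block() later split off the misaligned front part as
+        // a separate free block)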
+ return free_blocks_rm_ge(free_blocks, size + alignment, 0, + CHECK_ONLY_THE_FIRST_BLOCK); + + case UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE: + // First look through all free blocks of the 'size' size + // and choose the first one with the correct alignment. + block = free_blocks_rm_ge(free_blocks, size, alignment, + CHECK_ALL_BLOCKS_OF_SIZE); + if (block) { + return block; + } + + // If none of them had the correct alignment, + // use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. + return free_blocks_rm_ge(free_blocks, size + alignment, 0, + CHECK_ONLY_THE_FIRST_BLOCK); + } + + return NULL; +} + +static int free_blocks_re_add(coarse_t *coarse, block_t *block) { + assert(coarse); + + ravl_node_t *node = coarse_ravl_find_node(coarse->all_blocks, block->data); + assert(node); + + // merge with prev and/or next block if they are unused and have continuous data + node = free_block_merge_with_prev(coarse, node); + node = free_block_merge_with_next(coarse, node); + + return free_blocks_add(coarse->free_blocks, get_node_block(node)); +} + +static void ravl_cb_count(void *data, void *arg) { + assert(arg); + (void)data; // unused + + size_t *num_all_blocks = arg; + (*num_all_blocks)++; +} + +static void ravl_cb_count_free(void *data, void *arg) { + assert(data); + assert(arg); + + ravl_data_t *node_data = data; + assert(node_data); + ravl_free_blocks_head_t *head_node = node_data->value; + assert(head_node); + struct ravl_free_blocks_elem_t *free_block = head_node->head; + assert(free_block); + + size_t *num_all_blocks = arg; + while (free_block) { + (*num_all_blocks)++; + free_block = free_block->next; + } +} + +static umf_result_t coarse_get_stats_no_lock(coarse_t *coarse, + coarse_stats_t *stats) { + assert(coarse); + + size_t num_all_blocks = 0; + ravl_foreach(coarse->all_blocks, ravl_cb_count, &num_all_blocks); + + size_t num_free_blocks = 0; + ravl_foreach(coarse->free_blocks, ravl_cb_count_free, &num_free_blocks); + + stats->alloc_size = coarse->alloc_size; + stats->used_size = coarse->used_size; + stats->num_all_blocks = num_all_blocks; + stats->num_free_blocks = num_free_blocks; + + return UMF_RESULT_SUCCESS; +} + +// PUBLIC API + +umf_result_t coarse_new(coarse_params_t *coarse_params, coarse_t **pcoarse) { +#ifdef _WIN32 + utils_init_once(&Log_initialized, utils_log_init); +#endif /* _WIN32 */ + + if (coarse_params == NULL || pcoarse == NULL) { + LOG_ERR("coarse parameters or handle is missing"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!coarse_params->provider) { + LOG_ERR("memory provider is not set"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!coarse_params->page_size) { + LOG_ERR("page size of the memory provider is not set"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!coarse_params->cb.split) { + LOG_ERR("coarse split callback is not set"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!coarse_params->cb.merge) { + LOG_ERR("coarse merge callback is not set"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // alloc() and free() callbacks are optional + + coarse_t *coarse = umf_ba_global_alloc(sizeof(*coarse)); + if (!coarse) { + LOG_ERR("out of the host memory"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + memset(coarse, 0, sizeof(*coarse)); + + coarse->provider = coarse_params->provider; + coarse->page_size = coarse_params->page_size; + coarse->cb = coarse_params->cb; + coarse->allocation_strategy = coarse_params->allocation_strategy; + + umf_result_t umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + + 
coarse->free_blocks = ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); + if (coarse->free_blocks == NULL) { + LOG_ERR("out of the host memory"); + goto err_free_coarse; + } + + coarse->all_blocks = ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); + if (coarse->all_blocks == NULL) { + LOG_ERR("out of the host memory"); + goto err_delete_ravl_free_blocks; + } + + coarse->alloc_size = 0; + coarse->used_size = 0; + + umf_result = UMF_RESULT_ERROR_UNKNOWN; + + if (utils_mutex_init(&coarse->lock) == NULL) { + LOG_ERR("lock initialization failed"); + goto err_delete_ravl_all_blocks; + } + + assert(coarse->used_size == 0); + assert(coarse->alloc_size == 0); + assert(debug_check(coarse)); + + *pcoarse = coarse; + + return UMF_RESULT_SUCCESS; + +err_delete_ravl_all_blocks: + ravl_delete(coarse->all_blocks); +err_delete_ravl_free_blocks: + ravl_delete(coarse->free_blocks); +err_free_coarse: + umf_ba_global_free(coarse); + return umf_result; +} + +void coarse_delete(coarse_t *coarse) { + if (coarse == NULL) { + LOG_ERR("coarse handle is missing"); + return; + } + + utils_mutex_destroy_not_free(&coarse->lock); + + ravl_foreach(coarse->all_blocks, coarse_ravl_cb_rm_all_blocks_node, coarse); + assert(coarse->used_size == 0); + assert(coarse->alloc_size == 0); + + ravl_delete(coarse->all_blocks); + ravl_delete(coarse->free_blocks); + + umf_ba_global_free(coarse); +} + +umf_result_t coarse_add_memory_from_provider(coarse_t *coarse, size_t size) { + umf_result_t umf_result; + void *ptr = NULL; + + if (coarse == NULL || size == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (!coarse->cb.alloc) { + LOG_ERR("error: alloc callback is not set"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + umf_result = coarse_alloc(coarse, size, coarse->page_size, &ptr); + if (umf_result != UMF_RESULT_SUCCESS) { + return umf_result; + } + + assert(ptr); + + return coarse_free(coarse, ptr, size); +} + +umf_result_t coarse_add_memory_fixed(coarse_t *coarse, void *addr, + size_t size) { + umf_result_t umf_result; + + if (coarse == NULL || addr == NULL || size == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (coarse->cb.alloc || coarse->cb.free) { + LOG_ERR("error: alloc or free callback is set"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + if (utils_mutex_lock(&coarse->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse)); + + umf_result = coarse_add_used_block(coarse, addr, size); + + assert(debug_check(coarse)); + utils_mutex_unlock(&coarse->lock); + + if (umf_result != UMF_RESULT_SUCCESS) { + return umf_result; + } + + umf_result = coarse_free(coarse, addr, size); + if (umf_result != UMF_RESULT_SUCCESS) { + return umf_result; + } + + LOG_DEBUG("coarse_ALLOC (add_memory_block) %zu used %zu alloc %zu", size, + coarse->used_size, coarse->alloc_size); + + return UMF_RESULT_SUCCESS; +} + +umf_result_t coarse_alloc(coarse_t *coarse, size_t size, size_t alignment, + void **resultPtr) { + umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN; + + if (coarse == NULL || resultPtr == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // alignment must be a power of two and a multiple or a divider of the page size + if (alignment && + ((alignment & (alignment - 1)) || ((alignment % coarse->page_size) && + (coarse->page_size % alignment)))) { + LOG_ERR("wrong alignment: %zu (not a power of 2 or a multiple or a " + "divider of the page size (%zu))", + alignment, coarse->page_size); + return 
UMF_RESULT_ERROR_INVALID_ALIGNMENT; + } + + if (IS_NOT_ALIGNED(alignment, coarse->page_size)) { + alignment = ALIGN_UP(alignment, coarse->page_size); + } + + if (utils_mutex_lock(&coarse->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse)); + + // Find a block with greater or equal size using the given memory allocation strategy + block_t *curr = find_free_block(coarse->free_blocks, size, alignment, + coarse->allocation_strategy); + + // If the block that we want to reuse has a greater size, split it. + // Try to merge the split part with the successor if it is not used. + enum { ACTION_NONE = 0, ACTION_USE, ACTION_SPLIT } action = ACTION_NONE; + + if (curr && curr->size > size) { + action = ACTION_SPLIT; + } else if (curr && curr->size == size) { + action = ACTION_USE; + } + + if (action) { // ACTION_SPLIT or ACTION_USE + assert(curr->used == false); + + // In case of non-zero alignment create an aligned block what would be further used. + if (alignment > 0) { + umf_result = create_aligned_block(coarse, size, alignment, &curr); + if (umf_result != UMF_RESULT_SUCCESS) { + (void)free_blocks_re_add(coarse, curr); + goto err_unlock; + } + } + + if (action == ACTION_SPLIT) { + // Split the current block and put the new block after the one that we use. + umf_result = split_current_block(coarse, curr, size); + if (umf_result != UMF_RESULT_SUCCESS) { + (void)free_blocks_re_add(coarse, curr); + goto err_unlock; + } + + curr->size = size; + + LOG_DEBUG("coarse_ALLOC (split_block) %zu used %zu alloc %zu", size, + coarse->used_size, coarse->alloc_size); + + } else { // action == ACTION_USE + LOG_DEBUG("coarse_ALLOC (same_block) %zu used %zu alloc %zu", size, + coarse->used_size, coarse->alloc_size); + } + + curr->used = true; + *resultPtr = curr->data; + coarse->used_size += size; + + assert(debug_check(coarse)); + utils_mutex_unlock(&coarse->lock); + + return UMF_RESULT_SUCCESS; + } + + // no suitable block found - try to get more memory from the upstream provider + umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + + *resultPtr = NULL; + + if (!coarse->cb.alloc) { + LOG_ERR("out of memory"); + goto err_unlock; + } + + umf_result = coarse->cb.alloc(coarse->provider, size, alignment, resultPtr); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("coarse_alloc_cb() failed: out of memory"); + goto err_unlock; + } + + ASSERT_IS_ALIGNED(((uintptr_t)(*resultPtr)), alignment); + + umf_result = coarse_add_used_block(coarse, *resultPtr, size); + if (umf_result != UMF_RESULT_SUCCESS) { + if (coarse->cb.free) { + coarse->cb.free(coarse->provider, *resultPtr, size); + } + goto err_unlock; + } + + LOG_DEBUG("coarse_ALLOC (memory_provider) %zu used %zu alloc %zu", size, + coarse->used_size, coarse->alloc_size); + + umf_result = UMF_RESULT_SUCCESS; + +err_unlock: + assert(debug_check(coarse)); + utils_mutex_unlock(&coarse->lock); + + return umf_result; +} + +umf_result_t coarse_free(coarse_t *coarse, void *ptr, size_t bytes) { + if (coarse == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + + if (utils_mutex_lock(&coarse->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse)); + + ravl_node_t *node = coarse_ravl_find_node(coarse->all_blocks, ptr); + if (node == NULL) { + // the block was not found + LOG_ERR("memory block not found (ptr = %p, size = %zu)", ptr, bytes); + utils_mutex_unlock(&coarse->lock); + 
return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + block_t *block = get_node_block(node); + assert(block->used); + + if (bytes > 0 && bytes != block->size) { + // wrong size of allocation + LOG_ERR("wrong size of allocation"); + utils_mutex_unlock(&coarse->lock); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + LOG_DEBUG("coarse_FREE (return_block_to_pool) %zu used %zu alloc %zu", + block->size, coarse->used_size - block->size, coarse->alloc_size); + + assert(coarse->used_size >= block->size); + coarse->used_size -= block->size; + + block->used = false; + + // Merge with prev and/or next block if they are unused and have continuous data. + node = free_block_merge_with_prev(coarse, node); + node = free_block_merge_with_next(coarse, node); + + int rv = free_blocks_add(coarse->free_blocks, get_node_block(node)); + if (rv) { + utils_mutex_unlock(&coarse->lock); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + assert(debug_check(coarse)); + utils_mutex_unlock(&coarse->lock); + + return UMF_RESULT_SUCCESS; +} + +umf_result_t coarse_merge(coarse_t *coarse, void *lowPtr, void *highPtr, + size_t totalSize) { + umf_result_t umf_result; + + if (coarse == NULL || lowPtr == NULL || highPtr == NULL || totalSize == 0 || + ((uintptr_t)highPtr <= (uintptr_t)lowPtr) || + ((uintptr_t)highPtr - (uintptr_t)lowPtr >= totalSize)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (utils_mutex_lock(&coarse->lock) != 0) { + LOG_ERR("locking the lock failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse)); + + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + + ravl_node_t *low_node = coarse_ravl_find_node(coarse->all_blocks, lowPtr); + if (low_node == NULL) { + LOG_ERR("the lowPtr memory block not found"); + goto err_mutex_unlock; + } + + block_t *low_block = get_node_block(low_node); + if (!low_block->used) { + LOG_ERR("the lowPtr block is not allocated"); + goto err_mutex_unlock; + } + + ravl_node_t *high_node = coarse_ravl_find_node(coarse->all_blocks, highPtr); + if (high_node == NULL) { + LOG_ERR("the highPtr memory block not found"); + goto err_mutex_unlock; + } + + block_t *high_block = get_node_block(high_node); + if (!high_block->used) { + LOG_ERR("the highPtr block is not allocated"); + goto err_mutex_unlock; + } + + if (get_node_next(low_node) != high_node || + ((uintptr_t)highPtr != ((uintptr_t)lowPtr + low_block->size))) { + LOG_ERR("given allocations are not adjacent"); + goto err_mutex_unlock; + } + + assert(get_node_prev(high_node) == low_node); + + if (low_block->size + high_block->size != totalSize) { + LOG_ERR("wrong totalSize: %zu != %zu", totalSize, + low_block->size + high_block->size); + goto err_mutex_unlock; + } + + ravl_node_t *merged_node = NULL; + + umf_result = + user_block_merge(coarse, low_node, high_node, true, &merged_node); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("merging a block failed"); + goto err_mutex_unlock; + } + + assert(merged_node == low_node); + assert(low_block->size == totalSize); + + umf_result = UMF_RESULT_SUCCESS; + +err_mutex_unlock: + assert(debug_check(coarse)); + utils_mutex_unlock(&coarse->lock); + + return umf_result; +} + +umf_result_t coarse_split(coarse_t *coarse, void *ptr, size_t totalSize, + size_t firstSize) { + umf_result_t umf_result; + + if (coarse == NULL || ptr == NULL || (firstSize >= totalSize) || + firstSize == 0 || totalSize == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (utils_mutex_lock(&coarse->lock) != 0) { + LOG_ERR("locking the lock failed"); + return 
UMF_RESULT_ERROR_UNKNOWN; + } + + assert(debug_check(coarse)); + + umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; + + ravl_node_t *node = coarse_ravl_find_node(coarse->all_blocks, ptr); + if (node == NULL) { + LOG_ERR("memory block not found"); + goto err_mutex_unlock; + } + + block_t *block = get_node_block(node); + + if (block->size != totalSize) { + LOG_ERR("wrong totalSize: %zu != %zu", totalSize, block->size); + goto err_mutex_unlock; + } + + if (!block->used) { + LOG_ERR("block is not allocated"); + goto err_mutex_unlock; + } + + // check if block can be split by the memory provider + umf_result = can_provider_split(coarse, ptr, totalSize, firstSize); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("memory provider cannot split a memory block"); + goto err_mutex_unlock; + } + + umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + + block_t *new_block = + coarse_ravl_add_new(coarse->all_blocks, block->data + firstSize, + block->size - firstSize, NULL); + if (new_block == NULL) { + goto err_mutex_unlock; + } + + block->size = firstSize; + new_block->used = true; + + assert(new_block->size == (totalSize - firstSize)); + + umf_result = UMF_RESULT_SUCCESS; + +err_mutex_unlock: + assert(debug_check(coarse)); + utils_mutex_unlock(&coarse->lock); + + return umf_result; +} + +coarse_stats_t coarse_get_stats(coarse_t *coarse) { + coarse_stats_t stats = {0}; + + if (coarse == NULL) { + return stats; + } + + if (utils_mutex_lock(&coarse->lock) != 0) { + LOG_ERR("locking the lock failed"); + return stats; + } + + coarse_get_stats_no_lock(coarse, &stats); + + utils_mutex_unlock(&coarse->lock); + + return stats; +} diff --git a/src/coarse/coarse.h b/src/coarse/coarse.h new file mode 100644 index 000000000..cd151ca27 --- /dev/null +++ b/src/coarse/coarse.h @@ -0,0 +1,112 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_COARSE_H +#define UMF_COARSE_H + +#include +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct coarse_t coarse_t; + +// coarse callbacks implement provider-specific actions +typedef struct coarse_callbacks_t { + // alloc() is optional (can be NULL for the fixed-size memory provider) + umf_result_t (*alloc)(void *provider, size_t size, size_t alignment, + void **ptr); + // free() is optional (can be NULL if the provider does not support the free() op) + umf_result_t (*free)(void *provider, void *ptr, size_t size); + umf_result_t (*split)(void *provider, void *ptr, size_t totalSize, + size_t firstSize); + umf_result_t (*merge)(void *provider, void *lowPtr, void *highPtr, + size_t totalSize); +} coarse_callbacks_t; + +// coarse library allocation strategy +typedef enum coarse_strategy_t { + // Always allocate a free block of the (size + alignment) size + // and cut out the properly aligned part leaving two remaining parts. + // It is the fastest strategy but causes memory fragmentation + // when alignment is greater than 0. + // It is the best strategy when alignment always equals 0. + UMF_COARSE_MEMORY_STRATEGY_FASTEST = 0, + + // Check if the first free block of the 'size' size has the correct alignment. + // If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. + UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, + + // Look through all free blocks of the 'size' size + // and choose the first one with the correct alignment. 
+ // If none of them had the correct alignment, + // use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. + UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE, +} coarse_strategy_t; + +// coarse library settings structure +typedef struct coarse_params_t { + // handle of the memory provider + void *provider; + + // coarse callbacks + coarse_callbacks_t cb; + + // memory allocation strategy, + // see coarse_strategy_t for details + coarse_strategy_t allocation_strategy; + + // page size of the memory provider + size_t page_size; +} coarse_params_t; + +// coarse library statistics +typedef struct coarse_stats_t { + // total allocation size + size_t alloc_size; + + // size of used memory + size_t used_size; + + // total number of allocated memory blocks + size_t num_all_blocks; + + // number of free memory blocks + size_t num_free_blocks; +} coarse_stats_t; + +umf_result_t coarse_new(coarse_params_t *coarse_params, coarse_t **pcoarse); +void coarse_delete(coarse_t *coarse); + +umf_result_t coarse_alloc(coarse_t *coarse, size_t size, size_t alignment, + void **resultPtr); +umf_result_t coarse_free(coarse_t *coarse, void *ptr, size_t bytes); + +umf_result_t coarse_merge(coarse_t *coarse, void *lowPtr, void *highPtr, + size_t totalSize); +umf_result_t coarse_split(coarse_t *coarse, void *ptr, size_t totalSize, + size_t firstSize); + +// supported only if the alloc callback is set, +// returns UMF_RESULT_ERROR_NOT_SUPPORTED otherwise +umf_result_t coarse_add_memory_from_provider(coarse_t *coarse, size_t size); + +// supported only if the alloc and the free callbacks are NOT set +// returns UMF_RESULT_ERROR_NOT_SUPPORTED otherwise +umf_result_t coarse_add_memory_fixed(coarse_t *coarse, void *addr, size_t size); + +coarse_stats_t coarse_get_stats(coarse_t *coarse); + +#ifdef __cplusplus +} +#endif + +#endif // UMF_COARSE_H From 958b6905d7d5160af9328dc745d480025fb74945 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 2 Dec 2024 08:38:32 +0100 Subject: [PATCH 008/466] Add tests for the coarse library Signed-off-by: Lukasz Dorau --- test/CMakeLists.txt | 6 + test/coarse_lib.cpp | 1319 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 1325 insertions(+) create mode 100644 test/coarse_lib.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c8b854ba5..ffa4bd20d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -88,6 +88,7 @@ function(build_umf_test) PRIVATE ${UMF_CMAKE_SOURCE_DIR}/include ${UMF_CMAKE_SOURCE_DIR}/src ${UMF_CMAKE_SOURCE_DIR}/src/base_alloc + ${UMF_CMAKE_SOURCE_DIR}/src/coarse ${UMF_CMAKE_SOURCE_DIR}/src/utils ${UMF_TEST_DIR}/common ${UMF_TEST_DIR}) @@ -196,6 +197,11 @@ add_umf_test( SRCS provider_coarse.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) +add_umf_test( + NAME coarse_lib + SRCS coarse_lib.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST} coarse) + if(UMF_BUILD_LIBUMF_POOL_DISJOINT) add_umf_test( NAME disjointPool diff --git a/test/coarse_lib.cpp b/test/coarse_lib.cpp new file mode 100644 index 000000000..6a3d9637e --- /dev/null +++ b/test/coarse_lib.cpp @@ -0,0 +1,1319 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include "coarse.h" +#include "provider.hpp" + +using umf_test::KB; +using umf_test::MB; +using umf_test::test; + +#define MOCKED_COARSE ((coarse_t *)0x01) +#define MOCKED_PROVIDER ((umf_memory_provider_handle_t)0x02) +#define INVALID_PTR ((void *)0x03) + +static umf_result_t alloc_cb(void *provider, size_t size, size_t alignment, + void **ptr) { + return umfMemoryProviderAlloc((umf_memory_provider_handle_t)provider, size, + alignment, ptr); +} + +static umf_result_t free_cb(void *provider, void *ptr, size_t size) { + return umfMemoryProviderFree((umf_memory_provider_handle_t)provider, ptr, + size); +} + +static umf_result_t split_cb(void *provider, void *ptr, size_t totalSize, + size_t firstSize) { + if (provider == NULL || ptr == NULL || (firstSize >= totalSize) || + firstSize == 0 || totalSize == 0) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t merge_cb(void *provider, void *lowPtr, void *highPtr, + size_t totalSize) { + if (provider == NULL || lowPtr == NULL || highPtr == NULL || + totalSize == 0 || ((uintptr_t)highPtr <= (uintptr_t)lowPtr) || + ((uintptr_t)highPtr - (uintptr_t)lowPtr >= totalSize)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t alloc_cb_fails(void *provider, size_t size, + size_t alignment, void **ptr) { + (void)provider; //unused + (void)size; //unused + (void)alignment; //unused + (void)ptr; //unused + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; +} + +static umf_result_t free_cb_fails(void *provider, void *ptr, size_t size) { + (void)provider; //unused + (void)ptr; //unused + (void)size; //unused + return UMF_RESULT_ERROR_USER_SPECIFIC; +} + +static umf_result_t split_cb_fails(void *provider, void *ptr, size_t totalSize, + size_t firstSize) { + (void)provider; //unused + (void)ptr; //unused + (void)totalSize; //unused + (void)firstSize; //unused + return UMF_RESULT_ERROR_USER_SPECIFIC; +} + +static umf_result_t merge_cb_fails(void *provider, void *lowPtr, void *highPtr, + size_t totalSize) { + (void)provider; //unused + (void)lowPtr; //unused + (void)highPtr; //unused + (void)totalSize; //unused + return UMF_RESULT_ERROR_USER_SPECIFIC; +} + +static void coarse_params_set_default(coarse_params_t *coarse_params, + umf_memory_provider_handle_t provider, + coarse_strategy_t allocation_strategy) { + memset(coarse_params, 0, sizeof(*coarse_params)); + coarse_params->provider = provider; + coarse_params->allocation_strategy = allocation_strategy; + coarse_params->cb.split = split_cb; + coarse_params->cb.merge = merge_cb; + coarse_params->page_size = utils_get_page_size(); + + if (provider) { + coarse_params->cb.alloc = alloc_cb; + coarse_params->cb.free = free_cb; + } +} + +umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS = + umf::providerMakeCOps(); + +struct CoarseWithMemoryStrategyTest + : umf_test::test, + ::testing::WithParamInterface { + void SetUp() override { + test::SetUp(); + allocation_strategy = this->GetParam(); + coarse_params_set_default(&coarse_params, MOCKED_PROVIDER, + allocation_strategy); + } + + coarse_t *coarse_handle = nullptr; + coarse_params_t coarse_params; + coarse_strategy_t allocation_strategy; + umf_result_t umf_result; +}; + +INSTANTIATE_TEST_SUITE_P( + CoarseWithMemoryStrategyTest, CoarseWithMemoryStrategyTest, + ::testing::Values(UMF_COARSE_MEMORY_STRATEGY_FASTEST, + UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, + 
UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE)); + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_provider) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t alloc_size = 20 * MB; + void *ptr; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_free(ch, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_fixed_memory) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB; + std::vector buffer(buff_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_free(ch, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_fixed_memory_various) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB; + std::vector buffer(buff_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + 
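+    // adding fixed memory is expected to succeed here, because both the
+    // alloc and the free callbacks were left NULL above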
ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // free NULL + umf_result = coarse_free(ch, nullptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // free invalid pointer + umf_result = coarse_free(ch, INVALID_PTR, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // wrong alignment (3 bytes) + ptr = nullptr; + umf_result = coarse_alloc(ch, 2 * MB, 3, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ALIGNMENT); + ASSERT_EQ(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // not freed allocation + // coarse_delete() prints LOG_WARN() in Debug mode + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_split_merge) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + const size_t alloc_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + /* test coarse_split */ + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_split(ch, ptr, 2 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_free(ch, (ptr + 1 * MB), 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 1 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_free(ch, ptr, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + /* test coarse_merge */ + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = 
coarse_split(ch, ptr, 2 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB), 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_free(ch, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(coarse_handle); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +// negative tests + +// NULL parameters +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_no_params) { + umf_result = coarse_new(nullptr, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_handle, nullptr); +} + +// no provider +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_no_provider) { + coarse_params.provider = NULL; + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_handle, nullptr); +} + +// no page size +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_no_page_size) { + coarse_params.page_size = 0; + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_handle, nullptr); +} + +// no split callback +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_no_split_cb) { + coarse_params.cb.split = NULL; + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_handle, nullptr); +} + +// no merge callback +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_no_merge_cb) { + coarse_params.cb.merge = NULL; + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_handle, nullptr); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_alloc_invalid) { + void *ptr = nullptr; + + umf_result = coarse_alloc(nullptr, MB, 0, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(ptr, nullptr); + + umf_result = coarse_alloc(nullptr, MB, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(ptr, nullptr); + + umf_result = coarse_alloc(MOCKED_COARSE, MB, 0, nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(ptr, nullptr); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_free_invalid) { + // coarse handle is NULL + umf_result = coarse_free(nullptr, nullptr, MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_delete_null) { + coarse_delete(nullptr); +} + +TEST_P(CoarseWithMemoryStrategyTest, + coarseTest_add_memory_from_provider_null_0) { + umf_result = coarse_add_memory_from_provider(nullptr, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_add_memory_fixed_null_0) { + umf_result = coarse_add_memory_fixed(nullptr, nullptr, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_null_stats) { + 
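+    // a NULL coarse handle is expected to yield zero-initialized statistics
+    // rather than an error or a crash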
ASSERT_EQ(coarse_get_stats(nullptr).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(nullptr).used_size, 0); + ASSERT_EQ(coarse_get_stats(nullptr).num_all_blocks, 0); + ASSERT_EQ(coarse_get_stats(nullptr).num_free_blocks, 0); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_split_merge_negative) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + const size_t alloc_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + /* test coarse_split */ + + umf_result = coarse_alloc(ch, 6 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 6 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + // firstSize >= totalSize + umf_result = coarse_split(ch, ptr, 6 * MB, 6 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // firstSize == 0 + umf_result = coarse_split(ch, ptr, 6 * MB, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // totalSize == 0 + umf_result = coarse_split(ch, ptr, 0, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // wrong totalSize + umf_result = coarse_split(ch, ptr, 5 * MB, 1 * KB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // memory block not found + umf_result = coarse_split(ch, ptr + 1, 6 * MB, 1 * KB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = coarse_free(ch, ptr, 6 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // split freed block + umf_result = coarse_split(ch, ptr, alloc_size, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + /* test coarse_merge */ + + umf_result = coarse_alloc(ch, 6 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 6 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + // split (6 * MB) block into (1 * MB) + (5 * MB) + umf_result = coarse_split(ch, ptr, 6 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 6 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + // split (5 * MB) block into (2 * MB) + (3 * MB) + umf_result = coarse_split(ch, (ptr + 1 * MB), 5 * MB, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 6 * MB); 
+ ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 4); + + // now we have 3 used blocks: (1 * MB) + (2 * MB) + (3 * MB) + + // highPtr <= lowPtr + umf_result = coarse_merge(ch, (ptr + 1 * MB), ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // highPtr - lowPtr >= totalSize + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB), 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // low ptr does not exist + umf_result = coarse_merge(ch, ptr + 1, (ptr + 1 * MB), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // high ptr does not exist + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB + 1), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // low_block->size + high_block->size != totalSize + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB), 5 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // not adjacent blocks + umf_result = coarse_merge(ch, ptr, (ptr + 3 * MB), 4 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // free the 2 MB block in the middle + umf_result = coarse_free(ch, (ptr + 1 * MB), 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 4 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 4); + + // now we have 3 blocks: (1 * MB) used + (2 * MB) freed + (3 * MB) used + + // the low ptr block is not allocated + umf_result = coarse_merge(ch, (ptr + 1 * MB), (ptr + 3 * MB), 5 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + // the high ptr block is not allocated + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = coarse_free(ch, ptr, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 3 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_free(ch, (ptr + 3 * MB), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(coarse_handle); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_alloc_cb_fails) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + coarse_params.cb.alloc = alloc_cb_fails; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t alloc_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_free_cb_fails) { + umf_memory_provider_handle_t 
malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + coarse_params.cb.free = free_cb_fails; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t alloc_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_split_cb_fails) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + coarse_params.cb.split = split_cb_fails; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + void *ptr = nullptr; + const size_t alloc_size = 20 * MB; + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // coarse_alloc(alloc_size / 2, alignment = 0) + umf_result = coarse_alloc(ch, alloc_size / 2, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_USER_SPECIFIC); + ASSERT_EQ(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // coarse_alloc(alloc_size / 2, alignment = 2 * MB) + umf_result = coarse_alloc(ch, alloc_size / 2, 2 * MB, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_USER_SPECIFIC); + ASSERT_EQ(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // coarse_alloc(alloc_size, alignment = 0) - OK + umf_result = coarse_alloc(ch, alloc_size, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + umf_result = coarse_split(ch, ptr, alloc_size, alloc_size / 2); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_USER_SPECIFIC); + + ASSERT_EQ(coarse_get_stats(ch).used_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + umf_result = coarse_free(ch, ptr, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + 
ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(coarse_handle); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_merge_cb_fails) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 10 * MB; + std::vector buffer(buff_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; + coarse_params.cb.merge = merge_cb_fails; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + /* test coarse_merge */ + umf_result = coarse_alloc(ch, 3 * MB, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 3 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_split(ch, ptr, 3 * MB, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 3 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_merge(ch, ptr, (ptr + 1 * MB), 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_USER_SPECIFIC); + ASSERT_EQ(coarse_get_stats(ch).used_size, 3 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_free(ch, ptr, 3 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_get_stats(ch).used_size, 3 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_free(ch, ptr, 1 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_free(ch, (ptr + 1 * MB), 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + coarse_delete(coarse_handle); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_fixed_memory_alloc_set) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB; + std::vector buffer(buff_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.free = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 
0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_fixed_memory_free_set) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB; + std::vector buffer(buff_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.alloc = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_fixed_memory_alloc_free_set) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB; + std::vector buffer(buff_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_add_memory_fixed(ch, buf, buff_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + coarse_delete(ch); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_provider_alloc_not_set) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + coarse_params.cb.alloc = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t alloc_size = 20 * MB; + void *ptr; + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_alloc(ch, 2 * MB, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + ASSERT_EQ(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + umf_result = coarse_alloc(ch, 2 * MB, 2 * MB, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + ASSERT_EQ(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + 
ASSERT_EQ(coarse_get_stats(ch).alloc_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 0); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t init_buffer_size = 20 * MB; + void *p1, *p2; + + umf_result = coarse_add_memory_from_provider(ch, init_buffer_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // alloc 2x 2MB + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p1, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&p2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p2, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 4 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + ASSERT_NE(p1, p2); + + // swap pointers to get p1 < p2 + if (p1 > p2) { + std::swap(p1, p2); + } + + // free + alloc first block + // the block should be reused + // currently there is no purging, so the alloc size shouldn't change + // there should be no block merging between used and not-used blocks + umf_result = coarse_free(ch, p1, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p1, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 4 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + + // free all allocs + // overall alloc size shouldn't change + // block p2 should merge with the prev free block p1 + // and the remaining init block + umf_result = coarse_free(ch, p1, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + umf_result = coarse_free(ch, p2, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // test allocations with alignment + // TODO: what about holes? 
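+    // the assertions below only check that the returned pointers are
+    // 128-byte aligned; possible padding blocks created to satisfy the
+    // alignment are not verified here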
+ umf_result = coarse_alloc(ch, 1 * MB - 4, 128, (void **)&p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p1, nullptr); + ASSERT_EQ((uintptr_t)p1 & 127, 0); + + umf_result = coarse_alloc(ch, 1 * MB - 4, 128, (void **)&p2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p2, nullptr); + ASSERT_EQ((uintptr_t)p2 & 127, 0); + + umf_result = coarse_free(ch, p1, 1 * MB - 4); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umf_result = coarse_free(ch, p2, 1 * MB - 4); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // alloc whole buffer + // after this, there should be one single block + umf_result = coarse_alloc(ch, init_buffer_size, 0, (void **)&p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p1, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // free all memory + umf_result = coarse_free(ch, p1, init_buffer_size); + + // alloc 2 MB block - the init block should be split + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&p1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p1, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 2); + + // alloc additional 2 MB + // the non-used block should be used + umf_result = coarse_alloc(ch, 2 * MB, 0, (void **)&p2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(p2, nullptr); + ASSERT_EQ(coarse_get_stats(ch).used_size, 4 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 3); + ASSERT_NE(p1, p2); + + // make sure that p1 < p2 + if (p1 > p2) { + std::swap(p1, p2); + } + + // free blocks in order: p2, p1 + // block p1 should merge with the next block p2 + // swap pointers to get p1 < p2 + coarse_free(ch, p2, 2 * MB); + coarse_free(ch, p1, 2 * MB); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // alloc 10x 2 MB - this should occupy all allocated memory + constexpr int allocs_size = 10; + void *allocs[allocs_size] = {0}; + for (int i = 0; i < allocs_size; i++) { + ASSERT_EQ(coarse_get_stats(ch).used_size, i * 2 * MB); + umf_result = coarse_alloc(ch, 2 * MB, 0, &allocs[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(allocs[i], nullptr); + } + ASSERT_EQ(coarse_get_stats(ch).used_size, 20 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + // there should be no block with the free memory + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, allocs_size); + + // free all memory + for (int i = 0; i < allocs_size; i++) { + umf_result = coarse_free(ch, allocs[i], 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_simple1) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = 
malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t init_buffer_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, init_buffer_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // test 1 + + size_t s1 = 74659 * KB; + size_t s2 = 8206 * KB; + + size_t max_alloc_size = 0; + + const int nreps = 2; + const int nptrs = 6; + + // s1 + for (int j = 0; j < nreps; j++) { + void *t[nptrs] = {0}; + for (int i = 0; i < nptrs; i++) { + umf_result = coarse_alloc(ch, s1, 0, &t[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(t[i], nullptr); + } + + if (max_alloc_size == 0) { + max_alloc_size = coarse_get_stats(ch).alloc_size; + } + + for (int i = 0; i < nptrs; i++) { + umf_result = coarse_free(ch, t[i], s1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + } + + // s2 + for (int j = 0; j < nreps; j++) { + void *t[nptrs] = {0}; + for (int i = 0; i < nptrs; i++) { + umf_result = coarse_alloc(ch, s2, 0, &t[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(t[i], nullptr); + } + + // all s2 should fit into single block leaved after freeing s1 + ASSERT_LE(coarse_get_stats(ch).alloc_size, max_alloc_size); + + for (int i = 0; i < nptrs; i++) { + umf_result = coarse_free(ch, t[i], s2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + } + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_simple2) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t init_buffer_size = 20 * MB; + + umf_result = coarse_add_memory_from_provider(ch, init_buffer_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, init_buffer_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + // test + double sizes[] = {2, 4, 0.5, 1, 8, 0.25}; + size_t alignment[] = {0, 4, 0, 16, 32, 128}; + for (int i = 0; i < 6; i++) { + size_t s = (size_t)(sizes[i] * MB); + void *t[8] = {0}; + for (int j = 0; j < 8; j++) { + umf_result = coarse_alloc(ch, s, alignment[i], &t[j]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(t[j], nullptr); + } + + for (int j = 0; j < 8; j++) { + umf_result = coarse_free(ch, t[j], s); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + } + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_alignment_provider) { + umf_memory_provider_handle_t malloc_memory_provider; + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, + &malloc_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(malloc_memory_provider, nullptr); + + coarse_params.provider = malloc_memory_provider; + + umf_result = 
coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + const size_t alloc_size = 40 * MB; + + umf_result = coarse_add_memory_from_provider(ch, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + const int niter = 10; + const int size = 1 * MB; + void *ptr[niter] = {0}; + + for (int i = 0; i < niter; i++) { + umf_result = coarse_alloc(ch, size, 0, &ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr[i], nullptr); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, niter + 1); + + for (int i = 0; i < niter; i += 2) { + umf_result = coarse_free(ch, ptr[i], size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ptr[i] = nullptr; + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size / 2); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, niter + 1); + + for (int i = 0; i < niter; i += 2) { + ASSERT_EQ(ptr[i], nullptr); + umf_result = coarse_alloc(ch, size, 2 * MB, &ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr[i], nullptr); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size); + + for (int i = 0; i < niter; i++) { + umf_result = coarse_free(ch, ptr[i], size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); + umfMemoryProviderDestroy(malloc_memory_provider); +} + +TEST_P(CoarseWithMemoryStrategyTest, coarseTest_alignment_fixed_memory) { + // preallocate some memory and initialize the vector with zeros + const size_t alloc_size = 40 * MB; + std::vector buffer(alloc_size, 0); + void *buf = (void *)buffer.data(); + ASSERT_NE(buf, nullptr); + + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + + umf_result = coarse_add_memory_fixed(ch, buf, alloc_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + const int niter = 10; + const int size = 1 * MB; + void *ptr[niter] = {0}; + + for (int i = 0; i < niter; i++) { + umf_result = coarse_alloc(ch, size, 0, &ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr[i], nullptr); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, niter + 1); + + for (int i = 0; i < niter; i += 2) { + umf_result = coarse_free(ch, ptr[i], size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ptr[i] = nullptr; + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size / 2); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, niter + 1); + + for (int i = 0; i < niter; i += 2) { + ASSERT_EQ(ptr[i], nullptr); + umf_result = coarse_alloc(ch, size, 2 * MB, &ptr[i]); + ASSERT_EQ(umf_result, 
UMF_RESULT_SUCCESS); + ASSERT_NE(ptr[i], nullptr); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, niter * size); + + for (int i = 0; i < niter; i++) { + umf_result = coarse_free(ch, ptr[i], size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); +} From aff5982afcd53455fa881575059b7a4ed9f8bc56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Mon, 2 Dec 2024 17:06:11 +0100 Subject: [PATCH 009/466] Bump version on main to 0.11.0 --- .github/workflows/reusable_basic.yml | 4 ++-- RELEASE_STEPS.md | 2 +- include/umf/base.h | 2 +- scripts/docs_config/conf.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 1c13a771b..9b71c7d1b 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -7,8 +7,8 @@ permissions: contents: read env: - # for installation testing - it should match with version set in CMake - UMF_VERSION: 0.10.0 + # for installation testing - it should match with version set in git + UMF_VERSION: 0.11.0 BUILD_DIR : "${{github.workspace}}/build" INSTL_DIR : "${{github.workspace}}/../install-dir" COVERAGE_DIR : "${{github.workspace}}/coverage" diff --git a/RELEASE_STEPS.md b/RELEASE_STEPS.md index e88ca9c2d..fb46f156b 100644 --- a/RELEASE_STEPS.md +++ b/RELEASE_STEPS.md @@ -42,7 +42,7 @@ Do changes for a release: - Update project's version in a few places: - For major and minor releases: `UMF_VERSION_CURRENT` in `include/umf/base.h` (the API version) - `release` variable in `scripts/docs_config/conf.py` (for docs) - - `UMF_VERSION` variable in `.github/workflows/basic.yml` (for installation test) + - `UMF_VERSION` variable in `.github/workflows/reusable_basic.yml` (for installation test) - For major releases update ABI version in `.map` and `.def` files - These files are defined for all public libraries (`libumf` and `proxy_lib`, at the moment) - Commit these changes and tag the release: diff --git a/include/umf/base.h b/include/umf/base.h index 53378195d..32d84771f 100644 --- a/include/umf/base.h +++ b/include/umf/base.h @@ -28,7 +28,7 @@ extern "C" { #define UMF_MINOR_VERSION(_ver) (_ver & 0x0000ffff) /// @brief Current version of the UMF headers -#define UMF_VERSION_CURRENT UMF_MAKE_VERSION(0, 10) +#define UMF_VERSION_CURRENT UMF_MAKE_VERSION(0, 11) /// @brief Operation results typedef enum umf_result_t { diff --git a/scripts/docs_config/conf.py b/scripts/docs_config/conf.py index 28c9b5f9f..577bc0b48 100644 --- a/scripts/docs_config/conf.py +++ b/scripts/docs_config/conf.py @@ -22,7 +22,7 @@ author = "Intel" # The full version, including alpha/beta/rc tags -release = "0.10.0" +release = "0.11.0" # -- General configuration --------------------------------------------------- From dcc1ec1ae4ad932800d160b1ddb1ac61f326f08d Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 3 Dec 2024 10:11:51 +0100 Subject: [PATCH 010/466] Add info about the `PTRACE_MODE_ATTACH_REALCREDS` permission Add info about the `PTRACE_MODE_ATTACH_REALCREDS` permission required by the OS and the L0 providers to `README.md`. 
Signed-off-by: Lukasz Dorau --- README.md | 31 +++++++++++++++++++++++++++---- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 6f1233c63..3379132e7 100644 --- a/README.md +++ b/README.md @@ -150,6 +150,15 @@ OS memory provider supports two types of memory mappings (set by the `visibility IPC API requires the `UMF_MEM_MAP_SHARED` memory `visibility` mode (`UMF_RESULT_ERROR_INVALID_ARGUMENT` is returned otherwise). +IPC API uses the file descriptor duplication. It requires using `pidfd_getfd(2)` to obtain +a duplicate of another process's file descriptor (`pidfd_getfd(2)` is supported since Linux 5.6). +Permission to duplicate another process's file descriptor is governed by a ptrace access mode +`PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`) that can be changed using +the `/proc/sys/kernel/yama/ptrace_scope` interface in the following way: +```sh +$ sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" +``` + There are available two mechanisms for the shared memory mapping: 1) a named shared memory object (used if the `shm_name` parameter is not NULL) or 2) an anonymous file descriptor (used if the `shm_name` parameter is NULL) @@ -162,23 +171,37 @@ An anonymous file descriptor for the shared memory mapping will be created using ##### Requirements -Required packages for tests (Linux-only yet): +IPC API on Linux requires the `PTRACE_MODE_ATTACH_REALCREDS` permission (see `ptrace(2)`) +to duplicate another process's file descriptor (see above). + +Packages required for tests (Linux-only yet): - libnuma-dev #### Level Zero memory provider A memory provider that provides memory from L0 device. +IPC API uses the file descriptor duplication. It requires using `pidfd_getfd(2)` to obtain +a duplicate of another process's file descriptor (`pidfd_getfd(2)` is supported since Linux 5.6). +Permission to duplicate another process's file descriptor is governed by a ptrace access mode +`PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`) that can be changed using +the `/proc/sys/kernel/yama/ptrace_scope` interface in the following way: +```sh +$ sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" +``` + ##### Requirements 1) Linux or Windows OS 2) The `UMF_BUILD_LEVEL_ZERO_PROVIDER` option turned `ON` (by default) +3) IPC API on Linux requires the `PTRACE_MODE_ATTACH_REALCREDS` permission (see `ptrace(2)`) + to duplicate another process's file descriptor (see above). 
Additionally, required for tests: -3) The `UMF_BUILD_GPU_TESTS` option turned `ON` -4) System with Level Zero compatible GPU -5) Required packages: +4) The `UMF_BUILD_GPU_TESTS` option turned `ON` +5) System with Level Zero compatible GPU +6) Required packages: - liblevel-zero-dev (Linux) or level-zero-sdk (Windows) #### DevDax memory provider (Linux only) From 73d012e4d626c08b739d952710f0287ceb0b62ba Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 21 Nov 2024 08:38:47 +0100 Subject: [PATCH 011/466] Use libcoarse in the file provider Signed-off-by: Lukasz Dorau --- src/provider/provider_file_memory.c | 55 ++++++++++++++++++++++++++--- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/src/provider/provider_file_memory.c b/src/provider/provider_file_memory.c index 32383a5ec..9d332fd46 100644 --- a/src/provider/provider_file_memory.c +++ b/src/provider/provider_file_memory.c @@ -63,6 +63,7 @@ umf_result_t umfFileMemoryProviderParamsSetVisibility( #else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) #include "base_alloc_global.h" +#include "coarse.h" #include "critnib.h" #include "libumf.h" #include "utils_common.h" @@ -101,6 +102,8 @@ typedef struct file_memory_provider_t { // It is needed mainly in the get_ipc_handle and open_ipc_handle hooks // to mmap a specific part of a file. critnib *fd_offset_map; + + coarse_t *coarse; // coarse library handle } file_memory_provider_t; // File Memory Provider settings struct @@ -166,6 +169,14 @@ file_translate_params(umf_file_memory_provider_params_t *in_params, return UMF_RESULT_SUCCESS; } +static umf_result_t file_alloc_cb(void *provider, size_t size, size_t alignment, + void **resultPtr); +static umf_result_t file_allocation_split_cb(void *provider, void *ptr, + size_t totalSize, + size_t firstSize); +static umf_result_t file_allocation_merge_cb(void *provider, void *lowPtr, + void *highPtr, size_t totalSize); + static umf_result_t file_initialize(void *params, void **provider) { umf_result_t ret; @@ -233,10 +244,27 @@ static umf_result_t file_initialize(void *params, void **provider) { file_provider->page_size = utils_get_page_size(); } + coarse_params_t coarse_params = {0}; + coarse_params.provider = file_provider; + coarse_params.page_size = file_provider->page_size; + coarse_params.cb.alloc = file_alloc_cb; + coarse_params.cb.free = NULL; // not available for the file provider + coarse_params.cb.split = file_allocation_split_cb; + coarse_params.cb.merge = file_allocation_merge_cb; + + coarse_t *coarse = NULL; + ret = coarse_new(&coarse_params, &coarse); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("coarse_new() failed"); + goto err_close_fd; + } + + file_provider->coarse = coarse; + if (utils_mutex_init(&file_provider->lock) == NULL) { LOG_ERR("lock init failed"); ret = UMF_RESULT_ERROR_UNKNOWN; - goto err_close_fd; + goto err_coarse_delete; } file_provider->fd_offset_map = critnib_new(); @@ -261,6 +289,8 @@ static umf_result_t file_initialize(void *params, void **provider) { critnib_delete(file_provider->fd_offset_map); err_mutex_destroy_not_free: utils_mutex_destroy_not_free(&file_provider->lock); +err_coarse_delete: + coarse_delete(file_provider->coarse); err_close_fd: utils_close_fd(file_provider->fd); err_free_file_provider: @@ -285,6 +315,7 @@ static void file_finalize(void *provider) { utils_close_fd(file_provider->fd); critnib_delete(file_provider->fd_offset_map); critnib_delete(file_provider->mmaps); + coarse_delete(file_provider->coarse); umf_ba_global_free(file_provider); } @@ -443,6 +474,12 @@ static umf_result_t 
file_alloc_aligned(file_memory_provider_t *file_provider, static umf_result_t file_alloc(void *provider, size_t size, size_t alignment, void **resultPtr) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + return coarse_alloc(file_provider->coarse, size, alignment, resultPtr); +} + +static umf_result_t file_alloc_cb(void *provider, size_t size, size_t alignment, + void **resultPtr) { umf_result_t umf_result; int ret; @@ -568,10 +605,15 @@ static const char *file_get_name(void *provider) { return "FILE"; } -// This function is supposed to be thread-safe, so it should NOT be called concurrently -// with file_allocation_merge() with the same pointer. static umf_result_t file_allocation_split(void *provider, void *ptr, size_t totalSize, size_t firstSize) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + return coarse_split(file_provider->coarse, ptr, totalSize, firstSize); +} + +static umf_result_t file_allocation_split_cb(void *provider, void *ptr, + size_t totalSize, + size_t firstSize) { (void)totalSize; file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; @@ -601,9 +643,14 @@ static umf_result_t file_allocation_split(void *provider, void *ptr, return UMF_RESULT_SUCCESS; } -// It should NOT be called concurrently with file_allocation_split() with the same pointer. static umf_result_t file_allocation_merge(void *provider, void *lowPtr, void *highPtr, size_t totalSize) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + return coarse_merge(file_provider->coarse, lowPtr, highPtr, totalSize); +} + +static umf_result_t file_allocation_merge_cb(void *provider, void *lowPtr, + void *highPtr, size_t totalSize) { (void)lowPtr; (void)totalSize; From f81c64702db468a9bde7d13d2d2df039fe04e7e4 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 25 Nov 2024 10:05:23 +0100 Subject: [PATCH 012/466] Add free() op to the file provider Signed-off-by: Lukasz Dorau --- src/provider/provider_file_memory.c | 6 ++++++ test/provider_file_memory.cpp | 18 +++++++++--------- test/provider_file_memory_ipc.cpp | 2 +- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/provider/provider_file_memory.c b/src/provider/provider_file_memory.c index 9d332fd46..558b1062a 100644 --- a/src/provider/provider_file_memory.c +++ b/src/provider/provider_file_memory.c @@ -815,6 +815,11 @@ static umf_result_t file_close_ipc_handle(void *provider, void *ptr, return UMF_RESULT_SUCCESS; } +static umf_result_t file_free(void *provider, void *ptr, size_t size) { + file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + return coarse_free(file_provider->coarse, ptr, size); +} + static umf_memory_provider_ops_t UMF_FILE_MEMORY_PROVIDER_OPS = { .version = UMF_VERSION_CURRENT, .initialize = file_initialize, @@ -824,6 +829,7 @@ static umf_memory_provider_ops_t UMF_FILE_MEMORY_PROVIDER_OPS = { .get_recommended_page_size = file_get_recommended_page_size, .get_min_page_size = file_get_min_page_size, .get_name = file_get_name, + .ext.free = file_free, .ext.purge_lazy = file_purge_lazy, .ext.purge_force = file_purge_force, .ext.allocation_merge = file_allocation_merge, diff --git a/test/provider_file_memory.cpp b/test/provider_file_memory.cpp index d3124aa11..0d54c287c 100644 --- a/test/provider_file_memory.cpp +++ b/test/provider_file_memory.cpp @@ -98,7 +98,7 @@ static void test_alloc_free_success(umf_memory_provider_handle_t provider, } umf_result = umfMemoryProviderFree(provider, ptr, size); - 
ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } static void verify_last_native_error(umf_memory_provider_handle_t provider, @@ -159,7 +159,7 @@ TEST_F(test, test_if_mapped_with_MAP_SYNC) { bool flag_found = is_mapped_with_MAP_SYNC(path, buf, size); umf_result = umfMemoryProviderFree(hProvider, buf, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); umfMemoryProviderDestroy(hProvider); @@ -244,10 +244,10 @@ TEST_P(FileProviderParamsDefault, two_allocations) { memset(ptr2, 0x22, size); umf_result = umfMemoryProviderFree(provider.get(), ptr1, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); umf_result = umfMemoryProviderFree(provider.get(), ptr2, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } TEST_P(FileProviderParamsDefault, alloc_page64_align_0) { @@ -366,12 +366,12 @@ TEST_P(FileProviderParamsDefault, get_name) { TEST_P(FileProviderParamsDefault, free_size_0_ptr_not_null) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), INVALID_PTR, 0); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); } TEST_P(FileProviderParamsDefault, free_NULL) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), nullptr, 0); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } // other negative tests @@ -449,7 +449,7 @@ TEST_F(test, set_null_path) { TEST_P(FileProviderParamsDefault, free_INVALID_POINTER_SIZE_GT_0) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), INVALID_PTR, page_plus_64); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); } TEST_P(FileProviderParamsDefault, purge_lazy_INVALID_POINTER) { @@ -512,7 +512,7 @@ TEST_P(FileProviderParamsShared, IPC_base_success_test) { ASSERT_EQ(ret, 0); umf_result = umfMemoryProviderFree(provider.get(), ptr, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } TEST_P(FileProviderParamsShared, IPC_file_not_exist) { @@ -552,5 +552,5 @@ TEST_P(FileProviderParamsShared, IPC_file_not_exist) { ASSERT_EQ(new_ptr, nullptr); umf_result = umfMemoryProviderFree(provider.get(), ptr, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } diff --git a/test/provider_file_memory_ipc.cpp b/test/provider_file_memory_ipc.cpp index ee7ab6c8f..115322a47 100644 --- a/test/provider_file_memory_ipc.cpp +++ b/test/provider_file_memory_ipc.cpp @@ -73,7 +73,7 @@ HostMemoryAccessor hostAccessor; static std::vector ipcManyPoolsTestParamsList = { // TODO: enable it when sizes of allocations in ipcFixtures.hpp are fixed // {umfProxyPoolOps(), nullptr, umfFileMemoryProviderOps(), -// file_params_shared.get(), &hostAccessor, true}, +// file_params_shared.get(), &hostAccessor, false}, #ifdef UMF_POOL_JEMALLOC_ENABLED {umfJemallocPoolOps(), nullptr, umfFileMemoryProviderOps(), file_params_shared.get(), &hostAccessor, false}, From a27c45fa76ef8d0528e98b818b32e87a8a52efad Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 3 Dec 2024 11:46:55 +0100 Subject: [PATCH 013/466] Use libcoarse in the devdax provider Signed-off-by: Lukasz Dorau --- src/provider/provider_devdax_memory.c | 141 ++++++++++++-------------- 
test/provider_devdax_memory.cpp | 11 +- 2 files changed, 70 insertions(+), 82 deletions(-) diff --git a/src/provider/provider_devdax_memory.c b/src/provider/provider_devdax_memory.c index 32407acbb..c013c7ffb 100644 --- a/src/provider/provider_devdax_memory.c +++ b/src/provider/provider_devdax_memory.c @@ -58,6 +58,7 @@ umf_result_t umfDevDaxMemoryProviderParamsSetProtection( #else // !defined(_WIN32) && !defined(UMF_NO_HWLOC) #include "base_alloc_global.h" +#include "coarse.h" #include "libumf.h" #include "utils_common.h" #include "utils_concurrency.h" @@ -74,6 +75,7 @@ typedef struct devdax_memory_provider_t { size_t offset; // offset in the file used for memory mapping utils_mutex_t lock; // lock of ptr and offset unsigned protection; // combination of OS-specific protection flags + coarse_t *coarse; // coarse library handle } devdax_memory_provider_t; // DevDax Memory provider settings struct @@ -133,6 +135,12 @@ devdax_translate_params(umf_devdax_memory_provider_params_t *in_params, return UMF_RESULT_SUCCESS; } +static umf_result_t devdax_allocation_split_cb(void *provider, void *ptr, + size_t totalSize, + size_t firstSize); +static umf_result_t devdax_allocation_merge_cb(void *provider, void *lowPtr, + void *highPtr, size_t totalSize); + static umf_result_t devdax_initialize(void *params, void **provider) { umf_result_t ret; @@ -161,21 +169,42 @@ static umf_result_t devdax_initialize(void *params, void **provider) { memset(devdax_provider, 0, sizeof(*devdax_provider)); - ret = devdax_translate_params(in_params, devdax_provider); + coarse_params_t coarse_params = {0}; + coarse_params.provider = devdax_provider; + coarse_params.page_size = DEVDAX_PAGE_SIZE_2MB; + // The alloc callback is not available in case of the devdax provider + // because it is a fixed-size memory provider + // and the entire devdax memory is added as a single block + // to the coarse library. 
+ coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; // not available for the devdax provider + coarse_params.cb.split = devdax_allocation_split_cb; + coarse_params.cb.merge = devdax_allocation_merge_cb; + + coarse_t *coarse = NULL; + ret = coarse_new(&coarse_params, &coarse); if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("coarse_new() failed"); goto err_free_devdax_provider; } + devdax_provider->coarse = coarse; + + ret = devdax_translate_params(in_params, devdax_provider); + if (ret != UMF_RESULT_SUCCESS) { + goto err_coarse_delete; + } + devdax_provider->size = in_params->size; if (utils_copy_path(in_params->path, devdax_provider->path, PATH_MAX)) { - goto err_free_devdax_provider; + goto err_coarse_delete; } int fd = utils_devdax_open(in_params->path); if (fd == -1) { LOG_ERR("cannot open the device DAX: %s", in_params->path); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_free_devdax_provider; + goto err_coarse_delete; } bool is_dax = false; @@ -189,23 +218,26 @@ static umf_result_t devdax_initialize(void *params, void **provider) { LOG_PDEBUG("mapping the devdax failed (path=%s, size=%zu)", in_params->path, devdax_provider->size); ret = UMF_RESULT_ERROR_UNKNOWN; - goto err_free_devdax_provider; + goto err_coarse_delete; } if (!is_dax) { LOG_ERR("mapping the devdax with MAP_SYNC failed: %s", in_params->path); ret = UMF_RESULT_ERROR_UNKNOWN; - - if (devdax_provider->base) { - utils_munmap(devdax_provider->base, devdax_provider->size); - } - - goto err_free_devdax_provider; + goto err_unmap_devdax; } LOG_DEBUG("devdax memory mapped (path=%s, size=%zu, addr=%p)", in_params->path, devdax_provider->size, devdax_provider->base); + // add the entire devdax memory as a single block + ret = coarse_add_memory_fixed(coarse, devdax_provider->base, + devdax_provider->size); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("adding memory block failed"); + goto err_unmap_devdax; + } + if (utils_mutex_init(&devdax_provider->lock) == NULL) { LOG_ERR("lock init failed"); ret = UMF_RESULT_ERROR_UNKNOWN; @@ -217,7 +249,11 @@ static umf_result_t devdax_initialize(void *params, void **provider) { return UMF_RESULT_SUCCESS; err_unmap_devdax: - utils_munmap(devdax_provider->base, devdax_provider->size); + if (devdax_provider->base) { + utils_munmap(devdax_provider->base, devdax_provider->size); + } +err_coarse_delete: + coarse_delete(devdax_provider->coarse); err_free_devdax_provider: umf_ba_global_free(devdax_provider); return ret; @@ -227,78 +263,15 @@ static void devdax_finalize(void *provider) { devdax_memory_provider_t *devdax_provider = provider; utils_mutex_destroy_not_free(&devdax_provider->lock); utils_munmap(devdax_provider->base, devdax_provider->size); + coarse_delete(devdax_provider->coarse); umf_ba_global_free(devdax_provider); } -static int devdax_alloc_aligned(size_t length, size_t alignment, void *base, - size_t size, utils_mutex_t *lock, - void **out_addr, size_t *offset) { - assert(out_addr); - - if (utils_mutex_lock(lock)) { - LOG_ERR("locking file offset failed"); - return -1; - } - - uintptr_t ptr = (uintptr_t)base + *offset; - uintptr_t rest_of_div = alignment ? 
(ptr % alignment) : 0; - - if (alignment > 0 && rest_of_div > 0) { - ptr += alignment - rest_of_div; - } - - size_t new_offset = ptr - (uintptr_t)base + length; - - if (new_offset > size) { - utils_mutex_unlock(lock); - LOG_ERR("cannot allocate more memory than the device DAX size: %zu", - size); - return -1; - } - - *offset = new_offset; - *out_addr = (void *)ptr; - - utils_mutex_unlock(lock); - - return 0; -} - static umf_result_t devdax_alloc(void *provider, size_t size, size_t alignment, void **resultPtr) { - int ret; - - // alignment must be a power of two and a multiple or a divider of the page size - if (alignment && ((alignment & (alignment - 1)) || - ((alignment % DEVDAX_PAGE_SIZE_2MB) && - (DEVDAX_PAGE_SIZE_2MB % alignment)))) { - LOG_ERR("wrong alignment: %zu (not a power of 2 or a multiple or a " - "divider of the page size (%zu))", - alignment, DEVDAX_PAGE_SIZE_2MB); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (IS_NOT_ALIGNED(alignment, DEVDAX_PAGE_SIZE_2MB)) { - alignment = ALIGN_UP(alignment, DEVDAX_PAGE_SIZE_2MB); - } - devdax_memory_provider_t *devdax_provider = (devdax_memory_provider_t *)provider; - - void *addr = NULL; - errno = 0; - ret = devdax_alloc_aligned(size, alignment, devdax_provider->base, - devdax_provider->size, &devdax_provider->lock, - &addr, &devdax_provider->offset); - if (ret) { - devdax_store_last_native_error(UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED, 0); - LOG_ERR("memory allocation failed"); - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; - } - - *resultPtr = addr; - - return UMF_RESULT_SUCCESS; + return coarse_alloc(devdax_provider->coarse, size, alignment, resultPtr); } static void devdax_get_last_native_error(void *provider, const char **ppMessage, @@ -384,6 +357,14 @@ static const char *devdax_get_name(void *provider) { static umf_result_t devdax_allocation_split(void *provider, void *ptr, size_t totalSize, size_t firstSize) { + devdax_memory_provider_t *devdax_provider = + (devdax_memory_provider_t *)provider; + return coarse_split(devdax_provider->coarse, ptr, totalSize, firstSize); +} + +static umf_result_t devdax_allocation_split_cb(void *provider, void *ptr, + size_t totalSize, + size_t firstSize) { (void)provider; (void)ptr; (void)totalSize; @@ -393,6 +374,14 @@ static umf_result_t devdax_allocation_split(void *provider, void *ptr, static umf_result_t devdax_allocation_merge(void *provider, void *lowPtr, void *highPtr, size_t totalSize) { + devdax_memory_provider_t *devdax_provider = + (devdax_memory_provider_t *)provider; + return coarse_merge(devdax_provider->coarse, lowPtr, highPtr, totalSize); +} + +static umf_result_t devdax_allocation_merge_cb(void *provider, void *lowPtr, + void *highPtr, + size_t totalSize) { (void)provider; (void)lowPtr; (void)highPtr; diff --git a/test/provider_devdax_memory.cpp b/test/provider_devdax_memory.cpp index 0fd0705da..bb00f205a 100644 --- a/test/provider_devdax_memory.cpp +++ b/test/provider_devdax_memory.cpp @@ -237,30 +237,29 @@ TEST_P(umfProviderTest, purge_force) { TEST_P(umfProviderTest, alloc_page64_align_page_minus_1_WRONG_ALIGNMENT_1) { test_alloc_failure(provider.get(), page_plus_64, page_size - 1, - UMF_RESULT_ERROR_INVALID_ARGUMENT, 0); + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); } TEST_P(umfProviderTest, alloc_page64_align_one_half_pages_WRONG_ALIGNMENT_2) { test_alloc_failure(provider.get(), page_plus_64, page_size + (page_size / 2), - UMF_RESULT_ERROR_INVALID_ARGUMENT, 0); + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); } TEST_P(umfProviderTest, alloc_page64_WRONG_ALIGNMENT_3_pages) { 
test_alloc_failure(provider.get(), page_plus_64, 3 * page_size, - UMF_RESULT_ERROR_INVALID_ARGUMENT, 0); + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); } TEST_P(umfProviderTest, alloc_3_pages_WRONG_ALIGNMENT_3_pages) { test_alloc_failure(provider.get(), 3 * page_size, 3 * page_size, - UMF_RESULT_ERROR_INVALID_ARGUMENT, 0); + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); } TEST_P(umfProviderTest, alloc_WRONG_SIZE) { size_t size = (size_t)(-1) & ~(page_size - 1); test_alloc_failure(provider.get(), size, 0, - UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC, - UMF_DEVDAX_RESULT_ERROR_ALLOC_FAILED); + UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY, 0); } // other positive tests From 96605c4a3a962d698c5871cf40ae861585abfc7e Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 25 Nov 2024 10:15:34 +0100 Subject: [PATCH 014/466] Add free() to the devdax provider --- src/provider/provider_devdax_memory.c | 7 +++++++ test/provider_devdax_memory.cpp | 10 +++++----- test/provider_devdax_memory_ipc.cpp | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/src/provider/provider_devdax_memory.c b/src/provider/provider_devdax_memory.c index c013c7ffb..463b796ec 100644 --- a/src/provider/provider_devdax_memory.c +++ b/src/provider/provider_devdax_memory.c @@ -516,6 +516,12 @@ static umf_result_t devdax_close_ipc_handle(void *provider, void *ptr, return UMF_RESULT_SUCCESS; } +static umf_result_t devdax_free(void *provider, void *ptr, size_t size) { + devdax_memory_provider_t *devdax_provider = + (devdax_memory_provider_t *)provider; + return coarse_free(devdax_provider->coarse, ptr, size); +} + static umf_memory_provider_ops_t UMF_DEVDAX_MEMORY_PROVIDER_OPS = { .version = UMF_VERSION_CURRENT, .initialize = devdax_initialize, @@ -525,6 +531,7 @@ static umf_memory_provider_ops_t UMF_DEVDAX_MEMORY_PROVIDER_OPS = { .get_recommended_page_size = devdax_get_recommended_page_size, .get_min_page_size = devdax_get_min_page_size, .get_name = devdax_get_name, + .ext.free = devdax_free, .ext.purge_lazy = devdax_purge_lazy, .ext.purge_force = devdax_purge_force, .ext.allocation_merge = devdax_allocation_merge, diff --git a/test/provider_devdax_memory.cpp b/test/provider_devdax_memory.cpp index bb00f205a..afff1de4f 100644 --- a/test/provider_devdax_memory.cpp +++ b/test/provider_devdax_memory.cpp @@ -100,7 +100,7 @@ static void test_alloc_free_success(umf_memory_provider_handle_t provider, } umf_result = umfMemoryProviderFree(provider, ptr, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } static void verify_last_native_error(umf_memory_provider_handle_t provider, @@ -162,7 +162,7 @@ TEST_F(test, test_if_mapped_with_MAP_SYNC) { bool flag_found = is_mapped_with_MAP_SYNC(path, buf, size); umf_result = umfMemoryProviderFree(hProvider, buf, size); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); umfMemoryProviderDestroy(hProvider); @@ -294,12 +294,12 @@ TEST_P(umfProviderTest, get_name) { TEST_P(umfProviderTest, free_size_0_ptr_not_null) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), INVALID_PTR, 0); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); } TEST_P(umfProviderTest, free_NULL) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), nullptr, 0); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } // other negative tests @@ -307,7 +307,7 @@ TEST_P(umfProviderTest, 
free_NULL) { TEST_P(umfProviderTest, free_INVALID_POINTER_SIZE_GT_0) { umf_result_t umf_result = umfMemoryProviderFree(provider.get(), INVALID_PTR, page_plus_64); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); } TEST_P(umfProviderTest, purge_lazy_INVALID_POINTER) { diff --git a/test/provider_devdax_memory_ipc.cpp b/test/provider_devdax_memory_ipc.cpp index 3941f66e9..921347f40 100644 --- a/test/provider_devdax_memory_ipc.cpp +++ b/test/provider_devdax_memory_ipc.cpp @@ -53,7 +53,7 @@ static std::vector getIpcProxyPoolTestParamsList(void) { ipcProxyPoolTestParamsList = { {umfProxyPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - defaultDevDaxParams.get(), &hostAccessor, true}, + defaultDevDaxParams.get(), &hostAccessor, false}, #ifdef UMF_POOL_JEMALLOC_ENABLED {umfJemallocPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), defaultDevDaxParams.get(), &hostAccessor, false}, From d9f1feec2d314784b7bd2da0281fd4411544cf9d Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 3 Dec 2024 11:47:23 +0100 Subject: [PATCH 015/466] Remove free_not_supp from the ipcTestParams tuple Remove free_not_supp from the ipcTestParams tuple. It is not needed any more. Signed-off-by: Lukasz Dorau --- test/ipcAPI.cpp | 2 +- test/ipcFixtures.hpp | 44 +++++++------------------- test/provider_devdax_memory_ipc.cpp | 6 ++-- test/provider_file_memory_ipc.cpp | 12 +++---- test/provider_os_memory.cpp | 4 +-- test/providers/provider_level_zero.cpp | 10 +++--- 6 files changed, 29 insertions(+), 49 deletions(-) diff --git a/test/ipcAPI.cpp b/test/ipcAPI.cpp index 4df32a1c9..aa22f353d 100644 --- a/test/ipcAPI.cpp +++ b/test/ipcAPI.cpp @@ -116,4 +116,4 @@ HostMemoryAccessor hostMemoryAccessor; INSTANTIATE_TEST_SUITE_P(umfIpcTestSuite, umfIpcTest, ::testing::Values(ipcTestParams{ umfProxyPoolOps(), nullptr, &IPC_MOCK_PROVIDER_OPS, - nullptr, &hostMemoryAccessor, false})); + nullptr, &hostMemoryAccessor})); diff --git a/test/ipcFixtures.hpp b/test/ipcFixtures.hpp index 161a84844..6fbacfa22 100644 --- a/test/ipcFixtures.hpp +++ b/test/ipcFixtures.hpp @@ -47,25 +47,23 @@ class HostMemoryAccessor : public MemoryAccessor { }; // ipcTestParams: -// pool_ops, pool_params, provider_ops, provider_params, memoryAccessor, free_not_supp -// free_not_supp (bool) - provider does not support the free() op +// pool_ops, pool_params, provider_ops, provider_params, memoryAccessor using ipcTestParams = std::tuple; + void *, MemoryAccessor *>; struct umfIpcTest : umf_test::test, ::testing::WithParamInterface { umfIpcTest() {} void SetUp() override { test::SetUp(); - auto [pool_ops, pool_params, provider_ops, provider_params, accessor, - free_not_supp] = this->GetParam(); + auto [pool_ops, pool_params, provider_ops, provider_params, accessor] = + this->GetParam(); poolOps = pool_ops; poolParams = pool_params; providerOps = provider_ops; providerParams = provider_params; memAccessor = accessor; - freeNotSupported = free_not_supp; } void TearDown() override { test::TearDown(); } @@ -124,18 +122,8 @@ struct umfIpcTest : umf_test::test, void *poolParams = nullptr; umf_memory_provider_ops_t *providerOps = nullptr; void *providerParams = nullptr; - bool freeNotSupported = false; }; -static inline umf_result_t -get_umf_result_of_free(bool freeNotSupported, umf_result_t expected_result) { - if (freeNotSupported) { - return UMF_RESULT_ERROR_NOT_SUPPORTED; - } - - return expected_result; -} - TEST_P(umfIpcTest, GetIPCHandleSize) { size_t size = 0; umf::pool_unique_handle_t pool = 
makePool(); @@ -177,8 +165,7 @@ TEST_P(umfIpcTest, GetIPCHandleInvalidArgs) { EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); ret = umfFree(ptr); - EXPECT_EQ(ret, - get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); } TEST_P(umfIpcTest, CloseIPCHandleInvalidPtr) { @@ -239,8 +226,7 @@ TEST_P(umfIpcTest, BasicFlow) { EXPECT_EQ(ret, UMF_RESULT_SUCCESS); ret = umfPoolFree(pool.get(), ptr); - EXPECT_EQ(ret, - get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); pool.reset(nullptr); EXPECT_EQ(stat.getCount, 1); @@ -303,8 +289,7 @@ TEST_P(umfIpcTest, GetPoolByOpenedHandle) { for (size_t i = 0; i < NUM_ALLOCS; ++i) { umf_result_t ret = umfFree(ptrs[i]); - EXPECT_EQ(ret, - get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); } } @@ -330,8 +315,7 @@ TEST_P(umfIpcTest, AllocFreeAllocTest) { EXPECT_EQ(ret, UMF_RESULT_SUCCESS); ret = umfPoolFree(pool.get(), ptr); - EXPECT_EQ(ret, - get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); ptr = umfPoolMalloc(pool.get(), SIZE); ASSERT_NE(ptr, nullptr); @@ -353,8 +337,7 @@ TEST_P(umfIpcTest, AllocFreeAllocTest) { EXPECT_EQ(ret, UMF_RESULT_SUCCESS); ret = umfPoolFree(pool.get(), ptr); - EXPECT_EQ(ret, - get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); pool.reset(nullptr); EXPECT_EQ(stat.getCount, stat.putCount); @@ -405,8 +388,7 @@ TEST_P(umfIpcTest, openInTwoPools) { EXPECT_EQ(ret, UMF_RESULT_SUCCESS); ret = umfPoolFree(pool1.get(), ptr); - EXPECT_EQ(ret, - get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); pool1.reset(nullptr); pool2.reset(nullptr); @@ -457,8 +439,7 @@ TEST_P(umfIpcTest, ConcurrentGetPutHandles) { for (void *ptr : ptrs) { umf_result_t ret = umfPoolFree(pool.get(), ptr); - EXPECT_EQ(ret, - get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); } pool.reset(nullptr); @@ -520,8 +501,7 @@ TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) { for (void *ptr : ptrs) { umf_result_t ret = umfPoolFree(pool.get(), ptr); - EXPECT_EQ(ret, - get_umf_result_of_free(freeNotSupported, UMF_RESULT_SUCCESS)); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); } pool.reset(nullptr); diff --git a/test/provider_devdax_memory_ipc.cpp b/test/provider_devdax_memory_ipc.cpp index 921347f40..ed4f1a5f8 100644 --- a/test/provider_devdax_memory_ipc.cpp +++ b/test/provider_devdax_memory_ipc.cpp @@ -53,14 +53,14 @@ static std::vector getIpcProxyPoolTestParamsList(void) { ipcProxyPoolTestParamsList = { {umfProxyPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - defaultDevDaxParams.get(), &hostAccessor, false}, + defaultDevDaxParams.get(), &hostAccessor}, #ifdef UMF_POOL_JEMALLOC_ENABLED {umfJemallocPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - defaultDevDaxParams.get(), &hostAccessor, false}, + defaultDevDaxParams.get(), &hostAccessor}, #endif #ifdef UMF_POOL_SCALABLE_ENABLED {umfScalablePoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - defaultDevDaxParams.get(), &hostAccessor, false}, + defaultDevDaxParams.get(), &hostAccessor}, #endif }; diff --git a/test/provider_file_memory_ipc.cpp b/test/provider_file_memory_ipc.cpp index 115322a47..70c1acd8f 100644 --- a/test/provider_file_memory_ipc.cpp +++ b/test/provider_file_memory_ipc.cpp @@ -73,14 +73,14 @@ HostMemoryAccessor hostAccessor; static std::vector ipcManyPoolsTestParamsList = { // TODO: 
enable it when sizes of allocations in ipcFixtures.hpp are fixed // {umfProxyPoolOps(), nullptr, umfFileMemoryProviderOps(), -// file_params_shared.get(), &hostAccessor, false}, +// file_params_shared.get(), &hostAccessor}, #ifdef UMF_POOL_JEMALLOC_ENABLED {umfJemallocPoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_shared.get(), &hostAccessor, false}, + file_params_shared.get(), &hostAccessor}, #endif #ifdef UMF_POOL_SCALABLE_ENABLED {umfScalablePoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_shared.get(), &hostAccessor, false}, + file_params_shared.get(), &hostAccessor}, #endif }; @@ -96,14 +96,14 @@ static std::vector getIpcFsDaxTestParamsList(void) { ipcFsDaxTestParamsList = { // TODO: enable it when sizes of allocations in ipcFixtures.hpp are fixed // {umfProxyPoolOps(), nullptr, umfFileMemoryProviderOps(), -// file_params_fsdax.get(), &hostAccessor, true}, +// file_params_fsdax.get(), &hostAccessor}, #ifdef UMF_POOL_JEMALLOC_ENABLED {umfJemallocPoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_fsdax.get(), &hostAccessor, false}, + file_params_fsdax.get(), &hostAccessor}, #endif #ifdef UMF_POOL_SCALABLE_ENABLED {umfScalablePoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_fsdax.get(), &hostAccessor, false}, + file_params_fsdax.get(), &hostAccessor}, #endif }; diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index 57bce46d2..687db0805 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -465,11 +465,11 @@ disjoint_params_unique_handle_t disjointParams = disjointPoolParams(); static std::vector ipcTestParamsList = { #if (defined UMF_POOL_DISJOINT_ENABLED) {umfDisjointPoolOps(), disjointParams.get(), umfOsMemoryProviderOps(), - os_params.get(), &hostAccessor, false}, + os_params.get(), &hostAccessor}, #endif #ifdef UMF_POOL_JEMALLOC_ENABLED {umfJemallocPoolOps(), nullptr, umfOsMemoryProviderOps(), os_params.get(), - &hostAccessor, false}, + &hostAccessor}, #endif }; diff --git a/test/providers/provider_level_zero.cpp b/test/providers/provider_level_zero.cpp index d0584777b..78b5e4847 100644 --- a/test/providers/provider_level_zero.cpp +++ b/test/providers/provider_level_zero.cpp @@ -454,9 +454,9 @@ INSTANTIATE_TEST_SUITE_P( #ifdef _WIN32 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); #else -INSTANTIATE_TEST_SUITE_P( - umfLevelZeroProviderTestSuite, umfIpcTest, - ::testing::Values(ipcTestParams{ - umfProxyPoolOps(), nullptr, umfLevelZeroMemoryProviderOps(), - l0Params_device_memory.get(), &l0Accessor, false})); +INSTANTIATE_TEST_SUITE_P(umfLevelZeroProviderTestSuite, umfIpcTest, + ::testing::Values(ipcTestParams{ + umfProxyPoolOps(), nullptr, + umfLevelZeroMemoryProviderOps(), + l0Params_device_memory.get(), &l0Accessor})); #endif From d8a340e06749ca6943472e6ac33fb443bcdb791e Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 27 Nov 2024 14:25:28 +0100 Subject: [PATCH 016/466] Disable jemalloc pool on RHEL Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_multi_numa.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable_multi_numa.yml b/.github/workflows/reusable_multi_numa.yml index c012f3e19..2ccb2d8f3 100644 --- a/.github/workflows/reusable_multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -45,7 +45,7 @@ jobs: -DUMF_BUILD_TESTS=ON -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON - -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=${{ matrix.os == 'rhel-9.1' && 'OFF' || 'ON' }} 
-DUMF_TESTS_FAIL_ON_SKIP=ON ${{ matrix.build_type == 'Debug' && matrix.os == 'ubuntu-22.04' && '-DUMF_USE_COVERAGE=ON' || '' }} From e95d92edb5faa3aa7ae99e56e3f2dc0a4153e641 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 4 Dec 2024 12:25:20 +0100 Subject: [PATCH 017/466] Link statically with custom jemalloc with disabled initial TLS Link statically with custom jemalloc built from sources with the following non-default options enabled: --with-jemalloc-prefix=je_ - add je_ prefix to all public APIs --disable-cxx - Disable C++ integration. This will cause new and delete operators implementations to be omitted. --disable-initial-exec-tls - Disable the initial-exec TLS model for jemalloc's internal thread-local storage (on those platforms that support explicit settings). This can allow jemalloc to be dynamically loaded after program startup (e.g. using dlopen). Fixes: #891 Fixes: #894 Fixes: #903 Signed-off-by: Lukasz Dorau --- .github/docker/ubuntu-20.04.Dockerfile | 2 +- .github/docker/ubuntu-22.04.Dockerfile | 2 +- .github/workflows/coverity.yml | 2 +- .github/workflows/nightly.yml | 2 +- .github/workflows/reusable_basic.yml | 4 +- .github/workflows/reusable_benchmarks.yml | 2 +- .github/workflows/reusable_codeql.yml | 2 +- .github/workflows/reusable_fast.yml | 4 +- .github/workflows/reusable_proxy_lib.yml | 2 +- .github/workflows/reusable_sanitizers.yml | 2 +- .github/workflows/reusable_valgrind.yml | 2 +- CMakeLists.txt | 94 ++++++++++++++++--- README.md | 16 +++- benchmark/ubench.c | 4 +- examples/CMakeLists.txt | 7 +- examples/cmake/FindJEMALLOC.cmake | 6 +- examples/dram_and_fsdax/CMakeLists.txt | 6 +- scripts/qemu/run-build.sh | 2 +- src/pool/CMakeLists.txt | 5 +- src/pool/pool_jemalloc.c | 10 -- test/CMakeLists.txt | 7 ++ .../drd-umf_test-jemalloc_coarse_devdax.supp | 23 +---- .../drd-umf_test-jemalloc_coarse_file.supp | 23 +---- test/supp/drd-umf_test-jemalloc_pool.supp | 5 +- ...grind-umf_test-jemalloc_coarse_devdax.supp | 23 +---- ...elgrind-umf_test-jemalloc_coarse_file.supp | 23 +---- .../supp/helgrind-umf_test-jemalloc_pool.supp | 5 +- 27 files changed, 148 insertions(+), 137 deletions(-) diff --git a/.github/docker/ubuntu-20.04.Dockerfile b/.github/docker/ubuntu-20.04.Dockerfile index 069deeac9..a6a45a8c1 100644 --- a/.github/docker/ubuntu-20.04.Dockerfile +++ b/.github/docker/ubuntu-20.04.Dockerfile @@ -24,7 +24,6 @@ ARG BASE_DEPS="\ # UMF's dependencies ARG UMF_DEPS="\ - libjemalloc-dev \ libhwloc-dev \ libtbb-dev" @@ -34,6 +33,7 @@ ARG TEST_DEPS="\ # Miscellaneous for our builds/CI (optional) ARG MISC_DEPS="\ + automake \ clang \ g++-7 \ python3-pip \ diff --git a/.github/docker/ubuntu-22.04.Dockerfile b/.github/docker/ubuntu-22.04.Dockerfile index 08d546083..75c71c526 100644 --- a/.github/docker/ubuntu-22.04.Dockerfile +++ b/.github/docker/ubuntu-22.04.Dockerfile @@ -24,7 +24,6 @@ ARG BASE_DEPS="\ # UMF's dependencies ARG UMF_DEPS="\ - libjemalloc-dev \ libhwloc-dev \ libtbb-dev" @@ -34,6 +33,7 @@ ARG TEST_DEPS="\ # Miscellaneous for our builds/CI (optional) ARG MISC_DEPS="\ + automake \ clang \ python3-pip \ sudo \ diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index dfa03fc4f..531a463c7 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -31,7 +31,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y cmake hwloc libhwloc-dev libjemalloc-dev libnuma-dev libtbb-dev + sudo apt-get install -y cmake hwloc libhwloc-dev libnuma-dev libtbb-dev - name: Download Coverity 
run: | diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 281ae0061..46543fac8 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -67,7 +67,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y cmake hwloc libhwloc-dev libjemalloc-dev libnuma-dev libtbb-dev valgrind + sudo apt-get install -y cmake hwloc libhwloc-dev libnuma-dev libtbb-dev valgrind - name: Configure CMake run: > diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 9b71c7d1b..0d27fb9c3 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -124,7 +124,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y clang cmake libnuma-dev libjemalloc-dev lcov + sudo apt-get install -y clang cmake libnuma-dev lcov - name: Install TBB apt package if: matrix.install_tbb == 'ON' @@ -469,7 +469,7 @@ jobs: python3 -m pip install -r third_party/requirements.txt - name: Install hwloc - run: brew install hwloc jemalloc tbb + run: brew install hwloc tbb automake - name: Configure build run: > diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index 41710029c..ed6a48294 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -34,7 +34,7 @@ jobs: if: matrix.os == 'ubuntu-latest' run: | sudo apt-get update - sudo apt-get install -y cmake libhwloc-dev libnuma-dev libjemalloc-dev libtbb-dev + sudo apt-get install -y cmake libhwloc-dev libnuma-dev libtbb-dev - name: Initialize vcpkg if: matrix.os == 'windows-latest' diff --git a/.github/workflows/reusable_codeql.yml b/.github/workflows/reusable_codeql.yml index e76456310..046c32081 100644 --- a/.github/workflows/reusable_codeql.yml +++ b/.github/workflows/reusable_codeql.yml @@ -62,7 +62,7 @@ jobs: if: matrix.os == 'ubuntu-latest' run: | sudo apt-get update - sudo apt-get install -y cmake clang libhwloc-dev libnuma-dev libjemalloc-dev libtbb-dev + sudo apt-get install -y cmake clang libhwloc-dev libnuma-dev libtbb-dev # Latest distros do not allow global pip installation - name: "[Lin] Install Python requirements in venv" diff --git a/.github/workflows/reusable_fast.yml b/.github/workflows/reusable_fast.yml index e25de68a1..5673727ac 100644 --- a/.github/workflows/reusable_fast.yml +++ b/.github/workflows/reusable_fast.yml @@ -79,13 +79,13 @@ jobs: if: matrix.os == 'ubuntu-latest' run: | sudo apt-get update - sudo apt-get install -y cmake libjemalloc-dev libhwloc-dev libnuma-dev libtbb-dev + sudo apt-get install -y cmake libhwloc-dev libnuma-dev libtbb-dev - name: Install dependencies (ubuntu-20.04) if: matrix.os == 'ubuntu-20.04' run: | sudo apt-get update - sudo apt-get install -y cmake libjemalloc-dev libnuma-dev libtbb-dev + sudo apt-get install -y cmake libnuma-dev libtbb-dev .github/scripts/install_hwloc.sh # install hwloc-2.3.0 instead of hwloc-2.1.0 present in the OS package - name: Set ptrace value for IPC test (on Linux only) diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index 2a27161b3..103c4a516 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -32,7 +32,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y cmake libhwloc-dev libjemalloc-dev libtbb-dev lcov + sudo apt-get install -y cmake libhwloc-dev libtbb-dev lcov - name: Set 
ptrace value for IPC test run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" diff --git a/.github/workflows/reusable_sanitizers.yml b/.github/workflows/reusable_sanitizers.yml index 3acda6833..f9e121f88 100644 --- a/.github/workflows/reusable_sanitizers.yml +++ b/.github/workflows/reusable_sanitizers.yml @@ -29,7 +29,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y clang cmake libhwloc-dev libnuma-dev libjemalloc-dev libtbb-dev + sudo apt-get install -y clang cmake libhwloc-dev libnuma-dev libtbb-dev - name: Install oneAPI basekit if: matrix.compiler.cxx == 'icpx' diff --git a/.github/workflows/reusable_valgrind.yml b/.github/workflows/reusable_valgrind.yml index 86ceb68c6..3e0af273a 100644 --- a/.github/workflows/reusable_valgrind.yml +++ b/.github/workflows/reusable_valgrind.yml @@ -20,7 +20,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y cmake hwloc libhwloc-dev libjemalloc-dev libnuma-dev libtbb-dev valgrind + sudo apt-get install -y cmake hwloc libhwloc-dev libnuma-dev libtbb-dev valgrind - name: Configure CMake run: > diff --git a/CMakeLists.txt b/CMakeLists.txt index 4dcc293d2..fefe64685 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,6 +111,87 @@ else() message(FATAL_ERROR "Unknown OS type") endif() +if(NOT UMF_BUILD_LIBUMF_POOL_JEMALLOC) + set(UMF_POOL_JEMALLOC_ENABLED FALSE) +elseif(WINDOWS) + pkg_check_modules(JEMALLOC jemalloc) + if(NOT JEMALLOC_FOUND) + find_package(JEMALLOC REQUIRED jemalloc) + endif() +else() + if(NOT DEFINED UMF_JEMALLOC_REPO) + set(UMF_JEMALLOC_REPO "https://github.com/jemalloc/jemalloc.git") + endif() + + if(NOT DEFINED UMF_JEMALLOC_TAG) + set(UMF_JEMALLOC_TAG 5.3.0) + endif() + + include(FetchContent) + message( + STATUS + "Will fetch jemalloc from ${UMF_JEMALLOC_REPO} (tag: ${UMF_JEMALLOC_TAG})" + ) + + FetchContent_Declare( + jemalloc_targ + GIT_REPOSITORY ${UMF_JEMALLOC_REPO} + GIT_TAG ${UMF_JEMALLOC_TAG}) + FetchContent_MakeAvailable(jemalloc_targ) + + add_custom_command( + COMMAND ./autogen.sh + WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} + OUTPUT ${jemalloc_targ_SOURCE_DIR}/configure) + add_custom_command( + # Custom jemalloc build. Non-default options used: + # --with-jemalloc-prefix=je_ - add je_ prefix to all public APIs + # --disable-cxx - Disable C++ integration. This will cause new and + # delete operators implementations to be omitted. + # --disable-initial-exec-tls - Disable the initial-exec TLS model for + # jemalloc's internal thread-local storage (on those platforms that + # support explicit settings). This can allow jemalloc to be dynamically + # loaded after program startup (e.g. using dlopen). 
+ COMMAND + ./configure --prefix=${jemalloc_targ_BINARY_DIR} + --with-jemalloc-prefix=je_ --disable-cxx --disable-initial-exec-tls + CFLAGS=-fPIC + WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} + OUTPUT ${jemalloc_targ_SOURCE_DIR}/Makefile + DEPENDS ${jemalloc_targ_SOURCE_DIR}/configure) + add_custom_command( + COMMAND make + WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} + OUTPUT ${jemalloc_targ_SOURCE_DIR}/lib/libjemalloc.la + DEPENDS ${jemalloc_targ_SOURCE_DIR}/Makefile) + add_custom_command( + COMMAND make install + WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} + OUTPUT ${jemalloc_targ_BINARY_DIR}/lib/libjemalloc.a + DEPENDS ${jemalloc_targ_SOURCE_DIR}/lib/libjemalloc.la) + + add_custom_target(jemalloc_prod + DEPENDS ${jemalloc_targ_BINARY_DIR}/lib/libjemalloc.a) + add_library(jemalloc INTERFACE) + target_link_libraries( + jemalloc INTERFACE ${jemalloc_targ_BINARY_DIR}/lib/libjemalloc.a) + add_dependencies(jemalloc jemalloc_prod) + + set(JEMALLOC_LIBRARY_DIRS ${jemalloc_targ_BINARY_DIR}/lib) + set(JEMALLOC_INCLUDE_DIRS ${jemalloc_targ_BINARY_DIR}/include) + set(JEMALLOC_LIBRARIES ${jemalloc_targ_BINARY_DIR}/lib/libjemalloc.a) +endif() + +if(JEMALLOC_FOUND OR JEMALLOC_LIBRARIES) + set(UMF_POOL_JEMALLOC_ENABLED TRUE) + # add PATH to DLL on Windows + set(DLL_PATH_LIST + "${DLL_PATH_LIST};PATH=path_list_append:${JEMALLOC_DLL_DIRS}") + message(STATUS " JEMALLOC_LIBRARIES = ${JEMALLOC_LIBRARIES}") + message(STATUS " JEMALLOC_INCLUDE_DIRS = ${JEMALLOC_INCLUDE_DIRS}") + message(STATUS " JEMALLOC_LIBRARY_DIRS = ${JEMALLOC_LIBRARY_DIRS}") +endif() + if(UMF_DISABLE_HWLOC) message(STATUS "hwloc is disabled, hence OS provider, memtargets, " "topology discovery, examples won't be available!") @@ -402,19 +483,6 @@ else() set(UMF_POOL_SCALABLE_ENABLED FALSE) endif() -if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) - pkg_check_modules(JEMALLOC jemalloc) - if(NOT JEMALLOC_FOUND) - find_package(JEMALLOC REQUIRED jemalloc) - endif() - if(JEMALLOC_FOUND OR JEMALLOC_LIBRARIES) - set(UMF_POOL_JEMALLOC_ENABLED TRUE) - # add PATH to DLL on Windows - set(DLL_PATH_LIST - "${DLL_PATH_LIST};PATH=path_list_append:${JEMALLOC_DLL_DIRS}") - endif() -endif() - if(WINDOWS) # TODO: enable the proxy library in the Debug build on Windows # diff --git a/README.md b/README.md index 3379132e7..c04d7d22e 100644 --- a/README.md +++ b/README.md @@ -281,11 +281,25 @@ pool manager built as a separate static library: libjemalloc_pool.a on Linux and jemalloc_pool.lib on Windows. The `UMF_BUILD_LIBUMF_POOL_JEMALLOC` option has to be turned `ON` to build this library. +[jemalloc](https://github.com/jemalloc/jemalloc) is required to build the jemalloc pool. + +In case of Linux OS jemalloc is built from the (fetched) sources with the following +non-default options enabled: +- `--with-jemalloc-prefix=je_` - adds the `je_` prefix to all public APIs, +- `--disable-cxx` - disables C++ integration, it will cause the `new` and the `delete` + operators implementations to be omitted. +- `--disable-initial-exec-tls` - disables the initial-exec TLS model for jemalloc's + internal thread-local storage (on those platforms that support + explicit settings), it can allow jemalloc to be dynamically + loaded after program startup (e.g. using `dlopen()`). + +The default jemalloc package is required on Windows. 
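For illustration only (a minimal sketch, not taken from this patch; the helper
names are made up), code linked against this custom build calls the prefixed
jemalloc API directly:

    /* the je_ prefix comes from --with-jemalloc-prefix=je_ */
    #include <stddef.h>
    #include <jemalloc/jemalloc.h>

    static void *arena_alloc_sketch(unsigned arena_index, size_t size) {
        /* allocate from a specific arena, bypassing the thread cache */
        return je_mallocx(size, MALLOCX_ARENA(arena_index) | MALLOCX_TCACHE_NONE);
    }

    static void arena_free_sketch(void *ptr, unsigned arena_index) {
        je_dallocx(ptr, MALLOCX_ARENA(arena_index) | MALLOCX_TCACHE_NONE);
    }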
+ ##### Requirements 1) The `UMF_BUILD_LIBUMF_POOL_JEMALLOC` option turned `ON` 2) Required packages: - - libjemalloc-dev (Linux) or jemalloc (Windows) + - jemalloc (Windows only) #### Scalable Pool (part of libumf) diff --git a/benchmark/ubench.c b/benchmark/ubench.c index 5f1bfe9e4..845dc881d 100644 --- a/benchmark/ubench.c +++ b/benchmark/ubench.c @@ -445,8 +445,8 @@ static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs, } } -int create_level_zero_params(ze_context_handle_t *context, - ze_device_handle_t *device) { +static int create_level_zero_params(ze_context_handle_t *context, + ze_device_handle_t *device) { uint32_t driver_idx = 0; ze_driver_handle_t driver = NULL; diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 942579a30..ee99fb07d 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -284,8 +284,11 @@ if(LINUX) SRCS dram_and_fsdax/dram_and_fsdax.c LIBS umf jemalloc_pool) - target_link_directories(${EXAMPLE_NAME} PRIVATE - ${LIBHWLOC_LIBRARY_DIRS}) + target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--no-as-needed,-ldl") + + target_link_directories( + ${EXAMPLE_NAME} PRIVATE ${LIBHWLOC_LIBRARY_DIRS} + ${JEMALLOC_LIBRARY_DIRS}) add_test( NAME ${EXAMPLE_NAME} diff --git a/examples/cmake/FindJEMALLOC.cmake b/examples/cmake/FindJEMALLOC.cmake index 89d488ecc..e6db190d4 100644 --- a/examples/cmake/FindJEMALLOC.cmake +++ b/examples/cmake/FindJEMALLOC.cmake @@ -2,9 +2,11 @@ # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -message(STATUS "Checking for module 'jemalloc' using find_library()") +message( + STATUS "Looking for the static 'libjemalloc.a' library using find_library()" +) -find_library(JEMALLOC_LIBRARY NAMES libjemalloc jemalloc) +find_library(JEMALLOC_LIBRARY NAMES libjemalloc.a jemalloc.a) set(JEMALLOC_LIBRARIES ${JEMALLOC_LIBRARY}) get_filename_component(JEMALLOC_LIB_DIR ${JEMALLOC_LIBRARIES} DIRECTORY) diff --git a/examples/dram_and_fsdax/CMakeLists.txt b/examples/dram_and_fsdax/CMakeLists.txt index 0d0bf2593..014a08fcc 100644 --- a/examples/dram_and_fsdax/CMakeLists.txt +++ b/examples/dram_and_fsdax/CMakeLists.txt @@ -21,10 +21,8 @@ if(NOT LIBHWLOC_FOUND) find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) endif() -pkg_check_modules(JEMALLOC jemalloc) -if(NOT JEMALLOC_FOUND) - find_package(JEMALLOC REQUIRED jemalloc) -endif() +# find the custom jemalloc pointed by CMAKE_PREFIX_PATH +find_package(JEMALLOC REQUIRED jemalloc) # build the example set(EXAMPLE_NAME umf_example_dram_and_fsdax) diff --git a/scripts/qemu/run-build.sh b/scripts/qemu/run-build.sh index 06d6043f6..b0f4bee1e 100755 --- a/scripts/qemu/run-build.sh +++ b/scripts/qemu/run-build.sh @@ -14,7 +14,7 @@ pwd echo password | sudo -Sk apt-get update echo password | sudo -Sk apt-get install -y git cmake gcc g++ pkg-config \ - numactl libnuma-dev hwloc libhwloc-dev libjemalloc-dev libtbb-dev valgrind lcov + numactl libnuma-dev hwloc libhwloc-dev libtbb-dev valgrind lcov mkdir build cd build diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index bdd196b04..9cf9d1665 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -45,10 +45,13 @@ if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) NAME jemalloc_pool TYPE STATIC SRCS pool_jemalloc.c ${POOL_EXTRA_SRCS} - LIBS jemalloc ${POOL_EXTRA_LIBS}) + LIBS ${JEMALLOC_LIBRARIES} ${POOL_EXTRA_LIBS}) target_include_directories(jemalloc_pool PRIVATE ${JEMALLOC_INCLUDE_DIRS}) target_compile_definitions(jemalloc_pool PRIVATE 
${POOL_COMPILE_DEFINITIONS}) add_library(${PROJECT_NAME}::jemalloc_pool ALIAS jemalloc_pool) + if(NOT WINDOWS) + add_dependencies(jemalloc_pool jemalloc) + endif() install(TARGETS jemalloc_pool EXPORT ${PROJECT_NAME}-targets) endif() diff --git a/src/pool/pool_jemalloc.c b/src/pool/pool_jemalloc.c index 3ec7c7805..94fd655cc 100644 --- a/src/pool/pool_jemalloc.c +++ b/src/pool/pool_jemalloc.c @@ -22,16 +22,6 @@ #include -// The Windows version of jemalloc uses API with je_ prefix, -// while the Linux one does not. -#ifndef _WIN32 -#define je_mallocx mallocx -#define je_dallocx dallocx -#define je_rallocx rallocx -#define je_mallctl mallctl -#define je_malloc_usable_size malloc_usable_size -#endif - #define MALLOCX_ARENA_MAX (MALLCTL_ARENAS_ALL - 1) typedef struct jemalloc_memory_pool_t { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b23742866..b54822b96 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -747,6 +747,13 @@ if(LINUX set(STANDALONE_CMAKE_OPTIONS "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" ) + if(JEMALLOC_INCLUDE_DIRS) + # add custom jemalloc installation + set(STANDALONE_CMAKE_OPTIONS + "${STANDALONE_CMAKE_OPTIONS} -DCMAKE_PREFIX_PATH=${JEMALLOC_INCLUDE_DIRS}/../" + ) + endif() + add_test( NAME umf-standalone_examples COMMAND diff --git a/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp b/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp index fd071432b..bc4f2295f 100644 --- a/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp +++ b/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp @@ -1,27 +1,8 @@ { - False-positive ConflictingAccess in libjemalloc.so + False-positive ConflictingAccess in jemalloc drd:ConflictingAccess - obj:*/libjemalloc.so* ... - fun:mallocx - ... -} - -{ - False-positive ConflictingAccess in libjemalloc.so - drd:ConflictingAccess - obj:*/libjemalloc.so* - ... - fun:op_free - ... -} - -{ - False-positive ConflictingAccess in libjemalloc.so - drd:ConflictingAccess - obj:*/libjemalloc.so* - ... - fun:__nptl_deallocate_tsd + fun:je_* ... } diff --git a/test/supp/drd-umf_test-jemalloc_coarse_file.supp b/test/supp/drd-umf_test-jemalloc_coarse_file.supp index fd071432b..bc4f2295f 100644 --- a/test/supp/drd-umf_test-jemalloc_coarse_file.supp +++ b/test/supp/drd-umf_test-jemalloc_coarse_file.supp @@ -1,27 +1,8 @@ { - False-positive ConflictingAccess in libjemalloc.so + False-positive ConflictingAccess in jemalloc drd:ConflictingAccess - obj:*/libjemalloc.so* ... - fun:mallocx - ... -} - -{ - False-positive ConflictingAccess in libjemalloc.so - drd:ConflictingAccess - obj:*/libjemalloc.so* - ... - fun:op_free - ... -} - -{ - False-positive ConflictingAccess in libjemalloc.so - drd:ConflictingAccess - obj:*/libjemalloc.so* - ... - fun:__nptl_deallocate_tsd + fun:je_* ... } diff --git a/test/supp/drd-umf_test-jemalloc_pool.supp b/test/supp/drd-umf_test-jemalloc_pool.supp index 965ef3884..cb6179f87 100644 --- a/test/supp/drd-umf_test-jemalloc_pool.supp +++ b/test/supp/drd-umf_test-jemalloc_pool.supp @@ -1,6 +1,7 @@ { - Conflicting Access in libjemalloc.so - internal issue of libjemalloc + False-positive ConflictingAccess in jemalloc drd:ConflictingAccess - obj:*libjemalloc.so* + ... + fun:je_* ... 
} diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp b/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp index 18774f387..ac8969c5a 100644 --- a/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp +++ b/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp @@ -1,27 +1,8 @@ { - False-positive Race in libjemalloc.so + False-positive Race in jemalloc Helgrind:Race - obj:*/libjemalloc.so* ... - fun:mallocx - ... -} - -{ - False-positive Race in libjemalloc.so - Helgrind:Race - obj:*/libjemalloc.so* - ... - fun:op_free - ... -} - -{ - False-positive Race in libjemalloc.so - Helgrind:Race - obj:*/libjemalloc.so* - ... - fun:__nptl_deallocate_tsd + fun:je_* ... } diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp b/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp index 18774f387..ac8969c5a 100644 --- a/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp +++ b/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp @@ -1,27 +1,8 @@ { - False-positive Race in libjemalloc.so + False-positive Race in jemalloc Helgrind:Race - obj:*/libjemalloc.so* ... - fun:mallocx - ... -} - -{ - False-positive Race in libjemalloc.so - Helgrind:Race - obj:*/libjemalloc.so* - ... - fun:op_free - ... -} - -{ - False-positive Race in libjemalloc.so - Helgrind:Race - obj:*/libjemalloc.so* - ... - fun:__nptl_deallocate_tsd + fun:je_* ... } diff --git a/test/supp/helgrind-umf_test-jemalloc_pool.supp b/test/supp/helgrind-umf_test-jemalloc_pool.supp index 8068b023d..98d748fea 100644 --- a/test/supp/helgrind-umf_test-jemalloc_pool.supp +++ b/test/supp/helgrind-umf_test-jemalloc_pool.supp @@ -1,6 +1,7 @@ { - Race in libjemalloc.so - internal issue of libjemalloc + False-positive Race in jemalloc Helgrind:Race - obj:*libjemalloc.so* + ... + fun:je_* ... } From 614c4b54e9b8240ea1f0fd830b35286131c257f1 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 4 Dec 2024 17:01:14 +0100 Subject: [PATCH 018/466] Incorporate jemalloc_pool into libumf Remove the separate static `jemalloc_pool` library. Make the `UMF_BUILD_LIBUMF_POOL_JEMALLOC` option turned ON by default. Incorporate jemalloc_pool into libumf. 
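As a rough usage sketch (assuming the usual umfPoolCreate() signature; the
helper below is illustrative, not part of this change), an application can
detect at runtime whether libumf was built with the jemalloc pool, because the
stubs compiled without UMF_POOL_JEMALLOC_ENABLED return NULL and
UMF_RESULT_ERROR_NOT_SUPPORTED:

    #include <umf/memory_pool.h>
    #include <umf/pools/pool_jemalloc.h>

    static umf_result_t try_create_jemalloc_pool(umf_memory_provider_handle_t provider,
                                                 umf_memory_pool_handle_t *pool) {
        umf_memory_pool_ops_t *ops = umfJemallocPoolOps();
        if (ops == NULL) {
            /* libumf was built without jemalloc support */
            return UMF_RESULT_ERROR_NOT_SUPPORTED;
        }
        /* NULL pool params select the default jemalloc pool settings */
        return umfPoolCreate(ops, provider, NULL, 0, pool);
    }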
Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_basic.yml | 3 --- CMakeLists.txt | 14 +++++++--- README.md | 5 ++-- benchmark/CMakeLists.txt | 5 ++-- benchmark/multithread.cpp | 2 ++ examples/CMakeLists.txt | 2 +- examples/dram_and_fsdax/CMakeLists.txt | 17 +++--------- src/CMakeLists.txt | 21 ++++++++++++--- src/libumf.def | 4 +++ src/libumf.map | 4 +++ src/pool/CMakeLists.txt | 17 ------------ src/pool/pool_jemalloc.c | 27 +++++++++++++++++++ test/CMakeLists.txt | 23 ++++++++-------- ...check-umf_test-jemalloc_coarse_devdax.supp | 7 +++++ ...emcheck-umf_test-jemalloc_coarse_file.supp | 7 +++++ test/test_installation.py | 7 ----- 16 files changed, 101 insertions(+), 64 deletions(-) create mode 100644 test/supp/memcheck-umf_test-jemalloc_coarse_devdax.supp create mode 100644 test/supp/memcheck-umf_test-jemalloc_coarse_file.supp diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 0d27fb9c3..3b573453d 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -209,7 +209,6 @@ jobs: --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} --disjoint-pool - --jemalloc-pool ${{ matrix.install_tbb == 'ON' && matrix.disable_hwloc != 'ON' && matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || '' }} @@ -300,7 +299,6 @@ jobs: --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} --disjoint-pool - --jemalloc-pool ${{matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || ''}} @@ -495,7 +493,6 @@ jobs: --install-dir ${{env.INSTL_DIR}} --build-type ${{env.BUILD_TYPE}} --disjoint-pool - --jemalloc-pool --proxy --umf-version ${{env.UMF_VERSION}} --shared-library diff --git a/CMakeLists.txt b/CMakeLists.txt index fefe64685..7fcfcbb95 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -113,6 +113,8 @@ endif() if(NOT UMF_BUILD_LIBUMF_POOL_JEMALLOC) set(UMF_POOL_JEMALLOC_ENABLED FALSE) + set(JEMALLOC_FOUND FALSE) + set(JEMALLOC_LIBRARIES FALSE) elseif(WINDOWS) pkg_check_modules(JEMALLOC jemalloc) if(NOT JEMALLOC_FOUND) @@ -190,6 +192,12 @@ if(JEMALLOC_FOUND OR JEMALLOC_LIBRARIES) message(STATUS " JEMALLOC_LIBRARIES = ${JEMALLOC_LIBRARIES}") message(STATUS " JEMALLOC_INCLUDE_DIRS = ${JEMALLOC_INCLUDE_DIRS}") message(STATUS " JEMALLOC_LIBRARY_DIRS = ${JEMALLOC_LIBRARY_DIRS}") +else() + set(UMF_POOL_JEMALLOC_ENABLED FALSE) + message( + STATUS + "Disabling the Jemalloc Pool and tests and benchmarks that use it because jemalloc was not built/found." + ) endif() if(UMF_DISABLE_HWLOC) @@ -523,14 +531,14 @@ elseif(UMF_PROXY_LIB_BASED_ON_POOL STREQUAL SCALABLE) ) endif() elseif(UMF_PROXY_LIB_BASED_ON_POOL STREQUAL JEMALLOC) - if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) + if(UMF_POOL_JEMALLOC_ENABLED) set(UMF_PROXY_LIB_ENABLED ON) set(PROXY_LIB_USES_JEMALLOC_POOL ON) - set(PROXY_LIBS jemalloc_pool umf) + set(PROXY_LIBS umf) else() message( STATUS - "Disabling the proxy library, because UMF_PROXY_LIB_BASED_ON_POOL==JEMALLOC but UMF_BUILD_LIBUMF_POOL_JEMALLOC is OFF" + "Disabling the proxy library, because UMF_PROXY_LIB_BASED_ON_POOL==JEMALLOC but the jemalloc pool is disabled" ) endif() else() diff --git a/README.md b/README.md index c04d7d22e..81a82bfab 100644 --- a/README.md +++ b/README.md @@ -298,8 +298,9 @@ The default jemalloc package is required on Windows. 
##### Requirements 1) The `UMF_BUILD_LIBUMF_POOL_JEMALLOC` option turned `ON` -2) Required packages: - - jemalloc (Windows only) +2) jemalloc is required: +- on Linux and MacOS: jemalloc is fetched and built from sources (a custom build), +- on Windows: the default jemalloc package is required #### Scalable Pool (part of libumf) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 5605519ee..b2f1299be 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -51,7 +51,7 @@ function(add_umf_benchmark) set(BENCH_NAME umf-${ARG_NAME}) - set(BENCH_LIBS ${ARG_LIBS} umf) + set(BENCH_LIBS ${ARG_LIBS} umf umf_utils) add_umf_executable( NAME ${BENCH_NAME} @@ -121,8 +121,7 @@ set(LIB_DIRS ${LIBHWLOC_LIBRARY_DIRS}) if(UMF_BUILD_LIBUMF_POOL_DISJOINT) set(LIBS_OPTIONAL ${LIBS_OPTIONAL} disjoint_pool) endif() -if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) - set(LIBS_OPTIONAL ${LIBS_OPTIONAL} jemalloc_pool ${JEMALLOC_LIBRARIES}) +if(UMF_POOL_JEMALLOC_ENABLED) set(LIB_DIRS ${LIB_DIRS} ${JEMALLOC_LIBRARY_DIRS}) endif() if(LINUX) diff --git a/benchmark/multithread.cpp b/benchmark/multithread.cpp index 4558942ec..ecc238529 100644 --- a/benchmark/multithread.cpp +++ b/benchmark/multithread.cpp @@ -139,11 +139,13 @@ int main() { // ctest looks for "PASSED" in the output std::cout << "PASSED" << std::endl; +#if defined(UMF_POOL_DISJOINT_ENABLED) ret = umfDisjointPoolParamsDestroy(hDisjointParams); if (ret != UMF_RESULT_SUCCESS) { std::cerr << "disjoint pool params destroy failed" << std::endl; return -1; } +#endif return 0; } diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ee99fb07d..986ad5641 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -282,7 +282,7 @@ if(LINUX) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS dram_and_fsdax/dram_and_fsdax.c - LIBS umf jemalloc_pool) + LIBS umf) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--no-as-needed,-ldl") diff --git a/examples/dram_and_fsdax/CMakeLists.txt b/examples/dram_and_fsdax/CMakeLists.txt index 014a08fcc..dcb538085 100644 --- a/examples/dram_and_fsdax/CMakeLists.txt +++ b/examples/dram_and_fsdax/CMakeLists.txt @@ -21,24 +21,15 @@ if(NOT LIBHWLOC_FOUND) find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) endif() -# find the custom jemalloc pointed by CMAKE_PREFIX_PATH -find_package(JEMALLOC REQUIRED jemalloc) - # build the example set(EXAMPLE_NAME umf_example_dram_and_fsdax) add_executable(${EXAMPLE_NAME} dram_and_fsdax.c) target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS}) -target_link_directories( - ${EXAMPLE_NAME} - PRIVATE - ${LIBUMF_LIBRARY_DIRS} - ${LIBHWLOC_LIBRARY_DIRS} - ${JEMALLOC_LIBRARY_DIRS}) +target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} + ${LIBHWLOC_LIBRARY_DIRS}) -target_link_libraries( - ${EXAMPLE_NAME} PRIVATE hwloc jemalloc_pool ${JEMALLOC_LIBRARIES} - ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE hwloc ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( @@ -54,6 +45,6 @@ if(LINUX) TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION - "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${JEMALLOC_LIBRARY_DIRS}" + "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" ) endif() diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ffd928f7c..8b1e2248a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -69,8 +69,19 @@ 
set(UMF_SOURCES critnib/critnib.c ravl/ravl.c pool/pool_proxy.c + pool/pool_jemalloc.c pool/pool_scalable.c) +if(UMF_POOL_JEMALLOC_ENABLED) + set(UMF_LIBS ${UMF_LIBS} ${JEMALLOC_LIBRARIES}) + set(UMF_PRIVATE_LIBRARY_DIRS ${UMF_PRIVATE_LIBRARY_DIRS} + ${JEMALLOC_LIBRARY_DIRS}) + set(UMF_PRIVATE_INCLUDE_DIRS ${UMF_PRIVATE_INCLUDE_DIRS} + ${JEMALLOC_INCLUDE_DIRS}) + set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} + "UMF_POOL_JEMALLOC_ENABLED=1") +endif() + if(NOT UMF_DISABLE_HWLOC) set(UMF_SOURCES ${UMF_SOURCES} ${HWLOC_DEPENDENT_SOURCES} memtargets/memtarget_numa.c) @@ -146,15 +157,19 @@ else() LIBS ${UMF_LIBS}) endif() +target_include_directories(umf PRIVATE ${UMF_PRIVATE_INCLUDE_DIRS}) +target_link_directories(umf PRIVATE ${UMF_PRIVATE_LIBRARY_DIRS}) +target_compile_definitions(umf PRIVATE ${UMF_COMMON_COMPILE_DEFINITIONS}) + add_dependencies(umf coarse) if(UMF_LINK_HWLOC_STATICALLY) add_dependencies(umf ${UMF_HWLOC_NAME}) endif() -target_link_directories(umf PRIVATE ${UMF_PRIVATE_LIBRARY_DIRS}) - -target_compile_definitions(umf PRIVATE ${UMF_COMMON_COMPILE_DEFINITIONS}) +if(NOT WINDOWS AND UMF_POOL_JEMALLOC_ENABLED) + add_dependencies(umf jemalloc) +endif() if(UMF_BUILD_LEVEL_ZERO_PROVIDER) if(LINUX) diff --git a/src/libumf.def b/src/libumf.def index 0b4588bb8..f2b24be6c 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -36,6 +36,10 @@ EXPORTS umfFileMemoryProviderParamsSetVisibility umfGetIPCHandle umfGetLastFailedMemoryProvider + umfJemallocPoolOps + umfJemallocPoolParamsCreate + umfJemallocPoolParamsDestroy + umfJemallocPoolParamsSetKeepAllMemory umfLevelZeroMemoryProviderOps umfLevelZeroMemoryProviderParamsCreate umfLevelZeroMemoryProviderParamsDestroy diff --git a/src/libumf.map b/src/libumf.map index 41467bad5..067ec8838 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -30,6 +30,10 @@ UMF_1.0 { umfFileMemoryProviderParamsSetVisibility; umfGetIPCHandle; umfGetLastFailedMemoryProvider; + umfJemallocPoolOps; + umfJemallocPoolParamsCreate; + umfJemallocPoolParamsDestroy; + umfJemallocPoolParamsSetKeepAllMemory; umfLevelZeroMemoryProviderOps; umfLevelZeroMemoryProviderParamsCreate; umfLevelZeroMemoryProviderParamsDestroy; diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index 9cf9d1665..17be932a4 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -38,20 +38,3 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) install(TARGETS disjoint_pool EXPORT ${PROJECT_NAME}-targets) endif() - -# libumf_pool_jemalloc -if(UMF_BUILD_LIBUMF_POOL_JEMALLOC) - add_umf_library( - NAME jemalloc_pool - TYPE STATIC - SRCS pool_jemalloc.c ${POOL_EXTRA_SRCS} - LIBS ${JEMALLOC_LIBRARIES} ${POOL_EXTRA_LIBS}) - target_include_directories(jemalloc_pool PRIVATE ${JEMALLOC_INCLUDE_DIRS}) - target_compile_definitions(jemalloc_pool - PRIVATE ${POOL_COMPILE_DEFINITIONS}) - add_library(${PROJECT_NAME}::jemalloc_pool ALIAS jemalloc_pool) - if(NOT WINDOWS) - add_dependencies(jemalloc_pool jemalloc) - endif() - install(TARGETS jemalloc_pool EXPORT ${PROJECT_NAME}-targets) -endif() diff --git a/src/pool/pool_jemalloc.c b/src/pool/pool_jemalloc.c index 94fd655cc..47bc6497f 100644 --- a/src/pool/pool_jemalloc.c +++ b/src/pool/pool_jemalloc.c @@ -20,6 +20,32 @@ #include #include +#ifndef UMF_POOL_JEMALLOC_ENABLED + +umf_memory_pool_ops_t *umfJemallocPoolOps(void) { return NULL; } + +umf_result_t +umfJemallocPoolParamsCreate(umf_jemalloc_pool_params_handle_t *hParams) { + (void)hParams; // unused + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t 
+umfJemallocPoolParamsDestroy(umf_jemalloc_pool_params_handle_t hParams) { + (void)hParams; // unused + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +umf_result_t +umfJemallocPoolParamsSetKeepAllMemory(umf_jemalloc_pool_params_handle_t hParams, + bool keepAllMemory) { + (void)hParams; // unused + (void)keepAllMemory; // unused + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +#else + #include #define MALLOCX_ARENA_MAX (MALLCTL_ARENAS_ALL - 1) @@ -535,3 +561,4 @@ static umf_memory_pool_ops_t UMF_JEMALLOC_POOL_OPS = { umf_memory_pool_ops_t *umfJemallocPoolOps(void) { return &UMF_JEMALLOC_POOL_OPS; } +#endif /* UMF_POOL_JEMALLOC_ENABLED */ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b54822b96..58584a4e1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -165,10 +165,6 @@ if(UMF_BUILD_SHARED_LIBRARY) endif() endif() -if(UMF_POOL_JEMALLOC_ENABLED) - set(LIB_JEMALLOC_POOL jemalloc_pool) -endif() - if(UMF_BUILD_LIBUMF_POOL_DISJOINT) set(LIB_DISJOINT_POOL disjoint_pool) endif() @@ -236,14 +232,15 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT add_umf_test( NAME c_api_multi_pool SRCS c_api/multi_pool.c - LIBS disjoint_pool jemalloc_pool ${JEMALLOC_LIBRARIES}) + LIBS disjoint_pool) endif() if(UMF_POOL_JEMALLOC_ENABLED AND (NOT UMF_DISABLE_HWLOC)) add_umf_test( NAME jemalloc_pool SRCS pools/jemalloc_pool.cpp malloc_compliance_tests.cpp - LIBS jemalloc_pool) + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) endif() if(UMF_POOL_SCALABLE_ENABLED AND (NOT UMF_DISABLE_HWLOC)) @@ -266,7 +263,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_os_memory SRCS provider_os_memory.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL} ${LIB_DISJOINT_POOL}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIB_DISJOINT_POOL}) add_umf_test( NAME provider_os_memory_multiple_numa_nodes SRCS provider_os_memory_multiple_numa_nodes.cpp @@ -314,7 +311,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_devdax_memory_ipc SRCS provider_devdax_memory_ipc.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL}) + LIBS ${UMF_UTILS_FOR_TEST}) add_umf_test( NAME provider_file_memory SRCS provider_file_memory.cpp @@ -322,18 +319,20 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_file_memory_ipc SRCS provider_file_memory_ipc.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST} ${LIB_JEMALLOC_POOL}) + LIBS ${UMF_UTILS_FOR_TEST}) # This test requires Linux-only file memory provider if(UMF_POOL_JEMALLOC_ENABLED) add_umf_test( NAME jemalloc_coarse_file SRCS pools/jemalloc_coarse_file.cpp malloc_compliance_tests.cpp - LIBS jemalloc_pool) + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) add_umf_test( NAME jemalloc_coarse_devdax SRCS pools/jemalloc_coarse_devdax.cpp malloc_compliance_tests.cpp - LIBS jemalloc_pool) + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) endif() # This test requires Linux-only file memory provider @@ -739,7 +738,7 @@ if(LINUX else() message( STATUS - "The dram_and_fsdax example is supported on Linux only and requires UMF_BUILD_LIBUMF_POOL_JEMALLOC to be turned ON - skipping" + "The dram_and_fsdax example is supported on Linux only and requires the jemalloc pool, but it is disabled - skipping" ) endif() diff --git a/test/supp/memcheck-umf_test-jemalloc_coarse_devdax.supp b/test/supp/memcheck-umf_test-jemalloc_coarse_devdax.supp new file mode 100644 index 000000000..f71903277 
--- /dev/null +++ b/test/supp/memcheck-umf_test-jemalloc_coarse_devdax.supp @@ -0,0 +1,7 @@ +{ + False-positive invalid write of size 8 + Memcheck:Addr8 + ... + fun:je_* + ... +} diff --git a/test/supp/memcheck-umf_test-jemalloc_coarse_file.supp b/test/supp/memcheck-umf_test-jemalloc_coarse_file.supp new file mode 100644 index 000000000..f71903277 --- /dev/null +++ b/test/supp/memcheck-umf_test-jemalloc_coarse_file.supp @@ -0,0 +1,7 @@ +{ + False-positive invalid write of size 8 + Memcheck:Addr8 + ... + fun:je_* + ... +} diff --git a/test/test_installation.py b/test/test_installation.py index 49a382969..b5dd676dc 100644 --- a/test/test_installation.py +++ b/test/test_installation.py @@ -283,11 +283,6 @@ def parse_arguments(self) -> argparse.Namespace: action="store_true", help="Add this argument if the UMF was built with Disjoint Pool enabled", ) - self.parser.add_argument( - "--jemalloc-pool", - action="store_true", - help="Add this argument if the UMF was built with Jemalloc Pool enabled", - ) self.parser.add_argument( "--umf-version", action="store", @@ -306,8 +301,6 @@ def run(self) -> None: pools = [] if self.args.disjoint_pool: pools.append("disjoint_pool") - if self.args.jemalloc_pool: - pools.append("jemalloc_pool") umf_version = Version(self.args.umf_version) From 6b005c8cdd118343e5c985ebbc0d9e9f67bc69fe Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 5 Dec 2024 10:37:00 +0100 Subject: [PATCH 019/466] Remove JEMALLOC_LIBRARY_DIRS from tests and benchmarks Signed-off-by: Lukasz Dorau --- benchmark/CMakeLists.txt | 3 --- test/CMakeLists.txt | 1 - 2 files changed, 4 deletions(-) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index b2f1299be..efad0baf3 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -121,9 +121,6 @@ set(LIB_DIRS ${LIBHWLOC_LIBRARY_DIRS}) if(UMF_BUILD_LIBUMF_POOL_DISJOINT) set(LIBS_OPTIONAL ${LIBS_OPTIONAL} disjoint_pool) endif() -if(UMF_POOL_JEMALLOC_ENABLED) - set(LIB_DIRS ${LIB_DIRS} ${JEMALLOC_LIBRARY_DIRS}) -endif() if(LINUX) set(LIBS_OPTIONAL ${LIBS_OPTIONAL} m) endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 58584a4e1..b56478970 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -51,7 +51,6 @@ function(build_umf_test) endif() if(UMF_POOL_JEMALLOC_ENABLED) - set(LIB_DIRS ${LIB_DIRS} ${JEMALLOC_LIBRARY_DIRS}) set(CPL_DEFS ${CPL_DEFS} UMF_POOL_JEMALLOC_ENABLED=1) endif() From d8b63dddde1761902f4fe3970612c6b5f73e370d Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 5 Dec 2024 10:37:56 +0100 Subject: [PATCH 020/466] Revert WA for the issue with jemalloc in the proxy library This reverts commit a4fced635067affb6787da41116ae3ffadfc5597. Fixes: #894 Signed-off-by: Lukasz Dorau --- src/proxy_lib/proxy_lib.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/proxy_lib/proxy_lib.c b/src/proxy_lib/proxy_lib.c index f8bae304d..15ddfca1b 100644 --- a/src/proxy_lib/proxy_lib.c +++ b/src/proxy_lib/proxy_lib.c @@ -128,13 +128,6 @@ static umf_memory_pool_handle_t Proxy_pool = NULL; // it protects us from recursion in umfPool*() static __TLS int was_called_from_umfPool = 0; -// This WA for the issue: -// https://github.com/oneapi-src/unified-memory-framework/issues/894 -// It protects us from a recursion in malloc_usable_size() -// when the JEMALLOC proxy_lib_pool is used. -// TODO remove this WA when the issue is fixed. 
-static __TLS int was_called_from_malloc_usable_size = 0; - /*****************************************************************************/ /*** The constructor and destructor of the proxy library *********************/ /*****************************************************************************/ @@ -478,18 +471,15 @@ size_t malloc_usable_size(void *ptr) { return 0; // unsupported in case of the ba_leak allocator } - if (!was_called_from_malloc_usable_size && Proxy_pool && - (umfPoolByPtr(ptr) == Proxy_pool)) { - was_called_from_malloc_usable_size = 1; + if (Proxy_pool && (umfPoolByPtr(ptr) == Proxy_pool)) { was_called_from_umfPool = 1; size_t size = umfPoolMallocUsableSize(Proxy_pool, ptr); was_called_from_umfPool = 0; - was_called_from_malloc_usable_size = 0; return size; } #ifndef _WIN32 - if (!was_called_from_malloc_usable_size && Size_threshold_value) { + if (Size_threshold_value) { return System_malloc_usable_size(ptr); } #endif /* _WIN32 */ From 3112e2b4f1d894a10513712dc020cf5884f269c6 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 5 Dec 2024 11:09:40 +0100 Subject: [PATCH 021/466] Revert moving free() to optional (ext) provider ops This reverts commit b0bfbb7e92b76cc56c3bcd91c7ac063467136376. Remove umfDefaultFree() and umfIsFreeOpDefault(). Remove the `disable_upstream_provider_free` parameter of the Coarse provider. Remove the `upstreamDoesNotFree` argument of the `umfTrackingMemoryProviderCreate()` function. Signed-off-by: Lukasz Dorau --- .../custom_file_provider.c | 2 +- include/umf/memory_provider_ops.h | 18 +++++++-------- src/cpp_helpers.hpp | 2 +- src/memory_pool.c | 5 +---- src/memory_provider.c | 19 ++-------------- src/memory_provider_internal.h | 1 - src/provider/provider_coarse.c | 22 ++++--------------- src/provider/provider_cuda.c | 2 +- src/provider/provider_devdax_memory.c | 2 +- src/provider/provider_file_memory.c | 2 +- src/provider/provider_level_zero.c | 2 +- src/provider/provider_os_memory.c | 2 +- src/provider/provider_tracking.c | 21 ++++++------------ src/provider/provider_tracking.h | 2 +- test/common/provider_null.c | 2 +- test/common/provider_trace.c | 2 +- test/memoryProviderAPI.cpp | 21 +++++++----------- 17 files changed, 41 insertions(+), 86 deletions(-) diff --git a/examples/custom_file_provider/custom_file_provider.c b/examples/custom_file_provider/custom_file_provider.c index ffa61d63f..ad897fe5e 100644 --- a/examples/custom_file_provider/custom_file_provider.c +++ b/examples/custom_file_provider/custom_file_provider.c @@ -237,11 +237,11 @@ static umf_memory_provider_ops_t file_ops = { .initialize = file_init, .finalize = file_deinit, .alloc = file_alloc, + .free = file_free, .get_name = file_get_name, .get_last_native_error = file_get_last_native_error, .get_recommended_page_size = file_get_recommended_page_size, .get_min_page_size = file_get_min_page_size, - .ext.free = file_free, }; // Main function diff --git a/include/umf/memory_provider_ops.h b/include/umf/memory_provider_ops.h index 0b9c7cfce..a61e0aad0 100644 --- a/include/umf/memory_provider_ops.h +++ b/include/umf/memory_provider_ops.h @@ -22,15 +22,6 @@ extern "C" { /// can keep them NULL. 
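To illustrate the effect of this revert, here is a hedged sketch (not taken from the patch) of a provider ops table once free() is again a mandatory top-level operation; the my_* callbacks are placeholder names.

    // After this change, validateOpsMandatory() also checks ops->free, so
    // umfMemoryProviderCreate() fails with UMF_RESULT_ERROR_INVALID_ARGUMENT
    // when a provider leaves .free unset. The .ext.* ops remain optional.
    static umf_memory_provider_ops_t MY_PROVIDER_OPS = {
        .version = UMF_VERSION_CURRENT,
        .initialize = my_initialize,           // placeholder callbacks
        .finalize = my_finalize,
        .alloc = my_alloc,
        .free = my_free,                       // mandatory again (was .ext.free)
        .get_last_native_error = my_get_last_native_error,
        .get_recommended_page_size = my_get_recommended_page_size,
        .get_min_page_size = my_get_min_page_size,
        .get_name = my_get_name,
        // optional extended ops keep their defaults when left NULL:
        // .ext.purge_lazy, .ext.purge_force, .ext.allocation_split, ...
    };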
/// typedef struct umf_memory_provider_ext_ops_t { - /// - /// @brief Frees the memory space pointed by \p ptr from the memory \p provider - /// @param provider pointer to the memory provider - /// @param ptr pointer to the allocated memory to free - /// @param size size of the allocation - /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure - /// - umf_result_t (*free)(void *provider, void *ptr, size_t size); - /// /// @brief Discard physical pages within the virtual memory mapping associated at the given addr /// and \p size. This call is asynchronous and may delay purging the pages indefinitely. @@ -181,6 +172,15 @@ typedef struct umf_memory_provider_ops_t { umf_result_t (*alloc)(void *provider, size_t size, size_t alignment, void **ptr); + /// + /// @brief Frees the memory space pointed by \p ptr from the memory \p provider + /// @param provider pointer to the memory provider + /// @param ptr pointer to the allocated memory to free + /// @param size size of the allocation + /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure + /// + umf_result_t (*free)(void *provider, void *ptr, size_t size); + /// /// @brief Retrieve string representation of the underlying provider specific /// result reported by the last API that returned diff --git a/src/cpp_helpers.hpp b/src/cpp_helpers.hpp index 6316ccbc7..878910581 100644 --- a/src/cpp_helpers.hpp +++ b/src/cpp_helpers.hpp @@ -84,7 +84,7 @@ template constexpr umf_memory_provider_ops_t providerOpsBase() { ops.version = UMF_VERSION_CURRENT; ops.finalize = [](void *obj) { delete reinterpret_cast(obj); }; UMF_ASSIGN_OP(ops, T, alloc, UMF_RESULT_ERROR_UNKNOWN); - UMF_ASSIGN_OP(ops.ext, T, free, UMF_RESULT_ERROR_UNKNOWN); + UMF_ASSIGN_OP(ops, T, free, UMF_RESULT_ERROR_UNKNOWN); UMF_ASSIGN_OP_NORETURN(ops, T, get_last_native_error); UMF_ASSIGN_OP(ops, T, get_recommended_page_size, UMF_RESULT_ERROR_UNKNOWN); UMF_ASSIGN_OP(ops, T, get_min_page_size, UMF_RESULT_ERROR_UNKNOWN); diff --git a/src/memory_pool.c b/src/memory_pool.c index 4a85955ef..cb1d303f5 100644 --- a/src/memory_pool.c +++ b/src/memory_pool.c @@ -42,10 +42,7 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, if (!(flags & UMF_POOL_CREATE_FLAG_DISABLE_TRACKING)) { // Wrap provider with memory tracking provider. - // Check if the provider supports the free() operation. 
- bool upstreamDoesNotFree = umfIsFreeOpDefault(provider); - ret = umfTrackingMemoryProviderCreate(provider, pool, &pool->provider, - upstreamDoesNotFree); + ret = umfTrackingMemoryProviderCreate(provider, pool, &pool->provider); if (ret != UMF_RESULT_SUCCESS) { goto err_provider_create; } diff --git a/src/memory_provider.c b/src/memory_provider.c index 883f1be26..59f3f1259 100644 --- a/src/memory_provider.c +++ b/src/memory_provider.c @@ -25,13 +25,6 @@ typedef struct umf_memory_provider_t { void *provider_priv; } umf_memory_provider_t; -static umf_result_t umfDefaultFree(void *provider, void *ptr, size_t size) { - (void)provider; - (void)ptr; - (void)size; - return UMF_RESULT_ERROR_NOT_SUPPORTED; -} - static umf_result_t umfDefaultPurgeLazy(void *provider, void *ptr, size_t size) { (void)provider; @@ -106,9 +99,6 @@ static umf_result_t umfDefaultCloseIPCHandle(void *provider, void *ptr, } void assignOpsExtDefaults(umf_memory_provider_ops_t *ops) { - if (!ops->ext.free) { - ops->ext.free = umfDefaultFree; - } if (!ops->ext.purge_lazy) { ops->ext.purge_lazy = umfDefaultPurgeLazy; } @@ -143,7 +133,7 @@ void assignOpsIpcDefaults(umf_memory_provider_ops_t *ops) { static bool validateOpsMandatory(const umf_memory_provider_ops_t *ops) { // Mandatory ops should be non-NULL - return ops->alloc && ops->get_recommended_page_size && + return ops->alloc && ops->free && ops->get_recommended_page_size && ops->get_min_page_size && ops->initialize && ops->finalize && ops->get_last_native_error && ops->get_name; } @@ -169,10 +159,6 @@ static bool validateOps(const umf_memory_provider_ops_t *ops) { validateOpsIpc(&(ops->ipc)); } -bool umfIsFreeOpDefault(umf_memory_provider_handle_t hProvider) { - return (hProvider->ops.ext.free == umfDefaultFree); -} - umf_result_t umfMemoryProviderCreate(const umf_memory_provider_ops_t *ops, void *params, umf_memory_provider_handle_t *hProvider) { @@ -236,8 +222,7 @@ umf_result_t umfMemoryProviderAlloc(umf_memory_provider_handle_t hProvider, umf_result_t umfMemoryProviderFree(umf_memory_provider_handle_t hProvider, void *ptr, size_t size) { UMF_CHECK((hProvider != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); - umf_result_t res = - hProvider->ops.ext.free(hProvider->provider_priv, ptr, size); + umf_result_t res = hProvider->ops.free(hProvider->provider_priv, ptr, size); checkErrorAndSetLastProvider(res, hProvider); return res; } diff --git a/src/memory_provider_internal.h b/src/memory_provider_internal.h index 49b2f2e53..60955e0fb 100644 --- a/src/memory_provider_internal.h +++ b/src/memory_provider_internal.h @@ -20,7 +20,6 @@ extern "C" { void *umfMemoryProviderGetPriv(umf_memory_provider_handle_t hProvider); umf_memory_provider_handle_t *umfGetLastFailedMemoryProviderPtr(void); -bool umfIsFreeOpDefault(umf_memory_provider_handle_t hProvider); #ifdef __cplusplus } diff --git a/src/provider/provider_coarse.c b/src/provider/provider_coarse.c index c3027b91d..72985faaf 100644 --- a/src/provider/provider_coarse.c +++ b/src/provider/provider_coarse.c @@ -59,10 +59,6 @@ typedef struct coarse_memory_provider_t { // "coarse ()" // for example: "coarse (L0)" char *name; - - // Set to true if the free() operation of the upstream memory provider is not supported - // (i.e. 
if (umfMemoryProviderFree(upstream_memory_provider, NULL, 0) == UMF_RESULT_ERROR_NOT_SUPPORTED) - bool disable_upstream_provider_free; } coarse_memory_provider_t; typedef struct ravl_node ravl_node_t; @@ -918,13 +914,6 @@ static umf_result_t coarse_memory_provider_initialize(void *params, coarse_provider->allocation_strategy = coarse_params->allocation_strategy; coarse_provider->init_buffer = coarse_params->init_buffer; - if (coarse_provider->upstream_memory_provider) { - coarse_provider->disable_upstream_provider_free = - umfIsFreeOpDefault(coarse_provider->upstream_memory_provider); - } else { - coarse_provider->disable_upstream_provider_free = false; - } - umf_result_t umf_result = coarse_memory_provider_set_name(coarse_provider); if (umf_result != UMF_RESULT_SUCCESS) { LOG_ERR("name initialization failed"); @@ -1027,8 +1016,7 @@ static void coarse_ravl_cb_rm_upstream_blocks_node(void *data, void *arg) { block_t *alloc = node_data->value; assert(alloc); - if (coarse_provider->upstream_memory_provider && - !coarse_provider->disable_upstream_provider_free) { + if (coarse_provider->upstream_memory_provider) { // We continue to deallocate alloc blocks even if the upstream provider doesn't return success. umfMemoryProviderFree(coarse_provider->upstream_memory_provider, alloc->data, alloc->size); @@ -1288,10 +1276,8 @@ static umf_result_t coarse_memory_provider_alloc(void *provider, size_t size, umf_result = coarse_add_upstream_block(coarse_provider, *resultPtr, size); if (umf_result != UMF_RESULT_SUCCESS) { - if (!coarse_provider->disable_upstream_provider_free) { - umfMemoryProviderFree(coarse_provider->upstream_memory_provider, - *resultPtr, size); - } + umfMemoryProviderFree(coarse_provider->upstream_memory_provider, + *resultPtr, size); goto err_unlock; } @@ -1657,12 +1643,12 @@ umf_memory_provider_ops_t UMF_COARSE_MEMORY_PROVIDER_OPS = { .initialize = coarse_memory_provider_initialize, .finalize = coarse_memory_provider_finalize, .alloc = coarse_memory_provider_alloc, + .free = coarse_memory_provider_free, .get_last_native_error = coarse_memory_provider_get_last_native_error, .get_recommended_page_size = coarse_memory_provider_get_recommended_page_size, .get_min_page_size = coarse_memory_provider_get_min_page_size, .get_name = coarse_memory_provider_get_name, - .ext.free = coarse_memory_provider_free, .ext.purge_lazy = coarse_memory_provider_purge_lazy, .ext.purge_force = coarse_memory_provider_purge_force, .ext.allocation_merge = coarse_memory_provider_allocation_merge, diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index baccbd023..f46e04972 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -599,11 +599,11 @@ static struct umf_memory_provider_ops_t UMF_CUDA_MEMORY_PROVIDER_OPS = { .initialize = cu_memory_provider_initialize, .finalize = cu_memory_provider_finalize, .alloc = cu_memory_provider_alloc, + .free = cu_memory_provider_free, .get_last_native_error = cu_memory_provider_get_last_native_error, .get_recommended_page_size = cu_memory_provider_get_recommended_page_size, .get_min_page_size = cu_memory_provider_get_min_page_size, .get_name = cu_memory_provider_get_name, - .ext.free = cu_memory_provider_free, // TODO /* .ext.purge_lazy = cu_memory_provider_purge_lazy, diff --git a/src/provider/provider_devdax_memory.c b/src/provider/provider_devdax_memory.c index 463b796ec..79a066275 100644 --- a/src/provider/provider_devdax_memory.c +++ b/src/provider/provider_devdax_memory.c @@ -527,11 +527,11 @@ static 
umf_memory_provider_ops_t UMF_DEVDAX_MEMORY_PROVIDER_OPS = { .initialize = devdax_initialize, .finalize = devdax_finalize, .alloc = devdax_alloc, + .free = devdax_free, .get_last_native_error = devdax_get_last_native_error, .get_recommended_page_size = devdax_get_recommended_page_size, .get_min_page_size = devdax_get_min_page_size, .get_name = devdax_get_name, - .ext.free = devdax_free, .ext.purge_lazy = devdax_purge_lazy, .ext.purge_force = devdax_purge_force, .ext.allocation_merge = devdax_allocation_merge, diff --git a/src/provider/provider_file_memory.c b/src/provider/provider_file_memory.c index 558b1062a..edf733180 100644 --- a/src/provider/provider_file_memory.c +++ b/src/provider/provider_file_memory.c @@ -825,11 +825,11 @@ static umf_memory_provider_ops_t UMF_FILE_MEMORY_PROVIDER_OPS = { .initialize = file_initialize, .finalize = file_finalize, .alloc = file_alloc, + .free = file_free, .get_last_native_error = file_get_last_native_error, .get_recommended_page_size = file_get_recommended_page_size, .get_min_page_size = file_get_min_page_size, .get_name = file_get_name, - .ext.free = file_free, .ext.purge_lazy = file_purge_lazy, .ext.purge_force = file_purge_force, .ext.allocation_merge = file_allocation_merge, diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index f4a3e97c2..70f0acfe5 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -682,11 +682,11 @@ static struct umf_memory_provider_ops_t UMF_LEVEL_ZERO_MEMORY_PROVIDER_OPS = { .initialize = ze_memory_provider_initialize, .finalize = ze_memory_provider_finalize, .alloc = ze_memory_provider_alloc, + .free = ze_memory_provider_free, .get_last_native_error = ze_memory_provider_get_last_native_error, .get_recommended_page_size = ze_memory_provider_get_recommended_page_size, .get_min_page_size = ze_memory_provider_get_min_page_size, .get_name = ze_memory_provider_get_name, - .ext.free = ze_memory_provider_free, .ext.purge_lazy = ze_memory_provider_purge_lazy, .ext.purge_force = ze_memory_provider_purge_force, .ext.allocation_merge = ze_memory_provider_allocation_merge, diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index 4c19944a9..2cc8e9827 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -1408,11 +1408,11 @@ static umf_memory_provider_ops_t UMF_OS_MEMORY_PROVIDER_OPS = { .initialize = os_initialize, .finalize = os_finalize, .alloc = os_alloc, + .free = os_free, .get_last_native_error = os_get_last_native_error, .get_recommended_page_size = os_get_recommended_page_size, .get_min_page_size = os_get_min_page_size, .get_name = os_get_name, - .ext.free = os_free, .ext.purge_lazy = os_purge_lazy, .ext.purge_force = os_purge_force, .ext.allocation_merge = os_allocation_merge, diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index e726feefb..c4fff4133 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -154,9 +154,6 @@ typedef struct umf_tracking_memory_provider_t { umf_memory_pool_handle_t pool; critnib *ipcCache; ipc_mapped_handle_cache_handle_t hIpcMappedCache; - - // the upstream provider does not support the free() operation - bool upstreamDoesNotFree; } umf_tracking_memory_provider_t; typedef struct umf_tracking_memory_provider_t umf_tracking_memory_provider_t; @@ -422,8 +419,7 @@ static umf_result_t trackingInitialize(void *params, void **ret) { // TODO clearing the tracker is a temporary solution and should be 
removed. // The tracker should be cleared using the provider's free() operation. static void clear_tracker_for_the_pool(umf_memory_tracker_handle_t hTracker, - umf_memory_pool_handle_t pool, - bool upstreamDoesNotFree) { + umf_memory_pool_handle_t pool) { uintptr_t rkey; void *rvalue; size_t n_items = 0; @@ -448,7 +444,7 @@ static void clear_tracker_for_the_pool(umf_memory_tracker_handle_t hTracker, #ifndef NDEBUG // print error messages only if provider supports the free() operation - if (n_items && !upstreamDoesNotFree) { + if (n_items) { if (pool) { LOG_ERR( "tracking provider of pool %p is not empty! (%zu items left)", @@ -459,13 +455,12 @@ static void clear_tracker_for_the_pool(umf_memory_tracker_handle_t hTracker, } } #else /* DEBUG */ - (void)upstreamDoesNotFree; // unused in DEBUG build - (void)n_items; // unused in DEBUG build + (void)n_items; // unused in DEBUG build #endif /* DEBUG */ } static void clear_tracker(umf_memory_tracker_handle_t hTracker) { - clear_tracker_for_the_pool(hTracker, NULL, false); + clear_tracker_for_the_pool(hTracker, NULL); } static void trackingFinalize(void *provider) { @@ -480,8 +475,7 @@ static void trackingFinalize(void *provider) { // because it may need those resources till // the very end of exiting the application. if (!utils_is_running_in_proxy_lib()) { - clear_tracker_for_the_pool(p->hTracker, p->pool, - p->upstreamDoesNotFree); + clear_tracker_for_the_pool(p->hTracker, p->pool); } umf_ba_global_free(provider); @@ -760,11 +754,11 @@ umf_memory_provider_ops_t UMF_TRACKING_MEMORY_PROVIDER_OPS = { .initialize = trackingInitialize, .finalize = trackingFinalize, .alloc = trackingAlloc, + .free = trackingFree, .get_last_native_error = trackingGetLastError, .get_min_page_size = trackingGetMinPageSize, .get_recommended_page_size = trackingGetRecommendedPageSize, .get_name = trackingName, - .ext.free = trackingFree, .ext.purge_force = trackingPurgeForce, .ext.purge_lazy = trackingPurgeLazy, .ext.allocation_split = trackingAllocationSplit, @@ -777,11 +771,10 @@ umf_memory_provider_ops_t UMF_TRACKING_MEMORY_PROVIDER_OPS = { umf_result_t umfTrackingMemoryProviderCreate( umf_memory_provider_handle_t hUpstream, umf_memory_pool_handle_t hPool, - umf_memory_provider_handle_t *hTrackingProvider, bool upstreamDoesNotFree) { + umf_memory_provider_handle_t *hTrackingProvider) { umf_tracking_memory_provider_t params; params.hUpstream = hUpstream; - params.upstreamDoesNotFree = upstreamDoesNotFree; params.hTracker = TRACKER; if (!params.hTracker) { LOG_ERR("failed, TRACKER is NULL"); diff --git a/src/provider/provider_tracking.h b/src/provider/provider_tracking.h index 9444ee475..2abc36505 100644 --- a/src/provider/provider_tracking.h +++ b/src/provider/provider_tracking.h @@ -54,7 +54,7 @@ umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, // forwards all requests to hUpstream memory Provider. hUpstream lifetime should be managed by the user of this function. 
umf_result_t umfTrackingMemoryProviderCreate( umf_memory_provider_handle_t hUpstream, umf_memory_pool_handle_t hPool, - umf_memory_provider_handle_t *hTrackingProvider, bool upstreamDoesNotFree); + umf_memory_provider_handle_t *hTrackingProvider); void umfTrackingMemoryProviderGetUpstreamProvider( umf_memory_provider_handle_t hTrackingProvider, diff --git a/test/common/provider_null.c b/test/common/provider_null.c index 5db389e89..e667bfce4 100644 --- a/test/common/provider_null.c +++ b/test/common/provider_null.c @@ -134,11 +134,11 @@ umf_memory_provider_ops_t UMF_NULL_PROVIDER_OPS = { .initialize = nullInitialize, .finalize = nullFinalize, .alloc = nullAlloc, + .free = nullFree, .get_last_native_error = nullGetLastError, .get_recommended_page_size = nullGetRecommendedPageSize, .get_min_page_size = nullGetPageSize, .get_name = nullName, - .ext.free = nullFree, .ext.purge_lazy = nullPurgeLazy, .ext.purge_force = nullPurgeForce, .ext.allocation_merge = nullAllocationMerge, diff --git a/test/common/provider_trace.c b/test/common/provider_trace.c index 219dde5cd..9d063b4f5 100644 --- a/test/common/provider_trace.c +++ b/test/common/provider_trace.c @@ -195,11 +195,11 @@ umf_memory_provider_ops_t UMF_TRACE_PROVIDER_OPS = { .initialize = traceInitialize, .finalize = traceFinalize, .alloc = traceAlloc, + .free = traceFree, .get_last_native_error = traceGetLastError, .get_recommended_page_size = traceGetRecommendedPageSize, .get_min_page_size = traceGetPageSize, .get_name = traceName, - .ext.free = traceFree, .ext.purge_lazy = tracePurgeLazy, .ext.purge_force = tracePurgeForce, .ext.allocation_merge = traceAllocationMerge, diff --git a/test/memoryProviderAPI.cpp b/test/memoryProviderAPI.cpp index 866ae6dae..2dc7261f0 100644 --- a/test/memoryProviderAPI.cpp +++ b/test/memoryProviderAPI.cpp @@ -89,19 +89,6 @@ TEST_F(test, memoryProviderTrace) { ASSERT_EQ(calls.size(), ++call_count); } -TEST_F(test, memoryProviderOpsNullFreeField) { - umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; - provider_ops.ext.free = nullptr; - umf_memory_provider_handle_t hProvider; - auto ret = umfMemoryProviderCreate(&provider_ops, nullptr, &hProvider); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - - ret = umfMemoryProviderFree(hProvider, nullptr, 0); - ASSERT_EQ(ret, UMF_RESULT_ERROR_NOT_SUPPORTED); - - umfMemoryProviderDestroy(hProvider); -} - TEST_F(test, memoryProviderOpsNullPurgeLazyField) { umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; provider_ops.ext.purge_lazy = nullptr; @@ -204,6 +191,14 @@ TEST_F(test, memoryProviderOpsNullAllocField) { ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); } +TEST_F(test, memoryProviderOpsNullFreeField) { + umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; + provider_ops.free = nullptr; + umf_memory_provider_handle_t hProvider; + auto ret = umfMemoryProviderCreate(&provider_ops, nullptr, &hProvider); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + TEST_F(test, memoryProviderOpsNullGetLastNativeErrorField) { umf_memory_provider_ops_t provider_ops = UMF_NULL_PROVIDER_OPS; provider_ops.get_last_native_error = nullptr; From 35332da8007882d97e3aeda8d58a4f9ac44c2fea Mon Sep 17 00:00:00 2001 From: "Vinogradov, Sergei" Date: Thu, 5 Dec 2024 11:29:15 +0100 Subject: [PATCH 022/466] Minor fixes to clean up resources in tests --- test/pools/disjoint_pool.cpp | 4 ++++ test/provider_os_memory.cpp | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index 319997c82..471e53dc2 100644 
--- a/test/pools/disjoint_pool.cpp +++ b/test/pools/disjoint_pool.cpp @@ -29,20 +29,24 @@ disjoint_params_unique_handle_t poolConfig() { res = umfDisjointPoolParamsSetSlabMinSize(config, DEFAULT_DISJOINT_SLAB_MIN_SIZE); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); throw std::runtime_error("Failed to set slab min size"); } res = umfDisjointPoolParamsSetMaxPoolableSize( config, DEFAULT_DISJOINT_MAX_POOLABLE_SIZE); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); throw std::runtime_error("Failed to set max poolable size"); } res = umfDisjointPoolParamsSetCapacity(config, DEFAULT_DISJOINT_CAPACITY); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); throw std::runtime_error("Failed to set capacity"); } res = umfDisjointPoolParamsSetMinBucketSize( config, DEFAULT_DISJOINT_MIN_BUCKET_SIZE); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); throw std::runtime_error("Failed to set min bucket size"); } diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index 687db0805..4c81b84f9 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -441,18 +441,22 @@ disjoint_params_unique_handle_t disjointPoolParams() { } res = umfDisjointPoolParamsSetSlabMinSize(params, 4096); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(params); throw std::runtime_error("Failed to set slab min size"); } res = umfDisjointPoolParamsSetMaxPoolableSize(params, 4096); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(params); throw std::runtime_error("Failed to set max poolable size"); } res = umfDisjointPoolParamsSetCapacity(params, 4); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(params); throw std::runtime_error("Failed to set capacity"); } res = umfDisjointPoolParamsSetMinBucketSize(params, 64); if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(params); throw std::runtime_error("Failed to set min bucket size"); } From f311aee1c39c3717b739ec2bc8a5a508d7c5b437 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 5 Dec 2024 14:55:29 +0100 Subject: [PATCH 023/466] Enable all IPC tests Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_dax.yml | 8 ++------ .github/workflows/reusable_proxy_lib.yml | 8 ++------ 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/.github/workflows/reusable_dax.yml b/.github/workflows/reusable_dax.yml index f7c5d0d21..af15226d2 100644 --- a/.github/workflows/reusable_dax.yml +++ b/.github/workflows/reusable_dax.yml @@ -106,8 +106,6 @@ jobs: UMF_TESTS_FSDAX_PATH_2=${{env.UMF_TESTS_FSDAX_PATH_2}} ctest -C ${{matrix.build_type}} -V -R "file|fsdax" - # TODO: enable the provider_devdax_memory_ipc test when the IPC tests with the proxy library are fixed - # see the issue: https://github.com/oneapi-src/unified-memory-framework/issues/864 - name: Run the DEVDAX tests with the proxy library # proxy library is built only if libumf is a shared library if: ${{ matrix.shared_library == 'ON' }} @@ -116,10 +114,8 @@ jobs: LD_PRELOAD=./lib/libumf_proxy.so UMF_TESTS_DEVDAX_PATH="/dev/dax${{env.DEVDAX_NAMESPACE}}" UMF_TESTS_DEVDAX_SIZE="$(ndctl list --namespace=namespace${{env.DEVDAX_NAMESPACE}} | grep size | cut -d':' -f2 | cut -d',' -f1)" - ctest -C ${{matrix.build_type}} -V -R devdax -E provider_devdax_memory_ipc + ctest -C ${{matrix.build_type}} -V -R devdax - # TODO: enable the provider_file_memory_ipc test when the IPC tests with the proxy library are fixed - # see the issue: 
https://github.com/oneapi-src/unified-memory-framework/issues/864 - name: Run the FSDAX tests with the proxy library # proxy library is built only if libumf is a shared library if: ${{ matrix.shared_library == 'ON' }} @@ -128,7 +124,7 @@ jobs: LD_PRELOAD=./lib/libumf_proxy.so UMF_TESTS_FSDAX_PATH=${{env.UMF_TESTS_FSDAX_PATH}} UMF_TESTS_FSDAX_PATH_2=${{env.UMF_TESTS_FSDAX_PATH_2}} - ctest -C ${{matrix.build_type}} -V -R "file|fsdax" -E provider_file_memory_ipc + ctest -C ${{matrix.build_type}} -V -R "file|fsdax" - name: Check coverage if: ${{ matrix.build_type == 'Debug' }} diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index 103c4a516..e73dabe29 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -59,11 +59,9 @@ jobs: - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) - # TODO enable the provider_file_memory_ipc test when the IPC tests with the proxy library are fixed - # see the issue: https://github.com/oneapi-src/unified-memory-framework/issues/864 - name: Run "ctest --output-on-failure" with proxy library working-directory: ${{env.BUILD_DIR}} - run: LD_PRELOAD=./lib/libumf_proxy.so ctest --output-on-failure -E provider_file_memory_ipc + run: LD_PRELOAD=./lib/libumf_proxy.so ctest --output-on-failure - name: Run "./test/umf_test-memoryPool" with proxy library working-directory: ${{env.BUILD_DIR}} @@ -77,14 +75,12 @@ jobs: working-directory: ${{env.BUILD_DIR}} run: UMF_PROXY="page.disposition=shared-shm" LD_PRELOAD=./lib/libumf_proxy.so /usr/bin/date - # TODO enable the provider_file_memory_ipc test when the IPC tests with the proxy library are fixed - # see the issue: https://github.com/oneapi-src/unified-memory-framework/issues/864 - name: Run "ctest --output-on-failure" with proxy library and size.threshold=128 working-directory: ${{env.BUILD_DIR}} run: > UMF_PROXY="page.disposition=shared-shm;size.threshold=128" LD_PRELOAD=./lib/libumf_proxy.so - ctest --output-on-failure -E provider_file_memory_ipc + ctest --output-on-failure - name: Check coverage if: ${{ matrix.build_type == 'Debug' }} From c173e56c494f9091f2f2d9450bfe2df7afd14cbc Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 5 Dec 2024 15:39:46 +0100 Subject: [PATCH 024/466] Fix and add error messages Fix two error messages and add some new ones. Signed-off-by: Lukasz Dorau --- src/provider/provider_file_memory.c | 37 ++++++++++++++++++++++------- src/provider/provider_os_memory.c | 4 +--- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/provider/provider_file_memory.c b/src/provider/provider_file_memory.c index 558b1062a..14e3b6cb0 100644 --- a/src/provider/provider_file_memory.c +++ b/src/provider/provider_file_memory.c @@ -485,6 +485,8 @@ static umf_result_t file_alloc_cb(void *provider, size_t size, size_t alignment, file_memory_provider_t *file_provider = (file_memory_provider_t *)provider; + *resultPtr = NULL; + // alignment must be a power of two and a multiple or a divider of the page size if (alignment && ((alignment & (alignment - 1)) || ((alignment % file_provider->page_size) && @@ -517,8 +519,15 @@ static umf_result_t file_alloc_cb(void *provider, size_t size, size_t alignment, LOG_ERR("inserting a value to the file descriptor offset map failed " "(addr=%p, offset=%zu)", addr, alloc_offset_fd); + // We cannot undo the file_alloc_aligned() call here, + // because the file memory provider does not support the free operation. 
+ return UMF_RESULT_ERROR_UNKNOWN; } + LOG_DEBUG("inserted a value to the file descriptor offset map (addr=%p, " + "offset=%zu)", + addr, alloc_offset_fd); + *resultPtr = addr; return UMF_RESULT_SUCCESS; @@ -623,23 +632,31 @@ static umf_result_t file_allocation_split_cb(void *provider, void *ptr, void *value = critnib_get(file_provider->fd_offset_map, (uintptr_t)ptr); if (value == NULL) { - LOG_ERR("file_allocation_split(): getting a value from the file " - "descriptor offset map failed (addr=%p)", + LOG_ERR("getting a value from the file descriptor offset map failed " + "(addr=%p)", ptr); return UMF_RESULT_ERROR_UNKNOWN; } + LOG_DEBUG("split the value from the file descriptor offset map (addr=%p) " + "from size %zu to %zu + %zu", + ptr, totalSize, firstSize, totalSize - firstSize); + uintptr_t new_key = (uintptr_t)ptr + firstSize; void *new_value = (void *)((uintptr_t)value + firstSize); int ret = critnib_insert(file_provider->fd_offset_map, new_key, new_value, 0 /* update */); if (ret) { - LOG_ERR("file_allocation_split(): inserting a value to the file " - "descriptor offset map failed (addr=%p, offset=%zu)", + LOG_ERR("inserting a value to the file descriptor offset map failed " + "(addr=%p, offset=%zu)", (void *)new_key, (size_t)new_value - 1); return UMF_RESULT_ERROR_UNKNOWN; } + LOG_DEBUG("inserted a value to the file descriptor offset map (addr=%p, " + "offset=%zu)", + (void *)new_key, (size_t)new_value - 1); + return UMF_RESULT_SUCCESS; } @@ -662,12 +679,16 @@ static umf_result_t file_allocation_merge_cb(void *provider, void *lowPtr, void *value = critnib_remove(file_provider->fd_offset_map, (uintptr_t)highPtr); if (value == NULL) { - LOG_ERR("file_allocation_merge(): removing a value from the file " - "descriptor offset map failed (addr=%p)", + LOG_ERR("removing a value from the file descriptor offset map failed " + "(addr=%p)", highPtr); return UMF_RESULT_ERROR_UNKNOWN; } + LOG_DEBUG("removed a value from the file descriptor offset map (addr=%p) - " + "merged with %p", + highPtr, lowPtr); + return UMF_RESULT_SUCCESS; } @@ -701,9 +722,7 @@ static umf_result_t file_get_ipc_handle(void *provider, const void *ptr, void *value = critnib_get(file_provider->fd_offset_map, (uintptr_t)ptr); if (value == NULL) { - LOG_ERR("file_get_ipc_handle(): getting a value from the IPC cache " - "failed (addr=%p)", - ptr); + LOG_ERR("getting a value from the IPC cache failed (addr=%p)", ptr); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index 4c19944a9..04054c20c 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -1286,9 +1286,7 @@ static umf_result_t os_get_ipc_handle(void *provider, const void *ptr, void *value = critnib_get(os_provider->fd_offset_map, (uintptr_t)ptr); if (value == NULL) { - LOG_ERR("os_get_ipc_handle(): getting a value from the IPC cache " - "failed (addr=%p)", - ptr); + LOG_ERR("getting a value from the IPC cache failed (addr=%p)", ptr); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } From c6749f2b7a92f0d11d7b831d984249e5d3d4d786 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 29 Nov 2024 11:54:44 +0100 Subject: [PATCH 025/466] Remove the Coarse provider Fixes: #900 Signed-off-by: Lukasz Dorau --- include/umf/providers/provider_coarse.h | 116 -- src/CMakeLists.txt | 1 - src/libumf.def | 2 - src/libumf.map | 2 - src/provider/provider_coarse.c | 1693 ----------------------- test/CMakeLists.txt | 15 +- test/disjointCoarseMallocPool.cpp | 317 +---- 
test/memoryPoolAPI.cpp | 8 +- test/poolFixtures.hpp | 25 +- test/pools/disjoint_pool.cpp | 19 +- test/pools/jemalloc_coarse_devdax.cpp | 10 +- test/pools/jemalloc_coarse_file.cpp | 4 +- test/pools/jemalloc_pool.cpp | 8 +- test/pools/pool_base_alloc.cpp | 2 +- test/pools/pool_coarse.hpp | 2 - test/pools/scalable_coarse_devdax.cpp | 10 +- test/pools/scalable_coarse_file.cpp | 4 +- test/pools/scalable_pool.cpp | 3 +- test/provider_coarse.cpp | 668 --------- 19 files changed, 86 insertions(+), 2823 deletions(-) delete mode 100644 include/umf/providers/provider_coarse.h delete mode 100644 src/provider/provider_coarse.c delete mode 100644 test/provider_coarse.cpp diff --git a/include/umf/providers/provider_coarse.h b/include/umf/providers/provider_coarse.h deleted file mode 100644 index 6ed6e0fbc..000000000 --- a/include/umf/providers/provider_coarse.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (C) 2023-2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ - -#ifndef UMF_COARSE_PROVIDER_H -#define UMF_COARSE_PROVIDER_H - -#include -#include - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/// @brief Coarse Memory Provider allocation strategy -typedef enum coarse_memory_provider_strategy_t { - /// Always allocate a free block of the (size + alignment) size - /// and cut out the properly aligned part leaving two remaining parts. - /// It is the fastest strategy but causes memory fragmentation - /// when alignment is greater than 0. - /// It is the best strategy when alignment always equals 0. - UMF_COARSE_MEMORY_STRATEGY_FASTEST = 0, - - /// Check if the first free block of the 'size' size has the correct alignment. - /// If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. - UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, - - /// Look through all free blocks of the 'size' size - /// and choose the first one with the correct alignment. - /// If none of them had the correct alignment, - /// use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. - UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE, - - /// The maximum value (it has to be the last one). - UMF_COARSE_MEMORY_STRATEGY_MAX -} coarse_memory_provider_strategy_t; - -/// @brief Coarse Memory Provider settings struct. -typedef struct coarse_memory_provider_params_t { - /// Handle to the upstream memory provider. - /// It has to be NULL if init_buffer is set - /// (exactly one of them has to be non-NULL). - umf_memory_provider_handle_t upstream_memory_provider; - - /// Memory allocation strategy. - /// See coarse_memory_provider_strategy_t for details. - coarse_memory_provider_strategy_t allocation_strategy; - - /// A pre-allocated buffer that will be the only memory that - /// the coarse provider can provide (the fixed-size memory provider option). - /// If it is non-NULL, `init_buffer_size ` has to contain its size. - /// It has to be NULL if upstream_memory_provider is set - /// (exactly one of them has to be non-NULL). - void *init_buffer; - - /// Size of the initial buffer: - /// 1) `init_buffer` if it is non-NULL xor - /// 2) that will be allocated from the upstream_memory_provider - /// (if it is non-NULL) in the `.initialize` operation. - size_t init_buffer_size; - - /// When it is true and the upstream_memory_provider is given, - /// the init buffer (of `init_buffer_size` bytes) would be pre-allocated - /// during creation time using the `upstream_memory_provider`. 
- /// If upstream_memory_provider is not given, - /// the init_buffer is always used instead - /// (regardless of the value of this parameter). - bool immediate_init_from_upstream; - - /// Destroy upstream_memory_provider in finalize(). - bool destroy_upstream_memory_provider; -} coarse_memory_provider_params_t; - -/// @brief Coarse Memory Provider stats (TODO move to CTL) -typedef struct coarse_memory_provider_stats_t { - /// Total allocation size. - size_t alloc_size; - - /// Size of used memory. - size_t used_size; - - /// Number of memory blocks allocated from the upstream provider. - size_t num_upstream_blocks; - - /// Total number of allocated memory blocks. - size_t num_all_blocks; - - /// Number of free memory blocks. - size_t num_free_blocks; -} coarse_memory_provider_stats_t; - -umf_memory_provider_ops_t *umfCoarseMemoryProviderOps(void); - -// TODO use CTL -coarse_memory_provider_stats_t -umfCoarseMemoryProviderGetStats(umf_memory_provider_handle_t provider); - -/// @brief Create default params for the coarse memory provider -static inline coarse_memory_provider_params_t -umfCoarseMemoryProviderParamsDefault(void) { - coarse_memory_provider_params_t coarse_memory_provider_params; - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - return coarse_memory_provider_params; -} - -#ifdef __cplusplus -} -#endif - -#endif // UMF_COARSE_PROVIDER_H diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8b1e2248a..fb32b6d2e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -59,7 +59,6 @@ set(UMF_SOURCES memspaces/memspace_highest_bandwidth.c memspaces/memspace_lowest_latency.c memspaces/memspace_numa.c - provider/provider_coarse.c provider/provider_cuda.c provider/provider_devdax_memory.c provider/provider_file_memory.c diff --git a/src/libumf.def b/src/libumf.def index f2b24be6c..c0cd1c90c 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -14,8 +14,6 @@ EXPORTS umfTearDown umfGetCurrentVersion umfCloseIPCHandle - umfCoarseMemoryProviderGetStats - umfCoarseMemoryProviderOps umfCUDAMemoryProviderOps umfCUDAMemoryProviderParamsCreate umfCUDAMemoryProviderParamsDestroy diff --git a/src/libumf.map b/src/libumf.map index 067ec8838..8a7bdc81c 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -8,8 +8,6 @@ UMF_1.0 { umfTearDown; umfGetCurrentVersion; umfCloseIPCHandle; - umfCoarseMemoryProviderGetStats; - umfCoarseMemoryProviderOps; umfCUDAMemoryProviderOps; umfCUDAMemoryProviderParamsCreate; umfCUDAMemoryProviderParamsDestroy; diff --git a/src/provider/provider_coarse.c b/src/provider/provider_coarse.c deleted file mode 100644 index 72985faaf..000000000 --- a/src/provider/provider_coarse.c +++ /dev/null @@ -1,1693 +0,0 @@ -/* - * Copyright (C) 2023-2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ - -#include -#include -#include -#include -#include -#include - -#include - -#include "base_alloc_global.h" -#include "memory_provider_internal.h" -#include "ravl.h" -#include "utils_common.h" -#include "utils_concurrency.h" -#include "utils_log.h" - -#define COARSE_BASE_NAME "coarse" - -#define IS_ORIGIN_OF_BLOCK(origin, block) \ - (((uintptr_t)(block)->data >= (uintptr_t)(origin)->data) && \ - ((uintptr_t)(block)->data + (block)->size <= \ - (uintptr_t)(origin)->data + (origin)->size)) - -typedef struct coarse_memory_provider_t { - umf_memory_provider_handle_t upstream_memory_provider; - - // destroy upstream_memory_provider in finalize() - bool destroy_upstream_memory_provider; - - // memory allocation strategy - coarse_memory_provider_strategy_t allocation_strategy; - - void *init_buffer; - - size_t used_size; - size_t alloc_size; - - // upstream_blocks - tree of all blocks allocated from the upstream provider - struct ravl *upstream_blocks; - - // all_blocks - tree of all blocks - sorted by an address of data - struct ravl *all_blocks; - - // free_blocks - tree of free blocks - sorted by a size of data, - // each node contains a pointer (ravl_free_blocks_head_t) - // to the head of the list of free blocks of the same size - struct ravl *free_blocks; - - struct utils_mutex_t lock; - - // Name of the provider with the upstream provider: - // "coarse ()" - // for example: "coarse (L0)" - char *name; -} coarse_memory_provider_t; - -typedef struct ravl_node ravl_node_t; - -typedef enum check_free_blocks_t { - CHECK_ONLY_THE_FIRST_BLOCK = 0, - CHECK_ALL_BLOCKS_OF_SIZE, -} check_free_blocks_t; - -typedef struct block_t { - size_t size; - unsigned char *data; - bool used; - - // Node in the list of free blocks of the same size pointing to this block. - // The list is located in the (coarse_provider->free_blocks) RAVL tree. - struct ravl_free_blocks_elem_t *free_list_ptr; -} block_t; - -// A general node in a RAVL tree. -// 1) coarse_provider->all_blocks RAVL tree (tree of all blocks - sorted by an address of data): -// key - pointer (block_t->data) to the beginning of the block data -// value - pointer (block_t) to the block of the allocation -// 2) coarse_provider->free_blocks RAVL tree (tree of free blocks - sorted by a size of data): -// key - size of the allocation (block_t->size) -// value - pointer (ravl_free_blocks_head_t) to the head of the list of free blocks of the same size -typedef struct ravl_data_t { - uintptr_t key; - void *value; -} ravl_data_t; - -// The head of the list of free blocks of the same size. 
-typedef struct ravl_free_blocks_head_t { - struct ravl_free_blocks_elem_t *head; -} ravl_free_blocks_head_t; - -// The node of the list of free blocks of the same size -typedef struct ravl_free_blocks_elem_t { - struct block_t *block; - struct ravl_free_blocks_elem_t *next; - struct ravl_free_blocks_elem_t *prev; -} ravl_free_blocks_elem_t; - -// The compare function of a RAVL tree -static int coarse_ravl_comp(const void *lhs, const void *rhs) { - const ravl_data_t *lhs_ravl = (const ravl_data_t *)lhs; - const ravl_data_t *rhs_ravl = (const ravl_data_t *)rhs; - - if (lhs_ravl->key < rhs_ravl->key) { - return -1; - } - - if (lhs_ravl->key > rhs_ravl->key) { - return 1; - } - - // lhs_ravl->key == rhs_ravl->key - return 0; -} - -static inline block_t *get_node_block(ravl_node_t *node) { - ravl_data_t *node_data = ravl_data(node); - assert(node_data); - assert(node_data->value); - return node_data->value; -} - -static inline ravl_node_t *get_node_prev(ravl_node_t *node) { - return ravl_node_predecessor(node); -} - -static inline ravl_node_t *get_node_next(ravl_node_t *node) { - return ravl_node_successor(node); -} - -#ifndef NDEBUG -static block_t *get_block_prev(ravl_node_t *node) { - ravl_node_t *ravl_prev = ravl_node_predecessor(node); - if (!ravl_prev) { - return NULL; - } - - return get_node_block(ravl_prev); -} - -static block_t *get_block_next(ravl_node_t *node) { - ravl_node_t *ravl_next = ravl_node_successor(node); - if (!ravl_next) { - return NULL; - } - - return get_node_block(ravl_next); -} -#endif /* NDEBUG */ - -static bool is_same_origin(struct ravl *upstream_blocks, block_t *block1, - block_t *block2) { - ravl_data_t rdata1 = {(uintptr_t)block1->data, NULL}; - ravl_node_t *ravl_origin1 = - ravl_find(upstream_blocks, &rdata1, RAVL_PREDICATE_LESS_EQUAL); - assert(ravl_origin1); - - block_t *origin1 = get_node_block(ravl_origin1); - assert(IS_ORIGIN_OF_BLOCK(origin1, block1)); - - return (IS_ORIGIN_OF_BLOCK(origin1, block2)); -} - -// The functions "coarse_ravl_*" handle lists of blocks: -// - coarse_provider->all_blocks and coarse_provider->upstream_blocks -// sorted by a pointer (block_t->data) to the beginning of the block data. -// -// coarse_ravl_add_new - allocate and add a new block to the tree -// and link this block to the next and the previous one. 
-static block_t *coarse_ravl_add_new(struct ravl *rtree, unsigned char *data, - size_t size, ravl_node_t **node) { - assert(rtree); - assert(data); - assert(size); - - // TODO add valgrind annotations - block_t *block = umf_ba_global_alloc(sizeof(*block)); - if (block == NULL) { - return NULL; - } - - block->data = data; - block->size = size; - block->free_list_ptr = NULL; - - ravl_data_t rdata = {(uintptr_t)block->data, block}; - assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL)); - int ret = ravl_emplace_copy(rtree, &rdata); - if (ret) { - umf_ba_global_free(block); - return NULL; - } - - ravl_node_t *new_node = ravl_find(rtree, &rdata, RAVL_PREDICATE_EQUAL); - assert(NULL != new_node); - - if (node) { - *node = new_node; - } - - return block; -} - -// coarse_ravl_find_node - find the node in the tree -static ravl_node_t *coarse_ravl_find_node(struct ravl *rtree, void *ptr) { - ravl_data_t data = {(uintptr_t)ptr, NULL}; - return ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL); -} - -// coarse_ravl_rm - remove the block from the tree -static block_t *coarse_ravl_rm(struct ravl *rtree, void *ptr) { - ravl_data_t data = {(uintptr_t)ptr, NULL}; - ravl_node_t *node; - node = ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL); - if (node) { - ravl_data_t *node_data = ravl_data(node); - assert(node_data); - block_t *block = node_data->value; - assert(block); - ravl_remove(rtree, node); - assert(NULL == ravl_find(rtree, &data, RAVL_PREDICATE_EQUAL)); - return block; - } - return NULL; -} - -// The functions "node_list_*" handle lists of free blocks of the same size. -// The heads (ravl_free_blocks_head_t) of those lists are stored in nodes of -// the coarse_provider->free_blocks RAVL tree. -// -// node_list_add - add a free block to the list of free blocks of the same size -static ravl_free_blocks_elem_t * -node_list_add(ravl_free_blocks_head_t *head_node, struct block_t *block) { - assert(head_node); - assert(block); - - ravl_free_blocks_elem_t *node = umf_ba_global_alloc(sizeof(*node)); - if (node == NULL) { - return NULL; - } - - if (head_node->head) { - head_node->head->prev = node; - } - - node->block = block; - node->next = head_node->head; - node->prev = NULL; - head_node->head = node; - - return node; -} - -// node_list_rm - remove the given free block from the list of free blocks of the same size -static block_t *node_list_rm(ravl_free_blocks_head_t *head_node, - ravl_free_blocks_elem_t *node) { - assert(head_node); - assert(node); - - if (!head_node->head) { - return NULL; - } - - if (node == head_node->head) { - assert(node->prev == NULL); - head_node->head = node->next; - } - - ravl_free_blocks_elem_t *node_next = node->next; - ravl_free_blocks_elem_t *node_prev = node->prev; - if (node_next) { - node_next->prev = node_prev; - } - - if (node_prev) { - node_prev->next = node_next; - } - - struct block_t *block = node->block; - block->free_list_ptr = NULL; - umf_ba_global_free(node); - - return block; -} - -// node_list_rm_first - remove the first free block from the list of free blocks of the same size only if it can be properly aligned -static block_t *node_list_rm_first(ravl_free_blocks_head_t *head_node, - size_t alignment) { - assert(head_node); - - if (!head_node->head) { - return NULL; - } - - ravl_free_blocks_elem_t *node = head_node->head; - assert(node->prev == NULL); - struct block_t *block = node->block; - - if (IS_NOT_ALIGNED(block->size, alignment)) { - return NULL; - } - - if (node->next) { - node->next->prev = NULL; - } - - head_node->head = node->next; - 
block->free_list_ptr = NULL; - umf_ba_global_free(node); - - return block; -} - -// node_list_rm_with_alignment - remove the first free block with the correct alignment from the list of free blocks of the same size -static block_t *node_list_rm_with_alignment(ravl_free_blocks_head_t *head_node, - size_t alignment) { - assert(head_node); - - if (!head_node->head) { - return NULL; - } - - assert(((ravl_free_blocks_elem_t *)head_node->head)->prev == NULL); - - ravl_free_blocks_elem_t *node; - for (node = head_node->head; node != NULL; node = node->next) { - if (IS_ALIGNED(node->block->size, alignment)) { - return node_list_rm(head_node, node); - } - } - - return NULL; -} - -// The functions "free_blocks_*" handle the coarse_provider->free_blocks RAVL tree -// sorted by a size of the allocation (block_t->size). -// This is a tree of heads (ravl_free_blocks_head_t) of lists of free blocks of the same size. -// -// free_blocks_add - add a free block to the list of free blocks of the same size -static int free_blocks_add(struct ravl *free_blocks, block_t *block) { - ravl_free_blocks_head_t *head_node = NULL; - int rv; - - ravl_data_t head_node_data = {(uintptr_t)block->size, NULL}; - ravl_node_t *node; - node = ravl_find(free_blocks, &head_node_data, RAVL_PREDICATE_EQUAL); - if (node) { - ravl_data_t *node_data = ravl_data(node); - assert(node_data); - head_node = node_data->value; - assert(head_node); - } else { // no head_node - head_node = umf_ba_global_alloc(sizeof(*head_node)); - if (!head_node) { - return -1; - } - - head_node->head = NULL; - - ravl_data_t data = {(uintptr_t)block->size, head_node}; - rv = ravl_emplace_copy(free_blocks, &data); - if (rv) { - umf_ba_global_free(head_node); - return -1; - } - } - - block->free_list_ptr = node_list_add(head_node, block); - if (!block->free_list_ptr) { - return -1; - } - - assert(block->free_list_ptr->block->size == block->size); - - return 0; -} - -// free_blocks_rm_ge - remove the first free block of a size greater or equal to the given size only if it can be properly aligned -// If it was the last block, the head node is freed and removed from the tree. -// It is used during memory allocation (looking for a free block). -static block_t *free_blocks_rm_ge(struct ravl *free_blocks, size_t size, - size_t alignment, - check_free_blocks_t check_blocks) { - ravl_data_t data = {(uintptr_t)size, NULL}; - ravl_node_t *node; - node = ravl_find(free_blocks, &data, RAVL_PREDICATE_GREATER_EQUAL); - if (!node) { - return NULL; - } - - ravl_data_t *node_data = ravl_data(node); - assert(node_data); - assert(node_data->key >= size); - - ravl_free_blocks_head_t *head_node = node_data->value; - assert(head_node); - - block_t *block; - switch (check_blocks) { - case CHECK_ONLY_THE_FIRST_BLOCK: - block = node_list_rm_first(head_node, alignment); - break; - case CHECK_ALL_BLOCKS_OF_SIZE: - block = node_list_rm_with_alignment(head_node, alignment); - break; - // wrong value of check_blocks - default: - abort(); - } - - if (head_node->head == NULL) { - umf_ba_global_free(head_node); - ravl_remove(free_blocks, node); - } - - return block; -} - -// free_blocks_rm_node - remove the free block pointed by the given node. -// If it was the last block, the head node is freed and removed from the tree. -// It is used during merging free blocks and destroying the coarse_provider->free_blocks tree. 
-static block_t *free_blocks_rm_node(struct ravl *free_blocks, - ravl_free_blocks_elem_t *node) { - assert(free_blocks); - assert(node); - size_t size = node->block->size; - ravl_data_t data = {(uintptr_t)size, NULL}; - ravl_node_t *ravl_node; - ravl_node = ravl_find(free_blocks, &data, RAVL_PREDICATE_EQUAL); - assert(ravl_node); - - ravl_data_t *node_data = ravl_data(ravl_node); - assert(node_data); - assert(node_data->key == size); - - ravl_free_blocks_head_t *head_node = node_data->value; - assert(head_node); - - block_t *block = node_list_rm(head_node, node); - - if (head_node->head == NULL) { - umf_ba_global_free(head_node); - ravl_remove(free_blocks, ravl_node); - } - - return block; -} - -// user_block_merge - merge two blocks from one of two lists of user blocks: all_blocks or free_blocks -static umf_result_t user_block_merge(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node1, ravl_node_t *node2, - bool used, ravl_node_t **merged_node) { - assert(node1); - assert(node2); - assert(node1 == get_node_prev(node2)); - assert(node2 == get_node_next(node1)); - assert(merged_node); - - *merged_node = NULL; - - struct ravl *upstream_blocks = coarse_provider->upstream_blocks; - struct ravl *all_blocks = coarse_provider->all_blocks; - struct ravl *free_blocks = coarse_provider->free_blocks; - - block_t *block1 = get_node_block(node1); - block_t *block2 = get_node_block(node2); - assert(block1->data < block2->data); - - bool same_used = ((block1->used == used) && (block2->used == used)); - bool contignous_data = (block1->data + block1->size == block2->data); - bool same_origin = is_same_origin(upstream_blocks, block1, block2); - - // check if blocks can be merged - if (!same_used || !contignous_data || !same_origin) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (block1->free_list_ptr) { - free_blocks_rm_node(free_blocks, block1->free_list_ptr); - block1->free_list_ptr = NULL; - } - - if (block2->free_list_ptr) { - free_blocks_rm_node(free_blocks, block2->free_list_ptr); - block2->free_list_ptr = NULL; - } - - // update the size - block1->size += block2->size; - - block_t *block_rm = coarse_ravl_rm(all_blocks, block2->data); - assert(block_rm == block2); - (void)block_rm; // WA for unused variable error - umf_ba_global_free(block2); - - *merged_node = node1; - - return UMF_RESULT_SUCCESS; -} - -// free_block_merge_with_prev - merge the given free block -// with the previous one if both are unused and have continuous data. -// Remove the merged block from the tree of free blocks. -static ravl_node_t * -free_block_merge_with_prev(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node) { - ravl_node_t *node_prev = get_node_prev(node); - if (!node_prev) { - return node; - } - - ravl_node_t *merged_node = NULL; - umf_result_t umf_result = - user_block_merge(coarse_provider, node_prev, node, false, &merged_node); - if (umf_result != UMF_RESULT_SUCCESS) { - return node; - } - - assert(merged_node != NULL); - - return merged_node; -} - -// free_block_merge_with_next - merge the given free block -// with the next one if both are unused and have continuous data. -// Remove the merged block from the tree of free blocks. 
-static ravl_node_t * -free_block_merge_with_next(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node) { - ravl_node_t *node_next = get_node_next(node); - if (!node_next) { - return node; - } - - ravl_node_t *merged_node = NULL; - umf_result_t umf_result = - user_block_merge(coarse_provider, node, node_next, false, &merged_node); - if (umf_result != UMF_RESULT_SUCCESS) { - return node; - } - - assert(merged_node != NULL); - - return merged_node; -} - -// upstream_block_merge - merge the given two upstream blocks -static umf_result_t -upstream_block_merge(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node1, ravl_node_t *node2, - ravl_node_t **merged_node) { - assert(node1); - assert(node2); - assert(merged_node); - - *merged_node = NULL; - - umf_memory_provider_handle_t upstream_provider = - coarse_provider->upstream_memory_provider; - if (!upstream_provider) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - block_t *block1 = get_node_block(node1); - block_t *block2 = get_node_block(node2); - assert(block1->data < block2->data); - - bool contignous_data = (block1->data + block1->size == block2->data); - if (!contignous_data) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - // check if blocks can be merged by the upstream provider - umf_result_t merge_status = umfMemoryProviderAllocationMerge( - coarse_provider->upstream_memory_provider, block1->data, block2->data, - block1->size + block2->size); - if (merge_status != UMF_RESULT_SUCCESS) { - return merge_status; - } - - // update the size - block1->size += block2->size; - - struct ravl *upstream_blocks = coarse_provider->upstream_blocks; - block_t *block_rm = coarse_ravl_rm(upstream_blocks, block2->data); - assert(block_rm == block2); - (void)block_rm; // WA for unused variable error - umf_ba_global_free(block2); - - *merged_node = node1; - - return UMF_RESULT_SUCCESS; -} - -// upstream_block_merge_with_prev - merge the given upstream block -// with the previous one if both have continuous data. -// Remove the merged block from the tree of upstream blocks. -static ravl_node_t * -upstream_block_merge_with_prev(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node) { - assert(node); - - ravl_node_t *node_prev = get_node_prev(node); - if (!node_prev) { - return node; - } - - ravl_node_t *merged_node = NULL; - umf_result_t umf_result = - upstream_block_merge(coarse_provider, node_prev, node, &merged_node); - if (umf_result != UMF_RESULT_SUCCESS) { - return node; - } - - assert(merged_node != NULL); - - return merged_node; -} - -// upstream_block_merge_with_next - merge the given upstream block -// with the next one if both have continuous data. -// Remove the merged block from the tree of upstream blocks. 
-static ravl_node_t * -upstream_block_merge_with_next(coarse_memory_provider_t *coarse_provider, - ravl_node_t *node) { - assert(node); - - ravl_node_t *node_next = get_node_next(node); - if (!node_next) { - return node; - } - - ravl_node_t *merged_node = NULL; - umf_result_t umf_result = - upstream_block_merge(coarse_provider, node, node_next, &merged_node); - if (umf_result != UMF_RESULT_SUCCESS) { - return node; - } - - assert(merged_node != NULL); - - return merged_node; -} - -#ifndef NDEBUG // begin of DEBUG code - -typedef struct debug_cb_args_t { - coarse_memory_provider_t *provider; - size_t sum_used; - size_t sum_blocks_size; - size_t num_all_blocks; - size_t num_free_blocks; - size_t num_alloc_blocks; - size_t sum_alloc_size; -} debug_cb_args_t; - -static void debug_verify_all_blocks_cb(void *data, void *arg) { - assert(data); - assert(arg); - - ravl_data_t *node_data = data; - block_t *block = node_data->value; - assert(block); - - debug_cb_args_t *cb_args = (debug_cb_args_t *)arg; - coarse_memory_provider_t *provider = cb_args->provider; - - ravl_node_t *node = - ravl_find(provider->all_blocks, data, RAVL_PREDICATE_EQUAL); - assert(node); - - block_t *block_next = get_block_next(node); - block_t *block_prev = get_block_prev(node); - - cb_args->num_all_blocks++; - if (!block->used) { - cb_args->num_free_blocks++; - } - - assert(block->data); - assert(block->size > 0); - - // There shouldn't be two adjacent unused blocks - // if they are continuous and have the same origin. - if (block_prev && !block_prev->used && !block->used && - (block_prev->data + block_prev->size == block->data)) { - assert(!is_same_origin(provider->upstream_blocks, block_prev, block)); - } - - if (block_next && !block_next->used && !block->used && - (block->data + block->size == block_next->data)) { - assert(!is_same_origin(provider->upstream_blocks, block, block_next)); - } - - // data addresses in the list are in ascending order - if (block_prev) { - assert(block_prev->data < block->data); - } - - if (block_next) { - assert(block->data < block_next->data); - } - - // two block's data should not overlap - if (block_next) { - assert((block->data + block->size) <= block_next->data); - } - - cb_args->sum_blocks_size += block->size; - if (block->used) { - cb_args->sum_used += block->size; - } -} - -static void debug_verify_upstream_blocks_cb(void *data, void *arg) { - assert(data); - assert(arg); - - ravl_data_t *node_data = data; - block_t *alloc = node_data->value; - assert(alloc); - - debug_cb_args_t *cb_args = (debug_cb_args_t *)arg; - coarse_memory_provider_t *provider = cb_args->provider; - - ravl_node_t *node = - ravl_find(provider->upstream_blocks, data, RAVL_PREDICATE_EQUAL); - assert(node); - - block_t *alloc_next = get_block_next(node); - block_t *alloc_prev = get_block_prev(node); - - cb_args->num_alloc_blocks++; - cb_args->sum_alloc_size += alloc->size; - - assert(alloc->data); - assert(alloc->size > 0); - - // data addresses in the list are in ascending order - if (alloc_prev) { - assert(alloc_prev->data < alloc->data); - } - - if (alloc_next) { - assert(alloc->data < alloc_next->data); - } - - // data should not overlap - if (alloc_next) { - assert((alloc->data + alloc->size) <= alloc_next->data); - } -} - -static umf_result_t -coarse_memory_provider_get_stats(void *provider, - coarse_memory_provider_stats_t *stats); - -static bool debug_check(coarse_memory_provider_t *provider) { - assert(provider); - - coarse_memory_provider_stats_t stats = {0}; - coarse_memory_provider_get_stats(provider, 
&stats); - - debug_cb_args_t cb_args = {0}; - cb_args.provider = provider; - - // verify the all_blocks list - ravl_foreach(provider->all_blocks, debug_verify_all_blocks_cb, &cb_args); - - assert(cb_args.num_all_blocks == stats.num_all_blocks); - assert(cb_args.num_free_blocks == stats.num_free_blocks); - assert(cb_args.sum_used == provider->used_size); - assert(cb_args.sum_blocks_size == provider->alloc_size); - assert(provider->alloc_size >= provider->used_size); - - // verify the upstream_blocks list - ravl_foreach(provider->upstream_blocks, debug_verify_upstream_blocks_cb, - &cb_args); - - assert(cb_args.sum_alloc_size == provider->alloc_size); - assert(cb_args.num_alloc_blocks == stats.num_upstream_blocks); - - return true; -} -#endif /* NDEBUG */ // end of DEBUG code - -static umf_result_t -coarse_add_upstream_block(coarse_memory_provider_t *coarse_provider, void *addr, - size_t size) { - ravl_node_t *alloc_node = NULL; - - block_t *alloc = coarse_ravl_add_new(coarse_provider->upstream_blocks, addr, - size, &alloc_node); - if (alloc == NULL) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - block_t *new_block = - coarse_ravl_add_new(coarse_provider->all_blocks, addr, size, NULL); - if (new_block == NULL) { - coarse_ravl_rm(coarse_provider->upstream_blocks, addr); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - // check if the new upstream block can be merged with its neighbours - alloc_node = upstream_block_merge_with_prev(coarse_provider, alloc_node); - alloc_node = upstream_block_merge_with_next(coarse_provider, alloc_node); - - new_block->used = true; - coarse_provider->alloc_size += size; - coarse_provider->used_size += size; - - return UMF_RESULT_SUCCESS; -} - -static umf_result_t -coarse_memory_provider_set_name(coarse_memory_provider_t *coarse_provider) { - if (coarse_provider->upstream_memory_provider == NULL) { - // COARSE_BASE_NAME will be used - coarse_provider->name = NULL; - return UMF_RESULT_SUCCESS; - } - - const char *up_name = - umfMemoryProviderGetName(coarse_provider->upstream_memory_provider); - if (!up_name) { - return UMF_RESULT_ERROR_UNKNOWN; - } - - size_t length = - strlen(COARSE_BASE_NAME) + strlen(up_name) + 3; // + 3 for " ()" - - coarse_provider->name = umf_ba_global_alloc(length + 1); // + 1 for '\0' - if (coarse_provider->name == NULL) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - sprintf(coarse_provider->name, "%s (%s)", COARSE_BASE_NAME, up_name); - - return UMF_RESULT_SUCCESS; -} - -// needed for coarse_memory_provider_initialize() -static umf_result_t coarse_memory_provider_alloc(void *provider, size_t size, - size_t alignment, - void **resultPtr); - -// needed for coarse_memory_provider_initialize() -static umf_result_t coarse_memory_provider_free(void *provider, void *ptr, - size_t bytes); - -static umf_result_t coarse_memory_provider_initialize(void *params, - void **provider) { - assert(provider); - - if (params == NULL) { - LOG_ERR("coarse provider parameters are missing"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - coarse_memory_provider_params_t *coarse_params = - (coarse_memory_provider_params_t *)params; - - // check params - if (!coarse_params->upstream_memory_provider == - !coarse_params->init_buffer) { - LOG_ERR("either upstream provider or init buffer has to be provided in " - "the parameters (exactly one of them)"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (coarse_params->init_buffer_size == 0 && - (coarse_params->immediate_init_from_upstream || - coarse_params->init_buffer != NULL)) { - 
LOG_ERR("init_buffer_size has to be greater than 0 if " - "immediate_init_from_upstream or init_buffer is set"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (coarse_params->init_buffer_size != 0 && - (!coarse_params->immediate_init_from_upstream && - coarse_params->init_buffer == NULL)) { - LOG_ERR("init_buffer_size is greater than 0 but none of " - "immediate_init_from_upstream nor init_buffer is set"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (coarse_params->destroy_upstream_memory_provider && - !coarse_params->upstream_memory_provider) { - LOG_ERR("destroy_upstream_memory_provider is true, but an upstream " - "provider is not provided"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - coarse_memory_provider_t *coarse_provider = - umf_ba_global_alloc(sizeof(*coarse_provider)); - if (!coarse_provider) { - LOG_ERR("out of the host memory"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - memset(coarse_provider, 0, sizeof(*coarse_provider)); - - coarse_provider->upstream_memory_provider = - coarse_params->upstream_memory_provider; - coarse_provider->destroy_upstream_memory_provider = - coarse_params->destroy_upstream_memory_provider; - coarse_provider->allocation_strategy = coarse_params->allocation_strategy; - coarse_provider->init_buffer = coarse_params->init_buffer; - - umf_result_t umf_result = coarse_memory_provider_set_name(coarse_provider); - if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("name initialization failed"); - goto err_free_coarse_provider; - } - - // most of the error handling paths below set this error - umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - - coarse_provider->upstream_blocks = - ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); - if (coarse_provider->upstream_blocks == NULL) { - LOG_ERR("out of the host memory"); - goto err_free_name; - } - - coarse_provider->free_blocks = - ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); - if (coarse_provider->free_blocks == NULL) { - LOG_ERR("out of the host memory"); - goto err_delete_ravl_upstream_blocks; - } - - coarse_provider->all_blocks = - ravl_new_sized(coarse_ravl_comp, sizeof(ravl_data_t)); - if (coarse_provider->all_blocks == NULL) { - LOG_ERR("out of the host memory"); - goto err_delete_ravl_free_blocks; - } - - coarse_provider->alloc_size = 0; - coarse_provider->used_size = 0; - - if (utils_mutex_init(&coarse_provider->lock) == NULL) { - LOG_ERR("lock initialization failed"); - umf_result = UMF_RESULT_ERROR_UNKNOWN; - goto err_delete_ravl_all_blocks; - } - - if (coarse_params->upstream_memory_provider && - coarse_params->immediate_init_from_upstream) { - // allocate and immediately deallocate memory using the upstream provider - void *init_buffer = NULL; - coarse_memory_provider_alloc( - coarse_provider, coarse_params->init_buffer_size, 0, &init_buffer); - if (init_buffer == NULL) { - goto err_destroy_mutex; - } - - coarse_memory_provider_free(coarse_provider, init_buffer, - coarse_params->init_buffer_size); - - } else if (coarse_params->init_buffer) { - umf_result = coarse_add_upstream_block(coarse_provider, - coarse_provider->init_buffer, - coarse_params->init_buffer_size); - if (umf_result != UMF_RESULT_SUCCESS) { - goto err_destroy_mutex; - } - - LOG_DEBUG("coarse_ALLOC (init_buffer) %zu used %zu alloc %zu", - coarse_params->init_buffer_size, coarse_provider->used_size, - coarse_provider->alloc_size); - - coarse_memory_provider_free(coarse_provider, - coarse_provider->init_buffer, - coarse_params->init_buffer_size); - } - - 
assert(coarse_provider->used_size == 0); - assert(coarse_provider->alloc_size == coarse_params->init_buffer_size); - assert(debug_check(coarse_provider)); - - *provider = coarse_provider; - - return UMF_RESULT_SUCCESS; - -err_destroy_mutex: - utils_mutex_destroy_not_free(&coarse_provider->lock); -err_delete_ravl_all_blocks: - ravl_delete(coarse_provider->all_blocks); -err_delete_ravl_free_blocks: - ravl_delete(coarse_provider->free_blocks); -err_delete_ravl_upstream_blocks: - ravl_delete(coarse_provider->upstream_blocks); -err_free_name: - umf_ba_global_free(coarse_provider->name); -err_free_coarse_provider: - umf_ba_global_free(coarse_provider); - return umf_result; -} - -static void coarse_ravl_cb_rm_upstream_blocks_node(void *data, void *arg) { - assert(data); - assert(arg); - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)arg; - ravl_data_t *node_data = data; - block_t *alloc = node_data->value; - assert(alloc); - - if (coarse_provider->upstream_memory_provider) { - // We continue to deallocate alloc blocks even if the upstream provider doesn't return success. - umfMemoryProviderFree(coarse_provider->upstream_memory_provider, - alloc->data, alloc->size); - } - - assert(coarse_provider->alloc_size >= alloc->size); - coarse_provider->alloc_size -= alloc->size; - - umf_ba_global_free(alloc); -} - -static void coarse_ravl_cb_rm_all_blocks_node(void *data, void *arg) { - assert(data); - assert(arg); - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)arg; - ravl_data_t *node_data = data; - block_t *block = node_data->value; - assert(block); - - if (block->used) { - assert(coarse_provider->used_size >= block->size); - coarse_provider->used_size -= block->size; - } - - if (block->free_list_ptr) { - free_blocks_rm_node(coarse_provider->free_blocks, block->free_list_ptr); - } - - umf_ba_global_free(block); -} - -static void coarse_memory_provider_finalize(void *provider) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - utils_mutex_destroy_not_free(&coarse_provider->lock); - - ravl_foreach(coarse_provider->all_blocks, coarse_ravl_cb_rm_all_blocks_node, - coarse_provider); - assert(coarse_provider->used_size == 0); - - ravl_foreach(coarse_provider->upstream_blocks, - coarse_ravl_cb_rm_upstream_blocks_node, coarse_provider); - assert(coarse_provider->alloc_size == 0); - - ravl_delete(coarse_provider->upstream_blocks); - ravl_delete(coarse_provider->all_blocks); - ravl_delete(coarse_provider->free_blocks); - - umf_ba_global_free(coarse_provider->name); - - if (coarse_provider->destroy_upstream_memory_provider && - coarse_provider->upstream_memory_provider) { - umfMemoryProviderDestroy(coarse_provider->upstream_memory_provider); - } - - umf_ba_global_free(coarse_provider); -} - -static umf_result_t -create_aligned_block(coarse_memory_provider_t *coarse_provider, - size_t orig_size, size_t alignment, block_t **current) { - (void)orig_size; // unused in the Release version - int rv; - - block_t *curr = *current; - - // In case of non-zero alignment create an aligned block what would be further used. 
- uintptr_t orig_data = (uintptr_t)curr->data; - uintptr_t aligned_data = ALIGN_UP(orig_data, alignment); - size_t padding = aligned_data - orig_data; - if (alignment > 0 && padding > 0) { - block_t *aligned_block = coarse_ravl_add_new( - coarse_provider->all_blocks, curr->data + padding, - curr->size - padding, NULL); - if (aligned_block == NULL) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - curr->used = false; - curr->size = padding; - - rv = free_blocks_add(coarse_provider->free_blocks, curr); - if (rv) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - // use aligned block - *current = aligned_block; - assert((*current)->size >= orig_size); - } - - return UMF_RESULT_SUCCESS; -} - -// Split the current block and put the new block after the one that we use. -static umf_result_t -split_current_block(coarse_memory_provider_t *coarse_provider, block_t *curr, - size_t size) { - ravl_node_t *new_node = NULL; - - block_t *new_block = - coarse_ravl_add_new(coarse_provider->all_blocks, curr->data + size, - curr->size - size, &new_node); - if (new_block == NULL) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - new_block->used = false; - - int rv = - free_blocks_add(coarse_provider->free_blocks, get_node_block(new_node)); - if (rv) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - return UMF_RESULT_SUCCESS; -} - -static block_t * -find_free_block(struct ravl *free_blocks, size_t size, size_t alignment, - coarse_memory_provider_strategy_t allocation_strategy) { - block_t *block; - - switch (allocation_strategy) { - case UMF_COARSE_MEMORY_STRATEGY_FASTEST: - // Always allocate a free block of the (size + alignment) size - // and later cut out the properly aligned part leaving two remaining parts. - return free_blocks_rm_ge(free_blocks, size + alignment, 0, - CHECK_ONLY_THE_FIRST_BLOCK); - - case UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE: - // First check if the first free block of the 'size' size has the correct alignment. - block = free_blocks_rm_ge(free_blocks, size, alignment, - CHECK_ONLY_THE_FIRST_BLOCK); - if (block) { - return block; - } - - // If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. - return free_blocks_rm_ge(free_blocks, size + alignment, 0, - CHECK_ONLY_THE_FIRST_BLOCK); - - case UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE: - // First look through all free blocks of the 'size' size - // and choose the first one with the correct alignment. - block = free_blocks_rm_ge(free_blocks, size, alignment, - CHECK_ALL_BLOCKS_OF_SIZE); - if (block) { - return block; - } - - // If none of them had the correct alignment, - // use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. 
- return free_blocks_rm_ge(free_blocks, size + alignment, 0, - CHECK_ONLY_THE_FIRST_BLOCK); - - // unknown memory allocation strategy - default: - abort(); - } -} - -static umf_result_t coarse_memory_provider_alloc(void *provider, size_t size, - size_t alignment, - void **resultPtr) { - umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN; - - if (resultPtr == NULL) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (utils_mutex_lock(&coarse_provider->lock) != 0) { - LOG_ERR("locking the lock failed"); - return UMF_RESULT_ERROR_UNKNOWN; - } - - assert(debug_check(coarse_provider)); - - // Find a block with greater or equal size using the given memory allocation strategy - block_t *curr = - find_free_block(coarse_provider->free_blocks, size, alignment, - coarse_provider->allocation_strategy); - - // If the block that we want to reuse has a greater size, split it. - // Try to merge the split part with the successor if it is not used. - enum { ACTION_NONE = 0, ACTION_USE, ACTION_SPLIT } action = ACTION_NONE; - - if (curr && curr->size > size) { - action = ACTION_SPLIT; - } else if (curr && curr->size == size) { - action = ACTION_USE; - } - - if (action) { // ACTION_SPLIT or ACTION_USE - assert(curr->used == false); - - // In case of non-zero alignment create an aligned block what would be further used. - if (alignment > 0) { - umf_result = - create_aligned_block(coarse_provider, size, alignment, &curr); - if (umf_result != UMF_RESULT_SUCCESS) { - utils_mutex_unlock(&coarse_provider->lock); - return umf_result; - } - } - - if (action == ACTION_SPLIT) { - // Split the current block and put the new block after the one that we use. - umf_result = split_current_block(coarse_provider, curr, size); - if (umf_result != UMF_RESULT_SUCCESS) { - utils_mutex_unlock(&coarse_provider->lock); - return umf_result; - } - - curr->size = size; - - LOG_DEBUG("coarse_ALLOC (split_block) %zu used %zu alloc %zu", size, - coarse_provider->used_size, coarse_provider->alloc_size); - - } else { // action == ACTION_USE - LOG_DEBUG("coarse_ALLOC (same_block) %zu used %zu alloc %zu", size, - coarse_provider->used_size, coarse_provider->alloc_size); - } - - curr->used = true; - *resultPtr = curr->data; - coarse_provider->used_size += size; - - assert(debug_check(coarse_provider)); - utils_mutex_unlock(&coarse_provider->lock); - - return UMF_RESULT_SUCCESS; - } - - // no suitable block found - try to get more memory from the upstream provider - umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - - if (coarse_provider->upstream_memory_provider == NULL) { - LOG_ERR("out of memory - no upstream memory provider given"); - goto err_unlock; - } - - umfMemoryProviderAlloc(coarse_provider->upstream_memory_provider, size, - alignment, resultPtr); - if (*resultPtr == NULL) { - LOG_ERR("out of memory - upstream memory provider allocation failed"); - goto err_unlock; - } - - ASSERT_IS_ALIGNED(((uintptr_t)(*resultPtr)), alignment); - - umf_result = coarse_add_upstream_block(coarse_provider, *resultPtr, size); - if (umf_result != UMF_RESULT_SUCCESS) { - umfMemoryProviderFree(coarse_provider->upstream_memory_provider, - *resultPtr, size); - goto err_unlock; - } - - LOG_DEBUG("coarse_ALLOC (upstream) %zu used %zu alloc %zu", size, - coarse_provider->used_size, coarse_provider->alloc_size); - - umf_result = UMF_RESULT_SUCCESS; - -err_unlock: - assert(debug_check(coarse_provider)); - utils_mutex_unlock(&coarse_provider->lock); - - return 
umf_result; -} - -static umf_result_t coarse_memory_provider_free(void *provider, void *ptr, - size_t bytes) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (utils_mutex_lock(&coarse_provider->lock) != 0) { - LOG_ERR("locking the lock failed"); - return UMF_RESULT_ERROR_UNKNOWN; - } - - assert(debug_check(coarse_provider)); - - ravl_node_t *node = coarse_ravl_find_node(coarse_provider->all_blocks, ptr); - if (node == NULL) { - // the block was not found - utils_mutex_unlock(&coarse_provider->lock); - LOG_ERR("memory block not found (ptr = %p, size = %zu)", ptr, bytes); - return UMF_RESULT_ERROR_UNKNOWN; - } - - block_t *block = get_node_block(node); - if (!block->used) { - // the block is already free - utils_mutex_unlock(&coarse_provider->lock); - LOG_ERR("the block is already free"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if (bytes > 0 && bytes != block->size) { - // wrong size of allocation - utils_mutex_unlock(&coarse_provider->lock); - LOG_ERR("wrong size of allocation"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - LOG_DEBUG("coarse_FREE (return_block_to_pool) %zu used %zu alloc %zu", - block->size, coarse_provider->used_size - block->size, - coarse_provider->alloc_size); - - assert(coarse_provider->used_size >= block->size); - coarse_provider->used_size -= block->size; - - block->used = false; - - // Merge with prev and/or next block if they are unused and have continuous data. - node = free_block_merge_with_prev(coarse_provider, node); - node = free_block_merge_with_next(coarse_provider, node); - - int rv = - free_blocks_add(coarse_provider->free_blocks, get_node_block(node)); - if (rv) { - utils_mutex_unlock(&coarse_provider->lock); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - assert(debug_check(coarse_provider)); - utils_mutex_unlock(&coarse_provider->lock); - - return UMF_RESULT_SUCCESS; -} - -static void coarse_memory_provider_get_last_native_error(void *provider, - const char **ppMessage, - int32_t *pError) { - (void)provider; // unused - - if (ppMessage == NULL || pError == NULL) { - assert(0); - return; - } - - // Nothing more is needed here, since - // there is no UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC error used. 
-} - -static umf_result_t coarse_memory_provider_get_min_page_size(void *provider, - void *ptr, - size_t *pageSize) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (!coarse_provider->upstream_memory_provider) { - *pageSize = utils_get_page_size(); - return UMF_RESULT_SUCCESS; - } - - return umfMemoryProviderGetMinPageSize( - coarse_provider->upstream_memory_provider, ptr, pageSize); -} - -static umf_result_t -coarse_memory_provider_get_recommended_page_size(void *provider, size_t size, - size_t *pageSize) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (!coarse_provider->upstream_memory_provider) { - *pageSize = utils_get_page_size(); - return UMF_RESULT_SUCCESS; - } - - return umfMemoryProviderGetRecommendedPageSize( - coarse_provider->upstream_memory_provider, size, pageSize); -} - -static const char *coarse_memory_provider_get_name(void *provider) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (!coarse_provider->name) { - return COARSE_BASE_NAME; - } - - return coarse_provider->name; -} - -static void ravl_cb_count(void *data, void *arg) { - assert(arg); - (void)data; /* unused */ - - size_t *num_all_blocks = arg; - (*num_all_blocks)++; -} - -static void ravl_cb_count_free(void *data, void *arg) { - assert(data); - assert(arg); - - ravl_data_t *node_data = data; - assert(node_data); - ravl_free_blocks_head_t *head_node = node_data->value; - assert(head_node); - struct ravl_free_blocks_elem_t *free_block = head_node->head; - assert(free_block); - - size_t *num_all_blocks = arg; - while (free_block) { - (*num_all_blocks)++; - free_block = free_block->next; - } -} - -static umf_result_t -coarse_memory_provider_get_stats(void *provider, - coarse_memory_provider_stats_t *stats) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - // count blocks - size_t num_upstream_blocks = 0; - ravl_foreach(coarse_provider->upstream_blocks, ravl_cb_count, - &num_upstream_blocks); - - size_t num_all_blocks = 0; - ravl_foreach(coarse_provider->all_blocks, ravl_cb_count, &num_all_blocks); - - size_t num_free_blocks = 0; - ravl_foreach(coarse_provider->free_blocks, ravl_cb_count_free, - &num_free_blocks); - - stats->alloc_size = coarse_provider->alloc_size; - stats->used_size = coarse_provider->used_size; - stats->num_upstream_blocks = num_upstream_blocks; - stats->num_all_blocks = num_all_blocks; - stats->num_free_blocks = num_free_blocks; - - return UMF_RESULT_SUCCESS; -} - -static umf_result_t coarse_memory_provider_purge_lazy(void *provider, void *ptr, - size_t size) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - if (coarse_provider->upstream_memory_provider == NULL) { - LOG_ERR("no upstream memory provider given"); - return UMF_RESULT_ERROR_NOT_SUPPORTED; - } - - return umfMemoryProviderPurgeLazy(coarse_provider->upstream_memory_provider, - ptr, size); -} - -static umf_result_t coarse_memory_provider_purge_force(void *provider, - void *ptr, size_t size) { - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - if (coarse_provider->upstream_memory_provider == NULL) { - LOG_ERR("no upstream memory provider given"); - return UMF_RESULT_ERROR_NOT_SUPPORTED; - } - - return umfMemoryProviderPurgeForce( - coarse_provider->upstream_memory_provider, ptr, size); -} - -static umf_result_t 
coarse_memory_provider_allocation_split(void *provider, - void *ptr, - size_t totalSize, - size_t firstSize) { - umf_result_t umf_result; - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (utils_mutex_lock(&coarse_provider->lock) != 0) { - LOG_ERR("locking the lock failed"); - return UMF_RESULT_ERROR_UNKNOWN; - } - - assert(debug_check(coarse_provider)); - - ravl_node_t *node = coarse_ravl_find_node(coarse_provider->all_blocks, ptr); - if (node == NULL) { - LOG_ERR("memory block not found"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - block_t *block = get_node_block(node); - - if (block->size != totalSize) { - LOG_ERR("wrong totalSize"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - if (!block->used) { - LOG_ERR("block is not allocated"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - block_t *new_block = coarse_ravl_add_new(coarse_provider->all_blocks, - block->data + firstSize, - block->size - firstSize, NULL); - if (new_block == NULL) { - umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - goto err_mutex_unlock; - } - - block->size = firstSize; - new_block->used = true; - - assert(new_block->size == (totalSize - firstSize)); - - umf_result = UMF_RESULT_SUCCESS; - -err_mutex_unlock: - assert(debug_check(coarse_provider)); - utils_mutex_unlock(&coarse_provider->lock); - - return umf_result; -} - -static umf_result_t coarse_memory_provider_allocation_merge(void *provider, - void *lowPtr, - void *highPtr, - size_t totalSize) { - umf_result_t umf_result; - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)provider; - - if (utils_mutex_lock(&coarse_provider->lock) != 0) { - LOG_ERR("locking the lock failed"); - return UMF_RESULT_ERROR_UNKNOWN; - } - - assert(debug_check(coarse_provider)); - - ravl_node_t *low_node = - coarse_ravl_find_node(coarse_provider->all_blocks, lowPtr); - if (low_node == NULL) { - LOG_ERR("the lowPtr memory block not found"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - block_t *low_block = get_node_block(low_node); - if (!low_block->used) { - LOG_ERR("the lowPtr block is not allocated"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - ravl_node_t *high_node = - coarse_ravl_find_node(coarse_provider->all_blocks, highPtr); - if (high_node == NULL) { - LOG_ERR("the highPtr memory block not found"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - block_t *high_block = get_node_block(high_node); - if (!high_block->used) { - LOG_ERR("the highPtr block is not allocated"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - if (get_node_next(low_node) != high_node) { - LOG_ERR("given pointers cannot be merged"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - if (get_node_prev(high_node) != low_node) { - LOG_ERR("given pointers cannot be merged"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - if (low_block->size + high_block->size != totalSize) { - LOG_ERR("wrong totalSize"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - if ((uintptr_t)highPtr != ((uintptr_t)lowPtr + low_block->size)) { - LOG_ERR("given pointers cannot be merged"); - umf_result = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err_mutex_unlock; - } - - ravl_node_t *merged_node = 
NULL; - - umf_result = user_block_merge(coarse_provider, low_node, high_node, true, - &merged_node); - if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("merging failed"); - goto err_mutex_unlock; - } - - assert(merged_node == low_node); - assert(low_block->size == totalSize); - - umf_result = UMF_RESULT_SUCCESS; - -err_mutex_unlock: - assert(debug_check(coarse_provider)); - utils_mutex_unlock(&coarse_provider->lock); - - return umf_result; -} - -umf_memory_provider_ops_t UMF_COARSE_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, - .initialize = coarse_memory_provider_initialize, - .finalize = coarse_memory_provider_finalize, - .alloc = coarse_memory_provider_alloc, - .free = coarse_memory_provider_free, - .get_last_native_error = coarse_memory_provider_get_last_native_error, - .get_recommended_page_size = - coarse_memory_provider_get_recommended_page_size, - .get_min_page_size = coarse_memory_provider_get_min_page_size, - .get_name = coarse_memory_provider_get_name, - .ext.purge_lazy = coarse_memory_provider_purge_lazy, - .ext.purge_force = coarse_memory_provider_purge_force, - .ext.allocation_merge = coarse_memory_provider_allocation_merge, - .ext.allocation_split = coarse_memory_provider_allocation_split, - // TODO - /* - .ipc.get_ipc_handle_size = coarse_memory_provider_get_ipc_handle_size, - .ipc.get_ipc_handle = coarse_memory_provider_get_ipc_handle, - .ipc.put_ipc_handle = coarse_memory_provider_put_ipc_handle, - .ipc.open_ipc_handle = coarse_memory_provider_open_ipc_handle, - .ipc.close_ipc_handle = coarse_memory_provider_close_ipc_handle, - */ -}; - -umf_memory_provider_ops_t *umfCoarseMemoryProviderOps(void) { - return &UMF_COARSE_MEMORY_PROVIDER_OPS; -} - -coarse_memory_provider_stats_t -umfCoarseMemoryProviderGetStats(umf_memory_provider_handle_t provider) { - coarse_memory_provider_stats_t stats = {0}; - - if (provider == NULL) { - return stats; - } - - void *priv = umfMemoryProviderGetPriv(provider); - - coarse_memory_provider_t *coarse_provider = - (struct coarse_memory_provider_t *)priv; - - if (utils_mutex_lock(&coarse_provider->lock) != 0) { - LOG_ERR("locking the lock failed"); - return stats; - } - - coarse_memory_provider_get_stats(priv, &stats); - - utils_mutex_unlock(&coarse_provider->lock); - - return stats; -} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b56478970..593268a52 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -199,11 +199,6 @@ if(LINUX) LIBS ${UMF_UTILS_FOR_TEST}) endif() -add_umf_test( - NAME provider_coarse - SRCS provider_coarse.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST}) - add_umf_test( NAME coarse_lib SRCS coarse_lib.cpp ${BA_SOURCES_FOR_TEST} @@ -218,10 +213,12 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) NAME c_api_disjoint_pool SRCS c_api/disjoint_pool.c LIBS disjoint_pool) - add_umf_test( - NAME disjointCoarseMallocPool - SRCS disjointCoarseMallocPool.cpp - LIBS disjoint_pool) + if(LINUX AND (NOT UMF_DISABLE_HWLOC)) + add_umf_test( + NAME disjointCoarseMallocPool + SRCS disjointCoarseMallocPool.cpp + LIBS disjoint_pool) + endif() endif() if(UMF_BUILD_LIBUMF_POOL_DISJOINT diff --git a/test/disjointCoarseMallocPool.cpp b/test/disjointCoarseMallocPool.cpp index 32e1d24f3..45502b192 100644 --- a/test/disjointCoarseMallocPool.cpp +++ b/test/disjointCoarseMallocPool.cpp @@ -7,39 +7,40 @@ #include -#include "provider.hpp" - #include -#include +#include + +#include "coarse.h" +#include "provider.hpp" using umf_test::KB; using umf_test::MB; using umf_test::test; -#define GetStats 
umfCoarseMemoryProviderGetStats +#define FILE_PATH ((char *)"tmp_file") umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS = umf::providerMakeCOps(); -struct CoarseWithMemoryStrategyTest +struct FileWithMemoryStrategyTest : umf_test::test, - ::testing::WithParamInterface { + ::testing::WithParamInterface { void SetUp() override { test::SetUp(); allocation_strategy = this->GetParam(); } - coarse_memory_provider_strategy_t allocation_strategy; + coarse_strategy_t allocation_strategy; }; INSTANTIATE_TEST_SUITE_P( - CoarseWithMemoryStrategyTest, CoarseWithMemoryStrategyTest, + FileWithMemoryStrategyTest, FileWithMemoryStrategyTest, ::testing::Values(UMF_COARSE_MEMORY_STRATEGY_FASTEST, UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE)); -TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_basic) { - umf_memory_provider_handle_t malloc_memory_provider; +TEST_P(FileWithMemoryStrategyTest, disjointFileMallocPool_simple1) { + umf_memory_provider_handle_t malloc_memory_provider = nullptr; umf_result_t umf_result; umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, @@ -47,217 +48,19 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_basic) { ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(malloc_memory_provider, nullptr); - const size_t init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.destroy_upstream_memory_provider = true; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = nullptr; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; - umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(disjoint_pool_params, nullptr); - umf_result = - umfDisjointPoolParamsSetSlabMinSize(disjoint_pool_params, 4096); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = - umfDisjointPoolParamsSetMaxPoolableSize(disjoint_pool_params, 4096); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfDisjointPoolParamsSetCapacity(disjoint_pool_params, 4); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = - umfDisjointPoolParamsSetMinBucketSize(disjoint_pool_params, 64); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfDisjointPoolParamsSetTrace(disjoint_pool_params, 1); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - - umf_memory_pool_handle_t pool; - umf_result = umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, - disjoint_pool_params, - UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(pool, nullptr); - - umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - - // test - - umf_memory_provider_handle_t prov = 
NULL; - umf_result = umfPoolGetMemoryProvider(pool, &prov); + umf_file_memory_provider_params_handle_t file_params = nullptr; + umf_result = umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(prov, nullptr); - - // alloc 2x 2MB - void *p1 = umfPoolMalloc(pool, 2 * MB); - ASSERT_NE(p1, nullptr); - ASSERT_EQ(GetStats(prov).used_size, 2 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 2); - - void *p2 = umfPoolMalloc(pool, 2 * MB); - ASSERT_NE(p2, nullptr); - ASSERT_EQ(GetStats(prov).used_size, 4 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 3); - ASSERT_NE(p1, p2); - - // swap pointers to get p1 < p2 - if (p1 > p2) { - std::swap(p1, p2); - } - - // free + alloc first block - // the block should be reused - // currently there is no purging, so the alloc size shouldn't change - // there should be no block merging between used and not-used blocks - umf_result = umfPoolFree(pool, p1); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(prov).used_size, 2 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 3); - - p1 = umfPoolMalloc(pool, 2 * MB); - ASSERT_EQ(GetStats(prov).used_size, 4 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 3); - - // free all allocs - // overall alloc size shouldn't change - // block p2 should merge with the prev free block p1 - // and the remaining init block - umf_result = umfPoolFree(pool, p1); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(prov).num_all_blocks, 3); - umf_result = umfPoolFree(pool, p2); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(prov).used_size, 0 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 1); - - // test allocations with alignment - // TODO: what about holes? 
- p1 = umfPoolAlignedMalloc(pool, 1 * MB - 4, 128); - ASSERT_NE(p1, nullptr); - ASSERT_EQ((uintptr_t)p1 & 127, 0); - p2 = umfPoolAlignedMalloc(pool, 1 * MB - 4, 128); - ASSERT_NE(p2, nullptr); - ASSERT_EQ((uintptr_t)p1 & 127, 0); - umf_result = umfPoolFree(pool, p1); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_result = umfPoolFree(pool, p2); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - - // alloc whole buffer - // after this, there should be one single block - p1 = umfPoolMalloc(pool, init_buffer_size); - ASSERT_EQ(GetStats(prov).used_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 1); - - // free all memory - // alloc 2 MB block - the init block should be split - umf_result = umfPoolFree(pool, p1); - p1 = umfPoolMalloc(pool, 2 * MB); - ASSERT_EQ(GetStats(prov).used_size, 2 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 2); - - // alloc additional 2 MB - // the non-used block should be used - p2 = umfPoolMalloc(pool, 2 * MB); - ASSERT_EQ(GetStats(prov).used_size, 4 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 3); - ASSERT_NE(p1, p2); - - // make sure that p1 < p2 - if (p1 > p2) { - std::swap(p1, p2); - } - - // free blocks in order: p2, p1 - // block p1 should merge with the next block p2 - // swap pointers to get p1 < p2 - umfPoolFree(pool, p2); - umfPoolFree(pool, p1); - ASSERT_EQ(GetStats(prov).used_size, 0 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(prov).num_all_blocks, 1); - - // alloc 10x 2 MB - this should occupy all allocated memory - constexpr int allocs_size = 10; - void *allocs[allocs_size] = {0}; - for (int i = 0; i < allocs_size; i++) { - ASSERT_EQ(GetStats(prov).used_size, i * 2 * MB); - allocs[i] = umfPoolMalloc(pool, 2 * MB); - ASSERT_NE(allocs[i], nullptr); - } - ASSERT_EQ(GetStats(prov).used_size, 20 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - // there should be no block with the free memory - ASSERT_EQ(GetStats(prov).num_all_blocks, allocs_size); - - // free all memory - for (int i = 0; i < allocs_size; i++) { - umf_result = umfPoolFree(pool, allocs[i]); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - } - - ASSERT_EQ(GetStats(prov).num_all_blocks, 1); - ASSERT_EQ(GetStats(prov).used_size, 0 * MB); - ASSERT_EQ(GetStats(prov).alloc_size, init_buffer_size); - - umfPoolDestroy(pool); - // Both coarse_memory_provider and malloc_memory_provider - // have already been destroyed by umfPoolDestroy(), because: - // UMF_POOL_CREATE_FLAG_OWN_PROVIDER was set in umfPoolCreate() and - // coarse_memory_provider_params.destroy_upstream_memory_provider = true; -} - -TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple1) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; + ASSERT_NE(file_params, nullptr); - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); + umf_memory_provider_handle_t file_memory_provider; + umf_result = umfMemoryProviderCreate(umfFileMemoryProviderOps(), + file_params, &file_memory_provider); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; + ASSERT_NE(file_memory_provider, nullptr); - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - 
memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = NULL; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); + umf_result = umfFileMemoryProviderParamsDestroy(file_params); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); @@ -279,7 +82,7 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple1) { umf_memory_pool_handle_t pool; umf_result = - umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + umfPoolCreate(umfDisjointPoolOps(), file_memory_provider, disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(pool, nullptr); @@ -295,8 +98,6 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple1) { size_t s1 = 74659 * KB; size_t s2 = 8206 * KB; - size_t max_alloc_size = 0; - const int nreps = 2; const int nptrs = 6; @@ -308,10 +109,6 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple1) { ASSERT_NE(t[i], nullptr); } - if (max_alloc_size == 0) { - max_alloc_size = GetStats(prov).alloc_size; - } - for (int i = 0; i < nptrs; i++) { umf_result = umfPoolFree(pool, t[i]); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -326,9 +123,6 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple1) { ASSERT_NE(t[i], nullptr); } - // all s2 should fit into single block leaved after freeing s1 - ASSERT_LE(GetStats(prov).alloc_size, max_alloc_size); - for (int i = 0; i < nptrs; i++) { umf_result = umfPoolFree(pool, t[i]); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); @@ -336,12 +130,12 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple1) { } umfPoolDestroy(pool); - umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(file_memory_provider); umfMemoryProviderDestroy(malloc_memory_provider); } -TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple2) { - umf_memory_provider_handle_t malloc_memory_provider; +TEST_P(FileWithMemoryStrategyTest, disjointFileMallocPool_simple2) { + umf_memory_provider_handle_t malloc_memory_provider = nullptr; umf_result_t umf_result; umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, @@ -349,25 +143,19 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple2) { ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(malloc_memory_provider, nullptr); - const size_t init_buffer_size = 20 * MB; + umf_file_memory_provider_params_handle_t file_params = nullptr; + umf_result = umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(file_params, nullptr); - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - 
coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = NULL; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; + umf_memory_provider_handle_t file_memory_provider; + umf_result = umfMemoryProviderCreate(umfFileMemoryProviderOps(), + file_params, &file_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(file_memory_provider, nullptr); - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); + umf_result = umfFileMemoryProviderParamsDestroy(file_params); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); @@ -389,7 +177,7 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple2) { umf_memory_pool_handle_t pool; umf_result = - umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + umfPoolCreate(umfDisjointPoolOps(), file_memory_provider, disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(pool, nullptr); @@ -415,7 +203,7 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMallocPool_simple2) { } umfPoolDestroy(pool); - umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(file_memory_provider); umfMemoryProviderDestroy(malloc_memory_provider); } @@ -431,7 +219,7 @@ struct alloc_ptr_size { } }; -TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMMapPool_random) { +TEST_P(FileWithMemoryStrategyTest, disjointFileMMapPool_random) { umf_result_t umf_result; const size_t init_buffer_size = 200 * MB; @@ -443,22 +231,19 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMMapPool_random) { const unsigned char alloc_check_val = 11; - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = NULL; - coarse_memory_provider_params.immediate_init_from_upstream = false; - coarse_memory_provider_params.init_buffer = buf; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; + umf_file_memory_provider_params_handle_t file_params = nullptr; + umf_result = umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(file_params, nullptr); + + umf_memory_provider_handle_t file_memory_provider; + umf_result = umfMemoryProviderCreate(umfFileMemoryProviderOps(), + file_params, &file_memory_provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(file_memory_provider, nullptr); - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); + umf_result = umfFileMemoryProviderParamsDestroy(file_params); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); @@ -480,7 +265,7 @@ 
TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMMapPool_random) { umf_memory_pool_handle_t pool; umf_result = - umfPoolCreate(umfDisjointPoolOps(), coarse_memory_provider, + umfPoolCreate(umfDisjointPoolOps(), file_memory_provider, disjoint_pool_params, UMF_POOL_CREATE_FLAG_NONE, &pool); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(pool, nullptr); @@ -520,9 +305,7 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMMapPool_random) { // alloc for (size_t j = 0; j < count; j++) { - void *ptr = umfPoolMalloc(pool, size); - ASSERT_NE(ptr, nullptr); - + void *ptr = umfPoolCalloc(pool, 1, size); if (ptr == nullptr) { break; } @@ -576,5 +359,5 @@ TEST_P(CoarseWithMemoryStrategyTest, disjointCoarseMMapPool_random) { } umfPoolDestroy(pool); - umfMemoryProviderDestroy(coarse_memory_provider); + umfMemoryProviderDestroy(file_memory_provider); } diff --git a/test/memoryPoolAPI.cpp b/test/memoryPoolAPI.cpp index 1c6d83f2a..95dcfabb2 100644 --- a/test/memoryPoolAPI.cpp +++ b/test/memoryPoolAPI.cpp @@ -181,16 +181,14 @@ TEST_F(test, BasicPoolByPtrTest) { INSTANTIATE_TEST_SUITE_P( mallocPoolTest, umfPoolTest, ::testing::Values(poolCreateExtParams{&MALLOC_POOL_OPS, nullptr, - &UMF_NULL_PROVIDER_OPS, nullptr, - nullptr}, + &UMF_NULL_PROVIDER_OPS, nullptr}, poolCreateExtParams{umfProxyPoolOps(), nullptr, - &BA_GLOBAL_PROVIDER_OPS, nullptr, - nullptr})); + &BA_GLOBAL_PROVIDER_OPS, nullptr})); INSTANTIATE_TEST_SUITE_P(mallocMultiPoolTest, umfMultiPoolTest, ::testing::Values(poolCreateExtParams{ umfProxyPoolOps(), nullptr, - &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr})); + &BA_GLOBAL_PROVIDER_OPS, nullptr})); INSTANTIATE_TEST_SUITE_P(umfPoolWithCreateFlagsTest, umfPoolWithCreateFlagsTest, ::testing::Values(0, diff --git a/test/poolFixtures.hpp b/test/poolFixtures.hpp index e5ec85012..6f54fe114 100644 --- a/test/poolFixtures.hpp +++ b/test/poolFixtures.hpp @@ -7,7 +7,6 @@ #include "pool.hpp" #include "provider.hpp" -#include "umf/providers/provider_coarse.h" #include "umf/providers/provider_devdax_memory.h" #include "utils/utils_sanitizers.h" @@ -20,13 +19,11 @@ #include "../malloc_compliance_tests.hpp" -using poolCreateExtParams = - std::tuple; +using poolCreateExtParams = std::tuple; umf::pool_unique_handle_t poolCreateExtUnique(poolCreateExtParams params) { - auto [pool_ops, pool_params, provider_ops, provider_params, coarse_params] = - params; + auto [pool_ops, pool_params, provider_ops, provider_params] = params; umf_memory_provider_handle_t upstream_provider = nullptr; umf_memory_provider_handle_t provider = nullptr; @@ -40,22 +37,6 @@ umf::pool_unique_handle_t poolCreateExtUnique(poolCreateExtParams params) { provider = upstream_provider; - if (coarse_params) { - coarse_memory_provider_params_t *coarse_memory_provider_params = - (coarse_memory_provider_params_t *)coarse_params; - coarse_memory_provider_params->upstream_memory_provider = - upstream_provider; - coarse_memory_provider_params->destroy_upstream_memory_provider = true; - - umf_memory_provider_handle_t coarse_provider = nullptr; - ret = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - coarse_params, &coarse_provider); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - EXPECT_NE(coarse_provider, nullptr); - - provider = coarse_provider; - } - ret = umfPoolCreate(pool_ops, provider, pool_params, UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index 471e53dc2..c254400db 100644 --- a/test/pools/disjoint_pool.cpp +++ 
b/test/pools/disjoint_pool.cpp @@ -248,19 +248,18 @@ INSTANTIATE_TEST_SUITE_P(disjointPoolTests, umfPoolTest, ::testing::Values(poolCreateExtParams{ umfDisjointPoolOps(), (void *)defaultPoolConfig.get(), - &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr})); + &BA_GLOBAL_PROVIDER_OPS, nullptr})); -INSTANTIATE_TEST_SUITE_P(disjointPoolTests, umfMemTest, - ::testing::Values(std::make_tuple( - poolCreateExtParams{ - umfDisjointPoolOps(), - (void *)defaultPoolConfig.get(), - &MOCK_OUT_OF_MEM_PROVIDER_OPS, - (void *)&DEFAULT_DISJOINT_CAPACITY, nullptr}, - static_cast(DEFAULT_DISJOINT_CAPACITY) / 2))); +INSTANTIATE_TEST_SUITE_P( + disjointPoolTests, umfMemTest, + ::testing::Values(std::make_tuple( + poolCreateExtParams{ + umfDisjointPoolOps(), (void *)defaultPoolConfig.get(), + &MOCK_OUT_OF_MEM_PROVIDER_OPS, (void *)&DEFAULT_DISJOINT_CAPACITY}, + static_cast(DEFAULT_DISJOINT_CAPACITY) / 2))); INSTANTIATE_TEST_SUITE_P(disjointMultiPoolTests, umfMultiPoolTest, ::testing::Values(poolCreateExtParams{ umfDisjointPoolOps(), (void *)defaultPoolConfig.get(), - &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr})); + &BA_GLOBAL_PROVIDER_OPS, nullptr})); diff --git a/test/pools/jemalloc_coarse_devdax.cpp b/test/pools/jemalloc_coarse_devdax.cpp index 350e053ab..72906e625 100644 --- a/test/pools/jemalloc_coarse_devdax.cpp +++ b/test/pools/jemalloc_coarse_devdax.cpp @@ -31,15 +31,13 @@ devdax_params_unique_handle_t create_devdax_params() { &umfDevDaxMemoryProviderParamsDestroy); } -auto coarseParams = umfCoarseMemoryProviderParamsDefault(); auto devdaxParams = create_devdax_params(); static std::vector poolParamsList = - devdaxParams.get() - ? std::vector{poolCreateExtParams{ - umfJemallocPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - devdaxParams.get(), &coarseParams}} - : std::vector{}; + devdaxParams.get() ? 
std::vector{poolCreateExtParams{ + umfJemallocPoolOps(), nullptr, + umfDevDaxMemoryProviderOps(), devdaxParams.get()}} + : std::vector{}; INSTANTIATE_TEST_SUITE_P(jemallocCoarseDevDaxTest, umfPoolTest, ::testing::ValuesIn(poolParamsList)); diff --git a/test/pools/jemalloc_coarse_file.cpp b/test/pools/jemalloc_coarse_file.cpp index 74ad36d56..68a602df6 100644 --- a/test/pools/jemalloc_coarse_file.cpp +++ b/test/pools/jemalloc_coarse_file.cpp @@ -23,11 +23,9 @@ file_params_unique_handle_t get_file_params_default(char *path) { &umfFileMemoryProviderParamsDestroy); } -auto coarseParams = umfCoarseMemoryProviderParamsDefault(); file_params_unique_handle_t fileParams = get_file_params_default(FILE_PATH); INSTANTIATE_TEST_SUITE_P(jemallocCoarseFileTest, umfPoolTest, ::testing::Values(poolCreateExtParams{ umfJemallocPoolOps(), nullptr, - umfFileMemoryProviderOps(), fileParams.get(), - &coarseParams})); + umfFileMemoryProviderOps(), fileParams.get()})); diff --git a/test/pools/jemalloc_pool.cpp b/test/pools/jemalloc_pool.cpp index 4dddbcd32..bcc9623c7 100644 --- a/test/pools/jemalloc_pool.cpp +++ b/test/pools/jemalloc_pool.cpp @@ -29,8 +29,7 @@ auto defaultParams = createOsMemoryProviderParams(); INSTANTIATE_TEST_SUITE_P(jemallocPoolTest, umfPoolTest, ::testing::Values(poolCreateExtParams{ umfJemallocPoolOps(), nullptr, - umfOsMemoryProviderOps(), defaultParams.get(), - nullptr})); + umfOsMemoryProviderOps(), defaultParams.get()})); // this test makes sure that jemalloc does not use // memory provider to allocate metadata (and hence @@ -48,9 +47,8 @@ TEST_F(test, metadataNotAllocatedUsingProvider) { res = umfOsMemoryProviderParamsSetProtection(params, UMF_PROTECTION_NONE); ASSERT_EQ(res, UMF_RESULT_SUCCESS); - auto pool = - poolCreateExtUnique({umfJemallocPoolOps(), nullptr, - umfOsMemoryProviderOps(), params, nullptr}); + auto pool = poolCreateExtUnique( + {umfJemallocPoolOps(), nullptr, umfOsMemoryProviderOps(), params}); res = umfOsMemoryProviderParamsDestroy(params); ASSERT_EQ(res, UMF_RESULT_SUCCESS); diff --git a/test/pools/pool_base_alloc.cpp b/test/pools/pool_base_alloc.cpp index 7c9a3701a..ec07a7c2f 100644 --- a/test/pools/pool_base_alloc.cpp +++ b/test/pools/pool_base_alloc.cpp @@ -48,4 +48,4 @@ umf_memory_pool_ops_t BA_POOL_OPS = umf::poolMakeCOps(); INSTANTIATE_TEST_SUITE_P(baPool, umfPoolTest, ::testing::Values(poolCreateExtParams{ &BA_POOL_OPS, nullptr, - &umf_test::BASE_PROVIDER_OPS, nullptr, nullptr})); + &umf_test::BASE_PROVIDER_OPS, nullptr})); diff --git a/test/pools/pool_coarse.hpp b/test/pools/pool_coarse.hpp index 7baa612f1..b1efb4fee 100644 --- a/test/pools/pool_coarse.hpp +++ b/test/pools/pool_coarse.hpp @@ -5,8 +5,6 @@ #ifndef UMF_TEST_POOL_COARSE_HPP #define UMF_TEST_POOL_COARSE_HPP 1 -#include "umf/providers/provider_coarse.h" - #include "pool.hpp" #include "poolFixtures.hpp" diff --git a/test/pools/scalable_coarse_devdax.cpp b/test/pools/scalable_coarse_devdax.cpp index 1bf77c61c..970f45ef9 100644 --- a/test/pools/scalable_coarse_devdax.cpp +++ b/test/pools/scalable_coarse_devdax.cpp @@ -31,15 +31,13 @@ devdax_params_unique_handle_t create_devdax_params() { &umfDevDaxMemoryProviderParamsDestroy); } -auto coarseParams = umfCoarseMemoryProviderParamsDefault(); auto devdaxParams = create_devdax_params(); static std::vector poolParamsList = - devdaxParams.get() - ? std::vector{poolCreateExtParams{ - umfScalablePoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - devdaxParams.get(), &coarseParams}} - : std::vector{}; + devdaxParams.get() ? 
std::vector{poolCreateExtParams{ + umfScalablePoolOps(), nullptr, + umfDevDaxMemoryProviderOps(), devdaxParams.get()}} + : std::vector{}; INSTANTIATE_TEST_SUITE_P(scalableCoarseDevDaxTest, umfPoolTest, ::testing::ValuesIn(poolParamsList)); diff --git a/test/pools/scalable_coarse_file.cpp b/test/pools/scalable_coarse_file.cpp index b45c112be..30134f5eb 100644 --- a/test/pools/scalable_coarse_file.cpp +++ b/test/pools/scalable_coarse_file.cpp @@ -23,11 +23,9 @@ file_params_unique_handle_t get_file_params_default(char *path) { &umfFileMemoryProviderParamsDestroy); } -auto coarseParams = umfCoarseMemoryProviderParamsDefault(); file_params_unique_handle_t fileParams = get_file_params_default(FILE_PATH); INSTANTIATE_TEST_SUITE_P(scalableCoarseFileTest, umfPoolTest, ::testing::Values(poolCreateExtParams{ umfScalablePoolOps(), nullptr, - umfFileMemoryProviderOps(), fileParams.get(), - &coarseParams})); + umfFileMemoryProviderOps(), fileParams.get()})); diff --git a/test/pools/scalable_pool.cpp b/test/pools/scalable_pool.cpp index 3edacd965..51cc02030 100644 --- a/test/pools/scalable_pool.cpp +++ b/test/pools/scalable_pool.cpp @@ -27,8 +27,7 @@ auto defaultParams = createOsMemoryProviderParams(); INSTANTIATE_TEST_SUITE_P(scalablePoolTest, umfPoolTest, ::testing::Values(poolCreateExtParams{ umfScalablePoolOps(), nullptr, - umfOsMemoryProviderOps(), defaultParams.get(), - nullptr})); + umfOsMemoryProviderOps(), defaultParams.get()})); using scalablePoolParams = std::tuple; struct umfScalablePoolParamsTest diff --git a/test/provider_coarse.cpp b/test/provider_coarse.cpp deleted file mode 100644 index c2de4c06a..000000000 --- a/test/provider_coarse.cpp +++ /dev/null @@ -1,668 +0,0 @@ -/* - * Copyright (C) 2023-2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ - -#include - -#include "provider.hpp" - -#include - -using umf_test::KB; -using umf_test::MB; -using umf_test::test; - -#define GetStats umfCoarseMemoryProviderGetStats - -#define UPSTREAM_NAME "umf_ba_global" -#define BASE_NAME "coarse" -#define COARSE_NAME BASE_NAME " (" UPSTREAM_NAME ")" - -umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS = - umf::providerMakeCOps(); - -struct CoarseWithMemoryStrategyTest - : umf_test::test, - ::testing::WithParamInterface { - void SetUp() override { - test::SetUp(); - allocation_strategy = this->GetParam(); - } - - coarse_memory_provider_strategy_t allocation_strategy; -}; - -INSTANTIATE_TEST_SUITE_P( - CoarseWithMemoryStrategyTest, CoarseWithMemoryStrategyTest, - ::testing::Values(UMF_COARSE_MEMORY_STRATEGY_FASTEST, - UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, - UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE)); - -TEST_F(test, coarseProvider_name_upstream) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.destroy_upstream_memory_provider = true; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = nullptr; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - size_t minPageSize = 0; - umf_result = umfMemoryProviderGetMinPageSize(coarse_memory_provider, - nullptr, &minPageSize); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN); - ASSERT_EQ(minPageSize, 0); - - size_t pageSize = 0; - umf_result = umfMemoryProviderGetRecommendedPageSize( - coarse_memory_provider, minPageSize, &pageSize); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN); - ASSERT_EQ(pageSize, minPageSize); - - ASSERT_EQ( - strcmp(umfMemoryProviderGetName(coarse_memory_provider), COARSE_NAME), - 0); - - umfMemoryProviderDestroy(coarse_memory_provider); - // malloc_memory_provider has already been destroyed - // by umfMemoryProviderDestroy(coarse_memory_provider), because: - // coarse_memory_provider_params.destroy_upstream_memory_provider = true; -} - -TEST_F(test, coarseProvider_name_no_upstream) { - umf_result_t umf_result; - - const size_t init_buffer_size = 20 * MB; - - // preallocate some memory and initialize the vector with zeros - std::vector buffer(init_buffer_size, 0); - void *buf = (void *)buffer.data(); - ASSERT_NE(buf, nullptr); - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.upstream_memory_provider = nullptr; - 
coarse_memory_provider_params.immediate_init_from_upstream = false; - coarse_memory_provider_params.init_buffer = buf; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - size_t minPageSize = 0; - umf_result = umfMemoryProviderGetMinPageSize(coarse_memory_provider, - nullptr, &minPageSize); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_GT(minPageSize, 0); - - size_t pageSize = 0; - umf_result = umfMemoryProviderGetRecommendedPageSize( - coarse_memory_provider, minPageSize, &pageSize); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_GE(pageSize, minPageSize); - - ASSERT_EQ( - strcmp(umfMemoryProviderGetName(coarse_memory_provider), BASE_NAME), 0); - - umfMemoryProviderDestroy(coarse_memory_provider); -} - -// negative tests - -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_null_stats) { - ASSERT_EQ(GetStats(nullptr).alloc_size, 0); - ASSERT_EQ(GetStats(nullptr).used_size, 0); - ASSERT_EQ(GetStats(nullptr).num_upstream_blocks, 0); - ASSERT_EQ(GetStats(nullptr).num_all_blocks, 0); - ASSERT_EQ(GetStats(nullptr).num_free_blocks, 0); -} - -// wrong NULL parameters -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_NULL_params) { - umf_result_t umf_result; - - umf_memory_provider_handle_t coarse_memory_provider = nullptr; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), nullptr, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - ASSERT_EQ(coarse_memory_provider, nullptr); -} - -// wrong parameters: given no upstream_memory_provider -// nor init_buffer while exactly one of them must be set -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_0) { - umf_result_t umf_result; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = nullptr; - coarse_memory_provider_params.immediate_init_from_upstream = false; - coarse_memory_provider_params.init_buffer = nullptr; - coarse_memory_provider_params.init_buffer_size = 0; - - umf_memory_provider_handle_t coarse_memory_provider = nullptr; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - ASSERT_EQ(coarse_memory_provider, nullptr); -} - -// wrong parameters: given both an upstream_memory_provider -// and an init_buffer while only one of them is allowed -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_1) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; - - // preallocate some memory and initialize the vector with zeros - std::vector buffer(init_buffer_size, 0); - void *buf = (void *)buffer.data(); - ASSERT_NE(buf, nullptr); - - 
coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = buf; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider = nullptr; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - ASSERT_EQ(coarse_memory_provider, nullptr); - - umfMemoryProviderDestroy(malloc_memory_provider); -} - -// wrong parameters: init_buffer_size must not equal 0 when immediate_init_from_upstream is true -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_2) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = nullptr; - coarse_memory_provider_params.init_buffer_size = 0; - - umf_memory_provider_handle_t coarse_memory_provider = nullptr; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - ASSERT_EQ(coarse_memory_provider, nullptr); - - umfMemoryProviderDestroy(malloc_memory_provider); -} - -// wrong parameters: init_buffer_size must not equal 0 when init_buffer is not NULL -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_3) { - umf_result_t umf_result; - - const size_t init_buffer_size = 20 * MB; - - // preallocate some memory and initialize the vector with zeros - std::vector buffer(init_buffer_size, 0); - void *buf = (void *)buffer.data(); - ASSERT_NE(buf, nullptr); - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = nullptr; - coarse_memory_provider_params.immediate_init_from_upstream = false; - coarse_memory_provider_params.init_buffer = buf; - coarse_memory_provider_params.init_buffer_size = 0; - - umf_memory_provider_handle_t coarse_memory_provider = nullptr; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - ASSERT_EQ(coarse_memory_provider, nullptr); -} - -// wrong parameters: 
init_buffer_size must equal 0 when init_buffer is NULL and immediate_init_from_upstream is false -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_4) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = false; - coarse_memory_provider_params.init_buffer = NULL; - coarse_memory_provider_params.init_buffer_size = 20 * MB; - - umf_memory_provider_handle_t coarse_memory_provider = nullptr; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - ASSERT_EQ(coarse_memory_provider, nullptr); - - umfMemoryProviderDestroy(malloc_memory_provider); -} - -// wrong parameters: destroy_upstream_memory_provider is true, but an upstream provider is not provided -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_wrong_params_5) { - umf_result_t umf_result; - - const size_t init_buffer_size = 20 * MB; - - // preallocate some memory and initialize the vector with zeros - std::vector buffer(init_buffer_size, 0); - void *buf = (void *)buffer.data(); - ASSERT_NE(buf, nullptr); - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = nullptr; - coarse_memory_provider_params.destroy_upstream_memory_provider = true; - coarse_memory_provider_params.immediate_init_from_upstream = false; - coarse_memory_provider_params.init_buffer = buf; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider = nullptr; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - ASSERT_EQ(coarse_memory_provider, nullptr); -} - -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_split_merge) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = NULL; - 
coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - umf_memory_provider_handle_t cp = coarse_memory_provider; - char *ptr = nullptr; - - ASSERT_EQ(GetStats(cp).used_size, 0 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 1); - - /* test umfMemoryProviderAllocationSplit */ - umf_result = umfMemoryProviderAlloc(cp, 2 * MB, 0, (void **)&ptr); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(ptr, nullptr); - ASSERT_EQ(GetStats(cp).used_size, 2 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 2); - - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 2 * MB, 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 2 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 3); - - umf_result = umfMemoryProviderFree(cp, (ptr + 1 * MB), 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 1 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 2); - - umf_result = umfMemoryProviderFree(cp, ptr, 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 0); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 1); - - /* test umfMemoryProviderAllocationMerge */ - umf_result = umfMemoryProviderAlloc(cp, 2 * MB, 0, (void **)&ptr); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(ptr, nullptr); - ASSERT_EQ(GetStats(cp).used_size, 2 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 2); - - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 2 * MB, 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 2 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 3); - - umf_result = - umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 1 * MB), 2 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 2 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 2); - - umf_result = umfMemoryProviderFree(cp, ptr, 2 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 0); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 1); - - umfMemoryProviderDestroy(coarse_memory_provider); - umfMemoryProviderDestroy(malloc_memory_provider); -} - -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_split_merge_negative) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - 
coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = NULL; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - umf_memory_provider_handle_t cp = coarse_memory_provider; - char *ptr = nullptr; - - ASSERT_EQ(GetStats(cp).used_size, 0 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 1); - - /* test umfMemoryProviderAllocationSplit */ - umf_result = umfMemoryProviderAlloc(cp, 6 * MB, 0, (void **)&ptr); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(ptr, nullptr); - ASSERT_EQ(GetStats(cp).used_size, 6 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 2); - - // firstSize >= totalSize - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 6 * MB, 6 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // firstSize == 0 - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 6 * MB, 0); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // wrong totalSize - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 5 * MB, 1 * KB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - /* test umfMemoryProviderAllocationMerge */ - // split (6 * MB) block into (1 * MB) + (5 * MB) - umf_result = umfMemoryProviderAllocationSplit(cp, ptr, 6 * MB, 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 6 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 3); - - // split (5 * MB) block into (2 * MB) + (3 * MB) - umf_result = - umfMemoryProviderAllocationSplit(cp, (ptr + 1 * MB), 5 * MB, 2 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 6 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 4); - - // now we have 3 blocks: (1 * MB) + (2 * MB) + (3 * MB) - - // highPtr <= lowPtr - umf_result = - umfMemoryProviderAllocationMerge(cp, (ptr + 1 * MB), ptr, 2 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // highPtr - lowPtr >= totalSize - umf_result = - umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 1 * MB), 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // low_block->size + high_block->size != totalSize - umf_result = - umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 1 * MB), 5 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // not adjacent blocks - umf_result = - umfMemoryProviderAllocationMerge(cp, ptr, (ptr + 3 * MB), 4 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - umf_result = umfMemoryProviderFree(cp, ptr, 1 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 5 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 4); - - umf_result = umfMemoryProviderFree(cp, (ptr + 1 * MB), 2 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 3 * MB); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - 
ASSERT_EQ(GetStats(cp).num_all_blocks, 3); - - umf_result = umfMemoryProviderFree(cp, (ptr + 3 * MB), 3 * MB); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_EQ(GetStats(cp).used_size, 0); - ASSERT_EQ(GetStats(cp).alloc_size, init_buffer_size); - ASSERT_EQ(GetStats(cp).num_all_blocks, 1); - - umfMemoryProviderDestroy(coarse_memory_provider); - umfMemoryProviderDestroy(malloc_memory_provider); -} - -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_purge_no_upstream) { - umf_result_t umf_result; - - const size_t init_buffer_size = 20 * MB; - - // preallocate some memory and initialize the vector with zeros - std::vector buffer(init_buffer_size, 0); - void *buf = (void *)buffer.data(); - ASSERT_NE(buf, nullptr); - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.allocation_strategy = allocation_strategy; - coarse_memory_provider_params.upstream_memory_provider = nullptr; - coarse_memory_provider_params.immediate_init_from_upstream = false; - coarse_memory_provider_params.init_buffer = buf; - coarse_memory_provider_params.init_buffer_size = init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider = nullptr; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - // umfMemoryProviderPurgeLazy - // provider == NULL - umf_result = umfMemoryProviderPurgeLazy(nullptr, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // ptr == NULL - umf_result = umfMemoryProviderPurgeLazy(coarse_memory_provider, nullptr, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // no upstream_memory_provider - umf_result = - umfMemoryProviderPurgeLazy(coarse_memory_provider, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); - - // umfMemoryProviderPurgeForce - // provider == NULL - umf_result = umfMemoryProviderPurgeForce(nullptr, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // ptr == NULL - umf_result = - umfMemoryProviderPurgeForce(coarse_memory_provider, nullptr, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // no upstream_memory_provider - umf_result = - umfMemoryProviderPurgeForce(coarse_memory_provider, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); - - umfMemoryProviderDestroy(coarse_memory_provider); -} - -TEST_P(CoarseWithMemoryStrategyTest, coarseProvider_purge_with_upstream) { - umf_memory_provider_handle_t malloc_memory_provider; - umf_result_t umf_result; - - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(malloc_memory_provider, nullptr); - - const size_t init_buffer_size = 20 * MB; - - coarse_memory_provider_params_t coarse_memory_provider_params; - // make sure there are no undefined members - prevent a UB - memset(&coarse_memory_provider_params, 0, - sizeof(coarse_memory_provider_params)); - coarse_memory_provider_params.upstream_memory_provider = - malloc_memory_provider; - coarse_memory_provider_params.immediate_init_from_upstream = true; - coarse_memory_provider_params.init_buffer = NULL; - coarse_memory_provider_params.init_buffer_size = 
init_buffer_size; - - umf_memory_provider_handle_t coarse_memory_provider; - umf_result = umfMemoryProviderCreate(umfCoarseMemoryProviderOps(), - &coarse_memory_provider_params, - &coarse_memory_provider); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - ASSERT_NE(coarse_memory_provider, nullptr); - - // umfMemoryProviderPurgeLazy - // provider == NULL - umf_result = umfMemoryProviderPurgeLazy(nullptr, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // ptr == NULL - umf_result = umfMemoryProviderPurgeLazy(coarse_memory_provider, nullptr, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // malloc_memory_provider returns UMF_RESULT_ERROR_UNKNOWN - umf_result = - umfMemoryProviderPurgeLazy(coarse_memory_provider, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN); - - // umfMemoryProviderPurgeForce - // provider == NULL - umf_result = umfMemoryProviderPurgeForce(nullptr, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // ptr == NULL - umf_result = - umfMemoryProviderPurgeForce(coarse_memory_provider, nullptr, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - // malloc_memory_provider returns UMF_RESULT_ERROR_UNKNOWN - umf_result = - umfMemoryProviderPurgeForce(coarse_memory_provider, (void *)0x01, 1); - ASSERT_EQ(umf_result, UMF_RESULT_ERROR_UNKNOWN); - - umfMemoryProviderDestroy(coarse_memory_provider); - umfMemoryProviderDestroy(malloc_memory_provider); -} From 450c5a96d7cc52e29d7c411b0e09b27df0f879f8 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 5 Dec 2024 11:55:03 +0100 Subject: [PATCH 026/466] Replace all NULL with nullptr in disjointCoarseMallocPool.cpp Signed-off-by: Lukasz Dorau --- test/disjointCoarseMallocPool.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/test/disjointCoarseMallocPool.cpp b/test/disjointCoarseMallocPool.cpp index 45502b192..383487a87 100644 --- a/test/disjointCoarseMallocPool.cpp +++ b/test/disjointCoarseMallocPool.cpp @@ -43,8 +43,8 @@ TEST_P(FileWithMemoryStrategyTest, disjointFileMallocPool_simple1) { umf_memory_provider_handle_t malloc_memory_provider = nullptr; umf_result_t umf_result; - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, - &malloc_memory_provider); + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, + nullptr, &malloc_memory_provider); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(malloc_memory_provider, nullptr); @@ -62,7 +62,7 @@ TEST_P(FileWithMemoryStrategyTest, disjointFileMallocPool_simple1) { umf_result = umfFileMemoryProviderParamsDestroy(file_params); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; + umf_disjoint_pool_params_handle_t disjoint_pool_params = nullptr; umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(disjoint_pool_params, nullptr); @@ -89,7 +89,7 @@ TEST_P(FileWithMemoryStrategyTest, disjointFileMallocPool_simple1) { umf_result = umfDisjointPoolParamsDestroy(disjoint_pool_params); - umf_memory_provider_handle_t prov = NULL; + umf_memory_provider_handle_t prov = nullptr; umfPoolGetMemoryProvider(pool, &prov); ASSERT_NE(prov, nullptr); @@ -138,8 +138,8 @@ TEST_P(FileWithMemoryStrategyTest, disjointFileMallocPool_simple2) { umf_memory_provider_handle_t malloc_memory_provider = nullptr; umf_result_t umf_result; - umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, 
NULL, - &malloc_memory_provider); + umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, + nullptr, &malloc_memory_provider); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(malloc_memory_provider, nullptr); @@ -157,7 +157,7 @@ TEST_P(FileWithMemoryStrategyTest, disjointFileMallocPool_simple2) { umf_result = umfFileMemoryProviderParamsDestroy(file_params); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; + umf_disjoint_pool_params_handle_t disjoint_pool_params = nullptr; umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(disjoint_pool_params, nullptr); @@ -245,7 +245,7 @@ TEST_P(FileWithMemoryStrategyTest, disjointFileMMapPool_random) { umf_result = umfFileMemoryProviderParamsDestroy(file_params); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - umf_disjoint_pool_params_handle_t disjoint_pool_params = NULL; + umf_disjoint_pool_params_handle_t disjoint_pool_params = nullptr; umf_result = umfDisjointPoolParamsCreate(&disjoint_pool_params); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(disjoint_pool_params, nullptr); From 7fec989edd71dd7cc2a4dc26bccd3d12423e0daf Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 5 Dec 2024 15:55:48 +0100 Subject: [PATCH 027/466] Rename disjointCoarseMallocPool test to disjointPoolFileProv Signed-off-by: Lukasz Dorau --- test/CMakeLists.txt | 5 +++-- ...disjointCoarseMallocPool.cpp => disjointPoolFileProv.cpp} | 0 2 files changed, 3 insertions(+), 2 deletions(-) rename test/{disjointCoarseMallocPool.cpp => disjointPoolFileProv.cpp} (100%) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 593268a52..d5a07bfbb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -214,9 +214,10 @@ if(UMF_BUILD_LIBUMF_POOL_DISJOINT) SRCS c_api/disjoint_pool.c LIBS disjoint_pool) if(LINUX AND (NOT UMF_DISABLE_HWLOC)) + # this test uses the file provider add_umf_test( - NAME disjointCoarseMallocPool - SRCS disjointCoarseMallocPool.cpp + NAME disjointPoolFileProv + SRCS disjointPoolFileProv.cpp LIBS disjoint_pool) endif() endif() diff --git a/test/disjointCoarseMallocPool.cpp b/test/disjointPoolFileProv.cpp similarity index 100% rename from test/disjointCoarseMallocPool.cpp rename to test/disjointPoolFileProv.cpp From 86d23413c3374d3020725811422baca770f8962c Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Wed, 4 Dec 2024 21:52:56 +0000 Subject: [PATCH 028/466] Implement umfPool[Set/Get]Tag Implements https://github.com/oneapi-src/unified-memory-framework/issues/687 --- include/umf/memory_pool.h | 16 ++++++ src/libumf.def | 2 + src/libumf.map | 2 + src/memory_pool.c | 32 +++++++++++ src/memory_pool_internal.h | 4 ++ test/memoryPoolAPI.cpp | 115 +++++++++++++++++++++++++++++++++++++ 6 files changed, 171 insertions(+) diff --git a/include/umf/memory_pool.h b/include/umf/memory_pool.h index a93d400f9..de045acf4 100644 --- a/include/umf/memory_pool.h +++ b/include/umf/memory_pool.h @@ -170,6 +170,22 @@ umf_memory_pool_handle_t umfPoolByPtr(const void *ptr); umf_result_t umfPoolGetMemoryProvider(umf_memory_pool_handle_t hPool, umf_memory_provider_handle_t *hProvider); +/// +/// @brief Set a custom tag on the memory pool that can be later retrieved using umfPoolGetTag. +/// @param hPool specified memory pool +/// @param tag tag to be set +/// @param oldTag [out][optional] previous tag set on the memory pool +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
+umf_result_t umfPoolSetTag(umf_memory_pool_handle_t hPool, void *tag, + void **oldTag); + +/// +/// @brief Retrieve the tag associated with the memory pool or NULL if no tag is set. +/// @param hPool specified memory pool +/// @param tag [out] tag associated with the memory pool +/// @return UMF_RESULT_SUCCESS on success. +umf_result_t umfPoolGetTag(umf_memory_pool_handle_t hPool, void **tag); + #ifdef __cplusplus } #endif diff --git a/src/libumf.def b/src/libumf.def index 0b4588bb8..f0f38ee15 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -104,10 +104,12 @@ EXPORTS umfPoolFree umfPoolGetIPCHandleSize umfPoolGetLastAllocationError + umfPoolGetTag umfPoolGetMemoryProvider umfPoolMalloc umfPoolMallocUsableSize umfPoolRealloc + umfPoolSetTag umfProxyPoolOps umfPutIPCHandle umfScalablePoolOps diff --git a/src/libumf.map b/src/libumf.map index 41467bad5..fd1d48d34 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -99,9 +99,11 @@ UMF_1.0 { umfPoolGetIPCHandleSize; umfPoolGetLastAllocationError; umfPoolGetMemoryProvider; + umfPoolGetTag; umfPoolMalloc; umfPoolMallocUsableSize; umfPoolRealloc; + umfPoolSetTag; umfProxyPoolOps; umfPutIPCHandle; umfScalablePoolOps; diff --git a/src/memory_pool.c b/src/memory_pool.c index 4a85955ef..f4289c215 100644 --- a/src/memory_pool.c +++ b/src/memory_pool.c @@ -55,6 +55,13 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, pool->flags = flags; pool->ops = *ops; + pool->tag = NULL; + + if (NULL == utils_mutex_init(&pool->lock)) { + LOG_ERR("Failed to initialize mutex for pool"); + ret = UMF_RESULT_ERROR_UNKNOWN; + goto err_lock_init; + } ret = ops->initialize(pool->provider, params, &pool->pool_priv); if (ret != UMF_RESULT_SUCCESS) { @@ -66,6 +73,8 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, return UMF_RESULT_SUCCESS; err_pool_init: + utils_mutex_destroy_not_free(&pool->lock); +err_lock_init: if (!(flags & UMF_POOL_CREATE_FLAG_DISABLE_TRACKING)) { umfMemoryProviderDestroy(pool->provider); } @@ -90,6 +99,8 @@ void umfPoolDestroy(umf_memory_pool_handle_t hPool) { umfMemoryProviderDestroy(hUpstreamProvider); } + utils_mutex_destroy_not_free(&hPool->lock); + LOG_INFO("Memory pool destroyed: %p", (void *)hPool); // TODO: this free keeps memory in base allocator, so it can lead to OOM in some scenarios (it should be optimized) @@ -175,3 +186,24 @@ umf_result_t umfPoolGetLastAllocationError(umf_memory_pool_handle_t hPool) { UMF_CHECK((hPool != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); return hPool->ops.get_last_allocation_error(hPool->pool_priv); } + +umf_result_t umfPoolSetTag(umf_memory_pool_handle_t hPool, void *tag, + void **oldTag) { + UMF_CHECK((hPool != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + utils_mutex_lock(&hPool->lock); + if (oldTag) { + *oldTag = hPool->tag; + } + hPool->tag = tag; + utils_mutex_unlock(&hPool->lock); + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfPoolGetTag(umf_memory_pool_handle_t hPool, void **tag) { + UMF_CHECK((hPool != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + UMF_CHECK((tag != NULL), UMF_RESULT_ERROR_INVALID_ARGUMENT); + utils_mutex_lock(&hPool->lock); + *tag = hPool->tag; + utils_mutex_unlock(&hPool->lock); + return UMF_RESULT_SUCCESS; +} diff --git a/src/memory_pool_internal.h b/src/memory_pool_internal.h index 90f2f1629..e556ace21 100644 --- a/src/memory_pool_internal.h +++ b/src/memory_pool_internal.h @@ -22,6 +22,7 @@ extern "C" { #endif #include "base_alloc.h" +#include "utils_concurrency.h" typedef struct umf_memory_pool_t { void *pool_priv; @@ 
-30,6 +31,9 @@ typedef struct umf_memory_pool_t { // Memory provider used by the pool. umf_memory_provider_handle_t provider; + + utils_mutex_t lock; + void *tag; } umf_memory_pool_t; #ifdef __cplusplus diff --git a/test/memoryPoolAPI.cpp b/test/memoryPoolAPI.cpp index 1c6d83f2a..ec137b549 100644 --- a/test/memoryPoolAPI.cpp +++ b/test/memoryPoolAPI.cpp @@ -178,6 +178,121 @@ TEST_F(test, BasicPoolByPtrTest) { ASSERT_EQ(ret, UMF_RESULT_SUCCESS); } +struct tagTest : umf_test::test { + void SetUp() override { + test::SetUp(); + provider = umf_test::wrapProviderUnique(nullProviderCreate()); + pool = umf_test::wrapPoolUnique( + createPoolChecked(umfProxyPoolOps(), provider.get(), nullptr)); + } + + umf::provider_unique_handle_t provider; + umf::pool_unique_handle_t pool; +}; + +TEST_F(tagTest, SetAndGet) { + umf_result_t ret = umfPoolSetTag(pool.get(), (void *)0x99, nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + void *tag; + ret = umfPoolGetTag(pool.get(), &tag); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_EQ(tag, (void *)0x99); + + void *oldTag; + ret = umfPoolSetTag(pool.get(), (void *)0x100, &oldTag); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_EQ(oldTag, (void *)0x99); + + ret = umfPoolGetTag(pool.get(), &tag); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_EQ(tag, (void *)0x100); +} + +TEST_F(tagTest, SetAndGetNull) { + umf_result_t ret = umfPoolSetTag(pool.get(), nullptr, nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + void *tag; + ret = umfPoolGetTag(pool.get(), &tag); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_EQ(tag, nullptr); +} + +TEST_F(tagTest, NoSetAndGet) { + void *tag; + umf_result_t ret = umfPoolGetTag(pool.get(), &tag); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_EQ(tag, nullptr); +} + +TEST_F(tagTest, SetAndGetMt) { + static constexpr size_t NUM_THREADS = 8; + static constexpr size_t NUM_OPS_PER_THREAD = 16; + + std::vector threads; + + auto encodeTag = [](size_t thread, size_t op) -> void * { + return reinterpret_cast(thread * NUM_OPS_PER_THREAD + op); + }; + + auto decodeTag = [](void *tag) -> std::pair { + auto op = reinterpret_cast(tag) & (NUM_OPS_PER_THREAD - 1); + auto thread = reinterpret_cast(tag) / NUM_OPS_PER_THREAD; + return {thread, op}; + }; + + for (size_t i = 0; i < NUM_THREADS; i++) { + threads.emplace_back([this, i, encodeTag, decodeTag] { + for (size_t j = 0; j < NUM_OPS_PER_THREAD; j++) { + void *oldTag; + umf_result_t ret = + umfPoolSetTag(pool.get(), encodeTag(i, j), &oldTag); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + void *queriedTag; + ret = umfPoolGetTag(pool.get(), &queriedTag); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + auto [t1, op1] = decodeTag(oldTag); + auto [t2, op2] = decodeTag(queriedTag); + // if the tag was set by the same thread, the op part should be the same or higher + ASSERT_TRUE(t1 != t2 || op2 >= op1); + } + }); + } + + for (auto &thread : threads) { + thread.join(); + } + + void *tag; + auto ret = umfPoolGetTag(pool.get(), &tag); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + auto [t, op] = decodeTag(tag); + ASSERT_TRUE(t < NUM_THREADS); + ASSERT_TRUE(op == NUM_OPS_PER_THREAD - 1); +} + +TEST_F(tagTest, SetAndGetInvalidPtr) { + umf_result_t ret = umfPoolSetTag(pool.get(), nullptr, nullptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + ret = umfPoolGetTag(pool.get(), nullptr); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_F(tagTest, SetAndGetInvalidPool) { + umf_result_t ret = + umfPoolSetTag(nullptr, reinterpret_cast(0x1), nullptr); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + void *tag; + 
ret = umfPoolGetTag(nullptr, &tag); + ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + INSTANTIATE_TEST_SUITE_P( mallocPoolTest, umfPoolTest, ::testing::Values(poolCreateExtParams{&MALLOC_POOL_OPS, nullptr, From 4394ed70aae99c35dabaaf201c679696783d0739 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 6 Dec 2024 08:09:32 +0100 Subject: [PATCH 029/466] Remove the disable_provider_free parameter of jemalloc pool Remove the disable_provider_free parameter of jemalloc pool and all umfJemallocPoolParams*() API. Fixes: #904 Signed-off-by: Lukasz Dorau --- README.md | 12 --- examples/dram_and_fsdax/dram_and_fsdax.c | 31 +----- include/umf/pools/pool_jemalloc.h | 27 ----- src/libumf.def | 3 - src/libumf.map | 3 - src/pool/pool_jemalloc.c | 96 +----------------- test/pools/jemalloc_pool.cpp | 119 ----------------------- 7 files changed, 4 insertions(+), 287 deletions(-) diff --git a/README.md b/README.md index 81a82bfab..0c569c1b3 100644 --- a/README.md +++ b/README.md @@ -209,12 +209,6 @@ Additionally, required for tests: A memory provider that provides memory from a device DAX (a character device file /dev/daxX.Y). It can be used when large memory mappings are needed. -The DevDax memory provider does not support the free operation -(`umfMemoryProviderFree()` always returns `UMF_RESULT_ERROR_NOT_SUPPORTED`), -so it should be used with a pool manager that will take over -the managing of the provided memory - for example the jemalloc pool -with the `disable_provider_free` parameter set to true. - ##### Requirements 1) Linux OS @@ -224,12 +218,6 @@ with the `disable_provider_free` parameter set to true. A memory provider that provides memory by mapping a regular, extendable file. -The file memory provider does not support the free operation -(`umfMemoryProviderFree()` always returns `UMF_RESULT_ERROR_NOT_SUPPORTED`), -so it should be used with a pool manager that will take over -the managing of the provided memory - for example the jemalloc pool -with the `disable_provider_free` parameter set to true. - IPC API requires the `UMF_MEM_MAP_SHARED` memory `visibility` mode (`UMF_RESULT_ERROR_INVALID_ARGUMENT` is returned otherwise). diff --git a/examples/dram_and_fsdax/dram_and_fsdax.c b/examples/dram_and_fsdax/dram_and_fsdax.c index 26f451728..970242e10 100644 --- a/examples/dram_and_fsdax/dram_and_fsdax.c +++ b/examples/dram_and_fsdax/dram_and_fsdax.c @@ -78,41 +78,14 @@ static umf_memory_pool_handle_t create_fsdax_pool(const char *path) { } // Create an FSDAX memory pool - // - // The file memory provider does not support the free operation - // (`umfMemoryProviderFree()` always returns `UMF_RESULT_ERROR_NOT_SUPPORTED`), - // so it should be used with a pool manager that will take over - // the managing of the provided memory - for example the jemalloc pool - // with the `disable_provider_free` parameter set to true. 
- umf_jemalloc_pool_params_handle_t pool_params; - umf_result = umfJemallocPoolParamsCreate(&pool_params); - if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, "Failed to create jemalloc params!\n"); - umfMemoryProviderDestroy(provider_fsdax); - return NULL; - } - umf_result = umfJemallocPoolParamsSetKeepAllMemory(pool_params, true); - if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, "Failed to set KeepAllMemory!\n"); - umfMemoryProviderDestroy(provider_fsdax); - return NULL; - } - - // Create an FSDAX memory pool - umf_result = - umfPoolCreate(umfJemallocPoolOps(), provider_fsdax, pool_params, - UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool_fsdax); + umf_result = umfPoolCreate(umfJemallocPoolOps(), provider_fsdax, NULL, + UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &pool_fsdax); if (umf_result != UMF_RESULT_SUCCESS) { fprintf(stderr, "Failed to create an FSDAX memory pool!\n"); umfMemoryProviderDestroy(provider_fsdax); return NULL; } - umf_result = umfJemallocPoolParamsDestroy(pool_params); - if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, "Failed to destroy jemalloc params!\n"); - } - return pool_fsdax; } diff --git a/include/umf/pools/pool_jemalloc.h b/include/umf/pools/pool_jemalloc.h index 0cbecd38f..c30df6509 100644 --- a/include/umf/pools/pool_jemalloc.h +++ b/include/umf/pools/pool_jemalloc.h @@ -14,35 +14,8 @@ extern "C" { #endif -#include #include -struct umf_jemalloc_pool_params_t; - -/// @brief handle to the parameters of the jemalloc pool. -typedef struct umf_jemalloc_pool_params_t *umf_jemalloc_pool_params_handle_t; - -/// @brief Create a struct to store parameters of jemalloc pool. -/// @param hParams [out] handle to the newly created parameters struct. -/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. -umf_result_t -umfJemallocPoolParamsCreate(umf_jemalloc_pool_params_handle_t *hParams); - -/// @brief Destroy parameters struct. -/// @param hParams handle to the parameters of the jemalloc pool. -/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. -umf_result_t -umfJemallocPoolParamsDestroy(umf_jemalloc_pool_params_handle_t hParams); - -/// @brief Set if \p umfMemoryProviderFree() should never be called. -/// @param hParams handle to the parameters of the jemalloc pool. -/// @param keepAllMemory \p true if the jemalloc pool should not call -/// \p umfMemoryProviderFree, \p false otherwise. -/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
-umf_result_t -umfJemallocPoolParamsSetKeepAllMemory(umf_jemalloc_pool_params_handle_t hParams, - bool keepAllMemory); - umf_memory_pool_ops_t *umfJemallocPoolOps(void); #ifdef __cplusplus diff --git a/src/libumf.def b/src/libumf.def index f2b24be6c..3c0b63ce5 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -37,9 +37,6 @@ EXPORTS umfGetIPCHandle umfGetLastFailedMemoryProvider umfJemallocPoolOps - umfJemallocPoolParamsCreate - umfJemallocPoolParamsDestroy - umfJemallocPoolParamsSetKeepAllMemory umfLevelZeroMemoryProviderOps umfLevelZeroMemoryProviderParamsCreate umfLevelZeroMemoryProviderParamsDestroy diff --git a/src/libumf.map b/src/libumf.map index 067ec8838..85a904220 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -31,9 +31,6 @@ UMF_1.0 { umfGetIPCHandle; umfGetLastFailedMemoryProvider; umfJemallocPoolOps; - umfJemallocPoolParamsCreate; - umfJemallocPoolParamsDestroy; - umfJemallocPoolParamsSetKeepAllMemory; umfLevelZeroMemoryProviderOps; umfLevelZeroMemoryProviderParamsCreate; umfLevelZeroMemoryProviderParamsDestroy; diff --git a/src/pool/pool_jemalloc.c b/src/pool/pool_jemalloc.c index 47bc6497f..f88d5ce9d 100644 --- a/src/pool/pool_jemalloc.c +++ b/src/pool/pool_jemalloc.c @@ -24,26 +24,6 @@ umf_memory_pool_ops_t *umfJemallocPoolOps(void) { return NULL; } -umf_result_t -umfJemallocPoolParamsCreate(umf_jemalloc_pool_params_handle_t *hParams) { - (void)hParams; // unused - return UMF_RESULT_ERROR_NOT_SUPPORTED; -} - -umf_result_t -umfJemallocPoolParamsDestroy(umf_jemalloc_pool_params_handle_t hParams) { - (void)hParams; // unused - return UMF_RESULT_ERROR_NOT_SUPPORTED; -} - -umf_result_t -umfJemallocPoolParamsSetKeepAllMemory(umf_jemalloc_pool_params_handle_t hParams, - bool keepAllMemory) { - (void)hParams; // unused - (void)keepAllMemory; // unused - return UMF_RESULT_ERROR_NOT_SUPPORTED; -} - #else #include @@ -53,16 +33,8 @@ umfJemallocPoolParamsSetKeepAllMemory(umf_jemalloc_pool_params_handle_t hParams, typedef struct jemalloc_memory_pool_t { umf_memory_provider_handle_t provider; unsigned int arena_index; // index of jemalloc arena - // set to true if umfMemoryProviderFree() should never be called - bool disable_provider_free; } jemalloc_memory_pool_t; -// Configuration of Jemalloc Pool -typedef struct umf_jemalloc_pool_params_t { - /// Set to true if umfMemoryProviderFree() should never be called. 
- bool disable_provider_free; -} umf_jemalloc_pool_params_t; - static __TLS umf_result_t TLS_last_allocation_error; static jemalloc_memory_pool_t *pool_by_arena_index[MALLCTL_ARENAS_ALL]; @@ -75,52 +47,6 @@ static jemalloc_memory_pool_t *get_pool_by_arena_index(unsigned arena_ind) { return pool_by_arena_index[arena_ind]; } -umf_result_t -umfJemallocPoolParamsCreate(umf_jemalloc_pool_params_handle_t *hParams) { - if (!hParams) { - LOG_ERR("jemalloc pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_jemalloc_pool_params_t *params_data = - umf_ba_global_alloc(sizeof(*params_data)); - if (!params_data) { - LOG_ERR("cannot allocate memory for jemalloc poolparams"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - params_data->disable_provider_free = false; - - *hParams = (umf_jemalloc_pool_params_handle_t)params_data; - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfJemallocPoolParamsDestroy(umf_jemalloc_pool_params_handle_t hParams) { - if (!hParams) { - LOG_ERR("jemalloc pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_ba_global_free(hParams); - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfJemallocPoolParamsSetKeepAllMemory(umf_jemalloc_pool_params_handle_t hParams, - bool keepAllMemory) { - if (!hParams) { - LOG_ERR("jemalloc pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->disable_provider_free = keepAllMemory; - - return UMF_RESULT_SUCCESS; -} - // arena_extent_alloc - an extent allocation function conforms to the extent_alloc_t type and upon // success returns a pointer to size bytes of mapped memory on behalf of arena arena_ind such that // the extent's base address is a multiple of alignment, as well as setting *zero to indicate @@ -150,9 +76,7 @@ static void *arena_extent_alloc(extent_hooks_t *extent_hooks, void *new_addr, } if (new_addr != NULL && ptr != new_addr) { - if (!pool->disable_provider_free) { - umfMemoryProviderFree(pool->provider, ptr, size); - } + umfMemoryProviderFree(pool->provider, ptr, size); return NULL; } @@ -186,10 +110,6 @@ static void arena_extent_destroy(extent_hooks_t *extent_hooks, void *addr, jemalloc_memory_pool_t *pool = get_pool_by_arena_index(arena_ind); - if (pool->disable_provider_free) { - return; - } - umf_result_t ret; ret = umfMemoryProviderFree(pool->provider, addr, size); if (ret != UMF_RESULT_SUCCESS) { @@ -212,10 +132,6 @@ static bool arena_extent_dalloc(extent_hooks_t *extent_hooks, void *addr, jemalloc_memory_pool_t *pool = get_pool_by_arena_index(arena_ind); - if (pool->disable_provider_free) { - return true; // opt-out from deallocation - } - umf_result_t ret; ret = umfMemoryProviderFree(pool->provider, addr, size); if (ret != UMF_RESULT_SUCCESS) { @@ -466,12 +382,10 @@ static void *op_aligned_alloc(void *pool, size_t size, size_t alignment) { static umf_result_t op_initialize(umf_memory_provider_handle_t provider, void *params, void **out_pool) { + (void)params; // unused assert(provider); assert(out_pool); - umf_jemalloc_pool_params_handle_t je_params = - (umf_jemalloc_pool_params_handle_t)params; - extent_hooks_t *pHooks = &arena_extent_hooks; size_t unsigned_size = sizeof(unsigned); int err; @@ -484,12 +398,6 @@ static umf_result_t op_initialize(umf_memory_provider_handle_t provider, pool->provider = provider; - if (je_params) { - pool->disable_provider_free = je_params->disable_provider_free; - } else { - pool->disable_provider_free = false; - } - unsigned arena_index; err = je_mallctl("arenas.create", (void 
*)&arena_index, &unsigned_size, NULL, 0); diff --git a/test/pools/jemalloc_pool.cpp b/test/pools/jemalloc_pool.cpp index 4dddbcd32..96c386895 100644 --- a/test/pools/jemalloc_pool.cpp +++ b/test/pools/jemalloc_pool.cpp @@ -62,122 +62,3 @@ TEST_F(test, metadataNotAllocatedUsingProvider) { [pool = pool.get()](void *ptr) { umfPoolFree(pool, ptr); }); } } - -using jemallocPoolParams = bool; -struct umfJemallocPoolParamsTest - : umf_test::test, - ::testing::WithParamInterface { - - struct validation_params_t { - bool keep_all_memory; - }; - - struct provider_validator : public umf_test::provider_ba_global { - using base_provider = umf_test::provider_ba_global; - - umf_result_t initialize(validation_params_t *params) { - EXPECT_NE(params, nullptr); - expected_params = params; - return UMF_RESULT_SUCCESS; - } - umf_result_t free(void *ptr, size_t size) { - EXPECT_EQ(expected_params->keep_all_memory, false); - return base_provider::free(ptr, size); - } - - validation_params_t *expected_params; - }; - - static constexpr umf_memory_provider_ops_t VALIDATOR_PROVIDER_OPS = - umf::providerMakeCOps(); - - umfJemallocPoolParamsTest() : expected_params{false}, params(nullptr) {} - void SetUp() override { - test::SetUp(); - expected_params.keep_all_memory = this->GetParam(); - umf_result_t ret = umfJemallocPoolParamsCreate(¶ms); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - ret = umfJemallocPoolParamsSetKeepAllMemory( - params, expected_params.keep_all_memory); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - } - - void TearDown() override { - umfJemallocPoolParamsDestroy(params); - test::TearDown(); - } - - umf::pool_unique_handle_t makePool() { - umf_memory_provider_handle_t hProvider = nullptr; - umf_memory_pool_handle_t hPool = nullptr; - - auto ret = umfMemoryProviderCreate(&VALIDATOR_PROVIDER_OPS, - &expected_params, &hProvider); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - - ret = umfPoolCreate(umfJemallocPoolOps(), hProvider, params, - UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - - return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); - } - - void allocFreeFlow() { - static const size_t ALLOC_SIZE = 128; - static const size_t NUM_ALLOCATIONS = 100; - std::vector ptrs; - - auto pool = makePool(); - ASSERT_NE(pool, nullptr); - - for (size_t i = 0; i < NUM_ALLOCATIONS; ++i) { - auto *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); - ASSERT_NE(ptr, nullptr); - ptrs.push_back(ptr); - } - - for (size_t i = 0; i < NUM_ALLOCATIONS; ++i) { - auto ret = umfPoolFree(pool.get(), ptrs[i]); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - } - - // Now pool can call free during pool destruction - expected_params.keep_all_memory = false; - } - - validation_params_t expected_params; - umf_jemalloc_pool_params_handle_t params; -}; - -TEST_P(umfJemallocPoolParamsTest, allocFree) { allocFreeFlow(); } - -TEST_P(umfJemallocPoolParamsTest, updateParams) { - expected_params.keep_all_memory = !expected_params.keep_all_memory; - umf_result_t ret = umfJemallocPoolParamsSetKeepAllMemory( - params, expected_params.keep_all_memory); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - - allocFreeFlow(); -} - -TEST_P(umfJemallocPoolParamsTest, invalidParams) { - umf_result_t ret = umfJemallocPoolParamsCreate(nullptr); - ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - ret = umfJemallocPoolParamsSetKeepAllMemory(nullptr, true); - ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - ret = umfJemallocPoolParamsSetKeepAllMemory(nullptr, false); - ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); - - ret = 
umfJemallocPoolParamsDestroy(nullptr); - ASSERT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); -} - -GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfJemallocPoolParamsTest); - -/* TODO: enable this test after the issue #903 is fixed. -(https://github.com/oneapi-src/unified-memory-framework/issues/903) -INSTANTIATE_TEST_SUITE_P(jemallocPoolTest, umfJemallocPoolParamsTest, - testing::Values(false, true)); -*/ From 7d1f1d03043f008dbde7ce99d5f690571c66ff30 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 6 Dec 2024 08:26:41 +0100 Subject: [PATCH 030/466] Ignore return value of `je_mallctl()` in an error handling path Ignore return value of `je_mallctl()` in an error handling path, because we cannot do nothing more with this error. It fixes two Coverity issues. Signed-off-by: Lukasz Dorau --- src/pool/pool_jemalloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pool/pool_jemalloc.c b/src/pool/pool_jemalloc.c index 47bc6497f..673352b18 100644 --- a/src/pool/pool_jemalloc.c +++ b/src/pool/pool_jemalloc.c @@ -504,7 +504,7 @@ static umf_result_t op_initialize(umf_memory_provider_handle_t provider, err = je_mallctl(cmd, NULL, NULL, (void *)&pHooks, sizeof(void *)); if (err) { snprintf(cmd, sizeof(cmd), "arena.%u.destroy", arena_index); - je_mallctl(cmd, NULL, 0, NULL, 0); + (void)je_mallctl(cmd, NULL, 0, NULL, 0); LOG_ERR("Could not setup extent_hooks for newly created arena."); goto err_free_pool; } @@ -528,7 +528,7 @@ static void op_finalize(void *pool) { jemalloc_memory_pool_t *je_pool = (jemalloc_memory_pool_t *)pool; char cmd[64]; snprintf(cmd, sizeof(cmd), "arena.%u.destroy", je_pool->arena_index); - je_mallctl(cmd, NULL, 0, NULL, 0); + (void)je_mallctl(cmd, NULL, 0, NULL, 0); pool_by_arena_index[je_pool->arena_index] = NULL; umf_ba_global_free(je_pool); From 4dbe19c1f400b9539a670e05b2d14693032bf99f Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 6 Dec 2024 08:49:09 +0100 Subject: [PATCH 031/466] Fix building the custom jemalloc The "make install" step is executed always and all UMF tests are re-linked also always now because of an incorrect dependence. This patch fixes that. Signed-off-by: Lukasz Dorau --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 7fcfcbb95..82381f5b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -164,13 +164,13 @@ else() add_custom_command( COMMAND make WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} - OUTPUT ${jemalloc_targ_SOURCE_DIR}/lib/libjemalloc.la + OUTPUT ${jemalloc_targ_SOURCE_DIR}/lib/libjemalloc.a DEPENDS ${jemalloc_targ_SOURCE_DIR}/Makefile) add_custom_command( COMMAND make install WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} OUTPUT ${jemalloc_targ_BINARY_DIR}/lib/libjemalloc.a - DEPENDS ${jemalloc_targ_SOURCE_DIR}/lib/libjemalloc.la) + DEPENDS ${jemalloc_targ_SOURCE_DIR}/lib/libjemalloc.a) add_custom_target(jemalloc_prod DEPENDS ${jemalloc_targ_BINARY_DIR}/lib/libjemalloc.a) From 2fca364ae3d116395d813380c23157837d3e57f6 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 6 Dec 2024 17:11:14 +0100 Subject: [PATCH 032/466] Fix: remove incorrect assert in utils_align_ptr_up_size_down() Remove incorrect assert in utils_align_ptr_up_size_down(). A pointer is aligned, but a size is only adjusted to the new pointer. The size does not have to be aligned. 
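For illustration, a hypothetical call (the same values are exercised by the
test added later in this series) shows why the size cannot be expected to
stay aligned:

    void *ptr = (void *)0x4001;
    size_t size = 0x8000;
    utils_align_ptr_up_size_down(&ptr, &size, 0x4000);
    // ptr  == (void *)0x8000 - aligned up by 0x3fff
    // size == 0x4001         - reduced by the same 0x3fff, so it is
    //                          no longer a multiple of the alignment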
Signed-off-by: Lukasz Dorau --- src/utils/utils_common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index bffc9f355..eaf5420fc 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -25,7 +25,6 @@ void utils_align_ptr_up_size_down(void **ptr, size_t *size, size_t alignment) { } ASSERT(IS_ALIGNED(p, alignment)); - ASSERT(IS_ALIGNED(s, alignment)); *ptr = (void *)p; *size = s; From 6f274ec8510b6c153ad87cad94adfc8805d0f0c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Mon, 9 Dec 2024 13:56:43 +0100 Subject: [PATCH 033/466] set fixed iteration count for benchmarks --- benchmark/benchmark.hpp | 5 ++++- benchmark/benchmark_interfaces.hpp | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/benchmark/benchmark.hpp b/benchmark/benchmark.hpp index ead6b39e7..ad9ab7cc8 100644 --- a/benchmark/benchmark.hpp +++ b/benchmark/benchmark.hpp @@ -95,7 +95,8 @@ struct alloc_data { ->ArgNames( \ BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::argsName()) \ ->Name(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::name()) \ - ->MinWarmUpTime(1) + ->Iterations( \ + BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::iterations()) class fixed_alloc_size : public alloc_size_interface { public: @@ -238,6 +239,7 @@ class alloc_benchmark : public benchmark_interface { return res; } static std::string name() { return base::name() + "/alloc"; } + static int64_t iterations() { return 200000; } protected: using base = benchmark_interface; @@ -324,6 +326,7 @@ class multiple_malloc_free_benchmark : public alloc_benchmark { res.insert(res.end(), n.begin(), n.end()); return res; } + static int64_t iterations() { return 2000; } std::default_random_engine generator; distribution dist; }; diff --git a/benchmark/benchmark_interfaces.hpp b/benchmark/benchmark_interfaces.hpp index 868116062..e25c97771 100644 --- a/benchmark/benchmark_interfaces.hpp +++ b/benchmark/benchmark_interfaces.hpp @@ -55,7 +55,7 @@ struct benchmark_interface : public benchmark::Fixture { return res; } static std::string name() { return Allocator::name(); } - + static int64_t iterations() { return 10000; } Size alloc_size; Allocator allocator; }; From 4ff474fdcaf82bc462942d8681884450e3c8d389 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 6 Dec 2024 17:11:14 +0100 Subject: [PATCH 034/466] Fix: remove incorrect assert in utils_align_ptr_up_size_down() Remove incorrect assert in utils_align_ptr_up_size_down(). A pointer is aligned, but a size is only adjusted to the new pointer. The size does not have to be aligned. Signed-off-by: Lukasz Dorau --- src/utils/utils_common.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index bffc9f355..eaf5420fc 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -25,7 +25,6 @@ void utils_align_ptr_up_size_down(void **ptr, size_t *size, size_t alignment) { } ASSERT(IS_ALIGNED(p, alignment)); - ASSERT(IS_ALIGNED(s, alignment)); *ptr = (void *)p; *size = s; From 4b09af0ba6b9b55f3b8bcc9fd10d2626825424f4 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 9 Dec 2024 11:25:24 +0100 Subject: [PATCH 035/466] Make coarse_alloc() return aligned address when alignment==0 Make coarse_alloc() always return address aligned to coarse->page_size when alignment==0. 
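A minimal sketch of the caller-visible effect (hypothetical call; assumes
`coarse` is an already initialized coarse_t handle and a 4096-byte page
size):

    void *ptr = NULL;
    // alignment == 0 is now treated as coarse->page_size,
    // so the returned address is page-aligned (here: 4096-byte aligned)
    umf_result_t ret = coarse_alloc(coarse, 64 * 1024, 0, &ptr);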
Signed-off-by: Lukasz Dorau --- src/coarse/coarse.c | 36 ++++++++++++++++++++++--------- test/coarse_lib.cpp | 52 ++++++++++++++++++++++++++++++++++++--------- 2 files changed, 68 insertions(+), 20 deletions(-) diff --git a/src/coarse/coarse.c b/src/coarse/coarse.c index 729480154..0ce4ded3d 100644 --- a/src/coarse/coarse.c +++ b/src/coarse/coarse.c @@ -744,12 +744,18 @@ static block_t *find_free_block(struct ravl *free_blocks, size_t size, size_t alignment, coarse_strategy_t allocation_strategy) { block_t *block; + size_t new_size = size + alignment; switch (allocation_strategy) { case UMF_COARSE_MEMORY_STRATEGY_FASTEST: // Always allocate a free block of the (size + alignment) size // and later cut out the properly aligned part leaving two remaining parts. - return free_blocks_rm_ge(free_blocks, size + alignment, 0, + if (new_size < size) { + LOG_ERR("arithmetic overflow (size + alignment)"); + return NULL; + } + + return free_blocks_rm_ge(free_blocks, new_size, 0, CHECK_ONLY_THE_FIRST_BLOCK); case UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE: @@ -760,8 +766,13 @@ static block_t *find_free_block(struct ravl *free_blocks, size_t size, return block; } + if (new_size < size) { + LOG_ERR("arithmetic overflow (size + alignment)"); + return NULL; + } + // If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. - return free_blocks_rm_ge(free_blocks, size + alignment, 0, + return free_blocks_rm_ge(free_blocks, new_size, 0, CHECK_ONLY_THE_FIRST_BLOCK); case UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE: @@ -773,9 +784,14 @@ static block_t *find_free_block(struct ravl *free_blocks, size_t size, return block; } + if (new_size < size) { + LOG_ERR("arithmetic overflow (size + alignment)"); + return NULL; + } + // If none of them had the correct alignment, // use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. 
- return free_blocks_rm_ge(free_blocks, size + alignment, 0, + return free_blocks_rm_ge(free_blocks, new_size, 0, CHECK_ONLY_THE_FIRST_BLOCK); } @@ -1017,17 +1033,17 @@ umf_result_t coarse_alloc(coarse_t *coarse, size_t size, size_t alignment, } // alignment must be a power of two and a multiple or a divider of the page size - if (alignment && - ((alignment & (alignment - 1)) || ((alignment % coarse->page_size) && - (coarse->page_size % alignment)))) { + if (alignment == 0) { + alignment = coarse->page_size; + } else if ((alignment & (alignment - 1)) || + ((alignment % coarse->page_size) && + (coarse->page_size % alignment))) { LOG_ERR("wrong alignment: %zu (not a power of 2 or a multiple or a " "divider of the page size (%zu))", alignment, coarse->page_size); return UMF_RESULT_ERROR_INVALID_ALIGNMENT; - } - - if (IS_NOT_ALIGNED(alignment, coarse->page_size)) { - alignment = ALIGN_UP(alignment, coarse->page_size); + } else if (IS_NOT_ALIGNED(alignment, coarse->page_size)) { + alignment = ALIGN_UP_SAFE(alignment, coarse->page_size); } if (utils_mutex_lock(&coarse->lock) != 0) { diff --git a/test/coarse_lib.cpp b/test/coarse_lib.cpp index 6a3d9637e..c5e30ee8f 100644 --- a/test/coarse_lib.cpp +++ b/test/coarse_lib.cpp @@ -166,9 +166,10 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_provider) { TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_fixed_memory) { // preallocate some memory and initialize the vector with zeros - const size_t buff_size = 20 * MB; + const size_t buff_size = 20 * MB + coarse_params.page_size; std::vector buffer(buff_size, 0); - void *buf = (void *)buffer.data(); + void *buf = (void *)ALIGN_UP_SAFE((uintptr_t)buffer.data(), + coarse_params.page_size); ASSERT_NE(buf, nullptr); coarse_params.cb.alloc = NULL; @@ -206,9 +207,10 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_fixed_memory) { TEST_P(CoarseWithMemoryStrategyTest, coarseTest_fixed_memory_various) { // preallocate some memory and initialize the vector with zeros - const size_t buff_size = 20 * MB; + const size_t buff_size = 20 * MB + coarse_params.page_size; std::vector buffer(buff_size, 0); - void *buf = (void *)buffer.data(); + void *buf = (void *)ALIGN_UP_SAFE((uintptr_t)buffer.data(), + coarse_params.page_size); ASSERT_NE(buf, nullptr); coarse_params.cb.alloc = NULL; @@ -627,6 +629,15 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_free_cb_fails) { } TEST_P(CoarseWithMemoryStrategyTest, coarseTest_split_cb_fails) { + if (coarse_params.allocation_strategy == + UMF_COARSE_MEMORY_STRATEGY_FASTEST) { + // This test is designed for the UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE + // and UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE strategies, + // because the UMF_COARSE_MEMORY_STRATEGY_FASTEST strategy + // looks always for a block of size greater by the page size. 
+ return; + } + umf_memory_provider_handle_t malloc_memory_provider; umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, &malloc_memory_provider); @@ -702,9 +713,10 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_split_cb_fails) { TEST_P(CoarseWithMemoryStrategyTest, coarseTest_merge_cb_fails) { // preallocate some memory and initialize the vector with zeros - const size_t buff_size = 10 * MB; + const size_t buff_size = 10 * MB + coarse_params.page_size; std::vector buffer(buff_size, 0); - void *buf = (void *)buffer.data(); + void *buf = (void *)ALIGN_UP_SAFE((uintptr_t)buffer.data(), + coarse_params.page_size); ASSERT_NE(buf, nullptr); coarse_params.cb.alloc = NULL; @@ -901,6 +913,15 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_provider_alloc_not_set) { } TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic) { + if (coarse_params.allocation_strategy == + UMF_COARSE_MEMORY_STRATEGY_FASTEST) { + // This test is designed for the UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE + // and UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE strategies, + // because the UMF_COARSE_MEMORY_STRATEGY_FASTEST strategy + // looks always for a block of size greater by the page size. + return; + } + umf_memory_provider_handle_t malloc_memory_provider; umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, &malloc_memory_provider); @@ -1065,6 +1086,15 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic) { } TEST_P(CoarseWithMemoryStrategyTest, coarseTest_simple1) { + if (coarse_params.allocation_strategy == + UMF_COARSE_MEMORY_STRATEGY_FASTEST) { + // This test is designed for the UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE + // and UMF_COARSE_MEMORY_STRATEGY_CHECK_ALL_SIZE strategies, + // because the UMF_COARSE_MEMORY_STRATEGY_FASTEST strategy + // looks always for a block of size greater by the page size. + return; + } + umf_memory_provider_handle_t malloc_memory_provider; umf_result = umfMemoryProviderCreate(&UMF_MALLOC_MEMORY_PROVIDER_OPS, NULL, &malloc_memory_provider); @@ -1106,8 +1136,9 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_simple1) { ASSERT_NE(t[i], nullptr); } - if (max_alloc_size == 0) { - max_alloc_size = coarse_get_stats(ch).alloc_size; + size_t alloc_size = coarse_get_stats(ch).alloc_size; + if (alloc_size > max_alloc_size) { + max_alloc_size = alloc_size; } for (int i = 0; i < nptrs; i++) { @@ -1253,9 +1284,10 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_alignment_provider) { TEST_P(CoarseWithMemoryStrategyTest, coarseTest_alignment_fixed_memory) { // preallocate some memory and initialize the vector with zeros - const size_t alloc_size = 40 * MB; + const size_t alloc_size = 40 * MB + coarse_params.page_size; std::vector buffer(alloc_size, 0); - void *buf = (void *)buffer.data(); + void *buf = (void *)ALIGN_UP_SAFE((uintptr_t)buffer.data(), + coarse_params.page_size); ASSERT_NE(buf, nullptr); coarse_params.cb.alloc = NULL; From d2ecbe9c44da0939a9f97edeabb82e0ff1a31c27 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 9 Dec 2024 12:39:15 +0100 Subject: [PATCH 036/466] Make UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE the default strategy Make UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE the default strategy, because the alignment never equals 0 now. 
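Since coarse_strategy_t is a plain C enum, a zero-initialized configuration
now picks this strategy implicitly (sketch):

    coarse_params_t coarse_params = {0};
    // coarse_params.allocation_strategy ==
    //     UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE (enumerator value 0)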
Signed-off-by: Lukasz Dorau --- src/coarse/coarse.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coarse/coarse.h b/src/coarse/coarse.h index cd151ca27..93ec99002 100644 --- a/src/coarse/coarse.h +++ b/src/coarse/coarse.h @@ -34,16 +34,16 @@ typedef struct coarse_callbacks_t { // coarse library allocation strategy typedef enum coarse_strategy_t { + // Check if the first free block of the 'size' size has the correct alignment. + // If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. + UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE = 0, + // Always allocate a free block of the (size + alignment) size // and cut out the properly aligned part leaving two remaining parts. // It is the fastest strategy but causes memory fragmentation // when alignment is greater than 0. // It is the best strategy when alignment always equals 0. - UMF_COARSE_MEMORY_STRATEGY_FASTEST = 0, - - // Check if the first free block of the 'size' size has the correct alignment. - // If not, use the `UMF_COARSE_MEMORY_STRATEGY_FASTEST` strategy. - UMF_COARSE_MEMORY_STRATEGY_FASTEST_BUT_ONE, + UMF_COARSE_MEMORY_STRATEGY_FASTEST, // Look through all free blocks of the 'size' size // and choose the first one with the correct alignment. From dd47ffc2b8ec3eeb68ff4bf6789710aee50e76fe Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 9 Dec 2024 15:53:02 +0100 Subject: [PATCH 037/466] Add tests for utils_align_ptr_up_size_down() Signed-off-by: Lukasz Dorau --- test/utils/utils_linux.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test/utils/utils_linux.cpp b/test/utils/utils_linux.cpp index 7aa0a9d83..1815a1a78 100644 --- a/test/utils/utils_linux.cpp +++ b/test/utils/utils_linux.cpp @@ -2,6 +2,7 @@ // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include #include #include "base.hpp" @@ -169,3 +170,19 @@ TEST_F(test, utils_open) { EXPECT_EQ(utils_file_open(NULL), -1); EXPECT_EQ(utils_file_open_or_create(NULL), -1); } + +TEST_F(test, utils_align_ptr_up_size_down) { + uintptr_t ptr = 0x4000; + size_t size = 0x8000; + size_t alignment = 0x4000; + utils_align_ptr_up_size_down((void **)&ptr, &size, alignment); + EXPECT_EQ(ptr, 0x4000); + EXPECT_EQ(size, 0x8000); + + ptr = 0x4001; + size = 0x8000; + alignment = 0x4000; + utils_align_ptr_up_size_down((void **)&ptr, &size, alignment); + EXPECT_EQ(ptr, 0x8000); + EXPECT_EQ(size, 0x4001); +} From 62496ed59ffe9321b1cd46db3b9c81412739607c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Thu, 5 Dec 2024 18:27:01 +0100 Subject: [PATCH 038/466] add extra test for dax provider --- test/provider_devdax_memory.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/provider_devdax_memory.cpp b/test/provider_devdax_memory.cpp index afff1de4f..1feaaaaa6 100644 --- a/test/provider_devdax_memory.cpp +++ b/test/provider_devdax_memory.cpp @@ -233,6 +233,13 @@ TEST_P(umfProviderTest, purge_force) { test_alloc_free_success(provider.get(), page_size, 0, PURGE_FORCE); } +TEST_P(umfProviderTest, purge_force_unalligned_alloc) { + void *ptr; + auto ret = umfMemoryProviderAlloc(provider.get(), page_plus_64, 0, &ptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + test_alloc_free_success(provider.get(), page_size, 0, PURGE_FORCE); + umfMemoryProviderFree(provider.get(), ptr, page_plus_64); +} // negative tests using test_alloc_failure TEST_P(umfProviderTest, alloc_page64_align_page_minus_1_WRONG_ALIGNMENT_1) { From 3e12e1838bfa02369c69a53036a7ab7a8d9e1cde Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Tue, 10 Dec 2024 10:44:42 +0100 Subject: [PATCH 039/466] Update description of UMF_LINK_HWLOC_STATICALLY It is supported on Linux, MacOS, and Windows with the exception that the proxy library is disabled when this flag is used on Windows with Debug config. 
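For reference, a typical configure invocation enabling the option
(hypothetical example, mirroring the build scripts in this repo):

    cmake .. -DUMF_LINK_HWLOC_STATICALLY=ON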
--- CMakeLists.txt | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 82381f5b5..1e8ad7268 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,7 @@ option( OFF) option( UMF_LINK_HWLOC_STATICALLY - "Link UMF with HWLOC library statically (supported for Linux, MacOS and Release build on Windows)" + "Link UMF with HWLOC library statically (proxy library will be disabled on Windows+Debug build)" OFF) option(UMF_FORMAT_CODE_STYLE "Add clang, cmake, and black -format-check and -format-apply targets" diff --git a/README.md b/README.md index 0c569c1b3..854b25878 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ List of options provided by CMake: | UMF_USE_MSAN | Enable MemorySanitizer checks | ON/OFF | OFF | | UMF_USE_VALGRIND | Enable Valgrind instrumentation | ON/OFF | OFF | | UMF_USE_COVERAGE | Build with coverage enabled (Linux only) | ON/OFF | OFF | -| UMF_LINK_HWLOC_STATICALLY | Link UMF with HWLOC library statically (Windows+Release only) | ON/OFF | OFF | +| UMF_LINK_HWLOC_STATICALLY | Link UMF with HWLOC library statically (proxy library will be disabled on Windows+Debug build) | ON/OFF | OFF | | UMF_DISABLE_HWLOC | Disable features that requires hwloc (OS provider, memory targets, topology discovery) | ON/OFF | OFF | ## Architecture: memory pools and providers From 36d134c145c88cb633bfe9434b27f863d4f9ec39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Tue, 10 Dec 2024 13:23:01 +0100 Subject: [PATCH 040/466] use unique_ptr instead of constructor/destructor in benchmarks On Windows static builds, destructors are invoked after the UMF destructor, causing parameter structures to be unable to be destroyed. By switching to std::unique_ptr, we ensure that parameters are properly cleaned up and the destruction order issue is resolved. 
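The pattern is roughly the following (sketch, not the full change;
raw_params stands for whatever the corresponding *ParamsCreate() call
returned):

    using params_ptr = std::unique_ptr<void, void (*)(void *)>;

    params_ptr params(raw_params, [](void *p) {
        umfOsMemoryProviderParamsDestroy(
            static_cast<umf_os_memory_provider_params_handle_t>(p));
    });
    // the params are released as soon as this unique_ptr goes out of
    // scope, instead of in a static fixture destructor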
--- benchmark/benchmark.cpp | 111 +++++++++++++---------------- benchmark/benchmark.hpp | 8 +++ benchmark/benchmark_interfaces.hpp | 27 +++++-- 3 files changed, 79 insertions(+), 67 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index c10bbda87..655545d1e 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -37,20 +37,27 @@ struct glibc_malloc : public allocator_interface { }; struct os_provider : public provider_interface { - umf_os_memory_provider_params_handle_t params = NULL; - os_provider() { - umfOsMemoryProviderParamsCreate(¶ms); - return; - } - - ~os_provider() { - if (params != NULL) { - umfOsMemoryProviderParamsDestroy(params); + provider_interface::params_ptr + getParams(::benchmark::State &state) override { + umf_os_memory_provider_params_handle_t raw_params = nullptr; + umfOsMemoryProviderParamsCreate(&raw_params); + if (!raw_params) { + state.SkipWithError("Failed to create os provider params"); + return {nullptr, [](void *) {}}; } + + // Use a lambda as the custom deleter + auto deleter = [](void *p) { + auto handle = + static_cast(p); + umfOsMemoryProviderParamsDestroy(handle); + }; + + return {static_cast(raw_params), deleter}; } - void *getParams() override { return params; } - umf_memory_provider_ops_t *getOps() override { + umf_memory_provider_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { return umfOsMemoryProviderOps(); } static std::string name() { return "os_provider"; } @@ -62,73 +69,60 @@ struct proxy_pool : public pool_interface { getOps([[maybe_unused]] ::benchmark::State &state) override { return umfProxyPoolOps(); } - void *getParams([[maybe_unused]] ::benchmark::State &state) override { - return nullptr; - } + static std::string name() { return "proxy_pool<" + Provider::name() + ">"; } }; #ifdef UMF_POOL_DISJOINT_ENABLED template struct disjoint_pool : public pool_interface { - umf_disjoint_pool_params_handle_t disjoint_memory_pool_params; + umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfDisjointPoolOps(); + } - disjoint_pool() { - disjoint_memory_pool_params = NULL; - auto ret = umfDisjointPoolParamsCreate(&disjoint_memory_pool_params); + typename pool_interface::params_ptr + getParams(::benchmark::State &state) override { + umf_disjoint_pool_params_handle_t raw_params = nullptr; + auto ret = umfDisjointPoolParamsCreate(&raw_params); if (ret != UMF_RESULT_SUCCESS) { - return; + state.SkipWithError("Failed to create disjoint pool params"); + return {nullptr, [](void *) {}}; } - // those function should never fail, so error handling is minimal. 
- ret = umfDisjointPoolParamsSetSlabMinSize(disjoint_memory_pool_params, - 4096); - if (ret != UMF_RESULT_SUCCESS) { - goto err; - } + typename pool_interface::params_ptr params( + raw_params, [](void *p) { + umfDisjointPoolParamsDestroy( + static_cast(p)); + }); - ret = umfDisjointPoolParamsSetCapacity(disjoint_memory_pool_params, 4); + ret = umfDisjointPoolParamsSetSlabMinSize(raw_params, 4096); if (ret != UMF_RESULT_SUCCESS) { - goto err; + state.SkipWithError("Failed to set slab min size"); + return {nullptr, [](void *) {}}; } - ret = umfDisjointPoolParamsSetMinBucketSize(disjoint_memory_pool_params, - 4096); + ret = umfDisjointPoolParamsSetCapacity(raw_params, 4); if (ret != UMF_RESULT_SUCCESS) { - goto err; + state.SkipWithError("Failed to set capacity"); + return {nullptr, [](void *) {}}; } - ret = umfDisjointPoolParamsSetMaxPoolableSize( - disjoint_memory_pool_params, 4096 * 16); - + ret = umfDisjointPoolParamsSetMinBucketSize(raw_params, 4096); if (ret != UMF_RESULT_SUCCESS) { - goto err; + state.SkipWithError("Failed to set min bucket size"); + return {nullptr, [](void *) {}}; } - return; - err: - umfDisjointPoolParamsDestroy(disjoint_memory_pool_params); - disjoint_memory_pool_params = NULL; - } - - ~disjoint_pool() { - if (disjoint_memory_pool_params != NULL) { - umfDisjointPoolParamsDestroy(disjoint_memory_pool_params); + ret = umfDisjointPoolParamsSetMaxPoolableSize(raw_params, 4096 * 16); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to set max poolable size"); + return {nullptr, [](void *) {}}; } - } - umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) override { - return umfDisjointPoolOps(); + return params; } - void *getParams([[maybe_unused]] ::benchmark::State &state) override { - if (disjoint_memory_pool_params == NULL) { - state.SkipWithError("Failed to create disjoint pool params"); - } - - return disjoint_memory_pool_params; - } static std::string name() { return "disjoint_pool<" + Provider::name() + ">"; } @@ -142,9 +136,7 @@ struct jemalloc_pool : public pool_interface { getOps([[maybe_unused]] ::benchmark::State &state) override { return umfJemallocPoolOps(); } - void *getParams([[maybe_unused]] ::benchmark::State &state) override { - return NULL; - } + static std::string name() { return "jemalloc_pool<" + Provider::name() + ">"; } @@ -158,10 +150,7 @@ struct scalable_pool : public pool_interface { getOps([[maybe_unused]] ::benchmark::State &state) override { return umfScalablePoolOps(); } - virtual void * - getParams([[maybe_unused]] ::benchmark::State &state) override { - return NULL; - } + static std::string name() { return "scalable_pool<" + Provider::name() + ">"; } diff --git a/benchmark/benchmark.hpp b/benchmark/benchmark.hpp index ad9ab7cc8..6ac7a4dfa 100644 --- a/benchmark/benchmark.hpp +++ b/benchmark/benchmark.hpp @@ -232,12 +232,14 @@ class alloc_benchmark : public benchmark_interface { state.ResumeTiming(); } } + static std::vector argsName() { auto n = benchmark_interface::argsName(); std::vector res = {"max_allocs", "pre_allocs"}; res.insert(res.end(), n.begin(), n.end()); return res; } + static std::string name() { return base::name() + "/alloc"; } static int64_t iterations() { return 200000; } @@ -320,13 +322,16 @@ class multiple_malloc_free_benchmark : public alloc_benchmark { static std::string name() { return base::base::name() + "/multiple_malloc_free"; } + static std::vector argsName() { auto n = benchmark_interface::argsName(); std::vector res = {"max_allocs"}; res.insert(res.end(), n.begin(), 
n.end()); return res; } + static int64_t iterations() { return 2000; } + std::default_random_engine generator; distribution dist; }; @@ -352,9 +357,11 @@ class provider_allocator : public allocator_interface { } return ptr; } + void benchFree(void *ptr, size_t size) override { umfMemoryProviderFree(provider.provider, ptr, size); } + static std::string name() { return Provider::name(); } private: @@ -374,6 +381,7 @@ template class pool_allocator : public allocator_interface { virtual void *benchAlloc(size_t size) override { return umfPoolMalloc(pool.pool, size); } + virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) override { umfPoolFree(pool.pool, ptr); } diff --git a/benchmark/benchmark_interfaces.hpp b/benchmark/benchmark_interfaces.hpp index e25c97771..516a20b69 100644 --- a/benchmark/benchmark_interfaces.hpp +++ b/benchmark/benchmark_interfaces.hpp @@ -6,6 +6,7 @@ * */ +#include #include #include #include @@ -39,6 +40,7 @@ struct benchmark_interface : public benchmark::Fixture { int argPos = alloc_size.SetUp(state, 0); allocator.SetUp(state, argPos); } + void TearDown(::benchmark::State &state) { alloc_size.TearDown(state); allocator.TearDown(state); @@ -54,6 +56,7 @@ struct benchmark_interface : public benchmark::Fixture { res.insert(res.end(), a.begin(), a.end()); return res; } + static std::string name() { return Allocator::name(); } static int64_t iterations() { return 10000; } Size alloc_size; @@ -61,13 +64,16 @@ struct benchmark_interface : public benchmark::Fixture { }; struct provider_interface { + using params_ptr = std::unique_ptr; + umf_memory_provider_handle_t provider = NULL; virtual void SetUp(::benchmark::State &state) { if (state.thread_index() != 0) { return; } + auto params = getParams(state); auto umf_result = - umfMemoryProviderCreate(getOps(), getParams(), &provider); + umfMemoryProviderCreate(getOps(state), params.get(), &provider); if (umf_result != UMF_RESULT_SUCCESS) { state.SkipWithError("umfMemoryProviderCreate() failed"); } @@ -83,21 +89,30 @@ struct provider_interface { } } - virtual umf_memory_provider_ops_t *getOps() { return nullptr; } - virtual void *getParams() { return nullptr; } + virtual umf_memory_provider_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) { + return nullptr; + } + + virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { + return {nullptr, [](void *) {}}; + } }; template ::value>> struct pool_interface { + using params_ptr = std::unique_ptr; + virtual void SetUp(::benchmark::State &state) { provider.SetUp(state); if (state.thread_index() != 0) { return; } + auto params = getParams(state); auto umf_result = umfPoolCreate(getOps(state), provider.provider, - getParams(state), 0, &pool); + params.get(), 0, &pool); if (umf_result != UMF_RESULT_SUCCESS) { state.SkipWithError("umfPoolCreate() failed"); } @@ -121,8 +136,8 @@ struct pool_interface { getOps([[maybe_unused]] ::benchmark::State &state) { return nullptr; } - virtual void *getParams([[maybe_unused]] ::benchmark::State &state) { - return nullptr; + virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { + return {nullptr, [](void *) {}}; } T provider; umf_memory_pool_handle_t pool; From 397e5c4f55e05b253d05d960c2be199f67942a14 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 12 Dec 2024 11:36:27 +0100 Subject: [PATCH 041/466] Build custom jemalloc with -j$(nproc) Signed-off-by: Lukasz Dorau --- CMakeLists.txt | 7 ++++++- scripts/qemu/run-build.sh | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git 
a/CMakeLists.txt b/CMakeLists.txt index 1d0890831..0eea5faf4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -161,8 +161,13 @@ else() WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} OUTPUT ${jemalloc_targ_SOURCE_DIR}/Makefile DEPENDS ${jemalloc_targ_SOURCE_DIR}/configure) + + if(NOT UMF_QEMU_BUILD) + set(MAKE_ARGUMENTS "-j$(nproc)") + endif() + add_custom_command( - COMMAND make + COMMAND make ${MAKE_ARGUMENTS} WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} OUTPUT ${jemalloc_targ_SOURCE_DIR}/lib/libjemalloc.a DEPENDS ${jemalloc_targ_SOURCE_DIR}/Makefile) diff --git a/scripts/qemu/run-build.sh b/scripts/qemu/run-build.sh index b0f4bee1e..c6314153c 100755 --- a/scripts/qemu/run-build.sh +++ b/scripts/qemu/run-build.sh @@ -21,6 +21,7 @@ cd build cmake .. \ -DCMAKE_BUILD_TYPE=Debug \ + -DUMF_QEMU_BUILD=1 \ -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON \ -DUMF_BUILD_CUDA_PROVIDER=ON \ -DUMF_FORMAT_CODE_STYLE=OFF \ From 0c75171f88af8cd0768c1e251bc93ec4cf51c61b Mon Sep 17 00:00:00 2001 From: kluszcze Date: Thu, 12 Dec 2024 13:10:32 +0100 Subject: [PATCH 042/466] fix codespell errors in CODE_OF_CONDUCT and reusable_valgrind.yml Signed-off-by: kluszcze --- .github/workflows/reusable_valgrind.yml | 2 +- CODE_OF_CONDUCT.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reusable_valgrind.yml b/.github/workflows/reusable_valgrind.yml index 3e0af273a..aba0e3260 100644 --- a/.github/workflows/reusable_valgrind.yml +++ b/.github/workflows/reusable_valgrind.yml @@ -1,4 +1,4 @@ -# Run tests with valgrind intstrumentation tools: memcheck, drd, helgrind +# Run tests with valgrind instrumentation tools: memcheck, drd, helgrind name: Valgrind on: workflow_call diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 58dba18db..2e7fbf7d6 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -5,7 +5,7 @@ We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, +identity and expression, level of experience, education, socioeconomic status, nationality, personal appearance, race, caste, color, religion, or sexual identity and orientation. From 0f8b59dcbbb9b52370f40978c20c62d19331c1f0 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 12 Dec 2024 14:00:40 +0100 Subject: [PATCH 043/466] Add strings with UMF version and useful CMake options Add strings with UMF version and useful CMake options that can be grepped in the following way: $ strings libumf.so | grep "@(#)" @(#) Intel(R) UMF version: 0.11.0-dev.git66.g89e3831d @(#) Intel(R) UMF CMake variables: "CMAKE_BUILD_TYPE:Debug,... 
Signed-off-by: Lukasz Dorau --- CMakeLists.txt | 66 ++++++++++++++++++++++++---------------- CONTRIBUTING.md | 28 ++++++++++++++--- src/CMakeLists.txt | 16 +++++++++- src/utils/utils_log.c | 34 +++++++++++++++------ test/utils/utils_log.cpp | 3 ++ 5 files changed, 105 insertions(+), 42 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5614684bd..f71ce1820 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,36 +33,47 @@ include(CMakePackageConfigHelpers) include(GNUInstallDirs) find_package(PkgConfig) +# Define a list to store the names of all options +set(UMF_OPTIONS_LIST "") +list(APPEND UMF_OPTIONS_LIST CMAKE_BUILD_TYPE) + +# Define a macro to wrap the option() command and track the options +macro(umf_option) + list(APPEND UMF_OPTIONS_LIST ${ARGV0}) + option(${ARGV}) +endmacro() + # Build Options -option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) -option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) -option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON) -option(UMF_BUILD_LIBUMF_POOL_DISJOINT - "Build the libumf_pool_disjoint static library" OFF) -option(UMF_BUILD_LIBUMF_POOL_JEMALLOC - "Build the libumf_pool_jemalloc static library" OFF) -option(UMF_BUILD_TESTS "Build UMF tests" ON) -option(UMF_BUILD_GPU_TESTS "Build UMF GPU tests" OFF) -option(UMF_BUILD_BENCHMARKS "Build UMF benchmarks" OFF) -option(UMF_BUILD_BENCHMARKS_MT "Build UMF multithreaded benchmarks" OFF) -option(UMF_BUILD_EXAMPLES "Build UMF examples" ON) -option(UMF_BUILD_FUZZTESTS "Build UMF fuzz tests" OFF) -option(UMF_BUILD_GPU_EXAMPLES "Build UMF GPU examples" OFF) -option(UMF_DEVELOPER_MODE "Enable additional developer checks" OFF) -option( +umf_option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) +umf_option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) +umf_option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON) +umf_option(UMF_BUILD_LIBUMF_POOL_DISJOINT + "Build the libumf_pool_disjoint static library" OFF) +umf_option(UMF_BUILD_LIBUMF_POOL_JEMALLOC + "Build the libumf_pool_jemalloc static library" OFF) +umf_option(UMF_BUILD_TESTS "Build UMF tests" ON) +umf_option(UMF_BUILD_GPU_TESTS "Build UMF GPU tests" OFF) +umf_option(UMF_BUILD_BENCHMARKS "Build UMF benchmarks" OFF) +umf_option(UMF_BUILD_BENCHMARKS_MT "Build UMF multithreaded benchmarks" OFF) +umf_option(UMF_BUILD_EXAMPLES "Build UMF examples" ON) +umf_option(UMF_BUILD_FUZZTESTS "Build UMF fuzz tests" OFF) +umf_option(UMF_BUILD_GPU_EXAMPLES "Build UMF GPU examples" OFF) +umf_option(UMF_DEVELOPER_MODE "Enable additional developer checks" OFF) +umf_option( UMF_DISABLE_HWLOC "Disable hwloc and UMF features requiring it (OS provider, memtargets, topology discovery)" OFF) -option( +umf_option( UMF_LINK_HWLOC_STATICALLY "Link UMF with HWLOC library statically (supported for Linux, MacOS and Release build on Windows)" OFF) -option(UMF_FORMAT_CODE_STYLE - "Add clang, cmake, and black -format-check and -format-apply targets" - OFF) +umf_option( + UMF_FORMAT_CODE_STYLE + "Add clang, cmake, and black -format-check and -format-apply targets" OFF) set(UMF_HWLOC_NAME "hwloc" CACHE STRING "Custom name for hwloc library w/o extension") +list(APPEND UMF_OPTIONS_LIST UMF_HWLOC_NAME) set(UMF_INSTALL_RPATH "" CACHE @@ -71,13 +82,13 @@ set(UMF_INSTALL_RPATH ) # Only a part of skips is treated as a failure now. 
TODO: extend to all tests -option(UMF_TESTS_FAIL_ON_SKIP "Treat skips in tests as fail" OFF) -option(UMF_USE_ASAN "Enable AddressSanitizer checks" OFF) -option(UMF_USE_UBSAN "Enable UndefinedBehaviorSanitizer checks" OFF) -option(UMF_USE_TSAN "Enable ThreadSanitizer checks" OFF) -option(UMF_USE_MSAN "Enable MemorySanitizer checks" OFF) -option(UMF_USE_VALGRIND "Enable Valgrind instrumentation" OFF) -option(UMF_USE_COVERAGE "Build with coverage enabled (Linux only)" OFF) +umf_option(UMF_TESTS_FAIL_ON_SKIP "Treat skips in tests as fail" OFF) +umf_option(UMF_USE_ASAN "Enable AddressSanitizer checks" OFF) +umf_option(UMF_USE_UBSAN "Enable UndefinedBehaviorSanitizer checks" OFF) +umf_option(UMF_USE_TSAN "Enable ThreadSanitizer checks" OFF) +umf_option(UMF_USE_MSAN "Enable MemorySanitizer checks" OFF) +umf_option(UMF_USE_VALGRIND "Enable Valgrind instrumentation" OFF) +umf_option(UMF_USE_COVERAGE "Build with coverage enabled (Linux only)" OFF) # set UMF_PROXY_LIB_BASED_ON_POOL to one of: SCALABLE or JEMALLOC set(KNOWN_PROXY_LIB_POOLS SCALABLE JEMALLOC) @@ -87,6 +98,7 @@ set(UMF_PROXY_LIB_BASED_ON_POOL "A UMF pool the proxy library is based on (SCALABLE or JEMALLOC)") set_property(CACHE UMF_PROXY_LIB_BASED_ON_POOL PROPERTY STRINGS ${KNOWN_PROXY_LIB_POOLS}) +list(APPEND UMF_OPTIONS_LIST UMF_PROXY_LIB_BASED_ON_POOL) if(UMF_BUILD_TESTS AND DEFINED ENV{CI} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index cd4a2a790..7b9749c49 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,15 +2,19 @@ -- [Opening new issues](#opening-new-issues) -- [Submitting Pull Requests](#submitting-pull-requests) +- [Contributing to UMF (Unified Memory Framework)](#contributing-to-umf-unified-memory-framework) + - [Opening new issues](#opening-new-issues) + - [Submitting Pull Requests](#submitting-pull-requests) - [Building and testing](#building-and-testing) - [Code style](#code-style) - - [When my PR is merged?](#when-my-PR-is-merged) + - [When my PR is merged?](#when-my-pr-is-merged) - [Extending public API](#extending-public-api) - [License](#license) - [Adding new dependency](#adding-new-dependency) -- [Code coverage](#code-coverage) + - [Code coverage](#code-coverage) + - [Debugging](#debugging) + - [Checking the UMF version and CMake variables (Linux only)](#checking-the-umf-version-and-cmake-variables-linux-only) + - [Requirements](#requirements) Below you'll find instructions on how to contribute to UMF, either with code changes or issues. All contributions are most welcome! @@ -222,3 +226,19 @@ $ apt install lcov $ lcov --capture --directory . --output-file coverage.info $ genhtml -o html_report coverage.info ``` + +## Debugging + +### Checking the UMF version and CMake variables (Linux only) + +Strings with the UMF version and useful CMake variables can be grepped in the following way: + +```bash +$ strings libumf.so | grep "@(#)" +@(#) Intel(R) UMF version: 0.11.0-dev.git66.g89e3831d +@(#) Intel(R) UMF CMake variables: "CMAKE_BUILD_TYPE:Debug,... +``` + +#### Requirements + +- binutils package (Linux) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b4736ed0f..57050e827 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,10 +11,24 @@ set(UMF_CUDA_INCLUDE_DIR "" CACHE PATH "Directory containing the CUDA headers") +# Compose the UMF_ALL_CMAKE_VARIABLES variable containing CMake options that +# will be saved in the constant string. 
+list(SORT UMF_OPTIONS_LIST ORDER DESCENDING) +foreach(_var ${UMF_OPTIONS_LIST}) + # Preprocessor definitions containing '#' cannot be passed on to the + # compiler command line because many compilers do not support it. + if(NOT "${${_var}}" MATCHES "#") + set(UMF_ALL_CMAKE_VARIABLES + "${_var}:${${_var}},${UMF_ALL_CMAKE_VARIABLES}") + endif() +endforeach() + # Compile definitions for UMF library. # # TODO: Cleanup the compile definitions across all the CMake files -set(UMF_COMMON_COMPILE_DEFINITIONS UMF_VERSION=${UMF_VERSION}) +set(UMF_COMMON_COMPILE_DEFINITIONS + UMF_VERSION=${UMF_VERSION} + UMF_ALL_CMAKE_VARIABLES="${UMF_ALL_CMAKE_VARIABLES}") add_subdirectory(utils) diff --git a/src/utils/utils_log.c b/src/utils/utils_log.c index bdb9ce823..2fd28fc2c 100644 --- a/src/utils/utils_log.c +++ b/src/utils/utils_log.c @@ -32,6 +32,29 @@ #include "utils_common.h" #include "utils_log.h" +#define UMF_MAGIC_STR "\x00@(#) " +#define UMF_PREF_STR "Intel(R) " +#define UMF_PREFIX UMF_MAGIC_STR UMF_PREF_STR + +// convert a define to a C string +#define STR_(X) #X +#define STR(X) STR_(X) + +#ifdef UMF_VERSION +#define STR_UMF_VERSION "UMF version: " STR(UMF_VERSION) +#define LOG_STR_UMF_VERSION STR_UMF_VERSION ", " +char const __umf_str_2_version[] = UMF_PREFIX STR_UMF_VERSION; +#else /* !UMF_VERSION */ +#error "UMF_VERSION not defined!" +#endif /* !UMF_VERSION */ + +#ifdef UMF_ALL_CMAKE_VARIABLES +char const __umf_str_1__all_cmake_vars[] = + UMF_PREFIX "UMF CMake variables: " STR(UMF_ALL_CMAKE_VARIABLES); +#else /* !UMF_ALL_CMAKE_VARIABLES */ +#error "UMF_ALL_CMAKE_VARIABLES not defined!" +#endif /* !UMF_ALL_CMAKE_VARIABLES */ + #define LOG_MAX 8192 #define LOG_HEADER 256 #define MAX_FILE_PATH 256 @@ -305,17 +328,8 @@ void utils_log_init(void) { loggerConfig.flushLevel = LOG_FATAL; } -#ifdef UMF_VERSION -// convert a define to a C string -#define STR_(X) #X -#define STR(X) STR_(X) -#define STR_UMF_VERSION "UMF version: " STR(UMF_VERSION) ", " -#else /* !UMF_VERSION */ -#error "UMF_VERSION not defined!" 
-#endif /* !UMF_VERSION */ - LOG_INFO( - "Logger enabled (" STR_UMF_VERSION + "Logger enabled (" LOG_STR_UMF_VERSION "level: %s, flush: %s, pid: %s, timestamp: %s)", level_to_str(loggerConfig.level), level_to_str(loggerConfig.flushLevel), bool_to_str(loggerConfig.pid), bool_to_str(loggerConfig.timestamp)); diff --git a/test/utils/utils_log.cpp b/test/utils/utils_log.cpp index c0f81abf0..cce61db58 100644 --- a/test/utils/utils_log.cpp +++ b/test/utils/utils_log.cpp @@ -110,6 +110,9 @@ const char *env_variable = ""; #ifndef UMF_VERSION #define UMF_VERSION "test version" #endif +#ifndef UMF_ALL_CMAKE_VARIABLES +#define UMF_ALL_CMAKE_VARIABLES "test UMF_ALL_CMAKE_VARIABLES" +#endif #include "utils/utils_log.c" #undef utils_env_var #undef fopen From b07c1a2d6f7d2c17b61827d273a85e6da81a0118 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Thu, 5 Dec 2024 15:43:16 +0100 Subject: [PATCH 044/466] add fixed provider --- README.md | 7 +- include/umf/providers/provider_fixed_memory.h | 64 +++ scripts/docs_config/api.rst | 11 +- src/CMakeLists.txt | 1 + src/libumf.def | 5 +- src/libumf.map | 11 +- src/provider/provider_fixed_memory.c | 337 +++++++++++++++ test/CMakeLists.txt | 4 + test/provider_fixed_memory.cpp | 393 ++++++++++++++++++ 9 files changed, 815 insertions(+), 18 deletions(-) create mode 100644 include/umf/providers/provider_fixed_memory.h create mode 100644 src/provider/provider_fixed_memory.c create mode 100644 test/provider_fixed_memory.cpp diff --git a/README.md b/README.md index 854b25878..df90b6852 100644 --- a/README.md +++ b/README.md @@ -132,12 +132,9 @@ More detailed documentation is available here: https://oneapi-src.github.io/unif ### Memory providers -#### Coarse Provider +#### Fixed memory provider -A memory provider that can provide memory from: -1) a given pre-allocated buffer (the fixed-size memory provider option) or -2) from an additional upstream provider (e.g. provider that does not support the free() operation - like the File memory provider or the DevDax memory provider - see below). +A memory provider that can provide memory from a given pre-allocated buffer. #### OS memory provider diff --git a/include/umf/providers/provider_fixed_memory.h b/include/umf/providers/provider_fixed_memory.h new file mode 100644 index 000000000..2351faf31 --- /dev/null +++ b/include/umf/providers/provider_fixed_memory.h @@ -0,0 +1,64 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_FIXED_MEMORY_PROVIDER_H +#define UMF_FIXED_MEMORY_PROVIDER_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/// @cond +#define UMF_FIXED_RESULTS_START_FROM 4000 +/// @endcond + +struct umf_fixed_memory_provider_params_t; + +typedef struct umf_fixed_memory_provider_params_t + *umf_fixed_memory_provider_params_handle_t; + +/// @brief Create a struct to store parameters of the Fixed Memory Provider. +/// @param hParams [out] handle to the newly created parameters struct. +/// @param ptr [in] pointer to the memory region. +/// @param size [in] size of the memory region in bytes. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfFixedMemoryProviderParamsCreate( + umf_fixed_memory_provider_params_handle_t *hParams, void *ptr, size_t size); + +/// @brief Set the memory region in params struct. Overwrites the previous value. 
+/// It provides an ability to use the same instance of params to create multiple +/// instances of the provider for different memory regions. +/// @param hParams [in] handle to the parameters of the Fixed Memory Provider. +/// @param ptr [in] pointer to the memory region. +/// @param size [in] size of the memory region in bytes. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfFixedMemoryProviderParamsSetMemory( + umf_fixed_memory_provider_params_handle_t hParams, void *ptr, size_t size); + +/// @brief Destroy parameters struct. +/// @param hParams [in] handle to the parameters of the Fixed Memory Provider. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfFixedMemoryProviderParamsDestroy( + umf_fixed_memory_provider_params_handle_t hParams); + +/// @brief Retrieve the operations structure for the Fixed Memory Provider. +/// @return Pointer to the umf_memory_provider_ops_t structure. +umf_memory_provider_ops_t *umfFixedMemoryProviderOps(void); + +/// @brief Fixed Memory Provider operation results +typedef enum umf_fixed_memory_provider_native_error { + UMF_FIXED_RESULT_SUCCESS = UMF_FIXED_RESULTS_START_FROM, ///< Success + UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED, ///< Force purging failed +} umf_fixed_memory_provider_native_error_t; + +#ifdef __cplusplus +} +#endif + +#endif /* UMF_FIXED_MEMORY_PROVIDER_H */ diff --git a/scripts/docs_config/api.rst b/scripts/docs_config/api.rst index 7f734cad2..c0448f117 100644 --- a/scripts/docs_config/api.rst +++ b/scripts/docs_config/api.rst @@ -80,17 +80,12 @@ and operate on the provider. .. doxygenfile:: memory_provider.h :sections: define enum typedef func var -Coarse Provider +Fixed Memory Provider ------------------------------------------ -A memory provider that can provide memory from: +A memory provider that can provide memory from a given pre-allocated buffer. -1) A given pre-allocated buffer (the fixed-size memory provider option) or -2) From an additional upstream provider (e.g. provider that does not support - the free() operation like the File memory provider or the DevDax memory - provider - see below). - -.. doxygenfile:: provider_coarse.h +.. 
doxygenfile:: provider_fixed_memory.h :sections: define enum typedef func var OS Memory Provider diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fb32b6d2e..2a27dce46 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -62,6 +62,7 @@ set(UMF_SOURCES provider/provider_cuda.c provider/provider_devdax_memory.c provider/provider_file_memory.c + provider/provider_fixed_memory.c provider/provider_level_zero.c provider/provider_os_memory.c provider/provider_tracking.c diff --git a/src/libumf.def b/src/libumf.def index 7666c146b..5d1c5047f 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -25,13 +25,16 @@ EXPORTS umfDevDaxMemoryProviderParamsDestroy umfDevDaxMemoryProviderParamsSetDeviceDax umfDevDaxMemoryProviderParamsSetProtection - umfFree umfFileMemoryProviderOps umfFileMemoryProviderParamsCreate umfFileMemoryProviderParamsDestroy umfFileMemoryProviderParamsSetPath umfFileMemoryProviderParamsSetProtection umfFileMemoryProviderParamsSetVisibility + umfFixedMemoryProviderOps + umfFixedMemoryProviderParamsCreate + umfFixedMemoryProviderParamsDestroy + umfFree umfGetIPCHandle umfGetLastFailedMemoryProvider umfJemallocPoolOps diff --git a/src/libumf.map b/src/libumf.map index 5d1ca3b77..d604dd64e 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -19,13 +19,16 @@ UMF_1.0 { umfDevDaxMemoryProviderParamsDestroy; umfDevDaxMemoryProviderParamsSetDeviceDax; umfDevDaxMemoryProviderParamsSetProtection; - umfFree; umfFileMemoryProviderOps; umfFileMemoryProviderParamsCreate; umfFileMemoryProviderParamsDestroy; umfFileMemoryProviderParamsSetPath; umfFileMemoryProviderParamsSetProtection; umfFileMemoryProviderParamsSetVisibility; + umfFixedMemoryProviderOps; + umfFixedMemoryProviderParamsCreate; + umfFixedMemoryProviderParamsDestroy; + umfFree; umfGetIPCHandle; umfGetLastFailedMemoryProvider; umfJemallocPoolOps; @@ -81,13 +84,13 @@ UMF_1.0 { umfOsMemoryProviderOps; umfOsMemoryProviderParamsCreate; umfOsMemoryProviderParamsDestroy; - umfOsMemoryProviderParamsSetProtection; - umfOsMemoryProviderParamsSetVisibility; - umfOsMemoryProviderParamsSetShmName; umfOsMemoryProviderParamsSetNumaList; umfOsMemoryProviderParamsSetNumaMode; umfOsMemoryProviderParamsSetPartSize; umfOsMemoryProviderParamsSetPartitions; + umfOsMemoryProviderParamsSetProtection; + umfOsMemoryProviderParamsSetShmName; + umfOsMemoryProviderParamsSetVisibility; umfPoolAlignedMalloc; umfPoolByPtr; umfPoolCalloc; diff --git a/src/provider/provider_fixed_memory.c b/src/provider/provider_fixed_memory.c new file mode 100644 index 000000000..6392b39d3 --- /dev/null +++ b/src/provider/provider_fixed_memory.c @@ -0,0 +1,337 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "base_alloc_global.h" +#include "coarse.h" +#include "libumf.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" + +#define TLS_MSG_BUF_LEN 1024 + +typedef struct fixed_memory_provider_t { + void *base; // base address of memory + size_t size; // size of the memory region + coarse_t *coarse; // coarse library handle +} fixed_memory_provider_t; + +// Fixed Memory provider settings struct +typedef struct umf_fixed_memory_provider_params_t { + void *ptr; + size_t size; +} umf_fixed_memory_provider_params_t; + +typedef struct fixed_last_native_error_t { + int32_t native_error; + int errno_value; + char msg_buff[TLS_MSG_BUF_LEN]; +} fixed_last_native_error_t; + +static __TLS fixed_last_native_error_t TLS_last_native_error; + +// helper values used only in the Native_error_str array +#define _UMF_FIXED_RESULT_SUCCESS \ + (UMF_FIXED_RESULT_SUCCESS - UMF_FIXED_RESULT_SUCCESS) +#define _UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED \ + (UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED - UMF_FIXED_RESULT_SUCCESS) + +static const char *Native_error_str[] = { + [_UMF_FIXED_RESULT_SUCCESS] = "success", + [_UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED] = "force purging failed"}; + +static void fixed_store_last_native_error(int32_t native_error, + int errno_value) { + TLS_last_native_error.native_error = native_error; + TLS_last_native_error.errno_value = errno_value; +} + +static umf_result_t fixed_allocation_split_cb(void *provider, void *ptr, + size_t totalSize, + size_t firstSize) { + (void)provider; + (void)ptr; + (void)totalSize; + (void)firstSize; + return UMF_RESULT_SUCCESS; +} + +static umf_result_t fixed_allocation_merge_cb(void *provider, void *lowPtr, + void *highPtr, size_t totalSize) { + (void)provider; + (void)lowPtr; + (void)highPtr; + (void)totalSize; + return UMF_RESULT_SUCCESS; +} + +static umf_result_t fixed_initialize(void *params, void **provider) { + umf_result_t ret; + + if (params == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_fixed_memory_provider_params_t *in_params = + (umf_fixed_memory_provider_params_t *)params; + + fixed_memory_provider_t *fixed_provider = + umf_ba_global_alloc(sizeof(*fixed_provider)); + if (!fixed_provider) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + memset(fixed_provider, 0, sizeof(*fixed_provider)); + + coarse_params_t coarse_params = {0}; + coarse_params.provider = fixed_provider; + coarse_params.page_size = utils_get_page_size(); + // The alloc callback is not available in case of the fixed provider + // because it is a fixed-size memory provider + // and the entire memory is added as a single block + // to the coarse library. 
+ coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; // not available for the fixed provider + coarse_params.cb.split = fixed_allocation_split_cb; + coarse_params.cb.merge = fixed_allocation_merge_cb; + + coarse_t *coarse = NULL; + ret = coarse_new(&coarse_params, &coarse); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("coarse_new() failed"); + goto err_free_fixed_provider; + } + + fixed_provider->coarse = coarse; + + fixed_provider->base = in_params->ptr; + fixed_provider->size = in_params->size; + + // add the entire memory as a single block + ret = coarse_add_memory_fixed(coarse, fixed_provider->base, + fixed_provider->size); + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("adding memory block failed"); + goto err_coarse_delete; + } + + *provider = fixed_provider; + + return UMF_RESULT_SUCCESS; + +err_coarse_delete: + coarse_delete(fixed_provider->coarse); +err_free_fixed_provider: + umf_ba_global_free(fixed_provider); + return ret; +} + +static void fixed_finalize(void *provider) { + fixed_memory_provider_t *fixed_provider = provider; + coarse_delete(fixed_provider->coarse); + umf_ba_global_free(fixed_provider); +} + +static umf_result_t fixed_alloc(void *provider, size_t size, size_t alignment, + void **resultPtr) { + fixed_memory_provider_t *fixed_provider = + (fixed_memory_provider_t *)provider; + + return coarse_alloc(fixed_provider->coarse, size, alignment, resultPtr); +} + +static void fixed_get_last_native_error(void *provider, const char **ppMessage, + int32_t *pError) { + (void)provider; // unused + + if (ppMessage == NULL || pError == NULL) { + assert(0); + return; + } + + *pError = TLS_last_native_error.native_error; + if (TLS_last_native_error.errno_value == 0) { + *ppMessage = Native_error_str[*pError - UMF_FIXED_RESULT_SUCCESS]; + return; + } + + const char *msg; + size_t len; + size_t pos = 0; + + msg = Native_error_str[*pError - UMF_FIXED_RESULT_SUCCESS]; + len = strlen(msg); + memcpy(TLS_last_native_error.msg_buff + pos, msg, len + 1); + pos += len; + + msg = ": "; + len = strlen(msg); + memcpy(TLS_last_native_error.msg_buff + pos, msg, len + 1); + pos += len; + + utils_strerror(TLS_last_native_error.errno_value, + TLS_last_native_error.msg_buff + pos, TLS_MSG_BUF_LEN - pos); + + *ppMessage = TLS_last_native_error.msg_buff; +} + +static umf_result_t fixed_get_recommended_page_size(void *provider, size_t size, + size_t *page_size) { + (void)provider; // unused + (void)size; // unused + + *page_size = utils_get_page_size(); + + return UMF_RESULT_SUCCESS; +} + +static umf_result_t fixed_get_min_page_size(void *provider, void *ptr, + size_t *page_size) { + (void)ptr; // unused + + return fixed_get_recommended_page_size(provider, 0, page_size); +} + +static umf_result_t fixed_purge_lazy(void *provider, void *ptr, size_t size) { + (void)provider; // unused + (void)ptr; // unused + (void)size; // unused + // purge_lazy is unsupported in case of the fixed memory provider + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + +static umf_result_t fixed_purge_force(void *provider, void *ptr, size_t size) { + (void)provider; // unused + errno = 0; + if (utils_purge(ptr, size, UMF_PURGE_FORCE)) { + fixed_store_last_native_error(UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED, + errno); + LOG_PERR("force purging failed"); + return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + } + return UMF_RESULT_SUCCESS; +} + +static const char *fixed_get_name(void *provider) { + (void)provider; // unused + return "FIXED"; +} + +static umf_result_t fixed_allocation_split(void *provider, void *ptr, + 
size_t totalSize, size_t firstSize) { + fixed_memory_provider_t *fixed_provider = + (fixed_memory_provider_t *)provider; + return coarse_split(fixed_provider->coarse, ptr, totalSize, firstSize); +} + +static umf_result_t fixed_allocation_merge(void *provider, void *lowPtr, + void *highPtr, size_t totalSize) { + fixed_memory_provider_t *fixed_provider = + (fixed_memory_provider_t *)provider; + return coarse_merge(fixed_provider->coarse, lowPtr, highPtr, totalSize); +} + +static umf_result_t fixed_free(void *provider, void *ptr, size_t size) { + fixed_memory_provider_t *fixed_provider = + (fixed_memory_provider_t *)provider; + return coarse_free(fixed_provider->coarse, ptr, size); +} + +static umf_memory_provider_ops_t UMF_FIXED_MEMORY_PROVIDER_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = fixed_initialize, + .finalize = fixed_finalize, + .alloc = fixed_alloc, + .free = fixed_free, + .get_last_native_error = fixed_get_last_native_error, + .get_recommended_page_size = fixed_get_recommended_page_size, + .get_min_page_size = fixed_get_min_page_size, + .get_name = fixed_get_name, + .ext.purge_lazy = fixed_purge_lazy, + .ext.purge_force = fixed_purge_force, + .ext.allocation_merge = fixed_allocation_merge, + .ext.allocation_split = fixed_allocation_split, + .ipc.get_ipc_handle_size = NULL, + .ipc.get_ipc_handle = NULL, + .ipc.put_ipc_handle = NULL, + .ipc.open_ipc_handle = NULL, + .ipc.close_ipc_handle = NULL}; + +umf_memory_provider_ops_t *umfFixedMemoryProviderOps(void) { + return &UMF_FIXED_MEMORY_PROVIDER_OPS; +} + +umf_result_t umfFixedMemoryProviderParamsCreate( + umf_fixed_memory_provider_params_handle_t *hParams, void *ptr, + size_t size) { + libumfInit(); + if (hParams == NULL) { + LOG_ERR("Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_fixed_memory_provider_params_handle_t params = + umf_ba_global_alloc(sizeof(*params)); + if (params == NULL) { + LOG_ERR("Allocating memory for the Memory Provider params failed"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_result_t ret = umfFixedMemoryProviderParamsSetMemory(params, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + umf_ba_global_free(params); + return ret; + } + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfFixedMemoryProviderParamsDestroy( + umf_fixed_memory_provider_params_handle_t hParams) { + if (hParams != NULL) { + umf_ba_global_free(hParams); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfFixedMemoryProviderParamsSetMemory( + umf_fixed_memory_provider_params_handle_t hParams, void *ptr, size_t size) { + + if (hParams == NULL) { + LOG_ERR("Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (ptr == NULL) { + LOG_ERR("Memory pointer is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (size == 0) { + LOG_ERR("Size must be greater than 0"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->ptr = ptr; + hParams->size = size; + return UMF_RESULT_SUCCESS; +} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d5a07bfbb..bb353a889 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -317,6 +317,10 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented NAME provider_file_memory_ipc SRCS provider_file_memory_ipc.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( + NAME provider_fixed_memory + SRCS provider_fixed_memory.cpp + LIBS ${UMF_UTILS_FOR_TEST}) # This test requires Linux-only file memory 
provider if(UMF_POOL_JEMALLOC_ENABLED) diff --git a/test/provider_fixed_memory.cpp b/test/provider_fixed_memory.cpp new file mode 100644 index 000000000..7f976a1f5 --- /dev/null +++ b/test/provider_fixed_memory.cpp @@ -0,0 +1,393 @@ +// Copyright (C) 2024 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "base.hpp" + +#include "cpp_helpers.hpp" +#include "test_helpers.h" +#ifndef _WIN32 +#include "test_helpers_linux.h" +#endif + +#include +#include + +using umf_test::test; + +#define INVALID_PTR ((void *)0x01) + +typedef enum purge_t { + PURGE_NONE = 0, + PURGE_LAZY = 1, + PURGE_FORCE = 2, +} purge_t; + +static const char *Native_error_str[] = { + "success", // UMF_FIXED_RESULT_SUCCESS + "force purging failed", // UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED +}; + +// Test helpers + +static int compare_native_error_str(const char *message, int error) { + const char *error_str = Native_error_str[error - UMF_FIXED_RESULT_SUCCESS]; + size_t len = strlen(error_str); + return strncmp(message, error_str, len); +} + +using providerCreateExtParams = std::tuple; + +static void providerCreateExt(providerCreateExtParams params, + umf::provider_unique_handle_t *handle) { + umf_memory_provider_handle_t hProvider = nullptr; + auto [provider_ops, provider_params] = params; + + auto ret = + umfMemoryProviderCreate(provider_ops, provider_params, &hProvider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider, nullptr); + + *handle = + umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); +} + +struct FixedProviderTest + : umf_test::test, + ::testing::WithParamInterface { + void SetUp() override { + test::SetUp(); + + // Allocate a memory buffer to use with the fixed memory provider + memory_size = utils_get_page_size() * 10; // Allocate 10 pages + memory_buffer = malloc(memory_size); + ASSERT_NE(memory_buffer, nullptr); + + // Create provider parameters + umf_fixed_memory_provider_params_handle_t params = nullptr; + umf_result_t res = umfFixedMemoryProviderParamsCreate( + ¶ms, memory_buffer, memory_size); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + providerCreateExt(std::make_tuple(umfFixedMemoryProviderOps(), params), + &provider); + + umfFixedMemoryProviderParamsDestroy(params); + umf_result_t umf_result = + umfMemoryProviderGetMinPageSize(provider.get(), NULL, &page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + page_plus_64 = page_size + 64; + } + + void TearDown() override { + if (memory_buffer) { + free(memory_buffer); + memory_buffer = nullptr; + } + test::TearDown(); + } + + void test_alloc_free_success(size_t size, size_t alignment, purge_t purge) { + void *ptr = nullptr; + auto provider = this->provider.get(); + + umf_result_t umf_result = + umfMemoryProviderAlloc(provider, size, alignment, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + memset(ptr, 0xFF, size); + + if (purge == PURGE_LAZY) { + umf_result = umfMemoryProviderPurgeLazy(provider, ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); + } else if (purge == PURGE_FORCE) { + umf_result = umfMemoryProviderPurgeForce(provider, ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + umf_result = umfMemoryProviderFree(provider, ptr, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } + + void verify_last_native_error(int32_t err) { + const char *message; + int32_t error; + auto provider = this->provider.get(); 
+ umfMemoryProviderGetLastNativeError(provider, &message, &error); + ASSERT_EQ(error, err); + ASSERT_EQ(compare_native_error_str(message, error), 0); + } + + void test_alloc_failure(size_t size, size_t alignment, umf_result_t result, + int32_t err) { + void *ptr = nullptr; + auto provider = this->provider.get(); + + umf_result_t umf_result = + umfMemoryProviderAlloc(provider, size, alignment, &ptr); + ASSERT_EQ(umf_result, result); + ASSERT_EQ(ptr, nullptr); + + if (umf_result == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { + verify_last_native_error(err); + } + } + + umf::provider_unique_handle_t provider; + size_t page_size; + size_t page_plus_64; + void *memory_buffer = nullptr; + size_t memory_size = 0; +}; + +// TESTS + +// Positive tests using test_alloc_free_success + +INSTANTIATE_TEST_SUITE_P(fixedProviderTest, FixedProviderTest, + ::testing::Values(providerCreateExtParams{ + umfFixedMemoryProviderOps(), nullptr})); + +TEST_P(FixedProviderTest, create_destroy) { + // Creation and destruction are handled in SetUp and TearDown +} + +TEST_F(test, create_no_params) { + umf_memory_provider_handle_t provider = nullptr; + auto result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), nullptr, + &provider); + ASSERT_EQ(result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(provider, nullptr); +} + +TEST_P(FixedProviderTest, two_allocations) { + umf_result_t umf_result; + void *ptr1 = nullptr; + void *ptr2 = nullptr; + size_t size = page_plus_64; + size_t alignment = page_size; + + umf_result = umfMemoryProviderAlloc(provider.get(), size, alignment, &ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + umf_result = umfMemoryProviderAlloc(provider.get(), size, alignment, &ptr2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr2, nullptr); + + ASSERT_NE(ptr1, ptr2); + if ((uintptr_t)ptr1 > (uintptr_t)ptr2) { + ASSERT_GT((uintptr_t)ptr1 - (uintptr_t)ptr2, size); + } else { + ASSERT_GT((uintptr_t)ptr2 - (uintptr_t)ptr1, size); + } + + memset(ptr1, 0x11, size); + memset(ptr2, 0x22, size); + + umf_result = umfMemoryProviderFree(provider.get(), ptr1, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfMemoryProviderFree(provider.get(), ptr2, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_P(FixedProviderTest, alloc_page64_align_0) { + test_alloc_free_success(page_plus_64, 0, PURGE_NONE); +} + +TEST_P(FixedProviderTest, alloc_page64_align_page_div_2) { + test_alloc_free_success(page_plus_64, page_size / 2, PURGE_NONE); +} + +TEST_P(FixedProviderTest, purge_lazy) { + test_alloc_free_success(page_size, 0, PURGE_LAZY); +} + +TEST_P(FixedProviderTest, purge_force) { + test_alloc_free_success(page_size, 0, PURGE_FORCE); +} + +// Negative tests using test_alloc_failure + +TEST_P(FixedProviderTest, alloc_WRONG_SIZE) { + test_alloc_failure((size_t)-1, 0, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY, 0); +} + +TEST_P(FixedProviderTest, alloc_page64_WRONG_ALIGNMENT_3_pages) { + test_alloc_failure(page_plus_64, 3 * page_size, + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +TEST_P(FixedProviderTest, alloc_3pages_WRONG_ALIGNMENT_3pages) { + test_alloc_failure(3 * page_size, 3 * page_size, + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +TEST_P(FixedProviderTest, alloc_page64_align_page_plus_1_WRONG_ALIGNMENT_1) { + test_alloc_failure(page_plus_64, page_size + 1, + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +TEST_P(FixedProviderTest, alloc_page64_align_one_half_pages_WRONG_ALIGNMENT_2) { + test_alloc_failure(page_plus_64, page_size + 
(page_size / 2), + UMF_RESULT_ERROR_INVALID_ALIGNMENT, 0); +} + +// Other positive tests + +TEST_P(FixedProviderTest, get_min_page_size) { + size_t min_page_size; + umf_result_t umf_result = umfMemoryProviderGetMinPageSize( + provider.get(), nullptr, &min_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_LE(min_page_size, page_size); +} + +TEST_P(FixedProviderTest, get_recommended_page_size) { + size_t min_page_size; + umf_result_t umf_result = umfMemoryProviderGetMinPageSize( + provider.get(), nullptr, &min_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_LE(min_page_size, page_size); + + size_t recommended_page_size; + umf_result = umfMemoryProviderGetRecommendedPageSize( + provider.get(), 0, &recommended_page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(recommended_page_size, min_page_size); +} + +TEST_P(FixedProviderTest, get_name) { + const char *name = umfMemoryProviderGetName(provider.get()); + ASSERT_STREQ(name, "FIXED"); +} + +TEST_P(FixedProviderTest, free_size_0_ptr_not_null) { + umf_result_t umf_result = + umfMemoryProviderFree(provider.get(), INVALID_PTR, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(FixedProviderTest, free_NULL) { + umf_result_t umf_result = umfMemoryProviderFree(provider.get(), nullptr, 0); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +// Other negative tests + +TEST_P(FixedProviderTest, free_INVALID_POINTER_SIZE_GT_0) { + umf_result_t umf_result = + umfMemoryProviderFree(provider.get(), INVALID_PTR, page_plus_64); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(FixedProviderTest, purge_lazy_INVALID_POINTER) { + umf_result_t umf_result = + umfMemoryProviderPurgeLazy(provider.get(), INVALID_PTR, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_NOT_SUPPORTED); +} + +TEST_P(FixedProviderTest, purge_force_INVALID_POINTER) { + umf_result_t umf_result = + umfMemoryProviderPurgeForce(provider.get(), INVALID_PTR, 1); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC); + + verify_last_native_error(UMF_FIXED_RESULT_ERROR_PURGE_FORCE_FAILED); +} + +// Params tests + +TEST_F(test, params_null_handle) { + constexpr size_t memory_size = 100; + char memory_buffer[memory_size]; + umf_result_t umf_result = + umfFixedMemoryProviderParamsCreate(nullptr, memory_buffer, memory_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + umf_result = umfFixedMemoryProviderParamsDestroy(nullptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_F(test, create_with_null_ptr) { + constexpr size_t memory_size = 100; + umf_fixed_memory_provider_params_handle_t wrong_params = nullptr; + umf_result_t umf_result = + umfFixedMemoryProviderParamsCreate(&wrong_params, nullptr, memory_size); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(wrong_params, nullptr); +} + +TEST_F(test, create_with_zero_size) { + constexpr size_t memory_size = 100; + char memory_buffer[memory_size]; + umf_fixed_memory_provider_params_handle_t wrong_params = nullptr; + umf_result_t umf_result = + umfFixedMemoryProviderParamsCreate(&wrong_params, memory_buffer, 0); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(wrong_params, nullptr); +} + +TEST_P(FixedProviderTest, alloc_size_exceeds_buffer) { + size_t size = memory_size + page_size; + test_alloc_failure(size, 0, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY, 0); +} + +TEST_P(FixedProviderTest, merge) { + umf_result_t umf_result; + void *ptr1 = nullptr; + void *ptr2 = nullptr; + size_t 
size = page_size; + size_t alignment = page_size; + + umf_result = umfMemoryProviderAlloc(provider.get(), size, alignment, &ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + umf_result = umfMemoryProviderAlloc(provider.get(), size, alignment, &ptr2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr2, nullptr); + + ASSERT_EQ((uintptr_t)ptr2 - (uintptr_t)ptr1, size); + + memset(ptr1, 0x11, size); + memset(ptr2, 0x22, size); + + size_t merged_size = size * 2; + umf_result = umfMemoryProviderAllocationMerge(provider.get(), ptr1, ptr2, + merged_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfMemoryProviderFree(provider.get(), ptr1, merged_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_P(FixedProviderTest, split) { + umf_result_t umf_result; + void *ptr1 = nullptr; + void *ptr2 = nullptr; + size_t size = page_size; + size_t alignment = page_size; + + umf_result = + umfMemoryProviderAlloc(provider.get(), size * 2, alignment, &ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + umf_result = + umfMemoryProviderAllocationSplit(provider.get(), ptr1, size * 2, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ptr2 = (void *)((uintptr_t)ptr1 + size); + memset(ptr1, 0x11, size); + + umf_result = umfMemoryProviderFree(provider.get(), ptr1, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + memset(ptr2, 0x22, size); + umf_result = umfMemoryProviderFree(provider.get(), ptr2, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} From 202984eb90da6f18b34bc96d72539167886c7885 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 17 Dec 2024 09:26:47 +0100 Subject: [PATCH 045/466] Print more info when open() fails in the custom_file_provider example Signed-off-by: Lukasz Dorau --- examples/custom_file_provider/custom_file_provider.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/custom_file_provider/custom_file_provider.c b/examples/custom_file_provider/custom_file_provider.c index ad897fe5e..b17fdc0f0 100644 --- a/examples/custom_file_provider/custom_file_provider.c +++ b/examples/custom_file_provider/custom_file_provider.c @@ -62,7 +62,8 @@ static umf_result_t file_init(void *params, void **provider) { // Open the file file_provider->fd = open(file_params->filename, O_RDWR | O_CREAT, 0666); if (file_provider->fd < 0) { - perror("Failed to open file"); + perror("open()"); + fprintf(stderr, "Failed to open the file: %s\n", file_params->filename); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; goto cleanup_malloc; } From e24048963b3d755002695d1e41477c3173d0fe63 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 16 Dec 2024 13:27:29 +0100 Subject: [PATCH 046/466] Fix: add missing umf*MemoryProviderParamsDestroy() Signed-off-by: Lukasz Dorau --- test/provider_devdax_memory.cpp | 1 + test/provider_file_memory.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/test/provider_devdax_memory.cpp b/test/provider_devdax_memory.cpp index 1feaaaaa6..7765dd08d 100644 --- a/test/provider_devdax_memory.cpp +++ b/test/provider_devdax_memory.cpp @@ -165,6 +165,7 @@ TEST_F(test, test_if_mapped_with_MAP_SYNC) { ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); umfMemoryProviderDestroy(hProvider); + umfDevDaxMemoryProviderParamsDestroy(params); // fail test if the "sf" flag was not found ASSERT_EQ(flag_found, true); diff --git a/test/provider_file_memory.cpp b/test/provider_file_memory.cpp index 0d54c287c..cfa37be31 100644 --- a/test/provider_file_memory.cpp +++ 
b/test/provider_file_memory.cpp @@ -162,6 +162,7 @@ TEST_F(test, test_if_mapped_with_MAP_SYNC) { ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); umfMemoryProviderDestroy(hProvider); + umfFileMemoryProviderParamsDestroy(params); // fail test if the "sf" flag was not found ASSERT_EQ(flag_found, true); From a714e4542d600241bf3dc87434c58ea6e9ca0721 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 16 Dec 2024 15:14:50 +0100 Subject: [PATCH 047/466] Fix memory leak in the testNumaSplit:checkModeSplit test Signed-off-by: Lukasz Dorau --- test/provider_os_memory_multiple_numa_nodes.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/provider_os_memory_multiple_numa_nodes.cpp b/test/provider_os_memory_multiple_numa_nodes.cpp index e493a427c..cfc58f2f0 100644 --- a/test/provider_os_memory_multiple_numa_nodes.cpp +++ b/test/provider_os_memory_multiple_numa_nodes.cpp @@ -674,17 +674,17 @@ TEST_P(testNumaSplit, checkModeSplit) { auto [required_numa_nodes, pages, in, out] = param; umf_result_t umf_result; - umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; - - umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); - ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - std::vector numa_nodes = get_available_numa_nodes(); if (numa_nodes.size() < required_numa_nodes) { GTEST_SKIP_("Not enough numa nodes"); } + umf_os_memory_provider_params_handle_t os_memory_provider_params = nullptr; + + umf_result = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_EQ(out.size(), pages) << "Wrong test input - out array size doesn't match page count"; From c549441ebdb442c2559b19aa7bf15a1073b3753b Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 18 Dec 2024 09:48:19 +0100 Subject: [PATCH 048/466] Assert in umf_ba_destroy() in DEBUG and UMF_DEVELOPER_MODE if (NDEBUG is not defined) and (UMF_DEVELOPER_MODE is defined) assert in umf_ba_destroy() if there are any memory leaks in the base allocator. Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_proxy_lib.yml | 2 +- CMakeLists.txt | 5 +++++ src/CMakeLists.txt | 3 ++- src/base_alloc/base_alloc.c | 8 +++++++- 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index e73dabe29..27a66267d 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -49,7 +49,7 @@ jobs: -DUMF_BUILD_BENCHMARKS=OFF -DUMF_BUILD_TESTS=ON -DUMF_FORMAT_CODE_STYLE=OFF - -DUMF_DEVELOPER_MODE=OFF + -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON diff --git a/CMakeLists.txt b/CMakeLists.txt index 0eea5faf4..0b88f95b5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,6 +111,11 @@ else() message(FATAL_ERROR "Unknown OS type") endif() +if(UMF_DEVELOPER_MODE) + set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} + UMF_DEVELOPER_MODE=1) +endif() + if(NOT UMF_BUILD_LIBUMF_POOL_JEMALLOC) set(UMF_POOL_JEMALLOC_ENABLED FALSE) set(JEMALLOC_FOUND FALSE) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2a27dce46..2d83faacf 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -14,7 +14,8 @@ set(UMF_CUDA_INCLUDE_DIR # Compile definitions for UMF library. 
# # TODO: Cleanup the compile definitions across all the CMake files -set(UMF_COMMON_COMPILE_DEFINITIONS UMF_VERSION=${UMF_VERSION}) +set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} + UMF_VERSION=${UMF_VERSION}) set(BA_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/base_alloc/base_alloc.c diff --git a/src/base_alloc/base_alloc.c b/src/base_alloc/base_alloc.c index 209ace7fe..6f975307d 100644 --- a/src/base_alloc/base_alloc.c +++ b/src/base_alloc/base_alloc.c @@ -303,7 +303,13 @@ void umf_ba_destroy(umf_ba_pool_t *pool) { #ifndef NDEBUG ba_debug_checks(pool); if (pool->metadata.n_allocs) { - LOG_ERR("pool->metadata.n_allocs = %zu", pool->metadata.n_allocs); + LOG_ERR("number of base allocator memory leaks: %zu", + pool->metadata.n_allocs); + +#ifdef UMF_DEVELOPER_MODE + assert(pool->metadata.n_allocs == 0 && + "memory leaks in base allocator occurred"); +#endif } #endif /* NDEBUG */ From d56b62caaae697b6653149af608c830249659b26 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 18 Dec 2024 10:04:10 +0100 Subject: [PATCH 049/466] Run also examples under valgrind Signed-off-by: Lukasz Dorau --- test/test_valgrind.sh | 55 +++++++++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/test/test_valgrind.sh b/test/test_valgrind.sh index 9f84cf0d3..be5f817dc 100755 --- a/test/test_valgrind.sh +++ b/test/test_valgrind.sh @@ -10,8 +10,8 @@ BUILD_DIR=$2 TOOL=$3 function print_usage() { - echo "$(basename $0) - run all UMF tests under a valgrind tool (memcheck, drd or helgrind)" - echo "This script looks for './test/umf_test-*' test executables in the UMF build directory." + echo "$(basename $0) - run all UMF tests and examples under a valgrind tool (memcheck, drd or helgrind)" + echo "This script looks for './test/umf_test-*' and './examples/umf_example_*' executables in the UMF build directory." echo "Usage: $(basename $0) " } @@ -58,7 +58,7 @@ esac WORKSPACE=$(realpath $WORKSPACE) BUILD_DIR=$(realpath $BUILD_DIR) -cd ${BUILD_DIR}/test/ +cd ${BUILD_DIR} mkdir -p cpuid echo "Gathering data for hwloc so it can be run under valgrind:" @@ -71,20 +71,21 @@ echo "Running: \"valgrind $OPTION\" for the following tests:" ANY_TEST_FAILED=0 rm -f umf_test-*.log umf_test-*.err -for test in $(ls -1 umf_test-*); do +for test in $(ls -1 ./test/umf_test-* ./examples/umf_example_*); do [ ! -x $test ] && continue echo "$test - starting ..." echo -n "$test " LOG=${test}.log ERR=${test}.err - SUP="${WORKSPACE}/test/supp/${TOOL}-${test}.supp" + NAME=$(basename $test) + SUP="${WORKSPACE}/test/supp/${TOOL}-${NAME}.supp" OPT_SUP="" - [ -f ${SUP} ] && OPT_SUP="--suppressions=${SUP}" && echo -n "(${TOOL}-${test}.supp) " + [ -f ${SUP} ] && OPT_SUP="--suppressions=${SUP}" && echo -n "($(basename ${SUP})) " # skip tests incompatible with valgrind FILTER="" case $test in - umf_test-disjointPool) + ./test/umf_test-disjointPool) if [ "$TOOL" = "helgrind" ]; then # skip because of the assert in helgrind: # Helgrind: hg_main.c:308 (lockN_acquire_reader): Assertion 'lk->kind == LK_rdwr' failed. 
@@ -92,53 +93,61 @@ for test in $(ls -1 umf_test-*); do continue; fi ;; - umf_test-ipc_os_prov_*) + ./test/umf_test-ipc_os_prov_*) echo "- SKIPPED" continue; # skip testing helper binaries used by the ipc_os_prov_* tests ;; - umf_test-ipc_devdax_prov_*) + ./test/umf_test-ipc_devdax_prov_*) echo "- SKIPPED" continue; # skip testing helper binaries used by the ipc_devdax_prov_* tests ;; - umf_test-ipc_file_prov_*) + ./test/umf_test-ipc_file_prov_*) echo "- SKIPPED" continue; # skip testing helper binaries used by the ipc_file_prov_* tests ;; - umf_test-memspace_host_all) + ./test/umf_test-memspace_host_all) FILTER='--gtest_filter="-*allocsSpreadAcrossAllNumaNodes"' ;; - umf_test-provider_os_memory) + ./test/umf_test-provider_os_memory) FILTER='--gtest_filter="-osProviderTest/umfIpcTest*"' ;; - umf_test-provider_os_memory_config) + ./test/umf_test-provider_os_memory_config) FILTER='--gtest_filter="-*protection_flag_none:*protection_flag_read:*providerConfigTestNumaMode*"' ;; - umf_test-memspace_highest_capacity) + ./test/umf_test-memspace_highest_capacity) FILTER='--gtest_filter="-*highestCapacityVerify*"' ;; - umf_test-provider_os_memory_multiple_numa_nodes) + ./test/umf_test-provider_os_memory_multiple_numa_nodes) FILTER='--gtest_filter="-testNuma.checkModeInterleave*:testNumaNodesAllocations/testNumaOnEachNode.checkNumaNodesAllocations*:testNumaNodesAllocations/testNumaOnEachNode.checkModePreferred*:testNumaNodesAllocations/testNumaOnEachNode.checkModeInterleaveSingleNode*:testNumaNodesAllocationsAllCpus/testNumaOnEachCpu.checkModePreferredEmptyNodeset*:testNumaNodesAllocationsAllCpus/testNumaOnEachCpu.checkModeLocal*"' ;; - umf_test-memspace_highest_bandwidth) + ./test/umf_test-memspace_highest_bandwidth) FILTER='--gtest_filter="-*allocLocalMt*"' ;; - umf_test-memspace_lowest_latency) + ./test/umf_test-memspace_lowest_latency) FILTER='--gtest_filter="-*allocLocalMt*"' ;; - umf_test-memoryPool) + ./test/umf_test-memoryPool) FILTER='--gtest_filter="-*allocMaxSize*"' ;; + ./examples/umf_example_ipc_ipcapi_*) + echo "- SKIPPED" + continue; # skip testing helper binaries used by the umf_example_ipc_ipcapi_* examples + ;; esac [ "$FILTER" != "" ] && echo -n "($FILTER) " LAST_TEST_FAILED=0 - - if ! HWLOC_CPUID_PATH=./cpuid valgrind $OPTION $OPT_SUP --gen-suppressions=all ./$test $FILTER >$LOG 2>&1; then + set +e + HWLOC_CPUID_PATH=./cpuid valgrind $OPTION $OPT_SUP --gen-suppressions=all $test $FILTER >$LOG 2>&1 + RET=$? + set -e + # 125 is the return code when the test is skipped + if [ $RET -ne 0 -a $RET -ne 125 ]; then LAST_TEST_FAILED=1 ANY_TEST_FAILED=1 - echo "(valgrind FAILED) " - echo "Command: HWLOC_CPUID_PATH=./cpuid valgrind $OPTION $OPT_SUP --gen-suppressions=all ./$test $FILTER >$LOG 2>&1" + echo "(valgrind FAILED RV=$RET) " + echo "Command: HWLOC_CPUID_PATH=./cpuid valgrind $OPTION $OPT_SUP --gen-suppressions=all $test $FILTER >$LOG 2>&1" echo "Output:" cat $LOG echo "=====================" @@ -147,7 +156,7 @@ for test in $(ls -1 umf_test-*); do # grep for "ERROR SUMMARY" with errors (there can be many lines with "ERROR SUMMARY") grep -e "ERROR SUMMARY:" $LOG | grep -v -e "ERROR SUMMARY: 0 errors from 0 contexts" > $ERR || true if [ $LAST_TEST_FAILED -eq 0 -a $(cat $ERR | wc -l) -eq 0 ]; then - echo "- OK" + [ $RET -eq 0 ] && echo "- OK" || echo "- SKIPPED" rm -f $LOG $ERR else echo "- FAILED!" 
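
Aside (not part of any patch in this series): a minimal usage sketch of the fixed memory provider whose implementation and tests appear earlier in this series. The call sequence mirrors the flow exercised by test/provider_fixed_memory.cpp; the header paths, the 1 MiB buffer size, and the error handling are illustrative assumptions, not taken from the patches.

/* Sketch only: header paths below are assumed, not confirmed by the patches. */
#include <umf/memory_provider.h>
#include <umf/providers/provider_fixed_memory.h>

#include <stdlib.h>

int main(void) {
    /* Caller-owned buffer that the fixed provider will carve allocations from. */
    size_t size = 1024 * 1024; /* 1 MiB, arbitrary size chosen for this sketch */
    void *buffer = malloc(size);
    if (buffer == NULL) {
        return 1;
    }

    umf_fixed_memory_provider_params_handle_t params = NULL;
    umf_result_t res = umfFixedMemoryProviderParamsCreate(&params, buffer, size);
    if (res != UMF_RESULT_SUCCESS) {
        free(buffer);
        return 1;
    }

    umf_memory_provider_handle_t provider = NULL;
    res = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, &provider);
    umfFixedMemoryProviderParamsDestroy(params); /* params are only needed for creation */
    if (res != UMF_RESULT_SUCCESS) {
        free(buffer);
        return 1;
    }

    /* Allocate a chunk out of 'buffer'; alignment 0 is accepted, as in the tests. */
    void *ptr = NULL;
    res = umfMemoryProviderAlloc(provider, 4096, 0, &ptr);
    if (res == UMF_RESULT_SUCCESS) {
        umfMemoryProviderFree(provider, ptr, 4096);
    }

    umfMemoryProviderDestroy(provider);
    free(buffer); /* the provider never takes ownership of the memory */
    return 0;
}

Note that destroying the provider does not release the underlying memory: as the test's TearDown() shows, the buffer passed to umfFixedMemoryProviderParamsCreate() stays owned by the caller, who frees it only after umfMemoryProviderDestroy().
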
From 7f005d9b66e26e23e5243de4a39dc53d75891447 Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Thu, 12 Dec 2024 17:45:31 +0100 Subject: [PATCH 050/466] Move PMDK CTL sources This commit introduces sources of the CTL. The CTL sources are copied from: https://github.com/pmem/pmdk/tree/master/src/common --- src/ctl/ctl.c | 563 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/ctl/ctl.h | 206 ++++++++++++++++++ 2 files changed, 769 insertions(+) create mode 100644 src/ctl/ctl.c create mode 100644 src/ctl/ctl.h diff --git a/src/ctl/ctl.c b/src/ctl/ctl.c new file mode 100644 index 000000000..330a5fa47 --- /dev/null +++ b/src/ctl/ctl.c @@ -0,0 +1,563 @@ +// SPDX-License-Identifier: BSD-3-Clause +/* Copyright 2016-2020, Intel Corporation */ + +/* + * ctl.c -- implementation of the interface for examination and modification of + * the library's internal state + */ +#include "ctl.h" +#include "alloc.h" +#include "os.h" + +#define CTL_MAX_ENTRIES 100 + +#define MAX_CONFIG_FILE_LEN (1 << 20) /* 1 megabyte */ + +#define CTL_STRING_QUERY_SEPARATOR ";" +#define CTL_NAME_VALUE_SEPARATOR "=" +#define CTL_QUERY_NODE_SEPARATOR "." +#define CTL_VALUE_ARG_SEPARATOR "," + +static int ctl_global_first_free = 0; +static struct ctl_node CTL_NODE(global)[CTL_MAX_ENTRIES]; + +/* + * This is the top level node of the ctl tree structure. Each node can contain + * children and leaf nodes. + * + * Internal nodes simply create a new path in the tree whereas child nodes are + * the ones providing the read/write functionality by the means of callbacks. + * + * Each tree node must be NULL-terminated, CTL_NODE_END macro is provided for + * convenience. + */ +struct ctl { + struct ctl_node root[CTL_MAX_ENTRIES]; + int first_free; +}; + +/* + * ctl_find_node -- (internal) searches for a matching entry point in the + * provided nodes + * + * The caller is responsible for freeing all of the allocated indexes, + * regardless of the return value. + */ +static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, + const char *name, + struct ctl_indexes *indexes) { + LOG(3, "nodes %p name %s indexes %p", nodes, name, indexes); + + const struct ctl_node *n = NULL; + char *sptr = NULL; + char *parse_str = Strdup(name); + if (parse_str == NULL) { + return NULL; + } + + char *node_name = strtok_r(parse_str, CTL_QUERY_NODE_SEPARATOR, &sptr); + + /* + * Go through the string and separate tokens that correspond to nodes + * in the main ctl tree. + */ + while (node_name != NULL) { + char *endptr; + /* + * Ignore errno from strtol: FreeBSD returns EINVAL if no + * conversion is performed. Linux does not, but endptr + * check is valid in both cases. 
+ */ + int tmp_errno = errno; + long index_value = strtol(node_name, &endptr, 0); + errno = tmp_errno; + struct ctl_index *index_entry = NULL; + if (endptr != node_name) { /* a valid index */ + index_entry = Malloc(sizeof(*index_entry)); + if (index_entry == NULL) { + goto error; + } + index_entry->value = index_value; + PMDK_SLIST_INSERT_HEAD(indexes, index_entry, entry); + } + + for (n = &nodes[0]; n->name != NULL; ++n) { + if (index_entry && n->type == CTL_NODE_INDEXED) { + break; + } else if (strcmp(n->name, node_name) == 0) { + break; + } + } + if (n->name == NULL) { + goto error; + } + + if (index_entry) { + index_entry->name = n->name; + } + + nodes = n->children; + node_name = strtok_r(NULL, CTL_QUERY_NODE_SEPARATOR, &sptr); + } + + Free(parse_str); + return n; + +error: + Free(parse_str); + return NULL; +} + +/* + * ctl_delete_indexes -- + * (internal) removes and frees all entries on the index list + */ +static void ctl_delete_indexes(struct ctl_indexes *indexes) { + while (!PMDK_SLIST_EMPTY(indexes)) { + struct ctl_index *index = PMDK_SLIST_FIRST(indexes); + PMDK_SLIST_REMOVE_HEAD(indexes, entry); + Free(index); + } +} + +/* + * ctl_parse_args -- (internal) parses a string argument based on the node + * structure + */ +static void *ctl_parse_args(const struct ctl_argument *arg_proto, char *arg) { + ASSERTne(arg, NULL); + + char *dest_arg = Malloc(arg_proto->dest_size); + if (dest_arg == NULL) { + ERR("!Malloc"); + return NULL; + } + + char *sptr = NULL; + char *arg_sep = strtok_r(arg, CTL_VALUE_ARG_SEPARATOR, &sptr); + for (const struct ctl_argument_parser *p = arg_proto->parsers; + p->parser != NULL; ++p) { + ASSERT(p->dest_offset + p->dest_size <= arg_proto->dest_size); + if (arg_sep == NULL) { + ERR("!strtok_r"); + goto error_parsing; + } + + if (p->parser(arg_sep, dest_arg + p->dest_offset, p->dest_size) != 0) { + goto error_parsing; + } + + arg_sep = strtok_r(NULL, CTL_VALUE_ARG_SEPARATOR, &sptr); + } + + return dest_arg; + +error_parsing: + Free(dest_arg); + return NULL; +} + +/* + * ctl_query_get_real_args -- (internal) returns a pointer with actual argument + * structure as required by the node callback + */ +static void *ctl_query_get_real_args(const struct ctl_node *n, void *write_arg, + enum ctl_query_source source) { + void *real_arg = NULL; + switch (source) { + case CTL_QUERY_CONFIG_INPUT: + real_arg = ctl_parse_args(n->arg, write_arg); + break; + case CTL_QUERY_PROGRAMMATIC: + real_arg = write_arg; + break; + default: + ASSERT(0); + break; + } + + return real_arg; +} + +/* + * ctl_query_cleanup_real_args -- (internal) cleanups relevant argument + * structures allocated as a result of the get_real_args call + */ +static void ctl_query_cleanup_real_args(const struct ctl_node *n, + void *real_arg, + enum ctl_query_source source) { + switch (source) { + case CTL_QUERY_CONFIG_INPUT: + Free(real_arg); + break; + case CTL_QUERY_PROGRAMMATIC: + break; + default: + ASSERT(0); + break; + } +} + +/* + * ctl_exec_query_read -- (internal) calls the read callback of a node + */ +static int ctl_exec_query_read(void *ctx, const struct ctl_node *n, + enum ctl_query_source source, void *arg, + struct ctl_indexes *indexes) { + if (arg == NULL) { + ERR("read queries require non-NULL argument"); + errno = EINVAL; + return -1; + } + + return n->cb[CTL_QUERY_READ](ctx, source, arg, indexes); +} + +/* + * ctl_exec_query_write -- (internal) calls the write callback of a node + */ +static int ctl_exec_query_write(void *ctx, const struct ctl_node *n, + enum ctl_query_source source, void 
*arg, + struct ctl_indexes *indexes) { + if (arg == NULL) { + ERR("write queries require non-NULL argument"); + errno = EINVAL; + return -1; + } + + void *real_arg = ctl_query_get_real_args(n, arg, source); + if (real_arg == NULL) { + LOG(1, "Invalid arguments"); + return -1; + } + + int ret = n->cb[CTL_QUERY_WRITE](ctx, source, real_arg, indexes); + ctl_query_cleanup_real_args(n, real_arg, source); + + return ret; +} + +/* + * ctl_exec_query_runnable -- (internal) calls the run callback of a node + */ +static int ctl_exec_query_runnable(void *ctx, const struct ctl_node *n, + enum ctl_query_source source, void *arg, + struct ctl_indexes *indexes) { + return n->cb[CTL_QUERY_RUNNABLE](ctx, source, arg, indexes); +} + +static int (*ctl_exec_query[MAX_CTL_QUERY_TYPE])( + void *ctx, const struct ctl_node *n, enum ctl_query_source source, + void *arg, struct ctl_indexes *indexes) = { + ctl_exec_query_read, + ctl_exec_query_write, + ctl_exec_query_runnable, +}; + +/* + * ctl_query -- (internal) parses the name and calls the appropriate methods + * from the ctl tree + */ +int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, + const char *name, enum ctl_query_type type, void *arg) { + LOG(3, "ctl %p ctx %p source %d name %s type %d arg %p", ctl, ctx, source, + name, type, arg); + + if (name == NULL) { + ERR("invalid query"); + errno = EINVAL; + return -1; + } + + /* + * All of the indexes are put on this list so that the handlers can + * easily retrieve the index values. The list is cleared once the ctl + * query has been handled. + */ + struct ctl_indexes indexes; + PMDK_SLIST_INIT(&indexes); + + int ret = -1; + + const struct ctl_node *n = ctl_find_node(CTL_NODE(global), name, &indexes); + + if (n == NULL && ctl) { + ctl_delete_indexes(&indexes); + n = ctl_find_node(ctl->root, name, &indexes); + } + + if (n == NULL || n->type != CTL_NODE_LEAF || n->cb[type] == NULL) { + ERR("invalid query entry point %s", name); + errno = EINVAL; + goto out; + } + + ret = ctl_exec_query[type](ctx, n, source, arg, &indexes); + +out: + ctl_delete_indexes(&indexes); + + return ret; +} + +/* + * ctl_register_module_node -- adds a new node to the CTL tree root. + */ +void ctl_register_module_node(struct ctl *c, const char *name, + struct ctl_node *n) { + struct ctl_node *nnode = c == NULL + ? 
&CTL_NODE(global)[ctl_global_first_free++] + : &c->root[c->first_free++]; + + nnode->children = n; + nnode->type = CTL_NODE_NAMED; + nnode->name = name; +} + +/* + * ctl_parse_query -- (internal) splits an entire query string + * into name and value + */ +static int ctl_parse_query(char *qbuf, char **name, char **value) { + if (qbuf == NULL) { + return -1; + } + + char *sptr; + *name = strtok_r(qbuf, CTL_NAME_VALUE_SEPARATOR, &sptr); + if (*name == NULL) { + return -1; + } + + *value = strtok_r(NULL, CTL_NAME_VALUE_SEPARATOR, &sptr); + if (*value == NULL) { + return -1; + } + + /* the value itself mustn't include CTL_NAME_VALUE_SEPARATOR */ + char *extra = strtok_r(NULL, CTL_NAME_VALUE_SEPARATOR, &sptr); + if (extra != NULL) { + return -1; + } + + return 0; +} + +/* + * ctl_load_config -- executes the entire query collection from a provider + */ +static int ctl_load_config(struct ctl *ctl, void *ctx, char *buf) { + int r = 0; + char *sptr = NULL; /* for internal use of strtok */ + char *name; + char *value; + + ASSERTne(buf, NULL); + + char *qbuf = strtok_r(buf, CTL_STRING_QUERY_SEPARATOR, &sptr); + while (qbuf != NULL) { + r = ctl_parse_query(qbuf, &name, &value); + if (r != 0) { + ERR("failed to parse query %s", qbuf); + return -1; + } + + r = ctl_query(ctl, ctx, CTL_QUERY_CONFIG_INPUT, name, CTL_QUERY_WRITE, + value); + + if (r < 0 && ctx != NULL) { + return -1; + } + + qbuf = strtok_r(NULL, CTL_STRING_QUERY_SEPARATOR, &sptr); + } + + return 0; +} + +/* + * ctl_load_config_from_string -- loads obj configuration from string + */ +int ctl_load_config_from_string(struct ctl *ctl, void *ctx, + const char *cfg_string) { + LOG(3, "ctl %p ctx %p cfg_string \"%s\"", ctl, ctx, cfg_string); + + char *buf = Strdup(cfg_string); + if (buf == NULL) { + ERR("!Strdup"); + return -1; + } + + int ret = ctl_load_config(ctl, ctx, buf); + + Free(buf); + return ret; +} + +/* + * ctl_load_config_from_file -- loads obj configuration from file + * + * This function opens up the config file, allocates a buffer of size equal to + * the size of the file, reads its content and sanitizes it for ctl_load_config. 
+ */ +int ctl_load_config_from_file(struct ctl *ctl, void *ctx, + const char *cfg_file) { + LOG(3, "ctl %p ctx %p cfg_file \"%s\"", ctl, ctx, cfg_file); + + int ret = -1; + + FILE *fp = os_fopen(cfg_file, "r"); + if (fp == NULL) { + return ret; + } + + int err; + if ((err = fseek(fp, 0, SEEK_END)) != 0) { + goto error_file_parse; + } + + long fsize = ftell(fp); + if (fsize == -1) { + goto error_file_parse; + } + + if (fsize > MAX_CONFIG_FILE_LEN) { + ERR("Config file too large"); + goto error_file_parse; + } + + if ((err = fseek(fp, 0, SEEK_SET)) != 0) { + goto error_file_parse; + } + + char *buf = Zalloc((size_t)fsize + 1); /* +1 for NULL-termination */ + if (buf == NULL) { + ERR("!Zalloc"); + goto error_file_parse; + } + + size_t bufpos = 0; + + int c; + int is_comment_section = 0; + while ((c = fgetc(fp)) != EOF) { + if (c == '#') { + is_comment_section = 1; + } else if (c == '\n') { + is_comment_section = 0; + } else if (!is_comment_section && !isspace(c)) { + buf[bufpos++] = (char)c; + } + } + + ret = ctl_load_config(ctl, ctx, buf); + + Free(buf); + +error_file_parse: + (void)fclose(fp); + return ret; +} + +/* + * ctl_new -- allocates and initializes ctl data structures + */ +struct ctl *ctl_new(void) { + struct ctl *c = Zalloc(sizeof(struct ctl)); + if (c == NULL) { + ERR("!Zalloc"); + return NULL; + } + + c->first_free = 0; + return c; +} + +/* + * ctl_delete -- deletes ctl + */ +void ctl_delete(struct ctl *c) { Free(c); } + +/* + * ctl_parse_ll -- (internal) parses and returns a long long signed integer + */ +static long long ctl_parse_ll(const char *str) { + char *endptr; + int olderrno = errno; + errno = 0; + long long val = strtoll(str, &endptr, 0); + if (endptr == str || errno != 0) { + return LLONG_MIN; + } + errno = olderrno; + + return val; +} + +/* + * ctl_arg_boolean -- checks whether the provided argument contains + * either a 1 or y or Y. 
+ */ +int ctl_arg_boolean(const void *arg, void *dest, size_t dest_size) { + int *intp = dest; + char in = ((char *)arg)[0]; + + if (tolower(in) == 'y' || in == '1') { + *intp = 1; + return 0; + } else if (tolower(in) == 'n' || in == '0') { + *intp = 0; + return 0; + } + + return -1; +} + +/* + * ctl_arg_integer -- parses signed integer argument + */ +int ctl_arg_integer(const void *arg, void *dest, size_t dest_size) { + long long val = ctl_parse_ll(arg); + if (val == LLONG_MIN) { + return -1; + } + + switch (dest_size) { + case sizeof(int): + if (val > INT_MAX || val < INT_MIN) { + return -1; + } + *(int *)dest = (int)val; + break; + case sizeof(long long): + *(long long *)dest = val; + break; + case sizeof(uint8_t): + if (val > UINT8_MAX || val < 0) { + return -1; + } + *(uint8_t *)dest = (uint8_t)val; + break; + default: + ERR("invalid destination size %zu", dest_size); + errno = EINVAL; + return -1; + } + + return 0; +} + +/* + * ctl_arg_string -- verifies length and copies a string argument into a zeroed + * buffer + */ +int ctl_arg_string(const void *arg, void *dest, size_t dest_size) { + /* check if the incoming string is longer or equal to dest_size */ + if (strnlen(arg, dest_size) == dest_size) { + return -1; + } + + strncpy(dest, arg, dest_size); + + return 0; +} diff --git a/src/ctl/ctl.h b/src/ctl/ctl.h new file mode 100644 index 000000000..12b9d18fd --- /dev/null +++ b/src/ctl/ctl.h @@ -0,0 +1,206 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ +/* Copyright 2016-2020, Intel Corporation */ + +/* + * ctl.h -- internal declaration of statistics and control related structures + */ + +#ifndef PMDK_CTL_H +#define PMDK_CTL_H 1 + +#include "errno.h" +#include "out.h" +#include "queue.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct ctl; + +struct ctl_index { + const char *name; + long value; + PMDK_SLIST_ENTRY(ctl_index) entry; +}; + +PMDK_SLIST_HEAD(ctl_indexes, ctl_index); + +enum ctl_query_source { + CTL_UNKNOWN_QUERY_SOURCE, + /* query executed directly from the program */ + CTL_QUERY_PROGRAMMATIC, + /* query executed from the config file */ + CTL_QUERY_CONFIG_INPUT, + + MAX_CTL_QUERY_SOURCE +}; + +enum ctl_query_type { + CTL_QUERY_READ, + CTL_QUERY_WRITE, + CTL_QUERY_RUNNABLE, + + MAX_CTL_QUERY_TYPE +}; + +typedef int (*node_callback)(void *ctx, enum ctl_query_source type, void *arg, + struct ctl_indexes *indexes); + +enum ctl_node_type { + CTL_NODE_UNKNOWN, + CTL_NODE_NAMED, + CTL_NODE_LEAF, + CTL_NODE_INDEXED, + + MAX_CTL_NODE +}; + +typedef int (*ctl_arg_parser)(const void *arg, void *dest, size_t dest_size); + +struct ctl_argument_parser { + size_t dest_offset; /* offset of the field inside of the argument */ + size_t dest_size; /* size of the field inside of the argument */ + ctl_arg_parser parser; +}; + +struct ctl_argument { + size_t dest_size; /* sizeof the entire argument */ + struct ctl_argument_parser parsers[]; /* array of 'fields' in arg */ +}; + +#define sizeof_member(t, m) sizeof(((t *)0)->m) + +#define CTL_ARG_PARSER(t, p) \ + { 0, sizeof(t), p } + +#define CTL_ARG_PARSER_STRUCT(t, m, p) \ + { offsetof(t, m), sizeof_member(t, m), p } + +#define CTL_ARG_PARSER_END \ + { 0, 0, NULL } + +/* + * CTL Tree node structure, do not use directly. All the necessary functionality + * is provided by the included macros. 
+ */ +struct ctl_node { + const char *name; + enum ctl_node_type type; + + node_callback cb[MAX_CTL_QUERY_TYPE]; + const struct ctl_argument *arg; + + const struct ctl_node *children; +}; + +struct ctl *ctl_new(void); +void ctl_delete(struct ctl *stats); + +int ctl_load_config_from_string(struct ctl *ctl, void *ctx, + const char *cfg_string); +int ctl_load_config_from_file(struct ctl *ctl, void *ctx, const char *cfg_file); + +/* Use through CTL_REGISTER_MODULE, never directly */ +void ctl_register_module_node(struct ctl *c, const char *name, + struct ctl_node *n); + +int ctl_arg_boolean(const void *arg, void *dest, size_t dest_size); +#define CTL_ARG_BOOLEAN \ + {sizeof(int), {{0, sizeof(int), ctl_arg_boolean}, CTL_ARG_PARSER_END}}; + +int ctl_arg_integer(const void *arg, void *dest, size_t dest_size); +#define CTL_ARG_INT \ + {sizeof(int), {{0, sizeof(int), ctl_arg_integer}, CTL_ARG_PARSER_END}}; + +#define CTL_ARG_LONG_LONG \ + {sizeof(long long), \ + {{0, sizeof(long long), ctl_arg_integer}, CTL_ARG_PARSER_END}}; + +int ctl_arg_string(const void *arg, void *dest, size_t dest_size); +#define CTL_ARG_STRING(len) \ + {len, {{0, len, ctl_arg_string}, CTL_ARG_PARSER_END}}; + +#define CTL_STR(name) #name + +#define CTL_NODE_END \ + { NULL, CTL_NODE_UNKNOWN, {NULL, NULL, NULL}, NULL, NULL } + +#define CTL_NODE(name, ...) ctl_node_##__VA_ARGS__##_##name + +int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, + const char *name, enum ctl_query_type type, void *arg); + +/* Declaration of a new child node */ +#define CTL_CHILD(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_NAMED, {NULL, NULL, NULL}, NULL, \ + (struct ctl_node *)CTL_NODE(name, __VA_ARGS__) \ + } + +/* Declaration of a new indexed node */ +#define CTL_INDEXED(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_INDEXED, {NULL, NULL, NULL}, NULL, \ + (struct ctl_node *)CTL_NODE(name, __VA_ARGS__) \ + } + +#define CTL_READ_HANDLER(name, ...) ctl_##__VA_ARGS__##_##name##_read + +#define CTL_WRITE_HANDLER(name, ...) ctl_##__VA_ARGS__##_##name##_write + +#define CTL_RUNNABLE_HANDLER(name, ...) ctl_##__VA_ARGS__##_##name##_runnable + +#define CTL_ARG(name) ctl_arg_##name + +/* + * Declaration of a new read-only leaf. If used the corresponding read function + * must be declared by CTL_READ_HANDLER macro. + */ +#define CTL_LEAF_RO(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_LEAF, \ + {CTL_READ_HANDLER(name, __VA_ARGS__), NULL, NULL}, NULL, NULL \ + } + +/* + * Declaration of a new write-only leaf. If used the corresponding write + * function must be declared by CTL_WRITE_HANDLER macro. + */ +#define CTL_LEAF_WO(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_LEAF, \ + {NULL, CTL_WRITE_HANDLER(name, __VA_ARGS__), NULL}, \ + &CTL_ARG(name), NULL \ + } + +/* + * Declaration of a new runnable leaf. If used the corresponding run + * function must be declared by CTL_RUNNABLE_HANDLER macro. + */ +#define CTL_LEAF_RUNNABLE(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_LEAF, \ + {NULL, NULL, CTL_RUNNABLE_HANDLER(name, __VA_ARGS__)}, NULL, NULL \ + } + +/* + * Declaration of a new read-write leaf. If used both read and write function + * must be declared by CTL_READ_HANDLER and CTL_WRITE_HANDLER macros. 
+ */ +#define CTL_LEAF_RW(name) \ + { \ + CTL_STR(name), CTL_NODE_LEAF, \ + {CTL_READ_HANDLER(name), CTL_WRITE_HANDLER(name), NULL}, \ + &CTL_ARG(name), NULL \ + } + +#define CTL_REGISTER_MODULE(_ctl, name) \ + ctl_register_module_node((_ctl), CTL_STR(name), \ + (struct ctl_node *)CTL_NODE(name)) + +#ifdef __cplusplus +} +#endif + +#endif From 580420707e4ccc43533e22eb44b04fe01d8d42ba Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Thu, 12 Dec 2024 17:52:14 +0100 Subject: [PATCH 051/466] [CMake] Disable pedantic mode and disable cast qualifier warning --- cmake/helpers.cmake | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 0a165bc3a..ddcd5f03d 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -232,9 +232,8 @@ function(add_umf_target_compile_options name) PRIVATE -fPIC -Wall -Wextra - -Wpedantic -Wformat-security - -Wcast-qual + -Wno-cast-qual $<$:-fdiagnostics-color=auto>) if(CMAKE_BUILD_TYPE STREQUAL "Release") target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2) From abb24e7b583c118cd39e14daf168d8fb6bad63d2 Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Thu, 12 Dec 2024 17:46:32 +0100 Subject: [PATCH 052/466] CTL: Add a CTL functionality to the UMF Signed-off-by: Krzysztof Filipek --- cmake/helpers.cmake | 3 +- src/CMakeLists.txt | 3 + src/ctl/ctl.c | 233 ++++++++++++++++++++++++------------------- src/ctl/ctl.h | 40 +++++--- test/CMakeLists.txt | 5 + test/ctl/config.txt | 1 + test/ctl/ctl_debug.c | 128 ++++++++++++++++++++++++ test/ctl/ctl_debug.h | 32 ++++++ test/ctl/test.cpp | 93 +++++++++++++++++ 9 files changed, 422 insertions(+), 116 deletions(-) create mode 100644 test/ctl/config.txt create mode 100644 test/ctl/ctl_debug.c create mode 100644 test/ctl/ctl_debug.h create mode 100644 test/ctl/test.cpp diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index ddcd5f03d..56692ff6e 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -233,7 +233,8 @@ function(add_umf_target_compile_options name) -Wall -Wextra -Wformat-security - -Wno-cast-qual + -Wno-cast-qual # TODO: remove this when const qualifier drop + # will be solved in CTL $<$:-fdiagnostics-color=auto>) if(CMAKE_BUILD_TYPE STREQUAL "Release") target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ffd928f7c..4edaa5957 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -26,6 +26,8 @@ add_subdirectory(coarse) set(UMF_LIBS $ $) +set(CTL_SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/ctl/ctl.c) + if(LINUX) set(BA_SOURCES ${BA_SOURCES} ${CMAKE_CURRENT_SOURCE_DIR}/base_alloc/base_alloc_linux.c) @@ -45,6 +47,7 @@ set(HWLOC_DEPENDENT_SOURCES topology.c) set(UMF_SOURCES ${BA_SOURCES} + ${CTL_SOURCES} libumf.c ipc.c ipc_cache.c diff --git a/src/ctl/ctl.c b/src/ctl/ctl.c index 330a5fa47..124d56f6c 100644 --- a/src/ctl/ctl.c +++ b/src/ctl/ctl.c @@ -1,13 +1,38 @@ +/* + * + * Copyright (C) 2016-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +// This file was originally under following license: // SPDX-License-Identifier: BSD-3-Clause -/* Copyright 2016-2020, Intel Corporation */ +/* Copyright 2016-2024, Intel Corporation */ /* * ctl.c -- implementation of the interface for examination and modification of - * the library's internal state + * the library's internal state */ + #include "ctl.h" -#include "alloc.h" -#include "os.h" + +#include +#include +#include +#include +#include + +#include "base_alloc/base_alloc_global.h" +#include "utils/utils_common.h" +#include "utlist.h" + +#ifdef _WIN32 +#define strtok_r strtok_s +#else +#include +#endif #define CTL_MAX_ENTRIES 100 @@ -19,7 +44,7 @@ #define CTL_VALUE_ARG_SEPARATOR "," static int ctl_global_first_free = 0; -static struct ctl_node CTL_NODE(global)[CTL_MAX_ENTRIES]; +static struct ctl_node CTL_NODE(global, )[CTL_MAX_ENTRIES]; /* * This is the top level node of the ctl tree structure. Each node can contain @@ -36,18 +61,33 @@ struct ctl { int first_free; }; +void *Zalloc(size_t sz) { + void *ptr = umf_ba_global_alloc(sz); + if (ptr) { + memset(ptr, 0, sz); + } + return ptr; +} + +char *Strdup(const char *s) { + size_t len = strlen(s) + 1; + char *p = umf_ba_global_alloc(len); + if (p) { + memcpy(p, s, len); + } + return p; +} + /* * ctl_find_node -- (internal) searches for a matching entry point in the - * provided nodes + * provided nodes * * The caller is responsible for freeing all of the allocated indexes, * regardless of the return value. */ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, const char *name, - struct ctl_indexes *indexes) { - LOG(3, "nodes %p name %s indexes %p", nodes, name, indexes); - + struct ctl_index_utlist *indexes) { const struct ctl_node *n = NULL; char *sptr = NULL; char *parse_str = Strdup(name); @@ -58,27 +98,27 @@ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, char *node_name = strtok_r(parse_str, CTL_QUERY_NODE_SEPARATOR, &sptr); /* - * Go through the string and separate tokens that correspond to nodes - * in the main ctl tree. - */ + * Go through the string and separate tokens that correspond to nodes + * in the main ctl tree. + */ while (node_name != NULL) { char *endptr; /* - * Ignore errno from strtol: FreeBSD returns EINVAL if no - * conversion is performed. Linux does not, but endptr - * check is valid in both cases. - */ + * Ignore errno from strtol: FreeBSD returns EINVAL if no + * conversion is performed. Linux does not, but endptr + * check is valid in both cases. 
+ */ int tmp_errno = errno; long index_value = strtol(node_name, &endptr, 0); errno = tmp_errno; - struct ctl_index *index_entry = NULL; + struct ctl_index_utlist *index_entry = NULL; if (endptr != node_name) { /* a valid index */ - index_entry = Malloc(sizeof(*index_entry)); + index_entry = umf_ba_global_alloc(sizeof(*index_entry)); if (index_entry == NULL) { goto error; } index_entry->value = index_value; - PMDK_SLIST_INSERT_HEAD(indexes, index_entry, entry); + LL_PREPEND(indexes, index_entry); } for (n = &nodes[0]; n->name != NULL; ++n) { @@ -100,36 +140,38 @@ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, node_name = strtok_r(NULL, CTL_QUERY_NODE_SEPARATOR, &sptr); } - Free(parse_str); + umf_ba_global_free(parse_str); return n; error: - Free(parse_str); + umf_ba_global_free(parse_str); return NULL; } /* * ctl_delete_indexes -- - * (internal) removes and frees all entries on the index list + * (internal) removes and frees all entries on the index list */ -static void ctl_delete_indexes(struct ctl_indexes *indexes) { - while (!PMDK_SLIST_EMPTY(indexes)) { - struct ctl_index *index = PMDK_SLIST_FIRST(indexes); - PMDK_SLIST_REMOVE_HEAD(indexes, entry); - Free(index); +static void ctl_delete_indexes(struct ctl_index_utlist *indexes) { + if (!indexes) { + return; + } + struct ctl_index_utlist *elem, *tmp; + LL_FOREACH_SAFE(indexes, elem, tmp) { + LL_DELETE(indexes, elem); + if (elem) { + umf_ba_global_free(elem); + } } } /* * ctl_parse_args -- (internal) parses a string argument based on the node - * structure + * structure */ static void *ctl_parse_args(const struct ctl_argument *arg_proto, char *arg) { - ASSERTne(arg, NULL); - - char *dest_arg = Malloc(arg_proto->dest_size); + char *dest_arg = umf_ba_global_alloc(arg_proto->dest_size); if (dest_arg == NULL) { - ERR("!Malloc"); return NULL; } @@ -137,9 +179,7 @@ static void *ctl_parse_args(const struct ctl_argument *arg_proto, char *arg) { char *arg_sep = strtok_r(arg, CTL_VALUE_ARG_SEPARATOR, &sptr); for (const struct ctl_argument_parser *p = arg_proto->parsers; p->parser != NULL; ++p) { - ASSERT(p->dest_offset + p->dest_size <= arg_proto->dest_size); if (arg_sep == NULL) { - ERR("!strtok_r"); goto error_parsing; } @@ -153,13 +193,13 @@ static void *ctl_parse_args(const struct ctl_argument *arg_proto, char *arg) { return dest_arg; error_parsing: - Free(dest_arg); + umf_ba_global_free(dest_arg); return NULL; } /* * ctl_query_get_real_args -- (internal) returns a pointer with actual argument - * structure as required by the node callback + * structure as required by the node callback */ static void *ctl_query_get_real_args(const struct ctl_node *n, void *write_arg, enum ctl_query_source source) { @@ -172,7 +212,6 @@ static void *ctl_query_get_real_args(const struct ctl_node *n, void *write_arg, real_arg = write_arg; break; default: - ASSERT(0); break; } @@ -181,19 +220,21 @@ static void *ctl_query_get_real_args(const struct ctl_node *n, void *write_arg, /* * ctl_query_cleanup_real_args -- (internal) cleanups relevant argument - * structures allocated as a result of the get_real_args call + * structures allocated as a result of the get_real_args call */ static void ctl_query_cleanup_real_args(const struct ctl_node *n, void *real_arg, enum ctl_query_source source) { + /* suppress unused-parameter errors */ + (void)n; + switch (source) { case CTL_QUERY_CONFIG_INPUT: - Free(real_arg); + umf_ba_global_free(real_arg); break; case CTL_QUERY_PROGRAMMATIC: break; default: - ASSERT(0); break; } } @@ -203,9 +244,8 @@ static 
void ctl_query_cleanup_real_args(const struct ctl_node *n, */ static int ctl_exec_query_read(void *ctx, const struct ctl_node *n, enum ctl_query_source source, void *arg, - struct ctl_indexes *indexes) { + struct ctl_index_utlist *indexes) { if (arg == NULL) { - ERR("read queries require non-NULL argument"); errno = EINVAL; return -1; } @@ -218,16 +258,14 @@ static int ctl_exec_query_read(void *ctx, const struct ctl_node *n, */ static int ctl_exec_query_write(void *ctx, const struct ctl_node *n, enum ctl_query_source source, void *arg, - struct ctl_indexes *indexes) { + struct ctl_index_utlist *indexes) { if (arg == NULL) { - ERR("write queries require non-NULL argument"); errno = EINVAL; return -1; } void *real_arg = ctl_query_get_real_args(n, arg, source); if (real_arg == NULL) { - LOG(1, "Invalid arguments"); return -1; } @@ -242,13 +280,13 @@ static int ctl_exec_query_write(void *ctx, const struct ctl_node *n, */ static int ctl_exec_query_runnable(void *ctx, const struct ctl_node *n, enum ctl_query_source source, void *arg, - struct ctl_indexes *indexes) { + struct ctl_index_utlist *indexes) { return n->cb[CTL_QUERY_RUNNABLE](ctx, source, arg, indexes); } static int (*ctl_exec_query[MAX_CTL_QUERY_TYPE])( void *ctx, const struct ctl_node *n, enum ctl_query_source source, - void *arg, struct ctl_indexes *indexes) = { + void *arg, struct ctl_index_utlist *indexes) = { ctl_exec_query_read, ctl_exec_query_write, ctl_exec_query_runnable, @@ -256,46 +294,45 @@ static int (*ctl_exec_query[MAX_CTL_QUERY_TYPE])( /* * ctl_query -- (internal) parses the name and calls the appropriate methods - * from the ctl tree + * from the ctl tree */ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, const char *name, enum ctl_query_type type, void *arg) { - LOG(3, "ctl %p ctx %p source %d name %s type %d arg %p", ctl, ctx, source, - name, type, arg); - if (name == NULL) { - ERR("invalid query"); errno = EINVAL; return -1; } /* - * All of the indexes are put on this list so that the handlers can - * easily retrieve the index values. The list is cleared once the ctl - * query has been handled. - */ - struct ctl_indexes indexes; - PMDK_SLIST_INIT(&indexes); + * All of the indexes are put on this list so that the handlers can + * easily retrieve the index values. The list is cleared once the ctl + * query has been handled. + */ + struct ctl_index_utlist *indexes = NULL; + indexes = Zalloc(sizeof(*indexes)); + if (!indexes) { + return -1; + } int ret = -1; - const struct ctl_node *n = ctl_find_node(CTL_NODE(global), name, &indexes); + const struct ctl_node *n = ctl_find_node(CTL_NODE(global, ), name, indexes); if (n == NULL && ctl) { - ctl_delete_indexes(&indexes); - n = ctl_find_node(ctl->root, name, &indexes); + ctl_delete_indexes(indexes); + indexes = NULL; + n = ctl_find_node(ctl->root, name, indexes); } if (n == NULL || n->type != CTL_NODE_LEAF || n->cb[type] == NULL) { - ERR("invalid query entry point %s", name); errno = EINVAL; goto out; } - ret = ctl_exec_query[type](ctx, n, source, arg, &indexes); + ret = ctl_exec_query[type](ctx, n, source, arg, indexes); out: - ctl_delete_indexes(&indexes); + ctl_delete_indexes(indexes); return ret; } @@ -306,7 +343,7 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, void ctl_register_module_node(struct ctl *c, const char *name, struct ctl_node *n) { struct ctl_node *nnode = c == NULL - ? &CTL_NODE(global)[ctl_global_first_free++] + ? 
&CTL_NODE(global, )[ctl_global_first_free++] : &c->root[c->first_free++]; nnode->children = n; @@ -316,14 +353,14 @@ void ctl_register_module_node(struct ctl *c, const char *name, /* * ctl_parse_query -- (internal) splits an entire query string - * into name and value + * into name and value */ static int ctl_parse_query(char *qbuf, char **name, char **value) { if (qbuf == NULL) { return -1; } - char *sptr; + char *sptr = NULL; *name = strtok_r(qbuf, CTL_NAME_VALUE_SEPARATOR, &sptr); if (*name == NULL) { return -1; @@ -351,14 +388,11 @@ static int ctl_load_config(struct ctl *ctl, void *ctx, char *buf) { char *sptr = NULL; /* for internal use of strtok */ char *name; char *value; - - ASSERTne(buf, NULL); - char *qbuf = strtok_r(buf, CTL_STRING_QUERY_SEPARATOR, &sptr); + while (qbuf != NULL) { r = ctl_parse_query(qbuf, &name, &value); if (r != 0) { - ERR("failed to parse query %s", qbuf); return -1; } @@ -380,17 +414,14 @@ static int ctl_load_config(struct ctl *ctl, void *ctx, char *buf) { */ int ctl_load_config_from_string(struct ctl *ctl, void *ctx, const char *cfg_string) { - LOG(3, "ctl %p ctx %p cfg_string \"%s\"", ctl, ctx, cfg_string); - char *buf = Strdup(cfg_string); if (buf == NULL) { - ERR("!Strdup"); return -1; } int ret = ctl_load_config(ctl, ctx, buf); - Free(buf); + umf_ba_global_free(buf); return ret; } @@ -400,13 +431,14 @@ int ctl_load_config_from_string(struct ctl *ctl, void *ctx, * This function opens up the config file, allocates a buffer of size equal to * the size of the file, reads its content and sanitizes it for ctl_load_config. */ +#ifndef _WIN32 // TODO: implement for Windows int ctl_load_config_from_file(struct ctl *ctl, void *ctx, const char *cfg_file) { - LOG(3, "ctl %p ctx %p cfg_file \"%s\"", ctl, ctx, cfg_file); - int ret = -1; + long fsize = 0; + char *buf = NULL; - FILE *fp = os_fopen(cfg_file, "r"); + FILE *fp = fopen(cfg_file, "r"); if (fp == NULL) { return ret; } @@ -416,13 +448,12 @@ int ctl_load_config_from_file(struct ctl *ctl, void *ctx, goto error_file_parse; } - long fsize = ftell(fp); + fsize = ftell(fp); if (fsize == -1) { goto error_file_parse; } if (fsize > MAX_CONFIG_FILE_LEN) { - ERR("Config file too large"); goto error_file_parse; } @@ -430,34 +461,35 @@ int ctl_load_config_from_file(struct ctl *ctl, void *ctx, goto error_file_parse; } - char *buf = Zalloc((size_t)fsize + 1); /* +1 for NULL-termination */ + buf = Zalloc((size_t)fsize + 1); /* +1 for NULL-termination */ if (buf == NULL) { - ERR("!Zalloc"); goto error_file_parse; } - size_t bufpos = 0; - - int c; - int is_comment_section = 0; - while ((c = fgetc(fp)) != EOF) { - if (c == '#') { - is_comment_section = 1; - } else if (c == '\n') { - is_comment_section = 0; - } else if (!is_comment_section && !isspace(c)) { - buf[bufpos++] = (char)c; + { + size_t bufpos = 0; + int c; + int is_comment_section = 0; + while ((c = fgetc(fp)) != EOF) { + if (c == '#') { + is_comment_section = 1; + } else if (c == '\n') { + is_comment_section = 0; + } else if (!is_comment_section && !isspace(c)) { + buf[bufpos++] = (char)c; + } } } ret = ctl_load_config(ctl, ctx, buf); - Free(buf); + umf_ba_global_free(buf); error_file_parse: (void)fclose(fp); return ret; } +#endif /* * ctl_new -- allocates and initializes ctl data structures @@ -465,7 +497,6 @@ int ctl_load_config_from_file(struct ctl *ctl, void *ctx, struct ctl *ctl_new(void) { struct ctl *c = Zalloc(sizeof(struct ctl)); if (c == NULL) { - ERR("!Zalloc"); return NULL; } @@ -476,7 +507,7 @@ struct ctl *ctl_new(void) { /* * ctl_delete -- deletes ctl 
*/ -void ctl_delete(struct ctl *c) { Free(c); } +void ctl_delete(struct ctl *c) { umf_ba_global_free(c); } /* * ctl_parse_ll -- (internal) parses and returns a long long signed integer @@ -496,11 +527,14 @@ static long long ctl_parse_ll(const char *str) { /* * ctl_arg_boolean -- checks whether the provided argument contains - * either a 1 or y or Y. + * either a 1 or y or Y. */ int ctl_arg_boolean(const void *arg, void *dest, size_t dest_size) { + /* suppress unused-parameter errors */ + (void)dest_size; + int *intp = dest; - char in = ((char *)arg)[0]; + char in = ((const char *)arg)[0]; if (tolower(in) == 'y' || in == '1') { *intp = 1; @@ -539,7 +573,6 @@ int ctl_arg_integer(const void *arg, void *dest, size_t dest_size) { *(uint8_t *)dest = (uint8_t)val; break; default: - ERR("invalid destination size %zu", dest_size); errno = EINVAL; return -1; } @@ -549,7 +582,7 @@ int ctl_arg_integer(const void *arg, void *dest, size_t dest_size) { /* * ctl_arg_string -- verifies length and copies a string argument into a zeroed - * buffer + * buffer */ int ctl_arg_string(const void *arg, void *dest, size_t dest_size) { /* check if the incoming string is longer or equal to dest_size */ diff --git a/src/ctl/ctl.h b/src/ctl/ctl.h index 12b9d18fd..f183abaf3 100644 --- a/src/ctl/ctl.h +++ b/src/ctl/ctl.h @@ -1,3 +1,13 @@ +/* + * + * Copyright (C) 2016-2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +// This file was originally under following license: /* SPDX-License-Identifier: BSD-3-Clause */ /* Copyright 2016-2020, Intel Corporation */ @@ -5,12 +15,11 @@ * ctl.h -- internal declaration of statistics and control related structures */ -#ifndef PMDK_CTL_H -#define PMDK_CTL_H 1 +#ifndef UMF_CTL_H +#define UMF_CTL_H 1 -#include "errno.h" -#include "out.h" -#include "queue.h" +#include +#include #ifdef __cplusplus extern "C" { @@ -18,14 +27,12 @@ extern "C" { struct ctl; -struct ctl_index { +struct ctl_index_utlist { const char *name; long value; - PMDK_SLIST_ENTRY(ctl_index) entry; + struct ctl_index_utlist *next; }; -PMDK_SLIST_HEAD(ctl_indexes, ctl_index); - enum ctl_query_source { CTL_UNKNOWN_QUERY_SOURCE, /* query executed directly from the program */ @@ -45,7 +52,7 @@ enum ctl_query_type { }; typedef int (*node_callback)(void *ctx, enum ctl_query_source type, void *arg, - struct ctl_indexes *indexes); + struct ctl_index_utlist *indexes); enum ctl_node_type { CTL_NODE_UNKNOWN, @@ -65,7 +72,7 @@ struct ctl_argument_parser { }; struct ctl_argument { - size_t dest_size; /* sizeof the entire argument */ + size_t dest_size; /* size of the entire argument */ struct ctl_argument_parser parsers[]; /* array of 'fields' in arg */ }; @@ -114,8 +121,11 @@ int ctl_arg_integer(const void *arg, void *dest, size_t dest_size); {sizeof(int), {{0, sizeof(int), ctl_arg_integer}, CTL_ARG_PARSER_END}}; #define CTL_ARG_LONG_LONG \ - {sizeof(long long), \ - {{0, sizeof(long long), ctl_arg_integer}, CTL_ARG_PARSER_END}}; + { \ + sizeof(long long), { \ + {0, sizeof(long long), ctl_arg_integer}, CTL_ARG_PARSER_END \ + } \ + } int ctl_arg_string(const void *arg, void *dest, size_t dest_size); #define CTL_ARG_STRING(len) \ @@ -191,13 +201,13 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_LEAF_RW(name) \ { \ CTL_STR(name), CTL_NODE_LEAF, \ - {CTL_READ_HANDLER(name), CTL_WRITE_HANDLER(name), NULL}, \ + {CTL_READ_HANDLER(name, ), CTL_WRITE_HANDLER(name, ), NULL}, \ 
&CTL_ARG(name), NULL \ } #define CTL_REGISTER_MODULE(_ctl, name) \ ctl_register_module_node((_ctl), CTL_STR(name), \ - (struct ctl_node *)CTL_NODE(name)) + (struct ctl_node *)CTL_NODE(name, )) #ifdef __cplusplus } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b23742866..4b50a8802 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -192,6 +192,11 @@ add_umf_test( SRCS utils/utils_log.cpp ${UMF_UTILS_SOURCES} LIBS ${UMF_LOGGER_LIBS}) +add_umf_test( + NAME ctl + SRCS ctl/test.cpp ctl/ctl_debug.c ../src/ctl/ctl.c ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( NAME utils_common SRCS utils/utils.cpp diff --git a/test/ctl/config.txt b/test/ctl/config.txt new file mode 100644 index 000000000..5d4f9c62b --- /dev/null +++ b/test/ctl/config.txt @@ -0,0 +1 @@ +debug.heap.alloc_pattern=321 \ No newline at end of file diff --git a/test/ctl/ctl_debug.c b/test/ctl/ctl_debug.c new file mode 100644 index 000000000..d523b3f80 --- /dev/null +++ b/test/ctl/ctl_debug.c @@ -0,0 +1,128 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * ctl_debug.c -- implementation of the debug CTL namespace + */ + +#include "ctl_debug.h" + +static struct ctl *ctl_debug; + +static int alloc_pattern = 0; +static int enable_logging = 0; +static int log_level = 0; + +struct ctl *get_debug_ctl(void) { return ctl_debug; } + +/* + * CTL_WRITE_HANDLER(alloc_pattern) -- sets the alloc_pattern field in heap + */ +static int +CTL_WRITE_HANDLER(alloc_pattern, )(void *ctx, enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int arg_in = *(int *)arg; + alloc_pattern = arg_in; + return 0; +} + +/* + * CTL_READ_HANDLER(alloc_pattern) -- returns alloc_pattern heap field + */ +static int CTL_READ_HANDLER(alloc_pattern, )(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int *arg_out = arg; + *arg_out = alloc_pattern; + return 0; +} + +static int +CTL_WRITE_HANDLER(enable_logging, )(void *ctx, enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int arg_in = *(int *)arg; + enable_logging = arg_in; + return 0; +} + +static int +CTL_READ_HANDLER(enable_logging, )(void *ctx, enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int *arg_out = arg; + *arg_out = enable_logging; + return 0; +} + +static int CTL_WRITE_HANDLER(log_level, )(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int arg_in = *(int *)arg; + log_level = arg_in; + return 0; +} + +static int CTL_READ_HANDLER(log_level, )(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx; + + int *arg_out = arg; + *arg_out = log_level; + return 0; +} + +static const struct ctl_argument CTL_ARG(alloc_pattern) = CTL_ARG_LONG_LONG; + +static const struct ctl_argument 
CTL_ARG(enable_logging) = CTL_ARG_BOOLEAN; + +static const struct ctl_argument CTL_ARG(log_level) = CTL_ARG_INT; + +static const struct ctl_node CTL_NODE(heap, )[] = {CTL_LEAF_RW(alloc_pattern), + CTL_LEAF_RW(enable_logging), + CTL_LEAF_RW(log_level), + + CTL_NODE_END}; + +static const struct ctl_node CTL_NODE(debug, )[] = {CTL_CHILD(heap, ), + + CTL_NODE_END}; + +/* + * debug_ctl_register -- registers ctl nodes for "debug" module + */ +void debug_ctl_register(struct ctl *ctl) { CTL_REGISTER_MODULE(ctl, debug); } + +void initialize_debug_ctl(void) { + ctl_debug = ctl_new(); + debug_ctl_register(ctl_debug); +} + +void deinitialize_debug_ctl(void) { ctl_delete(ctl_debug); } diff --git a/test/ctl/ctl_debug.h b/test/ctl/ctl_debug.h new file mode 100644 index 000000000..9dd8bade5 --- /dev/null +++ b/test/ctl/ctl_debug.h @@ -0,0 +1,32 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +/* + * ctl_debug.h -- definitions for CTL test + */ + +#ifndef UMF_CTL_DEBUG_H +#define UMF_CTL_DEBUG_H 1 + +#include "../src/ctl/ctl.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void debug_ctl_register(struct ctl *ctl); +struct ctl *get_debug_ctl(void); +void initialize_debug_ctl(void); +void deinitialize_debug_ctl(void); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/test/ctl/test.cpp b/test/ctl/test.cpp new file mode 100644 index 000000000..c35759c67 --- /dev/null +++ b/test/ctl/test.cpp @@ -0,0 +1,93 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include "../common/base.hpp" +#include "ctl/ctl.h" +#include "ctl/ctl_debug.h" + +using namespace umf_test; + +TEST_F(test, ctl_debug_read_from_string) { + initialize_debug_ctl(); + auto ctl_handler = get_debug_ctl(); + ctl_load_config_from_string(ctl_handler, NULL, + "debug.heap.alloc_pattern=1"); + + int value = 0; + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.alloc_pattern", CTL_QUERY_READ, &value); + ASSERT_EQ(value, 1); + + // Test setting alloc_pattern to 2 + ctl_load_config_from_string(ctl_handler, NULL, + "debug.heap.alloc_pattern=2"); + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.alloc_pattern", CTL_QUERY_READ, &value); + ASSERT_EQ(value, 2); + + // Test setting alloc_pattern to 0 + ctl_load_config_from_string(ctl_handler, NULL, + "debug.heap.alloc_pattern=0"); + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.alloc_pattern", CTL_QUERY_READ, &value); + ASSERT_EQ(value, 0); + + // Negative test: non-existent configuration + ASSERT_NE(ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.non_existent", CTL_QUERY_READ, &value), + 0); + + // Negative test: invalid path + ASSERT_NE(ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "invalid.path.alloc_pattern", CTL_QUERY_READ, &value), + 0); + + debug_ctl_register(ctl_handler); + deinitialize_debug_ctl(); +} + +int ctl_config_write_to_file(const char *filename, const char *data) { + FILE *file = fopen(filename == NULL ? "config.txt" : filename, "w+"); + if (file == NULL) { + return -1; + } + fputs(data, file); + fclose(file); + return 0; +} + +TEST_F(test, ctl_debug_read_from_file) { +#ifndef _WIN32 + ASSERT_EQ(ctl_config_write_to_file( + "config.txt", "debug.heap.alloc_pattern=321;\ndebug.heap." 
+ "enable_logging=1;\ndebug.heap.log_level=5;\n"), + 0); + initialize_debug_ctl(); + auto ctl_handler = get_debug_ctl(); + ASSERT_EQ(ctl_load_config_from_file(ctl_handler, NULL, "config.txt"), 0); + + int value = 0; + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.alloc_pattern", CTL_QUERY_READ, &value); + ASSERT_EQ(value, 321); + + value = 0; + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, "debug.heap.log_level", + CTL_QUERY_READ, &value); + ASSERT_EQ(value, 5); + + value = 0; + ctl_query(ctl_handler, NULL, CTL_QUERY_PROGRAMMATIC, + "debug.heap.enable_logging", CTL_QUERY_READ, &value); + ASSERT_EQ(value, 1); + + debug_ctl_register(ctl_handler); + deinitialize_debug_ctl(); +#endif +} From 9544998f34c836436f4f0599be83e31c4de0b63a Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 18 Dec 2024 09:55:14 +0100 Subject: [PATCH 053/466] Run only given tests/examples under valgrind Signed-off-by: Lukasz Dorau --- test/test_valgrind.sh | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/test/test_valgrind.sh b/test/test_valgrind.sh index be5f817dc..1b1675bd1 100755 --- a/test/test_valgrind.sh +++ b/test/test_valgrind.sh @@ -8,11 +8,16 @@ set -e WORKSPACE=$1 BUILD_DIR=$2 TOOL=$3 +TESTS=$4 function print_usage() { - echo "$(basename $0) - run all UMF tests and examples under a valgrind tool (memcheck, drd or helgrind)" - echo "This script looks for './test/umf_test-*' and './examples/umf_example_*' executables in the UMF build directory." - echo "Usage: $(basename $0) " + echo "$(basename $0) - run UMF tests and examples under a valgrind tool (memcheck, drd or helgrind)" + echo "Usage: $(basename $0) [tests_examples]" + echo "Where:" + echo + echo "tests_examples - (optional) list of tests or examples to be run (paths relative to the build directory)." + echo " If it is empty, all tests (./test/umf_test-*) and examples (./examples/umf_example_*)" + echo " found in will be run." } if ! valgrind --version > /dev/null; then @@ -71,7 +76,14 @@ echo "Running: \"valgrind $OPTION\" for the following tests:" ANY_TEST_FAILED=0 rm -f umf_test-*.log umf_test-*.err -for test in $(ls -1 ./test/umf_test-* ./examples/umf_example_*); do +[ "$TESTS" = "" ] && TESTS=$(ls -1 ./test/umf_test-* ./examples/umf_example_*) + +for test in $TESTS; do + if [ ! -f $test ]; then + echo + echo "error: the $test (${BUILD_DIR}/$test) file does not exist" + exit 1 + fi [ ! -x $test ] && continue echo "$test - starting ..." 
echo -n "$test " From 083252af8090fae73eb6a0450fdcfe2cb16bc909 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 18 Dec 2024 10:58:26 +0100 Subject: [PATCH 054/466] Silence hwloc-gather-cpuid Signed-off-by: Lukasz Dorau --- test/test_valgrind.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_valgrind.sh b/test/test_valgrind.sh index 1b1675bd1..46bfe7d1c 100755 --- a/test/test_valgrind.sh +++ b/test/test_valgrind.sh @@ -67,7 +67,7 @@ cd ${BUILD_DIR} mkdir -p cpuid echo "Gathering data for hwloc so it can be run under valgrind:" -hwloc-gather-cpuid ./cpuid +hwloc-gather-cpuid ./cpuid >/dev/null echo echo "Working directory: $(pwd)" From 9f6f8ba08f1ccd248a81c8fdcdd55b0af56201c3 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 18 Dec 2024 10:12:18 +0100 Subject: [PATCH 055/466] Run DAX tests under valgrind Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_dax.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/reusable_dax.yml b/.github/workflows/reusable_dax.yml index af15226d2..f7f4fbe50 100644 --- a/.github/workflows/reusable_dax.yml +++ b/.github/workflows/reusable_dax.yml @@ -31,6 +31,7 @@ env: INSTL_DIR : "${{github.workspace}}/../install-dir" COVERAGE_DIR : "${{github.workspace}}/coverage" COVERAGE_NAME : "exports-coverage-dax" + DAX_TESTS: "./test/umf_test-provider_file_memory ./test/umf_test-provider_devdax_memory" jobs: dax: @@ -126,6 +127,12 @@ jobs: UMF_TESTS_FSDAX_PATH_2=${{env.UMF_TESTS_FSDAX_PATH_2}} ctest -C ${{matrix.build_type}} -V -R "file|fsdax" + - name: Run DAX tests under valgrind + run: | + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} memcheck "${{env.DAX_TESTS}}" + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} drd "${{env.DAX_TESTS}}" + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} helgrind "${{env.DAX_TESTS}}" + - name: Check coverage if: ${{ matrix.build_type == 'Debug' }} working-directory: ${{env.BUILD_DIR}} From 96b00eef47eef0bd65d4df5e55a17e5794780361 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 19 Dec 2024 11:20:16 +0100 Subject: [PATCH 056/466] Run NUMA tests under valgrind Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_multi_numa.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.github/workflows/reusable_multi_numa.yml b/.github/workflows/reusable_multi_numa.yml index f65478984..f546b0545 100644 --- a/.github/workflows/reusable_multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -10,6 +10,7 @@ env: BUILD_DIR : "${{github.workspace}}/build" COVERAGE_DIR : "${{github.workspace}}/coverage" COVERAGE_NAME : "exports-coverage-multinuma" + NUMA_TESTS: "./test/umf_test-memspace_numa ./test/umf_test-provider_os_memory_multiple_numa_nodes" jobs: multi_numa: @@ -68,6 +69,13 @@ jobs: ./test/umf_test-provider_os_memory_multiple_numa_nodes \ --gtest_filter="-*checkModeLocal/*:*checkModePreferredEmptyNodeset/*:testNuma.checkModeInterleave" + - name: Run NUMA tests under valgrind + if: matrix.os != 'rhel-9.1' + run: | + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} memcheck "${{env.NUMA_TESTS}}" + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} drd "${{env.NUMA_TESTS}}" + ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} helgrind "${{env.NUMA_TESTS}}" + - name: Check coverage if: ${{ matrix.build_type == 'Debug' && matrix.os == 
'ubuntu-22.04' }} working-directory: ${{env.BUILD_DIR}} From d3d1bb197d95c829e914fcac2031c704f6d10ee7 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 19 Dec 2024 11:48:33 +0100 Subject: [PATCH 057/466] Fix name of the proxy_lib_size_threshold test Signed-off-by: Lukasz Dorau --- test/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bb353a889..0e12885be 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -455,10 +455,10 @@ if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY) # TODO enable this test on Windows if(LINUX) add_umf_test( - NAME test_proxy_lib_size_threshold + NAME proxy_lib_size_threshold SRCS ${BA_SOURCES_FOR_TEST} test_proxy_lib_size_threshold.cpp LIBS ${UMF_UTILS_FOR_TEST} umf_proxy) - set_property(TEST umf-test_proxy_lib_size_threshold + set_property(TEST umf-proxy_lib_size_threshold PROPERTY ENVIRONMENT UMF_PROXY="size.threshold=64") endif() From f70c7ca14a435b69de7582b2e9e5ea26d15dbdec Mon Sep 17 00:00:00 2001 From: kluszcze Date: Thu, 12 Dec 2024 13:08:51 +0100 Subject: [PATCH 058/466] add python script to run codespell scan Signed-off-by: Katarzyna Luszczewska --- .github/scripts/run-codespell.py | 40 +++++++++++++++++++++++++++ .github/workflows/reusable_checks.yml | 5 +++- 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100644 .github/scripts/run-codespell.py diff --git a/.github/scripts/run-codespell.py b/.github/scripts/run-codespell.py new file mode 100644 index 000000000..b87bf37bd --- /dev/null +++ b/.github/scripts/run-codespell.py @@ -0,0 +1,40 @@ +""" + Copyright (C) 2024 Intel Corporation + + Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +""" + +import subprocess # nosec B404 +import logging +import sys + +logging.basicConfig( + level=logging.INFO, format="[%(levelname)s]: [%(asctime)s] %(message)s" +) + + +def codespell_scan(): + try: + codespell_result = subprocess.run( # nosec + [ + "codespell", + "-H", + "--quiet-level=3", + "--skip=./.git,./.venv,./.github/workflows/.spellcheck-conf.toml", + ], + text=True, + stdout=subprocess.PIPE, + ) + if codespell_result.returncode != 0: + for line in codespell_result.stdout.splitlines(): + logging.error(line.strip()) + sys.exit(1) + else: + logging.info("No spelling errors found") + except subprocess.CalledProcessError as ex: + logging.error(ex) + sys.exit(1) + + +codespell_scan() diff --git a/.github/workflows/reusable_checks.yml b/.github/workflows/reusable_checks.yml index e3e264b0d..6298b9883 100644 --- a/.github/workflows/reusable_checks.yml +++ b/.github/workflows/reusable_checks.yml @@ -29,7 +29,7 @@ jobs: python3 -m venv .venv . .venv/bin/activate echo "$PATH" >> $GITHUB_PATH - python3 -m pip install bandit + python3 -m pip install bandit codespell - name: Configure CMake run: > @@ -57,6 +57,9 @@ jobs: with: config: ./.github/workflows/.spellcheck-conf.toml + - name: Run codespell + run: python3 ./.github/scripts/run-codespell.py + # Run Bandit recursively, but omit _deps directory (with 3rd party code) and python's venv - name: Run Bandit run: python3 -m bandit -r . -x '/_deps/,/.venv/' From d93b3d761e6882aa660038ad3cbbf0dabd821f3c Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Thu, 19 Dec 2024 14:19:39 +0100 Subject: [PATCH 059/466] Remove unnecessary commas due to removing pedantic option This commit reverts changes that can make code reading more difficult. 
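For readers unfamiliar with the idiom being reverted here: the trailing commas in calls such as `CTL_NODE(global, )` existed only to satisfy strict ISO C rules for variadic macros, which require at least one argument for the `...` part when pedantic warnings are enabled. The actual macro definitions live in `ctl.h` and are not shown in this patch, so the snippet below is only an illustrative sketch of the pattern (the macro body is an assumption), not the UMF source:

```c
/* Illustrative sketch only: the real definition in ctl.h may differ.
 * The name-mangling macro takes a variadic tail, so pedantic builds warn
 * when it is invoked with nothing after the name. */
#define CTL_NODE(name, ...) ctl_node_##__VA_ARGS__##name

struct ctl_node {
    const char *name;
};

/* With -Wpedantic this spelling warns (empty variadic part), which is why
 * the code previously used CTL_NODE(global, ). Once the pedantic option is
 * dropped, the plain form below compiles cleanly and reads better. */
static struct ctl_node CTL_NODE(global)[4]; /* expands to ctl_node_global */
```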
--- src/ctl/ctl.c | 6 ++--- src/ctl/ctl.h | 4 +-- test/ctl/ctl_debug.c | 58 +++++++++++++++++++++----------------------- 3 files changed, 33 insertions(+), 35 deletions(-) diff --git a/src/ctl/ctl.c b/src/ctl/ctl.c index 124d56f6c..d54e8390e 100644 --- a/src/ctl/ctl.c +++ b/src/ctl/ctl.c @@ -44,7 +44,7 @@ #define CTL_VALUE_ARG_SEPARATOR "," static int ctl_global_first_free = 0; -static struct ctl_node CTL_NODE(global, )[CTL_MAX_ENTRIES]; +static struct ctl_node CTL_NODE(global)[CTL_MAX_ENTRIES]; /* * This is the top level node of the ctl tree structure. Each node can contain @@ -316,7 +316,7 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, int ret = -1; - const struct ctl_node *n = ctl_find_node(CTL_NODE(global, ), name, indexes); + const struct ctl_node *n = ctl_find_node(CTL_NODE(global), name, indexes); if (n == NULL && ctl) { ctl_delete_indexes(indexes); @@ -343,7 +343,7 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, void ctl_register_module_node(struct ctl *c, const char *name, struct ctl_node *n) { struct ctl_node *nnode = c == NULL - ? &CTL_NODE(global, )[ctl_global_first_free++] + ? &CTL_NODE(global)[ctl_global_first_free++] : &c->root[c->first_free++]; nnode->children = n; diff --git a/src/ctl/ctl.h b/src/ctl/ctl.h index f183abaf3..9327b01af 100644 --- a/src/ctl/ctl.h +++ b/src/ctl/ctl.h @@ -201,13 +201,13 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_LEAF_RW(name) \ { \ CTL_STR(name), CTL_NODE_LEAF, \ - {CTL_READ_HANDLER(name, ), CTL_WRITE_HANDLER(name, ), NULL}, \ + {CTL_READ_HANDLER(name), CTL_WRITE_HANDLER(name), NULL}, \ &CTL_ARG(name), NULL \ } #define CTL_REGISTER_MODULE(_ctl, name) \ ctl_register_module_node((_ctl), CTL_STR(name), \ - (struct ctl_node *)CTL_NODE(name, )) + (struct ctl_node *)CTL_NODE(name)) #ifdef __cplusplus } diff --git a/test/ctl/ctl_debug.c b/test/ctl/ctl_debug.c index d523b3f80..711cb5e17 100644 --- a/test/ctl/ctl_debug.c +++ b/test/ctl/ctl_debug.c @@ -24,10 +24,10 @@ struct ctl *get_debug_ctl(void) { return ctl_debug; } /* * CTL_WRITE_HANDLER(alloc_pattern) -- sets the alloc_pattern field in heap */ -static int -CTL_WRITE_HANDLER(alloc_pattern, )(void *ctx, enum ctl_query_source source, - void *arg, - struct ctl_index_utlist *indexes) { +static int CTL_WRITE_HANDLER(alloc_pattern)(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { /* suppress unused-parameter errors */ (void)source, (void)indexes, (void)ctx; @@ -39,10 +39,10 @@ CTL_WRITE_HANDLER(alloc_pattern, )(void *ctx, enum ctl_query_source source, /* * CTL_READ_HANDLER(alloc_pattern) -- returns alloc_pattern heap field */ -static int CTL_READ_HANDLER(alloc_pattern, )(void *ctx, - enum ctl_query_source source, - void *arg, - struct ctl_index_utlist *indexes) { +static int CTL_READ_HANDLER(alloc_pattern)(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { /* suppress unused-parameter errors */ (void)source, (void)indexes, (void)ctx; @@ -51,10 +51,10 @@ static int CTL_READ_HANDLER(alloc_pattern, )(void *ctx, return 0; } -static int -CTL_WRITE_HANDLER(enable_logging, )(void *ctx, enum ctl_query_source source, - void *arg, - struct ctl_index_utlist *indexes) { +static int CTL_WRITE_HANDLER(enable_logging)(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { /* suppress unused-parameter errors */ (void)source, (void)indexes, (void)ctx; @@ -63,10 +63,10 @@ 
CTL_WRITE_HANDLER(enable_logging, )(void *ctx, enum ctl_query_source source, return 0; } -static int -CTL_READ_HANDLER(enable_logging, )(void *ctx, enum ctl_query_source source, - void *arg, - struct ctl_index_utlist *indexes) { +static int CTL_READ_HANDLER(enable_logging)(void *ctx, + enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { /* suppress unused-parameter errors */ (void)source, (void)indexes, (void)ctx; @@ -75,10 +75,9 @@ CTL_READ_HANDLER(enable_logging, )(void *ctx, enum ctl_query_source source, return 0; } -static int CTL_WRITE_HANDLER(log_level, )(void *ctx, - enum ctl_query_source source, - void *arg, - struct ctl_index_utlist *indexes) { +static int CTL_WRITE_HANDLER(log_level)(void *ctx, enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { /* suppress unused-parameter errors */ (void)source, (void)indexes, (void)ctx; @@ -87,10 +86,9 @@ static int CTL_WRITE_HANDLER(log_level, )(void *ctx, return 0; } -static int CTL_READ_HANDLER(log_level, )(void *ctx, - enum ctl_query_source source, - void *arg, - struct ctl_index_utlist *indexes) { +static int CTL_READ_HANDLER(log_level)(void *ctx, enum ctl_query_source source, + void *arg, + struct ctl_index_utlist *indexes) { /* suppress unused-parameter errors */ (void)source, (void)indexes, (void)ctx; @@ -105,15 +103,15 @@ static const struct ctl_argument CTL_ARG(enable_logging) = CTL_ARG_BOOLEAN; static const struct ctl_argument CTL_ARG(log_level) = CTL_ARG_INT; -static const struct ctl_node CTL_NODE(heap, )[] = {CTL_LEAF_RW(alloc_pattern), - CTL_LEAF_RW(enable_logging), - CTL_LEAF_RW(log_level), +static const struct ctl_node CTL_NODE(heap)[] = {CTL_LEAF_RW(alloc_pattern), + CTL_LEAF_RW(enable_logging), + CTL_LEAF_RW(log_level), - CTL_NODE_END}; + CTL_NODE_END}; -static const struct ctl_node CTL_NODE(debug, )[] = {CTL_CHILD(heap, ), +static const struct ctl_node CTL_NODE(debug)[] = {CTL_CHILD(heap), - CTL_NODE_END}; + CTL_NODE_END}; /* * debug_ctl_register -- registers ctl nodes for "debug" module From b09b24330b21e92de8dcee3d2cb90d1f8884e864 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 20 Dec 2024 09:31:51 +0100 Subject: [PATCH 060/466] Add error messages when CUDA provider is disabled Add error messages when CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF). 
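Note on the change below: the stub entry points stay linkable when the provider is compiled out and simply return `UMF_RESULT_ERROR_NOT_SUPPORTED`; the new `LOG_ERR` calls just make the reason visible. A minimal caller-side sketch of how that surfaces (the include path and error handling here are assumptions for illustration, not part of this patch):

```c
/* Hypothetical caller: with UMF_BUILD_CUDA_PROVIDER=OFF, params creation
 * fails with UMF_RESULT_ERROR_NOT_SUPPORTED and the provider now logs why. */
#include <umf/providers/provider_cuda.h>
#include <stdio.h>

int main(void) {
    umf_cuda_memory_provider_params_handle_t params = NULL;
    umf_result_t ret = umfCUDAMemoryProviderParamsCreate(&params);
    if (ret == UMF_RESULT_ERROR_NOT_SUPPORTED) {
        fprintf(stderr, "CUDA provider is disabled in this UMF build\n");
        return 1;
    }
    /* ... set context/device/memory type and create the provider ... */
    umfCUDAMemoryProviderParamsDestroy(params);
    return 0;
}
```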
Signed-off-by: Lukasz Dorau --- src/provider/provider_cuda.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index baccbd023..ce2f1debb 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -12,17 +12,21 @@ #include #include +#include "utils_log.h" + #if defined(UMF_NO_CUDA_PROVIDER) umf_result_t umfCUDAMemoryProviderParamsCreate( umf_cuda_memory_provider_params_handle_t *hParams) { (void)hParams; + LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t umfCUDAMemoryProviderParamsDestroy( umf_cuda_memory_provider_params_handle_t hParams) { (void)hParams; + LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -30,6 +34,7 @@ umf_result_t umfCUDAMemoryProviderParamsSetContext( umf_cuda_memory_provider_params_handle_t hParams, void *hContext) { (void)hParams; (void)hContext; + LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -37,6 +42,7 @@ umf_result_t umfCUDAMemoryProviderParamsSetDevice( umf_cuda_memory_provider_params_handle_t hParams, int hDevice) { (void)hParams; (void)hDevice; + LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -45,11 +51,13 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType( umf_usm_memory_type_t memoryType) { (void)hParams; (void)memoryType; + LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) { // not supported + LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!"); return NULL; } From b108d7fc3767a5446bdaa19458e56188310df5eb Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 20 Dec 2024 09:32:14 +0100 Subject: [PATCH 061/466] Add error messages when DevDax provider is disabled Signed-off-by: Lukasz Dorau --- src/provider/provider_devdax_memory.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/provider/provider_devdax_memory.c b/src/provider/provider_devdax_memory.c index 32407acbb..cb5a4af57 100644 --- a/src/provider/provider_devdax_memory.c +++ b/src/provider/provider_devdax_memory.c @@ -17,10 +17,13 @@ #include #include +#include "utils_log.h" + #if defined(_WIN32) || defined(UMF_NO_HWLOC) umf_memory_provider_ops_t *umfDevDaxMemoryProviderOps(void) { // not supported + LOG_ERR("DevDax memory provider is disabled!"); return NULL; } @@ -30,12 +33,14 @@ umf_result_t umfDevDaxMemoryProviderParamsCreate( (void)hParams; (void)path; (void)size; + LOG_ERR("DevDax memory provider is disabled!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t umfDevDaxMemoryProviderParamsDestroy( umf_devdax_memory_provider_params_handle_t hParams) { (void)hParams; + LOG_ERR("DevDax memory provider is disabled!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -45,6 +50,7 @@ umf_result_t umfDevDaxMemoryProviderParamsSetDeviceDax( (void)hParams; (void)path; (void)size; + LOG_ERR("DevDax memory provider is disabled!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -52,6 +58,7 @@ umf_result_t umfDevDaxMemoryProviderParamsSetProtection( umf_devdax_memory_provider_params_handle_t hParams, unsigned protection) { (void)hParams; (void)protection; + LOG_ERR("DevDax memory provider is disabled!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } From 3d9f85f98972035d5c4cedfb9e7a93a05ecc2009 Mon 
Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 20 Dec 2024 09:34:05 +0100 Subject: [PATCH 062/466] Add error messages when File provider is disabled Signed-off-by: Lukasz Dorau --- src/provider/provider_file_memory.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/provider/provider_file_memory.c b/src/provider/provider_file_memory.c index 32383a5ec..7c9ee3856 100644 --- a/src/provider/provider_file_memory.c +++ b/src/provider/provider_file_memory.c @@ -18,10 +18,13 @@ #include #include +#include "utils_log.h" + #if defined(_WIN32) || defined(UMF_NO_HWLOC) umf_memory_provider_ops_t *umfFileMemoryProviderOps(void) { // not supported + LOG_ERR("File memory provider is disabled!"); return NULL; } @@ -29,12 +32,14 @@ umf_result_t umfFileMemoryProviderParamsCreate( umf_file_memory_provider_params_handle_t *hParams, const char *path) { (void)hParams; (void)path; + LOG_ERR("File memory provider is disabled!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t umfFileMemoryProviderParamsDestroy( umf_file_memory_provider_params_handle_t hParams) { (void)hParams; + LOG_ERR("File memory provider is disabled!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -42,6 +47,7 @@ umf_result_t umfFileMemoryProviderParamsSetPath( umf_file_memory_provider_params_handle_t hParams, const char *path) { (void)hParams; (void)path; + LOG_ERR("File memory provider is disabled!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -49,6 +55,7 @@ umf_result_t umfFileMemoryProviderParamsSetProtection( umf_file_memory_provider_params_handle_t hParams, unsigned protection) { (void)hParams; (void)protection; + LOG_ERR("File memory provider is disabled!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -57,6 +64,7 @@ umf_result_t umfFileMemoryProviderParamsSetVisibility( umf_memory_visibility_t visibility) { (void)hParams; (void)visibility; + LOG_ERR("File memory provider is disabled!"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } From 0d0d9b2f314e1cb07e11a9efe6d651134b34d6aa Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 20 Dec 2024 10:43:03 +0100 Subject: [PATCH 063/466] Add error messages when L0 memory provider is disabled Add error messages when L0 memory provider is disabled (UMF_BUILD_LEVEL_ZERO_PROVIDER is OFF). Signed-off-by: Lukasz Dorau --- src/provider/provider_level_zero.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index f4a3e97c2..5f9c85a86 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -14,17 +14,23 @@ #include #include +#include "utils_log.h" + #if defined(UMF_NO_LEVEL_ZERO_PROVIDER) umf_result_t umfLevelZeroMemoryProviderParamsCreate( umf_level_zero_memory_provider_params_handle_t *hParams) { (void)hParams; + LOG_ERR("L0 memory provider is disabled! (UMF_BUILD_LEVEL_ZERO_PROVIDER is " + "OFF)"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_result_t umfLevelZeroMemoryProviderParamsDestroy( umf_level_zero_memory_provider_params_handle_t hParams) { (void)hParams; + LOG_ERR("L0 memory provider is disabled! (UMF_BUILD_LEVEL_ZERO_PROVIDER is " + "OFF)"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -33,6 +39,8 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetContext( ze_context_handle_t hContext) { (void)hParams; (void)hContext; + LOG_ERR("L0 memory provider is disabled! 
(UMF_BUILD_LEVEL_ZERO_PROVIDER is " + "OFF)"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -41,6 +49,8 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetDevice( ze_device_handle_t hDevice) { (void)hParams; (void)hDevice; + LOG_ERR("L0 memory provider is disabled! (UMF_BUILD_LEVEL_ZERO_PROVIDER is " + "OFF)"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -49,6 +59,8 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetMemoryType( umf_usm_memory_type_t memoryType) { (void)hParams; (void)memoryType; + LOG_ERR("L0 memory provider is disabled! (UMF_BUILD_LEVEL_ZERO_PROVIDER is " + "OFF)"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } @@ -58,11 +70,15 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( (void)hParams; (void)hDevices; (void)deviceCount; + LOG_ERR("L0 memory provider is disabled! (UMF_BUILD_LEVEL_ZERO_PROVIDER is " + "OFF)"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void) { // not supported + LOG_ERR("L0 memory provider is disabled! (UMF_BUILD_LEVEL_ZERO_PROVIDER is " + "OFF)"); return NULL; } From e83ee3492730823978c474ecb5141fe1a84a4364 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Mon, 16 Dec 2024 14:58:26 +0100 Subject: [PATCH 064/466] Minor cleanups and additions in docs includes: - add missing CUDA provider in the web docs, - proxy_pool is enabled by default, move req. info to proxy_lib, - add links in README. --- README.md | 17 ++++++++++++----- scripts/docs_config/api.rst | 13 ++++++++++++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index df90b6852..4cd1d8ff5 100644 --- a/README.md +++ b/README.md @@ -122,11 +122,16 @@ List of options provided by CMake: ## Architecture: memory pools and providers -A UMF memory pool is a combination of a pool allocator and a memory provider. A memory provider is responsible for coarse-grained memory allocations and management of memory pages, while the pool allocator controls memory pooling and handles fine-grained memory allocations. +A UMF memory pool is a combination of a pool allocator and a memory provider. A memory provider is responsible for +coarse-grained memory allocations and management of memory pages, while the pool allocator controls memory pooling +and handles fine-grained memory allocations. Pool allocator can leverage existing allocators (e.g. jemalloc or tbbmalloc) or be written from scratch. -UMF comes with predefined pool allocators (see include/pool) and providers (see include/provider). UMF can also work with user-defined pools and providers that implement a specific interface (see include/umf/memory_pool_ops.h and include/umf/memory_provider_ops.h). +UMF comes with predefined pool allocators (see [`include/umf/pools`](include/umf/pools)) and providers +(see [`include/umf/providers`](include/umf/providers)). UMF can also work with user-defined pools and +providers that implement a specific interface (see [`include/umf/memory_pool_ops.h`](include/umf/memory_pool_ops.h) +and [`include/umf/memory_provider_ops.h`](include/umf/memory_provider_ops.h)). 
More detailed documentation is available here: https://oneapi-src.github.io/unified-memory-framework/ @@ -152,6 +157,7 @@ a duplicate of another process's file descriptor (`pidfd_getfd(2)` is supported Permission to duplicate another process's file descriptor is governed by a ptrace access mode `PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`) that can be changed using the `/proc/sys/kernel/yama/ptrace_scope` interface in the following way: + ```sh $ sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" ``` @@ -183,6 +189,7 @@ a duplicate of another process's file descriptor (`pidfd_getfd(2)` is supported Permission to duplicate another process's file descriptor is governed by a ptrace access mode `PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`) that can be changed using the `/proc/sys/kernel/yama/ptrace_scope` interface in the following way: + ```sh $ sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" ``` @@ -203,7 +210,7 @@ Additionally, required for tests: #### DevDax memory provider (Linux only) -A memory provider that provides memory from a device DAX (a character device file /dev/daxX.Y). +A memory provider that provides memory from a device DAX (a character device file like `/dev/daxX.Y`). It can be used when large memory mappings are needed. ##### Requirements @@ -249,8 +256,6 @@ This memory pool is distributed as part of libumf. It forwards all requests to t memory provider. Currently umfPoolRealloc, umfPoolCalloc and umfPoolMallocUsableSize functions are not supported by the proxy pool. -To enable this feature, the `UMF_BUILD_SHARED_LIBRARY` option needs to be turned `ON`. - #### Disjoint pool TODO: Add a description @@ -326,6 +331,8 @@ Querying the latency value requires HMAT support on the platform. Calling `umfMe UMF provides the UMF proxy library (`umf_proxy`) that makes it possible to override the default allocator in other programs in both Linux and Windows. +To enable this feature, the `UMF_BUILD_SHARED_LIBRARY` option needs to be turned `ON`. + #### Linux In case of Linux it can be done without any code changes using the `LD_PRELOAD` environment variable: diff --git a/scripts/docs_config/api.rst b/scripts/docs_config/api.rst index c0448f117..3eedc8f1d 100644 --- a/scripts/docs_config/api.rst +++ b/scripts/docs_config/api.rst @@ -58,6 +58,9 @@ supported by the Proxy Pool. Scalable Pool ------------------------------------------ + +A oneTBB-based memory pool manager. + .. doxygenfile:: pool_scalable.h :sections: define enum typedef func var @@ -104,10 +107,18 @@ A memory provider that provides memory from L0 device. .. doxygenfile:: provider_level_zero.h :sections: define enum typedef func var +CUDA Provider +------------------------------------------ + +A memory provider that provides memory from CUDA device. + +.. doxygenfile:: provider_cuda.h + :sections: define enum typedef func var + DevDax Memory Provider ------------------------------------------ -A memory provider that provides memory from a device DAX (a character device file /dev/daxX.Y). +A memory provider that provides memory from a device DAX (a character device file like /dev/daxX.Y). .. doxygenfile:: provider_devdax_memory.h :sections: define enum typedef func var From 718c61dbf5ac76fd72166b9409d1a97644d4b34f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Mon, 16 Dec 2024 15:51:24 +0100 Subject: [PATCH 065/466] Move docs content into a separate dir and update the script to work outside of scripts dir. 
--- .github/workflows/docs.yml | 8 +++--- .github/workflows/reusable_docs_build.yml | 6 +++-- .gitignore | 2 +- RELEASE_STEPS.md | 2 +- docs/README.md | 8 ++++++ .../assets/images/intro_architecture.png | Bin {scripts/docs_config => docs/config}/Doxyfile | 2 +- {scripts/docs_config => docs/config}/api.rst | 0 {scripts/docs_config => docs/config}/conf.py | 4 ++- .../docs_config => docs/config}/examples.rst | 0 .../docs_config => docs/config}/glossary.rst | 0 .../docs_config => docs/config}/index.rst | 0 .../config}/introduction.rst | 0 {scripts => docs}/generate_docs.py | 24 +++++++++++++----- scripts/README.md | 5 ---- 15 files changed, 41 insertions(+), 20 deletions(-) create mode 100644 docs/README.md rename {scripts => docs}/assets/images/intro_architecture.png (100%) rename {scripts/docs_config => docs/config}/Doxyfile (99%) rename {scripts/docs_config => docs/config}/api.rst (100%) rename {scripts/docs_config => docs/config}/conf.py (92%) rename {scripts/docs_config => docs/config}/examples.rst (100%) rename {scripts/docs_config => docs/config}/glossary.rst (100%) rename {scripts/docs_config => docs/config}/index.rst (100%) rename {scripts/docs_config => docs/config}/introduction.rst (100%) rename {scripts => docs}/generate_docs.py (71%) delete mode 100644 scripts/README.md diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 3d9bfc29b..87e34cc74 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -41,13 +41,15 @@ jobs: run: echo "$HOME/.local/bin" >> $GITHUB_PATH - name: Build the documentation - working-directory: scripts - run: python3 generate_docs.py + run: | + mkdir build + cd build + python3 ../docs/generate_docs.py - name: Upload artifact uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0.0 with: - path: docs/html + path: build/docs_build/generated/html deploy: name: Deploy docs to GitHub Pages diff --git a/.github/workflows/reusable_docs_build.yml b/.github/workflows/reusable_docs_build.yml index 269560c67..6702f9a66 100644 --- a/.github/workflows/reusable_docs_build.yml +++ b/.github/workflows/reusable_docs_build.yml @@ -30,5 +30,7 @@ jobs: python3 -m pip install -r third_party/requirements.txt - name: Build the documentation - working-directory: scripts - run: python3 generate_docs.py + run: | + mkdir build + cd build + python3 ../docs/generate_docs.py diff --git a/.gitignore b/.gitignore index a1a488bc1..e177e395e 100644 --- a/.gitignore +++ b/.gitignore @@ -58,7 +58,7 @@ __pycache__/ *.py[cod] # Generated docs -docs/ +docs_build/ # Build files /build*/ diff --git a/RELEASE_STEPS.md b/RELEASE_STEPS.md index fb46f156b..efdadbe9f 100644 --- a/RELEASE_STEPS.md +++ b/RELEASE_STEPS.md @@ -41,7 +41,7 @@ Do changes for a release: - For major releases mention API and ABI compatibility with the previous release - Update project's version in a few places: - For major and minor releases: `UMF_VERSION_CURRENT` in `include/umf/base.h` (the API version) - - `release` variable in `scripts/docs_config/conf.py` (for docs) + - `release` variable in `docs/config/conf.py` (for docs) - `UMF_VERSION` variable in `.github/workflows/reusable_basic.yml` (for installation test) - For major releases update ABI version in `.map` and `.def` files - These files are defined for all public libraries (`libumf` and `proxy_lib`, at the moment) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 000000000..1124b53bd --- /dev/null +++ b/docs/README.md @@ -0,0 +1,8 @@ +To generate HTML documentation run the 
`generate_docs.py` script from the `build` dir. +It will create extra `./docs_build` directory, where the intermediate and final files +will be created. HTML docs will be in the `./docs_build/generated/html` directory. + +The script requires: + * [Doxygen](http://www.doxygen.nl/) at least v1.9.1 + * [Python](https://www.python.org/downloads/) at least v3.8 + * and python pip requirements, as defined in `third_party/requirements.txt` diff --git a/scripts/assets/images/intro_architecture.png b/docs/assets/images/intro_architecture.png similarity index 100% rename from scripts/assets/images/intro_architecture.png rename to docs/assets/images/intro_architecture.png diff --git a/scripts/docs_config/Doxyfile b/docs/config/Doxyfile similarity index 99% rename from scripts/docs_config/Doxyfile rename to docs/config/Doxyfile index 43ff2a603..f23117ff2 100644 --- a/scripts/docs_config/Doxyfile +++ b/docs/config/Doxyfile @@ -2058,7 +2058,7 @@ GENERATE_XML = YES # The default directory is: xml. # This tag requires that the tag GENERATE_XML is set to YES. -XML_OUTPUT = ../docs/xml +XML_OUTPUT = docs_build/doxyxml # If the XML_PROGRAMLISTING tag is set to YES, doxygen will dump the program # listings (including syntax highlighting and cross-referencing information) to diff --git a/scripts/docs_config/api.rst b/docs/config/api.rst similarity index 100% rename from scripts/docs_config/api.rst rename to docs/config/api.rst diff --git a/scripts/docs_config/conf.py b/docs/config/conf.py similarity index 92% rename from scripts/docs_config/conf.py rename to docs/config/conf.py index 577bc0b48..3af2df378 100644 --- a/scripts/docs_config/conf.py +++ b/docs/config/conf.py @@ -49,7 +49,9 @@ # -- Extension configuration ------------------------------------------------- # -- Options for breathe extension ------------------------------------------- -breathe_projects = {project: "../../docs/xml"} +# 'doxyxml' dir is generated with Doxygen; it's supposed to be in a directory +# one above the config directory. 
+breathe_projects = {project: "../doxyxml"} breathe_default_project = project breathe_show_include = False breathe_default_members = ("members", "undoc-members") diff --git a/scripts/docs_config/examples.rst b/docs/config/examples.rst similarity index 100% rename from scripts/docs_config/examples.rst rename to docs/config/examples.rst diff --git a/scripts/docs_config/glossary.rst b/docs/config/glossary.rst similarity index 100% rename from scripts/docs_config/glossary.rst rename to docs/config/glossary.rst diff --git a/scripts/docs_config/index.rst b/docs/config/index.rst similarity index 100% rename from scripts/docs_config/index.rst rename to docs/config/index.rst diff --git a/scripts/docs_config/introduction.rst b/docs/config/introduction.rst similarity index 100% rename from scripts/docs_config/introduction.rst rename to docs/config/introduction.rst diff --git a/scripts/generate_docs.py b/docs/generate_docs.py similarity index 71% rename from scripts/generate_docs.py rename to docs/generate_docs.py index d5b2a0128..1697eacfe 100644 --- a/scripts/generate_docs.py +++ b/docs/generate_docs.py @@ -6,17 +6,20 @@ """ from pathlib import Path -from shutil import rmtree +from shutil import rmtree, copytree import subprocess # nosec B404 import time def _check_cwd() -> None: - script_path = Path(__file__).resolve().parent cwd = Path.cwd() - if script_path != cwd: + include_dir = Path(cwd, "../include") + # Verify if include dir is one level up (as defined in Doxyfile) + if not include_dir.exists(): print( - f"{__file__} script has to be run from the 'scripts' directory. Terminating..." + f"Include directory {include_dir.resolve()} not found! " + "Please run this script from /build!", + flush=True, ) exit(1) @@ -66,8 +69,17 @@ def _generate_html(config_path: Path, docs_path: Path) -> None: def main() -> None: _check_cwd() - config_path = Path("docs_config").resolve() - docs_path = Path("..", "docs").resolve() + + script_dir = Path(__file__).resolve().parent + docs_build_path = Path("docs_build").resolve() + + # Sphinx and breathe require access to a Doxygen generated dir ('doxyxml') + # so we copy the whole content of the 'docs' dir to the build dir. + copytree(Path(script_dir), docs_build_path, dirs_exist_ok=True) + + config_path = Path(docs_build_path, "config").resolve() + docs_path = Path(docs_build_path, "generated").resolve() + start = time.time() _prepare_docs_dir(docs_path) _generate_xml(config_path, docs_path) diff --git a/scripts/README.md b/scripts/README.md deleted file mode 100644 index e3a9ed533..000000000 --- a/scripts/README.md +++ /dev/null @@ -1,5 +0,0 @@ -The documentation HTML files are generated using the following dependencies: - * [Python](https://www.python.org/downloads/) at least v3.8 - * [Doxygen](http://www.doxygen.nl/) at least v1.9.1 - - To generate files run the `generate_docs.py` script from the `scripts` directory. Files will be generated to the `docs/html` directory relative to the main directory of this repository. 
From 78d27981ba94512c27c6ac774f340eff71551a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Tue, 17 Dec 2024 10:57:31 +0100 Subject: [PATCH 066/466] [CMake] Add 'docs' target --- .github/workflows/docs.yml | 5 ++--- .github/workflows/reusable_docs_build.yml | 10 +++++++--- CMakeLists.txt | 11 +++++++++++ docs/README.md | 22 +++++++++++++++++++--- 4 files changed, 39 insertions(+), 9 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 87e34cc74..c507f7994 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -42,9 +42,8 @@ jobs: - name: Build the documentation run: | - mkdir build - cd build - python3 ../docs/generate_docs.py + cmake -B build -DUMF_TESTS_FAIL_ON_SKIP=ON + cmake --build build --target docs - name: Upload artifact uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0.0 diff --git a/.github/workflows/reusable_docs_build.yml b/.github/workflows/reusable_docs_build.yml index 6702f9a66..92dcda555 100644 --- a/.github/workflows/reusable_docs_build.yml +++ b/.github/workflows/reusable_docs_build.yml @@ -31,6 +31,10 @@ jobs: - name: Build the documentation run: | - mkdir build - cd build - python3 ../docs/generate_docs.py + cmake -B build \ + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF \ + -DUMF_BUILD_CUDA_PROVIDER=OFF \ + -DUMF_BUILD_TESTS=OFF \ + -DUMF_BUILD_EXAMPLES=OFF \ + -DUMF_DISABLE_HWLOC=ON + cmake --build build --target docs diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b88f95b5..4e181f246 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -760,6 +760,17 @@ if(UMF_FORMAT_CODE_STYLE) endif() endif() +find_package(Python3 3.8) +if(Python3_FOUND) + message(STATUS "Adding 'docs' target for creating a documentation.") + add_custom_target( + docs + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMAND ${Python3_EXECUTABLE} + ${UMF_CMAKE_SOURCE_DIR}/docs/generate_docs.py + COMMENT "Generate HTML documentation using Doxygen") +endif() + # --------------------------------------------------------------------------- # # Configure make install/uninstall and packages # --------------------------------------------------------------------------- # diff --git a/docs/README.md b/docs/README.md index 1124b53bd..3564d86db 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,8 +1,24 @@ -To generate HTML documentation run the `generate_docs.py` script from the `build` dir. -It will create extra `./docs_build` directory, where the intermediate and final files +# Documentation + +To generate HTML documentation run the `generate_docs.py` script from any sub-dir of the +repository (most likely `build`) or enable and use build target 'docs' (see details below). + +This script will create `./docs_build` sub-directory, where the intermediate and final files will be created. HTML docs will be in the `./docs_build/generated/html` directory. -The script requires: +## make docs + +To run documentation generation via build target use CMake commands below. +To enable this target, python executable (in required version) has to be found in the system. 
+ +```bash +$ cmake -B build +$ cmake --build build --target docs +``` + +## Requirements + +Script to generate HTML docs requires: * [Doxygen](http://www.doxygen.nl/) at least v1.9.1 * [Python](https://www.python.org/downloads/) at least v3.8 * and python pip requirements, as defined in `third_party/requirements.txt` From c7fdc11978e965af6dd4a2fce52320967b20e434 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Tue, 17 Dec 2024 11:04:19 +0100 Subject: [PATCH 067/466] [CI] make docs workflow reusable in deploy job --- .github/workflows/docs.yml | 43 ++++------------------- .github/workflows/reusable_docs_build.yml | 14 +++++++- 2 files changed, 19 insertions(+), 38 deletions(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index c507f7994..165cc1754 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -14,45 +14,14 @@ permissions: contents: read jobs: - build: - name: Build docs - runs-on: ${{ github.repository_owner == 'oneapi-src' && 'intel-ubuntu-22.04' || 'ubuntu-latest' }} - - steps: - - name: Checkout repository - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Install doxygen - run: | - sudo apt-get update - sudo apt-get install -y doxygen - - # Latest distros do not allow global pip installation - - name: Install Python requirements in venv - run: | - python3 -m venv .venv - . .venv/bin/activate - echo "$PATH" >> $GITHUB_PATH - python3 -m pip install -r third_party/requirements.txt - - - name: Setup PATH for python - run: echo "$HOME/.local/bin" >> $GITHUB_PATH - - - name: Build the documentation - run: | - cmake -B build -DUMF_TESTS_FAIL_ON_SKIP=ON - cmake --build build --target docs - - - name: Upload artifact - uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0.0 - with: - path: build/docs_build/generated/html + DocsBuild: + uses: ./.github/workflows/reusable_docs_build.yml + with: + upload: true - deploy: + DocsDeploy: name: Deploy docs to GitHub Pages - needs: build + needs: DocsBuild permissions: pages: write diff --git a/.github/workflows/reusable_docs_build.yml b/.github/workflows/reusable_docs_build.yml index 92dcda555..e90ca87ae 100644 --- a/.github/workflows/reusable_docs_build.yml +++ b/.github/workflows/reusable_docs_build.yml @@ -1,6 +1,12 @@ name: Docs build -on: workflow_call +on: + workflow_call: + inputs: + upload: + description: Should HTML documentation be uploaded as artifact? 
+ type: boolean + default: false permissions: contents: read @@ -38,3 +44,9 @@ jobs: -DUMF_BUILD_EXAMPLES=OFF \ -DUMF_DISABLE_HWLOC=ON cmake --build build --target docs + + - name: Upload artifact + if: ${{ inputs.upload == true }} + uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0.0 + with: + path: build/docs_build/generated/html From 70c59068b314de0c6ac3459566bad138d3fdbd63 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 30 Dec 2024 09:27:22 +0100 Subject: [PATCH 068/466] Fix paths of logs in test_valgrind.sh Signed-off-by: Lukasz Dorau --- test/test_valgrind.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/test/test_valgrind.sh b/test/test_valgrind.sh index 46bfe7d1c..954a3a56b 100755 --- a/test/test_valgrind.sh +++ b/test/test_valgrind.sh @@ -74,9 +74,12 @@ echo "Working directory: $(pwd)" echo "Running: \"valgrind $OPTION\" for the following tests:" ANY_TEST_FAILED=0 -rm -f umf_test-*.log umf_test-*.err +PATH_TESTS="./test/umf_test-*" +PATH_EXAMPLES="./examples/umf_example_*" -[ "$TESTS" = "" ] && TESTS=$(ls -1 ./test/umf_test-* ./examples/umf_example_*) +rm -f ${PATH_TESTS}.log ${PATH_TESTS}.err ${PATH_EXAMPLES}.log ${PATH_EXAMPLES}.err + +[ "$TESTS" = "" ] && TESTS=$(ls -1 ${PATH_TESTS} ${PATH_EXAMPLES}) for test in $TESTS; do if [ ! -f $test ]; then @@ -185,7 +188,7 @@ echo echo "======================================================================" echo -for log in $(ls -1 umf_test-*.log); do +for log in $(ls -1 ${PATH_TESTS}.log ${PATH_EXAMPLES}.log); do echo ">>>>>>> LOG $log" cat $log echo From 5844c5afed09b731f79e8fab5c2257bbeb0f6a2e Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Thu, 26 Dec 2024 21:01:33 +0100 Subject: [PATCH 069/466] Fix L0 provider Set device_properties.stype during init. Found by L0 validation layer. 
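Background for the fix below: Level Zero treats `ze_device_properties_t` as an extensible descriptor, so its `stype` field (and `pNext` chain) must be initialized before the query, which is what the validation layer flagged. A minimal sketch of that convention, assuming the standard `level_zero/ze_api.h` header (this is not the provider code itself):

```c
#include <level_zero/ze_api.h>
#include <string.h>

/* Sketch of the descriptor-initialization convention applied by this patch:
 * zero the struct, set stype to the matching ZE_STRUCTURE_TYPE_* value, and
 * leave pNext NULL unless extension structs are chained. */
static ze_result_t query_device_props(ze_device_handle_t device,
                                      ze_device_properties_t *props) {
    memset(props, 0, sizeof(*props));
    props->stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
    props->pNext = NULL; /* already zeroed by memset; kept explicit */
    return zeDeviceGetProperties(device, props);
}
```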
--- src/provider/provider_level_zero.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index 70f0acfe5..964d91e10 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -336,6 +336,10 @@ static umf_result_t ze_memory_provider_initialize(void *params, ze_provider->device = ze_params->level_zero_device_handle; ze_provider->memory_type = (ze_memory_type_t)ze_params->memory_type; + memset(&ze_provider->device_properties, 0, + sizeof(ze_provider->device_properties)); + ze_provider->device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + if (ze_provider->device) { umf_result_t ret = ze2umf_result(g_ze_ops.zeDeviceGetProperties( ze_provider->device, &ze_provider->device_properties)); @@ -345,9 +349,6 @@ static umf_result_t ze_memory_provider_initialize(void *params, umf_ba_global_free(ze_provider); return ret; } - } else { - memset(&ze_provider->device_properties, 0, - sizeof(ze_provider->device_properties)); } if (ze_params->resident_device_count) { From 58ba8e9ff09205dc1ccf4c998485e17ae894e7d5 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 31 Dec 2024 13:20:22 +0100 Subject: [PATCH 070/466] enable building examples on win static hwloc CI --- .github/workflows/reusable_basic.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 3b573453d..ae67aae65 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -333,7 +333,7 @@ jobs: -B ${{env.BUILD_DIR}} -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" -DUMF_BUILD_SHARED_LIBRARY=ON - -DUMF_BUILD_EXAMPLES=OFF + -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON @@ -376,7 +376,7 @@ jobs: -B ${{env.BUILD_DIR}} -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" -DUMF_BUILD_SHARED_LIBRARY=OFF - -DUMF_BUILD_EXAMPLES=OFF + -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON From 91f14d7d8039dc07f0a0595f0f2f441e7d526930 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 31 Dec 2024 13:20:41 +0100 Subject: [PATCH 071/466] fix setting LIBHWLOC_LIBRARIES on Windows --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b88f95b5..4d00bc2c2 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -251,8 +251,8 @@ else() set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_SOURCE_DIR}/include;${hwloc_targ_BINARY_DIR}/include) - set(LIBHWLOC_LIBRARY_DIRS - ${hwloc_targ_BINARY_DIR}/Release;${hwloc_targ_BINARY_DIR}/Debug) + set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/$) + set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/$/hwloc.lib) else() include(FetchContent) message( From c0331a480bb65138aff23dcced14e3ca5de1855a Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Fri, 6 Dec 2024 10:21:34 +0100 Subject: [PATCH 072/466] Set symbol versions in def/map files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Stolarczuk --- RELEASE_STEPS.md | 4 ++-- src/libumf.def | 4 ++-- src/libumf.map | 2 +- src/proxy_lib/proxy_lib.def | 1 + src/proxy_lib/proxy_lib.map | 4 ++-- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/RELEASE_STEPS.md b/RELEASE_STEPS.md index e88ca9c2d..ec6e5b690 100644 --- a/RELEASE_STEPS.md +++ b/RELEASE_STEPS.md @@ -38,12 
+38,12 @@ Do changes for a release: - If previously we decided not to create such branch, create it now, based on the appropriate minor or major tag - For major/minor release start from the `main` branch - Add an entry to ChangeLog, remember to change the day of the week in the release date - - For major releases mention API and ABI compatibility with the previous release + - For major and minor (prior 1.0.0) releases mention API and ABI compatibility with the previous release - Update project's version in a few places: - For major and minor releases: `UMF_VERSION_CURRENT` in `include/umf/base.h` (the API version) - `release` variable in `scripts/docs_config/conf.py` (for docs) - `UMF_VERSION` variable in `.github/workflows/basic.yml` (for installation test) -- For major releases update ABI version in `.map` and `.def` files +- For major and minor (prior 1.0.0) releases update ABI version in `.map` and `.def` files - These files are defined for all public libraries (`libumf` and `proxy_lib`, at the moment) - Commit these changes and tag the release: - `git commit -a -S -m "$VERSION release"` diff --git a/src/libumf.def b/src/libumf.def index 33c09f4b9..82e32d4a1 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -4,9 +4,9 @@ ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ;;;; End Copyright Notice -LIBRARY umf +LIBRARY UMF -VERSION 1.0 +VERSION 0.10 EXPORTS DllMain diff --git a/src/libumf.map b/src/libumf.map index c1e1fd62c..4755b6b81 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -2,7 +2,7 @@ # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -UMF_1.0 { +UMF_0.10 { global: umfInit; umfTearDown; diff --git a/src/proxy_lib/proxy_lib.def b/src/proxy_lib/proxy_lib.def index f30b40556..82b666b6a 100644 --- a/src/proxy_lib/proxy_lib.def +++ b/src/proxy_lib/proxy_lib.def @@ -5,6 +5,7 @@ ;;;; End Copyright Notice LIBRARY UMF_PROXY + EXPORTS DllMain aligned_alloc diff --git a/src/proxy_lib/proxy_lib.map b/src/proxy_lib/proxy_lib.map index 5d93d03ba..93ae001e6 100644 --- a/src/proxy_lib/proxy_lib.map +++ b/src/proxy_lib/proxy_lib.map @@ -2,8 +2,8 @@ # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# linker VERSION script - +# These functions are meant to be in unnamed scope. They are also not named +# with any umf prefix, as they should override functions with the same name. 
{ global: aligned_alloc; From 0b4cbaa79f2909222efe43021b4d30558c5d8458 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 7 Jan 2025 13:42:28 +0100 Subject: [PATCH 073/466] use the UMF version from git describe in tests, not the harcoded one --- .github/workflows/reusable_basic.yml | 18 ++++++++++++++++-- RELEASE_STEPS.md | 1 - 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index ae67aae65..5866f939e 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -7,8 +7,6 @@ permissions: contents: read env: - # for installation testing - it should match with version set in git - UMF_VERSION: 0.11.0 BUILD_DIR : "${{github.workspace}}/build" INSTL_DIR : "${{github.workspace}}/../install-dir" COVERAGE_DIR : "${{github.workspace}}/coverage" @@ -150,6 +148,11 @@ jobs: - name: Set ptrace value for IPC test run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" + - name: Get UMF version + run: | + VERSION=$(git describe --tags --abbrev=0 | grep -oP '\d+\.\d+\.\d+') + echo "UMF_VERSION=$VERSION" >> $GITHUB_ENV + - name: Configure build run: > ${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh &&' || ''}} @@ -266,6 +269,12 @@ jobs: run: vcpkg install shell: pwsh # Specifies PowerShell as the shell for running the script. + - name: Get UMF version + run: | + $version = (git describe --tags --abbrev=0 | Select-String -Pattern '\d+\.\d+\.\d+').Matches.Value + echo "UMF_VERSION=$version" >> $env:GITHUB_ENV + shell: pwsh + - name: Configure build run: > cmake @@ -469,6 +478,11 @@ jobs: - name: Install hwloc run: brew install hwloc tbb automake + - name: Get UMF version + run: | + VERSION=$(git describe --tags --abbrev=0 | grep -Eo '\d+\.\d+\.\d+') + echo "UMF_VERSION=$VERSION" >> $GITHUB_ENV + - name: Configure build run: > cmake diff --git a/RELEASE_STEPS.md b/RELEASE_STEPS.md index efdadbe9f..2609e36bb 100644 --- a/RELEASE_STEPS.md +++ b/RELEASE_STEPS.md @@ -42,7 +42,6 @@ Do changes for a release: - Update project's version in a few places: - For major and minor releases: `UMF_VERSION_CURRENT` in `include/umf/base.h` (the API version) - `release` variable in `docs/config/conf.py` (for docs) - - `UMF_VERSION` variable in `.github/workflows/reusable_basic.yml` (for installation test) - For major releases update ABI version in `.map` and `.def` files - These files are defined for all public libraries (`libumf` and `proxy_lib`, at the moment) - Commit these changes and tag the release: From da8cfb84990b8031e46dbd7bde96230348b126a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 6 Nov 2024 16:02:30 +0100 Subject: [PATCH 074/466] Revert "Disable temporarily failing CI job with ICX compiler" This reverts commit e8cde28437c067e0c73a381147260206d45bdeba. 
--- .github/workflows/reusable_basic.yml | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 5866f939e..ab4c8061b 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -74,15 +74,15 @@ jobs: disable_hwloc: 'OFF' link_hwloc_statically: 'OFF' # test icx compiler - # - os: 'ubuntu-22.04' - # build_type: Release - # compiler: {c: icx, cxx: icpx} - # shared_library: 'ON' - # level_zero_provider: 'ON' - # cuda_provider: 'ON' - # install_tbb: 'ON' - # disable_hwloc: 'OFF' - # link_hwloc_statically: 'OFF' + - os: 'ubuntu-22.04' + build_type: Release + compiler: {c: icx, cxx: icpx} + shared_library: 'ON' + level_zero_provider: 'ON' + cuda_provider: 'ON' + install_tbb: 'ON' + disable_hwloc: 'OFF' + link_hwloc_statically: 'OFF' # test without installing TBB - os: 'ubuntu-22.04' build_type: Release From 7a59bd3979b0a50d3c8b15dcea6d37f0d9d7ae54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Thu, 19 Dec 2024 14:24:32 +0100 Subject: [PATCH 075/466] refactor benchmark file structure --- benchmark/benchmark.cpp | 162 ++----------------- benchmark/benchmark.hpp | 171 ++++++++------------ benchmark/benchmark_interfaces.hpp | 144 ----------------- benchmark/benchmark_size.hpp | 63 ++++++++ benchmark/benchmark_umf.hpp | 252 +++++++++++++++++++++++++++++ 5 files changed, 401 insertions(+), 391 deletions(-) delete mode 100644 benchmark/benchmark_interfaces.hpp create mode 100644 benchmark/benchmark_size.hpp create mode 100644 benchmark/benchmark_umf.hpp diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 655545d1e..df4fe6e5d 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -6,161 +6,29 @@ * */ -#include -#include -#ifdef UMF_POOL_SCALABLE_ENABLED -#include -#endif -#include - -#ifdef UMF_POOL_DISJOINT_ENABLED -#include -#endif - -#ifdef UMF_POOL_JEMALLOC_ENABLED -#include -#endif - #include "benchmark.hpp" -struct glibc_malloc : public allocator_interface { - unsigned SetUp([[maybe_unused]] ::benchmark::State &state, - unsigned argPos) override { - return argPos; - } - void TearDown([[maybe_unused]] ::benchmark::State &state) override{}; - void *benchAlloc(size_t size) override { return malloc(size); } - void benchFree(void *ptr, [[maybe_unused]] size_t size) override { - free(ptr); +#define UMF_BENCHMARK_TEMPLATE_DEFINE(BaseClass, Method, ...) 
\ + BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, __VA_ARGS__) \ + (benchmark::State & state) { \ + for (auto _ : state) { \ + bench(state); \ + } \ } - static std::string name() { return "glibc"; } -}; - -struct os_provider : public provider_interface { - provider_interface::params_ptr - getParams(::benchmark::State &state) override { - umf_os_memory_provider_params_handle_t raw_params = nullptr; - umfOsMemoryProviderParamsCreate(&raw_params); - if (!raw_params) { - state.SkipWithError("Failed to create os provider params"); - return {nullptr, [](void *) {}}; - } - - // Use a lambda as the custom deleter - auto deleter = [](void *p) { - auto handle = - static_cast(p); - umfOsMemoryProviderParamsDestroy(handle); - }; - - return {static_cast(raw_params), deleter}; - } - - umf_memory_provider_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) override { - return umfOsMemoryProviderOps(); - } - static std::string name() { return "os_provider"; } -}; - -template -struct proxy_pool : public pool_interface { - umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) override { - return umfProxyPoolOps(); - } - - static std::string name() { return "proxy_pool<" + Provider::name() + ">"; } -}; -#ifdef UMF_POOL_DISJOINT_ENABLED -template -struct disjoint_pool : public pool_interface { - umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) override { - return umfDisjointPoolOps(); - } - - typename pool_interface::params_ptr - getParams(::benchmark::State &state) override { - umf_disjoint_pool_params_handle_t raw_params = nullptr; - auto ret = umfDisjointPoolParamsCreate(&raw_params); - if (ret != UMF_RESULT_SUCCESS) { - state.SkipWithError("Failed to create disjoint pool params"); - return {nullptr, [](void *) {}}; - } - - typename pool_interface::params_ptr params( - raw_params, [](void *p) { - umfDisjointPoolParamsDestroy( - static_cast(p)); - }); - - ret = umfDisjointPoolParamsSetSlabMinSize(raw_params, 4096); - if (ret != UMF_RESULT_SUCCESS) { - state.SkipWithError("Failed to set slab min size"); - return {nullptr, [](void *) {}}; - } - - ret = umfDisjointPoolParamsSetCapacity(raw_params, 4); - if (ret != UMF_RESULT_SUCCESS) { - state.SkipWithError("Failed to set capacity"); - return {nullptr, [](void *) {}}; - } - - ret = umfDisjointPoolParamsSetMinBucketSize(raw_params, 4096); - if (ret != UMF_RESULT_SUCCESS) { - state.SkipWithError("Failed to set min bucket size"); - return {nullptr, [](void *) {}}; - } - - ret = umfDisjointPoolParamsSetMaxPoolableSize(raw_params, 4096 * 16); - if (ret != UMF_RESULT_SUCCESS) { - state.SkipWithError("Failed to set max poolable size"); - return {nullptr, [](void *) {}}; - } - - return params; - } - - static std::string name() { - return "disjoint_pool<" + Provider::name() + ">"; - } -}; -#endif - -#ifdef UMF_POOL_JEMALLOC_ENABLED -template -struct jemalloc_pool : public pool_interface { - umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) override { - return umfJemallocPoolOps(); - } - - static std::string name() { - return "jemalloc_pool<" + Provider::name() + ">"; - } -}; -#endif - -#ifdef UMF_POOL_SCALABLE_ENABLED -template -struct scalable_pool : public pool_interface { - virtual umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) override { - return umfScalablePoolOps(); - } - - static std::string name() { - return "scalable_pool<" + Provider::name() + ">"; - } -}; -#endif -// Benchmarks scenarios: +#define UMF_BENCHMARK_REGISTER_F(BaseClass, Method) \ 
+ BENCHMARK_REGISTER_F(BaseClass, Method) \ + ->ArgNames( \ + BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::argsName()) \ + ->Name(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::name()) \ + ->Iterations( \ + BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::iterations()) UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_fix, fixed_alloc_size, glibc_malloc); +// Benchmarks scenarios: + // The benchmark arguments specified in Args() are, in order: // benchmark arguments, allocator arguments, size generator arguments. // The exact meaning of each argument depends on the benchmark, allocator, and size components used. diff --git a/benchmark/benchmark.hpp b/benchmark/benchmark.hpp index 6ac7a4dfa..df5d6a592 100644 --- a/benchmark/benchmark.hpp +++ b/benchmark/benchmark.hpp @@ -75,70 +75,97 @@ #include #include -#include "benchmark_interfaces.hpp" +#include "benchmark_size.hpp" +#include "benchmark_umf.hpp" struct alloc_data { void *ptr; size_t size; }; -#define UMF_BENCHMARK_TEMPLATE_DEFINE(BaseClass, Method, ...) \ - BENCHMARK_TEMPLATE_DEFINE_F(BaseClass, Method, __VA_ARGS__) \ - (benchmark::State & state) { \ - for (auto _ : state) { \ - bench(state); \ - } \ +template ::value>> +class provider_allocator : public allocator_interface { + public: + unsigned SetUp(::benchmark::State &state, unsigned r) override { + provider.SetUp(state); + return r; } -#define UMF_BENCHMARK_REGISTER_F(BaseClass, Method) \ - BENCHMARK_REGISTER_F(BaseClass, Method) \ - ->ArgNames( \ - BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::argsName()) \ - ->Name(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::name()) \ - ->Iterations( \ - BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::iterations()) + void TearDown(::benchmark::State &state) override { + provider.TearDown(state); + } -class fixed_alloc_size : public alloc_size_interface { - public: - unsigned SetUp(::benchmark::State &state, unsigned argPos) override { - size = state.range(argPos); - return argPos + 1; + void *benchAlloc(size_t size) override { + void *ptr; + if (umfMemoryProviderAlloc(provider.provider, size, 0, &ptr) != + UMF_RESULT_SUCCESS) { + return NULL; + } + return ptr; + } + + void benchFree(void *ptr, size_t size) override { + umfMemoryProviderFree(provider.provider, ptr, size); } - void TearDown([[maybe_unused]] ::benchmark::State &state) override {} - size_t nextSize() override { return size; }; - static std::vector argsName() { return {"size"}; } + + static std::string name() { return Provider::name(); } private: - size_t size; + Provider provider; }; -class uniform_alloc_size : public alloc_size_interface { - using distribution = std::uniform_int_distribution; - +// TODO: assert Pool to be a pool_interface. 
+template class pool_allocator : public allocator_interface { public: - unsigned SetUp(::benchmark::State &state, unsigned argPos) override { - auto min = state.range(argPos++); - auto max = state.range(argPos++); - auto gran = state.range(argPos++); - if (min % gran != 0 && max % gran != 0) { - state.SkipWithError("min and max must be divisible by granularity"); - return argPos; - } + unsigned SetUp(::benchmark::State &state, unsigned r) override { + pool.SetUp(state); + return r; + } + + void TearDown(::benchmark::State &state) override { pool.TearDown(state); } - dist.param(distribution::param_type(min / gran, max / gran)); - multiplier = gran; - return argPos; + virtual void *benchAlloc(size_t size) override { + return umfPoolMalloc(pool.pool, size); } - void TearDown([[maybe_unused]] ::benchmark::State &state) override {} - size_t nextSize() override { return dist(generator) * multiplier; } - static std::vector argsName() { - return {"min size", "max size", "granularity"}; + + virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) override { + umfPoolFree(pool.pool, ptr); } + static std::string name() { return Pool::name(); } + private: - std::default_random_engine generator; - distribution dist; - size_t multiplier; + Pool pool; +}; + +template +struct benchmark_interface : public benchmark::Fixture { + void SetUp(::benchmark::State &state) { + int argPos = alloc_size.SetUp(state, 0); + allocator.SetUp(state, argPos); + } + + void TearDown(::benchmark::State &state) { + alloc_size.TearDown(state); + allocator.TearDown(state); + } + + virtual void bench(::benchmark::State &state) = 0; + + static std::vector argsName() { + auto s = Size::argsName(); + auto a = Allocator::argsName(); + std::vector res = {}; + res.insert(res.end(), s.begin(), s.end()); + res.insert(res.end(), a.begin(), a.end()); + return res; + } + + static std::string name() { return Allocator::name(); } + static int64_t iterations() { return 10000; } + Size alloc_size; + Allocator allocator; }; // This class benchmarks speed of alloc() operations. @@ -335,59 +362,3 @@ class multiple_malloc_free_benchmark : public alloc_benchmark { std::default_random_engine generator; distribution dist; }; - -template ::value>> -class provider_allocator : public allocator_interface { - public: - unsigned SetUp(::benchmark::State &state, unsigned r) override { - provider.SetUp(state); - return r; - } - - void TearDown(::benchmark::State &state) override { - provider.TearDown(state); - } - - void *benchAlloc(size_t size) override { - void *ptr; - if (umfMemoryProviderAlloc(provider.provider, size, 0, &ptr) != - UMF_RESULT_SUCCESS) { - return NULL; - } - return ptr; - } - - void benchFree(void *ptr, size_t size) override { - umfMemoryProviderFree(provider.provider, ptr, size); - } - - static std::string name() { return Provider::name(); } - - private: - Provider provider; -}; - -// TODO: assert Pool to be a pool_interface. 
-template class pool_allocator : public allocator_interface { - public: - unsigned SetUp(::benchmark::State &state, unsigned r) override { - pool.SetUp(state); - return r; - } - - void TearDown(::benchmark::State &state) override { pool.TearDown(state); } - - virtual void *benchAlloc(size_t size) override { - return umfPoolMalloc(pool.pool, size); - } - - virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) override { - umfPoolFree(pool.pool, ptr); - } - - static std::string name() { return Pool::name(); } - - private: - Pool pool; -}; diff --git a/benchmark/benchmark_interfaces.hpp b/benchmark/benchmark_interfaces.hpp deleted file mode 100644 index 516a20b69..000000000 --- a/benchmark/benchmark_interfaces.hpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright (C) 2024 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - */ - -#include -#include -#include -#include - -#include -#include -#include - -class alloc_size_interface { - public: - virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, - [[maybe_unused]] unsigned argPos) = 0; - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; - virtual size_t nextSize() = 0; - static std::vector argsName() { return {""}; }; -}; - -class allocator_interface { - public: - virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, - [[maybe_unused]] unsigned argPos) = 0; - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; - virtual void *benchAlloc(size_t size) = 0; - virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) = 0; - static std::vector argsName() { return {}; } -}; - -template -struct benchmark_interface : public benchmark::Fixture { - void SetUp(::benchmark::State &state) { - int argPos = alloc_size.SetUp(state, 0); - allocator.SetUp(state, argPos); - } - - void TearDown(::benchmark::State &state) { - alloc_size.TearDown(state); - allocator.TearDown(state); - } - - virtual void bench(::benchmark::State &state) = 0; - - static std::vector argsName() { - auto s = Size::argsName(); - auto a = Allocator::argsName(); - std::vector res = {}; - res.insert(res.end(), s.begin(), s.end()); - res.insert(res.end(), a.begin(), a.end()); - return res; - } - - static std::string name() { return Allocator::name(); } - static int64_t iterations() { return 10000; } - Size alloc_size; - Allocator allocator; -}; - -struct provider_interface { - using params_ptr = std::unique_ptr; - - umf_memory_provider_handle_t provider = NULL; - virtual void SetUp(::benchmark::State &state) { - if (state.thread_index() != 0) { - return; - } - auto params = getParams(state); - auto umf_result = - umfMemoryProviderCreate(getOps(state), params.get(), &provider); - if (umf_result != UMF_RESULT_SUCCESS) { - state.SkipWithError("umfMemoryProviderCreate() failed"); - } - } - - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { - if (state.thread_index() != 0) { - return; - } - - if (provider) { - umfMemoryProviderDestroy(provider); - } - } - - virtual umf_memory_provider_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) { - return nullptr; - } - - virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { - return {nullptr, [](void *) {}}; - } -}; - -template ::value>> -struct pool_interface { - using params_ptr = std::unique_ptr; - - virtual void SetUp(::benchmark::State &state) { - provider.SetUp(state); - if (state.thread_index() != 0) { - return; 
- } - auto params = getParams(state); - auto umf_result = umfPoolCreate(getOps(state), provider.provider, - params.get(), 0, &pool); - if (umf_result != UMF_RESULT_SUCCESS) { - state.SkipWithError("umfPoolCreate() failed"); - } - } - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { - if (state.thread_index() != 0) { - return; - } - // TODO: The scalable pool destruction process can race with other threads - // performing TLS (Thread-Local Storage) destruction. - // As a temporary workaround, we introduce a delay (sleep) - // to ensure the pool is destroyed only after all threads have completed. - // Issue: #933 - std::this_thread::sleep_for(std::chrono::milliseconds(500)); - if (pool) { - umfPoolDestroy(pool); - } - }; - - virtual umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) { - return nullptr; - } - virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { - return {nullptr, [](void *) {}}; - } - T provider; - umf_memory_pool_handle_t pool; -}; diff --git a/benchmark/benchmark_size.hpp b/benchmark/benchmark_size.hpp new file mode 100644 index 000000000..d17a6b286 --- /dev/null +++ b/benchmark/benchmark_size.hpp @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2024 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include +#include +#include + +class alloc_size_interface { + public: + virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, + [[maybe_unused]] unsigned argPos) = 0; + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; + virtual size_t nextSize() = 0; + static std::vector argsName() { return {""}; }; +}; + +class fixed_alloc_size : public alloc_size_interface { + public: + unsigned SetUp(::benchmark::State &state, unsigned argPos) override { + size = state.range(argPos); + return argPos + 1; + } + void TearDown([[maybe_unused]] ::benchmark::State &state) override {} + size_t nextSize() override { return size; }; + static std::vector argsName() { return {"size"}; } + + private: + size_t size; +}; + +class uniform_alloc_size : public alloc_size_interface { + using distribution = std::uniform_int_distribution; + + public: + unsigned SetUp(::benchmark::State &state, unsigned argPos) override { + auto min = state.range(argPos++); + auto max = state.range(argPos++); + auto gran = state.range(argPos++); + if (min % gran != 0 && max % gran != 0) { + state.SkipWithError("min and max must be divisible by granularity"); + return argPos; + } + + dist.param(distribution::param_type(min / gran, max / gran)); + multiplier = gran; + return argPos; + } + void TearDown([[maybe_unused]] ::benchmark::State &state) override {} + size_t nextSize() override { return dist(generator) * multiplier; } + static std::vector argsName() { + return {"min size", "max size", "granularity"}; + } + + private: + std::default_random_engine generator; + distribution dist; + size_t multiplier; +}; diff --git a/benchmark/benchmark_umf.hpp b/benchmark/benchmark_umf.hpp new file mode 100644 index 000000000..389c224ed --- /dev/null +++ b/benchmark/benchmark_umf.hpp @@ -0,0 +1,252 @@ +/* + * Copyright (C) 2024-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ +#include +#include + +#include +#include +#include + +#include +#include +#ifdef UMF_POOL_SCALABLE_ENABLED +#include +#endif +#include + +#ifdef UMF_POOL_DISJOINT_ENABLED +#include +#endif + +#ifdef UMF_POOL_JEMALLOC_ENABLED +#include +#endif + +struct provider_interface { + using params_ptr = std::unique_ptr; + + umf_memory_provider_handle_t provider = NULL; + virtual void SetUp(::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + auto params = getParams(state); + auto umf_result = + umfMemoryProviderCreate(getOps(state), params.get(), &provider); + if (umf_result != UMF_RESULT_SUCCESS) { + state.SkipWithError("umfMemoryProviderCreate() failed"); + } + } + + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + + if (provider) { + umfMemoryProviderDestroy(provider); + } + } + + virtual umf_memory_provider_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) { + return nullptr; + } + + virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { + return {nullptr, [](void *) {}}; + } +}; + +template ::value>> +struct pool_interface { + using params_ptr = std::unique_ptr; + + virtual void SetUp(::benchmark::State &state) { + provider.SetUp(state); + if (state.thread_index() != 0) { + return; + } + auto params = getParams(state); + auto umf_result = umfPoolCreate(getOps(state), provider.provider, + params.get(), 0, &pool); + if (umf_result != UMF_RESULT_SUCCESS) { + state.SkipWithError("umfPoolCreate() failed"); + } + } + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + // TODO: The scalable pool destruction process can race with other threads + // performing TLS (Thread-Local Storage) destruction. + // As a temporary workaround, we introduce a delay (sleep) + // to ensure the pool is destroyed only after all threads have completed. 
+ // Issue: #933 + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + if (pool) { + umfPoolDestroy(pool); + } + }; + + virtual umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) { + return nullptr; + } + virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { + return {nullptr, [](void *) {}}; + } + T provider; + umf_memory_pool_handle_t pool; +}; + +class allocator_interface { + public: + virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, + [[maybe_unused]] unsigned argPos) = 0; + virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; + virtual void *benchAlloc(size_t size) = 0; + virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) = 0; + static std::vector argsName() { return {}; } +}; + +struct glibc_malloc : public allocator_interface { + unsigned SetUp([[maybe_unused]] ::benchmark::State &state, + unsigned argPos) override { + return argPos; + } + void TearDown([[maybe_unused]] ::benchmark::State &state) override{}; + void *benchAlloc(size_t size) override { return malloc(size); } + void benchFree(void *ptr, [[maybe_unused]] size_t size) override { + free(ptr); + } + static std::string name() { return "glibc"; } +}; + +struct os_provider : public provider_interface { + provider_interface::params_ptr + getParams(::benchmark::State &state) override { + umf_os_memory_provider_params_handle_t raw_params = nullptr; + umfOsMemoryProviderParamsCreate(&raw_params); + if (!raw_params) { + state.SkipWithError("Failed to create os provider params"); + return {nullptr, [](void *) {}}; + } + + // Use a lambda as the custom deleter + auto deleter = [](void *p) { + auto handle = + static_cast(p); + umfOsMemoryProviderParamsDestroy(handle); + }; + + return {static_cast(raw_params), deleter}; + } + + umf_memory_provider_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfOsMemoryProviderOps(); + } + static std::string name() { return "os_provider"; } +}; + +template +struct proxy_pool : public pool_interface { + umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfProxyPoolOps(); + } + + static std::string name() { return "proxy_pool<" + Provider::name() + ">"; } +}; + +#ifdef UMF_POOL_DISJOINT_ENABLED +template +struct disjoint_pool : public pool_interface { + umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfDisjointPoolOps(); + } + + typename pool_interface::params_ptr + getParams(::benchmark::State &state) override { + umf_disjoint_pool_params_handle_t raw_params = nullptr; + auto ret = umfDisjointPoolParamsCreate(&raw_params); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to create disjoint pool params"); + return {nullptr, [](void *) {}}; + } + + typename pool_interface::params_ptr params( + raw_params, [](void *p) { + umfDisjointPoolParamsDestroy( + static_cast(p)); + }); + + ret = umfDisjointPoolParamsSetSlabMinSize(raw_params, 4096); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to set slab min size"); + return {nullptr, [](void *) {}}; + } + + ret = umfDisjointPoolParamsSetCapacity(raw_params, 4); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to set capacity"); + return {nullptr, [](void *) {}}; + } + + ret = umfDisjointPoolParamsSetMinBucketSize(raw_params, 4096); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to set min bucket size"); + return {nullptr, [](void *) {}}; + } + + ret = 
umfDisjointPoolParamsSetMaxPoolableSize(raw_params, 4096 * 16); + if (ret != UMF_RESULT_SUCCESS) { + state.SkipWithError("Failed to set max poolable size"); + return {nullptr, [](void *) {}}; + } + + return params; + } + + static std::string name() { + return "disjoint_pool<" + Provider::name() + ">"; + } +}; +#endif + +#ifdef UMF_POOL_JEMALLOC_ENABLED +template +struct jemalloc_pool : public pool_interface { + umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfJemallocPoolOps(); + } + + static std::string name() { + return "jemalloc_pool<" + Provider::name() + ">"; + } +}; +#endif + +#ifdef UMF_POOL_SCALABLE_ENABLED +template +struct scalable_pool : public pool_interface { + virtual umf_memory_pool_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfScalablePoolOps(); + } + + static std::string name() { + return "scalable_pool<" + Provider::name() + ">"; + } +}; +#endif From 4d9e2a5bb6b5cb9e135d13095c7448b0a3d8cdb6 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 8 Jan 2025 09:45:42 +0100 Subject: [PATCH 076/466] use the UMF version from git describe in docs, not the harcoded one --- CMakeLists.txt | 2 +- RELEASE_STEPS.md | 4 +--- docs/README.md | 22 +++++++++++++++------- docs/config/conf.py | 12 +++++++++--- 4 files changed, 26 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f9cfc3a07..495c70de3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -766,7 +766,7 @@ if(Python3_FOUND) add_custom_target( docs WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - COMMAND ${Python3_EXECUTABLE} + COMMAND UMF_VERSION=${UMF_CMAKE_VERSION} ${Python3_EXECUTABLE} ${UMF_CMAKE_SOURCE_DIR}/docs/generate_docs.py COMMENT "Generate HTML documentation using Doxygen") endif() diff --git a/RELEASE_STEPS.md b/RELEASE_STEPS.md index 2609e36bb..9e04dc850 100644 --- a/RELEASE_STEPS.md +++ b/RELEASE_STEPS.md @@ -39,9 +39,7 @@ Do changes for a release: - For major/minor release start from the `main` branch - Add an entry to ChangeLog, remember to change the day of the week in the release date - For major releases mention API and ABI compatibility with the previous release -- Update project's version in a few places: - - For major and minor releases: `UMF_VERSION_CURRENT` in `include/umf/base.h` (the API version) - - `release` variable in `docs/config/conf.py` (for docs) +- For major and minor releases, update `UMF_VERSION_CURRENT` in `include/umf/base.h` (the API version) - For major releases update ABI version in `.map` and `.def` files - These files are defined for all public libraries (`libumf` and `proxy_lib`, at the moment) - Commit these changes and tag the release: diff --git a/docs/README.md b/docs/README.md index 3564d86db..737bb1259 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,7 +1,14 @@ # Documentation -To generate HTML documentation run the `generate_docs.py` script from any sub-dir of the -repository (most likely `build`) or enable and use build target 'docs' (see details below). +To generate HTML documentation, run the `generate_docs.py` script from any sub-directory of the repository (most likely `build`). +To display the proper version of UMF in the documentation title, set the `UMF_VERSION` variable before running the script. + +```bash +cd build +$ UMF_VERSION= python ../docs/generate_docs.py +``` + +Documentation can also be built using the build target 'docs' (see details below). 
This script will create `./docs_build` sub-directory, where the intermediate and final files will be created. HTML docs will be in the `./docs_build/generated/html` directory. @@ -12,13 +19,14 @@ To run documentation generation via build target use CMake commands below. To enable this target, python executable (in required version) has to be found in the system. ```bash -$ cmake -B build -$ cmake --build build --target docs +cmake -B build +cmake --build build --target docs ``` ## Requirements Script to generate HTML docs requires: - * [Doxygen](http://www.doxygen.nl/) at least v1.9.1 - * [Python](https://www.python.org/downloads/) at least v3.8 - * and python pip requirements, as defined in `third_party/requirements.txt` + +* [Doxygen](http://www.doxygen.nl/) at least v1.9.1 +* [Python](https://www.python.org/downloads/) at least v3.8 +* and python pip requirements, as defined in `third_party/requirements.txt` diff --git a/docs/config/conf.py b/docs/config/conf.py index 3af2df378..fa4788ff4 100644 --- a/docs/config/conf.py +++ b/docs/config/conf.py @@ -1,3 +1,5 @@ +import os + # Configuration file for the Sphinx documentation builder. # # This file only contains a selection of the most common options. For a full @@ -18,12 +20,16 @@ # -- Project information ----------------------------------------------------- project = "Intel Unified Memory Framework" -copyright = "2023-2024, Intel" +copyright = "2023-2025, Intel" author = "Intel" # The full version, including alpha/beta/rc tags -release = "0.11.0" - +release = os.getenv("UMF_VERSION", "") +print( + f"UMF_VERSION used in docs: {release}" + if release != "" + else "please set UMF_VERSION environment variable before running this script" +) # -- General configuration --------------------------------------------------- From e94647d1bf0f6cc4c1388cd9c41b187cf2ef028d Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Tue, 7 Jan 2025 12:32:33 +0100 Subject: [PATCH 077/466] Change linking to static for ICX Intel libraries --- .github/workflows/reusable_basic.yml | 3 +-- .github/workflows/reusable_sanitizers.yml | 1 - CMakeLists.txt | 6 ++++++ test/CMakeLists.txt | 8 +++++++- 4 files changed, 14 insertions(+), 4 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index ab4c8061b..83542efbb 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -182,8 +182,7 @@ jobs: - name: Run tests working-directory: ${{env.BUILD_DIR}} run: | - ${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh' || true }} - ctest --output-on-failure # run all tests for better coverage + LD_LIBRARY_PATH=${{env.BUILD_DIR}}/lib/ ctest --output-on-failure # run all tests for better coverage - name: Check coverage if: ${{ matrix.build_type == 'Debug' && matrix.compiler.c == 'gcc' }} diff --git a/.github/workflows/reusable_sanitizers.yml b/.github/workflows/reusable_sanitizers.yml index f9e121f88..93752ff84 100644 --- a/.github/workflows/reusable_sanitizers.yml +++ b/.github/workflows/reusable_sanitizers.yml @@ -77,7 +77,6 @@ jobs: ASAN_OPTIONS: allocator_may_return_null=1 TSAN_OPTIONS: allocator_may_return_null=1 run: | - ${{ matrix.compiler.cxx == 'icpx' && '. 
/opt/intel/oneapi/setvars.sh' || true }} ctest --output-on-failure windows-build: diff --git a/CMakeLists.txt b/CMakeLists.txt index f9cfc3a07..6c4f4e4c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,12 @@ include(CMakePackageConfigHelpers) include(GNUInstallDirs) find_package(PkgConfig) +if(CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM") + # Compiler dependencies needs to be in library path or to be linked + # statically + add_link_options(-static-intel) +endif() + # Build Options option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 04ebfe109..7eed07e09 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -1,10 +1,16 @@ -# Copyright (C) 2022-2024 Intel Corporation +# Copyright (C) 2022-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED YES) +if(CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM") + # Compiler dependencies needs to be in library path or to be linked + # statically + add_link_options(-static-intel) +endif() + include(FetchContent) FetchContent_Declare( googletest From e34952096ae5c6ea3e5368ba7f9625f740e8d52c Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Thu, 9 Jan 2025 15:22:12 +0100 Subject: [PATCH 078/466] Remove all Intel libs from main library --- CMakeLists.txt | 6 ------ cmake/helpers.cmake | 5 ++++- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6c4f4e4c6..f9cfc3a07 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,12 +33,6 @@ include(CMakePackageConfigHelpers) include(GNUInstallDirs) find_package(PkgConfig) -if(CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM") - # Compiler dependencies needs to be in library path or to be linked - # statically - add_link_options(-static-intel) -endif() - # Build Options option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 56692ff6e..2d14e2f45 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -1,4 +1,4 @@ -# Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -378,6 +378,9 @@ function(add_umf_library) elseif(LINUX) target_link_options(${ARG_NAME} PRIVATE "-Wl,--version-script=${ARG_LINUX_MAP_FILE}") + if(CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM") + target_link_options(${ARG_NAME} PRIVATE -no-intel-lib) + endif() endif() endif() From 286a6d8507fab37fb799c341dfa19db0561f4a6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Fri, 10 Jan 2025 13:30:49 +0100 Subject: [PATCH 079/466] 0.10.1 release --- .github/workflows/reusable_basic.yml | 2 +- ChangeLog | 10 ++++++++++ scripts/docs_config/conf.py | 4 ++-- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 1c13a771b..ced48e0c7 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -8,7 +8,7 @@ permissions: env: # for installation testing - it should match with version set in CMake - UMF_VERSION: 0.10.0 + UMF_VERSION: 0.10.1 BUILD_DIR : "${{github.workspace}}/build" INSTL_DIR : "${{github.workspace}}/../install-dir" COVERAGE_DIR : "${{github.workspace}}/coverage" diff --git a/ChangeLog b/ChangeLog index 75b69fdeb..0736379f8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Fri Jan 10 2025 Łukasz Stolarczuk + + * Version 0.10.1 + + This patch release contains: + - Set symbol versions 0.10 in def/map files (#1013) + - Fix: remove incorrect assert in utils_align_ptr_up_size_down() (#977) + - Add strings with UMF version and useful CMake options (#992) + - Extended error messages, when providers are disabled (#1012) + Mon Dec 09 2024 Łukasz Stolarczuk * Version 0.10.0 diff --git a/scripts/docs_config/conf.py b/scripts/docs_config/conf.py index 28c9b5f9f..77d985627 100644 --- a/scripts/docs_config/conf.py +++ b/scripts/docs_config/conf.py @@ -18,11 +18,11 @@ # -- Project information ----------------------------------------------------- project = "Intel Unified Memory Framework" -copyright = "2023-2024, Intel" +copyright = "2023-2025, Intel" author = "Intel" # The full version, including alpha/beta/rc tags -release = "0.10.0" +release = "0.10.1" # -- General configuration --------------------------------------------------- From ed94adb2dfdf4477f4ad036b3cbe3d1dac20bf00 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 10 Jan 2025 21:51:07 +0000 Subject: [PATCH 080/466] Bump pygments in /third_party in the pip-dependencies group Bumps the pip-dependencies group in /third_party with 1 update: [pygments](https://github.com/pygments/pygments). Updates `pygments` from 2.18.0 to 2.19.1 - [Release notes](https://github.com/pygments/pygments/releases) - [Changelog](https://github.com/pygments/pygments/blob/master/CHANGES) - [Commits](https://github.com/pygments/pygments/compare/2.18.0...2.19.1) --- updated-dependencies: - dependency-name: pygments dependency-type: direct:production update-type: version-update:semver-minor dependency-group: pip-dependencies ... 
Signed-off-by: dependabot[bot] --- third_party/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/requirements.txt b/third_party/requirements.txt index 6a8be6e46..9832bf2f0 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -6,7 +6,7 @@ black==24.3.0 # Tests packaging==24.2 # Generating HTML documentation -pygments==2.18.0 +pygments==2.19.1 sphinxcontrib_applehelp==2.0.0 sphinxcontrib_devhelp==2.0.0 sphinxcontrib_htmlhelp==2.1.0 From 5db7da2bedb54581e8293c878db70b58f7d3e9d8 Mon Sep 17 00:00:00 2001 From: rbanka1 Date: Wed, 8 Jan 2025 15:32:49 +0100 Subject: [PATCH 081/466] Added license checker based on https://github.com/pmem/pmemstream/tree/master/utils/check_license Changes for this repository: - update dates - file exceptions --- .github/workflows/reusable_checks.yml | 8 + include/umf.h | 2 +- include/umf/memory_pool_ops.h | 2 +- include/umf/memtarget.h | 2 +- include/umf/pools/pool_jemalloc.h | 2 +- include/umf/pools/pool_scalable.h | 2 +- scripts/check_license/check_headers.sh | 177 +++++++++++++++++++++++ scripts/check_license/file-exceptions.sh | 34 +++++ src/ctl/ctl.c | 4 +- src/libumf.def | 2 +- src/libumf.map | 2 +- src/libumf.rc.in | 4 +- src/memory_pool_internal.h | 2 +- src/memory_provider_get_last_failed.c | 2 +- src/memory_provider_internal.h | 2 +- src/memspaces/memspace_numa.c | 2 +- src/memtargets/memtarget_numa.h | 2 +- src/pool/pool_disjoint.cpp | 2 +- src/proxy_lib/proxy_lib.rc.in | 4 +- src/utils/utils_common.h | 2 +- src/utils/utils_posix_concurrency.c | 2 +- src/utils/utils_windows_concurrency.c | 2 +- src/utils/utils_windows_math.c | 2 +- test/common/pool_trace.c | 2 +- test/common/test_helpers.c | 2 +- test/malloc_compliance_tests.cpp | 2 +- test/poolFixtures.hpp | 2 +- test/pools/jemalloc_pool.cpp | 2 +- test/pools/pool_base_alloc.cpp | 2 +- test/pools/scalable_pool.cpp | 2 +- test/provider_os_memory.cpp | 2 +- 31 files changed, 250 insertions(+), 31 deletions(-) create mode 100755 scripts/check_license/check_headers.sh create mode 100755 scripts/check_license/file-exceptions.sh diff --git a/.github/workflows/reusable_checks.yml b/.github/workflows/reusable_checks.yml index 6298b9883..de28161a5 100644 --- a/.github/workflows/reusable_checks.yml +++ b/.github/workflows/reusable_checks.yml @@ -52,6 +52,14 @@ jobs: - name: Check Python formatting run: cmake --build build --target black-format-check + - name: Run check-license + run: | + ./scripts/check_license/check_headers.sh . "Apache-2.0 WITH LLVM-exception" -v + + - name: Run copyright-format + run: | + ./scripts/check_license/check_headers.sh . "Apache-2.0 WITH LLVM-exception" -d + - name: Run a spell check uses: crate-ci/typos@b63f421581dce830bda2f597a678cb7776b41877 # v1.18.2 with: diff --git a/include/umf.h b/include/umf.h index 3e2d82799..57bebef8a 100644 --- a/include/umf.h +++ b/include/umf.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/include/umf/memory_pool_ops.h b/include/umf/memory_pool_ops.h index 67afdd166..829f49fb7 100644 --- a/include/umf/memory_pool_ops.h +++ b/include/umf/memory_pool_ops.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/include/umf/memtarget.h b/include/umf/memtarget.h index d74947f14..55ca30919 100644 --- a/include/umf/memtarget.h +++ b/include/umf/memtarget.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/include/umf/pools/pool_jemalloc.h b/include/umf/pools/pool_jemalloc.h index c30df6509..5974e6440 100644 --- a/include/umf/pools/pool_jemalloc.h +++ b/include/umf/pools/pool_jemalloc.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/include/umf/pools/pool_scalable.h b/include/umf/pools/pool_scalable.h index 072169b68..1915ad0b7 100644 --- a/include/umf/pools/pool_scalable.h +++ b/include/umf/pools/pool_scalable.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/scripts/check_license/check_headers.sh b/scripts/check_license/check_headers.sh new file mode 100755 index 000000000..d68b0891b --- /dev/null +++ b/scripts/check_license/check_headers.sh @@ -0,0 +1,177 @@ +#!/usr/bin/env bash + # Copyright (C) 2016-2024 Intel Corporation + # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# check-headers.sh - check copyright and license in source files + +SELF=$0 + +function usage() { + echo "Usage: $SELF [-h|-v|-a|-d]" + echo " -h, --help this help message" + echo " -v, --verbose verbose mode" + echo " -a, --all check all files (only modified files are checked by default)" + echo " -d, --update_dates change Copyright dates in all analyzed files (rather not use with -a)" +} + +if [ "$#" -lt 2 ]; then + usage >&2 + exit 2 +fi + +SOURCE_ROOT=$1 +shift +LICENSE=$1 +shift + +PATTERN=`mktemp` +TMP=`mktemp` +TMP2=`mktemp` +TEMPFILE=`mktemp` +rm -f $PATTERN $TMP $TMP2 + +if [ "$1" == "-h" -o "$1" == "--help" ]; then + usage + exit 0 +fi + +export GIT="git -C ${SOURCE_ROOT}" +$GIT rev-parse || exit 1 + +if [ -f $SOURCE_ROOT/.git/shallow ]; then + SHALLOW_CLONE=1 + echo + echo "Warning: This is a shallow clone. Checking dates in copyright headers" + echo " will be skipped in case of files that have no history." + echo +else + SHALLOW_CLONE=0 +fi + +VERBOSE=0 +CHECK_ALL=0 +UPDATE_DATES=0 +while [ "$1" != "" ]; do + case $1 in + -v|--verbose) + VERBOSE=1 + ;; + -a|--all) + CHECK_ALL=1 + ;; + -d|--update_dates) + UPDATE_DATES=1 + ;; + esac + shift +done + +if [ $CHECK_ALL -eq 0 ]; then + CURRENT_COMMIT=$($GIT log --pretty=%H -1) + MERGE_BASE=$($GIT merge-base HEAD origin/master 2>/dev/null) + [ -z $MERGE_BASE ] && \ + MERGE_BASE=$($GIT log --pretty="%cN:%H" | grep GitHub | head -n1 | cut -d: -f2) + [ -z $MERGE_BASE -o "$CURRENT_COMMIT" = "$MERGE_BASE" ] && \ + CHECK_ALL=1 +fi + +if [ $CHECK_ALL -eq 1 ]; then + echo "INFO: Checking copyright headers of all files..." + GIT_COMMAND="ls-tree -r --name-only HEAD" +else + echo "INFO: Checking copyright headers of modified files only..." 
+ GIT_COMMAND="diff --name-only $MERGE_BASE $CURRENT_COMMIT" +fi + +FILES=$($GIT $GIT_COMMAND | ${SOURCE_ROOT}/scripts/check_license/file-exceptions.sh) + +RV=0 +for file in $FILES ; do + if [ $VERBOSE -eq 1 ]; then + echo "Checking file: $file" + fi + # The src_path is a path which should be used in every command except git. + # git is called with -C flag so filepaths should be relative to SOURCE_ROOT + src_path="${SOURCE_ROOT}/$file" + [ ! -f $src_path ] && continue + # ensure that file is UTF-8 encoded + ENCODING=`file -b --mime-encoding $src_path` + iconv -f $ENCODING -t "UTF-8" $src_path > $TEMPFILE + + if ! grep -q "SPDX-License-Identifier: $LICENSE" $src_path; then + echo >&2 "error: no $LICENSE SPDX tag in file: $src_path" + RV=1 + fi + + if [ $SHALLOW_CLONE -eq 0 ]; then + $GIT log --no-merges --format="%ai %aE" -- $file | sort > $TMP + else + # mark the grafted commits (commits with no parents) + $GIT log --no-merges --format="%ai %aE grafted-%p-commit" -- $file | sort > $TMP + fi + + # skip checking dates for non-Intel commits + [[ ! $(tail -n1 $TMP) =~ "@intel.com" ]] && continue + + # skip checking dates for new files + [ $(cat $TMP | wc -l) -le 1 ] && continue + + # grep out the grafted commits (commits with no parents) + # and skip checking dates for non-Intel commits + grep -v -e "grafted--commit" $TMP | grep -e "@intel.com" > $TMP2 + + [ $(cat $TMP2 | wc -l) -eq 0 ] && continue + + FIRST=`head -n1 $TMP2` + LAST=` tail -n1 $TMP2` + + YEARS=$(sed ' +/.*Copyright (C) \+.*[0-9-]\+ Intel Corporation/!d +s/.*Copyright (C) \([0-9]\+\)-\([0-9]\+\).*/\1-\2/ +s/.*Copyright (C) \([0-9]\+\).*/\1/' "$src_path") + if [ -z "$YEARS" ]; then + echo >&2 "No copyright years in $src_path" + RV=1 + continue + fi + + HEADER_FIRST=`echo $YEARS | cut -d"-" -f1` + HEADER_LAST=` echo $YEARS | cut -d"-" -f2` + + COMMIT_FIRST=`echo $FIRST | cut -d"-" -f1` + COMMIT_LAST=` echo $LAST | cut -d"-" -f1` + + if [ "$COMMIT_FIRST" != "" -a "$COMMIT_LAST" != "" ]; then + if [[ -n "$COMMIT_FIRST" && -n "$COMMIT_LAST" ]]; then + if [[ $COMMIT_FIRST -eq $COMMIT_LAST ]]; then + NEW=$COMMIT_LAST + else + NEW=$COMMIT_FIRST-$COMMIT_LAST + fi + + if [[ "$YEARS" == "$NEW" ]]; then + continue + else + if [[ ${UPDATE_DATES} -eq 1 ]]; then + sed -i "s/Copyright ${YEARS}/Copyright ${NEW}/g" "${src_path}" + else + echo "$file:1: error: wrong copyright date: (is: $YEARS, should be: $NEW)" >&2 + RV=1 + fi + fi + fi + else + echo "error: unknown commit dates in file: $file" >&2 + RV=1 + fi +done +rm -f $TMP $TMP2 $TEMPFILE + +# check if error found +if [ $RV -eq 0 ]; then + echo "Copyright headers are OK." +else + echo "Error(s) in copyright headers found!" >&2 +fi +exit $RV diff --git a/scripts/check_license/file-exceptions.sh b/scripts/check_license/file-exceptions.sh new file mode 100755 index 000000000..144e4b65b --- /dev/null +++ b/scripts/check_license/file-exceptions.sh @@ -0,0 +1,34 @@ +#!/bin/sh -e +# Copyright (C) 2025 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# You can add an exception file +# list for license and copyright check +grep -v -E -e 'benchmark/ubench.h' \ + -e 'ChangeLog' \ + -e 'CODEOWNERS$' \ + -e 'docs/assets/.*' \ + -e 'docs/config/conf.py' \ + -e 'docs/config/Doxyfile' \ + -e 'include/umf/proxy_lib_new_delete.h' \ + -e 'LICENSE.TXT' \ + -e 'scripts/assets/images/.*' \ + -e 'src/uthash/.*' \ + -e 'src/uthash/utlist.h' \ + -e 'src/uthash/uthash.h' \ + -e 'test/supp/.*' \ + -e '.clang-format$' \ + -e '.cmake-format$' \ + -e '.cmake.in$' \ + -e '.gitignore' \ + -e '.json$' \ + -e '.mailmap' \ + -e '.md$' \ + -e '.patch$' \ + -e '.rst$' \ + -e '.spellcheck-conf.toml' \ + -e '.trivyignore' \ + -e '.txt$' \ + -e '.xml$' \ + -e '.yml$' diff --git a/src/ctl/ctl.c b/src/ctl/ctl.c index d54e8390e..4db11ac21 100644 --- a/src/ctl/ctl.c +++ b/src/ctl/ctl.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2016-2024 Intel Corporation + * Copyright (C) 2016-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -9,7 +9,7 @@ // This file was originally under following license: // SPDX-License-Identifier: BSD-3-Clause -/* Copyright 2016-2024, Intel Corporation */ +/* Copyright 2024, Intel Corporation */ /* * ctl.c -- implementation of the interface for examination and modification of diff --git a/src/libumf.def b/src/libumf.def index 5d1c5047f..d4c8bb777 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -1,5 +1,5 @@ ;;;; Begin Copyright Notice -; Copyright (C) 2024 Intel Corporation +; Copyright (C) 2023-2025 Intel Corporation ; Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. ; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ;;;; End Copyright Notice diff --git a/src/libumf.map b/src/libumf.map index d604dd64e..ff6348e22 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/libumf.rc.in b/src/libumf.rc.in index 7aba79e7e..8ee85d626 100644 --- a/src/libumf.rc.in +++ b/src/libumf.rc.in @@ -1,4 +1,4 @@ -// Copyright (c) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -51,7 +51,7 @@ BEGIN VALUE "CompanyName", "Intel Corporation\0" VALUE "FileDescription", "Unified Memory Framework (UMF) library\0" VALUE "FileVersion", _UMF_VERSION "\0" - VALUE "LegalCopyright", "Copyright 2024, Intel Corporation. All rights reserved.\0" + VALUE "LegalCopyright", "Copyright 2024-2025, Intel Corporation. All rights reserved.\0" VALUE "LegalTrademarks", "\0" VALUE "OriginalFilename", "umf.dll\0" VALUE "ProductName", "Unified Memory Framework (UMF)\0" diff --git a/src/memory_pool_internal.h b/src/memory_pool_internal.h index e556ace21..ab3378163 100644 --- a/src/memory_pool_internal.h +++ b/src/memory_pool_internal.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/memory_provider_get_last_failed.c b/src/memory_provider_get_last_failed.c index 9434eea97..09bd075e1 100644 --- a/src/memory_provider_get_last_failed.c +++ b/src/memory_provider_get_last_failed.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/memory_provider_internal.h b/src/memory_provider_internal.h index 60955e0fb..0b7f45f80 100644 --- a/src/memory_provider_internal.h +++ b/src/memory_provider_internal.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/memspaces/memspace_numa.c b/src/memspaces/memspace_numa.c index 0028e394d..83e65fc29 100644 --- a/src/memspaces/memspace_numa.c +++ b/src/memspaces/memspace_numa.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/memtargets/memtarget_numa.h b/src/memtargets/memtarget_numa.h index 2d3e3fd70..6659d045e 100644 --- a/src/memtargets/memtarget_numa.h +++ b/src/memtargets/memtarget_numa.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp index e0298b43d..0390f5375 100644 --- a/src/pool/pool_disjoint.cpp +++ b/src/pool/pool_disjoint.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/proxy_lib/proxy_lib.rc.in b/src/proxy_lib/proxy_lib.rc.in index dce151ec3..f0497fb40 100644 --- a/src/proxy_lib/proxy_lib.rc.in +++ b/src/proxy_lib/proxy_lib.rc.in @@ -1,4 +1,4 @@ -// Copyright (c) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -51,7 +51,7 @@ BEGIN VALUE "CompanyName", "Intel Corporation\0" VALUE "FileDescription", "Unified Memory Framework (UMF) proxy library\0" VALUE "FileVersion", _UMF_VERSION "\0" - VALUE "LegalCopyright", "Copyright 2024, Intel Corporation. All rights reserved.\0" + VALUE "LegalCopyright", "Copyright 2024-2025, Intel Corporation. All rights reserved.\0" VALUE "LegalTrademarks", "\0" VALUE "OriginalFilename", "umf_proxy.dll\0" VALUE "ProductName", "Unified Memory Framework (UMF)\0" diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index 9ef2b3cf1..6af5a08d9 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/utils/utils_posix_concurrency.c b/src/utils/utils_posix_concurrency.c index fcf04ed95..531e09c10 100644 --- a/src/utils/utils_posix_concurrency.c +++ b/src/utils/utils_posix_concurrency.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/utils/utils_windows_concurrency.c b/src/utils/utils_windows_concurrency.c index 696f4523b..e2cc574a9 100644 --- a/src/utils/utils_windows_concurrency.c +++ b/src/utils/utils_windows_concurrency.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/src/utils/utils_windows_math.c b/src/utils/utils_windows_math.c index 07c4c9978..cd21ae696 100644 --- a/src/utils/utils_windows_math.c +++ b/src/utils/utils_windows_math.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/test/common/pool_trace.c b/test/common/pool_trace.c index 29329f31c..d8b7522ea 100644 --- a/test/common/pool_trace.c +++ b/test/common/pool_trace.c @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/test/common/test_helpers.c b/test/common/test_helpers.c index 71f018d0f..d69ca3535 100644 --- a/test/common/test_helpers.c +++ b/test/common/test_helpers.c @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // This file contains tests for UMF pool API diff --git a/test/malloc_compliance_tests.cpp b/test/malloc_compliance_tests.cpp index 06e3b5dd7..b91bde1f6 100644 --- a/test/malloc_compliance_tests.cpp +++ b/test/malloc_compliance_tests.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/test/poolFixtures.hpp b/test/poolFixtures.hpp index 6f54fe114..bd97ac1fa 100644 --- a/test/poolFixtures.hpp +++ b/test/poolFixtures.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/test/pools/jemalloc_pool.cpp b/test/pools/jemalloc_pool.cpp index 042841fc4..86784d919 100644 --- a/test/pools/jemalloc_pool.cpp +++ b/test/pools/jemalloc_pool.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/test/pools/pool_base_alloc.cpp b/test/pools/pool_base_alloc.cpp index ec07a7c2f..752d9f01e 100644 --- a/test/pools/pool_base_alloc.cpp +++ b/test/pools/pool_base_alloc.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/test/pools/scalable_pool.cpp b/test/pools/scalable_pool.cpp index 51cc02030..ce55923d9 100644 --- a/test/pools/scalable_pool.cpp +++ b/test/pools/scalable_pool.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index 4c81b84f9..9544a6fed 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception From 83a6a46709ab47c18db51e8930ebdfaa55cc7e84 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Mon, 13 Jan 2025 15:04:41 +0100 Subject: [PATCH 082/466] Add GAI Tooling Notice to readme --- README.md | 49 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 4cd1d8ff5..b16f35ff6 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ For a quick introduction to UMF usage, please see [examples](https://oneapi-src.github.io/unified-memory-framework/examples.html) documentation, which includes the code of the [basic example](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/basic/basic.c). -The are also more advanced that allocates USM memory from the +The are also more advanced that allocates USM memory from the [Level Zero device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/level_zero_shared_memory/level_zero_shared_memory.c) using the Level Zero API and UMF Level Zero memory provider and [CUDA device](https://github.com/oneapi-src/unified-memory-framework/blob/main/examples/cuda_shared_memory/cuda_shared_memory.c) using the CUDA API and UMF CUDA memory provider. @@ -28,19 +28,23 @@ using the CUDA API and UMF CUDA memory provider. ### Requirements Required packages: + - libhwloc-dev >= 2.3.0 (Linux) / hwloc >= 2.3.0 (Windows) - C compiler - [CMake](https://cmake.org/) >= 3.14.0 For development and contributions: + - clang-format-15.0 (can be installed with `python -m pip install clang-format==15.0.7`) - cmake-format-0.6 (can be installed with `python -m pip install cmake-format==0.6.13`) - black (can be installed with `python -m pip install black==24.3.0`) For building tests, multithreaded benchmarks and Disjoint Pool: + - C++ compiler with C++17 support For Level Zero memory provider tests: + - Level Zero headers and libraries - compatible GPU with installed driver @@ -50,8 +54,8 @@ Executable and binaries will be in **build/bin**. The `{build_config}` can be either `Debug` or `Release`. 
```bash -$ cmake -B build -DCMAKE_BUILD_TYPE={build_config} -$ cmake --build build -j $(nproc) +cmake -B build -DCMAKE_BUILD_TYPE={build_config} +cmake --build build -j $(nproc) ``` ### Windows @@ -60,8 +64,8 @@ Generating Visual Studio Project. EXE and binaries will be in **build/bin/{build The `{build_config}` can be either `Debug` or `Release`. ```bash -$ cmake -B build -G "Visual Studio 15 2017 Win64" -$ cmake --build build --config {build_config} -j $Env:NUMBER_OF_PROCESSORS +cmake -B build -G "Visual Studio 15 2017 Win64" +cmake --build build --config {build_config} -j $Env:NUMBER_OF_PROCESSORS ``` ### Benchmark @@ -73,20 +77,22 @@ UMF also provides multithreaded benchmarks that can be enabled by setting both `UMF_BUILD_BENCHMARKS` and `UMF_BUILD_BENCHMARKS_MT` CMake configuration flags to `ON`. Multithreaded benchmarks require a C++ support. -The Scalable Pool requirements can be found in the relevant 'Memory Pool +The Scalable Pool requirements can be found in the relevant 'Memory Pool managers' section below. ### Sanitizers List of sanitizers available on Linux: + - AddressSanitizer - UndefinedBehaviorSanitizer - ThreadSanitizer - - Is mutually exclusive with other sanitizers. + - Is mutually exclusive with other sanitizers. - MemorySanitizer - - Requires linking against MSan-instrumented libraries to prevent false positive reports. More information [here](https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo). + - Requires linking against MSan-instrumented libraries to prevent false positive reports. More information [here](https://github.com/google/sanitizers/wiki/MemorySanitizerLibcxxHowTo). List of sanitizers available on Windows: + - AddressSanitizer Listed sanitizers can be enabled with appropriate [CMake options](#cmake-standard-options). @@ -133,7 +139,7 @@ UMF comes with predefined pool allocators (see [`include/umf/pools`](include/umf providers that implement a specific interface (see [`include/umf/memory_pool_ops.h`](include/umf/memory_pool_ops.h) and [`include/umf/memory_provider_ops.h`](include/umf/memory_provider_ops.h)). -More detailed documentation is available here: https://oneapi-src.github.io/unified-memory-framework/ +More detailed documentation is available here: ### Memory providers @@ -146,6 +152,7 @@ A memory provider that can provide memory from a given pre-allocated buffer. A memory provider that provides memory from an operating system. OS memory provider supports two types of memory mappings (set by the `visibility` parameter): + 1) private memory mapping (`UMF_MEM_MAP_PRIVATE`) 2) shared memory mapping (`UMF_MEM_MAP_SHARED` - supported on Linux only yet) @@ -159,16 +166,18 @@ Permission to duplicate another process's file descriptor is governed by a ptrac the `/proc/sys/kernel/yama/ptrace_scope` interface in the following way: ```sh -$ sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" +sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" ``` There are available two mechanisms for the shared memory mapping: + 1) a named shared memory object (used if the `shm_name` parameter is not NULL) or 2) an anonymous file descriptor (used if the `shm_name` parameter is NULL) The `shm_name` parameter should be a null-terminated string of up to NAME_MAX (i.e., 255) characters none of which are slashes. 
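For illustration, a minimal sketch of configuring the provider for the named shared memory mechanism. Error handling is omitted, and the two parameter setters used below (`umfOsMemoryProviderParamsSetVisibility`, `umfOsMemoryProviderParamsSetShmName`) are assumed names, not taken from this patch series:

```c
#include <umf/memory_provider.h>
#include <umf/providers/provider_os_memory.h>

static umf_memory_provider_handle_t create_shared_os_provider(void) {
    umf_os_memory_provider_params_handle_t params = NULL;
    umf_memory_provider_handle_t provider = NULL;

    umfOsMemoryProviderParamsCreate(&params);
    // Shared visibility is required for the IPC API.
    umfOsMemoryProviderParamsSetVisibility(params, UMF_MEM_MAP_SHARED); /* assumed setter name */
    // Named shared memory object: up to NAME_MAX characters, none of them slashes.
    umfOsMemoryProviderParamsSetShmName(params, "umf_shm_example");     /* assumed setter name */

    umfMemoryProviderCreate(umfOsMemoryProviderOps(), params, &provider);
    umfOsMemoryProviderParamsDestroy(params);
    return provider; // NULL checks and error codes omitted for brevity
}
```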
An anonymous file descriptor for the shared memory mapping will be created using: + 1) `memfd_secret()` syscall - (if it is implemented and) if the `UMF_MEM_FD_FUNC` environment variable does not contain the "memfd_create" string or 2) `memfd_create()` syscall - otherwise (and if it is implemented). @@ -178,7 +187,8 @@ IPC API on Linux requires the `PTRACE_MODE_ATTACH_REALCREDS` permission (see `pt to duplicate another process's file descriptor (see above). Packages required for tests (Linux-only yet): - - libnuma-dev + +- libnuma-dev #### Level Zero memory provider @@ -191,7 +201,7 @@ Permission to duplicate another process's file descriptor is governed by a ptrac the `/proc/sys/kernel/yama/ptrace_scope` interface in the following way: ```sh -$ sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" +sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" ``` ##### Requirements @@ -266,7 +276,7 @@ To enable this feature, the `UMF_BUILD_LIBUMF_POOL_DISJOINT` option needs to be #### Jemalloc pool -Jemalloc pool is a [jemalloc](https://github.com/jemalloc/jemalloc)-based memory +Jemalloc pool is a [jemalloc](https://github.com/jemalloc/jemalloc)-based memory pool manager built as a separate static library: libjemalloc_pool.a on Linux and jemalloc_pool.lib on Windows. The `UMF_BUILD_LIBUMF_POOL_JEMALLOC` option has to be turned `ON` to build this library. @@ -275,6 +285,7 @@ The `UMF_BUILD_LIBUMF_POOL_JEMALLOC` option has to be turned `ON` to build this In case of Linux OS jemalloc is built from the (fetched) sources with the following non-default options enabled: + - `--with-jemalloc-prefix=je_` - adds the `je_` prefix to all public APIs, - `--disable-cxx` - disables C++ integration, it will cause the `new` and the `delete` operators implementations to be omitted. @@ -289,6 +300,7 @@ The default jemalloc package is required on Windows. 1) The `UMF_BUILD_LIBUMF_POOL_JEMALLOC` option turned `ON` 2) jemalloc is required: + - on Linux and MacOS: jemalloc is fetched and built from sources (a custom build), - on Windows: the default jemalloc package is required @@ -300,7 +312,8 @@ It is distributed as part of libumf. To use this pool, TBB must be installed in ##### Requirements Packages required for using this pool and executing tests/benchmarks (not required for build): - - libtbb-dev (libtbbmalloc.so.2) on Linux or tbb (tbbmalloc.dll) on Windows + +- libtbb-dev (libtbbmalloc.so.2) on Linux or tbb (tbbmalloc.dll) on Windows ### Memspaces (Linux-only) @@ -338,10 +351,11 @@ To enable this feature, the `UMF_BUILD_SHARED_LIBRARY` option needs to be turned In case of Linux it can be done without any code changes using the `LD_PRELOAD` environment variable: ```sh -$ LD_PRELOAD=/usr/lib/libumf_proxy.so myprogram +LD_PRELOAD=/usr/lib/libumf_proxy.so myprogram ``` The memory used by the proxy memory allocator is mmap'ed: + 1) with the `MAP_PRIVATE` flag by default or 2) with the `MAP_SHARED` flag if the `UMF_PROXY` environment variable contains one of two following strings: `page.disposition=shared-shm` or `page.disposition=shared-fd`. These two options differ in a mechanism used during IPC: - `page.disposition=shared-shm` - IPC uses the named shared memory. An SHM name is generated using the `umf_proxy_lib_shm_pid_$PID` pattern, where `$PID` is the PID of the process. It creates the `/dev/shm/umf_proxy_lib_shm_pid_$PID` file. 
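For example, both settings can be combined in a single invocation (an illustrative command line; `myprogram` stands for any application):

```sh
UMF_PROXY="page.disposition=shared-shm" LD_PRELOAD=/usr/lib/libumf_proxy.so myprogram
```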
@@ -357,6 +371,7 @@ It can be enabled by adding the `size.threshold=` string to the `UMF_PROX #### Windows In case of Windows it requires: + 1) explicitly linking your program dynamically with the `umf_proxy.dll` library 2) (C++ code only) including `proxy_lib_new_delete.h` in a single(!) source file in your project to override also the `new`/`delete` operations. @@ -370,3 +385,7 @@ an issue or a Pull Request, please read [Contribution Guide](./CONTRIBUTING.md). To enable logging in UMF source files please follow the guide in the [web documentation](https://oneapi-src.github.io/unified-memory-framework/introduction.html#logging). + +## Notices + +The contents of this repository may have been developed with support from one or more Intel-operated generative artificial intelligence solutions. From b229a346f36bca356aa2985c798978597c722532 Mon Sep 17 00:00:00 2001 From: rbanka1 Date: Thu, 9 Jan 2025 10:43:28 +0100 Subject: [PATCH 083/466] Added a condition to check the validity of the starting date and some fixes Changes for this commit: - adding a condition to check the validity of the starting date - broken pipe fix - updating date fix --- .github/workflows/reusable_checks.yml | 4 -- CMakeLists.txt | 2 +- scripts/check_license/check_headers.sh | 80 +++++++++++++++--------- scripts/check_license/file-exceptions.sh | 5 +- src/pool/CMakeLists.txt | 2 +- 5 files changed, 56 insertions(+), 37 deletions(-) diff --git a/.github/workflows/reusable_checks.yml b/.github/workflows/reusable_checks.yml index de28161a5..6e700cec1 100644 --- a/.github/workflows/reusable_checks.yml +++ b/.github/workflows/reusable_checks.yml @@ -56,10 +56,6 @@ jobs: run: | ./scripts/check_license/check_headers.sh . "Apache-2.0 WITH LLVM-exception" -v - - name: Run copyright-format - run: | - ./scripts/check_license/check_headers.sh . "Apache-2.0 WITH LLVM-exception" -d - - name: Run a spell check uses: crate-ci/typos@b63f421581dce830bda2f597a678cb7776b41877 # v1.18.2 with: diff --git a/CMakeLists.txt b/CMakeLists.txt index 495c70de3..58b189ce1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2022-2024 Intel Corporation +# Copyright (C) 2022-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/scripts/check_license/check_headers.sh b/scripts/check_license/check_headers.sh index d68b0891b..aeb90e7a2 100755 --- a/scripts/check_license/check_headers.sh +++ b/scripts/check_license/check_headers.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash - # Copyright (C) 2016-2024 Intel Corporation - # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. - # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# Copyright (C) 2016-2025 Intel Corporation +# Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # check-headers.sh - check copyright and license in source files @@ -68,10 +68,10 @@ while [ "$1" != "" ]; do done if [ $CHECK_ALL -eq 0 ]; then - CURRENT_COMMIT=$($GIT log --pretty=%H -1) - MERGE_BASE=$($GIT merge-base HEAD origin/master 2>/dev/null) + CURRENT_COMMIT=$($GIT --no-pager log --pretty=%H -1) + MERGE_BASE=$($GIT merge-base HEAD origin/main 2>/dev/null) [ -z $MERGE_BASE ] && \ - MERGE_BASE=$($GIT log --pretty="%cN:%H" | grep GitHub | head -n1 | cut -d: -f2) + MERGE_BASE=$($GIT --no-pager log --pretty="%cN:%H" | grep GitHub 2>/dev/null | head -n1 | cut -d: -f2) [ -z $MERGE_BASE -o "$CURRENT_COMMIT" = "$MERGE_BASE" ] && \ CHECK_ALL=1 fi @@ -127,7 +127,7 @@ for file in $FILES ; do LAST=` tail -n1 $TMP2` YEARS=$(sed ' -/.*Copyright (C) \+.*[0-9-]\+ Intel Corporation/!d +/.*Copyright (C) [0-9-]\+ Intel Corporation/!d s/.*Copyright (C) \([0-9]\+\)-\([0-9]\+\).*/\1-\2/ s/.*Copyright (C) \([0-9]\+\).*/\1/' "$src_path") if [ -z "$YEARS" ]; then @@ -142,29 +142,49 @@ s/.*Copyright (C) \([0-9]\+\).*/\1/' "$src_path") COMMIT_FIRST=`echo $FIRST | cut -d"-" -f1` COMMIT_LAST=` echo $LAST | cut -d"-" -f1` - if [ "$COMMIT_FIRST" != "" -a "$COMMIT_LAST" != "" ]; then - if [[ -n "$COMMIT_FIRST" && -n "$COMMIT_LAST" ]]; then - if [[ $COMMIT_FIRST -eq $COMMIT_LAST ]]; then - NEW=$COMMIT_LAST - else - NEW=$COMMIT_FIRST-$COMMIT_LAST - fi - - if [[ "$YEARS" == "$NEW" ]]; then - continue - else - if [[ ${UPDATE_DATES} -eq 1 ]]; then - sed -i "s/Copyright ${YEARS}/Copyright ${NEW}/g" "${src_path}" - else - echo "$file:1: error: wrong copyright date: (is: $YEARS, should be: $NEW)" >&2 - RV=1 - fi - fi - fi - else - echo "error: unknown commit dates in file: $file" >&2 - RV=1 - fi + if [ "$COMMIT_FIRST" != "" -a "$COMMIT_LAST" != "" ]; then + if [ "$COMMIT_FIRST" -lt "$HEADER_FIRST" ]; then + RV=1 + fi + + if [[ -n "$COMMIT_FIRST" && -n "$COMMIT_LAST" ]]; then + if [[ $HEADER_FIRST -le $COMMIT_FIRST ]]; then + if [[ $HEADER_LAST -eq $COMMIT_LAST ]]; then + continue + else + NEW="$HEADER_FIRST-$COMMIT_LAST" + if [[ ${UPDATE_DATES} -eq 1 ]]; then + echo "Updating copyright date in $src_path: $YEARS -> $NEW" + sed -i "s/Copyright (C) ${YEARS}/Copyright (C) ${NEW}/g" "${src_path}" + else + echo "$file:1: error: wrong copyright date: (is: $YEARS, should be: $NEW)" >&2 + RV=1 + fi + fi + else + if [[ $COMMIT_FIRST -eq $COMMIT_LAST ]]; then + NEW=$COMMIT_LAST + else + NEW=$COMMIT_FIRST-$COMMIT_LAST + fi + + if [[ "$YEARS" == "$NEW" ]]; then + continue + else + if [[ ${UPDATE_DATES} -eq 1 ]]; then + echo "Updating copyright date in $src_path: $YEARS -> $NEW" + sed -i "s/Copyright (C) ${YEARS}/Copyright (C) ${NEW}/g" "${src_path}" + else + echo "$file:1: error: wrong copyright date: (is: $YEARS, should be: $NEW)" >&2 + RV=1 + fi + fi + fi + fi + else + echo "error: unknown commit dates in file: $file" >&2 + RV=1 + fi done rm -f $TMP $TMP2 $TEMPFILE diff --git a/scripts/check_license/file-exceptions.sh b/scripts/check_license/file-exceptions.sh index 144e4b65b..5a12443d9 100755 --- a/scripts/check_license/file-exceptions.sh +++ b/scripts/check_license/file-exceptions.sh @@ -13,11 +13,15 @@ grep -v -E -e 'benchmark/ubench.h' \ -e 'docs/config/Doxyfile' \ -e 'include/umf/proxy_lib_new_delete.h' \ -e 'LICENSE.TXT' \ + -e 'licensing/third-party-programs.txt' \ -e 'scripts/assets/images/.*' \ + -e 'scripts/qemu/requirements.txt' \ -e 'src/uthash/.*' \ -e 'src/uthash/utlist.h' \ -e 'src/uthash/uthash.h' \ + -e 'test/ctl/config.txt' \ -e 'test/supp/.*' \ + -e 
'third_party/requirements.txt' \ -e '.clang-format$' \ -e '.cmake-format$' \ -e '.cmake.in$' \ @@ -29,6 +33,5 @@ grep -v -E -e 'benchmark/ubench.h' \ -e '.rst$' \ -e '.spellcheck-conf.toml' \ -e '.trivyignore' \ - -e '.txt$' \ -e '.xml$' \ -e '.yml$' diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index 17be932a4..f54e70185 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2023 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception From 42a95fd3ed047db4c38cd9a68d9a329d6b3c8dd1 Mon Sep 17 00:00:00 2001 From: Katarzyna Luszczewska Date: Mon, 13 Jan 2025 15:56:02 +0100 Subject: [PATCH 084/466] Fix spelling errors in a few files --- docs/config/api.rst | 2 +- docs/config/examples.rst | 2 +- include/umf/memory_pool.h | 4 ++-- include/umf/pools/pool_disjoint.h | 13 +++++++++---- include/umf/providers/provider_os_memory.h | 8 +++++--- 5 files changed, 18 insertions(+), 11 deletions(-) diff --git a/docs/config/api.rst b/docs/config/api.rst index 3eedc8f1d..1c20d709c 100644 --- a/docs/config/api.rst +++ b/docs/config/api.rst @@ -86,7 +86,7 @@ and operate on the provider. Fixed Memory Provider ------------------------------------------ -A memory provider that can provide memory from a given pre-allocated buffer. +A memory provider that can provide memory from a given preallocated buffer. .. doxygenfile:: provider_fixed_memory.h :sections: define enum typedef func var diff --git a/docs/config/examples.rst b/docs/config/examples.rst index c58e7fc22..4eeea6aa9 100644 --- a/docs/config/examples.rst +++ b/docs/config/examples.rst @@ -178,7 +178,7 @@ by a different library and the caller of the :any:`umfGetIPCHandle` function may The :any:`umfGetIPCHandle` function returns the IPC handle and its size. The IPC handle is a byte-copyable opaque data structure. The :any:`umf_ipc_handle_t` type is defined as a pointer to a byte array. The size of the handle might be different for different memory provider types. The code snippet below demonstrates how the IPC handle can -be serialized for marshalling purposes. +be serialized for marshaling purposes. .. code-block:: c diff --git a/include/umf/memory_pool.h b/include/umf/memory_pool.h index de045acf4..ae5e67a96 100644 --- a/include/umf/memory_pool.h +++ b/include/umf/memory_pool.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -140,7 +140,7 @@ umf_result_t umfFree(void *ptr); /// * Implementations *must* store the error code in thread-local /// storage prior to returning NULL from the allocation functions. /// -/// * If the last allocation/de-allocation operation succeeded, the value returned by +/// * If the last allocation/deallocation operation succeeded, the value returned by /// this function is unspecified. /// /// * The application *may* call this function from simultaneous threads. diff --git a/include/umf/pools/pool_disjoint.h b/include/umf/pools/pool_disjoint.h index fdf682ae5..d268a1dac 100644 --- a/include/umf/pools/pool_disjoint.h +++ b/include/umf/pools/pool_disjoint.h @@ -1,6 +1,11 @@ -// Copyright (C) 2023-2024 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +/* + * + * Copyright (C) 2023-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ #pragma once #ifdef __cplusplus @@ -87,7 +92,7 @@ umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, /// @brief Set shared limits for disjoint pool. /// @param hParams handle to the parameters of the disjoint pool. -/// @param hSharedLimits handle tp the shared limits. +/// @param hSharedLimits handle to the shared limits. /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. umf_result_t umfDisjointPoolParamsSetSharedLimits( umf_disjoint_pool_params_handle_t hParams, diff --git a/include/umf/providers/provider_os_memory.h b/include/umf/providers/provider_os_memory.h index a6bf43a7d..90455cad1 100644 --- a/include/umf/providers/provider_os_memory.h +++ b/include/umf/providers/provider_os_memory.h @@ -1,9 +1,11 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * + * Copyright (C) 2022-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -*/ + * + */ #ifndef UMF_OS_MEMORY_PROVIDER_H #define UMF_OS_MEMORY_PROVIDER_H @@ -23,7 +25,7 @@ extern "C" { /// Not every mode is supported on every system. typedef enum umf_numa_mode_t { /// Default binding mode. Actual binding policy is system-specific. On - /// linux this corresponds to MPOL_DEFAULT. If this mode is specified, + /// Linux this corresponds to MPOL_DEFAULT. If this mode is specified, /// nodemask must be NULL and maxnode must be 0. UMF_NUMA_MODE_DEFAULT, From f72a6130a52f459951768a12d6e1dc086d869bc2 Mon Sep 17 00:00:00 2001 From: Katarzyna Luszczewska Date: Mon, 13 Jan 2025 15:54:42 +0100 Subject: [PATCH 085/466] Add spelling check in the web docs --- .github/workflows/reusable_checks.yml | 9 ++- docs/config/Doxyfile | 2 +- docs/config/conf.py | 5 +- docs/config/spelling_exceptions.txt | 74 ++++++++++++++++++++++++ scripts/check_license/file-exceptions.sh | 1 + third_party/requirements.txt | 3 + 6 files changed, 91 insertions(+), 3 deletions(-) create mode 100644 docs/config/spelling_exceptions.txt diff --git a/.github/workflows/reusable_checks.yml b/.github/workflows/reusable_checks.yml index 6e700cec1..a7602d269 100644 --- a/.github/workflows/reusable_checks.yml +++ b/.github/workflows/reusable_checks.yml @@ -21,7 +21,7 @@ jobs: - name: Install dependencies run: | sudo apt-get update - sudo apt-get install -y black cmake clang-format-15 cmake-format libhwloc-dev + sudo apt-get install -y black cmake clang-format-15 cmake-format libhwloc-dev doxygen # Latest distros do not allow global pip installation - name: Install Python requirements in venv @@ -29,6 +29,7 @@ jobs: python3 -m venv .venv . .venv/bin/activate echo "$PATH" >> $GITHUB_PATH + python3 -m pip install -r third_party/requirements.txt python3 -m pip install bandit codespell - name: Configure CMake @@ -64,6 +65,12 @@ jobs: - name: Run codespell run: python3 ./.github/scripts/run-codespell.py + - name: Check spelling in docs + run: | + cmake -B build + cmake --build build --target docs + sphinx-build -b spelling ./build/docs_build/config ./build/docs_build/spelling_log -W + # Run Bandit recursively, but omit _deps directory (with 3rd party code) and python's venv - name: Run Bandit run: python3 -m bandit -r . 
-x '/_deps/,/.venv/' diff --git a/docs/config/Doxyfile b/docs/config/Doxyfile index f23117ff2..630946374 100644 --- a/docs/config/Doxyfile +++ b/docs/config/Doxyfile @@ -445,7 +445,7 @@ INLINE_SIMPLE_STRUCTS = NO # types are typedef'ed and only the typedef is referenced, never the tag name. # The default value is: NO. -TYPEDEF_HIDES_STRUCT = NO +TYPEDEF_HIDES_STRUCT = YES # The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This # cache is used to resolve symbols given their name and scope. Since this can be diff --git a/docs/config/conf.py b/docs/config/conf.py index fa4788ff4..ae698ba98 100644 --- a/docs/config/conf.py +++ b/docs/config/conf.py @@ -36,7 +36,10 @@ # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. -extensions = ["breathe"] +extensions = ["breathe", "sphinxcontrib.spelling"] + +spelling_show_suggestions = True +spelling_word_list_filename = "spelling_exceptions.txt" # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/docs/config/spelling_exceptions.txt b/docs/config/spelling_exceptions.txt new file mode 100644 index 000000000..d4e40a3ec --- /dev/null +++ b/docs/config/spelling_exceptions.txt @@ -0,0 +1,74 @@ +addr +allocatable +allocator +allocators +calloc +CXL +copyable +customizable +daxX +deallocation +deallocating +deallocations +Devdax +dev +Globals +hMemtarget +hPool +hProvider +highPtr +io +interprocess +ipc +jemalloc +lowPtr +malloc +maxnode +mem +mempolicies +mempolicy +Mempolicy +memspace +Memspace +memspaces +Memtarget +memtarget +memtargets +middleware +multithreading +Nodemask +nodemask +numa +oneAPI +oneTBB +os +params +partList +pid +poolable +preallocated +providerIpcData +providential +ptr +realloc +Scalable +scalable +stdout +Tiering +tiering +topologies +umf +umfGetIPCHandle +umfMemoryProviderAlloc +umfMemoryProviderGetLastNativeError +umfMemoryProviderOpenIPCHandle +umfOsMemoryProviderParamsDestroy +umfPool +umfPoolCalloc +umfPoolDestroy +umfPoolGetTag +umfPoolMallocUsableSize +umfPoolRealloc +umfMemspaceUserFilter +umfMemspaceMemtargetAdd +unfreed \ No newline at end of file diff --git a/scripts/check_license/file-exceptions.sh b/scripts/check_license/file-exceptions.sh index 5a12443d9..10c556061 100755 --- a/scripts/check_license/file-exceptions.sh +++ b/scripts/check_license/file-exceptions.sh @@ -9,6 +9,7 @@ grep -v -E -e 'benchmark/ubench.h' \ -e 'ChangeLog' \ -e 'CODEOWNERS$' \ -e 'docs/assets/.*' \ + -e 'docs/config/spelling_exceptions.txt' \ -e 'docs/config/conf.py' \ -e 'docs/config/Doxyfile' \ -e 'include/umf/proxy_lib_new_delete.h' \ diff --git a/third_party/requirements.txt b/third_party/requirements.txt index 9832bf2f0..1255dcb92 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -15,3 +15,6 @@ sphinxcontrib_qthelp==2.0.0 breathe==4.35.0 sphinx==8.1.3 sphinx_book_theme==1.1.3 +# Spelling check in documentation +pyenchant==3.2.2 +sphinxcontrib-spelling==8.0.0 From 39f77a17c341fe4eb9c486de24e064e7146be8f6 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Mon, 2 Dec 2024 18:20:36 +0000 Subject: [PATCH 086/466] L0 provider: implement support for defer and blocking free --- include/umf/providers/provider_level_zero.h | 15 +++++ src/libumf.def | 2 + src/libumf.map | 1 + src/provider/provider_level_zero.c | 60 ++++++++++++++++++- .../provider_level_zero_not_impl.cpp | 4 ++ 5 files changed, 80 insertions(+), 2 
deletions(-) diff --git a/include/umf/providers/provider_level_zero.h b/include/umf/providers/provider_level_zero.h index f760c5724..df6dd7364 100644 --- a/include/umf/providers/provider_level_zero.h +++ b/include/umf/providers/provider_level_zero.h @@ -68,6 +68,21 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( umf_level_zero_memory_provider_params_handle_t hParams, ze_device_handle_t *hDevices, uint32_t deviceCount); +typedef enum umf_level_zero_memory_provider_free_policy_t { + UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT = + 0, ///< Free memory immediately. Default. + UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_BLOCKING_FREE, ///< Blocks until all commands using the memory are complete before freeing. + UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFER_FREE, ///< Schedules the memory to be freed but does not free immediately. +} umf_level_zero_memory_provider_free_policy_t; + +/// @brief Set the memory free policy. +/// @param hParams handle to the parameters of the Level Zero Memory Provider. +/// @param policy memory free policy. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfLevelZeroMemoryProviderParamsSetFreePolicy( + umf_level_zero_memory_provider_params_handle_t hParams, + umf_level_zero_memory_provider_free_policy_t policy); + umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void); #ifdef __cplusplus diff --git a/src/libumf.def b/src/libumf.def index d053fa240..42d7cfaf3 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -120,3 +120,5 @@ EXPORTS umfScalablePoolParamsDestroy umfScalablePoolParamsSetGranularity umfScalablePoolParamsSetKeepAllMemory +; Added in UMF_0.11 + umfLevelZeroMemoryProviderParamsSetFreePolicy diff --git a/src/libumf.map b/src/libumf.map index 9aecf8f53..c33bb7c10 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -119,4 +119,5 @@ UMF_0.11 { umfFixedMemoryProviderOps; umfFixedMemoryProviderParamsCreate; umfFixedMemoryProviderParamsDestroy; + umfLevelZeroMemoryProviderParamsSetFreePolicy; } UMF_0.10; diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index f3ce269b2..eaea8abd9 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -75,6 +75,14 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( return UMF_RESULT_ERROR_NOT_SUPPORTED; } +umf_result_t umfLevelZeroMemoryProviderParamsSetFreePolicy( + umf_level_zero_memory_provider_params_handle_t hParams, + umf_level_zero_memory_provider_free_policy_t policy) { + (void)hParams; + (void)policy; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void) { // not supported LOG_ERR("L0 memory provider is disabled! 
(UMF_BUILD_LEVEL_ZERO_PROVIDER is " @@ -107,6 +115,9 @@ typedef struct umf_level_zero_memory_provider_params_t { resident_device_handles; ///< Array of devices for which the memory should be made resident uint32_t resident_device_count; ///< Number of devices for which the memory should be made resident + + umf_level_zero_memory_provider_free_policy_t + freePolicy; ///< Memory free policy } umf_level_zero_memory_provider_params_t; typedef struct ze_memory_provider_t { @@ -118,6 +129,8 @@ typedef struct ze_memory_provider_t { uint32_t resident_device_count; ze_device_properties_t device_properties; + + ze_driver_memory_free_policy_ext_flags_t freePolicyFlags; } ze_memory_provider_t; typedef struct ze_ops_t { @@ -144,6 +157,8 @@ typedef struct ze_ops_t { size_t); ze_result_t (*zeDeviceGetProperties)(ze_device_handle_t, ze_device_properties_t *); + ze_result_t (*zeMemFreeExt)(ze_context_handle_t, + ze_memory_free_ext_desc_t *, void *); } ze_ops_t; static ze_ops_t g_ze_ops; @@ -197,6 +212,8 @@ static void init_ze_global_state(void) { utils_get_symbol_addr(0, "zeContextMakeMemoryResident", lib_name); *(void **)&g_ze_ops.zeDeviceGetProperties = utils_get_symbol_addr(0, "zeDeviceGetProperties", lib_name); + *(void **)&g_ze_ops.zeMemFreeExt = + utils_get_symbol_addr(0, "zeMemFreeExt", lib_name); if (!g_ze_ops.zeMemAllocHost || !g_ze_ops.zeMemAllocDevice || !g_ze_ops.zeMemAllocShared || !g_ze_ops.zeMemFree || @@ -232,6 +249,7 @@ umf_result_t umfLevelZeroMemoryProviderParamsCreate( params->memory_type = UMF_MEMORY_TYPE_UNKNOWN; params->resident_device_handles = NULL; params->resident_device_count = 0; + params->freePolicy = UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT; *hParams = params; @@ -308,6 +326,32 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( return UMF_RESULT_SUCCESS; } +umf_result_t umfLevelZeroMemoryProviderParamsSetFreePolicy( + umf_level_zero_memory_provider_params_handle_t hParams, + umf_level_zero_memory_provider_free_policy_t policy) { + if (!hParams) { + LOG_ERR("Level zero memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->freePolicy = policy; + return UMF_RESULT_SUCCESS; +} + +static ze_driver_memory_free_policy_ext_flags_t +umfFreePolicyToZePolicy(umf_level_zero_memory_provider_free_policy_t policy) { + switch (policy) { + case UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT: + return 0; + case UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_BLOCKING_FREE: + return ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_BLOCKING_FREE; + case UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFER_FREE: + return ZE_DRIVER_MEMORY_FREE_POLICY_EXT_FLAG_DEFER_FREE; + default: + return 0; + } +} + static umf_result_t ze_memory_provider_initialize(void *params, void **provider) { if (params == NULL) { @@ -351,6 +395,8 @@ static umf_result_t ze_memory_provider_initialize(void *params, ze_provider->context = ze_params->level_zero_context_handle; ze_provider->device = ze_params->level_zero_device_handle; ze_provider->memory_type = (ze_memory_type_t)ze_params->memory_type; + ze_provider->freePolicyFlags = + umfFreePolicyToZePolicy(ze_params->freePolicy); memset(&ze_provider->device_properties, 0, sizeof(ze_provider->device_properties)); @@ -493,8 +539,18 @@ static umf_result_t ze_memory_provider_free(void *provider, void *ptr, } ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; - ze_result_t ze_result = g_ze_ops.zeMemFree(ze_provider->context, ptr); - return ze2umf_result(ze_result); + + if (ze_provider->freePolicyFlags 
== 0) { + return ze2umf_result(g_ze_ops.zeMemFree(ze_provider->context, ptr)); + } + + ze_memory_free_ext_desc_t desc = { + .stype = ZE_STRUCTURE_TYPE_MEMORY_FREE_EXT_DESC, + .pNext = NULL, + .freePolicy = ze_provider->freePolicyFlags}; + + return ze2umf_result( + g_ze_ops.zeMemFreeExt(ze_provider->context, &desc, ptr)); } static void ze_memory_provider_get_last_native_error(void *provider, diff --git a/test/providers/provider_level_zero_not_impl.cpp b/test/providers/provider_level_zero_not_impl.cpp index bea1acbe7..c55c236fe 100644 --- a/test/providers/provider_level_zero_not_impl.cpp +++ b/test/providers/provider_level_zero_not_impl.cpp @@ -31,6 +31,10 @@ TEST_F(test, level_zero_provider_not_implemented) { hDevices, 1); ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + result = umfLevelZeroMemoryProviderParamsSetFreePolicy( + hParams, UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); ASSERT_EQ(ops, nullptr); } From 330ffd3791bce93d0bdb941a3f1700f04ded6d50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Mon, 13 Jan 2025 16:23:32 +0100 Subject: [PATCH 087/466] reduce code duplication in Benchmark by using Apply() to set arguments --- benchmark/benchmark.cpp | 131 +++++++++++++++++----------------------- benchmark/benchmark.hpp | 25 +++++--- 2 files changed, 72 insertions(+), 84 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index df4fe6e5d..6c8175e1d 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -1,11 +1,13 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * */ +#include + #include "benchmark.hpp" #define UMF_BENCHMARK_TEMPLATE_DEFINE(BaseClass, Method, ...) \ @@ -18,14 +20,8 @@ #define UMF_BENCHMARK_REGISTER_F(BaseClass, Method) \ BENCHMARK_REGISTER_F(BaseClass, Method) \ - ->ArgNames( \ - BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::argsName()) \ - ->Name(BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::name()) \ - ->Iterations( \ - BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::iterations()) - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_fix, fixed_alloc_size, - glibc_malloc); + ->Apply( \ + &BENCHMARK_PRIVATE_CONCAT_NAME(BaseClass, Method)::defaultArgs) // Benchmarks scenarios: @@ -33,54 +29,56 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_fix, fixed_alloc_size, // benchmark arguments, allocator arguments, size generator arguments. // The exact meaning of each argument depends on the benchmark, allocator, and size components used. // Refer to the 'argsName()' function in each component to find detailed descriptions of these arguments. 
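// For example (assuming fixed_alloc_size contributes a single "size" argument, per
// argsName() in benchmark.hpp): Args({10000, 0, 4096}) used below reads as
// max_allocs = 10000, pre_allocs = 0 and a fixed allocation size of 4096 bytes.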
+ +static void default_alloc_fix_size(benchmark::internal::Benchmark *benchmark) { + benchmark->Args({10000, 0, 4096}); + benchmark->Args({10000, 100000, 4096}); + benchmark->Threads(4); + benchmark->Threads(1); +} + +static void +default_alloc_uniform_size(benchmark::internal::Benchmark *benchmark) { + benchmark->Args({10000, 0, 8, 64 * 1024, 8}); + benchmark->Threads(4); + benchmark->Threads(1); +} + +UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_fix, fixed_alloc_size, + glibc_malloc); + UMF_BENCHMARK_REGISTER_F(alloc_benchmark, glibc_fix) - ->Args({10000, 0, 4096}) - ->Args({10000, 100000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_alloc_fix_size); UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_uniform, uniform_alloc_size, glibc_malloc); UMF_BENCHMARK_REGISTER_F(alloc_benchmark, glibc_uniform) - ->Args({10000, 0, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_alloc_uniform_size); UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, os_provider, fixed_alloc_size, provider_allocator); UMF_BENCHMARK_REGISTER_F(alloc_benchmark, os_provider) - ->Args({10000, 0, 4096}) - ->Args({10000, 100000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_alloc_fix_size); UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, proxy_pool, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(alloc_benchmark, proxy_pool) - ->Args({1000, 0, 4096}) - ->Args({1000, 100000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_alloc_fix_size); #ifdef UMF_POOL_DISJOINT_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_fix, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_fix) - ->Args({10000, 0, 4096}) - ->Args({10000, 100000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_alloc_fix_size); // TODO: debug why this crashes /*UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_uniform) - ->Args({10000, 0, 8, 64 * 1024, 8}) - // ->Threads(4) - ->Threads(1); + ->Apply(&default_alloc_uniform_size); */ #endif @@ -89,18 +87,13 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_fix, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(alloc_benchmark, jemalloc_pool_fix) - ->Args({10000, 0, 4096}) - ->Args({10000, 100000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_alloc_fix_size); UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(alloc_benchmark, jemalloc_pool_uniform) - ->Args({10000, 0, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_alloc_uniform_size); #endif #ifdef UMF_POOL_SCALABLE_ENABLED @@ -109,71 +102,67 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, scalable_pool_fix, pool_allocator>); UMF_BENCHMARK_REGISTER_F(alloc_benchmark, scalable_pool_fix) - ->Args({10000, 0, 4096}) - ->Args({10000, 100000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_alloc_fix_size); UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, scalable_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(alloc_benchmark, scalable_pool_uniform) - ->Args({10000, 0, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_alloc_uniform_size); #endif // Multiple allocs/free +static void +default_multiple_alloc_fix_size(benchmark::internal::Benchmark *benchmark) { + benchmark->Args({10000, 4096}); + benchmark->Threads(4); + 
benchmark->Threads(1); +} + +static void +default_multiple_alloc_uniform_size(benchmark::internal::Benchmark *benchmark) { + benchmark->Args({10000, 8, 64 * 1024, 8}); + benchmark->Threads(4); + benchmark->Threads(1); +} UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, glibc_fix, fixed_alloc_size, glibc_malloc); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, glibc_fix) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, glibc_uniform, uniform_alloc_size, glibc_malloc); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, glibc_uniform) - ->Args({10000, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_uniform_size); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, proxy_pool, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, proxy_pool) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider, fixed_alloc_size, provider_allocator); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, os_provider) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size); #ifdef UMF_POOL_DISJOINT_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size); // TODO: debug why this crashes /*UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) - ->Args({10000, 0, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_uniform_size); */ #endif @@ -182,17 +171,13 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_fix) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_uniform) - ->Args({1000, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_uniform_size); #endif @@ -202,18 +187,14 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, scalable_pool_fix, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_fix) - ->Args({10000, 4096}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_fix_size); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, scalable_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_uniform) - ->Args({10000, 8, 64 * 1024, 8}) - ->Threads(4) - ->Threads(1); + ->Apply(&default_multiple_alloc_uniform_size); #endif BENCHMARK_MAIN(); diff --git a/benchmark/benchmark.hpp b/benchmark/benchmark.hpp index df5d6a592..50e75f8fb 100644 --- a/benchmark/benchmark.hpp +++ b/benchmark/benchmark.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * 
Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -162,8 +162,15 @@ struct benchmark_interface : public benchmark::Fixture { return res; } - static std::string name() { return Allocator::name(); } - static int64_t iterations() { return 10000; } + virtual std::string name() { return Allocator::name(); } + virtual int64_t iterations() { return 10000; } + static void defaultArgs(Benchmark *benchmark) { + auto *bench = + static_cast *>(benchmark); + benchmark->ArgNames(bench->argsName()) + ->Name(bench->name()) + ->Iterations(bench->iterations()); + } Size alloc_size; Allocator allocator; }; @@ -260,15 +267,15 @@ class alloc_benchmark : public benchmark_interface { } } - static std::vector argsName() { + virtual std::vector argsName() { auto n = benchmark_interface::argsName(); std::vector res = {"max_allocs", "pre_allocs"}; res.insert(res.end(), n.begin(), n.end()); return res; } - static std::string name() { return base::name() + "/alloc"; } - static int64_t iterations() { return 200000; } + virtual std::string name() { return base::name() + "/alloc"; } + virtual int64_t iterations() { return 200000; } protected: using base = benchmark_interface; @@ -346,18 +353,18 @@ class multiple_malloc_free_benchmark : public alloc_benchmark { } } - static std::string name() { + virtual std::string name() { return base::base::name() + "/multiple_malloc_free"; } - static std::vector argsName() { + virtual std::vector argsName() { auto n = benchmark_interface::argsName(); std::vector res = {"max_allocs"}; res.insert(res.end(), n.begin(), n.end()); return res; } - static int64_t iterations() { return 2000; } + virtual int64_t iterations() { return 2000; } std::default_random_engine generator; distribution dist; From 279b6bf491890cc128ef31f2e62becc9d146cbe5 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 17 Jan 2025 11:23:04 +0100 Subject: [PATCH 088/466] Fix: use cuda_device_handle in cu_memory_provider_initialize cu_provider->device is not set (equals 0) in this place yet, so we have to use cu_params->cuda_device_handle instead of cu_provider->device here. Signed-off-by: Lukasz Dorau --- src/provider/provider_cuda.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index 9bb11327b..350bd016f 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -313,7 +313,7 @@ static umf_result_t cu_memory_provider_initialize(void *params, CUmemAllocationProp allocProps = {0}; allocProps.location.type = CU_MEM_LOCATION_TYPE_DEVICE; allocProps.type = CU_MEM_ALLOCATION_TYPE_PINNED; - allocProps.location.id = cu_provider->device; + allocProps.location.id = cu_params->cuda_device_handle; CUresult cu_result = g_cu_ops.cuMemGetAllocationGranularity( &min_alignment, &allocProps, CU_MEM_ALLOC_GRANULARITY_MINIMUM); if (cu_result != CUDA_SUCCESS) { From da7706c1d37fb8f6250989136a781fd078e4fdbb Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Fri, 3 Jan 2025 15:21:25 +0100 Subject: [PATCH 089/466] Add info about ptrace permissions Update our documentation with better info about ptrace - IPC users should be aware of required permissions. 
Add proposed, two possible solutions into our docs. Also, update our examples and tests to work without global change of ptrace_scope. Co-authored-by: sergey.vinogradov@intel.com --- .github/workflows/reusable_basic.yml | 3 --- .github/workflows/reusable_fast.yml | 4 ---- .github/workflows/reusable_proxy_lib.yml | 3 --- .github/workflows/reusable_sanitizers.yml | 4 ---- README.md | 25 ++++++++++++----------- examples/ipc_ipcapi/ipc_ipcapi_anon_fd.sh | 14 +++---------- examples/ipc_ipcapi/ipc_ipcapi_producer.c | 19 ++++++++++++++++- scripts/qemu/run-tests.sh | 4 +--- src/utils/utils_posix_common.c | 7 +++---- test/common/ipc_common.c | 10 ++++++++- test/ipc_os_prov_anon_fd.sh | 17 +-------------- test/providers/ipc_level_zero_prov.sh | 17 +-------------- 12 files changed, 49 insertions(+), 78 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 83542efbb..25d33e2b3 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -145,9 +145,6 @@ jobs: - name: Install libhwloc run: .github/scripts/install_hwloc.sh - - name: Set ptrace value for IPC test - run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" - - name: Get UMF version run: | VERSION=$(git describe --tags --abbrev=0 | grep -oP '\d+\.\d+\.\d+') diff --git a/.github/workflows/reusable_fast.yml b/.github/workflows/reusable_fast.yml index 5673727ac..58a172a74 100644 --- a/.github/workflows/reusable_fast.yml +++ b/.github/workflows/reusable_fast.yml @@ -88,10 +88,6 @@ jobs: sudo apt-get install -y cmake libnuma-dev libtbb-dev .github/scripts/install_hwloc.sh # install hwloc-2.3.0 instead of hwloc-2.1.0 present in the OS package - - name: Set ptrace value for IPC test (on Linux only) - if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'ubuntu-20.04' }} - run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" - - name: Configure CMake if: matrix.simple_cmake == 'OFF' run: > diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index 27a66267d..a1f5975fa 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -34,9 +34,6 @@ jobs: sudo apt-get update sudo apt-get install -y cmake libhwloc-dev libtbb-dev lcov - - name: Set ptrace value for IPC test - run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" - - name: Configure build run: > cmake diff --git a/.github/workflows/reusable_sanitizers.yml b/.github/workflows/reusable_sanitizers.yml index 93752ff84..25458da51 100644 --- a/.github/workflows/reusable_sanitizers.yml +++ b/.github/workflows/reusable_sanitizers.yml @@ -40,10 +40,6 @@ jobs: sudo apt-get update sudo apt-get install -y intel-oneapi-ippcp-devel intel-oneapi-ipp-devel intel-oneapi-common-oneapi-vars intel-oneapi-compiler-dpcpp-cpp - - - name: Set ptrace value for IPC test - run: sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" - - name: Configure build run: > ${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh &&' || ''}} diff --git a/README.md b/README.md index b16f35ff6..5bd0b9b2f 100644 --- a/README.md +++ b/README.md @@ -159,11 +159,12 @@ OS memory provider supports two types of memory mappings (set by the `visibility IPC API requires the `UMF_MEM_MAP_SHARED` memory `visibility` mode (`UMF_RESULT_ERROR_INVALID_ARGUMENT` is returned otherwise). -IPC API uses the file descriptor duplication. 
It requires using `pidfd_getfd(2)` to obtain -a duplicate of another process's file descriptor (`pidfd_getfd(2)` is supported since Linux 5.6). -Permission to duplicate another process's file descriptor is governed by a ptrace access mode -`PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`) that can be changed using -the `/proc/sys/kernel/yama/ptrace_scope` interface in the following way: +IPC API uses file descriptor duplication, which requires the `pidfd_getfd(2)` system call to obtain +a duplicate of another process's file descriptor. This system call is supported since Linux 5.6. +Required permission ("restricted ptrace") is governed by the `PTRACE_MODE_ATTACH_REALCREDS` check +(see `ptrace(2)`). To allow file descriptor duplication in a binary that opens IPC handle, you can call +`prctl(PR_SET_PTRACER, ...)` in the producer binary that gets the IPC handle. +Alternatively you can change the `ptrace_scope` globally in the system, e.g.: ```sh sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" @@ -194,16 +195,16 @@ Packages required for tests (Linux-only yet): A memory provider that provides memory from L0 device. -IPC API uses the file descriptor duplication. It requires using `pidfd_getfd(2)` to obtain -a duplicate of another process's file descriptor (`pidfd_getfd(2)` is supported since Linux 5.6). -Permission to duplicate another process's file descriptor is governed by a ptrace access mode -`PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`) that can be changed using -the `/proc/sys/kernel/yama/ptrace_scope` interface in the following way: +IPC API uses file descriptor duplication, which requires the `pidfd_getfd(2)` system call to obtain +a duplicate of another process's file descriptor. This system call is supported since Linux 5.6. +Required permission ("restricted ptrace") is governed by the `PTRACE_MODE_ATTACH_REALCREDS` check +(see `ptrace(2)`). To allow file descriptor duplication in a binary that opens IPC handle, you can call +`prctl(PR_SET_PTRACER, ...)` in the producer binary that gets the IPC handle. +Alternatively you can change the `ptrace_scope` globally in the system, e.g.: ```sh sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" ``` - ##### Requirements 1) Linux or Windows OS @@ -359,7 +360,7 @@ The memory used by the proxy memory allocator is mmap'ed: 1) with the `MAP_PRIVATE` flag by default or 2) with the `MAP_SHARED` flag if the `UMF_PROXY` environment variable contains one of two following strings: `page.disposition=shared-shm` or `page.disposition=shared-fd`. These two options differ in a mechanism used during IPC: - `page.disposition=shared-shm` - IPC uses the named shared memory. An SHM name is generated using the `umf_proxy_lib_shm_pid_$PID` pattern, where `$PID` is the PID of the process. It creates the `/dev/shm/umf_proxy_lib_shm_pid_$PID` file. - - `page.disposition=shared-fd` - IPC uses the file descriptor duplication. It requires using `pidfd_getfd(2)` to obtain a duplicate of another process's file descriptor. Permission to duplicate another process's file descriptor is governed by a ptrace access mode `PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`) that can be changed using the `/proc/sys/kernel/yama/ptrace_scope` interface. `pidfd_getfd(2)` is supported since Linux 5.6. + - `page.disposition=shared-fd` - IPC API uses file descriptor duplication, which requires the `pidfd_getfd(2)` system call to obtain a duplicate of another process's file descriptor. This system call is supported since Linux 5.6. 
Required permission ("restricted ptrace") is governed by the `PTRACE_MODE_ATTACH_REALCREDS` check (see `ptrace(2)`). To allow file descriptor duplication in a binary that opens IPC handle, you can call `prctl(PR_SET_PTRACER, ...)` in the producer binary that gets the IPC handle. Alternatively you can change the `ptrace_scope` globally in the system, e.g.: `sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope"`. **Size threshold** diff --git a/examples/ipc_ipcapi/ipc_ipcapi_anon_fd.sh b/examples/ipc_ipcapi/ipc_ipcapi_anon_fd.sh index 615271eeb..2eb9409da 100755 --- a/examples/ipc_ipcapi/ipc_ipcapi_anon_fd.sh +++ b/examples/ipc_ipcapi/ipc_ipcapi_anon_fd.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -16,16 +16,8 @@ PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) # to obtain a duplicate of another process's file descriptor. # Permission to duplicate another process's file descriptor # is governed by a ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check (see ptrace(2)) -# that can be changed using the /proc/sys/kernel/yama/ptrace_scope interface. -PTRACE_SCOPE_FILE="/proc/sys/kernel/yama/ptrace_scope" -VAL=0 -if [ -f $PTRACE_SCOPE_FILE ]; then - PTRACE_SCOPE_VAL=$(cat $PTRACE_SCOPE_FILE) - if [ $PTRACE_SCOPE_VAL -ne $VAL ]; then - echo "SKIP: ptrace_scope is not set to 0 (classic ptrace permissions) - skipping the test" - exit 125 # skip code defined in CMakeLists.txt - fi -fi +# In the producer binary used in this example prctl(PR_SET_PTRACER, getppid()) is used +# to allow consumer to duplicate file descriptor of producer. UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" diff --git a/examples/ipc_ipcapi/ipc_ipcapi_producer.c b/examples/ipc_ipcapi/ipc_ipcapi_producer.c index 4157e8284..9082302ac 100644 --- a/examples/ipc_ipcapi/ipc_ipcapi_producer.c +++ b/examples/ipc_ipcapi/ipc_ipcapi_producer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -69,6 +70,21 @@ int main(int argc, char *argv[]) { int port = atoi(argv[1]); + // The prctl() function with PR_SET_PTRACER is used here to allow parent process and its children + // to ptrace the current process. This is necessary because UMF's memory providers on Linux (except CUDA) + // use the pidfd_getfd(2) system call to duplicate another process's file descriptor, which is + // governed by ptrace permissions. By default on Ubuntu /proc/sys/kernel/yama/ptrace_scope is + // set to 1 ("restricted ptrace"), which prevents pidfd_getfd from working unless ptrace_scope + // is set to 0. + // To overcome this limitation without requiring users to change the ptrace_scope + // setting (which requires root privileges), we use prctl() to allow the consumer process + // to copy producer's file descriptor, even when ptrace_scope is set to 1. + ret = prctl(PR_SET_PTRACER, getppid()); + if (ret == -1) { + perror("PR_SET_PTRACER may be not supported. 
prctl() call failed"); + goto err_end; + } + umf_memory_provider_handle_t OS_memory_provider = NULL; umf_os_memory_provider_params_handle_t os_params = NULL; enum umf_result_t umf_result; @@ -259,6 +275,7 @@ int main(int argc, char *argv[]) { err_destroy_OS_params: umfOsMemoryProviderParamsDestroy(os_params); +err_end: if (ret == 0) { fprintf(stderr, "[producer] Shutting down (status OK) ...\n"); } else if (ret == 1) { diff --git a/scripts/qemu/run-tests.sh b/scripts/qemu/run-tests.sh index 9d855590b..341e2f9ab 100755 --- a/scripts/qemu/run-tests.sh +++ b/scripts/qemu/run-tests.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -23,8 +23,6 @@ UMF_DIR=$(pwd) # Drop caches, restores free memory on NUMA nodes echo password | sudo sync; echo password | sudo sh -c "/usr/bin/echo 3 > /proc/sys/vm/drop_caches" -# Set ptrace value for IPC test -echo password | sudo bash -c "echo 0 > /proc/sys/kernel/yama/ptrace_scope" numactl -H diff --git a/src/utils/utils_posix_common.c b/src/utils/utils_posix_common.c index 4a60cbb1f..613b8ea41 100644 --- a/src/utils/utils_posix_common.c +++ b/src/utils/utils_posix_common.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -91,9 +91,8 @@ umf_result_t utils_duplicate_fd(int pid, int fd_in, int *fd_out) { return UMF_RESULT_ERROR_NOT_SUPPORTED; #else // pidfd_getfd(2) is used to obtain a duplicate of another process's file descriptor. - // Permission to duplicate another process's file descriptor - // is governed by a ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check (see ptrace(2)) - // that can be changed using the /proc/sys/kernel/yama/ptrace_scope interface. + // Calling prctl(PR_SET_PTRACER, getppid()) in a producer binary that creates IPC handle + // allows file descriptor duplication for parent process and its children. // pidfd_getfd(2) is supported since Linux 5.6 // pidfd_open(2) is supported since Linux 5.3 errno = 0; diff --git a/test/common/ipc_common.c b/test/common/ipc_common.c index 140927079..1590dd3c4 100644 --- a/test/common/ipc_common.c +++ b/test/common/ipc_common.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -336,6 +337,12 @@ int run_producer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, int producer_socket = -1; char consumer_message[MSG_SIZE]; + ret = prctl(PR_SET_PTRACER, getppid()); + if (ret == -1) { + perror("PR_SET_PTRACER may be not supported. 
prctl() call failed"); + goto err_end; + } + // create OS memory provider umf_result = umfMemoryProviderCreate(provider_ops, provider_params, &provider); @@ -528,6 +535,7 @@ int run_producer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, err_umfMemoryProviderDestroy: umfMemoryProviderDestroy(provider); +err_end: if (ret == 0) { fprintf(stderr, "[producer] Shutting down (status OK) ...\n"); } else if (ret == 1) { diff --git a/test/ipc_os_prov_anon_fd.sh b/test/ipc_os_prov_anon_fd.sh index c5738e989..a42d820a2 100755 --- a/test/ipc_os_prov_anon_fd.sh +++ b/test/ipc_os_prov_anon_fd.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -12,21 +12,6 @@ set -e # port should be a number from the range <1024, 65535> PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) -# The ipc_os_prov_anon_fd example requires using pidfd_getfd(2) -# to obtain a duplicate of another process's file descriptor. -# Permission to duplicate another process's file descriptor -# is governed by a ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check (see ptrace(2)) -# that can be changed using the /proc/sys/kernel/yama/ptrace_scope interface. -PTRACE_SCOPE_FILE="/proc/sys/kernel/yama/ptrace_scope" -VAL=0 -if [ -f $PTRACE_SCOPE_FILE ]; then - PTRACE_SCOPE_VAL=$(cat $PTRACE_SCOPE_FILE) - if [ $PTRACE_SCOPE_VAL -ne $VAL ]; then - echo "SKIP: ptrace_scope is not set to 0 (classic ptrace permissions) - skipping the test" - exit 125 # skip code defined in CMakeLists.txt - fi -fi - UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" echo "Starting ipc_os_prov_anon_fd CONSUMER on port $PORT ..." diff --git a/test/providers/ipc_level_zero_prov.sh b/test/providers/ipc_level_zero_prov.sh index d6bcef4f3..4d2967725 100755 --- a/test/providers/ipc_level_zero_prov.sh +++ b/test/providers/ipc_level_zero_prov.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -12,21 +12,6 @@ set -e # port should be a number from the range <1024, 65535> PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) -# The ipc_level_zero_prov test requires using pidfd_getfd(2) -# to obtain a duplicate of another process's file descriptor. -# Permission to duplicate another process's file descriptor -# is governed by a ptrace access mode PTRACE_MODE_ATTACH_REALCREDS check (see ptrace(2)) -# that can be changed using the /proc/sys/kernel/yama/ptrace_scope interface. -PTRACE_SCOPE_FILE="/proc/sys/kernel/yama/ptrace_scope" -VAL=0 -if [ -f $PTRACE_SCOPE_FILE ]; then - PTRACE_SCOPE_VAL=$(cat $PTRACE_SCOPE_FILE) - if [ $PTRACE_SCOPE_VAL -ne $VAL ]; then - echo "SKIP: ptrace_scope is not set to 0 (classic ptrace permissions) - skipping the test" - exit 125 # skip code defined in CMakeLists.txt - fi -fi - UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" echo "Starting ipc_level_zero_prov CONSUMER on port $PORT ..." From 0edc541cb3e749cea56048da8a9c4d95ca4219b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Mon, 20 Jan 2025 12:41:12 +0100 Subject: [PATCH 090/466] Order entries in libumf.def for 0.11 version this change is purely aesthetic. 
--- src/libumf.def | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libumf.def b/src/libumf.def index 42d7cfaf3..090b3a86f 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -31,9 +31,6 @@ EXPORTS umfFileMemoryProviderParamsSetPath umfFileMemoryProviderParamsSetProtection umfFileMemoryProviderParamsSetVisibility - umfFixedMemoryProviderOps - umfFixedMemoryProviderParamsCreate - umfFixedMemoryProviderParamsDestroy umfFree umfGetIPCHandle umfGetLastFailedMemoryProvider @@ -121,4 +118,7 @@ EXPORTS umfScalablePoolParamsSetGranularity umfScalablePoolParamsSetKeepAllMemory ; Added in UMF_0.11 + umfFixedMemoryProviderOps + umfFixedMemoryProviderParamsCreate + umfFixedMemoryProviderParamsDestroy umfLevelZeroMemoryProviderParamsSetFreePolicy From ade44f56dc5296aa8c69b4618e752fd9fc4e59f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Mon, 20 Jan 2025 18:55:30 +0100 Subject: [PATCH 091/466] Update RELEASE_STEPS with dev tags on main --- RELEASE_STEPS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/RELEASE_STEPS.md b/RELEASE_STEPS.md index 09a972598..9cac9f815 100644 --- a/RELEASE_STEPS.md +++ b/RELEASE_STEPS.md @@ -49,6 +49,10 @@ Do changes for a release: - If stable branch for this release is required, create it: - `git checkout -b v$VER.x` - For some early versions (like `0.1.0`) we may omit creation of the branch +- For major/minor release, when release is done, add an extra "dev" tag on the `main` branch: + - `git tag -a -s -m "Development version $VERSION+1" v$VERSION+1-dev` + - for example, when `v0.1.0` is released, the dev tag would be `v0.2.0-dev` + - This way, the `main` branch will introduce itself as the next version ## Publish changes From eb7c46d51b580b64e0c84e05c152433c2c821aa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Tue, 21 Jan 2025 10:36:59 +0100 Subject: [PATCH 092/466] [CMake] Allow '-devX' tags -devX tag may be required when e.g. a first -dev tag was "replaced" with new patch release incoming from stable branch into main. E.g., at this moment, the main branch is introduced as v0.10.1-... instead of v0.11.0-dev... and this is not desired. --- RELEASE_STEPS.md | 1 + cmake/helpers.cmake | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/RELEASE_STEPS.md b/RELEASE_STEPS.md index 9cac9f815..92c38c79d 100644 --- a/RELEASE_STEPS.md +++ b/RELEASE_STEPS.md @@ -52,6 +52,7 @@ Do changes for a release: - For major/minor release, when release is done, add an extra "dev" tag on the `main` branch: - `git tag -a -s -m "Development version $VERSION+1" v$VERSION+1-dev` - for example, when `v0.1.0` is released, the dev tag would be `v0.2.0-dev` + - if needed, further in time, an extra dev tag can be introduced, e.g. 
`v0.2.0-dev1` - This way, the `main` branch will introduce itself as the next version ## Publish changes diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 2d14e2f45..d6f12031d 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -121,12 +121,12 @@ function(set_version_variables) return() endif() - # v1.5.0-dev - we're on a development tag -> UMF ver: "1.5.0-dev" - string(REGEX MATCHALL "\^v([0-9]+\.[0-9]+\.[0-9]+)-dev\$" MATCHES + # v1.5.0-dev1 - we're on a development tag -> UMF ver: "1.5.0-dev1" + string(REGEX MATCHALL "\^v([0-9]+\.[0-9]+\.[0-9]+)-(dev[0-9]?)\$" MATCHES ${GIT_VERSION}) if(MATCHES) set(UMF_VERSION - "${CMAKE_MATCH_1}-dev" + "${CMAKE_MATCH_1}-${CMAKE_MATCH_2}" PARENT_SCOPE) set(UMF_CMAKE_VERSION "${CMAKE_MATCH_1}" @@ -157,12 +157,12 @@ function(set_version_variables) return() endif() - # v1.5.0-dev-19-gb8f7a32 -> UMF ver: "1.5.0-dev.git19.gb8f7a32" - string(REGEX MATCHALL "v([0-9.]*)-dev-([0-9]*)-([0-9a-g]*)" MATCHES + # v1.5.0-dev2-19-gb8f7a32 -> UMF ver: "1.5.0-dev2.git19.gb8f7a32" + string(REGEX MATCHALL "v([0-9.]*)-(dev[0-9]?)-([0-9]*)-([0-9a-g]*)" MATCHES ${GIT_VERSION}) if(MATCHES) set(UMF_VERSION - "${CMAKE_MATCH_1}-dev.git${CMAKE_MATCH_2}.${CMAKE_MATCH_3}" + "${CMAKE_MATCH_1}-${CMAKE_MATCH_2}.git${CMAKE_MATCH_3}.${CMAKE_MATCH_4}" PARENT_SCOPE) set(UMF_CMAKE_VERSION "${CMAKE_MATCH_1}" From 8437b9048635eb77c623e23732805a3e921216de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 15 Jan 2025 15:54:56 +0100 Subject: [PATCH 093/466] [CMake] Minor cleanups in top-level CMake mostly adding new "section" comments and a misspell fix. --- CMakeLists.txt | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 70ac08799..05f15d423 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,10 @@ list(APPEND CMAKE_MODULE_PATH "${UMF_CMAKE_SOURCE_DIR}/cmake") # Use full path of the helpers module (to omit potential conflicts with others) include(${UMF_CMAKE_SOURCE_DIR}/cmake/helpers.cmake) +# --------------------------------------------------------------------------- # +# Set UMF version variables, define project, and add basic modules +# --------------------------------------------------------------------------- # + # We use semver aligned version, set via git tags. We parse git output to # establish the version of UMF to be used in CMake, Win dll's, and within the # code (e.g. in logger). We have 3-component releases (e.g. 
1.5.1) plus release @@ -33,6 +37,10 @@ include(CMakePackageConfigHelpers) include(GNUInstallDirs) find_package(PkgConfig) +# --------------------------------------------------------------------------- # +# Set UMF build options (and CACHE variables) +# --------------------------------------------------------------------------- # + # Define a list to store the names of all options set(UMF_OPTIONS_LIST "") list(APPEND UMF_OPTIONS_LIST CMAKE_BUILD_TYPE) @@ -43,7 +51,6 @@ macro(umf_option) option(${ARGV}) endmacro() -# Build Options umf_option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) umf_option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) umf_option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON) @@ -56,9 +63,8 @@ umf_option(UMF_BUILD_GPU_TESTS "Build UMF GPU tests" OFF) umf_option(UMF_BUILD_BENCHMARKS "Build UMF benchmarks" OFF) umf_option(UMF_BUILD_BENCHMARKS_MT "Build UMF multithreaded benchmarks" OFF) umf_option(UMF_BUILD_EXAMPLES "Build UMF examples" ON) -umf_option(UMF_BUILD_FUZZTESTS "Build UMF fuzz tests" OFF) umf_option(UMF_BUILD_GPU_EXAMPLES "Build UMF GPU examples" OFF) -umf_option(UMF_DEVELOPER_MODE "Enable additional developer checks" OFF) +umf_option(UMF_BUILD_FUZZTESTS "Build UMF fuzz tests" OFF) umf_option( UMF_DISABLE_HWLOC "Disable hwloc and UMF features requiring it (OS provider, memtargets, topology discovery)" @@ -67,9 +73,6 @@ umf_option( UMF_LINK_HWLOC_STATICALLY "Link UMF with HWLOC library statically (proxy library will be disabled on Windows+Debug build)" OFF) -umf_option( - UMF_FORMAT_CODE_STYLE - "Add clang, cmake, and black -format-check and -format-apply targets" OFF) set(UMF_HWLOC_NAME "hwloc" CACHE STRING "Custom name for hwloc library w/o extension") @@ -81,6 +84,10 @@ set(UMF_INSTALL_RPATH "Set the runtime search path to the directory with dependencies (e.g. hwloc)" ) +umf_option(UMF_DEVELOPER_MODE "Enable additional developer checks" OFF) +umf_option( + UMF_FORMAT_CODE_STYLE + "Add clang, cmake, and black -format-check and -format-apply targets" OFF) # Only a part of skips is treated as a failure now. TODO: extend to all tests umf_option(UMF_TESTS_FAIL_ON_SKIP "Treat skips in tests as fail" OFF) umf_option(UMF_USE_ASAN "Enable AddressSanitizer checks" OFF) @@ -100,6 +107,11 @@ set_property(CACHE UMF_PROXY_LIB_BASED_ON_POOL PROPERTY STRINGS ${KNOWN_PROXY_LIB_POOLS}) list(APPEND UMF_OPTIONS_LIST UMF_PROXY_LIB_BASED_ON_POOL) +# --------------------------------------------------------------------------- # +# Setup required variables, definitions; fetch dependencies; include +# sub_directories based on build options; set flags; etc. 
+# --------------------------------------------------------------------------- # + if(UMF_BUILD_TESTS AND DEFINED ENV{CI} AND NOT UMF_TESTS_FAIL_ON_SKIP) @@ -711,7 +723,7 @@ if(UMF_FORMAT_CODE_STYLE) add_custom_target( cmake-format-apply COMMAND ${CMAKE_FORMAT} --in-place ${format_cmake_list} - COMMENT "Format Cmake files using cmake-format") + COMMENT "Format CMake files using cmake-format") endif() if(BLACK) From fd5a50ac17eded5f41a40fb6f523918740cd7702 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 16 Jan 2025 15:44:02 +0100 Subject: [PATCH 094/466] Unify 'Level Zero' spelling It should be either 'L0' or 'Level Zero' --- benchmark/ubench.c | 4 ++-- examples/CMakeLists.txt | 4 ++-- examples/ipc_level_zero/ipc_level_zero.c | 4 ++-- examples/level_zero_shared_memory/level_zero_shared_memory.c | 4 ++-- test/CMakeLists.txt | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/benchmark/ubench.c b/benchmark/ubench.c index 845dc881d..dfd28ea1f 100644 --- a/benchmark/ubench.c +++ b/benchmark/ubench.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -452,7 +452,7 @@ static int create_level_zero_params(ze_context_handle_t *context, int ret = utils_ze_init_level_zero(); if (ret != 0) { - fprintf(stderr, "Failed to init Level 0!\n"); + fprintf(stderr, "Failed to init Level Zero!\n"); return ret; } diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 986ad5641..5911a073f 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -154,7 +154,7 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "IPC Level 0 example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" + "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" ) endif() diff --git a/examples/ipc_level_zero/ipc_level_zero.c b/examples/ipc_level_zero/ipc_level_zero.c index 9579244ab..87dbbd022 100644 --- a/examples/ipc_level_zero/ipc_level_zero.c +++ b/examples/ipc_level_zero/ipc_level_zero.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -104,7 +104,7 @@ int main(void) { const size_t BUFFER_PATTERN = 0x42; int ret = init_level_zero(); if (ret != 0) { - fprintf(stderr, "ERROR: Failed to init Level 0!\n"); + fprintf(stderr, "ERROR: Failed to init Level Zero!\n"); return ret; } diff --git a/examples/level_zero_shared_memory/level_zero_shared_memory.c b/examples/level_zero_shared_memory/level_zero_shared_memory.c index b0f646861..d4c49b8a0 100644 --- a/examples/level_zero_shared_memory/level_zero_shared_memory.c +++ b/examples/level_zero_shared_memory/level_zero_shared_memory.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -27,7 +27,7 @@ int main(void) { // Initialize Level Zero int ret = init_level_zero(); if (ret != 0) { - fprintf(stderr, "Failed to init Level 0!\n"); + fprintf(stderr, "Failed to init Level Zero!\n"); return ret; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7eed07e09..64cbb339c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -732,7 +732,7 @@ if(LINUX else() message( STATUS - "IPC Level 0 example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" + "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" ) endif() From fac88da8cb5fde28234add321283f1d1f5a16edf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 16 Jan 2025 15:25:08 +0100 Subject: [PATCH 095/466] [CMake] Add warning if UMF version is set to 0.0.0 Improper version set in CMake means, our lib will be wrongly configured and libumf.so.0.0.0 will be produced... which most likely is not expected (unless it's a developer's build). --- CMakeLists.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 05f15d423..ac7bcbf93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,6 +26,12 @@ project( umf VERSION ${UMF_CMAKE_VERSION} LANGUAGES C) +if(UMF_CMAKE_VERSION VERSION_EQUAL "0.0.0") + message( + WARNING + "UMF version is set to 0.0.0, which most likely is not expected! " + "Please checkout the git tags to get a proper version.") +endif() if(PROJECT_VERSION_PATCH GREATER 0) # set extra variable for Windows dll metadata From 079ecefa222c419747761c2f16ebceddc62d03d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 16 Jan 2025 15:40:20 +0100 Subject: [PATCH 096/466] [CMake] Add extra info in .cmake-format for 'build_umf_test' and 'add_umf_ipc_test'. It's for proper parsing of CMake files. 
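
For illustration only (not part of this patch): once cmake-format knows the NAME/SRCS/LIBS keywords of these
custom functions, it can keep the arguments grouped per keyword instead of emitting one argument per line.
A minimal, hypothetical call formatted that way (target name and source file are made up for the example):

    # hypothetical test target, shown only to illustrate the expected layout
    build_umf_test(
        NAME example_ipc
        SRCS example_ipc.c common/ipc_common.c
        LIBS ${UMF_UTILS_FOR_TEST})

The re-formatting of the real calls is visible in the test/CMakeLists.txt hunks below.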
--- .cmake-format | 35 ++++++---- test/CMakeLists.txt | 155 ++++++++++++++++---------------------------- 2 files changed, 80 insertions(+), 110 deletions(-) diff --git a/.cmake-format b/.cmake-format index 57ad821ef..c1a8e85a8 100644 --- a/.cmake-format +++ b/.cmake-format @@ -9,31 +9,31 @@ with section("parse"): "pargs": 0, "flags": [], 'kwargs': { - 'NAME': '*', - 'SRCS': '*', - 'LIBS': '*' , + 'NAME': '*', + 'SRCS': '*', + 'LIBS': '*' , 'LIBDIRS': '*'}}, 'add_umf_executable': { "pargs": 0, "flags": [], 'kwargs': { - 'NAME': '*', - 'SRCS': '*', + 'NAME': '*', + 'SRCS': '*', 'LIBS': '*'}}, 'add_umf_test': { "pargs": 0, "flags": [], 'kwargs': { - 'NAME': '*', - 'SRCS': '*', + 'NAME': '*', + 'SRCS': '*', 'LIBS': '*'}}, 'add_umf_library': { "pargs": 0, "flags": [], 'kwargs': { - 'NAME': '*', - 'TYPE': '*', - 'SRCS': '*', + 'NAME': '*', + 'TYPE': '*', + 'SRCS': '*', 'LIBS': '*', 'LINUX_MAP_FILE': '*', 'WINDOWS_DEF_FILE': '*'}}, @@ -43,7 +43,20 @@ with section("parse"): 'kwargs': { 'LABELS': '*', 'PASS_REGULAR_EXPRESSION': '*'}}, - } + 'build_umf_test': { + "pargs": 0, + "flags": [], + 'kwargs': { + 'NAME': '*', + 'SRCS': '*', + 'LIBS': '*' }}, + 'add_umf_ipc_test': { + "pargs": 0, + "flags": [], + 'kwargs': { + 'TEST': '*', + 'SRC_DIR': '*'}}, + } # Override configurations per-command where available override_spec = {} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 64cbb339c..918e874c6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -31,8 +31,11 @@ set(UMF_TEST_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(UMF_UTILS_DIR ${UMF_CMAKE_SOURCE_DIR}/src/utils) function(build_umf_test) - # Parameters: * NAME - a name of the test * SRCS - source files * LIBS - - # libraries to be linked with + # Parameters: + # + # * NAME - a name of the test + # * SRCS - source files + # * LIBS - libraries to be linked with set(oneValueArgs NAME) set(multiValueArgs SRCS LIBS) cmake_parse_arguments( @@ -108,8 +111,11 @@ function(build_umf_test) endfunction() function(add_umf_test) - # Parameters: * NAME - a name of the test * SRCS - source files * LIBS - - # libraries to be linked with + # Parameters: + # + # * NAME - a name of the test + # * SRCS - source files + # * LIBS - libraries to be linked with set(oneValueArgs NAME) set(multiValueArgs SRCS LIBS) cmake_parse_arguments( @@ -120,12 +126,9 @@ function(add_umf_test) ${ARGN}) build_umf_test( - NAME - ${ARG_NAME} - SRCS - ${ARG_SRCS} - LIBS - ${ARG_LIBS}) + NAME ${ARG_NAME} + SRCS ${ARG_SRCS} + LIBS ${ARG_LIBS}) set(TEST_NAME umf-${ARG_NAME}) set(TEST_TARGET_NAME umf_test-${ARG_NAME}) @@ -491,8 +494,10 @@ add_umf_test( add_umf_test(NAME ipc_negative SRCS ipc_negative.cpp) function(add_umf_ipc_test) - # Parameters: * TEST - a name of the test * SRC_DIR - source files directory - # path + # Parameters: + # + # * TEST - a name of the test + # * SRC_DIR - source files directory path set(oneValueArgs TEST SRC_DIR) cmake_parse_arguments( ARG @@ -526,64 +531,42 @@ endfunction() if(LINUX) if(NOT UMF_DISABLE_HWLOC AND UMF_POOL_SCALABLE_ENABLED) build_umf_test( - NAME - ipc_os_prov_consumer - SRCS - ipc_os_prov_consumer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_os_prov_consumer + SRCS ipc_os_prov_consumer.c common/ipc_common.c + common/ipc_os_prov_common.c) build_umf_test( - NAME - ipc_os_prov_producer - SRCS - ipc_os_prov_producer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_os_prov_producer + SRCS ipc_os_prov_producer.c common/ipc_common.c + common/ipc_os_prov_common.c) add_umf_ipc_test(TEST 
ipc_os_prov_anon_fd) add_umf_ipc_test(TEST ipc_os_prov_shm) if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY) build_umf_test( - NAME - ipc_os_prov_proxy - SRCS - ipc_os_prov_proxy.c - common/ipc_common.c - LIBS - ${UMF_UTILS_FOR_TEST}) + NAME ipc_os_prov_proxy + SRCS ipc_os_prov_proxy.c common/ipc_common.c + LIBS ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_os_prov_proxy) endif() build_umf_test( - NAME - ipc_devdax_prov_consumer - SRCS - ipc_devdax_prov_consumer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_devdax_prov_consumer + SRCS ipc_devdax_prov_consumer.c common/ipc_common.c + common/ipc_os_prov_common.c) build_umf_test( - NAME - ipc_devdax_prov_producer - SRCS - ipc_devdax_prov_producer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_devdax_prov_producer + SRCS ipc_devdax_prov_producer.c common/ipc_common.c + common/ipc_os_prov_common.c) add_umf_ipc_test(TEST ipc_devdax_prov) build_umf_test( - NAME - ipc_file_prov_consumer - SRCS - ipc_file_prov_consumer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_file_prov_consumer + SRCS ipc_file_prov_consumer.c common/ipc_common.c + common/ipc_os_prov_common.c) build_umf_test( - NAME - ipc_file_prov_producer - SRCS - ipc_file_prov_producer.c - common/ipc_common.c - common/ipc_os_prov_common.c) + NAME ipc_file_prov_producer + SRCS ipc_file_prov_producer.c common/ipc_common.c + common/ipc_os_prov_common.c) add_umf_ipc_test(TEST ipc_file_prov) add_umf_ipc_test(TEST ipc_file_prov_fsdax) endif() @@ -594,29 +577,17 @@ if(LINUX) AND UMF_BUILD_LEVEL_ZERO_PROVIDER AND UMF_BUILD_LIBUMF_POOL_DISJOINT) build_umf_test( - NAME - ipc_level_zero_prov_consumer - SRCS - providers/ipc_level_zero_prov_consumer.c - common/ipc_common.c - providers/ipc_level_zero_prov_common.c - ${UMF_UTILS_DIR}/utils_level_zero.cpp - LIBS - ze_loader - disjoint_pool - ${UMF_UTILS_FOR_TEST}) + NAME ipc_level_zero_prov_consumer + SRCS providers/ipc_level_zero_prov_consumer.c common/ipc_common.c + providers/ipc_level_zero_prov_common.c + ${UMF_UTILS_DIR}/utils_level_zero.cpp + LIBS ze_loader disjoint_pool ${UMF_UTILS_FOR_TEST}) build_umf_test( - NAME - ipc_level_zero_prov_producer - SRCS - providers/ipc_level_zero_prov_producer.c - common/ipc_common.c - providers/ipc_level_zero_prov_common.c - ${UMF_UTILS_DIR}/utils_level_zero.cpp - LIBS - ze_loader - disjoint_pool - ${UMF_UTILS_FOR_TEST}) + NAME ipc_level_zero_prov_producer + SRCS providers/ipc_level_zero_prov_producer.c common/ipc_common.c + providers/ipc_level_zero_prov_common.c + ${UMF_UTILS_DIR}/utils_level_zero.cpp + LIBS ze_loader disjoint_pool ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_level_zero_prov SRC_DIR providers) endif() @@ -624,29 +595,15 @@ if(LINUX) AND UMF_BUILD_CUDA_PROVIDER AND UMF_BUILD_LIBUMF_POOL_DISJOINT) build_umf_test( - NAME - ipc_cuda_prov_consumer - SRCS - providers/ipc_cuda_prov_consumer.c - common/ipc_common.c - providers/ipc_cuda_prov_common.c - providers/cuda_helpers.cpp - LIBS - cuda - disjoint_pool - ${UMF_UTILS_FOR_TEST}) + NAME ipc_cuda_prov_consumer + SRCS providers/ipc_cuda_prov_consumer.c common/ipc_common.c + providers/ipc_cuda_prov_common.c providers/cuda_helpers.cpp + LIBS cuda disjoint_pool ${UMF_UTILS_FOR_TEST}) build_umf_test( - NAME - ipc_cuda_prov_producer - SRCS - providers/ipc_cuda_prov_producer.c - common/ipc_common.c - providers/ipc_cuda_prov_common.c - providers/cuda_helpers.cpp - LIBS - cuda - disjoint_pool - ${UMF_UTILS_FOR_TEST}) + NAME ipc_cuda_prov_producer + SRCS providers/ipc_cuda_prov_producer.c 
common/ipc_common.c + providers/ipc_cuda_prov_common.c providers/cuda_helpers.cpp + LIBS cuda disjoint_pool ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_cuda_prov SRC_DIR providers) endif() else() From 7dc14223e4d9b31b61696284e561d22fdefd182b Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 21 Jan 2025 09:26:46 +0100 Subject: [PATCH 097/466] add CUDA multi context test --- test/providers/cuda_helpers.cpp | 9 +++-- test/providers/cuda_helpers.h | 4 +- test/providers/provider_cuda.cpp | 67 +++++++++++++++++++++++++++++++- 3 files changed, 75 insertions(+), 5 deletions(-) diff --git a/test/providers/cuda_helpers.cpp b/test/providers/cuda_helpers.cpp index 9c41d9382..bed9906c0 100644 --- a/test/providers/cuda_helpers.cpp +++ b/test/providers/cuda_helpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -251,7 +251,7 @@ int InitCUDAOps() { } #endif // USE_DLOPEN -static CUresult set_context(CUcontext required_ctx, CUcontext *restore_ctx) { +CUresult set_context(CUcontext required_ctx, CUcontext *restore_ctx) { CUcontext current_ctx = NULL; CUresult cu_result = libcu_ops.cuCtxGetCurrent(¤t_ctx); if (cu_result != CUDA_SUCCESS) { @@ -259,7 +259,10 @@ static CUresult set_context(CUcontext required_ctx, CUcontext *restore_ctx) { return cu_result; } - *restore_ctx = current_ctx; + if (restore_ctx != NULL) { + *restore_ctx = current_ctx; + } + if (current_ctx != required_ctx) { cu_result = libcu_ops.cuCtxSetCurrent(required_ctx); if (cu_result != CUDA_SUCCESS) { diff --git a/test/providers/cuda_helpers.h b/test/providers/cuda_helpers.h index fc06c1fcf..65f4fbbf5 100644 --- a/test/providers/cuda_helpers.h +++ b/test/providers/cuda_helpers.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -30,6 +30,8 @@ int get_cuda_device(CUdevice *device); int create_context(CUdevice device, CUcontext *context); +CUresult set_context(CUcontext required_ctx, CUcontext *restore_ctx); + int destroy_context(CUcontext context); int cuda_fill(CUcontext context, CUdevice device, void *ptr, size_t size, diff --git a/test/providers/provider_cuda.cpp b/test/providers/provider_cuda.cpp index 4f1d35911..8a7fdd28a 100644 --- a/test/providers/provider_cuda.cpp +++ b/test/providers/provider_cuda.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -315,6 +315,71 @@ TEST_P(umfCUDAProviderTest, cudaProviderNullParams) { EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); } +TEST_P(umfCUDAProviderTest, multiContext) { + CUdevice device; + int ret = get_cuda_device(&device); + ASSERT_EQ(ret, 0); + + // create two CUDA contexts and two providers + CUcontext ctx1, ctx2; + ret = create_context(device, &ctx1); + ASSERT_EQ(ret, 0); + ret = create_context(device, &ctx2); + ASSERT_EQ(ret, 0); + + cuda_params_unique_handle_t params1 = + create_cuda_prov_params(ctx1, device, UMF_MEMORY_TYPE_HOST); + ASSERT_NE(params1, nullptr); + umf_memory_provider_handle_t provider1; + umf_result_t umf_result = umfMemoryProviderCreate( + umfCUDAMemoryProviderOps(), params1.get(), &provider1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider1, nullptr); + + cuda_params_unique_handle_t params2 = + create_cuda_prov_params(ctx2, device, UMF_MEMORY_TYPE_HOST); + ASSERT_NE(params2, nullptr); + umf_memory_provider_handle_t provider2; + umf_result = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), + params2.get(), &provider2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider2, nullptr); + + // use the providers + // allocate from 1, then from 2, then free 1, then free 2 + void *ptr1, *ptr2; + const int size = 128; + // NOTE: we use ctx1 here + umf_result = umfMemoryProviderAlloc(provider1, size, 0, &ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr1, nullptr); + + // NOTE: we use ctx2 here + umf_result = umfMemoryProviderAlloc(provider2, size, 0, &ptr2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr2, nullptr); + + // even if we change the context, we should be able to free the memory + ret = set_context(ctx2, NULL); + ASSERT_EQ(ret, 0); + // free memory from ctx1 + umf_result = umfMemoryProviderFree(provider1, ptr1, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ret = set_context(ctx1, NULL); + ASSERT_EQ(ret, 0); + umf_result = umfMemoryProviderFree(provider2, ptr2, size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // cleanup + umfMemoryProviderDestroy(provider2); + umfMemoryProviderDestroy(provider1); + ret = destroy_context(ctx1); + ASSERT_EQ(ret, 0); + ret = destroy_context(ctx2); + ASSERT_EQ(ret, 0); +} + // TODO add tests that mixes CUDA Memory Provider and Disjoint Pool CUDATestHelper cudaTestHelper; From 4acb4e97db7b08a33a218106f50f0827d6473f44 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 22 Jan 2025 13:16:45 +0100 Subject: [PATCH 098/466] set/restore valid context in CUDA provider free --- src/provider/provider_cuda.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index 350bd016f..7a7b0a467 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -433,6 +433,14 @@ static umf_result_t cu_memory_provider_free(void *provider, void *ptr, cu_memory_provider_t *cu_provider = (cu_memory_provider_t *)provider; + // Remember current context and set the one from the provider + CUcontext restore_ctx = NULL; + umf_result_t umf_result = set_context(cu_provider->context, &restore_ctx); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("Failed to set CUDA context, ret = %d", umf_result); + return umf_result; + } + CUresult cu_result = CUDA_SUCCESS; switch (cu_provider->memory_type) { case UMF_MEMORY_TYPE_HOST: { @@ -451,6 +459,11 @@ static umf_result_t cu_memory_provider_free(void *provider, void *ptr, return 
UMF_RESULT_ERROR_UNKNOWN; } + umf_result = set_context(restore_ctx, &restore_ctx); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("Failed to restore CUDA context, ret = %d", umf_result); + } + return cu2umf_result(cu_result); } From e39e455ce994be76d324091c50f5f34f194b4740 Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Mon, 20 Jan 2025 11:47:25 +0100 Subject: [PATCH 099/466] Test static hwloc linking on macos --- .github/workflows/reusable_basic.yml | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 25d33e2b3..22bf0ea50 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -453,6 +453,9 @@ jobs: strategy: matrix: os: ['macos-13', 'macos-14'] + include: + - os: macos-14 + static_hwloc: '-DUMF_LINK_HWLOC_STATICALLY=ON' env: BUILD_TYPE : "Release" runs-on: ${{matrix.os}} @@ -471,8 +474,12 @@ jobs: echo "$PATH" >> $GITHUB_PATH python3 -m pip install -r third_party/requirements.txt + - name: Install dependencies + run: brew install jemalloc tbb automake libtool + - name: Install hwloc - run: brew install hwloc tbb automake + if: ${{ !matrix.static_hwloc }} + run: brew install hwloc - name: Get UMF version run: | @@ -492,6 +499,7 @@ jobs: -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_SHARED_LIBRARY=ON -DUMF_TESTS_FAIL_ON_SKIP=ON + ${{matrix.static_hwloc}} - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} -j $(sysctl -n hw.logicalcpu) From 4a3cd185c4c0803396b095b9bcd416c52d74b253 Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Mon, 20 Jan 2025 16:05:28 +0100 Subject: [PATCH 100/466] Add dependent macOS libraries for hwloc build --- src/CMakeLists.txt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index fe05ef0b7..c0072be7e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -182,6 +182,18 @@ add_dependencies(umf coarse) if(UMF_LINK_HWLOC_STATICALLY) add_dependencies(umf ${UMF_HWLOC_NAME}) + # On Darwin, link with the IOKit and Foundation frameworks, if they are + # available in the system. This is to comply with hwloc which links these, + # if available. There is no option to disable these frameworks on Darwin + # hwloc builds. + if(MACOSX) + find_library(IOKIT_LIBRARY IOKit) + find_library(FOUNDATION_LIBRARY Foundation) + if(IOKIT_LIBRARY OR FOUNDATION_LIBRARY) + target_link_libraries(umf PRIVATE ${IOKIT_LIBRARY} + ${FOUNDATION_LIBRARY}) + endif() + endif() endif() if(NOT WINDOWS AND UMF_POOL_JEMALLOC_ENABLED) From 2e534658da794ea2f8ec07788d290f3c47697f45 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 23 Jan 2025 09:04:40 +0100 Subject: [PATCH 101/466] Fix disjoint_pool unit tests The UMF tests should not use the default system allocator to allocate memory, because they may not work correctly under the UMF proxy library when they and the proxy library are dynamically linked with the same libumf.so file. In such case the same tracker is used in both: the test and the proxy library, so there may be conflicts of memory allocations. 
An allocation made originally by the the default system allocator in the test is first added to the tracker by the proxy library (which replaced the system allocator) and then the test itself tries to add the same pointer to the same tracker in the tracking provider of the pool that had made this allocation. Fixes: #240 Signed-off-by: Lukasz Dorau --- test/pools/disjoint_pool.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index c254400db..eadef7270 100644 --- a/test/pools/disjoint_pool.cpp +++ b/test/pools/disjoint_pool.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -61,14 +61,14 @@ TEST_F(test, freeErrorPropagation) { static umf_result_t expectedResult = UMF_RESULT_SUCCESS; struct memory_provider : public umf_test::provider_base_t { umf_result_t alloc(size_t size, size_t, void **ptr) noexcept { - *ptr = malloc(size); + *ptr = umf_ba_global_alloc(size); return UMF_RESULT_SUCCESS; } umf_result_t free(void *ptr, [[maybe_unused]] size_t size) noexcept { // do the actual free only when we expect the success if (expectedResult == UMF_RESULT_SUCCESS) { - ::free(ptr); + umf_ba_global_free(ptr); } return expectedResult; } @@ -114,12 +114,12 @@ TEST_F(test, sharedLimits) { struct memory_provider : public umf_test::provider_base_t { umf_result_t alloc(size_t size, size_t, void **ptr) noexcept { - *ptr = malloc(size); + *ptr = umf_ba_global_alloc(size); numAllocs++; return UMF_RESULT_SUCCESS; } umf_result_t free(void *ptr, [[maybe_unused]] size_t size) noexcept { - ::free(ptr); + umf_ba_global_free(ptr); numFrees++; return UMF_RESULT_SUCCESS; } From 041981e6df4f600cda10c761804c86dc52cdcac3 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 23 Jan 2025 09:29:25 +0100 Subject: [PATCH 102/466] Add timeout to the add_umf_ipc_example function Signed-off-by: Lukasz Dorau --- examples/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 986ad5641..0fd654654 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -190,6 +190,7 @@ function(add_umf_ipc_example script) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) set_tests_properties(${EXAMPLE_NAME} PROPERTIES LABELS "example") + set_tests_properties(${EXAMPLE_NAME} PROPERTIES TIMEOUT 60) if(NOT UMF_TESTS_FAIL_ON_SKIP) set_tests_properties(${EXAMPLE_NAME} PROPERTIES SKIP_RETURN_CODE 125) endif() From ada63d7e744066953923e04c8a0b0cd2f30757e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Fri, 24 Jan 2025 12:58:01 +0100 Subject: [PATCH 103/466] Revert changes for Win static hwloc 1. "enable building examples on win static hwloc CI" This reverts commit 58ba8e9ff09205dc1ccf4c998485e17ae894e7d5. 2. "fix setting LIBHWLOC_LIBRARIES on Windows" This reverts commit 91f14d7d8039dc07f0a0595f0f2f441e7d526930. Ref. 
#1016 --- .github/workflows/reusable_basic.yml | 4 ++-- CMakeLists.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 22bf0ea50..02f79bad0 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -338,7 +338,7 @@ jobs: -B ${{env.BUILD_DIR}} -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" -DUMF_BUILD_SHARED_LIBRARY=ON - -DUMF_BUILD_EXAMPLES=ON + -DUMF_BUILD_EXAMPLES=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON @@ -381,7 +381,7 @@ jobs: -B ${{env.BUILD_DIR}} -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" -DUMF_BUILD_SHARED_LIBRARY=OFF - -DUMF_BUILD_EXAMPLES=ON + -DUMF_BUILD_EXAMPLES=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON diff --git a/CMakeLists.txt b/CMakeLists.txt index ac7bcbf93..c24fceb73 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -281,8 +281,8 @@ else() set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_SOURCE_DIR}/include;${hwloc_targ_BINARY_DIR}/include) - set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/$) - set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/$/hwloc.lib) + set(LIBHWLOC_LIBRARY_DIRS + ${hwloc_targ_BINARY_DIR}/Release;${hwloc_targ_BINARY_DIR}/Debug) else() include(FetchContent) message( From abf957f39140d576eae30cfab33c942dacce376e Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 23 Jan 2025 13:46:37 +0100 Subject: [PATCH 104/466] do not use global ctor/dtor for test params --- test/ipcAPI.cpp | 7 +- test/ipcFixtures.hpp | 53 ++++++++++--- test/memoryPoolAPI.cpp | 16 ++-- test/poolFixtures.hpp | 35 ++++++++- test/pools/disjoint_pool.cpp | 74 ++++++++++-------- test/pools/jemalloc_coarse_devdax.cpp | 33 ++++---- test/pools/jemalloc_coarse_file.cpp | 24 +++--- test/pools/jemalloc_pool.cpp | 61 +++++++++++---- test/pools/pool_base_alloc.cpp | 4 +- test/pools/scalable_coarse_devdax.cpp | 35 +++++---- test/pools/scalable_coarse_file.cpp | 24 +++--- test/pools/scalable_pool.cpp | 23 +++--- test/provider_devdax_memory_ipc.cpp | 41 +++++----- test/provider_file_memory_ipc.cpp | 53 +++++++------ test/provider_os_memory.cpp | 43 ++++++----- test/providers/provider_cuda.cpp | 101 +++++++++++++++---------- test/providers/provider_level_zero.cpp | 101 +++++++++++++++---------- 17 files changed, 442 insertions(+), 286 deletions(-) diff --git a/test/ipcAPI.cpp b/test/ipcAPI.cpp index aa22f353d..429896308 100644 --- a/test/ipcAPI.cpp +++ b/test/ipcAPI.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // This file contains tests for UMF pool API @@ -115,5 +115,6 @@ HostMemoryAccessor hostMemoryAccessor; INSTANTIATE_TEST_SUITE_P(umfIpcTestSuite, umfIpcTest, ::testing::Values(ipcTestParams{ - umfProxyPoolOps(), nullptr, &IPC_MOCK_PROVIDER_OPS, - nullptr, &hostMemoryAccessor})); + umfProxyPoolOps(), nullptr, nullptr, + &IPC_MOCK_PROVIDER_OPS, nullptr, nullptr, + &hostMemoryAccessor})); diff --git a/test/ipcFixtures.hpp b/test/ipcFixtures.hpp index fd5260d1b..28369b273 100644 --- a/test/ipcFixtures.hpp +++ b/test/ipcFixtures.hpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -46,23 +46,36 @@ class HostMemoryAccessor : public MemoryAccessor { } }; +typedef void *(*pfnPoolParamsCreate)(); +typedef umf_result_t (*pfnPoolParamsDestroy)(void *); + +typedef void *(*pfnProviderParamsCreate)(); +typedef umf_result_t (*pfnProviderParamsDestroy)(void *); + // ipcTestParams: -// pool_ops, pool_params, provider_ops, provider_params, memoryAccessor +// pool_ops, pfnPoolParamsCreate,pfnPoolParamsDestroy, +// provider_ops, pfnProviderParamsCreate, pfnProviderParamsDestroy, +// memoryAccessor using ipcTestParams = - std::tuple; + std::tuple; struct umfIpcTest : umf_test::test, ::testing::WithParamInterface { umfIpcTest() {} void SetUp() override { test::SetUp(); - auto [pool_ops, pool_params, provider_ops, provider_params, accessor] = + auto [pool_ops, pool_params_create, pool_params_destroy, provider_ops, + provider_params_create, provider_params_destroy, accessor] = this->GetParam(); poolOps = pool_ops; - poolParams = pool_params; + poolParamsCreate = pool_params_create; + poolParamsDestroy = pool_params_destroy; providerOps = provider_ops; - providerParams = provider_params; + providerParamsCreate = provider_params_create; + providerParamsDestroy = provider_params_destroy; memAccessor = accessor; } @@ -74,10 +87,19 @@ struct umfIpcTest : umf_test::test, umf_memory_provider_handle_t hProvider = NULL; umf_memory_pool_handle_t hPool = NULL; + void *providerParams = nullptr; + if (providerParamsCreate) { + providerParams = providerParamsCreate(); + } + auto ret = umfMemoryProviderCreate(providerOps, providerParams, &hProvider); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + if (providerParamsDestroy) { + providerParamsDestroy(providerParams); + } + auto trace = [](void *trace_context, const char *name) { stats_type *stat = static_cast(trace_context); if (std::strcmp(name, "alloc") == 0) { @@ -96,10 +118,19 @@ struct umfIpcTest : umf_test::test, umf_memory_provider_handle_t hTraceProvider = traceProviderCreate(hProvider, true, (void *)&stat, trace); + void *poolParams = nullptr; + if (poolParamsCreate) { + poolParams = poolParamsCreate(); + } + ret = umfPoolCreate(poolOps, hTraceProvider, poolParams, UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + if (poolParamsDestroy) { + poolParamsDestroy(poolParams); + } + return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); } @@ -118,10 +149,14 @@ struct umfIpcTest : umf_test::test, static constexpr int NTHREADS = 10; stats_type stat; MemoryAccessor *memAccessor = nullptr; + umf_memory_pool_ops_t *poolOps = nullptr; - void *poolParams = nullptr; + pfnPoolParamsCreate poolParamsCreate = nullptr; + pfnPoolParamsDestroy poolParamsDestroy = nullptr; + umf_memory_provider_ops_t *providerOps = nullptr; - void *providerParams = nullptr; + pfnProviderParamsCreate providerParamsCreate = nullptr; + pfnProviderParamsDestroy providerParamsDestroy = nullptr; }; TEST_P(umfIpcTest, GetIPCHandleSize) { diff --git a/test/memoryPoolAPI.cpp b/test/memoryPoolAPI.cpp index 0a45bfbe5..e2455fe85 100644 --- a/test/memoryPoolAPI.cpp +++ b/test/memoryPoolAPI.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // This file contains tests for UMF pool API @@ -295,15 +295,17 @@ TEST_F(tagTest, SetAndGetInvalidPool) { INSTANTIATE_TEST_SUITE_P( mallocPoolTest, umfPoolTest, - ::testing::Values(poolCreateExtParams{&MALLOC_POOL_OPS, nullptr, - &UMF_NULL_PROVIDER_OPS, nullptr}, - poolCreateExtParams{umfProxyPoolOps(), nullptr, - &BA_GLOBAL_PROVIDER_OPS, nullptr})); + ::testing::Values(poolCreateExtParams{&MALLOC_POOL_OPS, nullptr, nullptr, + &UMF_NULL_PROVIDER_OPS, nullptr, + nullptr}, + poolCreateExtParams{umfProxyPoolOps(), nullptr, nullptr, + &BA_GLOBAL_PROVIDER_OPS, nullptr, + nullptr})); INSTANTIATE_TEST_SUITE_P(mallocMultiPoolTest, umfMultiPoolTest, ::testing::Values(poolCreateExtParams{ - umfProxyPoolOps(), nullptr, - &BA_GLOBAL_PROVIDER_OPS, nullptr})); + umfProxyPoolOps(), nullptr, nullptr, + &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr})); INSTANTIATE_TEST_SUITE_P(umfPoolWithCreateFlagsTest, umfPoolWithCreateFlagsTest, ::testing::Values(0, diff --git a/test/poolFixtures.hpp b/test/poolFixtures.hpp index bd97ac1fa..d9a5410c0 100644 --- a/test/poolFixtures.hpp +++ b/test/poolFixtures.hpp @@ -19,17 +19,30 @@ #include "../malloc_compliance_tests.hpp" -using poolCreateExtParams = std::tuple; +typedef void *(*pfnPoolParamsCreate)(); +typedef umf_result_t (*pfnPoolParamsDestroy)(void *); + +typedef void *(*pfnProviderParamsCreate)(); +typedef umf_result_t (*pfnProviderParamsDestroy)(void *); + +using poolCreateExtParams = + std::tuple; umf::pool_unique_handle_t poolCreateExtUnique(poolCreateExtParams params) { - auto [pool_ops, pool_params, provider_ops, provider_params] = params; + auto [pool_ops, poolParamsCreate, poolParamsDestroy, provider_ops, + providerParamsCreate, providerParamsDestroy] = params; umf_memory_provider_handle_t upstream_provider = nullptr; umf_memory_provider_handle_t provider = nullptr; umf_memory_pool_handle_t hPool = nullptr; umf_result_t ret; + void *provider_params = NULL; + if (providerParamsCreate) { + provider_params = providerParamsCreate(); + } ret = umfMemoryProviderCreate(provider_ops, provider_params, &upstream_provider); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); @@ -37,11 +50,27 @@ umf::pool_unique_handle_t poolCreateExtUnique(poolCreateExtParams params) { provider = upstream_provider; + void *pool_params = NULL; + if (poolParamsCreate) { + pool_params = poolParamsCreate(); + } + + // NOTE: we set the UMF_POOL_CREATE_FLAG_OWN_PROVIDER flag here so the pool + // will destroy the provider when it is destroyed ret = umfPoolCreate(pool_ops, provider, pool_params, UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); EXPECT_NE(hPool, nullptr); + // we do not need params anymore + if (poolParamsDestroy) { + poolParamsDestroy(pool_params); + } + + if (providerParamsDestroy) { + providerParamsDestroy(provider_params); + } + return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); } diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index c254400db..cd480ac3a 100644 --- a/test/pools/disjoint_pool.cpp +++ b/test/pools/disjoint_pool.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -11,16 +11,12 @@ #include "provider_null.h" #include "provider_trace.h" -using disjoint_params_unique_handle_t = - std::unique_ptr; - static constexpr size_t DEFAULT_DISJOINT_SLAB_MIN_SIZE = 4096; static constexpr size_t DEFAULT_DISJOINT_MAX_POOLABLE_SIZE = 4096; static constexpr size_t DEFAULT_DISJOINT_CAPACITY = 4; static constexpr size_t DEFAULT_DISJOINT_MIN_BUCKET_SIZE = 64; -disjoint_params_unique_handle_t poolConfig() { +void *defaultPoolConfig() { umf_disjoint_pool_params_handle_t config = nullptr; umf_result_t res = umfDisjointPoolParamsCreate(&config); if (res != UMF_RESULT_SUCCESS) { @@ -50,8 +46,12 @@ disjoint_params_unique_handle_t poolConfig() { throw std::runtime_error("Failed to set min bucket size"); } - return disjoint_params_unique_handle_t(config, - &umfDisjointPoolParamsDestroy); + return config; +} + +umf_result_t poolConfigDestroy(void *config) { + return umfDisjointPoolParamsDestroy( + static_cast(config)); } using umf_test::test; @@ -83,17 +83,21 @@ TEST_F(test, freeErrorPropagation) { provider_handle = providerUnique.get(); // force all allocations to go to memory provider - disjoint_params_unique_handle_t params = poolConfig(); - umf_result_t retp = - umfDisjointPoolParamsSetMaxPoolableSize(params.get(), 0); + umf_disjoint_pool_params_handle_t params; + umf_result_t retp = umfDisjointPoolParamsCreate(¶ms); + EXPECT_EQ(retp, UMF_RESULT_SUCCESS); + retp = umfDisjointPoolParamsSetMaxPoolableSize(params, 0); EXPECT_EQ(retp, UMF_RESULT_SUCCESS); umf_memory_pool_handle_t pool = NULL; - retp = umfPoolCreate(umfDisjointPoolOps(), provider_handle, params.get(), 0, - &pool); + retp = + umfPoolCreate(umfDisjointPoolOps(), provider_handle, params, 0, &pool); EXPECT_EQ(retp, UMF_RESULT_SUCCESS); auto poolHandle = umf_test::wrapPoolUnique(pool); + retp = umfDisjointPoolParamsDestroy(params); + EXPECT_EQ(retp, UMF_RESULT_SUCCESS); + static constexpr size_t size = 1024; void *ptr = umfPoolMalloc(pool, size); @@ -114,12 +118,12 @@ TEST_F(test, sharedLimits) { struct memory_provider : public umf_test::provider_base_t { umf_result_t alloc(size_t size, size_t, void **ptr) noexcept { - *ptr = malloc(size); + *ptr = umf_ba_global_alloc(size); numAllocs++; return UMF_RESULT_SUCCESS; } umf_result_t free(void *ptr, [[maybe_unused]] size_t size) noexcept { - ::free(ptr); + umf_ba_global_free(ptr); numFrees++; return UMF_RESULT_SUCCESS; } @@ -130,9 +134,9 @@ TEST_F(test, sharedLimits) { static constexpr size_t SlabMinSize = 1024; static constexpr size_t MaxSize = 4 * SlabMinSize; - disjoint_params_unique_handle_t config = poolConfig(); - umf_result_t ret = - umfDisjointPoolParamsSetSlabMinSize(config.get(), SlabMinSize); + umf_disjoint_pool_params_handle_t params = + (umf_disjoint_pool_params_handle_t)defaultPoolConfig(); + umf_result_t ret = umfDisjointPoolParamsSetSlabMinSize(params, SlabMinSize); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); auto limits = @@ -141,7 +145,7 @@ TEST_F(test, sharedLimits) { umfDisjointPoolSharedLimitsCreate(MaxSize), &umfDisjointPoolSharedLimitsDestroy); - ret = umfDisjointPoolParamsSetSharedLimits(config.get(), limits.get()); + ret = umfDisjointPoolParamsSetSharedLimits(params, limits.get()); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); auto provider = @@ -149,16 +153,19 @@ TEST_F(test, sharedLimits) { umf_memory_pool_handle_t pool1 = NULL; umf_memory_pool_handle_t pool2 = NULL; - ret = umfPoolCreate(umfDisjointPoolOps(), provider.get(), - (void *)config.get(), 0, &pool1); + ret = + 
umfPoolCreate(umfDisjointPoolOps(), provider.get(), params, 0, &pool1); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); auto poolHandle1 = umf_test::wrapPoolUnique(pool1); - ret = umfPoolCreate(umfDisjointPoolOps(), provider.get(), - (void *)config.get(), 0, &pool2); + ret = + umfPoolCreate(umfDisjointPoolOps(), provider.get(), params, 0, &pool2); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); auto poolHandle2 = umf_test::wrapPoolUnique(pool2); + ret = umfDisjointPoolParamsDestroy(params); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + EXPECT_EQ(0, numAllocs); EXPECT_EQ(0, numFrees); @@ -243,23 +250,24 @@ TEST_F(test, disjointPoolInvalidBucketSize) { umfDisjointPoolParamsDestroy(params); } -disjoint_params_unique_handle_t defaultPoolConfig = poolConfig(); INSTANTIATE_TEST_SUITE_P(disjointPoolTests, umfPoolTest, ::testing::Values(poolCreateExtParams{ - umfDisjointPoolOps(), - (void *)defaultPoolConfig.get(), - &BA_GLOBAL_PROVIDER_OPS, nullptr})); + umfDisjointPoolOps(), defaultPoolConfig, + poolConfigDestroy, &BA_GLOBAL_PROVIDER_OPS, + nullptr, nullptr})); + +void *memProviderParams() { return (void *)&DEFAULT_DISJOINT_CAPACITY; } INSTANTIATE_TEST_SUITE_P( disjointPoolTests, umfMemTest, ::testing::Values(std::make_tuple( - poolCreateExtParams{ - umfDisjointPoolOps(), (void *)defaultPoolConfig.get(), - &MOCK_OUT_OF_MEM_PROVIDER_OPS, (void *)&DEFAULT_DISJOINT_CAPACITY}, + poolCreateExtParams{umfDisjointPoolOps(), defaultPoolConfig, + poolConfigDestroy, &MOCK_OUT_OF_MEM_PROVIDER_OPS, + memProviderParams, nullptr}, static_cast(DEFAULT_DISJOINT_CAPACITY) / 2))); INSTANTIATE_TEST_SUITE_P(disjointMultiPoolTests, umfMultiPoolTest, ::testing::Values(poolCreateExtParams{ - umfDisjointPoolOps(), - (void *)defaultPoolConfig.get(), - &BA_GLOBAL_PROVIDER_OPS, nullptr})); + umfDisjointPoolOps(), defaultPoolConfig, + poolConfigDestroy, &BA_GLOBAL_PROVIDER_OPS, + nullptr, nullptr})); diff --git a/test/pools/jemalloc_coarse_devdax.cpp b/test/pools/jemalloc_coarse_devdax.cpp index 72906e625..53d2a41b3 100644 --- a/test/pools/jemalloc_coarse_devdax.cpp +++ b/test/pools/jemalloc_coarse_devdax.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,18 +7,20 @@ #include "pool_coarse.hpp" -using devdax_params_unique_handle_t = - std::unique_ptr; - -devdax_params_unique_handle_t create_devdax_params() { +bool devDaxEnvSet() { char *path = getenv("UMF_TESTS_DEVDAX_PATH"); char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) { - return devdax_params_unique_handle_t( - nullptr, &umfDevDaxMemoryProviderParamsDestroy); + return false; } + return true; +} + +void *createDevDaxParams() { + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); + umf_devdax_memory_provider_params_handle_t params = NULL; umf_result_t res = umfDevDaxMemoryProviderParamsCreate(¶ms, path, atol(size)); @@ -27,17 +29,16 @@ devdax_params_unique_handle_t create_devdax_params() { "Failed to create DevDax Memory Provider params"); } - return devdax_params_unique_handle_t(params, - &umfDevDaxMemoryProviderParamsDestroy); + return params; } -auto devdaxParams = create_devdax_params(); - static std::vector poolParamsList = - devdaxParams.get() ? 
std::vector{poolCreateExtParams{ - umfJemallocPoolOps(), nullptr, - umfDevDaxMemoryProviderOps(), devdaxParams.get()}} - : std::vector{}; + devDaxEnvSet() + ? std::vector{poolCreateExtParams{ + umfJemallocPoolOps(), nullptr, nullptr, + umfDevDaxMemoryProviderOps(), createDevDaxParams, + (pfnProviderParamsDestroy)umfDevDaxMemoryProviderParamsDestroy}} + : std::vector{}; INSTANTIATE_TEST_SUITE_P(jemallocCoarseDevDaxTest, umfPoolTest, ::testing::ValuesIn(poolParamsList)); diff --git a/test/pools/jemalloc_coarse_file.cpp b/test/pools/jemalloc_coarse_file.cpp index 68a602df6..dcd03898e 100644 --- a/test/pools/jemalloc_coarse_file.cpp +++ b/test/pools/jemalloc_coarse_file.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,25 +7,25 @@ #include "pool_coarse.hpp" -using file_params_unique_handle_t = - std::unique_ptr; - -file_params_unique_handle_t get_file_params_default(char *path) { +void *getFileParamsDefault() { umf_file_memory_provider_params_handle_t file_params = NULL; - umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + umf_result_t res = + umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH); if (res != UMF_RESULT_SUCCESS) { throw std::runtime_error( "Failed to create File Memory Provider params"); } - return file_params_unique_handle_t(file_params, - &umfFileMemoryProviderParamsDestroy); + return file_params; } -file_params_unique_handle_t fileParams = get_file_params_default(FILE_PATH); +umf_result_t destroyFileParams(void *params) { + return umfFileMemoryProviderParamsDestroy( + (umf_file_memory_provider_params_handle_t)params); +} INSTANTIATE_TEST_SUITE_P(jemallocCoarseFileTest, umfPoolTest, ::testing::Values(poolCreateExtParams{ - umfJemallocPoolOps(), nullptr, - umfFileMemoryProviderOps(), fileParams.get()})); + umfJemallocPoolOps(), nullptr, nullptr, + umfFileMemoryProviderOps(), getFileParamsDefault, + destroyFileParams})); diff --git a/test/pools/jemalloc_pool.cpp b/test/pools/jemalloc_pool.cpp index 86784d919..e282be316 100644 --- a/test/pools/jemalloc_pool.cpp +++ b/test/pools/jemalloc_pool.cpp @@ -15,21 +15,26 @@ using os_params_unique_handle_t = std::unique_ptr; -os_params_unique_handle_t createOsMemoryProviderParams() { +void *createOsMemoryProviderParams() { umf_os_memory_provider_params_handle_t params = nullptr; umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); if (res != UMF_RESULT_SUCCESS) { throw std::runtime_error("Failed to create os memory provider params"); } - return os_params_unique_handle_t(params, &umfOsMemoryProviderParamsDestroy); + return params; } -auto defaultParams = createOsMemoryProviderParams(); -INSTANTIATE_TEST_SUITE_P(jemallocPoolTest, umfPoolTest, - ::testing::Values(poolCreateExtParams{ - umfJemallocPoolOps(), nullptr, - umfOsMemoryProviderOps(), defaultParams.get()})); +umf_result_t destroyOsMemoryProviderParams(void *params) { + return umfOsMemoryProviderParamsDestroy( + (umf_os_memory_provider_params_handle_t)params); +} + +INSTANTIATE_TEST_SUITE_P( + jemallocPoolTest, umfPoolTest, + ::testing::Values(poolCreateExtParams{ + umfJemallocPoolOps(), nullptr, nullptr, umfOsMemoryProviderOps(), + createOsMemoryProviderParams, destroyOsMemoryProviderParams})); // this test makes sure that jemalloc does not use // memory provider to allocate metadata (and hence @@ -41,17 +46,41 @@ TEST_F(test, 
metadataNotAllocatedUsingProvider) { // set coarse grain allocations to PROT_NONE so that we can be sure // jemalloc does not touch any of the allocated memory - umf_os_memory_provider_params_handle_t params = nullptr; - umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); - ASSERT_EQ(res, UMF_RESULT_SUCCESS); - res = umfOsMemoryProviderParamsSetProtection(params, UMF_PROTECTION_NONE); - ASSERT_EQ(res, UMF_RESULT_SUCCESS); - auto pool = poolCreateExtUnique( - {umfJemallocPoolOps(), nullptr, umfOsMemoryProviderOps(), params}); + auto providerParamsCreate = []() { + umf_os_memory_provider_params_handle_t params = nullptr; + umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create OS Memory Provider params"); + } + res = + umfOsMemoryProviderParamsSetProtection(params, UMF_PROTECTION_NONE); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to set OS Memory Provider params protection"); + } + return (void *)params; + }; + + auto providerParamsDestroy = [](void *params) { + umf_result_t res = umfOsMemoryProviderParamsDestroy( + (umf_os_memory_provider_params_handle_t)params); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to destroy OS Memory Provider params"); + } + return res; + }; - res = umfOsMemoryProviderParamsDestroy(params); - ASSERT_EQ(res, UMF_RESULT_SUCCESS); + auto pool = poolCreateExtUnique({ + umfJemallocPoolOps(), + nullptr, + nullptr, + umfOsMemoryProviderOps(), + (pfnProviderParamsCreate)providerParamsCreate, + (pfnProviderParamsDestroy)providerParamsDestroy, + }); std::vector> allocs; for (size_t i = 0; i < numAllocs; i++) { diff --git a/test/pools/pool_base_alloc.cpp b/test/pools/pool_base_alloc.cpp index 752d9f01e..ca931bcec 100644 --- a/test/pools/pool_base_alloc.cpp +++ b/test/pools/pool_base_alloc.cpp @@ -47,5 +47,5 @@ umf_memory_pool_ops_t BA_POOL_OPS = umf::poolMakeCOps(); INSTANTIATE_TEST_SUITE_P(baPool, umfPoolTest, ::testing::Values(poolCreateExtParams{ - &BA_POOL_OPS, nullptr, - &umf_test::BASE_PROVIDER_OPS, nullptr})); + &BA_POOL_OPS, nullptr, nullptr, + &umf_test::BASE_PROVIDER_OPS, nullptr, nullptr})); diff --git a/test/pools/scalable_coarse_devdax.cpp b/test/pools/scalable_coarse_devdax.cpp index 970f45ef9..86c580402 100644 --- a/test/pools/scalable_coarse_devdax.cpp +++ b/test/pools/scalable_coarse_devdax.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,18 +7,20 @@ #include "pool_coarse.hpp" -using devdax_params_unique_handle_t = - std::unique_ptr; - -devdax_params_unique_handle_t create_devdax_params() { +bool devDaxEnvSet() { char *path = getenv("UMF_TESTS_DEVDAX_PATH"); char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) { - return devdax_params_unique_handle_t( - nullptr, &umfDevDaxMemoryProviderParamsDestroy); + return false; } + return true; +} + +void *createDevDaxParams() { + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); + umf_devdax_memory_provider_params_handle_t params = NULL; umf_result_t res = umfDevDaxMemoryProviderParamsCreate(¶ms, path, atol(size)); @@ -27,17 +29,20 @@ devdax_params_unique_handle_t create_devdax_params() { "Failed to create DevDax Memory Provider params"); } - return devdax_params_unique_handle_t(params, - &umfDevDaxMemoryProviderParamsDestroy); + return params; } -auto devdaxParams = create_devdax_params(); +umf_result_t destroyDevDaxParams(void *params) { + return umfDevDaxMemoryProviderParamsDestroy( + (umf_devdax_memory_provider_params_handle_t)params); +} static std::vector poolParamsList = - devdaxParams.get() ? std::vector{poolCreateExtParams{ - umfScalablePoolOps(), nullptr, - umfDevDaxMemoryProviderOps(), devdaxParams.get()}} - : std::vector{}; + devDaxEnvSet() ? std::vector{poolCreateExtParams{ + umfScalablePoolOps(), nullptr, nullptr, + umfDevDaxMemoryProviderOps(), createDevDaxParams, + destroyDevDaxParams}} + : std::vector{}; INSTANTIATE_TEST_SUITE_P(scalableCoarseDevDaxTest, umfPoolTest, ::testing::ValuesIn(poolParamsList)); diff --git a/test/pools/scalable_coarse_file.cpp b/test/pools/scalable_coarse_file.cpp index 30134f5eb..a5fd5b46a 100644 --- a/test/pools/scalable_coarse_file.cpp +++ b/test/pools/scalable_coarse_file.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,25 +7,25 @@ #include "pool_coarse.hpp" -using file_params_unique_handle_t = - std::unique_ptr; - -file_params_unique_handle_t get_file_params_default(char *path) { +void *getFileParamsDefault() { umf_file_memory_provider_params_handle_t file_params = NULL; - umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + umf_result_t res = + umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH); if (res != UMF_RESULT_SUCCESS) { throw std::runtime_error( "Failed to create File Memory Provider params"); } - return file_params_unique_handle_t(file_params, - &umfFileMemoryProviderParamsDestroy); + return file_params; } -file_params_unique_handle_t fileParams = get_file_params_default(FILE_PATH); +umf_result_t destroyFileParams(void *params) { + return umfFileMemoryProviderParamsDestroy( + (umf_file_memory_provider_params_handle_t)params); +} INSTANTIATE_TEST_SUITE_P(scalableCoarseFileTest, umfPoolTest, ::testing::Values(poolCreateExtParams{ - umfScalablePoolOps(), nullptr, - umfFileMemoryProviderOps(), fileParams.get()})); + umfScalablePoolOps(), nullptr, nullptr, + umfFileMemoryProviderOps(), getFileParamsDefault, + destroyFileParams})); diff --git a/test/pools/scalable_pool.cpp b/test/pools/scalable_pool.cpp index ce55923d9..14cf5f305 100644 --- a/test/pools/scalable_pool.cpp +++ b/test/pools/scalable_pool.cpp @@ -9,25 +9,26 @@ #include "poolFixtures.hpp" #include "provider.hpp" -using os_params_unique_handle_t = - std::unique_ptr; - -os_params_unique_handle_t createOsMemoryProviderParams() { +void *createOsMemoryProviderParams() { umf_os_memory_provider_params_handle_t params = nullptr; umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); if (res != UMF_RESULT_SUCCESS) { throw std::runtime_error("Failed to create os memory provider params"); } - return os_params_unique_handle_t(params, &umfOsMemoryProviderParamsDestroy); + return params; +} + +umf_result_t destroyOsMemoryProviderParams(void *params) { + return umfOsMemoryProviderParamsDestroy( + (umf_os_memory_provider_params_handle_t)params); } -auto defaultParams = createOsMemoryProviderParams(); -INSTANTIATE_TEST_SUITE_P(scalablePoolTest, umfPoolTest, - ::testing::Values(poolCreateExtParams{ - umfScalablePoolOps(), nullptr, - umfOsMemoryProviderOps(), defaultParams.get()})); +INSTANTIATE_TEST_SUITE_P( + scalablePoolTest, umfPoolTest, + ::testing::Values(poolCreateExtParams{ + umfScalablePoolOps(), nullptr, nullptr, umfOsMemoryProviderOps(), + createOsMemoryProviderParams, destroyOsMemoryProviderParams})); using scalablePoolParams = std::tuple; struct umfScalablePoolParamsTest diff --git a/test/provider_devdax_memory_ipc.cpp b/test/provider_devdax_memory_ipc.cpp index ed4f1a5f8..47b389c95 100644 --- a/test/provider_devdax_memory_ipc.cpp +++ b/test/provider_devdax_memory_ipc.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,16 +15,21 @@ using umf_test::test; -using devdax_params_unique_handle_t = - std::unique_ptr; +bool devDaxEnvSet() { + char *path = getenv("UMF_TESTS_DEVDAX_PATH"); + char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); + if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) { + return false; + } -devdax_params_unique_handle_t create_devdax_params() { + return true; +} + +void *defaultDevDaxParamsCreate() { char *path = getenv("UMF_TESTS_DEVDAX_PATH"); char *size = getenv("UMF_TESTS_DEVDAX_SIZE"); if (path == nullptr || path[0] == 0 || size == nullptr || size[0] == 0) { - return devdax_params_unique_handle_t( - nullptr, &umfDevDaxMemoryProviderParamsDestroy); + return nullptr; } umf_devdax_memory_provider_params_handle_t params = NULL; @@ -35,32 +40,34 @@ devdax_params_unique_handle_t create_devdax_params() { "Failed to create DevDax Memory Provider params"); } - return devdax_params_unique_handle_t(params, - &umfDevDaxMemoryProviderParamsDestroy); + return params; } -auto defaultDevDaxParams = create_devdax_params(); +umf_result_t defaultDevDaxParamsDestroy(void *params) { + return umfDevDaxMemoryProviderParamsDestroy( + (umf_devdax_memory_provider_params_handle_t)params); +} HostMemoryAccessor hostAccessor; static std::vector getIpcProxyPoolTestParamsList(void) { std::vector ipcProxyPoolTestParamsList = {}; - if (!defaultDevDaxParams.get()) { + if (!devDaxEnvSet()) { // return empty list to skip the test return ipcProxyPoolTestParamsList; } ipcProxyPoolTestParamsList = { - {umfProxyPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - defaultDevDaxParams.get(), &hostAccessor}, + {umfProxyPoolOps(), nullptr, nullptr, umfDevDaxMemoryProviderOps(), + defaultDevDaxParamsCreate, defaultDevDaxParamsDestroy, &hostAccessor}, #ifdef UMF_POOL_JEMALLOC_ENABLED - {umfJemallocPoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - defaultDevDaxParams.get(), &hostAccessor}, + {umfJemallocPoolOps(), nullptr, nullptr, umfDevDaxMemoryProviderOps(), + defaultDevDaxParamsCreate, defaultDevDaxParamsDestroy, &hostAccessor}, #endif #ifdef UMF_POOL_SCALABLE_ENABLED - {umfScalablePoolOps(), nullptr, umfDevDaxMemoryProviderOps(), - defaultDevDaxParams.get(), &hostAccessor}, + {umfScalablePoolOps(), nullptr, nullptr, umfDevDaxMemoryProviderOps(), + defaultDevDaxParamsCreate, defaultDevDaxParamsDestroy, &hostAccessor}, #endif }; diff --git a/test/provider_file_memory_ipc.cpp b/test/provider_file_memory_ipc.cpp index 70c1acd8f..90623a179 100644 --- a/test/provider_file_memory_ipc.cpp +++ b/test/provider_file_memory_ipc.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -17,13 +17,10 @@ using umf_test::test; #define FILE_PATH ((char *)"tmp_file") -using file_params_unique_handle_t = - std::unique_ptr; - -file_params_unique_handle_t get_file_params_shared(char *path) { +void *createFileParamsShared() { umf_file_memory_provider_params_handle_t file_params = NULL; - umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + umf_result_t res = + umfFileMemoryProviderParamsCreate(&file_params, FILE_PATH); if (res != UMF_RESULT_SUCCESS) { throw std::runtime_error( "Failed to create File Memory Provider params"); @@ -37,20 +34,21 @@ file_params_unique_handle_t get_file_params_shared(char *path) { "Memory Provider params"); } - return file_params_unique_handle_t(file_params, - &umfFileMemoryProviderParamsDestroy); + return file_params; } -file_params_unique_handle_t file_params_shared = - get_file_params_shared(FILE_PATH); +umf_result_t destroyFileParamsShared(void *params) { + return umfFileMemoryProviderParamsDestroy( + (umf_file_memory_provider_params_handle_t)params); +} -file_params_unique_handle_t get_file_params_fsdax(char *path) { +void *createFileParamsFSDAX() { umf_file_memory_provider_params_handle_t file_params = NULL; - umf_result_t res = umfFileMemoryProviderParamsCreate(&file_params, path); + umf_result_t res = umfFileMemoryProviderParamsCreate( + &file_params, getenv("UMF_TESTS_FSDAX_PATH")); if (res != UMF_RESULT_SUCCESS) { //test will be skipped. - return file_params_unique_handle_t(nullptr, - &umfFileMemoryProviderParamsDestroy); + return nullptr; } res = umfFileMemoryProviderParamsSetVisibility(file_params, @@ -61,12 +59,13 @@ file_params_unique_handle_t get_file_params_fsdax(char *path) { "Memory Provider params"); } - return file_params_unique_handle_t(file_params, - &umfFileMemoryProviderParamsDestroy); + return file_params; } -file_params_unique_handle_t file_params_fsdax = - get_file_params_fsdax(getenv("UMF_TESTS_FSDAX_PATH")); +umf_result_t destroyFileParamsFSDAX(void *params) { + return umfFileMemoryProviderParamsDestroy( + (umf_file_memory_provider_params_handle_t)params); +} HostMemoryAccessor hostAccessor; @@ -75,12 +74,12 @@ static std::vector ipcManyPoolsTestParamsList = { // {umfProxyPoolOps(), nullptr, umfFileMemoryProviderOps(), // file_params_shared.get(), &hostAccessor}, #ifdef UMF_POOL_JEMALLOC_ENABLED - {umfJemallocPoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_shared.get(), &hostAccessor}, + {umfJemallocPoolOps(), nullptr, nullptr, umfFileMemoryProviderOps(), + createFileParamsShared, destroyFileParamsShared, &hostAccessor}, #endif #ifdef UMF_POOL_SCALABLE_ENABLED - {umfScalablePoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_shared.get(), &hostAccessor}, + {umfScalablePoolOps(), nullptr, nullptr, umfFileMemoryProviderOps(), + createFileParamsShared, destroyFileParamsShared, &hostAccessor}, #endif }; @@ -98,12 +97,12 @@ static std::vector getIpcFsDaxTestParamsList(void) { // {umfProxyPoolOps(), nullptr, umfFileMemoryProviderOps(), // file_params_fsdax.get(), &hostAccessor}, #ifdef UMF_POOL_JEMALLOC_ENABLED - {umfJemallocPoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_fsdax.get(), &hostAccessor}, + {umfJemallocPoolOps(), nullptr, nullptr, umfFileMemoryProviderOps(), + createFileParamsFSDAX, destroyFileParamsFSDAX, &hostAccessor}, #endif #ifdef UMF_POOL_SCALABLE_ENABLED - {umfScalablePoolOps(), nullptr, umfFileMemoryProviderOps(), - file_params_fsdax.get(), &hostAccessor}, + {umfScalablePoolOps(), nullptr, 
nullptr, umfFileMemoryProviderOps(), + createFileParamsFSDAX, destroyFileParamsFSDAX, &hostAccessor}, #endif }; diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index 9544a6fed..ddc44548e 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -10,7 +10,7 @@ #include #include -#if (defined UMF_POOL_DISJOINT_ENABLED) +#ifdef UMF_POOL_DISJOINT_ENABLED #include #endif #ifdef UMF_POOL_JEMALLOC_ENABLED @@ -407,11 +407,7 @@ TEST_P(umfProviderTest, close_ipc_handle_wrong_visibility) { GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); -using os_params_unique_handle_t = - std::unique_ptr; - -os_params_unique_handle_t osMemoryProviderParamsShared() { +void *createOsMemoryProviderParamsShared() { umf_os_memory_provider_params_handle_t params = nullptr; umf_result_t res = umfOsMemoryProviderParamsCreate(¶ms); if (res != UMF_RESULT_SUCCESS) { @@ -422,18 +418,19 @@ os_params_unique_handle_t osMemoryProviderParamsShared() { throw std::runtime_error("Failed to set protection"); } - return os_params_unique_handle_t(params, &umfOsMemoryProviderParamsDestroy); + return params; +} + +umf_result_t destroyOsMemoryProviderParamsShared(void *params) { + return umfOsMemoryProviderParamsDestroy( + static_cast(params)); } -auto os_params = osMemoryProviderParamsShared(); HostMemoryAccessor hostAccessor; -#if (defined UMF_POOL_DISJOINT_ENABLED) -using disjoint_params_unique_handle_t = - std::unique_ptr; +#ifdef UMF_POOL_DISJOINT_ENABLED -disjoint_params_unique_handle_t disjointPoolParams() { +void *createDisjointPoolParams() { umf_disjoint_pool_params_handle_t params = nullptr; umf_result_t res = umfDisjointPoolParamsCreate(¶ms); if (res != UMF_RESULT_SUCCESS) { @@ -460,19 +457,25 @@ disjoint_params_unique_handle_t disjointPoolParams() { throw std::runtime_error("Failed to set min bucket size"); } - return disjoint_params_unique_handle_t(params, - &umfDisjointPoolParamsDestroy); + return params; } -disjoint_params_unique_handle_t disjointParams = disjointPoolParams(); + +umf_result_t destroyDisjointPoolParams(void *params) { + return umfDisjointPoolParamsDestroy( + static_cast(params)); +} + #endif static std::vector ipcTestParamsList = { -#if (defined UMF_POOL_DISJOINT_ENABLED) - {umfDisjointPoolOps(), disjointParams.get(), umfOsMemoryProviderOps(), - os_params.get(), &hostAccessor}, +#ifdef UMF_POOL_DISJOINT_ENABLED + {umfDisjointPoolOps(), createDisjointPoolParams, destroyDisjointPoolParams, + umfOsMemoryProviderOps(), createOsMemoryProviderParamsShared, + destroyOsMemoryProviderParamsShared, &hostAccessor}, #endif #ifdef UMF_POOL_JEMALLOC_ENABLED - {umfJemallocPoolOps(), nullptr, umfOsMemoryProviderOps(), os_params.get(), + {umfJemallocPoolOps(), nullptr, nullptr, umfOsMemoryProviderOps(), + createOsMemoryProviderParamsShared, destroyOsMemoryProviderParamsShared, &hostAccessor}, #endif }; diff --git a/test/providers/provider_cuda.cpp b/test/providers/provider_cuda.cpp index 8a7fdd28a..ff0fca550 100644 --- a/test/providers/provider_cuda.cpp +++ b/test/providers/provider_cuda.cpp @@ -52,47 +52,35 @@ CUDATestHelper::CUDATestHelper() { } } -using cuda_params_unique_handle_t = - std::unique_ptr; - -cuda_params_unique_handle_t +umf_cuda_memory_provider_params_handle_t create_cuda_prov_params(CUcontext context, CUdevice device, umf_usm_memory_type_t memory_type) { umf_cuda_memory_provider_params_handle_t params = nullptr; umf_result_t res = umfCUDAMemoryProviderParamsCreate(¶ms); if (res != UMF_RESULT_SUCCESS) { - return cuda_params_unique_handle_t(nullptr, - 
&umfCUDAMemoryProviderParamsDestroy); + return nullptr; } res = umfCUDAMemoryProviderParamsSetContext(params, context); if (res != UMF_RESULT_SUCCESS) { umfCUDAMemoryProviderParamsDestroy(params); - return cuda_params_unique_handle_t(nullptr, - &umfCUDAMemoryProviderParamsDestroy); - ; + return nullptr; } res = umfCUDAMemoryProviderParamsSetDevice(params, device); if (res != UMF_RESULT_SUCCESS) { umfCUDAMemoryProviderParamsDestroy(params); - return cuda_params_unique_handle_t(nullptr, - &umfCUDAMemoryProviderParamsDestroy); - ; + return nullptr; } res = umfCUDAMemoryProviderParamsSetMemoryType(params, memory_type); if (res != UMF_RESULT_SUCCESS) { umfCUDAMemoryProviderParamsDestroy(params); - return cuda_params_unique_handle_t(nullptr, - &umfCUDAMemoryProviderParamsDestroy); - ; + return nullptr; } - return cuda_params_unique_handle_t(params, - &umfCUDAMemoryProviderParamsDestroy); + return params; } class CUDAMemoryAccessor : public MemoryAccessor { @@ -126,8 +114,11 @@ class CUDAMemoryAccessor : public MemoryAccessor { CUcontext hContext_; }; +typedef void *(*pfnProviderParamsCreate)(); +typedef umf_result_t (*pfnProviderParamsDestroy)(void *); + using CUDAProviderTestParams = - std::tuple; struct umfCUDAProviderTest @@ -137,17 +128,31 @@ struct umfCUDAProviderTest void SetUp() override { test::SetUp(); - auto [cuda_params, cu_context, memory_type, accessor] = - this->GetParam(); - params = cuda_params; + auto [params_create, params_destroy, cu_context, memory_type, + accessor] = this->GetParam(); + + params = nullptr; + if (params_create) { + params = (umf_cuda_memory_provider_params_handle_t)params_create(); + } + paramsDestroy = params_destroy; + memAccessor = accessor; expected_context = cu_context; expected_memory_type = memory_type; } - void TearDown() override { test::TearDown(); } + void TearDown() override { + if (paramsDestroy) { + paramsDestroy(params); + } + + test::TearDown(); + } umf_cuda_memory_provider_params_handle_t params; + pfnProviderParamsDestroy paramsDestroy = nullptr; + MemoryAccessor *memAccessor = nullptr; CUcontext expected_context; umf_usm_memory_type_t expected_memory_type; @@ -327,23 +332,27 @@ TEST_P(umfCUDAProviderTest, multiContext) { ret = create_context(device, &ctx2); ASSERT_EQ(ret, 0); - cuda_params_unique_handle_t params1 = + umf_cuda_memory_provider_params_handle_t params1 = create_cuda_prov_params(ctx1, device, UMF_MEMORY_TYPE_HOST); ASSERT_NE(params1, nullptr); umf_memory_provider_handle_t provider1; umf_result_t umf_result = umfMemoryProviderCreate( - umfCUDAMemoryProviderOps(), params1.get(), &provider1); + umfCUDAMemoryProviderOps(), params1, &provider1); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(provider1, nullptr); + umf_result = umfCUDAMemoryProviderParamsDestroy(params1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - cuda_params_unique_handle_t params2 = + umf_cuda_memory_provider_params_handle_t params2 = create_cuda_prov_params(ctx2, device, UMF_MEMORY_TYPE_HOST); ASSERT_NE(params2, nullptr); umf_memory_provider_handle_t provider2; - umf_result = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), - params2.get(), &provider2); + umf_result = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), params2, + &provider2); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); ASSERT_NE(provider2, nullptr); + umf_result = umfCUDAMemoryProviderParamsDestroy(params2); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); // use the providers // allocate from 1, then from 2, then free 1, then free 2 @@ -384,30 +393,40 @@ TEST_P(umfCUDAProviderTest, 
multiContext) { CUDATestHelper cudaTestHelper; -cuda_params_unique_handle_t cuParams_device_memory = create_cuda_prov_params( - cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), - UMF_MEMORY_TYPE_DEVICE); -cuda_params_unique_handle_t cuParams_shared_memory = create_cuda_prov_params( - cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), - UMF_MEMORY_TYPE_SHARED); -cuda_params_unique_handle_t cuParams_host_memory = create_cuda_prov_params( - cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), - UMF_MEMORY_TYPE_HOST); +void *createCuParamsDeviceMemory() { + return create_cuda_prov_params(cudaTestHelper.get_test_context(), + cudaTestHelper.get_test_device(), + UMF_MEMORY_TYPE_DEVICE); +} +void *createCuParamsSharedMemory() { + return create_cuda_prov_params(cudaTestHelper.get_test_context(), + cudaTestHelper.get_test_device(), + UMF_MEMORY_TYPE_SHARED); +} +void *createCuParamsHostMemory() { + return create_cuda_prov_params(cudaTestHelper.get_test_context(), + cudaTestHelper.get_test_device(), + UMF_MEMORY_TYPE_HOST); +} + +umf_result_t destroyCuParams(void *params) { + return umfCUDAMemoryProviderParamsDestroy( + (umf_cuda_memory_provider_params_handle_t)params); +} CUDAMemoryAccessor cuAccessor(cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device()); HostMemoryAccessor hostAccessor; - INSTANTIATE_TEST_SUITE_P( umfCUDAProviderTestSuite, umfCUDAProviderTest, ::testing::Values( - CUDAProviderTestParams{cuParams_device_memory.get(), + CUDAProviderTestParams{createCuParamsDeviceMemory, destroyCuParams, cudaTestHelper.get_test_context(), UMF_MEMORY_TYPE_DEVICE, &cuAccessor}, - CUDAProviderTestParams{cuParams_shared_memory.get(), + CUDAProviderTestParams{createCuParamsSharedMemory, destroyCuParams, cudaTestHelper.get_test_context(), UMF_MEMORY_TYPE_SHARED, &hostAccessor}, - CUDAProviderTestParams{cuParams_host_memory.get(), + CUDAProviderTestParams{createCuParamsHostMemory, destroyCuParams, cudaTestHelper.get_test_context(), UMF_MEMORY_TYPE_HOST, &hostAccessor})); diff --git a/test/providers/provider_level_zero.cpp b/test/providers/provider_level_zero.cpp index 78b5e4847..cdf620ace 100644 --- a/test/providers/provider_level_zero.cpp +++ b/test/providers/provider_level_zero.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -61,11 +61,7 @@ LevelZeroTestHelper::LevelZeroTestHelper() { } } -using level_zero_params_unique_handle_t = - std::unique_ptr; - -level_zero_params_unique_handle_t +umf_level_zero_memory_provider_params_handle_t create_level_zero_prov_params(ze_context_handle_t context, ze_device_handle_t device, umf_usm_memory_type_t memory_type) { @@ -73,36 +69,28 @@ create_level_zero_prov_params(ze_context_handle_t context, umf_result_t res = umfLevelZeroMemoryProviderParamsCreate(¶ms); if (res != UMF_RESULT_SUCCESS) { - return level_zero_params_unique_handle_t( - nullptr, &umfLevelZeroMemoryProviderParamsDestroy); + return nullptr; } res = umfLevelZeroMemoryProviderParamsSetContext(params, context); if (res != UMF_RESULT_SUCCESS) { umfLevelZeroMemoryProviderParamsDestroy(params); - return level_zero_params_unique_handle_t( - nullptr, &umfLevelZeroMemoryProviderParamsDestroy); - ; + return nullptr; } res = umfLevelZeroMemoryProviderParamsSetDevice(params, device); if (res != UMF_RESULT_SUCCESS) { umfLevelZeroMemoryProviderParamsDestroy(params); - return level_zero_params_unique_handle_t( - nullptr, &umfLevelZeroMemoryProviderParamsDestroy); - ; + return nullptr; } res = umfLevelZeroMemoryProviderParamsSetMemoryType(params, memory_type); if (res != UMF_RESULT_SUCCESS) { umfLevelZeroMemoryProviderParamsDestroy(params); - return level_zero_params_unique_handle_t( - nullptr, &umfLevelZeroMemoryProviderParamsDestroy); - ; + return nullptr; } - return level_zero_params_unique_handle_t( - params, &umfLevelZeroMemoryProviderParamsDestroy); + return params; } struct LevelZeroProviderInit @@ -237,8 +225,11 @@ class LevelZeroMemoryAccessor : public MemoryAccessor { ze_context_handle_t hContext_; }; +typedef void *(*pfnProviderParamsCreate)(); +typedef umf_result_t (*pfnProviderParamsDestroy)(void *); + using LevelZeroProviderTestParams = - std::tuple; struct umfLevelZeroProviderTest @@ -248,8 +239,16 @@ struct umfLevelZeroProviderTest void SetUp() override { test::SetUp(); - auto [l0_params, ze_context, memory_type, accessor] = this->GetParam(); - params = l0_params; + auto [params_create, params_destroy, ze_context, memory_type, + accessor] = this->GetParam(); + + params = nullptr; + if (params_create) { + params = + (umf_level_zero_memory_provider_params_handle_t)params_create(); + } + paramsDestroy = params_destroy; + memAccessor = accessor; hContext = ze_context; @@ -273,9 +272,17 @@ struct umfLevelZeroProviderTest ASSERT_NE(zeMemoryTypeExpected, ZE_MEMORY_TYPE_UNKNOWN); } - void TearDown() override { test::TearDown(); } + void TearDown() override { + if (paramsDestroy) { + paramsDestroy(params); + } + + test::TearDown(); + } umf_level_zero_memory_provider_params_handle_t params; + pfnProviderParamsDestroy paramsDestroy = nullptr; + MemoryAccessor *memAccessor = nullptr; ze_context_handle_t hContext = nullptr; ze_memory_type_t zeMemoryTypeExpected = ZE_MEMORY_TYPE_UNKNOWN; @@ -418,17 +425,27 @@ TEST_P(umfLevelZeroProviderTest, levelZeroProviderNullParams) { // TODO add tests that mixes Level Zero Memory Provider and Disjoint Pool -level_zero_params_unique_handle_t l0Params_device_memory = - create_level_zero_prov_params(l0TestHelper.get_test_context(), - l0TestHelper.get_test_device(), - UMF_MEMORY_TYPE_DEVICE); -level_zero_params_unique_handle_t l0Params_shared_memory = - create_level_zero_prov_params(l0TestHelper.get_test_context(), - l0TestHelper.get_test_device(), - UMF_MEMORY_TYPE_SHARED); -level_zero_params_unique_handle_t l0Params_host_memory 
= - create_level_zero_prov_params(l0TestHelper.get_test_context(), nullptr, - UMF_MEMORY_TYPE_HOST); +void *createL0ParamsDeviceMemory() { + return create_level_zero_prov_params(l0TestHelper.get_test_context(), + l0TestHelper.get_test_device(), + UMF_MEMORY_TYPE_DEVICE); +} + +void *createL0ParamsSharedMemory() { + return create_level_zero_prov_params(l0TestHelper.get_test_context(), + l0TestHelper.get_test_device(), + UMF_MEMORY_TYPE_SHARED); +} + +void *createL0ParamsHostMemory() { + return create_level_zero_prov_params(l0TestHelper.get_test_context(), + nullptr, UMF_MEMORY_TYPE_HOST); +} + +umf_result_t destroyL0Params(void *params) { + return umfLevelZeroMemoryProviderParamsDestroy( + static_cast(params)); +} LevelZeroMemoryAccessor l0Accessor((ze_context_handle_t)l0TestHelper.get_test_context(), @@ -439,13 +456,13 @@ HostMemoryAccessor hostAccessor; INSTANTIATE_TEST_SUITE_P( umfLevelZeroProviderTestSuite, umfLevelZeroProviderTest, ::testing::Values( - LevelZeroProviderTestParams{l0Params_device_memory.get(), + LevelZeroProviderTestParams{createL0ParamsDeviceMemory, destroyL0Params, l0TestHelper.get_test_context(), UMF_MEMORY_TYPE_DEVICE, &l0Accessor}, - LevelZeroProviderTestParams{l0Params_shared_memory.get(), + LevelZeroProviderTestParams{createL0ParamsSharedMemory, destroyL0Params, l0TestHelper.get_test_context(), UMF_MEMORY_TYPE_SHARED, &hostAccessor}, - LevelZeroProviderTestParams{l0Params_host_memory.get(), + LevelZeroProviderTestParams{createL0ParamsHostMemory, destroyL0Params, l0TestHelper.get_test_context(), UMF_MEMORY_TYPE_HOST, &hostAccessor})); @@ -454,9 +471,9 @@ INSTANTIATE_TEST_SUITE_P( #ifdef _WIN32 GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); #else -INSTANTIATE_TEST_SUITE_P(umfLevelZeroProviderTestSuite, umfIpcTest, - ::testing::Values(ipcTestParams{ - umfProxyPoolOps(), nullptr, - umfLevelZeroMemoryProviderOps(), - l0Params_device_memory.get(), &l0Accessor})); +INSTANTIATE_TEST_SUITE_P( + umfLevelZeroProviderTestSuite, umfIpcTest, + ::testing::Values(ipcTestParams{ + umfProxyPoolOps(), nullptr, nullptr, umfLevelZeroMemoryProviderOps(), + createL0ParamsDeviceMemory, destroyL0Params, &l0Accessor})); #endif From 0aedddfe4cb335e5ce65bace7ec4cd55ad12c4cb Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 27 Jan 2025 11:27:22 +0100 Subject: [PATCH 105/466] Check return value of set_context() Check return value of set_context() in cu_memory_provider_open_ipc_handle(). It fixes a Coverity issue. 
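In short, the fix replaces the ignored call with a checked one. A minimal sketch of the pattern (all names are taken verbatim from the diff below):

    umf_result = set_context(restore_ctx, &restore_ctx);
    if (umf_result != UMF_RESULT_SUCCESS) {
        LOG_ERR("Failed to restore CUDA context, ret = %d", umf_result);
    }

Note that the function still returns the result of the IPC open itself (cu2umf_result(cu_result)); a failure to restore the previous CUDA context is only logged, not propagated.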
Signed-off-by: Lukasz Dorau --- src/provider/provider_cuda.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index 7a7b0a467..edebb04e6 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -594,7 +594,10 @@ static umf_result_t cu_memory_provider_open_ipc_handle(void *provider, LOG_ERR("cuIpcOpenMemHandle() failed."); } - set_context(restore_ctx, &restore_ctx); + umf_result = set_context(restore_ctx, &restore_ctx); + if (umf_result != UMF_RESULT_SUCCESS) { + LOG_ERR("Failed to restore CUDA context, ret = %d", umf_result); + } return cu2umf_result(cu_result); } From 378d1a32e98781426ba62772986d3ec4032a09cc Mon Sep 17 00:00:00 2001 From: Agata Momot Date: Thu, 7 Nov 2024 17:14:45 +0100 Subject: [PATCH 106/466] add template for Windows benchmarks problems with adding comments to PR to be fixed --- .github/workflows/nightly.yml | 8 + .github/workflows/pr_push.yml | 7 +- .github/workflows/reusable_benchmarks.yml | 230 ++++++++++++++++------ .github/workflows/reusable_docs_build.yml | 14 ++ 4 files changed, 201 insertions(+), 58 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 46543fac8..7a4cd704b 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -9,6 +9,7 @@ on: permissions: contents: read + pull-requests: write jobs: fuzz-test: @@ -194,3 +195,10 @@ jobs: # Beside the 2 LTS Ubuntu, we also test this on the latest Ubuntu - to be updated # every 6 months, so we verify the latest version of packages (compilers, etc.). os: "['ubuntu-22.04', 'ubuntu-24.04', 'ubuntu-24.10']" + + Benchmarks: + uses: ./.github/workflows/reusable_benchmarks.yml + with: + pr_no: '0' + bench_script_params: '' + upload_report: true diff --git a/.github/workflows/pr_push.yml b/.github/workflows/pr_push.yml index 9623b69f1..88f415742 100644 --- a/.github/workflows/pr_push.yml +++ b/.github/workflows/pr_push.yml @@ -14,7 +14,8 @@ concurrency: permissions: contents: read - + pull-requests: write + jobs: CodeChecks: uses: ./.github/workflows/reusable_checks.yml @@ -57,6 +58,10 @@ jobs: Benchmarks: needs: [Build] uses: ./.github/workflows/reusable_benchmarks.yml + with: + pr_no: '0' + bench_script_params: '' + upload_report: true ProxyLib: needs: [Build] uses: ./.github/workflows/reusable_proxy_lib.yml diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index ed6a48294..92e5c1eb3 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -1,10 +1,27 @@ # Executes benchmarks implemented in this repository name: Benchmarks -on: workflow_call +on: + workflow_call: + inputs: + pr_no: + required: true + # even though this is a number, this is a workaround for issues with + # reusable workflow calls that result in "Unexpected value '0'" error. 
+ type: string + default: '0' + bench_script_params: + required: false + type: string + default: '' + upload_report: + required: false + type: boolean + default: false permissions: contents: read + pull-requests: write env: BUILD_DIR : "${{github.workspace}}/build" @@ -13,11 +30,11 @@ env: jobs: benchmarks: name: Benchmarks - env: - VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" + # env: + # VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" strategy: matrix: - os: ['ubuntu-latest', 'windows-latest'] + os: ['ubuntu-latest'] #, 'windows-latest'] include: # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command to determine the build type - os: ubuntu-latest @@ -25,56 +42,155 @@ jobs: runs-on: ${{matrix.os}} steps: - - name: Checkout - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - with: - fetch-depth: 0 - - - name: Install apt packages - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt-get update - sudo apt-get install -y cmake libhwloc-dev libnuma-dev libtbb-dev - - - name: Initialize vcpkg - if: matrix.os == 'windows-latest' - uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 - with: - vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 - vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg - vcpkgJsonGlob: '**/vcpkg.json' - - - name: Install vcpkg packages - if: matrix.os == 'windows-latest' - run: vcpkg install - shell: pwsh # Specifies PowerShell as the shell for running the script. - - - name: Configure build - run: > - cmake - -B ${{env.BUILD_DIR}} - ${{matrix.extra_build_option}} - -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" - -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" - -DUMF_BUILD_SHARED_LIBRARY=ON - -DUMF_BUILD_BENCHMARKS=ON - -DUMF_BUILD_BENCHMARKS_MT=ON - -DUMF_BUILD_TESTS=OFF - -DUMF_FORMAT_CODE_STYLE=OFF - -DUMF_DEVELOPER_MODE=OFF - -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON - -DUMF_BUILD_CUDA_PROVIDER=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON - -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - - - name: Build UMF on Linux - if: matrix.os == 'ubuntu-latest' - run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) - - - name: Build UMF on Windows - if: matrix.os == 'windows-latest' - run: cmake --build ${{env.BUILD_DIR}} --config Release -j $Env:NUMBER_OF_PROCESSORS - - - name: Run benchmarks - working-directory: ${{env.BUILD_DIR}} - run: ctest -V --test-dir benchmark -C Release + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + const pr_no = '${{ inputs.pr_no }}'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const params = '${{ inputs.bench_script_params }}'; + const body = `Compute Benchmarks run (with params: ${params}):\n${url}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + + # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. 
+ - name: Fetch PR's merge commit + if: ${{ inputs.pr_no != 0 }} + env: + PR_NO: ${{ inputs.pr_no }} + run: | + git fetch -- https://github.com/${{github.repository}} +refs/pull/${PR_NO}/*:refs/remotes/origin/pr/${PR_NO}/* + git checkout origin/pr/${PR_NO}/merge + git rev-parse origin/pr/${PR_NO}/merge + + - name: Install apt packages + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt-get update + sudo apt-get install -y cmake libhwloc-dev libnuma-dev libtbb-dev + + # - name: Initialize vcpkg + # if: matrix.os == 'windows-latest' + # uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + # with: + # vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + # vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg + # vcpkgJsonGlob: '**/vcpkg.json' + + # - name: Install vcpkg packages + # if: matrix.os == 'windows-latest' + # run: vcpkg install + # shell: pwsh # Specifies PowerShell as the shell for running the script. + + # -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" + - name: Configure build + run: > + cmake + -B ${{env.BUILD_DIR}} + ${{matrix.extra_build_option}} + -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_BENCHMARKS=ON + -DUMF_BUILD_BENCHMARKS_MT=ON + -DUMF_BUILD_TESTS=OFF + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + + - name: Build UMF on Linux + if: matrix.os == 'ubuntu-latest' + run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) + + # - name: Build UMF on Windows + # if: matrix.os == 'windows-latest' + # run: cmake --build ${{env.BUILD_DIR}} --config Release -j $Env:NUMBER_OF_PROCESSORS + + - name: Checkout UR + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: oneapi-src/unified-runtime + path: ur-repo + fetch-depth: 1 + fetch-tags: false + + - name: Install pip packages + run: | + pip install --force-reinstall -r ${{github.workspace}}/ur-repo/third_party/benchmark_requirements.txt + + - name: Install HWLOC + if: matrix.os == 'ubuntu-latest' + run: | + sudo apt-get update + sudo apt-get install libhwloc-dev + + - name: Run benchmarks + id: benchmarks + if: matrix.os == 'ubuntu-latest' + working-directory: ${{env.BUILD_DIR}} + run: > + ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py + ~/bench_workdir + --umf ${{env.BUILD_DIR}} + ${{ inputs.upload_report && '--output-html' || '' }} + ${{ inputs.bench_script_params }} + + - name: Test output + run: > + echo 'out: ${{ steps.benchmarks.outcome }}' + + # - name: Run benchmarks + # if: matrix.os == 'windows-latest' + # working-directory: ${{env.BUILD_DIR}} + # run: > + # python3 ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py + # ~/bench_workdir + # --umf ${{env.BUILD_DIR}} + # ${{ inputs.upload_report && '--output-html' || '' }} + # ${{ inputs.bench_script_params }} + + - name: Add comment to PR + uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 + if: ${{ always() && inputs.pr_no != 0 }} + with: + script: | + let markdown = "" + try { + const fs = require('fs'); + markdown = fs.readFileSync('${{env.BUILD_DIR}}/benchmark_results.md', 'utf8'); + } catch(err) { + } + + const pr_no = '${{ inputs.pr_no }}'; + const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; + const test_status = '${{ steps.benchmarks.outcome }}'; + const job_status = '${{ job.status }}'; + const params = '${{ 
inputs.bench_script_params }}'; + const body = `Compute Benchmarks run (${params}):\n${url}\nJob status: ${job_status}. Test status: ${test_status}.\n ${markdown}`; + + github.rest.issues.createComment({ + issue_number: pr_no, + owner: context.repo.owner, + repo: context.repo.repo, + body: body + }) + + - name: Upload HTML report + if: ${{ always() && inputs.upload_report }} + uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: ${{env.BUILD_DIR}}/benchmark_results.html + key: benchmark-results-${{ github.run_id }} diff --git a/.github/workflows/reusable_docs_build.yml b/.github/workflows/reusable_docs_build.yml index e90ca87ae..013f83e3a 100644 --- a/.github/workflows/reusable_docs_build.yml +++ b/.github/workflows/reusable_docs_build.yml @@ -45,6 +45,20 @@ jobs: -DUMF_DISABLE_HWLOC=ON cmake --build build --target docs + - name: Download benchmark HTML + if: ${{ inputs.upload == true }} + id: download-bench-html + uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + with: + path: ${{github.workspace}}/build/benchmark_results.html + key: benchmark-results- + + - name: Move benchmark HTML + if: ${{ inputs.upload == true && steps.download-bench-html.outputs.cache-hit != '' }} + # exact or partial cache hit + run: | + mv ${{ github.workspace }}/build/benchmark_results.html ${{ github.workspace }}/build/docs_build/generated/html + - name: Upload artifact if: ${{ inputs.upload == true }} uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0.0 From 6cdc9bbfeda31c9ff61cd1ceed164bd0167b4f0e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 27 Jan 2025 21:56:46 +0000 Subject: [PATCH 107/466] Bump sphinxcontrib-spelling Bumps the pip-dependencies group in /third_party with 1 update: [sphinxcontrib-spelling](https://github.com/sphinx-contrib/spelling). Updates `sphinxcontrib-spelling` from 8.0.0 to 8.0.1 - [Release notes](https://github.com/sphinx-contrib/spelling/releases) - [Commits](https://github.com/sphinx-contrib/spelling/compare/8.0.0...8.0.1) --- updated-dependencies: - dependency-name: sphinxcontrib-spelling dependency-type: direct:production update-type: version-update:semver-patch dependency-group: pip-dependencies ... 
Signed-off-by: dependabot[bot] --- third_party/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/requirements.txt b/third_party/requirements.txt index 1255dcb92..4b8244b3a 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -17,4 +17,4 @@ sphinx==8.1.3 sphinx_book_theme==1.1.3 # Spelling check in documentation pyenchant==3.2.2 -sphinxcontrib-spelling==8.0.0 +sphinxcontrib-spelling==8.0.1 From a4ef27ff4bfa46fb83c27bb35f94ca0eae47a03f Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Thu, 2 Jan 2025 17:47:32 +0000 Subject: [PATCH 108/466] [L0 provider] implement min/recommended page size query and allow passing device ordinal through params --- include/umf/providers/provider_level_zero.h | 10 +- src/libumf.def | 1 + src/libumf.map | 1 + src/provider/provider_level_zero.c | 289 +++++++++++------- src/utils/utils_level_zero.cpp | 28 +- src/utils/utils_level_zero.h | 4 +- test/providers/provider_level_zero.cpp | 45 +++ .../provider_level_zero_not_impl.cpp | 5 +- 8 files changed, 270 insertions(+), 113 deletions(-) diff --git a/include/umf/providers/provider_level_zero.h b/include/umf/providers/provider_level_zero.h index df6dd7364..b20fb40d5 100644 --- a/include/umf/providers/provider_level_zero.h +++ b/include/umf/providers/provider_level_zero.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -83,6 +83,14 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetFreePolicy( umf_level_zero_memory_provider_params_handle_t hParams, umf_level_zero_memory_provider_free_policy_t policy); +/// @brief Set the device ordinal in the parameters struct. +/// @param hParams handle to the parameters of the Level Zero Memory Provider. +/// @param deviceOrdinal device ordinal. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. +umf_result_t umfLevelZeroMemoryProviderParamsSetDeviceOrdinal( + umf_level_zero_memory_provider_params_handle_t hParams, + uint32_t deviceOrdinal); + umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void); #ifdef __cplusplus diff --git a/src/libumf.def b/src/libumf.def index 090b3a86f..f93553e90 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -122,3 +122,4 @@ EXPORTS umfFixedMemoryProviderParamsCreate umfFixedMemoryProviderParamsDestroy umfLevelZeroMemoryProviderParamsSetFreePolicy + umfLevelZeroMemoryProviderParamsSetDeviceOrdinal diff --git a/src/libumf.map b/src/libumf.map index c33bb7c10..7a7ac5ad3 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -120,4 +120,5 @@ UMF_0.11 { umfFixedMemoryProviderParamsCreate; umfFixedMemoryProviderParamsDestroy; umfLevelZeroMemoryProviderParamsSetFreePolicy; + umfLevelZeroMemoryProviderParamsSetDeviceOrdinal; } UMF_0.10; diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index eaea8abd9..7794d4575 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -83,6 +83,14 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetFreePolicy( return UMF_RESULT_ERROR_NOT_SUPPORTED; } +umf_result_t umfLevelZeroMemoryProviderParamsSetDeviceOrdinal( + umf_level_zero_memory_provider_params_handle_t hParams, + uint32_t deviceOrdinal) { + (void)hParams; + (void)deviceOrdinal; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void) { // not supported LOG_ERR("L0 memory provider is disabled! (UMF_BUILD_LEVEL_ZERO_PROVIDER is " @@ -118,6 +126,8 @@ typedef struct umf_level_zero_memory_provider_params_t { umf_level_zero_memory_provider_free_policy_t freePolicy; ///< Memory free policy + + uint32_t device_ordinal; } umf_level_zero_memory_provider_params_t; typedef struct ze_memory_provider_t { @@ -131,6 +141,10 @@ typedef struct ze_memory_provider_t { ze_device_properties_t device_properties; ze_driver_memory_free_policy_ext_flags_t freePolicyFlags; + + size_t min_page_size; + + uint32_t device_ordinal; } ze_memory_provider_t; typedef struct ze_ops_t { @@ -159,6 +173,9 @@ typedef struct ze_ops_t { ze_device_properties_t *); ze_result_t (*zeMemFreeExt)(ze_context_handle_t, ze_memory_free_ext_desc_t *, void *); + ze_result_t (*zeMemGetAllocProperties)(ze_context_handle_t, const void *, + ze_memory_allocation_properties_t *, + ze_device_handle_t *); } ze_ops_t; static ze_ops_t g_ze_ops; @@ -214,13 +231,15 @@ static void init_ze_global_state(void) { utils_get_symbol_addr(0, "zeDeviceGetProperties", lib_name); *(void **)&g_ze_ops.zeMemFreeExt = utils_get_symbol_addr(0, "zeMemFreeExt", lib_name); + *(void **)&g_ze_ops.zeMemGetAllocProperties = + utils_get_symbol_addr(0, "zeMemGetAllocProperties", lib_name); if (!g_ze_ops.zeMemAllocHost || !g_ze_ops.zeMemAllocDevice || !g_ze_ops.zeMemAllocShared || !g_ze_ops.zeMemFree || !g_ze_ops.zeMemGetIpcHandle || !g_ze_ops.zeMemOpenIpcHandle || !g_ze_ops.zeMemCloseIpcHandle || !g_ze_ops.zeContextMakeMemoryResident || - !g_ze_ops.zeDeviceGetProperties) { + !g_ze_ops.zeDeviceGetProperties || !g_ze_ops.zeMemGetAllocProperties) { // g_ze_ops.zeMemPutIpcHandle can be NULL because it was introduced // starting from Level Zero 1.6 LOG_ERR("Required Level Zero symbols not found."); @@ -250,6 +269,7 @@ umf_result_t umfLevelZeroMemoryProviderParamsCreate( params->resident_device_handles = NULL; params->resident_device_count = 0; params->freePolicy = UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT; + params->device_ordinal = 0; *hParams = params; @@ -307,6 +327,18 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetMemoryType( return UMF_RESULT_SUCCESS; } +umf_result_t umfLevelZeroMemoryProviderParamsSetDeviceOrdinal( + umf_level_zero_memory_provider_params_handle_t hParams, + uint32_t deviceOrdinal) { + if (!hParams) { + LOG_ERR("Level zero memory provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + hParams->device_ordinal = deviceOrdinal; + + return UMF_RESULT_SUCCESS; +} + umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( umf_level_zero_memory_provider_params_handle_t hParams, ze_device_handle_t *hDevices, uint32_t deviceCount) { @@ -351,100 +383,6 @@ umfFreePolicyToZePolicy(umf_level_zero_memory_provider_free_policy_t policy) { return 0; } } - -static umf_result_t ze_memory_provider_initialize(void *params, - void **provider) { - if (params == NULL) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_level_zero_memory_provider_params_handle_t ze_params = - 
(umf_level_zero_memory_provider_params_handle_t)params; - - if (!ze_params->level_zero_context_handle) { - LOG_ERR("Level Zero context handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if ((ze_params->memory_type == UMF_MEMORY_TYPE_HOST) == - (ze_params->level_zero_device_handle != NULL)) { - LOG_ERR("Level Zero device handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - if ((bool)ze_params->resident_device_count && - (ze_params->resident_device_handles == NULL)) { - LOG_ERR("Resident devices handles array is NULL, but device_count is " - "not zero"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - utils_init_once(&ze_is_initialized, init_ze_global_state); - if (Init_ze_global_state_failed) { - LOG_ERR("Loading Level Zero symbols failed"); - return UMF_RESULT_ERROR_UNKNOWN; - } - - ze_memory_provider_t *ze_provider = - umf_ba_global_alloc(sizeof(ze_memory_provider_t)); - if (!ze_provider) { - LOG_ERR("Cannot allocate memory for Level Zero Memory Provider"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - ze_provider->context = ze_params->level_zero_context_handle; - ze_provider->device = ze_params->level_zero_device_handle; - ze_provider->memory_type = (ze_memory_type_t)ze_params->memory_type; - ze_provider->freePolicyFlags = - umfFreePolicyToZePolicy(ze_params->freePolicy); - - memset(&ze_provider->device_properties, 0, - sizeof(ze_provider->device_properties)); - ze_provider->device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; - - if (ze_provider->device) { - umf_result_t ret = ze2umf_result(g_ze_ops.zeDeviceGetProperties( - ze_provider->device, &ze_provider->device_properties)); - - if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("Cannot get device properties"); - umf_ba_global_free(ze_provider); - return ret; - } - } - - if (ze_params->resident_device_count) { - ze_provider->resident_device_handles = umf_ba_global_alloc( - sizeof(ze_device_handle_t) * ze_params->resident_device_count); - if (!ze_provider->resident_device_handles) { - LOG_ERR("Cannot allocate memory for resident devices"); - umf_ba_global_free(ze_provider); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - ze_provider->resident_device_count = ze_params->resident_device_count; - - for (uint32_t i = 0; i < ze_provider->resident_device_count; i++) { - ze_provider->resident_device_handles[i] = - ze_params->resident_device_handles[i]; - } - } else { - ze_provider->resident_device_handles = NULL; - ze_provider->resident_device_count = 0; - } - - *provider = ze_provider; - - return UMF_RESULT_SUCCESS; -} - -static void ze_memory_provider_finalize(void *provider) { - ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; - umf_ba_global_free(ze_provider->resident_device_handles); - - umf_ba_global_free(provider); -} - static bool use_relaxed_allocation(ze_memory_provider_t *ze_provider, size_t size) { assert(ze_provider); @@ -482,8 +420,7 @@ static umf_result_t ze_memory_provider_alloc(void *provider, size_t size, ? &relaxed_device_allocation_desc : NULL, .flags = 0, - .ordinal = 0 // TODO - }; + .ordinal = ze_provider->device_ordinal}; ze_result = g_ze_ops.zeMemAllocDevice(ze_provider->context, &dev_desc, size, alignment, ze_provider->device, resultPtr); @@ -500,8 +437,7 @@ static umf_result_t ze_memory_provider_alloc(void *provider, size_t size, ? 
&relaxed_device_allocation_desc : NULL, .flags = 0, - .ordinal = 0 // TODO - }; + .ordinal = ze_provider->device_ordinal}; ze_result = g_ze_ops.zeMemAllocShared(ze_provider->context, &dev_desc, &host_desc, size, alignment, ze_provider->device, resultPtr); @@ -553,6 +489,133 @@ static umf_result_t ze_memory_provider_free(void *provider, void *ptr, g_ze_ops.zeMemFreeExt(ze_provider->context, &desc, ptr)); } +static umf_result_t query_min_page_size(ze_memory_provider_t *ze_provider, + size_t *min_page_size) { + assert(min_page_size); + + LOG_DEBUG("Querying minimum page size"); + + void *ptr; + umf_result_t result = ze_memory_provider_alloc(ze_provider, 1, 0, &ptr); + if (result != UMF_RESULT_SUCCESS) { + return result; + } + + ze_memory_allocation_properties_t properties = { + .stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES}; + ze_result_t ze_result = g_ze_ops.zeMemGetAllocProperties( + ze_provider->context, ptr, &properties, NULL); + + *min_page_size = properties.pageSize; + + ze_memory_provider_free(ze_provider, ptr, 1); + + return ze2umf_result(ze_result); +} + +static void ze_memory_provider_finalize(void *provider) { + ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; + umf_ba_global_free(ze_provider->resident_device_handles); + + umf_ba_global_free(provider); +} + +static umf_result_t ze_memory_provider_initialize(void *params, + void **provider) { + if (params == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_level_zero_memory_provider_params_handle_t ze_params = + (umf_level_zero_memory_provider_params_handle_t)params; + + if (!ze_params->level_zero_context_handle) { + LOG_ERR("Level Zero context handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if ((ze_params->memory_type == UMF_MEMORY_TYPE_HOST) == + (ze_params->level_zero_device_handle != NULL)) { + LOG_ERR("Level Zero device handle should be set only for device and " + "shared memory types"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if ((bool)ze_params->resident_device_count && + (ze_params->resident_device_handles == NULL)) { + LOG_ERR("Resident devices handles array is NULL, but device_count is " + "not zero"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + utils_init_once(&ze_is_initialized, init_ze_global_state); + if (Init_ze_global_state_failed) { + LOG_ERR("Loading Level Zero symbols failed"); + return UMF_RESULT_ERROR_UNKNOWN; + } + + ze_memory_provider_t *ze_provider = + umf_ba_global_alloc(sizeof(ze_memory_provider_t)); + if (!ze_provider) { + LOG_ERR("Cannot allocate memory for Level Zero Memory Provider"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + ze_provider->context = ze_params->level_zero_context_handle; + ze_provider->device = ze_params->level_zero_device_handle; + ze_provider->memory_type = (ze_memory_type_t)ze_params->memory_type; + ze_provider->freePolicyFlags = + umfFreePolicyToZePolicy(ze_params->freePolicy); + ze_provider->min_page_size = 0; + ze_provider->device_ordinal = ze_params->device_ordinal; + + memset(&ze_provider->device_properties, 0, + sizeof(ze_provider->device_properties)); + ze_provider->device_properties.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES; + + if (ze_provider->device) { + umf_result_t ret = ze2umf_result(g_ze_ops.zeDeviceGetProperties( + ze_provider->device, &ze_provider->device_properties)); + + if (ret != UMF_RESULT_SUCCESS) { + LOG_ERR("Cannot get device properties"); + umf_ba_global_free(ze_provider); + return ret; + } + } + + if (ze_params->resident_device_count) { + 
ze_provider->resident_device_handles = umf_ba_global_alloc( + sizeof(ze_device_handle_t) * ze_params->resident_device_count); + if (!ze_provider->resident_device_handles) { + LOG_ERR("Cannot allocate memory for resident devices"); + umf_ba_global_free(ze_provider); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + ze_provider->resident_device_count = ze_params->resident_device_count; + + for (uint32_t i = 0; i < ze_provider->resident_device_count; i++) { + ze_provider->resident_device_handles[i] = + ze_params->resident_device_handles[i]; + } + } else { + ze_provider->resident_device_handles = NULL; + ze_provider->resident_device_count = 0; + } + + umf_result_t result = + query_min_page_size(ze_provider, &ze_provider->min_page_size); + if (result != UMF_RESULT_SUCCESS) { + ze_memory_provider_finalize(provider); + return result; + } + + *provider = ze_provider; + + return UMF_RESULT_SUCCESS; +} + static void ze_memory_provider_get_last_native_error(void *provider, const char **ppMessage, int32_t *pError) { @@ -569,11 +632,23 @@ static void ze_memory_provider_get_last_native_error(void *provider, static umf_result_t ze_memory_provider_get_min_page_size(void *provider, void *ptr, size_t *pageSize) { - (void)provider; - (void)ptr; + ze_memory_provider_t *ze_provider = (ze_memory_provider_t *)provider; + + if (!ptr) { + *pageSize = ze_provider->min_page_size; + return UMF_RESULT_SUCCESS; + } + + ze_memory_allocation_properties_t properties = { + .stype = ZE_STRUCTURE_TYPE_MEMORY_ALLOCATION_PROPERTIES}; + ze_result_t ze_result = g_ze_ops.zeMemGetAllocProperties( + ze_provider->context, ptr, &properties, NULL); + if (ze_result != ZE_RESULT_SUCCESS) { + return ze2umf_result(ze_result); + } + + *pageSize = properties.pageSize; - // TODO - *pageSize = 1024 * 64; return UMF_RESULT_SUCCESS; } @@ -600,12 +675,8 @@ static umf_result_t ze_memory_provider_purge_force(void *provider, void *ptr, static umf_result_t ze_memory_provider_get_recommended_page_size(void *provider, size_t size, size_t *pageSize) { - (void)provider; (void)size; - - // TODO - *pageSize = 1024 * 64; - return UMF_RESULT_SUCCESS; + return ze_memory_provider_get_min_page_size(provider, NULL, pageSize); } static const char *ze_memory_provider_get_name(void *provider) { diff --git a/src/utils/utils_level_zero.cpp b/src/utils/utils_level_zero.cpp index 833047dd7..40f906f43 100644 --- a/src/utils/utils_level_zero.cpp +++ b/src/utils/utils_level_zero.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -60,6 +60,9 @@ struct libze_ops { const ze_device_mem_alloc_desc_t *, size_t, size_t, ze_device_handle_t, void **); ze_result_t (*zeMemFree)(ze_context_handle_t, void *); + ze_result_t (*zeDeviceGetMemoryProperties)( + ze_device_handle_t hDevice, uint32_t *pCount, + ze_device_memory_properties_t *pMemProperties); } libze_ops; #if USE_DLOPEN @@ -125,6 +128,9 @@ struct DlHandleCloser { libze_ops.zeMemFree = [](auto... args) { return noop_stub(args...); }; + libze_ops.zeDeviceGetMemoryProperties = [](auto... 
args) { + return noop_stub(args...); + }; utils_close_library(dlHandle); } } @@ -265,6 +271,13 @@ int InitLevelZeroOps() { fprintf(stderr, "zeMemFree symbol not found in %s\n", lib_name); return -1; } + *(void **)&libze_ops.zeDeviceGetMemoryProperties = utils_get_symbol_addr( + zeDlHandle.get(), "zeDeviceGetMemoryProperties", lib_name); + if (libze_ops.zeDeviceGetMemoryProperties == nullptr) { + fprintf(stderr, "zeDeviceGetMemoryProperties symbol not found in %s\n", + lib_name); + return -1; + } return 0; } @@ -292,6 +305,7 @@ int InitLevelZeroOps() { libze_ops.zeMemGetAllocProperties = zeMemGetAllocProperties; libze_ops.zeMemAllocDevice = zeMemAllocDevice; libze_ops.zeMemFree = zeMemFree; + libze_ops.zeDeviceGetMemoryProperties = zeDeviceGetMemoryProperties; return 0; } @@ -745,3 +759,15 @@ ze_memory_type_t utils_ze_get_mem_type(ze_context_handle_t context, void *ptr) { libze_ops.zeMemGetAllocProperties(context, ptr, &alloc_props, &device); return alloc_props.type; } + +int64_t utils_ze_get_num_memory_properties(ze_device_handle_t device) { + uint32_t pCount = 0; + ze_result_t ze_result = + libze_ops.zeDeviceGetMemoryProperties(device, &pCount, nullptr); + if (ze_result != ZE_RESULT_SUCCESS) { + fprintf(stderr, "zeDeviceGetMemoryProperties() failed!\n"); + return -1; + } + + return static_cast(pCount); +} diff --git a/src/utils/utils_level_zero.h b/src/utils/utils_level_zero.h index b29a4dc43..d0f3fe154 100644 --- a/src/utils/utils_level_zero.h +++ b/src/utils/utils_level_zero.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -45,6 +45,8 @@ int utils_ze_destroy_context(ze_context_handle_t context); ze_memory_type_t utils_ze_get_mem_type(ze_context_handle_t context, void *ptr); +int64_t utils_ze_get_num_memory_properties(ze_device_handle_t device); + #ifdef __cplusplus } #endif diff --git a/test/providers/provider_level_zero.cpp b/test/providers/provider_level_zero.cpp index cdf620ace..af90aa72e 100644 --- a/test/providers/provider_level_zero.cpp +++ b/test/providers/provider_level_zero.cpp @@ -347,6 +347,18 @@ TEST_P(umfLevelZeroProviderTest, getPageSize) { ASSERT_GE(recommendedPageSize, minPageSize); + void *ptr; + umf_result = umfMemoryProviderAlloc(provider, 1, 0, &ptr); + + size_t actualPageSize = 0; + umf_result = + umfMemoryProviderGetMinPageSize(provider, ptr, &actualPageSize); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_GE(actualPageSize, minPageSize); + + umf_result = umfMemoryProviderFree(provider, ptr, 1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + umfMemoryProviderDestroy(provider); } @@ -421,6 +433,39 @@ TEST_P(umfLevelZeroProviderTest, levelZeroProviderNullParams) { res = umfLevelZeroMemoryProviderParamsSetMemoryType(nullptr, UMF_MEMORY_TYPE_DEVICE); EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); + + res = umfLevelZeroMemoryProviderParamsSetDeviceOrdinal(nullptr, 0); + EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); +} + +TEST_P(umfLevelZeroProviderTest, setDeviceOrdinalValid) { + int64_t numProps = + utils_ze_get_num_memory_properties(l0TestHelper.get_test_device()); + ASSERT_GE(numProps, 0); + + for (uint32_t ordinal = 0; ordinal < static_cast(numProps); + ordinal++) { + umf_memory_provider_handle_t provider = nullptr; + umf_result_t res = + umfLevelZeroMemoryProviderParamsSetDeviceOrdinal(params, ordinal); + EXPECT_EQ(res, UMF_RESULT_SUCCESS); + + res = 
umfMemoryProviderCreate(umfLevelZeroMemoryProviderOps(), params, + &provider); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + size_t size = 1024; + void *ptr = nullptr; + res = umfMemoryProviderAlloc(provider, size, 0, &ptr); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + res = umfMemoryProviderFree(provider, ptr, size); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); + } } // TODO add tests that mixes Level Zero Memory Provider and Disjoint Pool diff --git a/test/providers/provider_level_zero_not_impl.cpp b/test/providers/provider_level_zero_not_impl.cpp index c55c236fe..4948bd66f 100644 --- a/test/providers/provider_level_zero_not_impl.cpp +++ b/test/providers/provider_level_zero_not_impl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -35,6 +35,9 @@ TEST_F(test, level_zero_provider_not_implemented) { hParams, UMF_LEVEL_ZERO_MEMORY_PROVIDER_FREE_POLICY_DEFAULT); ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + result = umfLevelZeroMemoryProviderParamsSetDeviceOrdinal(hParams, 0); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); ASSERT_EQ(ops, nullptr); } From 5e1f9d809783818b630f62916a5cf3cc05072fe4 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Tue, 28 Jan 2025 22:16:51 +0000 Subject: [PATCH 109/466] [L0 provider] change "Level zero" to "Level Zero" in logs. --- src/provider/provider_level_zero.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index 7794d4575..8c8beda31 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -251,7 +251,7 @@ umf_result_t umfLevelZeroMemoryProviderParamsCreate( umf_level_zero_memory_provider_params_handle_t *hParams) { libumfInit(); if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -287,12 +287,12 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetContext( umf_level_zero_memory_provider_params_handle_t hParams, ze_context_handle_t hContext) { if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } if (!hContext) { - LOG_ERR("Level zero context handle is NULL"); + LOG_ERR("Level Zero context handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -305,7 +305,7 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetDevice( umf_level_zero_memory_provider_params_handle_t hParams, ze_device_handle_t hDevice) { if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -318,7 +318,7 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetMemoryType( umf_level_zero_memory_provider_params_handle_t hParams, umf_usm_memory_type_t memoryType) { if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -331,7 +331,7 @@ umf_result_t 
umfLevelZeroMemoryProviderParamsSetDeviceOrdinal( umf_level_zero_memory_provider_params_handle_t hParams, uint32_t deviceOrdinal) { if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } hParams->device_ordinal = deviceOrdinal; @@ -343,7 +343,7 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetResidentDevices( umf_level_zero_memory_provider_params_handle_t hParams, ze_device_handle_t *hDevices, uint32_t deviceCount) { if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } @@ -362,7 +362,7 @@ umf_result_t umfLevelZeroMemoryProviderParamsSetFreePolicy( umf_level_zero_memory_provider_params_handle_t hParams, umf_level_zero_memory_provider_free_policy_t policy) { if (!hParams) { - LOG_ERR("Level zero memory provider params handle is NULL"); + LOG_ERR("Level Zero memory provider params handle is NULL"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } From ccb2156db85bf5adc18b01c60a82d7886ea2cb4b Mon Sep 17 00:00:00 2001 From: Agata Momot Date: Mon, 27 Jan 2025 14:38:30 +0100 Subject: [PATCH 110/466] add workflow for running UMF benchmarks on Ubuntu download scripts for data visualisation from UR repository, run UMF benchmarks, upload the results to GitHub pages --- .github/workflows/benchmarks.yml | 31 ++++++ .github/workflows/nightly.yml | 4 +- .github/workflows/performance.yml | 115 ---------------------- .github/workflows/pr_push.yml | 10 +- .github/workflows/reusable_benchmarks.yml | 81 +++++---------- .github/workflows/reusable_docs_build.yml | 3 +- 6 files changed, 61 insertions(+), 183 deletions(-) create mode 100644 .github/workflows/benchmarks.yml delete mode 100644 .github/workflows/performance.yml diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 000000000..7eb3c7b06 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,31 @@ +name: Compute Benchmarks + +on: + workflow_dispatch: + inputs: + pr_no: + description: PR number (if 0, it'll run on the main) + type: number + bench_script_params: + description: Parameters passed to script executing benchmark + type: string + required: false + default: '' + upload_report: + description: 'Upload HTML report' + type: boolean + required: false + default: false + +permissions: + contents: read + pull-requests: write + +jobs: + manual: + name: Compute Benchmarks + uses: ./.github/workflows/reusable_benchmarks.yml + with: + pr_no: ${{ inputs.pr_no }} + bench_script_params: ${{ inputs.bench_script_params }} + upload_report: ${{ inputs.upload_report }} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 7a4cd704b..28149c3a1 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -9,7 +9,6 @@ on: permissions: contents: read - pull-requests: write jobs: fuzz-test: @@ -198,6 +197,9 @@ jobs: Benchmarks: uses: ./.github/workflows/reusable_benchmarks.yml + permissions: + contents: read + pull-requests: write with: pr_no: '0' bench_script_params: '' diff --git a/.github/workflows/performance.yml b/.github/workflows/performance.yml deleted file mode 100644 index 6057df5f0..000000000 --- a/.github/workflows/performance.yml +++ /dev/null @@ -1,115 +0,0 @@ -name: Performance - -on: - # Can be triggered via manual "dispatch" (from workflow view in GitHub Actions tab) - workflow_dispatch: 
- inputs: - pr_no: - description: PR number (if 0, it'll run on the main) - type: number - required: true - -permissions: - contents: read - pull-requests: write - -env: - BUILD_DIR : "${{github.workspace}}/build" - -jobs: - perf-l0: - name: Build UMF and run performance tests - runs-on: "L0_PERF" - - steps: - # Workspace on self-hosted runners is not cleaned automatically. - # We have to delete the files created outside of using actions. - - name: Cleanup self-hosted workspace - if: always() - run: | - ls -la ./ - rm -rf ./* || true - - - name: Add comment to PR - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - if: ${{ always() && inputs.pr_no != 0 }} - with: - script: | - const pr_no = '${{ inputs.pr_no }}'; - const provider = 'LEVEL_ZERO'; - const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; - const body = `Performance workflow for ${provider}_PROVIDER run:\n${url}`; - - github.rest.issues.createComment({ - issue_number: pr_no, - owner: context.repo.owner, - repo: context.repo.repo, - body: body - }) - - - name: Checkout UMF - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - - - name: Get information about platform - run: .github/scripts/get_system_info.sh - - # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. - - name: Fetch PR's merge commit - if: ${{ inputs.pr_no != 0 }} - working-directory: ${{github.workspace}} - env: - PR_NO: ${{ inputs.pr_no }} - run: | - git fetch -- https://github.com/${{github.repository}} +refs/pull/${PR_NO}/*:refs/remotes/origin/pr/${PR_NO}/* - git checkout origin/pr/${PR_NO}/merge - git rev-parse origin/pr/${PR_NO}/merge - - - name: Configure build - run: > - cmake - -B ${{env.BUILD_DIR}} - -DCMAKE_BUILD_TYPE=Release - -DUMF_BUILD_SHARED_LIBRARY=ON - -DUMF_BUILD_BENCHMARKS=ON - -DUMF_BUILD_BENCHMARKS_MT=ON - -DUMF_BUILD_TESTS=OFF - -DUMF_FORMAT_CODE_STYLE=OFF - -DUMF_DEVELOPER_MODE=OFF - -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON - -DUMF_BUILD_CUDA_PROVIDER=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON - -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - - - name: Build - run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) - - - name: Run benchmarks - working-directory: ${{env.BUILD_DIR}} - id: benchmarks - run: numactl -N 1 ctest -V --test-dir benchmark -C Release - - - name: Add comment to PR - uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 - if: ${{ always() && inputs.pr_no != 0 }} - with: - script: | - let markdown = "" - try { - const fs = require('fs'); - markdown = fs.readFileSync('umf_perf_results.md', 'utf8'); - } catch(err) { - } - - const pr_no = '${{ inputs.pr_no }}'; - const provider = 'LEVEL_ZERO'; - const url = '${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}'; - const test_status = '${{ steps.benchmarks.outcome }}'; - const job_status = '${{ job.status }}'; - const body = `Performance workflow for ${provider}_PROVIDER run:\n${url}\nJob status: ${job_status}. 
Test status: ${test_status}.\n ${markdown}`; - - github.rest.issues.createComment({ - issue_number: pr_no, - owner: context.repo.owner, - repo: context.repo.repo, - body: body - }) diff --git a/.github/workflows/pr_push.yml b/.github/workflows/pr_push.yml index 88f415742..cfc4a04b9 100644 --- a/.github/workflows/pr_push.yml +++ b/.github/workflows/pr_push.yml @@ -14,8 +14,7 @@ concurrency: permissions: contents: read - pull-requests: write - + jobs: CodeChecks: uses: ./.github/workflows/reusable_checks.yml @@ -55,13 +54,6 @@ jobs: uses: ./.github/workflows/reusable_qemu.yml with: short_run: true - Benchmarks: - needs: [Build] - uses: ./.github/workflows/reusable_benchmarks.yml - with: - pr_no: '0' - bench_script_params: '' - upload_report: true ProxyLib: needs: [Build] uses: ./.github/workflows/reusable_proxy_lib.yml diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index 92e5c1eb3..028b974ef 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -1,11 +1,12 @@ # Executes benchmarks implemented in this repository +# using scripts for benchmark results visualisation, +# which are downloaded from Unified Runtime repository. name: Benchmarks on: workflow_call: inputs: pr_no: - required: true # even though this is a number, this is a workaround for issues with # reusable workflow calls that result in "Unexpected value '0'" error. type: string @@ -24,24 +25,26 @@ permissions: pull-requests: write env: - BUILD_DIR : "${{github.workspace}}/build" - INSTL_DIR : "${{github.workspace}}/../install-dir" + UMF_DIR: "${{github.workspace}}/umf-repo" + BUILD_DIR : "${{github.workspace}}/umf-repo/build" jobs: benchmarks: name: Benchmarks - # env: - # VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" strategy: matrix: - os: ['ubuntu-latest'] #, 'windows-latest'] - include: - # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command to determine the build type - - os: ubuntu-latest - extra_build_option: '-DCMAKE_BUILD_TYPE=Release' + os: ['ubuntu-latest'] runs-on: ${{matrix.os}} steps: + # Workspace on self-hosted runners is not cleaned automatically. + # We have to delete the files created outside of using actions. + - name: Cleanup self-hosted workspace + if: always() + run: | + ls -la ./ + rm -rf ./* || true + - name: Add comment to PR uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 if: ${{ always() && inputs.pr_no != 0 }} @@ -59,14 +62,16 @@ jobs: body: body }) - - name: Checkout + - name: Checkout UMF uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: + path: ${{env.UMF_DIR}} fetch-depth: 0 # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. 
- name: Fetch PR's merge commit if: ${{ inputs.pr_no != 0 }} + working-directory: ${{env.UMF_DIR}} env: PR_NO: ${{ inputs.pr_no }} run: | @@ -75,31 +80,16 @@ jobs: git rev-parse origin/pr/${PR_NO}/merge - name: Install apt packages - if: matrix.os == 'ubuntu-latest' run: | sudo apt-get update sudo apt-get install -y cmake libhwloc-dev libnuma-dev libtbb-dev - # - name: Initialize vcpkg - # if: matrix.os == 'windows-latest' - # uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 - # with: - # vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 - # vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg - # vcpkgJsonGlob: '**/vcpkg.json' - - # - name: Install vcpkg packages - # if: matrix.os == 'windows-latest' - # run: vcpkg install - # shell: pwsh # Specifies PowerShell as the shell for running the script. - - # -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" - name: Configure build run: > cmake + -S ${{env.UMF_DIR}} -B ${{env.BUILD_DIR}} - ${{matrix.extra_build_option}} - -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" + -DCMAKE_BUILD_TYPE=Release -DUMF_BUILD_SHARED_LIBRARY=ON -DUMF_BUILD_BENCHMARKS=ON -DUMF_BUILD_BENCHMARKS_MT=ON @@ -110,15 +100,13 @@ jobs: -DUMF_BUILD_CUDA_PROVIDER=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_EXAMPLES=OFF - - name: Build UMF on Linux - if: matrix.os == 'ubuntu-latest' + - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) - # - name: Build UMF on Windows - # if: matrix.os == 'windows-latest' - # run: cmake --build ${{env.BUILD_DIR}} --config Release -j $Env:NUMBER_OF_PROCESSORS - + # We are going to clone Unified Runtime repository in order to run + # the most up-to-date UR scripts for benchmark data visualisation - name: Checkout UR uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: @@ -127,19 +115,12 @@ jobs: fetch-depth: 1 fetch-tags: false - - name: Install pip packages + - name: Install pip packages for benchmarking scripts from UR run: | pip install --force-reinstall -r ${{github.workspace}}/ur-repo/third_party/benchmark_requirements.txt - - name: Install HWLOC - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt-get update - sudo apt-get install libhwloc-dev - - - name: Run benchmarks + - name: Run dedicated for UMF benchmarking scripts from UR id: benchmarks - if: matrix.os == 'ubuntu-latest' working-directory: ${{env.BUILD_DIR}} run: > ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py @@ -148,20 +129,6 @@ jobs: ${{ inputs.upload_report && '--output-html' || '' }} ${{ inputs.bench_script_params }} - - name: Test output - run: > - echo 'out: ${{ steps.benchmarks.outcome }}' - - # - name: Run benchmarks - # if: matrix.os == 'windows-latest' - # working-directory: ${{env.BUILD_DIR}} - # run: > - # python3 ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py - # ~/bench_workdir - # --umf ${{env.BUILD_DIR}} - # ${{ inputs.upload_report && '--output-html' || '' }} - # ${{ inputs.bench_script_params }} - - name: Add comment to PR uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 if: ${{ always() && inputs.pr_no != 0 }} diff --git a/.github/workflows/reusable_docs_build.yml b/.github/workflows/reusable_docs_build.yml index 013f83e3a..c27045c5b 100644 --- a/.github/workflows/reusable_docs_build.yml +++ b/.github/workflows/reusable_docs_build.yml @@ -45,7 +45,8 @@ jobs: -DUMF_DISABLE_HWLOC=ON cmake --build build --target docs - - name: Download benchmark HTML + - name: Download benchmark HTML before uploading with 
documentation on GitHub pages + # If the benchmark results are meant to be uploaded on GH pages if: ${{ inputs.upload == true }} id: download-bench-html uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 From a50be2c8cd5d17346d38be7ef214fad67fc47b90 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 31 Jan 2025 13:09:23 +0100 Subject: [PATCH 111/466] Add LIBHWLOC_INCLUDE_DIRS to build_umf_test() Add LIBHWLOC_INCLUDE_DIRS to build_umf_test() when UMF_LINK_HWLOC_STATICALLY is ON. Ref: #1065 Signed-off-by: Lukasz Dorau --- test/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 918e874c6..87df7d28d 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -59,6 +59,10 @@ function(build_umf_test) set(INC_DIRS ${INC_DIRS} ${LEVEL_ZERO_INCLUDE_DIRS}) endif() + if(UMF_LINK_HWLOC_STATICALLY) + set(INC_DIRS ${INC_DIRS} ${LIBHWLOC_INCLUDE_DIRS}) + endif() + if(UMF_POOL_JEMALLOC_ENABLED) set(CPL_DEFS ${CPL_DEFS} UMF_POOL_JEMALLOC_ENABLED=1) endif() From 647214e885e4ef3302748dc9f75a1e1d3b9faa94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Fri, 31 Jan 2025 15:54:03 +0100 Subject: [PATCH 112/466] Fix cache for bench results dir should most likely match, when cached and restored. Side note, caches are accessible only on the same branch - PR cannot access caches from, e.g., main branch. --- .github/workflows/reusable_benchmarks.yml | 2 +- .github/workflows/reusable_docs_build.yml | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index 028b974ef..8edca90e1 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -159,5 +159,5 @@ jobs: if: ${{ always() && inputs.upload_report }} uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 with: - path: ${{env.BUILD_DIR}}/benchmark_results.html + path: umf-repo/build/benchmark_results.html key: benchmark-results-${{ github.run_id }} diff --git a/.github/workflows/reusable_docs_build.yml b/.github/workflows/reusable_docs_build.yml index c27045c5b..9317478bb 100644 --- a/.github/workflows/reusable_docs_build.yml +++ b/.github/workflows/reusable_docs_build.yml @@ -45,20 +45,19 @@ jobs: -DUMF_DISABLE_HWLOC=ON cmake --build build --target docs - - name: Download benchmark HTML before uploading with documentation on GitHub pages - # If the benchmark results are meant to be uploaded on GH pages + # If we upload HTML docs, we want to include benchmark results as well + - name: Download benchmark HTML before uploading docs if: ${{ inputs.upload == true }} id: download-bench-html uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 with: - path: ${{github.workspace}}/build/benchmark_results.html + path: umf-repo/build/benchmark_results.html key: benchmark-results- - name: Move benchmark HTML if: ${{ inputs.upload == true && steps.download-bench-html.outputs.cache-hit != '' }} - # exact or partial cache hit run: | - mv ${{ github.workspace }}/build/benchmark_results.html ${{ github.workspace }}/build/docs_build/generated/html + mv umf-repo/build/benchmark_results.html ${{github.workspace}}/build/docs_build/generated/html - name: Upload artifact if: ${{ inputs.upload == true }} From 93c00afa2fa4a3675c9a8dbdbdb01b2f7fb42868 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 3 Feb 2025 09:12:28 +0100 Subject: [PATCH 113/466] Fix using 
LIBHWLOC_INCLUDE_DIRS in tests LIBHWLOC_INCLUDE_DIRS should be used always when hwloc is not disabled. Ref: #1066 Signed-off-by: Lukasz Dorau --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 87df7d28d..d7ac857f7 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -59,7 +59,7 @@ function(build_umf_test) set(INC_DIRS ${INC_DIRS} ${LEVEL_ZERO_INCLUDE_DIRS}) endif() - if(UMF_LINK_HWLOC_STATICALLY) + if(NOT UMF_DISABLE_HWLOC) set(INC_DIRS ${INC_DIRS} ${LIBHWLOC_INCLUDE_DIRS}) endif() From 5b0940a512e64092cb4564ba1330137972444d55 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 3 Feb 2025 11:34:03 +0100 Subject: [PATCH 114/466] Remove doubled CMake messages Signed-off-by: Lukasz Dorau --- CMakeLists.txt | 7 +++++++ cmake/FindJEMALLOC.cmake | 8 +------- cmake/FindLIBHWLOC.cmake | 21 +++++++++++++-------- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c24fceb73..eac6fdf3a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -232,6 +232,9 @@ if(JEMALLOC_FOUND OR JEMALLOC_LIBRARIES) message(STATUS " JEMALLOC_LIBRARIES = ${JEMALLOC_LIBRARIES}") message(STATUS " JEMALLOC_INCLUDE_DIRS = ${JEMALLOC_INCLUDE_DIRS}") message(STATUS " JEMALLOC_LIBRARY_DIRS = ${JEMALLOC_LIBRARY_DIRS}") + if(WINDOWS) + message(STATUS " JEMALLOC_DLL_DIRS = ${JEMALLOC_DLL_DIRS}") + endif() else() set(UMF_POOL_JEMALLOC_ENABLED FALSE) message( @@ -336,6 +339,10 @@ else() message(STATUS " LIBHWLOC_LIBRARIES = ${LIBHWLOC_LIBRARIES}") message(STATUS " LIBHWLOC_INCLUDE_DIRS = ${LIBHWLOC_INCLUDE_DIRS}") message(STATUS " LIBHWLOC_LIBRARY_DIRS = ${LIBHWLOC_LIBRARY_DIRS}") + message(STATUS " LIBHWLOC_API_VERSION = ${LIBHWLOC_API_VERSION}") + if(WINDOWS) + message(STATUS " LIBHWLOC_DLL_DIRS = ${LIBHWLOC_DLL_DIRS}") + endif() endif() if(hwloc_targ_SOURCE_DIR) diff --git a/cmake/FindJEMALLOC.cmake b/cmake/FindJEMALLOC.cmake index 89d488ecc..2dab1f383 100644 --- a/cmake/FindJEMALLOC.cmake +++ b/cmake/FindJEMALLOC.cmake @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -35,12 +35,6 @@ endif() if(JEMALLOC_LIBRARY) message(STATUS " Found jemalloc using find_library()") - message(STATUS " JEMALLOC_LIBRARIES = ${JEMALLOC_LIBRARIES}") - message(STATUS " JEMALLOC_INCLUDE_DIRS = ${JEMALLOC_INCLUDE_DIRS}") - message(STATUS " JEMALLOC_LIBRARY_DIRS = ${JEMALLOC_LIBRARY_DIRS}") - if(WINDOWS) - message(STATUS " JEMALLOC_DLL_DIRS = ${JEMALLOC_DLL_DIRS}") - endif() else() set(MSG_NOT_FOUND "jemalloc NOT found (set CMAKE_PREFIX_PATH to point the location)") diff --git a/cmake/FindLIBHWLOC.cmake b/cmake/FindLIBHWLOC.cmake index 8d7998f8d..4972f55ce 100644 --- a/cmake/FindLIBHWLOC.cmake +++ b/cmake/FindLIBHWLOC.cmake @@ -1,7 +1,17 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +function(print_hwloc_dirs) + message(STATUS " LIBHWLOC_LIBRARIES = ${LIBHWLOC_LIBRARIES}") + message(STATUS " LIBHWLOC_INCLUDE_DIRS = ${LIBHWLOC_INCLUDE_DIRS}") + message(STATUS " LIBHWLOC_LIBRARY_DIRS = ${LIBHWLOC_LIBRARY_DIRS}") + message(STATUS " LIBHWLOC_API_VERSION = ${LIBHWLOC_API_VERSION}") + if(WINDOWS) + message(STATUS " LIBHWLOC_DLL_DIRS = ${LIBHWLOC_DLL_DIRS}") + endif() +endfunction() + message(STATUS "Checking for module 'libhwloc' using find_library()") find_library(LIBHWLOC_LIBRARY NAMES ${UMF_HWLOC_NAME}) @@ -46,19 +56,14 @@ endif() if(LIBHWLOC_LIBRARY) message(STATUS " Found libhwloc using find_library()") - message(STATUS " LIBHWLOC_LIBRARIES = ${LIBHWLOC_LIBRARIES}") - message(STATUS " LIBHWLOC_INCLUDE_DIRS = ${LIBHWLOC_INCLUDE_DIRS}") - message(STATUS " LIBHWLOC_LIBRARY_DIRS = ${LIBHWLOC_LIBRARY_DIRS}") - message(STATUS " LIBHWLOC_API_VERSION = ${LIBHWLOC_API_VERSION}") - if(WINDOWS) - message(STATUS " LIBHWLOC_DLL_DIRS = ${LIBHWLOC_DLL_DIRS}") - endif() if(LIBHWLOC_FIND_VERSION) if(NOT LIBHWLOC_API_VERSION) + print_hwloc_dirs() message(FATAL_ERROR "Failed to retrieve libhwloc version") elseif(NOT LIBHWLOC_API_VERSION VERSION_GREATER_EQUAL LIBHWLOC_FIND_VERSION) + print_hwloc_dirs() message( FATAL_ERROR " Required version: ${LIBHWLOC_FIND_VERSION}, found ${LIBHWLOC_API_VERSION}" From 8bb3cae0f6e46104f7d4a2fb4e5988b07b98f354 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Tue, 4 Feb 2025 15:14:15 +0100 Subject: [PATCH 115/466] Fix IPC benchmark in ubench --- benchmark/ubench.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmark/ubench.c b/benchmark/ubench.c index dfd28ea1f..3892740e8 100644 --- a/benchmark/ubench.c +++ b/benchmark/ubench.c @@ -30,8 +30,8 @@ #include "utils_common.h" -#if (defined UMF_BUILD_LIBUMF_POOL_DISJOINT && \ - defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_POOL_DISJOINT_ENABLED && \ + defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) #include "utils_level_zero.h" #endif @@ -422,7 +422,7 @@ UBENCH_EX(simple, scalable_pool_with_os_memory_provider) { #endif /* (defined UMF_POOL_SCALABLE_ENABLED) */ #if (defined UMF_POOL_DISJOINT_ENABLED && \ - defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) + defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs, size_t repeats, umf_ipc_handle_t *ipc_handles) { From 819badb3117a92945a6834950f462ba4066b0bf9 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 4 Feb 2025 14:41:03 +0000 Subject: [PATCH 116/466] add clangd files to gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index e177e395e..7d0aa10fd 100644 --- a/.gitignore +++ b/.gitignore @@ -83,3 +83,7 @@ out/ # IDE Files /.vscode /.devcontainer + +# clangd files +/.cache/clangd +compile_commands.json From 307eb9ca21179f19b07784234286adfe43aa60fe Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 4 Feb 2025 11:36:24 +0100 Subject: [PATCH 117/466] Fix building hwloc on Windows with Ninja generator Fixes: #1057 Co-developed-by: Patryk Kaminski Co-developed-by: Lukasz Dorau Signed-off-by: Lukasz Dorau --- CMakeLists.txt | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eac6fdf3a..6ad0cfb01 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt 
@@ -282,10 +282,52 @@ else() SOURCE_SUBDIR contrib/windows-cmake/ FIND_PACKAGE_ARGS) FetchContent_MakeAvailable(hwloc_targ) + message(STATUS "hwloc CMAKE_GENERATOR: ${CMAKE_GENERATOR}") + + if(CMAKE_GENERATOR STREQUAL "Ninja") + add_custom_command( + COMMAND ${CMAKE_COMMAND} + -DCMAKE_INSTALL_PREFIX=${hwloc_targ_BINARY_DIR} -B build + WORKING_DIRECTORY + ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/ + OUTPUT + ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/CMakeCache.txt + ) + add_custom_command( + COMMAND ${CMAKE_COMMAND} --build build + WORKING_DIRECTORY + ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/ + OUTPUT + ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/build/lib/hwloc.lib + DEPENDS + ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/CMakeCache.txt + ) + add_custom_command( + COMMAND ${CMAKE_COMMAND} --build build --target INSTALL + WORKING_DIRECTORY + ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/ + OUTPUT ${hwloc_targ_BINARY_DIR}/lib/hwloc.lib + DEPENDS + ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/build/lib/hwloc.lib + ) + add_custom_target(hwloc_prod + DEPENDS ${hwloc_targ_BINARY_DIR}/lib/hwloc.lib) + target_link_libraries( + hwloc INTERFACE ${hwloc_targ_BINARY_DIR}/lib/hwloc.lib) + add_dependencies(hwloc hwloc_prod) + + set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/lib) + set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/lib/hwloc.lib) + elseif(CMAKE_GENERATOR STREQUAL "NMake Makefiles") + set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/) + set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/hwloc.lib) + else() + set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/$) + set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/$/hwloc.lib) + endif() + set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_SOURCE_DIR}/include;${hwloc_targ_BINARY_DIR}/include) - set(LIBHWLOC_LIBRARY_DIRS - ${hwloc_targ_BINARY_DIR}/Release;${hwloc_targ_BINARY_DIR}/Debug) else() include(FetchContent) message( From f72f64979b0c316fc1801c6d929effe972bc6240 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 6 Feb 2025 14:23:57 +0100 Subject: [PATCH 118/466] Add path of umf.dll to DLL_PATH_LIST for tests on Windows Signed-off-by: Lukasz Dorau --- test/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d7ac857f7..b841cceba 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -145,6 +145,11 @@ function(add_umf_test) set_tests_properties(${TEST_NAME} PROPERTIES LABELS "umf") if(WINDOWS) + # add PATH to DLL on Windows + set(DLL_PATH_LIST + "${DLL_PATH_LIST};PATH=path_list_append:${CMAKE_BINARY_DIR}/bin/;PATH=path_list_append:${CMAKE_BINARY_DIR}/bin/$/" + ) + # append PATH to DLLs set_property(TEST ${TEST_NAME} PROPERTY ENVIRONMENT_MODIFICATION "${DLL_PATH_LIST}") From eb0fc90f759319b294ec7b0b776c929044b3fc58 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 4 Feb 2025 11:37:47 +0100 Subject: [PATCH 119/466] Revert "Revert changes for Win static hwloc" This reverts commit ada63d7e744066953923e04c8a0b0cd2f30757e8. 
Fixes: #1057 Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_basic.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 02f79bad0..22bf0ea50 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -338,7 +338,7 @@ jobs: -B ${{env.BUILD_DIR}} -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" -DUMF_BUILD_SHARED_LIBRARY=ON - -DUMF_BUILD_EXAMPLES=OFF + -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON @@ -381,7 +381,7 @@ jobs: -B ${{env.BUILD_DIR}} -DCMAKE_INSTALL_PREFIX="${{env.INSTL_DIR}}" -DUMF_BUILD_SHARED_LIBRARY=OFF - -DUMF_BUILD_EXAMPLES=OFF + -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON From bfcf8cf60bf01f4bef0bcde587120496680ccabb Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 5 Feb 2025 11:50:47 +0100 Subject: [PATCH 120/466] Add Windows-Ninja-cl job to Nightly CI workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-developed-by: Łukasz Stolarczuk Co-developed-by: Lukasz Dorau Signed-off-by: Lukasz Dorau --- .github/workflows/nightly.yml | 64 +++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 28149c3a1..c24312b87 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -89,6 +89,70 @@ jobs: - name: Run tests under valgrind run: ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{github.workspace}}/build ${{matrix.tool}} + Windows-Ninja-cl: + name: Windows-Ninja-cl + env: + VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" + BUILD_DIR : "${{github.workspace}}/build" + strategy: + matrix: + os: ['windows-2019', 'windows-2022'] + build_type: [Debug, Release] + compiler: [{c: cl, cxx: cl}] + shared_library: ['ON', 'OFF'] + static_hwloc: ['ON', 'OFF'] + + runs-on: ${{matrix.os}} + + steps: + - name: Checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Initialize vcpkg + uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + with: + vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg + vcpkgJsonGlob: '**/vcpkg.json' + + - name: Install dependencies + run: vcpkg install + + - name: Install Ninja + uses: seanmiddleditch/gha-setup-ninja@96bed6edff20d1dd61ecff9b75cc519d516e6401 # v5 + + - name: Configure MSVC environment + uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + + - name: Configure build + run: > + cmake + -B ${{env.BUILD_DIR}} + -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" + -DCMAKE_C_COMPILER=${{matrix.compiler.c}} + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} + -G Ninja + -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} + -DUMF_LINK_HWLOC_STATICALLY=${{matrix.static_hwloc}} + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build UMF + shell: cmd + run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} 
-j %NUMBER_OF_PROCESSORS% + + - name: Run tests + shell: cmd + working-directory: ${{env.BUILD_DIR}} + run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + # TODO fix #843 #icx: # name: ICX From cdb73d6a5cc2063c34e7388720c9765e8a90cd2f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 6 Feb 2025 20:18:38 +0100 Subject: [PATCH 121/466] [CI] Add manual dispatch for docs generation This might be useful if we want to push new benchmarks report, e.g. from a PR --- .github/workflows/docs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 165cc1754..0918a3699 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -4,6 +4,7 @@ name: GitHubPages on: push: branches: ["main"] + workflow_dispatch: # Cancel previous in-progress workflow, only the latest run is relevant concurrency: From 4c21ebc105f91665e965140fc76b006fb074f042 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 6 Feb 2025 20:19:52 +0100 Subject: [PATCH 122/466] [CI] Run nigthly at 4am, to avoid conflicts with UR's perf job --- .github/workflows/nightly.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index c24312b87..f90a6e7bb 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -1,11 +1,11 @@ # Various non-standard tests, requiring e.g. longer run name: Nightly -# This job is run at 00:00 UTC every day or on demand. +# This job is run at 04:00 UTC every day or on demand. on: workflow_dispatch: schedule: - - cron: '0 0 * * *' + - cron: '0 4 * * *' permissions: contents: read From 81c0c3c45cd242c386cb83b6abd304bdeafbccda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 6 Feb 2025 20:26:05 +0100 Subject: [PATCH 123/466] [CI] Update get_system_info script to include more info, e.g., for testing CUDA provider; More info is based on what is set in UR workflows. --- .github/scripts/get_system_info.sh | 46 ++++++++++++++++-------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/.github/scripts/get_system_info.sh b/.github/scripts/get_system_info.sh index 595d5e31a..be900e2a7 100755 --- a/.github/scripts/get_system_info.sh +++ b/.github/scripts/get_system_info.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -26,9 +26,9 @@ function system_info { cat /etc/os-release | grep -oP "PRETTY_NAME=\K.*" cat /proc/version - # echo "**********SYCL-LS**********" - # source /opt/intel/oneapi/setvars.sh - # sycl-ls + echo "**********SYCL-LS**********" + source /opt/intel/oneapi/setvars.sh + sycl-ls echo "**********numactl topology**********" numactl -H @@ -36,22 +36,22 @@ function system_info { echo "**********VGA info**********" lspci | grep -i VGA - # echo "**********CUDA Version**********" - # if command -v nvidia-smi &> /dev/null; then - # nvidia-smi - # else - # echo "CUDA not installed" - # fi + echo "**********CUDA Version**********" + if command -v nvidia-smi &> /dev/null; then + nvidia-smi + else + echo "CUDA not installed" + fi echo "**********L0 Version**********" check_L0_version - # echo "**********ROCm Version**********" - # if command -v rocminfo &> /dev/null; then - # rocminfo - # else - # echo "ROCm not installed" - # fi + echo "**********ROCm Version**********" + if command -v rocminfo &> /dev/null; then + rocminfo + else + echo "ROCm not installed" + fi echo "******OpenCL*******" # The driver version of OpenCL Graphics is the compute-runtime version @@ -67,11 +67,15 @@ function system_info { cat /proc/meminfo echo "**********env variables**********" - echo "PATH=${PATH}" - echo "CPATH=${CPATH}" - echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}" - echo "LIBRARY_PATH=${LIBRARY_PATH}" - echo "PKG_CONFIG_PATH=${PKG_CONFIG_PATH}" + echo "PATH=$PATH" + echo + echo "CPATH=$CPATH" + echo + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" + echo + echo "LIBRARY_PATH=$LIBRARY_PATH" + echo + echo "PKG_CONFIG_PATH=$PKG_CONFIG_PATH" echo echo "******build tools versions*******" From 99bccf13c8117dcfafb169f79ebd0652fd2aec52 Mon Sep 17 00:00:00 2001 From: Agata Momot Date: Tue, 4 Feb 2025 19:03:47 +0100 Subject: [PATCH 124/466] add save baseline to nightly --- .github/workflows/nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index c24312b87..d74babae1 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -266,5 +266,5 @@ jobs: pull-requests: write with: pr_no: '0' - bench_script_params: '' + bench_script_params: '--save baseline' upload_report: true From 96fa583e88c64374e2f712149916638c3e219ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Fri, 7 Feb 2025 13:51:31 +0100 Subject: [PATCH 125/466] Don't set 'no-intel-lib' for icx, it breaks compiler's build Partially reverts changes from: https://github.com/oneapi-src/unified-memory-framework/pull/1030 --- cmake/helpers.cmake | 3 --- 1 file changed, 3 deletions(-) diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index d6f12031d..02aaf5c71 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -378,9 +378,6 @@ function(add_umf_library) elseif(LINUX) target_link_options(${ARG_NAME} PRIVATE "-Wl,--version-script=${ARG_LINUX_MAP_FILE}") - if(CMAKE_C_COMPILER_ID STREQUAL "IntelLLVM") - target_link_options(${ARG_NAME} PRIVATE -no-intel-lib) - endif() endif() endif() From 9ff9452dff66da5bdf478523d4253ba3d0a9f35e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Fri, 7 Feb 2025 14:00:00 +0100 Subject: [PATCH 126/466] [CI] Fix icx build when 'no-intel-lib' is removed --- .github/workflows/reusable_basic.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable_basic.yml 
b/.github/workflows/reusable_basic.yml index 22bf0ea50..d4d583bfd 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -179,7 +179,8 @@ jobs: - name: Run tests working-directory: ${{env.BUILD_DIR}} run: | - LD_LIBRARY_PATH=${{env.BUILD_DIR}}/lib/ ctest --output-on-failure # run all tests for better coverage + ${{ matrix.compiler.cxx == 'icpx' && '. /opt/intel/oneapi/setvars.sh' || true }} + LD_LIBRARY_PATH="${{env.BUILD_DIR}}/lib/:${LD_LIBRARY_PATH}" ctest --output-on-failure - name: Check coverage if: ${{ matrix.build_type == 'Debug' && matrix.compiler.c == 'gcc' }} From 3f76e9355f14350400183779e1e2f4f1bbd008d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 6 Feb 2025 20:37:09 +0100 Subject: [PATCH 127/466] [CI] Update benchmark's workflow to use self-hosted runner --- .github/workflows/reusable_benchmarks.yml | 57 ++++++++++++++++------- 1 file changed, 39 insertions(+), 18 deletions(-) diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index 8edca90e1..b33fdb25e 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -1,5 +1,5 @@ # Executes benchmarks implemented in this repository -# using scripts for benchmark results visualisation, +# using scripts for benchmark results visualization, # which are downloaded from Unified Runtime repository. name: Benchmarks @@ -31,10 +31,9 @@ env: jobs: benchmarks: name: Benchmarks - strategy: - matrix: - os: ['ubuntu-latest'] - runs-on: ${{matrix.os}} + # run only on upstream; forks will not have the HW + if: github.repository == 'oneapi-src/unified-memory-framework' + runs-on: L0_PERF steps: # Workspace on self-hosted runners is not cleaned automatically. @@ -79,12 +78,7 @@ jobs: git checkout origin/pr/${PR_NO}/merge git rev-parse origin/pr/${PR_NO}/merge - - name: Install apt packages - run: | - sudo apt-get update - sudo apt-get install -y cmake libhwloc-dev libnuma-dev libtbb-dev - - - name: Configure build + - name: Configure UMF run: > cmake -S ${{env.UMF_DIR}} @@ -94,19 +88,19 @@ jobs: -DUMF_BUILD_BENCHMARKS=ON -DUMF_BUILD_BENCHMARKS_MT=ON -DUMF_BUILD_TESTS=OFF - -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_BUILD_EXAMPLES=OFF -DUMF_DEVELOPER_MODE=OFF + -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_EXAMPLES=OFF - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) # We are going to clone Unified Runtime repository in order to run - # the most up-to-date UR scripts for benchmark data visualisation + # the most up-to-date UR scripts for benchmark data visualization - name: Checkout UR uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: @@ -119,16 +113,38 @@ jobs: run: | pip install --force-reinstall -r ${{github.workspace}}/ur-repo/third_party/benchmark_requirements.txt - - name: Run dedicated for UMF benchmarking scripts from UR + - name: Set core range and GPU mask + run: | + # Compute the core range for the second NUMA node; first node is for UR jobs. + # Skip the first 4 cores - the kernel is likely to schedule more work on these. 
+ CORES=$(lscpu | awk ' + /NUMA node1 CPU|On-line CPU/ {line=$0} + END { + split(line, a, " ") + split(a[4], b, ",") + sub(/^0/, "4", b[1]) + print b[1] + }') + echo "Selected core: $CORES" + echo "CORES=$CORES" >> $GITHUB_ENV + + ZE_AFFINITY_MASK=1 + echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV + + - name: Run UMF benchmarks (using scripts from UR) id: benchmarks working-directory: ${{env.BUILD_DIR}} run: > - ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py - ~/bench_workdir - --umf ${{env.BUILD_DIR}} + taskset -c ${{ env.CORES }} ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py + ~/bench_workdir_umf + --umf ${{env.BUILD_DIR}} ${{ inputs.upload_report && '--output-html' || '' }} ${{ inputs.bench_script_params }} + - name: Print benchmark results + if: ${{ always() }} + run: cat ${{env.BUILD_DIR}}/benchmark_results.md + - name: Add comment to PR uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 if: ${{ always() && inputs.pr_no != 0 }} @@ -161,3 +177,8 @@ jobs: with: path: umf-repo/build/benchmark_results.html key: benchmark-results-${{ github.run_id }} + + - name: Get information about platform + if: ${{ always() }} + working-directory: ${{env.UMF_DIR}} + run: .github/scripts/get_system_info.sh From fdca1b049568d3d37cf1b283fbceda0cf151ac9f Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 10 Feb 2025 10:20:01 +0100 Subject: [PATCH 128/466] Fix: remove CUDA_ERROR_INVALID_RESOURCE_TYPE CUDA_ERROR_INVALID_RESOURCE_TYPE is not defined in CUDA v10.1 that is used in UR. Signed-off-by: Lukasz Dorau --- src/provider/provider_cuda.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index edebb04e6..40bbd840d 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -150,7 +150,6 @@ static umf_result_t cu2umf_result(CUresult result) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; case CUDA_ERROR_INVALID_VALUE: case CUDA_ERROR_INVALID_HANDLE: - case CUDA_ERROR_INVALID_RESOURCE_TYPE: return UMF_RESULT_ERROR_INVALID_ARGUMENT; default: cu_store_last_native_error(result); From 316c220c282bbcef2b42b3f1f39e1d81497e98b5 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 10 Feb 2025 11:02:45 +0100 Subject: [PATCH 129/466] Add messages printing CUDA_INCLUDE_DIRS Signed-off-by: Lukasz Dorau --- CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6ad0cfb01..f8c393609 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -454,11 +454,11 @@ if(UMF_BUILD_CUDA_PROVIDER AND (NOT UMF_CUDA_INCLUDE_DIR)) set(CUDA_INCLUDE_DIRS ${cuda-headers_SOURCE_DIR} CACHE PATH "Path to CUDA headers") - message(STATUS "CUDA include directory: ${CUDA_INCLUDE_DIRS}") + message(STATUS "CUDA_INCLUDE_DIRS = ${CUDA_INCLUDE_DIRS}") elseif(UMF_BUILD_CUDA_PROVIDER) # Only header is needed to build UMF set(CUDA_INCLUDE_DIRS ${UMF_CUDA_INCLUDE_DIR}) - message(STATUS "CUDA include directory: ${CUDA_INCLUDE_DIRS}") + message(STATUS "CUDA_INCLUDE_DIRS = ${CUDA_INCLUDE_DIRS}") endif() # This build type check is not possible on Windows when CMAKE_BUILD_TYPE is not From 5b01c8582ad60dd9d4174ecd5cc218dabc57a4dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Tue, 4 Feb 2025 13:43:18 +0100 Subject: [PATCH 130/466] increase leak pool size in proxy lib This improves search time if ptr belongs to leak pool, during free --- src/proxy_lib/proxy_lib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git 
a/src/proxy_lib/proxy_lib.c b/src/proxy_lib/proxy_lib.c index 15ddfca1b..41ec62134 100644 --- a/src/proxy_lib/proxy_lib.c +++ b/src/proxy_lib/proxy_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -301,7 +301,9 @@ static inline void *ba_generic_realloc(umf_ba_linear_pool_t *pool, void *ptr, /*** The "LEAK" linear base allocator functions ******************************/ /*****************************************************************************/ -static void ba_leak_create(void) { Base_alloc_leak = umf_ba_linear_create(0); } +static void ba_leak_create(void) { + Base_alloc_leak = umf_ba_linear_create(4 * 1024 * 1024); +} // it does not implement destroy(), because we cannot destroy non-freed memory From 68700b51982f04678bc7def33a0d0404cd65a3f9 Mon Sep 17 00:00:00 2001 From: "Vinogradov, Sergei" Date: Mon, 10 Feb 2025 08:56:15 -0800 Subject: [PATCH 131/466] Fix ubench for CUDA provider --- benchmark/CMakeLists.txt | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index efad0baf3..941c685e3 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -109,6 +109,9 @@ function(add_umf_benchmark) if(UMF_BUILD_CUDA_PROVIDER) target_compile_definitions(${BENCH_NAME} PRIVATE UMF_BUILD_CUDA_PROVIDER=1) + target_include_directories( + ${BENCH_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/test/common + ${CUDA_INCLUDE_DIRS}) endif() if(UMF_BUILD_GPU_TESTS) target_compile_definitions(${BENCH_NAME} PRIVATE UMF_BUILD_GPU_TESTS=1) From 82e5f7fba32f2ffcfb77ff1fd6f409edd625ce73 Mon Sep 17 00:00:00 2001 From: "Vinogradov, Sergei" Date: Mon, 10 Feb 2025 09:45:13 -0800 Subject: [PATCH 132/466] Enable UMF_BUILD_BENCHMARKS_MT=ON in GPU CI flows --- .github/workflows/reusable_gpu.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/reusable_gpu.yml b/.github/workflows/reusable_gpu.yml index 913a0f0f1..8dd8bcdb5 100644 --- a/.github/workflows/reusable_gpu.yml +++ b/.github/workflows/reusable_gpu.yml @@ -88,6 +88,7 @@ jobs: -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} -DUMF_BUILD_BENCHMARKS=ON + -DUMF_BUILD_BENCHMARKS_MT=ON -DUMF_BUILD_TESTS=ON -DUMF_BUILD_GPU_TESTS=ON -DUMF_BUILD_GPU_EXAMPLES=ON From 7177d9de7cf11bc33235d78c18b59fb85e591af3 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 11 Feb 2025 10:11:19 +0000 Subject: [PATCH 133/466] disable umf mt bench in GPU workflow --- .github/workflows/reusable_gpu.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable_gpu.yml b/.github/workflows/reusable_gpu.yml index 8dd8bcdb5..47f48f6a8 100644 --- a/.github/workflows/reusable_gpu.yml +++ b/.github/workflows/reusable_gpu.yml @@ -115,7 +115,7 @@ jobs: - name: Run benchmarks if: matrix.build_type == 'Release' working-directory: ${{env.BUILD_DIR}} - run: ctest --output-on-failure --test-dir benchmark -C ${{matrix.build_type}} --exclude-regex umf-bench-multithreaded + run: ctest --output-on-failure --test-dir benchmark -C ${{matrix.build_type}} --exclude-regex umf-multithreaded - name: Check 
coverage if: ${{ matrix.build_type == 'Debug' && matrix.os == 'Ubuntu' }} From fd99542cfa7b9d2168d9d86b14725fd90e92b5b8 Mon Sep 17 00:00:00 2001 From: intel12232289 Date: Tue, 11 Feb 2025 13:16:47 +0100 Subject: [PATCH 134/466] ICX For Windows --- .github/workflows/nightly.yml | 173 +++++++++++++++++----------------- 1 file changed, 86 insertions(+), 87 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 0243b1e00..7a6335ed6 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -153,93 +153,92 @@ jobs: working-directory: ${{env.BUILD_DIR}} run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test - # TODO fix #843 - #icx: - # name: ICX - # env: - # VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" - # BUILD_DIR : "${{github.workspace}}/build" - # strategy: - # matrix: - # os: ['windows-2019', 'windows-2022'] - # build_type: [Debug] - # compiler: [{c: icx, cxx: icx}] - # shared_library: ['ON', 'OFF'] - # include: - # - os: windows-2022 - # build_type: Release - # compiler: {c: icx, cxx: icx} - # shared_library: 'ON' - # - # runs-on: ${{matrix.os}} - # - # steps: - # - name: Checkout - # uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 - # with: - # fetch-depth: 0 - # - # - name: Initialize vcpkg - # uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 - # with: - # vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 - # vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg - # vcpkgJsonGlob: '**/vcpkg.json' - # - # - name: Install dependencies - # run: vcpkg install - # - # - name: Install Ninja - # uses: seanmiddleditch/gha-setup-ninja@96bed6edff20d1dd61ecff9b75cc519d516e6401 # v5 - # - # - name: Download icx compiler - # env: - # # Link source: https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler-download.html - # CMPLR_LINK: "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/15a35578-2f9a-4f39-804b-3906e0a5f8fc/w_dpcpp-cpp-compiler_p_2024.2.1.83_offline.exe" - # run: | - # Invoke-WebRequest -Uri "${{ env.CMPLR_LINK }}" -OutFile compiler_install.exe - # - # - name: Install icx compiler - # shell: cmd - # run: | - # start /b /wait .\compiler_install.exe -s -x -f extracted --log extract.log - # extracted\bootstrapper.exe -s --action install --eula=accept -p=NEED_VS2017_INTEGRATION=0 ^ - # -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=. 
- # - # - name: Configure build - # shell: cmd - # run: | - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" - # cmake ^ - # -B ${{env.BUILD_DIR}} ^ - # -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" ^ - # -DCMAKE_C_COMPILER=${{matrix.compiler.c}} ^ - # -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} ^ - # -G Ninja ^ - # -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} ^ - # -DUMF_FORMAT_CODE_STYLE=OFF ^ - # -DUMF_DEVELOPER_MODE=ON ^ - # -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON ^ - # -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON ^ - # -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON ^ - # -DUMF_BUILD_CUDA_PROVIDER=ON ^ - # -DUMF_TESTS_FAIL_ON_SKIP=ON - # - # - name: Build UMF - # shell: cmd - # run: | - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" - # cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j %NUMBER_OF_PROCESSORS% - # - # - name: Run tests - # shell: cmd - # working-directory: ${{env.BUILD_DIR}} - # run: | - # call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" - # call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" - # ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + icx: + name: ICX + env: + VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" + BUILD_DIR : "${{github.workspace}}/build" + strategy: + matrix: + os: ['windows-2019', 'windows-2022'] + build_type: [Debug] + compiler: [{c: icx, cxx: icx}] + shared_library: ['ON', 'OFF'] + include: + - os: windows-2022 + build_type: Release + compiler: {c: icx, cxx: icx} + shared_library: 'ON' + + runs-on: ${{matrix.os}} + + steps: + - name: Checkout + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + + - name: Initialize vcpkg + uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + with: + vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg + vcpkgJsonGlob: '**/vcpkg.json' + + - name: Install dependencies + run: vcpkg install + + - name: Install Ninja + uses: seanmiddleditch/gha-setup-ninja@96bed6edff20d1dd61ecff9b75cc519d516e6401 # v5 + + - name: Download icx compiler + env: + # Link source: https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compiler-download.html + CMPLR_LINK: "https://registrationcenter-download.intel.com/akdlm/IRC_NAS/15a35578-2f9a-4f39-804b-3906e0a5f8fc/w_dpcpp-cpp-compiler_p_2024.2.1.83_offline.exe" + run: | + Invoke-WebRequest -Uri "${{ env.CMPLR_LINK }}" -OutFile compiler_install.exe + + - name: Install icx compiler + shell: cmd + run: | + start /b /wait .\compiler_install.exe -s -x -f extracted --log extract.log + extracted\bootstrapper.exe -s --action install --eula=accept -p=NEED_VS2017_INTEGRATION=0 ^ + -p=NEED_VS2019_INTEGRATION=0 -p=NEED_VS2022_INTEGRATION=0 --log-dir=. 
+ + - name: Configure build + shell: cmd + run: | + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" + cmake ^ + -B ${{env.BUILD_DIR}} ^ + -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" ^ + -DCMAKE_C_COMPILER=${{matrix.compiler.c}} ^ + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} ^ + -G Ninja ^ + -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} ^ + -DUMF_FORMAT_CODE_STYLE=OFF ^ + -DUMF_DEVELOPER_MODE=ON ^ + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON ^ + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON ^ + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON ^ + -DUMF_BUILD_CUDA_PROVIDER=ON ^ + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build UMF + shell: cmd + run: | + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" + cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j %NUMBER_OF_PROCESSORS% + + - name: Run tests + shell: cmd + working-directory: ${{env.BUILD_DIR}} + run: | + call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" + call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" + ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test L0: uses: ./.github/workflows/reusable_gpu.yml From b42b0c08d9562c2536784564b411fbf5e37fe7ce Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 11 Feb 2025 14:06:00 +0100 Subject: [PATCH 135/466] Fix building the coarse library on Windows The extra sources and libraries are redundant on Windows. They cause the "duplicate symbol" errors. Signed-off-by: Lukasz Dorau --- src/coarse/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coarse/CMakeLists.txt b/src/coarse/CMakeLists.txt index 8806b6b55..c211f9a7b 100644 --- a/src/coarse/CMakeLists.txt +++ b/src/coarse/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -6,7 +6,7 @@ include(${UMF_CMAKE_SOURCE_DIR}/cmake/helpers.cmake) set(COARSE_SOURCES coarse.c ../ravl/ravl.c) -if(UMF_BUILD_SHARED_LIBRARY) +if(UMF_BUILD_SHARED_LIBRARY AND (NOT WINDOWS)) set(COARSE_EXTRA_SRCS ${BA_SOURCES}) set(COARSE_EXTRA_LIBS $) endif() From 64ffe3714c206d666d1c9455b2044b67757a6118 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 11 Feb 2025 14:19:56 +0100 Subject: [PATCH 136/466] Revert "Temporarily disable failing windows-2022 clang CI job" This reverts commit 467be1753b0274169666da6dc15c21ede8b1286d. 
Fixes: #910 Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_basic.yml | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index d4d583bfd..2c86124f2 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -226,14 +226,13 @@ jobs: level_zero_provider: ['ON'] cuda_provider: ['ON'] include: - # temporarily disable failing CI job - #- os: 'windows-2022' - # build_type: Release - # compiler: {c: clang-cl, cxx: clang-cl} - # shared_library: 'ON' - # level_zero_provider: 'ON' - # cuda_provider: 'ON' - # toolset: "-T ClangCL" + - os: 'windows-2022' + build_type: Release + compiler: {c: clang-cl, cxx: clang-cl} + shared_library: 'ON' + level_zero_provider: 'ON' + cuda_provider: 'ON' + toolset: "-T ClangCL" - os: 'windows-2022' build_type: Release compiler: {c: cl, cxx: cl} From a2a9ba803e33433c1b7a558480d7a95b9a790f38 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 11 Feb 2025 14:23:44 +0100 Subject: [PATCH 137/466] Use windows-2019 instead of windows-2022 in the Clang build Use windows-2019 instead of windows-2022 in the Clang build, because it fails on windows-2022. Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_basic.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 2c86124f2..d23e646dd 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -226,7 +226,8 @@ jobs: level_zero_provider: ['ON'] cuda_provider: ['ON'] include: - - os: 'windows-2022' + - os: 'windows-2019' + # clang build fails on Windows 2022 build_type: Release compiler: {c: clang-cl, cxx: clang-cl} shared_library: 'ON' From a77a37c283dbf31ca18b148fa818b9bc496f6a7a Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 12 Feb 2025 08:58:25 +0100 Subject: [PATCH 138/466] Use LOG_FATAL() in case of critical errors Signed-off-by: Lukasz Dorau --- src/pool/pool_scalable.c | 6 +++--- src/provider/provider_cuda.c | 4 ++-- src/provider/provider_level_zero.c | 4 ++-- src/proxy_lib/proxy_lib.c | 22 ++++++++++++---------- 4 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/pool/pool_scalable.c b/src/pool/pool_scalable.c index 6ee364344..2ee265df8 100644 --- a/src/pool/pool_scalable.c +++ b/src/pool/pool_scalable.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -143,7 +143,7 @@ static int init_tbb_callbacks(tbb_callbacks_t *tbb_callbacks) { !tbb_callbacks->pool_aligned_malloc || !tbb_callbacks->pool_free || !tbb_callbacks->pool_create_v1 || !tbb_callbacks->pool_destroy || !tbb_callbacks->pool_identify) { - LOG_ERR("Could not find symbols in %s", lib_name); + LOG_FATAL("Could not find all TBB symbols in %s", lib_name); utils_close_library(tbb_callbacks->lib_handle); return -1; } @@ -266,7 +266,7 @@ static umf_result_t tbb_pool_initialize(umf_memory_provider_handle_t provider, int ret = init_tbb_callbacks(&pool_data->tbb_callbacks); if (ret != 0) { - LOG_ERR("loading TBB symbols failed"); + LOG_FATAL("loading TBB symbols failed"); return UMF_RESULT_ERROR_UNKNOWN; } diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index 40bbd840d..a9b6e88e9 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -202,7 +202,7 @@ static void init_cu_global_state(void) { !g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent || !g_cu_ops.cuIpcGetMemHandle || !g_cu_ops.cuIpcOpenMemHandle || !g_cu_ops.cuIpcCloseMemHandle) { - LOG_ERR("Required CUDA symbols not found."); + LOG_FATAL("Required CUDA symbols not found."); Init_cu_global_state_failed = true; } } @@ -296,7 +296,7 @@ static umf_result_t cu_memory_provider_initialize(void *params, utils_init_once(&cu_is_initialized, init_cu_global_state); if (Init_cu_global_state_failed) { - LOG_ERR("Loading CUDA symbols failed"); + LOG_FATAL("Loading CUDA symbols failed"); return UMF_RESULT_ERROR_UNKNOWN; } diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index 8c8beda31..2d6aa074b 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -242,7 +242,7 @@ static void init_ze_global_state(void) { !g_ze_ops.zeDeviceGetProperties || !g_ze_ops.zeMemGetAllocProperties) { // g_ze_ops.zeMemPutIpcHandle can be NULL because it was introduced // starting from Level Zero 1.6 - LOG_ERR("Required Level Zero symbols not found."); + LOG_FATAL("Required Level Zero symbols not found."); Init_ze_global_state_failed = true; } } @@ -550,7 +550,7 @@ static umf_result_t ze_memory_provider_initialize(void *params, utils_init_once(&ze_is_initialized, init_ze_global_state); if (Init_ze_global_state_failed) { - LOG_ERR("Loading Level Zero symbols failed"); + LOG_FATAL("Loading Level Zero symbols failed"); return UMF_RESULT_ERROR_UNKNOWN; } diff --git a/src/proxy_lib/proxy_lib.c b/src/proxy_lib/proxy_lib.c index 15ddfca1b..4571550f8 100644 --- a/src/proxy_lib/proxy_lib.c +++ b/src/proxy_lib/proxy_lib.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -138,7 +138,7 @@ static size_t get_size_threshold(void) { LOG_DEBUG("UMF_PROXY[size.threshold] = %s", str_threshold); long threshold = utils_get_size_threshold(str_threshold); if (threshold < 0) { - LOG_ERR("incorrect size threshold: %s", str_threshold); + LOG_FATAL("incorrect size threshold: %s", str_threshold); exit(-1); } @@ -163,6 +163,8 @@ static int get_system_allocator_symbols(void) { return 0; } + LOG_FATAL("Required system allocator's symbols not found."); + return -1; } #endif /* _WIN32 */ @@ -174,7 +176,7 @@ void proxy_lib_create_common(void) { umf_result = umfOsMemoryProviderParamsCreate(&os_params); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("creating OS memory provider params failed"); + LOG_FATAL("creating OS memory provider params failed"); exit(-1); } @@ -182,7 +184,7 @@ void proxy_lib_create_common(void) { size_t _threshold = get_size_threshold(); if (_threshold > 0) { if (get_system_allocator_symbols()) { - LOG_ERR("initialization of the system allocator failed!"); + LOG_FATAL("initialization of the system allocator failed!"); exit(-1); } @@ -197,12 +199,12 @@ void proxy_lib_create_common(void) { umf_result = umfOsMemoryProviderParamsSetVisibility(os_params, UMF_MEM_MAP_SHARED); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("setting visibility mode failed"); + LOG_FATAL("setting visibility mode failed"); exit(-1); } umf_result = umfOsMemoryProviderParamsSetShmName(os_params, NULL); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("setting shared memory name failed"); + LOG_FATAL("setting shared memory name failed"); exit(-1); } } else if (utils_env_var_has_str("UMF_PROXY", @@ -210,7 +212,7 @@ void proxy_lib_create_common(void) { umf_result = umfOsMemoryProviderParamsSetVisibility(os_params, UMF_MEM_MAP_SHARED); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("setting visibility mode failed"); + LOG_FATAL("setting visibility mode failed"); exit(-1); } @@ -219,7 +221,7 @@ void proxy_lib_create_common(void) { sprintf(shm_name, "umf_proxy_lib_shm_pid_%i", utils_getpid()); umf_result = umfOsMemoryProviderParamsSetShmName(os_params, shm_name); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("setting shared memory name failed"); + LOG_FATAL("setting shared memory name failed"); exit(-1); } @@ -233,14 +235,14 @@ void proxy_lib_create_common(void) { &OS_memory_provider); umfOsMemoryProviderParamsDestroy(os_params); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("creating OS memory provider failed"); + LOG_FATAL("creating OS memory provider failed"); exit(-1); } umf_result = umfPoolCreate(umfPoolManagerOps(), OS_memory_provider, NULL, 0, &Proxy_pool); if (umf_result != UMF_RESULT_SUCCESS) { - LOG_ERR("creating UMF pool manager failed"); + LOG_FATAL("creating UMF pool manager failed"); exit(-1); } From ef65affed89601b41f2f7e0359944070c0fb82d5 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Tue, 4 Feb 2025 15:18:56 +0100 Subject: [PATCH 139/466] Refactor Level Zero and CUDA tests --- src/utils/utils_level_zero.cpp | 15 +-- src/utils/utils_level_zero.h | 1 - test/common/ipc_common.c | 6 +- test/ipcFixtures.hpp | 15 +++ test/providers/cuda_helpers.cpp | 14 +-- test/providers/cuda_helpers.h | 2 + test/providers/ipc_cuda_prov_consumer.c | 10 +- test/providers/ipc_cuda_prov_producer.c | 10 +- test/providers/ipc_level_zero_prov_consumer.c | 10 +- test/providers/ipc_level_zero_prov_producer.c | 10 +- test/providers/provider_cuda.cpp | 107 +++++++---------- test/providers/provider_level_zero.cpp | 112 
++++++++---------- 12 files changed, 148 insertions(+), 164 deletions(-) diff --git a/src/utils/utils_level_zero.cpp b/src/utils/utils_level_zero.cpp index 40f906f43..02e961d49 100644 --- a/src/utils/utils_level_zero.cpp +++ b/src/utils/utils_level_zero.cpp @@ -344,12 +344,6 @@ int utils_ze_get_drivers(uint32_t *drivers_num_, ze_driver_handle_t *drivers = NULL; uint32_t drivers_num = 0; - ret = utils_ze_init_level_zero(); - if (ret != 0) { - fprintf(stderr, "utils_ze_init_level_zero() failed!\n"); - goto init_fail; - } - ze_result = libze_ops.zeDriverGet(&drivers_num, NULL); if (ze_result != ZE_RESULT_SUCCESS) { fprintf(stderr, "zeDriverGet() failed!\n"); @@ -386,7 +380,6 @@ int utils_ze_get_drivers(uint32_t *drivers_num_, *drivers_ = NULL; } -init_fail: return ret; } @@ -397,12 +390,6 @@ int utils_ze_get_devices(ze_driver_handle_t driver, uint32_t *devices_num_, uint32_t devices_num = 0; ze_device_handle_t *devices = NULL; - ret = utils_ze_init_level_zero(); - if (ret != 0) { - fprintf(stderr, "utils_ze_init_level_zero() failed!\n"); - goto init_fail; - } - ze_result = libze_ops.zeDeviceGet(driver, &devices_num, NULL); if (ze_result != ZE_RESULT_SUCCESS) { fprintf(stderr, "zeDeviceGet() failed!\n"); @@ -438,7 +425,7 @@ int utils_ze_get_devices(ze_driver_handle_t driver, uint32_t *devices_num_, free(devices); devices = NULL; } -init_fail: + return ret; } diff --git a/src/utils/utils_level_zero.h b/src/utils/utils_level_zero.h index d0f3fe154..00f55b351 100644 --- a/src/utils/utils_level_zero.h +++ b/src/utils/utils_level_zero.h @@ -16,7 +16,6 @@ extern "C" { #endif -int utils_ze_init_level_zero(void); int utils_ze_init_level_zero(void); int utils_ze_get_drivers(uint32_t *drivers_num_, ze_driver_handle_t **drivers_); diff --git a/test/common/ipc_common.c b/test/common/ipc_common.c index 1590dd3c4..bf116a677 100644 --- a/test/common/ipc_common.c +++ b/test/common/ipc_common.c @@ -127,8 +127,7 @@ int run_consumer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, umf_result = umfMemoryProviderCreate(provider_ops, provider_params, &provider); if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, - "[consumer] ERROR: creating OS memory provider failed\n"); + fprintf(stderr, "[consumer] ERROR: creating memory provider failed\n"); return -1; } @@ -347,8 +346,7 @@ int run_producer(int port, umf_memory_pool_ops_t *pool_ops, void *pool_params, umf_result = umfMemoryProviderCreate(provider_ops, provider_params, &provider); if (umf_result != UMF_RESULT_SUCCESS) { - fprintf(stderr, - "[producer] ERROR: creating OS memory provider failed\n"); + fprintf(stderr, "[producer] ERROR: creating memory provider failed\n"); return -1; } diff --git a/test/ipcFixtures.hpp b/test/ipcFixtures.hpp index 28369b273..cfe58a166 100644 --- a/test/ipcFixtures.hpp +++ b/test/ipcFixtures.hpp @@ -21,6 +21,7 @@ class MemoryAccessor { public: + virtual ~MemoryAccessor() = default; virtual void fill(void *ptr, size_t size, const void *pattern, size_t pattern_size) = 0; virtual void copy(void *dst_ptr, void *src_ptr, size_t size) = 0; @@ -162,6 +163,7 @@ struct umfIpcTest : umf_test::test, TEST_P(umfIpcTest, GetIPCHandleSize) { size_t size = 0; umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); umf_result_t ret = umfPoolGetIPCHandleSize(pool.get(), &size); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); @@ -174,6 +176,8 @@ TEST_P(umfIpcTest, GetIPCHandleSizeInvalidArgs) { EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); 
+ ret = umfPoolGetIPCHandleSize(pool.get(), nullptr); EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); } @@ -190,6 +194,8 @@ TEST_P(umfIpcTest, GetIPCHandleInvalidArgs) { EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); + ptr = umfPoolMalloc(pool.get(), SIZE); EXPECT_NE(ptr, nullptr); @@ -213,6 +219,8 @@ TEST_P(umfIpcTest, BasicFlow) { constexpr size_t SIZE = 100; std::vector expected_data(SIZE); umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); + int *ptr = (int *)umfPoolMalloc(pool.get(), SIZE * sizeof(int)); EXPECT_NE(ptr, nullptr); @@ -283,6 +291,7 @@ TEST_P(umfIpcTest, GetPoolByOpenedHandle) { void *openedPtrs[NUM_POOLS][NUM_ALLOCS]; std::vector pools_to_open; umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); for (size_t i = 0; i < NUM_POOLS; ++i) { pools_to_open.push_back(makePool()); @@ -341,6 +350,8 @@ TEST_P(umfIpcTest, GetPoolByOpenedHandle) { TEST_P(umfIpcTest, AllocFreeAllocTest) { constexpr size_t SIZE = 64 * 1024; umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); + umf_ipc_handler_handle_t ipcHandler = nullptr; umf_result_t ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler); @@ -400,7 +411,9 @@ TEST_P(umfIpcTest, openInTwoIpcHandlers) { constexpr size_t SIZE = 100; std::vector expected_data(SIZE); umf::pool_unique_handle_t pool1 = makePool(); + ASSERT_NE(pool1.get(), nullptr); umf::pool_unique_handle_t pool2 = makePool(); + ASSERT_NE(pool2.get(), nullptr); umf_ipc_handler_handle_t ipcHandler1 = nullptr; umf_ipc_handler_handle_t ipcHandler2 = nullptr; @@ -465,6 +478,7 @@ TEST_P(umfIpcTest, ConcurrentGetPutHandles) { constexpr size_t ALLOC_SIZE = 100; constexpr size_t NUM_POINTERS = 100; umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); for (size_t i = 0; i < NUM_POINTERS; ++i) { void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); @@ -514,6 +528,7 @@ TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) { constexpr size_t ALLOC_SIZE = 100; constexpr size_t NUM_POINTERS = 100; umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); for (size_t i = 0; i < NUM_POINTERS; ++i) { void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); diff --git a/test/providers/cuda_helpers.cpp b/test/providers/cuda_helpers.cpp index bed9906c0..c8bca6166 100644 --- a/test/providers/cuda_helpers.cpp +++ b/test/providers/cuda_helpers.cpp @@ -406,7 +406,7 @@ void init_cuda_once() { InitResult = init_cuda_lib(); } -int init_cuda() { +int init_cuda(void) { utils_init_once(&cuda_init_flag, init_cuda_once); return InitResult; @@ -415,12 +415,6 @@ int init_cuda() { int get_cuda_device(CUdevice *device) { CUdevice cuDevice = -1; - int ret = init_cuda(); - if (ret != 0) { - fprintf(stderr, "init_cuda() failed!\n"); - return ret; - } - CUresult res = libcu_ops.cuDeviceGet(&cuDevice, 0); if (res != CUDA_SUCCESS || cuDevice < 0) { return -1; @@ -433,12 +427,6 @@ int get_cuda_device(CUdevice *device) { int create_context(CUdevice device, CUcontext *context) { CUcontext cuContext = nullptr; - int ret = init_cuda(); - if (ret != 0) { - fprintf(stderr, "init_cuda() failed!\n"); - return ret; - } - CUresult res = libcu_ops.cuCtxCreate(&cuContext, 0, device); if (res != CUDA_SUCCESS || cuContext == nullptr) { return -1; diff --git a/test/providers/cuda_helpers.h b/test/providers/cuda_helpers.h index 65f4fbbf5..3d6572209 100644 --- a/test/providers/cuda_helpers.h +++ b/test/providers/cuda_helpers.h @@ -26,6 +26,8 
@@ extern "C" { #endif +int init_cuda(void); + int get_cuda_device(CUdevice *device); int create_context(CUdevice device, CUcontext *context); diff --git a/test/providers/ipc_cuda_prov_consumer.c b/test/providers/ipc_cuda_prov_consumer.c index 1aeb5b15c..3d4a70707 100644 --- a/test/providers/ipc_cuda_prov_consumer.c +++ b/test/providers/ipc_cuda_prov_consumer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -25,7 +25,13 @@ int main(int argc, char *argv[]) { CUdevice hDevice = -1; CUcontext hContext = NULL; - int ret = get_cuda_device(&hDevice); + int ret = init_cuda(); + if (ret != 0) { + fprintf(stderr, "init_cuda() failed!\n"); + return -1; + } + + ret = get_cuda_device(&hDevice); if (ret != 0) { fprintf(stderr, "get_cuda_device() failed!\n"); return -1; diff --git a/test/providers/ipc_cuda_prov_producer.c b/test/providers/ipc_cuda_prov_producer.c index c2cd1d132..a7421da06 100644 --- a/test/providers/ipc_cuda_prov_producer.c +++ b/test/providers/ipc_cuda_prov_producer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -25,7 +25,13 @@ int main(int argc, char *argv[]) { CUdevice hDevice = -1; CUcontext hContext = NULL; - int ret = get_cuda_device(&hDevice); + int ret = init_cuda(); + if (ret != 0) { + fprintf(stderr, "init_cuda() failed!\n"); + return -1; + } + + ret = get_cuda_device(&hDevice); if (ret != 0) { fprintf(stderr, "get_cuda_device() failed!\n"); return -1; diff --git a/test/providers/ipc_level_zero_prov_consumer.c b/test/providers/ipc_level_zero_prov_consumer.c index 8ec0648e4..5fb212881 100644 --- a/test/providers/ipc_level_zero_prov_consumer.c +++ b/test/providers/ipc_level_zero_prov_consumer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -27,7 +27,13 @@ int main(int argc, char *argv[]) { ze_device_handle_t hDevice = NULL; ze_context_handle_t hContext = NULL; - int ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver); + int ret = utils_ze_init_level_zero(); + if (ret != 0) { + fprintf(stderr, "utils_ze_init_level_zero() failed!\n"); + return -1; + } + + ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver); if (ret != 0 || hDriver == NULL) { fprintf(stderr, "utils_ze_find_driver_with_gpu() failed!\n"); return -1; diff --git a/test/providers/ipc_level_zero_prov_producer.c b/test/providers/ipc_level_zero_prov_producer.c index 2a8fedc37..e6ffcf2ed 100644 --- a/test/providers/ipc_level_zero_prov_producer.c +++ b/test/providers/ipc_level_zero_prov_producer.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -27,7 +27,13 @@ int main(int argc, char *argv[]) { ze_device_handle_t hDevice = NULL; ze_context_handle_t hContext = NULL; - int ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver); + int ret = utils_ze_init_level_zero(); + if (ret != 0) { + fprintf(stderr, "utils_ze_init_level_zero() failed!\n"); + return -1; + } + + ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver); if (ret != 0 || hDriver == NULL) { fprintf(stderr, "utils_ze_find_driver_with_gpu() failed!\n"); return -1; diff --git a/test/providers/provider_cuda.cpp b/test/providers/provider_cuda.cpp index ff0fca550..bacaacd6c 100644 --- a/test/providers/provider_cuda.cpp +++ b/test/providers/provider_cuda.cpp @@ -39,7 +39,13 @@ class CUDATestHelper { }; CUDATestHelper::CUDATestHelper() { - int ret = get_cuda_device(&hDevice_); + int ret = init_cuda(); + if (ret != 0) { + fprintf(stderr, "init_cuda() failed!\n"); + return; + } + + ret = get_cuda_device(&hDevice_); if (ret != 0) { fprintf(stderr, "get_cuda_device() failed!\n"); return; @@ -83,6 +89,11 @@ create_cuda_prov_params(CUcontext context, CUdevice device, return params; } +umf_result_t destroyCuParams(void *params) { + return umfCUDAMemoryProviderParamsDestroy( + (umf_cuda_memory_provider_params_handle_t)params); +} + class CUDAMemoryAccessor : public MemoryAccessor { public: CUDAMemoryAccessor(CUcontext hContext, CUdevice hDevice) @@ -114,47 +125,53 @@ class CUDAMemoryAccessor : public MemoryAccessor { CUcontext hContext_; }; -typedef void *(*pfnProviderParamsCreate)(); -typedef umf_result_t (*pfnProviderParamsDestroy)(void *); - -using CUDAProviderTestParams = - std::tuple; - struct umfCUDAProviderTest : umf_test::test, - ::testing::WithParamInterface { + ::testing::WithParamInterface { void SetUp() override { test::SetUp(); - auto [params_create, params_destroy, cu_context, memory_type, - accessor] = this->GetParam(); - - params = nullptr; - if (params_create) { - params = (umf_cuda_memory_provider_params_handle_t)params_create(); + umf_usm_memory_type_t memory_type = this->GetParam(); + + memAccessor = nullptr; + expected_context = cudaTestHelper.get_test_context(); + params = create_cuda_prov_params(cudaTestHelper.get_test_context(), + cudaTestHelper.get_test_device(), + memory_type); + ASSERT_NE(expected_context, nullptr); + + switch (memory_type) { + case UMF_MEMORY_TYPE_DEVICE: + + memAccessor = std::make_unique( + cudaTestHelper.get_test_context(), + cudaTestHelper.get_test_device()); + break; + case UMF_MEMORY_TYPE_SHARED: + case UMF_MEMORY_TYPE_HOST: + memAccessor = std::make_unique(); + break; + case UMF_MEMORY_TYPE_UNKNOWN: + break; } - paramsDestroy = params_destroy; - memAccessor = accessor; - expected_context = cu_context; expected_memory_type = memory_type; } void TearDown() override { - if (paramsDestroy) { - paramsDestroy(params); + if (params) { + destroyCuParams(params); } test::TearDown(); } - umf_cuda_memory_provider_params_handle_t params; - pfnProviderParamsDestroy paramsDestroy = nullptr; + CUDATestHelper cudaTestHelper; + umf_cuda_memory_provider_params_handle_t params = nullptr; - MemoryAccessor *memAccessor = nullptr; - CUcontext expected_context; + std::unique_ptr memAccessor = nullptr; + CUcontext expected_context = nullptr; umf_usm_memory_type_t expected_memory_type; }; @@ -391,44 +408,10 @@ TEST_P(umfCUDAProviderTest, multiContext) { // TODO add tests that mixes CUDA Memory Provider and Disjoint Pool -CUDATestHelper cudaTestHelper; - -void *createCuParamsDeviceMemory() { 
- return create_cuda_prov_params(cudaTestHelper.get_test_context(), - cudaTestHelper.get_test_device(), - UMF_MEMORY_TYPE_DEVICE); -} -void *createCuParamsSharedMemory() { - return create_cuda_prov_params(cudaTestHelper.get_test_context(), - cudaTestHelper.get_test_device(), - UMF_MEMORY_TYPE_SHARED); -} -void *createCuParamsHostMemory() { - return create_cuda_prov_params(cudaTestHelper.get_test_context(), - cudaTestHelper.get_test_device(), - UMF_MEMORY_TYPE_HOST); -} - -umf_result_t destroyCuParams(void *params) { - return umfCUDAMemoryProviderParamsDestroy( - (umf_cuda_memory_provider_params_handle_t)params); -} - -CUDAMemoryAccessor cuAccessor(cudaTestHelper.get_test_context(), - cudaTestHelper.get_test_device()); -HostMemoryAccessor hostAccessor; -INSTANTIATE_TEST_SUITE_P( - umfCUDAProviderTestSuite, umfCUDAProviderTest, - ::testing::Values( - CUDAProviderTestParams{createCuParamsDeviceMemory, destroyCuParams, - cudaTestHelper.get_test_context(), - UMF_MEMORY_TYPE_DEVICE, &cuAccessor}, - CUDAProviderTestParams{createCuParamsSharedMemory, destroyCuParams, - cudaTestHelper.get_test_context(), - UMF_MEMORY_TYPE_SHARED, &hostAccessor}, - CUDAProviderTestParams{createCuParamsHostMemory, destroyCuParams, - cudaTestHelper.get_test_context(), - UMF_MEMORY_TYPE_HOST, &hostAccessor})); +INSTANTIATE_TEST_SUITE_P(umfCUDAProviderTestSuite, umfCUDAProviderTest, + ::testing::Values(UMF_MEMORY_TYPE_DEVICE, + UMF_MEMORY_TYPE_SHARED, + UMF_MEMORY_TYPE_HOST)); // TODO: add IPC API GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); diff --git a/test/providers/provider_level_zero.cpp b/test/providers/provider_level_zero.cpp index af90aa72e..47b62cc94 100644 --- a/test/providers/provider_level_zero.cpp +++ b/test/providers/provider_level_zero.cpp @@ -42,7 +42,13 @@ class LevelZeroTestHelper { LevelZeroTestHelper::LevelZeroTestHelper() { uint32_t driver_idx = 0; - int ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver_); + int ret = utils_ze_init_level_zero(); + if (ret != 0) { + fprintf(stderr, "utils_ze_init_level_zero() failed!\n"); + return; + } + + ret = utils_ze_find_driver_with_gpu(&driver_idx, &hDriver_); if (ret != 0 || hDriver_ == NULL) { fprintf(stderr, "utils_ze_find_driver_with_gpu() failed!\n"); return; @@ -93,17 +99,22 @@ create_level_zero_prov_params(ze_context_handle_t context, return params; } +umf_result_t destroyL0Params(void *params) { + return umfLevelZeroMemoryProviderParamsDestroy( + static_cast(params)); +} + struct LevelZeroProviderInit : public test, - public ::testing::WithParamInterface {}; + public ::testing::WithParamInterface { + LevelZeroTestHelper l0TestHelper; +}; INSTANTIATE_TEST_SUITE_P(, LevelZeroProviderInit, ::testing::Values(UMF_MEMORY_TYPE_HOST, UMF_MEMORY_TYPE_DEVICE, UMF_MEMORY_TYPE_SHARED)); -LevelZeroTestHelper l0TestHelper; - TEST_P(LevelZeroProviderInit, FailNullContext) { umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); ASSERT_NE(ops, nullptr); @@ -156,12 +167,14 @@ TEST_P(LevelZeroProviderInit, FailNullDevice) { umfLevelZeroMemoryProviderParamsDestroy(hParams); } -TEST_F(test, FailNonNullDevice) { +TEST_F(LevelZeroProviderInit, FailNonNullDevice) { + if (GetParam() != UMF_MEMORY_TYPE_HOST) { + GTEST_SKIP() << "Host memory does not require device handle"; + } umf_memory_provider_ops_t *ops = umfLevelZeroMemoryProviderOps(); ASSERT_NE(ops, nullptr); - auto memory_type = UMF_MEMORY_TYPE_HOST; - + auto memory_type = GetParam(); umf_level_zero_memory_provider_params_handle_t hParams = nullptr; umf_result_t result = 
umfLevelZeroMemoryProviderParamsCreate(&hParams); ASSERT_EQ(result, UMF_RESULT_SUCCESS); @@ -225,44 +238,43 @@ class LevelZeroMemoryAccessor : public MemoryAccessor { ze_context_handle_t hContext_; }; -typedef void *(*pfnProviderParamsCreate)(); -typedef umf_result_t (*pfnProviderParamsDestroy)(void *); - -using LevelZeroProviderTestParams = - std::tuple; - struct umfLevelZeroProviderTest : umf_test::test, - ::testing::WithParamInterface { + ::testing::WithParamInterface { void SetUp() override { test::SetUp(); - auto [params_create, params_destroy, ze_context, memory_type, - accessor] = this->GetParam(); + umf_usm_memory_type_t memory_type = this->GetParam(); params = nullptr; - if (params_create) { - params = - (umf_level_zero_memory_provider_params_handle_t)params_create(); - } - paramsDestroy = params_destroy; - - memAccessor = accessor; - hContext = ze_context; + memAccessor = nullptr; + hContext = l0TestHelper.get_test_context(); ASSERT_NE(hContext, nullptr); switch (memory_type) { case UMF_MEMORY_TYPE_DEVICE: zeMemoryTypeExpected = ZE_MEMORY_TYPE_DEVICE; + params = create_level_zero_prov_params( + l0TestHelper.get_test_context(), l0TestHelper.get_test_device(), + memory_type); + memAccessor = std::make_unique( + l0TestHelper.get_test_context(), + l0TestHelper.get_test_device()); break; case UMF_MEMORY_TYPE_SHARED: zeMemoryTypeExpected = ZE_MEMORY_TYPE_SHARED; + params = create_level_zero_prov_params( + l0TestHelper.get_test_context(), l0TestHelper.get_test_device(), + memory_type); + memAccessor = std::make_unique(); break; case UMF_MEMORY_TYPE_HOST: zeMemoryTypeExpected = ZE_MEMORY_TYPE_HOST; + params = create_level_zero_prov_params( + l0TestHelper.get_test_context(), nullptr, memory_type); + memAccessor = std::make_unique(); break; case UMF_MEMORY_TYPE_UNKNOWN: zeMemoryTypeExpected = ZE_MEMORY_TYPE_UNKNOWN; @@ -273,17 +285,17 @@ struct umfLevelZeroProviderTest } void TearDown() override { - if (paramsDestroy) { - paramsDestroy(params); + if (params) { + destroyL0Params(params); } test::TearDown(); } - umf_level_zero_memory_provider_params_handle_t params; - pfnProviderParamsDestroy paramsDestroy = nullptr; + LevelZeroTestHelper l0TestHelper; + umf_level_zero_memory_provider_params_handle_t params = nullptr; - MemoryAccessor *memAccessor = nullptr; + std::unique_ptr memAccessor = nullptr; ze_context_handle_t hContext = nullptr; ze_memory_type_t zeMemoryTypeExpected = ZE_MEMORY_TYPE_UNKNOWN; }; @@ -470,47 +482,23 @@ TEST_P(umfLevelZeroProviderTest, setDeviceOrdinalValid) { // TODO add tests that mixes Level Zero Memory Provider and Disjoint Pool +INSTANTIATE_TEST_SUITE_P(umfLevelZeroProviderTestSuite, + umfLevelZeroProviderTest, + ::testing::Values(UMF_MEMORY_TYPE_DEVICE, + UMF_MEMORY_TYPE_SHARED, + UMF_MEMORY_TYPE_HOST)); + +LevelZeroTestHelper l0TestHelper; + void *createL0ParamsDeviceMemory() { return create_level_zero_prov_params(l0TestHelper.get_test_context(), l0TestHelper.get_test_device(), UMF_MEMORY_TYPE_DEVICE); } -void *createL0ParamsSharedMemory() { - return create_level_zero_prov_params(l0TestHelper.get_test_context(), - l0TestHelper.get_test_device(), - UMF_MEMORY_TYPE_SHARED); -} - -void *createL0ParamsHostMemory() { - return create_level_zero_prov_params(l0TestHelper.get_test_context(), - nullptr, UMF_MEMORY_TYPE_HOST); -} - -umf_result_t destroyL0Params(void *params) { - return umfLevelZeroMemoryProviderParamsDestroy( - static_cast(params)); -} - LevelZeroMemoryAccessor l0Accessor((ze_context_handle_t)l0TestHelper.get_test_context(), 
(ze_device_handle_t)l0TestHelper.get_test_device()); - -HostMemoryAccessor hostAccessor; - -INSTANTIATE_TEST_SUITE_P( - umfLevelZeroProviderTestSuite, umfLevelZeroProviderTest, - ::testing::Values( - LevelZeroProviderTestParams{createL0ParamsDeviceMemory, destroyL0Params, - l0TestHelper.get_test_context(), - UMF_MEMORY_TYPE_DEVICE, &l0Accessor}, - LevelZeroProviderTestParams{createL0ParamsSharedMemory, destroyL0Params, - l0TestHelper.get_test_context(), - UMF_MEMORY_TYPE_SHARED, &hostAccessor}, - LevelZeroProviderTestParams{createL0ParamsHostMemory, destroyL0Params, - l0TestHelper.get_test_context(), - UMF_MEMORY_TYPE_HOST, &hostAccessor})); - // TODO: it looks like there is some problem with IPC implementation in Level // Zero on windows. Issue: #494 #ifdef _WIN32 From d36d5857e5a3dfb13a97cac65f1bf93dcd3a1f87 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Mon, 10 Feb 2025 22:25:04 +0100 Subject: [PATCH 140/466] add support for CUDA allocation flags --- include/umf/providers/provider_cuda.h | 9 +- src/libumf.def | 1 + src/libumf.map | 1 + src/provider/provider_cuda.c | 60 ++++++++-- test/providers/cuda_helpers.cpp | 36 ++++-- test/providers/cuda_helpers.h | 2 + test/providers/provider_cuda.cpp | 132 +++++++++++++++++++++- test/providers/provider_cuda_not_impl.cpp | 5 +- 8 files changed, 224 insertions(+), 22 deletions(-) diff --git a/include/umf/providers/provider_cuda.h b/include/umf/providers/provider_cuda.h index 5f1d5a6e2..e3b81858b 100644 --- a/include/umf/providers/provider_cuda.h +++ b/include/umf/providers/provider_cuda.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -53,6 +53,13 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType( umf_cuda_memory_provider_params_handle_t hParams, umf_usm_memory_type_t memoryType); +/// @brief Set the allocation flags in the parameters struct. +/// @param hParams handle to the parameters of the CUDA Memory Provider. +/// @param flags valid combination of CUDA allocation flags. +/// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
+umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags( + umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags); + umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void); #ifdef __cplusplus diff --git a/src/libumf.def b/src/libumf.def index f93553e90..98226dace 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -118,6 +118,7 @@ EXPORTS umfScalablePoolParamsSetGranularity umfScalablePoolParamsSetKeepAllMemory ; Added in UMF_0.11 + umfCUDAMemoryProviderParamsSetAllocFlags umfFixedMemoryProviderOps umfFixedMemoryProviderParamsCreate umfFixedMemoryProviderParamsDestroy diff --git a/src/libumf.map b/src/libumf.map index 7a7ac5ad3..bbf664dcf 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -116,6 +116,7 @@ UMF_0.10 { }; UMF_0.11 { + umfCUDAMemoryProviderParamsSetAllocFlags; umfFixedMemoryProviderOps; umfFixedMemoryProviderParamsCreate; umfFixedMemoryProviderParamsDestroy; diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index a9b6e88e9..a0f963fdd 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -55,6 +55,14 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType( return UMF_RESULT_ERROR_NOT_SUPPORTED; } +umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags( + umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags) { + (void)hParams; + (void)flags; + LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) { // not supported LOG_ERR("CUDA provider is disabled (UMF_BUILD_CUDA_PROVIDER is OFF)!"); @@ -89,13 +97,22 @@ typedef struct cu_memory_provider_t { CUdevice device; umf_usm_memory_type_t memory_type; size_t min_alignment; + unsigned int alloc_flags; } cu_memory_provider_t; // CUDA Memory Provider settings struct typedef struct umf_cuda_memory_provider_params_t { - void *cuda_context_handle; ///< Handle to the CUDA context - int cuda_device_handle; ///< Handle to the CUDA device - umf_usm_memory_type_t memory_type; ///< Allocation memory type + // Handle to the CUDA context + void *cuda_context_handle; + + // Handle to the CUDA device + int cuda_device_handle; + + // Allocation memory type + umf_usm_memory_type_t memory_type; + + // Allocation flags for cuMemHostAlloc/cuMemAllocManaged + unsigned int alloc_flags; } umf_cuda_memory_provider_params_t; typedef struct cu_ops_t { @@ -103,7 +120,7 @@ typedef struct cu_ops_t { size_t *granularity, const CUmemAllocationProp *prop, CUmemAllocationGranularity_flags option); CUresult (*cuMemAlloc)(CUdeviceptr *dptr, size_t bytesize); - CUresult (*cuMemAllocHost)(void **pp, size_t bytesize); + CUresult (*cuMemHostAlloc)(void **pp, size_t bytesize, unsigned int flags); CUresult (*cuMemAllocManaged)(CUdeviceptr *dptr, size_t bytesize, unsigned int flags); CUresult (*cuMemFree)(CUdeviceptr dptr); @@ -172,8 +189,8 @@ static void init_cu_global_state(void) { utils_get_symbol_addr(0, "cuMemGetAllocationGranularity", lib_name); *(void **)&g_cu_ops.cuMemAlloc = utils_get_symbol_addr(0, "cuMemAlloc_v2", lib_name); - *(void **)&g_cu_ops.cuMemAllocHost = - utils_get_symbol_addr(0, "cuMemAllocHost_v2", lib_name); + *(void **)&g_cu_ops.cuMemHostAlloc = + utils_get_symbol_addr(0, "cuMemHostAlloc", lib_name); *(void **)&g_cu_ops.cuMemAllocManaged = utils_get_symbol_addr(0, "cuMemAllocManaged", lib_name); *(void **)&g_cu_ops.cuMemFree = @@ -196,7 +213,7 @@ static void init_cu_global_state(void) { utils_get_symbol_addr(0, "cuIpcCloseMemHandle", lib_name); if 
(!g_cu_ops.cuMemGetAllocationGranularity || !g_cu_ops.cuMemAlloc || - !g_cu_ops.cuMemAllocHost || !g_cu_ops.cuMemAllocManaged || + !g_cu_ops.cuMemHostAlloc || !g_cu_ops.cuMemAllocManaged || !g_cu_ops.cuMemFree || !g_cu_ops.cuMemFreeHost || !g_cu_ops.cuGetErrorName || !g_cu_ops.cuGetErrorString || !g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent || @@ -225,6 +242,7 @@ umf_result_t umfCUDAMemoryProviderParamsCreate( params_data->cuda_context_handle = NULL; params_data->cuda_device_handle = -1; params_data->memory_type = UMF_MEMORY_TYPE_UNKNOWN; + params_data->alloc_flags = 0; *hParams = params_data; @@ -275,6 +293,18 @@ umf_result_t umfCUDAMemoryProviderParamsSetMemoryType( return UMF_RESULT_SUCCESS; } +umf_result_t umfCUDAMemoryProviderParamsSetAllocFlags( + umf_cuda_memory_provider_params_handle_t hParams, unsigned int flags) { + if (!hParams) { + LOG_ERR("CUDA Memory Provider params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->alloc_flags = flags; + + return UMF_RESULT_SUCCESS; +} + static umf_result_t cu_memory_provider_initialize(void *params, void **provider) { if (params == NULL) { @@ -325,6 +355,17 @@ static umf_result_t cu_memory_provider_initialize(void *params, cu_provider->memory_type = cu_params->memory_type; cu_provider->min_alignment = min_alignment; + // If the memory type is shared (CUDA managed), the allocation flags must + // be set. NOTE: we do not check here if the flags are valid - + // this will be done by CUDA runtime. + if (cu_params->memory_type == UMF_MEMORY_TYPE_SHARED && + cu_params->alloc_flags == 0) { + // the default setting is CU_MEM_ATTACH_GLOBAL + cu_provider->alloc_flags = CU_MEM_ATTACH_GLOBAL; + } else { + cu_provider->alloc_flags = cu_params->alloc_flags; + } + *provider = cu_provider; return UMF_RESULT_SUCCESS; @@ -381,7 +422,8 @@ static umf_result_t cu_memory_provider_alloc(void *provider, size_t size, CUresult cu_result = CUDA_SUCCESS; switch (cu_provider->memory_type) { case UMF_MEMORY_TYPE_HOST: { - cu_result = g_cu_ops.cuMemAllocHost(resultPtr, size); + cu_result = + g_cu_ops.cuMemHostAlloc(resultPtr, size, cu_provider->alloc_flags); break; } case UMF_MEMORY_TYPE_DEVICE: { @@ -390,7 +432,7 @@ static umf_result_t cu_memory_provider_alloc(void *provider, size_t size, } case UMF_MEMORY_TYPE_SHARED: { cu_result = g_cu_ops.cuMemAllocManaged((CUdeviceptr *)resultPtr, size, - CU_MEM_ATTACH_GLOBAL); + cu_provider->alloc_flags); break; } default: diff --git a/test/providers/cuda_helpers.cpp b/test/providers/cuda_helpers.cpp index c8bca6166..aa0647080 100644 --- a/test/providers/cuda_helpers.cpp +++ b/test/providers/cuda_helpers.cpp @@ -22,7 +22,7 @@ struct libcu_ops { CUresult (*cuDeviceGet)(CUdevice *device, int ordinal); CUresult (*cuMemAlloc)(CUdeviceptr *dptr, size_t size); CUresult (*cuMemFree)(CUdeviceptr dptr); - CUresult (*cuMemAllocHost)(void **pp, size_t size); + CUresult (*cuMemHostAlloc)(void **pp, size_t size, unsigned int flags); CUresult (*cuMemAllocManaged)(CUdeviceptr *dptr, size_t bytesize, unsigned int flags); CUresult (*cuMemFreeHost)(void *p); @@ -34,6 +34,7 @@ struct libcu_ops { CUresult (*cuPointerGetAttributes)(unsigned int numAttributes, CUpointer_attribute *attributes, void **data, CUdeviceptr ptr); + CUresult (*cuMemHostGetFlags)(unsigned int *pFlags, void *p); CUresult (*cuStreamSynchronize)(CUstream hStream); CUresult (*cuCtxSynchronize)(void); } libcu_ops; @@ -69,7 +70,7 @@ struct DlHandleCloser { libcu_ops.cuMemFree = [](auto... 
args) { return noop_stub(args...); }; - libcu_ops.cuMemAllocHost = [](auto... args) { + libcu_ops.cuMemHostAlloc = [](auto... args) { return noop_stub(args...); }; libcu_ops.cuMemAllocManaged = [](auto... args) { @@ -90,6 +91,9 @@ struct DlHandleCloser { libcu_ops.cuPointerGetAttributes = [](auto... args) { return noop_stub(args...); }; + libcu_ops.cuMemHostGetFlags = [](auto... args) { + return noop_stub(args...); + }; libcu_ops.cuStreamSynchronize = [](auto... args) { return noop_stub(args...); }; @@ -164,10 +168,10 @@ int InitCUDAOps() { fprintf(stderr, "cuMemFree_v2 symbol not found in %s\n", lib_name); return -1; } - *(void **)&libcu_ops.cuMemAllocHost = - utils_get_symbol_addr(cuDlHandle.get(), "cuMemAllocHost_v2", lib_name); - if (libcu_ops.cuMemAllocHost == nullptr) { - fprintf(stderr, "cuMemAllocHost_v2 symbol not found in %s\n", lib_name); + *(void **)&libcu_ops.cuMemHostAlloc = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemHostAlloc", lib_name); + if (libcu_ops.cuMemHostAlloc == nullptr) { + fprintf(stderr, "cuMemHostAlloc symbol not found in %s\n", lib_name); return -1; } *(void **)&libcu_ops.cuMemAllocManaged = @@ -208,6 +212,12 @@ int InitCUDAOps() { lib_name); return -1; } + *(void **)&libcu_ops.cuMemHostGetFlags = + utils_get_symbol_addr(cuDlHandle.get(), "cuMemHostGetFlags", lib_name); + if (libcu_ops.cuMemHostGetFlags == nullptr) { + fprintf(stderr, "cuMemHostGetFlags symbol not found in %s\n", lib_name); + return -1; + } *(void **)&libcu_ops.cuStreamSynchronize = utils_get_symbol_addr( cuDlHandle.get(), "cuStreamSynchronize", lib_name); if (libcu_ops.cuStreamSynchronize == nullptr) { @@ -236,7 +246,7 @@ int InitCUDAOps() { libcu_ops.cuCtxSetCurrent = cuCtxSetCurrent; libcu_ops.cuDeviceGet = cuDeviceGet; libcu_ops.cuMemAlloc = cuMemAlloc; - libcu_ops.cuMemAllocHost = cuMemAllocHost; + libcu_ops.cuMemHostAlloc = cuMemHostAlloc; libcu_ops.cuMemAllocManaged = cuMemAllocManaged; libcu_ops.cuMemFree = cuMemFree; libcu_ops.cuMemFreeHost = cuMemFreeHost; @@ -244,6 +254,7 @@ int InitCUDAOps() { libcu_ops.cuMemcpy = cuMemcpy; libcu_ops.cuPointerGetAttribute = cuPointerGetAttribute; libcu_ops.cuPointerGetAttributes = cuPointerGetAttributes; + libcu_ops.cuMemHostGetFlags = cuMemHostGetFlags; libcu_ops.cuStreamSynchronize = cuStreamSynchronize; libcu_ops.cuCtxSynchronize = cuCtxSynchronize; @@ -373,6 +384,17 @@ umf_usm_memory_type_t get_mem_type(CUcontext context, void *ptr) { return UMF_MEMORY_TYPE_UNKNOWN; } +unsigned int get_mem_host_alloc_flags(void *ptr) { + unsigned int flags; + CUresult res = libcu_ops.cuMemHostGetFlags(&flags, ptr); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuPointerGetAttribute() failed!\n"); + return 0; + } + + return flags; +} + CUcontext get_mem_context(void *ptr) { CUcontext context; CUresult res = libcu_ops.cuPointerGetAttribute( diff --git a/test/providers/cuda_helpers.h b/test/providers/cuda_helpers.h index 3d6572209..e7deb9064 100644 --- a/test/providers/cuda_helpers.h +++ b/test/providers/cuda_helpers.h @@ -44,6 +44,8 @@ int cuda_copy(CUcontext context, CUdevice device, void *dst_ptr, umf_usm_memory_type_t get_mem_type(CUcontext context, void *ptr); +unsigned int get_mem_host_alloc_flags(void *ptr); + CUcontext get_mem_context(void *ptr); CUcontext get_current_context(); diff --git a/test/providers/provider_cuda.cpp b/test/providers/provider_cuda.cpp index bacaacd6c..9c7f76dd1 100644 --- a/test/providers/provider_cuda.cpp +++ b/test/providers/provider_cuda.cpp @@ -60,7 +60,7 @@ CUDATestHelper::CUDATestHelper() { 
umf_cuda_memory_provider_params_handle_t create_cuda_prov_params(CUcontext context, CUdevice device, - umf_usm_memory_type_t memory_type) { + umf_usm_memory_type_t memory_type, unsigned int flags) { umf_cuda_memory_provider_params_handle_t params = nullptr; umf_result_t res = umfCUDAMemoryProviderParamsCreate(¶ms); @@ -86,6 +86,12 @@ create_cuda_prov_params(CUcontext context, CUdevice device, return nullptr; } + res = umfCUDAMemoryProviderParamsSetAllocFlags(params, flags); + if (res != UMF_RESULT_SUCCESS) { + umfCUDAMemoryProviderParamsDestroy(params); + return nullptr; + } + return params; } @@ -138,7 +144,7 @@ struct umfCUDAProviderTest expected_context = cudaTestHelper.get_test_context(); params = create_cuda_prov_params(cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), - memory_type); + memory_type, 0 /* alloc flags */); ASSERT_NE(expected_context, nullptr); switch (memory_type) { @@ -350,7 +356,7 @@ TEST_P(umfCUDAProviderTest, multiContext) { ASSERT_EQ(ret, 0); umf_cuda_memory_provider_params_handle_t params1 = - create_cuda_prov_params(ctx1, device, UMF_MEMORY_TYPE_HOST); + create_cuda_prov_params(ctx1, device, UMF_MEMORY_TYPE_HOST, 0); ASSERT_NE(params1, nullptr); umf_memory_provider_handle_t provider1; umf_result_t umf_result = umfMemoryProviderCreate( @@ -361,7 +367,7 @@ TEST_P(umfCUDAProviderTest, multiContext) { ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); umf_cuda_memory_provider_params_handle_t params2 = - create_cuda_prov_params(ctx2, device, UMF_MEMORY_TYPE_HOST); + create_cuda_prov_params(ctx2, device, UMF_MEMORY_TYPE_HOST, 0); ASSERT_NE(params2, nullptr); umf_memory_provider_handle_t provider2; umf_result = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), params2, @@ -406,6 +412,115 @@ TEST_P(umfCUDAProviderTest, multiContext) { ASSERT_EQ(ret, 0); } +struct umfCUDAProviderAllocFlagsTest + : umf_test::test, + ::testing::WithParamInterface< + std::tuple> { + + void SetUp() override { + test::SetUp(); + + get_cuda_device(&device); + create_context(device, &context); + } + + void TearDown() override { + destroy_context(context); + + test::TearDown(); + } + + CUdevice device; + CUcontext context; +}; + +TEST_P(umfCUDAProviderAllocFlagsTest, cudaAllocFlags) { + auto [memory_type, test_flags] = this->GetParam(); + + umf_cuda_memory_provider_params_handle_t test_params = + create_cuda_prov_params(context, device, memory_type, test_flags); + + umf_memory_provider_handle_t provider = nullptr; + umf_result_t umf_result = umfMemoryProviderCreate( + umfCUDAMemoryProviderOps(), test_params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + void *ptr = nullptr; + umf_result = umfMemoryProviderAlloc(provider, 128, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + if (memory_type == UMF_MEMORY_TYPE_HOST) { + // check if the memory allocation flag is set correctly + unsigned int flags = get_mem_host_alloc_flags(ptr); + ASSERT_TRUE(flags & test_flags); + } + + umf_result = umfMemoryProviderFree(provider, ptr, 128); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); + umfCUDAMemoryProviderParamsDestroy(test_params); +} + +TEST_P(umfCUDAProviderAllocFlagsTest, reuseParams) { + auto [memory_type, test_flags] = this->GetParam(); + + // first, create a provider for SHARED memory type with empty alloc flags, + // and the reuse the test_params to create a provider for test params + umf_cuda_memory_provider_params_handle_t test_params = + 
create_cuda_prov_params(context, device, UMF_MEMORY_TYPE_SHARED, 0); + + umf_memory_provider_handle_t provider = nullptr; + + umf_result_t umf_result = umfMemoryProviderCreate( + umfCUDAMemoryProviderOps(), test_params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + void *ptr = nullptr; + umf_result = umfMemoryProviderAlloc(provider, 128, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + umf_result = umfMemoryProviderFree(provider, ptr, 128); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); + + // reuse the test_params to create a provider for test params + umf_result = + umfCUDAMemoryProviderParamsSetMemoryType(test_params, memory_type); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = + umfCUDAMemoryProviderParamsSetAllocFlags(test_params, test_flags); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), + test_params, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + umf_result = umfMemoryProviderAlloc(provider, 128, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + if (memory_type == UMF_MEMORY_TYPE_HOST) { + // check if the memory allocation flag is set correctly + unsigned int flags = get_mem_host_alloc_flags(ptr); + ASSERT_TRUE(flags & test_flags); + } + + umf_result = umfMemoryProviderFree(provider, ptr, 128); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); + + umfCUDAMemoryProviderParamsDestroy(test_params); +} + // TODO add tests that mixes CUDA Memory Provider and Disjoint Pool INSTANTIATE_TEST_SUITE_P(umfCUDAProviderTestSuite, umfCUDAProviderTest, @@ -413,6 +528,15 @@ INSTANTIATE_TEST_SUITE_P(umfCUDAProviderTestSuite, umfCUDAProviderTest, UMF_MEMORY_TYPE_SHARED, UMF_MEMORY_TYPE_HOST)); +INSTANTIATE_TEST_SUITE_P( + umfCUDAProviderAllocFlagsTestSuite, umfCUDAProviderAllocFlagsTest, + ::testing::Values( + std::make_tuple(UMF_MEMORY_TYPE_SHARED, CU_MEM_ATTACH_GLOBAL), + std::make_tuple(UMF_MEMORY_TYPE_SHARED, CU_MEM_ATTACH_HOST), + std::make_tuple(UMF_MEMORY_TYPE_HOST, CU_MEMHOSTALLOC_PORTABLE), + std::make_tuple(UMF_MEMORY_TYPE_HOST, CU_MEMHOSTALLOC_DEVICEMAP), + std::make_tuple(UMF_MEMORY_TYPE_HOST, CU_MEMHOSTALLOC_WRITECOMBINED))); + // TODO: add IPC API GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(umfIpcTest); /* diff --git a/test/providers/provider_cuda_not_impl.cpp b/test/providers/provider_cuda_not_impl.cpp index 30fc373ca..4054c26a8 100644 --- a/test/providers/provider_cuda_not_impl.cpp +++ b/test/providers/provider_cuda_not_impl.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -26,6 +26,9 @@ TEST_F(test, cuda_provider_not_implemented) { UMF_MEMORY_TYPE_DEVICE); ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + result = umfCUDAMemoryProviderParamsSetAllocFlags(hParams, 0); + ASSERT_EQ(result, UMF_RESULT_ERROR_NOT_SUPPORTED); + umf_memory_provider_ops_t *ops = umfCUDAMemoryProviderOps(); ASSERT_EQ(ops, nullptr); } From b31c5ac595a1b7aa3f44540c075edf6e6d39df23 Mon Sep 17 00:00:00 2001 From: "Vinogradov, Sergei" Date: Thu, 13 Feb 2025 07:56:39 -0800 Subject: [PATCH 141/466] Do not overwrite ret code in level_zero_shared_memory example --- examples/level_zero_shared_memory/level_zero_shared_memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/level_zero_shared_memory/level_zero_shared_memory.c b/examples/level_zero_shared_memory/level_zero_shared_memory.c index d4c49b8a0..7cfe89366 100644 --- a/examples/level_zero_shared_memory/level_zero_shared_memory.c +++ b/examples/level_zero_shared_memory/level_zero_shared_memory.c @@ -189,6 +189,6 @@ int main(void) { umfLevelZeroMemoryProviderParamsDestroy(ze_memory_provider_params); level_zero_destroy: - ret = destroy_context(hContext); + destroy_context(hContext); return ret; } From 1db4c48eba2a33eed01b40b2db786921ab40d010 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Fri, 7 Feb 2025 18:14:13 +0100 Subject: [PATCH 142/466] Increase refcount to ze_loader library when Level Zero provider is used --- include/umf/base.h | 4 +- src/libumf.c | 4 +- src/provider/provider_level_zero.c | 56 ++++++++++++++------- src/provider/provider_level_zero_internal.h | 10 ++++ src/utils/utils_load_library.c | 11 +++- src/utils/utils_load_library.h | 5 +- 6 files changed, 68 insertions(+), 22 deletions(-) create mode 100644 src/provider/provider_level_zero_internal.h diff --git a/include/umf/base.h b/include/umf/base.h index 32d84771f..8dad184f2 100644 --- a/include/umf/base.h +++ b/include/umf/base.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -45,6 +45,8 @@ typedef enum umf_result_t { UMF_RESULT_ERROR_NOT_SUPPORTED = 5, ///< Operation not supported UMF_RESULT_ERROR_USER_SPECIFIC = 6, ///< Failure in user provider code (i.e in user provided callback) + UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE = + 7, ///< External required dependency is unavailable or missing UMF_RESULT_ERROR_UNKNOWN = 0x7ffffffe ///< Unknown or internal error } umf_result_t; diff --git a/src/libumf.c b/src/libumf.c index b89e5c844..6ca006c82 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -12,6 +12,7 @@ #include "base_alloc_global.h" #include "ipc_cache.h" #include "memspace_internal.h" +#include "provider_level_zero_internal.h" #include "provider_tracking.h" #include "utils_common.h" #include "utils_log.h" @@ -79,6 +80,7 @@ void umfTearDown(void) { LOG_DEBUG("UMF base allocator destroyed"); fini_umfTearDown: + fini_ze_global_state(); LOG_DEBUG("UMF library finalized"); } } diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index 2d6aa074b..f89661401 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -14,8 +14,19 @@ #include #include +#include "provider_level_zero_internal.h" +#include "utils_load_library.h" #include "utils_log.h" +static void *ze_lib_handle = NULL; + +void fini_ze_global_state(void) { + if (ze_lib_handle) { + utils_close_library(ze_lib_handle); + ze_lib_handle = NULL; + } +} + #if defined(UMF_NO_LEVEL_ZERO_PROVIDER) umf_result_t umfLevelZeroMemoryProviderParamsCreate( @@ -105,7 +116,6 @@ umf_memory_provider_ops_t *umfLevelZeroMemoryProviderOps(void) { #include "utils_assert.h" #include "utils_common.h" #include "utils_concurrency.h" -#include "utils_load_library.h" #include "utils_log.h" #include "utils_sanitizers.h" #include "ze_api.h" @@ -207,32 +217,41 @@ static void init_ze_global_state(void) { #else const char *lib_name = "libze_loader.so"; #endif - // check if Level Zero shared library is already loaded - // we pass 0 as a handle to search the global symbol table + // The Level Zero shared library should be already loaded by the user + // of the Level Zero provider. UMF just want to reuse it + // and increase the reference count to the Level Zero shared library. + void *lib_handle = + utils_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_NO_LOAD); + if (!lib_handle) { + LOG_FATAL("Failed to open Level Zero shared library"); + Init_ze_global_state_failed = true; + return; + } + *(void **)&g_ze_ops.zeMemAllocHost = - utils_get_symbol_addr(0, "zeMemAllocHost", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemAllocHost", lib_name); *(void **)&g_ze_ops.zeMemAllocDevice = - utils_get_symbol_addr(0, "zeMemAllocDevice", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemAllocDevice", lib_name); *(void **)&g_ze_ops.zeMemAllocShared = - utils_get_symbol_addr(0, "zeMemAllocShared", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemAllocShared", lib_name); *(void **)&g_ze_ops.zeMemFree = - utils_get_symbol_addr(0, "zeMemFree", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemFree", lib_name); *(void **)&g_ze_ops.zeMemGetIpcHandle = - utils_get_symbol_addr(0, "zeMemGetIpcHandle", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemGetIpcHandle", lib_name); *(void **)&g_ze_ops.zeMemPutIpcHandle = - utils_get_symbol_addr(0, "zeMemPutIpcHandle", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemPutIpcHandle", lib_name); *(void **)&g_ze_ops.zeMemOpenIpcHandle = - utils_get_symbol_addr(0, "zeMemOpenIpcHandle", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemOpenIpcHandle", lib_name); *(void **)&g_ze_ops.zeMemCloseIpcHandle = - utils_get_symbol_addr(0, "zeMemCloseIpcHandle", lib_name); - *(void **)&g_ze_ops.zeContextMakeMemoryResident = - utils_get_symbol_addr(0, "zeContextMakeMemoryResident", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemCloseIpcHandle", lib_name); + *(void **)&g_ze_ops.zeContextMakeMemoryResident = utils_get_symbol_addr( + lib_handle, "zeContextMakeMemoryResident", lib_name); *(void 
**)&g_ze_ops.zeDeviceGetProperties = - utils_get_symbol_addr(0, "zeDeviceGetProperties", lib_name); + utils_get_symbol_addr(lib_handle, "zeDeviceGetProperties", lib_name); *(void **)&g_ze_ops.zeMemFreeExt = - utils_get_symbol_addr(0, "zeMemFreeExt", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemFreeExt", lib_name); *(void **)&g_ze_ops.zeMemGetAllocProperties = - utils_get_symbol_addr(0, "zeMemGetAllocProperties", lib_name); + utils_get_symbol_addr(lib_handle, "zeMemGetAllocProperties", lib_name); if (!g_ze_ops.zeMemAllocHost || !g_ze_ops.zeMemAllocDevice || !g_ze_ops.zeMemAllocShared || !g_ze_ops.zeMemFree || @@ -244,7 +263,10 @@ static void init_ze_global_state(void) { // starting from Level Zero 1.6 LOG_FATAL("Required Level Zero symbols not found."); Init_ze_global_state_failed = true; + utils_close_library(lib_handle); + return; } + ze_lib_handle = lib_handle; } umf_result_t umfLevelZeroMemoryProviderParamsCreate( @@ -551,7 +573,7 @@ static umf_result_t ze_memory_provider_initialize(void *params, utils_init_once(&ze_is_initialized, init_ze_global_state); if (Init_ze_global_state_failed) { LOG_FATAL("Loading Level Zero symbols failed"); - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } ze_memory_provider_t *ze_provider = diff --git a/src/provider/provider_level_zero_internal.h b/src/provider/provider_level_zero_internal.h new file mode 100644 index 000000000..7da299ffd --- /dev/null +++ b/src/provider/provider_level_zero_internal.h @@ -0,0 +1,10 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +void fini_ze_global_state(void); diff --git a/src/utils/utils_load_library.c b/src/utils/utils_load_library.c index ef0da450b..d774fec84 100644 --- a/src/utils/utils_load_library.c +++ b/src/utils/utils_load_library.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -32,7 +32,11 @@ #ifdef _WIN32 void *utils_open_library(const char *filename, int userFlags) { - (void)userFlags; //unused for win + if (userFlags & UMF_UTIL_OPEN_LIBRARY_NO_LOAD) { + HMODULE hModule; + BOOL ret = GetModuleHandleEx(0, TEXT(filename), &hModule); + return ret ? hModule : NULL; + } return LoadLibrary(TEXT(filename)); } @@ -66,6 +70,9 @@ void *utils_open_library(const char *filename, int userFlags) { if (userFlags & UMF_UTIL_OPEN_LIBRARY_GLOBAL) { dlopenFlags |= RTLD_GLOBAL; } + if (userFlags & UMF_UTIL_OPEN_LIBRARY_NO_LOAD) { + dlopenFlags |= RTLD_NOLOAD; + } void *handle = dlopen(filename, dlopenFlags); if (handle == NULL) { diff --git a/src/utils/utils_load_library.h b/src/utils/utils_load_library.h index 3206183f5..5b6e71239 100644 --- a/src/utils/utils_load_library.h +++ b/src/utils/utils_load_library.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -17,7 +17,10 @@ #ifdef __cplusplus extern "C" { #endif +// The symbols defined by this library will be made available for symbol resolution of subsequently loaded libraries. #define UMF_UTIL_OPEN_LIBRARY_GLOBAL 1 +// Don't load the library. utils_open_library succeeds if the library is already loaded. 
+#define UMF_UTIL_OPEN_LIBRARY_NO_LOAD 1 << 1 void *utils_open_library(const char *filename, int userFlags); int utils_close_library(void *handle); From c715e625a4f2e1fd6579ffbb362170b9296010fa Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Mon, 10 Feb 2025 14:35:25 +0100 Subject: [PATCH 143/466] Increase refcount to CUDA library when CUDA provider is used --- src/libumf.c | 2 + src/provider/provider_cuda.c | 57 ++++++++++++++++++--------- src/provider/provider_cuda_internal.h | 10 +++++ 3 files changed, 51 insertions(+), 18 deletions(-) create mode 100644 src/provider/provider_cuda_internal.h diff --git a/src/libumf.c b/src/libumf.c index 6ca006c82..64314f4d3 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -12,6 +12,7 @@ #include "base_alloc_global.h" #include "ipc_cache.h" #include "memspace_internal.h" +#include "provider_cuda_internal.h" #include "provider_level_zero_internal.h" #include "provider_tracking.h" #include "utils_common.h" @@ -81,6 +82,7 @@ void umfTearDown(void) { fini_umfTearDown: fini_ze_global_state(); + fini_cu_global_state(); LOG_DEBUG("UMF library finalized"); } } diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index a0f963fdd..c7929cc7e 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -12,8 +12,19 @@ #include #include +#include "provider_cuda_internal.h" +#include "utils_load_library.h" #include "utils_log.h" +static void *cu_lib_handle = NULL; + +void fini_cu_global_state(void) { + if (cu_lib_handle) { + utils_close_library(cu_lib_handle); + cu_lib_handle = NULL; + } +} + #if defined(UMF_NO_CUDA_PROVIDER) umf_result_t umfCUDAMemoryProviderParamsCreate( @@ -88,7 +99,6 @@ umf_memory_provider_ops_t *umfCUDAMemoryProviderOps(void) { #include "utils_assert.h" #include "utils_common.h" #include "utils_concurrency.h" -#include "utils_load_library.h" #include "utils_log.h" #include "utils_sanitizers.h" @@ -180,37 +190,45 @@ static void init_cu_global_state(void) { #else const char *lib_name = "libcuda.so"; #endif - // check if CUDA shared library is already loaded - // we pass 0 as a handle to search the global symbol table + // The CUDA shared library should be already loaded by the user + // of the CUDA provider. UMF just want to reuse it + // and increase the reference count to the CUDA shared library. 
+ void *lib_handle = + utils_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_NO_LOAD); + if (!lib_handle) { + LOG_ERR("Failed to open CUDA shared library"); + Init_cu_global_state_failed = true; + return; + } // NOTE: some symbols defined in the lib have _vX postfixes - it is // important to load the proper version of functions - *(void **)&g_cu_ops.cuMemGetAllocationGranularity = - utils_get_symbol_addr(0, "cuMemGetAllocationGranularity", lib_name); + *(void **)&g_cu_ops.cuMemGetAllocationGranularity = utils_get_symbol_addr( + lib_handle, "cuMemGetAllocationGranularity", lib_name); *(void **)&g_cu_ops.cuMemAlloc = - utils_get_symbol_addr(0, "cuMemAlloc_v2", lib_name); + utils_get_symbol_addr(lib_handle, "cuMemAlloc_v2", lib_name); *(void **)&g_cu_ops.cuMemHostAlloc = - utils_get_symbol_addr(0, "cuMemHostAlloc", lib_name); + utils_get_symbol_addr(lib_handle, "cuMemHostAlloc", lib_name); *(void **)&g_cu_ops.cuMemAllocManaged = - utils_get_symbol_addr(0, "cuMemAllocManaged", lib_name); + utils_get_symbol_addr(lib_handle, "cuMemAllocManaged", lib_name); *(void **)&g_cu_ops.cuMemFree = - utils_get_symbol_addr(0, "cuMemFree_v2", lib_name); + utils_get_symbol_addr(lib_handle, "cuMemFree_v2", lib_name); *(void **)&g_cu_ops.cuMemFreeHost = - utils_get_symbol_addr(0, "cuMemFreeHost", lib_name); + utils_get_symbol_addr(lib_handle, "cuMemFreeHost", lib_name); *(void **)&g_cu_ops.cuGetErrorName = - utils_get_symbol_addr(0, "cuGetErrorName", lib_name); + utils_get_symbol_addr(lib_handle, "cuGetErrorName", lib_name); *(void **)&g_cu_ops.cuGetErrorString = - utils_get_symbol_addr(0, "cuGetErrorString", lib_name); + utils_get_symbol_addr(lib_handle, "cuGetErrorString", lib_name); *(void **)&g_cu_ops.cuCtxGetCurrent = - utils_get_symbol_addr(0, "cuCtxGetCurrent", lib_name); + utils_get_symbol_addr(lib_handle, "cuCtxGetCurrent", lib_name); *(void **)&g_cu_ops.cuCtxSetCurrent = - utils_get_symbol_addr(0, "cuCtxSetCurrent", lib_name); + utils_get_symbol_addr(lib_handle, "cuCtxSetCurrent", lib_name); *(void **)&g_cu_ops.cuIpcGetMemHandle = - utils_get_symbol_addr(0, "cuIpcGetMemHandle", lib_name); + utils_get_symbol_addr(lib_handle, "cuIpcGetMemHandle", lib_name); *(void **)&g_cu_ops.cuIpcOpenMemHandle = - utils_get_symbol_addr(0, "cuIpcOpenMemHandle_v2", lib_name); + utils_get_symbol_addr(lib_handle, "cuIpcOpenMemHandle_v2", lib_name); *(void **)&g_cu_ops.cuIpcCloseMemHandle = - utils_get_symbol_addr(0, "cuIpcCloseMemHandle", lib_name); + utils_get_symbol_addr(lib_handle, "cuIpcCloseMemHandle", lib_name); if (!g_cu_ops.cuMemGetAllocationGranularity || !g_cu_ops.cuMemAlloc || !g_cu_ops.cuMemHostAlloc || !g_cu_ops.cuMemAllocManaged || @@ -221,7 +239,10 @@ static void init_cu_global_state(void) { !g_cu_ops.cuIpcCloseMemHandle) { LOG_FATAL("Required CUDA symbols not found."); Init_cu_global_state_failed = true; + utils_close_library(lib_handle); + return; } + cu_lib_handle = lib_handle; } umf_result_t umfCUDAMemoryProviderParamsCreate( @@ -327,7 +348,7 @@ static umf_result_t cu_memory_provider_initialize(void *params, utils_init_once(&cu_is_initialized, init_cu_global_state); if (Init_cu_global_state_failed) { LOG_FATAL("Loading CUDA symbols failed"); - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; } cu_memory_provider_t *cu_provider = diff --git a/src/provider/provider_cuda_internal.h b/src/provider/provider_cuda_internal.h new file mode 100644 index 000000000..bc3d79d4a --- /dev/null +++ b/src/provider/provider_cuda_internal.h @@ -0,0 +1,10 @@ +/* + * + * Copyright (C) 2025 
Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +void fini_cu_global_state(void); From 3b738d55c02d61d43c185aa075f45081b7a96595 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Thu, 13 Feb 2025 15:09:04 +0100 Subject: [PATCH 144/466] Fix LD_LIBRARY_PATH for tests that use libze_loader --- benchmark/CMakeLists.txt | 10 +++++++ examples/CMakeLists.txt | 20 +++++++++++++ examples/ipc_level_zero/CMakeLists.txt | 4 +-- .../level_zero_shared_memory/CMakeLists.txt | 4 +-- test/CMakeLists.txt | 30 +++++++++++++++++++ 5 files changed, 64 insertions(+), 4 deletions(-) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 941c685e3..73b9b257a 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -86,6 +86,16 @@ function(add_umf_benchmark) set_property(TEST ${BENCH_NAME} PROPERTY ENVIRONMENT_MODIFICATION "${DLL_PATH_LIST}") endif() + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is required + # because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so and tests + # should use it instead of system one. + set_property( + TEST ${BENCH_NAME} + PROPERTY ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib" + ) + endif() if(UMF_BUILD_LIBUMF_POOL_DISJOINT) target_compile_definitions(${BENCH_NAME} diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 009f424ed..89f80ee2d 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -72,6 +72,16 @@ if(UMF_BUILD_GPU_EXAMPLES set_property(TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION "${DLL_PATH_LIST}") endif() + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is required + # because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so and tests + # should use it instead of system one. + set_property( + TEST ${EXAMPLE_NAME} + PROPERTY ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib" + ) + endif() else() message(STATUS "GPU Level Zero shared memory example requires " "UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and " @@ -151,6 +161,16 @@ if(UMF_BUILD_GPU_EXAMPLES set_property(TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION "${DLL_PATH_LIST}") endif() + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is required + # because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so and tests + # should use it instead of system one. + set_property( + TEST ${EXAMPLE_NAME} + PROPERTY ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib" + ) + endif() else() message( STATUS diff --git a/examples/ipc_level_zero/CMakeLists.txt b/examples/ipc_level_zero/CMakeLists.txt index 5c17d4c9c..273a88bb0 100644 --- a/examples/ipc_level_zero/CMakeLists.txt +++ b/examples/ipc_level_zero/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -69,6 +69,6 @@ if(LINUX) TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION - "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" + "LD_LIBRARY_PATH=path_list_prepend:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" ) endif() diff --git a/examples/level_zero_shared_memory/CMakeLists.txt b/examples/level_zero_shared_memory/CMakeLists.txt index 3711b4094..d05072ca2 100644 --- a/examples/level_zero_shared_memory/CMakeLists.txt +++ b/examples/level_zero_shared_memory/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -70,6 +70,6 @@ if(LINUX) TEST ${EXAMPLE_NAME} PROPERTY ENVIRONMENT_MODIFICATION - "LD_LIBRARY_PATH=path_list_append:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" + "LD_LIBRARY_PATH=path_list_prepend:${LIBUMF_LIBRARY_DIRS};LD_LIBRARY_PATH=path_list_append:${LIBHWLOC_LIBRARY_DIRS}" ) endif() diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b841cceba..76eb3eaeb 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -154,6 +154,16 @@ function(add_umf_test) set_property(TEST ${TEST_NAME} PROPERTY ENVIRONMENT_MODIFICATION "${DLL_PATH_LIST}") endif() + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is required + # because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so and tests + # should use it instead of system one. + set_property( + TEST ${TEST_NAME} + PROPERTY ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib" + ) + endif() endfunction() add_subdirectory(common) @@ -535,6 +545,16 @@ function(add_umf_ipc_test) if(NOT UMF_TESTS_FAIL_ON_SKIP) set_tests_properties(${TEST_NAME} PROPERTIES SKIP_RETURN_CODE 125) endif() + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is required + # because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so and tests + # should use it instead of system one. + set_property( + TEST ${TEST_NAME} + PROPERTY ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib" + ) + endif() endfunction() if(LINUX) @@ -740,5 +760,15 @@ if(LINUX "${CMAKE_INSTALL_PREFIX}" "${STANDALONE_CMAKE_OPTIONS}" ${EXAMPLES} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + if(LINUX) + # prepend LD_LIBRARY_PATH with ${CMAKE_BINARY_DIR}/lib it is + # required because ${CMAKE_BINARY_DIR}/lib contains libze_loader.so + # and tests should use it instead of system one. 
+ set_property( + TEST umf-standalone_examples + PROPERTY + ENVIRONMENT_MODIFICATION + "LD_LIBRARY_PATH=path_list_prepend:${CMAKE_BINARY_DIR}/lib") + endif() endif() endif() From b0dd2fc879773ce5f683905c7f8a95d29cfe4e04 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Fri, 14 Feb 2025 14:47:00 +0100 Subject: [PATCH 145/466] Test Level Zero provider when ze_loader is opened with RTLD_LOCAL --- src/utils/utils_level_zero.cpp | 9 +++++++-- test/CMakeLists.txt | 14 +++++++++++--- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/utils/utils_level_zero.cpp b/src/utils/utils_level_zero.cpp index 02e961d49..f5a42b0fa 100644 --- a/src/utils/utils_level_zero.cpp +++ b/src/utils/utils_level_zero.cpp @@ -144,10 +144,15 @@ int InitLevelZeroOps() { const char *lib_name = "libze_loader.so"; #endif // Load Level Zero symbols - // NOTE that we use UMF_UTIL_OPEN_LIBRARY_GLOBAL which add all loaded symbols to the +#if OPEN_ZE_LIBRARY_GLOBAL + // NOTE UMF_UTIL_OPEN_LIBRARY_GLOBAL adds all loaded symbols to the // global symbol table. + int open_flags = UMF_UTIL_OPEN_LIBRARY_GLOBAL; +#else + int open_flags = 0; +#endif zeDlHandle = std::unique_ptr( - utils_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_GLOBAL)); + utils_open_library(lib_name, open_flags)); *(void **)&libze_ops.zeInit = utils_get_symbol_addr(zeDlHandle.get(), "zeInit", lib_name); if (libze_ops.zeInit == nullptr) { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 76eb3eaeb..aab8d62b2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -415,12 +415,20 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) LIBS ${UMF_UTILS_FOR_TEST} ze_loader) add_umf_test( - NAME provider_level_zero_dlopen + NAME provider_level_zero_dlopen_global SRCS providers/provider_level_zero.cpp ${UMF_UTILS_DIR}/utils_level_zero.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) - target_compile_definitions(umf_test-provider_level_zero_dlopen - PUBLIC USE_DLOPEN=1) + target_compile_definitions(umf_test-provider_level_zero_dlopen_global + PUBLIC USE_DLOPEN=1 OPEN_ZE_LIBRARY_GLOBAL=1) + + add_umf_test( + NAME provider_level_zero_dlopen_local + SRCS providers/provider_level_zero.cpp + ${UMF_UTILS_DIR}/utils_level_zero.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + target_compile_definitions(umf_test-provider_level_zero_dlopen_local + PUBLIC USE_DLOPEN=1 OPEN_ZE_LIBRARY_GLOBAL=0) endif() if(NOT UMF_BUILD_LEVEL_ZERO_PROVIDER) From 664484fb494acd9ba9435d709fe052f05f0cc4cc Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Fri, 14 Feb 2025 14:50:14 +0100 Subject: [PATCH 146/466] Test CUDA provider when cuda is opened with RTLD_LOCAL --- test/CMakeLists.txt | 14 +++++++++++--- test/providers/cuda_helpers.cpp | 11 ++++++++--- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index aab8d62b2..cdbe2425f 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -450,12 +450,20 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) LIBS ${UMF_UTILS_FOR_TEST} cuda) add_umf_test( - NAME provider_cuda_dlopen + NAME provider_cuda_dlopen_global SRCS providers/provider_cuda.cpp providers/cuda_helpers.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) - target_compile_definitions(umf_test-provider_cuda_dlopen - PUBLIC USE_DLOPEN=1) + target_compile_definitions(umf_test-provider_cuda_dlopen_global + PUBLIC USE_DLOPEN=1 OPEN_CU_LIBRARY_GLOBAL=1) + + add_umf_test( + NAME provider_cuda_dlopen_local + SRCS providers/provider_cuda.cpp providers/cuda_helpers.cpp + 
${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + target_compile_definitions(umf_test-provider_cuda_dlopen_local + PUBLIC USE_DLOPEN=1 OPEN_CU_LIBRARY_GLOBAL=0) else() message( STATUS diff --git a/test/providers/cuda_helpers.cpp b/test/providers/cuda_helpers.cpp index aa0647080..a607d7ecb 100644 --- a/test/providers/cuda_helpers.cpp +++ b/test/providers/cuda_helpers.cpp @@ -113,10 +113,15 @@ int InitCUDAOps() { const char *lib_name = "libcuda.so"; #endif // CUDA symbols - // NOTE that we use UMF_UTIL_OPEN_LIBRARY_GLOBAL which add all loaded - // symbols to the global symbol table. +#if OPEN_CU_LIBRARY_GLOBAL + // NOTE UMF_UTIL_OPEN_LIBRARY_GLOBAL adds all loaded symbols to the + // global symbol table. + int open_flags = UMF_UTIL_OPEN_LIBRARY_GLOBAL; +#else + int open_flags = 0; +#endif cuDlHandle = std::unique_ptr( - utils_open_library(lib_name, UMF_UTIL_OPEN_LIBRARY_GLOBAL)); + utils_open_library(lib_name, open_flags)); // NOTE: some symbols defined in the lib have _vX postfixes - this is // important to load the proper version of functions From 471516746f9dc6250d1da49ea9304692a2bf1134 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Mon, 17 Feb 2025 22:57:22 +0100 Subject: [PATCH 147/466] Refactor memory tracker implementation --- src/provider/provider_tracking.c | 122 +++++++++++++++++-------------- src/provider/provider_tracking.h | 9 +-- 2 files changed, 68 insertions(+), 63 deletions(-) diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index c4fff4133..bc9d5aca4 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -28,17 +28,23 @@ uint64_t IPC_HANDLE_ID = 0; -typedef struct tracker_value_t { +struct umf_memory_tracker_t { + umf_ba_pool_t *alloc_info_allocator; + critnib *alloc_segments_map; + utils_mutex_t splitMergeMutex; +}; + +typedef struct tracker_alloc_info_t { umf_memory_pool_handle_t pool; size_t size; -} tracker_value_t; +} tracker_alloc_info_t; static umf_result_t umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, umf_memory_pool_handle_t pool, const void *ptr, size_t size) { assert(ptr); - tracker_value_t *value = umf_ba_alloc(hTracker->tracker_allocator); + tracker_alloc_info_t *value = umf_ba_alloc(hTracker->alloc_info_allocator); if (value == NULL) { LOG_ERR("failed to allocate tracker value, ptr=%p, size=%zu", ptr, size); @@ -48,7 +54,8 @@ static umf_result_t umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, value->pool = pool; value->size = size; - int ret = critnib_insert(hTracker->map, (uintptr_t)ptr, value, 0); + int ret = + critnib_insert(hTracker->alloc_segments_map, (uintptr_t)ptr, value, 0); if (ret == 0) { LOG_DEBUG( @@ -60,7 +67,7 @@ static umf_result_t umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, LOG_ERR("failed to insert tracker value, ret=%d, ptr=%p, pool=%p, size=%zu", ret, ptr, (void *)pool, size); - umf_ba_free(hTracker->tracker_allocator, value); + umf_ba_free(hTracker->alloc_info_allocator, value); if (ret == ENOMEM) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -78,18 +85,18 @@ static umf_result_t umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker, // Every umfMemoryTrackerAdd(..., ptr, ...) should have a corresponding // umfMemoryTrackerRemove call with the same ptr value. 
- void *value = critnib_remove(hTracker->map, (uintptr_t)ptr); + void *value = critnib_remove(hTracker->alloc_segments_map, (uintptr_t)ptr); if (!value) { - LOG_ERR("pointer %p not found in the map", ptr); + LOG_ERR("pointer %p not found in the alloc_segments_map", ptr); return UMF_RESULT_ERROR_UNKNOWN; } - tracker_value_t *v = value; + tracker_alloc_info_t *v = value; LOG_DEBUG("memory region removed: tracker=%p, ptr=%p, size=%zu", (void *)hTracker, ptr, v->size); - umf_ba_free(hTracker->tracker_allocator, value); + umf_ba_free(hTracker->alloc_info_allocator, value); return UMF_RESULT_SUCCESS; } @@ -117,15 +124,15 @@ umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, return UMF_RESULT_ERROR_NOT_SUPPORTED; } - if (TRACKER->map == NULL) { - LOG_ERR("tracker's map does not exist"); + if (TRACKER->alloc_segments_map == NULL) { + LOG_ERR("tracker's alloc_segments_map does not exist"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } uintptr_t rkey; - tracker_value_t *rvalue; - int found = critnib_find(TRACKER->map, (uintptr_t)ptr, FIND_LE, - (void *)&rkey, (void **)&rvalue); + tracker_alloc_info_t *rvalue; + int found = critnib_find(TRACKER->alloc_segments_map, (uintptr_t)ptr, + FIND_LE, (void *)&rkey, (void **)&rvalue); if (!found || (uintptr_t)ptr >= rkey + rvalue->size) { LOG_DEBUG("pointer %p not found in the tracker, TRACKER=%p", ptr, (void *)TRACKER); @@ -188,8 +195,8 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, umf_tracking_memory_provider_t *provider = (umf_tracking_memory_provider_t *)hProvider; - tracker_value_t *splitValue = - umf_ba_alloc(provider->hTracker->tracker_allocator); + tracker_alloc_info_t *splitValue = + umf_ba_alloc(provider->hTracker->alloc_info_allocator); if (!splitValue) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } @@ -202,8 +209,8 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, goto err_lock; } - tracker_value_t *value = - (tracker_value_t *)critnib_get(provider->hTracker->map, (uintptr_t)ptr); + tracker_alloc_info_t *value = (tracker_alloc_info_t *)critnib_get( + provider->hTracker->alloc_segments_map, (uintptr_t)ptr); if (!value) { LOG_ERR("region for split is not found in the tracker"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; @@ -240,14 +247,15 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, goto err; } - int cret = critnib_insert(provider->hTracker->map, (uintptr_t)ptr, - (void *)splitValue, 1 /* update */); + int cret = + critnib_insert(provider->hTracker->alloc_segments_map, (uintptr_t)ptr, + (void *)splitValue, 1 /* update */); // this cannot fail since we know the element exists (nothing to allocate) assert(cret == 0); (void)cret; // free the original value - umf_ba_free(provider->hTracker->tracker_allocator, value); + umf_ba_free(provider->hTracker->alloc_info_allocator, value); utils_mutex_unlock(&provider->hTracker->splitMergeMutex); return UMF_RESULT_SUCCESS; @@ -255,7 +263,7 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, err: utils_mutex_unlock(&provider->hTracker->splitMergeMutex); err_lock: - umf_ba_free(provider->hTracker->tracker_allocator, splitValue); + umf_ba_free(provider->hTracker->alloc_info_allocator, splitValue); return ret; } @@ -265,8 +273,8 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, umf_tracking_memory_provider_t *provider = (umf_tracking_memory_provider_t *)hProvider; - tracker_value_t *mergedValue = - umf_ba_alloc(provider->hTracker->tracker_allocator); + tracker_alloc_info_t 
*mergedValue = + umf_ba_alloc(provider->hTracker->alloc_info_allocator); if (!mergedValue) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -280,15 +288,15 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, goto err_lock; } - tracker_value_t *lowValue = (tracker_value_t *)critnib_get( - provider->hTracker->map, (uintptr_t)lowPtr); + tracker_alloc_info_t *lowValue = (tracker_alloc_info_t *)critnib_get( + provider->hTracker->alloc_segments_map, (uintptr_t)lowPtr); if (!lowValue) { LOG_ERR("no left value"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; goto err; } - tracker_value_t *highValue = (tracker_value_t *)critnib_get( - provider->hTracker->map, (uintptr_t)highPtr); + tracker_alloc_info_t *highValue = (tracker_alloc_info_t *)critnib_get( + provider->hTracker->alloc_segments_map, (uintptr_t)highPtr); if (!highValue) { LOG_ERR("no right value"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; @@ -314,20 +322,21 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, // We'll have a duplicate entry for the range [highPtr, highValue->size] but this is fine, // the value is the same anyway and we forbid removing that range concurrently - int cret = critnib_insert(provider->hTracker->map, (uintptr_t)lowPtr, - (void *)mergedValue, 1 /* update */); + int cret = + critnib_insert(provider->hTracker->alloc_segments_map, + (uintptr_t)lowPtr, (void *)mergedValue, 1 /* update */); // this cannot fail since we know the element exists (nothing to allocate) assert(cret == 0); (void)cret; // free old value that we just replaced with mergedValue - umf_ba_free(provider->hTracker->tracker_allocator, lowValue); + umf_ba_free(provider->hTracker->alloc_info_allocator, lowValue); - void *erasedhighValue = - critnib_remove(provider->hTracker->map, (uintptr_t)highPtr); + void *erasedhighValue = critnib_remove( + provider->hTracker->alloc_segments_map, (uintptr_t)highPtr); assert(erasedhighValue == highValue); - umf_ba_free(provider->hTracker->tracker_allocator, erasedhighValue); + umf_ba_free(provider->hTracker->alloc_info_allocator, erasedhighValue); utils_mutex_unlock(&provider->hTracker->splitMergeMutex); @@ -340,7 +349,7 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, utils_mutex_unlock(&provider->hTracker->splitMergeMutex); err_lock: - umf_ba_free(provider->hTracker->tracker_allocator, mergedValue); + umf_ba_free(provider->hTracker->alloc_info_allocator, mergedValue); return ret; } @@ -425,9 +434,9 @@ static void clear_tracker_for_the_pool(umf_memory_tracker_handle_t hTracker, size_t n_items = 0; uintptr_t last_key = 0; - while (1 == critnib_find((critnib *)hTracker->map, last_key, FIND_G, &rkey, - &rvalue)) { - tracker_value_t *value = (tracker_value_t *)rvalue; + while (1 == critnib_find((critnib *)hTracker->alloc_segments_map, last_key, + FIND_G, &rkey, &rvalue)) { + tracker_alloc_info_t *value = (tracker_alloc_info_t *)rvalue; if (value->pool != pool && pool != NULL) { last_key = rkey; continue; @@ -435,9 +444,10 @@ static void clear_tracker_for_the_pool(umf_memory_tracker_handle_t hTracker, n_items++; - void *removed_value = critnib_remove(hTracker->map, rkey); + void *removed_value = + critnib_remove(hTracker->alloc_segments_map, rkey); assert(removed_value == rvalue); - umf_ba_free(hTracker->tracker_allocator, removed_value); + umf_ba_free(hTracker->alloc_info_allocator, removed_value); last_key = rkey; } @@ -816,33 +826,33 @@ umf_memory_tracker_handle_t umfMemoryTrackerCreate(void) { return NULL; } - umf_ba_pool_t *tracker_allocator 
= - umf_ba_create(sizeof(struct tracker_value_t)); - if (!tracker_allocator) { + umf_ba_pool_t *alloc_info_allocator = + umf_ba_create(sizeof(struct tracker_alloc_info_t)); + if (!alloc_info_allocator) { goto err_free_handle; } - handle->tracker_allocator = tracker_allocator; + handle->alloc_info_allocator = alloc_info_allocator; void *mutex_ptr = utils_mutex_init(&handle->splitMergeMutex); if (!mutex_ptr) { - goto err_destroy_tracker_allocator; + goto err_destroy_alloc_info_allocator; } - handle->map = critnib_new(); - if (!handle->map) { + handle->alloc_segments_map = critnib_new(); + if (!handle->alloc_segments_map) { goto err_destroy_mutex; } - LOG_DEBUG("tracker created, handle=%p, segment map=%p", (void *)handle, - (void *)handle->map); + LOG_DEBUG("tracker created, handle=%p, alloc_segments_map=%p", + (void *)handle, (void *)handle->alloc_segments_map); return handle; err_destroy_mutex: utils_mutex_destroy_not_free(&handle->splitMergeMutex); -err_destroy_tracker_allocator: - umf_ba_destroy(tracker_allocator); +err_destroy_alloc_info_allocator: + umf_ba_destroy(alloc_info_allocator); err_free_handle: umf_ba_global_free(handle); return NULL; @@ -865,10 +875,10 @@ void umfMemoryTrackerDestroy(umf_memory_tracker_handle_t handle) { // We have to zero all inner pointers, // because the tracker handle can be copied // and used in many places. - critnib_delete(handle->map); - handle->map = NULL; + critnib_delete(handle->alloc_segments_map); + handle->alloc_segments_map = NULL; utils_mutex_destroy_not_free(&handle->splitMergeMutex); - umf_ba_destroy(handle->tracker_allocator); - handle->tracker_allocator = NULL; + umf_ba_destroy(handle->alloc_info_allocator); + handle->alloc_info_allocator = NULL; umf_ba_global_free(handle); } diff --git a/src/provider/provider_tracking.h b/src/provider/provider_tracking.h index 2abc36505..9e868cf31 100644 --- a/src/provider/provider_tracking.h +++ b/src/provider/provider_tracking.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -26,12 +26,7 @@ extern "C" { #endif -struct umf_memory_tracker_t { - umf_ba_pool_t *tracker_allocator; - critnib *map; - utils_mutex_t splitMergeMutex; -}; - +struct umf_memory_tracker_t; typedef struct umf_memory_tracker_t *umf_memory_tracker_handle_t; extern umf_memory_tracker_handle_t TRACKER; From fa9b0eaf7df4107145d06e80f02c4a030d69a11e Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Tue, 18 Feb 2025 00:09:47 +0100 Subject: [PATCH 148/466] Refactor IPC cache implementation --- src/ipc_cache.c | 71 ++++++++++++++++---------------- src/ipc_cache.h | 34 +++++++-------- src/provider/provider_tracking.c | 20 ++++----- 3 files changed, 61 insertions(+), 64 deletions(-) diff --git a/src/ipc_cache.c b/src/ipc_cache.c index 60072d4df..ccb296d5b 100644 --- a/src/ipc_cache.c +++ b/src/ipc_cache.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -22,41 +22,41 @@ #pragma warning(disable : 4702) #endif -struct ipc_handle_cache_entry_t; +struct ipc_opened_cache_entry_t; -typedef struct ipc_handle_cache_entry_t *hash_map_t; -typedef struct ipc_handle_cache_entry_t *lru_list_t; +typedef struct ipc_opened_cache_entry_t *hash_map_t; +typedef struct ipc_opened_cache_entry_t *lru_list_t; -typedef struct ipc_handle_cache_entry_t { +typedef struct ipc_opened_cache_entry_t { UT_hash_handle hh; - struct ipc_handle_cache_entry_t *next, *prev; - ipc_mapped_handle_cache_key_t key; + struct ipc_opened_cache_entry_t *next, *prev; + ipc_opened_cache_key_t key; uint64_t ref_count; uint64_t handle_id; hash_map_t *hash_table; // pointer to the hash table to which the entry belongs - ipc_mapped_handle_cache_value_t value; -} ipc_handle_cache_entry_t; + ipc_opened_cache_value_t value; +} ipc_opened_cache_entry_t; -typedef struct ipc_mapped_handle_cache_global_t { +typedef struct ipc_opened_cache_global_t { utils_mutex_t cache_lock; umf_ba_pool_t *cache_allocator; size_t max_size; size_t cur_size; lru_list_t lru_list; -} ipc_mapped_handle_cache_global_t; +} ipc_opened_cache_global_t; -typedef struct ipc_mapped_handle_cache_t { - ipc_mapped_handle_cache_global_t *global; +typedef struct ipc_opened_cache_t { + ipc_opened_cache_global_t *global; hash_map_t hash_table; - ipc_mapped_handle_cache_eviction_cb_t eviction_cb; -} ipc_mapped_handle_cache_t; + ipc_opened_cache_eviction_cb_t eviction_cb; +} ipc_opened_cache_t; -ipc_mapped_handle_cache_global_t *IPC_MAPPED_CACHE_GLOBAL = NULL; +ipc_opened_cache_global_t *IPC_OPENED_CACHE_GLOBAL = NULL; umf_result_t umfIpcCacheGlobalInit(void) { umf_result_t ret = UMF_RESULT_SUCCESS; - ipc_mapped_handle_cache_global_t *cache_global = + ipc_opened_cache_global_t *cache_global = umf_ba_global_alloc(sizeof(*cache_global)); if (!cache_global) { LOG_ERR("Failed to allocate memory for the IPC cache global data"); @@ -71,7 +71,7 @@ umf_result_t umfIpcCacheGlobalInit(void) { } cache_global->cache_allocator = - umf_ba_create(sizeof(ipc_handle_cache_entry_t)); + umf_ba_create(sizeof(ipc_opened_cache_entry_t)); if (!cache_global->cache_allocator) { LOG_ERR("Failed to create IPC cache allocator"); ret = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -83,7 +83,7 @@ umf_result_t umfIpcCacheGlobalInit(void) { cache_global->cur_size = 0; cache_global->lru_list = NULL; - IPC_MAPPED_CACHE_GLOBAL = cache_global; + IPC_OPENED_CACHE_GLOBAL = cache_global; goto err_exit; err_mutex_destroy: @@ -97,15 +97,15 @@ umf_result_t umfIpcCacheGlobalInit(void) { #ifndef NDEBUG static size_t getGlobalLruListSize(lru_list_t lru_list) { size_t size = 0; - ipc_handle_cache_entry_t *tmp; + ipc_opened_cache_entry_t *tmp; DL_COUNT(lru_list, tmp, size); return size; } #endif /* NDEBUG */ void umfIpcCacheGlobalTearDown(void) { - ipc_mapped_handle_cache_global_t *cache_global = IPC_MAPPED_CACHE_GLOBAL; - IPC_MAPPED_CACHE_GLOBAL = NULL; + ipc_opened_cache_global_t *cache_global = IPC_OPENED_CACHE_GLOBAL; + IPC_OPENED_CACHE_GLOBAL = NULL; if (!cache_global) { return; @@ -119,31 +119,31 @@ void umfIpcCacheGlobalTearDown(void) { umf_ba_global_free(cache_global); } -ipc_mapped_handle_cache_handle_t umfIpcHandleMappedCacheCreate( - ipc_mapped_handle_cache_eviction_cb_t eviction_cb) { +ipc_opened_cache_handle_t +umfIpcOpenedCacheCreate(ipc_opened_cache_eviction_cb_t eviction_cb) { if (eviction_cb == NULL) { LOG_ERR("Eviction callback is NULL"); return NULL; } - ipc_mapped_handle_cache_t *cache = 
umf_ba_global_alloc(sizeof(*cache)); + ipc_opened_cache_t *cache = umf_ba_global_alloc(sizeof(*cache)); if (!cache) { LOG_ERR("Failed to allocate memory for the IPC cache"); return NULL; } - assert(IPC_MAPPED_CACHE_GLOBAL != NULL); + assert(IPC_OPENED_CACHE_GLOBAL != NULL); - cache->global = IPC_MAPPED_CACHE_GLOBAL; + cache->global = IPC_OPENED_CACHE_GLOBAL; cache->hash_table = NULL; cache->eviction_cb = eviction_cb; return cache; } -void umfIpcHandleMappedCacheDestroy(ipc_mapped_handle_cache_handle_t cache) { - ipc_handle_cache_entry_t *entry, *tmp; +void umfIpcOpenedCacheDestroy(ipc_opened_cache_handle_t cache) { + ipc_opened_cache_entry_t *entry, *tmp; HASH_ITER(hh, cache->hash_table, entry, tmp) { DL_DELETE(cache->global->lru_list, entry); HASH_DEL(cache->hash_table, entry); @@ -157,15 +157,14 @@ void umfIpcHandleMappedCacheDestroy(ipc_mapped_handle_cache_handle_t cache) { umf_ba_global_free(cache); } -umf_result_t -umfIpcHandleMappedCacheGet(ipc_mapped_handle_cache_handle_t cache, - const ipc_mapped_handle_cache_key_t *key, - uint64_t handle_id, - ipc_mapped_handle_cache_value_t **retEntry) { - ipc_handle_cache_entry_t *entry = NULL; +umf_result_t umfIpcOpenedCacheGet(ipc_opened_cache_handle_t cache, + const ipc_opened_cache_key_t *key, + uint64_t handle_id, + ipc_opened_cache_value_t **retEntry) { + ipc_opened_cache_entry_t *entry = NULL; umf_result_t ret = UMF_RESULT_SUCCESS; bool evicted = false; - ipc_mapped_handle_cache_value_t evicted_value; + ipc_opened_cache_value_t evicted_value; if (!cache || !key || !retEntry) { LOG_ERR("Some arguments are NULL, cache=%p, key=%p, retEntry=%p", diff --git a/src/ipc_cache.h b/src/ipc_cache.h index 59ae28787..80870d373 100644 --- a/src/ipc_cache.h +++ b/src/ipc_cache.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -14,39 +14,37 @@ #include "utils_concurrency.h" -typedef struct ipc_mapped_handle_cache_key_t { +typedef struct ipc_opened_cache_key_t { void *remote_base_ptr; umf_memory_provider_handle_t local_provider; int remote_pid; -} ipc_mapped_handle_cache_key_t; +} ipc_opened_cache_key_t; -typedef struct ipc_mapped_handle_cache_value_t { +typedef struct ipc_opened_cache_value_t { void *mapped_base_ptr; size_t mapped_size; utils_mutex_t mmap_lock; -} ipc_mapped_handle_cache_value_t; +} ipc_opened_cache_value_t; -struct ipc_mapped_handle_cache_t; +struct ipc_opened_cache_t; -typedef struct ipc_mapped_handle_cache_t *ipc_mapped_handle_cache_handle_t; +typedef struct ipc_opened_cache_t *ipc_opened_cache_handle_t; umf_result_t umfIpcCacheGlobalInit(void); void umfIpcCacheGlobalTearDown(void); // define pointer to the eviction callback function -typedef void (*ipc_mapped_handle_cache_eviction_cb_t)( - const ipc_mapped_handle_cache_key_t *key, - const ipc_mapped_handle_cache_value_t *value); +typedef void (*ipc_opened_cache_eviction_cb_t)( + const ipc_opened_cache_key_t *key, const ipc_opened_cache_value_t *value); -ipc_mapped_handle_cache_handle_t umfIpcHandleMappedCacheCreate( - ipc_mapped_handle_cache_eviction_cb_t eviction_cb); +ipc_opened_cache_handle_t +umfIpcOpenedCacheCreate(ipc_opened_cache_eviction_cb_t eviction_cb); -void umfIpcHandleMappedCacheDestroy(ipc_mapped_handle_cache_handle_t cache); +void umfIpcOpenedCacheDestroy(ipc_opened_cache_handle_t cache); -umf_result_t -umfIpcHandleMappedCacheGet(ipc_mapped_handle_cache_handle_t cache, - const ipc_mapped_handle_cache_key_t *key, - uint64_t handle_id, - ipc_mapped_handle_cache_value_t **retEntry); +umf_result_t umfIpcOpenedCacheGet(ipc_opened_cache_handle_t cache, + const ipc_opened_cache_key_t *key, + uint64_t handle_id, + ipc_opened_cache_value_t **retEntry); #endif /* UMF_IPC_CACHE_H */ diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index bc9d5aca4..b27b858d4 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -160,7 +160,7 @@ typedef struct umf_tracking_memory_provider_t { umf_memory_tracker_handle_t hTracker; umf_memory_pool_handle_t pool; critnib *ipcCache; - ipc_mapped_handle_cache_handle_t hIpcMappedCache; + ipc_opened_cache_handle_t hIpcMappedCache; } umf_tracking_memory_provider_t; typedef struct umf_tracking_memory_provider_t umf_tracking_memory_provider_t; @@ -477,7 +477,7 @@ static void trackingFinalize(void *provider) { umf_tracking_memory_provider_t *p = (umf_tracking_memory_provider_t *)provider; - umfIpcHandleMappedCacheDestroy(p->hIpcMappedCache); + umfIpcOpenedCacheDestroy(p->hIpcMappedCache); critnib_delete(p->ipcCache); @@ -629,8 +629,8 @@ static umf_result_t trackingPutIpcHandle(void *provider, } static void -ipcMappedCacheEvictionCallback(const ipc_mapped_handle_cache_key_t *key, - const ipc_mapped_handle_cache_value_t *value) { +ipcOpenedCacheEvictionCallback(const ipc_opened_cache_key_t *key, + const ipc_opened_cache_value_t *value) { umf_tracking_memory_provider_t *p = (umf_tracking_memory_provider_t *)key->local_provider; // umfMemoryTrackerRemove should be called before umfMemoryProviderCloseIPCHandle @@ -700,16 +700,16 @@ static umf_result_t trackingOpenIpcHandle(void *provider, void *providerIpcData, umf_ipc_data_t *ipcUmfData = getIpcDataFromIpcHandle(providerIpcData); - // Compiler may add paddings to the ipc_mapped_handle_cache_key_t structure + // Compiler may add paddings to the 
ipc_opened_cache_key_t structure // so we need to zero it out to avoid false cache miss. - ipc_mapped_handle_cache_key_t key = {0}; + ipc_opened_cache_key_t key = {0}; key.remote_base_ptr = ipcUmfData->base; key.local_provider = provider; key.remote_pid = ipcUmfData->pid; - ipc_mapped_handle_cache_value_t *cache_entry = NULL; - ret = umfIpcHandleMappedCacheGet(p->hIpcMappedCache, &key, - ipcUmfData->handle_id, &cache_entry); + ipc_opened_cache_value_t *cache_entry = NULL; + ret = umfIpcOpenedCacheGet(p->hIpcMappedCache, &key, ipcUmfData->handle_id, + &cache_entry); if (ret != UMF_RESULT_SUCCESS) { LOG_ERR("failed to get cache entry"); return ret; @@ -798,7 +798,7 @@ umf_result_t umfTrackingMemoryProviderCreate( } params.hIpcMappedCache = - umfIpcHandleMappedCacheCreate(ipcMappedCacheEvictionCallback); + umfIpcOpenedCacheCreate(ipcOpenedCacheEvictionCallback); LOG_DEBUG("upstream=%p, tracker=%p, " "pool=%p, ipcCache=%p, hIpcMappedCache=%p", From e45c4f9a9f9e6b195a3f68fb583c36ae2bea6e4d Mon Sep 17 00:00:00 2001 From: sys_tr_ghrunner Date: Tue, 11 Feb 2025 15:04:52 +0100 Subject: [PATCH 149/466] suppress CMake rerun This avoids writing to the same generate.stamp while building UMF by not rerunning CMakeLists.txt --- .github/workflows/reusable_gpu.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable_gpu.yml b/.github/workflows/reusable_gpu.yml index 47f48f6a8..23be62a54 100644 --- a/.github/workflows/reusable_gpu.yml +++ b/.github/workflows/reusable_gpu.yml @@ -77,6 +77,7 @@ jobs: run: vcpkg install # note: disable all providers except the one being tested + # '-DCMAKE_SUPPRESS_REGENERATION=ON' is the WA for the error: "CUSTOMBUILD : CMake error : Cannot restore timestamp" - name: Configure build run: > cmake @@ -99,7 +100,8 @@ jobs: -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_${{inputs.name}}_PROVIDER=ON -DUMF_TESTS_FAIL_ON_SKIP=ON - ${{ matrix.os == 'Ubuntu' && matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} + ${{ matrix.os == 'Ubuntu' && matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} + ${{ matrix.os == 'Windows' && '-DCMAKE_SUPPRESS_REGENERATION=ON' || '' }} - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j ${{matrix.number_of_processors}} From ea10213c18f4beaab6ccc02809d47273a0848923 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 18 Feb 2025 11:22:02 +0100 Subject: [PATCH 150/466] Check if tracker is empty instead of clearing it Clearing the tracker was a temporary solution and should be removed. The tracker should be cleared using the provider's free() operation. Replace clear_tracker_for_the_pool() with check_if_tracker_is_empty(). This patch reverts commit 2766a21681c1e395d4bcd4c0f178a2627cf56d23 Ref: #759 Signed-off-by: Lukasz Dorau --- src/provider/provider_tracking.c | 61 +++++++++++++------------------- 1 file changed, 24 insertions(+), 37 deletions(-) diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index b27b858d4..1a08beb19 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -425,10 +425,9 @@ static umf_result_t trackingInitialize(void *params, void **ret) { return UMF_RESULT_SUCCESS; } -// TODO clearing the tracker is a temporary solution and should be removed. -// The tracker should be cleared using the provider's free() operation. 
-static void clear_tracker_for_the_pool(umf_memory_tracker_handle_t hTracker, - umf_memory_pool_handle_t pool) { +#ifndef NDEBUG +static void check_if_tracker_is_empty(umf_memory_tracker_handle_t hTracker, + umf_memory_pool_handle_t pool) { uintptr_t rkey; void *rvalue; size_t n_items = 0; @@ -437,41 +436,30 @@ static void clear_tracker_for_the_pool(umf_memory_tracker_handle_t hTracker, while (1 == critnib_find((critnib *)hTracker->alloc_segments_map, last_key, FIND_G, &rkey, &rvalue)) { tracker_alloc_info_t *value = (tracker_alloc_info_t *)rvalue; - if (value->pool != pool && pool != NULL) { - last_key = rkey; - continue; + if (value->pool == pool || pool == NULL) { + n_items++; } - n_items++; - - void *removed_value = - critnib_remove(hTracker->alloc_segments_map, rkey); - assert(removed_value == rvalue); - umf_ba_free(hTracker->alloc_info_allocator, removed_value); - last_key = rkey; } -#ifndef NDEBUG - // print error messages only if provider supports the free() operation if (n_items) { - if (pool) { - LOG_ERR( - "tracking provider of pool %p is not empty! (%zu items left)", - (void *)pool, n_items); - } else { - LOG_ERR("tracking provider is not empty! (%zu items left)", - n_items); + // Do not log the error if we are running in the proxy library, + // because it may need those resources till + // the very end of exiting the application. + if (!utils_is_running_in_proxy_lib()) { + if (pool) { + LOG_ERR("tracking provider of pool %p is not empty! (%zu items " + "left)", + (void *)pool, n_items); + } else { + LOG_ERR("tracking provider is not empty! (%zu items left)", + n_items); + } } } -#else /* DEBUG */ - (void)n_items; // unused in DEBUG build -#endif /* DEBUG */ -} - -static void clear_tracker(umf_memory_tracker_handle_t hTracker) { - clear_tracker_for_the_pool(hTracker, NULL); } +#endif /* NDEBUG */ static void trackingFinalize(void *provider) { umf_tracking_memory_provider_t *p = @@ -481,12 +469,9 @@ static void trackingFinalize(void *provider) { critnib_delete(p->ipcCache); - // Do not clear the tracker if we are running in the proxy library, - // because it may need those resources till - // the very end of exiting the application. - if (!utils_is_running_in_proxy_lib()) { - clear_tracker_for_the_pool(p->hTracker, p->pool); - } +#ifndef NDEBUG + check_if_tracker_is_empty(p->hTracker, p->pool); +#endif /* NDEBUG */ umf_ba_global_free(provider); } @@ -870,7 +855,9 @@ void umfMemoryTrackerDestroy(umf_memory_tracker_handle_t handle) { return; } - clear_tracker(handle); +#ifndef NDEBUG + check_if_tracker_is_empty(handle, NULL); +#endif /* NDEBUG */ // We have to zero all inner pointers, // because the tracker handle can be copied From 997c917eacff4049c7ae674e48faf99229159b00 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 18 Feb 2025 10:33:53 +0100 Subject: [PATCH 151/466] Assert if tracking provider is not empty in DEBUG UMF_DEVELOPER_MODE Signed-off-by: Lukasz Dorau --- src/provider/provider_tracking.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index 1a08beb19..aa4a7d8b0 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -456,6 +456,10 @@ static void check_if_tracker_is_empty(umf_memory_tracker_handle_t hTracker, LOG_ERR("tracking provider is not empty! 
(%zu items left)", n_items); } + +#ifdef UMF_DEVELOPER_MODE + assert(n_items == 0 && "tracking provider is not empty!"); +#endif } } } From df1de3ae377e6b435f19653e2b902d489e001f79 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 3 Dec 2024 16:05:42 +0100 Subject: [PATCH 152/466] do nothing in ba_global_free if ba is destroyed --- src/base_alloc/base_alloc_global.c | 14 ++++++++++++-- src/base_alloc/base_alloc_global.h | 3 +++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/base_alloc/base_alloc_global.c b/src/base_alloc/base_alloc_global.c index 2aca5d29c..f709eab9d 100644 --- a/src/base_alloc/base_alloc_global.c +++ b/src/base_alloc/base_alloc_global.c @@ -23,6 +23,7 @@ // global base allocator used by all providers and pools static UTIL_ONCE_FLAG ba_is_initialized = UTIL_ONCE_FLAG_INIT; +static bool ba_is_destroyed = false; #define ALLOC_METADATA_SIZE (sizeof(size_t)) @@ -40,6 +41,8 @@ struct base_alloc_t { static struct base_alloc_t BASE_ALLOC = {.ac_sizes = ALLOCATION_CLASSES}; void umf_ba_destroy_global(void) { + ba_is_destroyed = true; + for (int i = 0; i < NUM_ALLOCATION_CLASSES; i++) { if (BASE_ALLOC.ac[i]) { umf_ba_destroy(BASE_ALLOC.ac[i]); @@ -48,10 +51,12 @@ void umf_ba_destroy_global(void) { } // portable version of "ba_is_initialized = UTIL_ONCE_FLAG_INIT;" - static UTIL_ONCE_FLAG is_initialized = UTIL_ONCE_FLAG_INIT; - memcpy(&ba_is_initialized, &is_initialized, sizeof(ba_is_initialized)); + static UTIL_ONCE_FLAG set_once = UTIL_ONCE_FLAG_INIT; + memcpy(&ba_is_initialized, &set_once, sizeof(ba_is_initialized)); } +bool umf_ba_global_is_destroyed(void) { return ba_is_destroyed; } + static void umf_ba_create_global(void) { for (int i = 0; i < NUM_ALLOCATION_CLASSES; i++) { // allocation classes need to be powers of 2 @@ -202,6 +207,11 @@ void umf_ba_global_free(void *ptr) { return; } + if (ba_is_destroyed) { + LOG_WARN("base_alloc: calling free after the base alloc is destroyed"); + return; + } + size_t total_size; ptr = get_original_alloc(ptr, &total_size, NULL); diff --git a/src/base_alloc/base_alloc_global.h b/src/base_alloc/base_alloc_global.h index ad7f12ce5..bd55d352f 100644 --- a/src/base_alloc/base_alloc_global.h +++ b/src/base_alloc/base_alloc_global.h @@ -8,6 +8,8 @@ #ifndef UMF_BASE_ALLOC_GLOBAL_H #define UMF_BASE_ALLOC_GLOBAL_H 1 +#include + #include "base_alloc.h" #ifdef __cplusplus @@ -17,6 +19,7 @@ extern "C" { void *umf_ba_global_alloc(size_t size); void umf_ba_global_free(void *ptr); void umf_ba_destroy_global(void); +bool umf_ba_global_is_destroyed(void); size_t umf_ba_global_malloc_usable_size(void *ptr); void *umf_ba_global_aligned_alloc(size_t size, size_t alignment); From 1ee248c7a211f1fb54f36cf70360baed4b2884b8 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 3 Dec 2024 16:08:15 +0100 Subject: [PATCH 153/466] add utils min/max functions --- src/utils/utils_common.c | 3 +++ src/utils/utils_common.h | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/src/utils/utils_common.c b/src/utils/utils_common.c index eaf5420fc..225c02d2c 100644 --- a/src/utils/utils_common.c +++ b/src/utils/utils_common.c @@ -128,3 +128,6 @@ umf_result_t utils_translate_flags(unsigned in_flags, unsigned max, *out_flags = out_f; return UMF_RESULT_SUCCESS; } + +size_t utils_max(size_t a, size_t b) { return a > b ? a : b; } +size_t utils_min(size_t a, size_t b) { return a < b ? 
a : b; } diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index 6af5a08d9..d8ea9bf6a 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -176,6 +176,10 @@ int utils_fallocate(int fd, long offset, long len); long utils_get_size_threshold(char *str_threshold); +size_t utils_max(size_t a, size_t b); + +size_t utils_min(size_t a, size_t b); + #ifdef __cplusplus } #endif From cd6efbbd9f9bf8ae272a11951ce2fd4612618ffe Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Sat, 7 Dec 2024 18:46:04 +0100 Subject: [PATCH 154/466] add utils_compare_exchange function --- src/utils/utils_concurrency.h | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 155184cc4..287f5d12a 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -61,11 +61,13 @@ int utils_mutex_unlock(utils_mutex_t *mutex); void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #if defined(_WIN32) + static __inline unsigned char utils_lssb_index(long long value) { unsigned long ret; _BitScanForward64(&ret, value); return (unsigned char)ret; } + static __inline unsigned char utils_mssb_index(long long value) { unsigned long ret; _BitScanReverse64(&ret, value); @@ -81,15 +83,25 @@ static __inline unsigned char utils_mssb_index(long long value) { #define utils_atomic_store_release(object, desired) \ InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) + #define utils_atomic_increment(object) \ InterlockedIncrement64((LONG64 volatile *)object) + #define utils_atomic_decrement(object) \ InterlockedDecrement64((LONG64 volatile *)object) + #define utils_fetch_and_add64(ptr, value) \ InterlockedExchangeAdd64((LONG64 *)(ptr), value) -#else + +// NOTE: windows version have different order of args +#define utils_compare_exchange(object, desired, expected) \ + InterlockedCompareExchange64((LONG64 volatile *)object, *expected, *desired) + +#else // !defined(_WIN32) + #define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) #define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) + #define utils_atomic_load_acquire(object, dest) \ do { \ utils_annotate_acquire((void *)object); \ @@ -103,12 +115,19 @@ static __inline unsigned char utils_mssb_index(long long value) { } while (0) #define utils_atomic_increment(object) \ - __atomic_add_fetch(object, 1, __ATOMIC_ACQ_REL) + __atomic_add_fetch(object, 1, memory_order_acq_rel) + #define utils_atomic_decrement(object) \ - __atomic_sub_fetch(object, 1, __ATOMIC_ACQ_REL) -#define utils_fetch_and_add64 __sync_fetch_and_add + __atomic_sub_fetch(object, 1, memory_order_acq_rel) -#endif +#define utils_fetch_and_add64(object, value) \ + __atomic_fetch_add(object, value, memory_order_acq_rel) + +#define utils_compare_exchange(object, expected, desired) \ + __atomic_compare_exchange(object, expected, desired, 0 /* strong */, \ + memory_order_acq_rel, memory_order_relaxed) + +#endif // !defined(_WIN32) #ifdef __cplusplus } From 2705633d9e968f9a03ae3d5261b5f48bc542b1cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 13 Feb 2025 17:12:23 +0100 Subject: [PATCH 155/466] Add info about CMake build options to DLL's metadata --- src/libumf.rc.in | 4 +++- src/proxy_lib/proxy_lib.rc.in | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/libumf.rc.in b/src/libumf.rc.in index 8ee85d626..43bed1560 100644 --- a/src/libumf.rc.in +++ b/src/libumf.rc.in @@ -10,6 +10,8 
@@ #define UMF_VERNUMBERS @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@,@UMF_VERSION_REVISION@ #define _UMF_VERSION "@UMF_VERSION@" +// Store our CMake vars in the "FileDescription" block, as the custom fields require special parsing. +#define _UMF_CMAKE_VARS "@UMF_ALL_CMAKE_VARIABLES@" #ifdef _DEBUG #define VERSION_DEBUG VS_FF_DEBUG @@ -49,7 +51,7 @@ BEGIN BLOCK "040904b0" // U.S. English, Unicode (0x04b0 == 1200) BEGIN VALUE "CompanyName", "Intel Corporation\0" - VALUE "FileDescription", "Unified Memory Framework (UMF) library\0" + VALUE "FileDescription", "Unified Memory Framework (UMF) library (build options: " _UMF_CMAKE_VARS ")\0" VALUE "FileVersion", _UMF_VERSION "\0" VALUE "LegalCopyright", "Copyright 2024-2025, Intel Corporation. All rights reserved.\0" VALUE "LegalTrademarks", "\0" diff --git a/src/proxy_lib/proxy_lib.rc.in b/src/proxy_lib/proxy_lib.rc.in index f0497fb40..a3eff71de 100644 --- a/src/proxy_lib/proxy_lib.rc.in +++ b/src/proxy_lib/proxy_lib.rc.in @@ -10,6 +10,8 @@ #define UMF_VERNUMBERS @PROJECT_VERSION_MAJOR@,@PROJECT_VERSION_MINOR@,@PROJECT_VERSION_PATCH@,@UMF_VERSION_REVISION@ #define _UMF_VERSION "@UMF_VERSION@" +// Store our CMake vars in the "FileDescription" block, as the custom fields require special parsing. +#define _UMF_CMAKE_VARS "@UMF_ALL_CMAKE_VARIABLES@" #ifdef _DEBUG #define VERSION_DEBUG VS_FF_DEBUG @@ -49,7 +51,7 @@ BEGIN BLOCK "040904b0" // U.S. English, Unicode (0x04b0 == 1200) BEGIN VALUE "CompanyName", "Intel Corporation\0" - VALUE "FileDescription", "Unified Memory Framework (UMF) proxy library\0" + VALUE "FileDescription", "Unified Memory Framework (UMF) proxy library (build options: " _UMF_CMAKE_VARS ")\0" VALUE "FileVersion", _UMF_VERSION "\0" VALUE "LegalCopyright", "Copyright 2024-2025, Intel Corporation. All rights reserved.\0" VALUE "LegalTrademarks", "\0" From fb8838edad9724cb5eed71c48b9eafb314292ce2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 13 Feb 2025 16:26:26 +0100 Subject: [PATCH 156/466] Extend Debug information in CONTRIBUTING.md Include more precise info on reading UMF vars and version; and add Windows part. --- CONTRIBUTING.md | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 7b9749c49..6a050c0ae 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,8 +13,9 @@ - [Adding new dependency](#adding-new-dependency) - [Code coverage](#code-coverage) - [Debugging](#debugging) - - [Checking the UMF version and CMake variables (Linux only)](#checking-the-umf-version-and-cmake-variables-linux-only) - - [Requirements](#requirements) + - [Checking UMF version and build options](#checking-umf-version-and-build-options) + - [Linux](#linux) + - [Windows](#windows) Below you'll find instructions on how to contribute to UMF, either with code changes or issues. All contributions are most welcome! @@ -229,9 +230,17 @@ $ genhtml -o html_report coverage.info ## Debugging -### Checking the UMF version and CMake variables (Linux only) +### Checking UMF version and build options -Strings with the UMF version and useful CMake variables can be grepped in the following way: +From an already built UMF shared library you can obtain UMF precise version and +CMake variables/options it was built with. It's not only useful to verify what should +be included within the library, but also for debugging. If you're filing an issue to +UMF project, please include this information in your ticket. 
+ +#### Linux + +Make sure the `binutils` package is installed in your system. Then, you can use +the following grep command: ```bash $ strings libumf.so | grep "@(#)" @@ -239,6 +248,11 @@ $ strings libumf.so | grep "@(#)" @(#) Intel(R) UMF CMake variables: "CMAKE_BUILD_TYPE:Debug,... ``` -#### Requirements +Please note, that version available in the name of library file (e.g. `libumf.so.0.11.0`) +may be not accurate - version coded inside of the library is far more precise. + +#### Windows -- binutils package (Linux) +On Windows, DLL's metadata can be accessed e.g. looking into *Properties* of the dll file +in the explorer. Look into the *Details* tab for "Product version" and "File description" +(it contains UMF's build options). From 76b4b5c6d82d9f619ed74a3ef1de9187ece20816 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Fri, 24 Jan 2025 15:35:01 +0100 Subject: [PATCH 157/466] make disjoint pool a C structure --- .github/workflows/coverity.yml | 1 - .github/workflows/nightly.yml | 3 - .github/workflows/reusable_basic.yml | 8 - .github/workflows/reusable_benchmarks.yml | 1 - .github/workflows/reusable_dax.yml | 1 - .github/workflows/reusable_fast.yml | 7 - .github/workflows/reusable_gpu.yml | 1 - .github/workflows/reusable_multi_numa.yml | 4 +- .github/workflows/reusable_proxy_lib.yml | 1 - .github/workflows/reusable_sanitizers.yml | 2 - .github/workflows/reusable_valgrind.yml | 1 - CMakeLists.txt | 6 +- README.md | 13 +- benchmark/CMakeLists.txt | 7 - benchmark/benchmark.cpp | 4 - benchmark/benchmark_umf.hpp | 8 +- benchmark/multithread.cpp | 9 +- benchmark/ubench.c | 15 +- examples/CMakeLists.txt | 20 +- examples/README.md | 4 +- examples/cuda_shared_memory/CMakeLists.txt | 7 +- examples/ipc_level_zero/CMakeLists.txt | 4 +- .../level_zero_shared_memory/CMakeLists.txt | 4 +- scripts/qemu/run-build.sh | 3 +- src/CMakeLists.txt | 3 +- src/base_alloc/base_alloc_global.c | 5 +- src/libumf.def | 12 + src/libumf.map | 12 + src/pool/CMakeLists.txt | 30 - src/pool/pool_disjoint.c | 1123 ++++++++++++++ src/pool/pool_disjoint.cpp | 1313 ----------------- src/pool/pool_disjoint_internal.h | 176 +++ src/utils/utils_common.h | 2 + src/utils/utils_concurrency.h | 21 +- src/utils/utils_posix_concurrency.c | 37 +- src/utils/utils_windows_concurrency.c | 52 +- test/CMakeLists.txt | 90 +- test/c_api/disjoint_pool.c | 5 +- ...leProv.cpp => disjoint_pool_file_prov.cpp} | 2 +- test/pools/disjoint_pool.cpp | 131 +- test/provider_os_memory.cpp | 10 +- test/supp/drd-umf_test-disjoint_pool.supp | 7 + ...ind-umf_test-disjointCoarseMallocPool.supp | 24 - ...p => helgrind-umf_test-disjoint_pool.supp} | 24 +- test/test_installation.py | 9 +- 45 files changed, 1635 insertions(+), 1587 deletions(-) create mode 100644 src/pool/pool_disjoint.c delete mode 100644 src/pool/pool_disjoint.cpp create mode 100644 src/pool/pool_disjoint_internal.h rename test/{disjointPoolFileProv.cpp => disjoint_pool_file_prov.cpp} (99%) create mode 100644 test/supp/drd-umf_test-disjoint_pool.supp delete mode 100644 test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp rename test/supp/{helgrind-umf_test-disjointPool.supp => helgrind-umf_test-disjoint_pool.supp} (53%) diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml index 531a463c7..ebae6086a 100644 --- a/.github/workflows/coverity.yml +++ b/.github/workflows/coverity.yml @@ -49,7 +49,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - name: 
Build diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 7a6335ed6..44f2ba2ca 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -76,7 +76,6 @@ jobs: -DCMAKE_BUILD_TYPE=Debug -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_CUDA_PROVIDER=OFF @@ -138,7 +137,6 @@ jobs: -DUMF_LINK_HWLOC_STATICALLY=${{matrix.static_hwloc}} -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -219,7 +217,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} ^ -DUMF_FORMAT_CODE_STYLE=OFF ^ -DUMF_DEVELOPER_MODE=ON ^ - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON ^ -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON ^ -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON ^ -DUMF_BUILD_CUDA_PROVIDER=ON ^ diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index d23e646dd..7170ec418 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -165,7 +165,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_DISABLE_HWLOC=${{matrix.disable_hwloc}} -DUMF_LINK_HWLOC_STATICALLY=${{matrix.link_hwloc_statically}} @@ -208,7 +207,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} - --disjoint-pool ${{ matrix.install_tbb == 'ON' && matrix.disable_hwloc != 'ON' && matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || '' }} @@ -284,7 +282,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=${{matrix.level_zero_provider}} -DUMF_BUILD_CUDA_PROVIDER=${{matrix.cuda_provider}} @@ -304,7 +301,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{matrix.build_type}} - --disjoint-pool ${{matrix.shared_library == 'ON' && '--proxy' || '' }} --umf-version ${{env.UMF_VERSION}} ${{ matrix.shared_library == 'ON' && '--shared-library' || ''}} @@ -342,7 +338,6 @@ jobs: -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -385,7 +380,6 @@ jobs: -DUMF_BUILD_EXAMPLES=ON -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON @@ -496,7 +490,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_SHARED_LIBRARY=ON -DUMF_TESTS_FAIL_ON_SKIP=ON @@ -511,7 +504,6 @@ jobs: --build-dir ${{env.BUILD_DIR}} --install-dir ${{env.INSTL_DIR}} --build-type ${{env.BUILD_TYPE}} - --disjoint-pool --proxy --umf-version ${{env.UMF_VERSION}} --shared-library diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index b33fdb25e..b41c99f3a 100644 --- 
a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -93,7 +93,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - name: Build UMF diff --git a/.github/workflows/reusable_dax.yml b/.github/workflows/reusable_dax.yml index f7f4fbe50..1a41b11c7 100644 --- a/.github/workflows/reusable_dax.yml +++ b/.github/workflows/reusable_dax.yml @@ -84,7 +84,6 @@ jobs: -DUMF_BUILD_GPU_EXAMPLES=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_TESTS_FAIL_ON_SKIP=ON diff --git a/.github/workflows/reusable_fast.yml b/.github/workflows/reusable_fast.yml index 58a172a74..5166f2b96 100644 --- a/.github/workflows/reusable_fast.yml +++ b/.github/workflows/reusable_fast.yml @@ -19,24 +19,20 @@ jobs: matrix: include: - os: windows-latest - disjoint: 'OFF' build_tests: 'ON' simple_cmake: 'OFF' # pure C build (Windows) - os: windows-latest - disjoint: 'OFF' # Tests' building is off for a pure C build build_tests: 'OFF' simple_cmake: 'OFF' - os: ubuntu-latest - disjoint: 'ON' build_tests: 'ON' # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command extra_build_options: '-DCMAKE_BUILD_TYPE=Release -DUMF_BUILD_BENCHMARKS=ON -DUMF_BUILD_BENCHMARKS_MT=ON' simple_cmake: 'OFF' # pure C build (Linux) - os: ubuntu-latest - disjoint: 'OFF' # Windows doesn't recognize 'CMAKE_BUILD_TYPE', it uses '--config' param in build command # Tests' building is off for a pure C build build_tests: 'OFF' @@ -44,13 +40,11 @@ jobs: simple_cmake: 'OFF' # simplest CMake on ubuntu-latest - os: ubuntu-latest - disjoint: 'OFF' build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' # simplest CMake ubuntu-20.04 - os: ubuntu-20.04 - disjoint: 'OFF' build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' @@ -97,7 +91,6 @@ jobs: -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=${{matrix.disjoint}} -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_TESTS=${{matrix.build_tests}} -DUMF_BUILD_EXAMPLES=ON diff --git a/.github/workflows/reusable_gpu.yml b/.github/workflows/reusable_gpu.yml index 47f48f6a8..87a7cfd30 100644 --- a/.github/workflows/reusable_gpu.yml +++ b/.github/workflows/reusable_gpu.yml @@ -93,7 +93,6 @@ jobs: -DUMF_BUILD_GPU_TESTS=ON -DUMF_BUILD_GPU_EXAMPLES=ON -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_CUDA_PROVIDER=OFF -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF diff --git a/.github/workflows/reusable_multi_numa.yml b/.github/workflows/reusable_multi_numa.yml index f546b0545..7c7750551 100644 --- a/.github/workflows/reusable_multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -45,7 +45,6 @@ jobs: -DUMF_BUILD_BENCHMARKS=OFF -DUMF_BUILD_TESTS=ON -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=${{ matrix.os == 'rhel-9.1' && 'OFF' || 'ON' }} -DUMF_TESTS_FAIL_ON_SKIP=ON ${{ matrix.build_type == 'Debug' && matrix.os == 'ubuntu-22.04' && '-DUMF_USE_COVERAGE=ON' || '' }} @@ -61,11 +60,12 @@ jobs: # On RHEL, hwloc version is just a little too low. # Skip some tests until we upgrade hwloc and update CMake to properly handle local hwloc installation. 
# TODO: fix issue #560 + # TODO: add issue for -E umf-init_teardown - it is not clear why it fails - name: Run tests (on RHEL) if: matrix.os == 'rhel-9.1' working-directory: ${{github.workspace}}/build run: | - ctest --output-on-failure --test-dir test -E "umf-provider_os_memory_multiple_numa_nodes" + ctest --output-on-failure --test-dir test -E "umf-provider_os_memory_multiple_numa_nodes|umf-init_teardown" ./test/umf_test-provider_os_memory_multiple_numa_nodes \ --gtest_filter="-*checkModeLocal/*:*checkModePreferredEmptyNodeset/*:testNuma.checkModeInterleave" diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index a1f5975fa..bb4a3278e 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -48,7 +48,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON -DUMF_PROXY_LIB_BASED_ON_POOL=${{matrix.proxy_lib_pool}} ${{ matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} diff --git a/.github/workflows/reusable_sanitizers.yml b/.github/workflows/reusable_sanitizers.yml index 25458da51..1a044308e 100644 --- a/.github/workflows/reusable_sanitizers.yml +++ b/.github/workflows/reusable_sanitizers.yml @@ -55,7 +55,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_USE_UBSAN=${{matrix.sanitizers.ubsan}} -DUMF_USE_TSAN=${{matrix.sanitizers.tsan}} @@ -127,7 +126,6 @@ jobs: -DUMF_BUILD_SHARED_LIBRARY=OFF -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_USE_ASAN=${{matrix.sanitizers.asan}} -DUMF_BUILD_EXAMPLES=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF diff --git a/.github/workflows/reusable_valgrind.yml b/.github/workflows/reusable_valgrind.yml index aba0e3260..5999297d6 100644 --- a/.github/workflows/reusable_valgrind.yml +++ b/.github/workflows/reusable_valgrind.yml @@ -29,7 +29,6 @@ jobs: -DCMAKE_BUILD_TYPE=Debug -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_CUDA_PROVIDER=OFF diff --git a/CMakeLists.txt b/CMakeLists.txt index f8c393609..396a27c1e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,8 +60,6 @@ endmacro() umf_option(UMF_BUILD_SHARED_LIBRARY "Build UMF as shared library" OFF) umf_option(UMF_BUILD_LEVEL_ZERO_PROVIDER "Build Level Zero memory provider" ON) umf_option(UMF_BUILD_CUDA_PROVIDER "Build CUDA memory provider" ON) -umf_option(UMF_BUILD_LIBUMF_POOL_DISJOINT - "Build the libumf_pool_disjoint static library" OFF) umf_option(UMF_BUILD_LIBUMF_POOL_JEMALLOC "Build the libumf_pool_jemalloc static library" OFF) umf_option(UMF_BUILD_TESTS "Build UMF tests" ON) @@ -497,8 +495,8 @@ endif() # For using the options listed in the OPTIONS_REQUIRING_CXX variable a C++17 # compiler is required. Moreover, if these options are not set, CMake will set # up a strict C build, without C++ support. 
-set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_LIBUMF_POOL_DISJOINT" - "UMF_BUILD_BENCHMARKS_MT" "UMF_BUILD_BENCHMARKS") +set(OPTIONS_REQUIRING_CXX "UMF_BUILD_TESTS" "UMF_BUILD_BENCHMARKS_MT" + "UMF_BUILD_BENCHMARKS") foreach(option_name ${OPTIONS_REQUIRING_CXX}) if(${option_name}) enable_language(CXX) diff --git a/README.md b/README.md index 5bd0b9b2f..00d6136df 100644 --- a/README.md +++ b/README.md @@ -39,7 +39,7 @@ For development and contributions: - cmake-format-0.6 (can be installed with `python -m pip install cmake-format==0.6.13`) - black (can be installed with `python -m pip install black==24.3.0`) -For building tests, multithreaded benchmarks and Disjoint Pool: +For building tests and multithreaded benchmarks: - C++ compiler with C++17 support @@ -106,7 +106,6 @@ List of options provided by CMake: | UMF_BUILD_SHARED_LIBRARY | Build UMF as shared library | ON/OFF | OFF | | UMF_BUILD_LEVEL_ZERO_PROVIDER | Build Level Zero memory provider | ON/OFF | ON | | UMF_BUILD_CUDA_PROVIDER | Build CUDA memory provider | ON/OFF | ON | -| UMF_BUILD_LIBUMF_POOL_DISJOINT | Build the libumf_pool_disjoint static library | ON/OFF | OFF | | UMF_BUILD_LIBUMF_POOL_JEMALLOC | Build the libumf_pool_jemalloc static library | ON/OFF | OFF | | UMF_BUILD_TESTS | Build UMF tests | ON/OFF | ON | | UMF_BUILD_GPU_TESTS | Build UMF GPU tests | ON/OFF | OFF | @@ -267,13 +266,11 @@ This memory pool is distributed as part of libumf. It forwards all requests to t memory provider. Currently umfPoolRealloc, umfPoolCalloc and umfPoolMallocUsableSize functions are not supported by the proxy pool. -#### Disjoint pool +#### Disjoint pool (part of libumf) -TODO: Add a description - -##### Requirements - -To enable this feature, the `UMF_BUILD_LIBUMF_POOL_DISJOINT` option needs to be turned `ON`. +The Disjoint pool is designed to keep internal metadata separate from user data. +This separation is particularly useful when user data needs to be placed in memory with relatively high latency, +such as GPU memory or disk storage. 
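+A minimal usage sketch (illustrative only; error checking is omitted and
+`provider` stands for any memory provider created beforehand):
+
+```c
+#include <umf/memory_pool.h>
+#include <umf/pools/pool_disjoint.h>
+
+umf_memory_pool_handle_t create_disjoint_pool(umf_memory_provider_handle_t provider) {
+    umf_disjoint_pool_params_handle_t params = NULL;
+    umfDisjointPoolParamsCreate(&params);
+    umfDisjointPoolParamsSetSlabMinSize(params, 64 * 1024);
+
+    umf_memory_pool_handle_t pool = NULL;
+    umfPoolCreate(umfDisjointPoolOps(), provider, params, 0, &pool);
+
+    // the configuration handle is no longer needed once the pool exists
+    umfDisjointPoolParamsDestroy(params);
+    return pool;
+}
+```
+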
#### Jemalloc pool diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 73b9b257a..80c8ba5ec 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -97,10 +97,6 @@ function(add_umf_benchmark) ) endif() - if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - target_compile_definitions(${BENCH_NAME} - PRIVATE UMF_POOL_DISJOINT_ENABLED=1) - endif() if(UMF_POOL_JEMALLOC_ENABLED) target_compile_definitions(${BENCH_NAME} PRIVATE UMF_POOL_JEMALLOC_ENABLED=1) @@ -131,9 +127,6 @@ endfunction() set(LIB_DIRS ${LIBHWLOC_LIBRARY_DIRS}) # optional libraries -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - set(LIBS_OPTIONAL ${LIBS_OPTIONAL} disjoint_pool) -endif() if(LINUX) set(LIBS_OPTIONAL ${LIBS_OPTIONAL} m) endif() diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 6c8175e1d..ad29e9029 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -66,7 +66,6 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, proxy_pool, fixed_alloc_size, UMF_BENCHMARK_REGISTER_F(alloc_benchmark, proxy_pool) ->Apply(&default_alloc_fix_size); -#ifdef UMF_POOL_DISJOINT_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_fix, fixed_alloc_size, pool_allocator>); @@ -80,7 +79,6 @@ UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_fix) UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_uniform) ->Apply(&default_alloc_uniform_size); */ -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_fix, @@ -150,7 +148,6 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider, UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, os_provider) ->Apply(&default_multiple_alloc_fix_size); -#ifdef UMF_POOL_DISJOINT_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, fixed_alloc_size, pool_allocator>); @@ -164,7 +161,6 @@ UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) ->Apply(&default_multiple_alloc_uniform_size); */ -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, diff --git a/benchmark/benchmark_umf.hpp b/benchmark/benchmark_umf.hpp index 389c224ed..86cba4877 100644 --- a/benchmark/benchmark_umf.hpp +++ b/benchmark/benchmark_umf.hpp @@ -13,16 +13,14 @@ #include #include +#include #include + #ifdef UMF_POOL_SCALABLE_ENABLED #include #endif #include -#ifdef UMF_POOL_DISJOINT_ENABLED -#include -#endif - #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif @@ -167,7 +165,6 @@ struct proxy_pool : public pool_interface { static std::string name() { return "proxy_pool<" + Provider::name() + ">"; } }; -#ifdef UMF_POOL_DISJOINT_ENABLED template struct disjoint_pool : public pool_interface { umf_memory_pool_ops_t * @@ -221,7 +218,6 @@ struct disjoint_pool : public pool_interface { return "disjoint_pool<" + Provider::name() + ">"; } }; -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED template diff --git a/benchmark/multithread.cpp b/benchmark/multithread.cpp index ecc238529..d00ffba90 100644 --- a/benchmark/multithread.cpp +++ b/benchmark/multithread.cpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -121,7 +121,7 @@ int main() { std::cout << "skipping jemalloc_pool mt_alloc_free" << std::endl; #endif -#if defined(UMF_POOL_DISJOINT_ENABLED) + // NOTE: disjoint pool is always enabled umf_disjoint_pool_params_handle_t hDisjointParams = nullptr; umf_result_t ret = umfDisjointPoolParamsCreate(&hDisjointParams); if (ret != UMF_RESULT_SUCCESS) { @@ -132,20 +132,15 @@ int main() { std::cout << "disjoint_pool mt_alloc_free: "; mt_alloc_free(poolCreateExtParams{umfDisjointPoolOps(), hDisjointParams, umfOsMemoryProviderOps(), osParams}); -#else - std::cout << "skipping disjoint_pool mt_alloc_free" << std::endl; -#endif // ctest looks for "PASSED" in the output std::cout << "PASSED" << std::endl; -#if defined(UMF_POOL_DISJOINT_ENABLED) ret = umfDisjointPoolParamsDestroy(hDisjointParams); if (ret != UMF_RESULT_SUCCESS) { std::cerr << "disjoint pool params destroy failed" << std::endl; return -1; } -#endif return 0; } diff --git a/benchmark/ubench.c b/benchmark/ubench.c index 3892740e8..5beaa62be 100644 --- a/benchmark/ubench.c +++ b/benchmark/ubench.c @@ -15,23 +15,19 @@ #include #include +#include #include #include #include #include -#ifdef UMF_POOL_DISJOINT_ENABLED -#include -#endif - #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif #include "utils_common.h" -#if (defined UMF_POOL_DISJOINT_ENABLED && \ - defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) #include "utils_level_zero.h" #endif @@ -244,7 +240,6 @@ UBENCH_EX(simple, proxy_pool_with_os_memory_provider) { free(array); } -#if (defined UMF_POOL_DISJOINT_ENABLED) ////////////////// DISJOINT POOL WITH OS MEMORY PROVIDER UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { @@ -327,7 +322,6 @@ UBENCH_EX(simple, disjoint_pool_with_os_memory_provider) { umfMemoryProviderDestroy(os_memory_provider); free(array); } -#endif /* (defined UMF_POOL_DISJOINT_ENABLED) */ #if (defined UMF_POOL_JEMALLOC_ENABLED) ////////////////// JEMALLOC POOL WITH OS MEMORY PROVIDER @@ -421,8 +415,7 @@ UBENCH_EX(simple, scalable_pool_with_os_memory_provider) { } #endif /* (defined UMF_POOL_SCALABLE_ENABLED) */ -#if (defined UMF_POOL_DISJOINT_ENABLED && \ - defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) +#if (defined UMF_PROVIDER_LEVEL_ZERO_ENABLED && defined UMF_BUILD_GPU_TESTS) static void do_ipc_get_put_benchmark(alloc_t *allocs, size_t num_allocs, size_t repeats, umf_ipc_handle_t *ipc_handles) { @@ -630,7 +623,7 @@ UBENCH_EX(ipc, disjoint_pool_with_level_zero_provider) { err_destroy_context: utils_ze_destroy_context(context); } -#endif /* (defined UMF_POLL_DISJOINT_ENABLED && defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ +#endif /* (defined UMF_BUILD_LEVEL_ZERO_PROVIDER && defined UMF_BUILD_GPU_TESTS) */ // TODO add IPC benchmark for CUDA diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 89f80ee2d..a26b8915e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -41,16 +41,14 @@ if(UMF_POOL_SCALABLE_ENABLED) endif() endif() -if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) +if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLE_NAME umf_example_level_zero_shared_memory) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS level_zero_shared_memory/level_zero_shared_memory.c common/examples_level_zero_helpers.c - LIBS disjoint_pool ze_loader umf) + LIBS 
ze_loader umf) target_include_directories( ${EXAMPLE_NAME} @@ -84,12 +82,11 @@ if(UMF_BUILD_GPU_EXAMPLES endif() else() message(STATUS "GPU Level Zero shared memory example requires " - "UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and " - "UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping") + "UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER " + "to be turned ON - skipping") endif() if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_CUDA_PROVIDER AND UMF_CUDA_ENABLED) set(EXAMPLE_NAME umf_example_cuda_shared_memory) @@ -97,7 +94,7 @@ if(UMF_BUILD_GPU_EXAMPLES add_umf_executable( NAME ${EXAMPLE_NAME} SRCS cuda_shared_memory/cuda_shared_memory.c - LIBS disjoint_pool cuda umf) + LIBS cuda umf) target_include_directories( ${EXAMPLE_NAME} @@ -123,14 +120,13 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON and installed CUDA libraries - skipping" + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_CUDA_PROVIDER to be turned ON and installed CUDA libraries - skipping" ) endif() # TODO: it looks like there is some problem with IPC implementation in Level # Zero on windows if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_LEVEL_ZERO_PROVIDER AND LINUX) set(EXAMPLE_NAME umf_example_ipc_level_zero) @@ -139,7 +135,7 @@ if(UMF_BUILD_GPU_EXAMPLES NAME ${EXAMPLE_NAME} SRCS ipc_level_zero/ipc_level_zero.c common/examples_level_zero_helpers.c - LIBS disjoint_pool ze_loader umf) + LIBS ze_loader umf) target_include_directories( ${EXAMPLE_NAME} @@ -174,7 +170,7 @@ if(UMF_BUILD_GPU_EXAMPLES else() message( STATUS - "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" + "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping" ) endif() diff --git a/examples/README.md b/examples/README.md index e7823347e..70d114a63 100644 --- a/examples/README.md +++ b/examples/README.md @@ -24,7 +24,7 @@ cleans up and exits with an error status. ### Requirements * Level Zero headers and libraries * compatible GPU with installed driver -* set UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LIBUMF_POOL_DISJOINT and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON +* set UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON ## IPC example with Level Zero memory provider This example demonstrates how to use UMF IPC API. The example creates two @@ -35,7 +35,7 @@ and build this example Level Zero development package should be installed. ### Requirements * Level Zero headers and libraries * compatible GPU with installed driver -* set UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LIBUMF_POOL_DISJOINT and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON +* set UMF_BUILD_GPU_EXAMPLES and UMF_BUILD_LEVEL_ZERO_PROVIDER CMake configuration flags to ON ## IPC example with shared memory This example also demonstrates how to use UMF IPC API. 
The example creates two diff --git a/examples/cuda_shared_memory/CMakeLists.txt b/examples/cuda_shared_memory/CMakeLists.txt index dd8567c14..0e57ec607 100644 --- a/examples/cuda_shared_memory/CMakeLists.txt +++ b/examples/cuda_shared_memory/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -55,9 +55,8 @@ target_link_directories( ${LIBHWLOC_LIBRARY_DIRS} ${CUDA_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries( - ${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a ${CUDA_LIBRARIES} - ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ${CUDA_LIBRARIES} + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/examples/ipc_level_zero/CMakeLists.txt b/examples/ipc_level_zero/CMakeLists.txt index 273a88bb0..d672d3e92 100644 --- a/examples/ipc_level_zero/CMakeLists.txt +++ b/examples/ipc_level_zero/CMakeLists.txt @@ -53,8 +53,8 @@ target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a - ze_loader ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ze_loader + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/examples/level_zero_shared_memory/CMakeLists.txt b/examples/level_zero_shared_memory/CMakeLists.txt index d05072ca2..f4aaf09e9 100644 --- a/examples/level_zero_shared_memory/CMakeLists.txt +++ b/examples/level_zero_shared_memory/CMakeLists.txt @@ -53,8 +53,8 @@ target_include_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_INCLUDE_DIRS} target_link_directories(${EXAMPLE_NAME} PRIVATE ${LIBUMF_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) target_link_options(${EXAMPLE_NAME} PRIVATE "-Wl,--start-group") -target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ libdisjoint_pool.a - ze_loader ${LIBUMF_LIBRARIES}) +target_link_libraries(${EXAMPLE_NAME} PRIVATE stdc++ ze_loader + ${LIBUMF_LIBRARIES}) # an optional part - adds a test of this example add_test( diff --git a/scripts/qemu/run-build.sh b/scripts/qemu/run-build.sh index c6314153c..724e6d7ff 100755 --- a/scripts/qemu/run-build.sh +++ b/scripts/qemu/run-build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -26,7 +26,6 @@ cmake .. 
\ -DUMF_BUILD_CUDA_PROVIDER=ON \ -DUMF_FORMAT_CODE_STYLE=OFF \ -DUMF_DEVELOPER_MODE=ON \ - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON \ -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON \ -DUMF_BUILD_EXAMPLES=ON \ -DUMF_USE_COVERAGE=${COVERAGE} \ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index c0072be7e..49fa2c5d8 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -85,8 +85,9 @@ set(UMF_SOURCES provider/provider_tracking.c critnib/critnib.c ravl/ravl.c - pool/pool_proxy.c + pool/pool_disjoint.c pool/pool_jemalloc.c + pool/pool_proxy.c pool/pool_scalable.c) if(UMF_POOL_JEMALLOC_ENABLED) diff --git a/src/base_alloc/base_alloc_global.c b/src/base_alloc/base_alloc_global.c index f709eab9d..f3b61566a 100644 --- a/src/base_alloc/base_alloc_global.c +++ b/src/base_alloc/base_alloc_global.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -208,7 +208,8 @@ void umf_ba_global_free(void *ptr) { } if (ba_is_destroyed) { - LOG_WARN("base_alloc: calling free after the base alloc is destroyed"); + LOG_WARN( + "base_alloc: calling free() after the base allocator is destroyed"); return; } diff --git a/src/libumf.def b/src/libumf.def index 98226dace..ce8820a8f 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -119,6 +119,18 @@ EXPORTS umfScalablePoolParamsSetKeepAllMemory ; Added in UMF_0.11 umfCUDAMemoryProviderParamsSetAllocFlags + umfDisjointPoolOps + umfDisjointPoolParamsCreate + umfDisjointPoolParamsDestroy + umfDisjointPoolParamsSetCapacity + umfDisjointPoolParamsSetMaxPoolableSize + umfDisjointPoolParamsSetMinBucketSize + umfDisjointPoolParamsSetName + umfDisjointPoolParamsSetSharedLimits + umfDisjointPoolParamsSetSlabMinSize + umfDisjointPoolParamsSetTrace + umfDisjointPoolSharedLimitsCreate + umfDisjointPoolSharedLimitsDestroy umfFixedMemoryProviderOps umfFixedMemoryProviderParamsCreate umfFixedMemoryProviderParamsDestroy diff --git a/src/libumf.map b/src/libumf.map index bbf664dcf..6582fd0f8 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -117,6 +117,18 @@ UMF_0.10 { UMF_0.11 { umfCUDAMemoryProviderParamsSetAllocFlags; + umfDisjointPoolOps; + umfDisjointPoolParamsCreate; + umfDisjointPoolParamsDestroy; + umfDisjointPoolParamsSetCapacity; + umfDisjointPoolParamsSetMaxPoolableSize; + umfDisjointPoolParamsSetMinBucketSize; + umfDisjointPoolParamsSetName; + umfDisjointPoolParamsSetSharedLimits; + umfDisjointPoolParamsSetSlabMinSize; + umfDisjointPoolParamsSetTrace; + umfDisjointPoolSharedLimitsCreate; + umfDisjointPoolSharedLimitsDestroy; umfFixedMemoryProviderOps; umfFixedMemoryProviderParamsCreate; umfFixedMemoryProviderParamsDestroy; diff --git a/src/pool/CMakeLists.txt b/src/pool/CMakeLists.txt index f54e70185..22aeab783 100644 --- a/src/pool/CMakeLists.txt +++ b/src/pool/CMakeLists.txt @@ -8,33 +8,3 @@ if(UMF_BUILD_SHARED_LIBRARY) endif() set(POOL_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS}) - -# libumf_pool_disjoint -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - add_umf_library( - NAME disjoint_pool - TYPE STATIC - SRCS pool_disjoint.cpp ${POOL_EXTRA_SRCS} - LIBS ${POOL_EXTRA_LIBS}) - - target_compile_definitions(disjoint_pool - PRIVATE ${POOL_COMPILE_DEFINITIONS}) - - if(WINDOWS) - target_compile_options(disjoint_pool PRIVATE /DWIN32_LEAN_AND_MEAN - /DNOMINMAX) - endif() - - add_library(${PROJECT_NAME}::disjoint_pool ALIAS disjoint_pool) - - add_dependencies(disjoint_pool umf) - - 
target_link_libraries(disjoint_pool PRIVATE umf) - - target_include_directories( - disjoint_pool - PUBLIC $ - $) - - install(TARGETS disjoint_pool EXPORT ${PROJECT_NAME}-targets) -endif() diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c new file mode 100644 index 000000000..e2288e49e --- /dev/null +++ b/src/pool/pool_disjoint.c @@ -0,0 +1,1123 @@ +/* + * Copyright (C) 2022-2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#include "pool_disjoint_internal.h" + +// Temporary solution for disabling memory poisoning. This is needed because +// AddressSanitizer does not support memory poisoning for GPU allocations. +// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 +#ifndef POISON_MEMORY +#undef __SANITIZE_ADDRESS__ +#endif +#include "utils_sanitizers.h" + +// Forward declarations +static slab_t *create_slab(bucket_t *bucket, bool full_size); +static void destroy_slab(slab_t *slab); + +static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); +static bool bucket_can_pool(bucket_t *bucket); +static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); +static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, + bool *from_pool); +static slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, + bool *from_pool); + +static __TLS umf_result_t TLS_last_allocation_error; + +// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is +// requested. The implementation distinguishes between allocations of size +// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. +// Allocation requests smaller than ChunkCutoff use chunks taken from a single +// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation +// size, and 8-byte allocations, only 1 in ~8000 requests results in a new +// coarse-grain allocation. Freeing results only in a chunk of a larger +// allocation to be marked as available and no real return to the system. An +// allocation is returned to the system only when all chunks in the larger +// allocation are freed by the program. Allocations larger than ChunkCutOff use +// a separate coarse-grain allocation for each request. These are subject to +// "pooling". That is, when such an allocation is freed by the program it is +// retained in a pool. The pool is available for future allocations, which means +// there are fewer actual coarse-grain allocations/deallocations. + +// The largest size which is allocated via the allocator. +// Allocations with size > CutOff bypass the pool and +// go directly to the provider. 
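+//
+// For illustration (actual values depend on the pool parameters): with a 64KB
+// minimum slab size, ChunkCutOff is 32KB, so an 8-byte request is served as a
+// chunk of a shared 64KB slab; a 100KB request gets a slab of its own, which
+// may be kept in the pool once freed; and anything above CutOff goes straight
+// to the memory provider.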
+static size_t CutOff = (size_t)1 << 31; // 2GB + +static size_t bucket_slab_min_size(bucket_t *bucket) { + return bucket->pool->params.slab_min_size; +} + +static size_t bucket_slab_alloc_size(bucket_t *bucket) { + return utils_max(bucket->size, bucket_slab_min_size(bucket)); +} + +static slab_t *create_slab(bucket_t *bucket, bool full_size) { + assert(bucket); + + umf_result_t res = UMF_RESULT_SUCCESS; + umf_memory_provider_handle_t provider = bucket->pool->provider; + + slab_t *slab = umf_ba_global_alloc(sizeof(*slab)); + if (slab == NULL) { + LOG_ERR("allocation of new slab failed!"); + return NULL; + } + + slab->num_chunks_allocated = 0; + slab->first_free_chunk_idx = 0; + slab->bucket = bucket; + + slab->iter = umf_ba_global_alloc(sizeof(*slab->iter)); + if (slab->iter == NULL) { + LOG_ERR("allocation of new slab iter failed!"); + goto free_slab; + } + slab->iter->val = slab; + slab->iter->prev = slab->iter->next = NULL; + + if (full_size) { + slab->num_chunks_total = 0; + slab->chunks = NULL; + } else { + slab->num_chunks_total = bucket_slab_min_size(bucket) / bucket->size; + slab->chunks = + umf_ba_global_alloc(sizeof(bool) * slab->num_chunks_total); + if (slab->chunks == NULL) { + LOG_ERR("allocation of slab chunks failed!"); + goto free_slab_iter; + } + memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks_total); + } + // if slab_min_size is not a multiple of bucket size, we would have some + // padding at the end of the slab + slab->slab_size = bucket_slab_alloc_size(bucket); + + // TODO not true + // NOTE: originally slabs memory were allocated without alignment + // with this registering a slab is simpler and doesn't require multimap + res = umfMemoryProviderAlloc(provider, slab->slab_size, 0, &slab->mem_ptr); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("allocation of slab data failed!"); + goto free_slab_chunks; + } + + // TODO + // ASSERT_IS_ALIGNED((uintptr_t)slab->mem_ptr, bucket->size); + + // raw allocation is not available for user so mark it as inaccessible + utils_annotate_memory_inaccessible(slab->mem_ptr, slab->slab_size); + + LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)bucket, slab->slab_size); + return slab; + +free_slab_chunks: + umf_ba_global_free(slab->chunks); + +free_slab_iter: + umf_ba_global_free(slab->iter); + +free_slab: + umf_ba_global_free(slab); + return NULL; +} + +static void destroy_slab(slab_t *slab) { + LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)slab->bucket, + slab->slab_size); + + umf_memory_provider_handle_t provider = slab->bucket->pool->provider; + umf_result_t res = + umfMemoryProviderFree(provider, slab->mem_ptr, slab->slab_size); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("deallocation of slab data failed!"); + } + + umf_ba_global_free(slab->chunks); + umf_ba_global_free(slab->iter); + umf_ba_global_free(slab); +} + +// return the index of the first available chunk, SIZE_MAX otherwise +static size_t slab_find_first_available_chunk_idx(const slab_t *slab) { + // use the first free chunk index as a hint for the search + for (bool *chunk = slab->chunks + slab->first_free_chunk_idx; + chunk != slab->chunks + slab->num_chunks_total; chunk++) { + + // false means not used + if (*chunk == false) { + size_t idx = chunk - slab->chunks; + LOG_DEBUG("idx: %zu", idx); + return idx; + } + } + + LOG_DEBUG("idx: SIZE_MAX"); + return SIZE_MAX; +} + +static void *slab_get_chunk(slab_t *slab) { + // slab has to be allocated in chunk mode + assert(slab->chunks && slab->num_chunks_total > 0); + + // free chunk must exist, otherwise we would 
have allocated another slab + const size_t chunk_idx = slab_find_first_available_chunk_idx(slab); + assert(chunk_idx != SIZE_MAX); + + void *free_chunk = + (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); + + // mark chunk as used + slab->chunks[chunk_idx] = true; + slab->num_chunks_allocated += 1; + + // use the found index as the next hint + slab->first_free_chunk_idx = chunk_idx + 1; + + return free_chunk; +} + +static void *slab_get(const slab_t *slab) { return slab->mem_ptr; } +static void *slab_get_end(const slab_t *slab) { + return (void *)((uintptr_t)slab->mem_ptr + + bucket_slab_min_size(slab->bucket)); +} + +static void slab_free_chunk(slab_t *slab, void *ptr) { + // This method should be called through bucket (since we might remove the + // slab as a result), therefore all locks are done on bucket level. + + // Make sure that we're in the right slab + assert(ptr >= slab_get(slab) && ptr < slab_get_end(slab)); + + // Even if the pointer p was previously aligned, it's still inside the + // corresponding chunk, so we get the correct index here. + size_t chunk_idx = + ((uintptr_t)ptr - (uintptr_t)slab->mem_ptr) / slab->bucket->size; + + // Make sure that the chunk was allocated + assert(slab->chunks[chunk_idx] && "double free detected"); + slab->chunks[chunk_idx] = false; + slab->num_chunks_allocated -= 1; + + if (chunk_idx < slab->first_free_chunk_idx) { + slab->first_free_chunk_idx = chunk_idx; + } + + LOG_DEBUG("chunk_idx: %zu, num_chunks_allocated: %zu, " + "first_free_chunk_idx: %zu", + chunk_idx, slab->num_chunks_allocated, + slab->first_free_chunk_idx); +} + +static bool slab_has_avail(const slab_t *slab) { + return slab->num_chunks_allocated < slab->num_chunks_total; +} + +static umf_result_t slab_reg(slab_t *slab) { + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + critnib *slabs = pool->known_slabs; + + // NOTE: changed vs original DisjointPool implementation - currently slab + // is already aligned to bucket size. 
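+    //
+    // Register the slab in the pool-wide known_slabs map (critnib), keyed by
+    // its start address, so that a pointer inside the slab can later be
+    // mapped back to its owning slab and bucket.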
+ void *slab_addr = slab_get(slab); + // TODO ASSERT_IS_ALIGNED((uintptr_t)slab_addr, bucket->size); + LOG_DEBUG("slab: %p, start: %p", (void *)slab, slab_addr); + + // NOTE: we don't need to lock the slabs map as the critnib already has a + // lock inside it + int ret = critnib_insert(slabs, (uintptr_t)slab_addr, slab, 0); + umf_result_t res = UMF_RESULT_SUCCESS; + if (ret == ENOMEM) { + LOG_ERR("register failed because of out of memory!"); + res = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } else if (ret == EEXIST) { + LOG_ERR("register failed because the address is already registered!"); + res = UMF_RESULT_ERROR_UNKNOWN; + } + + return res; +} + +static umf_result_t slab_unreg(slab_t *slab) { + bucket_t *bucket = slab->bucket; + disjoint_pool_t *pool = bucket->pool; + critnib *slabs = pool->known_slabs; + + void *slab_addr = slab_get(slab); + // TODO ASSERT_IS_ALIGNED((uintptr_t)slab_addr, bucket->size); + LOG_DEBUG("slab: %p, start: %p", (void *)slab, slab_addr); + + critnib_remove(slabs, (uintptr_t)slab_addr); + + return UMF_RESULT_SUCCESS; +} + +static bucket_t * +create_bucket(size_t sz, disjoint_pool_t *pool, + umf_disjoint_pool_shared_limits_handle_t shared_limits) { + + bucket_t *bucket = umf_ba_global_alloc(sizeof(*bucket)); + if (bucket == NULL) { + LOG_ERR("allocation of new bucket failed!"); + return NULL; + } + + memset(bucket, 0, sizeof(*bucket)); + bucket->size = sz; + bucket->pool = pool; + bucket->shared_limits = shared_limits; + + utils_mutex_init(&bucket->bucket_lock); + return bucket; +} + +static void destroy_bucket(bucket_t *bucket) { + // use an extra tmp to store the next iterator before destroying the slab + slab_list_item_t *it = NULL, *tmp = NULL; + LL_FOREACH_SAFE(bucket->available_slabs, it, tmp) { + LL_DELETE(bucket->available_slabs, it); + destroy_slab(it->val); + } + + LL_FOREACH_SAFE(bucket->unavailable_slabs, it, tmp) { + LL_DELETE(bucket->unavailable_slabs, it); + destroy_slab(it->val); + } + + utils_mutex_destroy_not_free(&bucket->bucket_lock); + umf_ba_global_free(bucket); +} + +static size_t slab_get_num_free_chunks(const slab_t *slab) { + return slab->num_chunks_total - slab->num_chunks_allocated; +} + +// NOTE: this function must be called under bucket->bucket_lock +static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, + bool *to_pool) { + slab_free_chunk(slab, ptr); + + // in case if the slab was previously full and now has single available + // chunk, it should be moved to the list of available slabs + if (slab_get_num_free_chunks(slab) == 1) { + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + DL_DELETE(bucket->unavailable_slabs, slab_it); + DL_PREPEND(bucket->available_slabs, slab_it); + bucket->available_slabs_num++; + } + + // check if slab is empty, and pool it if we can + if (slab->num_chunks_allocated == 0) { + // The slab is now empty. + // If the pool has capacity then put the slab in the pool. + // The to_pool parameter indicates whether the slab will be put in the + // pool or freed. 
+ *to_pool = bucket_can_pool(bucket); + if (*to_pool == false) { + // remove slab + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + slab_unreg(slab_it->val); + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + destroy_slab(slab_it->val); + } + } else { + // return this chunk to the pool + *to_pool = true; + } +} + +// NOTE: this function must be called under bucket->bucket_lock +static void *bucket_get_free_chunk(bucket_t *bucket, bool *from_pool) { + slab_list_item_t *slab_it = bucket_get_avail_slab(bucket, from_pool); + if (slab_it == NULL) { + return NULL; + } + + void *free_chunk = slab_get_chunk(slab_it->val); + + // if we allocated last free chunk from the slab and now it is full, move + // it to unavailable slabs and update its iterator + if (!(slab_has_avail(slab_it->val))) { + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); + } + + return free_chunk; +} + +static size_t bucket_chunk_cut_off(bucket_t *bucket) { + return bucket_slab_min_size(bucket) / 2; +} + +static slab_t *bucket_create_slab(bucket_t *bucket, bool full_size) { + slab_t *slab = create_slab(bucket, full_size); + if (slab == NULL) { + LOG_ERR("create_slab failed!") + return NULL; + } + + umf_result_t res = slab_reg(slab); + if (res != UMF_RESULT_SUCCESS) { + LOG_ERR("slab_reg failed!") + destroy_slab(slab); + return NULL; + } + + DL_PREPEND(bucket->available_slabs, slab->iter); + bucket->available_slabs_num++; + bucket_update_stats(bucket, 1, 0); + + return slab; +} + +static slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, + bool *from_pool) { + // return a slab that will be used for a single allocation + if (bucket->available_slabs == NULL) { + bucket_create_slab(bucket, true /* full size */); + *from_pool = false; + } else { + bucket_decrement_pool(bucket, from_pool); + } + + return bucket->available_slabs; +} + +// NOTE: this function must be called under bucket->bucket_lock +static void *bucket_get_free_slab(bucket_t *bucket, bool *from_pool) { + slab_list_item_t *slab_it = bucket_get_avail_full_slab(bucket, from_pool); + if (slab_it == NULL) { + return NULL; + } + + slab_t *slab = slab_it->val; + void *ptr = slab_get(slab); + + DL_DELETE(bucket->available_slabs, slab_it); + bucket->available_slabs_num--; + slab_it->prev = NULL; + DL_PREPEND(bucket->unavailable_slabs, slab_it); + + return ptr; +} + +// NOTE: this function must be called under bucket->bucket_lock +static void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool) { + slab_list_item_t *slab_it = slab->iter; + assert(slab_it->val != NULL); + *to_pool = bucket_can_pool(bucket); + if (*to_pool) { + DL_DELETE(bucket->unavailable_slabs, slab_it); + slab_it->prev = NULL; + DL_PREPEND(bucket->available_slabs, slab_it); + bucket->available_slabs_num++; + } else { + slab_unreg(slab_it->val); + DL_DELETE(bucket->unavailable_slabs, slab_it); + destroy_slab(slab_it->val); + } +} + +static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, + bool *from_pool) { + if (bucket->available_slabs == NULL) { + bucket_create_slab(bucket, false /* chunked */); + *from_pool = false; + } else { + slab_t *slab = bucket->available_slabs->val; + if (slab->num_chunks_allocated == 0) { + // If this was an empty slab, it was in the pool. + // Now it is no longer in the pool, so update count. 
+ --bucket->chunked_slabs_in_pool; + bucket_decrement_pool(bucket, from_pool); + } else { + // Allocation from existing slab is treated as from pool for statistics. + *from_pool = true; + } + } + + return bucket->available_slabs; +} + +static size_t bucket_capacity(bucket_t *bucket) { + // For buckets used in chunked mode, just one slab in pool is sufficient. + // For larger buckets, the capacity could be more and is adjustable. + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + return 1; + } else { + return bucket->pool->params.capacity; + } +} + +static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { + if (bucket->pool->params.pool_trace == 0) { + return; + } + + bucket->curr_slabs_in_use += in_use; + bucket->max_slabs_in_use = + utils_max(bucket->curr_slabs_in_use, bucket->max_slabs_in_use); + + bucket->curr_slabs_in_pool += in_pool; + bucket->max_slabs_in_pool = + utils_max(bucket->curr_slabs_in_pool, bucket->max_slabs_in_pool); + + // Increment or decrement current pool sizes based on whether + // slab was added to or removed from pool. + bucket->pool->params.cur_pool_size += + in_pool * bucket_slab_alloc_size(bucket); +} + +static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { + // If a slab was available in the pool then note that the current pooled + // size has reduced by the size of a slab in this bucket. + *from_pool = true; + bucket_update_stats(bucket, 1, -1); + utils_fetch_and_add64(&bucket->shared_limits->total_size, + -(long long)bucket_slab_alloc_size(bucket)); +} + +static bool bucket_can_pool(bucket_t *bucket) { + size_t new_free_slabs_in_bucket; + + // check if this bucket is used in chunked form or as full slabs + bool chunked_bucket = bucket->size <= bucket_chunk_cut_off(bucket); + if (chunked_bucket) { + new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; + } else { + new_free_slabs_in_bucket = bucket->available_slabs_num + 1; + } + + // we keep at most params.capacity slabs in the pool + if (bucket_capacity(bucket) >= new_free_slabs_in_bucket) { + size_t pool_size = 0; + utils_atomic_load_acquire(&bucket->shared_limits->total_size, + &pool_size); + while (true) { + size_t new_pool_size = pool_size + bucket_slab_alloc_size(bucket); + + if (bucket->shared_limits->max_size < new_pool_size) { + break; + } + + if (utils_compare_exchange(&bucket->shared_limits->total_size, + &pool_size, &new_pool_size)) { + if (chunked_bucket) { + ++bucket->chunked_slabs_in_pool; + } + + bucket_update_stats(bucket, -1, 1); + return true; + } + } + } + + bucket_update_stats(bucket, -1, 0); + return false; +} + +static size_t size_to_idx(disjoint_pool_t *pool, size_t size) { + assert(size <= CutOff && "Unexpected size"); + assert(size > 0 && "Unexpected size"); + + size_t min_bucket_size = (size_t)1 << pool->min_bucket_size_exp; + if (size < min_bucket_size) { + return 0; + } + + // get the position of the leftmost set bit + size_t position = getLeftmostSetBitPos(size); + + bool is_power_of_2 = 0 == (size & (size - 1)); + bool larger_than_halfway_between_powers_of_2 = + !is_power_of_2 && + (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); + size_t index = (position - pool->min_bucket_size_exp) * 2 + + (int)(!is_power_of_2) + + (int)larger_than_halfway_between_powers_of_2; + + return index; +} + +static umf_disjoint_pool_shared_limits_t * +disjoint_pool_get_limits(disjoint_pool_t *pool) { + if (pool->params.shared_limits) { + return pool->params.shared_limits; + } else { + return pool->default_shared_limits; + } +} + +static 
bucket_t *disjoint_pool_find_bucket(disjoint_pool_t *pool, size_t size) { + size_t calculated_idx = size_to_idx(pool, size); + return pool->buckets[calculated_idx]; +} + +static void disjoint_pool_print_stats(disjoint_pool_t *pool) { + size_t high_bucket_size = 0; + size_t high_peak_slabs_in_use = 0; + const char *name = pool->params.name; + + LOG_DEBUG("\"%s\" pool memory statistics", name); + LOG_DEBUG("%14s %12s %12s %18s %20s %21s", "Bucket Size", "Allocs", "Frees", + "Allocs from Pool", "Peak Slabs in Use", "Peak Slabs in Pool"); + + for (size_t i = 0; i < pool->buckets_num; i++) { + bucket_t *bucket = pool->buckets[i]; + if (bucket->alloc_count) { + LOG_DEBUG("%14zu %12zu %12zu %18zu %20zu %21zu", bucket->size, + bucket->alloc_count, bucket->free_count, + bucket->alloc_pool_count, bucket->max_slabs_in_use, + bucket->max_slabs_in_pool); + high_bucket_size = + utils_max(bucket_slab_alloc_size(bucket), high_bucket_size); + } + high_peak_slabs_in_use = + utils_max(bucket->max_slabs_in_use, high_peak_slabs_in_use); + } + + LOG_DEBUG("current pool size: %zu", + disjoint_pool_get_limits(pool)->total_size); + LOG_DEBUG("suggested setting=;%c%s:%zu,%zu,64K", (char)tolower(name[0]), + (name + 1), high_bucket_size, high_peak_slabs_in_use); +} + +static void *disjoint_pool_allocate(disjoint_pool_t *pool, size_t size) { + if (size == 0) { + return NULL; + } + + void *ptr = NULL; + + if (size > pool->params.max_poolable_size) { + umf_result_t ret = + umfMemoryProviderAlloc(pool->provider, size, 0, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("allocation from the memory provider failed"); + return NULL; + } + + utils_annotate_memory_undefined(ptr, size); + return ptr; + } + + bucket_t *bucket = disjoint_pool_find_bucket(pool, size); + + utils_mutex_lock(&bucket->bucket_lock); + + bool from_pool = false; + if (size > bucket_chunk_cut_off(bucket)) { + ptr = bucket_get_free_slab(bucket, &from_pool); + } else { + ptr = bucket_get_free_chunk(bucket, &from_pool); + } + + if (ptr == NULL) { + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + if (pool->params.pool_trace > 1) { + // update stats + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (pool->params.pool_trace > 2) { + LOG_DEBUG("Allocated %8zu %s bytes from %s -> %p", size, + pool->params.name, (from_pool ? "pool" : "provider"), ptr); + } + + VALGRIND_DO_MEMPOOL_ALLOC(pool, ptr, size); + utils_annotate_memory_undefined(ptr, bucket->size); + return ptr; +} + +umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, + void *params, void **ppPool) { + // TODO set defaults when user pass the NULL as params + if (!provider || !params || !ppPool) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + disjoint_pool_t *disjoint_pool = + umf_ba_global_alloc(sizeof(*disjoint_pool)); + if (!disjoint_pool) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_disjoint_pool_params_t *dp_params = + (umf_disjoint_pool_params_t *)params; + + // min_bucket_size parameter must be a power of 2 for bucket sizes + // to generate correctly. 
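+ // Bucket sizes are powers of 2 plus the midpoints between them, e.g. for
+ // min_bucket_size = 64: 64, 96, 128, 192, 256, ... up to CutOff.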
+ if (!dp_params->min_bucket_size || + !IS_POWER_OF_2(dp_params->min_bucket_size)) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + VALGRIND_DO_CREATE_MEMPOOL(disjoint_pool, 0, 0); + + disjoint_pool->provider = provider; + disjoint_pool->params = *dp_params; + + disjoint_pool->known_slabs = critnib_new(); + + // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. + // Powers of 2 and the value halfway between the powers of 2. + size_t Size1 = disjoint_pool->params.min_bucket_size; + + // min_bucket_size cannot be larger than CutOff. + Size1 = utils_min(Size1, CutOff); + + // Buckets sized smaller than the bucket default size- 8 aren't needed. + Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); + + // Calculate the exponent for min_bucket_size used for finding buckets. + disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1); + disjoint_pool->default_shared_limits = + umfDisjointPoolSharedLimitsCreate(SIZE_MAX); + + // count number of buckets, start from 1 + disjoint_pool->buckets_num = 1; + size_t Size2 = Size1 + Size1 / 2; + size_t ts2 = Size2, ts1 = Size1; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { + disjoint_pool->buckets_num += 2; + } + disjoint_pool->buckets = + umf_ba_global_alloc(sizeof(bucket_t *) * disjoint_pool->buckets_num); + + int i = 0; + Size1 = ts1; + Size2 = ts2; + for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { + disjoint_pool->buckets[i] = create_bucket( + Size1, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + disjoint_pool->buckets[i + 1] = create_bucket( + Size2, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + } + disjoint_pool->buckets[i] = create_bucket( + CutOff, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + + umf_result_t ret = umfMemoryProviderGetMinPageSize( + provider, NULL, &disjoint_pool->provider_min_page_size); + if (ret != UMF_RESULT_SUCCESS) { + disjoint_pool->provider_min_page_size = 0; + } + + *ppPool = (void *)disjoint_pool; + + return UMF_RESULT_SUCCESS; +} + +void *disjoint_pool_malloc(void *pool, size_t size) { + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + void *ptr = disjoint_pool_allocate(hPool, size); + + return ptr; +} + +void *disjoint_pool_calloc(void *pool, size_t num, size_t size) { + (void)pool; + (void)num; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_realloc(void *pool, void *ptr, size_t size) { + (void)pool; + (void)ptr; + (void)size; + + // Not supported + TLS_last_allocation_error = UMF_RESULT_ERROR_NOT_SUPPORTED; + return NULL; +} + +void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + + void *ptr = NULL; + + if (size == 0) { + return NULL; + } + + if (alignment <= 1) { + return disjoint_pool_allocate(pool, size); + } + + size_t aligned_size; + if (alignment <= disjoint_pool->provider_min_page_size) { + // This allocation will be served from a Bucket which size is multiple + // of Alignment and Slab address is aligned to provider_min_page_size + // so the address will be properly aligned. + aligned_size = (size > 1) ? ALIGN_UP_SAFE(size, alignment) : alignment; + } else { + // Slabs are only aligned to provider_min_page_size, we need to compensate + // for that in case the allocation is within pooling limit. 
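+ // Over-allocating by (alignment - 1) bytes guarantees that an address
+ // aligned to 'alignment' exists inside the chunk; the returned pointer
+ // is rounded up with ALIGN_UP_SAFE below.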
+ // TODO: consider creating properly-aligned Slabs on demand + aligned_size = size + alignment - 1; + } + + // Check if requested allocation size is within pooling limit. + // If not, just request aligned pointer from the system. + if (aligned_size > disjoint_pool->params.max_poolable_size) { + + umf_result_t ret = umfMemoryProviderAlloc(disjoint_pool->provider, size, + alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("allocation from the memory provider failed"); + return NULL; + } + + assert(ptr); + utils_annotate_memory_undefined(ptr, size); + return ptr; + } + + bool from_pool = false; + bucket_t *bucket = disjoint_pool_find_bucket(pool, aligned_size); + + utils_mutex_lock(&bucket->bucket_lock); + + if (aligned_size > bucket_chunk_cut_off(bucket)) { + ptr = bucket_get_free_slab(bucket, &from_pool); + } else { + ptr = bucket_get_free_chunk(bucket, &from_pool); + } + + if (ptr == NULL) { + TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + utils_mutex_unlock(&bucket->bucket_lock); + return NULL; + } + + if (disjoint_pool->params.pool_trace > 1) { + // update stats + ++bucket->alloc_count; + if (from_pool) { + ++bucket->alloc_pool_count; + } + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (disjoint_pool->params.pool_trace > 2) { + LOG_DEBUG("Allocated %8zu %s bytes aligned at %zu from %s -> %p", size, + disjoint_pool->params.name, alignment, + (from_pool ? "pool" : "provider"), ptr); + } + + void *aligned_ptr = (void *)ALIGN_UP_SAFE((size_t)ptr, alignment); + VALGRIND_DO_MEMPOOL_ALLOC(disjoint_pool, aligned_ptr, size); + utils_annotate_memory_undefined(aligned_ptr, size); + return aligned_ptr; +} + +size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { + (void)pool; + (void)ptr; + + // Not supported + return 0; +} + +umf_result_t disjoint_pool_free(void *pool, void *ptr) { + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + if (ptr == NULL) { + return UMF_RESULT_SUCCESS; + } + + // check if given pointer is allocated inside any Disjoint Pool slab + slab_t *slab = + (slab_t *)critnib_find_le(disjoint_pool->known_slabs, (uintptr_t)ptr); + + if (slab == NULL || ptr >= slab_get_end(slab)) { + + // regular free + umf_alloc_info_t allocInfo = {NULL, 0, NULL}; + umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("failed to get allocation info from the memory tracker"); + return ret; + } + + size_t size = allocInfo.baseSize; + umf_memory_provider_handle_t provider = disjoint_pool->provider; + ret = umfMemoryProviderFree(provider, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { + TLS_last_allocation_error = ret; + LOG_ERR("deallocation from the memory provider failed"); + } + + return ret; + } + + bool to_pool = false; + + if (ptr < slab_get(slab) || ptr >= slab_get_end(slab)) { + assert(0); + return UMF_RESULT_ERROR_UNKNOWN; + } + + // The slab object won't be deleted until it's removed from the map which is + // protected by the lock, so it's safe to access it here. 
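+ // From this point the pointer is known to belong to this slab; the bucket
+ // lock taken below serializes chunk/slab bookkeeping with concurrent
+ // allocations from the same bucket.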
+ + bucket_t *bucket = slab->bucket; + + VALGRIND_DO_MEMPOOL_FREE(pool, ptr); + utils_mutex_lock(&bucket->bucket_lock); + + utils_annotate_memory_inaccessible(ptr, bucket->size); + if (bucket->size <= bucket_chunk_cut_off(bucket)) { + bucket_free_chunk(bucket, ptr, slab, &to_pool); + } else { + bucket_free_slab(bucket, slab, &to_pool); + } + + if (disjoint_pool->params.pool_trace > 1) { + bucket->free_count++; + } + + utils_mutex_unlock(&bucket->bucket_lock); + + if (disjoint_pool->params.pool_trace > 2) { + const char *name = disjoint_pool->params.name; + LOG_DEBUG("freed %s %p to %s, current total pool size: %zu, current " + "pool size for %s: %zu", + name, ptr, (to_pool ? "pool" : "provider"), + disjoint_pool_get_limits(disjoint_pool)->total_size, name, + disjoint_pool->params.cur_pool_size); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { + (void)pool; + + return TLS_last_allocation_error; +} + +// Define destructor for use with unique_ptr +void disjoint_pool_finalize(void *pool) { + + disjoint_pool_t *hPool = (disjoint_pool_t *)pool; + + if (hPool->params.pool_trace > 1) { + disjoint_pool_print_stats(hPool); + } + + for (size_t i = 0; i < hPool->buckets_num; i++) { + destroy_bucket(hPool->buckets[i]); + } + + VALGRIND_DO_DESTROY_MEMPOOL(hPool); + + umfDisjointPoolSharedLimitsDestroy(hPool->default_shared_limits); + critnib_delete(hPool->known_slabs); + + umf_ba_global_free(hPool); +} + +static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = { + .version = UMF_VERSION_CURRENT, + .initialize = disjoint_pool_initialize, + .finalize = disjoint_pool_finalize, + .malloc = disjoint_pool_malloc, + .calloc = disjoint_pool_calloc, + .realloc = disjoint_pool_realloc, + .aligned_malloc = disjoint_pool_aligned_malloc, + .malloc_usable_size = disjoint_pool_malloc_usable_size, + .free = disjoint_pool_free, + .get_last_allocation_error = disjoint_pool_get_last_allocation_error, +}; + +umf_memory_pool_ops_t *umfDisjointPoolOps(void) { + return &UMF_DISJOINT_POOL_OPS; +} + +umf_disjoint_pool_shared_limits_t * +umfDisjointPoolSharedLimitsCreate(size_t max_size) { + umf_disjoint_pool_shared_limits_t *ptr = + umf_ba_global_alloc(sizeof(umf_disjoint_pool_shared_limits_t)); + //umf_ba_global_alloc(sizeof(*ptr)); + ptr->max_size = max_size; + ptr->total_size = 0; + return ptr; +} + +void umfDisjointPoolSharedLimitsDestroy( + umf_disjoint_pool_shared_limits_t *limits) { + umf_ba_global_free(limits); +} + +umf_result_t +umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { + static const char *DEFAULT_NAME = "disjoint_pool"; + + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + umf_disjoint_pool_params_handle_t params = + umf_ba_global_alloc(sizeof(*params)); + if (params == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool params"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + params->slab_min_size = 0; + params->max_poolable_size = 0; + params->capacity = 0; + params->min_bucket_size = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; + params->cur_pool_size = 0; + params->pool_trace = 0; + params->shared_limits = NULL; + params->name = NULL; + + umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); + if (ret != UMF_RESULT_SUCCESS) { + umf_ba_global_free(params); + return ret; + } + + *hParams = params; + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { + // NOTE: 
dereferencing hParams when BA is already destroyed leads to crash + if (hParams && !umf_ba_global_is_destroyed()) { + umf_ba_global_free(hParams->name); + umf_ba_global_free(hParams); + } + + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, + size_t slabMinSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->slab_min_size = slabMinSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( + umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->max_poolable_size = maxPoolableSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, + size_t maxCapacity) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->capacity = maxCapacity; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, + size_t minBucketSize) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // minBucketSize parameter must be a power of 2 and greater than 0. + if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { + LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->min_bucket_size = minBucketSize; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, + int poolTrace) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->pool_trace = poolTrace; + return UMF_RESULT_SUCCESS; +} + +umf_result_t umfDisjointPoolParamsSetSharedLimits( + umf_disjoint_pool_params_handle_t hParams, + umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + hParams->shared_limits = hSharedLimits; + return UMF_RESULT_SUCCESS; +} + +umf_result_t +umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, + const char *name) { + if (!hParams) { + LOG_ERR("disjoint pool params handle is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + char *newName = umf_ba_global_alloc(sizeof(char) * (strlen(name) + 1)); + if (newName == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool name"); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + umf_ba_global_free(hParams->name); + hParams->name = newName; + strcpy(hParams->name, name); + + return UMF_RESULT_SUCCESS; +} diff --git a/src/pool/pool_disjoint.cpp b/src/pool/pool_disjoint.cpp deleted file mode 100644 index 0390f5375..000000000 --- a/src/pool/pool_disjoint.cpp +++ /dev/null @@ -1,1313 +0,0 @@ -// Copyright (C) 2023-2025 Intel Corporation -// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// TODO: replace with logger? 
-#include - -#include "provider/provider_tracking.h" - -#include "../cpp_helpers.hpp" -#include "pool_disjoint.h" -#include "umf.h" -#include "utils_log.h" -#include "utils_math.h" -#include "utils_sanitizers.h" - -// Temporary solution for disabling memory poisoning. This is needed because -// AddressSanitizer does not support memory poisoning for GPU allocations. -// More info: https://github.com/oneapi-src/unified-memory-framework/issues/634 -#ifndef POISON_MEMORY -#define POISON_MEMORY 0 -#endif - -static inline void annotate_memory_inaccessible([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_inaccessible(ptr, size); -#endif -} - -static inline void annotate_memory_undefined([[maybe_unused]] void *ptr, - [[maybe_unused]] size_t size) { -#if (POISON_MEMORY != 0) - utils_annotate_memory_undefined(ptr, size); -#endif -} - -typedef struct umf_disjoint_pool_shared_limits_t { - size_t MaxSize; - std::atomic TotalSize; -} umf_disjoint_pool_shared_limits_t; - -// Configuration of Disjoint Pool -typedef struct umf_disjoint_pool_params_t { - // Minimum allocation size that will be requested from the memory provider. - size_t SlabMinSize; - - // Allocations up to this limit will be subject to chunking/pooling - size_t MaxPoolableSize; - - // When pooling, each bucket will hold a max of 'Capacity' unfreed slabs - size_t Capacity; - - // Holds the minimum bucket size valid for allocation of a memory type. - // This value must be a power of 2. - size_t MinBucketSize; - - // Holds size of the pool managed by the allocator. - size_t CurPoolSize; - - // Whether to print pool usage statistics - int PoolTrace; - - // Memory limits that can be shared between multitple pool instances, - // i.e. if multiple pools use the same SharedLimits sum of those pools' - // sizes cannot exceed MaxSize. 
- umf_disjoint_pool_shared_limits_handle_t SharedLimits; - - // Name used in traces - char *Name; -} umf_disjoint_pool_params_t; - -class DisjointPool { - public: - class AllocImpl; - using Config = umf_disjoint_pool_params_t; - - umf_result_t initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_handle_t parameters); - void *malloc(size_t size); - void *calloc(size_t, size_t); - void *realloc(void *, size_t); - void *aligned_malloc(size_t size, size_t alignment); - size_t malloc_usable_size(void *); - umf_result_t free(void *ptr); - umf_result_t get_last_allocation_error(); - - DisjointPool(); - ~DisjointPool(); - - private: - std::unique_ptr impl; -}; - -umf_disjoint_pool_shared_limits_t * -umfDisjointPoolSharedLimitsCreate(size_t MaxSize) { - return new umf_disjoint_pool_shared_limits_t{MaxSize, 0}; -} - -void umfDisjointPoolSharedLimitsDestroy( - umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { - delete hSharedLimits; -} - -umf_result_t -umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { - static const char *DEFAULT_NAME = "disjoint_pool"; - - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - umf_disjoint_pool_params_handle_t params = new umf_disjoint_pool_params_t{}; - if (params == nullptr) { - LOG_ERR("cannot allocate memory for disjoint pool params"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - params->SlabMinSize = 0; - params->MaxPoolableSize = 0; - params->Capacity = 0; - params->MinBucketSize = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; - params->CurPoolSize = 0; - params->PoolTrace = 0; - params->SharedLimits = nullptr; - params->Name = nullptr; - - umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); - if (ret != UMF_RESULT_SUCCESS) { - delete params; - return ret; - } - - *hParams = params; - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { - if (hParams) { - delete[] hParams->Name; - delete hParams; - } - - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetSlabMinSize(umf_disjoint_pool_params_handle_t hParams, - size_t slabMinSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->SlabMinSize = slabMinSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfDisjointPoolParamsSetMaxPoolableSize( - umf_disjoint_pool_params_handle_t hParams, size_t maxPoolableSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->MaxPoolableSize = maxPoolableSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetCapacity(umf_disjoint_pool_params_handle_t hParams, - size_t maxCapacity) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->Capacity = maxCapacity; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetMinBucketSize(umf_disjoint_pool_params_handle_t hParams, - size_t minBucketSize) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - // minBucketSize parameter must be a power of 2 and greater than 0. 
- if (minBucketSize == 0 || (minBucketSize & (minBucketSize - 1))) { - LOG_ERR("minBucketSize must be a power of 2 and greater than 0"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->MinBucketSize = minBucketSize; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetTrace(umf_disjoint_pool_params_handle_t hParams, - int poolTrace) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->PoolTrace = poolTrace; - return UMF_RESULT_SUCCESS; -} - -umf_result_t umfDisjointPoolParamsSetSharedLimits( - umf_disjoint_pool_params_handle_t hParams, - umf_disjoint_pool_shared_limits_handle_t hSharedLimits) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - hParams->SharedLimits = hSharedLimits; - return UMF_RESULT_SUCCESS; -} - -umf_result_t -umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, - const char *name) { - if (!hParams) { - LOG_ERR("disjoint pool params handle is NULL"); - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - char *newName = new char[std::strlen(name) + 1]; - if (newName == nullptr) { - LOG_ERR("cannot allocate memory for disjoint pool name"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - delete[] hParams->Name; - hParams->Name = newName; - std::strcpy(hParams->Name, name); - - return UMF_RESULT_SUCCESS; -} - -// Allocations are a minimum of 4KB/64KB/2MB even when a smaller size is -// requested. The implementation distinguishes between allocations of size -// ChunkCutOff = (minimum-alloc-size / 2) and those that are larger. -// Allocation requests smaller than ChunkCutoff use chunks taken from a single -// coarse-grain allocation. Thus, for example, for a 64KB minimum allocation -// size, and 8-byte allocations, only 1 in ~8000 requests results in a new -// coarse-grain allocation. Freeing results only in a chunk of a larger -// allocation to be marked as available and no real return to the system. An -// allocation is returned to the system only when all chunks in the larger -// allocation are freed by the program. Allocations larger than ChunkCutOff use -// a separate coarse-grain allocation for each request. These are subject to -// "pooling". That is, when such an allocation is freed by the program it is -// retained in a pool. The pool is available for future allocations, which means -// there are fewer actual coarse-grain allocations/deallocations. - -// The largest size which is allocated via the allocator. -// Allocations with size > CutOff bypass the pool and -// go directly to the provider. -static constexpr size_t CutOff = (size_t)1 << 31; // 2GB - -// Aligns the pointer down to the specified alignment -// (e.g. returns 8 for Size = 13, Alignment = 8) -static void *AlignPtrDown(void *Ptr, const size_t Alignment) { - return reinterpret_cast((reinterpret_cast(Ptr)) & - (~(Alignment - 1))); -} - -// Aligns the pointer up to the specified alignment -// (e.g. returns 16 for Size = 13, Alignment = 8) -static void *AlignPtrUp(void *Ptr, const size_t Alignment) { - void *AlignedPtr = AlignPtrDown(Ptr, Alignment); - // Special case when the pointer is already aligned - if (Ptr == AlignedPtr) { - return Ptr; - } - return static_cast(AlignedPtr) + Alignment; -} - -// Aligns the value up to the specified alignment -// (e.g. 
returns 16 for Size = 13, Alignment = 8) -static size_t AlignUp(size_t Val, size_t Alignment) { - assert(Alignment > 0); - return (Val + Alignment - 1) & (~(Alignment - 1)); -} - -typedef struct MemoryProviderError { - umf_result_t code; -} MemoryProviderError_t; - -class Bucket; - -// Represents the allocated memory block of size 'SlabMinSize' -// Internally, it splits the memory block into chunks. The number of -// chunks depends of the size of a Bucket which created the Slab. -// Note: Bucket's methods are responsible for thread safety of Slab access, -// so no locking happens here. -class Slab { - - // Pointer to the allocated memory of SlabMinSize bytes - void *MemPtr; - - // Represents the current state of each chunk: - // if the bit is set then the chunk is allocated - // the chunk is free for allocation otherwise - std::vector Chunks; - - // Total number of allocated chunks at the moment. - size_t NumAllocated = 0; - - // The bucket which the slab belongs to - Bucket &bucket; - - using ListIter = std::list>::iterator; - - // Store iterator to the corresponding node in avail/unavail list - // to achieve O(1) removal - ListIter SlabListIter; - - // Hints where to start search for free chunk in a slab - size_t FirstFreeChunkIdx = 0; - - // Return the index of the first available chunk, SIZE_MAX otherwise - size_t FindFirstAvailableChunkIdx() const; - - // Register/Unregister the slab in the global slab address map. - void regSlab(Slab &); - void unregSlab(Slab &); - static void regSlabByAddr(void *, Slab &); - static void unregSlabByAddr(void *, Slab &); - - public: - Slab(Bucket &); - ~Slab(); - - void setIterator(ListIter It) { SlabListIter = It; } - ListIter getIterator() const { return SlabListIter; } - - size_t getNumAllocated() const { return NumAllocated; } - - // Get pointer to allocation that is one piece of this slab. - void *getChunk(); - - // Get pointer to allocation that is this entire slab. - void *getSlab(); - - void *getPtr() const { return MemPtr; } - void *getEnd() const; - - size_t getChunkSize() const; - size_t getNumChunks() const { return Chunks.size(); } - - bool hasAvail(); - - Bucket &getBucket(); - const Bucket &getBucket() const; - - void freeChunk(void *Ptr); -}; - -class Bucket { - const size_t Size; - - // List of slabs which have at least 1 available chunk. - std::list> AvailableSlabs; - - // List of slabs with 0 available chunk. - std::list> UnavailableSlabs; - - // Protects the bucket and all the corresponding slabs - std::mutex BucketLock; - - // Reference to the allocator context, used access memory allocation - // routines, slab map and etc. - DisjointPool::AllocImpl &OwnAllocCtx; - - // For buckets used in chunked mode, a counter of slabs in the pool. - // For allocations that use an entire slab each, the entries in the Available - // list are entries in the pool.Each slab is available for a new - // allocation.The size of the Available list is the size of the pool. - // For allocations that use slabs in chunked mode, slabs will be in the - // Available list if any one or more of their chunks is free.The entire slab - // is not necessarily free, just some chunks in the slab are free. To - // implement pooling we will allow one slab in the Available list to be - // entirely empty. Normally such a slab would have been freed. But - // now we don't, and treat this slab as "in the pool". - // When a slab becomes entirely free we have to decide whether to return it - // to the provider or keep it allocated. 
A simple check for size of the - // Available list is not sufficient to check whether any slab has been - // pooled yet. We would have to traverse the entire Available list and check - // if any of them is entirely free. Instead we keep a counter of entirely - // empty slabs within the Available list to speed up the process of checking - // if a slab in this bucket is already pooled. - size_t chunkedSlabsInPool; - - // Statistics - size_t allocPoolCount; - size_t freeCount; - size_t currSlabsInUse; - size_t currSlabsInPool; - size_t maxSlabsInPool; - - public: - // Statistics - size_t allocCount; - size_t maxSlabsInUse; - - Bucket(size_t Sz, DisjointPool::AllocImpl &AllocCtx) - : Size{Sz}, OwnAllocCtx{AllocCtx}, chunkedSlabsInPool(0), - allocPoolCount(0), freeCount(0), currSlabsInUse(0), - currSlabsInPool(0), maxSlabsInPool(0), allocCount(0), - maxSlabsInUse(0) {} - - // Get pointer to allocation that is one piece of an available slab in this - // bucket. - void *getChunk(bool &FromPool); - - // Get pointer to allocation that is a full slab in this bucket. - void *getSlab(bool &FromPool); - - // Return the allocation size of this bucket. - size_t getSize() const { return Size; } - - // Free an allocation that is one piece of a slab in this bucket. - void freeChunk(void *Ptr, Slab &Slab, bool &ToPool); - - // Free an allocation that is a full slab in this bucket. - void freeSlab(Slab &Slab, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle(); - - DisjointPool::AllocImpl &getAllocCtx() { return OwnAllocCtx; } - - // Check whether an allocation to be freed can be placed in the pool. - bool CanPool(bool &ToPool); - - // The minimum allocation size for any slab. - size_t SlabMinSize(); - - // The allocation size for a slab in this bucket. - size_t SlabAllocSize(); - - // The minimum size of a chunk from this bucket's slabs. - size_t ChunkCutOff(); - - // The number of slabs in this bucket that can be in the pool. - size_t Capacity(); - - // The maximum allocation size subject to pooling. - size_t MaxPoolableSize(); - - // Update allocation count - void countAlloc(bool FromPool); - - // Update free count - void countFree(); - - // Update statistics of Available/Unavailable - void updateStats(int InUse, int InPool); - - // Print bucket statistics - void printStats(bool &TitlePrinted, const std::string &Label); - - private: - void onFreeChunk(Slab &, bool &ToPool); - - // Update statistics of pool usage, and indicate that an allocation was made - // from the pool. - void decrementPool(bool &FromPool); - - // Get a slab to be used for chunked allocations. - decltype(AvailableSlabs.begin()) getAvailSlab(bool &FromPool); - - // Get a slab that will be used as a whole for a single allocation. - decltype(AvailableSlabs.begin()) getAvailFullSlab(bool &FromPool); -}; - -class DisjointPool::AllocImpl { - // It's important for the map to be destroyed last after buckets and their - // slabs This is because slab's destructor removes the object from the map. 
- std::unordered_multimap KnownSlabs; - std::shared_timed_mutex KnownSlabsMapLock; - - // Handle to the memory provider - umf_memory_provider_handle_t MemHandle; - - // Store as unique_ptrs since Bucket is not Movable(because of std::mutex) - std::vector> Buckets; - - // Configuration for this instance - umf_disjoint_pool_params_t params; - - umf_disjoint_pool_shared_limits_t DefaultSharedLimits = { - (std::numeric_limits::max)(), 0}; - - // Used in algorithm for finding buckets - std::size_t MinBucketSizeExp; - - // Coarse-grain allocation min alignment - size_t ProviderMinPageSize; - - public: - AllocImpl(umf_memory_provider_handle_t hProvider, - umf_disjoint_pool_params_handle_t params) - : MemHandle{hProvider}, params(*params) { - - VALGRIND_DO_CREATE_MEMPOOL(this, 0, 0); - - // deep copy of the Name - this->params.Name = new char[std::strlen(params->Name) + 1]; - std::strcpy(this->params.Name, params->Name); - - // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. - // Powers of 2 and the value halfway between the powers of 2. - auto Size1 = this->params.MinBucketSize; - // MinBucketSize cannot be larger than CutOff. - Size1 = std::min(Size1, CutOff); - // Buckets sized smaller than the bucket default size- 8 aren't needed. - Size1 = std::max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); - // Calculate the exponent for MinBucketSize used for finding buckets. - MinBucketSizeExp = (size_t)log2Utils(Size1); - auto Size2 = Size1 + Size1 / 2; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { - Buckets.push_back(std::make_unique(Size1, *this)); - Buckets.push_back(std::make_unique(Size2, *this)); - } - Buckets.push_back(std::make_unique(CutOff, *this)); - - auto ret = umfMemoryProviderGetMinPageSize(hProvider, nullptr, - &ProviderMinPageSize); - if (ret != UMF_RESULT_SUCCESS) { - ProviderMinPageSize = 0; - } - } - - ~AllocImpl() { - VALGRIND_DO_DESTROY_MEMPOOL(this); - delete[] this->params.Name; - } - - void *allocate(size_t Size, size_t Alignment, bool &FromPool); - void *allocate(size_t Size, bool &FromPool); - void deallocate(void *Ptr, bool &ToPool); - - umf_memory_provider_handle_t getMemHandle() { return MemHandle; } - - std::shared_timed_mutex &getKnownSlabsMapLock() { - return KnownSlabsMapLock; - } - std::unordered_multimap &getKnownSlabs() { - return KnownSlabs; - } - - size_t SlabMinSize() { return params.SlabMinSize; }; - - umf_disjoint_pool_params_t &getParams() { return params; } - - umf_disjoint_pool_shared_limits_t *getLimits() { - if (params.SharedLimits) { - return params.SharedLimits; - } else { - return &DefaultSharedLimits; - } - }; - - void printStats(bool &TitlePrinted, size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, const std::string &Label); - - private: - Bucket &findBucket(size_t Size); - std::size_t sizeToIdx(size_t Size); -}; - -static void *memoryProviderAlloc(umf_memory_provider_handle_t hProvider, - size_t size, size_t alignment = 0) { - void *ptr; - auto ret = umfMemoryProviderAlloc(hProvider, size, alignment, &ptr); - if (ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } - annotate_memory_inaccessible(ptr, size); - return ptr; -} - -static void memoryProviderFree(umf_memory_provider_handle_t hProvider, - void *ptr) { - size_t size = 0; - - if (ptr) { - umf_alloc_info_t allocInfo = {NULL, 0, NULL}; - umf_result_t umf_result = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); - if (umf_result == UMF_RESULT_SUCCESS) { - size = allocInfo.baseSize; - } - } - - auto ret = umfMemoryProviderFree(hProvider, ptr, size); - if 
(ret != UMF_RESULT_SUCCESS) { - throw MemoryProviderError{ret}; - } -} - -bool operator==(const Slab &Lhs, const Slab &Rhs) { - return Lhs.getPtr() == Rhs.getPtr(); -} - -std::ostream &operator<<(std::ostream &Os, const Slab &Slab) { - Os << "Slab<" << Slab.getPtr() << ", " << Slab.getEnd() << ", " - << Slab.getBucket().getSize() << ">"; - return Os; -} - -Slab::Slab(Bucket &Bkt) - : // In case bucket size is not a multiple of SlabMinSize, we would have - // some padding at the end of the slab. - Chunks(Bkt.SlabMinSize() / Bkt.getSize()), NumAllocated{0}, - bucket(Bkt), SlabListIter{}, FirstFreeChunkIdx{0} { - auto SlabSize = Bkt.SlabAllocSize(); - MemPtr = memoryProviderAlloc(Bkt.getMemHandle(), SlabSize); - regSlab(*this); -} - -Slab::~Slab() { - try { - unregSlab(*this); - } catch (std::exception &e) { - LOG_ERR("DisjointPool: unexpected error: %s", e.what()); - } - - try { - memoryProviderFree(bucket.getMemHandle(), MemPtr); - } catch (MemoryProviderError &e) { - LOG_ERR("DisjointPool: error from memory provider: %d", e.code); - - if (e.code == UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC) { - const char *message = ""; - int error = 0; - - try { - umfMemoryProviderGetLastNativeError( - umfGetLastFailedMemoryProvider(), &message, &error); - LOG_ERR("Native error msg: %s, native error code: %d", message, - error); - } catch (...) { - // ignore any additional errors from logger - } - } - } -} - -// Return the index of the first available chunk, SIZE_MAX otherwise -size_t Slab::FindFirstAvailableChunkIdx() const { - // Use the first free chunk index as a hint for the search. - auto It = std::find_if(Chunks.begin() + FirstFreeChunkIdx, Chunks.end(), - [](auto x) { return !x; }); - if (It != Chunks.end()) { - return It - Chunks.begin(); - } - - return std::numeric_limits::max(); -} - -void *Slab::getChunk() { - // assert(NumAllocated != Chunks.size()); - - const size_t ChunkIdx = FindFirstAvailableChunkIdx(); - // Free chunk must exist, otherwise we would have allocated another slab - assert(ChunkIdx != (std::numeric_limits::max())); - - void *const FreeChunk = - (static_cast(getPtr())) + ChunkIdx * getChunkSize(); - Chunks[ChunkIdx] = true; - NumAllocated += 1; - - // Use the found index as the next hint - FirstFreeChunkIdx = ChunkIdx; - - return FreeChunk; -} - -void *Slab::getSlab() { return getPtr(); } - -Bucket &Slab::getBucket() { return bucket; } -const Bucket &Slab::getBucket() const { return bucket; } - -size_t Slab::getChunkSize() const { return bucket.getSize(); } - -void Slab::regSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - Map.insert({Addr, Slab}); -} - -void Slab::unregSlabByAddr(void *Addr, Slab &Slab) { - auto &Lock = Slab.getBucket().getAllocCtx().getKnownSlabsMapLock(); - auto &Map = Slab.getBucket().getAllocCtx().getKnownSlabs(); - - std::lock_guard Lg(Lock); - - auto Slabs = Map.equal_range(Addr); - // At least the must get the current slab from the map. 
- assert(Slabs.first != Slabs.second && "Slab is not found"); - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - if (It->second == Slab) { - Map.erase(It); - return; - } - } - - assert(false && "Slab is not found"); -} - -void Slab::regSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - regSlabByAddr(StartAddr, Slab); - regSlabByAddr(EndAddr, Slab); -} - -void Slab::unregSlab(Slab &Slab) { - void *StartAddr = AlignPtrDown(Slab.getPtr(), bucket.SlabMinSize()); - void *EndAddr = static_cast(StartAddr) + bucket.SlabMinSize(); - - unregSlabByAddr(StartAddr, Slab); - unregSlabByAddr(EndAddr, Slab); -} - -void Slab::freeChunk(void *Ptr) { - // This method should be called through bucket(since we might remove the slab - // as a result), therefore all locks are done on that level. - - // Make sure that we're in the right slab - assert(Ptr >= getPtr() && Ptr < getEnd()); - - // Even if the pointer p was previously aligned, it's still inside the - // corresponding chunk, so we get the correct index here. - auto ChunkIdx = (static_cast(Ptr) - static_cast(MemPtr)) / - getChunkSize(); - - // Make sure that the chunk was allocated - assert(Chunks[ChunkIdx] && "double free detected"); - - Chunks[ChunkIdx] = false; - NumAllocated -= 1; - - if (ChunkIdx < FirstFreeChunkIdx) { - FirstFreeChunkIdx = ChunkIdx; - } -} - -void *Slab::getEnd() const { - return static_cast(getPtr()) + bucket.SlabMinSize(); -} - -bool Slab::hasAvail() { return NumAllocated != getNumChunks(); } - -// If a slab was available in the pool then note that the current pooled -// size has reduced by the size of a slab in this bucket. -void Bucket::decrementPool(bool &FromPool) { - FromPool = true; - updateStats(1, -1); - OwnAllocCtx.getLimits()->TotalSize -= SlabAllocSize(); -} - -auto Bucket::getAvailFullSlab(bool &FromPool) - -> decltype(AvailableSlabs.begin()) { - // Return a slab that will be used for a single allocation. - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - FromPool = false; - updateStats(1, 0); - } else { - decrementPool(FromPool); - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getSlab(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailFullSlab(FromPool); - auto *FreeSlab = (*SlabIt)->getSlab(); - auto It = - UnavailableSlabs.insert(UnavailableSlabs.begin(), std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - return FreeSlab; -} - -void Bucket::freeSlab(Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - if (CanPool(ToPool)) { - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - (*It)->setIterator(It); - } else { - UnavailableSlabs.erase(SlabIter); - } -} - -auto Bucket::getAvailSlab(bool &FromPool) -> decltype(AvailableSlabs.begin()) { - - if (AvailableSlabs.size() == 0) { - auto It = AvailableSlabs.insert(AvailableSlabs.begin(), - std::make_unique(*this)); - (*It)->setIterator(It); - - updateStats(1, 0); - FromPool = false; - } else { - if ((*(AvailableSlabs.begin()))->getNumAllocated() == 0) { - // If this was an empty slab, it was in the pool. - // Now it is no longer in the pool, so update count. 
- --chunkedSlabsInPool; - decrementPool(FromPool); - } else { - // Allocation from existing slab is treated as from pool for statistics. - FromPool = true; - } - } - - return AvailableSlabs.begin(); -} - -void *Bucket::getChunk(bool &FromPool) { - std::lock_guard Lg(BucketLock); - - auto SlabIt = getAvailSlab(FromPool); - auto *FreeChunk = (*SlabIt)->getChunk(); - - // If the slab is full, move it to unavailable slabs and update its iterator - if (!((*SlabIt)->hasAvail())) { - auto It = UnavailableSlabs.insert(UnavailableSlabs.begin(), - std::move(*SlabIt)); - AvailableSlabs.erase(SlabIt); - (*It)->setIterator(It); - } - - return FreeChunk; -} - -void Bucket::freeChunk(void *Ptr, Slab &Slab, bool &ToPool) { - std::lock_guard Lg(BucketLock); - - Slab.freeChunk(Ptr); - - onFreeChunk(Slab, ToPool); -} - -// The lock must be acquired before calling this method -void Bucket::onFreeChunk(Slab &Slab, bool &ToPool) { - ToPool = true; - - // In case if the slab was previously full and now has 1 available - // chunk, it should be moved to the list of available slabs - if (Slab.getNumAllocated() == (Slab.getNumChunks() - 1)) { - auto SlabIter = Slab.getIterator(); - assert(SlabIter != UnavailableSlabs.end()); - - auto It = - AvailableSlabs.insert(AvailableSlabs.begin(), std::move(*SlabIter)); - UnavailableSlabs.erase(SlabIter); - - (*It)->setIterator(It); - } - - // Check if slab is empty, and pool it if we can. - if (Slab.getNumAllocated() == 0) { - // The slab is now empty. - // If pool has capacity then put the slab in the pool. - // The ToPool parameter indicates whether the Slab will be put in the - // pool or freed. - if (!CanPool(ToPool)) { - // Note: since the slab is stored as unique_ptr, just remove it from - // the list to destroy the object. - auto It = Slab.getIterator(); - assert(It != AvailableSlabs.end()); - AvailableSlabs.erase(It); - } - } -} - -bool Bucket::CanPool(bool &ToPool) { - size_t NewFreeSlabsInBucket; - // Check if this bucket is used in chunked form or as full slabs. - bool chunkedBucket = getSize() <= ChunkCutOff(); - if (chunkedBucket) { - NewFreeSlabsInBucket = chunkedSlabsInPool + 1; - } else { - NewFreeSlabsInBucket = AvailableSlabs.size() + 1; - } - if (Capacity() >= NewFreeSlabsInBucket) { - size_t PoolSize = OwnAllocCtx.getLimits()->TotalSize; - while (true) { - size_t NewPoolSize = PoolSize + SlabAllocSize(); - - if (OwnAllocCtx.getLimits()->MaxSize < NewPoolSize) { - break; - } - - if (OwnAllocCtx.getLimits()->TotalSize.compare_exchange_strong( - PoolSize, NewPoolSize)) { - if (chunkedBucket) { - ++chunkedSlabsInPool; - } - - updateStats(-1, 1); - ToPool = true; - return true; - } - } - } - - updateStats(-1, 0); - ToPool = false; - return false; -} - -umf_memory_provider_handle_t Bucket::getMemHandle() { - return OwnAllocCtx.getMemHandle(); -} - -size_t Bucket::SlabMinSize() { return OwnAllocCtx.getParams().SlabMinSize; } - -size_t Bucket::SlabAllocSize() { return std::max(getSize(), SlabMinSize()); } - -size_t Bucket::Capacity() { - // For buckets used in chunked mode, just one slab in pool is sufficient. - // For larger buckets, the capacity could be more and is adjustable. 
- if (getSize() <= ChunkCutOff()) { - return 1; - } else { - return OwnAllocCtx.getParams().Capacity; - } -} - -size_t Bucket::MaxPoolableSize() { - return OwnAllocCtx.getParams().MaxPoolableSize; -} - -size_t Bucket::ChunkCutOff() { return SlabMinSize() / 2; } - -void Bucket::countAlloc(bool FromPool) { - ++allocCount; - if (FromPool) { - ++allocPoolCount; - } -} - -void Bucket::countFree() { ++freeCount; } - -void Bucket::updateStats(int InUse, int InPool) { - if (OwnAllocCtx.getParams().PoolTrace == 0) { - return; - } - currSlabsInUse += InUse; - maxSlabsInUse = std::max(currSlabsInUse, maxSlabsInUse); - currSlabsInPool += InPool; - maxSlabsInPool = std::max(currSlabsInPool, maxSlabsInPool); - // Increment or decrement current pool sizes based on whether - // slab was added to or removed from pool. - OwnAllocCtx.getParams().CurPoolSize += InPool * SlabAllocSize(); -} - -void Bucket::printStats(bool &TitlePrinted, const std::string &Label) { - if (allocCount) { - if (!TitlePrinted) { - std::cout << Label << " memory statistics\n"; - std::cout << std::setw(14) << "Bucket Size" << std::setw(12) - << "Allocs" << std::setw(12) << "Frees" << std::setw(18) - << "Allocs from Pool" << std::setw(20) - << "Peak Slabs in Use" << std::setw(21) - << "Peak Slabs in Pool" << std::endl; - TitlePrinted = true; - } - std::cout << std::setw(14) << getSize() << std::setw(12) << allocCount - << std::setw(12) << freeCount << std::setw(18) - << allocPoolCount << std::setw(20) << maxSlabsInUse - << std::setw(21) << maxSlabsInPool << std::endl; - } -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - FromPool = false; - if (Size > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(Size); - - if (Size > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, Ptr, Size); - annotate_memory_undefined(Ptr, Bucket.getSize()); - - return Ptr; -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -void *DisjointPool::AllocImpl::allocate(size_t Size, size_t Alignment, - bool &FromPool) try { - void *Ptr; - - if (Size == 0) { - return nullptr; - } - - if (Alignment <= 1) { - return allocate(Size, FromPool); - } - - size_t AlignedSize; - if (Alignment <= ProviderMinPageSize) { - // This allocation will be served from a Bucket which size is multiple - // of Alignment and Slab address is aligned to ProviderMinPageSize - // so the address will be properly aligned. - AlignedSize = (Size > 1) ? AlignUp(Size, Alignment) : Alignment; - } else { - // Slabs are only aligned to ProviderMinPageSize, we need to compensate - // for that in case the allocation is within pooling limit. - // TODO: consider creating properly-aligned Slabs on demand - AlignedSize = Size + Alignment - 1; - } - - // Check if requested allocation size is within pooling limit. - // If not, just request aligned pointer from the system. 
- FromPool = false; - if (AlignedSize > getParams().MaxPoolableSize) { - Ptr = memoryProviderAlloc(getMemHandle(), Size, Alignment); - annotate_memory_undefined(Ptr, Size); - return Ptr; - } - - auto &Bucket = findBucket(AlignedSize); - - if (AlignedSize > Bucket.ChunkCutOff()) { - Ptr = Bucket.getSlab(FromPool); - } else { - Ptr = Bucket.getChunk(FromPool); - } - - if (getParams().PoolTrace > 1) { - Bucket.countAlloc(FromPool); - } - - VALGRIND_DO_MEMPOOL_ALLOC(this, AlignPtrUp(Ptr, Alignment), Size); - annotate_memory_undefined(AlignPtrUp(Ptr, Alignment), Size); - return AlignPtrUp(Ptr, Alignment); -} catch (MemoryProviderError &e) { - umf::getPoolLastStatusRef() = e.code; - return nullptr; -} - -std::size_t DisjointPool::AllocImpl::sizeToIdx(size_t Size) { - assert(Size <= CutOff && "Unexpected size"); - assert(Size > 0 && "Unexpected size"); - - size_t MinBucketSize = (size_t)1 << MinBucketSizeExp; - if (Size < MinBucketSize) { - return 0; - } - - // Get the position of the leftmost set bit. - size_t position = getLeftmostSetBitPos(Size); - - auto isPowerOf2 = 0 == (Size & (Size - 1)); - auto largerThanHalfwayBetweenPowersOf2 = - !isPowerOf2 && bool((Size - 1) & (uint64_t(1) << (position - 1))); - auto index = (position - MinBucketSizeExp) * 2 + (int)(!isPowerOf2) + - (int)largerThanHalfwayBetweenPowersOf2; - - return index; -} - -Bucket &DisjointPool::AllocImpl::findBucket(size_t Size) { - auto calculatedIdx = sizeToIdx(Size); - assert((*(Buckets[calculatedIdx])).getSize() >= Size); - if (calculatedIdx > 0) { - assert((*(Buckets[calculatedIdx - 1])).getSize() < Size); - } - - return *(Buckets[calculatedIdx]); -} - -void DisjointPool::AllocImpl::deallocate(void *Ptr, bool &ToPool) { - auto *SlabPtr = AlignPtrDown(Ptr, SlabMinSize()); - - // Lock the map on read - std::shared_lock Lk(getKnownSlabsMapLock()); - - ToPool = false; - auto Slabs = getKnownSlabs().equal_range(SlabPtr); - if (Slabs.first == Slabs.second) { - Lk.unlock(); - memoryProviderFree(getMemHandle(), Ptr); - return; - } - - for (auto It = Slabs.first; It != Slabs.second; ++It) { - // The slab object won't be deleted until it's removed from the map which is - // protected by the lock, so it's safe to access it here. - auto &Slab = It->second; - if (Ptr >= Slab.getPtr() && Ptr < Slab.getEnd()) { - // Unlock the map before freeing the chunk, it may be locked on write - // there - Lk.unlock(); - auto &Bucket = Slab.getBucket(); - - if (getParams().PoolTrace > 1) { - Bucket.countFree(); - } - - VALGRIND_DO_MEMPOOL_FREE(this, Ptr); - annotate_memory_inaccessible(Ptr, Bucket.getSize()); - if (Bucket.getSize() <= Bucket.ChunkCutOff()) { - Bucket.freeChunk(Ptr, Slab, ToPool); - } else { - Bucket.freeSlab(Slab, ToPool); - } - - return; - } - } - - Lk.unlock(); - // There is a rare case when we have a pointer from system allocation next - // to some slab with an entry in the map. So we find a slab - // but the range checks fail. 
- memoryProviderFree(getMemHandle(), Ptr); -} - -void DisjointPool::AllocImpl::printStats(bool &TitlePrinted, - size_t &HighBucketSize, - size_t &HighPeakSlabsInUse, - const std::string &MTName) { - HighBucketSize = 0; - HighPeakSlabsInUse = 0; - for (auto &B : Buckets) { - (*B).printStats(TitlePrinted, MTName); - HighPeakSlabsInUse = std::max((*B).maxSlabsInUse, HighPeakSlabsInUse); - if ((*B).allocCount) { - HighBucketSize = std::max((*B).SlabAllocSize(), HighBucketSize); - } - } -} - -umf_result_t -DisjointPool::initialize(umf_memory_provider_handle_t provider, - umf_disjoint_pool_params_handle_t parameters) { - if (!provider) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - // MinBucketSize parameter must be a power of 2 for bucket sizes - // to generate correctly. - if (!parameters->MinBucketSize || - !((parameters->MinBucketSize & (parameters->MinBucketSize - 1)) == 0)) { - return UMF_RESULT_ERROR_INVALID_ARGUMENT; - } - - impl = std::make_unique(provider, parameters); - return UMF_RESULT_SUCCESS; -} - -void *DisjointPool::malloc(size_t size) { // For full-slab allocations indicates - // whether slab is from Pool. - bool FromPool; - auto Ptr = impl->allocate(size, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes from " << (FromPool ? "Pool" : "Provider") << " ->" - << Ptr << std::endl; - } - return Ptr; -} - -void *DisjointPool::calloc(size_t, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::realloc(void *, size_t) { - // Not supported - umf::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; - return NULL; -} - -void *DisjointPool::aligned_malloc(size_t size, size_t alignment) { - bool FromPool; - auto Ptr = impl->allocate(size, alignment, FromPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Allocated " << std::setw(8) << size << " " << MT - << " bytes aligned at " << alignment << " from " - << (FromPool ? "Pool" : "Provider") << " ->" << Ptr - << std::endl; - } - return Ptr; -} - -size_t DisjointPool::malloc_usable_size(void *) { - // Not supported - return 0; -} - -umf_result_t DisjointPool::free(void *ptr) try { - bool ToPool; - impl->deallocate(ptr, ToPool); - - if (impl->getParams().PoolTrace > 2) { - auto MT = impl->getParams().Name; - std::cout << "Freed " << MT << " " << ptr << " to " - << (ToPool ? 
"Pool" : "Provider") - << ", Current total pool size " - << impl->getLimits()->TotalSize.load() - << ", Current pool size for " << MT << " " - << impl->getParams().CurPoolSize << "\n"; - } - return UMF_RESULT_SUCCESS; -} catch (MemoryProviderError &e) { - return e.code; -} - -umf_result_t DisjointPool::get_last_allocation_error() { - return umf::getPoolLastStatusRef(); -} - -DisjointPool::DisjointPool() {} - -// Define destructor for use with unique_ptr -DisjointPool::~DisjointPool() { - bool TitlePrinted = false; - size_t HighBucketSize; - size_t HighPeakSlabsInUse; - if (impl->getParams().PoolTrace > 1) { - auto name = impl->getParams().Name; - try { // cannot throw in destructor - impl->printStats(TitlePrinted, HighBucketSize, HighPeakSlabsInUse, - name); - if (TitlePrinted) { - std::cout << "Current Pool Size " - << impl->getLimits()->TotalSize.load() << std::endl; - std::cout << "Suggested Setting=;" - << std::string(1, (char)tolower(name[0])) - << std::string(name + 1) << ":" << HighBucketSize - << "," << HighPeakSlabsInUse << ",64K" << std::endl; - } - } catch (...) { // ignore exceptions - } - } -} - -static umf_memory_pool_ops_t UMF_DISJOINT_POOL_OPS = - umf::poolMakeCOps(); - -umf_memory_pool_ops_t *umfDisjointPoolOps(void) { - return &UMF_DISJOINT_POOL_OPS; -} diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h new file mode 100644 index 000000000..56a25e611 --- /dev/null +++ b/src/pool/pool_disjoint_internal.h @@ -0,0 +1,176 @@ +/* + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +#ifndef UMF_POOL_DISJOINT_INTERNAL_H +#define UMF_POOL_DISJOINT_INTERNAL_H 1 + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "critnib/critnib.h" +#include "uthash/utlist.h" + +#include "base_alloc_global.h" +#include "provider/provider_tracking.h" +#include "utils_common.h" +#include "utils_concurrency.h" +#include "utils_log.h" +#include "utils_math.h" + +typedef struct bucket_t bucket_t; +typedef struct slab_t slab_t; +typedef struct slab_list_item_t slab_list_item_t; +typedef struct disjoint_pool_t disjoint_pool_t; + +typedef struct bucket_t { + size_t size; + + // Linked list of slabs which have at least 1 available chunk. + // We always count available slabs as an optimization. + slab_list_item_t *available_slabs; + size_t available_slabs_num; + + // Linked list of slabs with 0 available chunks + slab_list_item_t *unavailable_slabs; + + // Protects the bucket and all the corresponding slabs + utils_mutex_t bucket_lock; + + // Reference to the allocator context, used to access memory allocation + // routines, slab map and etc. + disjoint_pool_t *pool; + + umf_disjoint_pool_shared_limits_handle_t shared_limits; + + // For buckets used in chunked mode, a counter of slabs in the pool. + // For allocations that use an entire slab each, the entries in the + // "available" list are entries in the pool. Each slab is available for a + // new allocation. The size of the available list is the size of the pool. + // + // For allocations that use slabs in chunked mode, slabs will be in the + // "available" list if any one or more of their chunks are free. The entire + // slab is not necessarily free, just some chunks in the slab are free. To + // implement pooling, we will allow one slab in the "available" list to be + // entirely empty, and treat this slab as "in the pool". 
+ // When a slab becomes entirely free, we must decide whether to return it + // to the provider or keep it allocated. We keep a counter of entirely + // empty slabs within the "available" list to speed up the process of + // checking if a slab in this bucket is already pooled. + size_t chunked_slabs_in_pool; + + // Statistics + size_t alloc_count; + size_t alloc_pool_count; + size_t free_count; + size_t curr_slabs_in_use; + size_t curr_slabs_in_pool; + size_t max_slabs_in_pool; + size_t max_slabs_in_use; +} bucket_t; + +// Represents the allocated memory block of size 'slab_min_size' +// Internally, it splits the memory block into chunks. The number of +// chunks depends on the size of a Bucket which created the Slab. +// Note: Bucket's methods are responsible for thread safety of Slab access, +// so no locking happens here. +typedef struct slab_t { + // Pointer to the allocated memory of slab_min_size bytes + void *mem_ptr; + size_t slab_size; + + // Represents the current state of each chunk: if the bit is set, the + // chunk is allocated; otherwise, the chunk is free for allocation + bool *chunks; + size_t num_chunks_total; + + // Total number of allocated chunks at the moment. + size_t num_chunks_allocated; + + // The bucket which the slab belongs to + bucket_t *bucket; + + // Hints where to start search for free chunk in a slab + size_t first_free_chunk_idx; + + // Store iterator to the corresponding node in avail/unavail list + // to achieve O(1) removal + slab_list_item_t *iter; +} slab_t; + +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + +typedef struct umf_disjoint_pool_shared_limits_t { + size_t max_size; + size_t total_size; // requires atomic access +} umf_disjoint_pool_shared_limits_t; + +typedef struct umf_disjoint_pool_params_t { + // Minimum allocation size that will be requested from the memory provider. + size_t slab_min_size; + + // Allocations up to this limit will be subject to chunking/pooling + size_t max_poolable_size; + + // When pooling, each bucket will hold a max of 'capacity' unfreed slabs + size_t capacity; + + // Holds the minimum bucket size valid for allocation of a memory type. + // This value must be a power of 2. + size_t min_bucket_size; + + // Holds size of the pool managed by the allocator. + size_t cur_pool_size; + + // Whether to print pool usage statistics + int pool_trace; + + // Memory limits that can be shared between multiple pool instances, + // i.e. if multiple pools use the same shared_limits sum of those pools' + // sizes cannot exceed max_size. 
+ umf_disjoint_pool_shared_limits_handle_t shared_limits; + + // Name used in traces + char *name; +} umf_disjoint_pool_params_t; + +typedef struct disjoint_pool_t { + // Keep the list of known slabs to quickly find required one during the + // free() + critnib *known_slabs; // (void *, slab_t *) + + // Handle to the memory provider + umf_memory_provider_handle_t provider; + + // Array of bucket_t* + bucket_t **buckets; + size_t buckets_num; + + // Configuration for this instance + umf_disjoint_pool_params_t params; + + umf_disjoint_pool_shared_limits_handle_t default_shared_limits; + + // Used in algorithm for finding buckets + size_t min_bucket_size_exp; + + // Coarse-grain allocation min alignment + size_t provider_min_page_size; +} disjoint_pool_t; + +#endif // UMF_POOL_DISJOINT_INTERNAL_H diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index d8ea9bf6a..7824e74af 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -38,6 +38,8 @@ typedef enum umf_purge_advise_t { expression; \ } while (0) +#define IS_POWER_OF_2(value) ((value) != 0 && ((value) & ((value)-1)) == 0) + #define IS_ALIGNED(value, align) \ ((align == 0 || (((value) & ((align)-1)) == 0))) #define IS_NOT_ALIGNED(value, align) \ diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 287f5d12a..910c859b0 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -11,6 +11,7 @@ #define UMF_UTILS_CONCURRENCY_H 1 #include +#include #ifdef _WIN32 #include @@ -45,11 +46,27 @@ typedef struct utils_mutex_t { } utils_mutex_t; size_t utils_mutex_get_size(void); -utils_mutex_t *utils_mutex_init(void *ptr); +utils_mutex_t *utils_mutex_init(utils_mutex_t *ptr); void utils_mutex_destroy_not_free(utils_mutex_t *m); int utils_mutex_lock(utils_mutex_t *mutex); int utils_mutex_unlock(utils_mutex_t *mutex); +typedef struct utils_rwlock_t { +#ifdef _WIN32 + // Slim Read/Wrtiter lock + SRWLOCK lock; +#else + pthread_rwlock_t rwlock; +#endif +} utils_rwlock_t; + +utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *ptr); +void utils_rwlock_destroy_not_free(utils_rwlock_t *rwlock); +int utils_read_lock(utils_rwlock_t *rwlock); +int utils_write_lock(utils_rwlock_t *rwlock); +int utils_read_unlock(utils_rwlock_t *rwlock); +int utils_write_unlock(utils_rwlock_t *rwlock); + #if defined(_WIN32) #define UTIL_ONCE_FLAG INIT_ONCE #define UTIL_ONCE_FLAG_INIT INIT_ONCE_STATIC_INIT diff --git a/src/utils/utils_posix_concurrency.c b/src/utils/utils_posix_concurrency.c index 531e09c10..44a317361 100644 --- a/src/utils/utils_posix_concurrency.c +++ b/src/utils/utils_posix_concurrency.c @@ -11,10 +11,11 @@ #include #include "utils_concurrency.h" +#include "utils_log.h" size_t utils_mutex_get_size(void) { return sizeof(pthread_mutex_t); } -utils_mutex_t *utils_mutex_init(void *ptr) { +utils_mutex_t *utils_mutex_init(utils_mutex_t *ptr) { pthread_mutex_t *mutex = (pthread_mutex_t *)ptr; int ret = pthread_mutex_init(mutex, NULL); return ret == 0 ? 
((utils_mutex_t *)mutex) : NULL; @@ -23,7 +24,9 @@ utils_mutex_t *utils_mutex_init(void *ptr) { void utils_mutex_destroy_not_free(utils_mutex_t *m) { pthread_mutex_t *mutex = (pthread_mutex_t *)m; int ret = pthread_mutex_destroy(mutex); - (void)ret; // TODO: add logging + if (ret) { + LOG_ERR("pthread_mutex_destroy failed"); + } } int utils_mutex_lock(utils_mutex_t *m) { @@ -37,3 +40,33 @@ int utils_mutex_unlock(utils_mutex_t *m) { void utils_init_once(UTIL_ONCE_FLAG *flag, void (*oneCb)(void)) { pthread_once(flag, oneCb); } + +utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *ptr) { + pthread_rwlock_t *rwlock = (pthread_rwlock_t *)ptr; + int ret = pthread_rwlock_init(rwlock, NULL); + return ret == 0 ? ((utils_rwlock_t *)rwlock) : NULL; +} + +void utils_rwlock_destroy_not_free(utils_rwlock_t *ptr) { + pthread_rwlock_t *rwlock = (pthread_rwlock_t *)ptr; + int ret = pthread_rwlock_destroy(rwlock); + if (ret) { + LOG_ERR("pthread_rwlock_destroy failed"); + } +} + +int utils_read_lock(utils_rwlock_t *rwlock) { + return pthread_rwlock_rdlock((pthread_rwlock_t *)rwlock); +} + +int utils_write_lock(utils_rwlock_t *rwlock) { + return pthread_rwlock_wrlock((pthread_rwlock_t *)rwlock); +} + +int utils_read_unlock(utils_rwlock_t *rwlock) { + return pthread_rwlock_unlock((pthread_rwlock_t *)rwlock); +} + +int utils_write_unlock(utils_rwlock_t *rwlock) { + return pthread_rwlock_unlock((pthread_rwlock_t *)rwlock); +} diff --git a/src/utils/utils_windows_concurrency.c b/src/utils/utils_windows_concurrency.c index e2cc574a9..faa302be3 100644 --- a/src/utils/utils_windows_concurrency.c +++ b/src/utils/utils_windows_concurrency.c @@ -11,35 +11,61 @@ size_t utils_mutex_get_size(void) { return sizeof(utils_mutex_t); } -utils_mutex_t *utils_mutex_init(void *ptr) { - utils_mutex_t *mutex_internal = (utils_mutex_t *)ptr; - InitializeCriticalSection(&mutex_internal->lock); - return (utils_mutex_t *)mutex_internal; +utils_mutex_t *utils_mutex_init(utils_mutex_t *mutex) { + InitializeCriticalSection(&mutex->lock); + return mutex; } void utils_mutex_destroy_not_free(utils_mutex_t *mutex) { - utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex; - DeleteCriticalSection(&mutex_internal->lock); + DeleteCriticalSection(&mutex->lock); } int utils_mutex_lock(utils_mutex_t *mutex) { - utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex; - EnterCriticalSection(&mutex_internal->lock); + EnterCriticalSection(&mutex->lock); - if (mutex_internal->lock.RecursionCount > 1) { - LeaveCriticalSection(&mutex_internal->lock); + if (mutex->lock.RecursionCount > 1) { + LeaveCriticalSection(&mutex->lock); /* deadlock detected */ - return -1; + abort(); } return 0; } int utils_mutex_unlock(utils_mutex_t *mutex) { - utils_mutex_t *mutex_internal = (utils_mutex_t *)mutex; - LeaveCriticalSection(&mutex_internal->lock); + LeaveCriticalSection(&mutex->lock); return 0; } +utils_rwlock_t *utils_rwlock_init(utils_rwlock_t *rwlock) { + InitializeSRWLock(&rwlock->lock); + return 0; // never fails +} + +void utils_rwlock_destroy_not_free(utils_rwlock_t *rwlock) { + // there is no call to destroy SWR lock + (void)rwlock; +} + +int utils_read_lock(utils_rwlock_t *rwlock) { + AcquireSRWLockShared(&rwlock->lock); + return 0; // never fails +} + +int utils_write_lock(utils_rwlock_t *rwlock) { + AcquireSRWLockExclusive(&rwlock->lock); + return 0; // never fails +} + +int utils_read_unlock(utils_rwlock_t *rwlock) { + ReleaseSRWLockShared(&rwlock->lock); + return 0; // never fails +} + +int utils_write_unlock(utils_rwlock_t *rwlock) { + 
ReleaseSRWLockExclusive(&rwlock->lock); + return 0; // never fails +} + static BOOL CALLBACK initOnceCb(PINIT_ONCE InitOnce, PVOID Parameter, PVOID *lpContext) { (void)InitOnce; // unused diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index cdbe2425f..ecdde95e1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -71,10 +71,6 @@ function(build_umf_test) set(CPL_DEFS ${CPL_DEFS} UMF_POOL_SCALABLE_ENABLED=1) endif() - if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - set(CPL_DEFS ${CPL_DEFS} UMF_POOL_DISJOINT_ENABLED=1) - endif() - set(TEST_LIBS umf_test_common ${ARG_LIBS} @@ -192,10 +188,6 @@ if(UMF_BUILD_SHARED_LIBRARY) endif() endif() -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - set(LIB_DISJOINT_POOL disjoint_pool) -endif() - if(UMF_BUILD_SHARED_LIBRARY) # if build as shared library, ba symbols won't be visible in tests set(BA_SOURCES_FOR_TEST ${BA_SOURCES}) @@ -237,32 +229,29 @@ add_umf_test( SRCS coarse_lib.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST} coarse) -if(UMF_BUILD_LIBUMF_POOL_DISJOINT) - add_umf_test( - NAME disjointPool - SRCS pools/disjoint_pool.cpp malloc_compliance_tests.cpp - LIBS disjoint_pool) +add_umf_test( + NAME disjoint_pool + SRCS pools/disjoint_pool.cpp malloc_compliance_tests.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + +add_umf_test( + NAME c_api_disjoint_pool + SRCS c_api/disjoint_pool.c ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + +if(LINUX AND (NOT UMF_DISABLE_HWLOC)) + # this test uses the file provider add_umf_test( - NAME c_api_disjoint_pool - SRCS c_api/disjoint_pool.c - LIBS disjoint_pool) - if(LINUX AND (NOT UMF_DISABLE_HWLOC)) - # this test uses the file provider - add_umf_test( - NAME disjointPoolFileProv - SRCS disjointPoolFileProv.cpp - LIBS disjoint_pool) - endif() + NAME disjoint_pool_file_prov + SRCS disjoint_pool_file_prov.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) endif() -if(UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_POOL_JEMALLOC_ENABLED +if(UMF_POOL_JEMALLOC_ENABLED AND UMF_POOL_SCALABLE_ENABLED AND (NOT UMF_DISABLE_HWLOC)) - add_umf_test( - NAME c_api_multi_pool - SRCS c_api/multi_pool.c - LIBS disjoint_pool) + add_umf_test(NAME c_api_multi_pool SRCS c_api/multi_pool.c) endif() if(UMF_POOL_JEMALLOC_ENABLED AND (NOT UMF_DISABLE_HWLOC)) @@ -293,7 +282,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME provider_os_memory SRCS provider_os_memory.cpp ${BA_SOURCES_FOR_TEST} - LIBS ${UMF_UTILS_FOR_TEST} ${LIB_DISJOINT_POOL}) + LIBS ${UMF_UTILS_FOR_TEST}) add_umf_test( NAME provider_os_memory_multiple_numa_nodes SRCS provider_os_memory_multiple_numa_nodes.cpp @@ -618,37 +607,33 @@ if(LINUX) # TODO add IPC tests for CUDA - if(UMF_BUILD_GPU_TESTS - AND UMF_BUILD_LEVEL_ZERO_PROVIDER - AND UMF_BUILD_LIBUMF_POOL_DISJOINT) + if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) build_umf_test( NAME ipc_level_zero_prov_consumer SRCS providers/ipc_level_zero_prov_consumer.c common/ipc_common.c providers/ipc_level_zero_prov_common.c ${UMF_UTILS_DIR}/utils_level_zero.cpp - LIBS ze_loader disjoint_pool ${UMF_UTILS_FOR_TEST}) + LIBS ze_loader ${UMF_UTILS_FOR_TEST}) build_umf_test( NAME ipc_level_zero_prov_producer SRCS providers/ipc_level_zero_prov_producer.c common/ipc_common.c providers/ipc_level_zero_prov_common.c ${UMF_UTILS_DIR}/utils_level_zero.cpp - LIBS ze_loader disjoint_pool ${UMF_UTILS_FOR_TEST}) + LIBS ze_loader ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_level_zero_prov SRC_DIR providers) endif() - if(UMF_BUILD_GPU_TESTS - AND 
UMF_BUILD_CUDA_PROVIDER - AND UMF_BUILD_LIBUMF_POOL_DISJOINT) + if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) build_umf_test( NAME ipc_cuda_prov_consumer SRCS providers/ipc_cuda_prov_consumer.c common/ipc_common.c providers/ipc_cuda_prov_common.c providers/cuda_helpers.cpp - LIBS cuda disjoint_pool ${UMF_UTILS_FOR_TEST}) + LIBS cuda ${UMF_UTILS_FOR_TEST}) build_umf_test( NAME ipc_cuda_prov_producer SRCS providers/ipc_cuda_prov_producer.c common/ipc_common.c providers/ipc_cuda_prov_common.c providers/cuda_helpers.cpp - LIBS cuda disjoint_pool ${UMF_UTILS_FOR_TEST}) + LIBS cuda ${UMF_UTILS_FOR_TEST}) add_umf_ipc_test(TEST ipc_cuda_prov SRC_DIR providers) endif() else() @@ -701,41 +686,34 @@ if(LINUX ) endif() - if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) + if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLES ${EXAMPLES} level_zero_shared_memory) else() message( STATUS - "GPU level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES, " - "UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT " - "to be turned ON - skipping") + "GPU level zero shared memory example requires UMF_BUILD_GPU_EXAMPLES and " + "UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping") endif() if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT AND UMF_BUILD_CUDA_PROVIDER AND UMF_CUDA_ENABLED) set(EXAMPLES ${EXAMPLES} cuda_shared_memory) else() message( STATUS - "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES, " - "UMF_BUILD_CUDA_PROVIDER, UMF_BUILD_LIBUMF_POOL_DISJOINT " - "to be turned ON and installed CUDA libraries - skipping") + "GPU CUDA shared memory example requires UMF_BUILD_GPU_EXAMPLES " + "and UMF_BUILD_CUDA_PROVIDER to be turned ON and installed CUDA " + "libraries - skipping") endif() # TODO add IPC examples for CUDA - if(UMF_BUILD_GPU_EXAMPLES - AND UMF_BUILD_LIBUMF_POOL_DISJOINT - AND UMF_BUILD_LEVEL_ZERO_PROVIDER) + if(UMF_BUILD_GPU_EXAMPLES AND UMF_BUILD_LEVEL_ZERO_PROVIDER) set(EXAMPLES ${EXAMPLES} ipc_level_zero) else() message( - STATUS - "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES, UMF_BUILD_LEVEL_ZERO_PROVIDER and UMF_BUILD_LIBUMF_POOL_DISJOINT to be turned ON - skipping" - ) + STATUS "IPC Level Zero example requires UMF_BUILD_GPU_EXAMPLES and " + "UMF_BUILD_LEVEL_ZERO_PROVIDER to be turned ON - skipping") endif() if(UMF_POOL_SCALABLE_ENABLED) diff --git a/test/c_api/disjoint_pool.c b/test/c_api/disjoint_pool.c index 4d4634def..b529497c8 100644 --- a/test/c_api/disjoint_pool.c +++ b/test/c_api/disjoint_pool.c @@ -1,10 +1,11 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include -#include "pool_disjoint.h" +#include + #include "provider_null.h" #include "test_helpers.h" #include "test_ut_asserts.h" diff --git a/test/disjointPoolFileProv.cpp b/test/disjoint_pool_file_prov.cpp similarity index 99% rename from test/disjointPoolFileProv.cpp rename to test/disjoint_pool_file_prov.cpp index 383487a87..b874d2a49 100644 --- a/test/disjointPoolFileProv.cpp +++ b/test/disjoint_pool_file_prov.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index 025f546be..dad960187 100644 --- a/test/pools/disjoint_pool.cpp +++ b/test/pools/disjoint_pool.cpp @@ -4,9 +4,11 @@ #include +#include + #include "pool.hpp" +#include "pool/pool_disjoint_internal.h" #include "poolFixtures.hpp" -#include "pool_disjoint.h" #include "provider.hpp" #include "provider_null.h" #include "provider_trace.h" @@ -57,11 +59,130 @@ umf_result_t poolConfigDestroy(void *config) { using umf_test::test; using namespace umf_test; +TEST_F(test, internals) { + static umf_result_t expectedResult = UMF_RESULT_SUCCESS; + struct memory_provider : public umf_test::provider_base_t { + umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept { + *ptr = umf_ba_global_aligned_alloc(size, alignment); + return UMF_RESULT_SUCCESS; + } + + umf_result_t free(void *ptr, [[maybe_unused]] size_t size) noexcept { + // do the actual free only when we expect the success + if (expectedResult == UMF_RESULT_SUCCESS) { + umf_ba_global_free(ptr); + } + return expectedResult; + } + + umf_result_t + get_min_page_size([[maybe_unused]] void *ptr, + [[maybe_unused]] size_t *pageSize) noexcept { + *pageSize = 1024; + return UMF_RESULT_SUCCESS; + } + }; + umf_memory_provider_ops_t provider_ops = + umf::providerMakeCOps(); + + auto providerUnique = + wrapProviderUnique(createProviderChecked(&provider_ops, nullptr)); + + umf_memory_provider_handle_t provider_handle; + provider_handle = providerUnique.get(); + + umf_disjoint_pool_params_handle_t params = + (umf_disjoint_pool_params_handle_t)defaultPoolConfig(); + // set to maximum tracing + params->pool_trace = 3; + params->max_poolable_size = 1024 * 1024; + + // in "internals" test we use ops interface to directly manipulate the pool + // structure + umf_memory_pool_ops_t *ops = umfDisjointPoolOps(); + EXPECT_NE(ops, nullptr); + + disjoint_pool_t *pool; + umf_result_t res = ops->initialize(provider_handle, params, (void **)&pool); + EXPECT_EQ(res, UMF_RESULT_SUCCESS); + EXPECT_NE(pool, nullptr); + EXPECT_EQ(pool->provider_min_page_size, 1024); + + // check buckets sizes + size_t expected_size = DEFAULT_DISJOINT_MIN_BUCKET_SIZE; + EXPECT_EQ(pool->buckets[0]->size, expected_size); + EXPECT_EQ(pool->buckets[pool->buckets_num - 1]->size, + (size_t)1 << 31); // 2GB + for (size_t i = 0; i < pool->buckets_num; i++) { + bucket_t *bucket = pool->buckets[i]; + EXPECT_NE(bucket, nullptr); + EXPECT_EQ(bucket->size, expected_size); + + // assuming DEFAULT_DISJOINT_MIN_BUCKET_SIZE = 64, expected bucket + // sizes are: 64, 96, 128, 192, 256, ..., 2GB + if (i % 2 == 0) { + expected_size += expected_size / 2; + } else { + expected_size = DEFAULT_DISJOINT_MIN_BUCKET_SIZE << ((i + 1) / 2); + } + } + + // test small allocations + size_t size = 8; + void *ptr = ops->malloc(pool, size); + EXPECT_NE(ptr, nullptr); + + // get bucket - because of small size this should be the first bucket in + // the pool + bucket_t *bucket = pool->buckets[0]; + EXPECT_NE(bucket, nullptr); + + // check bucket stats + EXPECT_EQ(bucket->alloc_count, 1); + + // first allocation will always use external memory (newly added to the + // pool) and this is counted as allocation from the outside of the pool + EXPECT_EQ(bucket->alloc_pool_count, 0); + EXPECT_EQ(bucket->curr_slabs_in_use, 1); + + // check slab - there should be only single slab allocated + EXPECT_NE(bucket->available_slabs, nullptr); + EXPECT_EQ(bucket->available_slabs_num, 1); + 
EXPECT_EQ(bucket->available_slabs->next, nullptr); + slab_t *slab = bucket->available_slabs->val; + + // check slab stats + EXPECT_GE(slab->slab_size, params->slab_min_size); + EXPECT_GE(slab->num_chunks_total, slab->slab_size / bucket->size); + + // check allocation in slab + EXPECT_EQ(slab->chunks[0], true); + EXPECT_EQ(slab->chunks[1], false); + EXPECT_EQ(slab->first_free_chunk_idx, 1); + + // TODO: + // * multiple alloc + free from single bucket + // * alignments + // * full slab alloc + // * slab overflow + // * chunked slabs + // * multiple alloc + free from different buckets + // * alloc something outside pool (> MaxPoolableSize) + // * test capacity + // * check minBucketSize + // * test large objects + // * check available_slabs_num + + // cleanup + ops->finalize(pool); + umfDisjointPoolParamsDestroy(params); +} + TEST_F(test, freeErrorPropagation) { static umf_result_t expectedResult = UMF_RESULT_SUCCESS; struct memory_provider : public umf_test::provider_base_t { - umf_result_t alloc(size_t size, size_t, void **ptr) noexcept { - *ptr = umf_ba_global_alloc(size); + umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept { + *ptr = umf_ba_global_aligned_alloc(size, alignment); return UMF_RESULT_SUCCESS; } @@ -117,8 +238,8 @@ TEST_F(test, sharedLimits) { static size_t numFrees = 0; struct memory_provider : public umf_test::provider_base_t { - umf_result_t alloc(size_t size, size_t, void **ptr) noexcept { - *ptr = umf_ba_global_alloc(size); + umf_result_t alloc(size_t size, size_t alignment, void **ptr) noexcept { + *ptr = umf_ba_global_aligned_alloc(size, alignment); numAllocs++; return UMF_RESULT_SUCCESS; } diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index ddc44548e..5b647b642 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -9,10 +9,8 @@ #include "test_helpers.h" #include -#include -#ifdef UMF_POOL_DISJOINT_ENABLED #include -#endif +#include #ifdef UMF_POOL_JEMALLOC_ENABLED #include #endif @@ -428,8 +426,6 @@ umf_result_t destroyOsMemoryProviderParamsShared(void *params) { HostMemoryAccessor hostAccessor; -#ifdef UMF_POOL_DISJOINT_ENABLED - void *createDisjointPoolParams() { umf_disjoint_pool_params_handle_t params = nullptr; umf_result_t res = umfDisjointPoolParamsCreate(¶ms); @@ -465,14 +461,10 @@ umf_result_t destroyDisjointPoolParams(void *params) { static_cast(params)); } -#endif - static std::vector ipcTestParamsList = { -#ifdef UMF_POOL_DISJOINT_ENABLED {umfDisjointPoolOps(), createDisjointPoolParams, destroyDisjointPoolParams, umfOsMemoryProviderOps(), createOsMemoryProviderParamsShared, destroyOsMemoryProviderParamsShared, &hostAccessor}, -#endif #ifdef UMF_POOL_JEMALLOC_ENABLED {umfJemallocPoolOps(), nullptr, nullptr, umfOsMemoryProviderOps(), createOsMemoryProviderParamsShared, destroyOsMemoryProviderParamsShared, diff --git a/test/supp/drd-umf_test-disjoint_pool.supp b/test/supp/drd-umf_test-disjoint_pool.supp new file mode 100644 index 000000000..24a44b93d --- /dev/null +++ b/test/supp/drd-umf_test-disjoint_pool.supp @@ -0,0 +1,7 @@ +{ + False-positive ConflictingAccess in critnib_insert + drd:ConflictingAccess + fun:store + fun:critnib_insert + ... 
+} diff --git a/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp b/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp deleted file mode 100644 index 2f669eb31..000000000 --- a/test/supp/helgrind-umf_test-disjointCoarseMallocPool.supp +++ /dev/null @@ -1,24 +0,0 @@ -{ - Incompatibility with helgrind's implementation (pthread_mutex_lock with a pthread_rwlock_t* argument) - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:_ZL20__gthread_mutex_lockP15pthread_mutex_t - ... -} - -{ - Incompatibility with helgrind's implementation (pthread_mutex_unlock with a pthread_rwlock_t* argument) - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:_ZL22__gthread_mutex_unlockP15pthread_mutex_t - ... -} - -{ - Incompatibility with helgrind's implementation (lock order "0xA before 0xB" violated) - Helgrind:LockOrder - obj:*vgpreload_helgrind-amd64-linux.so - fun:_ZStL23__glibcxx_rwlock_wrlockP16pthread_rwlock_t - fun:_ZNSt22__shared_mutex_pthread4lockEv - ... -} diff --git a/test/supp/helgrind-umf_test-disjointPool.supp b/test/supp/helgrind-umf_test-disjoint_pool.supp similarity index 53% rename from test/supp/helgrind-umf_test-disjointPool.supp rename to test/supp/helgrind-umf_test-disjoint_pool.supp index 3ada32736..929674e8e 100644 --- a/test/supp/helgrind-umf_test-disjointPool.supp +++ b/test/supp/helgrind-umf_test-disjoint_pool.supp @@ -29,25 +29,9 @@ } { - Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument") - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:*glibcxx_rwlock_wrlock*pthread_rwlock_t - ... -} - -{ - Incompatibility with helgrind's implementation ("pthread_rwlock_unlock with a pthread_mutex_t* argument") - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:*glibcxx_rwlock_unlock*pthread_rwlock_t - ... -} - -{ - Incompatibility with helgrind's implementation ("pthread_rwlock_{rd,rw}lock with a pthread_mutex_t* argument") - Helgrind:Misc - obj:*vgpreload_helgrind-amd64-linux.so - fun:*glibcxx_rwlock_rdlock*pthread_rwlock_t* + False-positive Race in critnib_insert + Helgrind:Race + fun:store + fun:critnib_insert ... } diff --git a/test/test_installation.py b/test/test_installation.py index b5dd676dc..ef30ac759 100644 --- a/test/test_installation.py +++ b/test/test_installation.py @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -278,11 +278,6 @@ def parse_arguments(self) -> argparse.Namespace: action="store_true", help="Add this argument if the proxy library should be built together with the UMF library", ) - self.parser.add_argument( - "--disjoint-pool", - action="store_true", - help="Add this argument if the UMF was built with Disjoint Pool enabled", - ) self.parser.add_argument( "--umf-version", action="store", @@ -299,8 +294,6 @@ def run(self) -> None: build_dir = Path(workspace_dir, self.args.build_dir) install_dir = Path(workspace_dir, self.args.install_dir) pools = [] - if self.args.disjoint_pool: - pools.append("disjoint_pool") umf_version = Version(self.args.umf_version) From 41429f486e13712af242bc21330a121d20f5d74f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Mon, 10 Feb 2025 14:58:49 +0100 Subject: [PATCH 158/466] reenable disjoint pool benchmark --- benchmark/benchmark.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index ad29e9029..401b06d26 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -154,13 +154,11 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) ->Apply(&default_multiple_alloc_fix_size); -// TODO: debug why this crashes -/*UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) ->Apply(&default_multiple_alloc_uniform_size); -*/ #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, From fd56adcd6e2993e99d002916289f5f25b6f2df96 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Fri, 7 Feb 2025 15:42:45 +0100 Subject: [PATCH 159/466] fix all sizeof MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Łukasz Plewa --- src/pool/pool_disjoint.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index e2288e49e..267791333 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -87,12 +87,12 @@ static slab_t *create_slab(bucket_t *bucket, bool full_size) { } else { slab->num_chunks_total = bucket_slab_min_size(bucket) / bucket->size; slab->chunks = - umf_ba_global_alloc(sizeof(bool) * slab->num_chunks_total); + umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total); if (slab->chunks == NULL) { LOG_ERR("allocation of slab chunks failed!"); goto free_slab_iter; } - memset(slab->chunks, 0, sizeof(bool) * slab->num_chunks_total); + memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total); } // if slab_min_size is not a multiple of bucket size, we would have some // padding at the end of the slab @@ -703,8 +703,8 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { disjoint_pool->buckets_num += 2; } - disjoint_pool->buckets = - umf_ba_global_alloc(sizeof(bucket_t *) * disjoint_pool->buckets_num); + disjoint_pool->buckets = umf_ba_global_alloc( + sizeof(*disjoint_pool->buckets) * disjoint_pool->buckets_num); int i = 0; Size1 = ts1; @@ -964,8 +964,7 @@ umf_memory_pool_ops_t 
*umfDisjointPoolOps(void) { umf_disjoint_pool_shared_limits_t * umfDisjointPoolSharedLimitsCreate(size_t max_size) { - umf_disjoint_pool_shared_limits_t *ptr = - umf_ba_global_alloc(sizeof(umf_disjoint_pool_shared_limits_t)); + umf_disjoint_pool_shared_limits_t *ptr = umf_ba_global_alloc(sizeof(*ptr)); //umf_ba_global_alloc(sizeof(*ptr)); ptr->max_size = max_size; ptr->total_size = 0; @@ -1109,7 +1108,7 @@ umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - char *newName = umf_ba_global_alloc(sizeof(char) * (strlen(name) + 1)); + char *newName = umf_ba_global_alloc(sizeof(*newName) * (strlen(name) + 1)); if (newName == NULL) { LOG_ERR("cannot allocate memory for disjoint pool name"); return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; From a74fcb404a1d421de5548d5b2392c3efc2ab56f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Fri, 7 Feb 2025 15:46:41 +0100 Subject: [PATCH 160/466] add missing error check for allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Łukasz Plewa --- src/pool/pool_disjoint.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 267791333..bf1a841a1 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -965,7 +965,10 @@ umf_memory_pool_ops_t *umfDisjointPoolOps(void) { umf_disjoint_pool_shared_limits_t * umfDisjointPoolSharedLimitsCreate(size_t max_size) { umf_disjoint_pool_shared_limits_t *ptr = umf_ba_global_alloc(sizeof(*ptr)); - //umf_ba_global_alloc(sizeof(*ptr)); + if (ptr == NULL) { + LOG_ERR("cannot allocate memory for disjoint pool shared limits"); + return NULL; + } ptr->max_size = max_size; ptr->total_size = 0; return ptr; From 9ec909e01506fdcf5027cf36a411594c96644190 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Fri, 7 Feb 2025 15:51:28 +0100 Subject: [PATCH 161/466] remove not needed forward declarations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Łukasz Plewa --- src/pool/pool_disjoint.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index bf1a841a1..b4488ad52 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -16,16 +16,11 @@ #include "utils_sanitizers.h" // Forward declarations -static slab_t *create_slab(bucket_t *bucket, bool full_size); -static void destroy_slab(slab_t *slab); - static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); static bool bucket_can_pool(bucket_t *bucket); static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); -static slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, - bool *from_pool); static __TLS umf_result_t TLS_last_allocation_error; From 75dea9524d5a3146403319c54ad533687e97f007 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Fri, 7 Feb 2025 17:17:07 +0100 Subject: [PATCH 162/466] remove distiguation between "chunked slab" and "full slab" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead having two modes - we can have only chunked slabs and full slabs are just a chunked slab with one chunk. This removes extra complexity in the code. 
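
To make the unification concrete, here is a rough sketch of what the new
create_slab() path boils down to (field and helper names as in the diff
below; the 64 KB figure is only an example value of slab_min_size):

    // every slab is now chunked; a former "full" slab is simply a slab
    // whose bucket size covers the whole slab, i.e. a single chunk
    slab->num_chunks_total =
        utils_max(bucket_slab_min_size(bucket) / bucket->size, 1);
    // e.g. with slab_min_size = 64 KB:
    //   bucket->size =   4 KB -> 16 chunks (chunked, as before)
    //   bucket->size = 128 KB ->  1 chunk  (the old "full slab" case)
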
Should not have performance impact, as we added few extra steps for big allocations, but removed extra branch in the code. Signed-off-by: Łukasz Plewa --- src/pool/pool_disjoint.c | 124 ++++++++------------------------------- 1 file changed, 23 insertions(+), 101 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index b4488ad52..9cfae16fe 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -18,7 +18,7 @@ // Forward declarations static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); static bool bucket_can_pool(bucket_t *bucket); -static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool); +static void bucket_decrement_pool(bucket_t *bucket); static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); @@ -52,7 +52,7 @@ static size_t bucket_slab_alloc_size(bucket_t *bucket) { return utils_max(bucket->size, bucket_slab_min_size(bucket)); } -static slab_t *create_slab(bucket_t *bucket, bool full_size) { +static slab_t *create_slab(bucket_t *bucket) { assert(bucket); umf_result_t res = UMF_RESULT_SUCCESS; @@ -76,19 +76,16 @@ static slab_t *create_slab(bucket_t *bucket, bool full_size) { slab->iter->val = slab; slab->iter->prev = slab->iter->next = NULL; - if (full_size) { - slab->num_chunks_total = 0; - slab->chunks = NULL; - } else { - slab->num_chunks_total = bucket_slab_min_size(bucket) / bucket->size; - slab->chunks = - umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total); - if (slab->chunks == NULL) { - LOG_ERR("allocation of slab chunks failed!"); - goto free_slab_iter; - } - memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total); + slab->num_chunks_total = + utils_max(bucket_slab_min_size(bucket) / bucket->size, 1); + slab->chunks = + umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total); + if (slab->chunks == NULL) { + LOG_ERR("allocation of slab chunks failed!"); + goto free_slab_iter; } + memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total); + // if slab_min_size is not a multiple of bucket size, we would have some // padding at the end of the slab slab->slab_size = bucket_slab_alloc_size(bucket); @@ -157,9 +154,6 @@ static size_t slab_find_first_available_chunk_idx(const slab_t *slab) { } static void *slab_get_chunk(slab_t *slab) { - // slab has to be allocated in chunk mode - assert(slab->chunks && slab->num_chunks_total > 0); - // free chunk must exist, otherwise we would have allocated another slab const size_t chunk_idx = slab_find_first_available_chunk_idx(slab); assert(chunk_idx != SIZE_MAX); @@ -356,8 +350,8 @@ static size_t bucket_chunk_cut_off(bucket_t *bucket) { return bucket_slab_min_size(bucket) / 2; } -static slab_t *bucket_create_slab(bucket_t *bucket, bool full_size) { - slab_t *slab = create_slab(bucket, full_size); +static slab_t *bucket_create_slab(bucket_t *bucket) { + slab_t *slab = create_slab(bucket); if (slab == NULL) { LOG_ERR("create_slab failed!") return NULL; @@ -377,69 +371,20 @@ static slab_t *bucket_create_slab(bucket_t *bucket, bool full_size) { return slab; } -static slab_list_item_t *bucket_get_avail_full_slab(bucket_t *bucket, - bool *from_pool) { - // return a slab that will be used for a single allocation - if (bucket->available_slabs == NULL) { - bucket_create_slab(bucket, true /* full size */); - *from_pool = false; - } else { - bucket_decrement_pool(bucket, from_pool); - } - - return bucket->available_slabs; -} - -// NOTE: this function must be called under bucket->bucket_lock 
-static void *bucket_get_free_slab(bucket_t *bucket, bool *from_pool) { - slab_list_item_t *slab_it = bucket_get_avail_full_slab(bucket, from_pool); - if (slab_it == NULL) { - return NULL; - } - - slab_t *slab = slab_it->val; - void *ptr = slab_get(slab); - - DL_DELETE(bucket->available_slabs, slab_it); - bucket->available_slabs_num--; - slab_it->prev = NULL; - DL_PREPEND(bucket->unavailable_slabs, slab_it); - - return ptr; -} - -// NOTE: this function must be called under bucket->bucket_lock -static void bucket_free_slab(bucket_t *bucket, slab_t *slab, bool *to_pool) { - slab_list_item_t *slab_it = slab->iter; - assert(slab_it->val != NULL); - *to_pool = bucket_can_pool(bucket); - if (*to_pool) { - DL_DELETE(bucket->unavailable_slabs, slab_it); - slab_it->prev = NULL; - DL_PREPEND(bucket->available_slabs, slab_it); - bucket->available_slabs_num++; - } else { - slab_unreg(slab_it->val); - DL_DELETE(bucket->unavailable_slabs, slab_it); - destroy_slab(slab_it->val); - } -} - static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool) { if (bucket->available_slabs == NULL) { - bucket_create_slab(bucket, false /* chunked */); + bucket_create_slab(bucket); *from_pool = false; } else { slab_t *slab = bucket->available_slabs->val; + // Allocation from existing slab is treated as from pool for statistics. + *from_pool = true; if (slab->num_chunks_allocated == 0) { // If this was an empty slab, it was in the pool. // Now it is no longer in the pool, so update count. --bucket->chunked_slabs_in_pool; - bucket_decrement_pool(bucket, from_pool); - } else { - // Allocation from existing slab is treated as from pool for statistics. - *from_pool = true; + bucket_decrement_pool(bucket); } } @@ -475,10 +420,7 @@ static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { in_pool * bucket_slab_alloc_size(bucket); } -static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { - // If a slab was available in the pool then note that the current pooled - // size has reduced by the size of a slab in this bucket. 
- *from_pool = true; +static void bucket_decrement_pool(bucket_t *bucket) { bucket_update_stats(bucket, 1, -1); utils_fetch_and_add64(&bucket->shared_limits->total_size, -(long long)bucket_slab_alloc_size(bucket)); @@ -487,13 +429,7 @@ static void bucket_decrement_pool(bucket_t *bucket, bool *from_pool) { static bool bucket_can_pool(bucket_t *bucket) { size_t new_free_slabs_in_bucket; - // check if this bucket is used in chunked form or as full slabs - bool chunked_bucket = bucket->size <= bucket_chunk_cut_off(bucket); - if (chunked_bucket) { - new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; - } else { - new_free_slabs_in_bucket = bucket->available_slabs_num + 1; - } + new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; // we keep at most params.capacity slabs in the pool if (bucket_capacity(bucket) >= new_free_slabs_in_bucket) { @@ -509,9 +445,7 @@ static bool bucket_can_pool(bucket_t *bucket) { if (utils_compare_exchange(&bucket->shared_limits->total_size, &pool_size, &new_pool_size)) { - if (chunked_bucket) { - ++bucket->chunked_slabs_in_pool; - } + ++bucket->chunked_slabs_in_pool; bucket_update_stats(bucket, -1, 1); return true; @@ -614,11 +548,7 @@ static void *disjoint_pool_allocate(disjoint_pool_t *pool, size_t size) { utils_mutex_lock(&bucket->bucket_lock); bool from_pool = false; - if (size > bucket_chunk_cut_off(bucket)) { - ptr = bucket_get_free_slab(bucket, &from_pool); - } else { - ptr = bucket_get_free_chunk(bucket, &from_pool); - } + ptr = bucket_get_free_chunk(bucket, &from_pool); if (ptr == NULL) { TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -799,11 +729,7 @@ void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { utils_mutex_lock(&bucket->bucket_lock); - if (aligned_size > bucket_chunk_cut_off(bucket)) { - ptr = bucket_get_free_slab(bucket, &from_pool); - } else { - ptr = bucket_get_free_chunk(bucket, &from_pool); - } + ptr = bucket_get_free_chunk(bucket, &from_pool); if (ptr == NULL) { TLS_last_allocation_error = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; @@ -889,11 +815,7 @@ umf_result_t disjoint_pool_free(void *pool, void *ptr) { utils_mutex_lock(&bucket->bucket_lock); utils_annotate_memory_inaccessible(ptr, bucket->size); - if (bucket->size <= bucket_chunk_cut_off(bucket)) { - bucket_free_chunk(bucket, ptr, slab, &to_pool); - } else { - bucket_free_slab(bucket, slab, &to_pool); - } + bucket_free_chunk(bucket, ptr, slab, &to_pool); if (disjoint_pool->params.pool_trace > 1) { bucket->free_count++; From 6bb22d89ae2cfe4d7500678a5c3fc903aaaea488 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Mon, 10 Feb 2025 17:26:10 +0100 Subject: [PATCH 163/466] do not allocate slab->iter dynamically MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no reason to do so. This is only extra performance overhead. 
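
For reference, the list node is now embedded in the slab and initialized in
place (names as in the diff below), which removes one
umf_ba_global_alloc()/umf_ba_global_free() pair per slab:

    // before: slab->iter was a separately allocated slab_list_item_t *
    // after:  slab->iter is a value member of slab_t
    slab->iter.val = slab;
    slab->iter.prev = slab->iter.next = NULL;
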
Signed-off-by: Łukasz Plewa --- src/pool/pool_disjoint.c | 21 ++++++--------------- src/pool/pool_disjoint_internal.h | 12 ++++++------ 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 9cfae16fe..a1abbc414 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -68,13 +68,8 @@ static slab_t *create_slab(bucket_t *bucket) { slab->first_free_chunk_idx = 0; slab->bucket = bucket; - slab->iter = umf_ba_global_alloc(sizeof(*slab->iter)); - if (slab->iter == NULL) { - LOG_ERR("allocation of new slab iter failed!"); - goto free_slab; - } - slab->iter->val = slab; - slab->iter->prev = slab->iter->next = NULL; + slab->iter.val = slab; + slab->iter.prev = slab->iter.next = NULL; slab->num_chunks_total = utils_max(bucket_slab_min_size(bucket) / bucket->size, 1); @@ -82,7 +77,7 @@ static slab_t *create_slab(bucket_t *bucket) { umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total); if (slab->chunks == NULL) { LOG_ERR("allocation of slab chunks failed!"); - goto free_slab_iter; + goto free_slab; } memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total); @@ -111,9 +106,6 @@ static slab_t *create_slab(bucket_t *bucket) { free_slab_chunks: umf_ba_global_free(slab->chunks); -free_slab_iter: - umf_ba_global_free(slab->iter); - free_slab: umf_ba_global_free(slab); return NULL; @@ -131,7 +123,6 @@ static void destroy_slab(slab_t *slab) { } umf_ba_global_free(slab->chunks); - umf_ba_global_free(slab->iter); umf_ba_global_free(slab); } @@ -296,7 +287,7 @@ static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, // in case if the slab was previously full and now has single available // chunk, it should be moved to the list of available slabs if (slab_get_num_free_chunks(slab) == 1) { - slab_list_item_t *slab_it = slab->iter; + slab_list_item_t *slab_it = &slab->iter; assert(slab_it->val != NULL); DL_DELETE(bucket->unavailable_slabs, slab_it); DL_PREPEND(bucket->available_slabs, slab_it); @@ -312,7 +303,7 @@ static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, *to_pool = bucket_can_pool(bucket); if (*to_pool == false) { // remove slab - slab_list_item_t *slab_it = slab->iter; + slab_list_item_t *slab_it = &slab->iter; assert(slab_it->val != NULL); slab_unreg(slab_it->val); DL_DELETE(bucket->available_slabs, slab_it); @@ -364,7 +355,7 @@ static slab_t *bucket_create_slab(bucket_t *bucket) { return NULL; } - DL_PREPEND(bucket->available_slabs, slab->iter); + DL_PREPEND(bucket->available_slabs, &slab->iter); bucket->available_slabs_num++; bucket_update_stats(bucket, 1, 0); diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h index 56a25e611..3d656689c 100644 --- a/src/pool/pool_disjoint_internal.h +++ b/src/pool/pool_disjoint_internal.h @@ -81,6 +81,11 @@ typedef struct bucket_t { size_t max_slabs_in_use; } bucket_t; +typedef struct slab_list_item_t { + slab_t *val; + struct slab_list_item_t *prev, *next; +} slab_list_item_t; + // Represents the allocated memory block of size 'slab_min_size' // Internally, it splits the memory block into chunks. The number of // chunks depends on the size of a Bucket which created the Slab. 
@@ -107,14 +112,9 @@ typedef struct slab_t { // Store iterator to the corresponding node in avail/unavail list // to achieve O(1) removal - slab_list_item_t *iter; + slab_list_item_t iter; } slab_t; -typedef struct slab_list_item_t { - slab_t *val; - struct slab_list_item_t *prev, *next; -} slab_list_item_t; - typedef struct umf_disjoint_pool_shared_limits_t { size_t max_size; size_t total_size; // requires atomic access From b6181b62a90c42322aad61fa883efbfa5b05a6df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Tue, 11 Feb 2025 15:02:21 +0100 Subject: [PATCH 164/466] rename slab_Reg to pool_register_slab MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this improves redability, and reduces "spaghetti" in the code Signed-off-by: Łukasz Plewa --- src/pool/pool_disjoint.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index a1abbc414..090d1dd9e 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -199,9 +199,7 @@ static bool slab_has_avail(const slab_t *slab) { return slab->num_chunks_allocated < slab->num_chunks_total; } -static umf_result_t slab_reg(slab_t *slab) { - bucket_t *bucket = slab->bucket; - disjoint_pool_t *pool = bucket->pool; +static umf_result_t pool_register_slab(disjoint_pool_t *pool, slab_t *slab) { critnib *slabs = pool->known_slabs; // NOTE: changed vs original DisjointPool implementation - currently slab @@ -225,9 +223,7 @@ static umf_result_t slab_reg(slab_t *slab) { return res; } -static umf_result_t slab_unreg(slab_t *slab) { - bucket_t *bucket = slab->bucket; - disjoint_pool_t *pool = bucket->pool; +static umf_result_t pool_unregister_slab(disjoint_pool_t *pool, slab_t *slab) { critnib *slabs = pool->known_slabs; void *slab_addr = slab_get(slab); @@ -305,7 +301,7 @@ static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, // remove slab slab_list_item_t *slab_it = &slab->iter; assert(slab_it->val != NULL); - slab_unreg(slab_it->val); + pool_unregister_slab(bucket->pool, slab_it->val); DL_DELETE(bucket->available_slabs, slab_it); bucket->available_slabs_num--; destroy_slab(slab_it->val); @@ -348,7 +344,7 @@ static slab_t *bucket_create_slab(bucket_t *bucket) { return NULL; } - umf_result_t res = slab_reg(slab); + umf_result_t res = pool_register_slab(bucket->pool, slab); if (res != UMF_RESULT_SUCCESS) { LOG_ERR("slab_reg failed!") destroy_slab(slab); From 368ab19cdc0dfd7a6b2e6b4b00c7b18dfca0fcc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Tue, 11 Feb 2025 16:37:37 +0100 Subject: [PATCH 165/466] Rename bucket_capacity to bucket_ma_pooled_slabs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit this function has nothing releated to capacity. input parameter to disjointpool should also be renamed, but this is topic for the diffrent pull request Signed-off-by: Łukasz Plewa --- src/pool/pool_disjoint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 090d1dd9e..ef7b3875d 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -378,8 +378,8 @@ static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, return bucket->available_slabs; } -static size_t bucket_capacity(bucket_t *bucket) { - // For buckets used in chunked mode, just one slab in pool is sufficient. 
+static size_t bucket_max_pooled_slabs(bucket_t *bucket) { + // For small buckets where slabs are split to chunks, just one pooled slab is sufficient. // For larger buckets, the capacity could be more and is adjustable. if (bucket->size <= bucket_chunk_cut_off(bucket)) { return 1; @@ -419,7 +419,7 @@ static bool bucket_can_pool(bucket_t *bucket) { new_free_slabs_in_bucket = bucket->chunked_slabs_in_pool + 1; // we keep at most params.capacity slabs in the pool - if (bucket_capacity(bucket) >= new_free_slabs_in_bucket) { + if (bucket_max_pooled_slabs(bucket) >= new_free_slabs_in_bucket) { size_t pool_size = 0; utils_atomic_load_acquire(&bucket->shared_limits->total_size, &pool_size); From 59eee62efcaaf20d2ed91a1df09f09aca619ca82 Mon Sep 17 00:00:00 2001 From: Weronika Lewandowska Date: Tue, 11 Feb 2025 18:45:20 +0100 Subject: [PATCH 166/466] Init TBB symbols only once #700 --- src/libumf.c | 2 + src/pool/pool_scalable.c | 118 ++++++++++++++++++------------ src/pool/pool_scalable_internal.h | 10 +++ 3 files changed, 83 insertions(+), 47 deletions(-) create mode 100644 src/pool/pool_scalable_internal.h diff --git a/src/libumf.c b/src/libumf.c index 64314f4d3..f8f6cc61f 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -12,6 +12,7 @@ #include "base_alloc_global.h" #include "ipc_cache.h" #include "memspace_internal.h" +#include "pool/pool_scalable_internal.h" #include "provider_cuda_internal.h" #include "provider_level_zero_internal.h" #include "provider_tracking.h" @@ -83,6 +84,7 @@ void umfTearDown(void) { fini_umfTearDown: fini_ze_global_state(); fini_cu_global_state(); + fini_tbb_global_state(); LOG_DEBUG("UMF library finalized"); } } diff --git a/src/pool/pool_scalable.c b/src/pool/pool_scalable.c index 2ee265df8..e1ab3d376 100644 --- a/src/pool/pool_scalable.c +++ b/src/pool/pool_scalable.c @@ -20,6 +20,7 @@ #include "base_alloc_global.h" #include "libumf.h" +#include "pool_scalable_internal.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_load_library.h" @@ -33,6 +34,7 @@ static __TLS umf_result_t TLS_last_allocation_error; static __TLS umf_result_t TLS_last_free_error; static const size_t DEFAULT_GRANULARITY = 2 * 1024 * 1024; // 2MB + typedef struct tbb_mem_pool_policy_t { raw_alloc_tbb_type pAlloc; raw_free_tbb_type pFree; @@ -66,7 +68,6 @@ typedef struct tbb_callbacks_t { typedef struct tbb_memory_pool_t { umf_memory_provider_handle_t mem_provider; void *tbb_pool; - tbb_callbacks_t tbb_callbacks; } tbb_memory_pool_t; typedef enum tbb_enums_t { @@ -82,6 +83,10 @@ typedef enum tbb_enums_t { TBB_POOL_SYMBOLS_MAX // it has to be the last one } tbb_enums_t; +static UTIL_ONCE_FLAG tbb_initialized = UTIL_ONCE_FLAG_INIT; +static int tbb_init_result = 0; +static tbb_callbacks_t tbb_callbacks = {0}; + static const char *tbb_symbol[TBB_POOL_SYMBOLS_MAX] = { #ifdef _WIN32 // symbols copied from oneTBB/src/tbbmalloc/def/win64-tbbmalloc.def @@ -109,46 +114,60 @@ static const char *tbb_symbol[TBB_POOL_SYMBOLS_MAX] = { #endif }; -static int init_tbb_callbacks(tbb_callbacks_t *tbb_callbacks) { - assert(tbb_callbacks); - +static void init_tbb_callbacks_once(void) { const char *lib_name = tbb_symbol[TBB_LIB_NAME]; - tbb_callbacks->lib_handle = utils_open_library(lib_name, 0); - if (!tbb_callbacks->lib_handle) { + tbb_callbacks.lib_handle = utils_open_library(lib_name, 0); + if (!tbb_callbacks.lib_handle) { LOG_ERR("%s required by Scalable Pool not found - install TBB malloc " "or make sure it is in the default search paths.", lib_name); - return -1; + tbb_init_result = -1; + 
return; } - - *(void **)&tbb_callbacks->pool_malloc = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_MALLOC], lib_name); - *(void **)&tbb_callbacks->pool_realloc = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_REALLOC], lib_name); - *(void **)&tbb_callbacks->pool_aligned_malloc = - utils_get_symbol_addr(tbb_callbacks->lib_handle, + *(void **)&tbb_callbacks.pool_malloc = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_MALLOC], lib_name); + *(void **)&tbb_callbacks.pool_realloc = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_REALLOC], lib_name); + *(void **)&tbb_callbacks.pool_aligned_malloc = + utils_get_symbol_addr(tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_ALIGNED_MALLOC], lib_name); - *(void **)&tbb_callbacks->pool_free = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_FREE], lib_name); - *(void **)&tbb_callbacks->pool_create_v1 = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_CREATE_V1], lib_name); - *(void **)&tbb_callbacks->pool_destroy = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_DESTROY], lib_name); - *(void **)&tbb_callbacks->pool_identify = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_IDENTIFY], lib_name); - *(void **)&tbb_callbacks->pool_msize = utils_get_symbol_addr( - tbb_callbacks->lib_handle, tbb_symbol[TBB_POOL_MSIZE], lib_name); - - if (!tbb_callbacks->pool_malloc || !tbb_callbacks->pool_realloc || - !tbb_callbacks->pool_aligned_malloc || !tbb_callbacks->pool_free || - !tbb_callbacks->pool_create_v1 || !tbb_callbacks->pool_destroy || - !tbb_callbacks->pool_identify) { + *(void **)&tbb_callbacks.pool_free = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_FREE], lib_name); + *(void **)&tbb_callbacks.pool_create_v1 = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_CREATE_V1], lib_name); + *(void **)&tbb_callbacks.pool_destroy = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_DESTROY], lib_name); + *(void **)&tbb_callbacks.pool_identify = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_IDENTIFY], lib_name); + *(void **)&tbb_callbacks.pool_msize = utils_get_symbol_addr( + tbb_callbacks.lib_handle, tbb_symbol[TBB_POOL_MSIZE], lib_name); + + if (!tbb_callbacks.pool_malloc || !tbb_callbacks.pool_realloc || + !tbb_callbacks.pool_aligned_malloc || !tbb_callbacks.pool_free || + !tbb_callbacks.pool_create_v1 || !tbb_callbacks.pool_destroy || + !tbb_callbacks.pool_identify) { LOG_FATAL("Could not find all TBB symbols in %s", lib_name); - utils_close_library(tbb_callbacks->lib_handle); - return -1; + if (utils_close_library(tbb_callbacks.lib_handle)) { + LOG_ERR("Could not close %s library", lib_name); + } + tbb_init_result = -1; } +} - return 0; +static int init_tbb_callbacks(void) { + utils_init_once(&tbb_initialized, init_tbb_callbacks_once); + return tbb_init_result; +} + +void fini_tbb_global_state(void) { + if (tbb_callbacks.lib_handle) { + if (!utils_close_library(tbb_callbacks.lib_handle)) { + tbb_callbacks.lib_handle = NULL; + LOG_DEBUG("TBB library closed"); + } else { + LOG_ERR("TBB library cannot be unloaded"); + } + } } static void *tbb_raw_alloc_wrapper(intptr_t pool_id, size_t *raw_bytes) { @@ -264,35 +283,41 @@ static umf_result_t tbb_pool_initialize(umf_memory_provider_handle_t provider, return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - int ret = 
init_tbb_callbacks(&pool_data->tbb_callbacks); + umf_result_t res = UMF_RESULT_SUCCESS; + int ret = init_tbb_callbacks(); if (ret != 0) { LOG_FATAL("loading TBB symbols failed"); - return UMF_RESULT_ERROR_UNKNOWN; + res = UMF_RESULT_ERROR_UNKNOWN; + goto err_tbb_init; } pool_data->mem_provider = provider; - ret = pool_data->tbb_callbacks.pool_create_v1((intptr_t)pool_data, &policy, - &(pool_data->tbb_pool)); + ret = tbb_callbacks.pool_create_v1((intptr_t)pool_data, &policy, + &(pool_data->tbb_pool)); if (ret != 0 /* TBBMALLOC_OK */) { - return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + res = UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; + goto err_tbb_init; } *pool = (void *)pool_data; - return UMF_RESULT_SUCCESS; + return res; + +err_tbb_init: + umf_ba_global_free(pool_data); + return res; } static void tbb_pool_finalize(void *pool) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; - pool_data->tbb_callbacks.pool_destroy(pool_data->tbb_pool); - utils_close_library(pool_data->tbb_callbacks.lib_handle); + tbb_callbacks.pool_destroy(pool_data->tbb_pool); umf_ba_global_free(pool_data); } static void *tbb_malloc(void *pool, size_t size) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; TLS_last_allocation_error = UMF_RESULT_SUCCESS; - void *ptr = pool_data->tbb_callbacks.pool_malloc(pool_data->tbb_pool, size); + void *ptr = tbb_callbacks.pool_malloc(pool_data->tbb_pool, size); if (ptr == NULL) { if (TLS_last_allocation_error == UMF_RESULT_SUCCESS) { TLS_last_allocation_error = UMF_RESULT_ERROR_UNKNOWN; @@ -319,8 +344,7 @@ static void *tbb_calloc(void *pool, size_t num, size_t size) { static void *tbb_realloc(void *pool, void *ptr, size_t size) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; TLS_last_allocation_error = UMF_RESULT_SUCCESS; - void *new_ptr = - pool_data->tbb_callbacks.pool_realloc(pool_data->tbb_pool, ptr, size); + void *new_ptr = tbb_callbacks.pool_realloc(pool_data->tbb_pool, ptr, size); if (new_ptr == NULL) { if (TLS_last_allocation_error == UMF_RESULT_SUCCESS) { TLS_last_allocation_error = UMF_RESULT_ERROR_UNKNOWN; @@ -334,8 +358,8 @@ static void *tbb_realloc(void *pool, void *ptr, size_t size) { static void *tbb_aligned_malloc(void *pool, size_t size, size_t alignment) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; TLS_last_allocation_error = UMF_RESULT_SUCCESS; - void *ptr = pool_data->tbb_callbacks.pool_aligned_malloc( - pool_data->tbb_pool, size, alignment); + void *ptr = + tbb_callbacks.pool_aligned_malloc(pool_data->tbb_pool, size, alignment); if (ptr == NULL) { if (TLS_last_allocation_error == UMF_RESULT_SUCCESS) { TLS_last_allocation_error = UMF_RESULT_ERROR_UNKNOWN; @@ -360,7 +384,7 @@ static umf_result_t tbb_free(void *pool, void *ptr) { utils_annotate_release(pool); tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; - if (pool_data->tbb_callbacks.pool_free(pool_data->tbb_pool, ptr)) { + if (tbb_callbacks.pool_free(pool_data->tbb_pool, ptr)) { return UMF_RESULT_SUCCESS; } @@ -373,7 +397,7 @@ static umf_result_t tbb_free(void *pool, void *ptr) { static size_t tbb_malloc_usable_size(void *pool, void *ptr) { tbb_memory_pool_t *pool_data = (tbb_memory_pool_t *)pool; - return pool_data->tbb_callbacks.pool_msize(pool_data->tbb_pool, ptr); + return tbb_callbacks.pool_msize(pool_data->tbb_pool, ptr); } static umf_result_t tbb_get_last_allocation_error(void *pool) { diff --git a/src/pool/pool_scalable_internal.h b/src/pool/pool_scalable_internal.h new file mode 100644 index 000000000..cfdc668fc --- /dev/null +++ 
b/src/pool/pool_scalable_internal.h @@ -0,0 +1,10 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +void fini_tbb_global_state(void); From 024b5910df0824611f3d020ea014308555421c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Fri, 24 Jan 2025 13:49:29 +0100 Subject: [PATCH 167/466] Improve benchmark stability. --- benchmark/benchmark.cpp | 128 ++++-------- benchmark/benchmark.hpp | 384 ++++++++++++++++++++--------------- benchmark/benchmark_size.hpp | 8 +- 3 files changed, 256 insertions(+), 264 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 401b06d26..3969b6068 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -30,129 +30,60 @@ // The exact meaning of each argument depends on the benchmark, allocator, and size components used. // Refer to the 'argsName()' function in each component to find detailed descriptions of these arguments. -static void default_alloc_fix_size(benchmark::internal::Benchmark *benchmark) { - benchmark->Args({10000, 0, 4096}); - benchmark->Args({10000, 100000, 4096}); +static void multithreaded(benchmark::internal::Benchmark *benchmark) { benchmark->Threads(4); benchmark->Threads(1); } -static void -default_alloc_uniform_size(benchmark::internal::Benchmark *benchmark) { - benchmark->Args({10000, 0, 8, 64 * 1024, 8}); - benchmark->Threads(4); - benchmark->Threads(1); -} - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_fix, fixed_alloc_size, - glibc_malloc); - -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, glibc_fix) - ->Apply(&default_alloc_fix_size); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, glibc_uniform, - uniform_alloc_size, glibc_malloc); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, glibc_uniform) - ->Apply(&default_alloc_uniform_size); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, os_provider, fixed_alloc_size, - provider_allocator); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, os_provider) - ->Apply(&default_alloc_fix_size); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, proxy_pool, fixed_alloc_size, - pool_allocator>); - -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, proxy_pool) - ->Apply(&default_alloc_fix_size); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_fix, - fixed_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_fix) - ->Apply(&default_alloc_fix_size); - -// TODO: debug why this crashes -/*UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, disjoint_pool_uniform, - uniform_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, disjoint_pool_uniform) - ->Apply(&default_alloc_uniform_size); -*/ - -#ifdef UMF_POOL_JEMALLOC_ENABLED -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_fix, - fixed_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, jemalloc_pool_fix) - ->Apply(&default_alloc_fix_size); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, jemalloc_pool_uniform, - uniform_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, jemalloc_pool_uniform) - ->Apply(&default_alloc_uniform_size); - -#endif -#ifdef UMF_POOL_SCALABLE_ENABLED -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, scalable_pool_fix, - fixed_alloc_size, - pool_allocator>); - -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, scalable_pool_fix) - ->Apply(&default_alloc_fix_size); - -UMF_BENCHMARK_TEMPLATE_DEFINE(alloc_benchmark, 
scalable_pool_uniform, - uniform_alloc_size, - pool_allocator>); - -UMF_BENCHMARK_REGISTER_F(alloc_benchmark, scalable_pool_uniform) - ->Apply(&default_alloc_uniform_size); -#endif -// Multiple allocs/free static void default_multiple_alloc_fix_size(benchmark::internal::Benchmark *benchmark) { - benchmark->Args({10000, 4096}); - benchmark->Threads(4); - benchmark->Threads(1); + benchmark->Args({10000, 1, 4096}); + benchmark->Iterations(500000); } static void default_multiple_alloc_uniform_size(benchmark::internal::Benchmark *benchmark) { - benchmark->Args({10000, 8, 64 * 1024, 8}); - benchmark->Threads(4); - benchmark->Threads(1); + benchmark->Args({10000, 1, 8, 4096, 8}); + benchmark->Args({10000, 1, 8, 128, 8}); + benchmark->Iterations(500000); } UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, glibc_fix, fixed_alloc_size, glibc_malloc); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, glibc_fix) - ->Apply(&default_multiple_alloc_fix_size); + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, glibc_uniform, uniform_alloc_size, glibc_malloc); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, glibc_uniform) - ->Apply(&default_multiple_alloc_uniform_size); + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, proxy_pool, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, proxy_pool) - ->Apply(&default_multiple_alloc_fix_size); + ->Apply(&default_multiple_alloc_fix_size) + // reduce iterations, as this benchmark is slower than others + ->Iterations(50000); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider, fixed_alloc_size, provider_allocator); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, os_provider) - ->Apply(&default_multiple_alloc_fix_size); + ->Apply(&default_multiple_alloc_fix_size) + // reduce iterations, as this benchmark is slower than others + ->Iterations(50000); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) - ->Apply(&default_multiple_alloc_fix_size); + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, @@ -165,13 +96,15 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, fixed_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_fix) - ->Apply(&default_multiple_alloc_fix_size); + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_uniform) - ->Apply(&default_multiple_alloc_uniform_size); + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); #endif @@ -181,14 +114,25 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, scalable_pool_fix, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_fix) - ->Apply(&default_multiple_alloc_fix_size); + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, scalable_pool_uniform, 
uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_uniform) - ->Apply(&default_multiple_alloc_uniform_size); + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); #endif -BENCHMARK_MAIN(); + +//BENCHMARK_MAIN(); +int main(int argc, char **argv) { + if (initAffinityMask()) { + return -1; + } + benchmark::Initialize(&argc, argv); + benchmark::RunSpecifiedBenchmarks(); + benchmark::Shutdown(); +} diff --git a/benchmark/benchmark.hpp b/benchmark/benchmark.hpp index 50e75f8fb..a960d89bc 100644 --- a/benchmark/benchmark.hpp +++ b/benchmark/benchmark.hpp @@ -70,8 +70,10 @@ * - Additional benchmarking scenarios can be created by extending `benchmark_interface`. */ -#include +#include #include + +#include #include #include @@ -83,13 +85,92 @@ struct alloc_data { size_t size; }; +struct next_alloc_data { + size_t offset; + size_t size; +}; + +#ifndef WIN32 +std::vector affinityMask; + +int initAffinityMask() { + cpu_set_t mask; + CPU_ZERO(&mask); + + if (sched_getaffinity(0, sizeof(mask), &mask) == -1) { + perror("sched_getaffinity"); + return 1; + } + + for (int cpu = 0; cpu < CPU_SETSIZE; cpu++) { + if (CPU_ISSET(cpu, &mask)) { + cpu_set_t mask; + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + affinityMask.push_back(mask); + } + } + // we reverse affinityMask to avoid using cpu 0 if possible. + // CPU 0 is usually the most used one by other applications on the system. + std::reverse(affinityMask.begin(), affinityMask.end()); + return 0; +} + +void setAffinity(benchmark::State &state) { + size_t tid = state.thread_index(); + if (tid >= affinityMask.size()) { + state.SkipWithError("Not enough CPUs available to set affinity"); + } + + auto &mask = affinityMask[tid]; + + if (sched_setaffinity(0, sizeof(mask), &mask) != 0) { + state.SkipWithError("Failed to set affinity"); + } +} + +#else +int initAffinityMask() { + printf( + "Affinity set not supported on Windows, benchmark can be unstable\n"); + return 0; +} + +void setAffinity([[maybe_unused]] benchmark::State &state) { + // Not implemented for Windows +} + +#endif + +// function that ensures that all threads have reached the same point +inline void waitForAllThreads(const benchmark::State &state) { + static std::atomic count{0}; + static std::atomic generation{0}; + + const int totalThreads = state.threads(); + int gen = generation.load(std::memory_order_relaxed); + + int c = count.fetch_add(1, std::memory_order_acq_rel) + 1; + + if (c == totalThreads) { + // Last thread - reset count and bump generation + count.store(0, std::memory_order_relaxed); + generation.fetch_add(1, std::memory_order_acq_rel); + } else { + // Not the last thread: spin until the generation changes + while (generation.load(std::memory_order_acquire) == gen) { + std::this_thread::yield(); + } + } +} + template ::value>> class provider_allocator : public allocator_interface { public: - unsigned SetUp(::benchmark::State &state, unsigned r) override { + unsigned SetUp(::benchmark::State &state, unsigned argPos) override { provider.SetUp(state); - return r; + return argPos; } void TearDown(::benchmark::State &state) override { @@ -118,9 +199,9 @@ class provider_allocator : public allocator_interface { // TODO: assert Pool to be a pool_interface. 
template class pool_allocator : public allocator_interface { public: - unsigned SetUp(::benchmark::State &state, unsigned r) override { + unsigned SetUp(::benchmark::State &state, unsigned argPos) override { pool.SetUp(state); - return r; + return argPos; } void TearDown(::benchmark::State &state) override { pool.TearDown(state); } @@ -141,19 +222,28 @@ template class pool_allocator : public allocator_interface { template struct benchmark_interface : public benchmark::Fixture { - void SetUp(::benchmark::State &state) { - int argPos = alloc_size.SetUp(state, 0); - allocator.SetUp(state, argPos); + int parseArgs(::benchmark::State &state, int argPos) { + Size generator; + argPos = generator.SetUp(state, argPos); + argPos = allocator.SetUp(state, argPos); + alloc_sizes.resize(state.threads()); + for (auto &i : alloc_sizes) { + i = generator; + } + return argPos; } + void SetUp(::benchmark::State &state) { parseArgs(state, 0); } void TearDown(::benchmark::State &state) { - alloc_size.TearDown(state); + for (auto &i : alloc_sizes) { + i.TearDown(state); + } allocator.TearDown(state); } virtual void bench(::benchmark::State &state) = 0; - static std::vector argsName() { + virtual std::vector argsName() { auto s = Size::argsName(); auto a = Allocator::argsName(); std::vector res = {}; @@ -163,209 +253,167 @@ struct benchmark_interface : public benchmark::Fixture { } virtual std::string name() { return Allocator::name(); } - virtual int64_t iterations() { return 10000; } + static void defaultArgs(Benchmark *benchmark) { auto *bench = static_cast *>(benchmark); - benchmark->ArgNames(bench->argsName()) - ->Name(bench->name()) - ->Iterations(bench->iterations()); + benchmark->ArgNames(bench->argsName())->Name(bench->name()); } - Size alloc_size; + + std::vector alloc_sizes; Allocator allocator; }; -// This class benchmarks speed of alloc() operations. +// This class benchmarks performance of random deallocations and (re)allocations template < typename Size, typename Alloc, typename = std::enable_if_t::value>, typename = std::enable_if_t::value>> -class alloc_benchmark : public benchmark_interface { +class multiple_malloc_free_benchmark : public benchmark_interface { + using distribution = std::uniform_int_distribution; + template using vector2d = std::vector>; + using base = benchmark_interface; + + int allocsPerIterations = 10; + bool thread_local_allocations = true; + size_t max_allocs = 0; + + vector2d allocations; + std::vector iters; + + vector2d next; + std::vector::const_iterator> next_iter; + int64_t iterations; + public: - size_t max_allocs = 1000; - size_t pre_allocs = 0; void SetUp(::benchmark::State &state) override { - if (state.thread_index() != 0) { - return; - } + auto tid = state.thread_index(); - // unpack arguments - int argPos = 0; - max_allocs = state.range(argPos++); - pre_allocs = state.range(argPos++); - // pass rest of the arguments to "alloc_size" and "allocator" - argPos = base::alloc_size.SetUp(state, argPos); - base::allocator.SetUp(state, argPos); - - // initialize allocations tracking vectors (one per thread) - // and iterators for these vectors. 
- allocations.resize(state.threads()); - iters.resize(state.threads()); - - for (auto &i : iters) { - i = pre_allocs; + if (tid == 0) { + // unpack arguments + iterations = state.max_iterations; + int argPos = 0; + max_allocs = state.range(argPos++); + thread_local_allocations = state.range(argPos++); + base::parseArgs(state, argPos); + + allocations.resize(state.threads()); + next.resize(state.threads()); + next_iter.resize(state.threads()); + +#ifndef WIN32 + // Ensure that system malloc does not have memory pooled on the heap + malloc_trim(0); +#endif } - - // do "pre_alloc" allocations before actual benchmark. - for (auto &i : allocations) { - i.resize(max_allocs + pre_allocs); - - for (size_t j = 0; j < pre_allocs; j++) { - i[j].ptr = - base::allocator.benchAlloc(base::alloc_size.nextSize()); - if (i[j].ptr == NULL) { - state.SkipWithError("preallocation failed"); - return; - } - i[j].size = base::alloc_size.nextSize(); - } + setAffinity(state); + // sync threads to ensure that thread 0 parsed args and did all initialization + waitForAllThreads(state); + // Prepare workload for warm up + prealloc(state); + prepareWorkload(state); + // Start warm up with all threads at once + waitForAllThreads(state); + // warm up + for (int j = 0; j < iterations; j++) { + bench(state); } + waitForAllThreads(state); + // prepare workload for actual benchmark. + freeAllocs(state); + prealloc(state); + prepareWorkload(state); } void TearDown(::benchmark::State &state) override { - if (state.thread_index() != 0) { - return; - } - for (auto &i : allocations) { - for (auto &j : i) { - if (j.ptr != NULL) { - base::allocator.benchFree(j.ptr, j.size); - j.ptr = NULL; - j.size = 0; - } - } - } + auto tid = state.thread_index(); + freeAllocs(state); + waitForAllThreads(state); + if (tid == 0) { + // release memory used by benchmark + next.clear(); + next_iter.clear(); + allocations.clear(); + iters.clear(); + } base::TearDown(state); } void bench(benchmark::State &state) override { auto tid = state.thread_index(); - auto s = base::alloc_size.nextSize(); - auto &i = iters[tid]; - allocations[tid][i].ptr = base::allocator.benchAlloc(s); - if (allocations[tid][i].ptr == NULL) { - state.SkipWithError("allocation failed"); - return; - } - allocations[tid][i].size = s; - i++; - if (i >= max_allocs + pre_allocs) { - // This benchmark tests only allocations - - // if allocation tracker is full we pause benchmark to dealloc all allocations - - // excluding pre-allocated ones. 
- state.PauseTiming(); - while (i > pre_allocs) { - auto &allocation = allocations[tid][--i]; - base::allocator.benchFree(allocation.ptr, allocation.size); - allocation.ptr = NULL; - allocation.size = 0; + auto &allocation = allocations[tid]; + for (int i = 0; i < allocsPerIterations; i++) { + auto &n = *next_iter[tid]++; + auto &alloc = allocation[n.offset]; + base::allocator.benchFree(alloc.ptr, alloc.size); + + alloc.size = n.size; + alloc.ptr = base::allocator.benchAlloc(alloc.size); + + if (alloc.ptr == NULL) { + state.SkipWithError("allocation failed"); } - state.ResumeTiming(); } } + virtual std::string name() { + return base::name() + "/multiple_malloc_free"; + } + virtual std::vector argsName() { auto n = benchmark_interface::argsName(); - std::vector res = {"max_allocs", "pre_allocs"}; + std::vector res = {"max_allocs", + "thread_local_allocations"}; res.insert(res.end(), n.begin(), n.end()); return res; } - virtual std::string name() { return base::name() + "/alloc"; } - virtual int64_t iterations() { return 200000; } - - protected: - using base = benchmark_interface; - std::vector> allocations; - std::vector iters; -}; - -// This class benchmarks performance of random deallocations and (re)allocations -template < - typename Size, typename Alloc, - typename = - std::enable_if_t::value>, - typename = - std::enable_if_t::value>> -class multiple_malloc_free_benchmark : public alloc_benchmark { - using distribution = std::uniform_int_distribution; - using base = alloc_benchmark; - - public: - int reallocs = 100; - void SetUp(::benchmark::State &state) override { - if (state.thread_index() != 0) { - return; - } - // unpack arguments - int argPos = 0; - base::max_allocs = state.range(argPos++); - - // pass rest of the arguments to "alloc_size" and "allocator" - argPos = base::alloc_size.SetUp(state, argPos); - base::allocator.SetUp(state, argPos); - - // perform initial allocations which will be later freed and reallocated - base::allocations.resize(state.threads()); - for (auto &i : base::allocations) { - i.resize(base::max_allocs); - - for (size_t j = 0; j < base::max_allocs; j++) { - i[j].ptr = - base::allocator.benchAlloc(base::alloc_size.nextSize()); - if (i[j].ptr == NULL) { - state.SkipWithError("preallocation failed"); - return; - } - i[j].size = base::alloc_size.nextSize(); + private: + void prealloc(benchmark::State &state) { + auto tid = state.thread_index(); + auto &i = allocations[tid]; + i.resize(max_allocs); + auto sizeGenerator = base::alloc_sizes[tid]; + for (size_t j = 0; j < max_allocs; j++) { + auto size = sizeGenerator.nextSize(); + i[j].ptr = base::allocator.benchAlloc(size); + if (i[j].ptr == NULL) { + state.SkipWithError("preallocation failed"); + return; } + i[j].size = size; } - dist.param(distribution::param_type(0, base::max_allocs - 1)); } - void bench(benchmark::State &state) override { + void freeAllocs(benchmark::State &state) { auto tid = state.thread_index(); - auto &allocation = base::allocations[tid]; - std::vector to_alloc; - for (int j = 0; j < reallocs; j++) { - auto idx = dist(generator); - if (allocation[idx].ptr == NULL) { - continue; - } - to_alloc.push_back(idx); - - base::allocator.benchFree(allocation[idx].ptr, - allocation[idx].size); - allocation[idx].ptr = NULL; - allocation[idx].size = 0; - } - - for (auto idx : to_alloc) { - auto s = base::alloc_size.nextSize(); - allocation[idx].ptr = base::allocator.benchAlloc(s); - if (allocation[idx].ptr == NULL) { - state.SkipWithError("allocation failed"); + auto &i = allocations[tid]; + for 
(auto &j : i) { + if (j.ptr != NULL) { + base::allocator.benchFree(j.ptr, j.size); + j.ptr = NULL; + j.size = 0; } - allocation[idx].size = s; } } - virtual std::string name() { - return base::base::name() + "/multiple_malloc_free"; - } - - virtual std::vector argsName() { - auto n = benchmark_interface::argsName(); - std::vector res = {"max_allocs"}; - res.insert(res.end(), n.begin(), n.end()); - return res; + void prepareWorkload(benchmark::State &state) { + auto tid = state.thread_index(); + auto &n = next[tid]; + std::default_random_engine generator; + distribution dist; + generator.seed(0); + dist.param(distribution::param_type(0, max_allocs - 1)); + auto sizeGenerator = base::alloc_sizes[tid]; + + n.clear(); + for (int64_t j = 0; j < state.max_iterations * allocsPerIterations; + j++) { + n.push_back({dist(generator), sizeGenerator.nextSize()}); + } + next_iter[tid] = n.cbegin(); } - - virtual int64_t iterations() { return 2000; } - - std::default_random_engine generator; - distribution dist; }; diff --git a/benchmark/benchmark_size.hpp b/benchmark/benchmark_size.hpp index d17a6b286..44e4bf1da 100644 --- a/benchmark/benchmark_size.hpp +++ b/benchmark/benchmark_size.hpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -45,7 +45,7 @@ class uniform_alloc_size : public alloc_size_interface { state.SkipWithError("min and max must be divisible by granularity"); return argPos; } - + generator.seed(0); dist.param(distribution::param_type(min / gran, max / gran)); multiplier = gran; return argPos; @@ -53,11 +53,11 @@ class uniform_alloc_size : public alloc_size_interface { void TearDown([[maybe_unused]] ::benchmark::State &state) override {} size_t nextSize() override { return dist(generator) * multiplier; } static std::vector argsName() { - return {"min size", "max size", "granularity"}; + return {"min_size", "max_size", "granularity"}; } private: std::default_random_engine generator; distribution dist; - size_t multiplier; + size_t multiplier = 1; }; From bc279d14087c33ed494db7e19aa166d5a2f0c4bb Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 19 Feb 2025 09:59:24 +0000 Subject: [PATCH 168/466] lock the Disjoint Pool bucket before printing its stats --- src/pool/pool_disjoint.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index ef7b3875d..7aeee7165 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -492,6 +492,9 @@ static void disjoint_pool_print_stats(disjoint_pool_t *pool) { for (size_t i = 0; i < pool->buckets_num; i++) { bucket_t *bucket = pool->buckets[i]; + // lock bucket before accessing its stats + utils_mutex_lock(&bucket->bucket_lock); + if (bucket->alloc_count) { LOG_DEBUG("%14zu %12zu %12zu %18zu %20zu %21zu", bucket->size, bucket->alloc_count, bucket->free_count, @@ -500,8 +503,11 @@ static void disjoint_pool_print_stats(disjoint_pool_t *pool) { high_bucket_size = utils_max(bucket_slab_alloc_size(bucket), high_bucket_size); } + high_peak_slabs_in_use = utils_max(bucket->max_slabs_in_use, high_peak_slabs_in_use); + + utils_mutex_unlock(&bucket->bucket_lock); } LOG_DEBUG("current pool size: %zu", From ed0d5f3e2ddcbfe3a151f4cbd350cd633c128d70 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 12 Feb 2025 16:52:59 +0000 Subject: [PATCH 169/466] add separate versioning to ops structures --- 
.../custom_file_provider/custom_file_provider.c | 4 ++-- include/umf/memory_pool.h | 7 +------ include/umf/memory_pool_ops.h | 7 ++++++- include/umf/memory_provider_ops.h | 9 +++++++-- src/cpp_helpers.hpp | 6 +++--- src/ipc.c | 7 ++++--- src/memory_pool.c | 8 ++++++-- src/memory_provider.c | 15 +++++++-------- src/memory_provider_internal.h | 5 +++++ src/memtarget.c | 9 +++++++-- src/memtarget_internal.h | 3 +-- src/memtarget_ops.h | 9 +++++++-- src/memtargets/memtarget_numa.c | 4 ++-- src/pool/pool_jemalloc.c | 4 ++-- src/pool/pool_proxy.c | 4 ++-- src/pool/pool_scalable.c | 2 +- src/provider/provider_cuda.c | 4 ++-- src/provider/provider_devdax_memory.c | 4 ++-- src/provider/provider_file_memory.c | 6 +++--- src/provider/provider_fixed_memory.c | 4 ++-- src/provider/provider_level_zero.c | 4 ++-- src/provider/provider_os_memory.c | 4 ++-- src/provider/provider_tracking.c | 2 +- test/common/pool_null.c | 4 ++-- test/common/pool_trace.c | 2 +- test/common/provider_null.c | 4 ++-- test/common/provider_trace.c | 4 ++-- test/memspaces/mempolicy.cpp | 8 ++------ 28 files changed, 86 insertions(+), 67 deletions(-) diff --git a/examples/custom_file_provider/custom_file_provider.c b/examples/custom_file_provider/custom_file_provider.c index b17fdc0f0..a442fca6a 100644 --- a/examples/custom_file_provider/custom_file_provider.c +++ b/examples/custom_file_provider/custom_file_provider.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -234,7 +234,7 @@ static umf_result_t file_get_min_page_size(void *provider, void *ptr, // File provider operations static umf_memory_provider_ops_t file_ops = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = file_init, .finalize = file_deinit, .alloc = file_alloc, diff --git a/include/umf/memory_pool.h b/include/umf/memory_pool.h index ae5e67a96..ed3d1eb0d 100644 --- a/include/umf/memory_pool.h +++ b/include/umf/memory_pool.h @@ -11,6 +11,7 @@ #define UMF_MEMORY_POOL_H 1 #include +#include #include #ifdef __cplusplus @@ -22,12 +23,6 @@ extern "C" { /// functions typedef struct umf_memory_pool_t *umf_memory_pool_handle_t; -/// @brief This structure comprises function pointers used by corresponding umfPool* -/// calls. Each memory pool implementation should initialize all function -/// pointers. -/// -typedef struct umf_memory_pool_ops_t umf_memory_pool_ops_t; - /// @brief Supported pool creation flags typedef enum umf_pool_create_flag_t { UMF_POOL_CREATE_FLAG_NONE = diff --git a/include/umf/memory_pool_ops.h b/include/umf/memory_pool_ops.h index 829f49fb7..657f40aea 100644 --- a/include/umf/memory_pool_ops.h +++ b/include/umf/memory_pool_ops.h @@ -17,6 +17,11 @@ extern "C" { #endif +/// @brief Version of the Memory Pool ops structure. +/// NOTE: This is equal to the latest UMF version, in which the ops structure +/// has been modified. +#define UMF_POOL_OPS_VERSION_CURRENT UMF_MAKE_VERSION(0, 11) + /// /// @brief This structure comprises function pointers used by corresponding umfPool* /// calls. Each memory pool implementation should initialize all function @@ -24,7 +29,7 @@ extern "C" { /// typedef struct umf_memory_pool_ops_t { /// Version of the ops structure. - /// Should be initialized using UMF_VERSION_CURRENT. + /// Should be initialized using UMF_POOL_OPS_VERSION_CURRENT. 
uint32_t version; /// diff --git a/include/umf/memory_provider_ops.h b/include/umf/memory_provider_ops.h index a61e0aad0..aaddd503b 100644 --- a/include/umf/memory_provider_ops.h +++ b/include/umf/memory_provider_ops.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -16,6 +16,11 @@ extern "C" { #endif +/// @brief Version of the Memory Provider ops structure. +/// NOTE: This is equal to the latest UMF version, in which the ops structure +/// has been modified. +#define UMF_PROVIDER_OPS_VERSION_CURRENT UMF_MAKE_VERSION(0, 11) + /// /// @brief This structure comprises optional function pointers used /// by corresponding umfMemoryProvider* calls. A memory provider implementation @@ -143,7 +148,7 @@ typedef struct umf_memory_provider_ipc_ops_t { /// typedef struct umf_memory_provider_ops_t { /// Version of the ops structure. - /// Should be initialized using UMF_VERSION_CURRENT. + /// Should be initialized using UMF_PROVIDER_OPS_VERSION_CURRENT. uint32_t version; /// diff --git a/src/cpp_helpers.hpp b/src/cpp_helpers.hpp index 878910581..85e81c502 100644 --- a/src/cpp_helpers.hpp +++ b/src/cpp_helpers.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -67,7 +67,7 @@ umf_result_t initialize(T *obj, ArgsTuple &&args) { template umf_memory_pool_ops_t poolOpsBase() { umf_memory_pool_ops_t ops{}; - ops.version = UMF_VERSION_CURRENT; + ops.version = UMF_POOL_OPS_VERSION_CURRENT; ops.finalize = [](void *obj) { delete reinterpret_cast(obj); }; UMF_ASSIGN_OP(ops, T, malloc, ((void *)nullptr)); UMF_ASSIGN_OP(ops, T, calloc, ((void *)nullptr)); @@ -81,7 +81,7 @@ template umf_memory_pool_ops_t poolOpsBase() { template constexpr umf_memory_provider_ops_t providerOpsBase() { umf_memory_provider_ops_t ops{}; - ops.version = UMF_VERSION_CURRENT; + ops.version = UMF_PROVIDER_OPS_VERSION_CURRENT; ops.finalize = [](void *obj) { delete reinterpret_cast(obj); }; UMF_ASSIGN_OP(ops, T, alloc, UMF_RESULT_ERROR_UNKNOWN); UMF_ASSIGN_OP(ops, T, free, UMF_RESULT_ERROR_UNKNOWN); diff --git a/src/ipc.c b/src/ipc.c index 1b479fd7c..12c7bb978 100644 --- a/src/ipc.c +++ b/src/ipc.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,6 +15,7 @@ #include "base_alloc_global.h" #include "ipc_internal.h" #include "memory_pool_internal.h" +#include "memory_provider_internal.h" #include "provider/provider_tracking.h" #include "utils_common.h" #include "utils_log.h" @@ -123,14 +124,14 @@ umf_result_t umfOpenIPCHandle(umf_ipc_handler_handle_t hIPCHandler, umf_ipc_handle_t umfIPCHandle, void **ptr) { // IPC handler is an instance of tracking memory provider - if (*(uint32_t *)hIPCHandler != UMF_VERSION_CURRENT) { + umf_memory_provider_handle_t hProvider = hIPCHandler; + if (hProvider->ops.version != UMF_PROVIDER_OPS_VERSION_CURRENT) { // It is a temporary hack to verify that user passes correct IPC handler, // not a pool handle, as it was required in previous version. 
LOG_ERR("Invalid IPC handler."); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - umf_memory_provider_handle_t hProvider = hIPCHandler; void *base = NULL; umf_result_t ret = umfMemoryProviderOpenIPCHandle( diff --git a/src/memory_pool.c b/src/memory_pool.c index e739f3f2f..ef2c0fa66 100644 --- a/src/memory_pool.c +++ b/src/memory_pool.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -38,7 +38,11 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - assert(ops->version == UMF_VERSION_CURRENT); + if (ops->version != UMF_POOL_OPS_VERSION_CURRENT) { + LOG_WARN("Memory Pool ops version \"%d\" is different than the current " + "version \"%d\"", + ops->version, UMF_POOL_OPS_VERSION_CURRENT); + } if (!(flags & UMF_POOL_CREATE_FLAG_DISABLE_TRACKING)) { // Wrap provider with memory tracking provider. diff --git a/src/memory_provider.c b/src/memory_provider.c index 59f3f1259..ce6a10a20 100644 --- a/src/memory_provider.c +++ b/src/memory_provider.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -20,11 +20,6 @@ #include "memory_provider_internal.h" #include "utils_assert.h" -typedef struct umf_memory_provider_t { - umf_memory_provider_ops_t ops; - void *provider_priv; -} umf_memory_provider_t; - static umf_result_t umfDefaultPurgeLazy(void *provider, void *ptr, size_t size) { (void)provider; @@ -167,14 +162,18 @@ umf_result_t umfMemoryProviderCreate(const umf_memory_provider_ops_t *ops, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } + if (ops->version != UMF_PROVIDER_OPS_VERSION_CURRENT) { + LOG_WARN("Memory Provider ops version \"%d\" is different than the " + "current version \"%d\"", + ops->version, UMF_PROVIDER_OPS_VERSION_CURRENT); + } + umf_memory_provider_handle_t provider = umf_ba_global_alloc(sizeof(umf_memory_provider_t)); if (!provider) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - assert(ops->version == UMF_VERSION_CURRENT); - provider->ops = *ops; assignOpsExtDefaults(&(provider->ops)); diff --git a/src/memory_provider_internal.h b/src/memory_provider_internal.h index 0b7f45f80..dd1111a23 100644 --- a/src/memory_provider_internal.h +++ b/src/memory_provider_internal.h @@ -18,6 +18,11 @@ extern "C" { #endif +typedef struct umf_memory_provider_t { + umf_memory_provider_ops_t ops; + void *provider_priv; +} umf_memory_provider_t; + void *umfMemoryProviderGetPriv(umf_memory_provider_handle_t hProvider); umf_memory_provider_handle_t *umfGetLastFailedMemoryProviderPtr(void); diff --git a/src/memtarget.c b/src/memtarget.c index a89708460..8eb6e4e8c 100644 --- a/src/memtarget.c +++ b/src/memtarget.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,6 +15,7 @@ #include "memtarget_internal.h" #include "memtarget_ops.h" #include "utils_concurrency.h" +#include "utils_log.h" umf_result_t umfMemtargetCreate(const umf_memtarget_ops_t *ops, void *params, umf_memtarget_handle_t *memoryTarget) { @@ -29,7 +30,11 @@ umf_result_t umfMemtargetCreate(const umf_memtarget_ops_t *ops, void *params, return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - assert(ops->version == UMF_VERSION_CURRENT); + if (ops->version != UMF_MEMTARGET_OPS_VERSION_CURRENT) { + LOG_WARN("Memtarget ops version \"%d\" is different than the current " + "version \"%d\"", + ops->version, UMF_MEMTARGET_OPS_VERSION_CURRENT); + } target->ops = ops; diff --git a/src/memtarget_internal.h b/src/memtarget_internal.h index c5b9a61c5..85ec99b8e 100644 --- a/src/memtarget_internal.h +++ b/src/memtarget_internal.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -16,7 +16,6 @@ extern "C" { #endif -struct umf_memtarget_ops_t; typedef struct umf_memtarget_ops_t umf_memtarget_ops_t; typedef struct umf_memtarget_t { diff --git a/src/memtarget_ops.h b/src/memtarget_ops.h index 75e16447e..4bd9bb899 100644 --- a/src/memtarget_ops.h +++ b/src/memtarget_ops.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -18,9 +18,14 @@ extern "C" { #endif +// Version of the Memtarget ops structure. +// NOTE: This is equal to the latest UMF version, in which the ops structure +// has been modified. +#define UMF_MEMTARGET_OPS_VERSION_CURRENT UMF_MAKE_VERSION(0, 11) + typedef struct umf_memtarget_ops_t { /// Version of the ops structure. - /// Should be initialized using UMF_VERSION_CURRENT + /// Should be initialized using UMF_MEMTARGET_OPS_VERSION_CURRENT uint32_t version; umf_result_t (*initialize)(void *params, void **memoryTarget); diff --git a/src/memtargets/memtarget_numa.c b/src/memtargets/memtarget_numa.c index f32774ebb..88d8ac2a4 100644 --- a/src/memtargets/memtarget_numa.c +++ b/src/memtargets/memtarget_numa.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -390,7 +390,7 @@ static umf_result_t numa_compare(void *memTarget, void *otherMemTarget, } struct umf_memtarget_ops_t UMF_MEMTARGET_NUMA_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_MEMTARGET_OPS_VERSION_CURRENT, .initialize = numa_initialize, .finalize = numa_finalize, .pool_create_from_memspace = numa_pool_create_from_memspace, diff --git a/src/pool/pool_jemalloc.c b/src/pool/pool_jemalloc.c index 88e8e9342..10e00dea5 100644 --- a/src/pool/pool_jemalloc.c +++ b/src/pool/pool_jemalloc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -454,7 +454,7 @@ static umf_result_t op_get_last_allocation_error(void *pool) { } static umf_memory_pool_ops_t UMF_JEMALLOC_POOL_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = op_initialize, .finalize = op_finalize, .malloc = op_malloc, diff --git a/src/pool/pool_proxy.c b/src/pool/pool_proxy.c index 2269d9344..eedddb0ac 100644 --- a/src/pool/pool_proxy.c +++ b/src/pool/pool_proxy.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -123,7 +123,7 @@ static umf_result_t proxy_get_last_allocation_error(void *pool) { } static umf_memory_pool_ops_t UMF_PROXY_POOL_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = proxy_pool_initialize, .finalize = proxy_pool_finalize, .malloc = proxy_malloc, diff --git a/src/pool/pool_scalable.c b/src/pool/pool_scalable.c index 2ee265df8..447ba864e 100644 --- a/src/pool/pool_scalable.c +++ b/src/pool/pool_scalable.c @@ -382,7 +382,7 @@ static umf_result_t tbb_get_last_allocation_error(void *pool) { } static umf_memory_pool_ops_t UMF_SCALABLE_POOL_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = tbb_pool_initialize, .finalize = tbb_pool_finalize, .malloc = tbb_malloc, diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index c7929cc7e..bb4b3cf64 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -680,8 +680,8 @@ cu_memory_provider_close_ipc_handle(void *provider, void *ptr, size_t size) { return UMF_RESULT_SUCCESS; } -static struct umf_memory_provider_ops_t UMF_CUDA_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, +static umf_memory_provider_ops_t UMF_CUDA_MEMORY_PROVIDER_OPS = { + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = cu_memory_provider_initialize, .finalize = cu_memory_provider_finalize, .alloc = cu_memory_provider_alloc, diff --git a/src/provider/provider_devdax_memory.c b/src/provider/provider_devdax_memory.c index 4841a9919..8e8197190 100644 --- a/src/provider/provider_devdax_memory.c +++ b/src/provider/provider_devdax_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -530,7 +530,7 @@ static umf_result_t devdax_free(void *provider, void *ptr, size_t size) { } static umf_memory_provider_ops_t UMF_DEVDAX_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = devdax_initialize, .finalize = devdax_finalize, .alloc = devdax_alloc, diff --git a/src/provider/provider_file_memory.c b/src/provider/provider_file_memory.c index ea69dc7b6..f81e4f8d2 100644 --- a/src/provider/provider_file_memory.c +++ b/src/provider/provider_file_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -107,7 +107,7 @@ typedef struct file_memory_provider_t { // A critnib map storing (ptr, fd_offset + 1) pairs. 
We add 1 to fd_offset // in order to be able to store fd_offset equal 0, because // critnib_get() returns value or NULL, so a value cannot equal 0. - // It is needed mainly in the get_ipc_handle and open_ipc_handle hooks + // It is needed mainly in the ipc_get_handle and ipc_open_handle hooks // to mmap a specific part of a file. critnib *fd_offset_map; @@ -848,7 +848,7 @@ static umf_result_t file_free(void *provider, void *ptr, size_t size) { } static umf_memory_provider_ops_t UMF_FILE_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = file_initialize, .finalize = file_finalize, .alloc = file_alloc, diff --git a/src/provider/provider_fixed_memory.c b/src/provider/provider_fixed_memory.c index 6392b39d3..eeeb8b702 100644 --- a/src/provider/provider_fixed_memory.c +++ b/src/provider/provider_fixed_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -254,7 +254,7 @@ static umf_result_t fixed_free(void *provider, void *ptr, size_t size) { } static umf_memory_provider_ops_t UMF_FIXED_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = fixed_initialize, .finalize = fixed_finalize, .alloc = fixed_alloc, diff --git a/src/provider/provider_level_zero.c b/src/provider/provider_level_zero.c index f89661401..a4c68b391 100644 --- a/src/provider/provider_level_zero.c +++ b/src/provider/provider_level_zero.c @@ -843,8 +843,8 @@ ze_memory_provider_close_ipc_handle(void *provider, void *ptr, size_t size) { return UMF_RESULT_SUCCESS; } -static struct umf_memory_provider_ops_t UMF_LEVEL_ZERO_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, +static umf_memory_provider_ops_t UMF_LEVEL_ZERO_MEMORY_PROVIDER_OPS = { + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = ze_memory_provider_initialize, .finalize = ze_memory_provider_finalize, .alloc = ze_memory_provider_alloc, diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index 1fe467942..bd5ea9c69 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2022-2024 Intel Corporation + * Copyright (C) 2022-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -1402,7 +1402,7 @@ static umf_result_t os_close_ipc_handle(void *provider, void *ptr, } static umf_memory_provider_ops_t UMF_OS_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = os_initialize, .finalize = os_finalize, .alloc = os_alloc, diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index aa4a7d8b0..73a03fb2d 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -749,7 +749,7 @@ static umf_result_t trackingCloseIpcHandle(void *provider, void *ptr, } umf_memory_provider_ops_t UMF_TRACKING_MEMORY_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = trackingInitialize, .finalize = trackingFinalize, .alloc = trackingAlloc, diff --git a/test/common/pool_null.c b/test/common/pool_null.c index c34bcfc16..40d662679 100644 --- a/test/common/pool_null.c +++ b/test/common/pool_null.c @@ -1,4 +1,4 @@ -// Copyright (C) 2023 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -64,7 +64,7 @@ static umf_result_t nullGetLastStatus(void *pool) { } umf_memory_pool_ops_t UMF_NULL_POOL_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = nullInitialize, .finalize = nullFinalize, .malloc = nullMalloc, diff --git a/test/common/pool_trace.c b/test/common/pool_trace.c index d8b7522ea..9a9e01019 100644 --- a/test/common/pool_trace.c +++ b/test/common/pool_trace.c @@ -90,7 +90,7 @@ static umf_result_t traceGetLastStatus(void *pool) { } umf_memory_pool_ops_t UMF_TRACE_POOL_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = traceInitialize, .finalize = traceFinalize, .malloc = traceMalloc, diff --git a/test/common/provider_null.c b/test/common/provider_null.c index e667bfce4..b4e54f976 100644 --- a/test/common/provider_null.c +++ b/test/common/provider_null.c @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -130,7 +130,7 @@ static umf_result_t nullCloseIpcHandle(void *provider, void *ptr, size_t size) { } umf_memory_provider_ops_t UMF_NULL_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = nullInitialize, .finalize = nullFinalize, .alloc = nullAlloc, diff --git a/test/common/provider_trace.c b/test/common/provider_trace.c index 9d063b4f5..20f44e868 100644 --- a/test/common/provider_trace.c +++ b/test/common/provider_trace.c @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -191,7 +191,7 @@ static umf_result_t traceCloseIpcHandle(void *provider, void *ptr, } umf_memory_provider_ops_t UMF_TRACE_PROVIDER_OPS = { - .version = UMF_VERSION_CURRENT, + .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = traceInitialize, .finalize = traceFinalize, .alloc = traceAlloc, diff --git a/test/memspaces/mempolicy.cpp b/test/memspaces/mempolicy.cpp index 97948bfbb..7b9c4891d 100644 --- a/test/memspaces/mempolicy.cpp +++ b/test/memspaces/mempolicy.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,11 +7,7 @@ #include "provider_os_memory_internal.h" os_memory_provider_t *providerGetPriv(umf_memory_provider_handle_t hProvider) { - // hack to have access to fields in structure defined in memory_provider.c - struct umf_memory_provider_t { - umf_memory_provider_ops_t ops; - void *provider_priv; - } *provider = (struct umf_memory_provider_t *)hProvider; + umf_memory_provider_t *provider = (umf_memory_provider_t *)hProvider; return (os_memory_provider_t *)provider->provider_priv; } From 36153e2e3d6c13901b69d745e6cf0bad2a114070 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 12 Feb 2025 16:53:20 +0000 Subject: [PATCH 170/466] update RELEASE_STEPS --- RELEASE_STEPS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/RELEASE_STEPS.md b/RELEASE_STEPS.md index 92c38c79d..9189d4804 100644 --- a/RELEASE_STEPS.md +++ b/RELEASE_STEPS.md @@ -40,6 +40,7 @@ Do changes for a release: - Add an entry to ChangeLog, remember to change the day of the week in the release date - For major and minor (prior 1.0.0) releases mention API and ABI compatibility with the previous release - For major and minor releases, update `UMF_VERSION_CURRENT` in `include/umf/base.h` (the API version) + - For changes in ops structures, update corresponding UMF_*_OPS_VERSION_CURRENT - For major and minor (prior 1.0.0) releases update ABI version in `.map` and `.def` files - These files are defined for all public libraries (`libumf` and `proxy_lib`, at the moment) - Commit these changes and tag the release: From 1a814cf8efae4310dbeb1d6bee077ceeafdaacd9 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 12 Feb 2025 16:55:09 +0000 Subject: [PATCH 171/466] add backward-compatibility workflow --- .github/workflows/pr_push.yml | 8 + .github/workflows/reusable_compatibility.yml | 211 +++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 .github/workflows/reusable_compatibility.yml diff --git a/.github/workflows/pr_push.yml b/.github/workflows/pr_push.yml index cfc4a04b9..511808887 100644 --- a/.github/workflows/pr_push.yml +++ b/.github/workflows/pr_push.yml @@ -85,3 +85,11 @@ jobs: contents: read security-events: write uses: ./.github/workflows/reusable_trivy.yml + Compatibility: + needs: [Build] + uses: ./.github/workflows/reusable_compatibility.yml + strategy: + matrix: + tag: ["v0.11.0-dev1"] + with: + tag: ${{matrix.tag}} diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml new file mode 100644 index 000000000..fbd17a2f4 --- /dev/null +++ b/.github/workflows/reusable_compatibility.yml @@ -0,0 +1,211 @@ +# Workflow for checking the backward compatibility of UMF. +# Test the latest UMF shared library with binaries compiled using the older UMF +# shared library. 
+name: Compatibility + +on: + workflow_call: + inputs: + tag: + description: Check backward compatibility with this tag + type: string + default: "v0.11.0-dev1" + +permissions: + contents: read + +jobs: + ubuntu-build: + name: Ubuntu + runs-on: 'ubuntu-22.04' + + steps: + - name: Install apt packages + run: | + sudo apt-get update + sudo apt-get install -y clang cmake libnuma-dev libtbb-dev + + - name: Checkout "tag" UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + ref: refs/tags/${{inputs.tag}} + path: ${{github.workspace}}/tag_version + + - name: Install libhwloc + working-directory: ${{github.workspace}}/tag_version + run: .github/scripts/install_hwloc.sh + + - name: Get "tag" UMF version + working-directory: ${{github.workspace}}/tag_version + run: | + VERSION=$(git describe --tags) + echo "tag version: $VERSION" + + - name: Configure "tag" UMF build + working-directory: ${{github.workspace}}/tag_version + run: > + cmake + -B ${{github.workspace}}/tag_version/build + -DCMAKE_BUILD_TYPE=Debug + -DUMF_BUILD_SHARED_LIBRARY=ON + -DCMAKE_C_COMPILER=gcc + -DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_TESTS=ON + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build "tag" UMF + working-directory: ${{github.workspace}}/tag_version + run: | + cmake --build ${{github.workspace}}/tag_version/build -j $(nproc) + + - name: Run "tag" UMF tests + working-directory: ${{github.workspace}}/tag_version/build + run: | + LD_LIBRARY_PATH=${{github.workspace}}/tag_version/build/lib/ ctest --output-on-failure + + - name: Checkout latest UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + path: ${{github.workspace}}/latest_version + + - name: Get latest UMF version + working-directory: ${{github.workspace}}/latest_version + run: | + VERSION=$(git describe --tags) + echo "checked version: $VERSION" + + - name: Configure latest UMF build + working-directory: ${{github.workspace}}/latest_version + run: > + cmake + -B ${{github.workspace}}/latest_version/build + -DCMAKE_BUILD_TYPE=Debug + -DUMF_BUILD_SHARED_LIBRARY=ON + -DCMAKE_C_COMPILER=gcc + -DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_TESTS=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build latest UMF + working-directory: ${{github.workspace}}/latest_version + run: | + cmake --build ${{github.workspace}}/latest_version/build -j $(nproc) + + - name: Run "tag" UMF tests with latest UMF libs (warnings enabled) + working-directory: ${{github.workspace}}/tag_version/build + run: > + UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" + LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ + ctest --output-on-failure + + windows-build: + name: Windows + env: + VCPKG_PATH: "${{github.workspace}}/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/vcpkg/packages/jemalloc_x64-windows" + runs-on: "windows-2022" + + steps: + - name: Checkout "tag" UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + ref: refs/tags/${{inputs.tag}} + path: 
${{github.workspace}}/tag_version + + - name: Initialize vcpkg + uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + with: + vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + vcpkgDirectory: ${{github.workspace}}/vcpkg + vcpkgJsonGlob: '**/vcpkg.json' + + - name: Install dependencies + working-directory: ${{github.workspace}}/tag_version + run: vcpkg install + shell: pwsh # Specifies PowerShell as the shell for running the script. + + - name: Get "tag" UMF version + working-directory: ${{github.workspace}}/tag_version + run: | + $version = (git describe --tags) + echo "tag version: $VERSION" + + - name: Configure "tag" UMF build + working-directory: ${{github.workspace}}/tag_version + run: > + cmake + -B "${{github.workspace}}/tag_version/build" + -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" + -DCMAKE_C_COMPILER=cl + -DCMAKE_CXX_COMPILER=cl + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_TESTS=ON + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build "tag" UMF + run: cmake --build "${{github.workspace}}/tag_version/build" --config Debug -j $Env:NUMBER_OF_PROCESSORS + + - name: Run "tag" UMF tests + working-directory: "${{github.workspace}}/tag_version/build" + run: ctest -C Debug --output-on-failure --test-dir test + + - name: Checkout latest UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + path: ${{github.workspace}}/latest_version + + # NOTE we use vcpkg setup from "tag" version + - name: Get latest UMF version + working-directory: ${{github.workspace}}/latest_version + run: | + $version = (git describe --tags) + echo "latest version: $VERSION" + + - name: Configure latest UMF build + working-directory: ${{github.workspace}}/latest_version + run: > + cmake + -B "${{github.workspace}}/latest_version/build" + -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" + -DCMAKE_C_COMPILER=cl + -DCMAKE_CXX_COMPILER=cl + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_TESTS=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build latest UMF + run: cmake --build "${{github.workspace}}/latest_version/build" --config Debug -j $Env:NUMBER_OF_PROCESSORS + + - name: Run "tag" UMF tests with latest UMF libs (warnings enabled) + working-directory: ${{github.workspace}}/tag_version/build + run: | + $env:UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" + cp ${{github.workspace}}/latest_version/build/bin/Debug/umf.dll ${{github.workspace}}/tag_version/build/bin/Debug/umf.dll + ctest -C Debug --output-on-failure --test-dir test From 406be0c4c6daee4c33a118c2e11641c5b2c5cd3e Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 13 Feb 2025 15:58:17 +0000 Subject: [PATCH 172/466] update pull request template --- .github/pull_request_template.md | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 89e814856..35a7f05b6 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -36,3 +36,4 @@ Before checking all the boxes please mark the PR as draft. 
- [ ] All newly added source files have a license - [ ] All newly added source files are referenced in CMake files - [ ] Logger (with debug/info/... messages) is used +- [ ] All API changes are reflected in docs and def/map files, and are tested From 1d3ca7f989edea9f1b03c7379d23ed45f35be25c Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 19 Feb 2025 11:15:05 +0100 Subject: [PATCH 173/466] Return DEPENDENCY_UNAVAILABLE error when loading TBB symbols failed Return the UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE error when loading the TBB symbols failed. Signed-off-by: Lukasz Dorau --- src/pool/pool_scalable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pool/pool_scalable.c b/src/pool/pool_scalable.c index e1ab3d376..4abf3b63f 100644 --- a/src/pool/pool_scalable.c +++ b/src/pool/pool_scalable.c @@ -287,7 +287,7 @@ static umf_result_t tbb_pool_initialize(umf_memory_provider_handle_t provider, int ret = init_tbb_callbacks(); if (ret != 0) { LOG_FATAL("loading TBB symbols failed"); - res = UMF_RESULT_ERROR_UNKNOWN; + res = UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; goto err_tbb_init; } From 39659ace1f42eeebaba97ebd60168f5aada1d1cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 19 Feb 2025 15:09:53 +0100 Subject: [PATCH 174/466] enable multithreading for disjointpool benchmark --- benchmark/benchmark.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 3969b6068..d38e07722 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -89,7 +89,8 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) - ->Apply(&default_multiple_alloc_uniform_size); + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, From 21628ae6edb4fa4c1f4708d68d375a140ed72528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 19 Feb 2025 17:05:33 +0100 Subject: [PATCH 175/466] change disjointpool min bucket size in benchmark --- benchmark/benchmark_umf.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/benchmark_umf.hpp b/benchmark/benchmark_umf.hpp index 86cba4877..5c3b160c7 100644 --- a/benchmark/benchmark_umf.hpp +++ b/benchmark/benchmark_umf.hpp @@ -199,7 +199,7 @@ struct disjoint_pool : public pool_interface { return {nullptr, [](void *) {}}; } - ret = umfDisjointPoolParamsSetMinBucketSize(raw_params, 4096); + ret = umfDisjointPoolParamsSetMinBucketSize(raw_params, 8); if (ret != UMF_RESULT_SUCCESS) { state.SkipWithError("Failed to set min bucket size"); return {nullptr, [](void *) {}}; } From 31f6b845849ffa9158d22e92b380ed8f89d63812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 19 Feb 2025 11:12:03 +0100 Subject: [PATCH 176/466] [CI] Update benchmark's scripts repo from UR to SYCL --- .github/workflows/reusable_benchmarks.yml | 27 +++++++++++------------ 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index b41c99f3a..a0dc7a1aa 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -1,6 +1,5 @@ -# Executes benchmarks implemented in this repository -# using scripts for benchmark 
results visualization, -# which are downloaded from Unified Runtime repository. +# Executes benchmarks implemented in this repository using scripts +# for results visualization from intel/llvm (unified-runtime dir). name: Benchmarks on: @@ -98,23 +97,23 @@ jobs: - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) - # We are going to clone Unified Runtime repository in order to run - # the most up-to-date UR scripts for benchmark data visualization - - name: Checkout UR + # Get scripts for benchmark data visualization. + # Use specific tag, as the scripts or files' location may change. + - name: Checkout SYCL uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - repository: oneapi-src/unified-runtime - path: ur-repo + repository: intel/llvm + ref: nightly-2025-02-19 + path: sycl-repo fetch-depth: 1 - fetch-tags: false - - name: Install pip packages for benchmarking scripts from UR + - name: Install benchmarking scripts deps run: | - pip install --force-reinstall -r ${{github.workspace}}/ur-repo/third_party/benchmark_requirements.txt + pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/unified-runtime/third_party/benchmark_requirements.txt - name: Set core range and GPU mask run: | - # Compute the core range for the second NUMA node; first node is for UR jobs. + # Compute the core range for the second NUMA node; first node is for SYCL/UR jobs. # Skip the first 4 cores - the kernel is likely to schedule more work on these. CORES=$(lscpu | awk ' /NUMA node1 CPU|On-line CPU/ {line=$0} @@ -130,11 +129,11 @@ jobs: ZE_AFFINITY_MASK=1 echo "ZE_AFFINITY_MASK=$ZE_AFFINITY_MASK" >> $GITHUB_ENV - - name: Run UMF benchmarks (using scripts from UR) + - name: Run UMF benchmarks id: benchmarks working-directory: ${{env.BUILD_DIR}} run: > - taskset -c ${{ env.CORES }} ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py + taskset -c ${{ env.CORES }} ${{ github.workspace }}/sycl-repo/unified-runtime/scripts/benchmarks/main.py ~/bench_workdir_umf --umf ${{env.BUILD_DIR}} ${{ inputs.upload_report && '--output-html' || '' }} From 4e7f117a23959f9105524f6de9cb9d19e03f6df7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 19 Feb 2025 11:14:22 +0100 Subject: [PATCH 177/466] [CI] Adjust benchmark scripts params to new scripts' version --- .github/workflows/reusable_benchmarks.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index a0dc7a1aa..a7c9e5e28 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -136,11 +136,14 @@ jobs: taskset -c ${{ env.CORES }} ${{ github.workspace }}/sycl-repo/unified-runtime/scripts/benchmarks/main.py ~/bench_workdir_umf --umf ${{env.BUILD_DIR}} + --compare baseline ${{ inputs.upload_report && '--output-html' || '' }} + ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} ${{ inputs.bench_script_params }} + # In case it failed to add a comment, we can still print the results. 
- name: Print benchmark results - if: ${{ always() }} + if: ${{ always() && inputs.pr_no != 0 }} run: cat ${{env.BUILD_DIR}}/benchmark_results.md - name: Add comment to PR From 1380620f243ed00bffc22a2a8636a42edb7d1239 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Wed, 19 Feb 2025 15:25:26 +0100 Subject: [PATCH 178/466] Fix data race in the umfIpcOpenedCacheDestroy function --- src/ipc_cache.c | 3 ++ test/ipcFixtures.hpp | 69 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/src/ipc_cache.c b/src/ipc_cache.c index ccb296d5b..cab5fc478 100644 --- a/src/ipc_cache.c +++ b/src/ipc_cache.c @@ -144,6 +144,8 @@ umfIpcOpenedCacheCreate(ipc_opened_cache_eviction_cb_t eviction_cb) { void umfIpcOpenedCacheDestroy(ipc_opened_cache_handle_t cache) { ipc_opened_cache_entry_t *entry, *tmp; + + utils_mutex_lock(&(cache->global->cache_lock)); HASH_ITER(hh, cache->hash_table, entry, tmp) { DL_DELETE(cache->global->lru_list, entry); HASH_DEL(cache->hash_table, entry); @@ -153,6 +155,7 @@ void umfIpcOpenedCacheDestroy(ipc_opened_cache_handle_t cache) { umf_ba_free(cache->global->cache_allocator, entry); } HASH_CLEAR(hh, cache->hash_table); + utils_mutex_unlock(&(cache->global->cache_lock)); umf_ba_global_free(cache); } diff --git a/test/ipcFixtures.hpp b/test/ipcFixtures.hpp index cfe58a166..23f15a63f 100644 --- a/test/ipcFixtures.hpp +++ b/test/ipcFixtures.hpp @@ -593,4 +593,73 @@ TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) { EXPECT_EQ(stat.openCount, stat.closeCount); } +TEST_P(umfIpcTest, ConcurrentDestroyIpcHandlers) { + constexpr size_t SIZE = 100; + constexpr size_t NUM_ALLOCS = 100; + constexpr size_t NUM_POOLS = 10; + void *ptrs[NUM_ALLOCS]; + void *openedPtrs[NUM_POOLS][NUM_ALLOCS]; + std::vector consumerPools; + umf::pool_unique_handle_t producerPool = makePool(); + ASSERT_NE(producerPool.get(), nullptr); + + for (size_t i = 0; i < NUM_POOLS; ++i) { + consumerPools.push_back(makePool()); + } + + for (size_t i = 0; i < NUM_ALLOCS; ++i) { + void *ptr = umfPoolMalloc(producerPool.get(), SIZE); + ASSERT_NE(ptr, nullptr); + ptrs[i] = ptr; + } + + for (size_t i = 0; i < NUM_ALLOCS; ++i) { + umf_ipc_handle_t ipcHandle = nullptr; + size_t handleSize = 0; + umf_result_t ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + for (size_t poolId = 0; poolId < NUM_POOLS; poolId++) { + void *ptr = nullptr; + umf_ipc_handler_handle_t ipcHandler = nullptr; + ret = + umfPoolGetIPCHandler(consumerPools[poolId].get(), &ipcHandler); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ipcHandler, nullptr); + + ret = umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + openedPtrs[poolId][i] = ptr; + } + + ret = umfPutIPCHandle(ipcHandle); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + for (size_t poolId = 0; poolId < NUM_POOLS; poolId++) { + for (size_t i = 0; i < NUM_ALLOCS; ++i) { + umf_result_t ret = umfCloseIPCHandle(openedPtrs[poolId][i]); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + } + + for (size_t i = 0; i < NUM_ALLOCS; ++i) { + umf_result_t ret = umfFree(ptrs[i]); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + + // Destroy pools in parallel to cause IPC cache cleanup in parallel. 
+ umf_test::syncthreads_barrier syncthreads(NUM_POOLS); + auto poolDestroyFn = [&consumerPools, &syncthreads](size_t tid) { + syncthreads(); + consumerPools[tid].reset(nullptr); + }; + umf_test::parallel_exec(NUM_POOLS, poolDestroyFn); + + producerPool.reset(nullptr); + + EXPECT_EQ(stat.putCount, stat.getCount); + EXPECT_EQ(stat.openCount, stat.closeCount); +} + #endif /* UMF_TEST_IPC_FIXTURES_HPP */ From c925acbc0d7166aeb067aef53d2577e224da8fe3 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Thu, 20 Feb 2025 01:12:49 +0100 Subject: [PATCH 179/466] Suppress Valgrind errors in jemalloc and tbbmalloc --- ...drd-umf_test-provider_devdax_memory_ipc.supp | 17 +++++++++++++++++ .../drd-umf_test-provider_file_memory_ipc.supp | 17 +++++++++++++++++ test/supp/drd-umf_test-provider_os_memory.supp | 17 +++++++++++++++++ ...ind-umf_test-provider_devdax_memory_ipc.supp | 17 +++++++++++++++++ ...grind-umf_test-provider_file_memory_ipc.supp | 17 +++++++++++++++++ .../helgrind-umf_test-provider_os_memory.supp | 17 +++++++++++++++++ 6 files changed, 102 insertions(+) diff --git a/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp b/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp index cd44bb49a..025834658 100644 --- a/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp +++ b/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp @@ -6,3 +6,20 @@ fun:umfOpenIPCHandle ... } + +{ + False-positive ConflictingAccess in jemalloc + drd:ConflictingAccess + fun:atomic_* + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in tbbmalloc + drd:ConflictingAccess + ... + fun:tbb_pool_finalize + ... +} diff --git a/test/supp/drd-umf_test-provider_file_memory_ipc.supp b/test/supp/drd-umf_test-provider_file_memory_ipc.supp index 7fce24116..a15d860aa 100644 --- a/test/supp/drd-umf_test-provider_file_memory_ipc.supp +++ b/test/supp/drd-umf_test-provider_file_memory_ipc.supp @@ -14,3 +14,20 @@ fun:umfOpenIPCHandle ... } + +{ + False-positive ConflictingAccess in jemalloc + drd:ConflictingAccess + fun:atomic_* + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in tbbmalloc + drd:ConflictingAccess + ... + fun:tbb_pool_finalize + ... +} diff --git a/test/supp/drd-umf_test-provider_os_memory.supp b/test/supp/drd-umf_test-provider_os_memory.supp index cd44bb49a..025834658 100644 --- a/test/supp/drd-umf_test-provider_os_memory.supp +++ b/test/supp/drd-umf_test-provider_os_memory.supp @@ -6,3 +6,20 @@ fun:umfOpenIPCHandle ... } + +{ + False-positive ConflictingAccess in jemalloc + drd:ConflictingAccess + fun:atomic_* + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in tbbmalloc + drd:ConflictingAccess + ... + fun:tbb_pool_finalize + ... +} diff --git a/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp b/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp index 4fcd2786c..d6401e8ee 100644 --- a/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp +++ b/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp @@ -6,3 +6,20 @@ fun:umfOpenIPCHandle ... } + +{ + False-positive ConflictingAccess in jemalloc + Helgrind:Race + fun:atomic_* + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in tbbmalloc + Helgrind:Race + ... + fun:tbb_pool_finalize + ... 
+} diff --git a/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp b/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp index 4194f4847..cdc0bd8df 100644 --- a/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp +++ b/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp @@ -23,3 +23,20 @@ fun:critnib_find ... } + +{ + False-positive ConflictingAccess in jemalloc + Helgrind:Race + fun:atomic_* + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in tbbmalloc + Helgrind:Race + ... + fun:tbb_pool_finalize + ... +} diff --git a/test/supp/helgrind-umf_test-provider_os_memory.supp b/test/supp/helgrind-umf_test-provider_os_memory.supp index 4fcd2786c..d6401e8ee 100644 --- a/test/supp/helgrind-umf_test-provider_os_memory.supp +++ b/test/supp/helgrind-umf_test-provider_os_memory.supp @@ -6,3 +6,20 @@ fun:umfOpenIPCHandle ... } + +{ + False-positive ConflictingAccess in jemalloc + Helgrind:Race + fun:atomic_* + ... + fun:je_* + ... +} + +{ + False-positive ConflictingAccess in tbbmalloc + Helgrind:Race + ... + fun:tbb_pool_finalize + ... +} From b860ee1bae939dfffc379e039fe9f3e4c6b2f616 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Thu, 20 Feb 2025 17:29:16 +0100 Subject: [PATCH 180/466] Remove check_if_tracker_is_empty from trackingFinalize --- src/provider/provider_tracking.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index 73a03fb2d..62145d5d7 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -473,10 +473,6 @@ static void trackingFinalize(void *provider) { critnib_delete(p->ipcCache); -#ifndef NDEBUG - check_if_tracker_is_empty(p->hTracker, p->pool); -#endif /* NDEBUG */ - umf_ba_global_free(provider); } From a406ddeaf275a437ad75136ed84fccd791587f91 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 20 Feb 2025 11:35:12 +0100 Subject: [PATCH 181/466] Fix node_list_rm_first() and node_list_rm_with_alignment() Fix checking alignment in the following functions of the coarse library: - node_list_rm_first() and - node_list_rm_with_alignment() In order to check the alignment we have to verify an address of the block (block->data), not a size (block->size) of course ... Signed-off-by: Lukasz Dorau --- src/coarse/coarse.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coarse/coarse.c b/src/coarse/coarse.c index 0ce4ded3d..956e54857 100644 --- a/src/coarse/coarse.c +++ b/src/coarse/coarse.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -278,7 +278,7 @@ static block_t *node_list_rm_first(ravl_free_blocks_head_t *head_node, assert(node->prev == NULL); struct block_t *block = node->block; - if (IS_NOT_ALIGNED(block->size, alignment)) { + if (IS_NOT_ALIGNED(((uintptr_t)block->data), alignment)) { return NULL; } @@ -303,7 +303,7 @@ static block_t *node_list_rm_with_alignment(ravl_free_blocks_head_t *head_node, ravl_free_blocks_elem_t *node; for (node = head_node->head; node != NULL; node = node->next) { - if (IS_ALIGNED(node->block->size, alignment)) { + if (IS_ALIGNED(((uintptr_t)node->block->data), alignment)) { return node_list_rm(head_node, node); } } From d6c5327802a600d2db8b432bacaf011428f0fed2 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 20 Feb 2025 15:54:59 +0100 Subject: [PATCH 182/466] Add a test for not aligned fixed memory buffer Add a test for not aligned fixed memory buffer: - coarseTest_basic_non_aligned_fixed_memory Signed-off-by: Lukasz Dorau --- test/coarse_lib.cpp | 47 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/test/coarse_lib.cpp b/test/coarse_lib.cpp index c5e30ee8f..a1aec224a 100644 --- a/test/coarse_lib.cpp +++ b/test/coarse_lib.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -1349,3 +1349,48 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_alignment_fixed_memory) { coarse_delete(ch); } + +TEST_P(CoarseWithMemoryStrategyTest, + coarseTest_basic_non_aligned_fixed_memory) { + // preallocate some memory and initialize the vector with zeros + const size_t buff_size = 20 * MB + coarse_params.page_size; + std::vector buffer(buff_size, 0); + + void *buf_aligned = (void *)ALIGN_UP_SAFE((uintptr_t)buffer.data(), + coarse_params.page_size); + ASSERT_NE(buf_aligned, nullptr); + + void *buf_non_aligned = (void *)((uintptr_t)buf_aligned + 64); + size_t buf_non_aligned_size = + buff_size - ((uintptr_t)buf_non_aligned - (uintptr_t)buffer.data()); + buf_non_aligned_size = + ALIGN_DOWN(buf_non_aligned_size, coarse_params.page_size); + + coarse_params.cb.alloc = NULL; + coarse_params.cb.free = NULL; + + umf_result = coarse_new(&coarse_params, &coarse_handle); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(coarse_handle, nullptr); + + coarse_t *ch = coarse_handle; + char *ptr = nullptr; + + umf_result = + coarse_add_memory_fixed(ch, buf_non_aligned, buf_non_aligned_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buf_non_aligned_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + umf_result = coarse_alloc(ch, buf_non_aligned_size, 0, (void **)&ptr); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY); + ASSERT_EQ(ptr, nullptr); + + ASSERT_EQ(coarse_get_stats(ch).used_size, 0 * MB); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buf_non_aligned_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + + coarse_delete(ch); +} From 0255017528549064b95570bf77e09bc9f00bbef4 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 20 Feb 2025 16:12:32 +0000 Subject: [PATCH 183/466] fix aligned chunk address calc in disjoint pool --- src/pool/pool_disjoint.c | 31 ++++++++++++++++++------------- src/pool/pool_disjoint_internal.h | 1 + 2 files changed, 19 insertions(+), 
13 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 7aeee7165..c6f7ce6d3 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -94,9 +94,6 @@ static slab_t *create_slab(bucket_t *bucket) { goto free_slab_chunks; } - // TODO - // ASSERT_IS_ALIGNED((uintptr_t)slab->mem_ptr, bucket->size); - // raw allocation is not available for user so mark it as inaccessible utils_annotate_memory_inaccessible(slab->mem_ptr, slab->slab_size); @@ -175,10 +172,10 @@ static void slab_free_chunk(slab_t *slab, void *ptr) { // Make sure that we're in the right slab assert(ptr >= slab_get(slab) && ptr < slab_get_end(slab)); - // Even if the pointer p was previously aligned, it's still inside the - // corresponding chunk, so we get the correct index here. - size_t chunk_idx = - ((uintptr_t)ptr - (uintptr_t)slab->mem_ptr) / slab->bucket->size; + // Get the chunk index + uintptr_t ptr_diff = (uintptr_t)ptr - (uintptr_t)slab->mem_ptr; + assert((ptr_diff % slab->bucket->size) == 0); + size_t chunk_idx = ptr_diff / slab->bucket->size; // Make sure that the chunk was allocated assert(slab->chunks[chunk_idx] && "double free detected"); @@ -738,6 +735,10 @@ void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { } } + void *aligned_ptr = (void *)ALIGN_UP_SAFE((size_t)ptr, alignment); + VALGRIND_DO_MEMPOOL_ALLOC(disjoint_pool, aligned_ptr, size); + utils_annotate_memory_undefined(aligned_ptr, size); + utils_mutex_unlock(&bucket->bucket_lock); if (disjoint_pool->params.pool_trace > 2) { @@ -746,9 +747,6 @@ void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { (from_pool ? "pool" : "provider"), ptr); } - void *aligned_ptr = (void *)ALIGN_UP_SAFE((size_t)ptr, alignment); - VALGRIND_DO_MEMPOOL_ALLOC(disjoint_pool, aligned_ptr, size); - utils_annotate_memory_undefined(aligned_ptr, size); return aligned_ptr; } @@ -804,11 +802,18 @@ umf_result_t disjoint_pool_free(void *pool, void *ptr) { bucket_t *bucket = slab->bucket; - VALGRIND_DO_MEMPOOL_FREE(pool, ptr); utils_mutex_lock(&bucket->bucket_lock); + VALGRIND_DO_MEMPOOL_FREE(pool, ptr); + + // Get the unaligned pointer + // NOTE: the base pointer slab->mem_ptr needn't to be aligned to bucket size + size_t chunk_idx = + (((uintptr_t)ptr - (uintptr_t)slab->mem_ptr) / slab->bucket->size); + void *unaligned_ptr = + (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); - utils_annotate_memory_inaccessible(ptr, bucket->size); - bucket_free_chunk(bucket, ptr, slab, &to_pool); + utils_annotate_memory_inaccessible(unaligned_ptr, bucket->size); + bucket_free_chunk(bucket, unaligned_ptr, slab, &to_pool); if (disjoint_pool->params.pool_trace > 1) { bucket->free_count++; diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h index 3d656689c..c26938ecb 100644 --- a/src/pool/pool_disjoint_internal.h +++ b/src/pool/pool_disjoint_internal.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include From efaf4ac7bb6169417e284d1cd8ef191dc3b5103f Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 20 Feb 2025 16:40:20 +0000 Subject: [PATCH 184/466] cleanup includes in Disjoint Pool --- src/pool/pool_disjoint.c | 16 ++++++++++++++++ src/pool/pool_disjoint_internal.h | 16 ---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index c6f7ce6d3..7b03ea79e 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -5,7 +5,23 @@ * 
SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "base_alloc_global.h" #include "pool_disjoint_internal.h" +#include "provider/provider_tracking.h" +#include "uthash/utlist.h" +#include "utils_common.h" +#include "utils_log.h" +#include "utils_math.h" // Temporary solution for disabling memory poisoning. This is needed because // AddressSanitizer does not support memory poisoning for GPU allocations. diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h index c26938ecb..86460509b 100644 --- a/src/pool/pool_disjoint_internal.h +++ b/src/pool/pool_disjoint_internal.h @@ -8,28 +8,12 @@ #ifndef UMF_POOL_DISJOINT_INTERNAL_H #define UMF_POOL_DISJOINT_INTERNAL_H 1 -#include -#include -#include -#include #include -#include -#include -#include -#include -#include #include #include "critnib/critnib.h" -#include "uthash/utlist.h" - -#include "base_alloc_global.h" -#include "provider/provider_tracking.h" -#include "utils_common.h" #include "utils_concurrency.h" -#include "utils_log.h" -#include "utils_math.h" typedef struct bucket_t bucket_t; typedef struct slab_t slab_t; From 604b870ff7a3b62892c4255d56378fe3ad61e984 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Fri, 21 Feb 2025 14:58:18 +0000 Subject: [PATCH 185/466] temporary disable DP MT benchmark --- benchmark/benchmark.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index d38e07722..94d77dabd 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -89,8 +89,9 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) - ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&multithreaded); + ->Apply(&default_multiple_alloc_uniform_size); +// TODO: enable +//->Apply(&multithreaded); #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, From d7b9de076fa0c6479cf3a4409cc9837839699d52 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Fri, 21 Feb 2025 17:01:10 +0100 Subject: [PATCH 186/466] Add concurrent tests for IPC Get/Put functions --- test/ipcFixtures.hpp | 159 +++++++++++++++++++++++++++++++------------ 1 file changed, 115 insertions(+), 44 deletions(-) diff --git a/test/ipcFixtures.hpp b/test/ipcFixtures.hpp index 23f15a63f..57bd04079 100644 --- a/test/ipcFixtures.hpp +++ b/test/ipcFixtures.hpp @@ -15,8 +15,10 @@ #include #include +#include #include #include +#include #include class MemoryAccessor { @@ -158,6 +160,110 @@ struct umfIpcTest : umf_test::test, umf_memory_provider_ops_t *providerOps = nullptr; pfnProviderParamsCreate providerParamsCreate = nullptr; pfnProviderParamsDestroy providerParamsDestroy = nullptr; + + void concurrentGetConcurrentPutHandles(bool shuffle) { + std::vector ptrs; + constexpr size_t ALLOC_SIZE = 100; + constexpr size_t NUM_POINTERS = 100; + umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); + + for (size_t i = 0; i < NUM_POINTERS; ++i) { + void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); + EXPECT_NE(ptr, nullptr); + ptrs.push_back(ptr); + } + + std::array, NTHREADS> ipcHandles; + + umf_test::syncthreads_barrier syncthreads(NTHREADS); + + auto getHandlesFn = [shuffle, &ipcHandles, &ptrs, + &syncthreads](size_t tid) { + // Each thread gets a copy of 
the pointers to shuffle them + std::vector localPtrs = ptrs; + if (shuffle) { + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(localPtrs.begin(), localPtrs.end(), g); + } + syncthreads(); + for (void *ptr : localPtrs) { + umf_ipc_handle_t ipcHandle; + size_t handleSize; + umf_result_t ret = + umfGetIPCHandle(ptr, &ipcHandle, &handleSize); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ipcHandles[tid].push_back(ipcHandle); + } + }; + + umf_test::parallel_exec(NTHREADS, getHandlesFn); + + auto putHandlesFn = [&ipcHandles, &syncthreads](size_t tid) { + syncthreads(); + for (umf_ipc_handle_t ipcHandle : ipcHandles[tid]) { + umf_result_t ret = umfPutIPCHandle(ipcHandle); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + }; + + umf_test::parallel_exec(NTHREADS, putHandlesFn); + + for (void *ptr : ptrs) { + umf_result_t ret = umfPoolFree(pool.get(), ptr); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + + pool.reset(nullptr); + EXPECT_EQ(stat.putCount, stat.getCount); + } + + void concurrentGetPutHandles(bool shuffle) { + std::vector ptrs; + constexpr size_t ALLOC_SIZE = 100; + constexpr size_t NUM_POINTERS = 100; + umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); + + for (size_t i = 0; i < NUM_POINTERS; ++i) { + void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); + EXPECT_NE(ptr, nullptr); + ptrs.push_back(ptr); + } + + umf_test::syncthreads_barrier syncthreads(NTHREADS); + + auto getPutHandlesFn = [shuffle, &ptrs, &syncthreads](size_t) { + // Each thread gets a copy of the pointers to shuffle them + std::vector localPtrs = ptrs; + if (shuffle) { + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(localPtrs.begin(), localPtrs.end(), g); + } + syncthreads(); + for (void *ptr : localPtrs) { + umf_ipc_handle_t ipcHandle; + size_t handleSize; + umf_result_t ret = + umfGetIPCHandle(ptr, &ipcHandle, &handleSize); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ret = umfPutIPCHandle(ipcHandle); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + }; + + umf_test::parallel_exec(NTHREADS, getPutHandlesFn); + + for (void *ptr : ptrs) { + umf_result_t ret = umfPoolFree(pool.get(), ptr); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + + pool.reset(nullptr); + EXPECT_EQ(stat.putCount, stat.getCount); + } }; TEST_P(umfIpcTest, GetIPCHandleSize) { @@ -473,53 +579,18 @@ TEST_P(umfIpcTest, openInTwoIpcHandlers) { EXPECT_EQ(stat.closeCount, stat.openCount); } -TEST_P(umfIpcTest, ConcurrentGetPutHandles) { - std::vector ptrs; - constexpr size_t ALLOC_SIZE = 100; - constexpr size_t NUM_POINTERS = 100; - umf::pool_unique_handle_t pool = makePool(); - ASSERT_NE(pool.get(), nullptr); - - for (size_t i = 0; i < NUM_POINTERS; ++i) { - void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); - EXPECT_NE(ptr, nullptr); - ptrs.push_back(ptr); - } - - std::array, NTHREADS> ipcHandles; - - umf_test::syncthreads_barrier syncthreads(NTHREADS); - - auto getHandlesFn = [&ipcHandles, &ptrs, &syncthreads](size_t tid) { - syncthreads(); - for (void *ptr : ptrs) { - umf_ipc_handle_t ipcHandle; - size_t handleSize; - umf_result_t ret = umfGetIPCHandle(ptr, &ipcHandle, &handleSize); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - ipcHandles[tid].push_back(ipcHandle); - } - }; - - umf_test::parallel_exec(NTHREADS, getHandlesFn); - - auto putHandlesFn = [&ipcHandles, &syncthreads](size_t tid) { - syncthreads(); - for (umf_ipc_handle_t ipcHandle : ipcHandles[tid]) { - umf_result_t ret = umfPutIPCHandle(ipcHandle); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - } - }; +TEST_P(umfIpcTest, ConcurrentGetConcurrentPutHandles) { + 
concurrentGetConcurrentPutHandles(false); +} - umf_test::parallel_exec(NTHREADS, putHandlesFn); +TEST_P(umfIpcTest, ConcurrentGetConcurrentPutHandlesShuffled) { + concurrentGetConcurrentPutHandles(true); +} - for (void *ptr : ptrs) { - umf_result_t ret = umfPoolFree(pool.get(), ptr); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - } +TEST_P(umfIpcTest, ConcurrentGetPutHandles) { concurrentGetPutHandles(false); } - pool.reset(nullptr); - EXPECT_EQ(stat.putCount, stat.getCount); +TEST_P(umfIpcTest, ConcurrentGetPutHandlesShuffled) { + concurrentGetPutHandles(true); } TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) { From 8228ec94050c0ded4dc3fcb3ba5cb376d4143be4 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 24 Feb 2025 10:51:20 +0100 Subject: [PATCH 187/466] Disable building and installing the jemalloc's documentation Fixes: #1128 Signed-off-by: Lukasz Dorau --- CMakeLists.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 396a27c1e..71d630fa5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -186,11 +186,12 @@ else() # --disable-initial-exec-tls - Disable the initial-exec TLS model for # jemalloc's internal thread-local storage (on those platforms that # support explicit settings). This can allow jemalloc to be dynamically - # loaded after program startup (e.g. using dlopen). + # loaded after program startup (e.g. using dlopen). --disable-doc - + # Disable building and installing the documentation. COMMAND ./configure --prefix=${jemalloc_targ_BINARY_DIR} --with-jemalloc-prefix=je_ --disable-cxx --disable-initial-exec-tls - CFLAGS=-fPIC + --disable-doc CFLAGS=-fPIC WORKING_DIRECTORY ${jemalloc_targ_SOURCE_DIR} OUTPUT ${jemalloc_targ_SOURCE_DIR}/Makefile DEPENDS ${jemalloc_targ_SOURCE_DIR}/configure) From fd278365d46a1fdec5d5b01853eac7b5976e0071 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 20 Feb 2025 16:01:06 +0100 Subject: [PATCH 188/466] Bump L0 loader to v1.20.2 --- CMakeLists.txt | 2 +- examples/ipc_level_zero/CMakeLists.txt | 2 +- examples/level_zero_shared_memory/CMakeLists.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 396a27c1e..1d8a16ab5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -407,7 +407,7 @@ if(UMF_BUILD_LEVEL_ZERO_PROVIDER AND (NOT UMF_LEVEL_ZERO_INCLUDE_DIR)) include(FetchContent) set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") - set(LEVEL_ZERO_LOADER_TAG v1.19.2) + set(LEVEL_ZERO_LOADER_TAG v1.20.2) message( STATUS diff --git a/examples/ipc_level_zero/CMakeLists.txt b/examples/ipc_level_zero/CMakeLists.txt index d672d3e92..2aa391d65 100644 --- a/examples/ipc_level_zero/CMakeLists.txt +++ b/examples/ipc_level_zero/CMakeLists.txt @@ -24,7 +24,7 @@ endif() include(FetchContent) set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") -set(LEVEL_ZERO_LOADER_TAG v1.19.2) +set(LEVEL_ZERO_LOADER_TAG v1.20.2) message( STATUS diff --git a/examples/level_zero_shared_memory/CMakeLists.txt b/examples/level_zero_shared_memory/CMakeLists.txt index f4aaf09e9..b7c990145 100644 --- a/examples/level_zero_shared_memory/CMakeLists.txt +++ b/examples/level_zero_shared_memory/CMakeLists.txt @@ -24,7 +24,7 @@ endif() include(FetchContent) set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") -set(LEVEL_ZERO_LOADER_TAG v1.19.2) +set(LEVEL_ZERO_LOADER_TAG v1.20.2) message( STATUS From ab731c046d782ad0c1ed7872bca9f1ae64cc9e33 Mon Sep 17 00:00:00 2001 From: Patryk 
Kaminski Date: Mon, 24 Feb 2025 12:39:58 +0100 Subject: [PATCH 189/466] Fix Windows Unix Makefiles generator builds Using Unix Makefiles CMake generator on Windows results in a build error. Now fetched hwloc is explicitly built before the library is linked with other UMF targets. --- CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 71d630fa5..7eafb7c3b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -283,7 +283,8 @@ else() message(STATUS "hwloc CMAKE_GENERATOR: ${CMAKE_GENERATOR}") - if(CMAKE_GENERATOR STREQUAL "Ninja") + if(CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL + "Unix Makefiles") add_custom_command( COMMAND ${CMAKE_COMMAND} -DCMAKE_INSTALL_PREFIX=${hwloc_targ_BINARY_DIR} -B build From 249c36446cf56209841a0ff9eebaea06b5f44f46 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Mon, 24 Feb 2025 14:25:15 +0000 Subject: [PATCH 190/466] implement malloc_usable_size and enable pool tests for disjoint --- src/pool/pool_disjoint.c | 37 ++++++++++++++++---- test/common/pool.hpp | 46 ++++++++++++++++++++++++- test/memoryPoolAPI.cpp | 15 ++++---- test/poolFixtures.hpp | 49 ++++++++++++++++++-------- test/pools/disjoint_pool.cpp | 66 +++++++----------------------------- 5 files changed, 131 insertions(+), 82 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 7b03ea79e..9adb1a7a4 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -752,8 +752,10 @@ void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { } void *aligned_ptr = (void *)ALIGN_UP_SAFE((size_t)ptr, alignment); - VALGRIND_DO_MEMPOOL_ALLOC(disjoint_pool, aligned_ptr, size); - utils_annotate_memory_undefined(aligned_ptr, size); + size_t diff = (ptrdiff_t)aligned_ptr - (ptrdiff_t)ptr; + size_t real_size = bucket->size - diff; + VALGRIND_DO_MEMPOOL_ALLOC(disjoint_pool, aligned_ptr, real_size); + utils_annotate_memory_undefined(aligned_ptr, real_size); utils_mutex_unlock(&bucket->bucket_lock); @@ -767,11 +769,34 @@ void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { } size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { - (void)pool; - (void)ptr; + disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; + if (ptr == NULL) { + return 0; + } - // Not supported - return 0; + // check if given pointer is allocated inside any Disjoint Pool slab + slab_t *slab = + (slab_t *)critnib_find_le(disjoint_pool->known_slabs, (uintptr_t)ptr); + if (slab == NULL || ptr >= slab_get_end(slab)) { + // memory comes directly from the provider + umf_alloc_info_t allocInfo = {NULL, 0, NULL}; + umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + if (ret != UMF_RESULT_SUCCESS) { + return 0; + } + + return allocInfo.baseSize; + } + // Get the unaligned pointer + // NOTE: the base pointer slab->mem_ptr needn't to be aligned to bucket size + size_t chunk_idx = + (((uintptr_t)ptr - (uintptr_t)slab->mem_ptr) / slab->bucket->size); + void *unaligned_ptr = + (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); + + ptrdiff_t diff = (ptrdiff_t)ptr - (ptrdiff_t)unaligned_ptr; + + return slab->bucket->size - diff; } umf_result_t disjoint_pool_free(void *pool, void *ptr) { diff --git a/test/common/pool.hpp b/test/common/pool.hpp index 9a5739085..a5b4afc15 100644 --- a/test/common/pool.hpp +++ b/test/common/pool.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under 
the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -19,6 +19,7 @@ #include #include +#include #include "base.hpp" #include "cpp_helpers.hpp" @@ -150,6 +151,49 @@ struct malloc_pool : public pool_base_t { umf_memory_pool_ops_t MALLOC_POOL_OPS = umf::poolMakeCOps(); +static constexpr size_t DEFAULT_DISJOINT_SLAB_MIN_SIZE = 4096; +static constexpr size_t DEFAULT_DISJOINT_MAX_POOLABLE_SIZE = 4096; +static constexpr size_t DEFAULT_DISJOINT_CAPACITY = 4; +static constexpr size_t DEFAULT_DISJOINT_MIN_BUCKET_SIZE = 64; + +inline void *defaultDisjointPoolConfig() { + umf_disjoint_pool_params_handle_t config = nullptr; + umf_result_t res = umfDisjointPoolParamsCreate(&config); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error("Failed to create pool params"); + } + res = umfDisjointPoolParamsSetSlabMinSize(config, + DEFAULT_DISJOINT_SLAB_MIN_SIZE); + if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); + throw std::runtime_error("Failed to set slab min size"); + } + res = umfDisjointPoolParamsSetMaxPoolableSize( + config, DEFAULT_DISJOINT_MAX_POOLABLE_SIZE); + if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); + throw std::runtime_error("Failed to set max poolable size"); + } + res = umfDisjointPoolParamsSetCapacity(config, DEFAULT_DISJOINT_CAPACITY); + if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); + throw std::runtime_error("Failed to set capacity"); + } + res = umfDisjointPoolParamsSetMinBucketSize( + config, DEFAULT_DISJOINT_MIN_BUCKET_SIZE); + if (res != UMF_RESULT_SUCCESS) { + umfDisjointPoolParamsDestroy(config); + throw std::runtime_error("Failed to set min bucket size"); + } + + return config; +} + +inline umf_result_t defaultDisjointPoolConfigDestroy(void *config) { + return umfDisjointPoolParamsDestroy( + static_cast(config)); +} + } // namespace umf_test #endif /* UMF_TEST_POOL_HPP */ diff --git a/test/memoryPoolAPI.cpp b/test/memoryPoolAPI.cpp index e2455fe85..a949b281f 100644 --- a/test/memoryPoolAPI.cpp +++ b/test/memoryPoolAPI.cpp @@ -12,6 +12,7 @@ #include "test_helpers.h" #include +#include #include #ifdef UMF_PROXY_LIB_ENABLED @@ -295,12 +296,14 @@ TEST_F(tagTest, SetAndGetInvalidPool) { INSTANTIATE_TEST_SUITE_P( mallocPoolTest, umfPoolTest, - ::testing::Values(poolCreateExtParams{&MALLOC_POOL_OPS, nullptr, nullptr, - &UMF_NULL_PROVIDER_OPS, nullptr, - nullptr}, - poolCreateExtParams{umfProxyPoolOps(), nullptr, nullptr, - &BA_GLOBAL_PROVIDER_OPS, nullptr, - nullptr})); + ::testing::Values( + poolCreateExtParams{&MALLOC_POOL_OPS, nullptr, nullptr, + &UMF_NULL_PROVIDER_OPS, nullptr, nullptr}, + poolCreateExtParams{umfProxyPoolOps(), nullptr, nullptr, + &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr}, + poolCreateExtParams{umfDisjointPoolOps(), defaultDisjointPoolConfig, + defaultDisjointPoolConfigDestroy, + &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr})); INSTANTIATE_TEST_SUITE_P(mallocMultiPoolTest, umfMultiPoolTest, ::testing::Values(poolCreateExtParams{ diff --git a/test/poolFixtures.hpp b/test/poolFixtures.hpp index d9a5410c0..6f18664f9 100644 --- a/test/poolFixtures.hpp +++ b/test/poolFixtures.hpp @@ -452,26 +452,45 @@ TEST_P(umfPoolTest, allocMaxSize) { } TEST_P(umfPoolTest, mallocUsableSize) { + [[maybe_unused]] auto pool_ops = std::get<0>(this->GetParam()); +#ifdef _WIN32 + if (pool_ops == &umf_test::MALLOC_POOL_OPS) { + GTEST_SKIP() + << "Windows Malloc Pool does not support umfPoolAlignedMalloc"; + } +#endif + if 
(!umf_test::isAlignedAllocSupported(pool.get())) { + GTEST_SKIP(); + } #ifdef __SANITIZE_ADDRESS__ - // Sanitizer replaces malloc_usable_size implementation with its own - GTEST_SKIP() - << "This test is invalid with AddressSanitizer instrumentation"; -#else + if (pool_ops == &umf_test::MALLOC_POOL_OPS) { + // Sanitizer replaces malloc_usable_size implementation with its own + GTEST_SKIP() + << "This test is invalid with AddressSanitizer instrumentation"; + } +#endif + for (size_t allocSize : + {32, 64, 1 << 6, 1 << 10, 1 << 13, 1 << 16, 1 << 19}) { + for (size_t alignment : {0, 1 << 6, 1 << 8, 1 << 12}) { + if (alignment >= allocSize) { + continue; + } + void *ptr = nullptr; + if (alignment == 0) { + ptr = umfPoolMalloc(pool.get(), allocSize); + } else { + ptr = umfPoolAlignedMalloc(pool.get(), allocSize, alignment); + } + ASSERT_NE(ptr, nullptr); + size_t result = umfPoolMallocUsableSize(pool.get(), ptr); + ASSERT_TRUE(result == 0 || result >= allocSize); - for (size_t allocSize : {32, 48, 1024, 8192}) { - char *ptr = static_cast(umfPoolMalloc(pool.get(), allocSize)); - ASSERT_NE(ptr, nullptr); - size_t result = umfPoolMallocUsableSize(pool.get(), ptr); - ASSERT_TRUE(result == 0 || result >= allocSize); + // Make sure we can write to this memory + memset(ptr, 123, result); - // Make sure we can write to this memory - for (size_t i = 0; i < result; i++) { - ptr[i] = 123; + umfPoolFree(pool.get(), ptr); } - - umfPoolFree(pool.get(), ptr); } -#endif } #endif /* UMF_TEST_POOL_FIXTURES_HPP */ diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index dad960187..02f769802 100644 --- a/test/pools/disjoint_pool.cpp +++ b/test/pools/disjoint_pool.cpp @@ -13,49 +13,6 @@ #include "provider_null.h" #include "provider_trace.h" -static constexpr size_t DEFAULT_DISJOINT_SLAB_MIN_SIZE = 4096; -static constexpr size_t DEFAULT_DISJOINT_MAX_POOLABLE_SIZE = 4096; -static constexpr size_t DEFAULT_DISJOINT_CAPACITY = 4; -static constexpr size_t DEFAULT_DISJOINT_MIN_BUCKET_SIZE = 64; - -void *defaultPoolConfig() { - umf_disjoint_pool_params_handle_t config = nullptr; - umf_result_t res = umfDisjointPoolParamsCreate(&config); - if (res != UMF_RESULT_SUCCESS) { - throw std::runtime_error("Failed to create pool params"); - } - res = umfDisjointPoolParamsSetSlabMinSize(config, - DEFAULT_DISJOINT_SLAB_MIN_SIZE); - if (res != UMF_RESULT_SUCCESS) { - umfDisjointPoolParamsDestroy(config); - throw std::runtime_error("Failed to set slab min size"); - } - res = umfDisjointPoolParamsSetMaxPoolableSize( - config, DEFAULT_DISJOINT_MAX_POOLABLE_SIZE); - if (res != UMF_RESULT_SUCCESS) { - umfDisjointPoolParamsDestroy(config); - throw std::runtime_error("Failed to set max poolable size"); - } - res = umfDisjointPoolParamsSetCapacity(config, DEFAULT_DISJOINT_CAPACITY); - if (res != UMF_RESULT_SUCCESS) { - umfDisjointPoolParamsDestroy(config); - throw std::runtime_error("Failed to set capacity"); - } - res = umfDisjointPoolParamsSetMinBucketSize( - config, DEFAULT_DISJOINT_MIN_BUCKET_SIZE); - if (res != UMF_RESULT_SUCCESS) { - umfDisjointPoolParamsDestroy(config); - throw std::runtime_error("Failed to set min bucket size"); - } - - return config; -} - -umf_result_t poolConfigDestroy(void *config) { - return umfDisjointPoolParamsDestroy( - static_cast(config)); -} - using umf_test::test; using namespace umf_test; @@ -92,7 +49,7 @@ TEST_F(test, internals) { provider_handle = providerUnique.get(); umf_disjoint_pool_params_handle_t params = - (umf_disjoint_pool_params_handle_t)defaultPoolConfig(); + 
(umf_disjoint_pool_params_handle_t)defaultDisjointPoolConfig(); // set to maximum tracing params->pool_trace = 3; params->max_poolable_size = 1024 * 1024; @@ -256,7 +213,7 @@ TEST_F(test, sharedLimits) { static constexpr size_t MaxSize = 4 * SlabMinSize; umf_disjoint_pool_params_handle_t params = - (umf_disjoint_pool_params_handle_t)defaultPoolConfig(); + (umf_disjoint_pool_params_handle_t)defaultDisjointPoolConfig(); umf_result_t ret = umfDisjointPoolParamsSetSlabMinSize(params, SlabMinSize); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); @@ -373,22 +330,23 @@ TEST_F(test, disjointPoolInvalidBucketSize) { INSTANTIATE_TEST_SUITE_P(disjointPoolTests, umfPoolTest, ::testing::Values(poolCreateExtParams{ - umfDisjointPoolOps(), defaultPoolConfig, - poolConfigDestroy, &BA_GLOBAL_PROVIDER_OPS, - nullptr, nullptr})); + umfDisjointPoolOps(), defaultDisjointPoolConfig, + defaultDisjointPoolConfigDestroy, + &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr})); void *memProviderParams() { return (void *)&DEFAULT_DISJOINT_CAPACITY; } INSTANTIATE_TEST_SUITE_P( disjointPoolTests, umfMemTest, ::testing::Values(std::make_tuple( - poolCreateExtParams{umfDisjointPoolOps(), defaultPoolConfig, - poolConfigDestroy, &MOCK_OUT_OF_MEM_PROVIDER_OPS, - memProviderParams, nullptr}, + poolCreateExtParams{umfDisjointPoolOps(), defaultDisjointPoolConfig, + defaultDisjointPoolConfigDestroy, + &MOCK_OUT_OF_MEM_PROVIDER_OPS, memProviderParams, + nullptr}, static_cast(DEFAULT_DISJOINT_CAPACITY) / 2))); INSTANTIATE_TEST_SUITE_P(disjointMultiPoolTests, umfMultiPoolTest, ::testing::Values(poolCreateExtParams{ - umfDisjointPoolOps(), defaultPoolConfig, - poolConfigDestroy, &BA_GLOBAL_PROVIDER_OPS, - nullptr, nullptr})); + umfDisjointPoolOps(), defaultDisjointPoolConfig, + defaultDisjointPoolConfigDestroy, + &BA_GLOBAL_PROVIDER_OPS, nullptr, nullptr})); From bf593a032c16173b8798735a0b56fd3c8c4c1bcd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Feb 2025 21:30:56 +0000 Subject: [PATCH 191/466] Bump breathe Bumps the pip-dependencies group with 1 update in the /third_party directory: [breathe](https://github.com/breathe-doc/breathe). Updates `breathe` from 4.35.0 to 4.36.0 - [Release notes](https://github.com/breathe-doc/breathe/releases) - [Changelog](https://github.com/breathe-doc/breathe/blob/main/CHANGELOG.rst) - [Commits](https://github.com/breathe-doc/breathe/compare/v4.35.0...v4.36.0) --- updated-dependencies: - dependency-name: breathe dependency-type: direct:production update-type: version-update:semver-minor dependency-group: pip-dependencies ... 
Signed-off-by: dependabot[bot] --- third_party/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/third_party/requirements.txt b/third_party/requirements.txt index 4b8244b3a..467ea1e03 100644 --- a/third_party/requirements.txt +++ b/third_party/requirements.txt @@ -12,7 +12,7 @@ sphinxcontrib_devhelp==2.0.0 sphinxcontrib_htmlhelp==2.1.0 sphinxcontrib_serializinghtml==2.0.0 sphinxcontrib_qthelp==2.0.0 -breathe==4.35.0 +breathe==4.36.0 sphinx==8.1.3 sphinx_book_theme==1.1.3 # Spelling check in documentation From c9134a4cbb32e3fbba1ebee2debdd04731514d11 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 26 Feb 2025 10:48:18 +0100 Subject: [PATCH 192/466] Use LOG_FATAL() in case of critical errors Ref: #1095 Signed-off-by: Lukasz Dorau --- src/provider/provider_tracking.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index 62145d5d7..f9a98e87f 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -291,26 +291,26 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, tracker_alloc_info_t *lowValue = (tracker_alloc_info_t *)critnib_get( provider->hTracker->alloc_segments_map, (uintptr_t)lowPtr); if (!lowValue) { - LOG_ERR("no left value"); + LOG_FATAL("no left value"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err; + goto err_assert; } tracker_alloc_info_t *highValue = (tracker_alloc_info_t *)critnib_get( provider->hTracker->alloc_segments_map, (uintptr_t)highPtr); if (!highValue) { - LOG_ERR("no right value"); + LOG_FATAL("no right value"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err; + goto err_assert; } if (lowValue->pool != highValue->pool) { - LOG_ERR("pool mismatch"); + LOG_FATAL("pool mismatch"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err; + goto err_assert; } if (lowValue->size + highValue->size != totalSize) { - LOG_ERR("lowValue->size + highValue->size != totalSize"); + LOG_FATAL("lowValue->size + highValue->size != totalSize"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; - goto err; + goto err_assert; } ret = umfMemoryProviderAllocationMerge(provider->hUpstream, lowPtr, highPtr, @@ -342,7 +342,7 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, return UMF_RESULT_SUCCESS; -err: +err_assert: assert(0); not_merged: From 184d0274eefa5f2d36b62e829d130ba448f586e8 Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Wed, 29 Jan 2025 12:06:24 +0100 Subject: [PATCH 193/466] [CTL] Add CTL functionality (by handle access) This commit introduces the control and introspection mechanism that can be accessed using pointer to supported pool or provider. 
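A minimal usage sketch of the new by-handle entry points (illustrative only: the
node name "example.attr", the size_t value type, and the helper function below
are placeholders, not part of this patch; real node names depend on the ctl
nodes a given pool or provider registers):

    #include <umf/base.h>
    #include <umf/memory_pool.h>

    /* Hypothetical example: read an attribute of a pool through its handle,
     * then write an updated value back. "example.attr" is NOT a real node. */
    static void adjust_pool_attr(umf_memory_pool_handle_t pool) {
        size_t value = 0;
        if (umfCtlGet("example.attr", (void *)pool, &value) !=
            UMF_RESULT_SUCCESS) {
            return; /* this pool does not expose such a node */
        }
        value *= 2;
        (void)umfCtlSet("example.attr", (void *)pool, &value);
    }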
--- include/umf/base.h | 37 +++++ include/umf/memory_pool_ops.h | 16 +++ include/umf/memory_provider_ops.h | 18 ++- src/ctl/ctl.c | 172 ++++++++++++++++++------ src/ctl/ctl.h | 63 +++++---- src/libumf.c | 7 + src/libumf.def | 3 + src/libumf.map | 3 + src/memory_pool.c | 30 +++++ src/memory_provider.c | 29 ++++ src/memory_provider_internal.h | 5 + src/pool/pool_scalable.c | 41 +++++- src/provider/provider_os_memory.c | 44 +++++- test/CMakeLists.txt | 10 +- test/ctl/config.txt | 4 +- test/ctl/ctl_api.cpp | 142 +++++++++++++++++++ test/ctl/ctl_debug.c | 63 +++++---- test/ctl/{test.cpp => ctl_unittest.cpp} | 0 18 files changed, 591 insertions(+), 96 deletions(-) create mode 100644 test/ctl/ctl_api.cpp rename test/ctl/{test.cpp => ctl_unittest.cpp} (100%) diff --git a/include/umf/base.h b/include/umf/base.h index 8dad184f2..cc6b0ccbd 100644 --- a/include/umf/base.h +++ b/include/umf/base.h @@ -50,6 +50,43 @@ typedef enum umf_result_t { UMF_RESULT_ERROR_UNKNOWN = 0x7ffffffe ///< Unknown or internal error } umf_result_t; +/// @brief Type of the CTL query +typedef enum umf_ctl_query_type { + CTL_QUERY_READ, + CTL_QUERY_WRITE, + CTL_QUERY_RUNNABLE, + CTL_QUERY_SUBTREE, + + MAX_CTL_QUERY_TYPE +} umf_ctl_query_type_t; + +/// +/// @brief Get value of a specified attribute at the given name. +/// @param name name of an attribute to be retrieved +/// @param ctx pointer to the pool or the provider +/// @param arg [out] pointer to the variable where the value will be stored +/// @return UMF_RESULT_SUCCESS on success or UMF_RESULT_ERROR_UNKNOWN on failure. +/// +umf_result_t umfCtlGet(const char *name, void *ctx, void *arg); + +/// +/// @brief Set value of a specified attribute at the given name. +/// @param name name of an attribute to be set +/// @param ctx pointer to the pool or the provider +/// @param arg [in] pointer to the value that will be set +/// @return UMF_RESULT_SUCCESS on success or UMF_RESULT_ERROR_UNKNOWN on failure. +/// +umf_result_t umfCtlSet(const char *name, void *ctx, void *arg); + +/// +/// @brief Execute callback related with the specified attribute. +/// @param name name of an attribute to be executed +/// @param ctx pointer to the pool or the provider +/// @param arg [in/out] pointer to the value, can be used as an input or output +/// @return UMF_RESULT_SUCCESS on success or UMF_RESULT_ERROR_UNKNOWN on failure. +/// +umf_result_t umfCtlExec(const char *name, void *ctx, void *arg); + #ifdef __cplusplus } #endif diff --git a/include/umf/memory_pool_ops.h b/include/umf/memory_pool_ops.h index 657f40aea..bf44383b4 100644 --- a/include/umf/memory_pool_ops.h +++ b/include/umf/memory_pool_ops.h @@ -125,6 +125,22 @@ typedef struct umf_memory_pool_ops_t { /// The value is undefined if the previous allocation was successful. /// umf_result_t (*get_last_allocation_error)(void *pool); + + /// + /// @brief Control operation for the memory pool. + /// The function is used to perform various control operations + /// on the memory pool. + /// + /// @param hPool handle to the memory pool. + /// @param operationType type of the operation to be performed. + /// @param name name associated with the operation. + /// @param arg argument for the operation. + /// @param queryType type of the query to be performed. + /// + /// @return umf_result_t result of the control operation. 
+ /// + umf_result_t (*ctl)(void *hPool, int operationType, const char *name, + void *arg, umf_ctl_query_type_t queryType); } umf_memory_pool_ops_t; #ifdef __cplusplus diff --git a/include/umf/memory_provider_ops.h b/include/umf/memory_provider_ops.h index aaddd503b..638f2975b 100644 --- a/include/umf/memory_provider_ops.h +++ b/include/umf/memory_provider_ops.h @@ -82,7 +82,6 @@ typedef struct umf_memory_provider_ext_ops_t { /// umf_result_t (*allocation_split)(void *hProvider, void *ptr, size_t totalSize, size_t firstSize); - } umf_memory_provider_ext_ops_t; /// @@ -250,6 +249,23 @@ typedef struct umf_memory_provider_ops_t { /// @brief Optional IPC ops. The API allows sharing of memory objects across different processes. /// umf_memory_provider_ipc_ops_t ipc; + + /// + /// @brief Control operation for the memory provider. + /// The function is used to perform various control operations + /// on the memory provider. + /// + /// @param hProvider handle to the memory provider. + /// @param operationType type of the operation to be performed. + /// @param name name associated with the operation. + /// @param arg argument for the operation. + /// @param queryType type of the query to be performed. + /// + /// @return umf_result_t result of the control operation. + /// + umf_result_t (*ctl)(void *hProvider, int operationType, const char *name, + void *arg, umf_ctl_query_type_t queryType); + } umf_memory_provider_ops_t; #ifdef __cplusplus diff --git a/src/ctl/ctl.c b/src/ctl/ctl.c index 4db11ac21..99ab2d96e 100644 --- a/src/ctl/ctl.c +++ b/src/ctl/ctl.c @@ -24,6 +24,8 @@ #include #include +#include + #include "base_alloc/base_alloc_global.h" #include "utils/utils_common.h" #include "utlist.h" @@ -43,8 +45,9 @@ #define CTL_QUERY_NODE_SEPARATOR "." #define CTL_VALUE_ARG_SEPARATOR "," +/* GLOBAL TREE */ static int ctl_global_first_free = 0; -static struct ctl_node CTL_NODE(global)[CTL_MAX_ENTRIES]; +static umf_ctl_node_t CTL_NODE(global)[CTL_MAX_ENTRIES]; /* * This is the top level node of the ctl tree structure. Each node can contain @@ -57,7 +60,7 @@ static struct ctl_node CTL_NODE(global)[CTL_MAX_ENTRIES]; * convenience. */ struct ctl { - struct ctl_node root[CTL_MAX_ENTRIES]; + umf_ctl_node_t root[CTL_MAX_ENTRIES]; int first_free; }; @@ -78,17 +81,52 @@ char *Strdup(const char *s) { return p; } +umf_result_t umfCtlGet(const char *name, void *ctx, void *arg) { + if (name == NULL || arg == NULL || ctx == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + return ctl_query(NULL, ctx, CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_READ, + arg) + ? UMF_RESULT_ERROR_UNKNOWN + : UMF_RESULT_SUCCESS; +} + +umf_result_t umfCtlSet(const char *name, void *ctx, void *arg) { + if (name == NULL || arg == NULL || ctx == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + return ctl_query(NULL, ctx, CTL_QUERY_PROGRAMMATIC, name, CTL_QUERY_WRITE, + arg) + ? UMF_RESULT_ERROR_UNKNOWN + : UMF_RESULT_SUCCESS; +} + +umf_result_t umfCtlExec(const char *name, void *ctx, void *arg) { + if (name == NULL || arg == NULL || ctx == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + return ctl_query(NULL, ctx, CTL_QUERY_PROGRAMMATIC, name, + CTL_QUERY_RUNNABLE, arg) + ? UMF_RESULT_ERROR_UNKNOWN + : UMF_RESULT_SUCCESS; +} + /* * ctl_find_node -- (internal) searches for a matching entry point in the * provided nodes * + * Name offset is used to return the offset of the name in the query string. * The caller is responsible for freeing all of the allocated indexes, * regardless of the return value. 
*/ -static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, - const char *name, - struct ctl_index_utlist *indexes) { - const struct ctl_node *n = NULL; +static const umf_ctl_node_t *ctl_find_node(const umf_ctl_node_t *nodes, + const char *name, + umf_ctl_index_utlist_t *indexes, + size_t *name_offset) { + assert(nodes != NULL); + assert(name != NULL); + assert(name_offset != NULL); + const umf_ctl_node_t *n = NULL; char *sptr = NULL; char *parse_str = Strdup(name); if (parse_str == NULL) { @@ -102,6 +140,11 @@ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, * in the main ctl tree. */ while (node_name != NULL) { + *name_offset = node_name - parse_str; + if (n != NULL && n->type == CTL_NODE_SUBTREE) { + // if a subtree occurs, the subtree handler should be called + break; + } char *endptr; /* * Ignore errno from strtol: FreeBSD returns EINVAL if no @@ -111,7 +154,7 @@ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, int tmp_errno = errno; long index_value = strtol(node_name, &endptr, 0); errno = tmp_errno; - struct ctl_index_utlist *index_entry = NULL; + umf_ctl_index_utlist_t *index_entry = NULL; if (endptr != node_name) { /* a valid index */ index_entry = umf_ba_global_alloc(sizeof(*index_entry)); if (index_entry == NULL) { @@ -128,6 +171,7 @@ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, break; } } + if (n->name == NULL) { goto error; } @@ -152,11 +196,11 @@ static const struct ctl_node *ctl_find_node(const struct ctl_node *nodes, * ctl_delete_indexes -- * (internal) removes and frees all entries on the index list */ -static void ctl_delete_indexes(struct ctl_index_utlist *indexes) { +static void ctl_delete_indexes(umf_ctl_index_utlist_t *indexes) { if (!indexes) { return; } - struct ctl_index_utlist *elem, *tmp; + umf_ctl_index_utlist_t *elem, *tmp; LL_FOREACH_SAFE(indexes, elem, tmp) { LL_DELETE(indexes, elem); if (elem) { @@ -201,8 +245,8 @@ static void *ctl_parse_args(const struct ctl_argument *arg_proto, char *arg) { * ctl_query_get_real_args -- (internal) returns a pointer with actual argument * structure as required by the node callback */ -static void *ctl_query_get_real_args(const struct ctl_node *n, void *write_arg, - enum ctl_query_source source) { +static void *ctl_query_get_real_args(const umf_ctl_node_t *n, void *write_arg, + umf_ctl_query_source_t source) { void *real_arg = NULL; switch (source) { case CTL_QUERY_CONFIG_INPUT: @@ -222,9 +266,8 @@ static void *ctl_query_get_real_args(const struct ctl_node *n, void *write_arg, * ctl_query_cleanup_real_args -- (internal) cleanups relevant argument * structures allocated as a result of the get_real_args call */ -static void ctl_query_cleanup_real_args(const struct ctl_node *n, - void *real_arg, - enum ctl_query_source source) { +static void ctl_query_cleanup_real_args(const umf_ctl_node_t *n, void *real_arg, + umf_ctl_query_source_t source) { /* suppress unused-parameter errors */ (void)n; @@ -242,23 +285,38 @@ static void ctl_query_cleanup_real_args(const struct ctl_node *n, /* * ctl_exec_query_read -- (internal) calls the read callback of a node */ -static int ctl_exec_query_read(void *ctx, const struct ctl_node *n, - enum ctl_query_source source, void *arg, - struct ctl_index_utlist *indexes) { +static int ctl_exec_query_read(void *ctx, const umf_ctl_node_t *n, + umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + (void)extra_name, 
(void)query_type; + assert(n != NULL); + assert(n->cb[CTL_QUERY_READ] != NULL); + assert(MAX_CTL_QUERY_TYPE != query_type); + if (arg == NULL) { errno = EINVAL; return -1; } - return n->cb[CTL_QUERY_READ](ctx, source, arg, indexes); + return n->cb[CTL_QUERY_READ](ctx, source, arg, indexes, NULL, + MAX_CTL_QUERY_TYPE); } /* * ctl_exec_query_write -- (internal) calls the write callback of a node */ -static int ctl_exec_query_write(void *ctx, const struct ctl_node *n, - enum ctl_query_source source, void *arg, - struct ctl_index_utlist *indexes) { +static int ctl_exec_query_write(void *ctx, const umf_ctl_node_t *n, + umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + (void)extra_name, (void)query_type; + assert(n != NULL); + assert(n->cb[CTL_QUERY_WRITE] != NULL); + assert(MAX_CTL_QUERY_TYPE != query_type); + if (arg == NULL) { errno = EINVAL; return -1; @@ -269,7 +327,8 @@ static int ctl_exec_query_write(void *ctx, const struct ctl_node *n, return -1; } - int ret = n->cb[CTL_QUERY_WRITE](ctx, source, real_arg, indexes); + int ret = n->cb[CTL_QUERY_WRITE](ctx, source, real_arg, indexes, NULL, + MAX_CTL_QUERY_TYPE); ctl_query_cleanup_real_args(n, real_arg, source); return ret; @@ -278,26 +337,50 @@ static int ctl_exec_query_write(void *ctx, const struct ctl_node *n, /* * ctl_exec_query_runnable -- (internal) calls the run callback of a node */ -static int ctl_exec_query_runnable(void *ctx, const struct ctl_node *n, - enum ctl_query_source source, void *arg, - struct ctl_index_utlist *indexes) { - return n->cb[CTL_QUERY_RUNNABLE](ctx, source, arg, indexes); +static int ctl_exec_query_runnable(void *ctx, const umf_ctl_node_t *n, + umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + (void)extra_name, (void)query_type; + assert(n != NULL); + assert(n->cb[CTL_QUERY_RUNNABLE] != NULL); + assert(MAX_CTL_QUERY_TYPE != query_type); + return n->cb[CTL_QUERY_RUNNABLE](ctx, source, arg, indexes, NULL, + MAX_CTL_QUERY_TYPE); } -static int (*ctl_exec_query[MAX_CTL_QUERY_TYPE])( - void *ctx, const struct ctl_node *n, enum ctl_query_source source, - void *arg, struct ctl_index_utlist *indexes) = { +static int ctl_exec_query_subtree(void *ctx, const umf_ctl_node_t *n, + umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + assert(n != NULL); + assert(n->cb[CTL_QUERY_SUBTREE] != NULL); + assert(MAX_CTL_QUERY_TYPE != query_type); + return n->cb[CTL_QUERY_SUBTREE](ctx, source, arg, indexes, extra_name, + query_type); +} + +typedef int (*umf_ctl_exec_query_t)(void *ctx, const umf_ctl_node_t *n, + umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type); + +static umf_ctl_exec_query_t ctl_exec_query[MAX_CTL_QUERY_TYPE] = { ctl_exec_query_read, ctl_exec_query_write, ctl_exec_query_runnable, + ctl_exec_query_subtree, }; /* * ctl_query -- (internal) parses the name and calls the appropriate methods * from the ctl tree */ -int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, - const char *name, enum ctl_query_type type, void *arg) { +int ctl_query(struct ctl *ctl, void *ctx, umf_ctl_query_source_t source, + const char *name, umf_ctl_query_type_t type, void *arg) { if (name == NULL) { errno = EINVAL; return -1; @@ -308,29 +391,36 @@ int ctl_query(struct ctl 
*ctl, void *ctx, enum ctl_query_source source, * easily retrieve the index values. The list is cleared once the ctl * query has been handled. */ - struct ctl_index_utlist *indexes = NULL; + umf_ctl_index_utlist_t *indexes = NULL; indexes = Zalloc(sizeof(*indexes)); if (!indexes) { return -1; } int ret = -1; + size_t name_offset = 0; - const struct ctl_node *n = ctl_find_node(CTL_NODE(global), name, indexes); + const umf_ctl_node_t *n = + ctl_find_node(CTL_NODE(global), name, indexes, &name_offset); if (n == NULL && ctl) { ctl_delete_indexes(indexes); indexes = NULL; - n = ctl_find_node(ctl->root, name, indexes); + n = ctl_find_node(ctl->root, name, indexes, &name_offset); } - if (n == NULL || n->type != CTL_NODE_LEAF || n->cb[type] == NULL) { + // if the appropriate node (leaf or subtree) is not found, then return error + if (n == NULL || + (n->type != CTL_NODE_LEAF && n->type != CTL_NODE_SUBTREE) || + n->cb[n->type == CTL_NODE_SUBTREE ? CTL_QUERY_SUBTREE : type] == NULL) { errno = EINVAL; goto out; } - ret = ctl_exec_query[type](ctx, n, source, arg, indexes); - + const char *extra_name = &name[0] + name_offset; + ret = + ctl_exec_query[n->type == CTL_NODE_SUBTREE ? CTL_QUERY_SUBTREE : type]( + ctx, n, source, arg, indexes, extra_name, type); out: ctl_delete_indexes(indexes); @@ -341,10 +431,10 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, * ctl_register_module_node -- adds a new node to the CTL tree root. */ void ctl_register_module_node(struct ctl *c, const char *name, - struct ctl_node *n) { - struct ctl_node *nnode = c == NULL - ? &CTL_NODE(global)[ctl_global_first_free++] - : &c->root[c->first_free++]; + umf_ctl_node_t *n) { + umf_ctl_node_t *nnode = c == NULL + ? &CTL_NODE(global)[ctl_global_first_free++] + : &c->root[c->first_free++]; nnode->children = n; nnode->type = CTL_NODE_NAMED; diff --git a/src/ctl/ctl.h b/src/ctl/ctl.h index 9327b01af..968998fc2 100644 --- a/src/ctl/ctl.h +++ b/src/ctl/ctl.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2016-2024 Intel Corporation + * Copyright (C) 2016-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -21,19 +21,21 @@ #include #include +#include + #ifdef __cplusplus extern "C" { #endif struct ctl; -struct ctl_index_utlist { +typedef struct ctl_index_utlist { const char *name; long value; struct ctl_index_utlist *next; -}; +} umf_ctl_index_utlist_t; -enum ctl_query_source { +typedef enum ctl_query_source { CTL_UNKNOWN_QUERY_SOURCE, /* query executed directly from the program */ CTL_QUERY_PROGRAMMATIC, @@ -41,24 +43,19 @@ enum ctl_query_source { CTL_QUERY_CONFIG_INPUT, MAX_CTL_QUERY_SOURCE -}; - -enum ctl_query_type { - CTL_QUERY_READ, - CTL_QUERY_WRITE, - CTL_QUERY_RUNNABLE, +} umf_ctl_query_source_t; - MAX_CTL_QUERY_TYPE -}; - -typedef int (*node_callback)(void *ctx, enum ctl_query_source type, void *arg, - struct ctl_index_utlist *indexes); +typedef int (*node_callback)(void *ctx, umf_ctl_query_source_t type, void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type); enum ctl_node_type { CTL_NODE_UNKNOWN, CTL_NODE_NAMED, CTL_NODE_LEAF, CTL_NODE_INDEXED, + CTL_NODE_SUBTREE, MAX_CTL_NODE }; @@ -91,7 +88,7 @@ struct ctl_argument { * CTL Tree node structure, do not use directly. All the necessary functionality * is provided by the included macros. 
*/ -struct ctl_node { +typedef struct ctl_node { const char *name; enum ctl_node_type type; @@ -99,11 +96,13 @@ struct ctl_node { const struct ctl_argument *arg; const struct ctl_node *children; -}; +} umf_ctl_node_t; struct ctl *ctl_new(void); void ctl_delete(struct ctl *stats); +void initialize_global_ctl(void); + int ctl_load_config_from_string(struct ctl *ctl, void *ctx, const char *cfg_string); int ctl_load_config_from_file(struct ctl *ctl, void *ctx, const char *cfg_file); @@ -138,8 +137,8 @@ int ctl_arg_string(const void *arg, void *dest, size_t dest_size); #define CTL_NODE(name, ...) ctl_node_##__VA_ARGS__##_##name -int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, - const char *name, enum ctl_query_type type, void *arg); +int ctl_query(struct ctl *ctl, void *ctx, umf_ctl_query_source_t source, + const char *name, umf_ctl_query_type_t type, void *arg); /* Declaration of a new child node */ #define CTL_CHILD(name, ...) \ @@ -161,6 +160,8 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_RUNNABLE_HANDLER(name, ...) ctl_##__VA_ARGS__##_##name##_runnable +#define CTL_SUBTREE_HANDLER(name, ...) ctl_##__VA_ARGS__##_##name##_subtree + #define CTL_ARG(name) ctl_arg_##name /* @@ -170,7 +171,8 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_LEAF_RO(name, ...) \ { \ CTL_STR(name), CTL_NODE_LEAF, \ - {CTL_READ_HANDLER(name, __VA_ARGS__), NULL, NULL}, NULL, NULL \ + {CTL_READ_HANDLER(name, __VA_ARGS__), NULL, NULL, NULL}, NULL, \ + NULL \ } /* @@ -180,7 +182,7 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_LEAF_WO(name, ...) \ { \ CTL_STR(name), CTL_NODE_LEAF, \ - {NULL, CTL_WRITE_HANDLER(name, __VA_ARGS__), NULL}, \ + {NULL, CTL_WRITE_HANDLER(name, __VA_ARGS__), NULL, NULL}, \ &CTL_ARG(name), NULL \ } @@ -191,7 +193,22 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_LEAF_RUNNABLE(name, ...) \ { \ CTL_STR(name), CTL_NODE_LEAF, \ - {NULL, NULL, CTL_RUNNABLE_HANDLER(name, __VA_ARGS__)}, NULL, NULL \ + {NULL, NULL, CTL_RUNNABLE_HANDLER(name, __VA_ARGS__), NULL}, NULL, \ + NULL \ + } + +#define CTL_LEAF_SUBTREE(name, ...) \ + { \ + CTL_STR(name), CTL_NODE_SUBTREE, \ + {NULL, NULL, NULL, CTL_SUBTREE_HANDLER(name, __VA_ARGS__)}, NULL, \ + NULL \ + } + +#define CTL_LEAF_SUBTREE2(name, fun, ...) 
\ + { \ + CTL_STR(name), CTL_NODE_SUBTREE, \ + {NULL, NULL, NULL, CTL_SUBTREE_HANDLER(fun, __VA_ARGS__)}, NULL, \ + NULL \ } /* @@ -201,7 +218,7 @@ int ctl_query(struct ctl *ctl, void *ctx, enum ctl_query_source source, #define CTL_LEAF_RW(name) \ { \ CTL_STR(name), CTL_NODE_LEAF, \ - {CTL_READ_HANDLER(name), CTL_WRITE_HANDLER(name), NULL}, \ + {CTL_READ_HANDLER(name), CTL_WRITE_HANDLER(name), NULL, NULL}, \ &CTL_ARG(name), NULL \ } diff --git a/src/libumf.c b/src/libumf.c index f8f6cc61f..aad0140bb 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -11,6 +11,7 @@ #include "base_alloc_global.h" #include "ipc_cache.h" +#include "memory_provider_internal.h" #include "memspace_internal.h" #include "pool/pool_scalable_internal.h" #include "provider_cuda_internal.h" @@ -26,6 +27,11 @@ umf_memory_tracker_handle_t TRACKER = NULL; static unsigned long long umfRefCount = 0; +static umf_ctl_node_t CTL_NODE(umf)[] = {CTL_CHILD(provider), CTL_CHILD(pool), + CTL_NODE_END}; + +void initialize_global_ctl(void) { CTL_REGISTER_MODULE(NULL, umf); } + int umfInit(void) { if (utils_fetch_and_add64(&umfRefCount, 1) == 0) { utils_log_init(); @@ -44,6 +50,7 @@ int umfInit(void) { } LOG_DEBUG("UMF IPC cache initialized"); + initialize_global_ctl(); } if (TRACKER) { diff --git a/src/libumf.def b/src/libumf.def index ce8820a8f..dd0ddfbfc 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -119,6 +119,9 @@ EXPORTS umfScalablePoolParamsSetKeepAllMemory ; Added in UMF_0.11 umfCUDAMemoryProviderParamsSetAllocFlags + umfCtlExec + umfCtlGet + umfCtlSet umfDisjointPoolOps umfDisjointPoolParamsCreate umfDisjointPoolParamsDestroy diff --git a/src/libumf.map b/src/libumf.map index 6582fd0f8..5e97acc09 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -117,6 +117,9 @@ UMF_0.10 { UMF_0.11 { umfCUDAMemoryProviderParamsSetAllocFlags; + umfCtlExec; + umfCtlGet; + umfCtlSet; umfDisjointPoolOps; umfDisjointPoolParamsCreate; umfDisjointPoolParamsDestroy; diff --git a/src/memory_pool.c b/src/memory_pool.c index ef2c0fa66..1b61555de 100644 --- a/src/memory_pool.c +++ b/src/memory_pool.c @@ -22,6 +22,32 @@ #include "memory_provider_internal.h" #include "provider_tracking.h" +static int CTL_SUBTREE_HANDLER(by_handle_pool)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t queryType) { + (void)indexes, (void)source; + umf_memory_pool_handle_t hPool = (umf_memory_pool_handle_t)ctx; + hPool->ops.ctl(hPool, /*unused*/ 0, extra_name, arg, queryType); + return 0; +} + +umf_ctl_node_t CTL_NODE(pool)[] = {CTL_LEAF_SUBTREE2(by_handle, by_handle_pool), + CTL_NODE_END}; + +static umf_result_t umfDefaultCtlPoolHandle(void *hPool, int operationType, + const char *name, void *arg, + umf_ctl_query_type_t queryType) { + (void)hPool; + (void)operationType; + (void)name; + (void)arg; + (void)queryType; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, umf_memory_provider_handle_t provider, void *params, @@ -58,6 +84,10 @@ static umf_result_t umfPoolCreateInternal(const umf_memory_pool_ops_t *ops, pool->ops = *ops; pool->tag = NULL; + if (NULL == pool->ops.ctl) { + pool->ops.ctl = umfDefaultCtlPoolHandle; + } + if (NULL == utils_mutex_init(&pool->lock)) { LOG_ERR("Failed to initialize mutex for pool"); ret = UMF_RESULT_ERROR_UNKNOWN; diff --git a/src/memory_provider.c b/src/memory_provider.c index ce6a10a20..fdc8725e0 100644 --- a/src/memory_provider.c +++ b/src/memory_provider.c @@ -18,8 +18,23 
@@ #include "base_alloc_global.h" #include "libumf.h" #include "memory_provider_internal.h" +#include "umf/base.h" #include "utils_assert.h" +static int CTL_SUBTREE_HANDLER(by_handle_provider)( + void *ctx, umf_ctl_query_source_t source, void *arg, + umf_ctl_index_utlist_t *indexes, const char *extra_name, + umf_ctl_query_type_t queryType) { + (void)indexes, (void)source; + umf_memory_provider_handle_t hProvider = (umf_memory_provider_handle_t)ctx; + hProvider->ops.ctl(hProvider->provider_priv, /*unused*/ 0, extra_name, arg, + queryType); + return 0; +} + +umf_ctl_node_t CTL_NODE(provider)[] = { + CTL_LEAF_SUBTREE2(by_handle, by_handle_provider), CTL_NODE_END}; + static umf_result_t umfDefaultPurgeLazy(void *provider, void *ptr, size_t size) { (void)provider; @@ -93,6 +108,17 @@ static umf_result_t umfDefaultCloseIPCHandle(void *provider, void *ptr, return UMF_RESULT_ERROR_NOT_SUPPORTED; } +static umf_result_t umfDefaultCtlHandle(void *provider, int operationType, + const char *name, void *arg, + umf_ctl_query_type_t queryType) { + (void)provider; + (void)operationType; + (void)name; + (void)arg; + (void)queryType; + return UMF_RESULT_ERROR_NOT_SUPPORTED; +} + void assignOpsExtDefaults(umf_memory_provider_ops_t *ops) { if (!ops->ext.purge_lazy) { ops->ext.purge_lazy = umfDefaultPurgeLazy; @@ -124,6 +150,9 @@ void assignOpsIpcDefaults(umf_memory_provider_ops_t *ops) { if (!ops->ipc.close_ipc_handle) { ops->ipc.close_ipc_handle = umfDefaultCloseIPCHandle; } + if (!ops->ctl) { + ops->ctl = umfDefaultCtlHandle; + } } static bool validateOpsMandatory(const umf_memory_provider_ops_t *ops) { diff --git a/src/memory_provider_internal.h b/src/memory_provider_internal.h index dd1111a23..5abc88d3b 100644 --- a/src/memory_provider_internal.h +++ b/src/memory_provider_internal.h @@ -14,6 +14,8 @@ #include +#include "ctl/ctl.h" + #ifdef __cplusplus extern "C" { #endif @@ -26,6 +28,9 @@ typedef struct umf_memory_provider_t { void *umfMemoryProviderGetPriv(umf_memory_provider_handle_t hProvider); umf_memory_provider_handle_t *umfGetLastFailedMemoryProviderPtr(void); +extern umf_ctl_node_t CTL_NODE(provider)[]; +extern umf_ctl_node_t CTL_NODE(pool)[]; + #ifdef __cplusplus } #endif diff --git a/src/pool/pool_scalable.c b/src/pool/pool_scalable.c index 8a9fd88c1..f68887529 100644 --- a/src/pool/pool_scalable.c +++ b/src/pool/pool_scalable.c @@ -13,6 +13,8 @@ #include #include +#include +#include #include #include #include @@ -114,6 +116,10 @@ static const char *tbb_symbol[TBB_POOL_SYMBOLS_MAX] = { #endif }; +struct ctl *pool_scallable_ctl_root; + +static UTIL_ONCE_FLAG ctl_initialized = UTIL_ONCE_FLAG_INIT; + static void init_tbb_callbacks_once(void) { const char *lib_name = tbb_symbol[TBB_LIB_NAME]; tbb_callbacks.lib_handle = utils_open_library(lib_name, 0); @@ -405,6 +411,38 @@ static umf_result_t tbb_get_last_allocation_error(void *pool) { return TLS_last_allocation_error; } +static int CTL_READ_HANDLER(tracking_enabled)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; + + int *arg_out = arg; + umf_memory_pool_handle_t pool = (umf_memory_pool_handle_t)ctx; + *arg_out = pool->flags & UMF_POOL_CREATE_FLAG_DISABLE_TRACKING ? 
0 : 1; + return 0; +} + +static const umf_ctl_node_t CTL_NODE(params)[] = {CTL_LEAF_RO(tracking_enabled), + CTL_NODE_END}; + +static void initialize_pool_ctl(void) { + pool_scallable_ctl_root = ctl_new(); + CTL_REGISTER_MODULE(pool_scallable_ctl_root, params); +} + +static umf_result_t pool_ctl(void *hPool, int operationType, const char *name, + void *arg, umf_ctl_query_type_t query_type) { + (void)operationType; // unused + umf_memory_pool_handle_t pool_provider = (umf_memory_pool_handle_t)hPool; + utils_init_once(&ctl_initialized, initialize_pool_ctl); + return ctl_query(pool_scallable_ctl_root, pool_provider, + CTL_QUERY_PROGRAMMATIC, name, query_type, arg); +} + static umf_memory_pool_ops_t UMF_SCALABLE_POOL_OPS = { .version = UMF_POOL_OPS_VERSION_CURRENT, .initialize = tbb_pool_initialize, @@ -415,7 +453,8 @@ static umf_memory_pool_ops_t UMF_SCALABLE_POOL_OPS = { .aligned_malloc = tbb_aligned_malloc, .malloc_usable_size = tbb_malloc_usable_size, .free = tbb_free, - .get_last_allocation_error = tbb_get_last_allocation_error}; + .get_last_allocation_error = tbb_get_last_allocation_error, + .ctl = pool_ctl}; umf_memory_pool_ops_t *umfScalablePoolOps(void) { return &UMF_SCALABLE_POOL_OPS; diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index bd5ea9c69..9a487a5af 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -13,10 +13,12 @@ #include #include +#include #include +#include +#include #include #include - // OS Memory Provider requires HWLOC #if defined(UMF_NO_HWLOC) @@ -166,6 +168,33 @@ static const char *Native_error_str[] = { "HWLOC topology discovery failed", }; +struct ctl *os_memory_ctl_root; + +static UTIL_ONCE_FLAG ctl_initialized = UTIL_ONCE_FLAG_INIT; + +static int CTL_READ_HANDLER(ipc_enabled)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; + + int *arg_out = arg; + os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; + *arg_out = os_provider->IPC_enabled; + return 0; +} + +static const umf_ctl_node_t CTL_NODE(params)[] = {CTL_LEAF_RO(ipc_enabled), + CTL_NODE_END}; + +static void initialize_os_ctl(void) { + os_memory_ctl_root = ctl_new(); + CTL_REGISTER_MODULE(os_memory_ctl_root, params); +} + static void os_store_last_native_error(int32_t native_error, int errno_value) { TLS_last_native_error.native_error = native_error; TLS_last_native_error.errno_value = errno_value; @@ -1401,6 +1430,15 @@ static umf_result_t os_close_ipc_handle(void *provider, void *ptr, return UMF_RESULT_SUCCESS; } +static umf_result_t os_ctl(void *hProvider, int operationType, const char *name, + void *arg, umf_ctl_query_type_t query_type) { + (void)operationType; // unused + os_memory_provider_t *os_provider = (os_memory_provider_t *)hProvider; + utils_init_once(&ctl_initialized, initialize_os_ctl); + return ctl_query(os_memory_ctl_root, os_provider, CTL_QUERY_PROGRAMMATIC, + name, query_type, arg); +} + static umf_memory_provider_ops_t UMF_OS_MEMORY_PROVIDER_OPS = { .version = UMF_PROVIDER_OPS_VERSION_CURRENT, .initialize = os_initialize, @@ -1419,7 +1457,9 @@ static umf_memory_provider_ops_t UMF_OS_MEMORY_PROVIDER_OPS = { .ipc.get_ipc_handle = os_get_ipc_handle, .ipc.put_ipc_handle = os_put_ipc_handle, .ipc.open_ipc_handle = os_open_ipc_handle, - .ipc.close_ipc_handle = os_close_ipc_handle}; + 
.ipc.close_ipc_handle = os_close_ipc_handle, + .ctl = os_ctl, +}; umf_memory_provider_ops_t *umfOsMemoryProviderOps(void) { return &UMF_OS_MEMORY_PROVIDER_OPS; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ecdde95e1..32bdd4c14 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -208,8 +208,14 @@ add_umf_test( LIBS ${UMF_LOGGER_LIBS}) add_umf_test( - NAME ctl - SRCS ctl/test.cpp ctl/ctl_debug.c ../src/ctl/ctl.c ${BA_SOURCES_FOR_TEST} + NAME ctl_unittest + SRCS ctl/ctl_unittest.cpp ctl/ctl_debug.c ../src/ctl/ctl.c + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) + +add_umf_test( + NAME ctl_api + SRCS ctl/ctl_api.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) add_umf_test( diff --git a/test/ctl/config.txt b/test/ctl/config.txt index 5d4f9c62b..52c8febad 100644 --- a/test/ctl/config.txt +++ b/test/ctl/config.txt @@ -1 +1,3 @@ -debug.heap.alloc_pattern=321 \ No newline at end of file +debug.heap.alloc_pattern=321; +debug.heap.enable_logging=1; +debug.heap.log_level=5; diff --git a/test/ctl/ctl_api.cpp b/test/ctl/ctl_api.cpp new file mode 100644 index 000000000..ff6491c16 --- /dev/null +++ b/test/ctl/ctl_api.cpp @@ -0,0 +1,142 @@ +/* + * + * Copyright (C) 2025 Intel Corporation + * + * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + */ + +#include + +#include +#include +#include +#include +#include + +#include "../common/base.hpp" +#include "gtest/gtest.h" + +using namespace umf_test; + +TEST_F(test, ctl_by_handle_os_provider) { + umf_memory_provider_handle_t hProvider = NULL; + umf_os_memory_provider_params_handle_t os_memory_provider_params = NULL; + umf_memory_provider_ops_t *os_provider_ops = umfOsMemoryProviderOps(); + if (os_provider_ops == NULL) { + GTEST_SKIP() << "OS memory provider is not supported!"; + } + + int ret = umfOsMemoryProviderParamsCreate(&os_memory_provider_params); + ret = umfMemoryProviderCreate(os_provider_ops, os_memory_provider_params, + &hProvider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + + int ipc_enabled = 0xBAD; + ret = umfCtlGet("umf.provider.by_handle.params.ipc_enabled", hProvider, + &ipc_enabled); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_EQ(ipc_enabled, 0); + + umfOsMemoryProviderParamsDestroy(os_memory_provider_params); + umfMemoryProviderDestroy(hProvider); +} + +// Create a memory provider and a memory pool +umf_memory_provider_handle_t create_memory_provider() { + umf_memory_provider_ops_t *provider_ops = umfOsMemoryProviderOps(); + umf_os_memory_provider_params_handle_t params = NULL; + umf_memory_provider_handle_t provider; + + int ret = umfOsMemoryProviderParamsCreate(¶ms); + if (ret != UMF_RESULT_SUCCESS) { + return 0; + } + + ret = umfMemoryProviderCreate(provider_ops, params, &provider); + umfOsMemoryProviderParamsDestroy(params); + if (ret != UMF_RESULT_SUCCESS) { + return 0; + } + + return provider; +} + +class CtlTest : public ::testing::Test { + public: + class CtlException : public std::exception { + public: + CtlException(const char *msg) : msg(msg) {} + const char *what() const noexcept override { return msg; } + + private: + const char *msg; + }; + + void SetUp() override { + provider = NULL; + pool = NULL; + } + + void instantiatePool(umf_memory_pool_ops_t *pool_ops, void *pool_params, + umf_pool_create_flags_t flags = 0) { + freeResources(); + provider = create_memory_provider(); + if (provider == NULL) { + throw CtlException("Failed to create a memory provider!"); + } + int ret = umfPoolCreate(pool_ops, 
provider, pool_params, flags, &pool); + if (ret != UMF_RESULT_SUCCESS) { + throw CtlException("Failed to create a memory provider!"); + } + } + + template + void validateQuery( + std::function + ctlApiFunction, + const char *name, T expectedValue, umf_result_t expected) { + T value = 0xBAD; + umf_result_t ret = ctlApiFunction(name, pool, &value); + ASSERT_EQ(ret, expected); + if (ret == UMF_RESULT_SUCCESS) { + ASSERT_EQ(value, expectedValue); + } + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + } + + void TearDown() override { freeResources(); } + + private: + void freeResources() { + if (pool) { + umfPoolDestroy(pool); + } + if (provider) { + umfMemoryProviderDestroy(provider); + } + } + + umf_memory_provider_handle_t provider; + umf_memory_pool_handle_t pool; +}; + +TEST_F(CtlTest, ctl_by_handle_scalablePool) { + try { + instantiatePool(umfScalablePoolOps(), NULL); + validateQuery(umfCtlGet, + "umf.pool.by_handle.params.tracking_enabled", 1, + UMF_RESULT_SUCCESS); + + instantiatePool(umfScalablePoolOps(), NULL, + UMF_POOL_CREATE_FLAG_DISABLE_TRACKING); + validateQuery(umfCtlGet, + "umf.pool.by_handle.params.tracking_enabled", 0, + UMF_RESULT_SUCCESS); + } catch (CtlTest::CtlException &e) { + GTEST_SKIP() << e.what(); + } catch (...) { + GTEST_FAIL() << "Unknown exception!"; + } +} diff --git a/test/ctl/ctl_debug.c b/test/ctl/ctl_debug.c index 711cb5e17..5bc2920ea 100644 --- a/test/ctl/ctl_debug.c +++ b/test/ctl/ctl_debug.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -25,11 +25,13 @@ struct ctl *get_debug_ctl(void) { return ctl_debug; } * CTL_WRITE_HANDLER(alloc_pattern) -- sets the alloc_pattern field in heap */ static int CTL_WRITE_HANDLER(alloc_pattern)(void *ctx, - enum ctl_query_source source, + umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int arg_in = *(int *)arg; alloc_pattern = arg_in; @@ -40,11 +42,13 @@ static int CTL_WRITE_HANDLER(alloc_pattern)(void *ctx, * CTL_READ_HANDLER(alloc_pattern) -- returns alloc_pattern heap field */ static int CTL_READ_HANDLER(alloc_pattern)(void *ctx, - enum ctl_query_source source, + umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int *arg_out = arg; *arg_out = alloc_pattern; @@ -52,11 +56,13 @@ static int CTL_READ_HANDLER(alloc_pattern)(void *ctx, } static int CTL_WRITE_HANDLER(enable_logging)(void *ctx, - enum ctl_query_source source, + umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int arg_in = *(int *)arg; enable_logging = arg_in; @@ -64,33 +70,40 @@ static int CTL_WRITE_HANDLER(enable_logging)(void 
*ctx, } static int CTL_READ_HANDLER(enable_logging)(void *ctx, - enum ctl_query_source source, + umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int *arg_out = arg; *arg_out = enable_logging; return 0; } -static int CTL_WRITE_HANDLER(log_level)(void *ctx, enum ctl_query_source source, +static int CTL_WRITE_HANDLER(log_level)(void *ctx, + umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int arg_in = *(int *)arg; log_level = arg_in; return 0; } -static int CTL_READ_HANDLER(log_level)(void *ctx, enum ctl_query_source source, +static int CTL_READ_HANDLER(log_level)(void *ctx, umf_ctl_query_source_t source, void *arg, - struct ctl_index_utlist *indexes) { + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { /* suppress unused-parameter errors */ - (void)source, (void)indexes, (void)ctx; + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; int *arg_out = arg; *arg_out = log_level; @@ -103,15 +116,15 @@ static const struct ctl_argument CTL_ARG(enable_logging) = CTL_ARG_BOOLEAN; static const struct ctl_argument CTL_ARG(log_level) = CTL_ARG_INT; -static const struct ctl_node CTL_NODE(heap)[] = {CTL_LEAF_RW(alloc_pattern), - CTL_LEAF_RW(enable_logging), - CTL_LEAF_RW(log_level), +static const umf_ctl_node_t CTL_NODE(heap)[] = {CTL_LEAF_RW(alloc_pattern), + CTL_LEAF_RW(enable_logging), + CTL_LEAF_RW(log_level), - CTL_NODE_END}; + CTL_NODE_END}; -static const struct ctl_node CTL_NODE(debug)[] = {CTL_CHILD(heap), +static const umf_ctl_node_t CTL_NODE(debug)[] = {CTL_CHILD(heap), - CTL_NODE_END}; + CTL_NODE_END}; /* * debug_ctl_register -- registers ctl nodes for "debug" module diff --git a/test/ctl/test.cpp b/test/ctl/ctl_unittest.cpp similarity index 100% rename from test/ctl/test.cpp rename to test/ctl/ctl_unittest.cpp From 94d11508432a2b93da671f4ee80d013d199680ae Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Tue, 25 Feb 2025 14:06:06 +0100 Subject: [PATCH 194/466] Move *priv members to the end of internal structures This commit moves provider_priv and pool_priv members in internal structures due to compatibility issues. From now, adding new ops will not break accessing these members. --- src/memory_pool_internal.h | 4 +++- src/memory_provider_internal.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/memory_pool_internal.h b/src/memory_pool_internal.h index ab3378163..4e3c31696 100644 --- a/src/memory_pool_internal.h +++ b/src/memory_pool_internal.h @@ -26,7 +26,6 @@ extern "C" { typedef struct umf_memory_pool_t { void *pool_priv; - umf_memory_pool_ops_t ops; umf_pool_create_flags_t flags; // Memory provider used by the pool. 
@@ -34,6 +33,9 @@ typedef struct umf_memory_pool_t { utils_mutex_t lock; void *tag; + + // ops should be the last due to possible change size in the future + umf_memory_pool_ops_t ops; } umf_memory_pool_t; #ifdef __cplusplus diff --git a/src/memory_provider_internal.h b/src/memory_provider_internal.h index 5abc88d3b..4b4ec8b2d 100644 --- a/src/memory_provider_internal.h +++ b/src/memory_provider_internal.h @@ -21,8 +21,9 @@ extern "C" { #endif typedef struct umf_memory_provider_t { - umf_memory_provider_ops_t ops; void *provider_priv; + // ops should be the last due to possible change size in the future + umf_memory_provider_ops_t ops; } umf_memory_provider_t; void *umfMemoryProviderGetPriv(umf_memory_provider_handle_t hProvider); From d319917f71f6040032ba7b7a945cc9fe638dc3fc Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Tue, 25 Feb 2025 14:11:36 +0100 Subject: [PATCH 195/466] [CI] Disable mempolicy test in compatibility tests --- .github/workflows/reusable_compatibility.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml index fbd17a2f4..c7f84d6e3 100644 --- a/.github/workflows/reusable_compatibility.yml +++ b/.github/workflows/reusable_compatibility.yml @@ -110,7 +110,7 @@ jobs: run: > UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ - ctest --output-on-failure + ctest --output-on-failure -E "umf-mempolicy" # disable tests that rely on internal structures windows-build: name: Windows From 258c6f2f0f40d9dd3bf86e7e4e64a743a87de950 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 26 Feb 2025 15:22:53 +0100 Subject: [PATCH 196/466] explicitly declare single threaded benchmark This causes that information about threads count is appended to benchmark name which makes it consistent with other benchmarks. 
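
A minimal sketch (editor's illustration, not part of this patch) of the naming behavior the message refers to, assuming Google Benchmark's standard reporting where registering a benchmark with ->Threads(N) appends "/threads:N" to its reported name; BM_alloc_free below is a hypothetical benchmark, not one from this suite:

    #include <benchmark/benchmark.h>
    #include <cstdlib>

    static void BM_alloc_free(benchmark::State &state) {
        for (auto _ : state) {
            void *p = std::malloc(64);   // allocate a small block each iteration
            benchmark::DoNotOptimize(p); // keep the allocation from being optimized away
            std::free(p);
        }
    }
    // Reported as "BM_alloc_free/threads:1", consistent with multithreaded
    // registrations such as "BM_alloc_free/threads:4".
    BENCHMARK(BM_alloc_free)->Threads(1);
    BENCHMARK_MAIN();
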
--- benchmark/benchmark.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 94d77dabd..60636a559 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -35,6 +35,10 @@ static void multithreaded(benchmark::internal::Benchmark *benchmark) { benchmark->Threads(1); } +static void singlethreaded(benchmark::internal::Benchmark *benchmark) { + benchmark->Threads(1); +} + static void default_multiple_alloc_fix_size(benchmark::internal::Benchmark *benchmark) { benchmark->Args({10000, 1, 4096}); @@ -68,7 +72,8 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, proxy_pool, UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, proxy_pool) ->Apply(&default_multiple_alloc_fix_size) // reduce iterations, as this benchmark is slower than others - ->Iterations(50000); + ->Iterations(50000) + ->Apply(&singlethreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider, fixed_alloc_size, @@ -76,7 +81,8 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, os_provider, UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, os_provider) ->Apply(&default_multiple_alloc_fix_size) // reduce iterations, as this benchmark is slower than others - ->Iterations(50000); + ->Iterations(50000) + ->Apply(&singlethreaded); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, fixed_alloc_size, @@ -89,8 +95,9 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) - ->Apply(&default_multiple_alloc_uniform_size); -// TODO: enable + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&singlethreaded); +// TODO: change to multithreaded //->Apply(&multithreaded); #ifdef UMF_POOL_JEMALLOC_ENABLED From 2d19a6157a0e4f92a18d88ff0a456938976c0052 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 26 Feb 2025 15:56:48 +0000 Subject: [PATCH 197/466] remove deprecated cmake flag from compat workflow --- .github/workflows/reusable_compatibility.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml index fbd17a2f4..29597ac18 100644 --- a/.github/workflows/reusable_compatibility.yml +++ b/.github/workflows/reusable_compatibility.yml @@ -97,7 +97,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON - name: Build latest UMF @@ -197,7 +196,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON -DUMF_TESTS_FAIL_ON_SKIP=ON - name: Build latest UMF From c2e758e15e5546ff12de2fac6bb96e6e0055b8fc Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Sat, 1 Mar 2025 11:35:53 +0100 Subject: [PATCH 198/466] coarse: error out on double free instead of assert Error out in case of double free() instead of assert in coarse library. 
Signed-off-by: Lukasz Dorau --- src/coarse/coarse.c | 7 +++++-- test/coarse_lib.cpp | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/coarse/coarse.c b/src/coarse/coarse.c index 956e54857..19798466e 100644 --- a/src/coarse/coarse.c +++ b/src/coarse/coarse.c @@ -1170,10 +1170,13 @@ umf_result_t coarse_free(coarse_t *coarse, void *ptr, size_t bytes) { } block_t *block = get_node_block(node); - assert(block->used); + if (!block->used) { + LOG_ERR("double free"); + utils_mutex_unlock(&coarse->lock); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } if (bytes > 0 && bytes != block->size) { - // wrong size of allocation LOG_ERR("wrong size of allocation"); utils_mutex_unlock(&coarse->lock); return UMF_RESULT_ERROR_INVALID_ARGUMENT; diff --git a/test/coarse_lib.cpp b/test/coarse_lib.cpp index a1aec224a..c2e1f9c85 100644 --- a/test/coarse_lib.cpp +++ b/test/coarse_lib.cpp @@ -160,6 +160,13 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_provider) { ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + // test double free + umf_result = coarse_free(ch, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, alloc_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + coarse_delete(ch); umfMemoryProviderDestroy(malloc_memory_provider); } @@ -202,6 +209,13 @@ TEST_P(CoarseWithMemoryStrategyTest, coarseTest_basic_fixed_memory) { ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + // test double free + umf_result = coarse_free(ch, ptr, 2 * MB); + ASSERT_EQ(umf_result, UMF_RESULT_ERROR_INVALID_ARGUMENT); + ASSERT_EQ(coarse_get_stats(ch).used_size, 0); + ASSERT_EQ(coarse_get_stats(ch).alloc_size, buff_size); + ASSERT_EQ(coarse_get_stats(ch).num_all_blocks, 1); + coarse_delete(ch); } From d97349f61fe3cb44d65e023e3ee7ce90833efbc2 Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Tue, 4 Mar 2025 15:22:27 +0100 Subject: [PATCH 199/466] [CI] Disable legacy GHA Image - Ubuntu 20.04 --- .github/workflows/reusable_basic.yml | 11 +---------- .github/workflows/reusable_fast.yml | 15 ++++----------- 2 files changed, 5 insertions(+), 21 deletions(-) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 7170ec418..5a6756f2c 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -17,7 +17,7 @@ jobs: name: Ubuntu strategy: matrix: - os: ['ubuntu-20.04', 'ubuntu-22.04'] + os: ['ubuntu-22.04', 'ubuntu-24.04'] build_type: [Debug, Release] compiler: [{c: gcc, cxx: g++}] shared_library: ['OFF'] @@ -27,15 +27,6 @@ jobs: disable_hwloc: ['OFF'] link_hwloc_statically: ['OFF'] include: - - os: 'ubuntu-20.04' - build_type: Release - compiler: {c: gcc-7, cxx: g++-7} - shared_library: 'OFF' - level_zero_provider: 'ON' - cuda_provider: 'ON' - install_tbb: 'ON' - disable_hwloc: 'OFF' - link_hwloc_statically: 'OFF' - os: 'ubuntu-22.04' build_type: Release compiler: {c: clang, cxx: clang++} diff --git a/.github/workflows/reusable_fast.yml b/.github/workflows/reusable_fast.yml index 5166f2b96..90a8f023f 100644 --- a/.github/workflows/reusable_fast.yml +++ b/.github/workflows/reusable_fast.yml @@ -43,8 +43,8 @@ jobs: build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' - # simplest CMake ubuntu-20.04 - - os: ubuntu-20.04 + # simplest CMake 
ubuntu-22.04 + - os: ubuntu-22.04 build_tests: 'ON' extra_build_options: '-DCMAKE_BUILD_TYPE=Release' simple_cmake: 'ON' @@ -69,19 +69,12 @@ jobs: run: vcpkg install shell: pwsh # Specifies PowerShell as the shell for running the script. - - name: Install dependencies (ubuntu-latest) - if: matrix.os == 'ubuntu-latest' + - name: Install dependencies + if: matrix.os != 'windows-latest' run: | sudo apt-get update sudo apt-get install -y cmake libhwloc-dev libnuma-dev libtbb-dev - - name: Install dependencies (ubuntu-20.04) - if: matrix.os == 'ubuntu-20.04' - run: | - sudo apt-get update - sudo apt-get install -y cmake libnuma-dev libtbb-dev - .github/scripts/install_hwloc.sh # install hwloc-2.3.0 instead of hwloc-2.1.0 present in the OS package - - name: Configure CMake if: matrix.simple_cmake == 'OFF' run: > From 4094ad80790e0b60d5975d47a4bca67cf65ba48c Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 4 Mar 2025 15:25:53 +0000 Subject: [PATCH 200/466] Revert "temporary disable DP MT benchmark" --- benchmark/benchmark.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 60636a559..e5e055a55 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -96,9 +96,7 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&singlethreaded); -// TODO: change to multithreaded -//->Apply(&multithreaded); + ->Apply(&multithreaded); #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, From fcf0a374716bacc31e8ebd966fb6911aec8d395e Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 4 Mar 2025 15:30:57 +0000 Subject: [PATCH 201/466] cleanup atomics --- src/critnib/critnib.c | 74 ++++----- src/ipc_cache.c | 2 +- src/libumf.c | 6 +- src/pool/pool_disjoint.c | 7 +- src/pool/pool_disjoint_internal.h | 2 +- src/provider/provider_os_memory.c | 2 +- src/provider/provider_os_memory_internal.h | 4 +- src/provider/provider_tracking.c | 12 +- src/utils/utils_concurrency.h | 155 +++++++++++++----- test/supp/drd-umf_test-disjoint_pool.supp | 2 +- test/supp/drd-umf_test-ipc.supp | 27 +++ .../drd-umf_test-jemalloc_coarse_devdax.supp | 2 +- .../drd-umf_test-jemalloc_coarse_file.supp | 2 +- ...d-umf_test-provider_devdax_memory_ipc.supp | 1 + ...drd-umf_test-provider_file_memory_ipc.supp | 18 ++ .../supp/drd-umf_test-provider_os_memory.supp | 1 + .../supp/helgrind-umf_test-disjoint_pool.supp | 2 +- test/supp/helgrind-umf_test-ipc.supp | 39 ++++- ...grind-umf_test-jemalloc_coarse_devdax.supp | 2 +- ...elgrind-umf_test-jemalloc_coarse_file.supp | 2 +- ...d-umf_test-provider_devdax_memory_ipc.supp | 1 + ...ind-umf_test-provider_file_memory_ipc.supp | 30 +++- .../helgrind-umf_test-provider_os_memory.supp | 1 + 23 files changed, 288 insertions(+), 106 deletions(-) diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index 62d14af73..394a67124 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -133,24 +133,6 @@ struct critnib { struct utils_mutex_t mutex; /* writes/removes */ }; -/* - * atomic load - */ -static void load(void *src, void *dst) { - utils_atomic_load_acquire((word *)src, (word *)dst); -} - -static void load64(uint64_t *src, uint64_t *dst) { - utils_atomic_load_acquire(src, dst); -} - -/* - * atomic store - */ -static void store(void *dst, void *src) { - utils_atomic_store_release((word *)dst, (word)src); -} - /* * internal: is_leaf -- check tagged pointer for leafness */ @@ -303,7 +285,7 @@ static void free_leaf(struct critnib *__restrict c, */ static struct critnib_leaf *alloc_leaf(struct critnib *__restrict c) { if (!c->deleted_leaf) { - return umf_ba_global_alloc(sizeof(struct critnib_leaf)); + return umf_ba_global_aligned_alloc(sizeof(struct critnib_leaf), 8); } struct critnib_leaf *k = c->deleted_leaf; @@ -343,10 +325,8 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { struct critnib_node *n = c->root; if (!n) { - store(&c->root, kn); - + utils_atomic_store_release_ptr((void **)&c->root, kn); utils_mutex_unlock(&c->mutex); - return 0; } @@ -361,7 +341,8 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { if (!n) { n = prev; - store(&n->child[slice_index(key, n->shift)], kn); + utils_atomic_store_release_ptr( + (void **)&n->child[slice_index(key, n->shift)], kn); utils_mutex_unlock(&c->mutex); @@ -406,7 +387,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { m->child[slice_index(path, sh)] = n; m->shift = sh; m->path = key & path_mask(sh); - store(parent, m); + utils_atomic_store_release_ptr((void **)parent, m); utils_mutex_unlock(&c->mutex); @@ -427,7 +408,8 @@ void *critnib_remove(struct critnib *c, word key) { goto not_found; } - word del = (utils_atomic_increment(&c->remove_count) - 1) % DELETED_LIFE; + word del = + (utils_atomic_increment_u64(&c->remove_count) - 1) % DELETED_LIFE; free_node(c, c->pending_del_nodes[del]); free_leaf(c, c->pending_del_leaves[del]); c->pending_del_nodes[del] = NULL; @@ -436,7 +418,7 @@ void *critnib_remove(struct critnib *c, word key) { if (is_leaf(n)) { k = to_leaf(n); if (k->key == key) { - store(&c->root, NULL); + utils_atomic_store_release_ptr((void **)&c->root, NULL); goto del_leaf; } @@ -466,7 +448,8 @@ void *critnib_remove(struct critnib *c, word key) { goto not_found; } - store(&n->child[slice_index(key, n->shift)], NULL); + utils_atomic_store_release_ptr( + (void **)&n->child[slice_index(key, n->shift)], NULL); /* Remove the node if there's only one remaining child. */ int ochild = -1; @@ -482,7 +465,7 @@ void *critnib_remove(struct critnib *c, word key) { ASSERTne(ochild, -1); - store(n_parent, n->child[ochild]); + utils_atomic_store_release_ptr((void **)n_parent, n->child[ochild]); c->pending_del_nodes[del] = n; del_leaf: @@ -511,8 +494,8 @@ void *critnib_get(struct critnib *c, word key) { do { struct critnib_node *n; - load64(&c->remove_count, &wrs1); - load(&c->root, &n); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs1); + utils_atomic_load_acquire_ptr((void **)&c->root, (void **)&n); /* * critbit algorithm: dive into the tree, looking at nothing but @@ -520,13 +503,14 @@ void *critnib_get(struct critnib *c, word key) { * going wrong way if our path is missing, but that's ok... */ while (n && !is_leaf(n)) { - load(&n->child[slice_index(key, n->shift)], &n); + utils_atomic_load_acquire_ptr( + (void **)&n->child[slice_index(key, n->shift)], (void **)&n); } /* ... 
as we check it at the end. */ struct critnib_leaf *k = to_leaf(n); res = (n && k->key == key) ? k->value : NULL; - load64(&c->remove_count, &wrs2); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs2); } while (wrs1 + DELETED_LIFE <= wrs2); return res; @@ -597,7 +581,7 @@ static struct critnib_leaf *find_le(struct critnib_node *__restrict n, /* recursive call: follow the path */ { struct critnib_node *m; - load(&n->child[nib], &m); + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&m); struct critnib_leaf *k = find_le(m, key); if (k) { return k; @@ -611,7 +595,7 @@ static struct critnib_leaf *find_le(struct critnib_node *__restrict n, */ for (; nib > 0; nib--) { struct critnib_node *m; - load(&n->child[nib - 1], &m); + utils_atomic_load_acquire_ptr((void **)&n->child[nib - 1], (void **)&m); if (m) { n = m; if (is_leaf(n)) { @@ -635,12 +619,12 @@ void *critnib_find_le(struct critnib *c, word key) { void *res; do { - load64(&c->remove_count, &wrs1); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs1); struct critnib_node *n; /* avoid a subtle TOCTOU */ - load(&c->root, &n); + utils_atomic_load_acquire_ptr((void **)&c->root, (void **)&n); struct critnib_leaf *k = n ? find_le(n, key) : NULL; res = k ? k->value : NULL; - load64(&c->remove_count, &wrs2); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs2); } while (wrs1 + DELETED_LIFE <= wrs2); return res; @@ -694,7 +678,7 @@ static struct critnib_leaf *find_ge(struct critnib_node *__restrict n, unsigned nib = slice_index(key, n->shift); { struct critnib_node *m; - load(&n->child[nib], &m); + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&m); struct critnib_leaf *k = find_ge(m, key); if (k) { return k; @@ -703,7 +687,7 @@ static struct critnib_leaf *find_ge(struct critnib_node *__restrict n, for (; nib < NIB; nib++) { struct critnib_node *m; - load(&n->child[nib + 1], &m); + utils_atomic_load_acquire_ptr((void **)&n->child[nib + 1], (void **)&m); if (m) { n = m; if (is_leaf(n)) { @@ -741,9 +725,9 @@ int critnib_find(struct critnib *c, uintptr_t key, enum find_dir_t dir, } do { - load64(&c->remove_count, &wrs1); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs1); struct critnib_node *n; - load(&c->root, &n); + utils_atomic_load_acquire_ptr((void **)&c->root, (void **)&n); if (dir < 0) { k = find_le(n, key); @@ -751,7 +735,9 @@ int critnib_find(struct critnib *c, uintptr_t key, enum find_dir_t dir, k = find_ge(n, key); } else { while (n && !is_leaf(n)) { - load(&n->child[slice_index(key, n->shift)], &n); + utils_atomic_load_acquire_ptr( + (void **)&n->child[slice_index(key, n->shift)], + (void **)&n); } struct critnib_leaf *kk = to_leaf(n); @@ -761,7 +747,7 @@ int critnib_find(struct critnib *c, uintptr_t key, enum find_dir_t dir, _rkey = k->key; _rvalue = k->value; } - load64(&c->remove_count, &wrs2); + utils_atomic_load_acquire_u64(&c->remove_count, &wrs2); } while (wrs1 + DELETED_LIFE <= wrs2); if (k) { diff --git a/src/ipc_cache.c b/src/ipc_cache.c index cab5fc478..6d5d39e4f 100644 --- a/src/ipc_cache.c +++ b/src/ipc_cache.c @@ -232,7 +232,7 @@ umf_result_t umfIpcOpenedCacheGet(ipc_opened_cache_handle_t cache, exit: if (ret == UMF_RESULT_SUCCESS) { - utils_atomic_increment(&entry->ref_count); + utils_atomic_increment_u64(&entry->ref_count); *retEntry = &entry->value; } diff --git a/src/libumf.c b/src/libumf.c index f8f6cc61f..e357b2583 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -24,10 +24,10 @@ umf_memory_tracker_handle_t TRACKER = NULL; -static unsigned long long umfRefCount = 0; 
+static uint64_t umfRefCount = 0; int umfInit(void) { - if (utils_fetch_and_add64(&umfRefCount, 1) == 0) { + if (utils_fetch_and_add_u64(&umfRefCount, 1) == 0) { utils_log_init(); TRACKER = umfMemoryTrackerCreate(); if (!TRACKER) { @@ -54,7 +54,7 @@ int umfInit(void) { } void umfTearDown(void) { - if (utils_fetch_and_add64(&umfRefCount, -1) == 1) { + if (utils_fetch_and_sub_u64(&umfRefCount, 1) == 1) { #if !defined(_WIN32) && !defined(UMF_NO_HWLOC) umfMemspaceHostAllDestroy(); umfMemspaceHighestCapacityDestroy(); diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 9adb1a7a4..7a2f327e4 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -20,6 +21,7 @@ #include "provider/provider_tracking.h" #include "uthash/utlist.h" #include "utils_common.h" +#include "utils_concurrency.h" #include "utils_log.h" #include "utils_math.h" @@ -523,7 +525,7 @@ static void disjoint_pool_print_stats(disjoint_pool_t *pool) { utils_mutex_unlock(&bucket->bucket_lock); } - LOG_DEBUG("current pool size: %zu", + LOG_DEBUG("current pool size: %" PRIu64, disjoint_pool_get_limits(pool)->total_size); LOG_DEBUG("suggested setting=;%c%s:%zu,%zu,64K", (char)tolower(name[0]), (name + 1), high_bucket_size, high_peak_slabs_in_use); @@ -864,7 +866,8 @@ umf_result_t disjoint_pool_free(void *pool, void *ptr) { if (disjoint_pool->params.pool_trace > 2) { const char *name = disjoint_pool->params.name; - LOG_DEBUG("freed %s %p to %s, current total pool size: %zu, current " + LOG_DEBUG("freed %s %p to %s, current total pool size: %" PRIu64 + ", current " "pool size for %s: %zu", name, ptr, (to_pool ? "pool" : "provider"), disjoint_pool_get_limits(disjoint_pool)->total_size, name, diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h index 86460509b..2b5de64bc 100644 --- a/src/pool/pool_disjoint_internal.h +++ b/src/pool/pool_disjoint_internal.h @@ -102,7 +102,7 @@ typedef struct slab_t { typedef struct umf_disjoint_pool_shared_limits_t { size_t max_size; - size_t total_size; // requires atomic access + uint64_t total_size; // requires atomic access } umf_disjoint_pool_shared_limits_t; typedef struct umf_disjoint_pool_params_t { diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index bd5ea9c69..f0cd3abae 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ -934,7 +934,7 @@ static membind_t membindFirst(os_memory_provider_t *provider, void *addr, if (provider->mode == UMF_NUMA_MODE_INTERLEAVE) { assert(provider->part_size != 0); - size_t s = utils_fetch_and_add64(&provider->alloc_sum, size); + size_t s = utils_fetch_and_add_u64(&provider->alloc_sum, size); membind.node = (s / provider->part_size) % provider->nodeset_len; membind.bitmap = provider->nodeset[membind.node]; membind.bind_size = ALIGN_UP(provider->part_size, membind.page_size); diff --git a/src/provider/provider_os_memory_internal.h b/src/provider/provider_os_memory_internal.h index faf0de247..4a603b1da 100644 --- a/src/provider/provider_os_memory_internal.h +++ b/src/provider/provider_os_memory_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -58,7 +58,7 @@ typedef struct os_memory_provider_t { int numa_flags; // combination of hwloc flags size_t part_size; - size_t alloc_sum; // sum of all allocations - used for manual interleaving + uint64_t alloc_sum; // sum of all allocations - used for manual interleaving struct { unsigned weight; diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index f9a98e87f..4696bc562 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -565,7 +565,7 @@ static umf_result_t trackingGetIpcHandle(void *provider, const void *ptr, return ret; } - cache_value->handle_id = utils_atomic_increment(&IPC_HANDLE_ID); + cache_value->handle_id = utils_atomic_increment_u64(&IPC_HANDLE_ID); cache_value->ipcDataSize = ipcDataSize; int insRes = critnib_insert(p->ipcCache, (uintptr_t)ptr, @@ -703,18 +703,20 @@ static umf_result_t trackingOpenIpcHandle(void *provider, void *providerIpcData, assert(cache_entry != NULL); void *mapped_ptr = NULL; - utils_atomic_load_acquire(&(cache_entry->mapped_base_ptr), &mapped_ptr); + utils_atomic_load_acquire_ptr(&(cache_entry->mapped_base_ptr), + (void **)&mapped_ptr); if (mapped_ptr == NULL) { utils_mutex_lock(&(cache_entry->mmap_lock)); - utils_atomic_load_acquire(&(cache_entry->mapped_base_ptr), &mapped_ptr); + utils_atomic_load_acquire_ptr(&(cache_entry->mapped_base_ptr), + (void **)&mapped_ptr); if (mapped_ptr == NULL) { ret = upstreamOpenIPCHandle(p, providerIpcData, ipcUmfData->baseSize, &mapped_ptr); if (ret == UMF_RESULT_SUCCESS) { // Put to the cache cache_entry->mapped_size = ipcUmfData->baseSize; - utils_atomic_store_release(&(cache_entry->mapped_base_ptr), - mapped_ptr); + utils_atomic_store_release_ptr(&(cache_entry->mapped_base_ptr), + mapped_ptr); } } utils_mutex_unlock(&(cache_entry->mmap_lock)); diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 910c859b0..0104b8646 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -10,6 +10,8 @@ #ifndef UMF_UTILS_CONCURRENCY_H #define UMF_UTILS_CONCURRENCY_H 1 +#include +#include #include #include @@ -19,7 +21,7 @@ #include "utils_windows_intrin.h" #pragma intrinsic(_BitScanForward64) -#else +#else /* !_WIN32 */ #include #ifndef __cplusplus @@ -27,10 +29,18 @@ #else /* __cplusplus */ #include #define _Atomic(X) std::atomic + +// TODO remove cpp code from this file +using std::memory_order_acq_rel; +using std::memory_order_acquire; +using std::memory_order_relaxed; +using std::memory_order_release; + #endif /* __cplusplus */ -#endif /* _WIN32 */ +#endif /* !_WIN32 */ +#include "utils_common.h" #include "utils_sanitizers.h" #ifdef __cplusplus @@ -79,70 +89,137 @@ void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #if defined(_WIN32) -static __inline unsigned char utils_lssb_index(long long value) { +static inline unsigned char utils_lssb_index(long long value) { unsigned long ret; _BitScanForward64(&ret, value); return (unsigned char)ret; } -static __inline unsigned char utils_mssb_index(long long value) { +static inline unsigned char utils_mssb_index(long long value) { unsigned long ret; _BitScanReverse64(&ret, value); return (unsigned char)ret; } // There is no good way to do atomic_load on windows... 
-#define utils_atomic_load_acquire(object, dest) \ - do { \ - *(LONG64 *)dest = \ - InterlockedOr64Acquire((LONG64 volatile *)object, 0); \ - } while (0) +static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) { + // NOTE: Windows cl complains about direct accessing 'ptr' which is next + // accessed using Interlocked* functions (warning 28112 - disabled) + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + + // On Windows, there is no equivalent to __atomic_load, so we use cmpxchg + // with 0, 0 here. This will always return the value under the pointer + // without writing anything. + LONG64 ret = InterlockedCompareExchange64((LONG64 volatile *)ptr, 0, 0); + *out = *(uint64_t *)&ret; +} + +static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + uintptr_t ret = (uintptr_t)InterlockedCompareExchangePointer(ptr, 0, 0); + *(uintptr_t *)out = ret; +} -#define utils_atomic_store_release(object, desired) \ - InterlockedExchange64((LONG64 volatile *)object, (LONG64)desired) +static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + InterlockedExchangePointer(ptr, val); +} -#define utils_atomic_increment(object) \ - InterlockedIncrement64((LONG64 volatile *)object) +static inline uint64_t utils_atomic_increment_u64(uint64_t *ptr) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return incremented value + return InterlockedIncrement64((LONG64 volatile *)ptr); +} -#define utils_atomic_decrement(object) \ - InterlockedDecrement64((LONG64 volatile *)object) +static inline uint64_t utils_atomic_decrement_u64(uint64_t *ptr) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return decremented value + return InterlockedDecrement64((LONG64 volatile *)ptr); +} -#define utils_fetch_and_add64(ptr, value) \ - InterlockedExchangeAdd64((LONG64 *)(ptr), value) +static inline uint64_t utils_fetch_and_add_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return the value that had previously been in *ptr + return InterlockedExchangeAdd64((LONG64 volatile *)(ptr), val); +} -// NOTE: windows version have different order of args -#define utils_compare_exchange(object, desired, expected) \ - InterlockedCompareExchange64((LONG64 volatile *)object, *expected, *desired) +static inline uint64_t utils_fetch_and_sub_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return the value that had previously been in *ptr + // NOTE: on Windows there is no *Sub* version of InterlockedExchange + return InterlockedExchangeAdd64((LONG64 volatile *)(ptr), -(LONG64)val); +} + +static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected, + uint64_t *desired) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + LONG64 out = InterlockedCompareExchange64( + (LONG64 volatile *)ptr, *(LONG64 *)desired, *(LONG64 *)expected); + if (out == *(LONG64 *)expected) { + return true; + } + + // else + *expected = out; + return false; +} #else // !defined(_WIN32) #define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) #define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) -#define utils_atomic_load_acquire(object, dest) \ - do { \ - utils_annotate_acquire((void *)object); \ - __atomic_load(object, dest, memory_order_acquire); \ - } while (0) +static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + ASSERT_IS_ALIGNED((uintptr_t)out, 8); + __atomic_load(ptr, out, 
memory_order_acquire); + utils_annotate_acquire(ptr); +} -#define utils_atomic_store_release(object, desired) \ - do { \ - __atomic_store_n(object, desired, memory_order_release); \ - utils_annotate_release((void *)object); \ - } while (0) +static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + ASSERT_IS_ALIGNED((uintptr_t)out, 8); + __atomic_load((uintptr_t *)ptr, (uintptr_t *)out, memory_order_acquire); + utils_annotate_acquire(ptr); +} -#define utils_atomic_increment(object) \ - __atomic_add_fetch(object, 1, memory_order_acq_rel) +static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + utils_annotate_release(ptr); + __atomic_store_n((uintptr_t *)ptr, (uintptr_t)val, memory_order_release); +} -#define utils_atomic_decrement(object) \ - __atomic_sub_fetch(object, 1, memory_order_acq_rel) +static inline uint64_t utils_atomic_increment_u64(uint64_t *val) { + ASSERT_IS_ALIGNED((uintptr_t)val, 8); + // return incremented value + return __atomic_add_fetch(val, 1, memory_order_acq_rel); +} -#define utils_fetch_and_add64(object, value) \ - __atomic_fetch_add(object, value, memory_order_acq_rel) +static inline uint64_t utils_atomic_decrement_u64(uint64_t *val) { + ASSERT_IS_ALIGNED((uintptr_t)val, 8); + // return decremented value + return __atomic_sub_fetch(val, 1, memory_order_acq_rel); +} -#define utils_compare_exchange(object, expected, desired) \ - __atomic_compare_exchange(object, expected, desired, 0 /* strong */, \ - memory_order_acq_rel, memory_order_relaxed) +static inline uint64_t utils_fetch_and_add_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + // return the value that had previously been in *ptr + return __atomic_fetch_add(ptr, val, memory_order_acq_rel); +} + +static inline uint64_t utils_fetch_and_sub_u64(uint64_t *ptr, uint64_t val) { + // return the value that had previously been in *ptr + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + return __atomic_fetch_sub(ptr, val, memory_order_acq_rel); +} + +static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected, + uint64_t *desired) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + return __atomic_compare_exchange(ptr, expected, desired, 0 /* strong */, + memory_order_acq_rel, + memory_order_relaxed); +} #endif // !defined(_WIN32) diff --git a/test/supp/drd-umf_test-disjoint_pool.supp b/test/supp/drd-umf_test-disjoint_pool.supp index 24a44b93d..2a5548d27 100644 --- a/test/supp/drd-umf_test-disjoint_pool.supp +++ b/test/supp/drd-umf_test-disjoint_pool.supp @@ -1,7 +1,7 @@ { False-positive ConflictingAccess in critnib_insert drd:ConflictingAccess - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/drd-umf_test-ipc.supp b/test/supp/drd-umf_test-ipc.supp index 76844585d..fbdbd0183 100644 --- a/test/supp/drd-umf_test-ipc.supp +++ b/test/supp/drd-umf_test-ipc.supp @@ -5,3 +5,30 @@ fun:pthread_cond_destroy@* ... } + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... 
+} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} diff --git a/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp b/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp index bc4f2295f..8d8746861 100644 --- a/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp +++ b/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp @@ -9,7 +9,7 @@ { False-positive ConflictingAccess in critnib_insert drd:ConflictingAccess - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/drd-umf_test-jemalloc_coarse_file.supp b/test/supp/drd-umf_test-jemalloc_coarse_file.supp index bc4f2295f..8d8746861 100644 --- a/test/supp/drd-umf_test-jemalloc_coarse_file.supp +++ b/test/supp/drd-umf_test-jemalloc_coarse_file.supp @@ -9,7 +9,7 @@ { False-positive ConflictingAccess in critnib_insert drd:ConflictingAccess - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp b/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp index 025834658..f6f12aa1e 100644 --- a/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp +++ b/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp @@ -1,6 +1,7 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle drd:ConflictingAccess + fun:utils_atomic_store_release_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/drd-umf_test-provider_file_memory_ipc.supp b/test/supp/drd-umf_test-provider_file_memory_ipc.supp index a15d860aa..72fd6d87c 100644 --- a/test/supp/drd-umf_test-provider_file_memory_ipc.supp +++ b/test/supp/drd-umf_test-provider_file_memory_ipc.supp @@ -9,12 +9,30 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle drd:ConflictingAccess + fun:utils_atomic_store_release_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle ... } +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + { False-positive ConflictingAccess in jemalloc drd:ConflictingAccess diff --git a/test/supp/drd-umf_test-provider_os_memory.supp b/test/supp/drd-umf_test-provider_os_memory.supp index 025834658..f6f12aa1e 100644 --- a/test/supp/drd-umf_test-provider_os_memory.supp +++ b/test/supp/drd-umf_test-provider_os_memory.supp @@ -1,6 +1,7 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle drd:ConflictingAccess + fun:utils_atomic_store_release_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/helgrind-umf_test-disjoint_pool.supp b/test/supp/helgrind-umf_test-disjoint_pool.supp index 929674e8e..65dfdd2c7 100644 --- a/test/supp/helgrind-umf_test-disjoint_pool.supp +++ b/test/supp/helgrind-umf_test-disjoint_pool.supp @@ -31,7 +31,7 @@ { False-positive Race in critnib_insert Helgrind:Race - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... 
} diff --git a/test/supp/helgrind-umf_test-ipc.supp b/test/supp/helgrind-umf_test-ipc.supp index e46140c19..04f3a9199 100644 --- a/test/supp/helgrind-umf_test-ipc.supp +++ b/test/supp/helgrind-umf_test-ipc.supp @@ -1,7 +1,7 @@ { False-positive race in critnib_insert (lack of instrumentation) Helgrind:Race - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } @@ -14,3 +14,40 @@ fun:critnib_find ... } + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:utils_atomic_store_release_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] umfMemoryProviderGetIPCHandle + Helgrind:Race + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] umfMemoryProviderGetIPCHandle + Helgrind:Race + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp b/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp index ac8969c5a..2f4980f51 100644 --- a/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp +++ b/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp @@ -9,7 +9,7 @@ { False-positive Race in critnib_insert Helgrind:Race - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp b/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp index ac8969c5a..2f4980f51 100644 --- a/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp +++ b/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp @@ -9,7 +9,7 @@ { False-positive Race in critnib_insert Helgrind:Race - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } diff --git a/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp b/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp index d6401e8ee..4bc776f43 100644 --- a/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp +++ b/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp @@ -1,6 +1,7 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race + fun:utils_atomic_store_release_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp b/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp index cdc0bd8df..de22665f5 100644 --- a/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp +++ b/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp @@ -1,6 +1,17 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race + fun:utils_atomic_store_release_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:utils_atomic_load_acquire_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle @@ -10,7 +21,7 @@ { False-positive race in critnib_insert (lack of instrumentation) Helgrind:Race - fun:store + fun:utils_atomic_store_release_ptr fun:critnib_insert ... } @@ -40,3 +51,20 @@ fun:tbb_pool_finalize ... 
} + +{ + [false-positive] trackingGetIpcHandle + Helgrind:Race + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] trackingGetIpcHandle + Helgrind:Race + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} diff --git a/test/supp/helgrind-umf_test-provider_os_memory.supp b/test/supp/helgrind-umf_test-provider_os_memory.supp index d6401e8ee..4bc776f43 100644 --- a/test/supp/helgrind-umf_test-provider_os_memory.supp +++ b/test/supp/helgrind-umf_test-provider_os_memory.supp @@ -1,6 +1,7 @@ { [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race + fun:utils_atomic_store_release_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle From 112a80c963f91d52b347ad6fd6aa5590396992c3 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 4 Mar 2025 15:31:24 +0000 Subject: [PATCH 202/466] prevent from deadlock in DP bucket_can_pool() --- src/pool/pool_disjoint.c | 47 +++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 7a2f327e4..0bd88bd24 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -36,7 +36,6 @@ // Forward declarations static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool); static bool bucket_can_pool(bucket_t *bucket); -static void bucket_decrement_pool(bucket_t *bucket); static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, bool *from_pool); @@ -318,6 +317,7 @@ static void bucket_free_chunk(bucket_t *bucket, void *ptr, slab_t *slab, assert(slab_it->val != NULL); pool_unregister_slab(bucket->pool, slab_it->val); DL_DELETE(bucket->available_slabs, slab_it); + assert(bucket->available_slabs_num > 0); bucket->available_slabs_num--; destroy_slab(slab_it->val); } @@ -383,10 +383,16 @@ static slab_list_item_t *bucket_get_avail_slab(bucket_t *bucket, // Allocation from existing slab is treated as from pool for statistics. *from_pool = true; if (slab->num_chunks_allocated == 0) { + assert(bucket->chunked_slabs_in_pool > 0); // If this was an empty slab, it was in the pool. // Now it is no longer in the pool, so update count. 
--bucket->chunked_slabs_in_pool; - bucket_decrement_pool(bucket); + uint64_t size_to_sub = bucket_slab_alloc_size(bucket); + uint64_t old_size = utils_fetch_and_sub_u64( + &bucket->shared_limits->total_size, size_to_sub); + (void)old_size; + assert(old_size >= size_to_sub); + bucket_update_stats(bucket, 1, -1); } } @@ -422,12 +428,6 @@ static void bucket_update_stats(bucket_t *bucket, int in_use, int in_pool) { in_pool * bucket_slab_alloc_size(bucket); } -static void bucket_decrement_pool(bucket_t *bucket) { - bucket_update_stats(bucket, 1, -1); - utils_fetch_and_add64(&bucket->shared_limits->total_size, - -(long long)bucket_slab_alloc_size(bucket)); -} - static bool bucket_can_pool(bucket_t *bucket) { size_t new_free_slabs_in_bucket; @@ -435,23 +435,20 @@ static bool bucket_can_pool(bucket_t *bucket) { // we keep at most params.capacity slabs in the pool if (bucket_max_pooled_slabs(bucket) >= new_free_slabs_in_bucket) { - size_t pool_size = 0; - utils_atomic_load_acquire(&bucket->shared_limits->total_size, - &pool_size); - while (true) { - size_t new_pool_size = pool_size + bucket_slab_alloc_size(bucket); - - if (bucket->shared_limits->max_size < new_pool_size) { - break; - } - - if (utils_compare_exchange(&bucket->shared_limits->total_size, - &pool_size, &new_pool_size)) { - ++bucket->chunked_slabs_in_pool; - - bucket_update_stats(bucket, -1, 1); - return true; - } + + uint64_t size_to_add = bucket_slab_alloc_size(bucket); + size_t previous_size = utils_fetch_and_add_u64( + &bucket->shared_limits->total_size, size_to_add); + + if (previous_size + size_to_add <= bucket->shared_limits->max_size) { + ++bucket->chunked_slabs_in_pool; + bucket_update_stats(bucket, -1, 1); + return true; + } else { + uint64_t old = utils_fetch_and_sub_u64( + &bucket->shared_limits->total_size, size_to_add); + (void)old; + assert(old >= size_to_add); } } From ab4a76f903981bff79826ef25aa44a0f75d2e4ed Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Tue, 4 Mar 2025 15:31:32 +0000 Subject: [PATCH 203/466] implement valgrind macros used in critnib --- src/critnib/critnib.c | 12 ++++++------ src/utils/utils_common.h | 3 --- src/utils/utils_sanitizers.h | 20 +++++++++++++++++++- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index 394a67124..c95637f20 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -174,8 +174,8 @@ struct critnib *critnib_new(void) { goto err_free_critnib; } - VALGRIND_HG_DRD_DISABLE_CHECKING(&c->root, sizeof(c->root)); - VALGRIND_HG_DRD_DISABLE_CHECKING(&c->remove_count, sizeof(c->remove_count)); + utils_annotate_memory_no_check(&c->root, sizeof(c->root)); + utils_annotate_memory_no_check(&c->remove_count, sizeof(c->remove_count)); return c; err_free_critnib: @@ -260,7 +260,7 @@ static struct critnib_node *alloc_node(struct critnib *__restrict c) { struct critnib_node *n = c->deleted_node; c->deleted_node = n->child[0]; - VALGRIND_ANNOTATE_NEW_MEMORY(n, sizeof(*n)); + utils_annotate_memory_new(n, sizeof(*n)); return n; } @@ -291,7 +291,7 @@ static struct critnib_leaf *alloc_leaf(struct critnib *__restrict c) { struct critnib_leaf *k = c->deleted_leaf; c->deleted_leaf = k->value; - VALGRIND_ANNOTATE_NEW_MEMORY(k, sizeof(*k)); + utils_annotate_memory_new(k, sizeof(*k)); return k; } @@ -316,7 +316,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { return ENOMEM; } - VALGRIND_HG_DRD_DISABLE_CHECKING(k, sizeof(struct critnib_leaf)); + utils_annotate_memory_no_check(k, sizeof(struct 
critnib_leaf)); k->key = key; k->value = value; @@ -377,7 +377,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { return ENOMEM; } - VALGRIND_HG_DRD_DISABLE_CHECKING(m, sizeof(struct critnib_node)); + utils_annotate_memory_no_check(m, sizeof(struct critnib_node)); for (int i = 0; i < SLNODES; i++) { m->child[i] = NULL; diff --git a/src/utils/utils_common.h b/src/utils/utils_common.h index 7824e74af..fff44f390 100644 --- a/src/utils/utils_common.h +++ b/src/utils/utils_common.h @@ -53,9 +53,6 @@ typedef enum umf_purge_advise_t { #define ASSERT_IS_ALIGNED(value, align) \ DO_WHILE_EXPRS(assert(IS_ALIGNED(value, align))) -#define VALGRIND_ANNOTATE_NEW_MEMORY(p, s) DO_WHILE_EMPTY -#define VALGRIND_HG_DRD_DISABLE_CHECKING(p, s) DO_WHILE_EMPTY - #ifdef _WIN32 /* Windows */ #define __TLS __declspec(thread) diff --git a/src/utils/utils_sanitizers.h b/src/utils/utils_sanitizers.h index 3498e4b70..f8896d0ae 100644 --- a/src/utils/utils_sanitizers.h +++ b/src/utils/utils_sanitizers.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -168,6 +168,24 @@ static inline void utils_annotate_memory_inaccessible(void *ptr, size_t size) { #endif } +static inline void utils_annotate_memory_new(void *ptr, size_t size) { +#ifdef UMF_VG_DRD_ENABLED + ANNOTATE_NEW_MEMORY(ptr, size); +#else + (void)ptr; + (void)size; +#endif +} + +static inline void utils_annotate_memory_no_check(void *ptr, size_t size) { +#ifdef UMF_VG_HELGRIND_ENABLED + VALGRIND_HG_DISABLE_CHECKING(ptr, size); +#else + (void)ptr; + (void)size; +#endif +} + #ifdef __cplusplus } #endif From cb34151fd6d0507573cd962e110b91204eb43494 Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Tue, 25 Feb 2025 12:01:53 +0100 Subject: [PATCH 204/466] Add NMake generator build tests --- .github/workflows/nightly.yml | 60 +++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 44f2ba2ca..3381c09be 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -151,6 +151,66 @@ jobs: working-directory: ${{env.BUILD_DIR}} run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + Windows-NMake: + name: Windows-NMake + env: + VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" + BUILD_DIR : "${{github.workspace}}/build" + strategy: + matrix: + os: ['windows-2019', 'windows-2022'] + build_type: [Debug, Release] + compiler: [{c: cl, cxx: cl}] + shared_library: ['ON', 'OFF'] + + runs-on: ${{matrix.os}} + + steps: + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + + - name: Initialize vcpkg + uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + with: + vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 + vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg + vcpkgJsonGlob: '**/vcpkg.json' + + - name: Install dependencies + run: vcpkg install + + - name: Configure MSVC environment + uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 + + - name: Configure build + run: > + cmake + -B ${{env.BUILD_DIR}} + -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" + -DCMAKE_C_COMPILER=${{matrix.compiler.c}} + -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} + 
-G "NMake Makefiles" + -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} + -DUMF_LINK_HWLOC_STATICALLY=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build UMF + shell: cmd + run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j %NUMBER_OF_PROCESSORS% + + - name: Run tests + shell: cmd + working-directory: ${{env.BUILD_DIR}} + run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + + icx: name: ICX env: From 26242df09f0cbd6306c385c9f3f8ddacaf8caf2d Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 5 Mar 2025 09:16:32 +0100 Subject: [PATCH 205/466] Fix aligning in file_mmap_aligned() Signed-off-by: Lukasz Dorau --- src/provider/provider_file_memory.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/provider/provider_file_memory.c b/src/provider/provider_file_memory.c index f81e4f8d2..5cc377f32 100644 --- a/src/provider/provider_file_memory.c +++ b/src/provider/provider_file_memory.c @@ -404,8 +404,12 @@ static umf_result_t file_mmap_aligned(file_memory_provider_t *file_provider, "inserted a value to the map of memory mapping (addr=%p, size=%zu)", ptr, extended_size); - file_provider->base_mmap = ptr; - file_provider->size_mmap = extended_size; + // align the new pointer + uintptr_t aligned_ptr = ALIGN_UP_SAFE((uintptr_t)ptr, alignment); + size_t aligned_size = extended_size - (aligned_ptr - (uintptr_t)ptr); + + file_provider->base_mmap = (void *)aligned_ptr; + file_provider->size_mmap = aligned_size; file_provider->offset_mmap = 0; return UMF_RESULT_SUCCESS; From 89db62518f95117994291553592317827e2d39af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Tue, 4 Mar 2025 15:52:35 +0100 Subject: [PATCH 206/466] add fixedprovider based benchmarks --- benchmark/benchmark.cpp | 75 +++++++++++++++++++++++++++++++++++++ benchmark/benchmark_umf.hpp | 61 +++++++++++++++++++++++++++++- 2 files changed, 135 insertions(+), 1 deletion(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 60636a559..c6b954ea4 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -136,6 +136,81 @@ UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, scalable_pool_uniform) #endif +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + proxy_pool_fixedprovider, fixed_alloc_size, + pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + proxy_pool_fixedprovider) + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&singlethreaded); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, fixed_provider, + fixed_alloc_size, + provider_allocator); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, fixed_provider) + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&singlethreaded); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + disjoint_pool_fix_fixedprovider, fixed_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + disjoint_pool_fix_fixedprovider) + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + disjoint_pool_uniform_fixedprovider, + uniform_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + disjoint_pool_uniform_fixedprovider) + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&singlethreaded); 
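For context, a rough sketch of what the new fixedprovider-based benchmarks sit on top of: a fixed memory provider that serves allocations from a single preallocated buffer. This is illustrative only, with error handling omitted; the header name and exact signatures are assumptions to be checked against the installed UMF headers rather than code from this patch:

    #include <umf/memory_provider.h>
    #include <umf/providers/provider_fixed_memory.h>

    static char backing[1 << 20];   // 1 MiB of preallocated storage

    static void fixed_provider_sketch(void) {
        umf_fixed_memory_provider_params_handle_t params = NULL;
        umf_memory_provider_handle_t provider = NULL;

        // describe the preallocated region and create a provider on top of it
        umfFixedMemoryProviderParamsCreate(&params, backing, sizeof(backing));
        umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, &provider);
        umfFixedMemoryProviderParamsDestroy(params);

        void *ptr = NULL;
        umfMemoryProviderAlloc(provider, 4096, 0, &ptr);  // carve 4 KiB out of the buffer
        // ... the benchmark's allocation/free loop would go here ...
        umfMemoryProviderFree(provider, ptr, 4096);

        umfMemoryProviderDestroy(provider);
    }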
+// TODO: change to multithreaded +//->Apply(&multithreaded); + +#ifdef UMF_POOL_JEMALLOC_ENABLED +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + jemalloc_pool_fixedprovider, fixed_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_fix) + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + jemalloc_pool_uniform_fixedprovider, + uniform_alloc_size, + pool_allocator>); +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_uniform) + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); + +#endif + +#ifdef UMF_POOL_SCALABLE_ENABLED +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + scalable_pool_fix_fixedprovider, fixed_alloc_size, + pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + scalable_pool_fix_fixedprovider) + ->Apply(&default_multiple_alloc_fix_size) + ->Apply(&multithreaded); + +UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, + scalable_pool_uniform_fixedprovider, + uniform_alloc_size, + pool_allocator>); + +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + scalable_pool_uniform_fixedprovider) + ->Apply(&default_multiple_alloc_uniform_size) + ->Apply(&multithreaded); + +#endif + //BENCHMARK_MAIN(); int main(int argc, char **argv) { if (initAffinityMask()) { diff --git a/benchmark/benchmark_umf.hpp b/benchmark/benchmark_umf.hpp index 5c3b160c7..cfc9982d2 100644 --- a/benchmark/benchmark_umf.hpp +++ b/benchmark/benchmark_umf.hpp @@ -19,6 +19,7 @@ #ifdef UMF_POOL_SCALABLE_ENABLED #include #endif +#include #include #ifdef UMF_POOL_JEMALLOC_ENABLED @@ -145,7 +146,9 @@ struct os_provider : public provider_interface { umfOsMemoryProviderParamsDestroy(handle); }; - return {static_cast(raw_params), deleter}; + return {static_cast( + raw_params), + deleter}; } umf_memory_provider_ops_t * @@ -155,6 +158,62 @@ struct os_provider : public provider_interface { static std::string name() { return "os_provider"; } }; +struct fixed_provider : public provider_interface { + private: + char *mem = NULL; + const size_t size = 1024 * 1024 * 1024; // 1GB + public: + virtual void SetUp(::benchmark::State &state) override { + if (state.thread_index() != 0) { + return; + } + + if (!mem) { + mem = new char[size]; + } + + provider_interface::SetUp(state); + } + + virtual void TearDown(::benchmark::State &state) override { + if (state.thread_index() != 0) { + return; + } + + delete[] mem; + mem = nullptr; + + provider_interface::TearDown(state); + } + + provider_interface::params_ptr + getParams(::benchmark::State &state) override { + umf_fixed_memory_provider_params_handle_t raw_params = nullptr; + umfFixedMemoryProviderParamsCreate(&raw_params, mem, size); + if (!raw_params) { + state.SkipWithError("Failed to create fixed provider params"); + return {nullptr, [](void *) {}}; + } + + // Use a lambda as the custom deleter + auto deleter = [](void *p) { + auto handle = + static_cast(p); + umfFixedMemoryProviderParamsDestroy(handle); + }; + + return {static_cast( + raw_params), + deleter}; + } + + umf_memory_provider_ops_t * + getOps([[maybe_unused]] ::benchmark::State &state) override { + return umfFixedMemoryProviderOps(); + } + static std::string name() { return "fixed_provider"; } +}; + template struct proxy_pool : public pool_interface { umf_memory_pool_ops_t * From cb94612f01b31b8ab4edc686731ac89cc624e729 Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek 
Date: Fri, 28 Feb 2025 13:52:26 +0100 Subject: [PATCH 207/466] Make umf-* prefix for tests instead of test_* --- .github/workflows/reusable_dax.yml | 2 +- .github/workflows/reusable_multi_numa.yml | 8 ++--- .github/workflows/reusable_proxy_lib.yml | 4 +-- test/CMakeLists.txt | 24 ++++++------- test/ipc_devdax_prov.sh | 6 ++-- test/ipc_file_prov.sh | 6 ++-- test/ipc_file_prov_fsdax.sh | 6 ++-- test/ipc_os_prov_anon_fd.sh | 4 +-- test/ipc_os_prov_proxy.sh | 6 ++-- test/ipc_os_prov_shm.sh | 6 ++-- test/providers/ipc_cuda_prov.sh | 6 ++-- test/providers/ipc_level_zero_prov.sh | 4 +-- ..._pool.supp => drd-test_disjoint_pool.supp} | 0 ...rd-umf_test-ipc.supp => drd-test_ipc.supp} | 0 ...p => drd-test_jemalloc_coarse_devdax.supp} | 0 ...upp => drd-test_jemalloc_coarse_file.supp} | 0 ..._pool.supp => drd-test_jemalloc_pool.supp} | 0 ... drd-test_provider_devdax_memory_ipc.supp} | 0 ...=> drd-test_provider_file_memory_ipc.supp} | 0 ....supp => drd-test_provider_os_memory.supp} | 0 ...p => drd-test_scalable_coarse_devdax.supp} | 0 ...upp => drd-test_scalable_coarse_file.supp} | 0 ..._pool.supp => drd-test_scalable_pool.supp} | 0 ....supp => helgrind-test_disjoint_pool.supp} | 0 ...f_test-ipc.supp => helgrind-test_ipc.supp} | 0 ...helgrind-test_jemalloc_coarse_devdax.supp} | 0 ...> helgrind-test_jemalloc_coarse_file.supp} | 0 ....supp => helgrind-test_jemalloc_pool.supp} | 0 ...rind-test_provider_devdax_memory_ipc.supp} | 0 ...lgrind-test_provider_file_memory_ipc.supp} | 0 ... => helgrind-test_provider_os_memory.supp} | 0 ...helgrind-test_scalable_coarse_devdax.supp} | 0 ...> helgrind-test_scalable_coarse_file.supp} | 0 ....supp => helgrind-test_scalable_pool.supp} | 0 ...memcheck-test_jemalloc_coarse_devdax.supp} | 0 ...> memcheck-test_jemalloc_coarse_file.supp} | 0 ....supp => memcheck-test_jemalloc_pool.supp} | 0 ....supp => memcheck-test_scalable_pool.supp} | 0 test/test_valgrind.sh | 34 +++++++++---------- 39 files changed, 58 insertions(+), 58 deletions(-) rename test/supp/{drd-umf_test-disjoint_pool.supp => drd-test_disjoint_pool.supp} (100%) rename test/supp/{drd-umf_test-ipc.supp => drd-test_ipc.supp} (100%) rename test/supp/{drd-umf_test-jemalloc_coarse_devdax.supp => drd-test_jemalloc_coarse_devdax.supp} (100%) rename test/supp/{drd-umf_test-jemalloc_coarse_file.supp => drd-test_jemalloc_coarse_file.supp} (100%) rename test/supp/{drd-umf_test-jemalloc_pool.supp => drd-test_jemalloc_pool.supp} (100%) rename test/supp/{drd-umf_test-provider_devdax_memory_ipc.supp => drd-test_provider_devdax_memory_ipc.supp} (100%) rename test/supp/{drd-umf_test-provider_file_memory_ipc.supp => drd-test_provider_file_memory_ipc.supp} (100%) rename test/supp/{drd-umf_test-provider_os_memory.supp => drd-test_provider_os_memory.supp} (100%) rename test/supp/{drd-umf_test-scalable_coarse_devdax.supp => drd-test_scalable_coarse_devdax.supp} (100%) rename test/supp/{drd-umf_test-scalable_coarse_file.supp => drd-test_scalable_coarse_file.supp} (100%) rename test/supp/{drd-umf_test-scalable_pool.supp => drd-test_scalable_pool.supp} (100%) rename test/supp/{helgrind-umf_test-disjoint_pool.supp => helgrind-test_disjoint_pool.supp} (100%) rename test/supp/{helgrind-umf_test-ipc.supp => helgrind-test_ipc.supp} (100%) rename test/supp/{helgrind-umf_test-jemalloc_coarse_devdax.supp => helgrind-test_jemalloc_coarse_devdax.supp} (100%) rename test/supp/{helgrind-umf_test-jemalloc_coarse_file.supp => helgrind-test_jemalloc_coarse_file.supp} (100%) rename test/supp/{helgrind-umf_test-jemalloc_pool.supp => 
helgrind-test_jemalloc_pool.supp} (100%) rename test/supp/{helgrind-umf_test-provider_devdax_memory_ipc.supp => helgrind-test_provider_devdax_memory_ipc.supp} (100%) rename test/supp/{helgrind-umf_test-provider_file_memory_ipc.supp => helgrind-test_provider_file_memory_ipc.supp} (100%) rename test/supp/{helgrind-umf_test-provider_os_memory.supp => helgrind-test_provider_os_memory.supp} (100%) rename test/supp/{helgrind-umf_test-scalable_coarse_devdax.supp => helgrind-test_scalable_coarse_devdax.supp} (100%) rename test/supp/{helgrind-umf_test-scalable_coarse_file.supp => helgrind-test_scalable_coarse_file.supp} (100%) rename test/supp/{helgrind-umf_test-scalable_pool.supp => helgrind-test_scalable_pool.supp} (100%) rename test/supp/{memcheck-umf_test-jemalloc_coarse_devdax.supp => memcheck-test_jemalloc_coarse_devdax.supp} (100%) rename test/supp/{memcheck-umf_test-jemalloc_coarse_file.supp => memcheck-test_jemalloc_coarse_file.supp} (100%) rename test/supp/{memcheck-umf_test-jemalloc_pool.supp => memcheck-test_jemalloc_pool.supp} (100%) rename test/supp/{memcheck-umf_test-scalable_pool.supp => memcheck-test_scalable_pool.supp} (100%) diff --git a/.github/workflows/reusable_dax.yml b/.github/workflows/reusable_dax.yml index 1a41b11c7..4ea5ddac7 100644 --- a/.github/workflows/reusable_dax.yml +++ b/.github/workflows/reusable_dax.yml @@ -31,7 +31,7 @@ env: INSTL_DIR : "${{github.workspace}}/../install-dir" COVERAGE_DIR : "${{github.workspace}}/coverage" COVERAGE_NAME : "exports-coverage-dax" - DAX_TESTS: "./test/umf_test-provider_file_memory ./test/umf_test-provider_devdax_memory" + DAX_TESTS: "./test/test_provider_file_memory ./test/test_provider_devdax_memory" jobs: dax: diff --git a/.github/workflows/reusable_multi_numa.yml b/.github/workflows/reusable_multi_numa.yml index 7c7750551..3c60bebc3 100644 --- a/.github/workflows/reusable_multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -10,7 +10,7 @@ env: BUILD_DIR : "${{github.workspace}}/build" COVERAGE_DIR : "${{github.workspace}}/coverage" COVERAGE_NAME : "exports-coverage-multinuma" - NUMA_TESTS: "./test/umf_test-memspace_numa ./test/umf_test-provider_os_memory_multiple_numa_nodes" + NUMA_TESTS: "./test/test_memspace_numa ./test/test_provider_os_memory_multiple_numa_nodes" jobs: multi_numa: @@ -60,13 +60,13 @@ jobs: # On RHEL, hwloc version is just a little too low. # Skip some tests until we upgrade hwloc and update CMake to properly handle local hwloc installation. 
# TODO: fix issue #560 - # TODO: add issue for -E umf-init_teardown - it is not clear why it fails + # TODO: add issue for -E test_init_teardown - it is not clear why it fails - name: Run tests (on RHEL) if: matrix.os == 'rhel-9.1' working-directory: ${{github.workspace}}/build run: | - ctest --output-on-failure --test-dir test -E "umf-provider_os_memory_multiple_numa_nodes|umf-init_teardown" - ./test/umf_test-provider_os_memory_multiple_numa_nodes \ + ctest --output-on-failure --test-dir test -E "test_provider_os_memory_multiple_numa_nodes|test_init_teardown" + ./test/test_provider_os_memory_multiple_numa_nodes \ --gtest_filter="-*checkModeLocal/*:*checkModePreferredEmptyNodeset/*:testNuma.checkModeInterleave" - name: Run NUMA tests under valgrind diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index bb4a3278e..363e66526 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -59,9 +59,9 @@ jobs: working-directory: ${{env.BUILD_DIR}} run: LD_PRELOAD=./lib/libumf_proxy.so ctest --output-on-failure - - name: Run "./test/umf_test-memoryPool" with proxy library + - name: Run "./test/test_memoryPool" with proxy library working-directory: ${{env.BUILD_DIR}} - run: LD_PRELOAD=./lib/libumf_proxy.so ./test/umf_test-memoryPool + run: LD_PRELOAD=./lib/libumf_proxy.so ./test/test_memoryPool - name: Run "/usr/bin/ls" with proxy library working-directory: ${{env.BUILD_DIR}} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ecdde95e1..37f4c809e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -45,8 +45,8 @@ function(build_umf_test) "${multiValueArgs}" ${ARGN}) - set(TEST_NAME umf-${ARG_NAME}) - set(TEST_TARGET_NAME umf_test-${ARG_NAME}) + set(TEST_NAME test_${ARG_NAME}) + set(TEST_TARGET_NAME test_${ARG_NAME}) set(LIB_DIRS ${LIB_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) @@ -130,8 +130,8 @@ function(add_umf_test) SRCS ${ARG_SRCS} LIBS ${ARG_LIBS}) - set(TEST_NAME umf-${ARG_NAME}) - set(TEST_TARGET_NAME umf_test-${ARG_NAME}) + set(TEST_NAME test_${ARG_NAME}) + set(TEST_TARGET_NAME test_${ARG_NAME}) add_test( NAME ${TEST_NAME} @@ -408,7 +408,7 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) SRCS providers/provider_level_zero.cpp ${UMF_UTILS_DIR}/utils_level_zero.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) - target_compile_definitions(umf_test-provider_level_zero_dlopen_global + target_compile_definitions(test_provider_level_zero_dlopen_global PUBLIC USE_DLOPEN=1 OPEN_ZE_LIBRARY_GLOBAL=1) add_umf_test( @@ -416,7 +416,7 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_LEVEL_ZERO_PROVIDER) SRCS providers/provider_level_zero.cpp ${UMF_UTILS_DIR}/utils_level_zero.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) - target_compile_definitions(umf_test-provider_level_zero_dlopen_local + target_compile_definitions(test_provider_level_zero_dlopen_local PUBLIC USE_DLOPEN=1 OPEN_ZE_LIBRARY_GLOBAL=0) endif() @@ -443,7 +443,7 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) SRCS providers/provider_cuda.cpp providers/cuda_helpers.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) - target_compile_definitions(umf_test-provider_cuda_dlopen_global + target_compile_definitions(test_provider_cuda_dlopen_global PUBLIC USE_DLOPEN=1 OPEN_CU_LIBRARY_GLOBAL=1) add_umf_test( @@ -451,7 +451,7 @@ if(UMF_BUILD_GPU_TESTS AND UMF_BUILD_CUDA_PROVIDER) SRCS providers/provider_cuda.cpp providers/cuda_helpers.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) - 
target_compile_definitions(umf_test-provider_cuda_dlopen_local + target_compile_definitions(test_provider_cuda_dlopen_local PUBLIC USE_DLOPEN=1 OPEN_CU_LIBRARY_GLOBAL=0) else() message( @@ -496,7 +496,7 @@ if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY) NAME proxy_lib_size_threshold SRCS ${BA_SOURCES_FOR_TEST} test_proxy_lib_size_threshold.cpp LIBS ${UMF_UTILS_FOR_TEST} umf_proxy) - set_property(TEST umf-proxy_lib_size_threshold + set_property(TEST test_proxy_lib_size_threshold PROPERTY ENVIRONMENT UMF_PROXY="size.threshold=64") endif() @@ -506,7 +506,7 @@ if(UMF_PROXY_LIB_ENABLED AND UMF_BUILD_SHARED_LIBRARY) SRCS ${BA_SOURCES_FOR_TEST} memoryPoolAPI.cpp malloc_compliance_tests.cpp LIBS ${UMF_UTILS_FOR_TEST} umf_proxy) - target_compile_definitions(umf_test-proxy_lib_memoryPool + target_compile_definitions(test_proxy_lib_memoryPool PUBLIC UMF_PROXY_LIB_ENABLED=1) endif() @@ -530,7 +530,7 @@ function(add_umf_ipc_test) "" ${ARGN}) - set(TEST_NAME umf-${ARG_TEST}) + set(TEST_NAME test_${ARG_TEST}) if(DEFINED ARG_SRC_DIR) set(SRC_DIR ${ARG_SRC_DIR}) @@ -650,7 +650,7 @@ if(LINUX LIBS dl) # append LD_LIBRARY_PATH to the libumf set_property( - TEST umf-init_teardown + TEST test_init_teardown PROPERTY ENVIRONMENT_MODIFICATION "LD_LIBRARY_PATH=path_list_append:${CMAKE_BINARY_DIR}/lib") endif() diff --git a/test/ipc_devdax_prov.sh b/test/ipc_devdax_prov.sh index 7c5ba3675..43f177c71 100755 --- a/test/ipc_devdax_prov.sh +++ b/test/ipc_devdax_prov.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -25,10 +25,10 @@ PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" echo "Starting ipc_devdax_prov CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_devdax_prov_consumer $PORT & +UMF_LOG=$UMF_LOG_VAL ./test_ipc_devdax_prov_consumer $PORT & echo "Waiting 1 sec ..." sleep 1 echo "Starting ipc_devdax_prov PRODUCER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_devdax_prov_producer $PORT +UMF_LOG=$UMF_LOG_VAL ./test_ipc_devdax_prov_producer $PORT diff --git a/test/ipc_file_prov.sh b/test/ipc_file_prov.sh index b3e3091a8..629b2cbb7 100755 --- a/test/ipc_file_prov.sh +++ b/test/ipc_file_prov.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -20,13 +20,13 @@ UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" rm -f ${FILE_NAME} echo "Starting ipc_file_prov CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_consumer $PORT ${FILE_NAME}_consumer & +UMF_LOG=$UMF_LOG_VAL ./test_ipc_file_prov_consumer $PORT ${FILE_NAME}_consumer & echo "Waiting 1 sec ..." sleep 1 echo "Starting ipc_file_prov PRODUCER on port $PORT ..." 
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_producer $PORT ${FILE_NAME}_producer +UMF_LOG=$UMF_LOG_VAL ./test_ipc_file_prov_producer $PORT ${FILE_NAME}_producer # remove the SHM file rm -f ${FILE_NAME} diff --git a/test/ipc_file_prov_fsdax.sh b/test/ipc_file_prov_fsdax.sh index 4e908869b..314d0aa66 100755 --- a/test/ipc_file_prov_fsdax.sh +++ b/test/ipc_file_prov_fsdax.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -31,13 +31,13 @@ UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" rm -f ${FILE_NAME} echo "Starting ipc_file_prov_fsdax CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_consumer $PORT $FILE_NAME & +UMF_LOG=$UMF_LOG_VAL ./test_ipc_file_prov_consumer $PORT $FILE_NAME & echo "Waiting 1 sec ..." sleep 1 echo "Starting ipc_file_prov_fsdax PRODUCER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_file_prov_producer $PORT $FILE_NAME_2 +UMF_LOG=$UMF_LOG_VAL ./test_ipc_file_prov_producer $PORT $FILE_NAME_2 # remove the SHM file rm -f ${FILE_NAME} diff --git a/test/ipc_os_prov_anon_fd.sh b/test/ipc_os_prov_anon_fd.sh index a42d820a2..4e9a0f832 100755 --- a/test/ipc_os_prov_anon_fd.sh +++ b/test/ipc_os_prov_anon_fd.sh @@ -15,10 +15,10 @@ PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" echo "Starting ipc_os_prov_anon_fd CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_consumer $PORT & +UMF_LOG=$UMF_LOG_VAL ./test_ipc_os_prov_consumer $PORT & echo "Waiting 1 sec ..." sleep 1 echo "Starting ipc_os_prov_anon_fd PRODUCER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_producer $PORT +UMF_LOG=$UMF_LOG_VAL ./test_ipc_os_prov_producer $PORT diff --git a/test/ipc_os_prov_proxy.sh b/test/ipc_os_prov_proxy.sh index 86b95a235..9bd02dad8 100755 --- a/test/ipc_os_prov_proxy.sh +++ b/test/ipc_os_prov_proxy.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -17,10 +17,10 @@ LD_PRELOAD_VAL="../lib/libumf_proxy.so" PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) echo "Starting CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_consumer $PORT & +UMF_LOG=$UMF_LOG_VAL ./test_ipc_os_prov_consumer $PORT & echo "Waiting 1 sec ..." sleep 1 echo "Starting ipc_os_prov_proxy PRODUCER on port $PORT ..." -LD_PRELOAD=$LD_PRELOAD_VAL UMF_LOG=$UMF_LOG_VAL UMF_PROXY=$UMF_PROXY_VAL ./umf_test-ipc_os_prov_proxy $PORT +LD_PRELOAD=$LD_PRELOAD_VAL UMF_LOG=$UMF_LOG_VAL UMF_PROXY=$UMF_PROXY_VAL ./test_ipc_os_prov_proxy $PORT diff --git a/test/ipc_os_prov_shm.sh b/test/ipc_os_prov_shm.sh index efa2de35a..7bde3c613 100755 --- a/test/ipc_os_prov_shm.sh +++ b/test/ipc_os_prov_shm.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -20,13 +20,13 @@ UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" rm -f /dev/shm/${SHM_NAME} echo "Starting ipc_os_prov_shm CONSUMER on port $PORT ..." 
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_consumer $PORT & +UMF_LOG=$UMF_LOG_VAL ./test_ipc_os_prov_consumer $PORT & echo "Waiting 1 sec ..." sleep 1 echo "Starting ipc_os_prov_shm PRODUCER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_os_prov_producer $PORT $SHM_NAME +UMF_LOG=$UMF_LOG_VAL ./test_ipc_os_prov_producer $PORT $SHM_NAME # remove the SHM file rm -f /dev/shm/${SHM_NAME} diff --git a/test/providers/ipc_cuda_prov.sh b/test/providers/ipc_cuda_prov.sh index 1e9b6b05d..bb4be9474 100755 --- a/test/providers/ipc_cuda_prov.sh +++ b/test/providers/ipc_cuda_prov.sh @@ -1,5 +1,5 @@ # -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,10 +15,10 @@ PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" echo "Starting ipc_cuda_prov CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_cuda_prov_consumer $PORT & +UMF_LOG=$UMF_LOG_VAL ./test_ipc_cuda_prov_consumer $PORT & echo "Waiting 1 sec ..." sleep 1 echo "Starting ipc_cuda_prov PRODUCER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_cuda_prov_producer $PORT +UMF_LOG=$UMF_LOG_VAL ./test_ipc_cuda_prov_producer $PORT diff --git a/test/providers/ipc_level_zero_prov.sh b/test/providers/ipc_level_zero_prov.sh index 4d2967725..cebd90932 100755 --- a/test/providers/ipc_level_zero_prov.sh +++ b/test/providers/ipc_level_zero_prov.sh @@ -15,10 +15,10 @@ PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) UMF_LOG_VAL="level:debug;flush:debug;output:stderr;pid:yes" echo "Starting ipc_level_zero_prov CONSUMER on port $PORT ..." -UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_level_zero_prov_consumer $PORT & +UMF_LOG=$UMF_LOG_VAL ./test_ipc_level_zero_prov_consumer $PORT & echo "Waiting 1 sec ..." sleep 1 echo "Starting ipc_level_zero_prov PRODUCER on port $PORT ..." 
-UMF_LOG=$UMF_LOG_VAL ./umf_test-ipc_level_zero_prov_producer $PORT +UMF_LOG=$UMF_LOG_VAL ./test_ipc_level_zero_prov_producer $PORT diff --git a/test/supp/drd-umf_test-disjoint_pool.supp b/test/supp/drd-test_disjoint_pool.supp similarity index 100% rename from test/supp/drd-umf_test-disjoint_pool.supp rename to test/supp/drd-test_disjoint_pool.supp diff --git a/test/supp/drd-umf_test-ipc.supp b/test/supp/drd-test_ipc.supp similarity index 100% rename from test/supp/drd-umf_test-ipc.supp rename to test/supp/drd-test_ipc.supp diff --git a/test/supp/drd-umf_test-jemalloc_coarse_devdax.supp b/test/supp/drd-test_jemalloc_coarse_devdax.supp similarity index 100% rename from test/supp/drd-umf_test-jemalloc_coarse_devdax.supp rename to test/supp/drd-test_jemalloc_coarse_devdax.supp diff --git a/test/supp/drd-umf_test-jemalloc_coarse_file.supp b/test/supp/drd-test_jemalloc_coarse_file.supp similarity index 100% rename from test/supp/drd-umf_test-jemalloc_coarse_file.supp rename to test/supp/drd-test_jemalloc_coarse_file.supp diff --git a/test/supp/drd-umf_test-jemalloc_pool.supp b/test/supp/drd-test_jemalloc_pool.supp similarity index 100% rename from test/supp/drd-umf_test-jemalloc_pool.supp rename to test/supp/drd-test_jemalloc_pool.supp diff --git a/test/supp/drd-umf_test-provider_devdax_memory_ipc.supp b/test/supp/drd-test_provider_devdax_memory_ipc.supp similarity index 100% rename from test/supp/drd-umf_test-provider_devdax_memory_ipc.supp rename to test/supp/drd-test_provider_devdax_memory_ipc.supp diff --git a/test/supp/drd-umf_test-provider_file_memory_ipc.supp b/test/supp/drd-test_provider_file_memory_ipc.supp similarity index 100% rename from test/supp/drd-umf_test-provider_file_memory_ipc.supp rename to test/supp/drd-test_provider_file_memory_ipc.supp diff --git a/test/supp/drd-umf_test-provider_os_memory.supp b/test/supp/drd-test_provider_os_memory.supp similarity index 100% rename from test/supp/drd-umf_test-provider_os_memory.supp rename to test/supp/drd-test_provider_os_memory.supp diff --git a/test/supp/drd-umf_test-scalable_coarse_devdax.supp b/test/supp/drd-test_scalable_coarse_devdax.supp similarity index 100% rename from test/supp/drd-umf_test-scalable_coarse_devdax.supp rename to test/supp/drd-test_scalable_coarse_devdax.supp diff --git a/test/supp/drd-umf_test-scalable_coarse_file.supp b/test/supp/drd-test_scalable_coarse_file.supp similarity index 100% rename from test/supp/drd-umf_test-scalable_coarse_file.supp rename to test/supp/drd-test_scalable_coarse_file.supp diff --git a/test/supp/drd-umf_test-scalable_pool.supp b/test/supp/drd-test_scalable_pool.supp similarity index 100% rename from test/supp/drd-umf_test-scalable_pool.supp rename to test/supp/drd-test_scalable_pool.supp diff --git a/test/supp/helgrind-umf_test-disjoint_pool.supp b/test/supp/helgrind-test_disjoint_pool.supp similarity index 100% rename from test/supp/helgrind-umf_test-disjoint_pool.supp rename to test/supp/helgrind-test_disjoint_pool.supp diff --git a/test/supp/helgrind-umf_test-ipc.supp b/test/supp/helgrind-test_ipc.supp similarity index 100% rename from test/supp/helgrind-umf_test-ipc.supp rename to test/supp/helgrind-test_ipc.supp diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp b/test/supp/helgrind-test_jemalloc_coarse_devdax.supp similarity index 100% rename from test/supp/helgrind-umf_test-jemalloc_coarse_devdax.supp rename to test/supp/helgrind-test_jemalloc_coarse_devdax.supp diff --git a/test/supp/helgrind-umf_test-jemalloc_coarse_file.supp 
b/test/supp/helgrind-test_jemalloc_coarse_file.supp similarity index 100% rename from test/supp/helgrind-umf_test-jemalloc_coarse_file.supp rename to test/supp/helgrind-test_jemalloc_coarse_file.supp diff --git a/test/supp/helgrind-umf_test-jemalloc_pool.supp b/test/supp/helgrind-test_jemalloc_pool.supp similarity index 100% rename from test/supp/helgrind-umf_test-jemalloc_pool.supp rename to test/supp/helgrind-test_jemalloc_pool.supp diff --git a/test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp b/test/supp/helgrind-test_provider_devdax_memory_ipc.supp similarity index 100% rename from test/supp/helgrind-umf_test-provider_devdax_memory_ipc.supp rename to test/supp/helgrind-test_provider_devdax_memory_ipc.supp diff --git a/test/supp/helgrind-umf_test-provider_file_memory_ipc.supp b/test/supp/helgrind-test_provider_file_memory_ipc.supp similarity index 100% rename from test/supp/helgrind-umf_test-provider_file_memory_ipc.supp rename to test/supp/helgrind-test_provider_file_memory_ipc.supp diff --git a/test/supp/helgrind-umf_test-provider_os_memory.supp b/test/supp/helgrind-test_provider_os_memory.supp similarity index 100% rename from test/supp/helgrind-umf_test-provider_os_memory.supp rename to test/supp/helgrind-test_provider_os_memory.supp diff --git a/test/supp/helgrind-umf_test-scalable_coarse_devdax.supp b/test/supp/helgrind-test_scalable_coarse_devdax.supp similarity index 100% rename from test/supp/helgrind-umf_test-scalable_coarse_devdax.supp rename to test/supp/helgrind-test_scalable_coarse_devdax.supp diff --git a/test/supp/helgrind-umf_test-scalable_coarse_file.supp b/test/supp/helgrind-test_scalable_coarse_file.supp similarity index 100% rename from test/supp/helgrind-umf_test-scalable_coarse_file.supp rename to test/supp/helgrind-test_scalable_coarse_file.supp diff --git a/test/supp/helgrind-umf_test-scalable_pool.supp b/test/supp/helgrind-test_scalable_pool.supp similarity index 100% rename from test/supp/helgrind-umf_test-scalable_pool.supp rename to test/supp/helgrind-test_scalable_pool.supp diff --git a/test/supp/memcheck-umf_test-jemalloc_coarse_devdax.supp b/test/supp/memcheck-test_jemalloc_coarse_devdax.supp similarity index 100% rename from test/supp/memcheck-umf_test-jemalloc_coarse_devdax.supp rename to test/supp/memcheck-test_jemalloc_coarse_devdax.supp diff --git a/test/supp/memcheck-umf_test-jemalloc_coarse_file.supp b/test/supp/memcheck-test_jemalloc_coarse_file.supp similarity index 100% rename from test/supp/memcheck-umf_test-jemalloc_coarse_file.supp rename to test/supp/memcheck-test_jemalloc_coarse_file.supp diff --git a/test/supp/memcheck-umf_test-jemalloc_pool.supp b/test/supp/memcheck-test_jemalloc_pool.supp similarity index 100% rename from test/supp/memcheck-umf_test-jemalloc_pool.supp rename to test/supp/memcheck-test_jemalloc_pool.supp diff --git a/test/supp/memcheck-umf_test-scalable_pool.supp b/test/supp/memcheck-test_scalable_pool.supp similarity index 100% rename from test/supp/memcheck-umf_test-scalable_pool.supp rename to test/supp/memcheck-test_scalable_pool.supp diff --git a/test/test_valgrind.sh b/test/test_valgrind.sh index 954a3a56b..ea156e620 100755 --- a/test/test_valgrind.sh +++ b/test/test_valgrind.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -16,7 +16,7 @@ function print_usage() { echo "Where:" echo echo "tests_examples - (optional) list of tests or examples to be run (paths relative to the build directory)." - echo " If it is empty, all tests (./test/umf_test-*) and examples (./examples/umf_example_*)" + echo " If it is empty, all tests (./test/test_*) and examples (./examples/umf_example_*)" echo " found in will be run." } @@ -37,8 +37,8 @@ if [ ! -f $WORKSPACE/README.md ]; then exit 1 fi -if [ $(ls -1 ${BUILD_DIR}/test/umf_test-* 2>/dev/null | wc -l) -eq 0 ]; then - echo -e "error: UMF tests ./test/umf_test-* not found in the build directory: ${BUILD_DIR}\n" +if [ $(ls -1 ${BUILD_DIR}/test/test_* 2>/dev/null | wc -l) -eq 0 ]; then + echo -e "error: UMF tests ./test/test_* not found in the build directory: ${BUILD_DIR}\n" print_usage exit 1 fi @@ -74,7 +74,7 @@ echo "Working directory: $(pwd)" echo "Running: \"valgrind $OPTION\" for the following tests:" ANY_TEST_FAILED=0 -PATH_TESTS="./test/umf_test-*" +PATH_TESTS="./test/test_*" PATH_EXAMPLES="./examples/umf_example_*" rm -f ${PATH_TESTS}.log ${PATH_TESTS}.err ${PATH_EXAMPLES}.log ${PATH_EXAMPLES}.err @@ -100,7 +100,7 @@ for test in $TESTS; do # skip tests incompatible with valgrind FILTER="" case $test in - ./test/umf_test-disjointPool) + ./test/test_disjointPool) if [ "$TOOL" = "helgrind" ]; then # skip because of the assert in helgrind: # Helgrind: hg_main.c:308 (lockN_acquire_reader): Assertion 'lk->kind == LK_rdwr' failed. @@ -108,40 +108,40 @@ for test in $TESTS; do continue; fi ;; - ./test/umf_test-ipc_os_prov_*) + ./test/test_ipc_os_prov_*) echo "- SKIPPED" continue; # skip testing helper binaries used by the ipc_os_prov_* tests ;; - ./test/umf_test-ipc_devdax_prov_*) + ./test/test_ipc_devdax_prov_*) echo "- SKIPPED" continue; # skip testing helper binaries used by the ipc_devdax_prov_* tests ;; - ./test/umf_test-ipc_file_prov_*) + ./test/test_ipc_file_prov_*) echo "- SKIPPED" continue; # skip testing helper binaries used by the ipc_file_prov_* tests ;; - ./test/umf_test-memspace_host_all) + ./test/test_memspace_host_all) FILTER='--gtest_filter="-*allocsSpreadAcrossAllNumaNodes"' ;; - ./test/umf_test-provider_os_memory) + ./test/test_provider_os_memory) FILTER='--gtest_filter="-osProviderTest/umfIpcTest*"' ;; - ./test/umf_test-provider_os_memory_config) + ./test/test_provider_os_memory_config) FILTER='--gtest_filter="-*protection_flag_none:*protection_flag_read:*providerConfigTestNumaMode*"' ;; - ./test/umf_test-memspace_highest_capacity) + ./test/test_memspace_highest_capacity) FILTER='--gtest_filter="-*highestCapacityVerify*"' ;; - ./test/umf_test-provider_os_memory_multiple_numa_nodes) + ./test/test_provider_os_memory_multiple_numa_nodes) FILTER='--gtest_filter="-testNuma.checkModeInterleave*:testNumaNodesAllocations/testNumaOnEachNode.checkNumaNodesAllocations*:testNumaNodesAllocations/testNumaOnEachNode.checkModePreferred*:testNumaNodesAllocations/testNumaOnEachNode.checkModeInterleaveSingleNode*:testNumaNodesAllocationsAllCpus/testNumaOnEachCpu.checkModePreferredEmptyNodeset*:testNumaNodesAllocationsAllCpus/testNumaOnEachCpu.checkModeLocal*"' ;; - ./test/umf_test-memspace_highest_bandwidth) + ./test/test_memspace_highest_bandwidth) FILTER='--gtest_filter="-*allocLocalMt*"' ;; - ./test/umf_test-memspace_lowest_latency) + ./test/test_memspace_lowest_latency) FILTER='--gtest_filter="-*allocLocalMt*"' ;; - ./test/umf_test-memoryPool) + ./test/test_memoryPool) FILTER='--gtest_filter="-*allocMaxSize*"' ;; 
./examples/umf_example_ipc_ipcapi_*) From 15b3810b78261e587ac125b9b014d2d806e70583 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 5 Mar 2025 14:03:52 +0000 Subject: [PATCH 208/466] update benchmark scripts --- .github/workflows/reusable_benchmarks.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index a7c9e5e28..15e6b15f4 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -103,7 +103,9 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: intel/llvm - ref: nightly-2025-02-19 + # add preloaded UMF benchmarks + # https://github.com/intel/llvm/pull/17278 + ref: b2f9dab5266d227cc9eb19af1b54c5bdc50221d1 path: sycl-repo fetch-depth: 1 From 90a2a9128b9d2e0c0ef79bc68cd60cae15156a8f Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Wed, 5 Mar 2025 13:01:54 +0100 Subject: [PATCH 209/466] Add lld linker CI job --- .github/workflows/reusable_basic.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/reusable_basic.yml b/.github/workflows/reusable_basic.yml index 5a6756f2c..41ce4b385 100644 --- a/.github/workflows/reusable_basic.yml +++ b/.github/workflows/reusable_basic.yml @@ -74,6 +74,17 @@ jobs: install_tbb: 'ON' disable_hwloc: 'OFF' link_hwloc_statically: 'OFF' + # test lld linker + - os: 'ubuntu-24.04' + build_type: Release + compiler: {c: icx, cxx: icpx} + shared_library: 'ON' + level_zero_provider: 'ON' + cuda_provider: 'ON' + install_tbb: 'ON' + disable_hwloc: 'OFF' + link_hwloc_statically: 'OFF' + llvm_linker: '-DCMAKE_EXE_LINKER_FLAGS="-fuse-ld=lld" -DCMAKE_MODULE_LINKER_FLAGS="-fuse-ld=lld" -DCMAKE_SHARED_LINKER_FLAGS="-fuse-ld=lld"' # test without installing TBB - os: 'ubuntu-22.04' build_type: Release @@ -160,6 +171,7 @@ jobs: -DUMF_DISABLE_HWLOC=${{matrix.disable_hwloc}} -DUMF_LINK_HWLOC_STATICALLY=${{matrix.link_hwloc_statically}} ${{ matrix.build_type == 'Debug' && matrix.compiler.c == 'gcc' && '-DUMF_USE_COVERAGE=ON' || '' }} + ${{ matrix.llvm_linker || '' }} - name: Build UMF run: | From 45358d848ce4675563da5cdcb6d51cf916e1d9a0 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Thu, 6 Mar 2025 10:00:18 +0000 Subject: [PATCH 210/466] check for alloc fails in disjoint pool init --- src/pool/pool_disjoint.c | 50 ++++++++++++++++++++++++++++++++++------ 1 file changed, 43 insertions(+), 7 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 0bd88bd24..385599333 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -588,12 +588,6 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - disjoint_pool_t *disjoint_pool = - umf_ba_global_alloc(sizeof(*disjoint_pool)); - if (!disjoint_pool) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - umf_disjoint_pool_params_t *dp_params = (umf_disjoint_pool_params_t *)params; @@ -604,12 +598,21 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } + disjoint_pool_t *disjoint_pool = + umf_ba_global_alloc(sizeof(*disjoint_pool)); + if (disjoint_pool == NULL) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + VALGRIND_DO_CREATE_MEMPOOL(disjoint_pool, 0, 0); disjoint_pool->provider = provider; disjoint_pool->params = *dp_params; disjoint_pool->known_slabs = critnib_new(); + if (disjoint_pool->known_slabs == NULL) 
{ + goto err_free_disjoint_pool; + } // Generate buckets sized such as: 64, 96, 128, 192, ..., CutOff. // Powers of 2 and the value halfway between the powers of 2. @@ -625,6 +628,9 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1); disjoint_pool->default_shared_limits = umfDisjointPoolSharedLimitsCreate(SIZE_MAX); + if (disjoint_pool->default_shared_limits == NULL) { + goto err_free_known_slabs; + } // count number of buckets, start from 1 disjoint_pool->buckets_num = 1; @@ -633,10 +639,14 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { disjoint_pool->buckets_num += 2; } + disjoint_pool->buckets = umf_ba_global_alloc( sizeof(*disjoint_pool->buckets) * disjoint_pool->buckets_num); + if (disjoint_pool->buckets == NULL) { + goto err_free_shared_limits; + } - int i = 0; + size_t i = 0; Size1 = ts1; Size2 = ts2; for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2, i += 2) { @@ -648,6 +658,13 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, disjoint_pool->buckets[i] = create_bucket( CutOff, disjoint_pool, disjoint_pool_get_limits(disjoint_pool)); + // check if all buckets were created successfully + for (i = 0; i < disjoint_pool->buckets_num; i++) { + if (disjoint_pool->buckets[i] == NULL) { + goto err_free_buckets; + } + } + umf_result_t ret = umfMemoryProviderGetMinPageSize( provider, NULL, &disjoint_pool->provider_min_page_size); if (ret != UMF_RESULT_SUCCESS) { @@ -657,6 +674,25 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, *ppPool = (void *)disjoint_pool; return UMF_RESULT_SUCCESS; + +err_free_buckets: + for (i = 0; i < disjoint_pool->buckets_num; i++) { + if (disjoint_pool->buckets[i] != NULL) { + destroy_bucket(disjoint_pool->buckets[i]); + } + } + umf_ba_global_free(disjoint_pool->buckets); + +err_free_shared_limits: + umfDisjointPoolSharedLimitsDestroy(disjoint_pool->default_shared_limits); + +err_free_known_slabs: + critnib_delete(disjoint_pool->known_slabs); + +err_free_disjoint_pool: + umf_ba_global_free(disjoint_pool); + + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } void *disjoint_pool_malloc(void *pool, size_t size) { From 385fa4ce6d10b32b637433ac309276992f17586f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 6 Mar 2025 11:48:46 +0100 Subject: [PATCH 211/466] [CI] Add Compat test on GPU runners --- .github/workflows/reusable_compatibility.yml | 112 +++++++++++++++++-- 1 file changed, 103 insertions(+), 9 deletions(-) diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml index 29597ac18..a7b631106 100644 --- a/.github/workflows/reusable_compatibility.yml +++ b/.github/workflows/reusable_compatibility.yml @@ -1,4 +1,4 @@ -# Workflow for checkig the backward compatibility of UMF. +# Workflow for checking the backward compatibility of UMF. # Test the latest UMF shared library with binaries compiled using the older UMF # shared library. 
name: Compatibility @@ -15,7 +15,7 @@ permissions: contents: read jobs: - ubuntu-build: + ubuntu: name: Ubuntu runs-on: 'ubuntu-22.04' @@ -64,7 +64,7 @@ jobs: working-directory: ${{github.workspace}}/tag_version run: | cmake --build ${{github.workspace}}/tag_version/build -j $(nproc) - + - name: Run "tag" UMF tests working-directory: ${{github.workspace}}/tag_version/build run: | @@ -75,13 +75,13 @@ jobs: with: fetch-depth: 0 path: ${{github.workspace}}/latest_version - + - name: Get latest UMF version working-directory: ${{github.workspace}}/latest_version run: | VERSION=$(git describe --tags) echo "checked version: $VERSION" - + - name: Configure latest UMF build working-directory: ${{github.workspace}}/latest_version run: > @@ -107,11 +107,11 @@ jobs: - name: Run "tag" UMF tests with latest UMF libs (warnings enabled) working-directory: ${{github.workspace}}/tag_version/build run: > - UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" - LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ + UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" + LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ ctest --output-on-failure - - windows-build: + + windows: name: Windows env: VCPKG_PATH: "${{github.workspace}}/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/vcpkg/packages/jemalloc_x64-windows" @@ -207,3 +207,97 @@ jobs: $env:UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" cp ${{github.workspace}}/latest_version/build/bin/Debug/umf.dll ${{github.workspace}}/tag_version/build/bin/Debug/umf.dll ctest -C Debug --output-on-failure --test-dir test + + gpu: + name: GPU Ubuntu + strategy: + matrix: + provider: ['LEVEL_ZERO', 'CUDA'] + runs-on: ["DSS-${{matrix.provider}}", "DSS-UBUNTU"] + + steps: + - name: Checkout "tag" UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + ref: refs/tags/${{inputs.tag}} + path: ${{github.workspace}}/tag_version + + - name: Get "tag" UMF version + working-directory: ${{github.workspace}}/tag_version + run: | + VERSION=$(git describe --tags) + echo "tag version: $VERSION" + + - name: Configure "tag" UMF build + working-directory: ${{github.workspace}}/tag_version + run: > + cmake + -B ${{github.workspace}}/tag_version/build + -DCMAKE_BUILD_TYPE=Debug + -DUMF_BUILD_SHARED_LIBRARY=ON + -DCMAKE_C_COMPILER=gcc + -DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_TESTS=ON + -DUMF_BUILD_GPU_TESTS=ON + -DUMF_BUILD_EXAMPLES=ON + -DUMF_BUILD_GPU_EXAMPLES=ON + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF + -DUMF_BUILD_CUDA_PROVIDER=OFF + -DUMF_BUILD_${{matrix.provider}}_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + -DUMF_BUILD_LIBUMF_POOL_DISJOINT=ON + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build "tag" UMF + working-directory: ${{github.workspace}}/tag_version + run: | + cmake --build ${{github.workspace}}/tag_version/build -j $(nproc) + + - name: Run "tag" UMF tests + working-directory: ${{github.workspace}}/tag_version/build + run: | + LD_LIBRARY_PATH=${{github.workspace}}/tag_version/build/lib/ ctest --output-on-failure + + - name: Checkout latest UMF version + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 + with: + fetch-depth: 0 + path: ${{github.workspace}}/latest_version + + - name: Get latest UMF version + working-directory: ${{github.workspace}}/latest_version + run: | + VERSION=$(git describe --tags) + echo "checked version: $VERSION" + + - 
name: Configure latest UMF build + working-directory: ${{github.workspace}}/latest_version + run: > + cmake + -B ${{github.workspace}}/latest_version/build + -DCMAKE_BUILD_TYPE=Debug + -DUMF_BUILD_SHARED_LIBRARY=ON + -DCMAKE_C_COMPILER=gcc + -DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_TESTS=OFF + -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON + -DUMF_BUILD_CUDA_PROVIDER=ON + -DUMF_FORMAT_CODE_STYLE=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON + + - name: Build latest UMF + working-directory: ${{github.workspace}}/latest_version + run: | + cmake --build ${{github.workspace}}/latest_version/build -j $(nproc) + + # NOTE: Once not implemented features may now be implemented - exclude these tests + - name: Run "tag" UMF tests with latest UMF libs (warnings enabled) + working-directory: ${{github.workspace}}/tag_version/build + run: > + UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" + LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ + ctest --output-on-failure -E "not_impl" From 4760e50d217aa7ec80d1718f190beb5983548a3e Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Wed, 5 Mar 2025 16:54:44 +0100 Subject: [PATCH 212/466] [CI] Fix failing address sanitizer --- src/base_alloc/base_alloc_linux.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/base_alloc/base_alloc_linux.c b/src/base_alloc/base_alloc_linux.c index 260eec5aa..cc4f2e2bd 100644 --- a/src/base_alloc/base_alloc_linux.c +++ b/src/base_alloc/base_alloc_linux.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -19,8 +19,8 @@ static UTIL_ONCE_FLAG Page_size_is_initialized = UTIL_ONCE_FLAG_INIT; static size_t Page_size; void *ba_os_alloc(size_t size) { - void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + void *ptr = utils_mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); // this should be unnecessary but pairs of mmap/munmap do not reset // asan's user-poisoning flags, leading to invalid error reports // Bug 81619: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81619 @@ -29,7 +29,7 @@ void *ba_os_alloc(size_t size) { } void ba_os_free(void *ptr, size_t size) { - int ret = munmap(ptr, size); + int ret = utils_munmap(ptr, size); assert(ret == 0); (void)ret; // unused } From cda4356f496915280cfa06345291cb7bf829496d Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Thu, 6 Mar 2025 15:13:10 +0100 Subject: [PATCH 213/466] Remove unnecessary headers from base alloc --- src/base_alloc/base_alloc_linux.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/base_alloc/base_alloc_linux.c b/src/base_alloc/base_alloc_linux.c index cc4f2e2bd..9b1dc63fe 100644 --- a/src/base_alloc/base_alloc_linux.c +++ b/src/base_alloc/base_alloc_linux.c @@ -6,13 +6,9 @@ */ #include -#include #include -#include #include -#include "base_alloc.h" -#include "base_alloc_global.h" #include "utils_concurrency.h" static UTIL_ONCE_FLAG Page_size_is_initialized = UTIL_ONCE_FLAG_INIT; From 1ef1e48dec5bb9349f4045c62854e5378633e7fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 6 Mar 2025 11:50:01 +0100 Subject: [PATCH 214/466] [CI][Compat] Remove steps printing version For tag checkout it's no surprise; for main we print full version during 'Configure' step. 
--- .github/workflows/reusable_compatibility.yml | 38 +------------------- 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml index a7b631106..a11c91128 100644 --- a/.github/workflows/reusable_compatibility.yml +++ b/.github/workflows/reusable_compatibility.yml @@ -35,12 +35,6 @@ jobs: - name: Install libhwloc working-directory: ${{github.workspace}}/tag_version run: .github/scripts/install_hwloc.sh - - - name: Get "tag" UMF version - working-directory: ${{github.workspace}}/tag_version - run: | - VERSION=$(git describe --tags) - echo "tag version: $VERSION" - name: Configure "tag" UMF build working-directory: ${{github.workspace}}/tag_version @@ -76,12 +70,6 @@ jobs: fetch-depth: 0 path: ${{github.workspace}}/latest_version - - name: Get latest UMF version - working-directory: ${{github.workspace}}/latest_version - run: | - VERSION=$(git describe --tags) - echo "checked version: $VERSION" - - name: Configure latest UMF build working-directory: ${{github.workspace}}/latest_version run: > @@ -132,16 +120,11 @@ jobs: vcpkgDirectory: ${{github.workspace}}/vcpkg vcpkgJsonGlob: '**/vcpkg.json' + # NOTE we use vcpkg setup from "tag" version - name: Install dependencies working-directory: ${{github.workspace}}/tag_version run: vcpkg install shell: pwsh # Specifies PowerShell as the shell for running the script. - - - name: Get "tag" UMF version - working-directory: ${{github.workspace}}/tag_version - run: | - $version = (git describe --tags) - echo "tag version: $VERSION" - name: Configure "tag" UMF build working-directory: ${{github.workspace}}/tag_version @@ -174,13 +157,6 @@ jobs: fetch-depth: 0 path: ${{github.workspace}}/latest_version - # NOTE we use vcpkg setup from "tag" version - - name: Get latest UMF version - working-directory: ${{github.workspace}}/latest_version - run: | - $version = (git describe --tags) - echo "latest version: $VERSION" - - name: Configure latest UMF build working-directory: ${{github.workspace}}/latest_version run: > @@ -223,12 +199,6 @@ jobs: ref: refs/tags/${{inputs.tag}} path: ${{github.workspace}}/tag_version - - name: Get "tag" UMF version - working-directory: ${{github.workspace}}/tag_version - run: | - VERSION=$(git describe --tags) - echo "tag version: $VERSION" - - name: Configure "tag" UMF build working-directory: ${{github.workspace}}/tag_version run: > @@ -267,12 +237,6 @@ jobs: fetch-depth: 0 path: ${{github.workspace}}/latest_version - - name: Get latest UMF version - working-directory: ${{github.workspace}}/latest_version - run: | - VERSION=$(git describe --tags) - echo "checked version: $VERSION" - - name: Configure latest UMF build working-directory: ${{github.workspace}}/latest_version run: > From f67374899f8cd3cad070c28c844e99cdee85aae4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 6 Mar 2025 12:01:20 +0100 Subject: [PATCH 215/466] [CI][Compat] Run tests verbosely - warnings will be always visible --- .github/workflows/reusable_compatibility.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml index a11c91128..12444d6cf 100644 --- a/.github/workflows/reusable_compatibility.yml +++ b/.github/workflows/reusable_compatibility.yml @@ -97,7 +97,7 @@ jobs: run: > UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ - ctest 
--output-on-failure + ctest --verbose windows: name: Windows @@ -182,7 +182,7 @@ jobs: run: | $env:UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" cp ${{github.workspace}}/latest_version/build/bin/Debug/umf.dll ${{github.workspace}}/tag_version/build/bin/Debug/umf.dll - ctest -C Debug --output-on-failure --test-dir test + ctest -C Debug --verbose --test-dir test gpu: name: GPU Ubuntu @@ -264,4 +264,4 @@ jobs: run: > UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ - ctest --output-on-failure -E "not_impl" + ctest --verbose -E "not_impl" From f2a7e21aa286d15a2a147cb95549c6f3abeb623c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 6 Mar 2025 12:16:46 +0100 Subject: [PATCH 216/466] [CI][Compat] Enable examples as well --- .github/workflows/reusable_compatibility.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml index 12444d6cf..b6007b081 100644 --- a/.github/workflows/reusable_compatibility.yml +++ b/.github/workflows/reusable_compatibility.yml @@ -46,6 +46,7 @@ jobs: -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DUMF_BUILD_TESTS=ON + -DUMF_BUILD_EXAMPLES=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON -DUMF_FORMAT_CODE_STYLE=OFF @@ -85,7 +86,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_TESTS_FAIL_ON_SKIP=ON - name: Build latest UMF working-directory: ${{github.workspace}}/latest_version @@ -136,6 +136,7 @@ jobs: -DCMAKE_CXX_COMPILER=cl -DUMF_BUILD_SHARED_LIBRARY=ON -DUMF_BUILD_TESTS=ON + -DUMF_BUILD_EXAMPLES=ON -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON -DUMF_BUILD_CUDA_PROVIDER=ON -DUMF_FORMAT_CODE_STYLE=OFF @@ -172,7 +173,6 @@ jobs: -DUMF_FORMAT_CODE_STYLE=OFF -DUMF_DEVELOPER_MODE=ON -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_TESTS_FAIL_ON_SKIP=ON - name: Build latest UMF run: cmake --build "${{github.workspace}}/latest_version/build" --config Debug -j $Env:NUMBER_OF_PROCESSORS @@ -182,7 +182,7 @@ jobs: run: | $env:UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" cp ${{github.workspace}}/latest_version/build/bin/Debug/umf.dll ${{github.workspace}}/tag_version/build/bin/Debug/umf.dll - ctest -C Debug --verbose --test-dir test + ctest -C Debug --verbose gpu: name: GPU Ubuntu From cba0326c418982be7f1c822bdad7b070ee270a5f Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 7 Mar 2025 09:23:44 +0100 Subject: [PATCH 217/466] Add missing unlock in an error handling path of umf_ba_alloc() Signed-off-by: Lukasz Dorau --- src/base_alloc/base_alloc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/base_alloc/base_alloc.c b/src/base_alloc/base_alloc.c index 6f975307d..00e58078e 100644 --- a/src/base_alloc/base_alloc.c +++ b/src/base_alloc/base_alloc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2024 Intel Corporation + * Copyright (C) 2024-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -230,6 +230,7 @@ void *umf_ba_alloc(umf_ba_pool_t *pool) { // check if the free list is not empty if (pool->metadata.free_list == NULL) { LOG_ERR("base_alloc: Free list should not be empty before new alloc"); + utils_mutex_unlock(&pool->metadata.free_lock); return NULL; } From 877dd1d38f463cb63bc98c322abe4f00a2499f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Fri, 7 Mar 2025 10:03:59 +0100 Subject: [PATCH 218/466] Enable MT bench for disjoint_pool in fixed provider Ref. #1151 --- benchmark/benchmark.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 7b04f2061..377a38fcf 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -165,9 +165,7 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform_fixedprovider) ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&singlethreaded); -// TODO: change to multithreaded -//->Apply(&multithreaded); + ->Apply(&multithreaded); #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, From f9719a6984bff6559dcf6a6c6250779b83e1a0ef Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 5 Mar 2025 17:01:30 +0000 Subject: [PATCH 219/466] cleanup bit scan utils --- src/base_alloc/base_alloc_global.c | 6 ++-- src/critnib/critnib.c | 3 +- src/pool/pool_disjoint.c | 4 +-- src/utils/CMakeLists.txt | 7 ++--- src/utils/utils_concurrency.h | 15 --------- src/utils/utils_math.h | 50 +++++++++++++++++++++++++++--- src/utils/utils_posix_math.c | 20 ------------ src/utils/utils_windows_math.c | 24 -------------- 8 files changed, 56 insertions(+), 73 deletions(-) delete mode 100644 src/utils/utils_posix_math.c delete mode 100644 src/utils/utils_windows_math.c diff --git a/src/base_alloc/base_alloc_global.c b/src/base_alloc/base_alloc_global.c index f3b61566a..ecec3367c 100644 --- a/src/base_alloc/base_alloc_global.c +++ b/src/base_alloc/base_alloc_global.c @@ -71,7 +71,7 @@ static void umf_ba_create_global(void) { } size_t smallestSize = BASE_ALLOC.ac_sizes[0]; - BASE_ALLOC.smallest_ac_size_log2 = log2Utils(smallestSize); + BASE_ALLOC.smallest_ac_size_log2 = utils_msb64(smallestSize); LOG_DEBUG("UMF base allocator created"); } @@ -83,8 +83,8 @@ static int size_to_idx(size_t size) { } int isPowerOf2 = (0 == (size & (size - 1))); - int index = - (int)(log2Utils(size) + !isPowerOf2 - BASE_ALLOC.smallest_ac_size_log2); + int index = (int)(utils_msb64(size) + !isPowerOf2 - + BASE_ALLOC.smallest_ac_size_log2); assert(index >= 0); return index; diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index c95637f20..1adb2dc7e 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -64,6 +64,7 @@ #include "utils_assert.h" #include "utils_common.h" #include "utils_concurrency.h" +#include "utils_math.h" /* * A node that has been deleted is left untouched for this many delete @@ -367,7 +368,7 @@ int critnib_insert(struct critnib *c, word key, void *value, int update) { } /* and convert that to an index. 
*/ - sh_t sh = utils_mssb_index(at) & (sh_t) ~(SLICE - 1); + sh_t sh = utils_msb64(at) & (sh_t) ~(SLICE - 1); struct critnib_node *m = alloc_node(c); if (!m) { diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 0bd88bd24..82623988c 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -466,7 +466,7 @@ static size_t size_to_idx(disjoint_pool_t *pool, size_t size) { } // get the position of the leftmost set bit - size_t position = getLeftmostSetBitPos(size); + size_t position = utils_msb64(size); bool is_power_of_2 = 0 == (size & (size - 1)); bool larger_than_halfway_between_powers_of_2 = @@ -622,7 +622,7 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, Size1 = utils_max(Size1, UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE); // Calculate the exponent for min_bucket_size used for finding buckets. - disjoint_pool->min_bucket_size_exp = (size_t)log2Utils(Size1); + disjoint_pool->min_bucket_size_exp = (size_t)utils_msb64(Size1); disjoint_pool->default_shared_limits = umfDisjointPoolSharedLimitsCreate(SIZE_MAX); diff --git a/src/utils/CMakeLists.txt b/src/utils/CMakeLists.txt index a0bff39fd..976a2cb62 100644 --- a/src/utils/CMakeLists.txt +++ b/src/utils/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -7,15 +7,14 @@ include(FindThreads) set(UMF_UTILS_SOURCES_COMMON utils_common.c utils_log.c utils_load_library.c) -set(UMF_UTILS_SOURCES_POSIX utils_posix_common.c utils_posix_concurrency.c - utils_posix_math.c) +set(UMF_UTILS_SOURCES_POSIX utils_posix_common.c utils_posix_concurrency.c) set(UMF_UTILS_SOURCES_LINUX utils_linux_common.c) set(UMF_UTILS_SOURCES_MACOSX utils_macosx_common.c) set(UMF_UTILS_SOURCES_WINDOWS utils_windows_common.c - utils_windows_concurrency.c utils_windows_math.c) + utils_windows_concurrency.c) if(UMF_USE_VALGRIND) if(UMF_USE_ASAN diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 0104b8646..e8a601ecd 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -89,18 +89,6 @@ void utils_init_once(UTIL_ONCE_FLAG *flag, void (*onceCb)(void)); #if defined(_WIN32) -static inline unsigned char utils_lssb_index(long long value) { - unsigned long ret; - _BitScanForward64(&ret, value); - return (unsigned char)ret; -} - -static inline unsigned char utils_mssb_index(long long value) { - unsigned long ret; - _BitScanReverse64(&ret, value); - return (unsigned char)ret; -} - // There is no good way to do atomic_load on windows... 
static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) { // NOTE: Windows cl complains about direct accessing 'ptr' which is next @@ -166,9 +154,6 @@ static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected, #else // !defined(_WIN32) -#define utils_lssb_index(x) ((unsigned char)__builtin_ctzll(x)) -#define utils_mssb_index(x) ((unsigned char)(63 - __builtin_clzll(x))) - static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) { ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); ASSERT_IS_ALIGNED((uintptr_t)out, 8); diff --git a/src/utils/utils_math.h b/src/utils/utils_math.h index c78be1136..0e58fc38d 100644 --- a/src/utils/utils_math.h +++ b/src/utils/utils_math.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -11,16 +11,58 @@ #define UMF_MATH_H 1 #include +#include #include +#include #ifdef __cplusplus extern "C" { #endif -size_t getLeftmostSetBitPos(size_t num); +#if defined(_WIN32) -// Logarithm is an index of the most significant non-zero bit. -static inline size_t log2Utils(size_t num) { return getLeftmostSetBitPos(num); } +#include "utils_windows_intrin.h" + +#pragma intrinsic(_BitScanReverse64) +#pragma intrinsic(_BitScanForward64) + +// Retrieves the position of the leftmost set bit. +// The position of the bit is counted from 0 +// e.g. for 01000011110 the position equals 9. +static inline size_t utils_msb64(uint64_t num) { + assert(num != 0 && + "Finding leftmost set bit when number equals zero is undefined"); + unsigned long index = 0; + _BitScanReverse64(&index, num); + return (size_t)index; +} + +static inline size_t utils_lsb64(uint64_t num) { + assert(num != 0 && + "Finding rightmost set bit when number equals zero is undefined"); + unsigned long index = 0; + _BitScanForward64(&index, num); + return (size_t)index; +} + +#else // !defined(_WIN32) + +// Retrieves the position of the leftmost set bit. +// The position of the bit is counted from 0 +// e.g. for 01000011110 the position equals 9. +static inline size_t utils_msb64(uint64_t num) { + assert(num != 0 && + "Finding leftmost set bit when number equals zero is undefined"); + return 63 - __builtin_clzll(num); +} + +static inline size_t utils_lsb64(uint64_t num) { + assert(num != 0 && + "Finding rightmost set bit when number equals zero is undefined"); + return __builtin_ctzll(num); +} + +#endif // !defined(_WIN32) #ifdef __cplusplus } diff --git a/src/utils/utils_posix_math.c b/src/utils/utils_posix_math.c deleted file mode 100644 index 465b68772..000000000 --- a/src/utils/utils_posix_math.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * - * Copyright (C) 2023 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - */ - -#include "utils_math.h" -#include - -// Retrieves the position of the leftmost set bit. -// The position of the bit is counted from 0 -// e.g. for 01000011110 the position equals 9. 
-size_t getLeftmostSetBitPos(size_t num) { - assert(num != 0 && - "Finding leftmost set bit when number equals zero is undefined"); - return (sizeof(num) * CHAR_BIT - 1) - __builtin_clzll(num); -} diff --git a/src/utils/utils_windows_math.c b/src/utils/utils_windows_math.c deleted file mode 100644 index cd21ae696..000000000 --- a/src/utils/utils_windows_math.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * - * Copyright (C) 2023-2025 Intel Corporation - * - * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - * - */ - -#include "utils_math.h" -#include "utils_windows_intrin.h" - -#pragma intrinsic(_BitScanReverse) - -// Retrieves the position of the leftmost set bit. -// The position of the bit is counted from 0 -// e.g. for 01000011110 the position equals 9. -size_t getLeftmostSetBitPos(size_t num) { - assert(num != 0 && - "Finding leftmost set bit when number equals zero is undefined"); - unsigned long index = 0; - _BitScanReverse(&index, (unsigned long)num); - return (size_t)index; -} From d6930706a9171d7b9b0d35c6c19bb589f589ead0 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 5 Mar 2025 17:01:37 +0000 Subject: [PATCH 220/466] replace chunks bool array with bit fields --- src/pool/pool_disjoint.c | 71 +++++++++++++------------------ src/pool/pool_disjoint_internal.h | 35 ++++++++++++--- test/pools/disjoint_pool.cpp | 5 +-- 3 files changed, 61 insertions(+), 50 deletions(-) diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 82623988c..a380e09d8 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -75,28 +75,36 @@ static slab_t *create_slab(bucket_t *bucket) { umf_result_t res = UMF_RESULT_SUCCESS; umf_memory_provider_handle_t provider = bucket->pool->provider; - slab_t *slab = umf_ba_global_alloc(sizeof(*slab)); + size_t num_chunks_total = + utils_max(bucket_slab_min_size(bucket) / bucket->size, 1); + + // Calculate the number of 64-bit words needed. 
+ size_t num_words = + (num_chunks_total + CHUNK_BITMAP_SIZE - 1) / CHUNK_BITMAP_SIZE; + + slab_t *slab = umf_ba_global_alloc(sizeof(*slab) + + num_words * sizeof(slab->chunks[0])); if (slab == NULL) { LOG_ERR("allocation of new slab failed!"); return NULL; } slab->num_chunks_allocated = 0; - slab->first_free_chunk_idx = 0; slab->bucket = bucket; slab->iter.val = slab; slab->iter.prev = slab->iter.next = NULL; - slab->num_chunks_total = - utils_max(bucket_slab_min_size(bucket) / bucket->size, 1); - slab->chunks = - umf_ba_global_alloc(sizeof(*slab->chunks) * slab->num_chunks_total); - if (slab->chunks == NULL) { - LOG_ERR("allocation of slab chunks failed!"); - goto free_slab; + slab->num_chunks_total = num_chunks_total; + slab->num_words = num_words; + + // set all chunks as free + memset(slab->chunks, ~0, num_words * sizeof(slab->chunks[0])); + if (num_chunks_total % CHUNK_BITMAP_SIZE) { + // clear remaining bits + slab->chunks[num_words - 1] = + ((1ULL << (num_chunks_total % CHUNK_BITMAP_SIZE)) - 1); } - memset(slab->chunks, 0, sizeof(*slab->chunks) * slab->num_chunks_total); // if slab_min_size is not a multiple of bucket size, we would have some // padding at the end of the slab @@ -108,7 +116,7 @@ static slab_t *create_slab(bucket_t *bucket) { res = umfMemoryProviderAlloc(provider, slab->slab_size, 0, &slab->mem_ptr); if (res != UMF_RESULT_SUCCESS) { LOG_ERR("allocation of slab data failed!"); - goto free_slab_chunks; + goto free_slab; } // raw allocation is not available for user so mark it as inaccessible @@ -117,9 +125,6 @@ static slab_t *create_slab(bucket_t *bucket) { LOG_DEBUG("bucket: %p, slab_size: %zu", (void *)bucket, slab->slab_size); return slab; -free_slab_chunks: - umf_ba_global_free(slab->chunks); - free_slab: umf_ba_global_free(slab); return NULL; @@ -136,25 +141,21 @@ static void destroy_slab(slab_t *slab) { LOG_ERR("deallocation of slab data failed!"); } - umf_ba_global_free(slab->chunks); umf_ba_global_free(slab); } -// return the index of the first available chunk, SIZE_MAX otherwise static size_t slab_find_first_available_chunk_idx(const slab_t *slab) { - // use the first free chunk index as a hint for the search - for (bool *chunk = slab->chunks + slab->first_free_chunk_idx; - chunk != slab->chunks + slab->num_chunks_total; chunk++) { - - // false means not used - if (*chunk == false) { - size_t idx = chunk - slab->chunks; - LOG_DEBUG("idx: %zu", idx); - return idx; + for (size_t i = 0; i < slab->num_words; i++) { + // NOTE: free chunks are represented as set bits + uint64_t word = slab->chunks[i]; + if (word != 0) { + size_t bit_index = utils_lsb64(word); + size_t free_chunk = i * CHUNK_BITMAP_SIZE + bit_index; + return free_chunk; } } - LOG_DEBUG("idx: SIZE_MAX"); + // No free chunk was found. 
return SIZE_MAX; } @@ -167,12 +168,9 @@ static void *slab_get_chunk(slab_t *slab) { (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); // mark chunk as used - slab->chunks[chunk_idx] = true; + slab_set_chunk_bit(slab, chunk_idx, false); slab->num_chunks_allocated += 1; - // use the found index as the next hint - slab->first_free_chunk_idx = chunk_idx + 1; - return free_chunk; } @@ -195,18 +193,9 @@ static void slab_free_chunk(slab_t *slab, void *ptr) { size_t chunk_idx = ptr_diff / slab->bucket->size; // Make sure that the chunk was allocated - assert(slab->chunks[chunk_idx] && "double free detected"); - slab->chunks[chunk_idx] = false; + assert(slab_read_chunk_bit(slab, chunk_idx) == 0 && "double free detected"); + slab_set_chunk_bit(slab, chunk_idx, true); slab->num_chunks_allocated -= 1; - - if (chunk_idx < slab->first_free_chunk_idx) { - slab->first_free_chunk_idx = chunk_idx; - } - - LOG_DEBUG("chunk_idx: %zu, num_chunks_allocated: %zu, " - "first_free_chunk_idx: %zu", - chunk_idx, slab->num_chunks_allocated, - slab->first_free_chunk_idx); } static bool slab_has_avail(const slab_t *slab) { diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h index 2b5de64bc..a930585fb 100644 --- a/src/pool/pool_disjoint_internal.h +++ b/src/pool/pool_disjoint_internal.h @@ -15,6 +15,8 @@ #include "critnib/critnib.h" #include "utils_concurrency.h" +#define CHUNK_BITMAP_SIZE 64 + typedef struct bucket_t bucket_t; typedef struct slab_t slab_t; typedef struct slab_list_item_t slab_list_item_t; @@ -81,23 +83,24 @@ typedef struct slab_t { void *mem_ptr; size_t slab_size; - // Represents the current state of each chunk: if the bit is set, the - // chunk is allocated; otherwise, the chunk is free for allocation - bool *chunks; size_t num_chunks_total; + // Num of 64-bit words needed to store chunk state + size_t num_words; + // Total number of allocated chunks at the moment. 
size_t num_chunks_allocated; // The bucket which the slab belongs to bucket_t *bucket; - // Hints where to start search for free chunk in a slab - size_t first_free_chunk_idx; - // Store iterator to the corresponding node in avail/unavail list // to achieve O(1) removal slab_list_item_t iter; + + // Represents the current state of each chunk: if the bit is clear, the + // chunk is allocated; otherwise, the chunk is free for allocation + uint64_t chunks[]; } slab_t; typedef struct umf_disjoint_pool_shared_limits_t { @@ -158,4 +161,24 @@ typedef struct disjoint_pool_t { size_t provider_min_page_size; } disjoint_pool_t; +static inline void slab_set_chunk_bit(slab_t *slab, size_t index, bool value) { + assert(index < slab->num_chunks_total && "Index out of range"); + + size_t word_index = index / CHUNK_BITMAP_SIZE; + unsigned bit_index = index % CHUNK_BITMAP_SIZE; + if (value) { + slab->chunks[word_index] |= (1ULL << bit_index); + } else { + slab->chunks[word_index] &= ~(1ULL << bit_index); + } +} + +static inline int slab_read_chunk_bit(const slab_t *slab, size_t index) { + assert(index < slab->num_chunks_total && "Index out of range"); + + size_t word_index = index / CHUNK_BITMAP_SIZE; + unsigned bit_index = index % CHUNK_BITMAP_SIZE; + return (slab->chunks[word_index] >> bit_index) & 1; +} + #endif // UMF_POOL_DISJOINT_INTERNAL_H diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index 02f769802..4eedce981 100644 --- a/test/pools/disjoint_pool.cpp +++ b/test/pools/disjoint_pool.cpp @@ -113,9 +113,8 @@ TEST_F(test, internals) { EXPECT_GE(slab->num_chunks_total, slab->slab_size / bucket->size); // check allocation in slab - EXPECT_EQ(slab->chunks[0], true); - EXPECT_EQ(slab->chunks[1], false); - EXPECT_EQ(slab->first_free_chunk_idx, 1); + EXPECT_EQ(slab_read_chunk_bit(slab, 0), false); + EXPECT_EQ(slab_read_chunk_bit(slab, 1), true); // TODO: // * multiple alloc + free from single bucket From 0d5a89cffc2bcd45f72f17e69a01348c2354b1ad Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 10 Mar 2025 09:16:11 +0100 Subject: [PATCH 221/466] Run Compatibility GPU CI jobs only on upstream Run Compatibility GPU CI jobs only on upstream, since forks do not have the required HW. Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_compatibility.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml index b6007b081..5bf9bd817 100644 --- a/.github/workflows/reusable_compatibility.yml +++ b/.github/workflows/reusable_compatibility.yml @@ -186,6 +186,8 @@ jobs: gpu: name: GPU Ubuntu + # run only on upstream; forks will not have the HW + if: github.repository == 'oneapi-src/unified-memory-framework' strategy: matrix: provider: ['LEVEL_ZERO', 'CUDA'] From adce85f9c05d0230a1fba5dbac6686546b4bf286 Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Mon, 3 Mar 2025 22:09:41 +0000 Subject: [PATCH 222/466] Use atomics in critnib find_* It fixes ThreadSanitizer data race in find_predecessor() vs critnib_insert() and critnib_remove(). 
Signed-off-by: Lukasz Dorau --- src/critnib/critnib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index 1adb2dc7e..5c3a65dfd 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -525,7 +525,9 @@ find_predecessor(struct critnib_node *__restrict n) { while (1) { int nib; for (nib = NIB; nib >= 0; nib--) { - if (n->child[nib]) { + struct critnib_node *m; + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&m); + if (m) { break; } } @@ -534,7 +536,7 @@ find_predecessor(struct critnib_node *__restrict n) { return NULL; } - n = n->child[nib]; + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&n); if (is_leaf(n)) { return to_leaf(n); } From 6483c5464774339335bdc8a8719fc77e7c2d119a Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 10 Mar 2025 09:03:36 +0100 Subject: [PATCH 223/466] Check valgrind log files Check valgrind log files. Do not print an error message like: ls: cannot access './examples/umf_example_*.log': No such file or directory when only the tests log files are present for example. Signed-off-by: Lukasz Dorau --- test/test_valgrind.sh | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/test/test_valgrind.sh b/test/test_valgrind.sh index ea156e620..2e4f655f6 100755 --- a/test/test_valgrind.sh +++ b/test/test_valgrind.sh @@ -188,11 +188,33 @@ echo echo "======================================================================" echo -for log in $(ls -1 ${PATH_TESTS}.log ${PATH_EXAMPLES}.log); do +LOG_FILES="" +NT=$(ls -1 ${PATH_TESTS}.log 2>/dev/null | wc -l) +if [ $NT -gt 0 ]; then + LOG_FILES="$LOG_FILES $(ls -1 ${PATH_TESTS}.log | xargs)" +fi +NE=$(ls -1 ${PATH_EXAMPLES}.log 2>/dev/null | wc -l) +if [ $NE -gt 0 ]; then + LOG_FILES="$LOG_FILES $(ls -1 ${PATH_EXAMPLES}.log | xargs)" +fi +if [ $(($NT + $NE)) -eq 0 ]; then + echo + echo "FATAL ERROR: no log files found, but number of failed tests equals $ANY_TEST_FAILED!" + echo + exit 1 +fi + +for log in $LOG_FILES; do echo ">>>>>>> LOG $log" cat $log echo echo done +if [ $(($NT + $NE)) -ne $ANY_TEST_FAILED ]; then + echo + echo "ERROR: incorrect number of log files: ANY_TEST_FAILED=$ANY_TEST_FAILED != ($NT + $NE)" + echo +fi + exit 1 From 96d2ef39c110725f2f5e98a613c02c794bec4273 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Mon, 10 Mar 2025 13:56:14 +0100 Subject: [PATCH 224/466] add missing nullcheck in critnib --- src/critnib/critnib.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index 5c3a65dfd..feb492e20 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -537,6 +537,11 @@ find_predecessor(struct critnib_node *__restrict n) { } utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&n); + + if (!n) { + return NULL; + } + if (is_leaf(n)) { return to_leaf(n); } @@ -650,6 +655,11 @@ static struct critnib_leaf *find_successor(struct critnib_node *__restrict n) { } n = n->child[nib]; + + if (!n) { + return NULL; + } + if (is_leaf(n)) { return to_leaf(n); } From 8cc1d429c56b73545f783d67fdea6d2a3b8fc8f0 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 10 Mar 2025 14:22:47 +0100 Subject: [PATCH 225/466] Use atomics in find_successor() like in find_predecessor() Use atomics in find_successor() like in find_predecessor(). 
Ref: #1175 Signed-off-by: Lukasz Dorau --- src/critnib/critnib.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/critnib/critnib.c b/src/critnib/critnib.c index feb492e20..5625781d3 100644 --- a/src/critnib/critnib.c +++ b/src/critnib/critnib.c @@ -645,7 +645,9 @@ static struct critnib_leaf *find_successor(struct critnib_node *__restrict n) { while (1) { unsigned nib; for (nib = 0; nib <= NIB; nib++) { - if (n->child[nib]) { + struct critnib_node *m; + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&m); + if (m) { break; } } @@ -654,7 +656,7 @@ static struct critnib_leaf *find_successor(struct critnib_node *__restrict n) { return NULL; } - n = n->child[nib]; + utils_atomic_load_acquire_ptr((void **)&n->child[nib], (void **)&n); if (!n) { return NULL; From 965fc672d75f1d4e1be8534bd35063b940487902 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 4 Mar 2025 12:21:32 +0100 Subject: [PATCH 226/466] Add utils_atomic_store_release_u64() to utils Signed-off-by: Lukasz Dorau --- src/utils/utils_concurrency.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index e8a601ecd..31e5793b9 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -152,6 +152,17 @@ static inline bool utils_compare_exchange_u64(uint64_t *ptr, uint64_t *expected, return false; } +static inline void utils_atomic_store_release_u64(void *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + LONG64 out; + LONG64 desired = (LONG64)val; + LONG64 expected = 0; + while (expected != (out = InterlockedCompareExchange64( + (LONG64 volatile *)ptr, desired, expected))) { + expected = out; + } +} + #else // !defined(_WIN32) static inline void utils_atomic_load_acquire_u64(uint64_t *ptr, uint64_t *out) { @@ -168,6 +179,11 @@ static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) { utils_annotate_acquire(ptr); } +static inline void utils_atomic_store_release_u64(void *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + __atomic_store_n((uintptr_t *)ptr, (uintptr_t)val, memory_order_release); +} + static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); utils_annotate_release(ptr); From d437d6c24581fe2fbd9f72891fa102281ff0533d Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 6 Mar 2025 09:43:51 +0100 Subject: [PATCH 227/466] Use eight level of critnibs in the tracker Multilevel maps are needed to support the case when one memory pool acts as a memory provider for another memory pool (nested memory pooling). 
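For illustration only (an editor's sketch, not part of this change), the nested
case these map levels have to track can be built from the fixed memory provider
and the proxy pool used elsewhere in this series; header paths, error handling
and cleanup are assumed/omitted here, and 'outer_pool' stands for any
already-created UMF pool:

    #include <umf/memory_pool.h>
    #include <umf/memory_provider.h>
    #include <umf/pools/pool_proxy.h>
    #include <umf/providers/provider_fixed_memory.h>

    // Carve an inner pool out of a block owned by an existing pool.
    static void *nested_alloc_sketch(umf_memory_pool_handle_t outer_pool,
                                     size_t size) {
        void *base = umfPoolMalloc(outer_pool, size); // tracked at level 0

        umf_fixed_memory_provider_params_handle_t params = NULL;
        umfFixedMemoryProviderParamsCreate(&params, base, size);

        umf_memory_provider_handle_t fixed = NULL;
        umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, &fixed);

        umf_memory_pool_handle_t inner = NULL;
        umfPoolCreate(umfProxyPoolOps(), fixed, NULL, 0, &inner);

        // This allocation falls inside [base, base + size), so the tracker
        // records it one level above the entry for 'base'.
        return umfPoolMalloc(inner, size / 2);
    }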
Signed-off-by: Lukasz Dorau --- include/umf/base.h | 3 +- src/provider/provider_tracking.c | 430 +++++++++++++++++++++++++------ 2 files changed, 351 insertions(+), 82 deletions(-) diff --git a/include/umf/base.h b/include/umf/base.h index 8dad184f2..12e99aa2b 100644 --- a/include/umf/base.h +++ b/include/umf/base.h @@ -47,7 +47,8 @@ typedef enum umf_result_t { 6, ///< Failure in user provider code (i.e in user provided callback) UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE = 7, ///< External required dependency is unavailable or missing - UMF_RESULT_ERROR_UNKNOWN = 0x7ffffffe ///< Unknown or internal error + UMF_RESULT_ERROR_OUT_OF_RESOURCES = 8, ///< Out of internal resources + UMF_RESULT_ERROR_UNKNOWN = 0x7ffffffe ///< Unknown error } umf_result_t; #ifdef __cplusplus diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index 4696bc562..bc560304c 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -7,73 +7,220 @@ * */ -#include "provider_tracking.h" +#include +#include +#include +#include +#include + +#include +#include +#include + #include "base_alloc_global.h" #include "critnib.h" #include "ipc_cache.h" #include "ipc_internal.h" +#include "provider_tracking.h" #include "utils_common.h" #include "utils_concurrency.h" #include "utils_log.h" -#include -#include -#include - -#include -#include -#include -#include -#include +// TODO: we need to support an arbitrary amount of layers in the future +#define MAX_LEVELS_OF_ALLOC_SEGMENT_MAP 8 uint64_t IPC_HANDLE_ID = 0; struct umf_memory_tracker_t { umf_ba_pool_t *alloc_info_allocator; - critnib *alloc_segments_map; + // Multilevel maps are needed to support the case + // when one memory pool acts as a memory provider + // for another memory pool (nested memory pooling). + critnib *alloc_segments_map[MAX_LEVELS_OF_ALLOC_SEGMENT_MAP]; utils_mutex_t splitMergeMutex; }; typedef struct tracker_alloc_info_t { umf_memory_pool_handle_t pool; size_t size; + // number of overlapping memory regions + // in the next level of map + // falling within the current range + size_t n_children; } tracker_alloc_info_t; -static umf_result_t umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, - umf_memory_pool_handle_t pool, - const void *ptr, size_t size) { +// Get the most nested (on the highest level) allocation segment in the map with the `ptr` key. +// If `no_children` is set to 1, the function will return the entry +// only if it has no children on the higher level. +// The function returns the entry if found, otherwise NULL. 
+static tracker_alloc_info_t *get_most_nested_alloc_segment( + umf_memory_tracker_handle_t hTracker, const void *ptr, int *_level, + uintptr_t *_parent_key, tracker_alloc_info_t **_parent_value, + int no_children) { assert(ptr); + tracker_alloc_info_t *parent_value = NULL; + tracker_alloc_info_t *rvalue = NULL; + uintptr_t parent_key = 0; + uintptr_t rkey = 0; + uint64_t rsize = 0; + int level = 0; + int found = 0; + + do { + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + found = + critnib_find(hTracker->alloc_segments_map[level], (uintptr_t)ptr, + FIND_LE, (void *)&rkey, (void **)&rvalue); + if (!found || !rvalue) { + break; + } + + utils_atomic_load_acquire_u64((uint64_t *)&rvalue->size, &rsize); + + if (found && (uintptr_t)ptr < rkey + rsize) { + if (rvalue->n_children) { + if (level == MAX_LEVELS_OF_ALLOC_SEGMENT_MAP - 1) { + break; + } + level++; + parent_key = rkey; + parent_value = rvalue; + } + } + } while (found && ((uintptr_t)ptr < rkey + rsize) && rvalue->n_children); + + if (!rvalue || rkey != (uintptr_t)ptr) { + return NULL; + } + + if (no_children && (rvalue->n_children > 0)) { + return NULL; + } + + if (_level) { + *_level = level; + } + if (_parent_key) { + *_parent_key = parent_key; + } + if (_parent_value) { + *_parent_value = parent_value; + } + + assert(!no_children || rvalue->n_children == 0); + + return rvalue; +} + +static umf_result_t +umfMemoryTrackerAddAtLevel(umf_memory_tracker_handle_t hTracker, int level, + umf_memory_pool_handle_t pool, const void *ptr, + size_t size, uintptr_t parent_key, + tracker_alloc_info_t *parent_value) { + assert(ptr); + + umf_result_t umf_result = UMF_RESULT_ERROR_UNKNOWN; + tracker_alloc_info_t *value = umf_ba_alloc(hTracker->alloc_info_allocator); if (value == NULL) { - LOG_ERR("failed to allocate tracker value, ptr=%p, size=%zu", ptr, + LOG_ERR("failed to allocate a tracker value, ptr=%p, size=%zu", ptr, size); return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } value->pool = pool; - value->size = size; - - int ret = - critnib_insert(hTracker->alloc_segments_map, (uintptr_t)ptr, value, 0); + utils_atomic_store_release_u64(&value->size, size); + value->n_children = 0; + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + int ret = critnib_insert(hTracker->alloc_segments_map[level], + (uintptr_t)ptr, value, 0); if (ret == 0) { - LOG_DEBUG( - "memory region is added, tracker=%p, ptr=%p, pool=%p, size=%zu", - (void *)hTracker, ptr, (void *)pool, size); + LOG_DEBUG("memory region is added, tracker=%p, level=%i, pool=%p, " + "ptr=%p, size=%zu", + (void *)hTracker, level, (void *)pool, ptr, size); + + if (parent_value) { + parent_value->n_children++; + LOG_DEBUG( + "child #%zu added to memory region: tracker=%p, level=%i, " + "pool=%p, ptr=%p, size=%zu", + parent_value->n_children, (void *)hTracker, level - 1, + (void *)parent_value->pool, (void *)parent_key, + parent_value->size); + } return UMF_RESULT_SUCCESS; } + if (ret == ENOMEM) { + umf_result = UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } - LOG_ERR("failed to insert tracker value, ret=%d, ptr=%p, pool=%p, size=%zu", - ret, ptr, (void *)pool, size); + LOG_ERR( + "failed to insert the tracker value: pool=%p, ptr=%p, size=%zu, ret=%d", + (void *)pool, ptr, size, ret); umf_ba_free(hTracker->alloc_info_allocator, value); - if (ret == ENOMEM) { - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + return umf_result; +} + +static umf_result_t umfMemoryTrackerAdd(umf_memory_tracker_handle_t hTracker, + umf_memory_pool_handle_t pool, + const void *ptr, size_t size) { + assert(ptr); + + umf_result_t 
umf_result = UMF_RESULT_ERROR_UNKNOWN; + tracker_alloc_info_t *parent_value = NULL; + tracker_alloc_info_t *rvalue = NULL; + uintptr_t parent_key = 0; + uintptr_t rkey = 0; + uint64_t rsize = 0; + int level = 0; + int found = 0; + + // Find the most nested (in the highest level) entry + // in the critnib maps that contains the given 'ptr' pointer. + do { + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + found = + critnib_find(hTracker->alloc_segments_map[level], (uintptr_t)ptr, + FIND_LE, (void *)&rkey, (void **)&rvalue); + if (!found || !rvalue) { + break; + } + + utils_atomic_load_acquire_u64((uint64_t *)&rvalue->size, &rsize); + + if ((uintptr_t)ptr < rkey + rsize) { + if (level == MAX_LEVELS_OF_ALLOC_SEGMENT_MAP - 1) { + // TODO: we need to support an arbitrary amount of layers in the future + LOG_ERR("tracker level is too high, ptr=%p, size=%zu", ptr, + size); + return UMF_RESULT_ERROR_OUT_OF_RESOURCES; + } + if (((uintptr_t)ptr + size) > (rkey + rsize)) { + LOG_ERR( + "cannot insert to the tracker value (pool=%p, ptr=%p, " + "size=%zu) " + "that exceeds the parent value (pool=%p, ptr=%p, size=%zu)", + (void *)pool, ptr, size, (void *)rvalue->pool, (void *)rkey, + (size_t)rsize); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + parent_key = rkey; + parent_value = rvalue; + level++; + } + } while (found && ((uintptr_t)ptr < rkey + rsize) && rvalue->n_children); + + umf_result = umfMemoryTrackerAddAtLevel(hTracker, level, pool, ptr, size, + parent_key, parent_value); + if (umf_result != UMF_RESULT_SUCCESS) { + return umf_result; } - return UMF_RESULT_ERROR_UNKNOWN; + return UMF_RESULT_SUCCESS; } static umf_result_t umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker, @@ -85,16 +232,35 @@ static umf_result_t umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker, // Every umfMemoryTrackerAdd(..., ptr, ...) should have a corresponding // umfMemoryTrackerRemove call with the same ptr value. - void *value = critnib_remove(hTracker->alloc_segments_map, (uintptr_t)ptr); + tracker_alloc_info_t *parent_value = NULL; + uintptr_t parent_key = 0; + int level = 0; + + // Find the most nested (on the highest level) entry in the map + // with the `ptr` key and with no children - only such entry can be removed. 
+ tracker_alloc_info_t *value = get_most_nested_alloc_segment( + hTracker, ptr, &level, &parent_key, &parent_value, 1 /* no_children */); if (!value) { LOG_ERR("pointer %p not found in the alloc_segments_map", ptr); return UMF_RESULT_ERROR_UNKNOWN; } - tracker_alloc_info_t *v = value; + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + value = critnib_remove(hTracker->alloc_segments_map[level], (uintptr_t)ptr); + assert(value); - LOG_DEBUG("memory region removed: tracker=%p, ptr=%p, size=%zu", - (void *)hTracker, ptr, v->size); + LOG_DEBUG("memory region removed: tracker=%p, level=%i, pool=%p, ptr=%p, " + "size=%zu", + (void *)hTracker, level, value->pool, ptr, value->size); + + if (parent_value) { + LOG_DEBUG( + "child #%zu removed from memory region: tracker=%p, level=%i, " + "pool=%p, ptr=%p, size=%zu", + parent_value->n_children, (void *)hTracker, level - 1, + (void *)parent_value->pool, (void *)parent_key, parent_value->size); + parent_value->n_children--; + } umf_ba_free(hTracker->alloc_info_allocator, value); @@ -124,24 +290,43 @@ umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, return UMF_RESULT_ERROR_NOT_SUPPORTED; } - if (TRACKER->alloc_segments_map == NULL) { + if (TRACKER->alloc_segments_map[0] == NULL) { LOG_ERR("tracker's alloc_segments_map does not exist"); return UMF_RESULT_ERROR_NOT_SUPPORTED; } - uintptr_t rkey; - tracker_alloc_info_t *rvalue; - int found = critnib_find(TRACKER->alloc_segments_map, (uintptr_t)ptr, + tracker_alloc_info_t *top_most_value = NULL; + tracker_alloc_info_t *rvalue = NULL; + uintptr_t top_most_key = 0; + uintptr_t rkey = 0; + int level = 0; + int found = 0; + + do { + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + found = critnib_find(TRACKER->alloc_segments_map[level], (uintptr_t)ptr, FIND_LE, (void *)&rkey, (void **)&rvalue); - if (!found || (uintptr_t)ptr >= rkey + rvalue->size) { + if (found && (uintptr_t)ptr < rkey + rvalue->size) { + top_most_key = rkey; + top_most_value = rvalue; + if (rvalue->n_children == 0 || + level == MAX_LEVELS_OF_ALLOC_SEGMENT_MAP - 1) { + break; + } + level++; + } + } while (found && (uintptr_t)ptr < rkey + rvalue->size && + rvalue->n_children); + + if (!top_most_value) { LOG_DEBUG("pointer %p not found in the tracker, TRACKER=%p", ptr, (void *)TRACKER); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - pAllocInfo->base = (void *)rkey; - pAllocInfo->baseSize = rvalue->size; - pAllocInfo->pool = rvalue->pool; + pAllocInfo->base = (void *)top_most_key; + pAllocInfo->baseSize = top_most_value->size; + pAllocInfo->pool = top_most_value->pool; return UMF_RESULT_SUCCESS; } @@ -166,26 +351,38 @@ typedef struct umf_tracking_memory_provider_t { typedef struct umf_tracking_memory_provider_t umf_tracking_memory_provider_t; static umf_result_t trackingAlloc(void *hProvider, size_t size, - size_t alignment, void **ptr) { + size_t alignment, void **_ptr) { umf_tracking_memory_provider_t *p = (umf_tracking_memory_provider_t *)hProvider; umf_result_t ret = UMF_RESULT_SUCCESS; + void *ptr; assert(p->hUpstream); - ret = umfMemoryProviderAlloc(p->hUpstream, size, alignment, ptr); - if (ret != UMF_RESULT_SUCCESS || !*ptr) { + *_ptr = NULL; + + ret = umfMemoryProviderAlloc(p->hUpstream, size, alignment, &ptr); + if (ret != UMF_RESULT_SUCCESS || !ptr) { return ret; } - umf_result_t ret2 = umfMemoryTrackerAdd(p->hTracker, p->pool, *ptr, size); - if (ret2 != UMF_RESULT_SUCCESS) { + ret = umfMemoryTrackerAdd(p->hTracker, p->pool, ptr, size); + if (ret != UMF_RESULT_SUCCESS) { LOG_ERR("failed to add allocated region to the 
tracker, ptr = %p, size " "= %zu, ret = %d", - *ptr, size, ret2); + ptr, size, ret); + umf_result_t ret2 = umfMemoryProviderFree(p->hUpstream, ptr, size); + if (ret2 != UMF_RESULT_SUCCESS) { + LOG_ERR("upstream provider failed to free the memory: ptr = %p, " + "size = %zu, ret = %d", + ptr, size, ret2); + } + return ret; } - return ret; + *_ptr = ptr; + + return UMF_RESULT_SUCCESS; } static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, @@ -194,6 +391,8 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, umf_result_t ret = UMF_RESULT_ERROR_UNKNOWN; umf_tracking_memory_provider_t *provider = (umf_tracking_memory_provider_t *)hProvider; + tracker_alloc_info_t *parent_value = NULL; + uintptr_t parent_key = 0; tracker_alloc_info_t *splitValue = umf_ba_alloc(provider->hTracker->alloc_info_allocator); @@ -203,21 +402,27 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, splitValue->pool = provider->pool; splitValue->size = firstSize; + splitValue->n_children = 0; int r = utils_mutex_lock(&provider->hTracker->splitMergeMutex); if (r) { goto err_lock; } - tracker_alloc_info_t *value = (tracker_alloc_info_t *)critnib_get( - provider->hTracker->alloc_segments_map, (uintptr_t)ptr); + int level = 0; + + // Find the most nested (on the highest level) entry in the map + // with the `ptr` key and with no children - only such entry can be split. + tracker_alloc_info_t *value = get_most_nested_alloc_segment( + provider->hTracker, ptr, &level, &parent_key, &parent_value, + 1 /* no_children */); if (!value) { LOG_ERR("region for split is not found in the tracker"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; goto err; } if (value->size != totalSize) { - LOG_ERR("tracked size %zu does not match requested size to split: %zu", + LOG_ERR("tracked size=%zu does not match requested size to split: %zu", value->size, totalSize); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; goto err; @@ -230,40 +435,58 @@ static umf_result_t trackingAllocationSplit(void *hProvider, void *ptr, goto err; } + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + int cret = + critnib_insert(provider->hTracker->alloc_segments_map[level], + (uintptr_t)ptr, (void *)splitValue, 1 /* update */); + // this cannot fail since we know the element exists (nothing to allocate) + assert(cret == 0); + (void)cret; + void *highPtr = (void *)(((uintptr_t)ptr) + firstSize); size_t secondSize = totalSize - firstSize; // We'll have a duplicate entry for the range [highPtr, highValue->size] but this is fine, // the value is the same anyway and we forbid removing that range concurrently - ret = umfMemoryTrackerAdd(provider->hTracker, provider->pool, highPtr, - secondSize); + ret = umfMemoryTrackerAddAtLevel(provider->hTracker, level, provider->pool, + highPtr, secondSize, parent_key, + parent_value); if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("failed to add split region to the tracker, ptr = %p, size " - "= %zu, ret = %d", + LOG_ERR("failed to add the split region to the tracker, ptr=%p, " + "size=%zu, ret=%d", highPtr, secondSize, ret); + // revert the split + assert(level < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP); + cret = critnib_insert(provider->hTracker->alloc_segments_map[level], + (uintptr_t)ptr, (void *)value, 1 /* update */); + // this cannot fail since we know the element exists (nothing to allocate) + assert(cret == 0); + (void)cret; // TODO: what now? should we rollback the split? 
This can only happen due to ENOMEM // so it's unlikely but probably the best solution would be to try to preallocate everything // (value and critnib nodes) before calling umfMemoryProviderAllocationSplit. goto err; } - int cret = - critnib_insert(provider->hTracker->alloc_segments_map, (uintptr_t)ptr, - (void *)splitValue, 1 /* update */); - // this cannot fail since we know the element exists (nothing to allocate) - assert(cret == 0); - (void)cret; - // free the original value umf_ba_free(provider->hTracker->alloc_info_allocator, value); utils_mutex_unlock(&provider->hTracker->splitMergeMutex); + LOG_DEBUG( + "split memory region (level=%i): ptr=%p, totalSize=%zu, firstSize=%zu", + level, ptr, totalSize, firstSize); + return UMF_RESULT_SUCCESS; err: utils_mutex_unlock(&provider->hTracker->splitMergeMutex); err_lock: umf_ba_free(provider->hTracker->alloc_info_allocator, splitValue); + + LOG_ERR( + "failed to split memory region: ptr=%p, totalSize=%zu, firstSize=%zu", + ptr, totalSize, firstSize); + return ret; } @@ -282,26 +505,38 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, mergedValue->pool = provider->pool; mergedValue->size = totalSize; + mergedValue->n_children = 0; + + // any different negative values + int lowLevel = -2; + int highLevel = -1; int r = utils_mutex_lock(&provider->hTracker->splitMergeMutex); if (r) { goto err_lock; } - tracker_alloc_info_t *lowValue = (tracker_alloc_info_t *)critnib_get( - provider->hTracker->alloc_segments_map, (uintptr_t)lowPtr); + tracker_alloc_info_t *lowValue = get_most_nested_alloc_segment( + provider->hTracker, lowPtr, &lowLevel, NULL, NULL, + 0 /* no_children */); // can have children if (!lowValue) { LOG_FATAL("no left value"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; goto err_assert; } - tracker_alloc_info_t *highValue = (tracker_alloc_info_t *)critnib_get( - provider->hTracker->alloc_segments_map, (uintptr_t)highPtr); + tracker_alloc_info_t *highValue = get_most_nested_alloc_segment( + provider->hTracker, highPtr, &highLevel, NULL, NULL, + 0 /* no_children */); // can have children if (!highValue) { LOG_FATAL("no right value"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; goto err_assert; } + if (lowLevel != highLevel) { + LOG_FATAL("tracker level mismatch"); + ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; + goto err_assert; + } if (lowValue->pool != highValue->pool) { LOG_FATAL("pool mismatch"); ret = UMF_RESULT_ERROR_INVALID_ARGUMENT; @@ -313,6 +548,8 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, goto err_assert; } + mergedValue->n_children = lowValue->n_children + highValue->n_children; + ret = umfMemoryProviderAllocationMerge(provider->hUpstream, lowPtr, highPtr, totalSize); if (ret != UMF_RESULT_SUCCESS) { @@ -320,10 +557,13 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, goto not_merged; } + size_t lno = lowValue->n_children; + size_t hno = highValue->n_children; + // We'll have a duplicate entry for the range [highPtr, highValue->size] but this is fine, // the value is the same anyway and we forbid removing that range concurrently int cret = - critnib_insert(provider->hTracker->alloc_segments_map, + critnib_insert(provider->hTracker->alloc_segments_map[lowLevel], (uintptr_t)lowPtr, (void *)mergedValue, 1 /* update */); // this cannot fail since we know the element exists (nothing to allocate) assert(cret == 0); @@ -333,16 +573,23 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, 
umf_ba_free(provider->hTracker->alloc_info_allocator, lowValue); void *erasedhighValue = critnib_remove( - provider->hTracker->alloc_segments_map, (uintptr_t)highPtr); + provider->hTracker->alloc_segments_map[highLevel], (uintptr_t)highPtr); assert(erasedhighValue == highValue); umf_ba_free(provider->hTracker->alloc_info_allocator, erasedhighValue); utils_mutex_unlock(&provider->hTracker->splitMergeMutex); + LOG_DEBUG("merged memory regions (level=%i): lowPtr=%p (child=%zu), " + "highPtr=%p (child=%zu), totalSize=%zu", + lowLevel, lowPtr, lno, highPtr, hno, totalSize); + return UMF_RESULT_SUCCESS; err_assert: + LOG_FATAL("failed to merge memory regions: lowPtr=%p (level=%i), " + "highPtr=%p (level=%i), totalSize=%zu", + lowPtr, lowLevel, highPtr, highLevel, totalSize); assert(0); not_merged: @@ -350,6 +597,11 @@ static umf_result_t trackingAllocationMerge(void *hProvider, void *lowPtr, err_lock: umf_ba_free(provider->hTracker->alloc_info_allocator, mergedValue); + + LOG_ERR("failed to merge memory regions: lowPtr=%p (level=%i), highPtr=%p " + "(level=%i), totalSize=%zu", + lowPtr, lowLevel, highPtr, highLevel, totalSize); + return ret; } @@ -428,19 +680,21 @@ static umf_result_t trackingInitialize(void *params, void **ret) { #ifndef NDEBUG static void check_if_tracker_is_empty(umf_memory_tracker_handle_t hTracker, umf_memory_pool_handle_t pool) { - uintptr_t rkey; - void *rvalue; size_t n_items = 0; - uintptr_t last_key = 0; - while (1 == critnib_find((critnib *)hTracker->alloc_segments_map, last_key, - FIND_G, &rkey, &rvalue)) { - tracker_alloc_info_t *value = (tracker_alloc_info_t *)rvalue; - if (value->pool == pool || pool == NULL) { - n_items++; - } + for (int i = 0; i < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP; i++) { + uintptr_t last_key = 0; + uintptr_t rkey; + tracker_alloc_info_t *rvalue; + + while (1 == critnib_find(hTracker->alloc_segments_map[i], last_key, + FIND_G, &rkey, (void **)&rvalue)) { + if (rvalue->pool == pool || pool == NULL) { + n_items++; + } - last_key = rkey; + last_key = rkey; + } } if (n_items) { @@ -813,6 +1067,8 @@ umf_memory_tracker_handle_t umfMemoryTrackerCreate(void) { return NULL; } + memset(handle, 0, sizeof(struct umf_memory_tracker_t)); + umf_ba_pool_t *alloc_info_allocator = umf_ba_create(sizeof(struct tracker_alloc_info_t)); if (!alloc_info_allocator) { @@ -826,9 +1082,12 @@ umf_memory_tracker_handle_t umfMemoryTrackerCreate(void) { goto err_destroy_alloc_info_allocator; } - handle->alloc_segments_map = critnib_new(); - if (!handle->alloc_segments_map) { - goto err_destroy_mutex; + int i; + for (i = 0; i < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP; i++) { + handle->alloc_segments_map[i] = critnib_new(); + if (!handle->alloc_segments_map[i]) { + goto err_destroy_mutex; + } } LOG_DEBUG("tracker created, handle=%p, alloc_segments_map=%p", @@ -837,6 +1096,11 @@ umf_memory_tracker_handle_t umfMemoryTrackerCreate(void) { return handle; err_destroy_mutex: + for (int j = i; j >= 0; j--) { + if (handle->alloc_segments_map[j]) { + critnib_delete(handle->alloc_segments_map[j]); + } + } utils_mutex_destroy_not_free(&handle->splitMergeMutex); err_destroy_alloc_info_allocator: umf_ba_destroy(alloc_info_allocator); @@ -864,8 +1128,12 @@ void umfMemoryTrackerDestroy(umf_memory_tracker_handle_t handle) { // We have to zero all inner pointers, // because the tracker handle can be copied // and used in many places. 
- critnib_delete(handle->alloc_segments_map); - handle->alloc_segments_map = NULL; + for (int i = 0; i < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP; i++) { + if (handle->alloc_segments_map[i]) { + critnib_delete(handle->alloc_segments_map[i]); + handle->alloc_segments_map[i] = NULL; + } + } utils_mutex_destroy_not_free(&handle->splitMergeMutex); umf_ba_destroy(handle->alloc_info_allocator); handle->alloc_info_allocator = NULL; From 775ac129021e92bd04a2e385ba4cb2ee2cb553bb Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 25 Feb 2025 13:54:44 +0100 Subject: [PATCH 228/466] Add tests for pool from pointer to Fixed provider tests Signed-off-by: Lukasz Dorau --- test/provider_fixed_memory.cpp | 112 ++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 2 deletions(-) diff --git a/test/provider_fixed_memory.cpp b/test/provider_fixed_memory.cpp index 7f976a1f5..1760ca4f7 100644 --- a/test/provider_fixed_memory.cpp +++ b/test/provider_fixed_memory.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -11,10 +11,12 @@ #endif #include +#include #include using umf_test::test; +#define FIXED_BUFFER_SIZE (10 * utils_get_page_size()) #define INVALID_PTR ((void *)0x01) typedef enum purge_t { @@ -59,7 +61,7 @@ struct FixedProviderTest test::SetUp(); // Allocate a memory buffer to use with the fixed memory provider - memory_size = utils_get_page_size() * 10; // Allocate 10 pages + memory_size = FIXED_BUFFER_SIZE; // Allocate 10 pages memory_buffer = malloc(memory_size); ASSERT_NE(memory_buffer, nullptr); @@ -391,3 +393,109 @@ TEST_P(FixedProviderTest, split) { umf_result = umfMemoryProviderFree(provider.get(), ptr2, size); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); } + +TEST_P(FixedProviderTest, pool_from_ptr_whole_size_success) { + umf_result_t umf_result; + size_t size_of_first_alloc; + size_t size_of_pool_from_ptr; + void *ptr_for_pool = nullptr; + void *ptr = nullptr; + + umf_memory_pool_handle_t proxyFixedPool = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), provider.get(), nullptr, 0, + &proxyFixedPool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + size_of_first_alloc = FIXED_BUFFER_SIZE - (2 * page_size); + ptr_for_pool = umfPoolMalloc(proxyFixedPool, size_of_first_alloc); + ASSERT_NE(ptr_for_pool, nullptr); + + // Create provider parameters + size_of_pool_from_ptr = size_of_first_alloc; // whole size + umf_fixed_memory_provider_params_handle_t params = nullptr; + umf_result = umfFixedMemoryProviderParamsCreate(¶ms, ptr_for_pool, + size_of_pool_from_ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + umf_memory_provider_handle_t providerFromPtr = nullptr; + umf_result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, + &providerFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(providerFromPtr, nullptr); + + umf_memory_pool_handle_t poolFromPtr = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), providerFromPtr, nullptr, 0, + &poolFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ptr = umfPoolMalloc(poolFromPtr, size_of_pool_from_ptr); + ASSERT_NE(ptr, nullptr); + + memset(ptr, 0xFF, size_of_pool_from_ptr); + + umf_result = umfPoolFree(poolFromPtr, ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(poolFromPtr); + umfMemoryProviderDestroy(providerFromPtr); + 
umfFixedMemoryProviderParamsDestroy(params); + + umf_result = umfPoolFree(proxyFixedPool, ptr_for_pool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(proxyFixedPool); +} + +TEST_P(FixedProviderTest, pool_from_ptr_half_size_success) { + umf_result_t umf_result; + size_t size_of_first_alloc; + size_t size_of_pool_from_ptr; + void *ptr_for_pool = nullptr; + void *ptr = nullptr; + + umf_memory_pool_handle_t proxyFixedPool = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), provider.get(), nullptr, 0, + &proxyFixedPool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + size_of_first_alloc = FIXED_BUFFER_SIZE - (2 * page_size); + ptr_for_pool = umfPoolMalloc(proxyFixedPool, size_of_first_alloc); + ASSERT_NE(ptr_for_pool, nullptr); + + // Create provider parameters + size_of_pool_from_ptr = size_of_first_alloc / 2; // half size + umf_fixed_memory_provider_params_handle_t params = nullptr; + umf_result = umfFixedMemoryProviderParamsCreate(¶ms, ptr_for_pool, + size_of_pool_from_ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + umf_memory_provider_handle_t providerFromPtr = nullptr; + umf_result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, + &providerFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(providerFromPtr, nullptr); + + umf_memory_pool_handle_t poolFromPtr = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), providerFromPtr, nullptr, 0, + &poolFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ptr = umfPoolMalloc(poolFromPtr, size_of_pool_from_ptr); + ASSERT_NE(ptr, nullptr); + + memset(ptr, 0xFF, size_of_pool_from_ptr); + + umf_result = umfPoolFree(poolFromPtr, ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(poolFromPtr); + umfMemoryProviderDestroy(providerFromPtr); + umfFixedMemoryProviderParamsDestroy(params); + + umf_result = umfPoolFree(proxyFixedPool, ptr_for_pool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(proxyFixedPool); +} From cd61c1e6187966fede302fc98d33c2e6ca6e7449 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Tue, 25 Feb 2025 17:38:11 +0100 Subject: [PATCH 229/466] Add tests for pool from pointer to poolFixtures.hpp Signed-off-by: Lukasz Dorau --- test/poolFixtures.hpp | 149 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 144 insertions(+), 5 deletions(-) diff --git a/test/poolFixtures.hpp b/test/poolFixtures.hpp index 6f18664f9..6b01769f1 100644 --- a/test/poolFixtures.hpp +++ b/test/poolFixtures.hpp @@ -5,11 +5,6 @@ #ifndef UMF_TEST_POOL_FIXTURES_HPP #define UMF_TEST_POOL_FIXTURES_HPP 1 -#include "pool.hpp" -#include "provider.hpp" -#include "umf/providers/provider_devdax_memory.h" -#include "utils/utils_sanitizers.h" - #include #include #include @@ -17,7 +12,14 @@ #include #include +#include +#include +#include + #include "../malloc_compliance_tests.hpp" +#include "pool.hpp" +#include "provider.hpp" +#include "utils/utils_sanitizers.h" typedef void *(*pfnPoolParamsCreate)(); typedef umf_result_t (*pfnPoolParamsDestroy)(void *); @@ -493,4 +495,141 @@ TEST_P(umfPoolTest, mallocUsableSize) { } } +TEST_P(umfPoolTest, umfPoolAlignedMalloc) { +#ifdef _WIN32 + // TODO: implement support for windows + GTEST_SKIP() << "umfPoolAlignedMalloc() is not supported on Windows"; +#else /* !_WIN32 */ + umf_result_t umf_result; + void *ptr = nullptr; + const size_t size = 2 * 1024 * 1024; // 2MB + + umf_memory_pool_handle_t pool_get = pool.get(); + + if (!umf_test::isAlignedAllocSupported(pool_get)) { + GTEST_SKIP(); + } + + 
ptr = umfPoolAlignedMalloc(pool_get, size, utils_get_page_size()); + ASSERT_NE(ptr, nullptr); + + umf_result = umfPoolFree(pool_get, ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +#endif /* !_WIN32 */ +} + +TEST_P(umfPoolTest, pool_from_ptr_whole_size_success) { +#ifdef _WIN32 + // TODO: implement support for windows + GTEST_SKIP() << "umfPoolAlignedMalloc() is not supported on Windows"; +#else /* !_WIN32 */ + umf_result_t umf_result; + size_t size_of_pool_from_ptr; + void *ptr_for_pool = nullptr; + void *ptr = nullptr; + + umf_memory_pool_handle_t pool_get = pool.get(); + const size_t size_of_first_alloc = 2 * 1024 * 1024; // 2MB + + if (!umf_test::isAlignedAllocSupported(pool_get)) { + GTEST_SKIP(); + } + + ptr_for_pool = umfPoolAlignedMalloc(pool_get, size_of_first_alloc, + utils_get_page_size()); + ASSERT_NE(ptr_for_pool, nullptr); + + // Create provider parameters + size_of_pool_from_ptr = size_of_first_alloc; // whole size + umf_fixed_memory_provider_params_handle_t params = nullptr; + umf_result = umfFixedMemoryProviderParamsCreate(¶ms, ptr_for_pool, + size_of_pool_from_ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + umf_memory_provider_handle_t providerFromPtr = nullptr; + umf_result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, + &providerFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(providerFromPtr, nullptr); + + umf_memory_pool_handle_t poolFromPtr = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), providerFromPtr, nullptr, 0, + &poolFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ptr = umfPoolMalloc(poolFromPtr, size_of_pool_from_ptr); + ASSERT_NE(ptr, nullptr); + + memset(ptr, 0xFF, size_of_pool_from_ptr); + + umf_result = umfPoolFree(poolFromPtr, ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(poolFromPtr); + umfMemoryProviderDestroy(providerFromPtr); + umfFixedMemoryProviderParamsDestroy(params); + + umf_result = umfPoolFree(pool_get, ptr_for_pool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +#endif /* !_WIN32 */ +} + +TEST_P(umfPoolTest, pool_from_ptr_half_size_success) { +#ifdef _WIN32 + // TODO: implement support for windows + GTEST_SKIP() << "umfPoolAlignedMalloc() is not supported on Windows"; +#else /* !_WIN32 */ + umf_result_t umf_result; + size_t size_of_pool_from_ptr; + void *ptr_for_pool = nullptr; + void *ptr = nullptr; + + umf_memory_pool_handle_t pool_get = pool.get(); + const size_t size_of_first_alloc = 2 * 1024 * 1024; // 2MB + + if (!umf_test::isAlignedAllocSupported(pool_get)) { + GTEST_SKIP(); + } + + ptr_for_pool = umfPoolAlignedMalloc(pool_get, size_of_first_alloc, + utils_get_page_size()); + ASSERT_NE(ptr_for_pool, nullptr); + + // Create provider parameters + size_of_pool_from_ptr = size_of_first_alloc / 2; // half size + umf_fixed_memory_provider_params_handle_t params = nullptr; + umf_result = umfFixedMemoryProviderParamsCreate(¶ms, ptr_for_pool, + size_of_pool_from_ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + umf_memory_provider_handle_t providerFromPtr = nullptr; + umf_result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, + &providerFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(providerFromPtr, nullptr); + + umf_memory_pool_handle_t poolFromPtr = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), providerFromPtr, nullptr, 0, + &poolFromPtr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + ptr = umfPoolMalloc(poolFromPtr, size_of_pool_from_ptr); 
+ ASSERT_NE(ptr, nullptr); + + memset(ptr, 0xFF, size_of_pool_from_ptr); + + umf_result = umfPoolFree(poolFromPtr, ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(poolFromPtr); + umfMemoryProviderDestroy(providerFromPtr); + umfFixedMemoryProviderParamsDestroy(params); + + umf_result = umfPoolFree(pool_get, ptr_for_pool); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +#endif /* !_WIN32 */ +} + #endif /* UMF_TEST_POOL_FIXTURES_HPP */ From bb4a5e48f9e903fddc1fa92d88db9a61c7ce2331 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 28 Feb 2025 16:43:39 +0100 Subject: [PATCH 230/466] Add tests for tracking provider Signed-off-by: Lukasz Dorau --- test/CMakeLists.txt | 4 + test/provider_tracking.cpp | 374 +++++++++++++++++++++++++++++++++++++ 2 files changed, 378 insertions(+) create mode 100644 test/provider_tracking.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 37f4c809e..e47ce5a39 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -343,6 +343,10 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented NAME provider_fixed_memory SRCS provider_fixed_memory.cpp LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( + NAME provider_tracking + SRCS provider_tracking.cpp + LIBS ${UMF_UTILS_FOR_TEST}) # This test requires Linux-only file memory provider if(UMF_POOL_JEMALLOC_ENABLED) diff --git a/test/provider_tracking.cpp b/test/provider_tracking.cpp new file mode 100644 index 000000000..864c15564 --- /dev/null +++ b/test/provider_tracking.cpp @@ -0,0 +1,374 @@ +// Copyright (C) 2025 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "base.hpp" + +#include "cpp_helpers.hpp" +#include "test_helpers.h" +#ifndef _WIN32 +#include "test_helpers_linux.h" +#endif + +#include +#include +#include + +using umf_test::test; + +#define FIXED_BUFFER_SIZE (512 * utils_get_page_size()) +#define INVALID_PTR ((void *)0x01) + +using providerCreateExtParams = std::tuple; + +static void providerCreateExt(providerCreateExtParams params, + umf::provider_unique_handle_t *handle) { + umf_memory_provider_handle_t hProvider = nullptr; + auto [provider_ops, provider_params] = params; + + auto ret = + umfMemoryProviderCreate(provider_ops, provider_params, &hProvider); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(hProvider, nullptr); + + *handle = + umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); +} + +struct TrackingProviderTest + : umf_test::test, + ::testing::WithParamInterface { + void SetUp() override { + test::SetUp(); + + // Allocate a memory buffer to use with the fixed memory provider + memory_size = FIXED_BUFFER_SIZE; + memory_buffer = malloc(memory_size); + ASSERT_NE(memory_buffer, nullptr); + + // Create provider parameters + umf_fixed_memory_provider_params_handle_t params = nullptr; + umf_result_t res = umfFixedMemoryProviderParamsCreate( + ¶ms, memory_buffer, memory_size); + ASSERT_EQ(res, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + providerCreateExt(std::make_tuple(umfFixedMemoryProviderOps(), params), + &provider); + + umfFixedMemoryProviderParamsDestroy(params); + umf_result_t umf_result = + umfMemoryProviderGetMinPageSize(provider.get(), NULL, &page_size); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + page_plus_64 = page_size + 64; + + umf_memory_pool_handle_t hPool = nullptr; + umf_result = umfPoolCreate(umfProxyPoolOps(), provider.get(), nullptr, + 0, &hPool); + ASSERT_EQ(umf_result, 
UMF_RESULT_SUCCESS); + + pool = umf::pool_unique_handle_t(hPool, &umfPoolDestroy); + } + + void TearDown() override { + if (memory_buffer) { + free(memory_buffer); + memory_buffer = nullptr; + } + test::TearDown(); + } + + umf::provider_unique_handle_t provider; + umf::pool_unique_handle_t pool; + size_t page_size; + size_t page_plus_64; + void *memory_buffer = nullptr; + size_t memory_size = 0; +}; + +static void +createPoolFromAllocation(void *ptr0, size_t size1, + umf_memory_provider_handle_t *_providerFromPtr, + umf_memory_pool_handle_t *_poolFromPtr) { + umf_result_t umf_result; + + // Create provider parameters + umf_fixed_memory_provider_params_handle_t params = nullptr; + umf_result = umfFixedMemoryProviderParamsCreate(¶ms, ptr0, size1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(params, nullptr); + + umf_memory_provider_handle_t provider1 = nullptr; + umf_result = umfMemoryProviderCreate(umfFixedMemoryProviderOps(), params, + &provider1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider1, nullptr); + + umf_memory_pool_handle_t pool1 = nullptr; + umf_result = + umfPoolCreate(umfProxyPoolOps(), provider1, nullptr, 0, &pool1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfFixedMemoryProviderParamsDestroy(params); + + *_providerFromPtr = provider1; + *_poolFromPtr = pool1; +} + +// TESTS + +INSTANTIATE_TEST_SUITE_P(trackingProviderTest, TrackingProviderTest, + ::testing::Values(providerCreateExtParams{ + umfFixedMemoryProviderOps(), nullptr})); + +TEST_P(TrackingProviderTest, create_destroy) { + // Creation and destruction are handled in SetUp and TearDown +} + +TEST_P(TrackingProviderTest, whole_size_success) { + umf_result_t umf_result; + size_t size0; + size_t size1; + void *ptr0 = nullptr; + void *ptr1 = nullptr; + + umf_memory_pool_handle_t pool0 = pool.get(); + + size0 = FIXED_BUFFER_SIZE - (2 * page_size); + ptr0 = umfPoolAlignedMalloc(pool0, size0, utils_get_page_size()); + ASSERT_NE(ptr0, nullptr); + + size1 = size0; // whole size + + umf_memory_provider_handle_t provider1 = nullptr; + umf_memory_pool_handle_t pool1 = nullptr; + createPoolFromAllocation(ptr0, size1, &provider1, &pool1); + + ptr1 = umfPoolMalloc(pool1, size1); + ASSERT_NE(ptr1, nullptr); + + umf_result = umfPoolFree(pool1, ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(pool1); + umfMemoryProviderDestroy(provider1); + + umf_result = umfPoolFree(pool0, ptr0); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_P(TrackingProviderTest, half_size_success) { + umf_result_t umf_result; + size_t size0; + size_t size1; + void *ptr0 = nullptr; + void *ptr1 = nullptr; + + umf_memory_pool_handle_t pool0 = pool.get(); + + size0 = FIXED_BUFFER_SIZE - (2 * page_size); + ptr0 = umfPoolAlignedMalloc(pool0, size0, utils_get_page_size()); + ASSERT_NE(ptr0, nullptr); + + size1 = size0 / 2; // half size + + umf_memory_provider_handle_t provider1 = nullptr; + umf_memory_pool_handle_t pool1 = nullptr; + createPoolFromAllocation(ptr0, size1, &provider1, &pool1); + + ptr1 = umfPoolMalloc(pool1, size1); + ASSERT_NE(ptr1, nullptr); + + umf_result = umfPoolFree(pool1, ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(pool1); + umfMemoryProviderDestroy(provider1); + + umf_result = umfPoolFree(pool0, ptr0); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +TEST_P(TrackingProviderTest, failure_exceeding_size) { + umf_result_t umf_result; + size_t size0; + size_t size1; + void *ptr0 = nullptr; + void *ptr1 = nullptr; + + umf_memory_pool_handle_t pool0 = 
pool.get(); + + size0 = FIXED_BUFFER_SIZE - (2 * page_size); + ptr0 = umfPoolAlignedMalloc(pool0, size0, utils_get_page_size()); + ASSERT_NE(ptr0, nullptr); + + size1 = FIXED_BUFFER_SIZE - page_size; // exceeding size + + umf_memory_provider_handle_t provider1 = nullptr; + umf_memory_pool_handle_t pool1 = nullptr; + createPoolFromAllocation(ptr0, size1, &provider1, &pool1); + + ptr1 = umfPoolMalloc(pool1, size1); + ASSERT_EQ(ptr1, nullptr); + + umfPoolDestroy(pool1); + umfMemoryProviderDestroy(provider1); + + umf_result = umfPoolFree(pool0, ptr0); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); +} + +#define MAX_ARRAY 9 +#define TEST_LEVEL_SUCCESS 7 +#define TEST_LEVEL_FAILURE 8 + +TEST_P(TrackingProviderTest, success_max_levels) { + umf_result_t umf_result; + size_t size; + void *ptr[MAX_ARRAY] = {0}; + umf_memory_provider_handle_t providers[MAX_ARRAY] = {0}; + umf_memory_pool_handle_t pools[MAX_ARRAY] = {0}; + + size = FIXED_BUFFER_SIZE - (2 * page_size); + pools[0] = pool.get(); + + for (int i = 0; i < TEST_LEVEL_SUCCESS; i++) { + fprintf(stderr, "Alloc #%d\n", i); + ptr[i] = umfPoolAlignedMalloc(pools[i], size, utils_get_page_size()); + ASSERT_NE(ptr[i], nullptr); + + createPoolFromAllocation(ptr[i], size, &providers[i + 1], + &pools[i + 1]); + } + + int s = TEST_LEVEL_SUCCESS; + fprintf(stderr, "Alloc #%d\n", s); + ptr[s] = umfPoolAlignedMalloc(pools[s], size, utils_get_page_size()); + ASSERT_NE(ptr[s], nullptr); + + fprintf(stderr, "Free #%d\n", s); + umf_result = umfPoolFree(pools[s], ptr[s]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + for (int i = TEST_LEVEL_SUCCESS - 1; i >= 0; i--) { + umfPoolDestroy(pools[i + 1]); + umfMemoryProviderDestroy(providers[i + 1]); + + fprintf(stderr, "Free #%d\n", i); + umf_result = umfPoolFree(pools[i], ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } +} + +TEST_P(TrackingProviderTest, failure_exceeding_levels) { + umf_result_t umf_result; + size_t size; + void *ptr[MAX_ARRAY] = {0}; + umf_memory_provider_handle_t providers[MAX_ARRAY] = {0}; + umf_memory_pool_handle_t pools[MAX_ARRAY] = {0}; + + size = FIXED_BUFFER_SIZE - (2 * page_size); + pools[0] = pool.get(); + + for (int i = 0; i < TEST_LEVEL_FAILURE; i++) { + fprintf(stderr, "Alloc #%d\n", i); + ptr[i] = umfPoolAlignedMalloc(pools[i], size, utils_get_page_size()); + ASSERT_NE(ptr[i], nullptr); + + createPoolFromAllocation(ptr[i], size, &providers[i + 1], + &pools[i + 1]); + } + + // tracker level is too high + int f = TEST_LEVEL_FAILURE; + fprintf(stderr, "Alloc #%d\n", f); + ptr[f] = umfPoolAlignedMalloc(pools[f], size, utils_get_page_size()); + ASSERT_EQ(ptr[f], nullptr); + + for (int i = TEST_LEVEL_FAILURE - 1; i >= 0; i--) { + umfPoolDestroy(pools[i + 1]); + umfMemoryProviderDestroy(providers[i + 1]); + + fprintf(stderr, "Free #%d\n", i); + umf_result = umfPoolFree(pools[i], ptr[i]); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + } +} + +TEST_P(TrackingProviderTest, reverted_free_half_size) { + umf_result_t umf_result; + size_t size0; + size_t size1; + void *ptr0 = nullptr; + void *ptr1 = nullptr; + + umf_memory_pool_handle_t pool0 = pool.get(); + + size0 = FIXED_BUFFER_SIZE - (2 * page_size); + ptr0 = umfPoolAlignedMalloc(pool0, size0, utils_get_page_size()); + ASSERT_NE(ptr0, nullptr); + + umf_memory_provider_handle_t provider1 = nullptr; + umf_memory_pool_handle_t pool1 = nullptr; + createPoolFromAllocation(ptr0, size0, &provider1, &pool1); + + size1 = size0 / 2; // half size + + ptr1 = umfPoolMalloc(pool1, size1); + ASSERT_NE(ptr1, nullptr); + + // Freeing the "busy" 
pointer from the first pool is an Undefined Behavior + // It fails now if the sizes are different. + // see: https://github.com/oneapi-src/unified-memory-framework/pull/1161 + umf_result = umfPoolFree(pool0, ptr0); + + umf_result = umfPoolFree(pool1, ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(pool1); + umfMemoryProviderDestroy(provider1); + + // It could have been freed above, + // so we cannot verify the result here. + umf_result = umfPoolFree(pool0, ptr0); +} + +TEST_P(TrackingProviderTest, reverted_free_the_same_size) { + umf_result_t umf_result; + size_t size0; + size_t size1; + void *ptr0 = nullptr; + void *ptr1 = nullptr; + + umf_memory_pool_handle_t pool0 = pool.get(); + + size0 = FIXED_BUFFER_SIZE - (2 * page_size); + ptr0 = umfPoolAlignedMalloc(pool0, size0, utils_get_page_size()); + ASSERT_NE(ptr0, nullptr); + + umf_memory_provider_handle_t provider1 = nullptr; + umf_memory_pool_handle_t pool1 = nullptr; + createPoolFromAllocation(ptr0, size0, &provider1, &pool1); + + size1 = size0; // the same size + + ptr1 = umfPoolMalloc(pool1, size1); + ASSERT_NE(ptr1, nullptr); + + // Freeing the "busy" pointer from the first pool is an Undefined Behavior + // It succeeds now if the sizes are equal. + // see: https://github.com/oneapi-src/unified-memory-framework/pull/1161 + umf_result = umfPoolFree(pool0, ptr0); + + // try to free the pointer from the second pool (the same size) + umf_result = umfPoolFree(pool1, ptr1); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfPoolDestroy(pool1); + umfMemoryProviderDestroy(provider1); + + // It could have been freed above, + // so we cannot verify the result here. + umf_result = umfPoolFree(pool0, ptr0); +} From c1b9f1bdc019d769a716f19b356b10c15ba22cdf Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 3 Mar 2025 12:34:48 +0100 Subject: [PATCH 231/466] Add provider_tracking_fixture_tests Signed-off-by: Lukasz Dorau --- test/CMakeLists.txt | 5 ++ test/provider_tracking_fixture_tests.cpp | 91 ++++++++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 test/provider_tracking_fixture_tests.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e47ce5a39..5f244b60e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -347,6 +347,11 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented NAME provider_tracking SRCS provider_tracking.cpp LIBS ${UMF_UTILS_FOR_TEST}) + add_umf_test( + NAME provider_tracking_fixture_tests + SRCS provider_tracking_fixture_tests.cpp malloc_compliance_tests.cpp + ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST}) # This test requires Linux-only file memory provider if(UMF_POOL_JEMALLOC_ENABLED) diff --git a/test/provider_tracking_fixture_tests.cpp b/test/provider_tracking_fixture_tests.cpp new file mode 100644 index 000000000..05b87f87f --- /dev/null +++ b/test/provider_tracking_fixture_tests.cpp @@ -0,0 +1,91 @@ +// Copyright (C) 2025 Intel Corporation +// Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include + +#include "base.hpp" +#include "provider.hpp" + +#include "cpp_helpers.hpp" +#include "test_helpers.h" +#ifndef _WIN32 +#include "test_helpers_linux.h" +#endif + +#include "poolFixtures.hpp" + +#define FILE_PATH ((char *)"tmp_file") + +struct provider_from_pool : public umf_test::provider_base_t { + umf_memory_pool_handle_t pool; + umf_result_t initialize(umf_memory_pool_handle_t _pool) noexcept { + if (!_pool) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + pool = _pool; + return UMF_RESULT_SUCCESS; + } + umf_result_t alloc(size_t size, size_t align, void **ptr) noexcept { + *ptr = umfPoolAlignedMalloc(pool, size, align); + return (*ptr) ? UMF_RESULT_SUCCESS + : UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + umf_result_t free(void *ptr, size_t) noexcept { + return umfPoolFree(pool, ptr); + } + const char *get_name() noexcept { return "provider_from_pool"; } + + virtual ~provider_from_pool() { + if (pool) { + umfPoolDestroy(pool); + pool = nullptr; + } + } +}; + +umf_memory_provider_ops_t PROVIDER_FROM_POOL_OPS = + umf::providerMakeCOps(); + +static void *providerFromPoolParamsCreate(void) { + umf_file_memory_provider_params_handle_t paramsFile = NULL; + umf_result_t umf_result = + umfFileMemoryProviderParamsCreate(¶msFile, FILE_PATH); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_NE(paramsFile, nullptr); + + umf_memory_provider_handle_t providerFile = nullptr; + umf_result = umfMemoryProviderCreate(umfFileMemoryProviderOps(), paramsFile, + &providerFile); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_NE(providerFile, nullptr); + + umf_memory_pool_handle_t poolProxyFile = nullptr; + umf_result = + umfPoolCreate(umfProxyPoolOps(), providerFile, nullptr, + UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &poolProxyFile); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + EXPECT_NE(poolProxyFile, nullptr); + + umf_result = umfFileMemoryProviderParamsDestroy(paramsFile); + EXPECT_EQ(umf_result, UMF_RESULT_SUCCESS); + paramsFile = nullptr; + + return poolProxyFile; +} + +// TESTS + +INSTANTIATE_TEST_SUITE_P(TrackingProviderPoolTest, umfPoolTest, + ::testing::Values(poolCreateExtParams{ + umfProxyPoolOps(), nullptr, nullptr, + &PROVIDER_FROM_POOL_OPS, + providerFromPoolParamsCreate, nullptr})); + +INSTANTIATE_TEST_SUITE_P(TrackingProviderMultiPoolTest, umfMultiPoolTest, + ::testing::Values(poolCreateExtParams{ + umfProxyPoolOps(), nullptr, nullptr, + &PROVIDER_FROM_POOL_OPS, + providerFromPoolParamsCreate, nullptr})); From 58007d625618b34a7ae36bfae799950a19d6b4be Mon Sep 17 00:00:00 2001 From: "Vinogradov, Sergei" Date: Tue, 17 Dec 2024 11:16:09 +0100 Subject: [PATCH 232/466] Implement size limit for the cache of opened IPC handles --- src/ipc.c | 14 +-- src/ipc_cache.c | 50 +++++++- src/ipc_cache.h | 2 + src/provider/provider_tracking.c | 210 ++++++++++++++++++++++++++----- src/provider/provider_tracking.h | 9 ++ test/ipcFixtures.hpp | 64 ---------- 6 files changed, 240 insertions(+), 109 deletions(-) diff --git a/src/ipc.c b/src/ipc.c index 12c7bb978..d4e5cc806 100644 --- a/src/ipc.c +++ b/src/ipc.c @@ -146,19 +146,15 @@ umf_result_t umfOpenIPCHandle(umf_ipc_handler_handle_t hIPCHandler, } umf_result_t umfCloseIPCHandle(void *ptr) { - umf_alloc_info_t allocInfo; - umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); + umf_ipc_info_t ipcInfo; + umf_result_t ret = umfMemoryTrackerGetIpcInfo(ptr, &ipcInfo); if (ret != UMF_RESULT_SUCCESS) { - LOG_ERR("cannot get alloc info for ptr = %p.", ptr); 
+ LOG_ERR("cannot get IPC info for ptr = %p.", ptr); return ret; } - // We cannot use umfPoolGetMemoryProvider function because it returns - // upstream provider but we need tracking one - umf_memory_provider_handle_t hProvider = allocInfo.pool->provider; - - return umfMemoryProviderCloseIPCHandle(hProvider, allocInfo.base, - allocInfo.baseSize); + return umfMemoryProviderCloseIPCHandle(ipcInfo.provider, ipcInfo.base, + ipcInfo.baseSize); } umf_result_t umfPoolGetIPCHandler(umf_memory_pool_handle_t hPool, diff --git a/src/ipc_cache.c b/src/ipc_cache.c index 6d5d39e4f..bf17a66a4 100644 --- a/src/ipc_cache.c +++ b/src/ipc_cache.c @@ -54,6 +54,22 @@ typedef struct ipc_opened_cache_t { ipc_opened_cache_global_t *IPC_OPENED_CACHE_GLOBAL = NULL; +// Returns value of the UMF_MAX_OPENED_IPC_HANDLES environment variable +// or 0 if it is not set. +static size_t umfIpcCacheGlobalInitMaxOpenedHandles(void) { + const char *max_size_str = getenv("UMF_MAX_OPENED_IPC_HANDLES"); + if (max_size_str) { + char *endptr; + size_t max_size = strtoul(max_size_str, &endptr, 10); + if (*endptr == '\0') { + return max_size; + } + LOG_ERR("Invalid value of UMF_MAX_OPENED_IPC_HANDLES: %s", + max_size_str); + } + return 0; +} + umf_result_t umfIpcCacheGlobalInit(void) { umf_result_t ret = UMF_RESULT_SUCCESS; ipc_opened_cache_global_t *cache_global = @@ -78,8 +94,7 @@ umf_result_t umfIpcCacheGlobalInit(void) { goto err_mutex_destroy; } - // TODO: make max_size configurable via environment variable - cache_global->max_size = 0; + cache_global->max_size = umfIpcCacheGlobalInitMaxOpenedHandles(); cache_global->cur_size = 0; cache_global->lru_list = NULL; @@ -191,7 +206,19 @@ umf_result_t umfIpcOpenedCacheGet(ipc_opened_cache_handle_t cache, if (entry == NULL && cache->global->max_size != 0 && cache->global->cur_size >= cache->global->max_size) { // If max_size is set and the cache is full, evict the least recently used entry. 
- entry = cache->global->lru_list->prev; + // we need to search for the least recently used entry with ref_count == 0 + // The utlist implementation of the doubly-linked list keeps a tail pointer in head->prev + ipc_opened_cache_entry_t *candidate = cache->global->lru_list->prev; + do { + uint64_t ref_count = 0; + utils_atomic_load_acquire_u64(&candidate->ref_count, + &ref_count); + if (ref_count == 0) { + entry = candidate; + break; + } + candidate = candidate->prev; + } while (candidate != cache->global->lru_list->prev); } if (entry) { // we have eviction candidate @@ -244,3 +271,20 @@ umf_result_t umfIpcOpenedCacheGet(ipc_opened_cache_handle_t cache, return ret; } + +umf_result_t +umfIpcHandleMappedCacheRelease(ipc_opened_cache_value_t *cacheValue) { + if (!cacheValue) { + LOG_ERR("cacheValue is NULL"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + // get pointer to the entry + ipc_opened_cache_entry_t *entry = + (ipc_opened_cache_entry_t *)((char *)cacheValue - + offsetof(ipc_opened_cache_entry_t, value)); + // decrement the ref count + utils_atomic_decrement_u64(&entry->ref_count); + + return UMF_RESULT_SUCCESS; +} diff --git a/src/ipc_cache.h b/src/ipc_cache.h index 80870d373..545c6e1e7 100644 --- a/src/ipc_cache.h +++ b/src/ipc_cache.h @@ -47,4 +47,6 @@ umf_result_t umfIpcOpenedCacheGet(ipc_opened_cache_handle_t cache, uint64_t handle_id, ipc_opened_cache_value_t **retEntry); +umf_result_t +umfIpcHandleMappedCacheRelease(ipc_opened_cache_value_t *cacheValue); #endif /* UMF_IPC_CACHE_H */ diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index bc560304c..92d3dd59c 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -21,6 +21,7 @@ #include "critnib.h" #include "ipc_cache.h" #include "ipc_internal.h" +#include "memory_pool_internal.h" #include "provider_tracking.h" #include "utils_common.h" #include "utils_concurrency.h" @@ -38,6 +39,8 @@ struct umf_memory_tracker_t { // for another memory pool (nested memory pooling). critnib *alloc_segments_map[MAX_LEVELS_OF_ALLOC_SEGMENT_MAP]; utils_mutex_t splitMergeMutex; + umf_ba_pool_t *ipc_info_allocator; + critnib *ipc_segments_map; }; typedef struct tracker_alloc_info_t { @@ -49,6 +52,12 @@ typedef struct tracker_alloc_info_t { size_t n_children; } tracker_alloc_info_t; +typedef struct tracker_ipc_info_t { + size_t size; + umf_memory_provider_handle_t provider; + ipc_opened_cache_value_t *ipc_cache_value; +} tracker_ipc_info_t; + // Get the most nested (on the highest level) allocation segment in the map with the `ptr` key. // If `no_children` is set to 1, the function will return the entry // only if it has no children on the higher level. 
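/*
 * Editorial aside (not part of the patch): the new ipc_segments_map is keyed
 * by the mapped base address of each opened IPC segment. Looking up an
 * arbitrary pointer that lies *inside* a segment therefore uses critnib_find()
 * with FIND_LE plus a bounds check - the same idiom umfMemoryTrackerGetIpcInfo()
 * uses in the hunk below. A minimal sketch, assuming a critnib map holding
 * tracker_ipc_info_t values; the helper name is hypothetical:
 */
static tracker_ipc_info_t *example_find_ipc_segment(critnib *map,
                                                    const void *ptr) {
    uintptr_t rkey = 0;
    tracker_ipc_info_t *rvalue = NULL;

    // find the entry with the greatest base address that is <= ptr
    int found = critnib_find(map, (uintptr_t)ptr, FIND_LE, (void *)&rkey,
                             (void **)&rvalue);
    if (!found || (uintptr_t)ptr >= rkey + rvalue->size) {
        return NULL; // ptr does not fall inside any tracked IPC segment
    }

    return rvalue; // ptr is inside [rkey, rkey + rvalue->size)
}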
@@ -267,6 +276,72 @@ static umf_result_t umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker, return UMF_RESULT_SUCCESS; } +static umf_result_t +umfMemoryTrackerAddIpcSegment(umf_memory_tracker_handle_t hTracker, + const void *ptr, size_t size, + umf_memory_provider_handle_t provider, + ipc_opened_cache_value_t *cache_entry) { + assert(hTracker); + assert(provider); + assert(cache_entry); + + tracker_ipc_info_t *value = umf_ba_alloc(hTracker->ipc_info_allocator); + + if (value == NULL) { + LOG_ERR("failed to allocate tracker_ipc_info_t, ptr=%p, size=%zu", ptr, + size); + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + value->size = size; + value->provider = provider; + value->ipc_cache_value = cache_entry; + + int ret = + critnib_insert(hTracker->ipc_segments_map, (uintptr_t)ptr, value, 0); + if (ret == 0) { + LOG_DEBUG("IPC memory region is added, tracker=%p, ptr=%p, size=%zu, " + "provider=%p, cache_entry=%p", + (void *)hTracker, ptr, size, provider, cache_entry); + return UMF_RESULT_SUCCESS; + } + + LOG_ERR("failed to insert tracker_ipc_info_t, ret=%d, ptr=%p, size=%zu, " + "provider=%p, cache_entry=%p", + ret, ptr, size, provider, cache_entry); + + umf_ba_free(hTracker->ipc_info_allocator, value); + + if (ret == ENOMEM) { + return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; + } + + return UMF_RESULT_ERROR_UNKNOWN; +} + +static umf_result_t +umfMemoryTrackerRemoveIpcSegment(umf_memory_tracker_handle_t hTracker, + const void *ptr) { + assert(ptr); + + void *value = critnib_remove(hTracker->ipc_segments_map, (uintptr_t)ptr); + + if (!value) { + LOG_ERR("pointer %p not found in the ipc_segments_map", ptr); + return UMF_RESULT_ERROR_UNKNOWN; + } + + tracker_ipc_info_t *v = value; + + LOG_DEBUG("IPC memory region removed: tracker=%p, ptr=%p, size=%zu, " + "provider=%p, cache_entry=%p", + (void *)hTracker, ptr, v->size, v->provider, v->ipc_cache_value); + + umf_ba_free(hTracker->ipc_info_allocator, value); + + return UMF_RESULT_SUCCESS; +} + umf_memory_pool_handle_t umfMemoryTrackerGetPool(const void *ptr) { umf_alloc_info_t allocInfo = {NULL, 0, NULL}; umf_result_t ret = umfMemoryTrackerGetAllocInfo(ptr, &allocInfo); @@ -331,6 +406,41 @@ umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, return UMF_RESULT_SUCCESS; } +umf_result_t umfMemoryTrackerGetIpcInfo(const void *ptr, + umf_ipc_info_t *pIpcInfo) { + assert(pIpcInfo); + + if (ptr == NULL) { + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (TRACKER == NULL) { + LOG_ERR("tracker does not exist"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + if (TRACKER->ipc_segments_map == NULL) { + LOG_ERR("tracker's ipc_segments_map does not exist"); + return UMF_RESULT_ERROR_NOT_SUPPORTED; + } + + uintptr_t rkey; + tracker_ipc_info_t *rvalue = NULL; + int found = critnib_find(TRACKER->ipc_segments_map, (uintptr_t)ptr, FIND_LE, + (void *)&rkey, (void **)&rvalue); + if (!found || (uintptr_t)ptr >= rkey + rvalue->size) { + LOG_DEBUG("pointer %p not found in the tracker, TRACKER=%p", ptr, + (void *)TRACKER); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + pIpcInfo->base = (void *)rkey; + pIpcInfo->baseSize = rvalue->size; + pIpcInfo->provider = rvalue->provider; + + return UMF_RESULT_SUCCESS; +} + // Cache entry structure to store provider-specific IPC data. // providerIpcData is a Flexible Array Member because its size varies // depending on the provider. 
@@ -872,17 +982,17 @@ ipcOpenedCacheEvictionCallback(const ipc_opened_cache_key_t *key, const ipc_opened_cache_value_t *value) { umf_tracking_memory_provider_t *p = (umf_tracking_memory_provider_t *)key->local_provider; - // umfMemoryTrackerRemove should be called before umfMemoryProviderCloseIPCHandle + // umfMemoryTrackerRemoveIpcSegment should be called before umfMemoryProviderCloseIPCHandle // to avoid a race condition. If the order would be different, other thread - // could allocate the memory at address `ptr` before a call to umfMemoryTrackerRemove + // could allocate the memory at address `ptr` before a call to umfMemoryTrackerRemoveIpcSegment // resulting in inconsistent state. if (value->mapped_base_ptr) { - umf_result_t ret = - umfMemoryTrackerRemove(p->hTracker, value->mapped_base_ptr); + umf_result_t ret = umfMemoryTrackerRemoveIpcSegment( + p->hTracker, value->mapped_base_ptr); if (ret != UMF_RESULT_SUCCESS) { // DO NOT return an error here, because the tracking provider // cannot change behaviour of the upstream provider. - LOG_ERR("failed to remove the region from the tracker, ptr=%p, " + LOG_ERR("failed to remove the region from the IPC tracker, ptr=%p, " "size=%zu, ret = %d", value->mapped_base_ptr, value->mapped_size, ret); } @@ -895,12 +1005,13 @@ ipcOpenedCacheEvictionCallback(const ipc_opened_cache_key_t *key, } } -static umf_result_t upstreamOpenIPCHandle(umf_tracking_memory_provider_t *p, - void *providerIpcData, - size_t bufferSize, void **ptr) { +static umf_result_t +upstreamOpenIPCHandle(umf_tracking_memory_provider_t *p, void *providerIpcData, + size_t bufferSize, + ipc_opened_cache_value_t *cache_entry) { void *mapped_ptr = NULL; assert(p != NULL); - assert(ptr != NULL); + assert(cache_entry != NULL); umf_result_t ret = umfMemoryProviderOpenIPCHandle( p->hUpstream, providerIpcData, &mapped_ptr); if (ret != UMF_RESULT_SUCCESS) { @@ -909,7 +1020,21 @@ static umf_result_t upstreamOpenIPCHandle(umf_tracking_memory_provider_t *p, } assert(mapped_ptr != NULL); - ret = umfMemoryTrackerAdd(p->hTracker, p->pool, mapped_ptr, bufferSize); + // Today umfMemoryTrackerAddIpcSegment requires the memory provider handle + // to know which tracking provider instance opened the IPC handle. + // The `p` points to the tracking provider private data. + // Because of that we get handle to the tracking provider instance + // using `p->pool->provider`. + // + // TODO: + // Today we always create a pool and get an IPC handler from the pool. + // And tracking provider is always created together with a pool. + // And the IPC handler is a tracking memory provider in fact. + // However, we are considering adding an API that allows IPC handler creation + // from scratch (without creating a memory pool). In that case, we will + // create a tracker provider without a pool. So p->pool might be NULL in the future. 
+ ret = umfMemoryTrackerAddIpcSegment(p->hTracker, mapped_ptr, bufferSize, + p->pool->provider, cache_entry); if (ret != UMF_RESULT_SUCCESS) { LOG_ERR("failed to add IPC region to the tracker, ptr=%p, " "size=%zu, " @@ -924,7 +1049,8 @@ static umf_result_t upstreamOpenIPCHandle(umf_tracking_memory_provider_t *p, return ret; } - *ptr = mapped_ptr; + cache_entry->mapped_size = bufferSize; + utils_atomic_store_release_ptr(&(cache_entry->mapped_base_ptr), mapped_ptr); return UMF_RESULT_SUCCESS; } @@ -959,45 +1085,46 @@ static umf_result_t trackingOpenIpcHandle(void *provider, void *providerIpcData, void *mapped_ptr = NULL; utils_atomic_load_acquire_ptr(&(cache_entry->mapped_base_ptr), (void **)&mapped_ptr); - if (mapped_ptr == NULL) { + if (mapped_ptr == NULL) { // new cache entry utils_mutex_lock(&(cache_entry->mmap_lock)); utils_atomic_load_acquire_ptr(&(cache_entry->mapped_base_ptr), (void **)&mapped_ptr); if (mapped_ptr == NULL) { ret = upstreamOpenIPCHandle(p, providerIpcData, - ipcUmfData->baseSize, &mapped_ptr); - if (ret == UMF_RESULT_SUCCESS) { - // Put to the cache - cache_entry->mapped_size = ipcUmfData->baseSize; - utils_atomic_store_release_ptr(&(cache_entry->mapped_base_ptr), - mapped_ptr); - } + ipcUmfData->baseSize, cache_entry); } + mapped_ptr = cache_entry->mapped_base_ptr; utils_mutex_unlock(&(cache_entry->mmap_lock)); } if (ret == UMF_RESULT_SUCCESS) { + assert(mapped_ptr != NULL); *ptr = mapped_ptr; } return ret; } +static tracker_ipc_info_t *getTrackerIpcInfo(const void *ptr) { + assert(ptr); + + uintptr_t key = (uintptr_t)ptr; + tracker_ipc_info_t *value = critnib_get(TRACKER->ipc_segments_map, key); + + return value; +} + static umf_result_t trackingCloseIpcHandle(void *provider, void *ptr, size_t size) { (void)provider; - (void)ptr; - (void)size; - // We keep opened IPC handles in the p->hIpcMappedCache. - // IPC handle is closed when it is evicted from the cache - // or when cache is destroyed. - // - // TODO: today the size of the IPC cache is infinite. - // When the threshold for the cache size is implemented - // we need to introduce a reference counting mechanism. - // The trackingOpenIpcHandle will increment the refcount for the corresponding entry. - // The trackingCloseIpcHandle will decrement the refcount for the corresponding cache entry. 
- return UMF_RESULT_SUCCESS; + tracker_ipc_info_t *trackerIpcInfo = getTrackerIpcInfo(ptr); + + if (!trackerIpcInfo) { + LOG_ERR("failed to get tracker ipc info, ptr=%p, size=%zu", ptr, size); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + return umfIpcHandleMappedCacheRelease(trackerIpcInfo->ipc_cache_value); } umf_memory_provider_ops_t UMF_TRACKING_MEMORY_PROVIDER_OPS = { @@ -1086,16 +1213,29 @@ umf_memory_tracker_handle_t umfMemoryTrackerCreate(void) { for (i = 0; i < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP; i++) { handle->alloc_segments_map[i] = critnib_new(); if (!handle->alloc_segments_map[i]) { - goto err_destroy_mutex; + goto err_destroy_alloc_segments_map; } } + handle->ipc_info_allocator = + umf_ba_create(sizeof(struct tracker_ipc_info_t)); + if (!handle->ipc_info_allocator) { + goto err_destroy_alloc_segments_map; + } + + handle->ipc_segments_map = critnib_new(); + if (!handle->ipc_segments_map) { + goto err_destroy_ipc_info_allocator; + } + LOG_DEBUG("tracker created, handle=%p, alloc_segments_map=%p", (void *)handle, (void *)handle->alloc_segments_map); return handle; -err_destroy_mutex: +err_destroy_ipc_info_allocator: + umf_ba_destroy(handle->ipc_info_allocator); +err_destroy_alloc_segments_map: for (int j = i; j >= 0; j--) { if (handle->alloc_segments_map[j]) { critnib_delete(handle->alloc_segments_map[j]); @@ -1137,5 +1277,9 @@ void umfMemoryTrackerDestroy(umf_memory_tracker_handle_t handle) { utils_mutex_destroy_not_free(&handle->splitMergeMutex); umf_ba_destroy(handle->alloc_info_allocator); handle->alloc_info_allocator = NULL; + critnib_delete(handle->ipc_segments_map); + handle->ipc_segments_map = NULL; + umf_ba_destroy(handle->ipc_info_allocator); + handle->ipc_info_allocator = NULL; umf_ba_global_free(handle); } diff --git a/src/provider/provider_tracking.h b/src/provider/provider_tracking.h index 9e868cf31..842449be5 100644 --- a/src/provider/provider_tracking.h +++ b/src/provider/provider_tracking.h @@ -45,6 +45,15 @@ typedef struct umf_alloc_info_t { umf_result_t umfMemoryTrackerGetAllocInfo(const void *ptr, umf_alloc_info_t *pAllocInfo); +typedef struct umf_ipc_info_t { + void *base; + size_t baseSize; + umf_memory_provider_handle_t provider; +} umf_ipc_info_t; + +umf_result_t umfMemoryTrackerGetIpcInfo(const void *ptr, + umf_ipc_info_t *pIpcInfo); + // Creates a memory provider that tracks each allocation/deallocation through umf_memory_tracker_handle_t and // forwards all requests to hUpstream memory Provider. hUpstream lifetime should be managed by the user of this function. 
umf_result_t umfTrackingMemoryProviderCreate( diff --git a/test/ipcFixtures.hpp b/test/ipcFixtures.hpp index 57bd04079..c898c3663 100644 --- a/test/ipcFixtures.hpp +++ b/test/ipcFixtures.hpp @@ -389,70 +389,6 @@ TEST_P(umfIpcTest, BasicFlow) { EXPECT_EQ(stat.closeCount, stat.openCount); } -TEST_P(umfIpcTest, GetPoolByOpenedHandle) { - constexpr size_t SIZE = 100; - constexpr size_t NUM_ALLOCS = 100; - constexpr size_t NUM_POOLS = 4; - void *ptrs[NUM_ALLOCS]; - void *openedPtrs[NUM_POOLS][NUM_ALLOCS]; - std::vector pools_to_open; - umf::pool_unique_handle_t pool = makePool(); - ASSERT_NE(pool.get(), nullptr); - - for (size_t i = 0; i < NUM_POOLS; ++i) { - pools_to_open.push_back(makePool()); - } - - for (size_t i = 0; i < NUM_ALLOCS; ++i) { - void *ptr = umfPoolMalloc(pool.get(), SIZE); - ASSERT_NE(ptr, nullptr); - ptrs[i] = ptr; - } - - for (size_t i = 0; i < NUM_ALLOCS; ++i) { - umf_ipc_handle_t ipcHandle = nullptr; - size_t handleSize = 0; - umf_result_t ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - - for (size_t pool_id = 0; pool_id < NUM_POOLS; pool_id++) { - void *ptr = nullptr; - umf_ipc_handler_handle_t ipcHandler = nullptr; - ret = - umfPoolGetIPCHandler(pools_to_open[pool_id].get(), &ipcHandler); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - ASSERT_NE(ipcHandler, nullptr); - - ret = umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - openedPtrs[pool_id][i] = ptr; - } - - ret = umfPutIPCHandle(ipcHandle); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - } - - for (size_t pool_id = 0; pool_id < NUM_POOLS; pool_id++) { - for (size_t i = 0; i < NUM_ALLOCS; ++i) { - umf_memory_pool_handle_t openedPool = - umfPoolByPtr(openedPtrs[pool_id][i]); - EXPECT_EQ(openedPool, pools_to_open[pool_id].get()); - } - } - - for (size_t pool_id = 0; pool_id < NUM_POOLS; pool_id++) { - for (size_t i = 0; i < NUM_ALLOCS; ++i) { - umf_result_t ret = umfCloseIPCHandle(openedPtrs[pool_id][i]); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - } - } - - for (size_t i = 0; i < NUM_ALLOCS; ++i) { - umf_result_t ret = umfFree(ptrs[i]); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - } -} - TEST_P(umfIpcTest, AllocFreeAllocTest) { constexpr size_t SIZE = 64 * 1024; umf::pool_unique_handle_t pool = makePool(); From 02e38d7b0ca7ecc4a402313e92a5b29cd22a7dc1 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Mon, 17 Feb 2025 17:31:00 +0100 Subject: [PATCH 233/466] Add IPC test with UMF_MAX_OPENED_IPC_HANDLES set --- .cmake-format | 3 ++- test/CMakeLists.txt | 12 +++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/.cmake-format b/.cmake-format index c1a8e85a8..f5f413d51 100644 --- a/.cmake-format +++ b/.cmake-format @@ -26,7 +26,8 @@ with section("parse"): 'kwargs': { 'NAME': '*', 'SRCS': '*', - 'LIBS': '*'}}, + 'LIBS': '*', + 'ENVS': '*'}}, 'add_umf_library': { "pargs": 0, "flags": [], diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5f244b60e..e172115e1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -116,8 +116,9 @@ function(add_umf_test) # * NAME - a name of the test # * SRCS - source files # * LIBS - libraries to be linked with + # * ENVS - environment variables set(oneValueArgs NAME) - set(multiValueArgs SRCS LIBS) + set(multiValueArgs SRCS LIBS ENVS) cmake_parse_arguments( ARG "" @@ -139,6 +140,9 @@ function(add_umf_test) WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) set_tests_properties(${TEST_NAME} PROPERTIES LABELS "umf") + if(ARG_ENVS) + set_tests_properties(${TEST_NAME} PROPERTIES ENVIRONMENT 
${ARG_ENVS}) + endif() if(WINDOWS) # add PATH to DLL on Windows @@ -524,6 +528,12 @@ add_umf_test( SRCS ipcAPI.cpp ${BA_SOURCES_FOR_TEST} LIBS ${UMF_UTILS_FOR_TEST}) +add_umf_test( + NAME ipc_max_opened_limit + SRCS ipcAPI.cpp ${BA_SOURCES_FOR_TEST} + LIBS ${UMF_UTILS_FOR_TEST} + ENVS "UMF_MAX_OPENED_IPC_HANDLES=10") + add_umf_test(NAME ipc_negative SRCS ipc_negative.cpp) function(add_umf_ipc_test) From b40d71939104195ca2f0f2721890ba4349009ae3 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Fri, 21 Feb 2025 12:29:24 +0100 Subject: [PATCH 234/466] Add more IPC tests --- test/ipcFixtures.hpp | 241 +++++++++++++++++++++++++++++++------------ 1 file changed, 175 insertions(+), 66 deletions(-) diff --git a/test/ipcFixtures.hpp b/test/ipcFixtures.hpp index c898c3663..1fc57b900 100644 --- a/test/ipcFixtures.hpp +++ b/test/ipcFixtures.hpp @@ -68,6 +68,18 @@ using ipcTestParams = struct umfIpcTest : umf_test::test, ::testing::WithParamInterface { umfIpcTest() {} + size_t getOpenedIpcCacheSize() { + const char *max_size_str = getenv("UMF_MAX_OPENED_IPC_HANDLES"); + if (max_size_str) { + char *endptr; + size_t max_size = strtoul(max_size_str, &endptr, 10); + EXPECT_EQ(*endptr, '\0'); + if (*endptr == '\0') { + return max_size; + } + } + return 0; + } void SetUp() override { test::SetUp(); auto [pool_ops, pool_params_create, pool_params_destroy, provider_ops, @@ -80,6 +92,7 @@ struct umfIpcTest : umf_test::test, providerParamsCreate = provider_params_create; providerParamsDestroy = provider_params_destroy; memAccessor = accessor; + openedIpcCacheSize = getOpenedIpcCacheSize(); } void TearDown() override { test::TearDown(); } @@ -160,6 +173,7 @@ struct umfIpcTest : umf_test::test, umf_memory_provider_ops_t *providerOps = nullptr; pfnProviderParamsCreate providerParamsCreate = nullptr; pfnProviderParamsDestroy providerParamsDestroy = nullptr; + size_t openedIpcCacheSize = 0; void concurrentGetConcurrentPutHandles(bool shuffle) { std::vector ptrs; @@ -264,6 +278,156 @@ struct umfIpcTest : umf_test::test, pool.reset(nullptr); EXPECT_EQ(stat.putCount, stat.getCount); } + + void concurrentOpenConcurrentCloseHandles(bool shuffle) { + umf_result_t ret; + std::vector ptrs; + constexpr size_t ALLOC_SIZE = 100; + constexpr size_t NUM_POINTERS = 100; + umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); + + for (size_t i = 0; i < NUM_POINTERS; ++i) { + void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); + EXPECT_NE(ptr, nullptr); + ptrs.push_back(ptr); + } + + std::vector ipcHandles; + for (size_t i = 0; i < NUM_POINTERS; ++i) { + umf_ipc_handle_t ipcHandle; + size_t handleSize; + ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ipcHandles.push_back(ipcHandle); + } + + std::array, NTHREADS> openedIpcHandles; + umf_ipc_handler_handle_t ipcHandler = nullptr; + ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ipcHandler, nullptr); + + umf_test::syncthreads_barrier syncthreads(NTHREADS); + + auto openHandlesFn = [shuffle, &ipcHandles, &openedIpcHandles, + &syncthreads, ipcHandler](size_t tid) { + // Each thread gets a copy of the pointers to shuffle them + std::vector localIpcHandles = ipcHandles; + if (shuffle) { + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(localIpcHandles.begin(), localIpcHandles.end(), g); + } + syncthreads(); + for (auto ipcHandle : localIpcHandles) { + void *ptr; + umf_result_t ret = + umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr); + 
ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + openedIpcHandles[tid].push_back(ptr); + } + }; + + umf_test::parallel_exec(NTHREADS, openHandlesFn); + + auto closeHandlesFn = [&openedIpcHandles, &syncthreads](size_t tid) { + syncthreads(); + for (void *ptr : openedIpcHandles[tid]) { + umf_result_t ret = umfCloseIPCHandle(ptr); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + }; + + umf_test::parallel_exec(NTHREADS, closeHandlesFn); + + for (auto ipcHandle : ipcHandles) { + ret = umfPutIPCHandle(ipcHandle); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + + for (void *ptr : ptrs) { + ret = umfPoolFree(pool.get(), ptr); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + + pool.reset(nullptr); + EXPECT_EQ(stat.getCount, stat.allocCount); + EXPECT_EQ(stat.putCount, stat.getCount); + EXPECT_EQ(stat.openCount, stat.allocCount); + EXPECT_EQ(stat.openCount, stat.closeCount); + } + + void concurrentOpenCloseHandles(bool shuffle) { + umf_result_t ret; + std::vector ptrs; + constexpr size_t ALLOC_SIZE = 100; + constexpr size_t NUM_POINTERS = 100; + umf::pool_unique_handle_t pool = makePool(); + ASSERT_NE(pool.get(), nullptr); + + for (size_t i = 0; i < NUM_POINTERS; ++i) { + void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); + EXPECT_NE(ptr, nullptr); + ptrs.push_back(ptr); + } + + std::vector ipcHandles; + for (size_t i = 0; i < NUM_POINTERS; ++i) { + umf_ipc_handle_t ipcHandle; + size_t handleSize; + ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ipcHandles.push_back(ipcHandle); + } + + umf_ipc_handler_handle_t ipcHandler = nullptr; + ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ASSERT_NE(ipcHandler, nullptr); + + umf_test::syncthreads_barrier syncthreads(NTHREADS); + + auto openCloseHandlesFn = [shuffle, &ipcHandles, &syncthreads, + ipcHandler](size_t) { + // Each thread gets a copy of the pointers to shuffle them + std::vector localIpcHandles = ipcHandles; + if (shuffle) { + std::random_device rd; + std::mt19937 g(rd()); + std::shuffle(localIpcHandles.begin(), localIpcHandles.end(), g); + } + syncthreads(); + for (auto ipcHandle : localIpcHandles) { + void *ptr; + umf_result_t ret = + umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr); + ASSERT_EQ(ret, UMF_RESULT_SUCCESS); + ret = umfCloseIPCHandle(ptr); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + }; + + umf_test::parallel_exec(NTHREADS, openCloseHandlesFn); + + for (auto ipcHandle : ipcHandles) { + ret = umfPutIPCHandle(ipcHandle); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + + for (void *ptr : ptrs) { + ret = umfPoolFree(pool.get(), ptr); + EXPECT_EQ(ret, UMF_RESULT_SUCCESS); + } + + pool.reset(nullptr); + EXPECT_EQ(stat.getCount, stat.allocCount); + EXPECT_EQ(stat.putCount, stat.getCount); + if (openedIpcCacheSize == 0) { + EXPECT_EQ(stat.openCount, stat.allocCount); + } + EXPECT_EQ(stat.openCount, stat.closeCount); + } }; TEST_P(umfIpcTest, GetIPCHandleSize) { @@ -529,75 +693,20 @@ TEST_P(umfIpcTest, ConcurrentGetPutHandlesShuffled) { concurrentGetPutHandles(true); } -TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) { - umf_result_t ret; - std::vector ptrs; - constexpr size_t ALLOC_SIZE = 100; - constexpr size_t NUM_POINTERS = 100; - umf::pool_unique_handle_t pool = makePool(); - ASSERT_NE(pool.get(), nullptr); - - for (size_t i = 0; i < NUM_POINTERS; ++i) { - void *ptr = umfPoolMalloc(pool.get(), ALLOC_SIZE); - EXPECT_NE(ptr, nullptr); - ptrs.push_back(ptr); - } - - std::array ipcHandles; - for (size_t i = 0; i < NUM_POINTERS; ++i) { - umf_ipc_handle_t ipcHandle; - size_t 
handleSize; - ret = umfGetIPCHandle(ptrs[i], &ipcHandle, &handleSize); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - ipcHandles[i] = ipcHandle; - } - - std::array, NTHREADS> openedIpcHandles; - umf_ipc_handler_handle_t ipcHandler = nullptr; - ret = umfPoolGetIPCHandler(pool.get(), &ipcHandler); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - ASSERT_NE(ipcHandler, nullptr); - - umf_test::syncthreads_barrier syncthreads(NTHREADS); - - auto openHandlesFn = [&ipcHandles, &openedIpcHandles, &syncthreads, - ipcHandler](size_t tid) { - syncthreads(); - for (auto ipcHandle : ipcHandles) { - void *ptr; - umf_result_t ret = umfOpenIPCHandle(ipcHandler, ipcHandle, &ptr); - ASSERT_EQ(ret, UMF_RESULT_SUCCESS); - openedIpcHandles[tid].push_back(ptr); - } - }; - - umf_test::parallel_exec(NTHREADS, openHandlesFn); - - auto closeHandlesFn = [&openedIpcHandles, &syncthreads](size_t tid) { - syncthreads(); - for (void *ptr : openedIpcHandles[tid]) { - umf_result_t ret = umfCloseIPCHandle(ptr); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - } - }; - - umf_test::parallel_exec(NTHREADS, closeHandlesFn); +TEST_P(umfIpcTest, ConcurrentOpenConcurrentCloseHandles) { + concurrentOpenConcurrentCloseHandles(false); +} - for (auto ipcHandle : ipcHandles) { - ret = umfPutIPCHandle(ipcHandle); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - } +TEST_P(umfIpcTest, ConcurrentOpenConcurrentCloseHandlesShuffled) { + concurrentOpenConcurrentCloseHandles(true); +} - for (void *ptr : ptrs) { - ret = umfPoolFree(pool.get(), ptr); - EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - } +TEST_P(umfIpcTest, ConcurrentOpenCloseHandles) { + concurrentOpenCloseHandles(false); +} - pool.reset(nullptr); - EXPECT_EQ(stat.getCount, stat.allocCount); - EXPECT_EQ(stat.putCount, stat.getCount); - EXPECT_EQ(stat.openCount, stat.allocCount); - EXPECT_EQ(stat.openCount, stat.closeCount); +TEST_P(umfIpcTest, ConcurrentOpenCloseHandlesShuffled) { + concurrentOpenCloseHandles(true); } TEST_P(umfIpcTest, ConcurrentDestroyIpcHandlers) { From c96de6190c012d1b738df68adaa4ce3951e90103 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Mon, 17 Feb 2025 22:11:43 +0100 Subject: [PATCH 235/466] Suppress drd and helgrind error in the umf_test-ipc_max_opened_limit --- test/supp/drd-test_ipc_max_opened_limit.supp | 34 ++++++++++++ .../drd-test_provider_devdax_memory_ipc.supp | 11 ++++ .../drd-test_provider_file_memory_ipc.supp | 11 ++++ test/supp/drd-test_provider_os_memory.supp | 11 ++++ .../helgrind-test_ipc_max_opened_limit.supp | 53 +++++++++++++++++++ ...grind-test_provider_devdax_memory_ipc.supp | 11 ++++ ...elgrind-test_provider_file_memory_ipc.supp | 1 + .../helgrind-test_provider_os_memory.supp | 11 ++++ 8 files changed, 143 insertions(+) create mode 100644 test/supp/drd-test_ipc_max_opened_limit.supp create mode 100644 test/supp/helgrind-test_ipc_max_opened_limit.supp diff --git a/test/supp/drd-test_ipc_max_opened_limit.supp b/test/supp/drd-test_ipc_max_opened_limit.supp new file mode 100644 index 000000000..fbdbd0183 --- /dev/null +++ b/test/supp/drd-test_ipc_max_opened_limit.supp @@ -0,0 +1,34 @@ +{ + Conditional variable destruction false-positive + drd:CondErr + ... + fun:pthread_cond_destroy@* + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... 
+} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] trackingGetIpcHandle + drd:ConflictingAccess + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} diff --git a/test/supp/drd-test_provider_devdax_memory_ipc.supp b/test/supp/drd-test_provider_devdax_memory_ipc.supp index f6f12aa1e..31608d30c 100644 --- a/test/supp/drd-test_provider_devdax_memory_ipc.supp +++ b/test/supp/drd-test_provider_devdax_memory_ipc.supp @@ -2,6 +2,17 @@ [false-positive] Double check locking pattern in trackingOpenIpcHandle drd:ConflictingAccess fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/drd-test_provider_file_memory_ipc.supp b/test/supp/drd-test_provider_file_memory_ipc.supp index 72fd6d87c..9883001f7 100644 --- a/test/supp/drd-test_provider_file_memory_ipc.supp +++ b/test/supp/drd-test_provider_file_memory_ipc.supp @@ -10,6 +10,17 @@ [false-positive] Double check locking pattern in trackingOpenIpcHandle drd:ConflictingAccess fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/drd-test_provider_os_memory.supp b/test/supp/drd-test_provider_os_memory.supp index f6f12aa1e..31608d30c 100644 --- a/test/supp/drd-test_provider_os_memory.supp +++ b/test/supp/drd-test_provider_os_memory.supp @@ -2,6 +2,17 @@ [false-positive] Double check locking pattern in trackingOpenIpcHandle drd:ConflictingAccess fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + drd:ConflictingAccess + fun:utils_atomic_load_acquire_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/helgrind-test_ipc_max_opened_limit.supp b/test/supp/helgrind-test_ipc_max_opened_limit.supp new file mode 100644 index 000000000..04f3a9199 --- /dev/null +++ b/test/supp/helgrind-test_ipc_max_opened_limit.supp @@ -0,0 +1,53 @@ +{ + False-positive race in critnib_insert (lack of instrumentation) + Helgrind:Race + fun:utils_atomic_store_release_ptr + fun:critnib_insert + ... +} + +{ + False-positive race in critnib_find (lack of instrumentation) + Helgrind:Race + fun:find_predecessor + fun:find_le + fun:critnib_find + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:utils_atomic_store_release_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... 
+} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:utils_atomic_load_acquire_ptr + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] umfMemoryProviderGetIPCHandle + Helgrind:Race + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} + +{ + [false-positive] umfMemoryProviderGetIPCHandle + Helgrind:Race + fun:memmove + fun:trackingGetIpcHandle + fun:umfMemoryProviderGetIPCHandle + fun:umfGetIPCHandle +} diff --git a/test/supp/helgrind-test_provider_devdax_memory_ipc.supp b/test/supp/helgrind-test_provider_devdax_memory_ipc.supp index 4bc776f43..63e7d626c 100644 --- a/test/supp/helgrind-test_provider_devdax_memory_ipc.supp +++ b/test/supp/helgrind-test_provider_devdax_memory_ipc.supp @@ -2,6 +2,17 @@ [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:utils_atomic_load_acquire_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/helgrind-test_provider_file_memory_ipc.supp b/test/supp/helgrind-test_provider_file_memory_ipc.supp index de22665f5..11791e4ed 100644 --- a/test/supp/helgrind-test_provider_file_memory_ipc.supp +++ b/test/supp/helgrind-test_provider_file_memory_ipc.supp @@ -2,6 +2,7 @@ [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/helgrind-test_provider_os_memory.supp b/test/supp/helgrind-test_provider_os_memory.supp index 4bc776f43..63e7d626c 100644 --- a/test/supp/helgrind-test_provider_os_memory.supp +++ b/test/supp/helgrind-test_provider_os_memory.supp @@ -2,6 +2,17 @@ [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle + fun:trackingOpenIpcHandle + fun:umfMemoryProviderOpenIPCHandle + fun:umfOpenIPCHandle + ... +} + +{ + [false-positive] Double check locking pattern in trackingOpenIpcHandle + Helgrind:Race + fun:utils_atomic_load_acquire_ptr fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle From 72f02554ae1cc63c9646894f7d38195891ecdeee Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Fri, 21 Feb 2025 14:12:47 +0100 Subject: [PATCH 236/466] Update docs about IPC caching --- docs/config/api.rst | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/docs/config/api.rst b/docs/config/api.rst index 1c20d709c..97e664d97 100644 --- a/docs/config/api.rst +++ b/docs/config/api.rst @@ -168,6 +168,26 @@ IPC API allows retrieving IPC handles for the memory buffers allocated from UMF memory pools. The memory provider used by the pool should support IPC operations for this API to work. Otherwise IPC APIs return an error. +IPC caching +------------------------------------------ + +UMF employs IPC caching to avoid multiple IPC handles being created for the same +coarse-grain memory region allocated by the memory provider. 
UMF guarantees that +for each coarse-grain memory region allocated by the memory provider, only one +IPC handle is created when the :any:`umfGetIPCHandle` function is called. All +subsequent calls to the :any:`umfGetIPCHandle` function for the pointer to the +same memory region will return the entry from the cache. + +The same is true for the :any:`umfOpenIPCHandle` function. The actual mapping +of the IPC handle to the virtual address space is created only once, and all +subsequent calls to open the same IPC handle will return the entry from the cache. +The size of the cache for opened IPC handles is controlled by the ``UMF_MAX_OPENED_IPC_HANDLES`` +environment variable. By default, the cache size is unlimited. However, if the environment +variable is set and the cache size exceeds the limit, old items will be evicted. UMF tracks +the ref count for each entry in the cache and can evict only items with the ref count equal to 0. +The ref count is increased when the :any:`umfOpenIPCHandle` function is called and decreased +when the :any:`umfCloseIPCHandle` function is called for the corresponding IPC handle. + .. _ipc-api: IPC API From 5f4336d33cbd4c1017f41ab38530e93b58c6a8e0 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Fri, 21 Feb 2025 19:18:54 +0100 Subject: [PATCH 237/466] Skip umfIpcTest.GetPoolByOpenedHandle test in compatibility testing --- .github/workflows/reusable_compatibility.yml | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/reusable_compatibility.yml b/.github/workflows/reusable_compatibility.yml index 5bf9bd817..5116a59f4 100644 --- a/.github/workflows/reusable_compatibility.yml +++ b/.github/workflows/reusable_compatibility.yml @@ -94,9 +94,11 @@ jobs: - name: Run "tag" UMF tests with latest UMF libs (warnings enabled) working-directory: ${{github.workspace}}/tag_version/build + # GTEST_FILTER is used below to skip test that is not compatible run: > UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ + GTEST_FILTER="-*umfIpcTest.GetPoolByOpenedHandle*" ctest --verbose windows: @@ -181,6 +183,7 @@ jobs: working-directory: ${{github.workspace}}/tag_version/build run: | $env:UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" + $env:GTEST_FILTER="-*umfIpcTest.GetPoolByOpenedHandle*" cp ${{github.workspace}}/latest_version/build/bin/Debug/umf.dll ${{github.workspace}}/tag_version/build/bin/Debug/umf.dll ctest -C Debug --verbose @@ -230,8 +233,10 @@ jobs: - name: Run "tag" UMF tests working-directory: ${{github.workspace}}/tag_version/build - run: | - LD_LIBRARY_PATH=${{github.workspace}}/tag_version/build/lib/ ctest --output-on-failure + run: > + LD_LIBRARY_PATH=${{github.workspace}}/tag_version/build/lib/ + GTEST_FILTER="-*umfIpcTest.GetPoolByOpenedHandle*" + ctest --output-on-failure - name: Checkout latest UMF version uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 @@ -266,4 +271,5 @@ jobs: run: > UMF_LOG="level:warning;flush:debug;output:stderr;pid:no" LD_LIBRARY_PATH=${{github.workspace}}/latest_version/build/lib/ + GTEST_FILTER="-*umfIpcTest.GetPoolByOpenedHandle*" ctest --verbose -E "not_impl" From f0fa06fd91d670441a62c9865215b05a93182e06 Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Fri, 7 Mar 2025 16:36:52 +0100 Subject: [PATCH 238/466] [disjoint] Change pool name setting and cleanup --- include/umf/pools/pool_disjoint.h | 2 +- src/pool/pool_disjoint.c | 67 +++++++++++++------------------ 
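For reference alongside the IPC caching documentation added above, a minimal producer/consumer sketch of the same API follows. It uses only the calls that already appear in this patch series (umfGetIPCHandle, umfPoolGetIPCHandler, umfOpenIPCHandle, umfCloseIPCHandle, umfPutIPCHandle); the header names and the way the serialized handle is transported between processes are assumptions for illustration, not part of the patches.

    #include <umf/ipc.h>          /* assumed header for the IPC API */
    #include <umf/memory_pool.h>  /* assumed header for pool handles */

    /* Producer: export a pooled allocation. Repeated calls for pointers that
     * fall into the same coarse-grain region return the cached IPC handle. */
    static umf_result_t export_buffer(void *ptr, umf_ipc_handle_t *handle,
                                      size_t *handle_size) {
        return umfGetIPCHandle(ptr, handle, handle_size);
    }

    /* Consumer: map the handle. The mapping is created only once; further
     * opens of the same handle bump the cache ref count until it is closed. */
    static umf_result_t import_buffer(umf_memory_pool_handle_t pool,
                                      umf_ipc_handle_t handle, void **ptr) {
        umf_ipc_handler_handle_t handler = NULL;
        umf_result_t ret = umfPoolGetIPCHandler(pool, &handler);
        if (ret != UMF_RESULT_SUCCESS) {
            return ret;
        }
        return umfOpenIPCHandle(handler, handle, ptr);
    }

    /* Teardown: the consumer closes its mapping, the producer puts the handle
     * back, which drops the corresponding ref counts in the caches. */
    static void release_buffer(umf_ipc_handle_t handle, void *mapped_ptr) {
        umfCloseIPCHandle(mapped_ptr);
        umfPutIPCHandle(handle);
    }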
src/pool/pool_disjoint_internal.h | 2 +- 3 files changed, 30 insertions(+), 41 deletions(-) diff --git a/include/umf/pools/pool_disjoint.h b/include/umf/pools/pool_disjoint.h index d268a1dac..a1558b85b 100644 --- a/include/umf/pools/pool_disjoint.h +++ b/include/umf/pools/pool_disjoint.h @@ -100,7 +100,7 @@ umf_result_t umfDisjointPoolParamsSetSharedLimits( /// @brief Set custom name of the disjoint pool to be used in the traces. /// @param hParams handle to the parameters of the disjoint pool. -/// @param name custom name of the pool. +/// @param name custom name of the pool. Name longer than 64 characters will be truncated. /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. umf_result_t umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, diff --git a/src/pool/pool_disjoint.c b/src/pool/pool_disjoint.c index 0bd88bd24..8661832f0 100644 --- a/src/pool/pool_disjoint.c +++ b/src/pool/pool_disjoint.c @@ -59,7 +59,7 @@ static __TLS umf_result_t TLS_last_allocation_error; // The largest size which is allocated via the allocator. // Allocations with size > CutOff bypass the pool and // go directly to the provider. -static size_t CutOff = (size_t)1 << 31; // 2GB +static const size_t CutOff = (size_t)1 << 31; // 2GB static size_t bucket_slab_min_size(bucket_t *bucket) { return bucket->pool->params.slab_min_size; @@ -468,7 +468,7 @@ static size_t size_to_idx(disjoint_pool_t *pool, size_t size) { // get the position of the leftmost set bit size_t position = getLeftmostSetBitPos(size); - bool is_power_of_2 = 0 == (size & (size - 1)); + bool is_power_of_2 = IS_POWER_OF_2(size); bool larger_than_halfway_between_powers_of_2 = !is_power_of_2 && (bool)((size - 1) & ((uint64_t)(1) << (position - 1))); @@ -630,8 +630,9 @@ umf_result_t disjoint_pool_initialize(umf_memory_provider_handle_t provider, disjoint_pool->buckets_num = 1; size_t Size2 = Size1 + Size1 / 2; size_t ts2 = Size2, ts1 = Size1; - for (; Size2 < CutOff; Size1 *= 2, Size2 *= 2) { + while (Size2 < CutOff) { disjoint_pool->buckets_num += 2; + Size2 *= 2; } disjoint_pool->buckets = umf_ba_global_alloc( sizeof(*disjoint_pool->buckets) * disjoint_pool->buckets_num); @@ -767,6 +768,14 @@ void *disjoint_pool_aligned_malloc(void *pool, size_t size, size_t alignment) { return aligned_ptr; } +static size_t get_chunk_idx(void *ptr, slab_t *slab) { + return (((uintptr_t)ptr - (uintptr_t)slab->mem_ptr) / slab->bucket->size); +} + +static void *get_unaligned_ptr(size_t chunk_idx, slab_t *slab) { + return (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); +} + size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { disjoint_pool_t *disjoint_pool = (disjoint_pool_t *)pool; if (ptr == NULL) { @@ -788,10 +797,8 @@ size_t disjoint_pool_malloc_usable_size(void *pool, void *ptr) { } // Get the unaligned pointer // NOTE: the base pointer slab->mem_ptr needn't to be aligned to bucket size - size_t chunk_idx = - (((uintptr_t)ptr - (uintptr_t)slab->mem_ptr) / slab->bucket->size); - void *unaligned_ptr = - (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); + size_t chunk_idx = get_chunk_idx(ptr, slab); + void *unaligned_ptr = get_unaligned_ptr(chunk_idx, slab); ptrdiff_t diff = (ptrdiff_t)ptr - (ptrdiff_t)unaligned_ptr; @@ -847,10 +854,8 @@ umf_result_t disjoint_pool_free(void *pool, void *ptr) { // Get the unaligned pointer // NOTE: the base pointer slab->mem_ptr needn't to be aligned to bucket size - size_t chunk_idx = - (((uintptr_t)ptr - (uintptr_t)slab->mem_ptr) / 
slab->bucket->size); - void *unaligned_ptr = - (void *)((uintptr_t)slab->mem_ptr + chunk_idx * slab->bucket->size); + size_t chunk_idx = get_chunk_idx(ptr, slab); + void *unaligned_ptr = get_unaligned_ptr(chunk_idx, slab); utils_annotate_memory_inaccessible(unaligned_ptr, bucket->size); bucket_free_chunk(bucket, unaligned_ptr, slab, &to_pool); @@ -876,13 +881,11 @@ umf_result_t disjoint_pool_free(void *pool, void *ptr) { umf_result_t disjoint_pool_get_last_allocation_error(void *pool) { (void)pool; - return TLS_last_allocation_error; } // Define destructor for use with unique_ptr void disjoint_pool_finalize(void *pool) { - disjoint_pool_t *hPool = (disjoint_pool_t *)pool; if (hPool->params.pool_trace > 1) { @@ -937,7 +940,7 @@ void umfDisjointPoolSharedLimitsDestroy( umf_result_t umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { - static const char *DEFAULT_NAME = "disjoint_pool"; + static char *DEFAULT_NAME = "disjoint_pool"; if (!hParams) { LOG_ERR("disjoint pool params handle is NULL"); @@ -951,20 +954,16 @@ umfDisjointPoolParamsCreate(umf_disjoint_pool_params_handle_t *hParams) { return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - params->slab_min_size = 0; - params->max_poolable_size = 0; - params->capacity = 0; - params->min_bucket_size = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE; - params->cur_pool_size = 0; - params->pool_trace = 0; - params->shared_limits = NULL; - params->name = NULL; - - umf_result_t ret = umfDisjointPoolParamsSetName(params, DEFAULT_NAME); - if (ret != UMF_RESULT_SUCCESS) { - umf_ba_global_free(params); - return ret; - } + *params = (umf_disjoint_pool_params_t){ + .slab_min_size = 0, + .max_poolable_size = 0, + .capacity = 0, + .min_bucket_size = UMF_DISJOINT_POOL_MIN_BUCKET_DEFAULT_SIZE, + .cur_pool_size = 0, + .pool_trace = 0, + .shared_limits = NULL, + .name = {*DEFAULT_NAME}, + }; *hParams = params; @@ -975,7 +974,6 @@ umf_result_t umfDisjointPoolParamsDestroy(umf_disjoint_pool_params_handle_t hParams) { // NOTE: dereferencing hParams when BA is already destroyed leads to crash if (hParams && !umf_ba_global_is_destroyed()) { - umf_ba_global_free(hParams->name); umf_ba_global_free(hParams); } @@ -1067,15 +1065,6 @@ umfDisjointPoolParamsSetName(umf_disjoint_pool_params_handle_t hParams, return UMF_RESULT_ERROR_INVALID_ARGUMENT; } - char *newName = umf_ba_global_alloc(sizeof(*newName) * (strlen(name) + 1)); - if (newName == NULL) { - LOG_ERR("cannot allocate memory for disjoint pool name"); - return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; - } - - umf_ba_global_free(hParams->name); - hParams->name = newName; - strcpy(hParams->name, name); - + strncpy(hParams->name, name, sizeof(hParams->name) - 1); return UMF_RESULT_SUCCESS; } diff --git a/src/pool/pool_disjoint_internal.h b/src/pool/pool_disjoint_internal.h index 2b5de64bc..43cf73e13 100644 --- a/src/pool/pool_disjoint_internal.h +++ b/src/pool/pool_disjoint_internal.h @@ -131,7 +131,7 @@ typedef struct umf_disjoint_pool_params_t { umf_disjoint_pool_shared_limits_handle_t shared_limits; // Name used in traces - char *name; + char name[64]; } umf_disjoint_pool_params_t; typedef struct disjoint_pool_t { From 0d62314320249ce18bec3dcf0d8a40385a4f0ade Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 26 Feb 2025 16:30:54 +0100 Subject: [PATCH 239/466] add memusage stat to os_provider and use it in benchmarks --- benchmark/benchmark.hpp | 58 ++++++++++-- benchmark/benchmark_umf.hpp | 69 ++++++++++---- src/provider/provider_os_memory.c | 102 ++++++++++++++++++++- 
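The name handling change above replaces the heap-allocated pool name with a fixed 64-byte buffer filled by strncpy(), so over-long names are silently truncated. A small usage sketch of the parameter API touched by this patch; the pool name string and function wrapper are made up for illustration.

    #include <umf/pools/pool_disjoint.h>

    static umf_result_t
    make_named_params(umf_disjoint_pool_params_handle_t *out) {
        /* The name defaults to "disjoint_pool" on creation. */
        umf_result_t ret = umfDisjointPoolParamsCreate(out);
        if (ret != UMF_RESULT_SUCCESS) {
            return ret;
        }
        /* Anything longer than 63 characters is truncated to fit name[64]. */
        ret = umfDisjointPoolParamsSetName(*out, "my_disjoint_pool");
        if (ret != UMF_RESULT_SUCCESS) {
            umfDisjointPoolParamsDestroy(*out);
            *out = NULL;
        }
        return ret;
    }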
src/provider/provider_os_memory_internal.h | 5 +- src/utils/utils_concurrency.h | 12 ++- 5 files changed, 220 insertions(+), 26 deletions(-) diff --git a/benchmark/benchmark.hpp b/benchmark/benchmark.hpp index a960d89bc..b096716b3 100644 --- a/benchmark/benchmark.hpp +++ b/benchmark/benchmark.hpp @@ -173,6 +173,14 @@ class provider_allocator : public allocator_interface { return argPos; } + void preBench(::benchmark::State &state) override { + provider.preBench(state); + } + + void postBench(::benchmark::State &state) override { + provider.postBench(state); + } + void TearDown(::benchmark::State &state) override { provider.TearDown(state); } @@ -204,13 +212,18 @@ template class pool_allocator : public allocator_interface { return argPos; } + void preBench(::benchmark::State &state) override { pool.preBench(state); } + void postBench(::benchmark::State &state) override { + pool.postBench(state); + } + void TearDown(::benchmark::State &state) override { pool.TearDown(state); } - virtual void *benchAlloc(size_t size) override { + void *benchAlloc(size_t size) override { return umfPoolMalloc(pool.pool, size); } - virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) override { + void benchFree(void *ptr, [[maybe_unused]] size_t size) override { umfPoolFree(pool.pool, ptr); } @@ -241,7 +254,7 @@ struct benchmark_interface : public benchmark::Fixture { allocator.TearDown(state); } - virtual void bench(::benchmark::State &state) = 0; + void bench([[maybe_unused]] ::benchmark::State &state){}; virtual std::vector argsName() { auto s = Size::argsName(); @@ -260,6 +273,9 @@ struct benchmark_interface : public benchmark::Fixture { benchmark->ArgNames(bench->argsName())->Name(bench->name()); } + void custom_counters(::benchmark::State &state) { + allocator.custom_counters(state); + } std::vector alloc_sizes; Allocator allocator; }; @@ -282,7 +298,7 @@ class multiple_malloc_free_benchmark : public benchmark_interface { vector2d allocations; std::vector iters; - + std::vector memused; vector2d next; std::vector::const_iterator> next_iter; int64_t iterations; @@ -302,6 +318,7 @@ class multiple_malloc_free_benchmark : public benchmark_interface { allocations.resize(state.threads()); next.resize(state.threads()); next_iter.resize(state.threads()); + memused.assign(state.threads(), 0); #ifndef WIN32 // Ensure that system malloc does not have memory pooled on the heap @@ -323,13 +340,36 @@ class multiple_malloc_free_benchmark : public benchmark_interface { waitForAllThreads(state); // prepare workload for actual benchmark. 
freeAllocs(state); + prealloc(state); prepareWorkload(state); + waitForAllThreads(state); + base::allocator.preBench(state); } void TearDown(::benchmark::State &state) override { + base::allocator.postBench(state); auto tid = state.thread_index(); + if (tid == 0) { + size_t current_memory_allocated = 0; + for (const auto &used : memused) { + current_memory_allocated += used; + } + + auto memory_used = state.counters["provider_memory_allocated"]; + + if (memory_used != 0) { + state.counters["benchmark_memory_allocated"] = + static_cast(current_memory_allocated); + state.counters["memory_overhead"] = + 100.0 * (memory_used - current_memory_allocated) / + memory_used; + } else { + state.counters.erase("provider_memory_allocated"); + } + } + waitForAllThreads(state); freeAllocs(state); waitForAllThreads(state); if (tid == 0) { @@ -342,20 +382,22 @@ class multiple_malloc_free_benchmark : public benchmark_interface { base::TearDown(state); } - void bench(benchmark::State &state) override { + void bench(benchmark::State &state) { auto tid = state.thread_index(); auto &allocation = allocations[tid]; + auto &memuse = memused[tid]; for (int i = 0; i < allocsPerIterations; i++) { auto &n = *next_iter[tid]++; auto &alloc = allocation[n.offset]; base::allocator.benchFree(alloc.ptr, alloc.size); - + memuse -= alloc.size; alloc.size = n.size; alloc.ptr = base::allocator.benchAlloc(alloc.size); if (alloc.ptr == NULL) { state.SkipWithError("allocation failed"); } + memuse += alloc.size; } } @@ -376,7 +418,9 @@ class multiple_malloc_free_benchmark : public benchmark_interface { auto tid = state.thread_index(); auto &i = allocations[tid]; i.resize(max_allocs); + auto &memuse = memused[tid]; auto sizeGenerator = base::alloc_sizes[tid]; + for (size_t j = 0; j < max_allocs; j++) { auto size = sizeGenerator.nextSize(); i[j].ptr = base::allocator.benchAlloc(size); @@ -385,6 +429,7 @@ class multiple_malloc_free_benchmark : public benchmark_interface { return; } i[j].size = size; + memuse += size; } } @@ -394,6 +439,7 @@ class multiple_malloc_free_benchmark : public benchmark_interface { for (auto &j : i) { if (j.ptr != NULL) { base::allocator.benchFree(j.ptr, j.size); + memused[tid] -= j.size; j.ptr = NULL; j.size = 0; } diff --git a/benchmark/benchmark_umf.hpp b/benchmark/benchmark_umf.hpp index cfc9982d2..9553d6fdb 100644 --- a/benchmark/benchmark_umf.hpp +++ b/benchmark/benchmark_umf.hpp @@ -11,8 +11,6 @@ #include #include #include - -#include #include #include @@ -30,7 +28,7 @@ struct provider_interface { using params_ptr = std::unique_ptr; umf_memory_provider_handle_t provider = NULL; - virtual void SetUp(::benchmark::State &state) { + void SetUp(::benchmark::State &state) { if (state.thread_index() != 0) { return; } @@ -42,7 +40,27 @@ struct provider_interface { } } - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { + void preBench([[maybe_unused]] ::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + umfCtlExec("umf.provider.by_handle.stats.reset", provider, NULL); + } + + void postBench([[maybe_unused]] ::benchmark::State &state) { + if (state.thread_index() != 0) { + return; + } + size_t arg; + umf_result_t ret = umfCtlGet( + "umf.provider.by_handle.stats.allocated_memory", provider, &arg); + if (ret == UMF_RESULT_SUCCESS) { + state.counters["provider_memory_allocated"] = + static_cast(arg); + } + } + + void TearDown([[maybe_unused]] ::benchmark::State &state) { if (state.thread_index() != 0) { return; } @@ -53,9 +71,7 @@ struct provider_interface { } virtual 
umf_memory_provider_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) { - return nullptr; - } + getOps([[maybe_unused]] ::benchmark::State &state) = 0; virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { return {nullptr, [](void *) {}}; @@ -68,7 +84,7 @@ template ; - virtual void SetUp(::benchmark::State &state) { + void SetUp(::benchmark::State &state) { provider.SetUp(state); if (state.thread_index() != 0) { return; @@ -80,7 +96,22 @@ struct pool_interface { state.SkipWithError("umfPoolCreate() failed"); } } - virtual void TearDown([[maybe_unused]] ::benchmark::State &state) { + + void preBench([[maybe_unused]] ::benchmark::State &state) { + provider.preBench(state); + if (state.thread_index() != 0) { + return; + } + } + + void postBench([[maybe_unused]] ::benchmark::State &state) { + provider.postBench(state); + if (state.thread_index() != 0) { + return; + } + } + + void TearDown([[maybe_unused]] ::benchmark::State &state) { if (state.thread_index() != 0) { return; } @@ -93,15 +124,17 @@ struct pool_interface { if (pool) { umfPoolDestroy(pool); } + + provider.TearDown(state); }; virtual umf_memory_pool_ops_t * - getOps([[maybe_unused]] ::benchmark::State &state) { - return nullptr; - } + getOps([[maybe_unused]] ::benchmark::State &state) = 0; + virtual params_ptr getParams([[maybe_unused]] ::benchmark::State &state) { return {nullptr, [](void *) {}}; } + T provider; umf_memory_pool_handle_t pool; }; @@ -110,6 +143,8 @@ class allocator_interface { public: virtual unsigned SetUp([[maybe_unused]] ::benchmark::State &state, [[maybe_unused]] unsigned argPos) = 0; + virtual void preBench([[maybe_unused]] ::benchmark::State &state) = 0; + virtual void postBench([[maybe_unused]] ::benchmark::State &state) = 0; virtual void TearDown([[maybe_unused]] ::benchmark::State &state) = 0; virtual void *benchAlloc(size_t size) = 0; virtual void benchFree(void *ptr, [[maybe_unused]] size_t size) = 0; @@ -121,7 +156,9 @@ struct glibc_malloc : public allocator_interface { unsigned argPos) override { return argPos; } - void TearDown([[maybe_unused]] ::benchmark::State &state) override{}; + void preBench([[maybe_unused]] ::benchmark::State &state) override {} + void postBench([[maybe_unused]] ::benchmark::State &state) override {} + void TearDown([[maybe_unused]] ::benchmark::State &state) override {} void *benchAlloc(size_t size) override { return malloc(size); } void benchFree(void *ptr, [[maybe_unused]] size_t size) override { free(ptr); @@ -163,7 +200,7 @@ struct fixed_provider : public provider_interface { char *mem = NULL; const size_t size = 1024 * 1024 * 1024; // 1GB public: - virtual void SetUp(::benchmark::State &state) override { + void SetUp(::benchmark::State &state) { if (state.thread_index() != 0) { return; } @@ -175,7 +212,7 @@ struct fixed_provider : public provider_interface { provider_interface::SetUp(state); } - virtual void TearDown(::benchmark::State &state) override { + void TearDown(::benchmark::State &state) { if (state.thread_index() != 0) { return; } @@ -295,7 +332,7 @@ struct jemalloc_pool : public pool_interface { #ifdef UMF_POOL_SCALABLE_ENABLED template struct scalable_pool : public pool_interface { - virtual umf_memory_pool_ops_t * + umf_memory_pool_ops_t * getOps([[maybe_unused]] ::benchmark::State &state) override { return umfScalablePoolOps(); } diff --git a/src/provider/provider_os_memory.c b/src/provider/provider_os_memory.c index f3e5c7fa0..1ecb397fe 100644 --- a/src/provider/provider_os_memory.c +++ b/src/provider/provider_os_memory.c @@ 
-6,19 +6,21 @@ */ #include +#include #include #include + #include #include #include #include - -#include #include #include #include #include #include + +#include "utils_assert.h" // OS Memory Provider requires HWLOC #if defined(UMF_NO_HWLOC) @@ -187,12 +189,77 @@ static int CTL_READ_HANDLER(ipc_enabled)(void *ctx, return 0; } +static int CTL_READ_HANDLER(peak_memory)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; + + size_t *arg_out = arg; + os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; + COMPILE_ERROR_ON(sizeof(os_provider->stats.peak_memory) != + sizeof(uint64_t)); + utils_atomic_load_acquire_u64((uint64_t *)&os_provider->stats.peak_memory, + (uint64_t *)arg_out); + return 0; +} + +static int CTL_READ_HANDLER(allocated_memory)(void *ctx, + umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)ctx, (void)extra_name, (void)query_type; + + size_t *arg_out = arg; + os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; + COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != + sizeof(uint64_t)); + COMPILE_ERROR_ON(sizeof(*arg_out) != sizeof(uint64_t)); + utils_atomic_load_acquire_u64( + (uint64_t *)&os_provider->stats.allocated_memory, (uint64_t *)arg_out); + return 0; +} + +static int CTL_RUNNABLE_HANDLER(reset)(void *ctx, umf_ctl_query_source_t source, + void *arg, + umf_ctl_index_utlist_t *indexes, + const char *extra_name, + umf_ctl_query_type_t query_type) { + /* suppress unused-parameter errors */ + (void)source, (void)indexes, (void)arg, (void)extra_name, (void)query_type; + + os_memory_provider_t *os_provider = (os_memory_provider_t *)ctx; + size_t allocated; + + COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != + sizeof(uint64_t)); + COMPILE_ERROR_ON(sizeof(allocated) != sizeof(uint64_t)); + + utils_atomic_load_acquire_u64( + (uint64_t *)&os_provider->stats.allocated_memory, + (uint64_t *)&allocated); + utils_atomic_store_release_u64((uint64_t *)&os_provider->stats.peak_memory, + (uint64_t)allocated); + + return 0; +} +static const umf_ctl_node_t CTL_NODE(stats)[] = { + CTL_LEAF_RO(allocated_memory), CTL_LEAF_RO(peak_memory), + CTL_LEAF_RUNNABLE(reset), CTL_NODE_END}; + static const umf_ctl_node_t CTL_NODE(params)[] = {CTL_LEAF_RO(ipc_enabled), CTL_NODE_END}; static void initialize_os_ctl(void) { os_memory_ctl_root = ctl_new(); CTL_REGISTER_MODULE(os_memory_ctl_root, params); + CTL_REGISTER_MODULE(os_memory_ctl_root, stats); } static void os_store_last_native_error(int32_t native_error, int errno_value) { @@ -1109,6 +1176,29 @@ static umf_result_t os_alloc(void *provider, size_t size, size_t alignment, *resultPtr = addr; + COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != + sizeof(uint64_t)); + COMPILE_ERROR_ON(sizeof(os_provider->stats.peak_memory) != + sizeof(uint64_t)); + COMPILE_ERROR_ON(sizeof(size) != sizeof(uint64_t)); + // TODO: Change to memory_order_relaxed when we will have a proper wrapper + size_t allocated = + utils_fetch_and_add_u64( + (uint64_t *)&os_provider->stats.allocated_memory, (uint64_t)size) + + size; + + uint64_t peak; + utils_atomic_load_acquire_u64((uint64_t *)&os_provider->stats.peak_memory, + &peak); + + 
while (allocated > peak && !utils_compare_exchange_u64( + (uint64_t *)&os_provider->stats.peak_memory, + &peak, (uint64_t *)&allocated)) { + /* If the compare-exchange fails, 'peak' is updated to the current value of peak_memory. + We then re-check whether allocated is still greater than the updated peak value. */ + ; + } + return UMF_RESULT_SUCCESS; err_unmap: @@ -1136,6 +1226,14 @@ static umf_result_t os_free(void *provider, void *ptr, size_t size) { return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; } + COMPILE_ERROR_ON(sizeof(size) != sizeof(uint64_t)); + COMPILE_ERROR_ON(sizeof(os_provider->stats.allocated_memory) != + sizeof(uint64_t)); + + // TODO: Change it to memory_order_relaxed when we will have a proper wrapper + utils_fetch_and_sub_u64((uint64_t *)&os_provider->stats.allocated_memory, + size); + return UMF_RESULT_SUCCESS; } diff --git a/src/provider/provider_os_memory_internal.h b/src/provider/provider_os_memory_internal.h index 4a603b1da..a3f35cbd3 100644 --- a/src/provider/provider_os_memory_internal.h +++ b/src/provider/provider_os_memory_internal.h @@ -10,7 +10,6 @@ #include #include - #if defined(_WIN32) && !defined(NAME_MAX) #include #define NAME_MAX _MAX_FNAME @@ -68,6 +67,10 @@ typedef struct os_memory_provider_t { size_t partitions_weight_sum; hwloc_topology_t topo; + struct { + size_t allocated_memory; + size_t peak_memory; + } stats; } os_memory_provider_t; #ifdef __cplusplus diff --git a/src/utils/utils_concurrency.h b/src/utils/utils_concurrency.h index 0104b8646..638c1c426 100644 --- a/src/utils/utils_concurrency.h +++ b/src/utils/utils_concurrency.h @@ -120,11 +120,15 @@ static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) { *(uintptr_t *)out = ret; } +static inline void utils_atomic_store_release_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + InterlockedExchange64((LONG64 volatile *)ptr, val); +} + static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); InterlockedExchangePointer(ptr, val); } - static inline uint64_t utils_atomic_increment_u64(uint64_t *ptr) { ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); // return incremented value @@ -183,6 +187,12 @@ static inline void utils_atomic_load_acquire_ptr(void **ptr, void **out) { utils_annotate_acquire(ptr); } +static inline void utils_atomic_store_release_u64(uint64_t *ptr, uint64_t val) { + ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); + utils_annotate_release(ptr); + __atomic_store_n(ptr, val, memory_order_release); +} + static inline void utils_atomic_store_release_ptr(void **ptr, void *val) { ASSERT_IS_ALIGNED((uintptr_t)ptr, 8); utils_annotate_release(ptr); From 8cf9196f14dfe78a3d258d2e7e199a9de7037c33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Tue, 11 Mar 2025 10:54:54 +0100 Subject: [PATCH 240/466] [CI] Use venv for pip installation in bench Moved on to new Ubuntu and it doesn't support global pip installation. 
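Going back to the provider statistics patch above: the loop added to os_alloc() is the usual compare-exchange retry idiom for maintaining a peak value. A self-contained sketch of the same pattern, written with plain C11 atomics instead of the utils_* wrappers purely for illustration:

    #include <stdatomic.h>
    #include <stdint.h>

    /* Add 'size' to the allocated counter and raise the recorded peak if the
     * new total exceeds it. When the CAS fails, 'old_peak' is refreshed with
     * the value another thread just stored and the comparison is retried. */
    static void record_alloc(_Atomic uint64_t *allocated,
                             _Atomic uint64_t *peak, uint64_t size) {
        uint64_t now = atomic_fetch_add(allocated, size) + size;
        uint64_t old_peak = atomic_load(peak);
        while (now > old_peak &&
               !atomic_compare_exchange_weak(peak, &old_peak, now)) {
            /* retry with the updated old_peak */
        }
    }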
--- .github/workflows/reusable_benchmarks.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index 15e6b15f4..69e1f5c60 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -111,7 +111,9 @@ jobs: - name: Install benchmarking scripts deps run: | - pip install --force-reinstall -r ${{github.workspace}}/sycl-repo/unified-runtime/third_party/benchmark_requirements.txt + python -m venv .venv + source .venv/bin/activate + pip install -r ${{github.workspace}}/sycl-repo/unified-runtime/third_party/benchmark_requirements.txt - name: Set core range and GPU mask run: | @@ -135,6 +137,7 @@ jobs: id: benchmarks working-directory: ${{env.BUILD_DIR}} run: > + source ${{github.workspace}}/.venv/bin/activate && taskset -c ${{ env.CORES }} ${{ github.workspace }}/sycl-repo/unified-runtime/scripts/benchmarks/main.py ~/bench_workdir_umf --umf ${{env.BUILD_DIR}} From eec4f1356fce83c7ba480936768fe0c39c60c4fd Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 12 Mar 2025 10:01:59 +0000 Subject: [PATCH 241/466] fix for potential out-of-bounds read in tracker --- src/provider/provider_tracking.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index 92d3dd59c..c5a4b5f1f 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -1236,9 +1236,9 @@ umf_memory_tracker_handle_t umfMemoryTrackerCreate(void) { err_destroy_ipc_info_allocator: umf_ba_destroy(handle->ipc_info_allocator); err_destroy_alloc_segments_map: - for (int j = i; j >= 0; j--) { - if (handle->alloc_segments_map[j]) { - critnib_delete(handle->alloc_segments_map[j]); + for (i = 0; i < MAX_LEVELS_OF_ALLOC_SEGMENT_MAP; i++) { + if (handle->alloc_segments_map[i]) { + critnib_delete(handle->alloc_segments_map[i]); } } utils_mutex_destroy_not_free(&handle->splitMergeMutex); From f22aff25e96c3c233b7f4db81fa58f122466ef2d Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 12 Mar 2025 11:33:10 +0100 Subject: [PATCH 242/466] Log pool pointer in umfFree() --- src/memory_pool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/memory_pool.c b/src/memory_pool.c index ef2c0fa66..eb0054522 100644 --- a/src/memory_pool.c +++ b/src/memory_pool.c @@ -111,6 +111,8 @@ void umfPoolDestroy(umf_memory_pool_handle_t hPool) { umf_result_t umfFree(void *ptr) { umf_memory_pool_handle_t hPool = umfPoolByPtr(ptr); if (hPool) { + LOG_DEBUG("calling umfPoolFree(pool=%p, ptr=%p) ...", (void *)hPool, + ptr); return umfPoolFree(hPool, ptr); } return UMF_RESULT_SUCCESS; From ce6c5da7a0be4af53367e40a961e7d8ab30e110a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 26 Feb 2025 16:29:17 +0100 Subject: [PATCH 243/466] [CI] Enable SLES in multi numa workflow Co-developed-by: opensource-krzysztof --- .github/workflows/reusable_multi_numa.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/reusable_multi_numa.yml b/.github/workflows/reusable_multi_numa.yml index 3c60bebc3..8b342106e 100644 --- a/.github/workflows/reusable_multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -1,4 +1,4 @@ -# Runs tests on multi-numa machine +# Runs tests on multi-numa machines name: MultiNuma on: [workflow_call] @@ -20,7 +20,7 @@ jobs: strategy: matrix: - os: [ubuntu-22.04, rhel-9.1] + os: [ubuntu-22.04, rhel-9.1, sles-15] 
build_type: [Debug, Release] shared_library: ['ON', 'OFF'] runs-on: ["DSS-MULTI-NUMA", "DSS-${{matrix.os}}"] @@ -53,16 +53,16 @@ jobs: run: cmake --build ${{github.workspace}}/build -j $(nproc) - name: Run tests - if: matrix.os != 'rhel-9.1' + if: (matrix.os != 'rhel-9.1') && (matrix.os != 'sles-15') working-directory: ${{github.workspace}}/build run: ctest --output-on-failure --test-dir test - # On RHEL, hwloc version is just a little too low. + # On RHEL/SLES, hwloc version is just a little too low. # Skip some tests until we upgrade hwloc and update CMake to properly handle local hwloc installation. # TODO: fix issue #560 # TODO: add issue for -E test_init_teardown - it is not clear why it fails - - name: Run tests (on RHEL) - if: matrix.os == 'rhel-9.1' + - name: Run tests (on RHEL/SLES) + if: (matrix.os == 'rhel-9.1') || (matrix.os == 'sles-15') working-directory: ${{github.workspace}}/build run: | ctest --output-on-failure --test-dir test -E "test_provider_os_memory_multiple_numa_nodes|test_init_teardown" @@ -70,7 +70,7 @@ jobs: --gtest_filter="-*checkModeLocal/*:*checkModePreferredEmptyNodeset/*:testNuma.checkModeInterleave" - name: Run NUMA tests under valgrind - if: matrix.os != 'rhel-9.1' + if: (matrix.os != 'rhel-9.1') && (matrix.os != 'sles-15') run: | ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} memcheck "${{env.NUMA_TESTS}}" ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{env.BUILD_DIR}} drd "${{env.NUMA_TESTS}}" From a234364ab91e40c3784f2e8711dcc3a18d725e2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 12 Mar 2025 12:33:25 +0100 Subject: [PATCH 244/466] [CI] Update get_system_info.sh zypper part and move the step gathering info to the end (in multi numa CI). 
--- .github/scripts/get_system_info.sh | 2 +- .github/workflows/reusable_multi_numa.yml | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/scripts/get_system_info.sh b/.github/scripts/get_system_info.sh index be900e2a7..81c54ce98 100755 --- a/.github/scripts/get_system_info.sh +++ b/.github/scripts/get_system_info.sh @@ -15,7 +15,7 @@ function check_L0_version { fi if command -v zypper &> /dev/null; then - zypper se level-zero && return + zypper -n se level-zero || true fi echo "level-zero not installed" diff --git a/.github/workflows/reusable_multi_numa.yml b/.github/workflows/reusable_multi_numa.yml index 8b342106e..47a48adb2 100644 --- a/.github/workflows/reusable_multi_numa.yml +++ b/.github/workflows/reusable_multi_numa.yml @@ -31,9 +31,6 @@ jobs: with: fetch-depth: 0 - - name: Get information about platform - run: .github/scripts/get_system_info.sh - - name: Configure build run: > cmake @@ -91,3 +88,7 @@ jobs: with: name: ${{env.COVERAGE_NAME}}-${{matrix.os}}-shared-${{matrix.shared_library}} path: ${{env.COVERAGE_DIR}} + + - name: Get information about platform + if: always() + run: .github/scripts/get_system_info.sh From 4cba219fcecd31f1ca02bc9988c9a059cbff6398 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 12 Mar 2025 14:03:13 +0100 Subject: [PATCH 245/466] Handle CUDA_ERROR_DEINITIALIZED error in cu2umf_result() Signed-off-by: Lukasz Dorau --- src/provider/provider_cuda.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index bb4b3cf64..7d40c534a 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -178,6 +178,9 @@ static umf_result_t cu2umf_result(CUresult result) { case CUDA_ERROR_INVALID_VALUE: case CUDA_ERROR_INVALID_HANDLE: return UMF_RESULT_ERROR_INVALID_ARGUMENT; + case CUDA_ERROR_DEINITIALIZED: + LOG_ERR("CUDA driver has been deinitialized"); + return UMF_RESULT_ERROR_OUT_OF_RESOURCES; default: cu_store_last_native_error(result); return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC; From 3a08d6c76516d8e37c10bf15b562f558553c46ca Mon Sep 17 00:00:00 2001 From: Rafal Rudnicki Date: Wed, 12 Mar 2025 09:33:21 +0000 Subject: [PATCH 246/466] use current ctx and dev by default in CUDA prov --- include/umf/providers/provider_cuda.h | 3 +- src/provider/provider_cuda.c | 40 +++++++++++++++++++++---- test/providers/cuda_helpers.cpp | 12 ++++++++ test/providers/cuda_helpers.h | 2 ++ test/providers/provider_cuda.cpp | 42 ++++++++++++++++++++++++++- 5 files changed, 92 insertions(+), 7 deletions(-) diff --git a/include/umf/providers/provider_cuda.h b/include/umf/providers/provider_cuda.h index e3b81858b..95f2634fb 100644 --- a/include/umf/providers/provider_cuda.h +++ b/include/umf/providers/provider_cuda.h @@ -20,7 +20,8 @@ typedef struct umf_cuda_memory_provider_params_t *umf_cuda_memory_provider_params_handle_t; /// @brief Create a struct to store parameters of the CUDA Memory Provider. -/// @param hParams [out] handle to the newly created parameters struct. +/// @param hParams [out] handle to the newly created parameters structure, +/// initialized with the default (current) context and device ID. /// @return UMF_RESULT_SUCCESS on success or appropriate error code on failure. 
umf_result_t umfCUDAMemoryProviderParamsCreate( umf_cuda_memory_provider_params_handle_t *hParams); diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index bb4b3cf64..a7179defd 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -139,6 +139,7 @@ typedef struct cu_ops_t { CUresult (*cuGetErrorName)(CUresult error, const char **pStr); CUresult (*cuGetErrorString)(CUresult error, const char **pStr); CUresult (*cuCtxGetCurrent)(CUcontext *pctx); + CUresult (*cuCtxGetDevice)(CUdevice *device); CUresult (*cuCtxSetCurrent)(CUcontext ctx); CUresult (*cuIpcGetMemHandle)(CUipcMemHandle *pHandle, CUdeviceptr dptr); CUresult (*cuIpcOpenMemHandle)(CUdeviceptr *pdptr, CUipcMemHandle handle, @@ -221,6 +222,8 @@ static void init_cu_global_state(void) { utils_get_symbol_addr(lib_handle, "cuGetErrorString", lib_name); *(void **)&g_cu_ops.cuCtxGetCurrent = utils_get_symbol_addr(lib_handle, "cuCtxGetCurrent", lib_name); + *(void **)&g_cu_ops.cuCtxGetDevice = + utils_get_symbol_addr(lib_handle, "cuCtxGetDevice", lib_name); *(void **)&g_cu_ops.cuCtxSetCurrent = utils_get_symbol_addr(lib_handle, "cuCtxSetCurrent", lib_name); *(void **)&g_cu_ops.cuIpcGetMemHandle = @@ -234,9 +237,9 @@ static void init_cu_global_state(void) { !g_cu_ops.cuMemHostAlloc || !g_cu_ops.cuMemAllocManaged || !g_cu_ops.cuMemFree || !g_cu_ops.cuMemFreeHost || !g_cu_ops.cuGetErrorName || !g_cu_ops.cuGetErrorString || - !g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxSetCurrent || - !g_cu_ops.cuIpcGetMemHandle || !g_cu_ops.cuIpcOpenMemHandle || - !g_cu_ops.cuIpcCloseMemHandle) { + !g_cu_ops.cuCtxGetCurrent || !g_cu_ops.cuCtxGetDevice || + !g_cu_ops.cuCtxSetCurrent || !g_cu_ops.cuIpcGetMemHandle || + !g_cu_ops.cuIpcOpenMemHandle || !g_cu_ops.cuIpcCloseMemHandle) { LOG_FATAL("Required CUDA symbols not found."); Init_cu_global_state_failed = true; utils_close_library(lib_handle); @@ -260,8 +263,29 @@ umf_result_t umfCUDAMemoryProviderParamsCreate( return UMF_RESULT_ERROR_OUT_OF_HOST_MEMORY; } - params_data->cuda_context_handle = NULL; - params_data->cuda_device_handle = -1; + utils_init_once(&cu_is_initialized, init_cu_global_state); + if (Init_cu_global_state_failed) { + LOG_FATAL("Loading CUDA symbols failed"); + return UMF_RESULT_ERROR_DEPENDENCY_UNAVAILABLE; + } + + // initialize context and device to the current ones + CUcontext current_ctx = NULL; + CUresult cu_result = g_cu_ops.cuCtxGetCurrent(¤t_ctx); + if (cu_result == CUDA_SUCCESS) { + params_data->cuda_context_handle = current_ctx; + } else { + params_data->cuda_context_handle = NULL; + } + + CUdevice current_device = -1; + cu_result = g_cu_ops.cuCtxGetDevice(¤t_device); + if (cu_result == CUDA_SUCCESS) { + params_data->cuda_device_handle = current_device; + } else { + params_data->cuda_device_handle = -1; + } + params_data->memory_type = UMF_MEMORY_TYPE_UNKNOWN; params_data->alloc_flags = 0; @@ -342,6 +366,12 @@ static umf_result_t cu_memory_provider_initialize(void *params, } if (cu_params->cuda_context_handle == NULL) { + LOG_ERR("Invalid context handle"); + return UMF_RESULT_ERROR_INVALID_ARGUMENT; + } + + if (cu_params->cuda_device_handle < 0) { + LOG_ERR("Invalid device handle"); return UMF_RESULT_ERROR_INVALID_ARGUMENT; } diff --git a/test/providers/cuda_helpers.cpp b/test/providers/cuda_helpers.cpp index a607d7ecb..3e81c184f 100644 --- a/test/providers/cuda_helpers.cpp +++ b/test/providers/cuda_helpers.cpp @@ -412,6 +412,18 @@ CUcontext get_mem_context(void *ptr) { return context; } +int get_mem_device(void *ptr) { + int device; + 
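    /* Illustration only, not part of the patch: with the default initialization
     * added above, a CUDA provider can be created without explicitly setting the
     * context or device, provided a CUDA context is current on the calling
     * thread. All API calls below appear elsewhere in this series; the wrapper
     * name and the choice of UMF_MEMORY_TYPE_DEVICE are assumptions. */
    #include <umf/memory_provider.h>
    #include <umf/providers/provider_cuda.h>

    static umf_result_t
    make_default_cuda_provider(umf_memory_provider_handle_t *out) {
        umf_cuda_memory_provider_params_handle_t params = NULL;
        /* Picks up the current context/device; leaves NULL / -1 if none is set. */
        umf_result_t ret = umfCUDAMemoryProviderParamsCreate(&params);
        if (ret != UMF_RESULT_SUCCESS) {
            return ret;
        }
        ret = umfCUDAMemoryProviderParamsSetMemoryType(params,
                                                       UMF_MEMORY_TYPE_DEVICE);
        if (ret == UMF_RESULT_SUCCESS) {
            /* initialize() rejects params whose context is NULL or device < 0. */
            ret = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), params, out);
        }
        umfCUDAMemoryProviderParamsDestroy(params);
        return ret;
    }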
CUresult res = libcu_ops.cuPointerGetAttribute( + &device, CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL, (CUdeviceptr)ptr); + if (res != CUDA_SUCCESS) { + fprintf(stderr, "cuPointerGetAttribute() failed!\n"); + return -1; + } + + return device; +} + CUcontext get_current_context() { CUcontext context; CUresult res = libcu_ops.cuCtxGetCurrent(&context); diff --git a/test/providers/cuda_helpers.h b/test/providers/cuda_helpers.h index e7deb9064..944e6dbef 100644 --- a/test/providers/cuda_helpers.h +++ b/test/providers/cuda_helpers.h @@ -48,6 +48,8 @@ unsigned int get_mem_host_alloc_flags(void *ptr); CUcontext get_mem_context(void *ptr); +int get_mem_device(void *ptr); + CUcontext get_current_context(); #ifdef __cplusplus diff --git a/test/providers/provider_cuda.cpp b/test/providers/provider_cuda.cpp index 9c7f76dd1..a7e5dbe5a 100644 --- a/test/providers/provider_cuda.cpp +++ b/test/providers/provider_cuda.cpp @@ -142,14 +142,15 @@ struct umfCUDAProviderTest memAccessor = nullptr; expected_context = cudaTestHelper.get_test_context(); + expected_device = cudaTestHelper.get_test_device(); params = create_cuda_prov_params(cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device(), memory_type, 0 /* alloc flags */); ASSERT_NE(expected_context, nullptr); + ASSERT_GE(expected_device, 0); switch (memory_type) { case UMF_MEMORY_TYPE_DEVICE: - memAccessor = std::make_unique( cudaTestHelper.get_test_context(), cudaTestHelper.get_test_device()); @@ -178,6 +179,7 @@ struct umfCUDAProviderTest std::unique_ptr memAccessor = nullptr; CUcontext expected_context = nullptr; + int expected_device = -1; umf_usm_memory_type_t expected_memory_type; }; @@ -328,6 +330,44 @@ TEST_P(umfCUDAProviderTest, getPageSizeInvalidArgs) { umfMemoryProviderDestroy(provider); } +TEST_P(umfCUDAProviderTest, cudaProviderDefaultParams) { + umf_cuda_memory_provider_params_handle_t defaultParams = nullptr; + umf_result_t umf_result = umfCUDAMemoryProviderParamsCreate(&defaultParams); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umf_result = umfCUDAMemoryProviderParamsSetMemoryType(defaultParams, + expected_memory_type); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + // NOTE: we intentionally do not set any context and device params + + umf_memory_provider_handle_t provider = nullptr; + umf_result = umfMemoryProviderCreate(umfCUDAMemoryProviderOps(), + defaultParams, &provider); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(provider, nullptr); + + // do single alloc and check if the context and device id of allocated + // memory are correct + + void *ptr = nullptr; + umf_result = umfMemoryProviderAlloc(provider, 128, 0, &ptr); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + ASSERT_NE(ptr, nullptr); + + CUcontext actual_mem_context = get_mem_context(ptr); + ASSERT_EQ(actual_mem_context, expected_context); + + int actual_device = get_mem_device(ptr); + ASSERT_EQ(actual_device, expected_device); + + umf_result = umfMemoryProviderFree(provider, ptr, 128); + ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); + + umfMemoryProviderDestroy(provider); + umfCUDAMemoryProviderParamsDestroy(defaultParams); +} + TEST_P(umfCUDAProviderTest, cudaProviderNullParams) { umf_result_t res = umfCUDAMemoryProviderParamsCreate(nullptr); EXPECT_EQ(res, UMF_RESULT_ERROR_INVALID_ARGUMENT); From 7173cc5e90d73d12cb731adc2ec5ea3383105d1a Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Wed, 12 Mar 2025 13:47:46 +0100 Subject: [PATCH 247/466] Fix segfault in cu_memory_provider_get_last_native_error() Fix segfault in 
cu_memory_provider_get_last_native_error() when it is called after a CUDA device was destroyed. Signed-off-by: Lukasz Dorau --- src/provider/provider_cuda.c | 39 +++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 10 deletions(-) diff --git a/src/provider/provider_cuda.c b/src/provider/provider_cuda.c index 7d40c534a..c7e2a8fd2 100644 --- a/src/provider/provider_cuda.c +++ b/src/provider/provider_cuda.c @@ -542,22 +542,41 @@ static void cu_memory_provider_get_last_native_error(void *provider, return; } - const char *error_name = 0; - const char *error_string = 0; - g_cu_ops.cuGetErrorName(TLS_last_native_error.native_error, &error_name); - g_cu_ops.cuGetErrorString(TLS_last_native_error.native_error, - &error_string); - + CUresult result; size_t buf_size = 0; - strncpy(TLS_last_native_error.msg_buff, error_name, TLS_MSG_BUF_LEN - 1); - buf_size = strlen(TLS_last_native_error.msg_buff); + const char *error_name = NULL; + const char *error_string = NULL; + + // If the error code is not recognized, + // CUDA_ERROR_INVALID_VALUE will be returned + // and error_name will be set to the NULL address. + result = g_cu_ops.cuGetErrorName(TLS_last_native_error.native_error, + &error_name); + if (result == CUDA_SUCCESS && error_name != NULL) { + strncpy(TLS_last_native_error.msg_buff, error_name, + TLS_MSG_BUF_LEN - 1); + } else { + strncpy(TLS_last_native_error.msg_buff, "cuGetErrorName() failed", + TLS_MSG_BUF_LEN - 1); + } + buf_size = strlen(TLS_last_native_error.msg_buff); strncat(TLS_last_native_error.msg_buff, " - ", TLS_MSG_BUF_LEN - buf_size - 1); buf_size = strlen(TLS_last_native_error.msg_buff); - strncat(TLS_last_native_error.msg_buff, error_string, - TLS_MSG_BUF_LEN - buf_size - 1); + // If the error code is not recognized, + // CUDA_ERROR_INVALID_VALUE will be returned + // and error_string will be set to the NULL address. 
+ result = g_cu_ops.cuGetErrorString(TLS_last_native_error.native_error, + &error_string); + if (result == CUDA_SUCCESS && error_string != NULL) { + strncat(TLS_last_native_error.msg_buff, error_string, + TLS_MSG_BUF_LEN - buf_size - 1); + } else { + strncat(TLS_last_native_error.msg_buff, "cuGetErrorString() failed", + TLS_MSG_BUF_LEN - buf_size - 1); + } *pError = TLS_last_native_error.native_error; *ppMessage = TLS_last_native_error.msg_buff; From 27dc807196da49c98f953a5529e3aca82087b7db Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Thu, 13 Mar 2025 12:20:11 +0100 Subject: [PATCH 248/466] Move C++ helper header to utils --- .github/workflows/reusable_gpu.yml | 4 ++-- scripts/qemu/configs/default.xml | 4 ---- test/c_api/test_ut_asserts.h | 4 ++-- test/common/pool.hpp | 2 +- test/common/provider.hpp | 4 ++-- test/provider_devdax_memory.cpp | 4 ++-- test/provider_file_memory.cpp | 4 ++-- test/provider_fixed_memory.cpp | 2 +- test/provider_os_memory.cpp | 2 +- test/provider_tracking.cpp | 2 +- test/provider_tracking_fixture_tests.cpp | 2 +- {src => test/utils}/cpp_helpers.hpp | 0 12 files changed, 15 insertions(+), 19 deletions(-) rename {src => test/utils}/cpp_helpers.hpp (100%) diff --git a/.github/workflows/reusable_gpu.yml b/.github/workflows/reusable_gpu.yml index a67be2f52..721d85206 100644 --- a/.github/workflows/reusable_gpu.yml +++ b/.github/workflows/reusable_gpu.yml @@ -99,8 +99,8 @@ jobs: -DUMF_BUILD_LEVEL_ZERO_PROVIDER=OFF -DUMF_BUILD_${{inputs.name}}_PROVIDER=ON -DUMF_TESTS_FAIL_ON_SKIP=ON - ${{ matrix.os == 'Ubuntu' && matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} - ${{ matrix.os == 'Windows' && '-DCMAKE_SUPPRESS_REGENERATION=ON' || '' }} + ${{ matrix.os == 'Ubuntu' && matrix.build_type == 'Debug' && '-DUMF_USE_COVERAGE=ON' || '' }} + ${{ matrix.os == 'Windows' && '-DCMAKE_SUPPRESS_REGENERATION=ON' || '' }} - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j ${{matrix.number_of_processors}} diff --git a/scripts/qemu/configs/default.xml b/scripts/qemu/configs/default.xml index 565468794..5d3198f60 100644 --- a/scripts/qemu/configs/default.xml +++ b/scripts/qemu/configs/default.xml @@ -50,16 +50,12 @@ Cell 2 | 0 | 1200MiB | 17, 28, 10 | - - - - diff --git a/test/c_api/test_ut_asserts.h b/test/c_api/test_ut_asserts.h index 834d39bda..b73f0cd19 100644 --- a/test/c_api/test_ut_asserts.h +++ b/test/c_api/test_ut_asserts.h @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -9,7 +9,7 @@ /* The project uses GTEST framework for testing, which is not supported in C - These asserts should NOT be used in other purposes than for testing C API + These asserts should NOT be used in other purposes than for testing C API */ #ifndef UMF_TEST_UT_ASSERTS_H diff --git a/test/common/pool.hpp b/test/common/pool.hpp index a5b4afc15..165f9b836 100644 --- a/test/common/pool.hpp +++ b/test/common/pool.hpp @@ -22,8 +22,8 @@ #include #include "base.hpp" -#include "cpp_helpers.hpp" #include "provider.hpp" +#include "utils/cpp_helpers.hpp" namespace umf_test { diff --git a/test/common/provider.hpp b/test/common/provider.hpp index 148f34dc8..f40c0bf64 100644 --- a/test/common/provider.hpp +++ b/test/common/provider.hpp @@ -1,6 +1,6 @@ /* * - * Copyright (C) 2023-2024 Intel Corporation + * Copyright (C) 2023-2025 Intel Corporation * * Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -15,8 +15,8 @@ #include "base.hpp" #include "base_alloc_global.h" -#include "cpp_helpers.hpp" #include "test_helpers.h" +#include "utils/cpp_helpers.hpp" namespace umf_test { diff --git a/test/provider_devdax_memory.cpp b/test/provider_devdax_memory.cpp index 7765dd08d..f0f5c21fd 100644 --- a/test/provider_devdax_memory.cpp +++ b/test/provider_devdax_memory.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -11,8 +11,8 @@ #include "base.hpp" -#include "cpp_helpers.hpp" #include "test_helpers.h" +#include "utils/cpp_helpers.hpp" #include #include diff --git a/test/provider_file_memory.cpp b/test/provider_file_memory.cpp index cfa37be31..896b95380 100644 --- a/test/provider_file_memory.cpp +++ b/test/provider_file_memory.cpp @@ -1,11 +1,11 @@ -// Copyright (C) 2024 Intel Corporation +// Copyright (C) 2024-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #include "base.hpp" -#include "cpp_helpers.hpp" #include "test_helpers.h" +#include "utils/cpp_helpers.hpp" #ifndef _WIN32 #include "test_helpers_linux.h" #endif diff --git a/test/provider_fixed_memory.cpp b/test/provider_fixed_memory.cpp index 1760ca4f7..1d75056ea 100644 --- a/test/provider_fixed_memory.cpp +++ b/test/provider_fixed_memory.cpp @@ -4,8 +4,8 @@ #include "base.hpp" -#include "cpp_helpers.hpp" #include "test_helpers.h" +#include "utils/cpp_helpers.hpp" #ifndef _WIN32 #include "test_helpers_linux.h" #endif diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index 5b647b642..7fd781208 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -4,9 +4,9 @@ #include "base.hpp" -#include "cpp_helpers.hpp" #include "ipcFixtures.hpp" #include "test_helpers.h" +#include "utils/cpp_helpers.hpp" #include #include diff --git a/test/provider_tracking.cpp b/test/provider_tracking.cpp index 864c15564..d289a9796 100644 --- a/test/provider_tracking.cpp +++ b/test/provider_tracking.cpp @@ -4,8 +4,8 @@ #include "base.hpp" -#include "cpp_helpers.hpp" #include "test_helpers.h" +#include "utils/cpp_helpers.hpp" #ifndef _WIN32 #include "test_helpers_linux.h" #endif diff --git a/test/provider_tracking_fixture_tests.cpp b/test/provider_tracking_fixture_tests.cpp index 05b87f87f..94227757b 100644 --- a/test/provider_tracking_fixture_tests.cpp +++ b/test/provider_tracking_fixture_tests.cpp @@ -9,8 +9,8 @@ #include "base.hpp" #include "provider.hpp" -#include "cpp_helpers.hpp" #include "test_helpers.h" +#include "utils/cpp_helpers.hpp" #ifndef _WIN32 #include "test_helpers_linux.h" #endif diff --git a/src/cpp_helpers.hpp b/test/utils/cpp_helpers.hpp similarity index 100% rename from src/cpp_helpers.hpp rename to test/utils/cpp_helpers.hpp From 52d1f59ba2d8751a0d5961189bd675494e283623 Mon Sep 17 00:00:00 2001 From: Krzysztof Filipek Date: Thu, 13 Mar 2025 14:46:25 +0100 Subject: [PATCH 249/466] [tests] Rename umf to umf_test --- test/coarse_lib.cpp | 2 +- test/common/pool.hpp | 4 ++-- test/common/provider.hpp | 9 +++---- test/disjoint_pool_file_prov.cpp | 2 +- test/ipcAPI.cpp | 2 +- test/ipcFixtures.hpp | 30 ++++++++++++------------ test/memoryPoolAPI.cpp | 11 +++++---- test/memoryProviderAPI.cpp | 4 ++-- test/poolFixtures.hpp | 10 ++++---- test/pools/disjoint_pool.cpp | 6 ++--- test/pools/pool_base_alloc.cpp | 11 +++++---- test/pools/scalable_pool.cpp | 6 ++--- test/provider_devdax_memory.cpp | 8 +++---- test/provider_file_memory.cpp | 8 +++---- test/provider_fixed_memory.cpp | 8 +++---- test/provider_os_memory.cpp | 8 +++---- test/provider_tracking.cpp | 12 +++++----- test/provider_tracking_fixture_tests.cpp | 2 +- test/utils/cpp_helpers.hpp | 10 ++++---- 19 files changed, 78 insertions(+), 75 deletions(-) diff --git a/test/coarse_lib.cpp b/test/coarse_lib.cpp index c2e1f9c85..761183389 100644 --- a/test/coarse_lib.cpp +++ b/test/coarse_lib.cpp @@ -99,7 +99,7 @@ static void coarse_params_set_default(coarse_params_t *coarse_params, } umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); struct CoarseWithMemoryStrategyTest : umf_test::test, diff --git a/test/common/pool.hpp b/test/common/pool.hpp index 165f9b836..558b9d665 100644 --- a/test/common/pool.hpp +++ b/test/common/pool.hpp @@ -38,7 +38,7 @@ createPoolChecked(umf_memory_pool_ops_t *ops, } auto wrapPoolUnique(umf_memory_pool_handle_t hPool) { - return umf::pool_unique_handle_t(hPool, 
&umfPoolDestroy); + return umf_test::pool_unique_handle_t(hPool, &umfPoolDestroy); } bool isReallocSupported(umf_memory_pool_handle_t hPool) { @@ -149,7 +149,7 @@ struct malloc_pool : public pool_base_t { }; umf_memory_pool_ops_t MALLOC_POOL_OPS = - umf::poolMakeCOps(); + umf_test::poolMakeCOps(); static constexpr size_t DEFAULT_DISJOINT_SLAB_MIN_SIZE = 4096; static constexpr size_t DEFAULT_DISJOINT_MAX_POOLABLE_SIZE = 4096; diff --git a/test/common/provider.hpp b/test/common/provider.hpp index f40c0bf64..38fe7336e 100644 --- a/test/common/provider.hpp +++ b/test/common/provider.hpp @@ -29,7 +29,8 @@ createProviderChecked(umf_memory_provider_ops_t *ops, void *params) { } auto wrapProviderUnique(umf_memory_provider_handle_t hProvider) { - return umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); + return umf_test::provider_unique_handle_t(hProvider, + &umfMemoryProviderDestroy); } typedef struct provider_base_t { @@ -97,7 +98,7 @@ typedef struct provider_base_t { } provider_base_t; umf_memory_provider_ops_t BASE_PROVIDER_OPS = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); struct provider_ba_global : public provider_base_t { umf_result_t alloc(size_t size, size_t align, void **ptr) noexcept { @@ -127,7 +128,7 @@ struct provider_ba_global : public provider_base_t { }; umf_memory_provider_ops_t BA_GLOBAL_PROVIDER_OPS = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); struct provider_mock_out_of_mem : public provider_base_t { provider_ba_global helper_prov; @@ -152,7 +153,7 @@ struct provider_mock_out_of_mem : public provider_base_t { }; umf_memory_provider_ops_t MOCK_OUT_OF_MEM_PROVIDER_OPS = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); } // namespace umf_test diff --git a/test/disjoint_pool_file_prov.cpp b/test/disjoint_pool_file_prov.cpp index b874d2a49..58e15f571 100644 --- a/test/disjoint_pool_file_prov.cpp +++ b/test/disjoint_pool_file_prov.cpp @@ -20,7 +20,7 @@ using umf_test::test; #define FILE_PATH ((char *)"tmp_file") umf_memory_provider_ops_t UMF_MALLOC_MEMORY_PROVIDER_OPS = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); struct FileWithMemoryStrategyTest : umf_test::test, diff --git a/test/ipcAPI.cpp b/test/ipcAPI.cpp index 429896308..c0642dd76 100644 --- a/test/ipcAPI.cpp +++ b/test/ipcAPI.cpp @@ -109,7 +109,7 @@ provider_mock_ipc::allocations_mutex_type provider_mock_ipc::alloc_mutex; provider_mock_ipc::allocations_map_type provider_mock_ipc::allocations; static umf_memory_provider_ops_t IPC_MOCK_PROVIDER_OPS = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); HostMemoryAccessor hostMemoryAccessor; diff --git a/test/ipcFixtures.hpp b/test/ipcFixtures.hpp index 1fc57b900..cf31ff758 100644 --- a/test/ipcFixtures.hpp +++ b/test/ipcFixtures.hpp @@ -97,7 +97,7 @@ struct umfIpcTest : umf_test::test, void TearDown() override { test::TearDown(); } - umf::pool_unique_handle_t makePool() { + umf_test::pool_unique_handle_t makePool() { // TODO: The function is similar to poolCreateExt function // from memoryPool.hpp umf_memory_provider_handle_t hProvider = NULL; @@ -147,7 +147,7 @@ struct umfIpcTest : umf_test::test, poolParamsDestroy(poolParams); } - return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); + return umf_test::pool_unique_handle_t(hPool, &umfPoolDestroy); } struct stats_type { @@ -179,7 +179,7 @@ struct umfIpcTest : umf_test::test, std::vector ptrs; constexpr size_t ALLOC_SIZE = 100; constexpr size_t NUM_POINTERS = 100; - umf::pool_unique_handle_t pool = makePool(); + umf_test::pool_unique_handle_t 
pool = makePool(); ASSERT_NE(pool.get(), nullptr); for (size_t i = 0; i < NUM_POINTERS; ++i) { @@ -237,7 +237,7 @@ struct umfIpcTest : umf_test::test, std::vector ptrs; constexpr size_t ALLOC_SIZE = 100; constexpr size_t NUM_POINTERS = 100; - umf::pool_unique_handle_t pool = makePool(); + umf_test::pool_unique_handle_t pool = makePool(); ASSERT_NE(pool.get(), nullptr); for (size_t i = 0; i < NUM_POINTERS; ++i) { @@ -284,7 +284,7 @@ struct umfIpcTest : umf_test::test, std::vector ptrs; constexpr size_t ALLOC_SIZE = 100; constexpr size_t NUM_POINTERS = 100; - umf::pool_unique_handle_t pool = makePool(); + umf_test::pool_unique_handle_t pool = makePool(); ASSERT_NE(pool.get(), nullptr); for (size_t i = 0; i < NUM_POINTERS; ++i) { @@ -363,7 +363,7 @@ struct umfIpcTest : umf_test::test, std::vector ptrs; constexpr size_t ALLOC_SIZE = 100; constexpr size_t NUM_POINTERS = 100; - umf::pool_unique_handle_t pool = makePool(); + umf_test::pool_unique_handle_t pool = makePool(); ASSERT_NE(pool.get(), nullptr); for (size_t i = 0; i < NUM_POINTERS; ++i) { @@ -432,7 +432,7 @@ struct umfIpcTest : umf_test::test, TEST_P(umfIpcTest, GetIPCHandleSize) { size_t size = 0; - umf::pool_unique_handle_t pool = makePool(); + umf_test::pool_unique_handle_t pool = makePool(); ASSERT_NE(pool.get(), nullptr); umf_result_t ret = umfPoolGetIPCHandleSize(pool.get(), &size); @@ -445,7 +445,7 @@ TEST_P(umfIpcTest, GetIPCHandleSizeInvalidArgs) { umf_result_t ret = umfPoolGetIPCHandleSize(nullptr, &size); EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); - umf::pool_unique_handle_t pool = makePool(); + umf_test::pool_unique_handle_t pool = makePool(); ASSERT_NE(pool.get(), nullptr); ret = umfPoolGetIPCHandleSize(pool.get(), nullptr); @@ -463,7 +463,7 @@ TEST_P(umfIpcTest, GetIPCHandleInvalidArgs) { ret = umfGetIPCHandle(ptr, &ipcHandle, &handleSize); EXPECT_EQ(ret, UMF_RESULT_ERROR_INVALID_ARGUMENT); - umf::pool_unique_handle_t pool = makePool(); + umf_test::pool_unique_handle_t pool = makePool(); ASSERT_NE(pool.get(), nullptr); ptr = umfPoolMalloc(pool.get(), SIZE); @@ -488,7 +488,7 @@ TEST_P(umfIpcTest, CloseIPCHandleInvalidPtr) { TEST_P(umfIpcTest, BasicFlow) { constexpr size_t SIZE = 100; std::vector expected_data(SIZE); - umf::pool_unique_handle_t pool = makePool(); + umf_test::pool_unique_handle_t pool = makePool(); ASSERT_NE(pool.get(), nullptr); int *ptr = (int *)umfPoolMalloc(pool.get(), SIZE * sizeof(int)); @@ -555,7 +555,7 @@ TEST_P(umfIpcTest, BasicFlow) { TEST_P(umfIpcTest, AllocFreeAllocTest) { constexpr size_t SIZE = 64 * 1024; - umf::pool_unique_handle_t pool = makePool(); + umf_test::pool_unique_handle_t pool = makePool(); ASSERT_NE(pool.get(), nullptr); umf_ipc_handler_handle_t ipcHandler = nullptr; @@ -616,9 +616,9 @@ TEST_P(umfIpcTest, AllocFreeAllocTest) { TEST_P(umfIpcTest, openInTwoIpcHandlers) { constexpr size_t SIZE = 100; std::vector expected_data(SIZE); - umf::pool_unique_handle_t pool1 = makePool(); + umf_test::pool_unique_handle_t pool1 = makePool(); ASSERT_NE(pool1.get(), nullptr); - umf::pool_unique_handle_t pool2 = makePool(); + umf_test::pool_unique_handle_t pool2 = makePool(); ASSERT_NE(pool2.get(), nullptr); umf_ipc_handler_handle_t ipcHandler1 = nullptr; umf_ipc_handler_handle_t ipcHandler2 = nullptr; @@ -715,8 +715,8 @@ TEST_P(umfIpcTest, ConcurrentDestroyIpcHandlers) { constexpr size_t NUM_POOLS = 10; void *ptrs[NUM_ALLOCS]; void *openedPtrs[NUM_POOLS][NUM_ALLOCS]; - std::vector consumerPools; - umf::pool_unique_handle_t producerPool = makePool(); + std::vector consumerPools; + 
umf_test::pool_unique_handle_t producerPool = makePool(); ASSERT_NE(producerPool.get(), nullptr); for (size_t i = 0; i < NUM_POOLS; ++i) { diff --git a/test/memoryPoolAPI.cpp b/test/memoryPoolAPI.cpp index a949b281f..e8071a2d8 100644 --- a/test/memoryPoolAPI.cpp +++ b/test/memoryPoolAPI.cpp @@ -125,7 +125,7 @@ TEST_P(umfPoolWithCreateFlagsTest, memoryPoolWithCustomProvider) { return UMF_RESULT_SUCCESS; } }; - umf_memory_pool_ops_t pool_ops = umf::poolMakeCOps(); + umf_memory_pool_ops_t pool_ops = umf_test::poolMakeCOps(); umf_memory_pool_handle_t hPool; auto ret = umfPoolCreate(&pool_ops, hProvider, nullptr, flags, &hPool); @@ -187,8 +187,8 @@ struct tagTest : umf_test::test { createPoolChecked(umfProxyPoolOps(), provider.get(), nullptr)); } - umf::provider_unique_handle_t provider; - umf::pool_unique_handle_t pool; + umf_test::provider_unique_handle_t provider; + umf_test::pool_unique_handle_t pool; }; TEST_F(tagTest, SetAndGet) { @@ -370,7 +370,8 @@ TEST_P(poolInitializeTest, errorPropagation) { return *errorToReturn; } }; - umf_memory_pool_ops_t pool_ops = umf::poolMakeCOps(); + umf_memory_pool_ops_t pool_ops = + umf_test::poolMakeCOps(); umf_memory_pool_handle_t hPool; auto ret = umfPoolCreate(&pool_ops, hProvider, (void *)&this->GetParam(), 0, @@ -420,7 +421,7 @@ TEST_F(test, getLastFailedMemoryProvider) { const char *name; }; umf_memory_provider_ops_t provider_ops = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); auto providerUnique1 = wrapProviderUnique( createProviderChecked(&provider_ops, (void *)"provider1")); diff --git a/test/memoryProviderAPI.cpp b/test/memoryProviderAPI.cpp index 2dc7261f0..720f11b41 100644 --- a/test/memoryProviderAPI.cpp +++ b/test/memoryProviderAPI.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2023-2024 Intel Corporation +// Copyright (C) 2023-2025 Intel Corporation // Under the Apache License v2.0 with LLVM Exceptions. See LICENSE.TXT. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // This file contains tests for UMF provider API @@ -335,7 +335,7 @@ TEST_P(providerInitializeTest, errorPropagation) { } }; umf_memory_provider_ops_t provider_ops = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); umf_memory_provider_handle_t hProvider; auto ret = umfMemoryProviderCreate(&provider_ops, (void *)&this->GetParam(), diff --git a/test/poolFixtures.hpp b/test/poolFixtures.hpp index 6b01769f1..de5a54685 100644 --- a/test/poolFixtures.hpp +++ b/test/poolFixtures.hpp @@ -32,7 +32,7 @@ using poolCreateExtParams = pfnPoolParamsDestroy, umf_memory_provider_ops_t *, pfnProviderParamsCreate, pfnProviderParamsDestroy>; -umf::pool_unique_handle_t poolCreateExtUnique(poolCreateExtParams params) { +umf_test::pool_unique_handle_t poolCreateExtUnique(poolCreateExtParams params) { auto [pool_ops, poolParamsCreate, poolParamsDestroy, provider_ops, providerParamsCreate, providerParamsDestroy] = params; @@ -73,7 +73,7 @@ umf::pool_unique_handle_t poolCreateExtUnique(poolCreateExtParams params) { providerParamsDestroy(provider_params); } - return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); + return umf_test::pool_unique_handle_t(hPool, &umfPoolDestroy); } struct umfPoolTest : umf_test::test, @@ -86,7 +86,7 @@ struct umfPoolTest : umf_test::test, void TearDown() override { test::TearDown(); } - umf::pool_unique_handle_t pool; + umf_test::pool_unique_handle_t pool; static constexpr int NTHREADS = 5; static constexpr std::array nonAlignedAllocSizes = {5, 7, 23, 55, @@ -106,7 +106,7 @@ struct umfMultiPoolTest : umf_test::test, void TearDown() override { test::TearDown(); } - std::vector pools; + std::vector pools; }; struct umfMemTest @@ -123,7 +123,7 @@ struct umfMemTest void TearDown() override { test::TearDown(); } - umf::pool_unique_handle_t pool; + umf_test::pool_unique_handle_t pool; int expectedRecycledPoolAllocs; }; diff --git a/test/pools/disjoint_pool.cpp b/test/pools/disjoint_pool.cpp index 4eedce981..9bdef4f13 100644 --- a/test/pools/disjoint_pool.cpp +++ b/test/pools/disjoint_pool.cpp @@ -40,7 +40,7 @@ TEST_F(test, internals) { } }; umf_memory_provider_ops_t provider_ops = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); auto providerUnique = wrapProviderUnique(createProviderChecked(&provider_ops, nullptr)); @@ -151,7 +151,7 @@ TEST_F(test, freeErrorPropagation) { } }; umf_memory_provider_ops_t provider_ops = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); auto providerUnique = wrapProviderUnique(createProviderChecked(&provider_ops, nullptr)); @@ -206,7 +206,7 @@ TEST_F(test, sharedLimits) { } }; umf_memory_provider_ops_t provider_ops = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); static constexpr size_t SlabMinSize = 1024; static constexpr size_t MaxSize = 4 * SlabMinSize; diff --git a/test/pools/pool_base_alloc.cpp b/test/pools/pool_base_alloc.cpp index ca931bcec..441ab37ec 100644 --- a/test/pools/pool_base_alloc.cpp +++ b/test/pools/pool_base_alloc.cpp @@ -17,17 +17,17 @@ struct base_alloc_pool : public umf_test::pool_base_t { void *malloc(size_t size) noexcept { return umf_ba_global_alloc(size); } void *calloc(size_t, size_t) noexcept { - umf::getPoolLastStatusRef() = + umf_test::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; return NULL; } void *realloc(void *, size_t) noexcept { - umf::getPoolLastStatusRef() = + umf_test::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; return NULL; } void *aligned_malloc(size_t, size_t) noexcept { - 
umf::getPoolLastStatusRef() = + umf_test::getPoolLastStatusRef() = UMF_RESULT_ERROR_NOT_SUPPORTED; return NULL; } @@ -39,11 +39,12 @@ struct base_alloc_pool : public umf_test::pool_base_t { return UMF_RESULT_SUCCESS; } umf_result_t get_last_allocation_error() { - return umf::getPoolLastStatusRef(); + return umf_test::getPoolLastStatusRef(); } }; -umf_memory_pool_ops_t BA_POOL_OPS = umf::poolMakeCOps(); +umf_memory_pool_ops_t BA_POOL_OPS = + umf_test::poolMakeCOps(); INSTANTIATE_TEST_SUITE_P(baPool, umfPoolTest, ::testing::Values(poolCreateExtParams{ diff --git a/test/pools/scalable_pool.cpp b/test/pools/scalable_pool.cpp index 14cf5f305..54c0128a4 100644 --- a/test/pools/scalable_pool.cpp +++ b/test/pools/scalable_pool.cpp @@ -61,7 +61,7 @@ struct umfScalablePoolParamsTest }; static constexpr umf_memory_provider_ops_t VALIDATOR_PROVIDER_OPS = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); umfScalablePoolParamsTest() : expected_params{0, false}, params(nullptr) {} void SetUp() override { @@ -82,7 +82,7 @@ struct umfScalablePoolParamsTest test::TearDown(); } - umf::pool_unique_handle_t makePool() { + umf_test::pool_unique_handle_t makePool() { umf_memory_provider_handle_t hProvider = nullptr; umf_memory_pool_handle_t hPool = nullptr; @@ -94,7 +94,7 @@ struct umfScalablePoolParamsTest UMF_POOL_CREATE_FLAG_OWN_PROVIDER, &hPool); EXPECT_EQ(ret, UMF_RESULT_SUCCESS); - return umf::pool_unique_handle_t(hPool, &umfPoolDestroy); + return umf_test::pool_unique_handle_t(hPool, &umfPoolDestroy); } void allocFreeFlow() { diff --git a/test/provider_devdax_memory.cpp b/test/provider_devdax_memory.cpp index f0f5c21fd..6efeef90c 100644 --- a/test/provider_devdax_memory.cpp +++ b/test/provider_devdax_memory.cpp @@ -46,7 +46,7 @@ static int compare_native_error_str(const char *message, int error) { using providerCreateExtParams = std::tuple; static void providerCreateExt(providerCreateExtParams params, - umf::provider_unique_handle_t *handle) { + umf_test::provider_unique_handle_t *handle) { umf_memory_provider_handle_t hProvider = nullptr; auto [provider_ops, provider_params] = params; @@ -55,8 +55,8 @@ static void providerCreateExt(providerCreateExtParams params, ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_NE(hProvider, nullptr); - *handle = - umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); + *handle = umf_test::provider_unique_handle_t(hProvider, + &umfMemoryProviderDestroy); } struct umfProviderTest @@ -74,7 +74,7 @@ struct umfProviderTest void TearDown() override { test::TearDown(); } - umf::provider_unique_handle_t provider; + umf_test::provider_unique_handle_t provider; size_t page_size; size_t page_plus_64; }; diff --git a/test/provider_file_memory.cpp b/test/provider_file_memory.cpp index 896b95380..bcc9d2645 100644 --- a/test/provider_file_memory.cpp +++ b/test/provider_file_memory.cpp @@ -42,7 +42,7 @@ static int compare_native_error_str(const char *message, int error) { using providerCreateExtParams = std::tuple; static void providerCreateExt(providerCreateExtParams params, - umf::provider_unique_handle_t *handle) { + umf_test::provider_unique_handle_t *handle) { umf_memory_provider_handle_t hProvider = nullptr; auto [provider_ops, provider_params] = params; @@ -51,8 +51,8 @@ static void providerCreateExt(providerCreateExtParams params, ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_NE(hProvider, nullptr); - *handle = - umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); + *handle = umf_test::provider_unique_handle_t(hProvider, + 
&umfMemoryProviderDestroy); } struct FileProviderParamsDefault @@ -70,7 +70,7 @@ struct FileProviderParamsDefault void TearDown() override { test::TearDown(); } - umf::provider_unique_handle_t provider; + umf_test::provider_unique_handle_t provider; size_t page_size; size_t page_plus_64; }; diff --git a/test/provider_fixed_memory.cpp b/test/provider_fixed_memory.cpp index 1d75056ea..dac651435 100644 --- a/test/provider_fixed_memory.cpp +++ b/test/provider_fixed_memory.cpp @@ -41,7 +41,7 @@ static int compare_native_error_str(const char *message, int error) { using providerCreateExtParams = std::tuple; static void providerCreateExt(providerCreateExtParams params, - umf::provider_unique_handle_t *handle) { + umf_test::provider_unique_handle_t *handle) { umf_memory_provider_handle_t hProvider = nullptr; auto [provider_ops, provider_params] = params; @@ -50,8 +50,8 @@ static void providerCreateExt(providerCreateExtParams params, ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_NE(hProvider, nullptr); - *handle = - umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); + *handle = umf_test::provider_unique_handle_t(hProvider, + &umfMemoryProviderDestroy); } struct FixedProviderTest @@ -138,7 +138,7 @@ struct FixedProviderTest } } - umf::provider_unique_handle_t provider; + umf_test::provider_unique_handle_t provider; size_t page_size; size_t page_plus_64; void *memory_buffer = nullptr; diff --git a/test/provider_os_memory.cpp b/test/provider_os_memory.cpp index 7fd781208..f3552b923 100644 --- a/test/provider_os_memory.cpp +++ b/test/provider_os_memory.cpp @@ -47,7 +47,7 @@ static int compare_native_error_str(const char *message, int error) { using providerCreateExtParams = std::tuple; static void providerCreateExt(providerCreateExtParams params, - umf::provider_unique_handle_t *handle) { + umf_test::provider_unique_handle_t *handle) { umf_memory_provider_handle_t hProvider = nullptr; auto [provider_ops, provider_params] = params; @@ -56,8 +56,8 @@ static void providerCreateExt(providerCreateExtParams params, ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_NE(hProvider, nullptr); - *handle = - umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); + *handle = umf_test::provider_unique_handle_t(hProvider, + &umfMemoryProviderDestroy); } struct umfProviderTest @@ -75,7 +75,7 @@ struct umfProviderTest void TearDown() override { test::TearDown(); } - umf::provider_unique_handle_t provider; + umf_test::provider_unique_handle_t provider; size_t page_size; size_t page_plus_64; }; diff --git a/test/provider_tracking.cpp b/test/provider_tracking.cpp index d289a9796..55acc452c 100644 --- a/test/provider_tracking.cpp +++ b/test/provider_tracking.cpp @@ -22,7 +22,7 @@ using umf_test::test; using providerCreateExtParams = std::tuple; static void providerCreateExt(providerCreateExtParams params, - umf::provider_unique_handle_t *handle) { + umf_test::provider_unique_handle_t *handle) { umf_memory_provider_handle_t hProvider = nullptr; auto [provider_ops, provider_params] = params; @@ -31,8 +31,8 @@ static void providerCreateExt(providerCreateExtParams params, ASSERT_EQ(ret, UMF_RESULT_SUCCESS); ASSERT_NE(hProvider, nullptr); - *handle = - umf::provider_unique_handle_t(hProvider, &umfMemoryProviderDestroy); + *handle = umf_test::provider_unique_handle_t(hProvider, + &umfMemoryProviderDestroy); } struct TrackingProviderTest @@ -68,7 +68,7 @@ struct TrackingProviderTest 0, &hPool); ASSERT_EQ(umf_result, UMF_RESULT_SUCCESS); - pool = umf::pool_unique_handle_t(hPool, &umfPoolDestroy); + pool = 
umf_test::pool_unique_handle_t(hPool, &umfPoolDestroy); } void TearDown() override { @@ -79,8 +79,8 @@ struct TrackingProviderTest test::TearDown(); } - umf::provider_unique_handle_t provider; - umf::pool_unique_handle_t pool; + umf_test::provider_unique_handle_t provider; + umf_test::pool_unique_handle_t pool; size_t page_size; size_t page_plus_64; void *memory_buffer = nullptr; diff --git a/test/provider_tracking_fixture_tests.cpp b/test/provider_tracking_fixture_tests.cpp index 94227757b..d81d4f8b1 100644 --- a/test/provider_tracking_fixture_tests.cpp +++ b/test/provider_tracking_fixture_tests.cpp @@ -47,7 +47,7 @@ struct provider_from_pool : public umf_test::provider_base_t { }; umf_memory_provider_ops_t PROVIDER_FROM_POOL_OPS = - umf::providerMakeCOps(); + umf_test::providerMakeCOps(); static void *providerFromPoolParamsCreate(void) { umf_file_memory_provider_params_handle_t paramsFile = NULL; diff --git a/test/utils/cpp_helpers.hpp b/test/utils/cpp_helpers.hpp index 85e81c502..037c633c1 100644 --- a/test/utils/cpp_helpers.hpp +++ b/test/utils/cpp_helpers.hpp @@ -7,8 +7,8 @@ * */ -#ifndef UMF_HELPERS_HPP -#define UMF_HELPERS_HPP 1 +#ifndef UMF_TEST_HELPERS_HPP +#define UMF_TEST_HELPERS_HPP 1 #include #include @@ -22,7 +22,7 @@ #include #include -namespace umf { +namespace umf_test { using pool_unique_handle_t = std::unique_ptr umf_result_t &getPoolLastStatusRef() { return last_status; } -} // namespace umf +} // namespace umf_test -#endif /* UMF_HELPERS_HPP */ +#endif /* UMF_TEST_HELPERS_HPP */ From b8e7246104955ff6839c1860de0c09c19373579f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Thu, 13 Mar 2025 17:12:13 +0100 Subject: [PATCH 250/466] update benchmark scripts --- .github/workflows/reusable_benchmarks.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index 69e1f5c60..d3296beb2 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -103,9 +103,9 @@ jobs: uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: intel/llvm - # add preloaded UMF benchmarks - # https://github.com/intel/llvm/pull/17278 - ref: b2f9dab5266d227cc9eb19af1b54c5bdc50221d1 + # [BENCHMARK] fix default timeout parameter + # https://github.com/intel/llvm/pull/17412 + ref: 357e9e0b253b7eba105d044e38452b3c09169f8a path: sycl-repo fetch-depth: 1 From 8cd338315b8e8295cb4416c5ef64ec7e48070432 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Fri, 14 Mar 2025 14:10:52 +0100 Subject: [PATCH 251/466] Enable jemalloc pool test with Fixed provider --- test/pools/jemalloc_pool.cpp | 42 ++++++++++++++++++++++++++++++------ 1 file changed, 36 insertions(+), 6 deletions(-) diff --git a/test/pools/jemalloc_pool.cpp b/test/pools/jemalloc_pool.cpp index e282be316..8112f36bf 100644 --- a/test/pools/jemalloc_pool.cpp +++ b/test/pools/jemalloc_pool.cpp @@ -11,9 +11,7 @@ using umf_test::test; using namespace umf_test; -using os_params_unique_handle_t = - std::unique_ptr; +using void_unique_ptr = std::unique_ptr; void *createOsMemoryProviderParams() { umf_os_memory_provider_params_handle_t params = nullptr; @@ -30,11 +28,43 @@ umf_result_t destroyOsMemoryProviderParams(void *params) { (umf_os_memory_provider_params_handle_t)params); } +void *createFixedMemoryProviderParams() { + // Allocate a memory buffer to use with the fixed memory provider. + // The umfPoolTest.malloc_compliance test requires a lot of memory. 
+ size_t memory_size = (1UL << 31); + static void_unique_ptr memory_buffer = + void_unique_ptr(malloc(memory_size), free); + if (memory_buffer.get() == NULL) { + throw std::runtime_error( + "Failed to allocate memory for Fixed memory provider"); + } + + umf_fixed_memory_provider_params_handle_t params = nullptr; + umf_result_t res = umfFixedMemoryProviderParamsCreate( + ¶ms, memory_buffer.get(), memory_size); + if (res != UMF_RESULT_SUCCESS) { + throw std::runtime_error( + "Failed to create Fixed memory provider params"); + } + + return params; +} + +umf_result_t destroyFixedMemoryProviderParams(void *params) { + return umfFixedMemoryProviderParamsDestroy( + (umf_fixed_memory_provider_params_handle_t)params); +} + INSTANTIATE_TEST_SUITE_P( jemallocPoolTest, umfPoolTest, - ::testing::Values(poolCreateExtParams{ - umfJemallocPoolOps(), nullptr, nullptr, umfOsMemoryProviderOps(), - createOsMemoryProviderParams, destroyOsMemoryProviderParams})); + ::testing::Values(poolCreateExtParams{umfJemallocPoolOps(), nullptr, + nullptr, umfOsMemoryProviderOps(), + createOsMemoryProviderParams, + destroyOsMemoryProviderParams}, + poolCreateExtParams{umfJemallocPoolOps(), nullptr, + nullptr, umfFixedMemoryProviderOps(), + createFixedMemoryProviderParams, + destroyFixedMemoryProviderParams})); // this test makes sure that jemalloc does not use // memory provider to allocate metadata (and hence From fcf2c8d654fb5748d7d3520e862449d258767a66 Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Mon, 17 Mar 2025 10:04:23 +0100 Subject: [PATCH 252/466] Install libnuma-dev in the proxy lib workflow Install libnuma-dev in the proxy lib workflow, because it is required. It has worked correctly so far most probably, because libnuma-dev has been installed as a dependency of libhwloc-dev. It failed when libhwloc-dev was not installed. 
Signed-off-by: Lukasz Dorau --- .github/workflows/reusable_proxy_lib.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/reusable_proxy_lib.yml b/.github/workflows/reusable_proxy_lib.yml index 363e66526..c519be95b 100644 --- a/.github/workflows/reusable_proxy_lib.yml +++ b/.github/workflows/reusable_proxy_lib.yml @@ -32,7 +32,7 @@ jobs: - name: Install apt packages run: | sudo apt-get update - sudo apt-get install -y cmake libhwloc-dev libtbb-dev lcov + sudo apt-get install -y cmake libhwloc-dev libnuma-dev libtbb-dev lcov - name: Configure build run: > From fc68be82643b590396a1c6d53a82a04693bdbc74 Mon Sep 17 00:00:00 2001 From: Sergei Vinogradov Date: Mon, 17 Mar 2025 10:53:45 +0100 Subject: [PATCH 253/466] Suppress false-positive in IPC tests under helgrind --- test/supp/helgrind-test_ipc.supp | 1 + test/supp/helgrind-test_ipc_max_opened_limit.supp | 1 + 2 files changed, 2 insertions(+) diff --git a/test/supp/helgrind-test_ipc.supp b/test/supp/helgrind-test_ipc.supp index 04f3a9199..25ae87ea4 100644 --- a/test/supp/helgrind-test_ipc.supp +++ b/test/supp/helgrind-test_ipc.supp @@ -19,6 +19,7 @@ [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle diff --git a/test/supp/helgrind-test_ipc_max_opened_limit.supp b/test/supp/helgrind-test_ipc_max_opened_limit.supp index 04f3a9199..25ae87ea4 100644 --- a/test/supp/helgrind-test_ipc_max_opened_limit.supp +++ b/test/supp/helgrind-test_ipc_max_opened_limit.supp @@ -19,6 +19,7 @@ [false-positive] Double check locking pattern in trackingOpenIpcHandle Helgrind:Race fun:utils_atomic_store_release_ptr + fun:upstreamOpenIPCHandle fun:trackingOpenIpcHandle fun:umfMemoryProviderOpenIPCHandle fun:umfOpenIPCHandle From fb28a16413ff1b1dcd326aebf65300f82130c58a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Mon, 17 Mar 2025 14:53:30 +0100 Subject: [PATCH 254/466] fix jemalloc benchmark with fixedprovider --- benchmark/benchmark.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 377a38fcf..ea546422e 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -171,7 +171,8 @@ UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fixedprovider, fixed_alloc_size, pool_allocator>); -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_fix) +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + jemalloc_pool_fixedprovider) ->Apply(&default_multiple_alloc_fix_size) ->Apply(&multithreaded); @@ -179,7 +180,8 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_uniform_fixedprovider, uniform_alloc_size, pool_allocator>); -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, jemalloc_pool_uniform) +UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, + jemalloc_pool_uniform_fixedprovider) ->Apply(&default_multiple_alloc_uniform_size) ->Apply(&multithreaded); From ac6a2b254a65b39cfbb693fbdeba0cb60716b8ed Mon Sep 17 00:00:00 2001 From: Lukasz Dorau Date: Thu, 27 Feb 2025 07:59:42 +0100 Subject: [PATCH 255/466] Remove old SHM files left from the previous runs, because of crashes Remove old SHM files /tmp/umf_file_provider_* (at the beginning of the test) left from the previous runs, because of crashes of the
ipc_file_prov test. Signed-off-by: Lukasz Dorau --- test/ipc_file_prov.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/test/ipc_file_prov.sh b/test/ipc_file_prov.sh index 629b2cbb7..ffb849f25 100755 --- a/test/ipc_file_prov.sh +++ b/test/ipc_file_prov.sh @@ -9,7 +9,12 @@ set -e -FILE_NAME="/tmp/umf_file_provider_$$" +FILE_BASE="/tmp/umf_file_provider" + +# remove old SHM files (left from the previous runs, because of crashes) +rm -f ${FILE_BASE}* + +FILE_NAME="${FILE_BASE}_$$" # port should be a number from the range <1024, 65535> PORT=$(( 1024 + ( $$ % ( 65535 - 1024 )))) From e316fdccfbd04965b00351456a308a47774050fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Thu, 6 Mar 2025 14:04:29 +0100 Subject: [PATCH 256/466] increase number of threads in benchmarks --- .github/workflows/reusable_benchmarks.yml | 1 + benchmark/benchmark.cpp | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index a7c9e5e28..4cf5325ab 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -137,6 +137,7 @@ jobs: ~/bench_workdir_umf --umf ${{env.BUILD_DIR}} --compare baseline + --timeout 3000 ${{ inputs.upload_report && '--output-html' || '' }} ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} ${{ inputs.bench_script_params }} diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index 60636a559..fb9cf68f4 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -31,6 +31,8 @@ // Refer to the 'argsName()' function in each component to find detailed descriptions of these arguments. static void multithreaded(benchmark::internal::Benchmark *benchmark) { + benchmark->Threads(12); + benchmark->Threads(8); benchmark->Threads(4); benchmark->Threads(1); } From a64565a7b93b391e81d73959e85613fdfa4a7bdf Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Tue, 28 Jan 2025 11:43:46 +0100 Subject: [PATCH 257/466] Fallback to hwloc fetch if package not found Fallback to fetching hwloc from source instead of failing UMF build. Add a workflow for testing the fallback. Unify correct paths to hwloc. Use target name instead of a library path for hwloc linkage. 
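The fallback described above boils down to the following CMake pattern - a minimal sketch based on the CMakeLists.txt changes in this patch, with surrounding details (such as the Windows-specific build steps) left out; variable names like UMF_LINK_HWLOC_STATICALLY, UMF_HWLOC_REPO and UMF_HWLOC_TAG are taken from the patch itself:

if(NOT UMF_LINK_HWLOC_STATICALLY)
    # Prefer hwloc already installed in the system: try pkg-config first,
    # then the project's own find module.
    pkg_check_modules(LIBHWLOC hwloc>=2.3.0)
    if(NOT LIBHWLOC_FOUND)
        find_package(LIBHWLOC 2.3.0 COMPONENTS hwloc)
    endif()
    if(NOT (LIBHWLOC_FOUND OR LIBHWLOC_LIBRARIES))
        # No usable hwloc found - fall back to a static build fetched
        # from source instead of failing the configuration.
        set(UMF_LINK_HWLOC_STATICALLY ON)
    endif()
endif()

if(UMF_LINK_HWLOC_STATICALLY)
    include(FetchContent)
    FetchContent_Declare(
        hwloc_targ
        GIT_REPOSITORY ${UMF_HWLOC_REPO}
        GIT_TAG ${UMF_HWLOC_TAG})
    FetchContent_MakeAvailable(hwloc_targ)
    # The fetched sources are then built and the LIBHWLOC_* variables are
    # pointed at the resulting static library, as in the full diff below.
endif()
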
--- .github/workflows/nightly.yml | 135 +++++++++++++------------ CMakeLists.txt | 181 ++++++++++++++-------------------- cmake/FindLIBHWLOC.cmake | 5 +- examples/CMakeLists.txt | 8 +- src/CMakeLists.txt | 7 +- test/CMakeLists.txt | 15 +-- 6 files changed, 156 insertions(+), 195 deletions(-) diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 3381c09be..1317482fd 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -10,6 +10,9 @@ on: permissions: contents: read +env: + BUILD_DIR : "${{github.workspace}}/build" + jobs: fuzz-test: name: Fuzz test @@ -88,18 +91,16 @@ jobs: - name: Run tests under valgrind run: ${{github.workspace}}/test/test_valgrind.sh ${{github.workspace}} ${{github.workspace}}/build ${{matrix.tool}} - Windows-Ninja-cl: - name: Windows-Ninja-cl - env: - VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" - BUILD_DIR : "${{github.workspace}}/build" + Windows-generators: + name: Windows ${{matrix.generator}} generator strategy: matrix: os: ['windows-2019', 'windows-2022'] - build_type: [Debug, Release] + build_type: [Release] compiler: [{c: cl, cxx: cl}] shared_library: ['ON', 'OFF'] static_hwloc: ['ON', 'OFF'] + generator: ['Ninja', 'NMake Makefiles'] runs-on: ${{matrix.os}} @@ -109,8 +110,18 @@ jobs: with: fetch-depth: 0 + - name: Set VCPKG_PATH with hwloc + if: matrix.static_hwloc == 'OFF' + run: echo "VCPKG_PATH='${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows'" >> $env:GITHUB_ENV + + - name: Set VCPKG_PATH without hwloc + if: matrix.static_hwloc == 'ON' + run: echo "VCPKG_PATH='${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows'" >> $env:GITHUB_ENV + - name: Initialize vcpkg uses: lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 + env: + VCPKG_PATH: ${{env.VCPKG_PATH}} with: vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg @@ -120,6 +131,7 @@ jobs: run: vcpkg install - name: Install Ninja + if: matrix.generator == 'Ninja' uses: seanmiddleditch/gha-setup-ninja@96bed6edff20d1dd61ecff9b75cc519d516e6401 # v5 - name: Configure MSVC environment @@ -132,7 +144,7 @@ jobs: -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" -DCMAKE_C_COMPILER=${{matrix.compiler.c}} -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -G Ninja + -G "${{matrix.generator}}" -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} -DUMF_LINK_HWLOC_STATICALLY=${{matrix.static_hwloc}} -DUMF_FORMAT_CODE_STYLE=OFF @@ -151,71 +163,10 @@ jobs: working-directory: ${{env.BUILD_DIR}} run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test - Windows-NMake: - name: Windows-NMake - env: - VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" - BUILD_DIR : "${{github.workspace}}/build" - strategy: - matrix: - os: ['windows-2019', 'windows-2022'] - build_type: [Debug, Release] - compiler: [{c: cl, cxx: cl}] - shared_library: ['ON', 'OFF'] - - runs-on: ${{matrix.os}} - - steps: - - name: Checkout - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 - with: - fetch-depth: 0 - - - name: Initialize vcpkg - uses: 
lukka/run-vcpkg@5e0cab206a5ea620130caf672fce3e4a6b5666a1 # v11.5 - with: - vcpkgGitCommitId: 3dd44b931481d7a8e9ba412621fa810232b66289 - vcpkgDirectory: ${{env.BUILD_DIR}}/vcpkg - vcpkgJsonGlob: '**/vcpkg.json' - - - name: Install dependencies - run: vcpkg install - - - name: Configure MSVC environment - uses: ilammy/msvc-dev-cmd@0b201ec74fa43914dc39ae48a89fd1d8cb592756 # v1.13.0 - - - name: Configure build - run: > - cmake - -B ${{env.BUILD_DIR}} - -DCMAKE_PREFIX_PATH="${{env.VCPKG_PATH}}" - -DCMAKE_C_COMPILER=${{matrix.compiler.c}} - -DCMAKE_CXX_COMPILER=${{matrix.compiler.cxx}} - -G "NMake Makefiles" - -DUMF_BUILD_SHARED_LIBRARY=${{matrix.shared_library}} - -DUMF_LINK_HWLOC_STATICALLY=ON - -DUMF_FORMAT_CODE_STYLE=OFF - -DUMF_DEVELOPER_MODE=ON - -DUMF_BUILD_LIBUMF_POOL_JEMALLOC=ON - -DUMF_BUILD_LEVEL_ZERO_PROVIDER=ON - -DUMF_BUILD_CUDA_PROVIDER=ON - -DUMF_TESTS_FAIL_ON_SKIP=ON - - - name: Build UMF - shell: cmd - run: cmake --build ${{env.BUILD_DIR}} --config ${{matrix.build_type}} -j %NUMBER_OF_PROCESSORS% - - - name: Run tests - shell: cmd - working-directory: ${{env.BUILD_DIR}} - run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test - - icx: name: ICX env: VCPKG_PATH: "${{github.workspace}}/build/vcpkg/packages/hwloc_x64-windows;${{github.workspace}}/build/vcpkg/packages/tbb_x64-windows;${{github.workspace}}/build/vcpkg/packages/jemalloc_x64-windows" - BUILD_DIR : "${{github.workspace}}/build" strategy: matrix: os: ['windows-2019', 'windows-2022'] @@ -297,6 +248,54 @@ jobs: call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + hwloc-fallback: + # Scenarios where UMF_LINK_HWLOC_STATICALLY is set to OFF and hwloc is not installed in the system + # The hwloc library is fetched implicitly + name: "Fallback to static hwloc build" + strategy: + matrix: + include: + - os: 'ubuntu-latest' + build_type: Release + number_of_processors: '$(nproc)' + - os: 'windows-latest' + build_type: Release + number_of_processors: '$Env:NUMBER_OF_PROCESSORS' + + runs-on: ${{matrix.os}} + + steps: + - name: Install dependencies + if: matrix.os == 'ubuntu-latest' + run: sudo apt-get install -y libnuma-dev + + - name: Checkout + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + fetch-depth: 0 + + - name: Configure build + run: > + cmake + -B ${{env.BUILD_DIR}} + -DCMAKE_BUILD_TYPE=${{matrix.build_type}} + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_EXAMPLES=OFF + -DUMF_DEVELOPER_MODE=ON + -DUMF_LINK_HWLOC_STATICALLY=OFF + -DUMF_TESTS_FAIL_ON_SKIP=ON + + - name: Build UMF + run: > + cmake + --build ${{env.BUILD_DIR}} + --config ${{matrix.build_type}} + -j ${{matrix.number_of_processors}} + + - name: Run tests + working-directory: ${{env.BUILD_DIR}} + run: ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test + L0: uses: ./.github/workflows/reusable_gpu.yml with: diff --git a/CMakeLists.txt b/CMakeLists.txt index ec10a0c4b..ef2658fd9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,6 +41,7 @@ endif() include(CTest) include(CMakePackageConfigHelpers) include(GNUInstallDirs) +include(FetchContent) find_package(PkgConfig) # --------------------------------------------------------------------------- # @@ -115,6 +116,7 @@ list(APPEND UMF_OPTIONS_LIST UMF_PROXY_LIB_BASED_ON_POOL) # Setup required variables, definitions; fetch dependencies; include # sub_directories based on build options; set flags; etc. 
# --------------------------------------------------------------------------- # +message(STATUS "CMAKE_GENERATOR: ${CMAKE_GENERATOR}") if(UMF_BUILD_TESTS AND DEFINED ENV{CI} @@ -162,7 +164,6 @@ else() set(UMF_JEMALLOC_TAG 5.3.0) endif() - include(FetchContent) message( STATUS "Will fetch jemalloc from ${UMF_JEMALLOC_REPO} (tag: ${UMF_JEMALLOC_TAG})" @@ -257,126 +258,94 @@ else() if(NOT UMF_LINK_HWLOC_STATICALLY) pkg_check_modules(LIBHWLOC hwloc>=2.3.0) if(NOT LIBHWLOC_FOUND) - find_package(LIBHWLOC 2.3.0 REQUIRED hwloc) + find_package(LIBHWLOC 2.3.0 COMPONENTS hwloc) + if(LIBHWLOC_LIBRARIES) + set(LIBHWLOC_AVAILABLE TRUE) + endif() endif() - # add PATH to DLL on Windows - set(DLL_PATH_LIST - "${DLL_PATH_LIST};PATH=path_list_append:${LIBHWLOC_DLL_DIRS}") - elseif(WINDOWS) - include(FetchContent) - set(HWLOC_ENABLE_TESTING OFF) - set(HWLOC_SKIP_LSTOPO ON) - set(HWLOC_SKIP_TOOLS ON) + if(LIBHWLOC_AVAILABLE OR LIBHWLOC_FOUND) + # add PATH to DLL on Windows + set(DLL_PATH_LIST + "${DLL_PATH_LIST};PATH=path_list_append:${LIBHWLOC_DLL_DIRS}") + else() + set(UMF_LINK_HWLOC_STATICALLY ON) + endif() + endif() + if(UMF_LINK_HWLOC_STATICALLY) message( STATUS "Will fetch hwloc from ${UMF_HWLOC_REPO} (tag: ${UMF_HWLOC_TAG})" ) - FetchContent_Declare( - hwloc_targ - GIT_REPOSITORY ${UMF_HWLOC_REPO} - GIT_TAG ${UMF_HWLOC_TAG} - SOURCE_SUBDIR contrib/windows-cmake/ FIND_PACKAGE_ARGS) - FetchContent_MakeAvailable(hwloc_targ) - - message(STATUS "hwloc CMAKE_GENERATOR: ${CMAKE_GENERATOR}") + if(WINDOWS) + set(HWLOC_ENABLE_TESTING OFF) + set(HWLOC_SKIP_LSTOPO ON) + set(HWLOC_SKIP_TOOLS ON) + + FetchContent_Declare( + hwloc_targ + GIT_REPOSITORY ${UMF_HWLOC_REPO} + GIT_TAG ${UMF_HWLOC_TAG} + SOURCE_SUBDIR contrib/windows-cmake/) + FetchContent_MakeAvailable(hwloc_targ) + + set(HWLOC_LIB_PATH "") + if(CMAKE_GENERATOR STREQUAL "NMake Makefiles") + set(HWLOC_LIB_PATH "${hwloc_targ_BINARY_DIR}/hwloc.lib") + else() + set(HWLOC_LIB_PATH "${hwloc_targ_BINARY_DIR}/lib/hwloc.lib") + endif() + + get_filename_component(LIBHWLOC_LIBRARY_DIRS ${HWLOC_LIB_PATH} + DIRECTORY) + set(LIBHWLOC_LIBRARIES ${HWLOC_LIB_PATH}) + set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_BINARY_DIR}/include) + else() # not Windows + FetchContent_Declare( + hwloc_targ + GIT_REPOSITORY ${UMF_HWLOC_REPO} + GIT_TAG ${UMF_HWLOC_TAG}) + FetchContent_MakeAvailable(hwloc_targ) - if(CMAKE_GENERATOR STREQUAL "Ninja" OR CMAKE_GENERATOR STREQUAL - "Unix Makefiles") add_custom_command( - COMMAND ${CMAKE_COMMAND} - -DCMAKE_INSTALL_PREFIX=${hwloc_targ_BINARY_DIR} -B build - WORKING_DIRECTORY - ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/ - OUTPUT - ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/CMakeCache.txt - ) + COMMAND ./autogen.sh + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_SOURCE_DIR}/configure) add_custom_command( - COMMAND ${CMAKE_COMMAND} --build build - WORKING_DIRECTORY - ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/ - OUTPUT - ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/build/lib/hwloc.lib - DEPENDS - ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/CMakeCache.txt - ) + COMMAND + ./configure --prefix=${hwloc_targ_BINARY_DIR} + --enable-static=yes --enable-shared=no --disable-libxml2 + --disable-pci --disable-levelzero --disable-opencl + --disable-cuda --disable-nvml --disable-libudev + --disable-rsmi CFLAGS=-fPIC CXXFLAGS=-fPIC + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_SOURCE_DIR}/Makefile + DEPENDS ${hwloc_targ_SOURCE_DIR}/configure) add_custom_command( - COMMAND ${CMAKE_COMMAND} --build build --target 
INSTALL - WORKING_DIRECTORY - ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/ - OUTPUT ${hwloc_targ_BINARY_DIR}/lib/hwloc.lib - DEPENDS - ${hwloc_targ_SOURCE_DIR}/contrib/windows-cmake/build/lib/hwloc.lib - ) + COMMAND make + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la + DEPENDS ${hwloc_targ_SOURCE_DIR}/Makefile) + add_custom_command( + COMMAND make install + WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} + OUTPUT ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a + DEPENDS ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la) + add_custom_target(hwloc_prod - DEPENDS ${hwloc_targ_BINARY_DIR}/lib/hwloc.lib) + DEPENDS ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) + add_library(hwloc INTERFACE) target_link_libraries( - hwloc INTERFACE ${hwloc_targ_BINARY_DIR}/lib/hwloc.lib) + hwloc INTERFACE ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) add_dependencies(hwloc hwloc_prod) set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/lib) - set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/lib/hwloc.lib) - elseif(CMAKE_GENERATOR STREQUAL "NMake Makefiles") - set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/) - set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/hwloc.lib) - else() - set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/$) - set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/$/hwloc.lib) + set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_BINARY_DIR}/include) + set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) endif() - - set(LIBHWLOC_INCLUDE_DIRS - ${hwloc_targ_SOURCE_DIR}/include;${hwloc_targ_BINARY_DIR}/include) - else() - include(FetchContent) - message( - STATUS - "Will fetch hwloc from ${UMF_HWLOC_REPO} (tag: ${UMF_HWLOC_TAG})" - ) - - FetchContent_Declare( - hwloc_targ - GIT_REPOSITORY ${UMF_HWLOC_REPO} - GIT_TAG ${UMF_HWLOC_TAG}) - FetchContent_MakeAvailable(hwloc_targ) - - add_custom_command( - COMMAND ./autogen.sh - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_SOURCE_DIR}/configure) - add_custom_command( - COMMAND - ./configure --prefix=${hwloc_targ_BINARY_DIR} - --enable-static=yes --enable-shared=no --disable-libxml2 - --disable-pci --disable-levelzero --disable-opencl - --disable-cuda --disable-nvml --disable-libudev --disable-rsmi - CFLAGS=-fPIC CXXFLAGS=-fPIC - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_SOURCE_DIR}/Makefile - DEPENDS ${hwloc_targ_SOURCE_DIR}/configure) - add_custom_command( - COMMAND make - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la - DEPENDS ${hwloc_targ_SOURCE_DIR}/Makefile) - add_custom_command( - COMMAND make install - WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} - OUTPUT ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a - DEPENDS ${hwloc_targ_SOURCE_DIR}/lib/libhwloc.la) - - add_custom_target(hwloc_prod - DEPENDS ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) - add_library(hwloc INTERFACE) - target_link_libraries(hwloc - INTERFACE ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) - add_dependencies(hwloc hwloc_prod) - - set(LIBHWLOC_LIBRARY_DIRS ${hwloc_targ_BINARY_DIR}/lib) - set(LIBHWLOC_INCLUDE_DIRS ${hwloc_targ_BINARY_DIR}/include) - set(LIBHWLOC_LIBRARIES ${hwloc_targ_BINARY_DIR}/lib/libhwloc.a) - endif() + endif() # UMF_LINK_HWLOC_STATICALLY message(STATUS " LIBHWLOC_LIBRARIES = ${LIBHWLOC_LIBRARIES}") message(STATUS " LIBHWLOC_INCLUDE_DIRS = ${LIBHWLOC_INCLUDE_DIRS}") @@ -388,7 +357,7 @@ else() endif() if(hwloc_targ_SOURCE_DIR) - # apply security patch for HWLOC + # Apply security patch for HWLOC execute_process( COMMAND git apply 
${PROJECT_SOURCE_DIR}/cmake/fix_coverity_issues.patch WORKING_DIRECTORY ${hwloc_targ_SOURCE_DIR} @@ -406,8 +375,6 @@ endif() # Fetch L0 loader only if needed i.e.: if building L0 provider is ON and L0 # headers are not provided by the user (via setting UMF_LEVEL_ZERO_INCLUDE_DIR). if(UMF_BUILD_LEVEL_ZERO_PROVIDER AND (NOT UMF_LEVEL_ZERO_INCLUDE_DIR)) - include(FetchContent) - set(LEVEL_ZERO_LOADER_REPO "https://github.com/oneapi-src/level-zero.git") set(LEVEL_ZERO_LOADER_TAG v1.20.2) @@ -436,8 +403,6 @@ endif() # Fetch CUDA only if needed i.e.: if building CUDA provider is ON and CUDA # headers are not provided by the user (via setting UMF_CUDA_INCLUDE_DIR). if(UMF_BUILD_CUDA_PROVIDER AND (NOT UMF_CUDA_INCLUDE_DIR)) - include(FetchContent) - set(CUDA_REPO "https://gitlab.com/nvidia/headers/cuda-individual/cudart.git") set(CUDA_TAG cuda-12.5.1) diff --git a/cmake/FindLIBHWLOC.cmake b/cmake/FindLIBHWLOC.cmake index 4972f55ce..2efd072d4 100644 --- a/cmake/FindLIBHWLOC.cmake +++ b/cmake/FindLIBHWLOC.cmake @@ -55,7 +55,7 @@ if(WINDOWS) endif() if(LIBHWLOC_LIBRARY) - message(STATUS " Found libhwloc using find_library()") + message(STATUS " Found libhwloc: ${LIBHWLOC_LIBRARY}") if(LIBHWLOC_FIND_VERSION) if(NOT LIBHWLOC_API_VERSION) @@ -72,8 +72,7 @@ if(LIBHWLOC_LIBRARY) endif() else() set(MSG_NOT_FOUND - "libhwloc NOT found (set CMAKE_PREFIX_PATH to point the location or disable with -DUMF_DISABLE_HWLOC=ON)" - ) + "libhwloc NOT found in the system (will fetch it from GitHub)") if(LIBHWLOC_FIND_REQUIRED) message(FATAL_ERROR ${MSG_NOT_FOUND}) else() diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index a26b8915e..8bb352787 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -18,7 +18,7 @@ set(EXAMPLE_NAME umf_example_basic) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS basic/basic.c - LIBS umf ${LIBHWLOC_LIBRARIES}) + LIBS umf ${UMF_HWLOC_NAME}) target_include_directories( ${EXAMPLE_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/src/utils @@ -231,7 +231,7 @@ if(LINUX) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS memspace_numa/memspace_numa.c - LIBS umf ${LIBHWLOC_LIBRARIES} numa) + LIBS umf ${UMF_HWLOC_NAME} numa) target_include_directories( ${EXAMPLE_NAME} @@ -254,7 +254,7 @@ if(LINUX) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS memspace_hmat/memspace_hmat.c - LIBS umf ${LIBHWLOC_LIBRARIES} numa) + LIBS umf ${UMF_HWLOC_NAME} numa) target_include_directories( ${EXAMPLE_NAME} @@ -278,7 +278,7 @@ if(LINUX) add_umf_executable( NAME ${EXAMPLE_NAME} SRCS custom_file_provider/custom_file_provider.c - LIBS umf ${LIBHWLOC_LIBRARIES}) + LIBS umf ${UMF_HWLOC_NAME}) target_include_directories( ${EXAMPLE_NAME} PRIVATE ${UMF_CMAKE_SOURCE_DIR}/src/utils diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 49fa2c5d8..24beb1e0a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -103,7 +103,7 @@ endif() if(NOT UMF_DISABLE_HWLOC) set(UMF_SOURCES ${UMF_SOURCES} ${HWLOC_DEPENDENT_SOURCES} memtargets/memtarget_numa.c) - set(UMF_LIBS ${UMF_LIBS} ${LIBHWLOC_LIBRARIES}) + set(UMF_LIBS ${UMF_LIBS} $) set(UMF_PRIVATE_LIBRARY_DIRS ${UMF_PRIVATE_LIBRARY_DIRS} ${LIBHWLOC_LIBRARY_DIRS}) else() @@ -150,14 +150,11 @@ if(UMF_BUILD_SHARED_LIBRARY) set(CMAKE_INSTALL_RPATH "${UMF_INSTALL_RPATH}") endif() - if(NOT UMF_DISABLE_HWLOC) - set(HWLOC_LIB ${UMF_HWLOC_NAME}) - endif() add_umf_library( NAME umf TYPE SHARED SRCS ${UMF_SOURCES} - LIBS ${UMF_LIBS} ${HWLOC_LIB} + LIBS ${UMF_LIBS} LINUX_MAP_FILE ${CMAKE_CURRENT_SOURCE_DIR}/libumf.map WINDOWS_DEF_FILE ${CMAKE_CURRENT_SOURCE_DIR}/libumf.def) 
set(UMF_COMMON_COMPILE_DEFINITIONS ${UMF_COMMON_COMPILE_DEFINITIONS} diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index e172115e1..20f982c65 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -294,27 +294,27 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME memspace_numa SRCS memspaces/memspace_numa.cpp - LIBS ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME provider_os_memory_config SRCS provider_os_memory_config.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME memspace_host_all SRCS memspaces/memspace_host_all.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME memspace_highest_capacity SRCS memspaces/memspace_highest_capacity.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME memspace_highest_bandwidth SRCS memspaces/memspace_highest_bandwidth.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME memspace_lowest_latency SRCS memspaces/memspace_lowest_latency.cpp - LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${UMF_UTILS_FOR_TEST} ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME mempolicy SRCS memspaces/mempolicy.cpp @@ -326,7 +326,7 @@ if(LINUX AND (NOT UMF_DISABLE_HWLOC)) # OS-specific functions are implemented add_umf_test( NAME memtarget SRCS memspaces/memtarget.cpp - LIBS ${LIBNUMA_LIBRARIES} ${LIBHWLOC_LIBRARIES}) + LIBS ${LIBNUMA_LIBRARIES} ${UMF_HWLOC_NAME}) add_umf_test( NAME provider_devdax_memory SRCS provider_devdax_memory.cpp @@ -678,6 +678,7 @@ endif() # replace test_examples.sh with CMake script?) 
if(LINUX AND UMF_BUILD_SHARED_LIBRARY + AND UMF_BUILD_EXAMPLES AND NOT (UMF_USE_ASAN OR UMF_USE_UBSAN From 31c47aac9d2473b95a7ae3ec099df50c08c5474f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Tue, 18 Mar 2025 13:16:57 +0100 Subject: [PATCH 258/466] test benchmark only with singlethreaded workloads --- .cmake-format | 3 ++- benchmark/CMakeLists.txt | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/.cmake-format b/.cmake-format index c1a8e85a8..cedd9cb05 100644 --- a/.cmake-format +++ b/.cmake-format @@ -12,7 +12,8 @@ with section("parse"): 'NAME': '*', 'SRCS': '*', 'LIBS': '*' , - 'LIBDIRS': '*'}}, + 'LIBDIRS': '*', + 'TESTARGS': '*'}}, 'add_umf_executable': { "pargs": 0, "flags": [], diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 80c8ba5ec..d52fc0857 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -40,8 +40,9 @@ function(add_umf_benchmark) # * SRCS - source files # * LIBS - libraries to be linked with # * LIBDIRS - directories of libraries to be linked with + # * TESTARGS - additional arguments to be passed to the add_test set(oneValueArgs NAME) - set(multiValueArgs SRCS LIBS LIBDIRS) + set(multiValueArgs SRCS LIBS LIBDIRS TESTARGS) cmake_parse_arguments( ARG "" @@ -66,7 +67,7 @@ function(add_umf_benchmark) add_test( NAME ${BENCH_NAME} - COMMAND ${BENCH_NAME} + COMMAND ${BENCH_NAME} ${ARG_TESTARGS} WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) if("${BENCH_NAME}" STREQUAL "umf-ubench") @@ -148,7 +149,9 @@ add_umf_benchmark( NAME benchmark SRCS benchmark.cpp LIBS ${LIBS_OPTIONAL} benchmark::benchmark - LIBDIRS ${LIB_DIRS}) + # limit running benchmarks in CI tests to single-threaded + LIBDIRS ${LIB_DIRS} + TESTARGS --benchmark_filter=threads:1$) if(UMF_BUILD_BENCHMARKS_MT) add_umf_benchmark( From cbaf24ae0cf33356ddb1bdafe7197b2a90556cd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Tue, 18 Mar 2025 15:41:37 +0100 Subject: [PATCH 259/466] 0.11.0-rc1 release --- ChangeLog | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/ChangeLog b/ChangeLog index a4afa52ce..631b08e37 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +Tue Mar 18 2025 Łukasz Stolarczuk + + * Version 0.11.0-rc1 + + This is the first rc of v0.11.0 release. It contains: + - make disjoint pool a C structure #898 + - add fixed provider #976 + - remove the Coarse provider #934 and replace with internal coarse library #931, #932 + - implement umfPool[Set/Get]Tag #962 + - L0 provider: implement support for defer and blocking free #963 + - add set/restore context in CUDA provider free() #1049 + - L0 provider: implement min/recommended page size query #1059 + - add support for CUDA allocation flags #1079 + - increase refcount to ze_loader/CUDA libraries #1086 + - implement size limit for the cache of opened IPC handles #998 + - allow creating fixed provider based on allocations from another pool #1143 + - multiple benchmark improvements + - new tests and CI workflows, incl. 
backward compatibility checks (#1087, #1163) + Fri Jan 10 2025 Łukasz Stolarczuk * Version 0.10.1 From 387a2a94402f8c6930d35c8c48a8b7111a5ac9c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 19 Mar 2025 13:13:35 +0100 Subject: [PATCH 260/466] Move CTL functions into 0.12 sections (in .map/.def files) --- src/libumf.def | 7 ++++--- src/libumf.map | 9 ++++++--- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/libumf.def b/src/libumf.def index dd0ddfbfc..34ecee889 100644 --- a/src/libumf.def +++ b/src/libumf.def @@ -119,9 +119,6 @@ EXPORTS umfScalablePoolParamsSetKeepAllMemory ; Added in UMF_0.11 umfCUDAMemoryProviderParamsSetAllocFlags - umfCtlExec - umfCtlGet - umfCtlSet umfDisjointPoolOps umfDisjointPoolParamsCreate umfDisjointPoolParamsDestroy @@ -139,3 +136,7 @@ EXPORTS umfFixedMemoryProviderParamsDestroy umfLevelZeroMemoryProviderParamsSetFreePolicy umfLevelZeroMemoryProviderParamsSetDeviceOrdinal +; Added in UMF_0.12 + umfCtlExec + umfCtlGet + umfCtlSet diff --git a/src/libumf.map b/src/libumf.map index 5e97acc09..f9ec9b6bf 100644 --- a/src/libumf.map +++ b/src/libumf.map @@ -117,9 +117,6 @@ UMF_0.10 { UMF_0.11 { umfCUDAMemoryProviderParamsSetAllocFlags; - umfCtlExec; - umfCtlGet; - umfCtlSet; umfDisjointPoolOps; umfDisjointPoolParamsCreate; umfDisjointPoolParamsDestroy; @@ -138,3 +135,9 @@ UMF_0.11 { umfLevelZeroMemoryProviderParamsSetFreePolicy; umfLevelZeroMemoryProviderParamsSetDeviceOrdinal; } UMF_0.10; + +UMF_0.12 { + umfCtlExec; + umfCtlGet; + umfCtlSet; +} UMF_0.11; From ffa4eb6179ebe3d4e3e2ecd5207306609069876c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 19 Mar 2025 13:32:26 +0100 Subject: [PATCH 261/466] remove pool benchmarks with fixed provider. Simplify benchmark tests by removing redundant pool benchmarks for fixed provider, as results are nearly identical to os provider. Also reduce iteration count for 'fix' provider benchmarks to match with 'os' provider. 
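
For context on the two benchmark tweaks above: Google Benchmark names explicitly threaded variants with a "/threads:N" suffix, so the --benchmark_filter=threads:1$ test argument added earlier keeps only the single-threaded variants, while ->Iterations(...) pins the iteration count instead of letting the library auto-tune it. A minimal, generic sketch of that behavior follows; BM_alloc_free is an invented example, not UMF's benchmark code.

// Minimal Google Benchmark sketch (illustration only, not part of these patches).
#include <benchmark/benchmark.h>
#include <cstdlib>

static void BM_alloc_free(benchmark::State &state) {
    for (auto _ : state) {
        void *p = std::malloc(4096);
        benchmark::DoNotOptimize(p);
        std::free(p);
    }
}

// Registered names become "BM_alloc_free/threads:1" and "BM_alloc_free/threads:4";
// the regex threads:1$ selects only the first. Iterations(50000) fixes the
// iteration count for more stable CI timings.
BENCHMARK(BM_alloc_free)->Threads(1)->Iterations(50000);
BENCHMARK(BM_alloc_free)->Threads(4);

BENCHMARK_MAIN();
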
--- benchmark/benchmark.cpp | 67 ++++------------------------------------- 1 file changed, 6 insertions(+), 61 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index f57c0d5ae..4ab5a62e2 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -143,73 +143,18 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, proxy_pool_fixedprovider) ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&singlethreaded); + ->Apply(&singlethreaded) + // reduce iterations, to match os_provider benchmark + ->Iterations(50000); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, fixed_provider, fixed_alloc_size, provider_allocator); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, fixed_provider) ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&singlethreaded); - -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - disjoint_pool_fix_fixedprovider, fixed_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - disjoint_pool_fix_fixedprovider) - ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&multithreaded); - -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - disjoint_pool_uniform_fixedprovider, - uniform_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - disjoint_pool_uniform_fixedprovider) - ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&multithreaded); - -#ifdef UMF_POOL_JEMALLOC_ENABLED -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - jemalloc_pool_fixedprovider, fixed_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - jemalloc_pool_fixedprovider) - ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&multithreaded); - -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - jemalloc_pool_uniform_fixedprovider, - uniform_alloc_size, - pool_allocator>); -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - jemalloc_pool_uniform_fixedprovider) - ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&multithreaded); - -#endif - -#ifdef UMF_POOL_SCALABLE_ENABLED -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - scalable_pool_fix_fixedprovider, fixed_alloc_size, - pool_allocator>); - -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - scalable_pool_fix_fixedprovider) - ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&multithreaded); - -UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, - scalable_pool_uniform_fixedprovider, - uniform_alloc_size, - pool_allocator>); - -UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, - scalable_pool_uniform_fixedprovider) - ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&multithreaded); - -#endif + ->Apply(&singlethreaded) + // reduce iterations, to match os_provider benchmark + ->Iterations(50000); //BENCHMARK_MAIN(); int main(int argc, char **argv) { From fa7a6acdaa85379742c8906cb2f722c8369d284d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Plewa?= Date: Wed, 19 Mar 2025 13:46:43 +0100 Subject: [PATCH 262/466] reduce number of threads with disjoint pool --- benchmark/benchmark.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/benchmark/benchmark.cpp b/benchmark/benchmark.cpp index f57c0d5ae..9073e5b93 100644 --- a/benchmark/benchmark.cpp +++ b/benchmark/benchmark.cpp @@ -30,11 +30,10 @@ // The exact meaning of each argument depends on the benchmark, allocator, and 
size components used. // Refer to the 'argsName()' function in each component to find detailed descriptions of these arguments. +template static void multithreaded(benchmark::internal::Benchmark *benchmark) { - benchmark->Threads(12); - benchmark->Threads(8); - benchmark->Threads(4); benchmark->Threads(1); + benchmark->DenseThreadRange(4, max_threads, 4); } static void singlethreaded(benchmark::internal::Benchmark *benchmark) { @@ -91,14 +90,16 @@ UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_fix, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_fix) ->Apply(&default_multiple_alloc_fix_size) - ->Apply(&multithreaded); + // Limit benchmarks to 4 threads, as the disjoint pool scales poorly with higher thread counts. + ->Apply(&multithreaded<4>); UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, disjoint_pool_uniform, uniform_alloc_size, pool_allocator>); UMF_BENCHMARK_REGISTER_F(multiple_malloc_free_benchmark, disjoint_pool_uniform) ->Apply(&default_multiple_alloc_uniform_size) - ->Apply(&multithreaded); + // Limit benchmarks to 4 threads, as the disjoint pool scales poorly with higher thread counts. + ->Apply(&multithreaded<4>); #ifdef UMF_POOL_JEMALLOC_ENABLED UMF_BENCHMARK_TEMPLATE_DEFINE(multiple_malloc_free_benchmark, jemalloc_pool_fix, From 89443bfa2ead8999c6c27a88ded1eeb6ac30e014 Mon Sep 17 00:00:00 2001 From: Igor Chorazewicz Date: Wed, 19 Mar 2025 16:05:14 +0000 Subject: [PATCH 263/466] Fix -Wformat warning in provider_tracking.c The warnings are visible when building UR and SYCL --- src/provider/provider_tracking.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/provider/provider_tracking.c b/src/provider/provider_tracking.c index da9d9ab77..1bef85854 100644 --- a/src/provider/provider_tracking.c +++ b/src/provider/provider_tracking.c @@ -260,7 +260,7 @@ static umf_result_t umfMemoryTrackerRemove(umf_memory_tracker_handle_t hTracker, LOG_DEBUG("memory region removed: tracker=%p, level=%i, pool=%p, ptr=%p, " "size=%zu", - (void *)hTracker, level, value->pool, ptr, value->size); + (void *)hTracker, level, (void *)value->pool, ptr, value->size); if (parent_value) { LOG_DEBUG( @@ -302,13 +302,14 @@ umfMemoryTrackerAddIpcSegment(umf_memory_tracker_handle_t hTracker, if (ret == 0) { LOG_DEBUG("IPC memory region is added, tracker=%p, ptr=%p, size=%zu, " "provider=%p, cache_entry=%p", - (void *)hTracker, ptr, size, provider, cache_entry); + (void *)hTracker, ptr, size, (void *)provider, + (void *)cache_entry); return UMF_RESULT_SUCCESS; } LOG_ERR("failed to insert tracker_ipc_info_t, ret=%d, ptr=%p, size=%zu, " "provider=%p, cache_entry=%p", - ret, ptr, size, provider, cache_entry); + ret, ptr, size, (void *)provider, (void *)cache_entry); umf_ba_free(hTracker->ipc_info_allocator, value); @@ -335,7 +336,8 @@ umfMemoryTrackerRemoveIpcSegment(umf_memory_tracker_handle_t hTracker, LOG_DEBUG("IPC memory region removed: tracker=%p, ptr=%p, size=%zu, " "provider=%p, cache_entry=%p", - (void *)hTracker, ptr, v->size, v->provider, v->ipc_cache_value); + (void *)hTracker, ptr, v->size, (void *)v->provider, + (void *)v->ipc_cache_value); umf_ba_free(hTracker->ipc_info_allocator, value); From 2eeb9a61057ab938b9feacff19ee85a397072a56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Thu, 20 Mar 2025 10:22:35 +0100 Subject: [PATCH 264/466] [CI][Bench] Use new version of bench dashboard incl. using new format of data (stored on 'benchmark-results' branch). 
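
For context on the publishing flow this patch introduces: the workflow below commits data.json plus the results directory to the 'benchmark-results' branch and retries when a concurrent run pushed first, while the docs job serves a static dashboard whose config.js points at that branch. A condensed bash sketch of the retry idea only; the full step, including recovery of a conflicting results file and regeneration of data.json, is in the reusable_benchmarks.yml diff below.

#!/usr/bin/env bash
# Simplified illustration of the publish-with-retry idea; the branch name and
# paths follow the workflow, everything else is schematic.
set -euo pipefail
cd results-repo
for attempt in 1 2 3 4 5; do
    cp ../sc/devops/scripts/benchmarks/html/data.json .
    git add data.json results/
    git commit -m "Add benchmark results and data.json"
    if git push origin benchmark-results; then
        break                                 # published
    fi
    # A concurrent run won the race: drop our commit, sync, and try again.
    git reset --hard origin/benchmark-results
    git pull origin benchmark-results
done
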
--- .github/workflows/benchmarks.yml | 18 ++-- .github/workflows/nightly.yml | 7 +- .github/workflows/reusable_benchmarks.yml | 113 ++++++++++++++-------- .github/workflows/reusable_docs_build.yml | 36 +++++-- 4 files changed, 111 insertions(+), 63 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 7eb3c7b06..b18a41c4b 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -7,18 +7,22 @@ on: description: PR number (if 0, it'll run on the main) type: number bench_script_params: + # If you want to save the results of the manual run in 'benchmark-results' branch, + # you have to pass '--save XXX', where XXX is the label of your results. description: Parameters passed to script executing benchmark type: string required: false default: '' - upload_report: - description: 'Upload HTML report' - type: boolean - required: false - default: false + runner: + description: Runner + type: choice + required: true + default: 'L0_PERF' + options: + - L0_PERF permissions: - contents: read + contents: write pull-requests: write jobs: @@ -28,4 +32,4 @@ jobs: with: pr_no: ${{ inputs.pr_no }} bench_script_params: ${{ inputs.bench_script_params }} - upload_report: ${{ inputs.upload_report }} + runner: ${{ inputs.runner }} diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 1317482fd..b11d17fa4 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -248,9 +248,9 @@ jobs: call "C:\Program Files (x86)\Intel\oneAPI\setvars-vcvarsall.bat" ctest -C ${{matrix.build_type}} --output-on-failure --test-dir test - hwloc-fallback: # Scenarios where UMF_LINK_HWLOC_STATICALLY is set to OFF and hwloc is not installed in the system # The hwloc library is fetched implicitly + hwloc-fallback: name: "Fallback to static hwloc build" strategy: matrix: @@ -317,9 +317,8 @@ jobs: Benchmarks: uses: ./.github/workflows/reusable_benchmarks.yml permissions: - contents: read + contents: write pull-requests: write with: pr_no: '0' - bench_script_params: '--save baseline' - upload_report: true + bench_script_params: '--save Baseline_PVC' diff --git a/.github/workflows/reusable_benchmarks.yml b/.github/workflows/reusable_benchmarks.yml index 3953e98de..26f9c348b 100644 --- a/.github/workflows/reusable_benchmarks.yml +++ b/.github/workflows/reusable_benchmarks.yml @@ -1,5 +1,5 @@ # Executes benchmarks implemented in this repository using scripts -# for results visualization from intel/llvm (unified-runtime dir). +# for results visualization from intel/llvm. name: Benchmarks on: @@ -14,13 +14,13 @@ on: required: false type: string default: '' - upload_report: + runner: required: false - type: boolean - default: false + type: string + default: 'L0_PERF' permissions: - contents: read + contents: write pull-requests: write env: @@ -32,17 +32,9 @@ jobs: name: Benchmarks # run only on upstream; forks will not have the HW if: github.repository == 'oneapi-src/unified-memory-framework' - runs-on: L0_PERF + runs-on: ${{ inputs.runner }} steps: - # Workspace on self-hosted runners is not cleaned automatically. - # We have to delete the files created outside of using actions. 
- - name: Cleanup self-hosted workspace - if: always() - run: | - ls -la ./ - rm -rf ./* || true - - name: Add comment to PR uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 if: ${{ always() && inputs.pr_no != 0 }} @@ -97,23 +89,32 @@ jobs: - name: Build UMF run: cmake --build ${{env.BUILD_DIR}} -j $(nproc) - # Get scripts for benchmark data visualization. - # Use specific tag, as the scripts or files' location may change. - - name: Checkout SYCL + - name: Checkout UMF results branch + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + ref: benchmark-results + path: results-repo + + # Get scripts for benchmark data visualization (from SYCL repo). + # Use specific ref, as the scripts or files' location may change. + - name: Checkout benchmark scripts uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: repository: intel/llvm - # [BENCHMARK] fix default timeout parameter - # https://github.com/intel/llvm/pull/17412 - ref: 357e9e0b253b7eba105d044e38452b3c09169f8a - path: sycl-repo - fetch-depth: 1 + # Note: The same ref is used in docs build (for dashboard generation)! + # + # 20.03.2025 + # branch: unify-benchmark-ci + ref: cae7049c78c697b3ac94f931716d9efb53addcd8 + path: sc + sparse-checkout: | + devops/scripts/benchmarks - name: Install benchmarking scripts deps run: | python -m venv .venv source .venv/bin/activate - pip install -r ${{github.workspace}}/sycl-repo/unified-runtime/third_party/benchmark_requirements.txt + pip install -r ${{github.workspace}}/sc/devops/scripts/benchmarks/requirements.txt - name: Set core range and GPU mask run: | @@ -135,22 +136,21 @@ jobs: - name: Run UMF benchmarks id: benchmarks - working-directory: ${{env.BUILD_DIR}} run: > - source ${{github.workspace}}/.venv/bin/activate && - taskset -c ${{ env.CORES }} ${{ github.workspace }}/sycl-repo/unified-runtime/scripts/benchmarks/main.py + source .venv/bin/activate && + taskset -c ${{ env.CORES }} ./sc/devops/scripts/benchmarks/main.py ~/bench_workdir_umf --umf ${{env.BUILD_DIR}} - --compare baseline --timeout 3000 - ${{ inputs.upload_report && '--output-html' || '' }} - ${{ inputs.pr_no != 0 && '--output-markdown' || '' }} + --output-html remote + --results-dir ${{ github.workspace }}/results-repo + --output-markdown ${{ inputs.bench_script_params }} # In case it failed to add a comment, we can still print the results. 
- name: Print benchmark results - if: ${{ always() && inputs.pr_no != 0 }} - run: cat ${{env.BUILD_DIR}}/benchmark_results.md + if: ${{ always() }} + run: cat ${{ github.workspace }}/benchmark_results.md || true - name: Add comment to PR uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1 @@ -160,7 +160,7 @@ jobs: let markdown = "" try { const fs = require('fs'); - markdown = fs.readFileSync('${{env.BUILD_DIR}}/benchmark_results.md', 'utf8'); + markdown = fs.readFileSync('${{ github.workspace }}/benchmark_results.md', 'utf8'); } catch(err) { } @@ -177,15 +177,42 @@ jobs: repo: context.repo.repo, body: body }) - - - name: Upload HTML report - if: ${{ always() && inputs.upload_report }} - uses: actions/cache/save@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 - with: - path: umf-repo/build/benchmark_results.html - key: benchmark-results-${{ github.run_id }} - - name: Get information about platform - if: ${{ always() }} - working-directory: ${{env.UMF_DIR}} - run: .github/scripts/get_system_info.sh + - name: Commit data.json and results directory + working-directory: results-repo + run: | + git config --global user.name "GitHub Actions Bot" + git config --global user.email "actions@github.com" + + for attempt in {1..5}; do + echo "Attempt #$attempt to push changes" + + rm -f data.json + cp ${{ github.workspace }}/sc/devops/scripts/benchmarks/html/data.json . + + git add data.json results/ + git commit -m "Add benchmark results and data.json" + + results_file=$(git diff HEAD~1 --name-only -- results/ | head -n 1) + + if git push origin benchmark-results; then + echo "Push succeeded" + break + fi + + echo "Push failed, retrying..." + + if [ -n "$results_file" ]; then + mv $results_file ${{ github.workspace }}/temp_$(basename $results_file) + + git reset --hard origin/benchmark-results + git pull origin benchmark-results + + new_file="results/$(basename "$results_file")" + mv ${{ github.workspace }}/temp_$(basename $results_file) $new_file + fi + + echo "Regenerating data.json" + (cd ${{ github.workspace }} && ${{ github.workspace }}/sc/devops/scripts/benchmarks/main.py ~/bench_workdir_umf --dry-run --results-dir ${{ github.workspace }}/results-repo --output-html remote) + + done diff --git a/.github/workflows/reusable_docs_build.yml b/.github/workflows/reusable_docs_build.yml index 9317478bb..e12895aeb 100644 --- a/.github/workflows/reusable_docs_build.yml +++ b/.github/workflows/reusable_docs_build.yml @@ -45,19 +45,37 @@ jobs: -DUMF_DISABLE_HWLOC=ON cmake --build build --target docs - # If we upload HTML docs, we want to include benchmark results as well - - name: Download benchmark HTML before uploading docs + # + # Documentation is built. Now we want to add benchmark dashboard. + # We only do it if inputs.upload is set, as this job is also used for testing docs build. 
+ # + - name: Checkout benchmark scripts if: ${{ inputs.upload == true }} - id: download-bench-html - uses: actions/cache/restore@1bd1e32a3bdc45362d1e726936510720a7c30a57 # v4.2.0 + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - path: umf-repo/build/benchmark_results.html - key: benchmark-results- + repository: intel/llvm + # 20.03.2025 + # branch: unify-benchmark-ci + ref: cae7049c78c697b3ac94f931716d9efb53addcd8 + path: sc + sparse-checkout: | + devops/scripts/benchmarks - - name: Move benchmark HTML - if: ${{ inputs.upload == true && steps.download-bench-html.outputs.cache-hit != '' }} + - name: Move benchmark HTML files + if: ${{ inputs.upload == true }} + working-directory: ${{ github.workspace }}/build/docs_build/generated/html + run: | + mkdir performance + mv ${{ github.workspace }}/sc/devops/scripts/benchmarks/html/* performance/ + + - name: Replace config.js + if: ${{ inputs.upload == true }} + working-directory: ${{ github.workspace }}/build/docs_build/generated/html run: | - mv umf-repo/build/benchmark_results.html ${{github.workspace}}/build/docs_build/generated/html + cat << 'EOF' > ./performance/config.js + remoteDataUrl = 'https://raw.githubusercontent.com/oneapi-src/unified-memory-framework/refs/heads/benchmark-results/data.json'; + defaultCompareNames = ["Baseline_PVC"]; + EOF - name: Upload artifact if: ${{ inputs.upload == true }} From bdac43d9b06b045486626b6e60d19a14c2734db2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Fri, 21 Mar 2025 12:28:32 +0100 Subject: [PATCH 265/466] [CI] Minor update in get_system_info.sh --- .github/scripts/get_system_info.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/scripts/get_system_info.sh b/.github/scripts/get_system_info.sh index 81c54ce98..573c7195d 100755 --- a/.github/scripts/get_system_info.sh +++ b/.github/scripts/get_system_info.sh @@ -7,7 +7,7 @@ function check_L0_version { if command -v dpkg &> /dev/null; then - dpkg -l | grep level-zero && return + dpkg -l | grep -iE "level-zero|libze|Compute Runtime|Level Zero" && return fi if command -v rpm &> /dev/null; then @@ -34,7 +34,7 @@ function system_info { numactl -H echo "**********VGA info**********" - lspci | grep -i VGA + lspci | grep -iE "vga|display|gpu" echo "**********CUDA Version**********" if command -v nvidia-smi &> /dev/null; then From 49a1a33bb4f4616b20e75df541c6b6d9cd056e6f Mon Sep 17 00:00:00 2001 From: "Dubinov, Igor" Date: Mon, 24 Mar 2025 15:28:39 +0100 Subject: [PATCH 266/466] Fix for uninitialized variable --- test/ctl/ctl_api.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/ctl/ctl_api.cpp b/test/ctl/ctl_api.cpp index ff6491c16..93c059052 100644 --- a/test/ctl/ctl_api.cpp +++ b/test/ctl/ctl_api.cpp @@ -74,6 +74,8 @@ class CtlTest : public ::testing::Test { const char *msg; }; + CtlTest() : provider(NULL), pool(NULL) {} + void SetUp() override { provider = NULL; pool = NULL; From 8065bb06e62851b0485b8de930b43362c380e784 Mon Sep 17 00:00:00 2001 From: Patryk Kaminski Date: Mon, 10 Mar 2025 13:17:39 +0100 Subject: [PATCH 267/466] Add sycl compatibility workflow --- .github/workflows/nightly.yml | 3 + .github/workflows/reusable_sycl.yml | 122 ++++++++++++++++++++++++++++ 2 files changed, 125 insertions(+) create mode 100644 .github/workflows/reusable_sycl.yml diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index b11d17fa4..c664b7f87 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -322,3 +322,6 @@ 
jobs: with: pr_no: '0' bench_script_params: '--save Baseline_PVC' + + SYCL: + uses: ./.github/workflows/reusable_sycl.yml diff --git a/.github/workflows/reusable_sycl.yml b/.github/workflows/reusable_sycl.yml new file mode 100644 index 000000000..22682b2ed --- /dev/null +++ b/.github/workflows/reusable_sycl.yml @@ -0,0 +1,122 @@ +# UMF compatibility with intel/llvm workflow. +# The latest llvm daily release and the last working release are tested. +# Triggered in the Nightly workflow. +name: SYCL + +on: workflow_call + +permissions: + contents: read + +jobs: + sycl-compatibility: + # run only on upstream; forks will not have the HW + if: github.repository == 'oneapi-src/unified-memory-framework' + name: ${{matrix.llvm_tag}} llvm build + runs-on: ["DSS-LEVEL_ZERO", "DSS-UBUNTU"] + + strategy: + matrix: + llvm_tag: ["latest", "nightly-2025-02-08"] # "latest" or llvm with UMF v0.11.0-dev2 + + steps: + # Install sycl + - name: Clean up + if: always() + run: rm -rf llvm sycl_linux.tar.gz + + - name: Download llvm daily release + run: | + if [ "${{ matrix.llvm_tag }}" == "latest" ]; then + llvm_tag=$(curl -s https://api.github.com/repos/intel/llvm/releases | awk -F'"' '/"tag_name":/ {print $4; exit}') + else + llvm_tag="${{ matrix.llvm_tag }}" + fi + download_url="https://github.com/intel/llvm/releases/download/${llvm_tag}/sycl_linux.tar.gz" + wget --no-verbose $download_url -O sycl_linux.tar.gz + + - name: Extract llvm + run: | + mkdir llvm + tar -xzf sycl_linux.tar.gz -C llvm --strip-components=1 + + - name: Remove UMF installed with llvm + run: rm -f llvm/lib/libumf* + + - name: Add sycl to PATH + run: | + echo "${{ github.workspace }}/llvm/bin" >> $GITHUB_PATH + echo "LD_LIBRARY_PATH=${{ github.workspace }}/llvm/lib:$LD_LIBRARY_PATH" >> $GITHUB_ENV + + # Install UMF + - name: Checkout UMF + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + path: umf_repo + fetch-depth: 0 + + - name: Configure UMF + working-directory: umf_repo + run: > + cmake + -B build + -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}/llvm + -DCMAKE_BUILD_TYPE=Release + -DCMAKE_C_COMPILER=gcc + -DCMAKE_CXX_COMPILER=g++ + -DUMF_BUILD_SHARED_LIBRARY=ON + -DUMF_BUILD_TESTS=OFF + -DUMF_BUILD_EXAMPLES=OFF + + - name: Build and install UMF + working-directory: umf_repo + run: cmake --build build --target install -j$(nproc) + + - name: Print installed lib files + run: ls -l llvm/lib + + # Test sycl-ls + - name: Run sycl-ls + run: | + ./llvm/bin/sycl-ls | tee sycl-ls-output.log + grep -q "level_zero:gpu" sycl-ls-output.log + + # Test several sycl e2e test + # These are arbitrarily picked tests to check the compatibility + # Note that some intel/llvm tests may be flaky, although I haven't noticed such a behavior in the following tests + - name: Checkout sycl + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: intel/llvm + path: sycl_repo + fetch-depth: 1 + ref: sycl + + - name: Create sycl tests build directory + run: | + TESTS_BUILD_DIR=${{ github.workspace }}/sycl_repo/sycl/test-e2e/build + mkdir $TESTS_BUILD_DIR + echo "TESTS_BUILD_DIR=$TESTS_BUILD_DIR" >> $GITHUB_ENV + + - name: Build sycl e2e tests + working-directory: sycl_repo + run: | + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/AbiNeutral/submit-kernel.cpp -o ${{env.TESTS_BUILD_DIR}}/submit-kernel -Iinclude + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/Adapters/interop-l0-direct.cpp -o ${{env.TESTS_BUILD_DIR}}/interop-l0-direct -lze_loader -Iinclude + 
${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/Adapters/level_zero_interop_memcpy.cpp -o ${{env.TESTS_BUILD_DIR}}/level_zero_interop_memcpy -Iinclude + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/Basic/build_log.cpp -o ${{env.TESTS_BUILD_DIR}}/build_log -Iinclude + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/PerformanceTests/ParallelFor/parallel_for_range_roundup.cpp -fsycl-range-rounding=force -o ${{env.TESTS_BUILD_DIR}}/parallel_for_range_roundup -Iinclude + ${{github.workspace}}/llvm/bin/clang++ -fsycl sycl/test-e2e/USM/fill_any_size.cpp -o ${{env.TESTS_BUILD_DIR}}/fill_any_size -Iinclude + + - name: Run sycl e2e tests + env: + ONEAPI_DEVICE_SELECTOR: level_zero:gpu + UMF_LOG: "level:debug;flush:debug;output:stdout;pid:yes" + working-directory: ${{env.TESTS_BUILD_DIR}} + run: | + echo "---Run submit-kernel test" && ./submit-kernel + echo "---Run interop-l0-direct test" && ./interop-l0-direct + echo "---Run level_zero_interop_memcpy test" && ./level_zero_interop_memcpy + echo "---Run build_log test" && ./build_log + echo "---Run parallel_for_range_roundup test" && ./parallel_for_range_roundup + echo "---Run fill_any_size test" && ./fill_any_size From 8492c626dd30e5a7776cca82a7fe2cd32bc1a279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Tue, 25 Mar 2025 16:18:13 +0100 Subject: [PATCH 268/466] [CI][Bench] Add compare option to manual bench runs --- .github/workflows/benchmarks.yml | 8 ++++++++ .github/workflows/reusable_benchmarks.yml | 20 +++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index b18a41c4b..7ee8269d2 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -13,6 +13,13 @@ on: type: string required: false default: '' + bench_script_compare: + description: Compare label, passed to script executing benchmark as '--compare