From 91fc5ae4a5a66a03931f8cd383abd2aa062bb0e9 Mon Sep 17 00:00:00 2001 From: Geoffrey Thomas Date: Sat, 24 May 2025 19:04:09 -0400 Subject: [PATCH] Use only safe identical code folding with BOLT "Identical code folding" (ICF) is the feature of an optimizer to find that two functions have the same code and that they can therefore be deduplicated in the binary. While this is usually safe, it can cause observable behavior differences if the program relies on the fact that the two functions have different addresses. CPython relies on this in (at least) Objects/typeobject.c, which defines two functions wrap_binaryfunc() and wrap_binaryfunc_l() with the same implementation, and stores their addresses in the slotdefs array. If these two functions have the same address, update_one_slot() in that file will fill in slots it shouldn't, causing, for instances, classes defined in Python that inherit from some built-in types to misbehave. As of LLVM 20 (llvm/llvm-project#116275), BOLT has a "safe ICF" mode, where it looks to see if there are any uses of a function symbol outside function calls (e.g., relocations in data sections) and skips ICF on such functions. The intent is that this avoids observable behavior differences but still saves storage as much as possible. This version is about two months old at the time of writing. To support older LLVM versions, we have to turn off ICF entirely. This problem was previously noticed for Windows/MSVC in #53093 (and again in #24098), where the default behavior of PGO is to enable ICF (which they expand to "identical COMDAT folding") and we had to turn it off. --- configure | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- configure.ac | 25 ++++++++++++++++++++++++- 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/configure b/configure index abdd28fcabf769..9613d36f828555 100755 --- a/configure +++ b/configure @@ -9368,6 +9368,54 @@ printf "%s\n" "\"Found merge-fdata\"" >&6; } else as_fn_error $? "merge-fdata is required for a --enable-bolt build but could not be found." "$LINENO" 5 fi + + py_bolt_icf_flag="-icf=safe" + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${LLVM_BOLT} supports safe identical code folding" >&5 +printf %s "checking whether ${LLVM_BOLT} supports safe identical code folding... " >&6; } +if test ${py_cv_bolt_icf_safe+y} +then : + printf %s "(cached) " >&6 +else case e in #( + e) + saved_cflags="$CFLAGS" + saved_ldflags="$LDFLAGS" + CFLAGS="$CFLAGS_NODIST" + LDFLAGS="$LDFLAGS_NODIST" + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +int +main (void) +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + py_cv_bolt_icf_safe=no + ${LLVM_BOLT} -icf=safe -o conftest.bolt conftest$EXEEXT >&5 2>&1 && py_cv_bolt_icf_safe=yes +else case e in #( + e) { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in '$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in '$ac_pwd':" >&2;} +as_fn_error $? "could not compile empty test program +See 'config.log' for more details" "$LINENO" 5; } ;; +esac +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext + CFLAGS="$saved_cflags" + LDFLAGS="$saved_ldflags" + + ;; +esac +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $py_cv_bolt_icf_safe" >&5 +printf "%s\n" "$py_cv_bolt_icf_safe" >&6; } + if test "$py_cv_bolt_icf_safe" = no; then + py_bolt_icf_flag="" + fi fi @@ -9404,7 +9452,7 @@ printf "%s\n" "$BOLT_INSTRUMENT_FLAGS" >&6; } printf %s "checking BOLT_APPLY_FLAGS... " >&6; } if test -z "${BOLT_APPLY_FLAGS}" then - BOLT_APPLY_FLAGS=" ${BOLT_COMMON_FLAGS} -reorder-blocks=ext-tsp -reorder-functions=cdsort -split-functions -icf=1 -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot " + BOLT_APPLY_FLAGS=" ${BOLT_COMMON_FLAGS} -reorder-blocks=ext-tsp -reorder-functions=cdsort -split-functions ${py_bolt_icf_flag} -inline-all -split-eh -reorder-functions-use-hot-size -peepholes=none -jump-tables=aggressive -inline-ap -indirect-call-promotion=all -dyno-stats -use-gnu-stack -frame-opt=hot " fi { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $BOLT_APPLY_FLAGS" >&5 diff --git a/configure.ac b/configure.ac index 8d939f075058bf..25737e3f9d66d5 100644 --- a/configure.ac +++ b/configure.ac @@ -2129,6 +2129,29 @@ if test "$Py_BOLT" = 'true' ; then else AC_MSG_ERROR([merge-fdata is required for a --enable-bolt build but could not be found.]) fi + + py_bolt_icf_flag="-icf=safe" + AC_CACHE_CHECK( + [whether ${LLVM_BOLT} supports safe identical code folding], + [py_cv_bolt_icf_safe], + [ + saved_cflags="$CFLAGS" + saved_ldflags="$LDFLAGS" + CFLAGS="$CFLAGS_NODIST" + LDFLAGS="$LDFLAGS_NODIST" + AC_LINK_IFELSE( + [AC_LANG_PROGRAM([[]], [[]])], + [py_cv_bolt_icf_safe=no + ${LLVM_BOLT} -icf=safe -o conftest.bolt conftest$EXEEXT >&AS_MESSAGE_LOG_FD 2>&1 dnl + && py_cv_bolt_icf_safe=yes], + [AC_MSG_FAILURE([could not compile empty test program])]) + CFLAGS="$saved_cflags" + LDFLAGS="$saved_ldflags" + ] + ) + if test "$py_cv_bolt_icf_safe" = no; then + py_bolt_icf_flag="" + fi fi dnl Enable BOLT of libpython if built. @@ -2184,7 +2207,7 @@ then -reorder-blocks=ext-tsp -reorder-functions=cdsort -split-functions - -icf=1 + ${py_bolt_icf_flag} -inline-all -split-eh -reorder-functions-use-hot-size