Performance
- branch is relatively stable through its life cycle and/or
- branch is expensive to compute and/or requires memory access and/or
- branch is hard to predict/learn by the hardware
Examples: logging, tracing, dispatching, ...
- Single header (https://raw.githubusercontent.com/boost-ext/mut/main/mut - for integration see FAQ)
- Minimal API
- Verifies itself upon include (can be disabled with `-DNTEST` - see FAQ)
- C++20 (clang++15+, g++11+) / x86-64 / Linux
static_bool(https://godbolt.org/z/fz5zd1cM9)
/**
* Global branch switch used by the example below.
* constexpr minimal overhead static bool changed at run-time via code patching
* Note: initialized to `false`, the direction the library recommends as the
* default ({ false: nop, true: unconditional jmp }).
*/
constexpr mut::static_bool semi_runtime_branch = false;
/**
* Prints "taken" or "not taken" depending on the current direction of
* `semi_runtime_branch`.
* Note: `fun` can be inline/noinline/constexpr/etc.
* constexpr void fun();
* inline void fun();
* [[gnu::noinline]] void fun()
* [[gnu::always_inline]] void fun()
*/
void fun() {
// The condition goes through static_bool's conversion to bool; this is the
// branch that gets patched when the direction changes.
if (semi_runtime_branch) {
std::puts("taken");
} else {
std::puts("not taken");
}
}
// Example driver: flips `semi_runtime_branch` at run-time and calls `fun`
// after each change.
int main() {
// init() is [[nodiscard]], hence the assignment to std::ignore; it must be
// called before the branch direction is changed.
std::ignore = mut::static_bool::init(); // enables run-time code patching
fun(); // not taken
semi_runtime_branch = true;
fun(); // taken
semi_runtime_branch = false;
fun(); // not taken
}main: // $CXX -O3
lea rdi, [rip + .L.str.1]
nop # code patching (nop->nop)
lea rdi, [rip + .L.str.2]
.Ltmp1:
call puts@PLT # not taken
call semi_runtime_branch.operator=(true)
lea rdi, [rip + .L.str.1]
jmp .Ltmp2 # code patching (nop->jmp)
lea rdi, [rip + .L.str.2]
.Ltmp2:
call puts@PLT # taken
call semi_runtime_branch.operator=(false)
lea rdi, [rip + .L.str.1]
nop # code patching (jmp->nop)
lea rdi, [rip + .L.str.2]
.Ltmp3:
call puts@PLT # not taken
xor eax, eax # return 0
ret
.L.str.1: .asciz "taken"
.L.str.2: .asciz "not taken"
variant(https://godbolt.org/z/GGTPqvY3b) | (https://wg21.link/P2996)
/**
* Example variant that tracks its active alternative with one
* `mut::static_bool` flag per type in `Ts...` instead of a run-time index.
* Uses the reflection facilities proposed in P2996 (`^`, `[: :]`,
* `define_class`, `nonstatic_data_members_of`, `data_member_spec`).
*/
template<class... Ts>
class variant {
public:
/**
* Constructs the variant from a value of one of the alternatives.
* @param t value to store; the alternative is selected by matching
*          `std::remove_cvref_t<T>` against `Ts...`
* NOTE(review): when T matches none of Ts, `index` equals sizeof...(Ts)
* and nothing here guards against that - confirm it is rejected elsewhere.
*/
template<class T>
constexpr explicit(false) variant(T&& t) {
// Compile-time position of the matching alternative within Ts...
constexpr auto index = [] {
std::array match{std::is_same_v<Ts, std::remove_cvref_t<T>>...};
return std::ranges::find(match, true)-match.begin();
}();
// Reset every flag, then set only the flag of the selected alternative.
for (auto& option : options) option = false;
options[index] = true;
// `index+1u` skips the leading `empty` member of `storage`.
std::construct_at(&storage_.[:
nonstatic_data_members_of(^storage)[index+1u] :], std::forward<T>(t)
);
}
/**
* Invokes `fn` with the storage member whose flag is set.
* Flags are tested in order N, N+1, ... by recursing on the template
* parameter `N`; `std::unreachable()` is reached when no flag is set.
* @param fn callable that accepts every alternative
* @return result of `std::invoke(fn, member)`
* NOTE(review): the N == sizeof...(Ts) instantiation contains no return
* statement, so this looks usable only with visitors returning void - confirm.
*/
template<auto N = 0u, class Fn>
constexpr auto visit(Fn&& fn) const -> decltype(auto) {
if constexpr (N < sizeof...(Ts)) {
if (options[N]) {
return std::invoke(std::forward<Fn>(fn),
storage_.[: nonstatic_data_members_of(^storage)[N+1u] :]);
} else {
return visit<N + 1u>(std::forward<Fn>(fn));
}
}
std::unreachable();
}
// ...
private:
// Declared here, completed by `define_class` below: one `empty` member
// followed by one member per type in Ts...
union storage;
struct empty{ };
static_assert(is_type(define_class(^storage, {
std::meta::data_member_spec(^empty, {.name = "empty"}),
std::meta::data_member_spec(^Ts)...
})));
// Always `false`; only used to expand one initializer per type in Ts...
template<class...> static constexpr auto false_v = false;
// One flag per alternative. The array is `static`, so it is shared by all
// objects of the same variant specialization.
static constexpr std::array options{mut::static_bool{false_v<Ts>}...};
// Starts with the `empty` member initialized.
storage storage_{.empty={}};
};void usage(const variant<bool, int, float>& v) {
// Prints the name of the alternative that `visit` dispatches to.
// `overload` is not defined in this snippet.
v.visit(overload{
[](bool) { std::puts("bool"); },
[](int) { std::puts("int"); },
[](float) { std::puts("float"); },
});
}
// Example driver: stores a bool, an int and a float in turn and calls
// `usage` after each assignment.
int main() {
// init() is [[nodiscard]], hence the assignment to std::ignore; it must be
// called before any branch direction is changed.
std::ignore = mut::static_bool::init();
variant<bool, int, float> v{};
v = true;
usage(v); // expected output: bool
v = 42;
usage(v); // expected output: int
v = 42.f;
usage(v); // expected output: float
}usage(variant<bool, int, float> const&):
lea rdi, [rip + .L.str.1]
nop # code patching (nop->jmp)
lea rdi, [rip + .L.str.2]
nop # code patching (jmp->jmp)
lea rdi, [rip + .L.str.3]
nop # code patching (jmp->jmp)
jmp puts
.L.str.1: .asciz "bool"
.L.str.2: .asciz "int"
.L.str.3: .asciz "float"
/**
* Minimal overhead (via code patching) static bool
*/
class static_bool final {
public:
/**
* Creates empty class static_bool (sizeof(static_bool) == 1u)
* @param direction initial branch direction
* Note: direction should be false by default
* for better performance: { false: nop, true: unconditional jmp }
*/
constexpr explicit(false) static_bool(const auto direction) noexcept;
// Non-copyable and non-movable: all copy/move operations are deleted.
constexpr static_bool(const static_bool&) noexcept = delete;
constexpr static_bool(static_bool&&) noexcept = delete;
constexpr auto operator=(const static_bool&) noexcept = delete;
constexpr auto operator=(static_bool&&) noexcept = delete;
/**
* Makes required pages writable for code patching
* Note: Must be called before changing the branch direction (`branch = true/false`)
* Should be called only once per application
* @param page_size page size (default: sysconf(_SC_PAGESIZE))
* @param permissions protect permissions (default: PROT_READ | PROT_WRITE | PROT_EXEC)
* @return 0 if successful, -1 on error (errno is set to indicate the error)
*/
[[nodiscard]] static constexpr auto init(const u64 page_size = sysconf(_SC_PAGESIZE),
const u64 permissions = 0b111) noexcept -> int;
/**
* Updates branch direction
* Note: const-qualified, so it can be applied to a `constexpr` static_bool object
* @param direction new branch direction
*/
constexpr void operator=(const auto direction) const noexcept;
/**
* Returns branch direction
* @return current branch direction
*/
[[gnu::always_inline]] [[nodiscard]] inline operator bool() const noexcept;
};Configuration
#define MUT 2'1'0 // Current library version (SemVer)-
How does it work?
`mut` is using a technique called code patching - which basically means that the code modifies itself. `mut::static_bool` is based on https://docs.kernel.org/staging/static-keys.html and it requires `asm goto` support (gcc, clang). `mut` currently supports x86-64 Linux, but other platforms can be added using the same technique. Example:
constexpr mut::static_bool b = false; if (b) { return 42; } else { return 0; }
it will emit...
main:
  .byte 15 31 68 0 0 # nop - https://www.felixcloutier.com/x86/nop
  xor eax, eax # return 0
  ret
.LBB1:
  mov eax, 42 # return 42
  ret
which will effectively execute...
main:
  nop
  xor eax, eax # return 0
  ret
now, if we change the branch direction at run-time...
b = true; if (b) { return 42; } else { return 0; }
it will emit...
main:
  call b.operator=(true); # nop->jmp or jmp->nop
  jmp .LBB1: (nop->jmp - changed in the memory of the program)
  xor eax, eax # return 0
  ret
.LBB1:
  mov eax, 42 # return 42
  ret
-
How to integrate with CMake/CPM?
# Fetch mut with CPM and expose it as the header-only target `mut::mut`.
CPMAddPackage(
  NAME mut
  GITHUB_REPOSITORY boost-ext/mut
  GIT_TAG v2.1.0
)
# The INTERFACE target has to be named `mut` so that the ALIAS below resolves,
# and CPM publishes the checkout location as `<NAME>_SOURCE_DIR` (`mut_SOURCE_DIR`).
add_library(mut INTERFACE)
target_include_directories(mut SYSTEM INTERFACE ${mut_SOURCE_DIR})
add_library(mut::mut ALIAS mut)

# Link the consuming target against the namespaced alias (`::`, not `:`).
target_link_libraries(${PROJECT_NAME} mut::mut)
-
Acknowledgments
https://docs.kernel.org/staging/static-keys.html, https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html, https://www.agner.org/optimize/instruction_tables.pdf, https://gcc.gnu.org/onlinedocs/gcc/Extended-Asm.html, https://www.felixcloutier.com/documents/gcc-asm.html, https://www.felixcloutier.com/x86, https://uops.info/table.html, https://arxiv.org/abs/2308.14185, https://arxiv.org/pdf/2011.13127