diff --git a/src/mono/mono/metadata/CMakeLists.txt b/src/mono/mono/metadata/CMakeLists.txt index 9efd5ac0079c24..9d6010e95f3286 100644 --- a/src/mono/mono/metadata/CMakeLists.txt +++ b/src/mono/mono/metadata/CMakeLists.txt @@ -45,7 +45,8 @@ endif() set(imported_native_sources ../../../native/containers/dn-simdhash.c ../../../native/containers/dn-simdhash-string-ptr.c - ../../../native/containers/dn-simdhash-u32-ptr.c) + ../../../native/containers/dn-simdhash-u32-ptr.c + ../../../native/containers/dn-simdhash-ptrpair-ptr.c) set(metadata_common_sources appdomain.c diff --git a/src/mono/mono/metadata/class-setup-vtable.c b/src/mono/mono/metadata/class-setup-vtable.c index 9a235c1ec0cced..62afa76dde5b74 100644 --- a/src/mono/mono/metadata/class-setup-vtable.c +++ b/src/mono/mono/metadata/class-setup-vtable.c @@ -773,6 +773,13 @@ mono_method_get_method_definition (MonoMethod *method) static gboolean verify_class_overrides (MonoClass *klass, MonoMethod **overrides, int onum) { + // on windows and arm, we define NDEBUG for release builds; on browser and wasi, we define DEBUG for debug builds + // NOTE(review): the guard below tests ENABLE_CHECKED_BUILD rather than NDEBUG/DEBUG, so corlib classes skip override verification only in checked builds (confirm this is the intended direction) +#ifdef ENABLE_CHECKED_BUILD + if (klass->image == mono_defaults.corlib) + return TRUE; +#endif + int i; for (i = 0; i < onum; ++i) { @@ -1760,7 +1767,7 @@ mono_class_setup_vtable_general (MonoClass *klass, MonoMethod **overrides, int o MonoMethod *override = iface_overrides [i*2 + 1]; if (mono_class_is_gtd (override->klass)) { override = mono_class_inflate_generic_method_full_checked (override, ic, mono_class_get_context (ic), error); - } + } // there used to be code here to inflate decl if decl->is_inflated, but in https://github.com/dotnet/runtime/pull/64102#discussion_r790019545 we // think that this does not correspond to any real code. 
if (!apply_override (klass, ic, vtable, decl, override, &override_map, &override_class_map, &conflict_map)) diff --git a/src/mono/mono/metadata/class.c b/src/mono/mono/metadata/class.c index 5ae4f1981d38ac..6045628bf49e7f 100644 --- a/src/mono/mono/metadata/class.c +++ b/src/mono/mono/metadata/class.c @@ -4331,12 +4331,16 @@ mono_class_is_variant_compatible_slow (MonoClass *klass, MonoClass *oklass) } return TRUE; } -/*Check if @candidate implements the interface @target*/ + static gboolean -mono_class_implement_interface_slow (MonoClass *target, MonoClass *candidate) +mono_class_implement_interface_slow_cached (MonoClass *target, MonoClass *candidate, dn_simdhash_ptrpair_ptr_t *cache); + +static gboolean +mono_class_implement_interface_slow_uncached (MonoClass *target, MonoClass *candidate, dn_simdhash_ptrpair_ptr_t *cache) { ERROR_DECL (error); int i; + gboolean is_variant = mono_class_has_variant_generic_params (target); if (is_variant && MONO_CLASS_IS_INTERFACE_INTERNAL (candidate)) { @@ -4365,7 +4369,7 @@ mono_class_implement_interface_slow (MonoClass *target, MonoClass *candidate) return TRUE; if (is_variant && mono_class_is_variant_compatible_slow (target, iface_class)) return TRUE; - if (mono_class_implement_interface_slow (target, iface_class)) + if (mono_class_implement_interface_slow_cached (target, iface_class, cache)) return TRUE; } } @@ -4390,7 +4394,7 @@ mono_class_implement_interface_slow (MonoClass *target, MonoClass *candidate) if (is_variant && mono_class_is_variant_compatible_slow (target, candidate_interfaces [i])) return TRUE; - if (mono_class_implement_interface_slow (target, candidate_interfaces [i])) + if (mono_class_implement_interface_slow_cached (target, candidate_interfaces [i], cache)) return TRUE; } } @@ -4400,6 +4404,107 @@ mono_class_implement_interface_slow (MonoClass *target, MonoClass *candidate) return FALSE; } +// #define LOG_INTERFACE_CACHE_HITS 1 + +#if LOG_INTERFACE_CACHE_HITS +static gint64 implement_interface_hits = 0, 
implement_interface_misses = 0; + +static void +log_hit_rate (dn_simdhash_ptrpair_ptr_t *cache) // Debug helper (LOG_INTERFACE_CACHE_HITS only): prints the hit rate, call count and overflow count of @cache +{ + gint64 total_calls = implement_interface_hits + implement_interface_misses; + if ((total_calls % 500) != 0) // only report on every 500th call + return; + double hit_rate = implement_interface_hits * 100.0 / total_calls; + g_printf ("implement_interface cache hit rate: %f (%lld total calls). Overflow count: %u\n", hit_rate, (long long)total_calls, dn_simdhash_overflow_count (cache)); // cast: %lld requires long long, gint64 may be a different type +} +#endif + +static gboolean +mono_class_implement_interface_slow_cached (MonoClass *target, MonoClass *candidate, dn_simdhash_ptrpair_ptr_t *cache) +{ + gpointer cached_result = NULL; + dn_ptrpair_t key = { target, candidate }; + gboolean result = 0, cache_hit = 0; + + // Skip the caching logic for exact matches + if (candidate == target) + return TRUE; + + cache_hit = dn_simdhash_ptrpair_ptr_try_get_value (cache, key, &cached_result); + if (cache_hit) { + // Testing shows a cache hit rate of 60% on S.R.Tests and S.T.J.Tests, + // and 40-50% for small app startup. Near-zero overflow count. +#if LOG_INTERFACE_CACHE_HITS + implement_interface_hits++; + log_hit_rate (cache); +#endif + result = (cached_result != NULL); +#ifndef ENABLE_CHECKED_BUILD + return result; +#endif + } + + gboolean uncached_result = mono_class_implement_interface_slow_uncached (target, candidate, cache); + + if (!cache_hit) { +#if LOG_INTERFACE_CACHE_HITS + implement_interface_misses++; + log_hit_rate (cache); +#endif + dn_simdhash_ptrpair_ptr_try_add (cache, key, uncached_result ? 
GUINT_TO_POINTER(1) : NULL); + } + +#ifdef ENABLE_CHECKED_BUILD + if (cache_hit) { + if (result != uncached_result) + g_print ( + "Cache mismatch for %s.%s and %s.%s: cached=%d, uncached=%d\n", + m_class_get_name_space (target), m_class_get_name (target), + m_class_get_name_space (candidate), m_class_get_name (candidate), + result, uncached_result + ); + g_assert (result == uncached_result); + } +#endif + return uncached_result; +} + +static dn_simdhash_ptrpair_ptr_t *implement_interface_scratch_cache = NULL; + +/* Check if @candidate implements the interface @target. Borrows the shared scratch cache of earlier (target, candidate) answers for the duration of the call, creating one if none is available. */ +static gboolean +mono_class_implement_interface_slow (MonoClass *target, MonoClass *candidate) +{ + gpointer cas_result; + gboolean result; + dn_simdhash_ptrpair_ptr_t *cache = (dn_simdhash_ptrpair_ptr_t *)mono_atomic_xchg_ptr ((volatile gpointer *)&implement_interface_scratch_cache, NULL); + if (!cache) + // Roughly 64KB of memory usage and big enough to have fast lookups + // Smaller is viable but makes the hit rate worse + cache = dn_simdhash_ptrpair_ptr_new (2048, NULL); + else if (dn_simdhash_count (cache) >= 2250) { + // FIXME: 2250 is arbitrary (roughly 256 11-item buckets w/load factor) + // One step down reduces hit rate by approximately 2-4% + // HACK: Only clear the scratch cache once it gets too big. + // The pattern is that (especially during startup), we have lots + // of mono_class_implement_interface_slow calls back to back that + // perform similar checks, so keeping the cache data around between + // sequential calls will potentially optimize them a lot. + dn_simdhash_clear (cache); + } + + result = mono_class_implement_interface_slow_cached (target, candidate, cache); + + // Under most circumstances we won't have multiple threads competing to run implement_interface_slow, + // so it's not worth making this thread-local and potentially keeping a cache instance around per-thread. The cache is offered back to the shared slot below; a non-NULL CAS result leads to freeing our instance (presumably because the slot already holds another cache; TODO confirm mono_atomic_cas_ptr returns the previous value). 
+ cas_result = mono_atomic_cas_ptr ((volatile gpointer *)&implement_interface_scratch_cache, cache, NULL); + if (cas_result != NULL) + dn_simdhash_free (cache); + + return result; +} + /* * Check if @oklass can be assigned to @klass. * This function does the same as mono_class_is_assignable_from_internal but is safe to be used from mono_class_init_internal context. @@ -4416,8 +4521,9 @@ mono_class_is_assignable_from_slow (MonoClass *target, MonoClass *candidate) return TRUE; /*If target is not an interface there is no need to check them.*/ - if (MONO_CLASS_IS_INTERFACE_INTERNAL (target)) + if (MONO_CLASS_IS_INTERFACE_INTERNAL (target)) { return mono_class_implement_interface_slow (target, candidate); + } if (m_class_is_delegate (target) && mono_class_has_variant_generic_params (target)) return mono_class_is_variant_compatible (target, candidate, FALSE); diff --git a/src/native/containers/containers.cmake b/src/native/containers/containers.cmake index 16c41eab5619f8..4749dceea2dd9a 100644 --- a/src/native/containers/containers.cmake +++ b/src/native/containers/containers.cmake @@ -13,6 +13,8 @@ list(APPEND SHARED_CONTAINER_SOURCES # dn-simdhash-string-ptr.c # dn-simdhash-u32-ptr.c # dn-simdhash-ptr-ptr.c + # dn-simdhash-ght-compatible.c + # dn-simdhash-ptrpair-ptr.c ) list(APPEND SHARED_CONTAINER_HEADERS diff --git a/src/native/containers/dn-simdhash-ptrpair-ptr.c b/src/native/containers/dn-simdhash-ptrpair-ptr.c new file mode 100644 index 00000000000000..d377647b6636ac --- /dev/null +++ b/src/native/containers/dn-simdhash-ptrpair-ptr.c @@ -0,0 +1,39 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include <config.h> +#include "dn-simdhash.h" + +#include "dn-simdhash-utils.h" + +typedef struct dn_ptrpair_t { + void *first; + void *second; +} dn_ptrpair_t; + +static inline uint32_t +dn_ptrpair_t_hash (dn_ptrpair_t key) // Hash of a pointer pair: each member is hashed with its own seed and the two hashes are XORed +{ + return (MurmurHash3_32_ptr(key.first, 0) ^ MurmurHash3_32_ptr(key.second, 1)); +} + +static inline uint8_t +dn_ptrpair_t_equals (dn_ptrpair_t lhs, dn_ptrpair_t rhs) // Nonzero only when both members are pointer-identical +{ + return (lhs.first == rhs.first) && (lhs.second == rhs.second); +} + +#define DN_SIMDHASH_T dn_simdhash_ptrpair_ptr +#define DN_SIMDHASH_KEY_T dn_ptrpair_t +#define DN_SIMDHASH_VALUE_T void * +#define DN_SIMDHASH_KEY_HASHER(hash, key) dn_ptrpair_t_hash(key) +#define DN_SIMDHASH_KEY_EQUALS(hash, lhs, rhs) dn_ptrpair_t_equals(lhs, rhs) +#if SIZEOF_VOID_P == 8 +// 192 bytes holds 12 16-byte blocks, so 11 keys and one suffix table +#define DN_SIMDHASH_BUCKET_CAPACITY 11 +#else +// 128 bytes holds 16 8-byte blocks, so 14 keys plus the suffix table in the remaining two blocks +#define DN_SIMDHASH_BUCKET_CAPACITY 14 +#endif + +#include "dn-simdhash-specialization.h" diff --git a/src/native/containers/dn-simdhash-specializations.h b/src/native/containers/dn-simdhash-specializations.h index 4966c7575d19a0..9533edfc5f3d1f 100644 --- a/src/native/containers/dn-simdhash-specializations.h +++ b/src/native/containers/dn-simdhash-specializations.h @@ -59,4 +59,19 @@ typedef struct dn_simdhash_str_key dn_simdhash_str_key; #include "dn-simdhash-ght-compatible.h" + +typedef struct dn_ptrpair_t { + void *first, *second; +} dn_ptrpair_t; + +#define DN_SIMDHASH_T dn_simdhash_ptrpair_ptr +#define DN_SIMDHASH_KEY_T dn_ptrpair_t +#define DN_SIMDHASH_VALUE_T void * + +#include "dn-simdhash-specialization-declarations.h" + +#undef DN_SIMDHASH_T +#undef DN_SIMDHASH_KEY_T +#undef DN_SIMDHASH_VALUE_T + #endif diff --git a/src/native/containers/dn-simdhash.c b/src/native/containers/dn-simdhash.c index 03d4d2bf3951aa..d1e2b6e330b6ef 100644 --- a/src/native/containers/dn-simdhash.c +++ b/src/native/containers/dn-simdhash.c @@ -119,8 +119,7 
@@ dn_simdhash_clear (dn_simdhash_t *hash) if (hash->vtable.destroy_all) hash->vtable.destroy_all(hash); hash->count = 0; - // TODO: Scan through buckets sequentially and only erase ones with data in them - // Maybe skip erasing the key slots too? + // TODO: Implement a fast clear algorithm that scans buckets and only clears ones w/nonzero count memset(hash->buffers.buckets, 0, hash->buffers.buckets_length * hash->meta->bucket_size_bytes); // Skip this for performance; memset is especially slow in wasm // memset(hash->buffers.values, 0, hash->buffers.values_length * hash->meta->value_size); @@ -140,6 +139,19 @@ dn_simdhash_count (dn_simdhash_t *hash) return hash->count; } +uint32_t +dn_simdhash_overflow_count (dn_simdhash_t *hash) // Sums the cascade counter byte of every bucket in @hash +{ + assert(hash); + uint32_t result = 0; + for (uint32_t bucket_index = 0; bucket_index < hash->buffers.buckets_length; bucket_index++) { + uint8_t *suffixes = ((uint8_t *)hash->buffers.buckets) + ((size_t)bucket_index * hash->meta->bucket_size_bytes); // widen before multiplying so the byte offset cannot wrap in 32-bit arithmetic + uint8_t cascade_count = suffixes[DN_SIMDHASH_CASCADED_SLOT]; // one-byte per-bucket counter read from the cascaded slot + result += cascade_count; + } + return result; +} + void dn_simdhash_ensure_capacity (dn_simdhash_t *hash, uint32_t capacity) { diff --git a/src/native/containers/dn-simdhash.h b/src/native/containers/dn-simdhash.h index da4a7914e18873..a2d6e87c9045fe 100644 --- a/src/native/containers/dn-simdhash.h +++ b/src/native/containers/dn-simdhash.h @@ -144,6 +144,11 @@ dn_simdhash_capacity (dn_simdhash_t *hash); uint32_t dn_simdhash_count (dn_simdhash_t *hash); +// Returns the estimated number of items that have overflowed out of a bucket. +// WARNING: This is expensive to calculate (it visits every bucket in the table). +uint32_t +dn_simdhash_overflow_count (dn_simdhash_t *hash); + // Automatically resizes the table if it is too small to hold the requested number // of items. Will not shrink the table if it is already bigger. void