Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit c82d669

Browse files
committed
gc.c: Fix a race condition in object_id for shareable objects
If an object is shareable and has no capacity left, it isn't safe to store the object ID in its fields, because doing so requires resizing the object, which can't be done unless all field reads are synchronized. In that case we have to store the ID externally, as we used to.
1 parent 097d742 commit c82d669

File tree

7 files changed

+256
-29
lines changed

7 files changed

+256
-29
lines changed

ext/objspace/objspace_dump.c

+2
Original file line numberDiff line numberDiff line change
@@ -829,6 +829,8 @@ shape_i(rb_shape_t *shape, void *data)
829829
break;
830830
case SHAPE_OBJ_ID:
831831
dump_append(dc, ", \"shape_type\":\"OBJ_ID\"");
832+
case SHAPE_EXTERNAL_OBJ_ID:
833+
dump_append(dc, ", \"shape_type\":\"EXTERNAL_OBJ_ID\"");
832834
break;
833835
}
834836

gc.c

+142-24
Original file line numberDiff line numberDiff line change
@@ -1879,6 +1879,61 @@ static const rb_data_type_t id2ref_tbl_type = {
18791879
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
18801880
};
18811881

1882+
static VALUE obj_to_id_value = 0;
1883+
static st_table *obj_to_id_tbl = NULL;
1884+
1885+
static void mark_hash_values(st_table *tbl);
1886+
1887+
static void
1888+
obj_to_id_tbl_mark(void *data)
1889+
{
1890+
st_table *table = (st_table *)data;
1891+
if (UNLIKELY(!RB_POSFIXABLE(LAST_OBJECT_ID()))) {
1892+
// It's very unlikely, but if enough object ids were generated, keys may be T_BIGNUM
1893+
mark_hash_values(table);
1894+
}
1895+
// We purposedly don't mark keys, as they are weak references.
1896+
// rb_gc_obj_free_vm_weak_references takes care of cleaning them up.
1897+
}
1898+
1899+
static size_t
1900+
obj_to_id_tbl_memsize(const void *data)
1901+
{
1902+
return rb_st_memsize(data);
1903+
}
1904+
1905+
static void
1906+
obj_to_id_tbl_compact(void *data)
1907+
{
1908+
st_table *table = (st_table *)data;
1909+
if (LIKELY(RB_POSFIXABLE(LAST_OBJECT_ID()))) {
1910+
// We know values are all FIXNUM, so no need to update them.
1911+
gc_ref_update_table_keys_only(table);
1912+
}
1913+
else {
1914+
gc_update_table_refs(table);
1915+
}
1916+
}
1917+
1918+
static void
1919+
obj_to_id_tbl_free(void *data)
1920+
{
1921+
obj_to_id_tbl = NULL; // clear global ref
1922+
st_table *table = (st_table *)data;
1923+
st_free_table(table);
1924+
}
1925+
1926+
static const rb_data_type_t obj_to_id_tbl_type = {
1927+
.wrap_struct_name = "VM/obj_to_id_table",
1928+
.function = {
1929+
.dmark = obj_to_id_tbl_mark,
1930+
.dfree = obj_to_id_tbl_free,
1931+
.dsize = obj_to_id_tbl_memsize,
1932+
.dcompact = obj_to_id_tbl_compact,
1933+
},
1934+
.flags = RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY
1935+
};
1936+
18821937
#define RUBY_ATOMIC_VALUE_LOAD(x) (VALUE)(RUBY_ATOMIC_PTR_LOAD(x))
18831938

18841939
static VALUE
@@ -1901,23 +1956,46 @@ class_object_id(VALUE klass)
19011956
}
19021957

19031958
static VALUE
1904-
object_id0(VALUE obj)
1959+
object_id0(VALUE obj, bool shareable)
19051960
{
19061961
VALUE id = Qfalse;
19071962

1908-
if (rb_shape_has_object_id(rb_obj_shape(obj))) {
1909-
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1910-
id = rb_obj_field_get(obj, object_id_shape_id);
1963+
rb_shape_t *current_shape = rb_obj_shape(obj);
1964+
1965+
if (rb_shape_has_object_id(current_shape)) {
1966+
shape_id_t object_id_shape_id = rb_shape_object_id(obj);
1967+
1968+
if (RB_UNLIKELY(shareable && RSHAPE(object_id_shape_id)->type == SHAPE_EXTERNAL_OBJ_ID)) {
1969+
RUBY_ASSERT(obj_to_id_tbl);
1970+
st_lookup(obj_to_id_tbl, obj, &id);
1971+
}
1972+
else {
1973+
id = rb_obj_field_get(obj, object_id_shape_id);
1974+
}
19111975
RUBY_ASSERT(id, "object_id missing");
19121976
return id;
19131977
}
19141978

1915-
// rb_shape_object_id_shape may lock if the current shape has
1916-
// multiple children.
1917-
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1918-
19191979
id = generate_next_object_id();
1920-
rb_obj_field_set(obj, object_id_shape_id, id);
1980+
// If the object is shareable and doesn't have free capacity we can't safely
1981+
// resize it. So we have to store the object_id externally.
1982+
if (shareable && current_shape->capacity && current_shape->next_field_index == current_shape->capacity) {
1983+
shape_id_t object_id_shape_id = rb_shape_transition_external_object_id(obj);
1984+
1985+
if (RB_UNLIKELY(!obj_to_id_tbl)) {
1986+
obj_to_id_tbl = st_init_numtable();
1987+
obj_to_id_value = TypedData_Wrap_Struct(0, &obj_to_id_tbl_type, obj_to_id_tbl);
1988+
}
1989+
st_insert(obj_to_id_tbl, obj, id);
1990+
rb_shape_set_shape_id(obj, object_id_shape_id);
1991+
}
1992+
else {
1993+
// rb_shape_object_id_shape may lock if the current shape has
1994+
// multiple children.
1995+
shape_id_t object_id_shape_id = rb_shape_transition_object_id(obj);
1996+
rb_obj_field_set(obj, object_id_shape_id, id);
1997+
}
1998+
19211999
if (RB_UNLIKELY(id2ref_tbl)) {
19222000
st_insert(id2ref_tbl, (st_data_t)id, (st_data_t)obj);
19232001
}
@@ -1941,14 +2019,14 @@ object_id(VALUE obj)
19412019
break;
19422020
}
19432021

1944-
if (UNLIKELY(rb_gc_multi_ractor_p() && rb_ractor_shareable_p(obj))) {
2022+
if (UNLIKELY(rb_gc_multi_ractor_p() && RB_OBJ_SHAREABLE_P(obj))) {
19452023
unsigned int lock_lev = rb_gc_vm_lock();
1946-
VALUE id = object_id0(obj);
2024+
VALUE id = object_id0(obj, true);
19472025
rb_gc_vm_unlock(lock_lev);
19482026
return id;
19492027
}
19502028

1951-
return object_id0(obj);
2029+
return object_id0(obj, false);
19522030
}
19532031

19542032
static void
@@ -2024,23 +2102,35 @@ obj_free_object_id(VALUE obj)
20242102
}
20252103

20262104
VALUE obj_id = 0;
2027-
if (RB_UNLIKELY(id2ref_tbl)) {
2028-
switch (BUILTIN_TYPE(obj)) {
2029-
case T_CLASS:
2030-
case T_MODULE:
2031-
if (RCLASS(obj)->object_id) {
2032-
obj_id = RCLASS(obj)->object_id;
2105+
switch (BUILTIN_TYPE(obj)) {
2106+
case T_CLASS:
2107+
case T_MODULE:
2108+
if (RB_LIKELY(!id2ref_tbl)) return;
2109+
2110+
if (RCLASS(obj)->object_id) {
2111+
obj_id = RCLASS(obj)->object_id;
2112+
}
2113+
break;
2114+
default:
2115+
if (rb_shape_obj_has_id(obj)) {
2116+
shape_id_t shape_id = rb_shape_object_id(obj);
2117+
if (RSHAPE(shape_id)->type == SHAPE_EXTERNAL_OBJ_ID) {
2118+
RUBY_ASSERT(obj_to_id_tbl);
2119+
2120+
VALUE key = obj;
2121+
st_delete(obj_to_id_tbl, &key, &obj_id);
20332122
}
2034-
break;
2035-
default:
2036-
if (rb_shape_obj_has_id(obj)) {
2037-
obj_id = object_id(obj);
2123+
else {
2124+
if (RB_LIKELY(!id2ref_tbl)) return;
2125+
2126+
obj_id = rb_obj_field_get(obj, shape_id);
20382127
}
2039-
break;
2128+
RUBY_ASSERT(obj_id);
20402129
}
2130+
break;
20412131
}
20422132

2043-
if (RB_UNLIKELY(obj_id)) {
2133+
if (RB_UNLIKELY(id2ref_tbl && obj_id)) {
20442134
RUBY_ASSERT(FIXNUM_P(obj_id) || RB_TYPE_P(obj, T_BIGNUM));
20452135

20462136
if (!st_delete(id2ref_tbl, (st_data_t *)&obj_id, NULL)) {
@@ -2733,6 +2823,14 @@ mark_key(st_data_t key, st_data_t value, st_data_t data)
27332823
return ST_CONTINUE;
27342824
}
27352825

2826+
static int
2827+
mark_value(st_data_t key, st_data_t value, st_data_t data)
2828+
{
2829+
gc_mark_internal((VALUE)value);
2830+
2831+
return ST_CONTINUE;
2832+
}
2833+
27362834
void
27372835
rb_mark_set(st_table *tbl)
27382836
{
@@ -2741,6 +2839,14 @@ rb_mark_set(st_table *tbl)
27412839
st_foreach(tbl, mark_key, (st_data_t)rb_gc_get_objspace());
27422840
}
27432841

2842+
static void
2843+
mark_hash_values(st_table *tbl)
2844+
{
2845+
if (!tbl) return;
2846+
2847+
st_foreach(tbl, mark_value, 0);
2848+
}
2849+
27442850
static int
27452851
mark_keyvalue(st_data_t key, st_data_t value, st_data_t data)
27462852
{
@@ -4148,6 +4254,17 @@ rb_gc_vm_weak_table_foreach(vm_table_foreach_callback_func callback,
41484254
}
41494255
break;
41504256
}
4257+
case RB_GC_VM_OBJ_TO_ID_TABLE: {
4258+
if (obj_to_id_tbl) {
4259+
st_foreach_with_replace(
4260+
obj_to_id_tbl,
4261+
vm_weak_table_foreach_weak_key,
4262+
vm_weak_table_foreach_update_weak_key,
4263+
(st_data_t)&foreach_data
4264+
);
4265+
}
4266+
break;
4267+
}
41514268
case RB_GC_VM_GENERIC_FIELDS_TABLE: {
41524269
st_table *generic_fields_tbl = rb_generic_fields_tbl_get();
41534270
if (generic_fields_tbl) {
@@ -5528,6 +5645,7 @@ Init_GC(void)
55285645
{
55295646
#undef rb_intern
55305647
rb_gc_register_address(&id2ref_value);
5648+
rb_gc_register_address(&obj_to_id_value);
55315649

55325650
malloc_offset = gc_compute_malloc_offset();
55335651

gc/gc.h

+29
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ enum rb_gc_vm_weak_tables {
2929
RB_GC_VM_OVERLOADED_CME_TABLE,
3030
RB_GC_VM_GLOBAL_SYMBOLS_TABLE,
3131
RB_GC_VM_ID2REF_TABLE,
32+
RB_GC_VM_OBJ_TO_ID_TABLE,
3233
RB_GC_VM_GENERIC_FIELDS_TABLE,
3334
RB_GC_VM_FROZEN_STRINGS_TABLE,
3435
RB_GC_VM_WEAK_TABLE_COUNT
@@ -135,6 +136,34 @@ gc_ref_update_table_values_only(st_table *tbl)
135136
}
136137
}
137138

139+
static int
140+
hash_foreach_replace_key(st_data_t key, st_data_t value, st_data_t argp, int error)
141+
{
142+
if (rb_gc_location((VALUE)key) != (VALUE)key) {
143+
return ST_REPLACE;
144+
}
145+
return ST_CONTINUE;
146+
}
147+
148+
static int
149+
hash_replace_ref_key(st_data_t *key, st_data_t *value, st_data_t argp, int existing)
150+
{
151+
*key = rb_gc_location((VALUE)*key);
152+
153+
return ST_CONTINUE;
154+
}
155+
156+
static void
157+
gc_ref_update_table_keys_only(st_table *tbl)
158+
{
159+
if (!tbl || tbl->num_entries == 0) return;
160+
161+
// FIXME: this certainly isn't correct. If a key moved, we need to re-hash.
162+
if (st_foreach_with_replace(tbl, hash_foreach_replace_key, hash_replace_ref_key, 0)) {
163+
rb_raise(rb_eRuntimeError, "hash modified during iteration");
164+
}
165+
}
166+
138167
static int
139168
gc_mark_tbl_no_pin_i(st_data_t key, st_data_t value, st_data_t data)
140169
{

0 commit comments

Comments
 (0)