Thanks for visiting codestin.com
Credit goes to github.com

Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions gc.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
#include "internal/object.h"
#include "internal/proc.h"
#include "internal/rational.h"
#include "internal/re.h"
#include "internal/sanitizers.h"
#include "internal/struct.h"
#include "internal/symbol.h"
Expand Down Expand Up @@ -2781,11 +2782,8 @@ obj_free(rb_objspace_t *objspace, VALUE obj)
}
break;
case T_REGEXP:
if (RANY(obj)->as.regexp.ptr) {
onig_free(RANY(obj)->as.regexp.ptr);
RB_DEBUG_COUNTER_INC(obj_regexp_ptr);
}
break;
rb_reg_free(obj);
break;
case T_DATA:
if (DATA_PTR(obj)) {
int free_immediately = FALSE;
Expand Down
1 change: 1 addition & 0 deletions internal/re.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

/* re.c */
VALUE rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline);
void rb_reg_free(VALUE re);
VALUE rb_reg_check_preprocess(VALUE);
long rb_reg_search0(VALUE, VALUE, long, int, int);
VALUE rb_reg_match_p(VALUE re, VALUE str, long pos);
Expand Down
1 change: 1 addition & 0 deletions internal/vm.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ int rb_vm_check_optimizable_mid(VALUE mid);
VALUE rb_yield_refine_block(VALUE refinement, VALUE refinements);
MJIT_STATIC VALUE ruby_vm_special_exception_copy(VALUE);
PUREFUNC(st_table *rb_vm_fstring_table(void));
PUREFUNC(st_table *rb_vm_regexp_literals_table(void));

MJIT_SYMBOL_EXPORT_BEGIN
VALUE vm_exec(struct rb_execution_context_struct *, int); /* used in JIT-ed code */
Expand Down
136 changes: 106 additions & 30 deletions re.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,15 @@

#include <ctype.h>

#include "debug_counter.h"
#include "encindex.h"
#include "gc.h"
#include "internal.h"
#include "internal/error.h"
#include "internal/hash.h"
#include "internal/imemo.h"
#include "internal/re.h"
#include "internal/vm.h"
#include "regint.h"
#include "ruby/encoding.h"
#include "ruby/re.h"
Expand Down Expand Up @@ -752,6 +755,7 @@ rb_reg_casefold_p(VALUE re)
/*
 * Method-level wrapper around rb_reg_options(): validates +re+ with
 * rb_reg_check() and returns its option bits converted with INT2NUM().
 */
static VALUE
rb_reg_options_m(VALUE re)
{
    /* validate the receiver here, before rb_reg_options() reads it */
    rb_reg_check(re);
    return INT2NUM(rb_reg_options(re));
}
Expand Down Expand Up @@ -2956,33 +2960,41 @@ rb_reg_new(const char *s, long len, int options)
return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options);
}

VALUE
rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
static int
reg_lit_update_callback(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
{
VALUE re = rb_reg_alloc();
onig_errmsg_buffer err = "";
VALUE *new_re = (VALUE *)arg;
VALUE re = (VALUE)*key;

if (!str) str = rb_str_new(0,0);
if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
rb_set_errinfo(rb_reg_error_desc(str, options, err));
return Qnil;
if (existing) {
/* because of lazy sweep, str may be unmarked already and swept
* at next time */

if (rb_objspace_garbage_object_p(re) || rb_objspace_garbage_object_p(RREGEXP_SRC(re))) {
*new_re = Qundef;
return ST_DELETE;
}

*new_re = re;
return ST_STOP;
} else {
FL_SET(re, REG_LITERAL);
rb_obj_freeze(re);

*key = *new_re = re;
*value = re;
return ST_CONTINUE;
}
FL_SET(re, REG_LITERAL);
rb_obj_freeze(re);
return re;
}

static VALUE reg_cache;

VALUE
rb_reg_regcomp(VALUE str)
static st_index_t
reg_lit_hash(VALUE re)
{
if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
&& memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
return reg_cache;

return reg_cache = rb_reg_new_str(str, 0);
st_index_t hashval;
hashval = rb_reg_options(re);
hashval ^= ENCODING_GET(re);
hashval = rb_hash_uint(hashval, rb_str_hash(RREGEXP_SRC(re)));
return rb_hash_end(hashval);
}

static st_index_t reg_hash(VALUE re);
Expand Down Expand Up @@ -3013,6 +3025,78 @@ reg_hash(VALUE re)
return rb_hash_end(hashval);
}

/*
 * Compiles +str+ (an empty string when +str+ is 0) with +options+ into a
 * regexp and registers it in the VM regexp-literals table.
 *
 * Returns Qnil after storing an error description via rb_set_errinfo() when
 * initialization fails.  Otherwise returns whatever reg_lit_update_callback
 * leaves in the result slot: either the regexp already present in the table
 * or the freshly compiled one.
 */
VALUE
rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
{
    VALUE candidate, interned;
    onig_errmsg_buffer err = "";
    st_table *literals = rb_vm_regexp_literals_table();

    if (!str) {
        str = rb_str_new(0, 0);
    }

    candidate = rb_reg_alloc();
    if (rb_reg_initialize_str(candidate, str, options, err, sourcefile, sourceline) != 0) {
        rb_set_errinfo(rb_reg_error_desc(str, options, err));
        return Qnil;
    }

    /*
     * The callback writes Qundef into the result slot when it had to drop a
     * stale table entry; in that case simply try again.
     */
    for (;;) {
        interned = candidate;
        st_update(literals, (st_data_t)candidate,
                  reg_lit_update_callback, (st_data_t)&interned);
        if (interned != Qundef) {
            return interned;
        }
    }
}

/*
 * Releases the resources held by the regexp +re+.
 *
 * When +re+ carries the REG_LITERAL flag, its entry is removed from the VM
 * regexp-literals table, but only if the object still looks safe to look up:
 * the table's hash and compare callbacks read the compiled pattern and the
 * source string of the regexp.  Afterwards the compiled pattern, if there is
 * one, is released with onig_free() and the obj_regexp_ptr debug counter is
 * incremented.
 */
void
rb_reg_free(VALUE re)
{
    /* Largest source length the sanity check below accepts. */
    enum { REG_LIT_SRC_LEN_SANITY_MAX = 500 };

    if (FL_TEST(re, REG_LITERAL)) {
        if (!RREGEXP_PTR(re) || !RREGEXP_SRC(re)) {
            /* Missing pattern or source: test this first so that nothing
             * below dereferences a null source. */
        }
        else if (RREGEXP_SRC_LEN(re) < 0 ||
                 RREGEXP_SRC_LEN(re) > REG_LIT_SRC_LEN_SANITY_MAX) {
            /* FIXME: for a reason not yet understood, the GC hands over
             * regexps that have the REG_LITERAL flag but whose source length
             * and pointer are garbage; touching them segfaults.  The length
             * bound is only a heuristic and also matches legitimate literals
             * longer than REG_LIT_SRC_LEN_SANITY_MAX bytes, whose table entry
             * is then left behind. */
            fprintf(stderr, "rb_reg_free: literal regexp with implausible source length; "
                            "table entry not removed\n");
        }
        else if (rb_objspace_garbage_object_p(re) ||
                 rb_objspace_garbage_object_p(RREGEXP_SRC(re)) ||
                 !RREGEXP_SRC_PTR(re)) {
            /* TODO: cleanup dead refs with foreach? */
        }
        else {
            st_data_t key = (st_data_t)re;
            st_delete(rb_vm_regexp_literals_table(), &key, NULL);
        }
    }

    /* Count only patterns that actually existed. */
    if (RREGEXP_PTR(re)) {
        onig_free(RREGEXP_PTR(re));
        RB_DEBUG_COUNTER_INC(obj_regexp_ptr);
    }
}

/* The regexp most recently produced by rb_reg_regcomp() (0 until first use). */
static VALUE reg_cache;

/*
 * Returns a regexp for +str+ built with rb_reg_new_str(str, 0).  The
 * previous result is reused when its source has the same length, encoding
 * and bytes as +str+.
 */
VALUE
rb_reg_regcomp(VALUE str)
{
    if (!reg_cache
        || RREGEXP_SRC_LEN(reg_cache) != RSTRING_LEN(str)
        || ENCODING_GET(reg_cache) != ENCODING_GET(str)
        || memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) != 0) {
        /* cache miss: compile and remember the new regexp */
        reg_cache = rb_reg_new_str(str, 0);
    }
    return reg_cache;
}

/*
 * Compares two regexps.  Returns 0 when they are the same object, or when
 * +re2+ is a T_REGEXP whose KCODE_FIXED flag, pattern options, source
 * length, encoding and source bytes all match those of +re1+; returns a
 * non-zero value otherwise.
 */
static int
reg_cmp(VALUE re1, VALUE re2)
{
    if (re1 == re2) {
        return 0;
    }

    /* cheap mismatches first; evaluated left to right and short-circuited */
    if (!RB_TYPE_P(re2, T_REGEXP)
        || FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)
        || RREGEXP_PTR(re1)->options != RREGEXP_PTR(re2)->options
        || RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)
        || ENCODING_GET(re1) != ENCODING_GET(re2)) {
        return 1;
    }

    return memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1));
}

/*
 * Hash type handed to st_init_table_with_size() for the VM table of regexp
 * literals.  reg_cmp yields 0 for regexps it considers equal; reg_lit_hash
 * supplies the hash value.
 * NOTE(review): the initializer is positional -- presumably the compare slot
 * comes first and the hash slot second; confirm against struct st_hash_type.
 */
const struct st_hash_type rb_regexp_literal_hash_type = {
reg_cmp,
reg_lit_hash,
};

/*
* call-seq:
Expand All @@ -3035,14 +3119,7 @@ rb_reg_equal(VALUE re1, VALUE re2)
if (re1 == re2) return Qtrue;
if (!RB_TYPE_P(re2, T_REGEXP)) return Qfalse;
rb_reg_check(re1); rb_reg_check(re2);
if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
if (RREGEXP_PTR(re1)->options != RREGEXP_PTR(re2)->options) return Qfalse;
if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return Qfalse;
if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
if (memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0) {
return Qtrue;
}
return Qfalse;
return reg_cmp(re1, re2) == 0 ? Qtrue : Qfalse;
}

/*
Expand Down Expand Up @@ -3588,7 +3665,6 @@ rb_reg_options(VALUE re)
{
int options;

rb_reg_check(re);
options = RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
if (RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
Expand Down
7 changes: 7 additions & 0 deletions test/ruby/test_regexp.rb
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ def test_assert_normal_exit
Regexp.union("a", "a")
end

# Checks which regexps are expected to be one shared object and which are
# only equal.
def test_literal_deduplication
# two separate literal sites with the same source and options: same object
assert_same(/a/, /a/)
# same source but different options: distinct objects
refute_same(/a/, /a/m)
# a regexp built with Regexp.new is not the literal's object ...
refute_same(/a/, Regexp.new('a'))
# ... yet it still compares equal to the literal
assert_equal(/a/, Regexp.new('a'))
end

def test_to_s
assert_equal '(?-mix:\x00)', Regexp.new("\0").to_s

Expand Down
14 changes: 13 additions & 1 deletion vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -2244,6 +2244,7 @@ rb_vm_update_references(void *ptr)
if (ptr) {
rb_vm_t *vm = ptr;
rb_gc_update_tbl_refs(vm->frozen_strings);
rb_gc_update_tbl_refs(vm->regexp_literals);
}
}

Expand Down Expand Up @@ -2298,7 +2299,6 @@ rb_vm_mark(void *ptr)
rb_hook_list_mark(&vm->global_hooks);

rb_gc_mark_values(RUBY_NSIG, vm->trap_list.cmd);

mjit_mark();
}

Expand Down Expand Up @@ -2357,6 +2357,10 @@ ruby_vm_destruct(rb_vm_t *vm)
st_free_table(vm->frozen_strings);
vm->frozen_strings = 0;
}
if (vm->regexp_literals) {
st_free_table(vm->regexp_literals);
vm->regexp_literals = 0;
}
rb_vm_gvl_destroy(vm);
RB_ALTSTACK_FREE(vm->main_altstack);
if (objspace) {
Expand Down Expand Up @@ -3323,6 +3327,7 @@ rb_vm_set_progname(VALUE filename)
}

extern const struct st_hash_type rb_fstring_hash_type;
extern const struct st_hash_type rb_regexp_literal_hash_type;

void
Init_BareVM(void)
Expand Down Expand Up @@ -3358,6 +3363,7 @@ Init_vm_objects(void)
vm->mark_object_ary = rb_ary_tmp_new(128);
vm->loading_table = st_init_strtable();
vm->frozen_strings = st_init_table_with_size(&rb_fstring_hash_type, 10000);
vm->regexp_literals = st_init_table_with_size(&rb_regexp_literal_hash_type, 1000);

rb_objspace_gc_enable(vm->objspace);

Expand Down Expand Up @@ -3425,6 +3431,12 @@ rb_vm_fstring_table(void)
return GET_VM()->frozen_strings;
}

st_table *
rb_vm_regexp_literals_table(void)
{
return GET_VM()->regexp_literals;
}

#if VM_COLLECT_USAGE_DETAILS

#define HASH_ASET(h, k, v) rb_hash_aset((h), (st_data_t)(k), (st_data_t)(v))
Expand Down
1 change: 1 addition & 0 deletions vm_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,7 @@ typedef struct rb_vm_struct {

VALUE *defined_strings;
st_table *frozen_strings;
st_table *regexp_literals;

const struct rb_builtin_function *builtin_function_table;
int builtin_inline_index;
Expand Down