Thanks to visit codestin.com
Credit goes to github.com

Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions benchmarks/src/regex_search.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,10 @@ BENCHMARK_CAPTURE(bm_lorem_search, "bibe", "bibe")->Arg(2)->Arg(3)->Arg(4);
BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)", "(bibe)")->Arg(2)->Arg(3)->Arg(4);
BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)+", "(bibe)+")->Arg(2)->Arg(3)->Arg(4);
BENCHMARK_CAPTURE(bm_lorem_search, "(?:bibe)+", "(?:bibe)+")->Arg(2)->Arg(3)->Arg(4);
// Patterns that begin with a zero-width assertion: a word boundary (\b), a non-boundary (\B),
// a positive lookahead ((?=...)) and a negative lookahead ((?!...)).
// Each is registered with the same Arg(2), Arg(3), Arg(4) values as the captures above.
BENCHMARK_CAPTURE(bm_lorem_search, R"(\bbibe)", R"(\bbibe)")->Arg(2)->Arg(3)->Arg(4);
BENCHMARK_CAPTURE(bm_lorem_search, R"(\Bibe)", R"(\Bibe)")->Arg(2)->Arg(3)->Arg(4);
BENCHMARK_CAPTURE(bm_lorem_search, R"((?=....)bibe)", R"((?=....)bibe)")->Arg(2)->Arg(3)->Arg(4);
BENCHMARK_CAPTURE(bm_lorem_search, R"((?=bibe)....)", R"((?=bibe)....)")->Arg(2)->Arg(3)->Arg(4);
BENCHMARK_CAPTURE(bm_lorem_search, R"((?!lorem)bibe)", R"((?!lorem)bibe)")->Arg(2)->Arg(3)->Arg(4);

BENCHMARK_MAIN();
51 changes: 45 additions & 6 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1781,7 +1781,7 @@ public:
return true;
}

_BidIt _Skip(_BidIt, _BidIt, _Node_base* = nullptr);
_BidIt _Skip(_BidIt, _BidIt, _Node_base* = nullptr, unsigned int _Recursion_depth = 0U);

private:
_Tgt_state_t<_It> _Tgt_state;
Expand Down Expand Up @@ -4107,12 +4107,14 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
}

template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
_BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg, _BidIt _Last, _Node_base* _Node_arg) {
_BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(
_BidIt _First_arg, _BidIt _Last, _Node_base* _Node_arg, unsigned int _Recursion_depth) {
// skip until possible match
// assumes --_First_arg is valid
static constexpr char _Line_terminators_char[] = {static_cast<char>(_Meta_cr), static_cast<char>(_Meta_nl)};
static constexpr wchar_t _Line_terminators_wchar_t[] = {static_cast<wchar_t>(_Meta_cr),
static_cast<wchar_t>(_Meta_nl), static_cast<wchar_t>(_Meta_ls), static_cast<wchar_t>(_Meta_ps)};
constexpr unsigned int _Max_recursion_depth = 50U;
_Node_base* _Nx = _Node_arg ? _Node_arg : _Rep;

while (_First_arg != _Last && _Nx) { // check current node
Expand Down Expand Up @@ -4227,17 +4229,54 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg
break;
}

case _N_assert:
{
if (_Recursion_depth >= _Max_recursion_depth) {
return _First_arg;
}

_Node_assert* _Node = static_cast<_Node_assert*>(_Nx);
_First_arg = _Skip(_First_arg, _Last, _Node->_Child);
_BidIt _Next;
for (;;) {
_Next = _Skip(_First_arg, _Last, _Node->_Next, _Recursion_depth + 1U);
if (_Next == _First_arg) {
return _First_arg;
}

_First_arg = _Skip(_Next, _Last, _Node->_Child, _Recursion_depth + 1U);
if (_Next == _First_arg) {
return _First_arg;
}
}
}

case _N_neg_assert:
// we skip the negated assertion body and continue examining the rest of the regex
break;

case _N_wbound:
{
bool _Negated = (_Nx->_Flags & _Fl_negate) != 0;
bool _Prev_word = _STD _Is_word(*_STD _Prev_iter(_First_arg));
for (; _First_arg != _Last; ++_First_arg) {
bool _Next_word = _STD _Is_word(*_First_arg);
if (_Negated == (_Next_word == _Prev_word)) {
break;
}
_Prev_word = _Next_word;
}
return _First_arg;
}

case _N_begin:
case _N_endif:
break;

case _N_end:
case _N_none:
case _N_wbound:
case _N_dot:
case _N_assert:
case _N_neg_assert:
case _N_back:
case _N_endif:
case _N_end_rep:
default:
return _First_arg;
Expand Down
16 changes: 14 additions & 2 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,8 +234,8 @@ void test_VSO_167760_nested_quantifiers_should_not_infinite_loop() {
// Exercises replacement with patterns that are pure zero-width assertions:
// anchors (^ and $), word boundaries (\b and \B) and lookaheads ((?=ef) and (?!ef)).
// Judging by the expected strings, every match position in the subject receives an "X".
void test_DDB_153116_replacements() {
g_regexTester.should_replace_to("abc def def ghi", "^", "X", format_default, "Xabc def def ghi");
g_regexTester.should_replace_to("abc def def ghi", "$", "X", format_default, "abc def def ghiX");
// NOTE(review): the \b and \B checks each appear twice below. The two \B lines expect
// different results for the same subject, so they cannot both pass. This looks like the
// before/after lines of a diff rendered without +/- markers; confirm which pair is current.
g_regexTester.should_replace_to("abc def def ghi", "\\b", "X", format_default, "XabcX XdefX XdefX XghiX");
g_regexTester.should_replace_to("abc def def ghi", "\\B", "X", format_default, "aXbXc dXeXf dXeXf gXhXi");
g_regexTester.should_replace_to("abc def def ghi", "\\b", "X", format_default, "XabcX XdefX XdefX XghiX");
g_regexTester.should_replace_to("abc def def ghi", "\\B", "X", format_default, "aXbXc X dXeXf dXeXf X gXhXi");
g_regexTester.should_replace_to("abc def def ghi", "(?=ef)", "X", format_default, "abc dXef dXef ghi");
g_regexTester.should_replace_to("abc def def ghi", "(?!ef)", "X", format_default, "XaXbXcX XdeXfX XdeXfX XgXhXiX");
}
Expand Down Expand Up @@ -2092,6 +2092,17 @@ void test_gh_5509() {
}
}

void test_gh_5576() {
    // GH-5576 made searches faster for regexes that begin with an assertion
    // by teaching the matcher's skip heuristic about such nodes.
    // Verify the heuristic does not skip past valid matches: one pattern leads with a
    // positive lookahead, the other with a negative lookahead, and both are expected
    // to yield the same replacement result on the same subject.
    const char* const subject     = "AbGweEfFllLLlffflElF";
    const char* const replacement = R"(X$&)";
    const char* const expected    = "AbGweEXfFlXlLLlffflEXlF";

    const char* const positive_lookahead = "(?=[[:lower:]][[:upper:]])[fFlL]{2}";
    const char* const negative_lookahead = "(?![[:upper:]]|[[:lower:]]{2})[fFlL]{2}";

    g_regexTester.should_replace_to(subject, positive_lookahead, replacement, match_default, expected);
    g_regexTester.should_replace_to(subject, negative_lookahead, replacement, match_default, expected);
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -2141,6 +2152,7 @@ int main() {
test_gh_5377();
test_gh_5490();
test_gh_5509();
test_gh_5576();

return g_regexTester.result();
}