Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit b44fb12

Browse files
committed
Implement rich comparison for _sre.SRE_Pattern
Issue #28727: Regular expression patterns (_sre.SRE_Pattern objects created by re.compile()) become comparable (only the x==y and x!=y operators). This change should fix issue #18383: don't duplicate warning filters when the warnings module is reloaded (something usually only done in unit tests).
1 parent a2f7ee8 commit b44fb12

3 files changed

Lines changed: 118 additions & 9 deletions

File tree

Lib/test/test_re.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,13 @@
33
import io
44
import locale
55
import re
6-
from re import Scanner
76
import sre_compile
8-
import sys
97
import string
8+
import sys
109
import traceback
1110
import unittest
11+
import warnings
12+
from re import Scanner
1213
from weakref import proxy
1314

1415
# Misc tests from Tim Peters' re.doc
@@ -1777,6 +1778,48 @@ def test_enum(self):
17771778
self.assertIn('ASCII', str(re.A))
17781779
self.assertIn('DOTALL', str(re.S))
17791780

1781+
def test_pattern_compare(self):
1782+
pattern1 = re.compile('abc', re.IGNORECASE)
1783+
1784+
# equal
1785+
re.purge()
1786+
pattern2 = re.compile('abc', re.IGNORECASE)
1787+
self.assertEqual(hash(pattern2), hash(pattern1))
1788+
self.assertEqual(pattern2, pattern1)
1789+
1790+
# not equal: different pattern
1791+
re.purge()
1792+
pattern3 = re.compile('XYZ', re.IGNORECASE)
1793+
# Don't test hash(pattern3) != hash(pattern1) because there is no
1794+
# warranty that hash values are different
1795+
self.assertNotEqual(pattern3, pattern1)
1796+
1797+
# not equal: different flag (flags=0)
1798+
re.purge()
1799+
pattern4 = re.compile('abc')
1800+
self.assertNotEqual(pattern4, pattern1)
1801+
1802+
# only == and != comparison operators are supported
1803+
with self.assertRaises(TypeError):
1804+
pattern1 < pattern2
1805+
1806+
def test_pattern_compare_bytes(self):
    # Equality and hashing must also work for patterns compiled from
    # bytes.  re.purge() before each re.compile() forces a fresh pattern
    # object rather than a cached one.
    bytes_pattern = re.compile(b'abc')

    # equal: a second bytes pattern built from the same source
    re.purge()
    bytes_pattern_again = re.compile(b'abc')
    self.assertEqual(hash(bytes_pattern_again), hash(bytes_pattern))
    self.assertEqual(bytes_pattern_again, bytes_pattern)

    # not equal: patterns of different types (str vs bytes); the
    # comparison must not raise a BytesWarning, so turn that warning
    # into an error while comparing
    re.purge()
    str_pattern = re.compile('abc')
    with warnings.catch_warnings():
        warnings.simplefilter('error', BytesWarning)
        self.assertNotEqual(str_pattern, bytes_pattern)
1822+
17801823

17811824
class PatternReprTests(unittest.TestCase):
17821825
def check(self, pattern, expected):

Misc/NEWS

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,11 @@ Core and Builtins
4242
Library
4343
-------
4444

45+
- Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created
46+
by re.compile(), become comparable (only x==y and x!=y operators). This
47+
change should fix the issue #18383: don't duplicate warning filters when the
48+
warnings module is reloaded (something usually only done in unit tests).
49+
4550
- Issue #20572: The subprocess.Popen.wait method's undocumented
4651
endtime parameter now raises a DeprecationWarning.
4752

@@ -77,7 +82,7 @@ Library
7782

7883
- Issue #28703: Fix asyncio.iscoroutinefunction to handle Mock objects.
7984

80-
- Issue #28704: Fix create_unix_server to support Path-like objects
85+
- Issue #28704: Fix create_unix_server to support Path-like objects
8186
(PEP 519).
8287

8388
- Issue #28720: Add collections.abc.AsyncGenerator.

Modules/_sre.c

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,14 +1506,12 @@ _sre_compile_impl(PyObject *module, PyObject *pattern, int flags,
15061506

15071507
self->groups = groups;
15081508

1509-
Py_XINCREF(groupindex);
1509+
Py_INCREF(groupindex);
15101510
self->groupindex = groupindex;
15111511

1512-
Py_XINCREF(indexgroup);
1512+
Py_INCREF(indexgroup);
15131513
self->indexgroup = indexgroup;
15141514

1515-
self->weakreflist = NULL;
1516-
15171515
if (!_validate(self)) {
15181516
Py_DECREF(self);
15191517
return NULL;
@@ -2649,6 +2647,69 @@ pattern_scanner(PatternObject *self, PyObject *string, Py_ssize_t pos, Py_ssize_
26492647
return (PyObject*) scanner;
26502648
}
26512649

2650+
static Py_hash_t
2651+
pattern_hash(PatternObject *self)
2652+
{
2653+
Py_hash_t hash, hash2;
2654+
2655+
hash = PyObject_Hash(self->pattern);
2656+
if (hash == -1) {
2657+
return -1;
2658+
}
2659+
2660+
hash2 = _Py_HashBytes(self->code, sizeof(self->code[0]) * self->codesize);
2661+
hash ^= hash2;
2662+
2663+
hash ^= self->flags;
2664+
hash ^= self->isbytes;
2665+
hash ^= self->codesize;
2666+
2667+
if (hash == -1) {
2668+
hash = -2;
2669+
}
2670+
return hash;
2671+
}
2672+
2673+
/* Rich comparison for compiled pattern objects (installed as the
   tp_richcompare slot).

   Only == and != are supported: any other operator, or an operand whose
   type is not exactly Pattern_Type, yields NotImplemented.

   Two patterns are equal when their flags, isbytes field, code size,
   compiled code and source pattern object are all equal.

   Returns a bool object or NotImplemented, or NULL if comparing the source
   pattern objects fails. */
static PyObject*
pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
{
    PatternObject *left, *right;
    int cmp;

    if (op != Py_EQ && op != Py_NE) {
        Py_RETURN_NOTIMPLEMENTED;
    }

    if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
        Py_RETURN_NOTIMPLEMENTED;
    }
    left = (PatternObject *)lefto;
    right = (PatternObject *)righto;

    /* The code sizes must be equal, not merely both non-zero: the memcmp()
       below reads left->codesize elements from *both* code arrays, so a
       size mismatch would read past the end of the shorter array. */
    cmp = (left->flags == right->flags
           && left->isbytes == right->isbytes
           && left->codesize == right->codesize);
    if (cmp) {
        /* Compare the code and the pattern because the same pattern can
           produce different codes depending on the locale used to compile the
           pattern when the re.LOCALE flag is used. Don't compare groups,
           indexgroup nor groupindex: they are derived from the pattern. */
        cmp = (memcmp(left->code, right->code,
                      sizeof(left->code[0]) * left->codesize) == 0);
    }
    if (cmp) {
        cmp = PyObject_RichCompareBool(left->pattern, right->pattern,
                                       Py_EQ);
        if (cmp < 0) {
            /* Comparing the source patterns raised an exception. */
            return NULL;
        }
    }
    if (op == Py_NE) {
        cmp = !cmp;
    }
    return PyBool_FromLong(cmp);
}
2712+
26522713
#include "clinic/_sre.c.h"
26532714

26542715
static PyMethodDef pattern_methods[] = {
@@ -2693,7 +2754,7 @@ static PyTypeObject Pattern_Type = {
26932754
0, /* tp_as_number */
26942755
0, /* tp_as_sequence */
26952756
0, /* tp_as_mapping */
2696-
0, /* tp_hash */
2757+
(hashfunc)pattern_hash, /* tp_hash */
26972758
0, /* tp_call */
26982759
0, /* tp_str */
26992760
0, /* tp_getattro */
@@ -2703,7 +2764,7 @@ static PyTypeObject Pattern_Type = {
27032764
pattern_doc, /* tp_doc */
27042765
0, /* tp_traverse */
27052766
0, /* tp_clear */
2706-
0, /* tp_richcompare */
2767+
pattern_richcompare, /* tp_richcompare */
27072768
offsetof(PatternObject, weakreflist), /* tp_weaklistoffset */
27082769
0, /* tp_iter */
27092770
0, /* tp_iternext */

0 commit comments

Comments
 (0)