Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 95bf939

Browse files
committed
SF bug #422121 Insecurities in dict comparison.
Fixed a half dozen ways in which general dict comparison could crash Python (even cause Win98SE to reboot) in the presence of key and/or value comparison routines that mutate the dict during dict comparison. Bugfix candidate.
1 parent 66aaaae commit 95bf939

4 files changed

Lines changed: 239 additions & 34 deletions

File tree

Lib/test/output/test_mutants

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
test_mutants

Lib/test/test_mutants.py

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
from test_support import verbose
2+
import random
3+
4+
# From SF bug #422121: Insecurities in dict comparison.
5+
6+
# Safety of code doing comparisons has been an historical Python weak spot.
7+
# The problem is that comparison of structures written in C *naturally*
8+
# wants to hold on to things like the size of the container, or "the
9+
# biggest" containee so far, across a traversal of the container; but
10+
# code to do containee comparisons can call back into Python and mutate
11+
# the container in arbitrary ways while the C loop is in midstream. If the
12+
# C code isn't extremely paranoid about digging things out of memory on
13+
# each trip, and artificially boosting refcounts for the duration, anything
14+
# from infinite loops to OS crashes can result (yes, I use Windows <wink>).
15+
#
16+
# The other problem is that code designed to provoke a weakness is usually
17+
# white-box code, and so catches only the particular vulnerabilities the
18+
# author knew to protect against. For example, Python's list.sort() code
19+
# went thru many iterations as one "new" vulnerability after another was
20+
# discovered.
21+
#
22+
# So the dict comparison test here uses a black-box approach instead,
23+
# generating dicts of various sizes at random, and performing random
24+
# mutations on them at random times. This proved very effective,
25+
# triggering at least six distinct failure modes the first 20 times I
26+
# ran it. Indeed, at the start, the driver never got beyond 6 iterations
27+
# before the test died.
28+
29+
# The dicts are global to make it easy to mutate them from within functions.
30+
dict1 = {}
31+
dict2 = {}
32+
33+
# The current set of keys in dict1 and dict2. These are materialized as
34+
# lists to make it easy to pick a dict key at random.
35+
dict1keys = []
36+
dict2keys = []
37+
38+
# Global flag telling maybe_mutate() whether to *consider* mutating.
39+
mutate = 0
40+
41+
# If global mutate is true, consider mutating a dict. May or may not
42+
# mutate a dict even if mutate is true. If it does decide to mutate a
43+
# dict, it picks one of {dict1, dict2} at random, and deletes a random
44+
# entry from it.
45+
46+
def maybe_mutate():
47+
if not mutate:
48+
return
49+
if random.random() < 0.5:
50+
return
51+
if random.random() < 0.5:
52+
target, keys = dict1, dict1keys
53+
else:
54+
target, keys = dict2, dict2keys
55+
if keys:
56+
i = random.randrange(len(keys))
57+
key = keys[i]
58+
del target[key]
59+
# CAUTION: don't use keys.remove(key) here. Or do <wink>. The
60+
# point is that .remove() would trigger more comparisons, and so
61+
# also more calls to this routine. We're mutating often enough
62+
# without that.
63+
del keys[i]
64+
65+
# A horrid class that triggers random mutations of dict1 and dict2 when
66+
# instances are compared.
67+
68+
class Horrid:
69+
def __init__(self, i):
70+
# Comparison outcomes are determined by the value of i.
71+
self.i = i
72+
73+
# An artificial hashcode is selected at random so that we don't
74+
# have any systematic relationship between comparison outcomes
75+
# (based on self.i and other.i) and relative position within the
76+
# hash vector (based on hashcode).
77+
self.hashcode = random.randrange(1000000000)
78+
79+
def __hash__(self):
80+
return self.hashcode
81+
82+
def __cmp__(self, other):
83+
maybe_mutate() # The point of the test.
84+
return cmp(self.i, other.i)
85+
86+
def __repr__(self):
87+
return "Horrid(%d)" % self.i
88+
89+
# Fill dict d with numentries (Horrid(i), Horrid(j)) key-value pairs,
90+
# where i and j are selected at random from the candidates list.
91+
# Return d.keys() after filling.
92+
93+
def fill_dict(d, candidates, numentries):
94+
d.clear()
95+
for i in xrange(numentries):
96+
d[Horrid(random.choice(candidates))] = \
97+
Horrid(random.choice(candidates))
98+
return d.keys()
99+
100+
# Test one pair of randomly generated dicts, each with n entries.
101+
# Note that dict comparison is trivial if they don't have the same number
102+
# of entries (then the "shorter" dict is instantly considered to be the
103+
# smaller one, without even looking at the entries).
104+
105+
def test_one(n):
106+
global mutate, dict1, dict2, dict1keys, dict2keys
107+
108+
# Fill the dicts without mutating them.
109+
mutate = 0
110+
dict1keys = fill_dict(dict1, range(n), n)
111+
dict2keys = fill_dict(dict2, range(n), n)
112+
113+
# Enable mutation, then compare the dicts so long as they have the
114+
# same size.
115+
mutate = 1
116+
if verbose:
117+
print "trying w/ lengths", len(dict1), len(dict2),
118+
while dict1 and len(dict1) == len(dict2):
119+
if verbose:
120+
print ".",
121+
c = cmp(dict1, dict2)
122+
if verbose:
123+
print
124+
125+
# Run test_one n times. At the start (before the bugs were fixed), 20
126+
# consecutive runs of this test each blew up on or before the sixth time
127+
# test_one was run. So n doesn't have to be large to get an interesting
128+
# test.
129+
# OTOH, calling with large n is also interesting, to ensure that the fixed
130+
# code doesn't hold on to refcounts *too* long (in which case memory would
131+
# leak).
132+
133+
def test(n):
134+
for i in xrange(n):
135+
test_one(random.randrange(1, 100))
136+
137+
# See last comment block for clues about good values for n.
138+
test(100)

Misc/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@ Core
4747
- Comparing dictionary objects via == and != is faster, and now works even
4848
if the keys and values don't support comparisons other than ==.
4949

50+
- Comparing dictionaries in ways other than == and != is slower: there were
51+
insecurities in the dict comparison implementation that could cause Python
52+
to crash if the element comparison routines for the dict keys and/or
53+
values mutated the dicts. Making the code bulletproof slowed it down.
54+
5055

5156
What's New in Python 2.1 (final)?
5257
=================================

Objects/dictobject.c

Lines changed: 95 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -981,43 +981,83 @@ PyDict_Items(PyObject *mp)
981981

982982
/* Subroutine which returns the smallest key in a for which b's value
983983
is different or absent. The value is returned too, through the
984-
pval argument. No reference counts are incremented. */
984+
pval argument. Both are NULL if no key in a is found for which b's status
985+
differs. The refcounts on (and only on) non-NULL *pval and function return
986+
values must be decremented by the caller (characterize() increments them
987+
to ensure that mutating comparison and PyDict_GetItem calls can't delete
988+
them before the caller is done looking at them). */
985989

986990
static PyObject *
987991
characterize(dictobject *a, dictobject *b, PyObject **pval)
988992
{
989-
PyObject *diff = NULL;
993+
PyObject *akey = NULL; /* smallest key in a s.t. a[akey] != b[akey] */
994+
PyObject *aval = NULL; /* a[akey] */
990995
int i, cmp;
991996

992-
*pval = NULL;
993997
for (i = 0; i < a->ma_size; i++) {
994-
if (a->ma_table[i].me_value != NULL) {
995-
PyObject *key = a->ma_table[i].me_key;
996-
PyObject *aval, *bval;
997-
if (diff != NULL) {
998-
cmp = PyObject_RichCompareBool(diff, key, Py_LT);
999-
if (cmp < 0)
1000-
return NULL;
1001-
if (cmp > 0)
1002-
continue;
998+
PyObject *thiskey, *thisaval, *thisbval;
999+
if (a->ma_table[i].me_value == NULL)
1000+
continue;
1001+
thiskey = a->ma_table[i].me_key;
1002+
Py_INCREF(thiskey); /* keep alive across compares */
1003+
if (akey != NULL) {
1004+
cmp = PyObject_RichCompareBool(akey, thiskey, Py_LT);
1005+
if (cmp < 0) {
1006+
Py_DECREF(thiskey);
1007+
goto Fail;
10031008
}
1004-
aval = a->ma_table[i].me_value;
1005-
bval = PyDict_GetItem((PyObject *)b, key);
1006-
if (bval == NULL)
1007-
cmp = 0;
1008-
else {
1009-
cmp = PyObject_RichCompareBool(aval, bval, Py_EQ);
1010-
if (cmp < 0)
1011-
return NULL;
1012-
}
1013-
if (cmp == 0)
1009+
if (cmp > 0 ||
1010+
i >= a->ma_size ||
1011+
a->ma_table[i].me_value == NULL)
10141012
{
1015-
diff = key;
1016-
*pval = aval;
1013+
/* Not the *smallest* a key; or maybe it is
1014+
* but the compare shrunk the dict so we can't
1015+
* find its associated value anymore; or
1016+
* maybe it is but the compare deleted the
1017+
* a[thiskey] entry.
1018+
*/
1019+
Py_DECREF(thiskey);
1020+
continue;
10171021
}
10181022
}
1023+
1024+
/* Compare a[thiskey] to b[thiskey]; cmp <- true iff equal. */
1025+
thisaval = a->ma_table[i].me_value;
1026+
assert(thisaval);
1027+
Py_INCREF(thisaval); /* keep alive */
1028+
thisbval = PyDict_GetItem((PyObject *)b, thiskey);
1029+
if (thisbval == NULL)
1030+
cmp = 0;
1031+
else {
1032+
/* both dicts have thiskey: same values? */
1033+
cmp = PyObject_RichCompareBool(
1034+
thisaval, thisbval, Py_EQ);
1035+
if (cmp < 0) {
1036+
Py_DECREF(thiskey);
1037+
Py_DECREF(thisaval);
1038+
goto Fail;
1039+
}
1040+
}
1041+
if (cmp == 0) {
1042+
/* New winner. */
1043+
Py_XDECREF(akey);
1044+
Py_XDECREF(aval);
1045+
akey = thiskey;
1046+
aval = thisaval;
1047+
}
1048+
else {
1049+
Py_DECREF(thiskey);
1050+
Py_DECREF(thisaval);
1051+
}
10191052
}
1020-
return diff;
1053+
*pval = aval;
1054+
return akey;
1055+
1056+
Fail:
1057+
Py_XDECREF(akey);
1058+
Py_XDECREF(aval);
1059+
*pval = NULL;
1060+
return NULL;
10211061
}
10221062

10231063
static int
@@ -1031,19 +1071,40 @@ dict_compare(dictobject *a, dictobject *b)
10311071
return -1; /* a is shorter */
10321072
else if (a->ma_used > b->ma_used)
10331073
return 1; /* b is shorter */
1074+
10341075
/* Same length -- check all keys */
1076+
bdiff = bval = NULL;
10351077
adiff = characterize(a, b, &aval);
1036-
if (adiff == NULL && PyErr_Occurred())
1037-
return -1;
1038-
if (adiff == NULL)
1039-
return 0; /* a is a subset with the same length */
1078+
if (adiff == NULL) {
1079+
assert(!aval);
1080+
/* Either an error, or a is a subset with the same length so
1081+
* must be equal.
1082+
*/
1083+
res = PyErr_Occurred() ? -1 : 0;
1084+
goto Finished;
1085+
}
10401086
bdiff = characterize(b, a, &bval);
1041-
if (bdiff == NULL && PyErr_Occurred())
1042-
return -1;
1043-
/* bdiff == NULL would be impossible now */
1044-
res = PyObject_Compare(adiff, bdiff);
1045-
if (res == 0)
1087+
if (bdiff == NULL && PyErr_Occurred()) {
1088+
assert(!bval);
1089+
res = -1;
1090+
goto Finished;
1091+
}
1092+
res = 0;
1093+
if (bdiff) {
1094+
/* bdiff == NULL "should be" impossible now, but perhaps
1095+
* the last comparison done by the characterize() on a had
1096+
* the side effect of making the dicts equal!
1097+
*/
1098+
res = PyObject_Compare(adiff, bdiff);
1099+
}
1100+
if (res == 0 && bval != NULL)
10461101
res = PyObject_Compare(aval, bval);
1102+
1103+
Finished:
1104+
Py_XDECREF(adiff);
1105+
Py_XDECREF(bdiff);
1106+
Py_XDECREF(aval);
1107+
Py_XDECREF(bval);
10471108
return res;
10481109
}
10491110

0 commit comments

Comments
 (0)