Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 190d79e

Browse files
committed
Merged revisions 60408-60440 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk ........ r60425 | raymond.hettinger | 2008-01-29 20:52:09 +0100 (Tue, 29 Jan 2008) | 1 line CallMethod is faster with a NULL third-argument than with an empty format string. ........ r60431 | raymond.hettinger | 2008-01-30 01:01:07 +0100 (Wed, 30 Jan 2008) | 1 line Add isdisjoint() to the Set/MutableSet ABCs. ........ r60432 | raymond.hettinger | 2008-01-30 01:08:31 +0100 (Wed, 30 Jan 2008) | 1 line MutableSets support a remove() method. ........ r60433 | raymond.hettinger | 2008-01-30 01:51:58 +0100 (Wed, 30 Jan 2008) | 1 line Demonstrate new except/as syntax. ........ r60440 | christian.heimes | 2008-01-30 12:32:37 +0100 (Wed, 30 Jan 2008) | 1 line Patch #1970 by Antoine Pitrou: Speedup unicode whitespace and linebreak detection. The speedup is about 25% for split() (571 / 457 usec) and 35% (175 / 127 usec) for splitlines() ........
1 parent 510711d commit 190d79e

4 files changed

Lines changed: 103 additions & 21 deletions

File tree

Doc/tutorial/errors.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,8 +131,8 @@ the exception (allowing a caller to handle the exception as well)::
131131
f = open('myfile.txt')
132132
s = f.readline()
133133
i = int(s.strip())
134-
except IOError as e:
135-
print("I/O error(%s): %s" % (e.errno, e.strerror))
134+
except IOError as (errno, strerror):
135+
print "I/O error(%s): %s" % (errno, strerror)
136136
except ValueError:
137137
print("Could not convert data to an integer.")
138138
except:

Include/unicodeobject.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,14 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
358358

359359
#else
360360

361-
#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
361+
/* Splitting on whitespace is a hot path, and the whitespace met in
   practice is almost always ASCII.  Code points below 128 are therefore
   answered by an inlined look-up in _Py_ascii_whitespace; any other code
   point is passed on to _PyUnicode_IsWhitespace().

   Note: the argument is evaluated more than once, so it must not have
   side effects. */
extern const unsigned char _Py_ascii_whitespace[];

#define Py_UNICODE_ISSPACE(ch)                  \
    (((ch) < 128U)                              \
        ? _Py_ascii_whitespace[(ch)]            \
        : _PyUnicode_IsWhitespace(ch))
362369

363370
#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
364371
#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)

Lib/_abcoll.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,12 @@ def __and__(self, other):
211211
return NotImplemented
212212
return self._from_iterable(value for value in other if value in self)
213213

214+
def isdisjoint(self, other):
    """Return True if this set shares no element with other.

    other may be any iterable.  Its values are tested for membership in
    self one at a time, and iteration stops at the first value that is
    found, so an iterator argument is consumed only as far as needed.
    """
    return not any(value in self for value in other)
219+
214220
def __or__(self, other):
215221
if not isinstance(other, Iterable):
216222
return NotImplemented
@@ -278,6 +284,12 @@ def discard(self, value):
278284
"""Return True if it was deleted, False if not there."""
279285
raise NotImplementedError
280286

287+
def remove(self, value):
    """Delete value from the set; raise KeyError(value) when it is absent."""
    if value in self:
        self.discard(value)
        return
    raise KeyError(value)
292+
281293
def pop(self):
282294
"""Return the popped value. Raise KeyError if empty."""
283295
it = iter(self)

Objects/unicodeobject.c

Lines changed: 81 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,64 @@ static PyUnicodeObject *unicode_latin1[256];
125125
*/
126126
static const char unicode_default_encoding[] = "utf-8";
127127

128+
/* Fast detection of the most frequent whitespace characters.

   Look-up table indexed by code point: entry i is 1 when ASCII character
   i counts as whitespace and 0 otherwise.  The table has exactly 128
   entries, matching the "(ch) < 128U" guard in Py_UNICODE_ISSPACE, which
   is the macro that indexes it.  The size is spelled out so the compiler
   rejects an initializer that grows past that bound.

   Only C89 comments are used here; "//" comments are not accepted by
   strict C89 compilers. */
const unsigned char _Py_ascii_whitespace[128] = {
    /* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 - 0x0F
         0x0009: HORIZONTAL TABULATION
         0x000A: LINE FEED
         0x000B: VERTICAL TABULATION
         0x000C: FORM FEED
         0x000D: CARRIAGE RETURN */
    0, 1, 1, 1, 1, 1, 0, 0,
    /* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 - 0x1F
         0x001C: FILE SEPARATOR
         0x001D: GROUP SEPARATOR
         0x001E: RECORD SEPARATOR
         0x001F: UNIT SEPARATOR */
    0, 0, 0, 0, 1, 1, 1, 1,
    /* 0x20 - 0x27
         0x0020: SPACE */
    1, 0, 0, 0, 0, 0, 0, 0,
    /* 0x28 - 0x3F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,

    /* 0x40 - 0x7F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0
};
158+
159+
/* Fast detection of the ASCII line break characters.

   Look-up table indexed by code point: entry i is 1 when ASCII character
   i is a line break and 0 otherwise.  BLOOM_LINEBREAK indexes it for code
   points below 128, so it has exactly 128 entries.  The table is never
   written to and is therefore const.

   Only C89 comments are used here; "//" comments are not accepted by
   strict C89 compilers. */
static const unsigned char ascii_linebreak[128] = {
    /* 0x00 - 0x07 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x08 - 0x0F
         0x000A: LINE FEED
         0x000D: CARRIAGE RETURN */
    0, 0, 1, 0, 0, 1, 0, 0,
    /* 0x10 - 0x17 */
    0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x18 - 0x1F
         0x001C: FILE SEPARATOR
         0x001D: GROUP SEPARATOR
         0x001E: RECORD SEPARATOR */
    0, 0, 0, 0, 1, 1, 1, 0,
    /* 0x20 - 0x3F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,

    /* 0x40 - 0x7F */
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0
};
184+
185+
128186
Py_UNICODE
129187
PyUnicode_GetMax(void)
130188
{
@@ -151,8 +209,9 @@ static BLOOM_MASK bloom_linebreak;
151209

152210
/* Non-zero when the bit selected by the low five bits of ch is set in
   mask.  The mask argument is parenthesized so that an expression such as
   "a | b" is tested as a whole, and the shifted constant is unsigned long
   because shifting a signed int 1 by 31 is undefined behaviour.
   NOTE(review): assumes BLOOM_MASK is an unsigned type of at least 32
   bits -- its definition is not visible here; confirm. */
#define BLOOM(mask, ch) ((mask) & (1UL << ((ch) & 0x1F)))
153211

154-
#define BLOOM_LINEBREAK(ch)\
155-
(BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK((ch)))
212+
/* Line break test.  Code points below 128 are looked up directly in
   ascii_linebreak.  For any other code point the bloom_linebreak mask is
   checked first, and Py_UNICODE_ISLINEBREAK() is only evaluated when the
   corresponding mask bit is set.

   Note: the argument is evaluated more than once, so it must not have
   side effects. */
#define BLOOM_LINEBREAK(ch)                                             \
    (((ch) < 128U)                                                      \
        ? ascii_linebreak[(ch)]                                         \
        : (BLOOM(bloom_linebreak, (ch)) && Py_UNICODE_ISLINEBREAK(ch)))
156215

157216
Py_LOCAL_INLINE(BLOOM_MASK) make_bloom_mask(Py_UNICODE* ptr, Py_ssize_t len)
158217
{
@@ -5602,25 +5661,26 @@ PyObject *split_whitespace(PyUnicodeObject *self,
56025661
register Py_ssize_t j;
56035662
Py_ssize_t len = self->length;
56045663
PyObject *str;
5664+
register const Py_UNICODE *buf = self->str;
56055665

56065666
for (i = j = 0; i < len; ) {
56075667
/* find a token */
5608-
while (i < len && Py_UNICODE_ISSPACE(self->str[i]))
5668+
while (i < len && Py_UNICODE_ISSPACE(buf[i]))
56095669
i++;
56105670
j = i;
5611-
while (i < len && !Py_UNICODE_ISSPACE(self->str[i]))
5671+
while (i < len && !Py_UNICODE_ISSPACE(buf[i]))
56125672
i++;
56135673
if (j < i) {
56145674
if (maxcount-- <= 0)
56155675
break;
5616-
SPLIT_APPEND(self->str, j, i);
5617-
while (i < len && Py_UNICODE_ISSPACE(self->str[i]))
5676+
SPLIT_APPEND(buf, j, i);
5677+
while (i < len && Py_UNICODE_ISSPACE(buf[i]))
56185678
i++;
56195679
j = i;
56205680
}
56215681
}
56225682
if (j < len) {
5623-
SPLIT_APPEND(self->str, j, len);
5683+
SPLIT_APPEND(buf, j, len);
56245684
}
56255685
return list;
56265686

@@ -5693,18 +5753,19 @@ PyObject *split_char(PyUnicodeObject *self,
56935753
register Py_ssize_t j;
56945754
Py_ssize_t len = self->length;
56955755
PyObject *str;
5756+
register const Py_UNICODE *buf = self->str;
56965757

56975758
for (i = j = 0; i < len; ) {
5698-
if (self->str[i] == ch) {
5759+
if (buf[i] == ch) {
56995760
if (maxcount-- <= 0)
57005761
break;
5701-
SPLIT_APPEND(self->str, j, i);
5762+
SPLIT_APPEND(buf, j, i);
57025763
i = j = i + 1;
57035764
} else
57045765
i++;
57055766
}
57065767
if (j <= len) {
5707-
SPLIT_APPEND(self->str, j, len);
5768+
SPLIT_APPEND(buf, j, len);
57085769
}
57095770
return list;
57105771

@@ -5753,25 +5814,26 @@ PyObject *rsplit_whitespace(PyUnicodeObject *self,
57535814
register Py_ssize_t j;
57545815
Py_ssize_t len = self->length;
57555816
PyObject *str;
5817+
register const Py_UNICODE *buf = self->str;
57565818

57575819
for (i = j = len - 1; i >= 0; ) {
57585820
/* find a token */
5759-
while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
5821+
while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
57605822
i--;
57615823
j = i;
5762-
while (i >= 0 && !Py_UNICODE_ISSPACE(self->str[i]))
5824+
while (i >= 0 && !Py_UNICODE_ISSPACE(buf[i]))
57635825
i--;
57645826
if (j > i) {
57655827
if (maxcount-- <= 0)
57665828
break;
5767-
SPLIT_APPEND(self->str, i + 1, j + 1);
5768-
while (i >= 0 && Py_UNICODE_ISSPACE(self->str[i]))
5829+
SPLIT_APPEND(buf, i + 1, j + 1);
5830+
while (i >= 0 && Py_UNICODE_ISSPACE(buf[i]))
57695831
i--;
57705832
j = i;
57715833
}
57725834
}
57735835
if (j >= 0) {
5774-
SPLIT_APPEND(self->str, 0, j + 1);
5836+
SPLIT_APPEND(buf, 0, j + 1);
57755837
}
57765838
if (PyList_Reverse(list) < 0)
57775839
goto onError;
@@ -5792,18 +5854,19 @@ PyObject *rsplit_char(PyUnicodeObject *self,
57925854
register Py_ssize_t j;
57935855
Py_ssize_t len = self->length;
57945856
PyObject *str;
5857+
register const Py_UNICODE *buf = self->str;
57955858

57965859
for (i = j = len - 1; i >= 0; ) {
5797-
if (self->str[i] == ch) {
5860+
if (buf[i] == ch) {
57985861
if (maxcount-- <= 0)
57995862
break;
5800-
SPLIT_APPEND(self->str, i + 1, j + 1);
5863+
SPLIT_APPEND(buf, i + 1, j + 1);
58015864
j = i = i - 1;
58025865
} else
58035866
i--;
58045867
}
58055868
if (j >= -1) {
5806-
SPLIT_APPEND(self->str, 0, j + 1);
5869+
SPLIT_APPEND(buf, 0, j + 1);
58075870
}
58085871
if (PyList_Reverse(list) < 0)
58095872
goto onError;

0 commit comments

Comments
 (0)