Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit e503437

Browse files
committed
Removing UTF-16-aware Unicode comparison code. This kind of compare
function (together with other locale-aware ones) should go into a new collation support module. See python-dev for a discussion of this removal. Note: This patch should also be applied to the 1.6 branch.
1 parent 5660639 commit e503437

3 files changed

Lines changed: 86 additions & 51 deletions

File tree

Lib/test/output/test_unicode

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
test_unicode
22
Testing Unicode comparisons... done.
3-
Testing UTF-16 code point order comparisons... done.
43
Testing Unicode contains method... done.
54
Testing Unicode formatting strings... done.
65
Testing builtin codecs... done.

Lib/test/test_unicode.py

Lines changed: 53 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -168,56 +168,59 @@ def __init__(self): self.seq = [7, u'hello', 123L]
168168
assert u'abc' < u'abcd'
169169
print 'done.'
170170

171-
print 'Testing UTF-16 code point order comparisons...',
172-
#No surrogates, no fixup required.
173-
assert u'\u0061' < u'\u20ac'
174-
# Non surrogate below surrogate value, no fixup required
175-
assert u'\u0061' < u'\ud800\udc02'
176-
177-
# Non surrogate above surrogate value, fixup required
178-
def test_lecmp(s, s2):
179-
assert s < s2 , "comparison failed on %s < %s" % (s, s2)
180-
181-
def test_fixup(s):
182-
s2 = u'\ud800\udc01'
183-
test_lecmp(s, s2)
184-
s2 = u'\ud900\udc01'
185-
test_lecmp(s, s2)
186-
s2 = u'\uda00\udc01'
187-
test_lecmp(s, s2)
188-
s2 = u'\udb00\udc01'
189-
test_lecmp(s, s2)
190-
s2 = u'\ud800\udd01'
191-
test_lecmp(s, s2)
192-
s2 = u'\ud900\udd01'
193-
test_lecmp(s, s2)
194-
s2 = u'\uda00\udd01'
195-
test_lecmp(s, s2)
196-
s2 = u'\udb00\udd01'
197-
test_lecmp(s, s2)
198-
s2 = u'\ud800\ude01'
199-
test_lecmp(s, s2)
200-
s2 = u'\ud900\ude01'
201-
test_lecmp(s, s2)
202-
s2 = u'\uda00\ude01'
203-
test_lecmp(s, s2)
204-
s2 = u'\udb00\ude01'
205-
test_lecmp(s, s2)
206-
s2 = u'\ud800\udfff'
207-
test_lecmp(s, s2)
208-
s2 = u'\ud900\udfff'
209-
test_lecmp(s, s2)
210-
s2 = u'\uda00\udfff'
211-
test_lecmp(s, s2)
212-
s2 = u'\udb00\udfff'
213-
test_lecmp(s, s2)
214-
215-
test_fixup(u'\ue000')
216-
test_fixup(u'\uff61')
217-
218-
# Surrogates on both sides, no fixup required
219-
assert u'\ud800\udc02' < u'\ud84d\udc56'
220-
print 'done.'
171+
if 0:
172+
# Move these tests to a Unicode collation module test...
173+
174+
print 'Testing UTF-16 code point order comparisons...',
175+
#No surrogates, no fixup required.
176+
assert u'\u0061' < u'\u20ac'
177+
# Non surrogate below surrogate value, no fixup required
178+
assert u'\u0061' < u'\ud800\udc02'
179+
180+
# Non surrogate above surrogate value, fixup required
181+
def test_lecmp(s, s2):
182+
assert s < s2 , "comparison failed on %s < %s" % (s, s2)
183+
184+
def test_fixup(s):
185+
s2 = u'\ud800\udc01'
186+
test_lecmp(s, s2)
187+
s2 = u'\ud900\udc01'
188+
test_lecmp(s, s2)
189+
s2 = u'\uda00\udc01'
190+
test_lecmp(s, s2)
191+
s2 = u'\udb00\udc01'
192+
test_lecmp(s, s2)
193+
s2 = u'\ud800\udd01'
194+
test_lecmp(s, s2)
195+
s2 = u'\ud900\udd01'
196+
test_lecmp(s, s2)
197+
s2 = u'\uda00\udd01'
198+
test_lecmp(s, s2)
199+
s2 = u'\udb00\udd01'
200+
test_lecmp(s, s2)
201+
s2 = u'\ud800\ude01'
202+
test_lecmp(s, s2)
203+
s2 = u'\ud900\ude01'
204+
test_lecmp(s, s2)
205+
s2 = u'\uda00\ude01'
206+
test_lecmp(s, s2)
207+
s2 = u'\udb00\ude01'
208+
test_lecmp(s, s2)
209+
s2 = u'\ud800\udfff'
210+
test_lecmp(s, s2)
211+
s2 = u'\ud900\udfff'
212+
test_lecmp(s, s2)
213+
s2 = u'\uda00\udfff'
214+
test_lecmp(s, s2)
215+
s2 = u'\udb00\udfff'
216+
test_lecmp(s, s2)
217+
218+
test_fixup(u'\ue000')
219+
test_fixup(u'\uff61')
220+
221+
# Surrogates on both sides, no fixup required
222+
assert u'\ud800\udc02' < u'\ud84d\udc56'
223+
print 'done.'
221224

222225
test('ljust', u'abc', u'abc ', 10)
223226
test('rjust', u'abc', u' abc', 10)

Objects/unicodeobject.c

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3169,6 +3169,12 @@ unicode_center(PyUnicodeObject *self, PyObject *args)
31693169
return (PyObject*) pad(self, left, marg - left, ' ');
31703170
}
31713171

3172+
#if 0
3173+
3174+
/* This code should go into some future Unicode collation support
3175+
module. The basic comparison should compare ordinals on a naive
3176+
basis (this is what Java does and thus JPython too).
3177+
31723178
/* speedy UTF-16 code point order comparison */
31733179
/* gleaned from: */
31743180
/* http://www-4.ibm.com/software/developer/library/utf16.html?dwzone=unicode */
@@ -3213,6 +3219,33 @@ unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2)
32133219
return (len1 < len2) ? -1 : (len1 != len2);
32143220
}
32153221

3222+
#else
3223+
3224+
static int
3225+
unicode_compare(PyUnicodeObject *str1, PyUnicodeObject *str2) /* Naive element-by-element ordinal comparison of two Unicode objects; returns -1, 0 or 1. */
3226+
{
3227+
register int len1, len2; /* number of elements still to be compared in each string */
3228+
3229+
Py_UNICODE *s1 = str1->str;
3230+
Py_UNICODE *s2 = str2->str;
3231+
3232+
len1 = str1->length;
3233+
len2 = str2->length;
3234+
3235+
while (len1 > 0 && len2 > 0) { /* walk the common prefix of both strings */
3236+
register long diff;
3237+
3238+
diff = (long)*s1++ - (long)*s2++; /* both operands are cast to long before subtracting, so the sign of diff gives the ordering */
3239+
if (diff)
3240+
return (diff < 0) ? -1 : (diff != 0); /* first differing element decides: -1 if str1 sorts first, else 1 */
3241+
len1--; len2--;
3242+
}
3243+
3244+
return (len1 < len2) ? -1 : (len1 != len2); /* no difference in the common prefix: the shorter string sorts first, equal lengths give 0 */
3245+
}
3246+
3247+
#endif
3248+
32163249
int PyUnicode_Compare(PyObject *left,
32173250
PyObject *right)
32183251
{

0 commit comments

Comments
 (0)