Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit e9ddfbb

Browse files
committed
SF #989185: Drop unicode.iswide() and unicode.width() and add
unicodedata.east_asian_width(). You can still implement your own
simple width() function using it like this:

    def width(u):
        w = 0
        for c in unicodedata.normalize('NFC', u):
            cwidth = unicodedata.east_asian_width(c)
            if cwidth in ('W', 'F'): w += 2
            else: w += 1
        return w
1 parent b5047fd commit e9ddfbb

15 files changed

Lines changed: 1626 additions & 1602 deletions

Doc/api/concrete.tex

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -894,11 +894,6 @@ \subsection{Unicode Objects \label{unicodeObjects}}
894894
character.
895895
\end{cfuncdesc}
896896

897-
\begin{cfuncdesc}{int}{Py_UNICODE_ISWIDE}{Py_UNICODE ch}
898-
Returns 1/0 depending on whether \var{ch} is a wide or full-width
899-
character.
900-
\end{cfuncdesc}
901-
902897
These APIs can be used for fast direct character conversions:
903898

904899
\begin{cfuncdesc}{Py_UNICODE}{Py_UNICODE_TOLOWER}{Py_UNICODE ch}
@@ -957,10 +952,6 @@ \subsection{Unicode Objects \label{unicodeObjects}}
957952
Return the length of the Unicode object.
958953
\end{cfuncdesc}
959954

960-
\begin{cfuncdesc}{int}{PyUnicode_GetWidth}{PyObject *unicode}
961-
Return the fixed-width representation length of the Unicode object.
962-
\end{cfuncdesc}
963-
964955
\begin{cfuncdesc}{PyObject*}{PyUnicode_FromEncodedObject}{PyObject *obj,
965956
const char *encoding,
966957
const char *errors}

Doc/lib/libstdtypes.tex

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -664,12 +664,6 @@ \subsubsection{String Methods \label{string-methods}}
664664
For 8-bit strings, this method is locale-dependent.
665665
\end{methoddesc}
666666

667-
\begin{methoddesc}[string]{iswide}{}
668-
Return true if all characters in the string are wide or full width and
669-
there is at least one wide or full width character, false otherwise.
670-
This method is supported by unicode type only.
671-
\end{methoddesc}
672-
673667
\begin{methoddesc}[string]{join}{seq}
674668
Return a string which is the concatenation of the strings in the
675669
sequence \var{seq}. The separator between elements is the string
@@ -810,11 +804,6 @@ \subsubsection{String Methods \label{string-methods}}
810804
For 8-bit strings, this method is locale-dependent.
811805
\end{methoddesc}
812806

813-
\begin{methoddesc}[string]{width}{}
814-
Return length of fixed-width representation of the string. This method
815-
is supported by unicode type only.
816-
\end{methoddesc}
817-
818807
\begin{methoddesc}[string]{zfill}{width}
819808
Return the numeric string left filled with zeros in a string
820809
of length \var{width}. The original string is returned if

Doc/lib/libunicodedata.tex

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,11 @@ \section{\module{unicodedata} ---
7171
class is defined.
7272
\end{funcdesc}
7373

74+
\begin{funcdesc}{east_asian_width}{unichr}
75+
Returns the East Asian width assigned to the Unicode character
76+
\var{unichr} as a string.
77+
\end{funcdesc}
78+
7479
\begin{funcdesc}{mirrored}{unichr}
7580
Returns the mirrored property of assigned to the Unicode character
7681
\var{unichr} as integer. Returns \code{1} if the character has been
@@ -123,4 +128,4 @@ \section{\module{unicodedata} ---
123128
The version of the Unicode database used in this module.
124129

125130
\versionadded{2.3}
126-
\end{datadesc}
131+
\end{datadesc}

Include/unicodeobject.h

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
181181
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
182182
# define PyUnicode_GetMax PyUnicodeUCS2_GetMax
183183
# define PyUnicode_GetSize PyUnicodeUCS2_GetSize
184-
# define PyUnicode_GetWidth PyUnicodeUCS2_GetWidth
185184
# define PyUnicode_Join PyUnicodeUCS2_Join
186185
# define PyUnicode_Replace PyUnicodeUCS2_Replace
187186
# define PyUnicode_Resize PyUnicodeUCS2_Resize
@@ -201,7 +200,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
201200
# define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
202201
# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
203202
# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
204-
# define _PyUnicode_IsWide _PyUnicodeUCS2_IsWide
205203
# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
206204
# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
207205
# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
@@ -256,7 +254,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
256254
# define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
257255
# define PyUnicode_GetMax PyUnicodeUCS4_GetMax
258256
# define PyUnicode_GetSize PyUnicodeUCS4_GetSize
259-
# define PyUnicode_GetWidth PyUnicodeUCS4_GetWidth
260257
# define PyUnicode_Join PyUnicodeUCS4_Join
261258
# define PyUnicode_Replace PyUnicodeUCS4_Replace
262259
# define PyUnicode_Resize PyUnicodeUCS4_Resize
@@ -275,7 +272,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
275272
# define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
276273
# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
277274
# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
278-
# define _PyUnicode_IsWide _PyUnicodeUCS4_IsWide
279275
# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
280276
# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
281277
# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
@@ -321,8 +317,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
321317

322318
#define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
323319

324-
#define Py_UNICODE_ISWIDE(ch) _PyUnicode_IsWide(ch)
325-
326320
#else
327321

328322
#define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
@@ -346,8 +340,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
346340

347341
#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
348342

349-
#define Py_UNICODE_ISWIDE(ch) _PyUnicode_IsWide(ch)
350-
351343
#endif
352344

353345
#define Py_UNICODE_ISALNUM(ch) \
@@ -440,12 +432,6 @@ PyAPI_FUNC(int) PyUnicode_GetSize(
440432
PyObject *unicode /* Unicode object */
441433
);
442434

443-
/* Get the fixed-width representation length of the Unicode object */
444-
445-
PyAPI_FUNC(int) PyUnicode_GetWidth(
446-
PyObject *unicode /* Unicode object */
447-
);
448-
449435
/* Get the maximum ordinal for a Unicode character. */
450436
PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
451437

@@ -1176,10 +1162,6 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
11761162
Py_UNICODE ch /* Unicode character */
11771163
);
11781164

1179-
PyAPI_FUNC(int) _PyUnicode_IsWide(
1180-
Py_UNICODE ch /* Unicode character */
1181-
);
1182-
11831165
#ifdef __cplusplus
11841166
}
11851167
#endif

Lib/test/string_tests.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -695,28 +695,3 @@ def test_encoding_decoding(self):
695695

696696
self.checkraises(TypeError, 'xyz', 'decode', 42)
697697
self.checkraises(TypeError, 'xyz', 'encode', 42)
698-
699-
700-
class MixinUnicodeUserStringTest:
701-
# Additional tests that only work with
702-
# unicode compatible object, i.e. unicode and UserString
703-
704-
def test_iswide(self):
705-
self.checkequal(False, u'', 'iswide')
706-
self.checkequal(False, u'\x1f', 'iswide') # Neutral
707-
self.checkequal(False, u'\x20', 'iswide') # Narrow
708-
self.checkequal(True, u'\u2329', 'iswide') # Wide
709-
self.checkequal(False, u'\uff64', 'iswide') # Half
710-
self.checkequal(True, u'\u3000', 'iswide') # Full
711-
self.checkequal(False, u'\u2460', 'iswide') # Ambiguous
712-
self.checkequal(True, u'\ud55c\uae00', 'iswide')
713-
self.checkequal(False, u'\ud55c\u2606\uae00', 'iswide')
714-
715-
def test_width(self):
716-
self.checkequal(0, u'', 'width')
717-
self.checkequal(4, u'abcd', 'width')
718-
self.checkequal(2, u'\u0187\u01c9', 'width')
719-
self.checkequal(3, u'\u2460\u2329', 'width')
720-
self.checkequal(3, u'\u2329\u2460', 'width')
721-
self.checkequal(4, u'\ud55c\uae00', 'width')
722-
self.checkequal(5, u'\ud55c\u2606\uae00', 'width')

Lib/test/test_unicode.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@
1111

1212
class UnicodeTest(
1313
string_tests.CommonTest,
14-
string_tests.MixinStrUnicodeUserStringTest,
15-
string_tests.MixinUnicodeUserStringTest
14+
string_tests.MixinStrUnicodeUserStringTest
1615
):
1716
type2test = unicode
1817

Lib/test/test_unicodedata.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,17 @@ def test_normalize(self):
174174
# The rest can be found in test_normalization.py
175175
# which requires an external file.
176176

177+
def test_east_asian_width(self):
178+
eaw = self.db.east_asian_width
179+
self.assertRaises(TypeError, eaw, 'a')
180+
self.assertRaises(TypeError, eaw, u'')
181+
self.assertRaises(TypeError, eaw, u'ra')
182+
self.assertEqual(eaw(u'\x1e'), 'N')
183+
self.assertEqual(eaw(u'\x20'), 'Na')
184+
self.assertEqual(eaw(u'\uC894'), 'W')
185+
self.assertEqual(eaw(u'\uFF66'), 'H')
186+
self.assertEqual(eaw(u'\uFF1F'), 'F')
187+
self.assertEqual(eaw(u'\u2010'), 'A')
177188

178189
class UnicodeMiscTest(UnicodeDatabaseTest):
179190

Lib/test/test_userstring.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,7 @@ class UserStringTest(
1111
string_tests.CommonTest,
1212
string_tests.MixinStrUnicodeUserStringTest,
1313
string_tests.MixinStrStringUserStringTest,
14-
string_tests.MixinStrUserStringTest,
15-
string_tests.MixinUnicodeUserStringTest
14+
string_tests.MixinStrUserStringTest
1615
):
1716

1817
type2test = UserString

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ Core and builtins
6767
- Added a workaround for proper string operations in BSDs. str.split
6868
and str.is* methods can now work correctly with UTF-8 locales.
6969

70+
- unicode.iswide() and unicode.width() are dropped and the East Asian
71+
Width support is moved to the unicodedata extension module.
72+
7073
Extension modules
7174
-----------------
7275

Modules/unicodedata.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ typedef struct {
2424
const unsigned char bidirectional; /* index into
2525
_PyUnicode_BidirectionalNames */
2626
const unsigned char mirrored; /* true if mirrored in bidir mode */
27+
const unsigned char east_asian_width; /* index into
28+
_PyUnicode_EastAsianWidth */
2729
} _PyUnicode_DatabaseRecord;
2830

2931
/* data file generated by Tools/unicode/makeunicodedata.py */
@@ -204,6 +206,24 @@ unicodedata_mirrored(PyObject *self, PyObject *args)
204206
return PyInt_FromLong((int) _getrecord(v)->mirrored);
205207
}
206208

209+
static PyObject *
210+
unicodedata_east_asian_width(PyObject *self, PyObject *args)
211+
{
212+
PyUnicodeObject *v;
213+
int index;
214+
215+
if (!PyArg_ParseTuple(args, "O!:east_asian_width",
216+
&PyUnicode_Type, &v))
217+
return NULL;
218+
if (PyUnicode_GET_SIZE(v) != 1) {
219+
PyErr_SetString(PyExc_TypeError,
220+
"need a single Unicode character as parameter");
221+
return NULL;
222+
}
223+
index = (int) _getrecord(v)->east_asian_width;
224+
return PyString_FromString(_PyUnicode_EastAsianWidthNames[index]);
225+
}
226+
207227
static PyObject *
208228
unicodedata_decomposition(PyObject *self, PyObject *args)
209229
{
@@ -871,6 +891,7 @@ static PyMethodDef unicodedata_functions[] = {
871891
{"bidirectional", unicodedata_bidirectional, METH_VARARGS},
872892
{"combining", unicodedata_combining, METH_VARARGS},
873893
{"mirrored", unicodedata_mirrored, METH_VARARGS},
894+
{"east_asian_width", unicodedata_east_asian_width, METH_VARARGS},
874895
{"decomposition",unicodedata_decomposition, METH_VARARGS},
875896
{"name", unicodedata_name, METH_VARARGS},
876897
{"lookup", unicodedata_lookup, METH_VARARGS},

0 commit comments

Comments
 (0)