SF #989185: Drop unicode.iswide() and unicode.width() and add

hyeshik · hyeshik · commit e9ddfbb41207 · 2004-08-04T07:38:35.000Z
unicodedata.east_asian_width().  You can still implement your own
simple width() function using it like this:
    def width(u):
        w = 0
        for c in unicodedata.normalize('NFC', u):
            cwidth = unicodedata.east_asian_width(c)
            if cwidth in ('W', 'F'): w += 2
            else: w += 1
        return w
diff --git a/Doc/api/concrete.tex b/Doc/api/concrete.tex
@@ -894,11 +894,6 @@ \subsection{Unicode Objects \label{unicodeObjects}}
   character.
 \end{cfuncdesc}
 
-\begin{cfuncdesc}{int}{Py_UNICODE_ISWIDE}{Py_UNICODE ch}
-  Returns 1/0 depending on whether \var{ch} is a wide or full-width
-  character.
-\end{cfuncdesc}
-
 These APIs can be used for fast direct character conversions:
 
 \begin{cfuncdesc}{Py_UNICODE}{Py_UNICODE_TOLOWER}{Py_UNICODE ch}
@@ -957,10 +952,6 @@ \subsection{Unicode Objects \label{unicodeObjects}}
   Return the length of the Unicode object.
 \end{cfuncdesc}
 
-\begin{cfuncdesc}{int}{PyUnicode_GetWidth}{PyObject *unicode}
-  Return the fixed-width representation length of the Unicode object.
-\end{cfuncdesc}
-
 \begin{cfuncdesc}{PyObject*}{PyUnicode_FromEncodedObject}{PyObject *obj,
                                                       const char *encoding,
                                                       const char *errors}
diff --git a/Doc/lib/libstdtypes.tex b/Doc/lib/libstdtypes.tex
@@ -664,12 +664,6 @@ \subsubsection{String Methods \label{string-methods}}
 For 8-bit strings, this method is locale-dependent.
 \end{methoddesc}
 
-\begin{methoddesc}[string]{iswide}{}
-Return true if all characters in the string are wide or full width and
-there is at least one wide or full width character, false otherwise.
-This method is supported by unicode type only.
-\end{methoddesc}
-
 \begin{methoddesc}[string]{join}{seq}
 Return a string which is the concatenation of the strings in the
 sequence \var{seq}.  The separator between elements is the string
@@ -810,11 +804,6 @@ \subsubsection{String Methods \label{string-methods}}
 For 8-bit strings, this method is locale-dependent.
 \end{methoddesc}
 
-\begin{methoddesc}[string]{width}{}
-Return length of fixed-width representation of the string. This method
-is supported by unicode type only.
-\end{methoddesc}
-
 \begin{methoddesc}[string]{zfill}{width}
 Return the numeric string left filled with zeros in a string
 of length \var{width}. The original string is returned if
diff --git a/Doc/lib/libunicodedata.tex b/Doc/lib/libunicodedata.tex
@@ -71,6 +71,11 @@ \section{\module{unicodedata} ---
   class is defined.
 \end{funcdesc}
 
+\begin{funcdesc}{east_asian_width}{unichr}
+  Returns the east asian width assigned to the Unicode character
+  \var{unichr} as a string.
+\end{funcdesc}
+
 \begin{funcdesc}{mirrored}{unichr}
   Returns the mirrored property of assigned to the Unicode character
   \var{unichr} as integer. Returns \code{1} if the character has been
@@ -123,4 +128,4 @@ \section{\module{unicodedata} ---
 The version of the Unicode database used in this module.
 
 \versionadded{2.3}
-\end{datadesc}
+\end{datadesc}
diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
@@ -181,7 +181,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
-# define PyUnicode_GetWidth PyUnicodeUCS2_GetWidth
 # define PyUnicode_Join PyUnicodeUCS2_Join
 # define PyUnicode_Replace PyUnicodeUCS2_Replace
 # define PyUnicode_Resize PyUnicodeUCS2_Resize
@@ -201,7 +200,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
 # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
 # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
-# define _PyUnicode_IsWide _PyUnicodeUCS2_IsWide
 # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
 # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
 # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
@@ -256,7 +254,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
-# define PyUnicode_GetWidth PyUnicodeUCS4_GetWidth
 # define PyUnicode_Join PyUnicodeUCS4_Join
 # define PyUnicode_Replace PyUnicodeUCS4_Replace
 # define PyUnicode_Resize PyUnicodeUCS4_Resize
@@ -275,7 +272,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
 # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
 # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
-# define _PyUnicode_IsWide _PyUnicodeUCS4_IsWide
 # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
 # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
 # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
@@ -321,8 +317,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 
 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
 
-#define Py_UNICODE_ISWIDE(ch) _PyUnicode_IsWide(ch)
-
 #else
 
 #define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
@@ -346,8 +340,6 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
 
 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
 
-#define Py_UNICODE_ISWIDE(ch) _PyUnicode_IsWide(ch)
-
 #endif
 
 #define Py_UNICODE_ISALNUM(ch) \
@@ -440,12 +432,6 @@ PyAPI_FUNC(int) PyUnicode_GetSize(
     PyObject *unicode	 	/* Unicode object */
     );
 
-/* Get the fixed-width representation length of the Unicode object */
-
-PyAPI_FUNC(int) PyUnicode_GetWidth(
-    PyObject *unicode	 	/* Unicode object */
-    );
-
 /* Get the maximum ordinal for a Unicode character. */
 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
 
@@ -1176,10 +1162,6 @@ PyAPI_FUNC(int) _PyUnicode_IsAlpha(
     Py_UNICODE ch 	/* Unicode character */
     );
 
-PyAPI_FUNC(int) _PyUnicode_IsWide(
-    Py_UNICODE ch 	/* Unicode character */
-    );
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/Lib/test/string_tests.py b/Lib/test/string_tests.py
@@ -695,28 +695,3 @@ def test_encoding_decoding(self):
 
         self.checkraises(TypeError, 'xyz', 'decode', 42)
         self.checkraises(TypeError, 'xyz', 'encode', 42)
-
-
-class MixinUnicodeUserStringTest:
-    # Additional tests that only work with
-    # unicode compatible object, i.e. unicode and UserString
-
-    def test_iswide(self):
-        self.checkequal(False, u'', 'iswide')
-        self.checkequal(False, u'\x1f', 'iswide') # Neutral
-        self.checkequal(False, u'\x20', 'iswide') # Narrow
-        self.checkequal(True, u'\u2329', 'iswide') # Wide
-        self.checkequal(False, u'\uff64', 'iswide') # Half
-        self.checkequal(True, u'\u3000', 'iswide') # Full
-        self.checkequal(False, u'\u2460', 'iswide') # Ambiguous
-        self.checkequal(True, u'\ud55c\uae00', 'iswide')
-        self.checkequal(False, u'\ud55c\u2606\uae00', 'iswide')
-
-    def test_width(self):
-        self.checkequal(0, u'', 'width')
-        self.checkequal(4, u'abcd', 'width')
-        self.checkequal(2, u'\u0187\u01c9', 'width')
-        self.checkequal(3, u'\u2460\u2329', 'width')
-        self.checkequal(3, u'\u2329\u2460', 'width')
-        self.checkequal(4, u'\ud55c\uae00', 'width')
-        self.checkequal(5, u'\ud55c\u2606\uae00', 'width')
diff --git a/Lib/test/test_unicode.py b/Lib/test/test_unicode.py
@@ -11,8 +11,7 @@
 
 class UnicodeTest(
     string_tests.CommonTest,
-    string_tests.MixinStrUnicodeUserStringTest,
-    string_tests.MixinUnicodeUserStringTest
+    string_tests.MixinStrUnicodeUserStringTest
     ):
     type2test = unicode
 
diff --git a/Lib/test/test_unicodedata.py b/Lib/test/test_unicodedata.py
@@ -174,6 +174,17 @@ def test_normalize(self):
         # The rest can be found in test_normalization.py
         # which requires an external file.
 
+    def test_east_asian_width(self):
+        eaw = self.db.east_asian_width
+        self.assertRaises(TypeError, eaw, 'a')     # 8-bit str, not unicode
+        self.assertRaises(TypeError, eaw, u'')     # empty: length must be 1
+        self.assertRaises(TypeError, eaw, u'ra')   # more than one character
+        self.assertEqual(eaw(u'\x1e'), 'N')        # Neutral
+        self.assertEqual(eaw(u'\x20'), 'Na')       # Narrow
+        self.assertEqual(eaw(u'\uC894'), 'W')      # Wide
+        self.assertEqual(eaw(u'\uFF66'), 'H')      # Halfwidth
+        self.assertEqual(eaw(u'\uFF1F'), 'F')      # Fullwidth
+        self.assertEqual(eaw(u'\u2010'), 'A')      # Ambiguous
 
 class UnicodeMiscTest(UnicodeDatabaseTest):
 
diff --git a/Lib/test/test_userstring.py b/Lib/test/test_userstring.py
@@ -11,8 +11,7 @@ class UserStringTest(
     string_tests.CommonTest,
     string_tests.MixinStrUnicodeUserStringTest,
     string_tests.MixinStrStringUserStringTest,
-    string_tests.MixinStrUserStringTest,
-    string_tests.MixinUnicodeUserStringTest
+    string_tests.MixinStrUserStringTest
     ):
 
     type2test = UserString
diff --git a/Misc/NEWS b/Misc/NEWS
@@ -67,6 +67,9 @@ Core and builtins
 - Added a workaround for proper string operations in BSDs.  str.split
   and str.is* methods can now work correctly with UTF-8 locales.
 
+- unicode.iswide() and unicode.width() are dropped and the East Asian
+  Width support is moved to the unicodedata extension module.
+
 Extension modules
 -----------------
 
diff --git a/Modules/unicodedata.c b/Modules/unicodedata.c
@@ -24,6 +24,8 @@ typedef struct {
     const unsigned char	bidirectional; 	/* index into
 					   _PyUnicode_BidirectionalNames */
     const unsigned char mirrored;	/* true if mirrored in bidir mode */
+    const unsigned char east_asian_width;	/* index into
+						   _PyUnicode_EastAsianWidthNames */
 } _PyUnicode_DatabaseRecord;
 
 /* data file generated by Tools/unicode/makeunicodedata.py */
@@ -204,6 +206,24 @@ unicodedata_mirrored(PyObject *self, PyObject *args)
     return PyInt_FromLong((int) _getrecord(v)->mirrored);
 }
 
+static PyObject *
+unicodedata_east_asian_width(PyObject *self, PyObject *args)
+{   /* east_asian_width(unichr): width class name of one character */
+    PyUnicodeObject *v;
+    int index;		/* offset into _PyUnicode_EastAsianWidthNames */
+
+    if (!PyArg_ParseTuple(args, "O!:east_asian_width",
+			  &PyUnicode_Type, &v))	/* unicode objects only */
+	return NULL;
+    if (PyUnicode_GET_SIZE(v) != 1) {	/* exactly one character allowed */
+	PyErr_SetString(PyExc_TypeError,
+			"need a single Unicode character as parameter");
+	return NULL;
+    }
+    index = (int) _getrecord(v)->east_asian_width;	/* record stores an index, not the name */
+    return PyString_FromString(_PyUnicode_EastAsianWidthNames[index]);
+}
+
 static PyObject *
 unicodedata_decomposition(PyObject *self, PyObject *args)
 {
@@ -871,6 +891,7 @@ static PyMethodDef unicodedata_functions[] = {
     {"bidirectional", unicodedata_bidirectional, METH_VARARGS},
     {"combining", unicodedata_combining, METH_VARARGS},
     {"mirrored", unicodedata_mirrored, METH_VARARGS},
+    {"east_asian_width", unicodedata_east_asian_width, METH_VARARGS},
     {"decomposition",unicodedata_decomposition, METH_VARARGS},
     {"name", unicodedata_name, METH_VARARGS},
     {"lookup", unicodedata_lookup, METH_VARARGS},
diff --git a/Modules/unicodedata_db.h b/Modules/unicodedata_db.h
diff --git a/Objects/unicodectype.c b/Objects/unicodectype.c
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
diff --git a/Objects/unicodetype_db.h b/Objects/unicodetype_db.h
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py