@@ -53,6 +53,13 @@ _getrecord(PyUnicodeObject* v)
5353
5454/* --- Module API --------------------------------------------------------- */
5555
56+ PyDoc_STRVAR (unicodedata_decimal__doc__ ,
57+ "decimal(unichr[, default])\n\
58+ \n\
59+ Returns the decimal value assigned to the Unicode character unichr\n\
60+ as integer. If no such value is defined, default is returned, or, if\n\
61+ not given, ValueError is raised." );
62+
5663static PyObject *
5764unicodedata_decimal (PyObject * self , PyObject * args )
5865{
@@ -82,6 +89,13 @@ unicodedata_decimal(PyObject *self, PyObject *args)
8289 return PyInt_FromLong (rc );
8390}
8491
92+ PyDoc_STRVAR (unicodedata_digit__doc__ ,
93+ "digit(unichr[, default])\n\
94+ \n\
95+ Returns the digit value assigned to the Unicode character unichr as\n\
96+ integer. If no such value is defined, default is returned, or, if\n\
97+ not given, ValueError is raised." );
98+
8599static PyObject *
86100unicodedata_digit (PyObject * self , PyObject * args )
87101{
@@ -110,6 +124,13 @@ unicodedata_digit(PyObject *self, PyObject *args)
110124 return PyInt_FromLong (rc );
111125}
112126
127+ PyDoc_STRVAR (unicodedata_numeric__doc__ ,
128+ "numeric(unichr[, default])\n\
129+ \n\
130+ Returns the numeric value assigned to the Unicode character unichr\n\
131+ as float. If no such value is defined, default is returned, or, if\n\
132+ not given, ValueError is raised." );
133+
113134static PyObject *
114135unicodedata_numeric (PyObject * self , PyObject * args )
115136{
@@ -138,6 +159,12 @@ unicodedata_numeric(PyObject *self, PyObject *args)
138159 return PyFloat_FromDouble (rc );
139160}
140161
162+ PyDoc_STRVAR (unicodedata_category__doc__ ,
163+ "category(unichr)\n\
164+ \n\
165+ Returns the general category assigned to the Unicode character\n\
166+ unichr as string." );
167+
141168static PyObject *
142169unicodedata_category (PyObject * self , PyObject * args )
143170{
@@ -156,6 +183,13 @@ unicodedata_category(PyObject *self, PyObject *args)
156183 return PyString_FromString (_PyUnicode_CategoryNames [index ]);
157184}
158185
186+ PyDoc_STRVAR (unicodedata_bidirectional__doc__ ,
187+ "bidirectional(unichr)\n\
188+ \n\
189+ Returns the bidirectional category assigned to the Unicode character\n\
190+ unichr as string. If no such value is defined, an empty string is\n\
191+ returned." );
192+
159193static PyObject *
160194unicodedata_bidirectional (PyObject * self , PyObject * args )
161195{
@@ -174,6 +208,13 @@ unicodedata_bidirectional(PyObject *self, PyObject *args)
174208 return PyString_FromString (_PyUnicode_BidirectionalNames [index ]);
175209}
176210
211+ PyDoc_STRVAR (unicodedata_combining__doc__ ,
212+ "combining(unichr)\n\
213+ \n\
214+ Returns the canonical combining class assigned to the Unicode\n\
215+ character unichr as integer. Returns 0 if no combining class is\n\
216+ defined." );
217+
177218static PyObject *
178219unicodedata_combining (PyObject * self , PyObject * args )
179220{
@@ -190,6 +231,13 @@ unicodedata_combining(PyObject *self, PyObject *args)
190231 return PyInt_FromLong ((int ) _getrecord (v )-> combining );
191232}
192233
234+ PyDoc_STRVAR (unicodedata_mirrored__doc__ ,
235+ "mirrored(unichr)\n\
236+ \n\
237+ Returns the mirrored property assigned to the Unicode character\n\
238+ unichr as integer. Returns 1 if the character has been identified as\n\
239+ a \"mirrored\" character in bidirectional text, 0 otherwise." );
240+
193241static PyObject *
194242unicodedata_mirrored (PyObject * self , PyObject * args )
195243{
@@ -206,6 +254,12 @@ unicodedata_mirrored(PyObject *self, PyObject *args)
206254 return PyInt_FromLong ((int ) _getrecord (v )-> mirrored );
207255}
208256
257+ PyDoc_STRVAR (unicodedata_east_asian_width__doc__ ,
258+ "east_asian_width(unichr)\n\
259+ \n\
260+ Returns the east asian width assigned to the Unicode character\n\
261+ unichr as string." );
262+
209263static PyObject *
210264unicodedata_east_asian_width (PyObject * self , PyObject * args )
211265{
@@ -224,6 +278,13 @@ unicodedata_east_asian_width(PyObject *self, PyObject *args)
224278 return PyString_FromString (_PyUnicode_EastAsianWidthNames [index ]);
225279}
226280
281+ PyDoc_STRVAR (unicodedata_decomposition__doc__ ,
282+ "decomposition(unichr)\n\
283+ \n\
284+ Returns the character decomposition mapping assigned to the Unicode\n\
285+ character unichr as string. An empty string is returned in case no\n\
286+ such mapping is defined." );
287+
227288static PyObject *
228289unicodedata_decomposition (PyObject * self , PyObject * args )
229290{
@@ -525,6 +586,12 @@ nfc_nfkc(PyObject *input, int k)
525586 return result ;
526587}
527588
589+ PyDoc_STRVAR (unicodedata_normalize__doc__ ,
590+ "normalize(form, unistr)\n\
591+ \n\
592+ Return the normal form 'form' for the Unicode string unistr. Valid\n\
593+ values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'." );
594+
528595static PyObject *
529596unicodedata_normalize (PyObject * self , PyObject * args )
530597{
@@ -826,6 +893,12 @@ static const _PyUnicode_Name_CAPI hashAPI =
826893/* -------------------------------------------------------------------- */
827894/* Python bindings */
828895
896+ PyDoc_STRVAR (unicodedata_name__doc__ ,
897+ "name(unichr[, default])\n\
898+ Returns the name assigned to the Unicode character unichr as a\n\
899+ string. If no name is defined, default is returned, or, if not\n\
900+ given, ValueError is raised." );
901+
829902static PyObject *
830903unicodedata_name (PyObject * self , PyObject * args )
831904{
@@ -857,6 +930,13 @@ unicodedata_name(PyObject* self, PyObject* args)
857930 return Py_BuildValue ("s" , name );
858931}
859932
933+ PyDoc_STRVAR (unicodedata_lookup__doc__ ,
934+ "lookup(name)\n\
935+ \n\
936+ Look up character by name. If a character with the\n\
937+ given name is found, return the corresponding Unicode\n\
938+ character. If not found, KeyError is raised." );
939+
860940static PyObject *
861941unicodedata_lookup (PyObject * self , PyObject * args )
862942{
@@ -884,22 +964,37 @@ unicodedata_lookup(PyObject* self, PyObject* args)
884964/* XXX Add doc strings. */
885965
886966static PyMethodDef unicodedata_functions [] = {
887- {"decimal" , unicodedata_decimal , METH_VARARGS },
888- {"digit" , unicodedata_digit , METH_VARARGS },
889- {"numeric" , unicodedata_numeric , METH_VARARGS },
890- {"category" , unicodedata_category , METH_VARARGS },
891- {"bidirectional" , unicodedata_bidirectional , METH_VARARGS },
892- {"combining" , unicodedata_combining , METH_VARARGS },
893- {"mirrored" , unicodedata_mirrored , METH_VARARGS },
894- {"east_asian_width" , unicodedata_east_asian_width , METH_VARARGS },
895- {"decomposition" ,unicodedata_decomposition , METH_VARARGS },
896- {"name" , unicodedata_name , METH_VARARGS },
897- {"lookup" , unicodedata_lookup , METH_VARARGS },
898- {"normalize" , unicodedata_normalize , METH_VARARGS },
967+ {"decimal" , unicodedata_decimal , METH_VARARGS , unicodedata_decimal__doc__ },
968+ {"digit" , unicodedata_digit , METH_VARARGS , unicodedata_digit__doc__ },
969+ {"numeric" , unicodedata_numeric , METH_VARARGS , unicodedata_numeric__doc__ },
970+ {"category" , unicodedata_category , METH_VARARGS ,
971+ unicodedata_category__doc__ },
972+ {"bidirectional" , unicodedata_bidirectional , METH_VARARGS ,
973+ unicodedata_bidirectional__doc__ },
974+ {"combining" , unicodedata_combining , METH_VARARGS ,
975+ unicodedata_combining__doc__ },
976+ {"mirrored" , unicodedata_mirrored , METH_VARARGS ,
977+ unicodedata_mirrored__doc__ },
978+ {"east_asian_width" , unicodedata_east_asian_width , METH_VARARGS ,
979+ unicodedata_east_asian_width__doc__ },
980+ {"decomposition" , unicodedata_decomposition , METH_VARARGS ,
981+ unicodedata_decomposition__doc__ },
982+ {"name" , unicodedata_name , METH_VARARGS , unicodedata_name__doc__ },
983+ {"lookup" , unicodedata_lookup , METH_VARARGS , unicodedata_lookup__doc__ },
984+ {"normalize" , unicodedata_normalize , METH_VARARGS ,
985+ unicodedata_normalize__doc__ },
899986 {NULL , NULL } /* sentinel */
900987};
901988
902- PyDoc_STRVAR (unicodedata_docstring , "unicode character database" );
989+ PyDoc_STRVAR (unicodedata_docstring ,
990+ "This module provides access to the Unicode Character Database which\n\
991+ defines character properties for all Unicode characters. The data in\n\
992+ this database is based on the UnicodeData.txt file version\n\
993+ 3.2.0 which is publically available from ftp://ftp.unicode.org/.\n\
994+ \n\
995+ The module uses the same names and symbols as defined by the\n\
996+ UnicodeData File Format 3.2.0 (see\n\
997+ http://www.unicode.org/Public/UNIDATA/UnicodeData.html)." );
903998
904999PyMODINIT_FUNC
9051000initunicodedata (void )
0 commit comments