77
88Unicode implementation based on original code by Fredrik Lundh,
99modified by Marc-Andre Lemburg ([email protected] ) according to the 10- Unicode Integration Proposal (see file Misc/unicode.txt).
10+ Unicode Integration Proposal. (See
11+ http://www.egenix.com/files/python/unicode-proposal.txt).
1112
1213Copyright (c) Corporation for National Research Initiatives.
1314
@@ -420,8 +421,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
420421 for (i_ = 0; i_ < (length); i_++) t_[i_] = v_;\
421422 } while (0)
422423
423- /* Check if substring matches at given offset. the offset must be
424- valid, and the substring must not be empty */
424+ /* Check if substring matches at given offset. The offset must be
425+ valid, and the substring must not be empty. */
425426
426427#define Py_UNICODE_MATCH (string , offset , substring ) \
427428 ((*((string)->str + (offset)) == *((substring)->str)) && \
@@ -549,8 +550,8 @@ PyAPI_FUNC(int) PyUnicode_Resize(
549550 Coercion is done in the following way:
550551
551552 1. bytes, bytearray and other char buffer compatible objects are decoded
552- under the assumptions that they contain data using the current
553- default encoding. Decoding is done in "strict" mode.
553+ under the assumption that they contain data using the UTF-8
554+ encoding. Decoding is done in "strict" mode.
554555
555556 2. All other objects (including Unicode objects) raise an
556557 exception.
@@ -661,7 +662,7 @@ PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
661662
662663 Many of these APIs take two arguments, encoding and errors. These
663664 parameters have the same semantics as the ones
664- of the builtin unicode () API.
665+ of the builtin str() API.
665666
666667 Setting encoding to NULL causes the default encoding (UTF-8) to be used.
667668
@@ -680,7 +681,8 @@ PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
680681/* Return a Python string holding the default encoded value of the
681682 Unicode object.
682683
683- The resulting string is cached in the Unicode object for subsequent
684+ Same as PyUnicode_AsUTF8String() except
685+ the resulting string is cached in the Unicode object for subsequent
684686 usage by this function. The cached version is needed to implement
685687 the character buffer interface and will live (at least) as long as
686688 the Unicode object itself.
@@ -695,14 +697,14 @@ PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
695697 PyObject * unicode ,
696698 const char * errors );
697699
698- /* Returns a pointer to the default encoding (normally, UTF-8) of the
700+ /* Returns a pointer to the default encoding (UTF-8) of the
699701 Unicode object unicode and the size of the encoded representation
700702 in bytes stored in *size.
701703
702704 In case of an error, *size is not set.
703705
704706 *** This API is for interpreter INTERNAL USE ONLY and will likely
705- *** be removed or changed for Python 3.1 .
707+ *** be removed or changed in the future .
706708
707709 *** If you need to access the Unicode object as a UTF-8 bytes string,
708710 *** please use PyUnicode_AsUTF8String() instead.
@@ -713,7 +715,7 @@ PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
713715 PyObject * unicode ,
714716 Py_ssize_t * size );
715717
716- /* Returns a pointer to the default encoding (normally, UTf -8) of the
718+ /* Returns a pointer to the default encoding (UTF-8) of the
717719 Unicode object unicode.
718720
719721 Use of this API is DEPRECATED since no size information can be
@@ -729,14 +731,7 @@ PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
729731
730732PyAPI_FUNC (char * ) _PyUnicode_AsString (PyObject * unicode );
731733
732- /* Returns the currently active default encoding.
733-
734- The default encoding is currently implemented as run-time settable
735- process global. This may change in future versions of the
736- interpreter to become a parameter which is managed on a per-thread
737- basis.
738-
739- */
734+ /* Returns "utf-8". */
740735
741736PyAPI_FUNC (const char * ) PyUnicode_GetDefaultEncoding (void );
742737
0 commit comments