@@ -138,6 +138,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
138138# define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
139139# define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
140140# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
141+ # define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
141142# define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
142143# define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
143144# define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
@@ -154,6 +155,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
154155# define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
155156# define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
156157# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
158+ # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
159+ # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
157160# define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
158161# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
159162# define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
@@ -165,6 +168,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
165168# define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
166169# define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
167170# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
171+ # define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
168172# define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
169173# define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
170174# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
@@ -225,6 +229,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
225229# define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
226230# define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
227231# define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
232+ # define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
228233# define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
229234# define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
230235# define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
@@ -241,6 +246,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
241246# define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
242247# define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
243248# define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
249+ # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
250+ # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
244251# define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
245252# define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
246253# define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
@@ -252,6 +259,7 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
252259# define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
253260# define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
254261# define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
262+ # define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
255263# define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
256264# define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
257265# define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
@@ -749,6 +757,80 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
749757 const char * errors /* error handling */
750758 );
751759
760+ /* --- UTF-32 Codecs ------------------------------------------------------ */
761+
762+ /* Decodes length bytes from a UTF-32 encoded buffer string and returns
763+ the corresponding Unicode object.
764+
765+ errors (if non-NULL) selects the error handling scheme; when it is
766+ NULL, "strict" is used.
767+
768+ If byteorder is non-NULL, decoding starts in the byte order that
769+ *byteorder names on entry:
770+
771+ *byteorder == -1: little endian
772+ *byteorder == 0: native order
773+ *byteorder == 1: big endian
774+
775+ In native mode (*byteorder == 0), the first four bytes of the input
776+ are checked for a byte order mark (BOM). If one is found, it is
777+ analysed, the byte order is adjusted accordingly and the BOM is
778+ skipped. In the other two modes no BOM interpretation is done. After
779+ completion, *byteorder is set to the byte order in effect at the end
780+ of the input data.
781+
782+ If byteorder is NULL, the codec starts in native order mode.
783+ */
784+
785+ PyAPI_FUNC (PyObject * ) PyUnicode_DecodeUTF32 (
786+ const char * string , /* UTF-32 encoded string */
787+ Py_ssize_t length , /* size of string in bytes */
788+ const char * errors , /* error handling; NULL means "strict" */
789+ int * byteorder /* in/out: byte order to use,
790+ 0=native, -1=LE, 1=BE; updated on
791+ exit; may be NULL */
792+ );
793+
/* Stateful variant of PyUnicode_DecodeUTF32: takes the same string,
   length, errors and byteorder arguments plus an additional consumed
   pointer, described here as "bytes consumed".

   NOTE(review): presumably, when consumed is non-NULL, an incomplete
   trailing sequence is left undecoded rather than reported as an error
   and the number of bytes actually decoded is stored in *consumed, as
   with PyUnicode_DecodeUTF16Stateful -- confirm against the
   implementation. */
794+ PyAPI_FUNC (PyObject * ) PyUnicode_DecodeUTF32Stateful (
795+ const char * string , /* UTF-32 encoded string */
796+ Py_ssize_t length , /* size of string */
797+ const char * errors , /* error handling */
798+ int * byteorder , /* in/out: byte order to use,
799+ 0=native, -1=LE, 1=BE; updated on
800+ exit */
801+ Py_ssize_t * consumed /* bytes consumed */
802+ );
803+
804+ /* Returns a Python string holding the UTF-32 encoding of unicode, in
805+ native byte order. The string always starts with a BOM. */
806+
807+ PyAPI_FUNC (PyObject * ) PyUnicode_AsUTF32String (
808+ PyObject * unicode /* Unicode object */
809+ );
810+
811+ /* Returns a Python string object holding the UTF-32 encoded value of
812+ the Unicode data.
813+
814+ byteorder selects the byte order in which the output is written:
815+
816+ byteorder == -1: little endian
817+ byteorder == 0: native byte order (writes a BOM)
818+ byteorder == 1: big endian
819+
820+ If byteorder is 0, the output string always starts with the Unicode
821+ byte order mark (U+FEFF). In the other two modes, no BOM is
822+ prepended.
823+
824+ errors names the error handling scheme.
825+ */
826+
827+ PyAPI_FUNC (PyObject * ) PyUnicode_EncodeUTF32 (
828+ const Py_UNICODE * data , /* Unicode char buffer */
829+ Py_ssize_t length , /* number of Py_UNICODE chars to encode */
830+ const char * errors , /* error handling */
831+ int byteorder /* byte order to use: 0=BOM+native, -1=LE, 1=BE */
832+ );
833+
752834/* --- UTF-16 Codecs ------------------------------------------------------ */
753835
754836/* Decodes length bytes from a UTF-16 encoded buffer string and returns
0 commit comments