@@ -41,6 +41,7 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
4141
4242#define PY_SSIZE_T_CLEAN
4343#include "Python.h"
44+ #include "bytes_methods.h"
4445
4546#include "unicodeobject.h"
4647#include "ucnhash.h"
@@ -592,9 +593,9 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
592593 if (* f == '%' ) {
593594 const char * p = f ;
594595 width = 0 ;
595- while (isdigit ( Py_CHARMASK ( * f ) ))
596+ while (ISDIGIT ( * f ))
596597 width = (width * 10 ) + * f ++ - '0' ;
597- while (* ++ f && * f != '%' && !isalpha ( Py_CHARMASK ( * f ) ))
598+ while (* ++ f && * f != '%' && !ISALPHA ( * f ))
598599 ;
599600
600601 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
@@ -755,12 +756,12 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
755756 zeropad = (* f == '0' );
756757 /* parse the width.precision part */
757758 width = 0 ;
758- while (isdigit ( Py_CHARMASK ( * f ) ))
759+ while (ISDIGIT ( * f ))
759760 width = (width * 10 ) + * f ++ - '0' ;
760761 precision = 0 ;
761762 if (* f == '.' ) {
762763 f ++ ;
763- while (isdigit ( Py_CHARMASK ( * f ) ))
764+ while (ISDIGIT ( * f ))
764765 precision = (precision * 10 ) + * f ++ - '0' ;
765766 }
766767 /* handle the long flag, but only for %ld and %lu.
@@ -1056,21 +1057,47 @@ PyObject *PyUnicode_Decode(const char *s,
10561057{
10571058 PyObject * buffer = NULL , * unicode ;
10581059 Py_buffer info ;
1060+ char lower [20 ]; /* Enough for any encoding name we recognize */
1061+ char * l ;
1062+ const char * e ;
10591063
10601064 if (encoding == NULL )
1061- encoding = PyUnicode_GetDefaultEncoding ();
1065+ encoding = PyUnicode_GetDefaultEncoding ();
1066+
1067+ /* Convert encoding to lower case and replace '_' with '-' in order to
1068+ catch e.g. UTF_8 */
1069+ e = encoding ;
1070+ l = lower ;
1071+ while (* e && l < & lower [(sizeof lower ) - 2 ]) {
1072+ if (ISUPPER (* e )) {
1073+ * l ++ = TOLOWER (* e ++ );
1074+ }
1075+ else if (* e == '_' ) {
1076+ * l ++ = '-' ;
1077+ e ++ ;
1078+ }
1079+ else {
1080+ * l ++ = * e ++ ;
1081+ }
1082+ }
1083+ * l = '\0' ;
10621084
10631085 /* Shortcuts for common default encodings */
1064- if (strcmp (encoding , "utf-8" ) == 0 )
1086+ if (strcmp (lower , "utf-8" ) == 0 )
10651087 return PyUnicode_DecodeUTF8 (s , size , errors );
1066- else if (strcmp (encoding , "latin-1" ) == 0 )
1088+ else if ((strcmp (lower , "latin-1" ) == 0 ) ||
1089+ (strcmp (lower , "iso-8859-1" ) == 0 ))
10671090 return PyUnicode_DecodeLatin1 (s , size , errors );
10681091#if defined(MS_WINDOWS ) && defined(HAVE_USABLE_WCHAR_T )
1069- else if (strcmp (encoding , "mbcs" ) == 0 )
1092+ else if (strcmp (lower , "mbcs" ) == 0 )
10701093 return PyUnicode_DecodeMBCS (s , size , errors );
10711094#endif
1072- else if (strcmp (encoding , "ascii" ) == 0 )
1095+ else if (strcmp (lower , "ascii" ) == 0 )
10731096 return PyUnicode_DecodeASCII (s , size , errors );
1097+ else if (strcmp (lower , "utf-16" ) == 0 )
1098+ return PyUnicode_DecodeUTF16 (s , size , errors , 0 );
1099+ else if (strcmp (lower , "utf-32" ) == 0 )
1100+ return PyUnicode_DecodeUTF32 (s , size , errors , 0 );
10741101
10751102 /* Decode via the codec registry */
10761103 buffer = NULL;
@@ -1470,7 +1497,7 @@ char utf7_special[128] = {
14701497#define B64 (n ) \
14711498 ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"[(n) & 0x3f])
14721499#define B64CHAR (c ) \
1473- (isalnum (c) || (c) == '+' || (c) == '/')
1500+ (ISALNUM (c) || (c) == '+' || (c) == '/')
14741501#define UB64 (c ) \
14751502 ((c) == '+' ? 62 : (c) == '/' ? 63 : (c) >= 'a' ? \
14761503 (c) - 71 : (c) >= 'A' ? (c) - 65 : (c) + 4 )
@@ -2703,7 +2730,7 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
27032730 }
27042731 for (i = 0 ; i < digits ; ++ i ) {
27052732 c = (unsigned char ) s [i ];
2706- if (!isxdigit (c )) {
2733+ if (!ISXDIGIT (c )) {
27072734 endinpos = (s + i + 1 )- starts ;
27082735 if (unicode_decode_call_errorhandler (
27092736 errors , & errorHandler ,
@@ -3077,7 +3104,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
30773104 outpos = p - PyUnicode_AS_UNICODE (v );
30783105 for (x = 0 , i = 0 ; i < count ; ++ i , ++ s ) {
30793106 c = (unsigned char )* s ;
3080- if (!isxdigit (c )) {
3107+ if (!ISXDIGIT (c )) {
30813108 endinpos = s - starts ;
30823109 if (unicode_decode_call_errorhandler (
30833110 errors , & errorHandler ,
0 commit comments