Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit d6e8de1

Browse files
committed
Issue #4916: fix little-endian UTF-16 decoding bug on big-endian UCS-4 builds, introduced by r68483.
1 parent a4a37fe commit d6e8de1

1 file changed

Lines changed: 16 additions & 8 deletions

File tree

Objects/unicodeobject.c

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2791,16 +2791,24 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
27912791
_p[3] = 0;
27922792
#endif
27932793
#endif
2794-
((unsigned char *) _p)[1] = _q[0];
2795-
((unsigned char *) _p)[0] = _q[1];
2796-
((unsigned char *) _p)[1 + Py_UNICODE_SIZE] = _q[2];
2797-
((unsigned char *) _p)[0 + Py_UNICODE_SIZE] = _q[3];
2794+
/* Issue #4916; UCS-4 builds on big endian machines must
2795+
fill the two last bytes of each 4-byte unit. */
2796+
#if (!defined(BYTEORDER_IS_LITTLE_ENDIAN) && Py_UNICODE_SIZE > 2)
2797+
# define OFF 2
2798+
#else
2799+
# define OFF 0
2800+
#endif
2801+
((unsigned char *) _p)[OFF + 1] = _q[0];
2802+
((unsigned char *) _p)[OFF + 0] = _q[1];
2803+
((unsigned char *) _p)[OFF + 1 + Py_UNICODE_SIZE] = _q[2];
2804+
((unsigned char *) _p)[OFF + 0 + Py_UNICODE_SIZE] = _q[3];
27982805
#if (SIZEOF_LONG == 8)
2799-
((unsigned char *) _p)[1 + 2 * Py_UNICODE_SIZE] = _q[4];
2800-
((unsigned char *) _p)[0 + 2 * Py_UNICODE_SIZE] = _q[5];
2801-
((unsigned char *) _p)[1 + 3 * Py_UNICODE_SIZE] = _q[6];
2802-
((unsigned char *) _p)[0 + 3 * Py_UNICODE_SIZE] = _q[7];
2806+
((unsigned char *) _p)[OFF + 1 + 2 * Py_UNICODE_SIZE] = _q[4];
2807+
((unsigned char *) _p)[OFF + 0 + 2 * Py_UNICODE_SIZE] = _q[5];
2808+
((unsigned char *) _p)[OFF + 1 + 3 * Py_UNICODE_SIZE] = _q[6];
2809+
((unsigned char *) _p)[OFF + 0 + 3 * Py_UNICODE_SIZE] = _q[7];
28032810
#endif
2811+
#undef OFF
28042812
_q += SIZEOF_LONG;
28052813
_p += SIZEOF_LONG / 2;
28062814
}

0 commit comments

Comments
 (0)