From 97ed17c30c56f4fd0ca9c61125c785f34259f9bc Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 12:07:18 +0300 Subject: [PATCH 1/3] gh-124969: Make locale.nl_langinfo(locale.ALT_DIGITS) returning a string again This is a follow up of GH-124974. Only Glibc needed a fix. Now the returned value is a string consisting of semicolon-separated symbols on all Posix platforms. --- Doc/library/locale.rst | 7 ++-- Lib/test/test__locale.py | 27 ++++++++----- ...-10-21-12-06-55.gh-issue-124969.xiY8UP.rst | 2 + Modules/_localemodule.c | 38 +++++++++++-------- 4 files changed, 47 insertions(+), 27 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst diff --git a/Doc/library/locale.rst b/Doc/library/locale.rst index 5f3c4840b5cc70..f172a55080efc9 100644 --- a/Doc/library/locale.rst +++ b/Doc/library/locale.rst @@ -158,8 +158,7 @@ The :mod:`locale` module defines the following exception and functions: .. function:: nl_langinfo(option) - Return some locale-specific information as a string (or a tuple for - ``ALT_DIGITS``). This function is not + Return some locale-specific information as a string. This function is not available on all systems, and the set of possible options might also vary across platforms. The possible argument values are numbers, for which symbolic constants are available in the locale module. @@ -312,7 +311,9 @@ The :mod:`locale` module defines the following exception and functions: .. data:: ALT_DIGITS - Get a tuple of up to 100 strings used to represent the values 0 to 99. + Get a string consisting of up to 100 semicolon-separated symbols used + to represent the values 0 to 99 in a locale-specific way. + In most locales this is an empty string. The function temporarily sets the ``LC_CTYPE`` locale to the locale of the category that determines the requested value (``LC_TIME``, diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index e403c2a822788d..42bdfa8acc6c88 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -26,7 +26,10 @@ 'bs_BA', 'fr_LU', 'kl_GL', 'fa_IR', 'de_BE', 'sv_SE', 'it_CH', 'uk_UA', 'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'ps_AF', 'en_US', 'fr_FR.ISO8859-1', 'fr_FR.UTF-8', 'fr_FR.ISO8859-15@euro', - 'ru_RU.KOI8-R', 'ko_KR.eucKR'] + 'ru_RU.KOI8-R', 'ko_KR.eucKR', + 'ja_JP', 'lzh_TW', 'my_MM', 'or_IN', 'shn_MM', + 'ar_AE', 'bn_IN', 'ks_IN', 'mr_IN', 'th_TH', +] def setUpModule(): global candidate_locales @@ -78,7 +81,7 @@ def accept(loc): 'C': (0, {}), 'en_US': (0, {}), 'fa_IR': (100, {0: '\u06f0\u06f0', 10: '\u06f1\u06f0', 99: '\u06f9\u06f9'}), - 'ja_JP': (100, {0: '\u3007', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}), + 'ja_JP': (100, {1: '\u4e00', 10: '\u5341', 99: '\u4e5d\u5341\u4e5d'}), 'lzh_TW': (32, {0: '\u3007', 10: '\u5341', 31: '\u5345\u4e00'}), 'my_MM': (100, {0: '\u1040\u1040', 10: '\u1041\u1040', 99: '\u1049\u1049'}), 'or_IN': (100, {0: '\u0b66', 10: '\u0b67\u0b66', 99: '\u0b6f\u0b6f'}), @@ -199,21 +202,27 @@ def test_lc_numeric_basic(self): def test_alt_digits_nl_langinfo(self): # Test nl_langinfo(ALT_DIGITS) tested = False - for loc, (count, samples) in known_alt_digits.items(): + for loc in candidate_locales: with self.subTest(locale=loc): try: setlocale(LC_TIME, loc) except Error: self.skipTest(f'no locale {loc!r}') continue + with self.subTest(locale=loc): alt_digits = nl_langinfo(locale.ALT_DIGITS) - self.assertIsInstance(alt_digits, tuple) - if count and not alt_digits and support.is_apple: - self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on Apple platforms') - self.assertEqual(len(alt_digits), count) - for i in samples: - self.assertEqual(alt_digits[i], samples[i]) + self.assertIsInstance(alt_digits, str) + alt_digits = alt_digits.split(';') if alt_digits else [] + if alt_digits: + self.assertGreaterEqual(len(alt_digits), 10, alt_digits) + if loc in known_alt_digits: + count, samples = known_alt_digits[loc] + if count and not alt_digits: + self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on this platform') + self.assertEqual(len(alt_digits), count, alt_digits) + for i in samples: + self.assertEqual(alt_digits[i], samples[i]) tested = True if not tested: self.skipTest('no suitable locales') diff --git a/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst b/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst new file mode 100644 index 00000000000000..c44550184e0000 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-10-21-12-06-55.gh-issue-124969.xiY8UP.rst @@ -0,0 +1,2 @@ +``locale.nl_langinfo(locale.ALT_DIGITS)`` now returns a string again. The +returned value consists of up to 100 semicolon-separated symbols. diff --git a/Modules/_localemodule.c b/Modules/_localemodule.c index 0daec646605775..2a789ea74d27da 100644 --- a/Modules/_localemodule.c +++ b/Modules/_localemodule.c @@ -667,28 +667,36 @@ _locale_nl_langinfo_impl(PyObject *module, int item) return NULL; } PyObject *pyresult; +#ifdef __GLIBC__ #ifdef ALT_DIGITS - if (item == ALT_DIGITS) { - /* The result is a sequence of up to 100 NUL-separated strings. */ - const char *s = result; + if (item == ALT_DIGITS && *result) { + /* According to the POSIX specification the result must be + * a sequence of up to 100 semicolon-separated strings. + * But in Glibc they are NUL-separated. */ + Py_ssize_t i = 0; int count = 0; - for (; count < 100 && *s; count++) { - s += strlen(s) + 1; + for (; count < 100 && result[i]; count++) { + i += strlen(result + i) + 1; } - pyresult = PyTuple_New(count); - if (pyresult != NULL) { - for (int i = 0; i < count; i++) { - PyObject *unicode = PyUnicode_DecodeLocale(result, NULL); - if (unicode == NULL) { - Py_CLEAR(pyresult); - break; - } - PyTuple_SET_ITEM(pyresult, i, unicode); - result += strlen(result) + 1; + char *buf = PyMem_Malloc(i); + if (buf == NULL) { + PyErr_NoMemory(); + pyresult = NULL; + } + else { + memcpy(buf, result, i); + /* Replace all NULs with semicolons. */ + i = 0; + while (--count) { + i += strlen(buf + i); + buf[i++] = ';'; } + pyresult = PyUnicode_DecodeLocale(buf, NULL); + PyMem_Free(buf); } } else +#endif #endif { pyresult = PyUnicode_DecodeLocale(result, NULL); From b5537f1346261aac78f66bf1b0c1ceecf2fac17e Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 20:25:00 +0300 Subject: [PATCH 2/3] Use locale names with encodings. --- Lib/test/test__locale.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 42bdfa8acc6c88..46206f67e4f147 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -27,8 +27,8 @@ 'eu_ES', 'vi_VN', 'af_ZA', 'nb_NO', 'en_DK', 'tg_TJ', 'ps_AF', 'en_US', 'fr_FR.ISO8859-1', 'fr_FR.UTF-8', 'fr_FR.ISO8859-15@euro', 'ru_RU.KOI8-R', 'ko_KR.eucKR', - 'ja_JP', 'lzh_TW', 'my_MM', 'or_IN', 'shn_MM', - 'ar_AE', 'bn_IN', 'ks_IN', 'mr_IN', 'th_TH', + 'ja_JP.UTF-8', 'lzh_TW.UTF-8', 'my_MM.UTF-8', 'or_IN.UTF-8', 'shn_MM.UTF-8', + 'ar_AE.UTF-8', 'bn_IN.UTF-8', 'ja_JP.PCK', 'mr_IN.UTF-8', 'th_TH.TIS620', ] def setUpModule(): @@ -86,6 +86,8 @@ def accept(loc): 'my_MM': (100, {0: '\u1040\u1040', 10: '\u1041\u1040', 99: '\u1049\u1049'}), 'or_IN': (100, {0: '\u0b66', 10: '\u0b67\u0b66', 99: '\u0b6f\u0b6f'}), 'shn_MM': (100, {0: '\u1090\u1090', 10: '\u1091\u1090', 99: '\u1099\u1099'}), + 'ar_AE': (100, {0: '\u0660', 10: '\u0661\u0660', 99: '\u0669\u0669'}), + 'bn_IN': (100, {0: '\u09e6', 10: '\u09e7\u09e6', 99: '\u09ef\u09ef'}), } if sys.platform == 'win32': @@ -216,8 +218,9 @@ def test_alt_digits_nl_langinfo(self): alt_digits = alt_digits.split(';') if alt_digits else [] if alt_digits: self.assertGreaterEqual(len(alt_digits), 10, alt_digits) - if loc in known_alt_digits: - count, samples = known_alt_digits[loc] + loc1 = loc.split('.', 1)[0] + if loc1 in known_alt_digits: + count, samples = known_alt_digits[loc1] if count and not alt_digits: self.skipTest(f'ALT_DIGITS is not set for locale {loc!r} on this platform') self.assertEqual(len(alt_digits), count, alt_digits) From 23c0fff19a77c70d4c35a14da867f5b6cbcb79a9 Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Mon, 21 Oct 2024 21:28:02 +0300 Subject: [PATCH 3/3] Remove broken ja_JP.PCK locale. --- Lib/test/test__locale.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test__locale.py b/Lib/test/test__locale.py index 46206f67e4f147..7e6e296c069abb 100644 --- a/Lib/test/test__locale.py +++ b/Lib/test/test__locale.py @@ -28,7 +28,7 @@ 'fr_FR.ISO8859-1', 'fr_FR.UTF-8', 'fr_FR.ISO8859-15@euro', 'ru_RU.KOI8-R', 'ko_KR.eucKR', 'ja_JP.UTF-8', 'lzh_TW.UTF-8', 'my_MM.UTF-8', 'or_IN.UTF-8', 'shn_MM.UTF-8', - 'ar_AE.UTF-8', 'bn_IN.UTF-8', 'ja_JP.PCK', 'mr_IN.UTF-8', 'th_TH.TIS620', + 'ar_AE.UTF-8', 'bn_IN.UTF-8', 'mr_IN.UTF-8', 'th_TH.TIS620', ] def setUpModule():