Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit e667e98

Browse files
committed
Issue #16218, #16444: Backport improvement on tests for non-ASCII characters
1 parent 37bfa4e commit e667e98

4 files changed

Lines changed: 105 additions & 10 deletions

File tree

Lib/test/support.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,6 +603,49 @@ def _is_ipv6_enabled():
603603
# module name.
604604
TESTFN = "{}_{}_tmp".format(TESTFN, os.getpid())
605605

606+
# FS_NONASCII: non-ASCII character encodable by os.fsencode(),
607+
# or None if there is no such character.
608+
FS_NONASCII = None
609+
for character in (
610+
# First try printable and common characters to have a readable filename.
611+
# For each character, the listed encodings are just examples of encodings able
612+
# to encode the character (the list is not exhaustive).
613+
614+
# U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1
615+
'\u00E6',
616+
# U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3
617+
'\u0130',
618+
# U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257
619+
'\u0141',
620+
# U+03C6 (Greek Small Letter Phi): cp1253
621+
'\u03C6',
622+
# U+041A (Cyrillic Capital Letter Ka): cp1251
623+
'\u041A',
624+
# U+05D0 (Hebrew Letter Alef): Encodable to cp424
625+
'\u05D0',
626+
# U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic
627+
'\u060C',
628+
# U+062A (Arabic Letter Teh): cp720
629+
'\u062A',
630+
# U+0E01 (Thai Character Ko Kai): cp874
631+
'\u0E01',
632+
633+
# Then try more "special" characters. "special" because they may be
634+
# interpreted or displayed differently depending on the exact locale
635+
# encoding and the font.
636+
637+
# U+00A0 (No-Break Space)
638+
'\u00A0',
639+
# U+20AC (Euro Sign)
640+
'\u20AC',
641+
):
642+
try:
643+
os.fsdecode(os.fsencode(character))
644+
except UnicodeError:
645+
pass
646+
else:
647+
FS_NONASCII = character
648+
break
606649

607650
# TESTFN_UNICODE is a non-ascii filename
608651
TESTFN_UNICODE = TESTFN + "-\xe0\xf2\u0258\u0141\u011f"
@@ -647,6 +690,38 @@ def _is_ipv6_enabled():
647690
# the byte 0xff. Skip some unicode filename tests.
648691
pass
649692

693+
# TESTFN_UNDECODABLE is a filename (bytes type) that should *not* be able to be
694+
# decoded from the filesystem encoding (in strict mode). It can be None if we
695+
# cannot generate such a filename (e.g. the latin1 encoding can decode any byte
696+
# sequence). On UNIX, TESTFN_UNDECODABLE can be decoded by os.fsdecode() thanks
697+
# to the surrogateescape error handler (PEP 383), but not from the filesystem
698+
# encoding in strict mode.
699+
TESTFN_UNDECODABLE = None
700+
for name in (
701+
# b'\xff' is not decodable by os.fsdecode() with code page 932. Windows
702+
# accepts it to create a file or a directory, but refuses to enter
703+
# such directory (when the bytes name is used). So test b'\xe7' first: it is
704+
# not decodable from cp932.
705+
b'\xe7w\xf0',
706+
# undecodable from ASCII, UTF-8
707+
b'\xff',
708+
# undecodable from iso8859-3, iso8859-6, iso8859-7, cp424, iso8859-8, cp856
709+
# and cp857
710+
b'\xae\xd5',
711+
# undecodable from UTF-8 (UNIX and Mac OS X)
712+
b'\xed\xb2\x80', b'\xed\xb4\x80',
713+
):
714+
try:
715+
name.decode(TESTFN_ENCODING)
716+
except UnicodeDecodeError:
717+
TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name
718+
break
719+
720+
if FS_NONASCII:
721+
TESTFN_NONASCII = TESTFN + '-' + FS_NONASCII
722+
else:
723+
TESTFN_NONASCII = None
724+
650725
# Save the initial cwd
651726
SAVEDCWD = os.getcwd()
652727

Lib/test/test_cmd_line.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -93,15 +93,15 @@ def test_run_code(self):
9393
# All good if execution is successful
9494
assert_python_ok('-c', 'pass')
9595

96-
@unittest.skipIf(sys.getfilesystemencoding() == 'ascii',
97-
'need a filesystem encoding different than ASCII')
96+
@unittest.skipUnless(test.support.FS_NONASCII, 'need support.FS_NONASCII')
9897
def test_non_ascii(self):
9998
# Test handling of non-ascii data
10099
if test.support.verbose:
101100
import locale
102101
print('locale encoding = %s, filesystem encoding = %s'
103102
% (locale.getpreferredencoding(), sys.getfilesystemencoding()))
104-
command = "assert(ord('\xe9') == 0xe9)"
103+
command = ("assert(ord(%r) == %s)"
104+
% (test.support.FS_NONASCII, ord(test.support.FS_NONASCII)))
105105
assert_python_ok('-c', command)
106106

107107
# On Windows, passing bytes to subprocess doesn't test how Python decodes the

Lib/test/test_cmd_line_script.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -363,14 +363,30 @@ def test_pep_409_verbiage(self):
363363
self.assertTrue(text[1].startswith(' File '))
364364
self.assertTrue(text[3].startswith('NameError'))
365365

366-
def test_non_utf8(self):
366+
def test_non_ascii(self):
367+
# Mac OS X denies the creation of a file with an invalid UTF-8 name.
368+
# Windows allows creating a file with an arbitrary bytes name, but
369+
# Python cannot pass an undecodable bytes argument to a subprocess.
370+
#if (support.TESTFN_UNDECODABLE
371+
#and sys.platform not in ('win32', 'darwin')):
372+
# name = os.fsdecode(support.TESTFN_UNDECODABLE)
373+
#elif support.TESTFN_NONASCII:
374+
if support.TESTFN_NONASCII:
375+
name = support.TESTFN_NONASCII
376+
else:
377+
self.skipTest("need support.TESTFN_NONASCII")
378+
367379
# Issue #16218
368-
with temp_dir() as script_dir:
369-
script_name = _make_test_script(script_dir,
370-
'\udcf1\udcea\udcf0\udce8\udcef\udcf2')
371-
self._check_script(script_name, script_name, script_name,
372-
script_dir, None,
373-
importlib.machinery.SourceFileLoader)
380+
source = 'print(ascii(__file__))\n'
381+
script_name = _make_test_script(os.curdir, name, source)
382+
self.addCleanup(support.unlink, script_name)
383+
rc, stdout, stderr = assert_python_ok(script_name)
384+
self.assertEqual(
385+
ascii(script_name),
386+
stdout.rstrip().decode('ascii'),
387+
'stdout=%r stderr=%r' % (stdout, stderr))
388+
self.assertEqual(0, rc)
389+
374390

375391
def test_main():
376392
support.run_unittest(CmdLineTest)

Lib/test/test_os.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,8 @@ class Pep383Tests(unittest.TestCase):
12431243
def setUp(self):
12441244
if support.TESTFN_UNENCODABLE:
12451245
self.dir = support.TESTFN_UNENCODABLE
1246+
elif support.TESTFN_NONASCII:
1247+
self.dir = support.TESTFN_NONASCII
12461248
else:
12471249
self.dir = support.TESTFN
12481250
self.bdir = os.fsencode(self.dir)
@@ -1257,6 +1259,8 @@ def add_filename(fn):
12571259
add_filename(support.TESTFN_UNICODE)
12581260
if support.TESTFN_UNENCODABLE:
12591261
add_filename(support.TESTFN_UNENCODABLE)
1262+
if support.TESTFN_NONASCII:
1263+
add_filename(support.TESTFN_NONASCII)
12601264
if not bytesfn:
12611265
self.skipTest("couldn't create any non-ascii filename")
12621266

0 commit comments

Comments
 (0)