@@ -603,6 +603,49 @@ def _is_ipv6_enabled():
603603# module name.
604604TESTFN = "{}_{}_tmp" .format (TESTFN , os .getpid ())
605605
# FS_NONASCII: a single non-ASCII character that survives an exact round trip
# through os.fsencode() / os.fsdecode(), or None if there is no such character
# among the candidates below.
FS_NONASCII = None
for character in (
    # First try printable and common characters to have a readable filename.
    # For each character, the listed encodings are just examples of encodings
    # able to encode the character (the lists are not exhaustive).

    # U+00E6 (Latin Small Letter Ae): cp1252, iso-8859-1
    '\u00E6',
    # U+0130 (Latin Capital Letter I With Dot Above): cp1254, iso8859_3
    '\u0130',
    # U+0141 (Latin Capital Letter L With Stroke): cp1250, cp1257
    '\u0141',
    # U+03C6 (Greek Small Letter Phi): cp1253
    '\u03C6',
    # U+041A (Cyrillic Capital Letter Ka): cp1251
    '\u041A',
    # U+05D0 (Hebrew Letter Alef): cp424
    '\u05D0',
    # U+060C (Arabic Comma): cp864, cp1006, iso8859_6, mac_arabic
    '\u060C',
    # U+062A (Arabic Letter Teh): cp720
    '\u062A',
    # U+0E01 (Thai Character Ko Kai): cp874
    '\u0E01',

    # Then try more "special" characters. "special" because they may be
    # interpreted or displayed differently depending on the exact locale
    # encoding and the font.

    # U+00A0 (No-Break Space)
    '\u00A0',
    # U+20AC (Euro Sign)
    '\u20AC',
):
    try:
        # Not raising is not enough: a filesystem encoder configured with a
        # lossy error handler can substitute a placeholder instead of
        # failing. Only accept the candidate if it comes back unchanged.
        if os.fsdecode(os.fsencode(character)) != character:
            raise UnicodeError
    except UnicodeError:
        # Not representable in the filesystem encoding; try the next one.
        pass
    else:
        FS_NONASCII = character
        break
606649
607650# TESTFN_UNICODE is a non-ascii filename
608651TESTFN_UNICODE = TESTFN + "-\xe0 \xf2 \u0258 \u0141 \u011f "
@@ -647,6 +690,38 @@ def _is_ipv6_enabled():
647690 # the byte 0xff. Skip some unicode filename tests.
648691 pass
649692
# TESTFN_UNDECODABLE is a filename (bytes type) that should *not* be decodable
# from the filesystem encoding (in strict mode). It can be None if we cannot
# generate such a filename (e.g. the latin1 encoding can decode any byte
# sequence). On UNIX, TESTFN_UNDECODABLE can be decoded by os.fsdecode() thanks
# to the surrogateescape error handler (PEP 383), but not from the filesystem
# encoding in strict mode.
TESTFN_UNDECODABLE = None
for name in (
    # b'\xff' is not decodable by os.fsdecode() with code page 932. Windows
    # may accept it when creating a file or a directory, or may refuse to
    # enter such a directory (when the bytes name is used). So test b'\xe7'
    # first: it is not decodable from cp932.
    b'\xe7w\xf0',
    # undecodable from ASCII, UTF-8
    b'\xff',
    # undecodable from iso8859-3, iso8859-6, iso8859-7, cp424, iso8859-8, cp856
    # and cp857
    # (The trailing comma matters: without it this literal is implicitly
    # concatenated with the next one and the candidates below are never
    # tried individually.)
    b'\xae\xd5',
    # undecodable from UTF-8 (UNIX and Mac OS X)
    b'\xed\xb2\x80', b'\xed\xb4\x80',
):
    try:
        name.decode(TESTFN_ENCODING)
    except UnicodeDecodeError:
        # First candidate the strict decoder rejects: build the bytes filename
        # from the encoded TESTFN prefix plus the undecodable suffix.
        TESTFN_UNDECODABLE = os.fsencode(TESTFN) + name
        break
719+
# TESTFN_NONASCII: TESTFN extended with '-' and the FS_NONASCII character,
# or None when FS_NONASCII is unset/empty.
TESTFN_NONASCII = (TESTFN + '-' + FS_NONASCII) if FS_NONASCII else None
724+
650725# Save the initial cwd
651726SAVEDCWD = os .getcwd ()
652727
0 commit comments