Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit e8d5145

Browse files
author
Victor Stinner
committed
Create os.fsdecode(): decode from the filesystem encoding with surrogateescape
error handler, or strict error handler on Windows. * Rewrite os.fsencode() documentation * Improve os.fsencode and os.fsdecode() tests using the new PYTHONFSENCODING environment variable
1 parent dbe6042 commit e8d5145

5 files changed

Lines changed: 95 additions & 34 deletions

File tree

Doc/library/os.rst

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,13 +155,26 @@ process and user.
155155
These functions are described in :ref:`os-file-dir`.
156156

157157

158-
.. function:: fsencode(value)
158+
.. function:: fsencode(filename)
159159

160-
Encode *value* to bytes for use in the file system, environment variables or
161-
the command line. Use :func:`sys.getfilesystemencoding` and
162-
``'surrogateescape'`` error handler for strings and return bytes unchanged.
163-
On Windows, use ``'strict'`` error handler for strings if the file system
164-
encoding is ``'mbcs'`` (which is the default encoding).
160+
Encode *filename* to the filesystem encoding with ``'surrogateescape'``
161+
error handler, return :class:`bytes` unchanged. On Windows, use ``'strict'``
162+
error handler if the filesystem encoding is ``'mbcs'`` (which is the default
163+
encoding).
164+
165+
:func:`fsdecode` is the reverse function.
166+
167+
.. versionadded:: 3.2
168+
169+
170+
.. function:: fsdecode(filename)
171+
172+
Decode *filename* from the filesystem encoding with ``'surrogateescape'``
173+
error handler, return :class:`str` unchanged. On Windows, use ``'strict'``
174+
error handler if the filesystem encoding is ``'mbcs'`` (which is the default
175+
encoding).
176+
177+
:func:`fsencode` is the reverse function.
165178

166179
.. versionadded:: 3.2
167180

Doc/whatsnew/3.2.rst

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -237,13 +237,16 @@ Major performance enhancements have been added:
237237
* Stub
238238

239239

240-
Unicode
241-
=======
240+
Filenames and Unicode
241+
=====================
242242

243243
The filesystem encoding can be specified by setting the
244244
:envvar:`PYTHONFSENCODING` environment variable before running the interpreter.
245245
The value should be a string in the form ``<encoding>``, e.g. ``utf-8``.
246246

247+
The :mod:`os` module has two new functions: :func:`os.fsencode` and
248+
:func:`os.fsdecode`.
249+
247250

248251
IDLE
249252
====

Lib/os.py

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -402,8 +402,7 @@ def get_exec_path(env=None):
402402
path_list = path_listb
403403

404404
if path_list is not None and isinstance(path_list, bytes):
405-
path_list = path_list.decode(sys.getfilesystemencoding(),
406-
'surrogateescape')
405+
path_list = fsdecode(path_list)
407406

408407
if path_list is None:
409408
path_list = defpath
@@ -536,19 +535,39 @@ def getenvb(key, default=None):
536535

537536
__all__.extend(("environb", "getenvb"))
538537

539-
def fsencode(value):
540-
"""Encode value for use in the file system, environment variables
541-
or the command line."""
542-
if isinstance(value, bytes):
543-
return value
544-
elif isinstance(value, str):
538+
def fsencode(filename):
539+
"""
540+
Encode filename to the filesystem encoding with 'surrogateescape' error
541+
handler, return bytes unchanged. On Windows, use 'strict' error handler if
542+
the file system encoding is 'mbcs' (which is the default encoding).
543+
"""
544+
if isinstance(filename, bytes):
545+
return filename
546+
elif isinstance(filename, str):
547+
encoding = sys.getfilesystemencoding()
548+
if encoding == 'mbcs':
549+
return filename.encode(encoding)
550+
else:
551+
return filename.encode(encoding, 'surrogateescape')
552+
else:
553+
raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
554+
555+
def fsdecode(filename):
556+
"""
557+
Decode filename from the filesystem encoding with 'surrogateescape' error
558+
handler, return str unchanged. On Windows, use 'strict' error handler if
559+
the file system encoding is 'mbcs' (which is the default encoding).
560+
"""
561+
if isinstance(filename, str):
562+
return filename
563+
elif isinstance(filename, bytes):
545564
encoding = sys.getfilesystemencoding()
546565
if encoding == 'mbcs':
547-
return value.encode(encoding)
566+
return filename.decode(encoding)
548567
else:
549-
return value.encode(encoding, 'surrogateescape')
568+
return filename.decode(encoding, 'surrogateescape')
550569
else:
551-
raise TypeError("expect bytes or str, not %s" % type(value).__name__)
570+
raise TypeError("expect bytes or str, not %s" % type(filename).__name__)
552571

553572
def _exists(name):
554573
return name in globals()

Lib/test/test_os.py

Lines changed: 38 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -897,14 +897,6 @@ def test_setregid_neg1(self):
897897

898898
class Pep383Tests(unittest.TestCase):
899899
def setUp(self):
900-
def fsdecode(filename):
901-
encoding = sys.getfilesystemencoding()
902-
if encoding == 'mbcs':
903-
errors = 'strict'
904-
else:
905-
errors = 'surrogateescape'
906-
return filename.decode(encoding, errors)
907-
908900
if support.TESTFN_UNENCODABLE:
909901
self.dir = support.TESTFN_UNENCODABLE
910902
else:
@@ -930,7 +922,7 @@ def add_filename(fn):
930922
for fn in bytesfn:
931923
f = open(os.path.join(self.bdir, fn), "w")
932924
f.close()
933-
fn = fsdecode(fn)
925+
fn = os.fsdecode(fn)
934926
if fn in self.unicodefn:
935927
raise ValueError("duplicate filename")
936928
self.unicodefn.add(fn)
@@ -1139,12 +1131,43 @@ def check_stat(self, link, target):
11391131
self.assertNotEqual(os.lstat(link), os.stat(link))
11401132

11411133

1142-
class MiscTests(unittest.TestCase):
1134+
class FSEncodingTests(unittest.TestCase):
1135+
def test_nop(self):
1136+
self.assertEquals(os.fsencode(b'abc\xff'), b'abc\xff')
1137+
self.assertEquals(os.fsdecode('abc\u0141'), 'abc\u0141')
11431138

1144-
@unittest.skipIf(os.name == "nt", "POSIX specific test")
1145-
def test_fsencode(self):
1146-
self.assertEquals(os.fsencode(b'ab\xff'), b'ab\xff')
1147-
self.assertEquals(os.fsencode('ab\uDCFF'), b'ab\xff')
1139+
def test_identity(self):
1140+
# assert fsdecode(fsencode(x)) == x
1141+
for fn in ('unicode\u0141', 'latin\xe9', 'ascii'):
1142+
try:
1143+
bytesfn = os.fsencode(fn)
1144+
except UnicodeEncodeError:
1145+
continue
1146+
self.assertEquals(os.fsdecode(bytesfn), fn)
1147+
1148+
def get_output(self, fs_encoding, func):
1149+
env = os.environ.copy()
1150+
env['PYTHONIOENCODING'] = 'utf-8'
1151+
env['PYTHONFSENCODING'] = fs_encoding
1152+
code = 'import os; print(%s, end="")' % func
1153+
process = subprocess.Popen(
1154+
[sys.executable, "-c", code],
1155+
stdout=subprocess.PIPE, env=env)
1156+
stdout, stderr = process.communicate()
1157+
self.assertEqual(process.returncode, 0)
1158+
return stdout.decode('utf-8')
1159+
1160+
def test_encodings(self):
1161+
def check(encoding, bytesfn, unicodefn):
1162+
encoded = self.get_output(encoding, 'repr(os.fsencode(%a))' % unicodefn)
1163+
self.assertEqual(encoded, repr(bytesfn))
1164+
1165+
decoded = self.get_output(encoding, 'repr(os.fsdecode(%a))' % bytesfn)
1166+
self.assertEqual(decoded, repr(unicodefn))
1167+
1168+
check('ascii', b'abc\xff', 'abc\udcff')
1169+
check('utf-8', b'\xc3\xa9\x80', '\xe9\udc80')
1170+
check('iso-8859-15', b'\xef\xa4', '\xef\u20ac')
11481171

11491172

11501173
def test_main():
@@ -1163,7 +1186,7 @@ def test_main():
11631186
Pep383Tests,
11641187
Win32KillTests,
11651188
Win32SymlinkTests,
1166-
MiscTests,
1189+
FSEncodingTests,
11671190
)
11681191

11691192
if __name__ == "__main__":

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ Extensions
116116
Library
117117
-------
118118

119+
- Create os.fsdecode(): decode from the filesystem encoding with
120+
surrogateescape error handler, or strict error handler on Windows.
121+
119122
- Issue #3488: Provide convenient shorthand functions ``gzip.compress``
120123
and ``gzip.decompress``. Original patch by Anand B. Pillai.
121124

0 commit comments

Comments
 (0)