Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit ba9c664

Browse files
author
Victor Stinner
committed
Issue #10419: Fix build_scripts command of distutils to correctly handle
non-ASCII scripts. Open and write the script in binary mode, but ensure that the shebang is decodable from UTF-8 and from the encoding of the script.
1 parent 1367265 commit ba9c664

2 files changed

Lines changed: 40 additions & 11 deletions

File tree

Lib/distutils/command/build_scripts.py

Lines changed: 36 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111
from distutils.dep_util import newer
1212
from distutils.util import convert_path, Mixin2to3
1313
from distutils import log
14+
import sys
15+
import tokenize
1416

1517
# check if Python is called on the first line with this expression
16-
first_line_re = re.compile('^#!.*python[0-9.]*([ \t].*)?$')
18+
first_line_re = re.compile(b'^#!.*python[0-9.]*([ \t].*)?$')
1719

1820
class build_scripts(Command):
1921

@@ -74,12 +76,14 @@ def copy_scripts(self):
7476
# that way, we'll get accurate feedback if we can read the
7577
# script.
7678
try:
77-
f = open(script, "r")
79+
f = open(script, "rb")
7880
except IOError:
7981
if not self.dry_run:
8082
raise
8183
f = None
8284
else:
85+
encoding, lines = tokenize.detect_encoding(f.readline)
86+
f.seek(0)
8387
first_line = f.readline()
8488
if not first_line:
8589
self.warn("%s is an empty file (skipping)" % script)
@@ -88,25 +92,46 @@ def copy_scripts(self):
8892
match = first_line_re.match(first_line)
8993
if match:
9094
adjust = True
91-
post_interp = match.group(1) or ''
95+
post_interp = match.group(1) or b''
9296

9397
if adjust:
9498
log.info("copying and adjusting %s -> %s", script,
9599
self.build_dir)
96100
updated_files.append(outfile)
97101
if not self.dry_run:
98-
outf = open(outfile, "w")
99102
if not sysconfig.python_build:
100-
outf.write("#!%s%s\n" %
101-
(self.executable,
102-
post_interp))
103+
executable = self.executable
103104
else:
104-
outf.write("#!%s%s\n" %
105-
(os.path.join(
105+
executable = os.path.join(
106106
sysconfig.get_config_var("BINDIR"),
107107
"python%s%s" % (sysconfig.get_config_var("VERSION"),
108-
sysconfig.get_config_var("EXE"))),
109-
post_interp))
108+
sysconfig.get_config_var("EXE")))
109+
executable = executable.encode(sys.getfilesystemencoding(),
110+
'surrogateescape')
111+
shebang = b"#!" + executable + post_interp + b"\n"
112+
# Python parser starts to read a script using UTF-8 until
113+
# it gets a #coding:xxx cookie. The shebang has to be the
114+
# first line of a file, the #coding:xxx cookie cannot be
115+
# written before. So the shebang has to be decodable from
116+
# UTF-8.
117+
try:
118+
shebang.decode('utf-8')
119+
except UnicodeDecodeError:
120+
raise ValueError(
121+
"The shebang ({!r}) is not decodable "
122+
"from utf-8".format(shebang))
123+
# If the script is encoded to a custom encoding (use a
124+
# #coding:xxx cookie), the shebang has to be decodable from
125+
# the script encoding too.
126+
try:
127+
shebang.decode(encoding)
128+
except UnicodeDecodeError:
129+
raise ValueError(
130+
"The shebang ({!r}) is not decodable "
131+
"from the script encoding ({})"
132+
.format(shebang, encoding))
133+
outf = open(outfile, "wb")
134+
outf.write(shebang)
110135
outf.writelines(f.readlines())
111136
outf.close()
112137
if f:

Misc/NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,10 @@ Core and Builtins
6969
Library
7070
-------
7171

72+
- Issue #10419: Fix build_scripts command of distutils to correctly handle
73+
non-ASCII scripts. Open and write the script in binary mode, but ensure that
74+
the shebang is decodable from UTF-8 and from the encoding of the script.
75+
7276
- Issue #12012: ssl.PROTOCOL_SSLv2 becomes optional.
7377

7478
- Issue #11164: Stop trying to use _xmlplus in the xml module.

0 commit comments

Comments
 (0)