Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit da78043

Browse files
committed
Latin-1 source code was not being properly decoded when passed through
compile(). This was due to special-casing left over from before UTF-8 became the default source encoding. Closes issue #3574. Thanks to Victor Stinner for help with the patch.
1 parent 9e9dcd6 commit da78043

5 files changed

Lines changed: 24 additions & 10 deletions

File tree

Lib/test/test_pep3120.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,24 @@ def test_badsyntax(self):
2323
else:
2424
self.fail("expected exception didn't occur")
2525

26+
27+
class BuiltinCompileTests(unittest.TestCase):
28+
29+
# Issue 3574.
30+
def test_latin1(self):
31+
# Allow compile() to read Latin-1 source.
32+
source_code = '# coding: Latin-1\nu = "Ç"\n'.encode("Latin-1")
33+
try:
34+
code = compile(source_code, '<dummy>', 'exec')
35+
except SyntaxError:
36+
self.fail("compile() cannot handle Latin-1 source")
37+
ns = {}
38+
exec(code, ns)
39+
self.assertEqual('Ç', ns['u'])
40+
41+
2642
def test_main():
27-
support.run_unittest(PEP3120Test)
43+
support.run_unittest(PEP3120Test, BuiltinCompileTests)
2844

2945
if __name__=="__main__":
3046
test_main()

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ What's New in Python 3.0 beta 5
1515
Core and Builtins
1616
-----------------
1717

18+
- Issue #3574: compile() incorrectly handled source code encoded as Latin-1.
19+
1820
- Issues #2384 and #3975: Tracebacks were not correctly printed when the
1921
source file contains a ``coding:`` header: the wrong line was displayed, and
2022
the encoding was not respected.

Parser/tokenizer.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ tok_new(void)
135135
tok->decoding_state = STATE_INIT;
136136
tok->decoding_erred = 0;
137137
tok->read_coding_spec = 0;
138+
tok->enc = NULL;
138139
tok->encoding = NULL;
139140
tok->cont_line = 0;
140141
#ifndef PGEN
@@ -274,8 +275,7 @@ check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok,
274275
tok->read_coding_spec = 1;
275276
if (tok->encoding == NULL) {
276277
assert(tok->decoding_state == STATE_RAW);
277-
if (strcmp(cs, "utf-8") == 0 ||
278-
strcmp(cs, "iso-8859-1") == 0) {
278+
if (strcmp(cs, "utf-8") == 0) {
279279
tok->encoding = cs;
280280
} else {
281281
r = set_readline(tok, cs);

Parser/tokenizer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,14 @@ struct tok_state {
4949
enum decoding_state decoding_state;
5050
int decoding_erred; /* whether erred in decoding */
5151
int read_coding_spec; /* whether 'coding:...' has been read */
52-
char *encoding;
52+
char *encoding; /* Source encoding. */
5353
int cont_line; /* whether we are in a continuation line. */
5454
const char* line_start; /* pointer to start of current line */
5555
#ifndef PGEN
5656
PyObject *decoding_readline; /* codecs.open(...).readline */
5757
PyObject *decoding_buffer;
5858
#endif
59-
const char* enc;
59+
const char* enc; /* Encoding for the current str. */
6060
const char* str;
6161
};
6262

Python/ast.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3160,9 +3160,6 @@ decode_unicode(struct compiling *c, const char *s, size_t len, int rawmode, cons
31603160
if (encoding == NULL) {
31613161
buf = (char *)s;
31623162
u = NULL;
3163-
} else if (strcmp(encoding, "iso-8859-1") == 0) {
3164-
buf = (char *)s;
3165-
u = NULL;
31663163
} else {
31673164
/* check for integer overflow */
31683165
if (len > PY_SIZE_MAX / 4)
@@ -3275,8 +3272,7 @@ parsestr(struct compiling *c, const node *n, int *bytesmode)
32753272
}
32763273
}
32773274
need_encoding = (!*bytesmode && c->c_encoding != NULL &&
3278-
strcmp(c->c_encoding, "utf-8") != 0 &&
3279-
strcmp(c->c_encoding, "iso-8859-1") != 0);
3275+
strcmp(c->c_encoding, "utf-8") != 0);
32803276
if (rawmode || strchr(s, '\\') == NULL) {
32813277
if (need_encoding) {
32823278
PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);

0 commit comments

Comments
 (0)