Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f5b5224

Browse files
committed
ignore the coding cookie in compile(), exec(), and eval() if the source is a string #4626
1 parent 0663a1e commit f5b5224

10 files changed

Lines changed: 63 additions & 14 deletions

File tree

Include/parsetok.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ typedef struct {
2929
#define PyPARSE_UNICODE_LITERALS 0x0008
3030
#endif
3131

32+
#define PyPARSE_IGNORE_COOKIE 0x0010
33+
3234
PyAPI_FUNC(node *) PyParser_ParseString(const char *, grammar *, int,
3335
perrdetail *);
3436
PyAPI_FUNC(node *) PyParser_ParseFile (FILE *, const char *, grammar *, int,

Include/pythonrun.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ extern "C" {
1212
#define PyCF_SOURCE_IS_UTF8 0x0100
1313
#define PyCF_DONT_IMPLY_DEDENT 0x0200
1414
#define PyCF_ONLY_AST 0x0400
15+
#define PyCF_IGNORE_COOKIE 0x0800
1516

1617
typedef struct {
1718
int cf_flags; /* bitmask of CO_xxx flags relevant to future */

Lib/test/test_coding.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,10 @@ def verify_bad_module(self, module_name):
1717

1818
path = os.path.dirname(__file__)
1919
filename = os.path.join(path, module_name + '.py')
20-
fp = open(filename, encoding='utf-8')
21-
text = fp.read()
20+
fp = open(filename, "rb")
21+
bytes = fp.read()
2222
fp.close()
23-
self.assertRaises(SyntaxError, compile, text, filename, 'exec')
23+
self.assertRaises(SyntaxError, compile, bytes, filename, 'exec')
2424

2525
def test_exec_valid_coding(self):
2626
d = {}

Lib/test/test_pep263.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ def test_issue2301(self):
3030
else:
3131
self.fail()
3232

33+
def test_issue4626(self):
34+
c = compile("# coding=latin-1\n\u00c6 = '\u00c6'", "dummy", "exec")
35+
d = {}
36+
exec(c, d)
37+
self.assertEquals(d['\xc6'], '\xc6')
38+
3339
def test_main():
3440
support.run_unittest(PEP263Test)
3541

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ Core and Builtins
1919
- Issue #5249: time.strftime returned malformed string when format string
2020
contained non ascii character on windows.
2121

22+
- Issue #4626: compile(), exec(), and eval() ignore the coding cookie if the
23+
source has already been decoded into str.
24+
2225
- Issue #5186: Reduce hash collisions for objects with no __hash__ method by
2326
rotating the object pointer by 4 bits to the right.
2427

Parser/parsetok.c

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,11 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
4949

5050
initerr(err_ret, filename);
5151

52-
if ((tok = PyTokenizer_FromString(s)) == NULL) {
52+
if (*flags & PyPARSE_IGNORE_COOKIE)
53+
tok = PyTokenizer_FromUTF8(s);
54+
else
55+
tok = PyTokenizer_FromString(s);
56+
if (tok == NULL) {
5357
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
5458
return NULL;
5559
}

Parser/tokenizer.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,28 @@ PyTokenizer_FromString(const char *str)
715715
return tok;
716716
}
717717

718+
struct tok_state *
719+
PyTokenizer_FromUTF8(const char *str)
720+
{
721+
struct tok_state *tok = tok_new();
722+
if (tok == NULL)
723+
return NULL;
724+
tok->decoding_state = STATE_RAW;
725+
tok->read_coding_spec = 1;
726+
tok->enc = NULL;
727+
tok->str = str;
728+
tok->encoding = (char *)PyMem_MALLOC(6);
729+
if (!tok->encoding) {
730+
PyTokenizer_Free(tok);
731+
return NULL;
732+
}
733+
strcpy(tok->encoding, "utf-8");
734+
735+
/* XXX: constify members. */
736+
tok->buf = tok->cur = tok->end = tok->inp = (char*)str;
737+
return tok;
738+
}
739+
718740

719741
/* Set up tokenizer for file */
720742

Parser/tokenizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ struct tok_state {
6161
};
6262

6363
extern struct tok_state *PyTokenizer_FromString(const char *);
64+
extern struct tok_state *PyTokenizer_FromUTF8(const char *);
6465
extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
6566
char *, char *);
6667
extern void PyTokenizer_Free(struct tok_state *);

Python/bltinmodule.c

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -494,12 +494,13 @@ PyDoc_STR(
494494

495495

496496
static char *
497-
source_as_string(PyObject *cmd, char *funcname, char *what)
497+
source_as_string(PyObject *cmd, char *funcname, char *what, PyCompilerFlags *cf)
498498
{
499499
char *str;
500500
Py_ssize_t size;
501501

502502
if (PyUnicode_Check(cmd)) {
503+
cf->cf_flags |= PyCF_IGNORE_COOKIE;
503504
cmd = _PyUnicode_AsDefaultEncodedString(cmd, NULL);
504505
if (cmd == NULL)
505506
return NULL;
@@ -591,7 +592,7 @@ builtin_compile(PyObject *self, PyObject *args, PyObject *kwds)
591592
return result;
592593
}
593594

594-
str = source_as_string(cmd, "compile", "string, bytes, AST or code");
595+
str = source_as_string(cmd, "compile", "string, bytes, AST or code", &cf);
595596
if (str == NULL)
596597
return NULL;
597598

@@ -703,14 +704,14 @@ builtin_eval(PyObject *self, PyObject *args)
703704
return PyEval_EvalCode((PyCodeObject *) cmd, globals, locals);
704705
}
705706

706-
str = source_as_string(cmd, "eval", "string, bytes or code");
707+
cf.cf_flags = PyCF_SOURCE_IS_UTF8;
708+
str = source_as_string(cmd, "eval", "string, bytes or code", &cf);
707709
if (str == NULL)
708710
return NULL;
709711

710712
while (*str == ' ' || *str == '\t')
711713
str++;
712714

713-
cf.cf_flags = PyCF_SOURCE_IS_UTF8;
714715
(void)PyEval_MergeCompilerFlags(&cf);
715716
result = PyRun_StringFlags(str, Py_eval_input, globals, locals, &cf);
716717
Py_XDECREF(tmp);
@@ -779,12 +780,13 @@ builtin_exec(PyObject *self, PyObject *args)
779780
v = PyEval_EvalCode((PyCodeObject *) prog, globals, locals);
780781
}
781782
else {
782-
char *str = source_as_string(prog, "exec",
783-
"string, bytes or code");
783+
char *str;
784784
PyCompilerFlags cf;
785+
cf.cf_flags = PyCF_SOURCE_IS_UTF8;
786+
str = source_as_string(prog, "exec",
787+
"string, bytes or code", &cf);
785788
if (str == NULL)
786789
return NULL;
787-
cf.cf_flags = PyCF_SOURCE_IS_UTF8;
788790
if (PyEval_MergeCompilerFlags(&cf))
789791
v = PyRun_StringFlags(str, Py_file_input, globals,
790792
locals, &cf);

Python/pythonrun.c

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,9 +1002,17 @@ PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flag
10021002
}
10031003

10041004
/* compute parser flags based on compiler flags */
1005-
#define PARSER_FLAGS(flags) \
1006-
((flags) ? ((((flags)->cf_flags & PyCF_DONT_IMPLY_DEDENT) ? \
1007-
PyPARSE_DONT_IMPLY_DEDENT : 0)) : 0)
1005+
static int PARSER_FLAGS(PyCompilerFlags *flags)
1006+
{
1007+
int parser_flags = 0;
1008+
if (!flags)
1009+
return 0;
1010+
if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT)
1011+
parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
1012+
if (flags->cf_flags & PyCF_IGNORE_COOKIE)
1013+
parser_flags |= PyPARSE_IGNORE_COOKIE;
1014+
return parser_flags;
1015+
}
10081016

10091017
#if 0
10101018
/* Keep an example of flags with future keyword support. */

0 commit comments

Comments
 (0)