Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2593146

Browse files
committed
Bug #2301: Don't try decoding the source code into the original
encoding for syntax errors.
1 parent ddaa706 commit 2593146

4 files changed

Lines changed: 18 additions & 74 deletions

File tree

Lib/test/test_pep263.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,13 @@ def test_compilestring(self):
2323
exec(c, d)
2424
self.assertEqual(d['u'], '\xf3')
2525

26+
def test_issue2301(self):
27+
try:
28+
compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
29+
except SyntaxError as v:
30+
self.assertEquals(v.text, "print '\u5e74'")
31+
else:
32+
self.fail()
2633

2734
def test_main():
2835
test_support.run_unittest(PEP263Test)

Misc/NEWS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ What's New in Python 3.0a4?
99

1010
*Release date: XX-XXX-2008*
1111

12+
Core and Builtins
13+
-----------------
14+
15+
- Bug #2301: Don't try decoding the source code into the original
16+
encoding for syntax errors.
17+
1218
Extension Modules
1319
-----------------
1420

Parser/parsetok.c

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -213,21 +213,16 @@ parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
213213
err_ret->error = E_EOF;
214214
err_ret->lineno = tok->lineno;
215215
if (tok->buf != NULL) {
216-
char *text = NULL;
217216
size_t len;
218217
assert(tok->cur - tok->buf < INT_MAX);
219218
err_ret->offset = (int)(tok->cur - tok->buf);
220219
len = tok->inp - tok->buf;
221-
text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset);
222-
if (text == NULL) {
223-
text = (char *) PyObject_MALLOC(len + 1);
224-
if (text != NULL) {
225-
if (len > 0)
226-
strncpy(text, tok->buf, len);
227-
text[len] = '\0';
228-
}
220+
err_ret->text = (char *) PyObject_MALLOC(len + 1);
221+
if (err_ret->text != NULL) {
222+
if (len > 0)
223+
strncpy(err_ret->text, tok->buf, len);
224+
err_ret->text[len] = '\0';
229225
}
230-
err_ret->text = text;
231226
}
232227
} else if (tok->encoding != NULL) {
233228
node* r = PyNode_New(encoding_decl);

Parser/tokenizer.c

Lines changed: 0 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1579,70 +1579,6 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
15791579
return result;
15801580
}
15811581

1582-
/* This function is only called from parsetok. However, it cannot live
1583-
there, as it must be empty for PGEN, and we can check for PGEN only
1584-
in this file. */
1585-
1586-
#ifdef PGEN
1587-
char*
1588-
PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset)
1589-
{
1590-
return NULL;
1591-
}
1592-
#else
1593-
static PyObject *
1594-
dec_utf8(const char *enc, const char *text, size_t len) {
1595-
PyObject *ret = NULL;
1596-
PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");
1597-
if (unicode_text) {
1598-
ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");
1599-
Py_DECREF(unicode_text);
1600-
}
1601-
if (!ret) {
1602-
PyErr_Clear();
1603-
}
1604-
else {
1605-
assert(PyString_Check(ret));
1606-
}
1607-
return ret;
1608-
}
1609-
1610-
char *
1611-
PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset)
1612-
{
1613-
char *text = NULL;
1614-
if (tok->encoding) {
1615-
/* convert source to original encondig */
1616-
PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);
1617-
if (lineobj != NULL) {
1618-
int linelen = PyString_GET_SIZE(lineobj);
1619-
const char *line = PyString_AS_STRING(lineobj);
1620-
text = PyObject_MALLOC(linelen + 1);
1621-
if (text != NULL && line != NULL) {
1622-
if (linelen)
1623-
strncpy(text, line, linelen);
1624-
text[linelen] = '\0';
1625-
}
1626-
Py_DECREF(lineobj);
1627-
1628-
/* adjust error offset */
1629-
if (*offset > 1) {
1630-
PyObject *offsetobj = dec_utf8(tok->encoding,
1631-
tok->buf,
1632-
*offset-1);
1633-
if (offsetobj) {
1634-
*offset = 1 + Py_SIZE(offsetobj);
1635-
Py_DECREF(offsetobj);
1636-
}
1637-
}
1638-
1639-
}
1640-
}
1641-
return text;
1642-
1643-
}
1644-
#endif
1645-
16461582
/* Get -*- encoding -*- from a Python file.
16471583
16481584
PyTokenizer_FindEncoding returns NULL when it can't find the encoding in

0 commit comments

Comments
 (0)