Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit aeaa592

Browse files
committed
Merged revisions 76230 via svnmerge from
svn+ssh://[email protected]/python/trunk ........ r76230 | benjamin.peterson | 2009-11-12 17:39:44 -0600 (Thu, 12 Nov 2009) | 2 lines fix several compile() issues by translating newlines in the tokenizer ........
1 parent a1d2332 commit aeaa592

8 files changed

Lines changed: 106 additions & 35 deletions

File tree

Doc/library/functions.rst

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -176,11 +176,15 @@ are always available. They are listed here in alphabetical order.
176176

177177
.. note::
178178

179-
When compiling a string with multi-line statements, line endings must be
180-
represented by a single newline character (``'\n'``), and the input must
181-
be terminated by at least one newline character. If line endings are
182-
represented by ``'\r\n'``, use :meth:`str.replace` to change them into
183-
``'\n'``.
179+
When compiling a string with multi-line statements in ``'single'`` or
180+
``'eval'`` mode, input must be terminated by at least one newline
181+
character. This is to facilitate detection of incomplete and complete
182+
statements in the :mod:`code` module.
183+
184+
185+
.. versionchanged:: 3.2
186+
Allowed use of Windows and Mac newlines. Also input in ``'exec'`` mode
187+
does not have to end in a newline anymore.
184188

185189

186190
.. function:: complex([real[, imag]])

Lib/test/test_codeop.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -295,10 +295,6 @@ def test_filename(self):
295295
self.assertNotEquals(compile_command("a = 1\n", "abc").co_filename,
296296
compile("a = 1\n", "def", 'single').co_filename)
297297

298-
def test_no_universal_newlines(self):
299-
code = compile_command("'\rfoo\r'", symbol='eval')
300-
self.assertEqual(eval(code), '\rfoo\r')
301-
302298

303299
def test_main():
304300
run_unittest(CodeopTests)

Lib/test/test_compile.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,19 @@
55

66
class TestSpecifics(unittest.TestCase):
77

8+
def test_no_ending_newline(self):
9+
compile("hi", "<test>", "exec")
10+
compile("hi\r", "<test>", "exec")
11+
12+
def test_empty(self):
13+
compile("", "<test>", "exec")
14+
15+
def test_other_newlines(self):
16+
compile("\r\n", "<test>", "exec")
17+
compile("\r", "<test>", "exec")
18+
compile("hi\r\nstuff\r\ndef f():\n pass\r", "<test>", "exec")
19+
compile("this_is\rreally_old_mac\rdef f():\n pass", "<test>", "exec")
20+
821
def test_debug_assignment(self):
922
# catch assignments to __debug__
1023
self.assertRaises(SyntaxError, compile, '__debug__ = 1', '?', 'single')

Lib/test/test_parser.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -237,9 +237,9 @@ def walk(tree):
237237
(14, '+', 2, 13),
238238
(2, '1', 2, 15),
239239
(4, '', 2, 16),
240-
(6, '', 2, -1),
241-
(4, '', 2, -1),
242-
(0, '', 2, -1)],
240+
(6, '', 3, -1),
241+
(4, '', 3, -1),
242+
(0, '', 3, -1)],
243243
terminals)
244244

245245
def test_extended_unpacking(self):

Lib/test/test_pep263.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@ def test_issue2301(self):
2626
try:
2727
compile(b"# coding: cp932\nprint '\x94\x4e'", "dummy", "exec")
2828
except SyntaxError as v:
29-
self.assertEquals(v.text, "print '\u5e74'")
29+
self.assertEquals(v.text, "print '\u5e74'\n")
3030
else:
3131
self.fail()
3232

Parser/parsetok.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -46,13 +46,14 @@ PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
4646
perrdetail *err_ret, int *flags)
4747
{
4848
struct tok_state *tok;
49+
int exec_input = start == file_input;
4950

5051
initerr(err_ret, filename);
5152

5253
if (*flags & PyPARSE_IGNORE_COOKIE)
53-
tok = PyTokenizer_FromUTF8(s);
54+
tok = PyTokenizer_FromUTF8(s, exec_input);
5455
else
55-
tok = PyTokenizer_FromString(s);
56+
tok = PyTokenizer_FromString(s, exec_input);
5657
if (tok == NULL) {
5758
err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
5859
return NULL;

Parser/tokenizer.c

Lines changed: 74 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,7 @@ tok_new(void)
119119
tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL;
120120
tok->done = E_OK;
121121
tok->fp = NULL;
122+
tok->input = NULL;
122123
tok->tabsize = TABSIZE;
123124
tok->indent = 0;
124125
tok->indstack[0] = 0;
@@ -145,6 +146,17 @@ tok_new(void)
145146
return tok;
146147
}
147148

149+
static char *
150+
new_string(const char *s, Py_ssize_t len)
151+
{
152+
char* result = (char *)PyMem_MALLOC(len + 1);
153+
if (result != NULL) {
154+
memcpy(result, s, len);
155+
result[len] = '\0';
156+
}
157+
return result;
158+
}
159+
148160
#ifdef PGEN
149161

150162
static char *
@@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok)
159171
return feof(tok->fp);
160172
}
161173

162-
static const char *
163-
decode_str(const char *str, struct tok_state *tok)
174+
static char *
175+
decode_str(const char *str, int exec_input, struct tok_state *tok)
164176
{
165-
return str;
177+
return new_string(str, strlen(str));
166178
}
167179

168180
#else /* PGEN */
@@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */
177189
return NULL; /* as if it were EOF */
178190
}
179191

180-
static char *
181-
new_string(const char *s, Py_ssize_t len)
182-
{
183-
char* result = (char *)PyMem_MALLOC(len + 1);
184-
if (result != NULL) {
185-
memcpy(result, s, len);
186-
result[len] = '\0';
187-
}
188-
return result;
189-
}
190192

191193
static char *
192194
get_normal_name(char *s) /* for utf-8 and latin-1 */
@@ -635,17 +637,63 @@ translate_into_utf8(const char* str, const char* enc) {
635637
return utf8;
636638
}
637639

640+
641+
static char *
642+
translate_newlines(const char *s, int exec_input, struct tok_state *tok) {
643+
int skip_next_lf = 0, length = strlen(s), final_length;
644+
char *buf, *current;
645+
char c;
646+
buf = PyMem_MALLOC(length + 2);
647+
if (buf == NULL) {
648+
tok->done = E_NOMEM;
649+
return NULL;
650+
}
651+
for (current = buf; (c = *s++);) {
652+
if (skip_next_lf) {
653+
skip_next_lf = 0;
654+
if (c == '\n') {
655+
c = *s;
656+
s++;
657+
if (!c)
658+
break;
659+
}
660+
}
661+
if (c == '\r') {
662+
skip_next_lf = 1;
663+
c = '\n';
664+
}
665+
*current = c;
666+
current++;
667+
}
668+
/* If this is exec input, add a newline to the end of the file if
669+
there isn't one already. */
670+
if (exec_input && *current != '\n') {
671+
*current = '\n';
672+
current++;
673+
}
674+
*current = '\0';
675+
final_length = current - buf;
676+
if (final_length < length && final_length)
677+
/* should never fail */
678+
buf = PyMem_REALLOC(buf, final_length + 1);
679+
return buf;
680+
}
681+
638682
/* Decode a byte string STR for use as the buffer of TOK.
639683
Look for encoding declarations inside STR, and record them
640684
inside TOK. */
641685

642686
static const char *
643-
decode_str(const char *str, struct tok_state *tok)
687+
decode_str(const char *input, int single, struct tok_state *tok)
644688
{
645689
PyObject* utf8 = NULL;
690+
const char *str;
646691
const char *s;
647692
const char *newl[2] = {NULL, NULL};
648693
int lineno = 0;
694+
tok->input = str = translate_newlines(input, single, tok);
695+
if (str == NULL)
696+
return NULL;
649697
tok->enc = NULL;
650698
tok->str = str;
651699
if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok))
@@ -696,12 +744,12 @@ decode_str(const char *str, struct tok_state *tok)
696744
/* Set up tokenizer for string */
697745

698746
struct tok_state *
699-
PyTokenizer_FromString(const char *str)
747+
PyTokenizer_FromString(const char *str, int exec_input)
700748
{
701749
struct tok_state *tok = tok_new();
702750
if (tok == NULL)
703751
return NULL;
704-
str = (char *)decode_str(str, tok);
752+
str = (char *)decode_str(str, exec_input, tok);
705753
if (str == NULL) {
706754
PyTokenizer_Free(tok);
707755
return NULL;
@@ -713,11 +761,18 @@ PyTokenizer_FromString(const char *str)
713761
}
714762

715763
struct tok_state *
716-
PyTokenizer_FromUTF8(const char *str)
764+
PyTokenizer_FromUTF8(const char *str, int exec_input)
717765
{
718766
struct tok_state *tok = tok_new();
719767
if (tok == NULL)
720768
return NULL;
769+
#ifndef PGEN
770+
tok->input = str = translate_newlines(str, exec_input, tok);
771+
#endif
772+
if (str == NULL) {
773+
PyTokenizer_Free(tok);
774+
return NULL;
775+
}
721776
tok->decoding_state = STATE_RAW;
722777
tok->read_coding_spec = 1;
723778
tok->enc = NULL;
@@ -734,7 +789,6 @@ PyTokenizer_FromUTF8(const char *str)
734789
return tok;
735790
}
736791

737-
738792
/* Set up tokenizer for file */
739793

740794
struct tok_state *
@@ -780,6 +834,8 @@ PyTokenizer_Free(struct tok_state *tok)
780834
#endif
781835
if (tok->fp != NULL && tok->buf != NULL)
782836
PyMem_FREE(tok->buf);
837+
if (tok->input)
838+
PyMem_FREE((char *)tok->input);
783839
PyMem_FREE(tok);
784840
}
785841

Parser/tokenizer.h

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -58,10 +58,11 @@ struct tok_state {
5858
#endif
5959
const char* enc; /* Encoding for the current str. */
6060
const char* str;
61+
const char* input; /* Tokenizer's newline translated copy of the string. */
6162
};
6263

63-
extern struct tok_state *PyTokenizer_FromString(const char *);
64-
extern struct tok_state *PyTokenizer_FromUTF8(const char *);
64+
extern struct tok_state *PyTokenizer_FromString(const char *, int);
65+
extern struct tok_state *PyTokenizer_FromUTF8(const char *, int);
6566
extern struct tok_state *PyTokenizer_FromFile(FILE *, char*,
6667
char *, char *);
6768
extern void PyTokenizer_Free(struct tok_state *);

0 commit comments

Comments (0)