Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit f62a89b

Browse files
committed
Ignore encoding declarations inside strings. Fixes #603509.
1 parent 65b7282 commit f62a89b

3 files changed

Lines changed: 18 additions & 2 deletions

File tree

Doc/ref/ref2.tex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ \subsection{Encoding declarations\label{encodings}}
101101
the end of a string, and to interpret the contents of Unicode literals.
102102
String literals are converted to Unicode for syntactical analysis,
103103
then converted back to their original encoding before interpretation
104-
starts.
104+
starts. The encoding declaration must appear on a line of its own.
105105

106106
\subsection{Explicit line joining\label{explicit-joining}}
107107

Parser/tokenizer.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ tok_new(void)
128128
tok->read_coding_spec = 0;
129129
tok->issued_encoding_warning = 0;
130130
tok->encoding = NULL;
131+
tok->cont_line = 0;
131132
#ifndef PGEN
132133
tok->decoding_readline = NULL;
133134
tok->decoding_buffer = NULL;
@@ -207,7 +208,15 @@ static char *
207208
get_coding_spec(const char *s, int size)
208209
{
209210
int i;
210-
for (i = 0; i < size - 6; i++) { /* XXX inefficient search */
211+
/* Coding spec must be in a comment, and that comment must be
212+
* the only statement on the source code line. */
213+
for (i = 0; i < size - 6; i++) {
214+
if (s[i] == '#')
215+
break;
216+
if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')
217+
return NULL;
218+
}
219+
for (; i < size - 6; i++) { /* XXX inefficient search */
211220
const char* t = s + i;
212221
if (strncmp(t, "coding", 6) == 0) {
213222
const char* begin = NULL;
@@ -247,6 +256,9 @@ check_coding_spec(const char* line, int size, struct tok_state *tok,
247256
int set_readline(struct tok_state *, const char *))
248257
{
249258
int r = 1;
259+
if (tok->cont_line)
260+
/* It's a continuation line, so it can't be a coding spec. */
261+
return 1;
250262
char* cs = get_coding_spec(line, size);
251263
if (cs != NULL) {
252264
tok->read_coding_spec = 1;
@@ -1158,6 +1170,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
11581170
goto nextline;
11591171
*p_start = tok->start;
11601172
*p_end = tok->cur - 1; /* Leave '\n' out of the string */
1173+
tok->cont_line = 0;
11611174
return NEWLINE;
11621175
}
11631176

@@ -1292,6 +1305,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
12921305
return ERRORTOKEN;
12931306
}
12941307
tripcount = 0;
1308+
tok->cont_line = 1; /* multiline string. */
12951309
}
12961310
else if (c == EOF) {
12971311
if (triple)
@@ -1340,6 +1354,7 @@ tok_get(register struct tok_state *tok, char **p_start, char **p_end)
13401354
tok->cur = tok->inp;
13411355
return ERRORTOKEN;
13421356
}
1357+
tok->cont_line = 1;
13431358
goto again; /* Read next line */
13441359
}
13451360

Parser/tokenizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ struct tok_state {
4545
int read_coding_spec; /* whether 'coding:...' has been read */
4646
int issued_encoding_warning; /* whether non-ASCII warning was issued */
4747
char *encoding;
48+
int cont_line; /* whether we are in a continuation line. */
4849
#ifndef PGEN
4950
PyObject *decoding_readline; /* codecs.open(...).readline */
5051
PyObject *decoding_buffer;

0 commit comments

Comments
 (0)