@@ -147,13 +147,15 @@ tok_new(void)
147147}
148148
149149static char *
150- new_string (const char * s , Py_ssize_t len )
150+ new_string (const char * s , Py_ssize_t len , struct tok_state * tok )
151151{
152152 char * result = (char * )PyMem_MALLOC (len + 1 );
153- if (result != NULL ) {
154- memcpy ( result , s , len ) ;
155- result [ len ] = '\0' ;
153+ if (! result ) {
154+ tok -> done = E_NOMEM ;
155+ return NULL ;
156156 }
157+ memcpy (result , s , len );
158+ result [len ] = '\0' ;
157159 return result ;
158160}
159161
@@ -174,7 +176,7 @@ decoding_feof(struct tok_state *tok)
174176static char *
175177decode_str (const char * str , int exec_input , struct tok_state * tok )
176178{
177- return new_string (str , strlen (str ));
179+ return new_string (str , strlen (str ), tok );
178180}
179181
180182#else /* PGEN */
@@ -221,17 +223,18 @@ get_normal_name(char *s) /* for utf-8 and latin-1 */
221223
222224/* Return the coding spec in S, or NULL if none is found. */
223225
224- static char *
225- get_coding_spec (const char * s , Py_ssize_t size )
226+ static int
227+ get_coding_spec (const char * s , char * * spec , Py_ssize_t size , struct tok_state * tok )
226228{
227229 Py_ssize_t i ;
230+ * spec = NULL ;
228231 /* Coding spec must be in a comment, and that comment must be
229232 * the only statement on the source code line. */
230233 for (i = 0 ; i < size - 6 ; i ++ ) {
231234 if (s [i ] == '#' )
232235 break ;
233236 if (s [i ] != ' ' && s [i ] != '\t' && s [i ] != '\014' )
234- return NULL ;
237+ return 1 ;
235238 }
236239 for (; i < size - 6 ; i ++ ) { /* XXX inefficient search */
237240 const char * t = s + i ;
@@ -250,17 +253,21 @@ get_coding_spec(const char *s, Py_ssize_t size)
250253 t ++ ;
251254
252255 if (begin < t ) {
253- char * r = new_string (begin , t - begin );
256+ char * r = new_string (begin , t - begin , tok );
257+ if (!r )
258+ return 0 ;
254259 char * q = get_normal_name (r );
255260 if (r != q ) {
256261 PyMem_FREE (r );
257- r = new_string (q , strlen (q ));
262+ r = new_string (q , strlen (q ), tok );
263+ if (!r )
264+ return 0 ;
258265 }
259- return r ;
266+ * spec = r ;
260267 }
261268 }
262269 }
263- return NULL ;
270+ return 1 ;
264271}
265272
266273/* Check whether the line contains a coding spec. If it does,
@@ -272,38 +279,39 @@ static int
272279check_coding_spec (const char * line , Py_ssize_t size , struct tok_state * tok ,
273280 int set_readline (struct tok_state * , const char * ))
274281{
275- char * cs ;
282+ char * cs ;
276283 int r = 1 ;
277284
278285 if (tok -> cont_line )
279286 /* It's a continuation line, so it can't be a coding spec. */
280287 return 1 ;
281- cs = get_coding_spec (line , size );
282- if (cs != NULL ) {
283- tok -> read_coding_spec = 1 ;
284- if (tok -> encoding == NULL ) {
285- assert (tok -> decoding_state == STATE_RAW );
286- if (strcmp (cs , "utf-8" ) == 0 ) {
288+ if (!get_coding_spec (line , & cs , size , tok ))
289+ return 0 ;
290+ if (!cs )
291+ return 1 ;
292+ tok -> read_coding_spec = 1 ;
293+ if (tok -> encoding == NULL ) {
294+ assert (tok -> decoding_state == STATE_RAW );
295+ if (strcmp (cs , "utf-8" ) == 0 ) {
296+ tok -> encoding = cs ;
297+ } else {
298+ r = set_readline (tok , cs );
299+ if (r ) {
287300 tok -> encoding = cs ;
288- } else {
289- r = set_readline (tok , cs );
290- if (r ) {
291- tok -> encoding = cs ;
292- tok -> decoding_state = STATE_NORMAL ;
293- }
294- else {
295- PyErr_Format (PyExc_SyntaxError ,
296- "encoding problem: %s" , cs );
297- PyMem_FREE (cs );
298- }
301+ tok -> decoding_state = STATE_NORMAL ;
299302 }
300- } else { /* then, compare cs with BOM */
301- r = (strcmp (tok -> encoding , cs ) == 0 );
302- if (!r )
303+ else {
303304 PyErr_Format (PyExc_SyntaxError ,
304- "encoding problem: %s with BOM" , cs );
305- PyMem_FREE (cs );
305+ "encoding problem: %s" , cs );
306+ PyMem_FREE (cs );
307+ }
306308 }
309+ } else { /* then, compare cs with BOM */
310+ r = (strcmp (tok -> encoding , cs ) == 0 );
311+ if (!r )
312+ PyErr_Format (PyExc_SyntaxError ,
313+ "encoding problem: %s with BOM" , cs );
314+ PyMem_FREE (cs );
307315 }
308316 return r ;
309317}
@@ -367,7 +375,9 @@ check_bom(int get_char(struct tok_state *),
367375 }
368376 if (tok -> encoding != NULL )
369377 PyMem_FREE (tok -> encoding );
370- tok -> encoding = new_string ("utf-8" , 5 ); /* resulting is in utf-8 */
378+ tok -> encoding = new_string ("utf-8" , 5 , tok );
379+ if (!tok -> encoding )
380+ return 0 ;
371381 /* No need to set_readline: input is already utf-8 */
372382 return 1 ;
373383}
0 commit comments