@@ -1690,17 +1690,18 @@ PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end)
16901690 return result ;
16911691}
16921692
1693- /* Get -*- encoding -*- from a Python file.
1693+ /* Get the encoding of a Python file. Check for the coding cookie and check if
1694+ the file starts with a BOM.
16941695
1695- PyTokenizer_FindEncoding returns NULL when it can't find the encoding in
1696- the first or second line of the file (in which case the encoding
1697- should be assumed to be PyUnicode_GetDefaultEncoding()).
1696+ PyTokenizer_FindEncodingFilename() returns NULL when it can't find the
1697+ encoding in the first or second line of the file (in which case the encoding
1698+ should be assumed to be UTF-8).
1699+
1700+ The char* returned is malloc'ed via PyMem_MALLOC() and thus must be freed
1701+ by the caller. */
16981702
1699- The char * returned is malloc'ed via PyMem_MALLOC() and thus must be freed
1700- by the caller.
1701- */
17021703char *
1703- PyTokenizer_FindEncoding (int fd )
1704+ PyTokenizer_FindEncodingFilename (int fd , PyObject * filename )
17041705{
17051706 struct tok_state * tok ;
17061707 FILE * fp ;
@@ -1720,9 +1721,18 @@ PyTokenizer_FindEncoding(int fd)
17201721 return NULL ;
17211722 }
17221723#ifndef PGEN
1723- tok -> filename = PyUnicode_FromString ("<string>" );
1724- if (tok -> filename == NULL )
1725- goto error ;
1724+ if (filename != NULL ) {
1725+ Py_INCREF (filename );
1726+ tok -> filename = filename ;
1727+ }
1728+ else {
1729+ tok -> filename = PyUnicode_FromString ("<string>" );
1730+ if (tok -> filename == NULL ) {
1731+ fclose (fp );
1732+ PyTokenizer_Free (tok );
1733+ return encoding ;
1734+ }
1735+ }
17261736#endif
17271737 while (tok -> lineno < 2 && tok -> done == E_OK ) {
17281738 PyTokenizer_Get (tok , & p_start , & p_end );
@@ -1733,13 +1743,16 @@ PyTokenizer_FindEncoding(int fd)
17331743 if (encoding )
17341744 strcpy (encoding , tok -> encoding );
17351745 }
1736- #ifndef PGEN
1737- error :
1738- #endif
17391746 PyTokenizer_Free (tok );
17401747 return encoding ;
17411748}
17421749
1750+ char *
1751+ PyTokenizer_FindEncoding (int fd )
1752+ {
1753+ return PyTokenizer_FindEncodingFilename (fd , NULL );
1754+ }
1755+
17431756#ifdef Py_DEBUG
17441757
17451758void
0 commit comments