@@ -904,6 +904,25 @@ rightmost_sep(Py_UCS4 *s)
904904 return found ;
905905}
906906
907+ /* Like rightmost_sep, but operate on unicode objects: scan every character of o and return the index of the last SEP (or ALTSEP, when that macro is defined), or -1 if no separator is present. */
908+ static Py_ssize_t
909+ rightmost_sep_obj (PyObject * o )
910+ {
911+ Py_ssize_t found , i ;
912+ Py_UCS4 c ;
913+ for (found = -1 , i = 0 ; i < PyUnicode_GET_LENGTH (o ); i ++ ) {
914+ c = PyUnicode_READ_CHAR (o , i );
915+ if (c == SEP
916+ #ifdef ALTSEP
917+ || c == ALTSEP
918+ #endif
919+ )
920+ {
921+ found = i ; /* keep overwriting so that the last match wins */
922+ }
923+ }
924+ return found ;
925+ }
907926
908927/* Given a pathname for a Python source file, fill a buffer with the
909928 pathname for the corresponding compiled file. Return the pathname
@@ -915,123 +934,49 @@ rightmost_sep(Py_UCS4 *s)
915934static PyObject *
916935make_compiled_pathname (PyObject * pathstr , int debug )
917936{ /* New implementation (the '+' lines): builds pathstr[:fname] + "__pycache__" + SEP + pathstr[fname:ext] + pyc_tag + ".pyc" (debug != 0) or ".pyo" (debug == 0) directly into a new unicode object; returns NULL if PyUnicode_New fails. */
918- Py_UCS4 * pathname ;
919- Py_UCS4 buf [MAXPATHLEN ];
920- size_t buflen = (size_t )MAXPATHLEN ;
921- size_t len ;
922- size_t i , save ;
923- Py_UCS4 * pos ;
924- int sep = SEP ;
925-
926- pathname = PyUnicode_AsUCS4Copy (pathstr );
927- if (!pathname )
928- return NULL ;
929- len = Py_UCS4_strlen (pathname );
930-
931- /* Sanity check that the buffer has roughly enough space to hold what
932- will eventually be the full path to the compiled file. The 5 extra
933- bytes include the slash after __pycache__, the two extra dots, the
934- extra trailing character ('c' or 'o') and null. This isn't exact
935- because the contents of the buffer can affect how many actual
936- characters of the string get into the buffer. We'll do a final
937- sanity check before writing the extension to ensure we do not
938- overflow the buffer.
939- */
940- if (len + Py_UCS4_strlen (CACHEDIR_UNICODE ) + Py_UCS4_strlen (PYC_TAG_UNICODE ) + 5 > buflen ) {
941- PyMem_Free (pathname );
942- return NULL ;
943- }
944-
945- /* Find the last path separator and copy everything from the start of
946- the source string up to and including the separator.
947- */
948- pos = rightmost_sep (pathname );
949- if (pos == NULL ) {
950- i = 0 ;
951- }
952- else {
953- sep = * pos ;
954- i = pos - pathname + 1 ;
955- Py_UCS4_strncpy (buf , pathname , i );
956- }
957-
958- save = i ;
959- buf [i ++ ] = '\0' ;
960- /* Add __pycache__/ */
961- Py_UCS4_strcat (buf , CACHEDIR_UNICODE );
962- i += Py_UCS4_strlen (CACHEDIR_UNICODE ) - 1 ;
963- buf [i ++ ] = sep ;
964- buf [i ] = '\0' ;
965- /* Add the base filename, but remove the .py or .pyw extension, since
966- the tag name must go before the extension.
967- */
968- Py_UCS4_strcat (buf , pathname + save );
969- pos = Py_UCS4_strrchr (buf + i , '.' );
970- if (pos != NULL )
971- * ++ pos = '\0' ;
972-
973- /* pathname is not used from here on. */
974- PyMem_Free (pathname );
975-
976- Py_UCS4_strcat (buf , PYC_TAG_UNICODE );
977- /* The length test above assumes that we're only adding one character
978- to the end of what would normally be the extension. What if there
979- is no extension, or the string ends in '.' or '.p', and otherwise
980- fills the buffer? By appending 4 more characters onto the string
981- here, we could overrun the buffer.
982-
983- As a simple example, let's say buflen=32 and the input string is
984- 'xxx.py'. strlen() would be 6 and the test above would yield:
985-
986- (6 + 11 + 10 + 5 == 32) > 32
987-
988- which is false and so the name mangling would continue. This would
989- be fine because we'd end up with this string in buf:
990-
991- __pycache__/xxx.cpython-32.pyc\0
992-
993- strlen(of that) == 30 + the nul fits inside a 32 character buffer.
994- We can even handle an input string of say 'xxxxx' above because
995- that's (5 + 11 + 10 + 5 == 31) > 32 which is also false. Name
996- mangling that yields:
997-
998- __pycache__/xxxxxcpython-32.pyc\0
999-
1000- which is 32 characters including the nul, and thus fits in the
1001- buffer. However, an input string of 'xxxxxx' would yield a result
1002- string of:
1003-
1004- __pycache__/xxxxxxcpython-32.pyc\0
1005-
1006- which is 33 characters long (including the nul), thus overflowing
1007- the buffer, even though the first test would fail, i.e.: the input
1008- string is also 6 characters long, so 32 > 32 is false.
1009-
1010- The reason the first test fails but we still overflow the buffer is
1011- that the test above only expects to add one extra character to be
1012- added to the extension, and here we're adding three (pyc). We
1013- don't add the first dot, so that reclaims one of expected
1014- positions, leaving us overflowing by 1 byte (3 extra - 1 reclaimed
1015- dot - 1 expected extra == 1 overflowed).
1016-
1017- The best we can do is ensure that we still have enough room in the
1018- target buffer before we write the extension. Because it's always
1019- only the extension that can cause the overflow, and never the other
1020- path bytes we've written, it's sufficient to just do one more test
1021- here. Still, the assertion that follows can't hurt.
1022- */
1023- #if 0
1024- printf ("strlen(buf): %d; buflen: %d\n" , (int )strlen (buf ), (int )buflen );
1025- #endif
1026- len = Py_UCS4_strlen (buf );
1027- if (len + 5 > buflen )
937+ PyObject * result ;
938+ Py_ssize_t fname , ext , len , i , pos , taglen ;
939+ Py_ssize_t pycache_len = sizeof ("__pycache__/" ) - 1 ; /* length of "__pycache__" plus the one separator written after it */
940+ int kind ;
941+ void * data ;
942+
943+ /* Compute the output string size. */
944+ len = PyUnicode_GET_LENGTH (pathstr );
945+ /* If there is no separator, this returns -1, so
946+ fname will be 0. */
947+ fname = rightmost_sep_obj (pathstr ) + 1 ;
948+ ext = fname - 1 ; /* sentinel: stays below fname when the filename has no dot */
949+ for (i = fname ; i < len ; i ++ )
950+ if (PyUnicode_READ_CHAR (pathstr , i ) == '.' )
951+ ext = i + 1 ; /* index just past the last '.', so the dot itself is kept in pathstr[fname:ext] */
952+ if (ext < fname )
953+ /* No dot in filename; use entire filename */
954+ ext = len ;
955+
956+ /* result = pathstr[:fname] + "__pycache__" + SEP +
957+ pathstr[fname:ext] + tag + ".py[co]" */
958+ taglen = strlen (pyc_tag );
959+ result = PyUnicode_New (ext + pycache_len + taglen + 4 , /* 4 == length of the trailing ".pyc" / ".pyo" */
960+ PyUnicode_MAX_CHAR_VALUE (pathstr ));
961+ if (!result )
1028962 return NULL ;
1029- buf [len ] = '.' ; len ++ ;
1030- buf [len ] = 'p' ; len ++ ;
1031- buf [len ] = 'y' ; len ++ ;
1032- buf [len ] = debug ? 'c' : 'o' ; len ++ ;
1033- assert (len <= buflen );
1034- return PyUnicode_FromKindAndData (PyUnicode_4BYTE_KIND , buf , len );
963+ kind = PyUnicode_KIND (result );
964+ data = PyUnicode_DATA (result );
965+ PyUnicode_CopyCharacters (result , 0 , pathstr , 0 , fname );
966+ pos = fname ;
967+ for (i = 0 ; i < pycache_len - 1 ; i ++ ) /* the 11 characters of "__pycache__"; the separator is written separately below */
968+ PyUnicode_WRITE (kind , data , pos ++ , "__pycache__" [i ]);
969+ PyUnicode_WRITE (kind , data , pos ++ , SEP ); /* NOTE(review): the removed code reused the separator character actually found in the path (sep = *pos); this always writes SEP. Confirm that is intended when ALTSEP is defined. */
970+ PyUnicode_CopyCharacters (result , pos , pathstr ,
971+ fname , ext - fname );
972+ pos += ext - fname ;
973+ for (i = 0 ; pyc_tag [i ]; i ++ )
974+ PyUnicode_WRITE (kind , data , pos ++ , pyc_tag [i ]);
975+ PyUnicode_WRITE (kind , data , pos ++ , '.' );
976+ PyUnicode_WRITE (kind , data , pos ++ , 'p' );
977+ PyUnicode_WRITE (kind , data , pos ++ , 'y' );
978+ PyUnicode_WRITE (kind , data , pos ++ , debug ? 'c' : 'o' );
979+ return result ;
1035980}
1036981
1037982
0 commit comments