Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 2db7286

Browse files
committed
Reformulate make_compiled_pathname in terms of unicode objects.
1 parent 9715d26 commit 2db7286

1 file changed

Lines changed: 61 additions & 116 deletions

File tree

Python/import.c

Lines changed: 61 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -904,6 +904,25 @@ rightmost_sep(Py_UCS4 *s)
904904
return found;
905905
}
906906

907+
/* Like rightmost_sep, but operates on a unicode object: returns the index of the rightmost separator, or -1 if there is none. */
908+
static Py_ssize_t
909+
rightmost_sep_obj(PyObject* o)
910+
{
911+
Py_ssize_t found, i;
912+
Py_UCS4 c;
913+
for (found = -1, i = 0; i < PyUnicode_GET_LENGTH(o); i++) {
914+
c = PyUnicode_READ_CHAR(o, i);
915+
if (c == SEP
916+
#ifdef ALTSEP
917+
|| c == ALTSEP
918+
#endif
919+
)
920+
{
921+
found = i;
922+
}
923+
}
924+
return found;
925+
}
907926

908927
/* Given a pathname for a Python source file, fill a buffer with the
909928
pathname for the corresponding compiled file. Return the pathname
@@ -915,123 +934,49 @@ rightmost_sep(Py_UCS4 *s)
915934
static PyObject*
916935
make_compiled_pathname(PyObject *pathstr, int debug)
917936
{
918-
Py_UCS4 *pathname;
919-
Py_UCS4 buf[MAXPATHLEN];
920-
size_t buflen = (size_t)MAXPATHLEN;
921-
size_t len;
922-
size_t i, save;
923-
Py_UCS4 *pos;
924-
int sep = SEP;
925-
926-
pathname = PyUnicode_AsUCS4Copy(pathstr);
927-
if (!pathname)
928-
return NULL;
929-
len = Py_UCS4_strlen(pathname);
930-
931-
/* Sanity check that the buffer has roughly enough space to hold what
932-
will eventually be the full path to the compiled file. The 5 extra
933-
bytes include the slash after __pycache__, the two extra dots, the
934-
extra trailing character ('c' or 'o') and null. This isn't exact
935-
because the contents of the buffer can affect how many actual
936-
characters of the string get into the buffer. We'll do a final
937-
sanity check before writing the extension to ensure we do not
938-
overflow the buffer.
939-
*/
940-
if (len + Py_UCS4_strlen(CACHEDIR_UNICODE) + Py_UCS4_strlen(PYC_TAG_UNICODE) + 5 > buflen) {
941-
PyMem_Free(pathname);
942-
return NULL;
943-
}
944-
945-
/* Find the last path separator and copy everything from the start of
946-
the source string up to and including the separator.
947-
*/
948-
pos = rightmost_sep(pathname);
949-
if (pos == NULL) {
950-
i = 0;
951-
}
952-
else {
953-
sep = *pos;
954-
i = pos - pathname + 1;
955-
Py_UCS4_strncpy(buf, pathname, i);
956-
}
957-
958-
save = i;
959-
buf[i++] = '\0';
960-
/* Add __pycache__/ */
961-
Py_UCS4_strcat(buf, CACHEDIR_UNICODE);
962-
i += Py_UCS4_strlen(CACHEDIR_UNICODE) - 1;
963-
buf[i++] = sep;
964-
buf[i] = '\0';
965-
/* Add the base filename, but remove the .py or .pyw extension, since
966-
the tag name must go before the extension.
967-
*/
968-
Py_UCS4_strcat(buf, pathname + save);
969-
pos = Py_UCS4_strrchr(buf + i, '.');
970-
if (pos != NULL)
971-
*++pos = '\0';
972-
973-
/* pathname is not used from here on. */
974-
PyMem_Free(pathname);
975-
976-
Py_UCS4_strcat(buf, PYC_TAG_UNICODE);
977-
/* The length test above assumes that we're only adding one character
978-
to the end of what would normally be the extension. What if there
979-
is no extension, or the string ends in '.' or '.p', and otherwise
980-
fills the buffer? By appending 4 more characters onto the string
981-
here, we could overrun the buffer.
982-
983-
As a simple example, let's say buflen=32 and the input string is
984-
'xxx.py'. strlen() would be 6 and the test above would yield:
985-
986-
(6 + 11 + 10 + 5 == 32) > 32
987-
988-
which is false and so the name mangling would continue. This would
989-
be fine because we'd end up with this string in buf:
990-
991-
__pycache__/xxx.cpython-32.pyc\0
992-
993-
strlen(of that) == 30 + the nul fits inside a 32 character buffer.
994-
We can even handle an input string of say 'xxxxx' above because
995-
that's (5 + 11 + 10 + 5 == 31) > 32 which is also false. Name
996-
mangling that yields:
997-
998-
__pycache__/xxxxxcpython-32.pyc\0
999-
1000-
which is 32 characters including the nul, and thus fits in the
1001-
buffer. However, an input string of 'xxxxxx' would yield a result
1002-
string of:
1003-
1004-
__pycache__/xxxxxxcpython-32.pyc\0
1005-
1006-
which is 33 characters long (including the nul), thus overflowing
1007-
the buffer, even though the first test would fail, i.e.: the input
1008-
string is also 6 characters long, so 32 > 32 is false.
1009-
1010-
The reason the first test fails but we still overflow the buffer is
1011-
that the test above only expects one extra character to be
1012-
added to the extension, and here we're adding three (pyc). We
1013-
don't add the first dot, so that reclaims one of the expected
1014-
positions, leaving us overflowing by 1 byte (3 extra - 1 reclaimed
1015-
dot - 1 expected extra == 1 overflowed).
1016-
1017-
The best we can do is ensure that we still have enough room in the
1018-
target buffer before we write the extension. Because it's always
1019-
only the extension that can cause the overflow, and never the other
1020-
path bytes we've written, it's sufficient to just do one more test
1021-
here. Still, the assertion that follows can't hurt.
1022-
*/
1023-
#if 0
1024-
printf("strlen(buf): %d; buflen: %d\n", (int)strlen(buf), (int)buflen);
1025-
#endif
1026-
len = Py_UCS4_strlen(buf);
1027-
if (len + 5 > buflen)
937+
PyObject *result;
938+
Py_ssize_t fname, ext, len, i, pos, taglen;
939+
Py_ssize_t pycache_len = sizeof("__pycache__/") - 1;
940+
int kind;
941+
void *data;
942+
943+
/* Compute the output string size. */
944+
len = PyUnicode_GET_LENGTH(pathstr);
945+
/* If there is no separator, this returns -1, so
946+
fname will be 0. */
947+
fname = rightmost_sep_obj(pathstr) + 1;
948+
ext = fname - 1;
949+
for(i = fname; i < len; i++)
950+
if (PyUnicode_READ_CHAR(pathstr, i) == '.')
951+
ext = i + 1;
952+
if (ext < fname)
953+
/* No dot in filename; use entire filename */
954+
ext = len;
955+
956+
/* result = pathstr[:fname] + "__pycache__" + SEP +
957+
pathstr[fname:ext] + tag + ".py[co]" */
958+
taglen = strlen(pyc_tag);
959+
result = PyUnicode_New(ext + pycache_len + taglen + 4,
960+
PyUnicode_MAX_CHAR_VALUE(pathstr));
961+
if (!result)
1028962
return NULL;
1029-
buf[len] = '.'; len++;
1030-
buf[len] = 'p'; len++;
1031-
buf[len] = 'y'; len++;
1032-
buf[len] = debug ? 'c' : 'o'; len++;
1033-
assert(len <= buflen);
1034-
return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, buf, len);
963+
kind = PyUnicode_KIND(result);
964+
data = PyUnicode_DATA(result);
965+
PyUnicode_CopyCharacters(result, 0, pathstr, 0, fname);
966+
pos = fname;
967+
for (i = 0; i < pycache_len - 1; i++)
968+
PyUnicode_WRITE(kind, data, pos++, "__pycache__"[i]);
969+
PyUnicode_WRITE(kind, data, pos++, SEP);
970+
PyUnicode_CopyCharacters(result, pos, pathstr,
971+
fname, ext - fname);
972+
pos += ext - fname;
973+
for (i = 0; pyc_tag[i]; i++)
974+
PyUnicode_WRITE(kind, data, pos++, pyc_tag[i]);
975+
PyUnicode_WRITE(kind, data, pos++, '.');
976+
PyUnicode_WRITE(kind, data, pos++, 'p');
977+
PyUnicode_WRITE(kind, data, pos++, 'y');
978+
PyUnicode_WRITE(kind, data, pos++, debug ? 'c' : 'o');
979+
return result;
1035980
}
1036981

1037982

0 commit comments

Comments
 (0)