Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit cfc4c13

Browse files
committed
Add _PyUnicodeWriter_WriteSubstring() function
Write a function to enable more optimizations:
* If the substring is the whole string and overallocation is disabled, just keep a reference to the string instead of copying its characters
* Avoid a call to the expensive _PyUnicode_FindMaxChar() function when possible
1 parent 4489e92 commit cfc4c13

4 files changed

Lines changed: 57 additions & 21 deletions

File tree

Include/unicodeobject.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -940,6 +940,15 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
940940
PyObject *str /* Unicode string */
941941
);
942942

943+
/* Append a substring of a Unicode string: the characters str[start:end], with start inclusive and end exclusive.
944+
Return 0 on success, raise an exception and return -1 on error. */
945+
PyAPI_FUNC(int)
946+
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
947+
PyObject *str, /* Unicode string */
948+
Py_ssize_t start,
949+
Py_ssize_t end
950+
);
951+
943952
/* Append a latin1-encoded byte string.
944953
Return 0 on success, raise an exception and return -1 on error. */
945954
PyAPI_FUNC(int)

Lib/test/test_format.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,9 @@ def test_optimisations(self):
324324
self.assertIs("{0:1s}".format(text), text)
325325
self.assertIs("{0:5s}".format(text), text)
326326

327+
self.assertIs(text % (), text)
328+
self.assertIs(text.format(), text)
329+
327330

328331
def test_main():
329332
support.run_unittest(FormatTest)

Objects/stringlib/unicode_format.h

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -869,25 +869,19 @@ do_markup(SubString *input, PyObject *args, PyObject *kwargs,
869869
SubString literal;
870870
SubString field_name;
871871
SubString format_spec;
872-
Py_UCS4 conversion, maxchar;
873-
Py_ssize_t sublen;
874-
int err;
872+
Py_UCS4 conversion;
875873

876874
MarkupIterator_init(&iter, input->str, input->start, input->end);
877875
while ((result = MarkupIterator_next(&iter, &literal, &field_present,
878876
&field_name, &format_spec,
879877
&conversion,
880878
&format_spec_needs_expanding)) == 2) {
881-
sublen = literal.end - literal.start;
882-
if (sublen) {
883-
maxchar = _PyUnicode_FindMaxChar(literal.str,
884-
literal.start, literal.end);
885-
err = _PyUnicodeWriter_Prepare(writer, sublen, maxchar);
886-
if (err == -1)
879+
if (literal.end != literal.start) {
880+
if (!field_present && iter.str.start == iter.str.end)
881+
writer->overallocate = 0;
882+
if (_PyUnicodeWriter_WriteSubstring(writer, literal.str,
883+
literal.start, literal.end) < 0)
887884
return 0;
888-
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
889-
literal.str, literal.start, sublen);
890-
writer->pos += sublen;
891885
}
892886

893887
if (field_present) {

Objects/unicodeobject.c

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12790,6 +12790,41 @@ _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, PyObject *str)
1279012790
return 0;
1279112791
}
1279212792

12793+
/* Append the characters str[start:end] (start inclusive, end exclusive) to
   the writer's buffer and advance writer->pos by the number of characters
   copied.

   The asserts below state the expected preconditions:
   0 <= start <= end <= PyUnicode_GET_LENGTH(str).

   Returns 0 on success and -1 when PyUnicode_READY() or
   _PyUnicodeWriter_Prepare() reports failure. */
int
12794+
_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, PyObject *str,
12795+
Py_ssize_t start, Py_ssize_t end)
12796+
{
12797+
Py_UCS4 maxchar;
12798+
Py_ssize_t len;
12799+
/* The length and max-char macros used below are applied only after this
   readiness check has succeeded. */
12800+
if (PyUnicode_READY(str) == -1)
12801+
return -1;
12802+
12803+
assert(0 <= start);
12804+
assert(end <= PyUnicode_GET_LENGTH(str));
12805+
assert(start <= end);
12806+
/* Nothing to write. Given 0 <= start <= end, end == 0 implies start == 0.
   NOTE(review): an empty range with start == end > 0 is not caught here and
   falls through with len == 0; presumably harmless, but confirm that
   _PyUnicodeWriter_Prepare() and _PyUnicode_FastCopyCharacters() accept a
   zero length. */
12807+
if (end == 0)
12808+
return 0;
12809+
/* The range covers the whole string: delegate to _PyUnicodeWriter_WriteStr()
   instead of copying a slice here. */
12810+
if (start == 0 && end == PyUnicode_GET_LENGTH(str))
12811+
return _PyUnicodeWriter_WriteStr(writer, str);
12812+
/* Scan the slice for its real maximum character only when the string's
   overall maximum exceeds the writer's current one; otherwise the writer's
   maxchar is already wide enough and the scan is skipped. */
12813+
if (PyUnicode_MAX_CHAR_VALUE(str) > writer->maxchar)
12814+
maxchar = _PyUnicode_FindMaxChar(str, start, end);
12815+
else
12816+
maxchar = writer->maxchar;
12817+
len = end - start;
12818+
/* Prepare the writer for len more characters of width maxchar before
   copying into writer->buffer. */
12819+
if (_PyUnicodeWriter_Prepare(writer, len, maxchar) < 0)
12820+
return -1;
12821+
12822+
_PyUnicode_FastCopyCharacters(writer->buffer, writer->pos,
12823+
str, start, len);
12824+
writer->pos += len;
12825+
return 0;
12826+
}
12827+
1279312828
int
1279412829
_PyUnicodeWriter_WriteCstr(_PyUnicodeWriter *writer, const char *str, Py_ssize_t len)
1279512830
{
@@ -13963,7 +13998,7 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1396313998

1396413999
while (--ctx.fmtcnt >= 0) {
1396514000
if (PyUnicode_READ(ctx.fmtkind, ctx.fmtdata, ctx.fmtpos) != '%') {
13966-
Py_ssize_t nonfmtpos, sublen;
14001+
Py_ssize_t nonfmtpos;
1396714002
Py_UCS4 maxchar;
1396814003

1396914004
nonfmtpos = ctx.fmtpos++;
@@ -13976,15 +14011,10 @@ PyUnicode_Format(PyObject *format, PyObject *args)
1397614011
ctx.fmtpos--;
1397714012
ctx.writer.overallocate = 0;
1397814013
}
13979-
sublen = ctx.fmtpos - nonfmtpos;
13980-
maxchar = _PyUnicode_FindMaxChar(ctx.fmtstr,
13981-
nonfmtpos, nonfmtpos + sublen);
13982-
if (_PyUnicodeWriter_Prepare(&ctx.writer, sublen, maxchar) == -1)
13983-
goto onError;
1398414014

13985-
_PyUnicode_FastCopyCharacters(ctx.writer.buffer, ctx.writer.pos,
13986-
ctx.fmtstr, nonfmtpos, sublen);
13987-
ctx.writer.pos += sublen;
14015+
if (_PyUnicodeWriter_WriteSubstring(&ctx.writer, ctx.fmtstr,
14016+
nonfmtpos, ctx.fmtpos) < 0)
14017+
goto onError;
1398814018
}
1398914019
else {
1399014020
ctx.fmtpos++;

0 commit comments

Comments
 (0)