Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 52a21b8

Browse files
committed
SF patch #980695: efficient string concatenation
(Original patch by Armin Rigo).
1 parent d09d966 commit 52a21b8

3 files changed

Lines changed: 107 additions & 3 deletions

File tree

Doc/lib/libstdtypes.tex

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ \subsection{Sequence Types \label{typesseq}}
455455
\lineiii{\var{x} not in \var{s}}{\code{0} if an item of \var{s} is
456456
equal to \var{x}, else \code{1}}{(1)}
457457
\hline
458-
\lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{}
458+
\lineiii{\var{s} + \var{t}}{the concatenation of \var{s} and \var{t}}{(6)}
459459
\lineiii{\var{s} * \var{n}\textrm{,} \var{n} * \var{s}}{\var{n} shallow copies of \var{s} concatenated}{(2)}
460460
\hline
461461
\lineiii{\var{s}[\var{i}]}{\var{i}'th item of \var{s}, origin 0}{(3)}
@@ -536,6 +536,16 @@ \subsection{Sequence Types \label{typesseq}}
536536
(which end depends on the sign of \var{k}). Note, \var{k} cannot
537537
be zero.
538538

539+
\item[(6)] If \var{s} and \var{t} are both strings, some Python
540+
implementations such as CPython can usually perform an in-place optimization
541+
for assignments of the form \code{\var{s}=\var{s}+\var{t}} or
542+
\code{\var{s}+=\var{t}}. When applicable, this optimization makes
543+
quadratic run-time much less likely. This optimization is both version
544+
and implementation dependent. For performance sensitive code, it is
545+
preferable to use the \method{str.join()} method, which ensures consistent
546+
linear concatenation performance across versions and implementations.
547+
\versionchanged[Formerly, string concatenation never occurred in-place]{2.4}
548+
539549
\end{description}
540550

541551

Misc/NEWS

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@ What's New in Python 2.4 alpha 2?
1212
Core and builtins
1313
-----------------
1414

15+
- Patch #980695: Implements efficient string concatenation for statements
16+
of the form s=s+t and s+=t. This will vary across implementations.
17+
Accordingly, the str.join() method is strongly preferred for performance
18+
sensitive code.
19+
1520
- PEP-0318, Function Decorators have been added to the language. These are
1621
implemented using the Java-style @decorator syntax, like so:
1722
@staticmethod

Python/ceval.c

Lines changed: 91 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ static int exec_statement(PyFrameObject *,
8585
static void set_exc_info(PyThreadState *, PyObject *, PyObject *, PyObject *);
8686
static void reset_exc_info(PyThreadState *);
8787
static void format_exc_check_arg(PyObject *, char *, PyObject *);
88+
static PyObject *string_concatenate(PyObject *, PyObject *,
89+
PyFrameObject *, unsigned char *);
8890

8991
#define NAME_ERROR_MSG \
9092
"name '%.200s' is not defined"
@@ -550,6 +552,7 @@ PyEval_EvalFrame(PyFrameObject *f)
550552
#define INSTR_OFFSET() (next_instr - first_instr)
551553
#define NEXTOP() (*next_instr++)
552554
#define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2])
555+
#define PEEKARG() ((next_instr[2]<<8) + next_instr[1])
553556
#define JUMPTO(x) (next_instr = first_instr + (x))
554557
#define JUMPBY(x) (next_instr += (x))
555558

@@ -580,8 +583,7 @@ PyEval_EvalFrame(PyFrameObject *f)
580583
#endif
581584

582585
#define PREDICTED(op) PRED_##op: next_instr++
583-
#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = (next_instr[2]<<8) + \
584-
next_instr[1]; next_instr += 3
586+
#define PREDICTED_WITH_ARG(op) PRED_##op: oparg = PEEKARG(); next_instr += 3
585587

586588
/* Stack manipulation macros */
587589

@@ -1066,11 +1068,18 @@ PyEval_EvalFrame(PyFrameObject *f)
10661068
goto slow_add;
10671069
x = PyInt_FromLong(i);
10681070
}
1071+
else if (PyString_CheckExact(v) &&
1072+
PyString_CheckExact(w)) {
1073+
x = string_concatenate(v, w, f, next_instr);
1074+
/* string_concatenate consumed the ref to v */
1075+
goto skip_decref_vx;
1076+
}
10691077
else {
10701078
slow_add:
10711079
x = PyNumber_Add(v, w);
10721080
}
10731081
Py_DECREF(v);
1082+
skip_decref_vx:
10741083
Py_DECREF(w);
10751084
SET_TOP(x);
10761085
if (x != NULL) continue;
@@ -1261,11 +1270,18 @@ PyEval_EvalFrame(PyFrameObject *f)
12611270
goto slow_iadd;
12621271
x = PyInt_FromLong(i);
12631272
}
1273+
else if (PyString_CheckExact(v) &&
1274+
PyString_CheckExact(w)) {
1275+
x = string_concatenate(v, w, f, next_instr);
1276+
/* string_concatenate consumed the ref to v */
1277+
goto skip_decref_v;
1278+
}
12641279
else {
12651280
slow_iadd:
12661281
x = PyNumber_InPlaceAdd(v, w);
12671282
}
12681283
Py_DECREF(v);
1284+
skip_decref_v:
12691285
Py_DECREF(w);
12701286
SET_TOP(x);
12711287
if (x != NULL) continue;
@@ -4191,6 +4207,79 @@ format_exc_check_arg(PyObject *exc, char *format_str, PyObject *obj)
41914207
PyErr_Format(exc, format_str, obj_str);
41924208
}
41934209

4210+
static PyObject *
4211+
string_concatenate(PyObject *v, PyObject *w,
4212+
PyFrameObject *f, unsigned char *next_instr)
4213+
{
4214+
/* This function implements 'variable += expr' when both arguments
4215+
are strings. */
4216+
4217+
if (v->ob_refcnt == 2) {
4218+
/* In the common case, there are 2 references to the value
4219+
* stored in 'variable' when the += is performed: one on the
4220+
* value stack (in 'v') and one still stored in the 'variable'.
4221+
* We try to delete the variable now to reduce the refcnt to 1.
4222+
*/
4223+
switch (*next_instr) {
4224+
case STORE_FAST:
4225+
{
4226+
int oparg = PEEKARG();
4227+
PyObject **fastlocals = f->f_localsplus;
4228+
if (GETLOCAL(oparg) == v)
4229+
SETLOCAL(oparg, NULL);
4230+
break;
4231+
}
4232+
case STORE_DEREF:
4233+
{
4234+
PyObject **freevars = f->f_localsplus + f->f_nlocals;
4235+
PyObject *c = freevars[PEEKARG()];
4236+
if (PyCell_GET(c) == v)
4237+
PyCell_Set(c, NULL);
4238+
break;
4239+
}
4240+
case STORE_NAME:
4241+
{
4242+
PyObject *names = f->f_code->co_names;
4243+
PyObject *name = GETITEM(names, PEEKARG());
4244+
PyObject *locals = f->f_locals;
4245+
if (PyDict_CheckExact(locals) &&
4246+
PyDict_GetItem(locals, name) == v) {
4247+
if (PyDict_DelItem(locals, name) != 0) {
4248+
PyErr_Clear();
4249+
}
4250+
}
4251+
break;
4252+
}
4253+
}
4254+
}
4255+
4256+
if (v->ob_refcnt == 1) {
4257+
/* Now we own the last reference to 'v', so we can resize it
4258+
* in-place.
4259+
*/
4260+
int v_len = PyString_GET_SIZE(v);
4261+
int w_len = PyString_GET_SIZE(w);
4262+
if (_PyString_Resize(&v, v_len + w_len) != 0) {
4263+
/* XXX if _PyString_Resize() fails, 'v' has been
4264+
* deallocated so it cannot be put back into 'variable'.
4265+
* The MemoryError is raised when there is no value in
4266+
* 'variable', which might (very remotely) be a cause
4267+
* of incompatibilities.
4268+
*/
4269+
return NULL;
4270+
}
4271+
/* copy 'w' into the newly allocated area of 'v' */
4272+
memcpy(PyString_AS_STRING(v) + v_len,
4273+
PyString_AS_STRING(w), w_len);
4274+
return v;
4275+
}
4276+
else {
4277+
/* When in-place resizing is not an option. */
4278+
PyString_Concat(&v, w);
4279+
return v;
4280+
}
4281+
}
4282+
41944283
#ifdef DYNAMIC_EXECUTION_PROFILE
41954284

41964285
static PyObject *

0 commit comments

Comments
 (0)