@@ -380,6 +380,8 @@ intern_strings(PyObject *tuple)
380380 }
381381}
382382
383+ /* Begin: Peephole optimizations ----------------------------------------- */
384+
383385#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
384386#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
385387#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
@@ -388,6 +390,56 @@ intern_strings(PyObject *tuple)
388390#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
389391#define ISBASICBLOCK(blocks, start, bytes) (blocks[start]==blocks[start+bytes-1])
390392
393+ /* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n
394+ with LOAD_CONST (c1, c2, ... cn).
395+ The consts table must still be in list form so that the
396+ new constant (c1, c2, ... cn) can be appended.
397+ Called with codestr pointing to the first LOAD_CONST.
398+ Bails out with no change if one or more of the LOAD_CONSTs is missing. */
399+ static int
400+ tuple_of_constants (unsigned char * codestr , int n , PyObject * consts )
401+ {
402+ PyObject * newconst , * constant ;
403+ int i , arg , len_consts ;
404+
405+ /* Pre-conditions */
406+ assert (PyList_CheckExact (consts ));
407+ assert (codestr [0 ] == LOAD_CONST );
408+ assert (codestr [n * 3 ] == BUILD_TUPLE );
409+ assert (GETARG (codestr , (n * 3 )) == n );
410+
411+ /* Verify chain of n load_constants */
412+ for (i = 0 ; i < n ; i ++ )
413+ if (codestr [i * 3 ] != LOAD_CONST )
414+ return 0 ;
415+
416+ /* Buildup new tuple of constants */
417+ newconst = PyTuple_New (n );
418+ if (newconst == NULL )
419+ return 0 ;
420+ for (i = 0 ; i < n ; i ++ ) {
421+ arg = GETARG (codestr , (i * 3 ));
422+ constant = PyList_GET_ITEM (consts , arg );
423+ Py_INCREF (constant );
424+ PyTuple_SET_ITEM (newconst , i , constant );
425+ }
426+
427+ /* Append folded constant onto consts */
428+ len_consts = PyList_GET_SIZE (consts );
429+ if (PyList_Append (consts , newconst )) {
430+ Py_DECREF (newconst );
431+ return 0 ;
432+ }
433+ Py_DECREF (newconst );
434+
435+ /* Write NOPs over old LOAD_CONSTS and
436+ add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
437+ memset (codestr , NOP , n * 3 );
438+ codestr [n * 3 ] = LOAD_CONST ;
439+ SETARG (codestr , (n * 3 ), len_consts );
440+ return 1 ;
441+ }
442+
391443static unsigned int *
392444markblocks (unsigned char * code , int len )
393445{
@@ -423,6 +475,21 @@ markblocks(unsigned char *code, int len)
423475 return blocks ;
424476}
425477
478+ /* Perform basic peephole optimizations to components of a code object.
479+ The consts object should still be in list form to allow new constants
480+ to be appended.
481+
482+ To keep the optimizer simple, it bails out (does nothing) for code
483+ containing extended arguments or that has a length over 32,700. That
484+ allows us to avoid overflow and sign issues. Likewise, it bails when
485+ the lineno table has complex encoding for gaps >= 255.
486+
487+ Optimizations are restricted to simple transformations occurring within a
488+ single basic block. All transformations keep the code size the same or
489+ smaller. For those that reduce size, the gaps are initially filled with
490+ NOPs. Later those NOPs are removed and the jump addresses retargeted in
491+ a single pass. Line numbering is adjusted accordingly. */
492+
426493static PyObject *
427494optimize_code (PyObject * code , PyObject * consts , PyObject * names , PyObject * lineno_obj )
428495{
@@ -447,7 +514,7 @@ optimize_code(PyObject *code, PyObject* consts, PyObject *names, PyObject *linen
447514
448515 /* Avoid situations where jump retargeting could overflow */
449516 codelen = PyString_Size (code );
450- if (codelen > 32000 )
517+ if (codelen > 32700 )
451518 goto exitUnchanged ;
452519
453520 /* Make a modifiable copy of the code string */
@@ -464,7 +531,7 @@ optimize_code(PyObject *code, PyObject* consts, PyObject *names, PyObject *linen
464531 blocks = markblocks (codestr , codelen );
465532 if (blocks == NULL )
466533 goto exitUnchanged ;
467- assert (PyTuple_Check (consts ));
534+ assert (PyList_Check (consts ));
468535
469536 for (i = 0 , nops = 0 ; i < codelen ; i += CODESIZE (codestr [i ])) {
470537 addrmap [i ] = i - nops ;
@@ -511,8 +578,8 @@ optimize_code(PyObject *code, PyObject* consts, PyObject *names, PyObject *linen
511578 name = PyString_AsString (PyTuple_GET_ITEM (names , j ));
512579 if (name == NULL || strcmp (name , "None" ) != 0 )
513580 continue ;
514- for (j = 0 ; j < PyTuple_GET_SIZE (consts ) ; j ++ ) {
515- if (PyTuple_GET_ITEM (consts , j ) == Py_None ) {
581+ for (j = 0 ; j < PyList_GET_SIZE (consts ) ; j ++ ) {
582+ if (PyList_GET_ITEM (consts , j ) == Py_None ) {
516583 codestr [i ] = LOAD_CONST ;
517584 SETARG (codestr , i , j );
518585 break ;
@@ -525,17 +592,28 @@ optimize_code(PyObject *code, PyObject* consts, PyObject *names, PyObject *linen
525592 j = GETARG (codestr , i );
526593 if (codestr [i + 3 ] != JUMP_IF_FALSE ||
527594 codestr [i + 6 ] != POP_TOP ||
528- !ISBASICBLOCK (blocks ,i ,7 ) ||
529- !PyObject_IsTrue (PyTuple_GET_ITEM (consts , j )))
595+ !ISBASICBLOCK (blocks ,i ,7 ) ||
596+ !PyObject_IsTrue (PyList_GET_ITEM (consts , j )))
530597 continue ;
531598 memset (codestr + i , NOP , 7 );
532599 nops += 7 ;
533600 break ;
534601
535- /* Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
602+ /* Try to fold tuples of constants.
603+ Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
536604 Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
537605 Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
538606 case BUILD_TUPLE :
607+ j = GETARG (codestr , i );
608+ h = i - 3 * j ;
609+ if (h >= 0 &&
610+ codestr [h ] == LOAD_CONST &&
611+ ISBASICBLOCK (blocks , h , 3 * (j + 1 )) &&
612+ tuple_of_constants (& codestr [h ], j , consts )) {
613+ nops += 3 * j ;
614+ break ;
615+ }
616+ /* Intentional fallthrough */
539617 case BUILD_LIST :
540618 j = GETARG (codestr , i );
541619 if (codestr [i + 3 ] != UNPACK_SEQUENCE ||
@@ -610,8 +688,8 @@ optimize_code(PyObject *code, PyObject* consts, PyObject *names, PyObject *linen
610688
611689 /* Replace RETURN LOAD_CONST None RETURN with just RETURN */
612690 case RETURN_VALUE :
613- if (i + 4 >= codelen ||
614- codestr [i + 4 ] != RETURN_VALUE ||
691+ if (i + 4 >= codelen ||
692+ codestr [i + 4 ] != RETURN_VALUE ||
615693 !ISBASICBLOCK (blocks ,i ,5 ))
616694 continue ;
617695 memset (codestr + i + 1 , NOP , 4 );
@@ -677,6 +755,8 @@ optimize_code(PyObject *code, PyObject* consts, PyObject *names, PyObject *linen
677755 return code ;
678756}
679757
758+ /* End: Peephole optimizations ----------------------------------------- */
759+
680760PyCodeObject *
681761PyCode_New (int argcount , int nlocals , int stacksize , int flags ,
682762 PyObject * code , PyObject * consts , PyObject * names ,
@@ -4899,15 +4979,15 @@ jcompile(node *n, const char *filename, struct compiling *base,
48994979 if (sc .c_errors == 0 ) {
49004980 PyObject * consts , * names , * varnames , * filename , * name ,
49014981 * freevars , * cellvars , * code ;
4902- consts = PyList_AsTuple (sc .c_consts );
49034982 names = PyList_AsTuple (sc .c_names );
49044983 varnames = PyList_AsTuple (sc .c_varnames );
49054984 cellvars = dict_keys_inorder (sc .c_cellvars , 0 );
49064985 freevars = dict_keys_inorder (sc .c_freevars ,
49074986 PyTuple_GET_SIZE (cellvars ));
49084987 filename = PyString_InternFromString (sc .c_filename );
49094988 name = PyString_InternFromString (sc .c_name );
4910- code = optimize_code (sc .c_code , consts , names , sc .c_lnotab );
4989+ code = optimize_code (sc .c_code , sc .c_consts , names , sc .c_lnotab );
4990+ consts = PyList_AsTuple (sc .c_consts );
49114991 if (!PyErr_Occurred ())
49124992 co = PyCode_New (sc .c_argcount ,
49134993 sc .c_nlocals ,
0 commit comments