44#include "pycore_code.h" // write_location_entry_start()
55#include "pycore_compile.h"
66#include "pycore_opcode.h" // _PyOpcode_Caches[] and opcode category macros
7+ #include "pycore_opcode_utils.h" // IS_BACKWARDS_JUMP_OPCODE
78#include "opcode_metadata.h" // IS_PSEUDO_INSTR
89
910
@@ -34,6 +35,18 @@ same_location(location a, location b)
3435 a .end_col_offset == b .end_col_offset ;
3536}
3637
/* Return the size of a single instruction: one unit for the instruction
 * itself, plus one unit for each oparg threshold (0xFF, 0xFFFF, 0xFFFFFF)
 * that oparg exceeds, plus the per-opcode entry from _PyOpcode_Caches[].
 * The result is used as a count of code units: assemble_emit_instr()
 * compares it against a buffer length divided by sizeof(_Py_CODEUNIT).
 * Pseudo-instructions are rejected by assertion, as is a non-zero oparg
 * on an opcode that takes no argument.
 */
38+ static int
39+ instr_size (instruction * instr )
40+ {
41+ int opcode = instr -> i_opcode ;
42+ int oparg = instr -> i_oparg ;
43+ assert (!IS_PSEUDO_INSTR (opcode )); /* only real opcodes can be sized */
44+ assert (OPCODE_HAS_ARG (opcode ) || oparg == 0 ); /* arg-less opcodes must carry oparg 0 */
45+ int extended_args = (0xFFFFFF < oparg ) + (0xFFFF < oparg ) + (0xFF < oparg ); /* 0..3: each comparison contributes 1 when oparg exceeds that threshold */
46+ int caches = _PyOpcode_Caches [opcode ]; /* presumably the number of inline cache entries that follow this opcode -- confirm against pycore_opcode.h */
47+ return extended_args + 1 + caches ; /* the "+ 1" is the instruction itself */
48+ }
49+
3750struct assembler {
3851 PyObject * a_bytecode ; /* bytes containing bytecode */
3952 int a_offset ; /* offset into bytecode */
@@ -118,6 +131,7 @@ assemble_emit_exception_table_item(struct assembler *a, int value, int msb)
118131
119132static int
120133assemble_emit_exception_table_entry (struct assembler * a , int start , int end ,
134+ int handler_offset ,
121135 _PyCompile_ExceptHandlerInfo * handler )
122136{
123137 Py_ssize_t len = PyBytes_GET_SIZE (a -> a_except_table );
@@ -126,7 +140,7 @@ assemble_emit_exception_table_entry(struct assembler *a, int start, int end,
126140 }
127141 int size = end - start ;
128142 assert (end > start );
129- int target = handler -> h_offset ;
143+ int target = handler_offset ;
130144 int depth = handler -> h_startdepth - 1 ;
131145 if (handler -> h_preserve_lasti > 0 ) {
132146 depth -= 1 ;
@@ -145,24 +159,30 @@ assemble_exception_table(struct assembler *a, instr_sequence *instrs)
145159{
146160 int ioffset = 0 ;
147161 _PyCompile_ExceptHandlerInfo handler ;
148- handler .h_offset = -1 ;
162+ handler .h_label = -1 ;
149163 handler .h_startdepth = -1 ;
150164 handler .h_preserve_lasti = -1 ;
151165 int start = -1 ;
152166 for (int i = 0 ; i < instrs -> s_used ; i ++ ) {
153167 instruction * instr = & instrs -> s_instrs [i ];
154- if (instr -> i_except_handler_info .h_offset != handler .h_offset ) {
155- if (handler .h_offset >= 0 ) {
168+ if (instr -> i_except_handler_info .h_label != handler .h_label ) {
169+ if (handler .h_label >= 0 ) {
170+ int handler_offset = instrs -> s_instrs [handler .h_label ].i_offset ;
156171 RETURN_IF_ERROR (
157- assemble_emit_exception_table_entry (a , start , ioffset , & handler ));
172+ assemble_emit_exception_table_entry (a , start , ioffset ,
173+ handler_offset ,
174+ & handler ));
158175 }
159176 start = ioffset ;
160177 handler = instr -> i_except_handler_info ;
161178 }
162- ioffset += _PyCompile_InstrSize (instr -> i_opcode , instr -> i_oparg );
179+ ioffset += instr_size (instr );
163180 }
164- if (handler .h_offset >= 0 ) {
165- RETURN_IF_ERROR (assemble_emit_exception_table_entry (a , start , ioffset , & handler ));
181+ if (handler .h_label >= 0 ) {
182+ int handler_offset = instrs -> s_instrs [handler .h_label ].i_offset ;
183+ RETURN_IF_ERROR (assemble_emit_exception_table_entry (a , start , ioffset ,
184+ handler_offset ,
185+ & handler ));
166186 }
167187 return SUCCESS ;
168188}
@@ -329,7 +349,7 @@ assemble_location_info(struct assembler *a, instr_sequence *instrs,
329349 loc = instr -> i_loc ;
330350 size = 0 ;
331351 }
332- size += _PyCompile_InstrSize (instr -> i_opcode , instr -> i_oparg );
352+ size += instr_size (instr );
333353 }
334354 RETURN_IF_ERROR (assemble_emit_location (a , loc , size ));
335355 return SUCCESS ;
@@ -385,7 +405,7 @@ assemble_emit_instr(struct assembler *a, instruction *instr)
385405 Py_ssize_t len = PyBytes_GET_SIZE (a -> a_bytecode );
386406 _Py_CODEUNIT * code ;
387407
388- int size = _PyCompile_InstrSize (instr -> i_opcode , instr -> i_oparg );
408+ int size = instr_size (instr );
389409 if (a -> a_offset + size >= len / (int )sizeof (_Py_CODEUNIT )) {
390410 if (len > PY_SSIZE_T_MAX / 2 ) {
391411 return ERROR ;
@@ -585,12 +605,83 @@ makecode(_PyCompile_CodeUnitMetadata *umd, struct assembler *a, PyObject *const_
585605 return co ;
586606}
587607
/* Assign a bytecode offset (i_offset) to every instruction in instrs and
 * rewrite the oparg of every jump instruction from a target instruction
 * index into a relative distance, measured from the position just after
 * the jump to the target's offset.
 *
 * On entry, the oparg of each jump is used as an index into
 * instrs->s_instrs; it is saved in i_target so it survives the rewrite.
 * Offsets and opargs are recomputed until a full pass changes no jump's
 * size, because the size of a jump depends on its own oparg.
 *
 * Always returns SUCCESS; there is no error path in this function.
 */
608+ static int
609+ resolve_jump_offsets (instr_sequence * instrs )
610+ {
611+ /* Compute the size of each instruction and fixup jump args.
612+ * Replace instruction index with position in bytecode.
613+ */
614+
615+ for (int i = 0 ; i < instrs -> s_used ; i ++ ) {
616+ instruction * instr = & instrs -> s_instrs [i ];
617+ if (OPCODE_HAS_JUMP (instr -> i_opcode )) {
618+ instr -> i_target = instr -> i_oparg ; /* remember the target index; i_oparg is overwritten with a distance below */
619+ }
620+ }
621+
622+ int extended_arg_recompile ; /* set to 1 when a pass changes the size of some jump */
623+
624+ do {
625+ int totsize = 0 ;
626+ for (int i = 0 ; i < instrs -> s_used ; i ++ ) { /* pass 1: lay out offsets using the current opargs */
627+ instruction * instr = & instrs -> s_instrs [i ];
628+ instr -> i_offset = totsize ;
629+ int isize = instr_size (instr );
630+ totsize += isize ;
631+ }
632+ extended_arg_recompile = 0 ;
633+
634+ int offset = 0 ;
635+ for (int i = 0 ; i < instrs -> s_used ; i ++ ) { /* pass 2: turn each jump's target into a relative distance */
636+ instruction * instr = & instrs -> s_instrs [i ];
637+ int isize = instr_size (instr ); /* size as assumed by the layout in pass 1 */
638+ /* jump offsets are computed relative to
639+ * the instruction pointer after fetching
640+ * the jump instruction.
641+ */
642+ offset += isize ;
643+ if (OPCODE_HAS_JUMP (instr -> i_opcode )) {
644+ instruction * target = & instrs -> s_instrs [instr -> i_target ];
645+ instr -> i_oparg = target -> i_offset ; /* temporarily holds the target's offset */
646+ if (instr -> i_oparg < offset ) {
647+ assert (IS_BACKWARDS_JUMP_OPCODE (instr -> i_opcode ));
648+ instr -> i_oparg = offset - instr -> i_oparg ; /* target lies before the end of this jump: distance back */
649+ }
650+ else {
651+ assert (!IS_BACKWARDS_JUMP_OPCODE (instr -> i_opcode ));
652+ instr -> i_oparg = instr -> i_oparg - offset ; /* target lies at or after the end of this jump: distance forward */
653+ }
654+ if (instr_size (instr ) != isize ) { /* the new oparg changed this jump's size, so the pass 1 layout is stale */
655+ extended_arg_recompile = 1 ;
656+ }
657+ }
658+ }
659+ /* XXX: This is an awful hack that could hurt performance, but
660+ on the bright side it should work until we come up
661+ with a better solution.
662+
663+ The issue is that in the first loop instr_size() is
664+ called, and it requires i_oparg be set appropriately.
665+ There is a bootstrap problem because i_oparg is
666+ calculated in the second loop above.
667+
668+ So we loop until we stop seeing new EXTENDED_ARGs.
669+ The only EXTENDED_ARGs that could be popping up are
670+ ones in jump instructions. So this should converge
671+ fairly quickly.
672+ */
673+ } while (extended_arg_recompile );
674+ return SUCCESS ;
675+ }
588676
589677PyCodeObject *
590678_PyAssemble_MakeCodeObject (_PyCompile_CodeUnitMetadata * umd , PyObject * const_cache ,
591679 PyObject * consts , int maxdepth , instr_sequence * instrs ,
592680 int nlocalsplus , int code_flags , PyObject * filename )
593681{
682+ if (resolve_jump_offsets (instrs ) < 0 ) {
683+ return NULL ;
684+ }
594685 PyCodeObject * co = NULL ;
595686
596687 struct assembler a ;
0 commit comments