Add more convenience properties to dis.Instruction

tomasr8 · tomasr8 · commit 0f70a02cc8eb · 2023-04-28T16:48:59.000+02:00
Adds start_offset, cache_offset, end_offset, baseopcode,
baseopname, jump_target and oparg to dis.Instruction.

Also slightly improves the disassembly output by allowing
opnames to overflow into the space reserved for opargs.
diff --git a/Doc/library/dis.rst b/Doc/library/dis.rst
@@ -342,10 +342,23 @@ details of bytecode instructions as :class:`Instruction` instances:
       human readable name for operation
 
 
+   .. data:: baseopcode
+
+      numeric code for the base operation if the operation is specialized, otherwise equal to :data:`opcode`
+
+
+   .. data:: baseopname
+
+      human readable name for the base operation if the operation is specialized, otherwise equal to :data:`opname`
+
+
    .. data:: arg
 
       numeric argument to operation (if any), otherwise ``None``
 
+   .. data:: oparg
+
+      alias for :data:`arg`
 
    .. data:: argval
 
@@ -363,6 +376,22 @@ details of bytecode instructions as :class:`Instruction` instances:
       start index of operation within bytecode sequence
 
 
+   .. data:: start_offset
+
+      start index of operation within bytecode sequence, including any prefixed ``EXTENDED_ARG`` operations;
+      equal to :data:`offset` when there are none
+
+
+   .. data:: cache_offset
+
+      start index of the cache entries following the operation
+
+
+   .. data:: end_offset
+
+      end index of the cache entries following the operation
+
+
    .. data:: starts_line
 
       line started by this opcode (if any), otherwise ``None``
@@ -373,6 +402,11 @@ details of bytecode instructions as :class:`Instruction` instances:
       ``True`` if other code jumps to here, otherwise ``False``
 
 
+   .. data:: jump_target
+
+      bytecode index of the jump target if this is a jump operation, otherwise ``None``
+
+
    .. data:: positions
 
       :class:`dis.Positions` object holding the
@@ -384,6 +418,10 @@ details of bytecode instructions as :class:`Instruction` instances:
 
       Field ``positions`` is added.
 
+   .. versionchanged:: 3.12
+
+      Fields ``start_offset``, ``cache_offset``, ``end_offset``, ``baseopname``, ``baseopcode``, ``jump_target`` and ``oparg`` are added.
+
 
 .. class:: Positions
 
diff --git a/Lib/dis.py b/Lib/dis.py
@@ -258,6 +258,7 @@ def show_code(co, *, file=None):
         'argval',
         'argrepr',
         'offset',
+        'start_offset',
         'starts_line',
         'is_jump_target',
         'positions'
@@ -271,6 +272,8 @@ def show_code(co, *, file=None):
 _Instruction.argval.__doc__ = "Resolved arg value (if known), otherwise same as arg"
 _Instruction.argrepr.__doc__ = "Human readable description of operation argument"
 _Instruction.offset.__doc__ = "Start index of operation within bytecode sequence"
+_Instruction.start_offset.__doc__ = "Start index of operation within bytecode sequence including extended args if present. " \
+                                    "Otherwise equal to Instruction.offset"
 _Instruction.starts_line.__doc__ = "Line started by this opcode (if any), otherwise None"
 _Instruction.is_jump_target.__doc__ = "True if other code jumps to here, otherwise False"
 _Instruction.positions.__doc__ = "dis.Positions object holding the span of source code covered by this instruction"
@@ -281,6 +284,23 @@ def show_code(co, *, file=None):
 _OPNAME_WIDTH = 20
 _OPARG_WIDTH = 5
 
+def _get_jump_target(op, arg, offset):
+    """Gets the bytecode offset of the jump target if this is a jump instruction,
+    otherwise returns None
+    """
+    deop = _deoptop(op)
+    caches = _inline_cache_entries[deop]
+    if deop in hasjrel:
+        if _is_backward_jump(deop):
+            arg = -arg
+        target = offset + 2 + arg*2
+        target += 2 * caches
+    elif deop in hasjabs:
+        target = arg*2
+    else:
+        target = None
+    return target
+
 class Instruction(_Instruction):
     """Details for a bytecode operation
 
@@ -291,12 +311,48 @@ class Instruction(_Instruction):
          argval - resolved arg value (if known), otherwise same as arg
          argrepr - human readable description of operation argument
          offset - start index of operation within bytecode sequence
+         start_offset - start index of operation within bytecode sequence including extended args if present.
+                        Otherwise equal to Instruction.offset
          starts_line - line started by this opcode (if any), otherwise None
          is_jump_target - True if other code jumps to here, otherwise False
          positions - Optional dis.Positions object holding the span of source code
                      covered by this instruction
     """
 
+    @property
+    def oparg(self):
+        """Alias for Instruction.arg."""
+        return self.arg
+
+    @property
+    def baseopcode(self):
+        """Numeric code for the base operation if this operation is specialized,
+        otherwise equal to Instruction.opcode.
+        """
+        return _deoptop(self.opcode)
+
+    @property
+    def baseopname(self):
+        """Human readable name for the base operation if this operation is specialized,
+        otherwise equal to Instruction.opname.
+        """
+        return opname[self.baseopcode]
+
+    @property
+    def cache_offset(self):
+        """Start index of the cache entries following the operation (offset + 2)."""
+        return self.offset + 2
+
+    @property
+    def end_offset(self):
+        """end index of the cache entries following the operation"""
+        return self.cache_offset + _inline_cache_entries[self.opcode]*2
+
+    @property
+    def jump_target(self):
+        """Bytecode index of the jump target if this is a jump operation, otherwise None."""
+        return _get_jump_target(self.opcode, self.arg, self.offset)
+
     def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
         """Format instruction details for inclusion in disassembly output
 
@@ -328,12 +384,23 @@ def _disassemble(self, lineno_width=3, mark_as_current=False, offset_width=4):
         fields.append(self.opname.ljust(_OPNAME_WIDTH))
         # Column: Opcode argument
         if self.arg is not None:
-            fields.append(repr(self.arg).rjust(_OPARG_WIDTH))
+            arg = repr(self.arg)
+            # If opname is longer than _OPNAME_WIDTH, but the total length together with
+            # oparg is less than _OPNAME_WIDTH + _OPARG_WIDTH (with at least one space in between),
+            # we allow opname to overflow into the space reserved for oparg.
+            # This results in fewer misaligned opargs in the disassembly output
+            opname_excess = max(0, len(self.opname) - _OPNAME_WIDTH)
+            if opname_excess + len(arg) < _OPARG_WIDTH:
+                fields.append(arg.rjust(_OPARG_WIDTH - opname_excess))
+            else:
+                fields.append(arg.rjust(_OPARG_WIDTH))
             # Column: Opcode argument details
             if self.argrepr:
                 fields.append('(' + self.argrepr + ')')
         return ' '.join(fields).rstrip()
 
+    def __str__(self):
+        return self._disassemble()
 
 def get_instructions(x, *, first_line=None, show_caches=False, adaptive=False):
     """Iterator for the opcodes in methods, functions or code
@@ -448,7 +515,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
         for i in range(start, end):
             labels.add(target)
     starts_line = None
-    for offset, op, arg in _unpack_opargs(code):
+    for offset, start_offset, op, arg in _unpack_opargs(code):
         if linestarts is not None:
             starts_line = linestarts.get(offset, None)
             if starts_line is not None:
@@ -509,7 +576,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
                 _, argrepr = _nb_ops[arg]
         yield Instruction(_all_opname[op], op,
                           arg, argval, argrepr,
-                          offset, starts_line, is_jump_target, positions)
+                          offset, start_offset, starts_line, is_jump_target, positions)
         caches = _inline_cache_entries[deop]
         if not caches:
             continue
@@ -529,7 +596,7 @@ def _get_instructions_bytes(code, varname_from_oparg=None,
                 else:
                     argrepr = ""
                 yield Instruction(
-                    "CACHE", CACHE, 0, None, argrepr, offset, None, False,
+                    "CACHE", CACHE, 0, None, argrepr, offset, offset, None, False,
                     Positions(*next(co_positions, ()))
                 )
 
@@ -615,6 +682,7 @@ def _disassemble_str(source, **kwargs):
 
 def _unpack_opargs(code):
     extended_arg = 0
+    extended_args_offset = 0  # Number of EXTENDED_ARG instructions preceding the current instruction
     caches = 0
     for i in range(0, len(code), 2):
         # Skip inline CACHE entries:
@@ -635,7 +703,13 @@ def _unpack_opargs(code):
         else:
             arg = None
             extended_arg = 0
-        yield (i, op, arg)
+        if deop == EXTENDED_ARG:
+            extended_args_offset += 1
+            yield (i, i, op, arg)
+        else:
+            start_offset = i - extended_args_offset*2
+            yield (i, start_offset, op, arg)
+            extended_args_offset = 0
 
 def findlabels(code):
     """Detect all offsets in a byte code which are jump targets.
@@ -644,18 +718,10 @@ def findlabels(code):
 
     """
     labels = []
-    for offset, op, arg in _unpack_opargs(code):
+    for offset, _, op, arg in _unpack_opargs(code):
         if arg is not None:
-            deop = _deoptop(op)
-            caches = _inline_cache_entries[deop]
-            if deop in hasjrel:
-                if _is_backward_jump(deop):
-                    arg = -arg
-                label = offset + 2 + arg*2
-                label += 2 * caches
-            elif deop in hasjabs:
-                label = arg*2
-            else:
+            label = _get_jump_target(op, arg, offset)
+            if label is None:
                 continue
             if label not in labels:
                 labels.append(label)
@@ -684,7 +750,7 @@ def _find_imports(co):
 
     consts = co.co_consts
     names = co.co_names
-    opargs = [(op, arg) for _, op, arg in _unpack_opargs(co.co_code)
+    opargs = [(op, arg) for _, _, op, arg in _unpack_opargs(co.co_code)
                   if op != EXTENDED_ARG]
     for i, (op, oparg) in enumerate(opargs):
         if op == IMPORT_NAME and i >= 2:
@@ -706,7 +772,7 @@ def _find_store_names(co):
     }
 
     names = co.co_names
-    for _, op, arg in _unpack_opargs(co.co_code):
+    for _, _, op, arg in _unpack_opargs(co.co_code):
         if op in STORE_OPS:
             yield names[arg]
 
diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py