diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 7f1bc363861ddf..54d7e62292966e 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -61,6 +61,8 @@ typedef struct _stack_chunk { PyObject * data[1]; /* Variable sized */ } _PyStackChunk; +/* Minimum size of data stack chunk */ +#define _PY_DATA_STACK_CHUNK_SIZE (16*1024) struct _ts { /* See Python/ceval.c for comments explaining most fields */ diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 1a265c59ff8c08..ce3fcb109f49f7 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -54,11 +54,13 @@ extern "C" { # define _Py_Debug_Free_Threaded 1 # define _Py_Debug_code_object_co_tlbc offsetof(PyCodeObject, co_tlbc) # define _Py_Debug_interpreter_frame_tlbc_index offsetof(_PyInterpreterFrame, tlbc_index) +# define _Py_Debug_interpreter_state_tlbc_generation offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) #else # define _Py_Debug_gilruntimestate_enabled 0 # define _Py_Debug_Free_Threaded 0 # define _Py_Debug_code_object_co_tlbc 0 # define _Py_Debug_interpreter_frame_tlbc_index 0 +# define _Py_Debug_interpreter_state_tlbc_generation 0 #endif @@ -89,6 +91,8 @@ typedef struct _Py_DebugOffsets { uint64_t gil_runtime_state_enabled; uint64_t gil_runtime_state_locked; uint64_t gil_runtime_state_holder; + uint64_t code_object_generation; + uint64_t tlbc_generation; } interpreter_state; // Thread state offset; @@ -216,6 +220,11 @@ typedef struct _Py_DebugOffsets { uint64_t gi_frame_state; } gen_object; + struct _llist_node { + uint64_t next; + uint64_t prev; + } llist_node; + struct _debugger_support { uint64_t eval_breaker; uint64_t remote_debugger_support; @@ -251,6 +260,8 @@ typedef struct _Py_DebugOffsets { .gil_runtime_state_enabled = _Py_Debug_gilruntimestate_enabled, \ .gil_runtime_state_locked = offsetof(PyInterpreterState, _gil.locked), \ .gil_runtime_state_holder = offsetof(PyInterpreterState, _gil.last_holder), \ + .code_object_generation = offsetof(PyInterpreterState, _code_object_generation), \ + .tlbc_generation = _Py_Debug_interpreter_state_tlbc_generation, \ }, \ .thread_state = { \ .size = sizeof(PyThreadState), \ @@ -347,6 +358,10 @@ typedef struct _Py_DebugOffsets { .gi_iframe = offsetof(PyGenObject, gi_iframe), \ .gi_frame_state = offsetof(PyGenObject, gi_frame_state), \ }, \ + .llist_node = { \ + .next = offsetof(struct llist_node, next), \ + .prev = offsetof(struct llist_node, prev), \ + }, \ .debugger_support = { \ .eval_breaker = offsetof(PyThreadState, eval_breaker), \ .remote_debugger_support = offsetof(PyThreadState, remote_debugger_support), \ diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index d896e870630418..356bcaa7c350a1 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -795,6 +795,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alias)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(align)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(all_threads)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(allow_code)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(any)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(append)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index a06d7495bab8e7..aebe798031ce4f 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -286,6 +286,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(alias) STRUCT_FOR_ID(align) STRUCT_FOR_ID(all) + STRUCT_FOR_ID(all_threads) STRUCT_FOR_ID(allow_code) STRUCT_FOR_ID(any) STRUCT_FOR_ID(append) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index c3e6c77405bfe7..8a29c533b99058 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -726,6 +726,10 @@ typedef struct _PyIndexPool { // Next index to allocate if no free indices are available int32_t next_index; + + // Generation counter incremented on thread creation/destruction + // Used for TLBC cache invalidation in remote debugging + uint32_t tlbc_generation; } _PyIndexPool; typedef union _Py_unique_id_entry { @@ -843,6 +847,8 @@ struct _is { /* The per-interpreter GIL, which might not be used. */ struct _gil_runtime_state _gil; + uint64_t _code_object_generation; + /* ---------- IMPORTANT --------------------------- The fields above this line are declared as early as possible to facilitate out-of-process observability diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 83301d8aef7697..0fa1fa5af99a92 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -793,6 +793,7 @@ extern "C" { INIT_ID(alias), \ INIT_ID(align), \ INIT_ID(all), \ + INIT_ID(all_threads), \ INIT_ID(allow_code), \ INIT_ID(any), \ INIT_ID(append), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index c0f5f2b17f6609..4982c4532afd89 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -932,6 +932,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(all_threads); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(allow_code); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Lib/asyncio/tools.py b/Lib/asyncio/tools.py index b2da7d2f6ba10c..3fc4524c008db6 100644 --- a/Lib/asyncio/tools.py +++ b/Lib/asyncio/tools.py @@ -1,11 +1,10 @@ """Tools to analyze tasks running in asyncio programs.""" -from dataclasses import dataclass from collections import defaultdict from itertools import count from enum import Enum import sys -from _remote_debugging import get_all_awaited_by +from _remote_debugging import RemoteUnwinder class NodeType(Enum): @@ -118,6 +117,11 @@ def dfs(v): # ─── PRINT TREE FUNCTION ─────────────────────────────────────── +def get_all_awaited_by(pid): + unwinder = RemoteUnwinder(pid) + return unwinder.get_all_awaited_by() + + def build_async_tree(result, task_emoji="(T)", cor_emoji=""): """ Build a list of strings for pretty-print an async call tree. diff --git a/Lib/test/test_external_inspection.py b/Lib/test/test_external_inspection.py index ad3f669a03043e..291c419066ac5b 100644 --- a/Lib/test/test_external_inspection.py +++ b/Lib/test/test_external_inspection.py @@ -4,6 +4,7 @@ import importlib import sys import socket +import threading from asyncio import staggered, taskgroups from unittest.mock import ANY from test.support import os_helper, SHORT_TIMEOUT, busy_retry @@ -16,9 +17,7 @@ try: from _remote_debugging import PROCESS_VM_READV_SUPPORTED - from _remote_debugging import get_stack_trace - from _remote_debugging import get_async_stack_trace - from _remote_debugging import get_all_awaited_by + from _remote_debugging import RemoteUnwinder except ImportError: raise unittest.SkipTest("Test only runs when _remote_debugging is available") @@ -34,7 +33,23 @@ def _make_test_script(script_dir, script_basename, source): ) +def get_stack_trace(pid): + unwinder = RemoteUnwinder(pid, all_threads=True) + return unwinder.get_stack_trace() + + +def get_async_stack_trace(pid): + unwinder = RemoteUnwinder(pid) + return unwinder.get_async_stack_trace() + + +def get_all_awaited_by(pid): + unwinder = RemoteUnwinder(pid) + return unwinder.get_all_awaited_by() + + class TestGetStackTrace(unittest.TestCase): + maxDiff = None @skip_if_not_supported @unittest.skipIf( @@ -46,7 +61,7 @@ def test_remote_stack_trace(self): port = find_unused_port() script = textwrap.dedent( f"""\ - import time, sys, socket + import time, sys, socket, threading # Connect to the test process sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect(('localhost', {port})) @@ -55,13 +70,16 @@ def bar(): for x in range(100): if x == 50: baz() + def baz(): foo() def foo(): - sock.sendall(b"ready"); time.sleep(10_000) # same line number + sock.sendall(b"ready:thread\\n"); time.sleep(10_000) # same line number - bar() + t = threading.Thread(target=bar) + t.start() + sock.sendall(b"ready:main\\n"); t.join() # same line number """ ) stack_trace = None @@ -82,8 +100,9 @@ def foo(): p = subprocess.Popen([sys.executable, script_name]) client_socket, _ = server_socket.accept() server_socket.close() - response = client_socket.recv(1024) - self.assertEqual(response, b"ready") + response = b"" + while b"ready:main" not in response or b"ready:thread" not in response: + response += client_socket.recv(1024) stack_trace = get_stack_trace(p.pid) except PermissionError: self.skipTest("Insufficient permissions to read the stack trace") @@ -94,13 +113,23 @@ def foo(): p.terminate() p.wait(timeout=SHORT_TIMEOUT) - expected_stack_trace = [ - ("foo", script_name, 14), - ("baz", script_name, 11), + thread_expected_stack_trace = [ + ("foo", script_name, 15), + ("baz", script_name, 12), ("bar", script_name, 9), - ("", script_name, 16), + ('Thread.run', threading.__file__, ANY) ] - self.assertEqual(stack_trace, expected_stack_trace) + # Is possible that there are more threads, so we check that the + # expected stack traces are in the result (looking at you Windows!) + self.assertIn((ANY, thread_expected_stack_trace), stack_trace) + + # Check that the main thread stack trace is in the result + frame = ("", script_name, 19) + for _, stack in stack_trace: + if frame in stack: + break + else: + self.fail("Main thread stack trace not found in result") @skip_if_not_supported @unittest.skipIf( @@ -700,13 +729,28 @@ async def main(): ) def test_self_trace(self): stack_trace = get_stack_trace(os.getpid()) + # Is possible that there are more threads, so we check that the + # expected stack traces are in the result (looking at you Windows!) + this_tread_stack = None + for thread_id, stack in stack_trace: + if thread_id == threading.get_native_id(): + this_tread_stack = stack + break + self.assertIsNotNone(this_tread_stack) self.assertEqual( - stack_trace[0], - ( - "TestGetStackTrace.test_self_trace", - __file__, - self.test_self_trace.__code__.co_firstlineno + 6, - ), + stack[:2], + [ + ( + "get_stack_trace", + __file__, + get_stack_trace.__code__.co_firstlineno + 2, + ), + ( + "TestGetStackTrace.test_self_trace", + __file__, + self.test_self_trace.__code__.co_firstlineno + 6, + ), + ] ) diff --git a/Makefile.pre.in b/Makefile.pre.in index 3ab7c3d6c48ad9..b5703fbe6ae974 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1206,6 +1206,7 @@ PYTHON_HEADERS= \ $(srcdir)/Include/unicodeobject.h \ $(srcdir)/Include/warnings.h \ $(srcdir)/Include/weakrefobject.h \ + $(srcdir)/Python/remote_debug.h \ \ pyconfig.h \ $(PARSER_HEADERS) \ diff --git a/Modules/_remote_debugging_module.c b/Modules/_remote_debugging_module.c index 8c0f40f835c36e..a13cbd63ad3bd8 100644 --- a/Modules/_remote_debugging_module.c +++ b/Modules/_remote_debugging_module.c @@ -1,5 +1,16 @@ +/****************************************************************************** + * Python Remote Debugging Module + * + * This module provides functionality to debug Python processes remotely by + * reading their memory and reconstructing stack traces and asyncio task states. + ******************************************************************************/ + #define _GNU_SOURCE +/* ============================================================================ + * HEADERS AND INCLUDES + * ============================================================================ */ + #include #include #include @@ -23,6 +34,47 @@ # define HAVE_PROCESS_VM_READV 0 #endif +/* ============================================================================ + * TYPE DEFINITIONS AND STRUCTURES + * ============================================================================ */ + +#define GET_MEMBER(type, obj, offset) (*(type*)((char*)(obj) + (offset))) + +/* Size macros for opaque buffers */ +#define SIZEOF_BYTES_OBJ sizeof(PyBytesObject) +#define SIZEOF_CODE_OBJ sizeof(PyCodeObject) +#define SIZEOF_GEN_OBJ sizeof(PyGenObject) +#define SIZEOF_INTERP_FRAME sizeof(_PyInterpreterFrame) +#define SIZEOF_LLIST_NODE sizeof(struct llist_node) +#define SIZEOF_PAGE_CACHE_ENTRY sizeof(page_cache_entry_t) +#define SIZEOF_PYOBJECT sizeof(PyObject) +#define SIZEOF_SET_OBJ sizeof(PySetObject) +#define SIZEOF_TASK_OBJ 4096 +#define SIZEOF_THREAD_STATE sizeof(PyThreadState) +#define SIZEOF_TYPE_OBJ sizeof(PyTypeObject) +#define SIZEOF_UNICODE_OBJ sizeof(PyUnicodeObject) +#define SIZEOF_LONG_OBJ sizeof(PyLongObject) + +// Calculate the minimum buffer size needed to read interpreter state fields +// We need to read code_object_generation and potentially tlbc_generation +#ifndef MAX +#define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif + +#ifdef Py_GIL_DISABLED +#define INTERP_STATE_MIN_SIZE MAX(MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, tlbc_indices.tlbc_generation) + sizeof(uint32_t)), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)) +#else +#define INTERP_STATE_MIN_SIZE MAX(offsetof(PyInterpreterState, _code_object_generation) + sizeof(uint64_t), \ + offsetof(PyInterpreterState, threads.head) + sizeof(void*)) +#endif +#define INTERP_STATE_BUFFER_SIZE MAX(INTERP_STATE_MIN_SIZE, 256) + + + +// Copied from Modules/_asynciomodule.c because it's not exported + struct _Py_AsyncioModuleDebugOffsets { struct _asyncio_task_object { uint64_t size; @@ -45,6 +97,127 @@ struct _Py_AsyncioModuleDebugOffsets { } asyncio_thread_state; }; +typedef struct { + PyObject_HEAD + proc_handle_t handle; + uintptr_t runtime_start_address; + struct _Py_DebugOffsets debug_offsets; + int async_debug_offsets_available; + struct _Py_AsyncioModuleDebugOffsets async_debug_offsets; + uintptr_t interpreter_addr; + uintptr_t tstate_addr; + uint64_t code_object_generation; + _Py_hashtable_t *code_object_cache; +#ifdef Py_GIL_DISABLED + // TLBC cache invalidation tracking + uint32_t tlbc_generation; // Track TLBC index pool changes + _Py_hashtable_t *tlbc_cache; // Cache of TLBC arrays by code object address +#endif +} RemoteUnwinderObject; + +typedef struct { + PyObject *func_name; + PyObject *file_name; + int first_lineno; + PyObject *linetable; // bytes + uintptr_t addr_code_adaptive; +} CachedCodeMetadata; + +typedef struct { + /* Types */ + PyTypeObject *RemoteDebugging_Type; +} RemoteDebuggingState; + +typedef struct +{ + int lineno; + int end_lineno; + int column; + int end_column; +} LocationInfo; + +typedef struct { + uintptr_t remote_addr; + size_t size; + void *local_copy; +} StackChunkInfo; + +typedef struct { + StackChunkInfo *chunks; + size_t count; +} StackChunkList; + +#include "clinic/_remote_debugging_module.c.h" + +/*[clinic input] +module _remote_debugging +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=5f507d5b2e76a7f7]*/ + + +/* ============================================================================ + * FORWARD DECLARATIONS + * ============================================================================ */ + +static int +parse_tasks_in_set( + RemoteUnwinderObject *unwinder, + uintptr_t set_addr, + PyObject *awaited_by, + int recurse_task +); + +static int +parse_task( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *render_to, + int recurse_task +); + +static int +parse_coro_chain( + RemoteUnwinderObject *unwinder, + uintptr_t coro_address, + PyObject *render_to +); + +/* Forward declarations for task parsing functions */ +static int parse_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* previous_frame +); + +/* ============================================================================ + * UTILITY FUNCTIONS AND HELPERS + * ============================================================================ */ + +static void +cached_code_metadata_destroy(void *ptr) +{ + CachedCodeMetadata *meta = (CachedCodeMetadata *)ptr; + Py_DECREF(meta->func_name); + Py_DECREF(meta->file_name); + Py_DECREF(meta->linetable); + PyMem_RawFree(meta); +} + +static inline RemoteDebuggingState * +RemoteDebugging_GetState(PyObject *module) +{ + void *state = _PyModule_GetState(module); + assert(state != NULL); + return (RemoteDebuggingState *)state; +} + +static inline int +RemoteDebugging_InitState(RemoteDebuggingState *st) +{ + return 0; +} + // Helper to chain exceptions and avoid repetitions static void chain_exceptions(PyObject *exception, const char *string) @@ -54,36 +227,14 @@ chain_exceptions(PyObject *exception, const char *string) _PyErr_ChainExceptions1(exc); } -// Get the PyAsyncioDebug section address for any platform -static uintptr_t -_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) -{ - uintptr_t address; - -#ifdef MS_WINDOWS - // On Windows, search for asyncio debug in executable or DLL - address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); -#elif defined(__linux__) - // On Linux, search for asyncio debug in executable or DLL - address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); -#elif defined(__APPLE__) && TARGET_OS_OSX - // On macOS, try libpython first, then fall back to python - address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); - if (address == 0) { - PyErr_Clear(); - address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); - } -#else - Py_UNREACHABLE(); -#endif - - return address; -} +/* ============================================================================ + * MEMORY READING FUNCTIONS + * ============================================================================ */ static inline int read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) { - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void*), ptr_addr); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(void*), ptr_addr); if (result < 0) { return -1; } @@ -93,7 +244,7 @@ read_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) static inline int read_Py_ssize_t(proc_handle_t *handle, uintptr_t address, Py_ssize_t *size) { - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(Py_ssize_t), size); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(Py_ssize_t), size); if (result < 0) { return -1; } @@ -113,72 +264,53 @@ read_py_ptr(proc_handle_t *handle, uintptr_t address, uintptr_t *ptr_addr) static int read_char(proc_handle_t *handle, uintptr_t address, char *result) { - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(char), result); - if (res < 0) { - return -1; - } - return 0; -} - -static int -read_sized_int(proc_handle_t *handle, uintptr_t address, void *result, size_t size) -{ - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, size, result); - if (res < 0) { - return -1; - } - return 0; -} - -static int -read_unsigned_long(proc_handle_t *handle, uintptr_t address, unsigned long *result) -{ - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(unsigned long), result); + int res = _Py_RemoteDebug_PagedReadRemoteMemory(handle, address, sizeof(char), result); if (res < 0) { return -1; } return 0; } -static int -read_pyobj(proc_handle_t *handle, uintptr_t address, PyObject *ptr_addr) -{ - int res = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(PyObject), ptr_addr); - if (res < 0) { - return -1; - } - return 0; -} +/* ============================================================================ + * PYTHON OBJECT READING FUNCTIONS + * ============================================================================ */ static PyObject * read_py_str( - proc_handle_t *handle, - _Py_DebugOffsets* debug_offsets, + RemoteUnwinderObject *unwinder, uintptr_t address, Py_ssize_t max_len ) { PyObject *result = NULL; char *buf = NULL; - Py_ssize_t len; - int res = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + debug_offsets->unicode_object.length, - sizeof(Py_ssize_t), - &len + // Read the entire PyUnicodeObject at once + char unicode_obj[SIZEOF_UNICODE_OBJ]; + int res = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_UNICODE_OBJ, + unicode_obj ); if (res < 0) { goto err; } + Py_ssize_t len = GET_MEMBER(Py_ssize_t, unicode_obj, unwinder->debug_offsets.unicode_object.length); + if (len < 0 || len > max_len) { + PyErr_Format(PyExc_RuntimeError, + "Invalid string length (%zd) at 0x%lx", len, address); + return NULL; + } + buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); return NULL; } - size_t offset = debug_offsets->unicode_object.asciiobject_size; - res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf); + size_t offset = unwinder->debug_offsets.unicode_object.asciiobject_size; + res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); if (res < 0) { goto err; } @@ -202,32 +334,40 @@ read_py_str( static PyObject * read_py_bytes( - proc_handle_t *handle, - _Py_DebugOffsets* debug_offsets, - uintptr_t address + RemoteUnwinderObject *unwinder, + uintptr_t address, + Py_ssize_t max_len ) { PyObject *result = NULL; char *buf = NULL; - Py_ssize_t len; - int res = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + debug_offsets->bytes_object.ob_size, - sizeof(Py_ssize_t), - &len + // Read the entire PyBytesObject at once + char bytes_obj[SIZEOF_BYTES_OBJ]; + int res = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_BYTES_OBJ, + bytes_obj ); if (res < 0) { goto err; } + Py_ssize_t len = GET_MEMBER(Py_ssize_t, bytes_obj, unwinder->debug_offsets.bytes_object.ob_size); + if (len < 0 || len > max_len) { + PyErr_Format(PyExc_RuntimeError, + "Invalid string length (%zd) at 0x%lx", len, address); + return NULL; + } + buf = (char *)PyMem_RawMalloc(len+1); if (buf == NULL) { PyErr_NoMemory(); return NULL; } - size_t offset = debug_offsets->bytes_object.ob_sval; - res = _Py_RemoteDebug_ReadRemoteMemory(handle, address + offset, len, buf); + size_t offset = unwinder->debug_offsets.bytes_object.ob_sval; + res = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, address + offset, len, buf); if (res < 0) { goto err; } @@ -249,45 +389,60 @@ read_py_bytes( return NULL; } - - static long -read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address) +read_py_long( + RemoteUnwinderObject *unwinder, + uintptr_t address +) { unsigned int shift = PYLONG_BITS_IN_DIGIT; - Py_ssize_t size; - uintptr_t lv_tag; - - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, address + offsets->long_object.lv_tag, - sizeof(uintptr_t), - &lv_tag); + // Read the entire PyLongObject at once + char long_obj[SIZEOF_LONG_OBJ]; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + unwinder->debug_offsets.long_object.size, + long_obj); if (bytes_read < 0) { return -1; } + uintptr_t lv_tag = GET_MEMBER(uintptr_t, long_obj, unwinder->debug_offsets.long_object.lv_tag); int negative = (lv_tag & 3) == 2; - size = lv_tag >> 3; + Py_ssize_t size = lv_tag >> 3; if (size == 0) { return 0; } - digit *digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); - if (!digits) { - PyErr_NoMemory(); - return -1; - } + // If the long object has inline digits, use them directly + digit *digits; + if (size <= _PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS) { + // For small integers, digits are inline in the long_value.ob_digit array + digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); + if (!digits) { + PyErr_NoMemory(); + return -1; + } + memcpy(digits, long_obj + unwinder->debug_offsets.long_object.ob_digit, size * sizeof(digit)); + } else { + // For larger integers, we need to read the digits separately + digits = (digit *)PyMem_RawMalloc(size * sizeof(digit)); + if (!digits) { + PyErr_NoMemory(); + return -1; + } - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->long_object.ob_digit, - sizeof(digit) * size, - digits - ); - if (bytes_read < 0) { - goto error; + bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address + unwinder->debug_offsets.long_object.ob_digit, + sizeof(digit) * size, + digits + ); + if (bytes_read < 0) { + goto error; + } } long long value = 0; @@ -310,44 +465,115 @@ read_py_long(proc_handle_t *handle, _Py_DebugOffsets* offsets, uintptr_t address return -1; } +/* ============================================================================ + * ASYNCIO DEBUG FUNCTIONS + * ============================================================================ */ + +// Get the PyAsyncioDebug section address for any platform +static uintptr_t +_Py_RemoteDebug_GetAsyncioDebugAddress(proc_handle_t* handle) +{ + uintptr_t address; + +#ifdef MS_WINDOWS + // On Windows, search for asyncio debug in executable or DLL + address = search_windows_map_for_section(handle, "AsyncioD", L"_asyncio"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#elif defined(__linux__) + // On Linux, search for asyncio debug in executable or DLL + address = search_linux_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#elif defined(__APPLE__) && TARGET_OS_OSX + // On macOS, try libpython first, then fall back to python + address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + if (address == 0) { + PyErr_Clear(); + address = search_map_for_section(handle, "AsyncioDebug", "_asyncio.cpython"); + } + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyObject *exc = PyErr_GetRaisedException(); + PyErr_SetString(PyExc_RuntimeError, "Failed to find the AsyncioDebug section in the process."); + _PyErr_ChainExceptions1(exc); + } +#else + Py_UNREACHABLE(); +#endif + + return address; +} + +static int +read_async_debug( + RemoteUnwinderObject *unwinder +) { + uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(&unwinder->handle); + if (!async_debug_addr) { + return -1; + } + + size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); + int result = _Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, async_debug_addr, size, &unwinder->async_debug_offsets); + return result; +} + +/* ============================================================================ + * ASYNCIO TASK PARSING FUNCTIONS + * ============================================================================ */ + static PyObject * parse_task_name( - proc_handle_t *handle, - _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, + RemoteUnwinderObject *unwinder, uintptr_t task_address ) { - uintptr_t task_name_addr; - int err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_name, - &task_name_addr); - if (err) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + task_address, + unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj); + if (err < 0) { return NULL; } - // The task name can be a long or a string so we need to check the type + uintptr_t task_name_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_name); + task_name_addr &= ~Py_TAG_BITS; - PyObject task_name_obj; - err = read_pyobj( - handle, + // The task name can be a long or a string so we need to check the type + char task_name_obj[SIZEOF_PYOBJECT]; + err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, task_name_addr, - &task_name_obj); - if (err) { + SIZEOF_PYOBJECT, + task_name_obj); + if (err < 0) { return NULL; } - unsigned long flags; - err = read_unsigned_long( - handle, - (uintptr_t)task_name_obj.ob_type + offsets->type_object.tp_flags, - &flags); - if (err) { + // Now read the type object to get the flags + char type_obj[SIZEOF_TYPE_OBJ]; + err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + GET_MEMBER(uintptr_t, task_name_obj, unwinder->debug_offsets.pyobject.ob_type), + SIZEOF_TYPE_OBJ, + type_obj); + if (err < 0) { return NULL; } - if ((flags & Py_TPFLAGS_LONG_SUBCLASS)) { - long res = read_py_long(handle, offsets, task_name_addr); + if ((GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_LONG_SUBCLASS)) { + long res = read_py_long(unwinder, task_name_addr); if (res == -1) { chain_exceptions(PyExc_RuntimeError, "Failed to get task name"); return NULL; @@ -355,355 +581,375 @@ parse_task_name( return PyUnicode_FromFormat("Task-%d", res); } - if(!(flags & Py_TPFLAGS_UNICODE_SUBCLASS)) { + if(!(GET_MEMBER(unsigned long, type_obj, unwinder->debug_offsets.type_object.tp_flags) & Py_TPFLAGS_UNICODE_SUBCLASS)) { PyErr_SetString(PyExc_RuntimeError, "Invalid task name object"); return NULL; } return read_py_str( - handle, - offsets, + unwinder, task_name_addr, 255 ); } -static int -parse_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame -); - -static int -parse_coro_chain( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t coro_address, - PyObject *render_to +static int parse_task_awaited_by( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *awaited_by, + int recurse_task ) { - assert((void*)coro_address != NULL); - - uintptr_t gen_type_addr; - int err = read_ptr( - handle, - coro_address + offsets->pyobject.ob_type, - &gen_type_addr); - if (err) { + // Read the entire TaskObj at once + char task_obj[SIZEOF_TASK_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { return -1; } - PyObject* name = NULL; - uintptr_t prev_frame; - if (parse_frame_object( - handle, - &name, - offsets, - coro_address + offsets->gen_object.gi_iframe, - &prev_frame) - < 0) - { - return -1; - } + uintptr_t task_ab_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by); + task_ab_addr &= ~Py_TAG_BITS; - if (PyList_Append(render_to, name)) { - Py_DECREF(name); - return -1; + if ((void*)task_ab_addr == NULL) { + return 0; } - Py_DECREF(name); - int8_t gi_frame_state; - err = read_sized_int( - handle, - coro_address + offsets->gen_object.gi_frame_state, - &gi_frame_state, - sizeof(int8_t) - ); - if (err) { - return -1; - } + char awaited_by_is_a_set = GET_MEMBER(char, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_awaited_by_is_set); - if (gi_frame_state == FRAME_SUSPENDED_YIELD_FROM) { - char owner; - err = read_char( - handle, - coro_address + offsets->gen_object.gi_iframe + - offsets->interpreter_frame.owner, - &owner - ); - if (err) { + if (awaited_by_is_a_set) { + if (parse_tasks_in_set(unwinder, task_ab_addr, awaited_by, recurse_task)) { return -1; } - if (owner != FRAME_OWNED_BY_GENERATOR) { - PyErr_SetString( - PyExc_RuntimeError, - "generator doesn't own its frame \\_o_/"); + } else { + if (parse_task(unwinder, task_ab_addr, awaited_by, recurse_task)) { return -1; } + } + + return 0; +} + +static int +handle_yield_from_frame( + RemoteUnwinderObject *unwinder, + uintptr_t gi_iframe_addr, + uintptr_t gen_type_addr, + PyObject *render_to +) { + // Read the entire interpreter frame at once + char iframe[SIZEOF_INTERP_FRAME]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + gi_iframe_addr, + SIZEOF_INTERP_FRAME, + iframe); + if (err < 0) { + return -1; + } + + if (GET_MEMBER(char, iframe, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR) { + PyErr_SetString( + PyExc_RuntimeError, + "generator doesn't own its frame \\_o_/"); + return -1; + } + + uintptr_t stackpointer_addr = GET_MEMBER(uintptr_t, iframe, unwinder->debug_offsets.interpreter_frame.stackpointer); + stackpointer_addr &= ~Py_TAG_BITS; - uintptr_t stackpointer_addr; + if ((void*)stackpointer_addr != NULL) { + uintptr_t gi_await_addr; err = read_py_ptr( - handle, - coro_address + offsets->gen_object.gi_iframe + - offsets->interpreter_frame.stackpointer, - &stackpointer_addr); + &unwinder->handle, + stackpointer_addr - sizeof(void*), + &gi_await_addr); if (err) { return -1; } - if ((void*)stackpointer_addr != NULL) { - uintptr_t gi_await_addr; - err = read_py_ptr( - handle, - stackpointer_addr - sizeof(void*), - &gi_await_addr); + if ((void*)gi_await_addr != NULL) { + uintptr_t gi_await_addr_type_addr; + err = read_ptr( + &unwinder->handle, + gi_await_addr + unwinder->debug_offsets.pyobject.ob_type, + &gi_await_addr_type_addr); if (err) { return -1; } - if ((void*)gi_await_addr != NULL) { - uintptr_t gi_await_addr_type_addr; - int err = read_ptr( - handle, - gi_await_addr + offsets->pyobject.ob_type, - &gi_await_addr_type_addr); + if (gen_type_addr == gi_await_addr_type_addr) { + /* This needs an explanation. We always start with parsing + native coroutine / generator frames. Ultimately they + are awaiting on something. That something can be + a native coroutine frame or... an iterator. + If it's the latter -- we can't continue building + our chain. So the condition to bail out of this is + to do that when the type of the current coroutine + doesn't match the type of whatever it points to + in its cr_await. + */ + err = parse_coro_chain(unwinder, gi_await_addr, render_to); if (err) { return -1; } - - if (gen_type_addr == gi_await_addr_type_addr) { - /* This needs an explanation. We always start with parsing - native coroutine / generator frames. Ultimately they - are awaiting on something. That something can be - a native coroutine frame or... an iterator. - If it's the latter -- we can't continue building - our chain. So the condition to bail out of this is - to do that when the type of the current coroutine - doesn't match the type of whatever it points to - in its cr_await. - */ - err = parse_coro_chain( - handle, - offsets, - async_offsets, - gi_await_addr, - render_to - ); - if (err) { - return -1; - } - } } } - } return 0; } - static int -parse_task_awaited_by( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t task_address, - PyObject *awaited_by, - int recurse_task -); +parse_coro_chain( + RemoteUnwinderObject *unwinder, + uintptr_t coro_address, + PyObject *render_to +) { + assert((void*)coro_address != NULL); + + // Read the entire generator object at once + char gen_object[SIZEOF_GEN_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + coro_address, + SIZEOF_GEN_OBJ, + gen_object); + if (err < 0) { + return -1; + } + uintptr_t gen_type_addr = GET_MEMBER(uintptr_t, gen_object, unwinder->debug_offsets.pyobject.ob_type); -static int -parse_task( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, + PyObject* name = NULL; + + // Parse the previous frame using the gi_iframe from local copy + uintptr_t prev_frame; + uintptr_t gi_iframe_addr = coro_address + unwinder->debug_offsets.gen_object.gi_iframe; + if (parse_frame_object(unwinder, &name, gi_iframe_addr, &prev_frame) < 0) { + return -1; + } + + if (PyList_Append(render_to, name)) { + Py_DECREF(name); + return -1; + } + Py_DECREF(name); + + if (GET_MEMBER(int8_t, gen_object, unwinder->debug_offsets.gen_object.gi_frame_state) == FRAME_SUSPENDED_YIELD_FROM) { + return handle_yield_from_frame(unwinder, gi_iframe_addr, gen_type_addr, render_to); + } + + return 0; +} + +static PyObject* +create_task_result( + RemoteUnwinderObject *unwinder, uintptr_t task_address, - PyObject *render_to, int recurse_task ) { - char is_task; - int err = read_char( - handle, - task_address + async_offsets->asyncio_task_object.task_is_task, - &is_task); - if (err) { - return -1; - } + PyObject* result = NULL; + PyObject *call_stack = NULL; + PyObject *tn = NULL; + char task_obj[SIZEOF_TASK_OBJ]; + uintptr_t coro_addr; - PyObject* result = PyList_New(0); + result = PyList_New(0); if (result == NULL) { - return -1; + goto error; } - PyObject *call_stack = PyList_New(0); + call_stack = PyList_New(0); if (call_stack == NULL) { - goto err; + goto error; } + if (PyList_Append(result, call_stack)) { - Py_DECREF(call_stack); - goto err; + goto error; } - /* we can operate on a borrowed one to simplify cleanup */ - Py_DECREF(call_stack); + Py_CLEAR(call_stack); - if (is_task) { - PyObject *tn = NULL; - if (recurse_task) { - tn = parse_task_name( - handle, offsets, async_offsets, task_address); - } else { - tn = PyLong_FromUnsignedLongLong(task_address); + if (recurse_task) { + tn = parse_task_name(unwinder, task_address); + } else { + tn = PyLong_FromUnsignedLongLong(task_address); + } + if (tn == NULL) { + goto error; + } + + if (PyList_Append(result, tn)) { + goto error; + } + Py_CLEAR(tn); + + // Parse coroutine chain + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, task_address, + unwinder->async_debug_offsets.asyncio_task_object.size, + task_obj) < 0) { + goto error; + } + + coro_addr = GET_MEMBER(uintptr_t, task_obj, unwinder->async_debug_offsets.asyncio_task_object.task_coro); + coro_addr &= ~Py_TAG_BITS; + + if ((void*)coro_addr != NULL) { + call_stack = PyList_New(0); + if (call_stack == NULL) { + goto error; } - if (tn == NULL) { - goto err; + + if (parse_coro_chain(unwinder, coro_addr, call_stack) < 0) { + Py_DECREF(call_stack); + goto error; } - if (PyList_Append(result, tn)) { - Py_DECREF(tn); - goto err; + + if (PyList_Reverse(call_stack)) { + Py_DECREF(call_stack); + goto error; } - Py_DECREF(tn); - uintptr_t coro_addr; - err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_coro, - &coro_addr); - if (err) { - goto err; + if (PyList_SetItem(result, 0, call_stack) < 0) { + Py_DECREF(call_stack); + goto error; } + } - if ((void*)coro_addr != NULL) { - err = parse_coro_chain( - handle, - offsets, - async_offsets, - coro_addr, - call_stack - ); - if (err) { - goto err; - } + return result; - if (PyList_Reverse(call_stack)) { - goto err; - } +error: + Py_XDECREF(result); + Py_XDECREF(call_stack); + Py_XDECREF(tn); + return NULL; +} + +static int +parse_task( + RemoteUnwinderObject *unwinder, + uintptr_t task_address, + PyObject *render_to, + int recurse_task +) { + char is_task; + PyObject* result = NULL; + PyObject* awaited_by = NULL; + int err; + + err = read_char( + &unwinder->handle, + task_address + unwinder->async_debug_offsets.asyncio_task_object.task_is_task, + &is_task); + if (err) { + goto error; + } + + if (is_task) { + result = create_task_result(unwinder, task_address, recurse_task); + if (!result) { + goto error; + } + } else { + result = PyList_New(0); + if (result == NULL) { + goto error; } } if (PyList_Append(render_to, result)) { - goto err; + goto error; } if (recurse_task) { - PyObject *awaited_by = PyList_New(0); + awaited_by = PyList_New(0); if (awaited_by == NULL) { - goto err; + goto error; } + if (PyList_Append(result, awaited_by)) { - Py_DECREF(awaited_by); - goto err; + goto error; } - /* we can operate on a borrowed one to simplify cleanup */ Py_DECREF(awaited_by); - if (parse_task_awaited_by(handle, offsets, async_offsets, - task_address, awaited_by, 1) - ) { - goto err; + /* awaited_by is borrowed from 'result' to simplify cleanup */ + if (parse_task_awaited_by(unwinder, task_address, awaited_by, 1) < 0) { + // Clear the pointer so the cleanup doesn't try to decref it since + // it's borrowed from 'result' and will be decrefed when result is + // deleted. + awaited_by = NULL; + goto error; } } Py_DECREF(result); return 0; -err: - Py_DECREF(result); +error: + Py_XDECREF(result); + Py_XDECREF(awaited_by); return -1; } static int -parse_tasks_in_set( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t set_addr, +process_set_entry( + RemoteUnwinderObject *unwinder, + uintptr_t table_ptr, PyObject *awaited_by, int recurse_task ) { - uintptr_t set_obj; - if (read_py_ptr( - handle, - set_addr, - &set_obj) - ) { + uintptr_t key_addr; + if (read_py_ptr(&unwinder->handle, table_ptr, &key_addr)) { return -1; } - Py_ssize_t num_els; - if (read_Py_ssize_t( - handle, - set_obj + offsets->set_object.used, - &num_els) - ) { - return -1; - } + if ((void*)key_addr != NULL) { + Py_ssize_t ref_cnt; + if (read_Py_ssize_t(&unwinder->handle, table_ptr, &ref_cnt)) { + return -1; + } - Py_ssize_t set_len; - if (read_Py_ssize_t( - handle, - set_obj + offsets->set_object.mask, - &set_len) - ) { - return -1; + if (ref_cnt) { + // if 'ref_cnt=0' it's a set dummy marker + if (parse_task(unwinder, key_addr, awaited_by, recurse_task)) { + return -1; + } + return 1; // Successfully processed a valid entry + } } - set_len++; // The set contains the `mask+1` element slots. + return 0; // Entry was NULL or dummy marker +} - uintptr_t table_ptr; - if (read_ptr( - handle, - set_obj + offsets->set_object.table, - &table_ptr) - ) { +static int +parse_tasks_in_set( + RemoteUnwinderObject *unwinder, + uintptr_t set_addr, + PyObject *awaited_by, + int recurse_task +) { + char set_object[SIZEOF_SET_OBJ]; + int err = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + set_addr, + SIZEOF_SET_OBJ, + set_object); + if (err < 0) { return -1; } + Py_ssize_t num_els = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.used); + Py_ssize_t set_len = GET_MEMBER(Py_ssize_t, set_object, unwinder->debug_offsets.set_object.mask) + 1; // The set contains the `mask+1` element slots. + uintptr_t table_ptr = GET_MEMBER(uintptr_t, set_object, unwinder->debug_offsets.set_object.table); + Py_ssize_t i = 0; Py_ssize_t els = 0; - while (i < set_len) { - uintptr_t key_addr; - if (read_py_ptr(handle, table_ptr, &key_addr)) { + while (i < set_len && els < num_els) { + int result = process_set_entry(unwinder, table_ptr, awaited_by, recurse_task); + + if (result < 0) { return -1; } - - if ((void*)key_addr != NULL) { - Py_ssize_t ref_cnt; - if (read_Py_ssize_t(handle, table_ptr, &ref_cnt)) { - return -1; - } - - if (ref_cnt) { - // if 'ref_cnt=0' it's a set dummy marker - - if (parse_task( - handle, - offsets, - async_offsets, - key_addr, - awaited_by, - recurse_task - ) - ) { - return -1; - } - - if (++els == num_els) { - break; - } - } + if (result > 0) { + els++; } table_ptr += sizeof(void*) * 2; @@ -714,81 +960,224 @@ parse_tasks_in_set( static int -parse_task_awaited_by( - proc_handle_t *handle, - struct _Py_DebugOffsets* offsets, - struct _Py_AsyncioModuleDebugOffsets* async_offsets, - uintptr_t task_address, - PyObject *awaited_by, - int recurse_task +setup_async_result_structure(PyObject **result, PyObject **calls) +{ + *result = PyList_New(1); + if (*result == NULL) { + return -1; + } + + *calls = PyList_New(0); + if (*calls == NULL) { + Py_DECREF(*result); + *result = NULL; + return -1; + } + + if (PyList_SetItem(*result, 0, *calls)) { /* steals ref to 'calls' */ + Py_DECREF(*calls); + Py_DECREF(*result); + *result = NULL; + *calls = NULL; + return -1; + } + + return 0; +} + +static int +add_task_info_to_result( + RemoteUnwinderObject *self, + PyObject *result, + uintptr_t running_task_addr ) { - uintptr_t task_ab_addr; - int err = read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - &task_ab_addr); - if (err) { + PyObject *tn = parse_task_name(self, running_task_addr); + if (tn == NULL) { return -1; } - if ((void*)task_ab_addr == NULL) { - return 0; + if (PyList_Append(result, tn)) { + Py_DECREF(tn); + return -1; } + Py_DECREF(tn); - char awaited_by_is_a_set; - err = read_char( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by_is_set, - &awaited_by_is_a_set); - if (err) { + PyObject* awaited_by = PyList_New(0); + if (awaited_by == NULL) { return -1; } - if (awaited_by_is_a_set) { - if (parse_tasks_in_set( - handle, - offsets, - async_offsets, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - awaited_by, - recurse_task - ) - ) { - return -1; - } - } else { - uintptr_t sub_task; - if (read_py_ptr( - handle, - task_address + async_offsets->asyncio_task_object.task_awaited_by, - &sub_task) - ) { - return -1; - } + if (PyList_Append(result, awaited_by)) { + Py_DECREF(awaited_by); + return -1; + } + Py_DECREF(awaited_by); - if (parse_task( - handle, - offsets, - async_offsets, - sub_task, - awaited_by, - recurse_task - ) - ) { - return -1; - } + if (parse_task_awaited_by( + self, running_task_addr, awaited_by, 1) < 0) { + return -1; } return 0; } -typedef struct +static int +process_single_task_node( + RemoteUnwinderObject *unwinder, + uintptr_t task_addr, + PyObject *result +) { + PyObject *tn = NULL; + PyObject *current_awaited_by = NULL; + PyObject *task_id = NULL; + PyObject *result_item = NULL; + + tn = parse_task_name(unwinder, task_addr); + if (tn == NULL) { + goto error; + } + + current_awaited_by = PyList_New(0); + if (current_awaited_by == NULL) { + goto error; + } + + task_id = PyLong_FromUnsignedLongLong(task_addr); + if (task_id == NULL) { + goto error; + } + + result_item = PyTuple_New(3); + if (result_item == NULL) { + goto error; + } + + PyTuple_SET_ITEM(result_item, 0, task_id); // steals ref + PyTuple_SET_ITEM(result_item, 1, tn); // steals ref + PyTuple_SET_ITEM(result_item, 2, current_awaited_by); // steals ref + + // References transferred to tuple + task_id = NULL; + tn = NULL; + current_awaited_by = NULL; + + if (PyList_Append(result, result_item)) { + Py_DECREF(result_item); + return -1; + } + Py_DECREF(result_item); + + // Get back current_awaited_by reference for parse_task_awaited_by + current_awaited_by = PyTuple_GET_ITEM(result_item, 2); + if (parse_task_awaited_by(unwinder, task_addr, current_awaited_by, 0) < 0) { + return -1; + } + + return 0; + +error: + Py_XDECREF(tn); + Py_XDECREF(current_awaited_by); + Py_XDECREF(task_id); + Py_XDECREF(result_item); + return -1; +} + +/* ============================================================================ + * TLBC CACHING FUNCTIONS + * ============================================================================ */ + +#ifdef Py_GIL_DISABLED + +typedef struct { + void *tlbc_array; // Local copy of the TLBC array + Py_ssize_t tlbc_array_size; // Size of the TLBC array + uint32_t generation; // Generation when this was cached +} TLBCCacheEntry; + +static void +tlbc_cache_entry_destroy(void *ptr) { - int lineno; - int end_lineno; - int column; - int end_column; -} LocationInfo; + TLBCCacheEntry *entry = (TLBCCacheEntry *)ptr; + if (entry->tlbc_array) { + PyMem_RawFree(entry->tlbc_array); + } + PyMem_RawFree(entry); +} + +static TLBCCacheEntry * +get_tlbc_cache_entry(RemoteUnwinderObject *self, uintptr_t code_addr, uint32_t current_generation) +{ + void *key = (void *)code_addr; + TLBCCacheEntry *entry = _Py_hashtable_get(self->tlbc_cache, key); + + if (entry && entry->generation != current_generation) { + // Entry is stale, remove it by setting to NULL + _Py_hashtable_set(self->tlbc_cache, key, NULL); + entry = NULL; + } + + return entry; +} + +static int +cache_tlbc_array(RemoteUnwinderObject *self, uintptr_t code_addr, uintptr_t tlbc_array_addr, uint32_t generation) +{ + uintptr_t tlbc_array_ptr; + void *tlbc_array = NULL; + TLBCCacheEntry *entry = NULL; + + // Read the TLBC array pointer + if (read_ptr(&self->handle, tlbc_array_addr, &tlbc_array_ptr) != 0 || tlbc_array_ptr == 0) { + return 0; // No TLBC array + } + + // Read the TLBC array size + Py_ssize_t tlbc_size; + if (_Py_RemoteDebug_PagedReadRemoteMemory(&self->handle, tlbc_array_ptr, sizeof(tlbc_size), &tlbc_size) != 0 || tlbc_size <= 0) { + return 0; // Invalid size + } + + // Allocate and read the entire TLBC array + size_t array_data_size = tlbc_size * sizeof(void*); + tlbc_array = PyMem_RawMalloc(sizeof(Py_ssize_t) + array_data_size); + if (!tlbc_array) { + return -1; // Memory error + } + + if (_Py_RemoteDebug_PagedReadRemoteMemory(&self->handle, tlbc_array_ptr, sizeof(Py_ssize_t) + array_data_size, tlbc_array) != 0) { + PyMem_RawFree(tlbc_array); + return 0; // Read error + } + + // Create cache entry + entry = PyMem_RawMalloc(sizeof(TLBCCacheEntry)); + if (!entry) { + PyMem_RawFree(tlbc_array); + return -1; // Memory error + } + + entry->tlbc_array = tlbc_array; + entry->tlbc_array_size = tlbc_size; + entry->generation = generation; + + // Store in cache + void *key = (void *)code_addr; + if (_Py_hashtable_set(self->tlbc_cache, key, entry) < 0) { + tlbc_cache_entry_destroy(entry); + return -1; // Cache error + } + + return 1; // Success +} + + + +#endif + +/* ============================================================================ + * LINE TABLE PARSING FUNCTIONS + * ============================================================================ */ static int scan_varint(const uint8_t **ptr) @@ -818,7 +1207,6 @@ scan_signed_varint(const uint8_t **ptr) } } - static bool parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, LocationInfo* info) { @@ -863,7 +1251,9 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L } default: { uint8_t second_byte = *(ptr++); - assert((second_byte & 128) == 0); + if ((second_byte & 128) != 0) { + return false; + } info->column = code << 3 | (second_byte >> 4); info->end_column = info->column + (second_byte & 15); break; @@ -877,240 +1267,387 @@ parse_linetable(const uintptr_t addrq, const char* linetable, int firstlineno, L return false; } +/* ============================================================================ + * CODE OBJECT AND FRAME PARSING FUNCTIONS + * ============================================================================ */ + static int -read_remote_pointer(proc_handle_t *handle, uintptr_t address, uintptr_t *out_ptr, const char *error_message) +parse_code_object(RemoteUnwinderObject *unwinder, + PyObject **result, + uintptr_t address, + uintptr_t instruction_pointer, + uintptr_t *previous_frame, + int32_t tlbc_index) { - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory(handle, address, sizeof(void *), out_ptr); - if (bytes_read < 0) { - return -1; + void *key = (void *)address; + CachedCodeMetadata *meta = NULL; + PyObject *func = NULL; + PyObject *file = NULL; + PyObject *linetable = NULL; + PyObject *lineno = NULL; + PyObject *tuple = NULL; + +#ifdef Py_GIL_DISABLED + // In free threading builds, code object addresses might have the low bit set + // as a flag, so we need to mask it off to get the real address + uintptr_t real_address = address & (~1); +#else + uintptr_t real_address = address; +#endif + + if (unwinder && unwinder->code_object_cache != NULL) { + meta = _Py_hashtable_get(unwinder->code_object_cache, key); } - if ((void *)(*out_ptr) == NULL) { - PyErr_SetString(PyExc_RuntimeError, error_message); - return -1; + if (meta == NULL) { + char code_object[SIZEOF_CODE_OBJ]; + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, real_address, SIZEOF_CODE_OBJ, code_object) < 0) + { + goto error; + } + + func = read_py_str(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.qualname), 1024); + if (!func) { + goto error; + } + + file = read_py_str(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.filename), 1024); + if (!file) { + goto error; + } + + linetable = read_py_bytes(unwinder, + GET_MEMBER(uintptr_t, code_object, unwinder->debug_offsets.code_object.linetable), 4096); + if (!linetable) { + goto error; + } + + meta = PyMem_RawMalloc(sizeof(CachedCodeMetadata)); + if (!meta) { + goto error; + } + + meta->func_name = func; + meta->file_name = file; + meta->linetable = linetable; + meta->first_lineno = GET_MEMBER(int, code_object, unwinder->debug_offsets.code_object.firstlineno); + meta->addr_code_adaptive = real_address + unwinder->debug_offsets.code_object.co_code_adaptive; + + if (unwinder && unwinder->code_object_cache && _Py_hashtable_set(unwinder->code_object_cache, key, meta) < 0) { + cached_code_metadata_destroy(meta); + goto error; + } + + // Ownership transferred to meta + func = NULL; + file = NULL; + linetable = NULL; + } + + uintptr_t ip = instruction_pointer; + ptrdiff_t addrq; + +#ifdef Py_GIL_DISABLED + // Handle thread-local bytecode (TLBC) in free threading builds + if (tlbc_index == 0 || unwinder->debug_offsets.code_object.co_tlbc == 0 || unwinder == NULL) { + // No TLBC or no unwinder - use main bytecode directly + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; + goto done_tlbc; + } + + // Try to get TLBC data from cache (we'll get generation from the caller) + TLBCCacheEntry *tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); + + if (!tlbc_entry) { + // Cache miss - try to read and cache TLBC array + if (cache_tlbc_array(unwinder, real_address, real_address + unwinder->debug_offsets.code_object.co_tlbc, unwinder->tlbc_generation) > 0) { + tlbc_entry = get_tlbc_cache_entry(unwinder, real_address, unwinder->tlbc_generation); + } + } + + if (tlbc_entry && tlbc_index < tlbc_entry->tlbc_array_size) { + // Use cached TLBC data + uintptr_t *entries = (uintptr_t *)((char *)tlbc_entry->tlbc_array + sizeof(Py_ssize_t)); + uintptr_t tlbc_bytecode_addr = entries[tlbc_index]; + + if (tlbc_bytecode_addr != 0) { + // Calculate offset from TLBC bytecode + addrq = (uint16_t *)ip - (uint16_t *)tlbc_bytecode_addr; + goto done_tlbc; + } + } + + // Fall back to main bytecode + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; + +done_tlbc: +#else + // Non-free-threaded build, always use the main bytecode + (void)tlbc_index; // Suppress unused parameter warning + (void)unwinder; // Suppress unused parameter warning + addrq = (uint16_t *)ip - (uint16_t *)meta->addr_code_adaptive; +#endif + ; // Empty statement to avoid C23 extension warning + LocationInfo info = {0}; + bool ok = parse_linetable(addrq, PyBytes_AS_STRING(meta->linetable), + meta->first_lineno, &info); + if (!ok) { + info.lineno = -1; + } + + lineno = PyLong_FromLong(info.lineno); + if (!lineno) { + goto error; + } + + tuple = PyTuple_New(3); + if (!tuple) { + goto error; } + Py_INCREF(meta->func_name); + Py_INCREF(meta->file_name); + PyTuple_SET_ITEM(tuple, 0, meta->func_name); + PyTuple_SET_ITEM(tuple, 1, meta->file_name); + PyTuple_SET_ITEM(tuple, 2, lineno); + + *result = tuple; return 0; + +error: + Py_XDECREF(func); + Py_XDECREF(file); + Py_XDECREF(linetable); + Py_XDECREF(lineno); + Py_XDECREF(tuple); + return -1; } -static int -read_instruction_ptr(proc_handle_t *handle, struct _Py_DebugOffsets *offsets, - uintptr_t current_frame, uintptr_t *instruction_ptr) +/* ============================================================================ + * STACK CHUNK MANAGEMENT FUNCTIONS + * ============================================================================ */ + +static void +cleanup_stack_chunks(StackChunkList *chunks) { - return read_remote_pointer( - handle, - current_frame + offsets->interpreter_frame.instr_ptr, - instruction_ptr, - "No instruction ptr found" - ); + for (size_t i = 0; i < chunks->count; ++i) { + PyMem_RawFree(chunks->chunks[i].local_copy); + } + PyMem_RawFree(chunks->chunks); } static int -parse_code_object(proc_handle_t *handle, - PyObject **result, - struct _Py_DebugOffsets *offsets, - uintptr_t address, - uintptr_t current_frame, - uintptr_t *previous_frame) -{ - uintptr_t addr_func_name, addr_file_name, addr_linetable, instruction_ptr; +process_single_stack_chunk( + proc_handle_t *handle, + uintptr_t chunk_addr, + StackChunkInfo *chunk_info +) { + // Start with default size assumption + size_t current_size = _PY_DATA_STACK_CHUNK_SIZE; - if (read_remote_pointer(handle, address + offsets->code_object.qualname, &addr_func_name, "No function name found") < 0 || - read_remote_pointer(handle, address + offsets->code_object.filename, &addr_file_name, "No file name found") < 0 || - read_remote_pointer(handle, address + offsets->code_object.linetable, &addr_linetable, "No linetable found") < 0 || - read_instruction_ptr(handle, offsets, current_frame, &instruction_ptr) < 0) { + char *this_chunk = PyMem_RawMalloc(current_size); + if (!this_chunk) { + PyErr_NoMemory(); return -1; } - int firstlineno; - if (_Py_RemoteDebug_ReadRemoteMemory(handle, - address + offsets->code_object.firstlineno, - sizeof(int), - &firstlineno) < 0) { + if (_Py_RemoteDebug_PagedReadRemoteMemory(handle, chunk_addr, current_size, this_chunk) < 0) { + PyMem_RawFree(this_chunk); return -1; } - PyObject *py_linetable = read_py_bytes(handle, offsets, addr_linetable); - if (!py_linetable) { - return -1; + // Check actual size and reread if necessary + size_t actual_size = GET_MEMBER(size_t, this_chunk, offsetof(_PyStackChunk, size)); + if (actual_size != current_size) { + this_chunk = PyMem_RawRealloc(this_chunk, actual_size); + if (!this_chunk) { + PyErr_NoMemory(); + return -1; + } + + if (_Py_RemoteDebug_PagedReadRemoteMemory(handle, chunk_addr, actual_size, this_chunk) < 0) { + PyMem_RawFree(this_chunk); + return -1; + } + current_size = actual_size; } - uintptr_t addr_code_adaptive = address + offsets->code_object.co_code_adaptive; - ptrdiff_t addrq = (uint16_t *)instruction_ptr - (uint16_t *)addr_code_adaptive; + chunk_info->remote_addr = chunk_addr; + chunk_info->size = current_size; + chunk_info->local_copy = this_chunk; + return 0; +} - LocationInfo info; - parse_linetable(addrq, PyBytes_AS_STRING(py_linetable), firstlineno, &info); - Py_DECREF(py_linetable); // Done with linetable +static int +copy_stack_chunks(RemoteUnwinderObject *unwinder, + uintptr_t tstate_addr, + StackChunkList *out_chunks) +{ + uintptr_t chunk_addr; + StackChunkInfo *chunks = NULL; + size_t count = 0; + size_t max_chunks = 16; - PyObject *py_line = PyLong_FromLong(info.lineno); - if (!py_line) { + if (read_ptr(&unwinder->handle, tstate_addr + unwinder->debug_offsets.thread_state.datastack_chunk, &chunk_addr)) { return -1; } - PyObject *py_func_name = read_py_str(handle, offsets, addr_func_name, 256); - if (!py_func_name) { - Py_DECREF(py_line); + chunks = PyMem_RawMalloc(max_chunks * sizeof(StackChunkInfo)); + if (!chunks) { + PyErr_NoMemory(); return -1; } - PyObject *py_file_name = read_py_str(handle, offsets, addr_file_name, 256); - if (!py_file_name) { - Py_DECREF(py_line); - Py_DECREF(py_func_name); - return -1; + while (chunk_addr != 0) { + // Grow array if needed + if (count >= max_chunks) { + max_chunks *= 2; + StackChunkInfo *new_chunks = PyMem_RawRealloc(chunks, max_chunks * sizeof(StackChunkInfo)); + if (!new_chunks) { + PyErr_NoMemory(); + goto error; + } + chunks = new_chunks; + } + + // Process this chunk + if (process_single_stack_chunk(&unwinder->handle, chunk_addr, &chunks[count]) < 0) { + goto error; + } + + // Get next chunk address and increment count + chunk_addr = GET_MEMBER(uintptr_t, chunks[count].local_copy, offsetof(_PyStackChunk, previous)); + count++; } - PyObject *result_tuple = PyTuple_New(3); - if (!result_tuple) { - Py_DECREF(py_line); - Py_DECREF(py_func_name); - Py_DECREF(py_file_name); - return -1; + out_chunks->chunks = chunks; + out_chunks->count = count; + return 0; + +error: + for (size_t i = 0; i < count; ++i) { + PyMem_RawFree(chunks[i].local_copy); } + PyMem_RawFree(chunks); + return -1; +} - PyTuple_SET_ITEM(result_tuple, 0, py_func_name); // steals ref - PyTuple_SET_ITEM(result_tuple, 1, py_file_name); // steals ref - PyTuple_SET_ITEM(result_tuple, 2, py_line); // steals ref +static void * +find_frame_in_chunks(StackChunkList *chunks, uintptr_t remote_ptr) +{ + for (size_t i = 0; i < chunks->count; ++i) { + uintptr_t base = chunks->chunks[i].remote_addr + offsetof(_PyStackChunk, data); + size_t payload = chunks->chunks[i].size - offsetof(_PyStackChunk, data); - *result = result_tuple; - return 0; + if (remote_ptr >= base && remote_ptr < base + payload) { + return (char *)chunks->chunks[i].local_copy + (remote_ptr - chunks->chunks[i].remote_addr); + } + } + return NULL; } static int -parse_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, +parse_frame_from_chunks( + RemoteUnwinderObject *unwinder, + PyObject **result, uintptr_t address, - uintptr_t* previous_frame + uintptr_t *previous_frame, + StackChunkList *chunks ) { - int err; - - Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->interpreter_frame.previous, - sizeof(void*), - previous_frame - ); - if (bytes_read < 0) { + void *frame_ptr = find_frame_in_chunks(chunks, address); + if (!frame_ptr) { return -1; } - char owner; - if (read_char(handle, address + offsets->interpreter_frame.owner, &owner)) { - return -1; - } + char *frame = (char *)frame_ptr; + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); - if (owner >= FRAME_OWNED_BY_INTERPRETER) { + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) >= FRAME_OWNED_BY_INTERPRETER || + !GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable)) { return 0; } - uintptr_t address_of_code_object; - err = read_py_ptr( - handle, - address + offsets->interpreter_frame.executable, - &address_of_code_object - ); - if (err) { - return -1; - } + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); - if ((void*)address_of_code_object == NULL) { - return 0; + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); } +#endif return parse_code_object( - handle, result, offsets, address_of_code_object, address, previous_frame); + unwinder, result, GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable), + instruction_pointer, previous_frame, tlbc_index); } +/* ============================================================================ + * INTERPRETER STATE AND THREAD DISCOVERY FUNCTIONS + * ============================================================================ */ + static int -parse_async_frame_object( - proc_handle_t *handle, - PyObject** result, - struct _Py_DebugOffsets* offsets, - uintptr_t address, - uintptr_t* previous_frame, - uintptr_t* code_object +populate_initial_state_data( + int all_threads, + RemoteUnwinderObject *unwinder, + uintptr_t runtime_start_address, + uintptr_t *interpreter_state, + uintptr_t *tstate ) { - int err; - - Py_ssize_t bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - address + offsets->interpreter_frame.previous, - sizeof(void*), - previous_frame - ); - if (bytes_read < 0) { - return -1; - } + uint64_t interpreter_state_list_head = + unwinder->debug_offsets.runtime_state.interpreters_head; - char owner; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, address + offsets->interpreter_frame.owner, sizeof(char), &owner); + uintptr_t address_of_interpreter_state; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + runtime_start_address + interpreter_state_list_head, + sizeof(void*), + &address_of_interpreter_state); if (bytes_read < 0) { return -1; } - if (owner == FRAME_OWNED_BY_CSTACK || owner == FRAME_OWNED_BY_INTERPRETER) { - return 0; // C frame - } - - if (owner != FRAME_OWNED_BY_GENERATOR - && owner != FRAME_OWNED_BY_THREAD) { - PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", owner); + if (address_of_interpreter_state == 0) { + PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); return -1; } - err = read_py_ptr( - handle, - address + offsets->interpreter_frame.executable, - code_object - ); - if (err) { - return -1; - } + *interpreter_state = address_of_interpreter_state; - assert(code_object != NULL); - if ((void*)*code_object == NULL) { + if (all_threads) { + *tstate = 0; return 0; } - if (parse_code_object( - handle, result, offsets, *code_object, address, previous_frame)) { - return -1; - } - - return 1; -} + uintptr_t address_of_thread = address_of_interpreter_state + + unwinder->debug_offsets.interpreter_state.threads_main; -static int -read_async_debug( - proc_handle_t *handle, - struct _Py_AsyncioModuleDebugOffsets* async_debug -) { - uintptr_t async_debug_addr = _Py_RemoteDebug_GetAsyncioDebugAddress(handle); - if (!async_debug_addr) { + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address_of_thread, + sizeof(void*), + tstate) < 0) { return -1; } - size_t size = sizeof(struct _Py_AsyncioModuleDebugOffsets); - int result = _Py_RemoteDebug_ReadRemoteMemory(handle, async_debug_addr, size, async_debug); - return result; + return 0; } static int find_running_frame( - proc_handle_t *handle, + RemoteUnwinderObject *unwinder, uintptr_t runtime_start_address, - _Py_DebugOffsets* local_debug_offsets, uintptr_t *frame ) { uint64_t interpreter_state_list_head = - local_debug_offsets->runtime_state.interpreters_head; + unwinder->debug_offsets.runtime_state.interpreters_head; uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); @@ -1124,10 +1661,10 @@ find_running_frame( } uintptr_t address_of_thread; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, + bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, address_of_interpreter_state + - local_debug_offsets->interpreter_state.threads_main, + unwinder->debug_offsets.interpreter_state.threads_main, sizeof(void*), &address_of_thread); if (bytes_read < 0) { @@ -1137,8 +1674,8 @@ find_running_frame( // No Python frames are available for us (can happen at tear-down). if ((void*)address_of_thread != NULL) { int err = read_ptr( - handle, - address_of_thread + local_debug_offsets->thread_state.current_frame, + &unwinder->handle, + address_of_thread + unwinder->debug_offsets.thread_state.current_frame, frame); if (err) { return -1; @@ -1152,21 +1689,18 @@ find_running_frame( static int find_running_task( - proc_handle_t *handle, - uintptr_t runtime_start_address, - _Py_DebugOffsets *local_debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, + RemoteUnwinderObject *unwinder, uintptr_t *running_task_addr ) { *running_task_addr = (uintptr_t)NULL; uint64_t interpreter_state_list_head = - local_debug_offsets->runtime_state.interpreters_head; + unwinder->debug_offsets.runtime_state.interpreters_head; uintptr_t address_of_interpreter_state; - int bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, - runtime_start_address + interpreter_state_list_head, + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + unwinder->runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); if (bytes_read < 0) { @@ -1179,10 +1713,10 @@ find_running_task( } uintptr_t address_of_thread; - bytes_read = _Py_RemoteDebug_ReadRemoteMemory( - handle, + bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, address_of_interpreter_state + - local_debug_offsets->interpreter_state.threads_head, + unwinder->debug_offsets.interpreter_state.threads_head, sizeof(void*), &address_of_thread); if (bytes_read < 0) { @@ -1196,9 +1730,9 @@ find_running_task( } bytes_read = read_py_ptr( - handle, + &unwinder->handle, address_of_thread - + async_offsets->asyncio_thread_state.asyncio_running_loop, + + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_loop, &address_of_running_loop); if (bytes_read == -1) { return -1; @@ -1210,9 +1744,9 @@ find_running_task( } int err = read_ptr( - handle, + &unwinder->handle, address_of_thread - + async_offsets->asyncio_thread_state.asyncio_running_task, + + unwinder->async_debug_offsets.asyncio_thread_state.asyncio_running_task, running_task_addr); if (err) { return -1; @@ -1222,579 +1756,936 @@ find_running_task( } static int -append_awaited_by_for_thread( - proc_handle_t *handle, - uintptr_t head_addr, - struct _Py_DebugOffsets *debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, - PyObject *result +find_running_task_and_coro( + RemoteUnwinderObject *self, + uintptr_t *running_task_addr, + uintptr_t *running_coro_addr, + uintptr_t *running_task_code_obj ) { - struct llist_node task_node; - - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - head_addr, - sizeof(task_node), - &task_node)) - { + *running_task_addr = (uintptr_t)NULL; + if (find_running_task( + self, running_task_addr) < 0) { + chain_exceptions(PyExc_RuntimeError, "Failed to find running task"); return -1; } - size_t iteration_count = 0; - const size_t MAX_ITERATIONS = 2 << 15; // A reasonable upper bound - while ((uintptr_t)task_node.next != head_addr) { - if (++iteration_count > MAX_ITERATIONS) { - PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted"); - return -1; - } - - if (task_node.next == NULL) { - PyErr_SetString( - PyExc_RuntimeError, - "Invalid linked list structure reading remote memory"); - return -1; - } - - uintptr_t task_addr = (uintptr_t)task_node.next - - async_offsets->asyncio_task_object.task_node; - - PyObject *tn = parse_task_name( - handle, - debug_offsets, - async_offsets, - task_addr); - if (tn == NULL) { - return -1; - } - - PyObject *current_awaited_by = PyList_New(0); - if (current_awaited_by == NULL) { - Py_DECREF(tn); - return -1; - } - - PyObject* task_id = PyLong_FromUnsignedLongLong(task_addr); - if (task_id == NULL) { - Py_DECREF(tn); - Py_DECREF(current_awaited_by); - return -1; - } - - PyObject *result_item = PyTuple_New(3); - if (result_item == NULL) { - Py_DECREF(tn); - Py_DECREF(current_awaited_by); - Py_DECREF(task_id); - return -1; - } - - PyTuple_SET_ITEM(result_item, 0, task_id); // steals ref - PyTuple_SET_ITEM(result_item, 1, tn); // steals ref - PyTuple_SET_ITEM(result_item, 2, current_awaited_by); // steals ref - if (PyList_Append(result, result_item)) { - Py_DECREF(result_item); - return -1; - } - Py_DECREF(result_item); - - if (parse_task_awaited_by(handle, debug_offsets, async_offsets, - task_addr, current_awaited_by, 0)) - { - return -1; - } - - // onto the next one... - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - (uintptr_t)task_node.next, - sizeof(task_node), - &task_node)) - { - return -1; - } - } - - return 0; -} - -static int -append_awaited_by( - proc_handle_t *handle, - unsigned long tid, - uintptr_t head_addr, - struct _Py_DebugOffsets *debug_offsets, - struct _Py_AsyncioModuleDebugOffsets *async_offsets, - PyObject *result) -{ - PyObject *tid_py = PyLong_FromUnsignedLong(tid); - if (tid_py == NULL) { + if ((void*)*running_task_addr == NULL) { + PyErr_SetString(PyExc_RuntimeError, "No running task found"); return -1; } - PyObject *result_item = PyTuple_New(2); - if (result_item == NULL) { - Py_DECREF(tid_py); + if (read_py_ptr( + &self->handle, + *running_task_addr + self->async_debug_offsets.asyncio_task_object.task_coro, + running_coro_addr) < 0) { + chain_exceptions(PyExc_RuntimeError, "Failed to read running task coro"); return -1; } - PyObject* awaited_by_for_thread = PyList_New(0); - if (awaited_by_for_thread == NULL) { - Py_DECREF(tid_py); - Py_DECREF(result_item); + if ((void*)*running_coro_addr == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); return -1; } - PyTuple_SET_ITEM(result_item, 0, tid_py); // steals ref - PyTuple_SET_ITEM(result_item, 1, awaited_by_for_thread); // steals ref - if (PyList_Append(result, result_item)) { - Py_DECREF(result_item); + // note: genobject's gi_iframe is an embedded struct so the address to + // the offset leads directly to its first field: f_executable + if (read_py_ptr( + &self->handle, + *running_coro_addr + self->debug_offsets.gen_object.gi_iframe, + running_task_code_obj) < 0) { return -1; } - Py_DECREF(result_item); - if (append_awaited_by_for_thread( - handle, - head_addr, - debug_offsets, - async_offsets, - awaited_by_for_thread)) - { + if ((void*)*running_task_code_obj == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); return -1; } return 0; } -static PyObject* -get_all_awaited_by(PyObject* self, PyObject* args) -{ -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_all_awaited_by is not implemented on this platform"); - return NULL; -#endif - int pid; - if (!PyArg_ParseTuple(args, "i", &pid)) { - return NULL; +/* ============================================================================ + * FRAME PARSING FUNCTIONS + * ============================================================================ */ + +static int +parse_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* previous_frame +) { + char frame[SIZEOF_INTERP_FRAME]; + + Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_INTERP_FRAME, + frame + ); + if (bytes_read < 0) { + return -1; } - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) >= FRAME_OWNED_BY_INTERPRETER) { return 0; } - PyObject *result = NULL; - - uintptr_t runtime_start_addr = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_addr == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } - goto result_err; + if ((void*)GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable) == NULL) { + return 0; } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_addr, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); - goto result_err; + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); + + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); } +#endif - struct _Py_AsyncioModuleDebugOffsets local_async_debug; - if (read_async_debug(handle, &local_async_debug)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read asyncio debug offsets"); - goto result_err; + return parse_code_object( + unwinder, result, GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable), + instruction_pointer, previous_frame, tlbc_index); +} + +static int +parse_async_frame_object( + RemoteUnwinderObject *unwinder, + PyObject** result, + uintptr_t address, + uintptr_t* previous_frame, + uintptr_t* code_object +) { + char frame[SIZEOF_INTERP_FRAME]; + + Py_ssize_t bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + address, + SIZEOF_INTERP_FRAME, + frame + ); + if (bytes_read < 0) { + return -1; } - result = PyList_New(0); - if (result == NULL) { - goto result_err; + *previous_frame = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.previous); + + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_CSTACK || + GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) == FRAME_OWNED_BY_INTERPRETER) { + return 0; // C frame } - uint64_t interpreter_state_list_head = - local_debug_offsets.runtime_state.interpreters_head; + if (GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_GENERATOR + && GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner) != FRAME_OWNED_BY_THREAD) { + PyErr_Format(PyExc_RuntimeError, "Unhandled frame owner %d.\n", + GET_MEMBER(char, frame, unwinder->debug_offsets.interpreter_frame.owner)); + return -1; + } - uintptr_t interpreter_state_addr; - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - runtime_start_addr + interpreter_state_list_head, - sizeof(void*), - &interpreter_state_addr)) - { - goto result_err; + *code_object = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.executable); + // Strip tag bits for consistent comparison + *code_object &= ~Py_TAG_BITS; + + assert(code_object != NULL); + if ((void*)*code_object == NULL) { + return 0; } - uintptr_t thread_state_addr; - unsigned long tid = 0; - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - interpreter_state_addr - + local_debug_offsets.interpreter_state.threads_head, - sizeof(void*), - &thread_state_addr)) - { - goto result_err; + uintptr_t instruction_pointer = GET_MEMBER(uintptr_t, frame, unwinder->debug_offsets.interpreter_frame.instr_ptr); + + // Get tlbc_index for free threading builds + int32_t tlbc_index = 0; +#ifdef Py_GIL_DISABLED + if (unwinder->debug_offsets.interpreter_frame.tlbc_index != 0) { + tlbc_index = GET_MEMBER(int32_t, frame, unwinder->debug_offsets.interpreter_frame.tlbc_index); } +#endif - uintptr_t head_addr; - while (thread_state_addr != 0) { - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - thread_state_addr - + local_debug_offsets.thread_state.native_thread_id, - sizeof(tid), - &tid)) - { - goto result_err; + if (parse_code_object( + unwinder, result, *code_object, instruction_pointer, previous_frame, tlbc_index)) { + return -1; + } + + return 1; +} + +static int +parse_async_frame_chain( + RemoteUnwinderObject *self, + PyObject *calls, + uintptr_t running_task_code_obj +) { + uintptr_t address_of_current_frame; + if (find_running_frame(self, self->runtime_start_address, &address_of_current_frame) < 0) { + chain_exceptions(PyExc_RuntimeError, "Failed to find running frame"); + return -1; + } + + uintptr_t address_of_code_object; + while ((void*)address_of_current_frame != NULL) { + PyObject* frame_info = NULL; + int res = parse_async_frame_object( + self, + &frame_info, + address_of_current_frame, + &address_of_current_frame, + &address_of_code_object + ); + + if (res < 0) { + chain_exceptions(PyExc_RuntimeError, "Failed to parse async frame object"); + return -1; } - head_addr = thread_state_addr - + local_async_debug.asyncio_thread_state.asyncio_tasks_head; + if (!frame_info) { + continue; + } - if (append_awaited_by(handle, tid, head_addr, &local_debug_offsets, - &local_async_debug, result)) - { - goto result_err; + if (PyList_Append(calls, frame_info) == -1) { + Py_DECREF(frame_info); + return -1; } - if (0 > _Py_RemoteDebug_ReadRemoteMemory( - handle, - thread_state_addr + local_debug_offsets.thread_state.next, - sizeof(void*), - &thread_state_addr)) - { - goto result_err; + Py_DECREF(frame_info); + + if (address_of_code_object == running_task_code_obj) { + break; } } - head_addr = interpreter_state_addr - + local_async_debug.asyncio_interpreter_state.asyncio_tasks_head; + return 0; +} - // On top of a per-thread task lists used by default by asyncio to avoid - // contention, there is also a fallback per-interpreter list of tasks; - // any tasks still pending when a thread is destroyed will be moved to the - // per-interpreter task list. It's unlikely we'll find anything here, but - // interesting for debugging. - if (append_awaited_by(handle, 0, head_addr, &local_debug_offsets, - &local_async_debug, result)) - { - goto result_err; +/* ============================================================================ + * AWAITED BY PARSING FUNCTIONS + * ============================================================================ */ + +static int +append_awaited_by_for_thread( + RemoteUnwinderObject *unwinder, + uintptr_t head_addr, + PyObject *result +) { + char task_node[SIZEOF_LLIST_NODE]; + + if (_Py_RemoteDebug_PagedReadRemoteMemory(&unwinder->handle, head_addr, + sizeof(task_node), task_node) < 0) { + return -1; } - _Py_RemoteDebug_CleanupProcHandle(handle); - return result; + size_t iteration_count = 0; + const size_t MAX_ITERATIONS = 2 << 15; // A reasonable upper bound -result_err: - Py_XDECREF(result); - _Py_RemoteDebug_CleanupProcHandle(handle); - return NULL; -} + while (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) != head_addr) { + if (++iteration_count > MAX_ITERATIONS) { + PyErr_SetString(PyExc_RuntimeError, "Task list appears corrupted"); + return -1; + } -static PyObject* -get_stack_trace(PyObject* self, PyObject* args) -{ -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_stack_trace is not supported on this platform"); - return NULL; -#endif + if (GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) == 0) { + PyErr_SetString(PyExc_RuntimeError, + "Invalid linked list structure reading remote memory"); + return -1; + } - int pid; - if (!PyArg_ParseTuple(args, "i", &pid)) { - return NULL; - } + uintptr_t task_addr = (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next) + - unwinder->async_debug_offsets.asyncio_task_object.task_node; - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { - return 0; + if (process_single_task_node(unwinder, task_addr, result) < 0) { + return -1; + } + + // Read next node + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, + (uintptr_t)GET_MEMBER(uintptr_t, task_node, unwinder->debug_offsets.llist_node.next), + sizeof(task_node), + task_node) < 0) { + return -1; + } } - PyObject* result = NULL; + return 0; +} - uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_address == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } - goto result_err; +static int +append_awaited_by( + RemoteUnwinderObject *unwinder, + unsigned long tid, + uintptr_t head_addr, + PyObject *result) +{ + PyObject *tid_py = PyLong_FromUnsignedLong(tid); + if (tid_py == NULL) { + return -1; } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); - goto result_err; + PyObject *result_item = PyTuple_New(2); + if (result_item == NULL) { + Py_DECREF(tid_py); + return -1; } - uintptr_t address_of_current_frame; - if (find_running_frame( - handle, runtime_start_address, &local_debug_offsets, - &address_of_current_frame) - ) { - goto result_err; + PyObject* awaited_by_for_thread = PyList_New(0); + if (awaited_by_for_thread == NULL) { + Py_DECREF(tid_py); + Py_DECREF(result_item); + return -1; } - result = PyList_New(0); - if (result == NULL) { - goto result_err; + PyTuple_SET_ITEM(result_item, 0, tid_py); // steals ref + PyTuple_SET_ITEM(result_item, 1, awaited_by_for_thread); // steals ref + if (PyList_Append(result, result_item)) { + Py_DECREF(result_item); + return -1; } + Py_DECREF(result_item); - while ((void*)address_of_current_frame != NULL) { - PyObject* frame_info = NULL; - if (parse_frame_object( - handle, - &frame_info, - &local_debug_offsets, - address_of_current_frame, - &address_of_current_frame) - < 0) - { - Py_CLEAR(result); - goto result_err; + if (append_awaited_by_for_thread(unwinder, head_addr, awaited_by_for_thread)) + { + return -1; + } + + return 0; +} + +/* ============================================================================ + * STACK UNWINDING FUNCTIONS + * ============================================================================ */ + +static int +process_frame_chain( + RemoteUnwinderObject *unwinder, + uintptr_t initial_frame_addr, + StackChunkList *chunks, + PyObject *frame_info +) { + uintptr_t frame_addr = initial_frame_addr; + uintptr_t prev_frame_addr = 0; + const size_t MAX_FRAMES = 1024; + size_t frame_count = 0; + + while ((void*)frame_addr != NULL) { + PyObject *frame = NULL; + uintptr_t next_frame_addr = 0; + + if (++frame_count > MAX_FRAMES) { + PyErr_SetString(PyExc_RuntimeError, "Too many stack frames (possible infinite loop)"); + return -1; } - if (!frame_info) { - continue; + // Try chunks first, fallback to direct memory read + if (parse_frame_from_chunks(unwinder, &frame, frame_addr, &next_frame_addr, chunks) < 0) { + PyErr_Clear(); + if (parse_frame_object(unwinder, &frame, frame_addr, &next_frame_addr) < 0) { + return -1; + } } - if (PyList_Append(result, frame_info) == -1) { - Py_CLEAR(result); - goto result_err; + if (!frame) { + break; } - Py_DECREF(frame_info); - frame_info = NULL; + if (prev_frame_addr && frame_addr != prev_frame_addr) { + PyErr_Format(PyExc_RuntimeError, + "Broken frame chain: expected frame at 0x%lx, got 0x%lx", + prev_frame_addr, frame_addr); + Py_DECREF(frame); + return -1; + } + if (PyList_Append(frame_info, frame) == -1) { + Py_DECREF(frame); + return -1; + } + Py_DECREF(frame); + + prev_frame_addr = next_frame_addr; + frame_addr = next_frame_addr; } -result_err: - _Py_RemoteDebug_CleanupProcHandle(handle); - return result; + return 0; } static PyObject* -get_async_stack_trace(PyObject* self, PyObject* args) -{ -#if (!defined(__linux__) && !defined(__APPLE__)) && !defined(MS_WINDOWS) || \ - (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString( - PyExc_RuntimeError, - "get_stack_trace is not supported on this platform"); - return NULL; -#endif - int pid; +unwind_stack_for_thread( + RemoteUnwinderObject *unwinder, + uintptr_t *current_tstate +) { + PyObject *frame_info = NULL; + PyObject *thread_id = NULL; + PyObject *result = NULL; + StackChunkList chunks = {0}; - if (!PyArg_ParseTuple(args, "i", &pid)) { - return NULL; + char ts[SIZEOF_THREAD_STATE]; + int bytes_read = _Py_RemoteDebug_PagedReadRemoteMemory( + &unwinder->handle, *current_tstate, unwinder->debug_offsets.thread_state.size, ts); + if (bytes_read < 0) { + goto error; } - proc_handle_t the_handle; - proc_handle_t *handle = &the_handle; - if (_Py_RemoteDebug_InitProcHandle(handle, pid) < 0) { - return 0; - } + uintptr_t frame_addr = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.current_frame); - PyObject *result = NULL; + frame_info = PyList_New(0); + if (!frame_info) { + goto error; + } - uintptr_t runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(handle); - if (runtime_start_address == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } - goto result_err; + if (copy_stack_chunks(unwinder, *current_tstate, &chunks) < 0) { + goto error; } - struct _Py_DebugOffsets local_debug_offsets; - if (_Py_RemoteDebug_ReadDebugOffsets(handle, &runtime_start_address, &local_debug_offsets)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read debug offsets"); - goto result_err; + if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) { + goto error; } - struct _Py_AsyncioModuleDebugOffsets local_async_debug; - if (read_async_debug(handle, &local_async_debug)) { - chain_exceptions(PyExc_RuntimeError, "Failed to read asyncio debug offsets"); - goto result_err; + *current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next); + + thread_id = PyLong_FromLongLong( + GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id)); + if (thread_id == NULL) { + goto error; } - result = PyList_New(1); + result = PyTuple_New(2); if (result == NULL) { - goto result_err; + goto error; } - PyObject* calls = PyList_New(0); - if (calls == NULL) { - goto result_err; + + PyTuple_SET_ITEM(result, 0, thread_id); // Steals reference + PyTuple_SET_ITEM(result, 1, frame_info); // Steals reference + + cleanup_stack_chunks(&chunks); + return result; + +error: + Py_XDECREF(frame_info); + Py_XDECREF(thread_id); + Py_XDECREF(result); + cleanup_stack_chunks(&chunks); + return NULL; +} + + +/* ============================================================================ + * REMOTEUNWINDER CLASS IMPLEMENTATION + * ============================================================================ */ + +/*[clinic input] +class _remote_debugging.RemoteUnwinder "RemoteUnwinderObject *" "&RemoteUnwinder_Type" +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=55f164d8803318be]*/ + +/*[clinic input] +_remote_debugging.RemoteUnwinder.__init__ + pid: int + * + all_threads: bool = False + +Initialize a new RemoteUnwinder object for debugging a remote Python process. + +Args: + pid: Process ID of the target Python process to debug + all_threads: If True, initialize state for all threads in the process. + If False, only initialize for the main thread. + +The RemoteUnwinder provides functionality to inspect and debug a running Python +process, including examining thread states, stack frames and other runtime data. + +Raises: + PermissionError: If access to the target process is denied + OSError: If unable to attach to the target process or access its memory + RuntimeError: If unable to read debug information from the target process +[clinic start generated code]*/ + +static int +_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, + int pid, int all_threads) +/*[clinic end generated code: output=b8027cb247092081 input=6a2056b04e6f050e]*/ +{ + if (_Py_RemoteDebug_InitProcHandle(&self->handle, pid) < 0) { + return -1; } - if (PyList_SetItem(result, 0, calls)) { /* steals ref to 'calls' */ - Py_DECREF(calls); - goto result_err; + + self->runtime_start_address = _Py_RemoteDebug_GetPyRuntimeAddress(&self->handle); + if (self->runtime_start_address == 0) { + return -1; } - uintptr_t running_task_addr = (uintptr_t)NULL; - if (find_running_task( - handle, runtime_start_address, &local_debug_offsets, &local_async_debug, - &running_task_addr) - ) { - chain_exceptions(PyExc_RuntimeError, "Failed to find running task"); - goto result_err; + if (_Py_RemoteDebug_ReadDebugOffsets(&self->handle, + &self->runtime_start_address, + &self->debug_offsets) < 0) + { + return -1; } - if ((void*)running_task_addr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "No running task found"); - goto result_err; + // Try to read async debug offsets, but don't fail if they're not available + self->async_debug_offsets_available = 1; + if (read_async_debug(self) < 0) { + PyErr_Clear(); + memset(&self->async_debug_offsets, 0, sizeof(self->async_debug_offsets)); + self->async_debug_offsets_available = 0; } - uintptr_t running_coro_addr; - if (read_py_ptr( - handle, - running_task_addr + local_async_debug.asyncio_task_object.task_coro, - &running_coro_addr - )) { - chain_exceptions(PyExc_RuntimeError, "Failed to read running task coro"); - goto result_err; + if (populate_initial_state_data(all_threads, self, self->runtime_start_address, + &self->interpreter_addr ,&self->tstate_addr) < 0) + { + return -1; } - if ((void*)running_coro_addr == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Running task coro is NULL"); - goto result_err; + self->code_object_cache = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, // keys are stable pointers, don't destroy + cached_code_metadata_destroy, + NULL + ); + if (self->code_object_cache == NULL) { + PyErr_NoMemory(); + return -1; } - // note: genobject's gi_iframe is an embedded struct so the address to - // the offset leads directly to its first field: f_executable - uintptr_t address_of_running_task_code_obj; - if (read_py_ptr( - handle, - running_coro_addr + local_debug_offsets.gen_object.gi_iframe, - &address_of_running_task_code_obj - )) { - goto result_err; +#ifdef Py_GIL_DISABLED + // Initialize TLBC cache + self->tlbc_generation = 0; + self->tlbc_cache = _Py_hashtable_new_full( + _Py_hashtable_hash_ptr, + _Py_hashtable_compare_direct, + NULL, // keys are stable pointers, don't destroy + tlbc_cache_entry_destroy, + NULL + ); + if (self->tlbc_cache == NULL) { + _Py_hashtable_destroy(self->code_object_cache); + PyErr_NoMemory(); + return -1; } +#endif - if ((void*)address_of_running_task_code_obj == NULL) { - PyErr_SetString(PyExc_RuntimeError, "Running task code object is NULL"); - goto result_err; + return 0; +} + +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_stack_trace + +Returns a list of stack traces for all threads in the target process. + +Each element in the returned list is a tuple of (thread_id, frame_list), where: +- thread_id is the OS thread identifier +- frame_list is a list of tuples (function_name, filename, line_number) representing + the Python stack frames for that thread, ordered from most recent to oldest + +Example: + [ + (1234, [ + ('process_data', 'worker.py', 127), + ('run_worker', 'worker.py', 45), + ('main', 'app.py', 23) + ]), + (1235, [ + ('handle_request', 'server.py', 89), + ('serve_forever', 'server.py', 52) + ]) + ] + +Raises: + RuntimeError: If there is an error copying memory from the target process + OSError: If there is an error accessing the target process + PermissionError: If access to the target process is denied + UnicodeDecodeError: If there is an error decoding strings from the target process + +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=666192b90c69d567 input=331dbe370578badf]*/ +{ + PyObject* result = NULL; + // Read interpreter state into opaque buffer + char interp_state_buffer[INTERP_STATE_BUFFER_SIZE]; + if (_Py_RemoteDebug_PagedReadRemoteMemory( + &self->handle, + self->interpreter_addr, + INTERP_STATE_BUFFER_SIZE, + interp_state_buffer) < 0) { + goto exit; } - uintptr_t address_of_current_frame; - if (find_running_frame( - handle, runtime_start_address, &local_debug_offsets, - &address_of_current_frame) - ) { - chain_exceptions(PyExc_RuntimeError, "Failed to find running frame"); - goto result_err; + // Get code object generation from buffer + uint64_t code_object_generation = GET_MEMBER(uint64_t, interp_state_buffer, + self->debug_offsets.interpreter_state.code_object_generation); + + if (code_object_generation != self->code_object_generation) { + self->code_object_generation = code_object_generation; + _Py_hashtable_clear(self->code_object_cache); } - uintptr_t address_of_code_object; - while ((void*)address_of_current_frame != NULL) { - PyObject* frame_info = NULL; - int res = parse_async_frame_object( - handle, - &frame_info, - &local_debug_offsets, - address_of_current_frame, - &address_of_current_frame, - &address_of_code_object - ); +#ifdef Py_GIL_DISABLED + // Check TLBC generation and invalidate cache if needed + uint32_t current_tlbc_generation = GET_MEMBER(uint32_t, interp_state_buffer, + self->debug_offsets.interpreter_state.tlbc_generation); + if (current_tlbc_generation != self->tlbc_generation) { + self->tlbc_generation = current_tlbc_generation; + _Py_hashtable_clear(self->tlbc_cache); + } +#endif - if (res < 0) { - chain_exceptions(PyExc_RuntimeError, "Failed to parse async frame object"); - goto result_err; - } + uintptr_t current_tstate; + if (self->tstate_addr == 0) { + // Get threads head from buffer + current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer, + self->debug_offsets.interpreter_state.threads_head); + } else { + current_tstate = self->tstate_addr; + } + result = PyList_New(0); + if (!result) { + goto exit; + } + + while (current_tstate != 0) { + PyObject* frame_info = unwind_stack_for_thread(self, ¤t_tstate); if (!frame_info) { - continue; + Py_CLEAR(result); + goto exit; } - if (PyList_Append(calls, frame_info) == -1) { - Py_DECREF(calls); - goto result_err; + if (PyList_Append(result, frame_info) == -1) { + Py_DECREF(frame_info); + Py_CLEAR(result); + goto exit; } - Py_DECREF(frame_info); - frame_info = NULL; - if (address_of_code_object == address_of_running_task_code_obj) { + // We are targeting a single tstate, break here + if (self->tstate_addr) { break; } } - PyObject *tn = parse_task_name( - handle, &local_debug_offsets, &local_async_debug, running_task_addr); - if (tn == NULL) { - goto result_err; +exit: + _Py_RemoteDebug_ClearCache(&self->handle); + return result; +} + +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_all_awaited_by + +Get all tasks and their awaited_by relationships from the remote process. + +This provides a tree structure showing which tasks are waiting for other tasks. + +For each task, returns: +1. The call stack frames leading to where the task is currently executing +2. The name of the task +3. A list of tasks that this task is waiting for, with their own frames/names/etc + +Returns a list of [frames, task_name, subtasks] where: +- frames: List of (func_name, filename, lineno) showing the call stack +- task_name: String identifier for the task +- subtasks: List of tasks being awaited by this task, in same format + +Raises: + RuntimeError: If AsyncioDebug section is not available in the remote process + MemoryError: If memory allocation fails + OSError: If reading from the remote process fails + +Example output: +[ + # Task c2_root waiting for two subtasks + [ + # Call stack of c2_root + [("c5", "script.py", 10), ("c4", "script.py", 14)], + "c2_root", + [ + # First subtask (sub_main_2) and what it's waiting for + [ + [("c1", "script.py", 23)], + "sub_main_2", + [...] + ], + # Second subtask and its waiters + [...] + ] + ] +] +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=6a49cd345e8aec53 input=a452c652bb00701a]*/ +{ + if (!self->async_debug_offsets_available) { + PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + return NULL; } - if (PyList_Append(result, tn)) { - Py_DECREF(tn); + + PyObject *result = PyList_New(0); + if (result == NULL) { goto result_err; } - Py_DECREF(tn); - PyObject* awaited_by = PyList_New(0); - if (awaited_by == NULL) { + uintptr_t thread_state_addr; + unsigned long tid = 0; + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &self->handle, + self->interpreter_addr + + self->debug_offsets.interpreter_state.threads_main, + sizeof(void*), + &thread_state_addr)) + { goto result_err; } - if (PyList_Append(result, awaited_by)) { - Py_DECREF(awaited_by); - goto result_err; + + uintptr_t head_addr; + while (thread_state_addr != 0) { + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &self->handle, + thread_state_addr + + self->debug_offsets.thread_state.native_thread_id, + sizeof(tid), + &tid)) + { + goto result_err; + } + + head_addr = thread_state_addr + + self->async_debug_offsets.asyncio_thread_state.asyncio_tasks_head; + + if (append_awaited_by(self, tid, head_addr, result)) + { + goto result_err; + } + + if (0 > _Py_RemoteDebug_PagedReadRemoteMemory( + &self->handle, + thread_state_addr + self->debug_offsets.thread_state.next, + sizeof(void*), + &thread_state_addr)) + { + goto result_err; + } } - Py_DECREF(awaited_by); - if (parse_task_awaited_by( - handle, &local_debug_offsets, &local_async_debug, - running_task_addr, awaited_by, 1) - ) { + head_addr = self->interpreter_addr + + self->async_debug_offsets.asyncio_interpreter_state.asyncio_tasks_head; + + // On top of a per-thread task lists used by default by asyncio to avoid + // contention, there is also a fallback per-interpreter list of tasks; + // any tasks still pending when a thread is destroyed will be moved to the + // per-interpreter task list. It's unlikely we'll find anything here, but + // interesting for debugging. + if (append_awaited_by(self, 0, head_addr, result)) + { goto result_err; } - _Py_RemoteDebug_CleanupProcHandle(handle); + _Py_RemoteDebug_ClearCache(&self->handle); return result; result_err: - _Py_RemoteDebug_CleanupProcHandle(handle); + _Py_RemoteDebug_ClearCache(&self->handle); Py_XDECREF(result); return NULL; } +/*[clinic input] +@critical_section +_remote_debugging.RemoteUnwinder.get_async_stack_trace -static PyMethodDef methods[] = { - {"get_stack_trace", get_stack_trace, METH_VARARGS, - "Get the Python stack from a given pid"}, - {"get_async_stack_trace", get_async_stack_trace, METH_VARARGS, - "Get the asyncio stack from a given pid"}, - {"get_all_awaited_by", get_all_awaited_by, METH_VARARGS, - "Get all tasks and their awaited_by from a given pid"}, - {NULL, NULL, 0, NULL}, +Returns information about the currently running async task and its stack trace. + +Returns a tuple of (task_info, stack_frames) where: +- task_info is a tuple of (task_id, task_name) identifying the task +- stack_frames is a list of tuples (function_name, filename, line_number) representing + the Python stack frames for the task, ordered from most recent to oldest + +Example: + ((4345585712, 'Task-1'), [ + ('run_echo_server', 'server.py', 127), + ('serve_forever', 'server.py', 45), + ('main', 'app.py', 23) + ]) + +Raises: + RuntimeError: If AsyncioDebug section is not available in the target process + RuntimeError: If there is an error copying memory from the target process + OSError: If there is an error accessing the target process + PermissionError: If access to the target process is denied + UnicodeDecodeError: If there is an error decoding strings from the target process + +[clinic start generated code]*/ + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self) +/*[clinic end generated code: output=6433d52b55e87bbe input=11b7150c59d4c60f]*/ +{ + if (!self->async_debug_offsets_available) { + PyErr_SetString(PyExc_RuntimeError, "AsyncioDebug section not available"); + return NULL; + } + + PyObject *result = NULL; + PyObject *calls = NULL; + + if (setup_async_result_structure(&result, &calls) < 0) { + goto cleanup; + } + + uintptr_t running_task_addr, running_coro_addr, running_task_code_obj; + if (find_running_task_and_coro(self, &running_task_addr, + &running_coro_addr, &running_task_code_obj) < 0) { + goto cleanup; + } + + if (parse_async_frame_chain(self, calls, running_task_code_obj) < 0) { + goto cleanup; + } + + if (add_task_info_to_result(self, result, running_task_addr) < 0) { + goto cleanup; + } + + _Py_RemoteDebug_ClearCache(&self->handle); + return result; + +cleanup: + _Py_RemoteDebug_ClearCache(&self->handle); + Py_XDECREF(result); + return NULL; +} + +static PyMethodDef RemoteUnwinder_methods[] = { + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF + _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF + {NULL, NULL} }; -static struct PyModuleDef module = { - .m_base = PyModuleDef_HEAD_INIT, - .m_name = "_remote_debugging", - .m_size = -1, - .m_methods = methods, +static void +RemoteUnwinder_dealloc(RemoteUnwinderObject *self) +{ + PyTypeObject *tp = Py_TYPE(self); + if (self->code_object_cache) { + _Py_hashtable_destroy(self->code_object_cache); + } +#ifdef Py_GIL_DISABLED + if (self->tlbc_cache) { + _Py_hashtable_destroy(self->tlbc_cache); + } +#endif + if (self->handle.pid != 0) { + _Py_RemoteDebug_ClearCache(&self->handle); + _Py_RemoteDebug_CleanupProcHandle(&self->handle); + } + PyObject_Del(self); + Py_DECREF(tp); +} + +static PyType_Slot RemoteUnwinder_slots[] = { + {Py_tp_doc, (void *)"RemoteUnwinder(pid): Inspect stack of a remote Python process."}, + {Py_tp_methods, RemoteUnwinder_methods}, + {Py_tp_init, _remote_debugging_RemoteUnwinder___init__}, + {Py_tp_dealloc, RemoteUnwinder_dealloc}, + {0, NULL} }; -PyMODINIT_FUNC -PyInit__remote_debugging(void) +static PyType_Spec RemoteUnwinder_spec = { + .name = "_remote_debugging.RemoteUnwinder", + .basicsize = sizeof(RemoteUnwinderObject), + .flags = Py_TPFLAGS_DEFAULT, + .slots = RemoteUnwinder_slots, +}; + +/* ============================================================================ + * MODULE INITIALIZATION + * ============================================================================ */ + +static int +_remote_debugging_exec(PyObject *m) { - PyObject* mod = PyModule_Create(&module); - if (mod == NULL) { - return NULL; + RemoteDebuggingState *st = RemoteDebugging_GetState(m); +#define CREATE_TYPE(mod, type, spec) \ + do { \ + type = (PyTypeObject *)PyType_FromMetaclass(NULL, mod, spec, NULL); \ + if (type == NULL) { \ + return -1; \ + } \ + } while (0) + + CREATE_TYPE(m, st->RemoteDebugging_Type, &RemoteUnwinder_spec); + + if (PyModule_AddType(m, st->RemoteDebugging_Type) < 0) { + return -1; } #ifdef Py_GIL_DISABLED - PyUnstable_Module_SetGIL(mod, Py_MOD_GIL_NOT_USED); + PyUnstable_Module_SetGIL(m, Py_MOD_GIL_NOT_USED); #endif - int rc = PyModule_AddIntConstant( - mod, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV); + int rc = PyModule_AddIntConstant(m, "PROCESS_VM_READV_SUPPORTED", HAVE_PROCESS_VM_READV); if (rc < 0) { - Py_DECREF(mod); - return NULL; + return -1; } - return mod; + if (RemoteDebugging_InitState(st) < 0) { + return -1; + } + return 0; } + +static int +remote_debugging_traverse(PyObject *mod, visitproc visit, void *arg) +{ + RemoteDebuggingState *state = RemoteDebugging_GetState(mod); + Py_VISIT(state->RemoteDebugging_Type); + return 0; +} + +static int +remote_debugging_clear(PyObject *mod) +{ + RemoteDebuggingState *state = RemoteDebugging_GetState(mod); + Py_CLEAR(state->RemoteDebugging_Type); + return 0; +} + +static void +remote_debugging_free(void *mod) +{ + (void)remote_debugging_clear((PyObject *)mod); +} + +static PyModuleDef_Slot remote_debugging_slots[] = { + {Py_mod_exec, _remote_debugging_exec}, + {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED}, + {Py_mod_gil, Py_MOD_GIL_NOT_USED}, + {0, NULL}, +}; + +static PyMethodDef remote_debugging_methods[] = { + {NULL, NULL, 0, NULL}, +}; + +static struct PyModuleDef remote_debugging_module = { + PyModuleDef_HEAD_INIT, + .m_name = "_remote_debugging", + .m_size = sizeof(RemoteDebuggingState), + .m_methods = remote_debugging_methods, + .m_slots = remote_debugging_slots, + .m_traverse = remote_debugging_traverse, + .m_clear = remote_debugging_clear, + .m_free = remote_debugging_free, +}; + +PyMODINIT_FUNC +PyInit__remote_debugging(void) +{ + return PyModuleDef_Init(&remote_debugging_module); +} + diff --git a/Modules/clinic/_remote_debugging_module.c.h b/Modules/clinic/_remote_debugging_module.c.h new file mode 100644 index 00000000000000..e83e2fd7fd2b5b --- /dev/null +++ b/Modules/clinic/_remote_debugging_module.c.h @@ -0,0 +1,243 @@ +/*[clinic input] +preserve +[clinic start generated code]*/ + +#if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# include "pycore_gc.h" // PyGC_Head +# include "pycore_runtime.h" // _Py_ID() +#endif +#include "pycore_critical_section.h"// Py_BEGIN_CRITICAL_SECTION() +#include "pycore_modsupport.h" // _PyArg_UnpackKeywords() + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder___init____doc__, +"RemoteUnwinder(pid, *, all_threads=False)\n" +"--\n" +"\n" +"Initialize a new RemoteUnwinder object for debugging a remote Python process.\n" +"\n" +"Args:\n" +" pid: Process ID of the target Python process to debug\n" +" all_threads: If True, initialize state for all threads in the process.\n" +" If False, only initialize for the main thread.\n" +"\n" +"The RemoteUnwinder provides functionality to inspect and debug a running Python\n" +"process, including examining thread states, stack frames and other runtime data.\n" +"\n" +"Raises:\n" +" PermissionError: If access to the target process is denied\n" +" OSError: If unable to attach to the target process or access its memory\n" +" RuntimeError: If unable to read debug information from the target process"); + +static int +_remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self, + int pid, int all_threads); + +static int +_remote_debugging_RemoteUnwinder___init__(PyObject *self, PyObject *args, PyObject *kwargs) +{ + int return_value = -1; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + Py_hash_t ob_hash; + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_hash = -1, + .ob_item = { &_Py_ID(pid), &_Py_ID(all_threads), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pid", "all_threads", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "RemoteUnwinder", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + PyObject * const *fastargs; + Py_ssize_t nargs = PyTuple_GET_SIZE(args); + Py_ssize_t noptargs = nargs + (kwargs ? PyDict_GET_SIZE(kwargs) : 0) - 1; + int pid; + int all_threads = 0; + + fastargs = _PyArg_UnpackKeywords(_PyTuple_CAST(args)->ob_item, nargs, kwargs, NULL, &_parser, + /*minpos*/ 1, /*maxpos*/ 1, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!fastargs) { + goto exit; + } + pid = PyLong_AsInt(fastargs[0]); + if (pid == -1 && PyErr_Occurred()) { + goto exit; + } + if (!noptargs) { + goto skip_optional_kwonly; + } + all_threads = PyObject_IsTrue(fastargs[1]); + if (all_threads < 0) { + goto exit; + } +skip_optional_kwonly: + return_value = _remote_debugging_RemoteUnwinder___init___impl((RemoteUnwinderObject *)self, pid, all_threads); + +exit: + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_stack_trace__doc__, +"get_stack_trace($self, /)\n" +"--\n" +"\n" +"Returns a list of stack traces for all threads in the target process.\n" +"\n" +"Each element in the returned list is a tuple of (thread_id, frame_list), where:\n" +"- thread_id is the OS thread identifier\n" +"- frame_list is a list of tuples (function_name, filename, line_number) representing\n" +" the Python stack frames for that thread, ordered from most recent to oldest\n" +"\n" +"Example:\n" +" [\n" +" (1234, [\n" +" (\'process_data\', \'worker.py\', 127),\n" +" (\'run_worker\', \'worker.py\', 45),\n" +" (\'main\', \'app.py\', 23)\n" +" ]),\n" +" (1235, [\n" +" (\'handle_request\', \'server.py\', 89),\n" +" (\'serve_forever\', \'server.py\', 52)\n" +" ])\n" +" ]\n" +"\n" +"Raises:\n" +" RuntimeError: If there is an error copying memory from the target process\n" +" OSError: If there is an error accessing the target process\n" +" PermissionError: If access to the target process is denied\n" +" UnicodeDecodeError: If there is an error decoding strings from the target process"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_STACK_TRACE_METHODDEF \ + {"get_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_stack_trace__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_stack_trace(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_stack_trace_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_all_awaited_by__doc__, +"get_all_awaited_by($self, /)\n" +"--\n" +"\n" +"Get all tasks and their awaited_by relationships from the remote process.\n" +"\n" +"This provides a tree structure showing which tasks are waiting for other tasks.\n" +"\n" +"For each task, returns:\n" +"1. The call stack frames leading to where the task is currently executing\n" +"2. The name of the task\n" +"3. A list of tasks that this task is waiting for, with their own frames/names/etc\n" +"\n" +"Returns a list of [frames, task_name, subtasks] where:\n" +"- frames: List of (func_name, filename, lineno) showing the call stack\n" +"- task_name: String identifier for the task\n" +"- subtasks: List of tasks being awaited by this task, in same format\n" +"\n" +"Raises:\n" +" RuntimeError: If AsyncioDebug section is not available in the remote process\n" +" MemoryError: If memory allocation fails\n" +" OSError: If reading from the remote process fails\n" +"\n" +"Example output:\n" +"[\n" +" [\n" +" [(\"c5\", \"script.py\", 10), (\"c4\", \"script.py\", 14)],\n" +" \"c2_root\",\n" +" [\n" +" [\n" +" [(\"c1\", \"script.py\", 23)],\n" +" \"sub_main_2\",\n" +" [...]\n" +" ],\n" +" [...]\n" +" ]\n" +" ]\n" +"]"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ALL_AWAITED_BY_METHODDEF \ + {"get_all_awaited_by", (PyCFunction)_remote_debugging_RemoteUnwinder_get_all_awaited_by, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_all_awaited_by__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_all_awaited_by(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_all_awaited_by_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} + +PyDoc_STRVAR(_remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__, +"get_async_stack_trace($self, /)\n" +"--\n" +"\n" +"Returns information about the currently running async task and its stack trace.\n" +"\n" +"Returns a tuple of (task_info, stack_frames) where:\n" +"- task_info is a tuple of (task_id, task_name) identifying the task\n" +"- stack_frames is a list of tuples (function_name, filename, line_number) representing\n" +" the Python stack frames for the task, ordered from most recent to oldest\n" +"\n" +"Example:\n" +" ((4345585712, \'Task-1\'), [\n" +" (\'run_echo_server\', \'server.py\', 127),\n" +" (\'serve_forever\', \'server.py\', 45),\n" +" (\'main\', \'app.py\', 23)\n" +" ])\n" +"\n" +"Raises:\n" +" RuntimeError: If AsyncioDebug section is not available in the target process\n" +" RuntimeError: If there is an error copying memory from the target process\n" +" OSError: If there is an error accessing the target process\n" +" PermissionError: If access to the target process is denied\n" +" UnicodeDecodeError: If there is an error decoding strings from the target process"); + +#define _REMOTE_DEBUGGING_REMOTEUNWINDER_GET_ASYNC_STACK_TRACE_METHODDEF \ + {"get_async_stack_trace", (PyCFunction)_remote_debugging_RemoteUnwinder_get_async_stack_trace, METH_NOARGS, _remote_debugging_RemoteUnwinder_get_async_stack_trace__doc__}, + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace_impl(RemoteUnwinderObject *self); + +static PyObject * +_remote_debugging_RemoteUnwinder_get_async_stack_trace(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + PyObject *return_value = NULL; + + Py_BEGIN_CRITICAL_SECTION(self); + return_value = _remote_debugging_RemoteUnwinder_get_async_stack_trace_impl((RemoteUnwinderObject *)self); + Py_END_CRITICAL_SECTION(); + + return return_value; +} +/*[clinic end generated code: output=654772085f1f4bf6 input=a9049054013a1b77]*/ diff --git a/Objects/codeobject.c b/Objects/codeobject.c index 4f06a36a130207..ee869d991d93cd 100644 --- a/Objects/codeobject.c +++ b/Objects/codeobject.c @@ -2364,6 +2364,8 @@ free_monitoring_data(_PyCoMonitoringData *data) static void code_dealloc(PyObject *self) { + PyThreadState *tstate = PyThreadState_GET(); + _Py_atomic_add_uint64(&tstate->interp->_code_object_generation, 1); PyCodeObject *co = _PyCodeObject_CAST(self); _PyObject_ResurrectStart(self); notify_code_watchers(PY_CODE_EVENT_DESTROY, co); diff --git a/Python/index_pool.c b/Python/index_pool.c index 007c81a0fc16ec..520a65938ec6c7 100644 --- a/Python/index_pool.c +++ b/Python/index_pool.c @@ -172,6 +172,9 @@ _PyIndexPool_AllocIndex(_PyIndexPool *pool) else { index = heap_pop(free_indices); } + + pool->tlbc_generation++; + UNLOCK_POOL(pool); return index; } @@ -180,6 +183,7 @@ void _PyIndexPool_FreeIndex(_PyIndexPool *pool, int32_t index) { LOCK_POOL(pool); + pool->tlbc_generation++; heap_add(&pool->free_indices, index); UNLOCK_POOL(pool); } diff --git a/Python/pystate.c b/Python/pystate.c index 4144e6edefc073..0544b15aad1cc8 100644 --- a/Python/pystate.c +++ b/Python/pystate.c @@ -567,6 +567,7 @@ init_interpreter(PyInterpreterState *interp, } interp->sys_profile_initialized = false; interp->sys_trace_initialized = false; + interp->_code_object_generation = 0; interp->jit = false; interp->executor_list_head = NULL; interp->executor_deletion_list_head = NULL; @@ -777,6 +778,10 @@ interpreter_clear(PyInterpreterState *interp, PyThreadState *tstate) for (int t = 0; t < PY_MONITORING_TOOL_IDS; t++) { Py_CLEAR(interp->monitoring_tool_names[t]); } + interp->_code_object_generation = 0; +#ifdef Py_GIL_DISABLED + interp->tlbc_indices.tlbc_generation = 0; +#endif PyConfig_Clear(&interp->config); _PyCodec_Fini(interp); @@ -1346,9 +1351,6 @@ tstate_is_alive(PyThreadState *tstate) // lifecycle //---------- -/* Minimum size of data stack chunk */ -#define DATA_STACK_CHUNK_SIZE (16*1024) - static _PyStackChunk* allocate_chunk(int size_in_bytes, _PyStackChunk* previous) { @@ -2897,7 +2899,7 @@ _PyInterpreterState_HasFeature(PyInterpreterState *interp, unsigned long feature static PyObject ** push_chunk(PyThreadState *tstate, int size) { - int allocate_size = DATA_STACK_CHUNK_SIZE; + int allocate_size = _PY_DATA_STACK_CHUNK_SIZE; while (allocate_size < (int)sizeof(PyObject*)*(size + MINIMUM_OVERHEAD)) { allocate_size *= 2; } diff --git a/Python/remote_debug.h b/Python/remote_debug.h index edc77c302916ca..dbc6bdd09a693f 100644 --- a/Python/remote_debug.h +++ b/Python/remote_debug.h @@ -73,19 +73,71 @@ extern "C" { # define HAVE_PROCESS_VM_READV 0 #endif +static inline size_t +get_page_size(void) { + size_t page_size = 0; + if (page_size == 0) { +#ifdef MS_WINDOWS + SYSTEM_INFO si; + GetSystemInfo(&si); + page_size = si.dwPageSize; +#else + page_size = (size_t)getpagesize(); +#endif + } + return page_size; +} + +typedef struct page_cache_entry { + uintptr_t page_addr; // page-aligned base address + char *data; + int valid; + struct page_cache_entry *next; +} page_cache_entry_t; + +#define MAX_PAGES 1024 + // Define a platform-independent process handle structure typedef struct { pid_t pid; -#ifdef MS_WINDOWS +#if defined(__APPLE__) + mach_port_t task; +#elif defined(MS_WINDOWS) HANDLE hProcess; #endif + page_cache_entry_t pages[MAX_PAGES]; + Py_ssize_t page_size; } proc_handle_t; +static void +_Py_RemoteDebug_FreePageCache(proc_handle_t *handle) +{ + for (int i = 0; i < MAX_PAGES; i++) { + PyMem_RawFree(handle->pages[i].data); + handle->pages[i].data = NULL; + handle->pages[i].valid = 0; + } +} + +void +_Py_RemoteDebug_ClearCache(proc_handle_t *handle) +{ + for (int i = 0; i < MAX_PAGES; i++) { + handle->pages[i].valid = 0; + } +} + +#if defined(__APPLE__) && TARGET_OS_OSX +static mach_port_t pid_to_task(pid_t pid); +#endif + // Initialize the process handle static int _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { handle->pid = pid; -#ifdef MS_WINDOWS +#if defined(__APPLE__) + handle->task = pid_to_task(handle->pid); +#elif defined(MS_WINDOWS) handle->hProcess = OpenProcess( PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION, FALSE, pid); @@ -94,6 +146,11 @@ _Py_RemoteDebug_InitProcHandle(proc_handle_t *handle, pid_t pid) { return -1; } #endif + handle->page_size = get_page_size(); + for (int i = 0; i < MAX_PAGES; i++) { + handle->pages[i].data = NULL; + handle->pages[i].valid = 0; + } return 0; } @@ -107,6 +164,7 @@ _Py_RemoteDebug_CleanupProcHandle(proc_handle_t *handle) { } #endif handle->pid = 0; + _Py_RemoteDebug_FreePageCache(handle); } #if defined(__APPLE__) && TARGET_OS_OSX @@ -755,7 +813,7 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address #elif defined(__APPLE__) && TARGET_OS_OSX Py_ssize_t result = -1; kern_return_t kr = mach_vm_read_overwrite( - pid_to_task(handle->pid), + handle->task, (mach_vm_address_t)remote_address, len, (mach_vm_address_t)dst, @@ -780,6 +838,59 @@ _Py_RemoteDebug_ReadRemoteMemory(proc_handle_t *handle, uintptr_t remote_address #endif } +int +_Py_RemoteDebug_PagedReadRemoteMemory(proc_handle_t *handle, + uintptr_t addr, + size_t size, + void *out) +{ + size_t page_size = handle->page_size; + uintptr_t page_base = addr & ~(page_size - 1); + size_t offset_in_page = addr - page_base; + + if (offset_in_page + size > page_size) { + return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out); + } + + // Search for valid cached page + for (int i = 0; i < MAX_PAGES; i++) { + page_cache_entry_t *entry = &handle->pages[i]; + if (entry->valid && entry->page_addr == page_base) { + memcpy(out, entry->data + offset_in_page, size); + return 0; + } + } + + // Find reusable slot + for (int i = 0; i < MAX_PAGES; i++) { + page_cache_entry_t *entry = &handle->pages[i]; + if (!entry->valid) { + if (entry->data == NULL) { + entry->data = PyMem_RawMalloc(page_size); + if (entry->data == NULL) { + PyErr_NoMemory(); + return -1; + } + } + + if (_Py_RemoteDebug_ReadRemoteMemory(handle, page_base, page_size, entry->data) < 0) { + // Try to just copy the exact ammount as a fallback + PyErr_Clear(); + goto fallback; + } + + entry->page_addr = page_base; + entry->valid = 1; + memcpy(out, entry->data + offset_in_page, size); + return 0; + } + } + +fallback: + // Cache full — fallback to uncached read + return _Py_RemoteDebug_ReadRemoteMemory(handle, addr, size, out); +} + static int _Py_RemoteDebug_ReadDebugOffsets( proc_handle_t *handle,