From bc4203b0b2439469205038b6a886bbc04fe4af32 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Wed, 20 Sep 2023 23:19:24 -0700 Subject: [PATCH 01/29] Update sysmodule.h --- Include/sysmodule.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Include/sysmodule.h b/Include/sysmodule.h index 225e0602d191e1..cee21afa622e14 100644 --- a/Include/sysmodule.h +++ b/Include/sysmodule.h @@ -32,6 +32,8 @@ PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void); PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name); PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void); + +PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename); #endif #ifndef Py_LIMITED_API From 3c805e14bd36595eaa599f8093a306e249e4411b Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Wed, 20 Sep 2023 23:20:59 -0700 Subject: [PATCH 02/29] Update ceval.h --- Include/ceval.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Include/ceval.h b/Include/ceval.h index 9885bdb7febc21..2fbe2ce13d557d 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -110,6 +110,9 @@ PyAPI_FUNC(void) PyEval_RestoreThread(PyThreadState *); PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate); PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); + #define Py_BEGIN_ALLOW_THREADS { \ PyThreadState *_save; \ _save = PyEval_SaveThread(); From ccc1ec5edd35abb4cf6e2209bde40f83d9af61d1 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Thu, 21 Sep 2023 09:18:16 -0700 Subject: [PATCH 03/29] Update pycore_ceval_state.h --- Include/internal/pycore_ceval_state.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h index d0af5b542233e0..67804e48cb8495 100644 --- a/Include/internal/pycore_ceval_state.h +++ b/Include/internal/pycore_ceval_state.h @@ -58,6 +58,7 @@ struct _ceval_runtime_state { struct code_arena_st *code_arena; struct trampoline_api_st trampoline_api; FILE *map_file; + Py_ssize_t persist_after_fork; #else int _not_used; #endif @@ -76,6 +77,7 @@ struct _ceval_runtime_state { { \ .status = PERF_STATUS_NO_INIT, \ .extra_code_index = -1, \ + .persist_after_fork = 0, \ } #else # define _PyEval_RUNTIME_PERF_INIT {0} From 3098e9ddf8e4b4eb3f5df6c67fe100a36a681931 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Thu, 21 Sep 2023 09:20:43 -0700 Subject: [PATCH 04/29] Update perf_trampoline.c --- Python/perf_trampoline.c | 48 +++++++++++++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index 209a23b6c1cbc7..b03149a940fccb 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -193,7 +193,7 @@ typedef struct trampoline_api_st trampoline_api_t; #define perf_code_arena _PyRuntime.ceval.perf.code_arena #define trampoline_api _PyRuntime.ceval.perf.trampoline_api #define perf_map_file _PyRuntime.ceval.perf.map_file - +#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork static void perf_map_write_entry(void *state, const void *code_addr, @@ -328,6 +328,24 @@ compile_trampoline(void) return code_arena_new_code(perf_code_arena); } +int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) +{ + py_trampoline f = NULL; + assert(extra_code_index != -1); + int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f); + if (ret != 0 || f == NULL) { + py_trampoline new_trampoline = compile_trampoline(); + if (new_trampoline == NULL) { + return 0; + } + trampoline_api.write_state(trampoline_api.state, new_trampoline, + code_arena->code_size, co); + return _PyCode_SetExtra((PyObject *)co, extra_code_index, + (void *)new_trampoline); + } + return 0; +} + static PyObject * py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame, int throw) @@ -447,17 +465,35 @@ _PyPerfTrampoline_Fini(void) #endif return 0; } + +int +PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable){ +#ifdef PY_HAVE_PERF_TRAMPOLINE + persist_after_fork = enable; + return persist_after_fork; +#endif + return 0; +} PyStatus _PyPerfTrampoline_AfterFork_Child(void) { #ifdef PY_HAVE_PERF_TRAMPOLINE - // Restart trampoline in file in child. - int was_active = _PyIsPerfTrampolineActive(); - _PyPerfTrampoline_Fini(); PyUnstable_PerfMapState_Fini(); - if (was_active) { - _PyPerfTrampoline_Init(1); + if (persist_after_fork){ + char filename[256]; + pid_t parent_pid = getppid(); + snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid); + if(PyUnstable_CopyPerfMapFile(filename) != 0){ + return PyStatus_Error("Failed to copy perf map file."); + } + } else { + // Restart trampoline in file in child. + int was_active = _PyIsPerfTrampolineActive(); + _PyPerfTrampoline_Fini(); + if (was_active) { + _PyPerfTrampoline_Init(1); + } } #endif return PyStatus_Ok(); From 09d65f90fc6e118dbf7422ba1e4c370434a8d408 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Thu, 21 Sep 2023 09:22:40 -0700 Subject: [PATCH 05/29] Update sysmodule.c --- Python/sysmodule.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 9c1ee0215d7cf6..739c342aa4c857 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2370,6 +2370,40 @@ PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) { #endif } +PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename) { +#ifndef MS_WINDOWS + FILE* from = fopen(parent_filename, "r"); + if (!from) { + return -1; + } + if (perf_map_state.perf_map == NULL) { + int ret = PyUnstable_PerfMapState_Init(); + if(ret != 0){ + return ret; + } + } + char buf[4096]; + PyThread_acquire_lock(perf_map_state.map_lock, 1); + while (1) { + size_t bytes_read = fread(buf, 1, sizeof(buf), from); + size_t bytes_written = fwrite(buf, 1, bytes_read, perf_map_state.perf_map); + fflush(perf_map_state.perf_map); + + if (bytes_read < sizeof(buf) && feof(from)) { + fclose(from); + PyThread_release_lock(perf_map_state.map_lock); + return 0; + } + if (bytes_read == 0 || bytes_written < bytes_read) { + fclose(from); + PyThread_release_lock(perf_map_state.map_lock); + return -1; + } + } +#endif + return 0; +} + #ifdef __cplusplus } #endif From 83c6359ac9c4f3e291a7172c1ec82e401fa146a7 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Thu, 21 Sep 2023 09:38:13 -0700 Subject: [PATCH 06/29] Update perf_trampoline.c --- Python/perf_trampoline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index b03149a940fccb..b58624a2a991eb 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -339,7 +339,7 @@ int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) return 0; } trampoline_api.write_state(trampoline_api.state, new_trampoline, - code_arena->code_size, co); + perf_code_arena->code_size, co); return _PyCode_SetExtra((PyObject *)co, extra_code_index, (void *)new_trampoline); } From 5d2be1cf33e3d80a662ec97ee3d67d798da354e2 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Thu, 21 Sep 2023 09:43:55 -0700 Subject: [PATCH 07/29] remove whitespace in perf_trampoline.c --- Python/perf_trampoline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index b58624a2a991eb..dfaea4d499e5c5 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -465,7 +465,7 @@ _PyPerfTrampoline_Fini(void) #endif return 0; } - + int PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable){ #ifdef PY_HAVE_PERF_TRAMPOLINE From f04006b374cbb681150cf8da554c34668d0d58f4 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:00:11 -0700 Subject: [PATCH 08/29] Update stable_abi.toml --- Misc/stable_abi.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 8df3f85e61eec6..640f9cd753693a 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2460,3 +2460,7 @@ added = '3.13' [function.PyMapping_HasKeyStringWithError] added = '3.13' +[function.PyUnstable_PerfTrampoline_CompileCode] + added = '3.13' +[function.PyUnstable_PerfTrampoline_SetPersistAfterFork] + added = '3.13' From 1d285f345901d56eea0eb0a382148619cb41fd20 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Fri, 22 Sep 2023 11:27:54 -0700 Subject: [PATCH 09/29] Update test_stable_abi_ctypes.py --- Lib/test/test_stable_abi_ctypes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 94f817f8e1d159..e72d05aaad90a2 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -805,6 +805,8 @@ def test_windows_feature_macros(self): "PyUnicode_Translate", "PyUnicode_Type", "PyUnicode_WriteChar", + "PyUnstable_PerfTrampoline_CompileCode", + "PyUnstable_PerfTrampoline_SetPersistAfterFork", "PyVectorcall_Call", "PyVectorcall_NARGS", "PyWeakref_GetObject", From 3712109c8ced32d6cfa741252f08708eef1d995e Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:09:15 -0700 Subject: [PATCH 10/29] Update stable_abi.toml --- Misc/stable_abi.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index 640f9cd753693a..8df3f85e61eec6 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2460,7 +2460,3 @@ added = '3.13' [function.PyMapping_HasKeyStringWithError] added = '3.13' -[function.PyUnstable_PerfTrampoline_CompileCode] - added = '3.13' -[function.PyUnstable_PerfTrampoline_SetPersistAfterFork] - added = '3.13' From 86bc6b62b85a8cae035ea3724fa8509b5bab1831 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:11:29 -0700 Subject: [PATCH 11/29] Update ceval.h --- Include/ceval.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/ceval.h b/Include/ceval.h index 2fbe2ce13d557d..594ea68fa46e85 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -110,8 +110,6 @@ PyAPI_FUNC(void) PyEval_RestoreThread(PyThreadState *); PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate); PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); -PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); -PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); #define Py_BEGIN_ALLOW_THREADS { \ PyThreadState *_save; \ @@ -134,6 +132,8 @@ PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); # define Py_CPYTHON_CEVAL_H # include "cpython/ceval.h" # undef Py_CPYTHON_CEVAL_H +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); #endif #ifdef __cplusplus From 9edfe53de82fd8604a0f8e02b3489997d80ad9b9 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:17:23 -0700 Subject: [PATCH 12/29] Add the two API to include/cpython/ceval.h instead of include/ceval.h --- Include/cpython/ceval.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Include/cpython/ceval.h b/Include/cpython/ceval.h index 78f7405661662f..df9bd62d1fdd3d 100644 --- a/Include/cpython/ceval.h +++ b/Include/cpython/ceval.h @@ -23,3 +23,7 @@ _PyEval_RequestCodeExtraIndex(freefunc f) { PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *); PyAPI_FUNC(int) _PyEval_SliceIndexNotNone(PyObject *, Py_ssize_t *); +#ifndef Py_LIMITED_API +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); +#endif From c41fa273143fdc8ee957020d4dba9ef7b2323fd6 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:18:31 -0700 Subject: [PATCH 13/29] Remove the two APIs fro include/ceval.h ceval.h and instead include them in include/cpython/ceval.h --- Include/ceval.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/Include/ceval.h b/Include/ceval.h index 594ea68fa46e85..611fb16ceb014c 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -132,8 +132,6 @@ PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); # define Py_CPYTHON_CEVAL_H # include "cpython/ceval.h" # undef Py_CPYTHON_CEVAL_H -PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); -PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); #endif #ifdef __cplusplus From 5132af85db01f7e0537c2247dbb58926d3d9c959 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Fri, 22 Sep 2023 16:19:33 -0700 Subject: [PATCH 14/29] remove new line from ceval.h --- Include/ceval.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Include/ceval.h b/Include/ceval.h index 611fb16ceb014c..9885bdb7febc21 100644 --- a/Include/ceval.h +++ b/Include/ceval.h @@ -110,7 +110,6 @@ PyAPI_FUNC(void) PyEval_RestoreThread(PyThreadState *); PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate); PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate); - #define Py_BEGIN_ALLOW_THREADS { \ PyThreadState *_save; \ _save = PyEval_SaveThread(); From 1124bc06c675fd81e0ff5dd03a51298569ebd151 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Mon, 25 Sep 2023 11:40:24 -0700 Subject: [PATCH 15/29] Remove changes in test_stable_abi_ctypes.py --- Lib/test/test_stable_abi_ctypes.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index e72d05aaad90a2..94f817f8e1d159 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -805,8 +805,6 @@ def test_windows_feature_macros(self): "PyUnicode_Translate", "PyUnicode_Type", "PyUnicode_WriteChar", - "PyUnstable_PerfTrampoline_CompileCode", - "PyUnstable_PerfTrampoline_SetPersistAfterFork", "PyVectorcall_Call", "PyVectorcall_NARGS", "PyWeakref_GetObject", From 70ea0de8f26100d542e70b3f67b32fd717698d6e Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Mon, 25 Sep 2023 11:41:52 -0700 Subject: [PATCH 16/29] Remove #ifndef Py_LIMITED_API from ceval.h --- Include/cpython/ceval.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/Include/cpython/ceval.h b/Include/cpython/ceval.h index df9bd62d1fdd3d..d9348c1341878a 100644 --- a/Include/cpython/ceval.h +++ b/Include/cpython/ceval.h @@ -23,7 +23,5 @@ _PyEval_RequestCodeExtraIndex(freefunc f) { PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *); PyAPI_FUNC(int) _PyEval_SliceIndexNotNone(PyObject *, Py_ssize_t *); -#ifndef Py_LIMITED_API PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); -#endif From 9b64aee37517ca97103f953bbc05d4c9751dc88b Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Mon, 2 Oct 2023 23:08:55 +0000 Subject: [PATCH 17/29] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst diff --git a/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst b/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst new file mode 100644 index 00000000000000..c6fa24f024c20c --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-10-02-23-08-53.gh-issue-109587.UqqnDY.rst @@ -0,0 +1,2 @@ +Introduced :c:func:`PyUnstable_PerfTrampoline_CompileCode`, :c:func:`PyUnstable_PerfTrampoline_SetPersistAfterFork` and +:c:func:`PyUnstable_CopyPerfMapFile`. These functions allow extension modules to initialize trampolines eagerly, after the application is "warmed up". This makes it possible to have perf-trampolines running in an always-enabled fashion. From c043e9f42a55ee4e840cb169db9faf5bd505c5f8 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Mon, 16 Oct 2023 15:17:15 -0700 Subject: [PATCH 18/29] Update Python/perf_trampoline.c Co-authored-by: Pablo Galindo Salgado --- Python/perf_trampoline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index dfaea4d499e5c5..071ba4dcee87ad 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -484,7 +484,7 @@ _PyPerfTrampoline_AfterFork_Child(void) char filename[256]; pid_t parent_pid = getppid(); snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid); - if(PyUnstable_CopyPerfMapFile(filename) != 0){ + if (PyUnstable_CopyPerfMapFile(filename) != 0){ return PyStatus_Error("Failed to copy perf map file."); } } else { From c9fc82680b84f8b7c9de32407a9fa9d020332625 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Mon, 16 Oct 2023 15:21:27 -0700 Subject: [PATCH 19/29] add close_and_release goto in sysmodule.c --- Python/sysmodule.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index c39b75a3438980..d4224de56e1754 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2347,7 +2347,7 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry( #ifndef MS_WINDOWS if (perf_map_state.perf_map == NULL) { int ret = PyUnstable_PerfMapState_Init(); - if(ret != 0){ + if (ret != 0){ return ret; } } @@ -2382,28 +2382,29 @@ PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename) { } if (perf_map_state.perf_map == NULL) { int ret = PyUnstable_PerfMapState_Init(); - if(ret != 0){ + if (ret != 0){ return ret; } } char buf[4096]; PyThread_acquire_lock(perf_map_state.map_lock, 1); + int fflush_result = 0, result = 0; while (1) { size_t bytes_read = fread(buf, 1, sizeof(buf), from); size_t bytes_written = fwrite(buf, 1, bytes_read, perf_map_state.perf_map); - fflush(perf_map_state.perf_map); - - if (bytes_read < sizeof(buf) && feof(from)) { - fclose(from); - PyThread_release_lock(perf_map_state.map_lock); - return 0; + fflush_result = fflush(perf_map_state.perf_map); + if (fflush_result != 0 || bytes_read == 0 || bytes_written < bytes_read) { + result = -1; + goto close_and_release; } - if (bytes_read == 0 || bytes_written < bytes_read) { - fclose(from); - PyThread_release_lock(perf_map_state.map_lock); - return -1; + if (bytes_read < sizeof(buf) && feof(from)) { + goto close_and_release; } } +close_and_release: + fclose(from); + PyThread_release_lock(perf_map_state.map_lock); + return result; #endif return 0; } From b78f0e0658ea692f39ea37f78b70cc247f4878cc Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Wed, 18 Oct 2023 15:43:45 -0700 Subject: [PATCH 20/29] expose PyUnstable_PerfTrampoline_CompileCode and PyUnstable_PerfTrampoline_SetPersistAfterFork _testinternalcapi.c --- Modules/_testinternalcapi.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index 05bac0936b155d..05e7f7b7ce1312 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -1530,6 +1530,36 @@ _testinternalcapi_test_long_numbits_impl(PyObject *module) Py_RETURN_NONE; } +static PyObject * +compile_perf_trampoline_entry(PyObject *self, PyObject *args) +{ + PyObject *co; + if (!PyArg_ParseTuple(args, "O!", &PyCode_Type, &co)) { + return NULL; + } + int ret = PyUnstable_PerfTrampoline_CompileCode((PyCodeObject *)co); + if (ret != 0) { + PyErr_SetString(PyExc_AssertionError, "Failed to compile trampoline"); + return NULL; + } + return PyLong_FromLong(ret); +} + +static PyObject * +perf_trampoline_set_persist_after_fork(PyObject *self, PyObject *args) +{ + int enable; + if (!PyArg_ParseTuple(args, "i", &enable)) { + return NULL; + } + int ret = PyUnstable_PerfTrampoline_SetPersistAfterFork(enable); + if (ret == 0) { + PyErr_SetString(PyExc_AssertionError, "Failed to set persist_after_fork"); + return NULL; + } + return PyLong_FromLong(ret); +} + static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, @@ -1585,6 +1615,8 @@ static PyMethodDef module_functions[] = { {"run_in_subinterp_with_config", _PyCFunction_CAST(run_in_subinterp_with_config), METH_VARARGS | METH_KEYWORDS}, + {"compile_perf_trampoline_entry", compile_perf_trampoline_entry, METH_VARARGS}, + {"perf_trampoline_set_persist_after_fork", perf_trampoline_set_persist_after_fork, METH_VARARGS}, _TESTINTERNALCAPI_WRITE_UNRAISABLE_EXC_METHODDEF _TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF {NULL, NULL} /* sentinel */ From 5caafdf7734daa170b0cbfb2e3d478c664c7e31a Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Wed, 18 Oct 2023 15:45:15 -0700 Subject: [PATCH 21/29] Add unit tests in test_perf_profiler.py --- Lib/test/test_perf_profiler.py | 76 ++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/Lib/test/test_perf_profiler.py b/Lib/test/test_perf_profiler.py index fe8707a156e9dc..040be63da11447 100644 --- a/Lib/test/test_perf_profiler.py +++ b/Lib/test/test_perf_profiler.py @@ -353,6 +353,82 @@ def baz(n): self.assertNotIn(f"py::bar:{script}", stdout) self.assertNotIn(f"py::baz:{script}", stdout) + def test_pre_fork_compile(self): + code = """if 1: + import sys + import os + import sysconfig + from _testinternalcapi import ( + compile_perf_trampoline_entry, + perf_trampoline_set_persist_after_fork, + ) + + def foo_fork(): + pass + + def bar_fork(): + foo_fork() + + def foo(): + pass + + def bar(): + foo() + + def compile_trampolines_for_all_functions(): + perf_trampoline_set_persist_after_fork(1) + for _, obj in globals().items(): + if callable(obj) and hasattr(obj, '__code__'): + compile_perf_trampoline_entry(obj.__code__) + + if __name__ == "__main__": + compile_trampolines_for_all_functions() + pid = os.fork() + if pid == 0: + print(os.getpid()) + bar_fork() + else: + bar() + """ + + with temp_dir() as script_dir: + script = make_script(script_dir, "perftest", code) + with subprocess.Popen( + [sys.executable, "-Xperf", script], + universal_newlines=True, + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + ) as process: + stdout, stderr = process.communicate() + + self.assertEqual(process.returncode, 0) + self.assertNotIn("Error:", stderr) + child_pid = int(stdout.strip()) + perf_file = pathlib.Path(f"/tmp/perf-{process.pid}.map") + perf_child_file = pathlib.Path(f"/tmp/perf-{child_pid}.map") + self.assertTrue(perf_file.exists()) + self.assertTrue(perf_child_file.exists()) + + perf_file_contents = perf_file.read_text() + self.assertIn(f"py::foo:{script}", perf_file_contents) + self.assertIn(f"py::bar:{script}", perf_file_contents) + self.assertIn(f"py::foo_fork:{script}", perf_file_contents) + self.assertIn(f"py::bar_fork:{script}", perf_file_contents) + + child_perf_file_contents = perf_child_file.read_text() + self.assertIn(f"py::foo_fork:{script}", child_perf_file_contents) + self.assertIn(f"py::bar_fork:{script}", child_perf_file_contents) + + # Pre-compiled perf-map entries of a forked process must be + # identical in both the parent and child perf-map files. + perf_file_lines = perf_file_contents.split("\n") + for line in perf_file_lines: + if ( + f"py::foo_fork:{script}" in line + or f"py::bar_fork:{script}" in line + ): + self.assertIn(line, child_perf_file_contents) + if __name__ == "__main__": unittest.main() From 1ed32057d119c104c1e39b843cb0e1a648ce00ff Mon Sep 17 00:00:00 2001 From: Aniket Panse Date: Thu, 19 Oct 2023 12:06:27 -0700 Subject: [PATCH 22/29] Fix build (possibly) --- Include/sysmodule.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Include/sysmodule.h b/Include/sysmodule.h index f54be62f2dbfc3..a59c4745c1f4a8 100644 --- a/Include/sysmodule.h +++ b/Include/sysmodule.h @@ -34,11 +34,10 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void); PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename); - +#endif PyAPI_FUNC(int) PySys_AuditTuple( const char *event, PyObject *args); -#endif #ifndef Py_LIMITED_API # define Py_CPYTHON_SYSMODULE_H From 140d31473897044d8cea4e3d5055ef0d761b61d5 Mon Sep 17 00:00:00 2001 From: Aniket Panse Date: Thu, 19 Oct 2023 14:17:12 -0700 Subject: [PATCH 23/29] fix bad merge --- Include/cpython/sysmodule.h | 1 + Include/sysmodule.h | 12 +----------- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/Include/cpython/sysmodule.h b/Include/cpython/sysmodule.h index df12ae440f024b..c6f6776a8b9896 100644 --- a/Include/cpython/sysmodule.h +++ b/Include/cpython/sysmodule.h @@ -21,3 +21,4 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry( unsigned int code_size, const char *entry_name); PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void); +PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename); diff --git a/Include/sysmodule.h b/Include/sysmodule.h index a59c4745c1f4a8..7b14f72ee2e494 100644 --- a/Include/sysmodule.h +++ b/Include/sysmodule.h @@ -1,6 +1,3 @@ - -/* System module interface */ - #ifndef Py_SYSMODULE_H #define Py_SYSMODULE_H #ifdef __cplusplus @@ -27,17 +24,10 @@ PyAPI_FUNC(int) PySys_Audit( const char *argFormat, ...); -PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void); - -PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(const void *code_addr, unsigned int code_size, const char *entry_name); - -PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void); - -PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename); -#endif PyAPI_FUNC(int) PySys_AuditTuple( const char *event, PyObject *args); +#endif #ifndef Py_LIMITED_API # define Py_CPYTHON_SYSMODULE_H From 012282e692bc29b074154c8a228c95ed31a9a89e Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Fri, 20 Oct 2023 12:32:30 -0700 Subject: [PATCH 24/29] add a space before the right parenthesis perf_trampoline.c --- Python/perf_trampoline.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index 071ba4dcee87ad..ce1552c6b998db 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -480,11 +480,11 @@ _PyPerfTrampoline_AfterFork_Child(void) { #ifdef PY_HAVE_PERF_TRAMPOLINE PyUnstable_PerfMapState_Fini(); - if (persist_after_fork){ + if (persist_after_fork) { char filename[256]; pid_t parent_pid = getppid(); snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid); - if (PyUnstable_CopyPerfMapFile(filename) != 0){ + if (PyUnstable_CopyPerfMapFile(filename) != 0) { return PyStatus_Error("Failed to copy perf map file."); } } else { From 62ff8390641e37bd9bfd84a9d3d82ec1f0566762 Mon Sep 17 00:00:00 2001 From: gsallam <123525874+gsallam@users.noreply.github.com> Date: Fri, 20 Oct 2023 12:33:31 -0700 Subject: [PATCH 25/29] add a missing space before the right parenthesis sysmodule.c --- Python/sysmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 06066a8a97e23b..4008a28ad7bd8a 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2396,7 +2396,7 @@ PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename) { } if (perf_map_state.perf_map == NULL) { int ret = PyUnstable_PerfMapState_Init(); - if (ret != 0){ + if (ret != 0) { return ret; } } From b3c42d1072d5dc2a2c316b68ceff89cc3cfaf928 Mon Sep 17 00:00:00 2001 From: Aniket Panse Date: Thu, 26 Oct 2023 18:38:40 -0700 Subject: [PATCH 26/29] move definitions to the right header file --- Include/cpython/ceval.h | 2 -- Include/cpython/sysmodule.h | 2 ++ 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/cpython/ceval.h b/Include/cpython/ceval.h index d9348c1341878a..78f7405661662f 100644 --- a/Include/cpython/ceval.h +++ b/Include/cpython/ceval.h @@ -23,5 +23,3 @@ _PyEval_RequestCodeExtraIndex(freefunc f) { PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *); PyAPI_FUNC(int) _PyEval_SliceIndexNotNone(PyObject *, Py_ssize_t *); -PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); -PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); diff --git a/Include/cpython/sysmodule.h b/Include/cpython/sysmodule.h index c6f6776a8b9896..9fd7cc0cb43931 100644 --- a/Include/cpython/sysmodule.h +++ b/Include/cpython/sysmodule.h @@ -22,3 +22,5 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry( const char *entry_name); PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void); PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename); +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); From c6cf58358725bbfe58cf95737f501d8e7a47a00d Mon Sep 17 00:00:00 2001 From: Aniket Panse Date: Thu, 26 Oct 2023 18:59:55 -0700 Subject: [PATCH 27/29] gate PyUnstable_PerfTrampoline_CompileCode under appropriate ifdefs --- Python/perf_trampoline.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index ce1552c6b998db..afe5ea7c3a4665 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -330,6 +330,7 @@ compile_trampoline(void) int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) { +#ifdef PY_HAVE_PERF_TRAMPOLINE py_trampoline f = NULL; assert(extra_code_index != -1); int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f); @@ -343,6 +344,7 @@ int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) return _PyCode_SetExtra((PyObject *)co, extra_code_index, (void *)new_trampoline); } +#endif // PY_HAVE_PERF_TRAMPOLINE return 0; } From ff5f1e4c176379dcaeb0b3691df76c9e0714f6c6 Mon Sep 17 00:00:00 2001 From: Aniket Panse Date: Thu, 26 Oct 2023 19:16:20 -0700 Subject: [PATCH 28/29] use PyAPI_FUNC when defining new c-api --- Python/perf_trampoline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index afe5ea7c3a4665..90a9d00dea48f0 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -328,7 +328,7 @@ compile_trampoline(void) return code_arena_new_code(perf_code_arena); } -int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) { #ifdef PY_HAVE_PERF_TRAMPOLINE py_trampoline f = NULL; From 53aebc0bd35ac303ce4a3976dcc0016c7dcb93a8 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Fri, 27 Oct 2023 12:29:14 +0900 Subject: [PATCH 29/29] fixup! use PyAPI_FUNC when defining new c-api --- Python/perf_trampoline.c | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/Python/perf_trampoline.c b/Python/perf_trampoline.c index 90a9d00dea48f0..491223924ed7f2 100644 --- a/Python/perf_trampoline.c +++ b/Python/perf_trampoline.c @@ -328,26 +328,6 @@ compile_trampoline(void) return code_arena_new_code(perf_code_arena); } -PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) -{ -#ifdef PY_HAVE_PERF_TRAMPOLINE - py_trampoline f = NULL; - assert(extra_code_index != -1); - int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f); - if (ret != 0 || f == NULL) { - py_trampoline new_trampoline = compile_trampoline(); - if (new_trampoline == NULL) { - return 0; - } - trampoline_api.write_state(trampoline_api.state, new_trampoline, - perf_code_arena->code_size, co); - return _PyCode_SetExtra((PyObject *)co, extra_code_index, - (void *)new_trampoline); - } -#endif // PY_HAVE_PERF_TRAMPOLINE - return 0; -} - static PyObject * py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame, int throw) @@ -381,6 +361,26 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame, } #endif // PY_HAVE_PERF_TRAMPOLINE +int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co) +{ +#ifdef PY_HAVE_PERF_TRAMPOLINE + py_trampoline f = NULL; + assert(extra_code_index != -1); + int ret = _PyCode_GetExtra((PyObject *)co, extra_code_index, (void **)&f); + if (ret != 0 || f == NULL) { + py_trampoline new_trampoline = compile_trampoline(); + if (new_trampoline == NULL) { + return 0; + } + trampoline_api.write_state(trampoline_api.state, new_trampoline, + perf_code_arena->code_size, co); + return _PyCode_SetExtra((PyObject *)co, extra_code_index, + (void *)new_trampoline); + } +#endif // PY_HAVE_PERF_TRAMPOLINE + return 0; +} + int _PyIsPerfTrampolineActive(void) {