Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

gh-109587: Allow "precompiled" perf-trampolines to largely mitigate the cost of enabling perf-trampolines #109666

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 32 commits into from
Oct 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
bc4203b
Update sysmodule.h
gsallam Sep 21, 2023
3c805e1
Update ceval.h
gsallam Sep 21, 2023
ccc1ec5
Update pycore_ceval_state.h
gsallam Sep 21, 2023
3098e9d
Update perf_trampoline.c
gsallam Sep 21, 2023
09d65f9
Update sysmodule.c
gsallam Sep 21, 2023
83c6359
Update perf_trampoline.c
gsallam Sep 21, 2023
5d2be1c
remove whitespace in perf_trampoline.c
gsallam Sep 21, 2023
f04006b
Update stable_abi.toml
gsallam Sep 22, 2023
1d285f3
Update test_stable_abi_ctypes.py
gsallam Sep 22, 2023
3712109
Update stable_abi.toml
gsallam Sep 22, 2023
86bc6b6
Update ceval.h
gsallam Sep 22, 2023
9edfe53
Add the two API to include/cpython/ceval.h instead of include/ceval.h
gsallam Sep 22, 2023
c41fa27
Remove the two APIs from include/ceval.h and instead include t…
gsallam Sep 22, 2023
5132af8
remove new line from ceval.h
gsallam Sep 22, 2023
1124bc0
Remove changes in test_stable_abi_ctypes.py
gsallam Sep 25, 2023
70ea0de
Remove #ifndef Py_LIMITED_API from ceval.h
gsallam Sep 25, 2023
9b64aee
📜🤖 Added by blurb_it.
blurb-it[bot] Oct 2, 2023
3bb50da
Merge branch 'main' of github.com:python/cpython
czardoz Oct 6, 2023
c043e9f
Update Python/perf_trampoline.c
gsallam Oct 16, 2023
c9fc826
add close_and_release goto in sysmodule.c
gsallam Oct 16, 2023
b78f0e0
expose PyUnstable_PerfTrampoline_CompileCode and PyUnstable_PerfTramp…
gsallam Oct 18, 2023
5caafdf
Add unit tests in test_perf_profiler.py
gsallam Oct 18, 2023
a6db587
Merge remote-tracking branch 'upstream/main'
czardoz Oct 19, 2023
1ed3205
Fix build (possibly)
czardoz Oct 19, 2023
140d314
fix bad merge
czardoz Oct 19, 2023
012282e
add a space before the right parenthesis perf_trampoline.c
gsallam Oct 20, 2023
62ff839
add a missing space before the right parenthesis sysmodule.c
gsallam Oct 20, 2023
b3c42d1
move definitions to the right header file
czardoz Oct 27, 2023
6c3fe05
Merge remote-tracking branch 'upstream/main'
czardoz Oct 27, 2023
c6cf583
gate PyUnstable_PerfTrampoline_CompileCode under appropriate ifdefs
czardoz Oct 27, 2023
ff5f1e4
use PyAPI_FUNC when defining new c-api
czardoz Oct 27, 2023
53aebc0
fixup! use PyAPI_FUNC when defining new c-api
pablogsal Oct 27, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Include/cpython/sysmodule.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,6 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
unsigned int code_size,
const char *entry_name);
PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void);
/* Append the contents of the file named `parent_filename` to the current
 * perf map file, initializing the perf map state first if needed.
 * Returns 0 on success and a non-zero value on failure. */
PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename);
/* Compile a perf trampoline for the given code object if it does not already
 * have one attached. Returns 0, or the result of storing the trampoline on
 * the code object. Always returns 0 when perf trampolines are not compiled
 * in (PY_HAVE_PERF_TRAMPOLINE undefined). */
PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *);
/* Store `enable` as the runtime's persist-after-fork flag and return the
 * stored value. Returns 0 when perf trampolines are not compiled in. */
PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable);
2 changes: 2 additions & 0 deletions Include/internal/pycore_ceval_state.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ struct _ceval_runtime_state {
struct code_arena_st *code_arena;
struct trampoline_api_st trampoline_api;
FILE *map_file;
Py_ssize_t persist_after_fork;
#else
int _not_used;
#endif
Expand All @@ -68,6 +69,7 @@ struct _ceval_runtime_state {
{ \
.status = PERF_STATUS_NO_INIT, \
.extra_code_index = -1, \
.persist_after_fork = 0, \
}
#else
# define _PyEval_RUNTIME_PERF_INIT {0}
Expand Down
3 changes: 0 additions & 3 deletions Include/sysmodule.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@

/* System module interface */

#ifndef Py_SYSMODULE_H
#define Py_SYSMODULE_H
#ifdef __cplusplus
Expand Down
76 changes: 76 additions & 0 deletions Lib/test/test_perf_profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,82 @@ def baz(n):
self.assertNotIn(f"py::bar:{script}", stdout)
self.assertNotIn(f"py::baz:{script}", stdout)

def test_pre_fork_compile(self):
# Checks that trampolines compiled before os.fork() show up in the perf map
# files of both the parent and the forked child.
#
# The script below turns on "persist after fork", compiles a trampoline for
# every global function that has a __code__ attribute, then forks. The child
# prints its PID (the only output expected on stdout) and runs bar_fork();
# the parent runs bar().
code = """if 1:
import sys
import os
import sysconfig
from _testinternalcapi import (
compile_perf_trampoline_entry,
perf_trampoline_set_persist_after_fork,
)

def foo_fork():
pass

def bar_fork():
foo_fork()

def foo():
pass

def bar():
foo()

def compile_trampolines_for_all_functions():
perf_trampoline_set_persist_after_fork(1)
for _, obj in globals().items():
if callable(obj) and hasattr(obj, '__code__'):
compile_perf_trampoline_entry(obj.__code__)

if __name__ == "__main__":
compile_trampolines_for_all_functions()
pid = os.fork()
if pid == 0:
print(os.getpid())
bar_fork()
else:
bar()
"""

# Run the script in a fresh interpreter started with -Xperf and capture
# both output streams as text.
with temp_dir() as script_dir:
script = make_script(script_dir, "perftest", code)
with subprocess.Popen(
[sys.executable, "-Xperf", script],
universal_newlines=True,
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
) as process:
stdout, stderr = process.communicate()

self.assertEqual(process.returncode, 0)
self.assertNotIn("Error:", stderr)
# stdout is expected to hold only the PID printed by the forked child.
child_pid = int(stdout.strip())
# One perf map file per process, named after its PID.
perf_file = pathlib.Path(f"/tmp/perf-{process.pid}.map")
perf_child_file = pathlib.Path(f"/tmp/perf-{child_pid}.map")
self.assertTrue(perf_file.exists())
self.assertTrue(perf_child_file.exists())

# The parent compiled all four functions before forking, so its map file
# must list every one of them.
perf_file_contents = perf_file.read_text()
self.assertIn(f"py::foo:{script}", perf_file_contents)
self.assertIn(f"py::bar:{script}", perf_file_contents)
self.assertIn(f"py::foo_fork:{script}", perf_file_contents)
self.assertIn(f"py::bar_fork:{script}", perf_file_contents)

# The child's map file must at least list the functions the child runs.
child_perf_file_contents = perf_child_file.read_text()
self.assertIn(f"py::foo_fork:{script}", child_perf_file_contents)
self.assertIn(f"py::bar_fork:{script}", child_perf_file_contents)

# Pre-compiled perf-map entries of a forked process must be
# identical in both the parent and child perf-map files.
perf_file_lines = perf_file_contents.split("\n")
for line in perf_file_lines:
if (
f"py::foo_fork:{script}" in line
or f"py::bar_fork:{script}" in line
):
self.assertIn(line, child_perf_file_contents)


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Introduced :c:func:`PyUnstable_PerfTrampoline_CompileCode`, :c:func:`PyUnstable_PerfTrampoline_SetPersistAfterFork` and
:c:func:`PyUnstable_CopyPerfMapFile`. These functions allow extension modules to initialize trampolines eagerly, after the application is "warmed up". This makes it possible to have perf-trampolines running in an always-enabled fashion.
32 changes: 32 additions & 0 deletions Modules/_testinternalcapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1556,6 +1556,36 @@ _testinternalcapi_test_long_numbits_impl(PyObject *module)
Py_RETURN_NONE;
}

/* Test helper: compile_perf_trampoline_entry(code) -> int
 *
 * Takes exactly one code object and asks the perf-trampoline machinery to
 * compile a trampoline for it. Returns the status as a Python int (always 0
 * on this path); raises AssertionError when the C API reports a non-zero
 * status. */
static PyObject *
compile_perf_trampoline_entry(PyObject *self, PyObject *args)
{
    PyObject *code_obj = NULL;

    /* "O!" rejects anything that is not a code object with a TypeError. */
    if (PyArg_ParseTuple(args, "O!", &PyCode_Type, &code_obj) == 0) {
        return NULL;
    }

    int status = PyUnstable_PerfTrampoline_CompileCode((PyCodeObject *)code_obj);
    if (status == 0) {
        return PyLong_FromLong(status);
    }

    PyErr_SetString(PyExc_AssertionError, "Failed to compile trampoline");
    return NULL;
}

/* Test helper: perf_trampoline_set_persist_after_fork(enable) -> int
 *
 * Forwards `enable` to PyUnstable_PerfTrampoline_SetPersistAfterFork() and
 * returns the value that function reports back as a Python int.
 *
 * The C API returns the flag value it stored (or 0 when perf trampolines are
 * not compiled in), so success means "the returned value equals the requested
 * one". Comparing against 0 instead would report a successful *disable*
 * (enable == 0) as a failure. Raises AssertionError when the stored value
 * does not match the request. */
static PyObject *
perf_trampoline_set_persist_after_fork(PyObject *self, PyObject *args)
{
    int enable;
    if (!PyArg_ParseTuple(args, "i", &enable)) {
        return NULL;
    }
    int ret = PyUnstable_PerfTrampoline_SetPersistAfterFork(enable);
    if (ret != enable) {
        PyErr_SetString(PyExc_AssertionError, "Failed to set persist_after_fork");
        return NULL;
    }
    return PyLong_FromLong(ret);
}


static PyMethodDef module_functions[] = {
{"get_configs", get_configs, METH_NOARGS},
Expand Down Expand Up @@ -1613,6 +1643,8 @@ static PyMethodDef module_functions[] = {
{"run_in_subinterp_with_config",
_PyCFunction_CAST(run_in_subinterp_with_config),
METH_VARARGS | METH_KEYWORDS},
{"compile_perf_trampoline_entry", compile_perf_trampoline_entry, METH_VARARGS},
{"perf_trampoline_set_persist_after_fork", perf_trampoline_set_persist_after_fork, METH_VARARGS},
_TESTINTERNALCAPI_WRITE_UNRAISABLE_EXC_METHODDEF
_TESTINTERNALCAPI_TEST_LONG_NUMBITS_METHODDEF
{NULL, NULL} /* sentinel */
Expand Down
50 changes: 44 additions & 6 deletions Python/perf_trampoline.c
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ typedef struct trampoline_api_st trampoline_api_t;
// Shorthand names for the perf-trampoline fields stored in the global
// runtime state (_PyRuntime.ceval.perf).
#define perf_code_arena _PyRuntime.ceval.perf.code_arena
#define trampoline_api _PyRuntime.ceval.perf.trampoline_api
#define perf_map_file _PyRuntime.ceval.perf.map_file

// Flag written by PyUnstable_PerfTrampoline_SetPersistAfterFork().
#define persist_after_fork _PyRuntime.ceval.perf.persist_after_fork

static void
perf_map_write_entry(void *state, const void *code_addr,
Expand Down Expand Up @@ -361,6 +361,26 @@ py_trampoline_evaluator(PyThreadState *ts, _PyInterpreterFrame *frame,
}
#endif // PY_HAVE_PERF_TRAMPOLINE

/* Eagerly attach a perf trampoline to the code object `co`.
 *
 * If `co` already carries a trampoline in its extra slot, nothing is done.
 * Otherwise a new trampoline is compiled, reported through the trampoline
 * API's write_state callback, and stored on the code object.
 *
 * Returns 0 when nothing had to be done, otherwise the result of
 * _PyCode_SetExtra(). Always returns 0 when PY_HAVE_PERF_TRAMPOLINE is not
 * defined.
 *
 * NOTE(review): a failed compile_trampoline() also returns 0, so callers
 * cannot tell that case apart from success -- confirm this is intended. */
int PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *co)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    assert(extra_code_index != -1);

    py_trampoline existing = NULL;
    int lookup = _PyCode_GetExtra((PyObject *)co, extra_code_index,
                                  (void **)&existing);
    if (lookup == 0 && existing != NULL) {
        /* A trampoline is already attached to this code object. */
        return 0;
    }

    py_trampoline fresh = compile_trampoline();
    if (fresh == NULL) {
        return 0;
    }

    trampoline_api.write_state(trampoline_api.state, fresh,
                               perf_code_arena->code_size, co);
    return _PyCode_SetExtra((PyObject *)co, extra_code_index, (void *)fresh);
#else
    return 0;
#endif // PY_HAVE_PERF_TRAMPOLINE
}

int
_PyIsPerfTrampolineActive(void)
{
Expand Down Expand Up @@ -448,16 +468,34 @@ _PyPerfTrampoline_Fini(void)
return 0;
}

/* Record whether perf-trampoline state should persist across fork().
 *
 * Stores `enable` in the runtime's persist_after_fork flag and returns the
 * stored value. When PY_HAVE_PERF_TRAMPOLINE is not defined nothing is
 * stored and 0 is returned. */
int
PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
    persist_after_fork = enable;
    return persist_after_fork;
#else
    return 0;
#endif
}

PyStatus
_PyPerfTrampoline_AfterFork_Child(void)
{
#ifdef PY_HAVE_PERF_TRAMPOLINE
// Restart trampoline in file in child.
int was_active = _PyIsPerfTrampolineActive();
_PyPerfTrampoline_Fini();
PyUnstable_PerfMapState_Fini();
if (was_active) {
_PyPerfTrampoline_Init(1);
if (persist_after_fork) {
char filename[256];
pid_t parent_pid = getppid();
snprintf(filename, sizeof(filename), "/tmp/perf-%d.map", parent_pid);
if (PyUnstable_CopyPerfMapFile(filename) != 0) {
return PyStatus_Error("Failed to copy perf map file.");
}
} else {
// Restart trampoline in file in child.
int was_active = _PyIsPerfTrampolineActive();
_PyPerfTrampoline_Fini();
if (was_active) {
_PyPerfTrampoline_Init(1);
}
}
#endif
return PyStatus_Ok();
Expand Down
41 changes: 40 additions & 1 deletion Python/sysmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -2361,7 +2361,7 @@ PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry(
#ifndef MS_WINDOWS
if (perf_map_state.perf_map == NULL) {
int ret = PyUnstable_PerfMapState_Init();
if(ret != 0){
if (ret != 0){
return ret;
}
}
Expand All @@ -2388,6 +2388,45 @@ PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void) {
#endif
}

/* Append the contents of the file `parent_filename` to the current perf map
 * file (perf_map_state.perf_map), initializing the perf map state first when
 * it has not been set up yet.
 *
 * Returns 0 on success. Returns -1 when the source file cannot be opened or
 * when reading, writing or flushing fails; returns the error code of
 * PyUnstable_PerfMapState_Init() when initialization fails. On Windows this
 * is a no-op that returns 0.
 *
 * Reaching end-of-file is not an error: an empty source file, or one whose
 * size is an exact multiple of the buffer size, copies successfully. */
PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename) {
#ifndef MS_WINDOWS
    FILE* from = fopen(parent_filename, "r");
    if (!from) {
        return -1;
    }
    if (perf_map_state.perf_map == NULL) {
        int ret = PyUnstable_PerfMapState_Init();
        if (ret != 0) {
            /* Do not leak the source stream when initialization fails. */
            fclose(from);
            return ret;
        }
    }
    char buf[4096];
    int result = 0;
    PyThread_acquire_lock(perf_map_state.map_lock, 1);
    while (1) {
        size_t bytes_read = fread(buf, 1, sizeof(buf), from);
        if (bytes_read > 0) {
            size_t bytes_written = fwrite(buf, 1, bytes_read,
                                          perf_map_state.perf_map);
            if (bytes_written < bytes_read
                || fflush(perf_map_state.perf_map) != 0) {
                result = -1;
                break;
            }
        }
        if (bytes_read < sizeof(buf)) {
            /* A short read means end-of-file or a read error; only the
             * latter is a failure. */
            if (ferror(from)) {
                result = -1;
            }
            break;
        }
    }
    fclose(from);
    PyThread_release_lock(perf_map_state.map_lock);
    return result;
#else
    (void)parent_filename;
    return 0;
#endif
}

#ifdef __cplusplus
}
#endif


static PyMethodDef sys_methods[] = {
/* Might as well keep this in alphabetic order */
Expand Down