Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit d635af5

Browse files
committed
Fixes issue #8052: The posix subprocess module's close_fds behavior was
suboptimal: it closed every possible file descriptor in the child process before exec() rather than only the ones that were actually open. It now closes only the open fds when it is possible to safely determine what those are.
2 parents e961bd4 + 8facece commit d635af5

5 files changed

Lines changed: 281 additions & 37 deletions

File tree

Lib/test/test_subprocess.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1392,6 +1392,11 @@ def test_close_fds(self):
13921392
self.addCleanup(os.close, fds[1])
13931393

13941394
open_fds = set(fds)
1395+
# add a bunch more fds
1396+
for _ in range(9):
1397+
fd = os.open("/dev/null", os.O_RDONLY)
1398+
self.addCleanup(os.close, fd)
1399+
open_fds.add(fd)
13951400

13961401
p = subprocess.Popen([sys.executable, fd_status],
13971402
stdout=subprocess.PIPE, close_fds=False)
@@ -1410,6 +1415,19 @@ def test_close_fds(self):
14101415
"Some fds were left open")
14111416
self.assertIn(1, remaining_fds, "Subprocess failed")
14121417

1418+
# Keep some of the fd's we opened open in the subprocess.
1419+
# This tests _posixsubprocess.c's proper handling of fds_to_keep.
1420+
fds_to_keep = set(open_fds.pop() for _ in range(8))
1421+
p = subprocess.Popen([sys.executable, fd_status],
1422+
stdout=subprocess.PIPE, close_fds=True,
1423+
pass_fds=())
1424+
output, ignored = p.communicate()
1425+
remaining_fds = set(map(int, output.split(b',')))
1426+
1427+
self.assertFalse(remaining_fds & fds_to_keep & open_fds,
1428+
"Some fds not in pass_fds were left open")
1429+
self.assertIn(1, remaining_fds, "Subprocess failed")
1430+
14131431
# Mac OS X Tiger (10.4) has a kernel bug: sometimes, the file
14141432
# descriptor of a pipe closed in the parent process is valid in the
14151433
# child process according to fstat(), but the mode of the file

Modules/_posixsubprocess.c

Lines changed: 258 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,26 @@
55
#endif
66
#include <unistd.h>
77
#include <fcntl.h>
8+
#ifdef HAVE_SYS_TYPES_H
9+
#include <sys/types.h>
10+
#endif
11+
#ifdef HAVE_SYS_SYSCALL_H
12+
#include <sys/syscall.h>
13+
#endif
14+
#ifdef HAVE_DIRENT_H
15+
#include <dirent.h>
16+
#endif
17+
18+
#if defined(sun) && !defined(HAVE_DIRFD)
19+
/* Some versions of Solaris lack dirfd(). */
20+
# define DIRFD(dirp) ((dirp)->dd_fd)
21+
# define HAVE_DIRFD
22+
#else
23+
# define DIRFD(dirp) (dirfd(dirp))
24+
#endif
825

26+
#define LINUX_SOLARIS_FD_DIR "/proc/self/fd"
27+
#define BSD_OSX_FD_DIR "/dev/fd"
928

1029
#define POSIX_CALL(call) if ((call) == -1) goto error
1130

@@ -28,6 +47,233 @@ static int _enable_gc(PyObject *gc_module)
2847
}
2948

3049

50+
/* Convert a NUL-terminated string of ASCII decimal digits to a non-negative
 * int.  No libc call is made, so this is usable between fork() and exec().
 *
 * Returns the parsed value, or -1 if the string is empty, contains any
 * non-digit character, or denotes a value larger than INT_MAX.
 */
static int _pos_int_from_ascii(const char *name)
{
    int num = 0;
    if (!*name)
        return -1;  /* Empty string, not a number. */
    while (*name >= '0' && *name <= '9') {
        int digit = *name - '0';
        /* Check before multiplying: signed overflow is undefined behavior. */
        if (num > (INT_MAX - digit) / 10)
            return -1;  /* Too large to be represented as an int. */
        num = num * 10 + digit;
        ++name;
    }
    if (*name)
        return -1;  /* Non digit found, not a number. */
    return num;
}
62+
63+
64+
/* Returns 1 if there is a problem with fd_sequence, 0 otherwise. */
65+
static int _sanity_check_python_fd_sequence(PyObject *fd_sequence)
66+
{
67+
Py_ssize_t seq_idx, seq_len = PySequence_Length(fd_sequence);
68+
long prev_fd = -1;
69+
for (seq_idx = 0; seq_idx < seq_len; ++seq_idx) {
70+
PyObject* py_fd = PySequence_Fast_GET_ITEM(fd_sequence, seq_idx);
71+
long iter_fd = PyLong_AsLong(py_fd);
72+
if (iter_fd < 0 || iter_fd < prev_fd || iter_fd > INT_MAX) {
73+
/* Negative, overflow, not a Long, unsorted, too big for a fd. */
74+
return 1;
75+
}
76+
}
77+
return 0;
78+
}
79+
80+
81+
/* Is fd found in the sorted Python Sequence? */
82+
static int _is_fd_in_sorted_fd_sequence(int fd, PyObject *fd_sequence)
83+
{
84+
/* Binary search. */
85+
Py_ssize_t search_min = 0;
86+
Py_ssize_t search_max = PySequence_Length(fd_sequence) - 1;
87+
if (search_max < 0)
88+
return 0;
89+
do {
90+
long middle = (search_min + search_max) / 2;
91+
long middle_fd = PyLong_AsLong(
92+
PySequence_Fast_GET_ITEM(fd_sequence, middle));
93+
if (fd == middle_fd)
94+
return 1;
95+
if (fd > middle_fd)
96+
search_min = middle + 1;
97+
else
98+
search_max = middle - 1;
99+
} while (search_min <= search_max);
100+
return 0;
101+
}
102+
103+
104+
/* Close all file descriptors in the range start_fd inclusive to
105+
* end_fd exclusive except for those in py_fds_to_keep. If the
106+
* range defined by [start_fd, end_fd) is large this will take a
107+
* long time as it calls close() on EVERY possible fd.
108+
*/
109+
static void _close_fds_by_brute_force(int start_fd, int end_fd,
110+
PyObject *py_fds_to_keep)
111+
{
112+
Py_ssize_t num_fds_to_keep = PySequence_Length(py_fds_to_keep);
113+
Py_ssize_t keep_seq_idx;
114+
int fd_num;
115+
/* As py_fds_to_keep is sorted we can loop through the list closing
116+
* fds inbetween any in the keep list falling within our range. */
117+
for (keep_seq_idx = 0; keep_seq_idx < num_fds_to_keep; ++keep_seq_idx) {
118+
PyObject* py_keep_fd = PySequence_Fast_GET_ITEM(py_fds_to_keep,
119+
keep_seq_idx);
120+
int keep_fd = PyLong_AsLong(py_keep_fd);
121+
if (keep_fd < start_fd)
122+
continue;
123+
for (fd_num = start_fd; fd_num < keep_fd; ++fd_num) {
124+
while (close(fd_num) < 0 && errno == EINTR);
125+
}
126+
start_fd = keep_fd + 1;
127+
}
128+
if (start_fd <= end_fd) {
129+
for (fd_num = start_fd; fd_num < end_fd; ++fd_num) {
130+
while (close(fd_num) < 0 && errno == EINTR);
131+
}
132+
}
133+
}
134+
135+
136+
#if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
137+
/* It doesn't matter if d_name has room for NAME_MAX chars; we're using this
138+
* only to read a directory of short file descriptor number names. The kernel
139+
* will return an error if we didn't give it enough space. Highly Unlikely.
140+
* This structure is very old and stable: It will not change unless the kernel
141+
* chooses to break compatibility with all existing binaries. Highly Unlikely.
142+
*/
143+
struct linux_dirent {
144+
unsigned long d_ino; /* Inode number */
145+
unsigned long d_off; /* Offset to next linux_dirent */
146+
unsigned short d_reclen; /* Length of this linux_dirent */
147+
char d_name[256]; /* Filename (null-terminated) */
148+
};
149+
150+
/* Close all open file descriptors in the range start_fd inclusive to end_fd
151+
* exclusive. Do not close any in the sorted py_fds_to_keep list.
152+
*
153+
* This version is async signal safe as it does not make any unsafe C library
154+
* calls, malloc calls or handle any locks. It is _unfortunate_ to be forced
155+
* to resort to making a kernel system call directly but this is the ONLY api
156+
* available that does no harm. opendir/readdir/closedir perform memory
157+
* allocation and locking so while they usually work they are not guaranteed
158+
* to (especially if you have replaced your malloc implementation). A version
159+
* of this function that uses those can be found in the _maybe_unsafe variant.
160+
*
161+
* This is Linux specific because that is all I am ready to test it on. It
162+
* should be easy to add OS specific dirent or dirent64 structures and modify
163+
* it with some cpp #define magic to work on other OSes as well if you want.
164+
*/
165+
static void _close_open_fd_range_safe(int start_fd, int end_fd,
166+
PyObject* py_fds_to_keep)
167+
{
168+
int fd_dir_fd;
169+
if (start_fd >= end_fd)
170+
return;
171+
fd_dir_fd = open(LINUX_SOLARIS_FD_DIR, O_RDONLY | O_CLOEXEC, 0);
172+
/* Not trying to open the BSD_OSX path as this is currently Linux only. */
173+
if (fd_dir_fd == -1) {
174+
/* No way to get a list of open fds. */
175+
_close_fds_by_brute_force(start_fd, end_fd, py_fds_to_keep);
176+
return;
177+
} else {
178+
char buffer[sizeof(struct linux_dirent)];
179+
int bytes;
180+
while ((bytes = syscall(SYS_getdents, fd_dir_fd,
181+
(struct linux_dirent *)buffer,
182+
sizeof(buffer))) > 0) {
183+
struct linux_dirent *entry;
184+
int offset;
185+
for (offset = 0; offset < bytes; offset += entry->d_reclen) {
186+
int fd;
187+
entry = (struct linux_dirent *)(buffer + offset);
188+
if ((fd = _pos_int_from_ascii(entry->d_name)) < 0)
189+
continue; /* Not a number. */
190+
if (fd != fd_dir_fd && fd >= start_fd && fd < end_fd &&
191+
!_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
192+
while (close(fd) < 0 && errno == EINTR);
193+
}
194+
}
195+
}
196+
close(fd_dir_fd);
197+
}
198+
}
199+
200+
#define _close_open_fd_range _close_open_fd_range_safe
201+
202+
#else /* NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
203+
204+
205+
/* Close all open file descriptors in the range start_fd inclusive to end_fd
206+
* exclusive. Do not close any in the sorted py_fds_to_keep list.
207+
*
208+
* This function violates the strict use of async signal safe functions. :(
209+
* It calls opendir(), readdir64() and closedir(). Of these, the one most
210+
* likely to ever cause a problem is opendir() as it performs an internal
211+
* malloc(). Practically this should not be a problem. The Java VM makes the
212+
* same calls between fork and exec in its own UNIXProcess_md.c implementation.
213+
*
214+
* readdir_r() is not used because it provides no benefit. It is typically
215+
* implemented as readdir() followed by memcpy(). See also:
216+
* http://womble.decadent.org.uk/readdir_r-advisory.html
217+
*/
218+
static void _close_open_fd_range_maybe_unsafe(int start_fd, int end_fd,
219+
PyObject* py_fds_to_keep)
220+
{
221+
DIR *proc_fd_dir;
222+
#ifndef HAVE_DIRFD
223+
while (_is_fd_in_sorted_fd_sequence(start_fd, py_fds_to_keep) &&
224+
(start_fd < end_fd)) {
225+
++start_fd;
226+
}
227+
if (start_fd >= end_fd)
228+
return;
229+
/* Close our lowest fd before we call opendir so that it is likely to
230+
* reuse that fd otherwise we might close opendir's file descriptor in
231+
* our loop. This trick assumes that fd's are allocated on a lowest
232+
* available basis. */
233+
while (close(start_fd) < 0 && errno == EINTR);
234+
++start_fd;
235+
#endif
236+
if (start_fd >= end_fd)
237+
return;
238+
239+
proc_fd_dir = opendir(BSD_OSX_FD_DIR);
240+
if (!proc_fd_dir)
241+
proc_fd_dir = opendir(LINUX_SOLARIS_FD_DIR);
242+
if (!proc_fd_dir) {
243+
/* No way to get a list of open fds. */
244+
_close_fds_by_brute_force(start_fd, end_fd, py_fds_to_keep);
245+
} else {
246+
struct dirent64 *dir_entry;
247+
#ifdef HAVE_DIRFD
248+
int fd_used_by_opendir = DIRFD(proc_fd_dir);
249+
#else
250+
int fd_used_by_opendir = start_fd - 1;
251+
#endif
252+
errno = 0;
253+
/* readdir64 is used to work around Solaris 9 bug 6395699. */
254+
while ((dir_entry = readdir64(proc_fd_dir))) {
255+
int fd;
256+
if ((fd = _pos_int_from_ascii(dir_entry->d_name)) < 0)
257+
continue; /* Not a number. */
258+
if (fd != fd_used_by_opendir && fd >= start_fd && fd < end_fd &&
259+
!_is_fd_in_sorted_fd_sequence(fd, py_fds_to_keep)) {
260+
while (close(fd) < 0 && errno == EINTR);
261+
}
262+
errno = 0;
263+
}
264+
if (errno) {
265+
/* readdir error, revert behavior. Highly Unlikely. */
266+
_close_fds_by_brute_force(start_fd, end_fd, py_fds_to_keep);
267+
}
268+
closedir(proc_fd_dir);
269+
}
270+
}
271+
272+
#define _close_open_fd_range _close_open_fd_range_maybe_unsafe
273+
274+
#endif /* else NOT (defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)) */
275+
276+
31277
/*
32278
* This function is code executed in the child process immediately after fork
33279
* to set things up and call exec().
@@ -48,12 +294,12 @@ static void child_exec(char *const exec_array[],
48294
int errread, int errwrite,
49295
int errpipe_read, int errpipe_write,
50296
int close_fds, int restore_signals,
51-
int call_setsid, Py_ssize_t num_fds_to_keep,
297+
int call_setsid,
52298
PyObject *py_fds_to_keep,
53299
PyObject *preexec_fn,
54300
PyObject *preexec_fn_args_tuple)
55301
{
56-
int i, saved_errno, fd_num, unused;
302+
int i, saved_errno, unused;
57303
PyObject *result;
58304
const char* err_msg = "";
59305
/* Buffer large enough to hold a hex integer. We can't malloc. */
@@ -115,33 +361,8 @@ static void child_exec(char *const exec_array[],
115361
POSIX_CALL(close(errwrite));
116362
}
117363

118-
/* close() is intentionally not checked for errors here as we are closing */
119-
/* a large range of fds, some of which may be invalid. */
120-
if (close_fds) {
121-
Py_ssize_t keep_seq_idx;
122-
int start_fd = 3;
123-
for (keep_seq_idx = 0; keep_seq_idx < num_fds_to_keep; ++keep_seq_idx) {
124-
PyObject* py_keep_fd = PySequence_Fast_GET_ITEM(py_fds_to_keep,
125-
keep_seq_idx);
126-
int keep_fd = PyLong_AsLong(py_keep_fd);
127-
if (keep_fd < 0) { /* Negative number, overflow or not a Long. */
128-
err_msg = "bad value in fds_to_keep.";
129-
errno = 0; /* We don't want to report an OSError. */
130-
goto error;
131-
}
132-
if (keep_fd < start_fd)
133-
continue;
134-
for (fd_num = start_fd; fd_num < keep_fd; ++fd_num) {
135-
close(fd_num);
136-
}
137-
start_fd = keep_fd + 1;
138-
}
139-
if (start_fd <= max_fd) {
140-
for (fd_num = start_fd; fd_num < max_fd; ++fd_num) {
141-
close(fd_num);
142-
}
143-
}
144-
}
364+
if (close_fds)
365+
_close_open_fd_range(3, max_fd, py_fds_to_keep);
145366

146367
if (cwd)
147368
POSIX_CALL(chdir(cwd));
@@ -229,7 +450,7 @@ subprocess_fork_exec(PyObject* self, PyObject *args)
229450
pid_t pid;
230451
int need_to_reenable_gc = 0;
231452
char *const *exec_array, *const *argv = NULL, *const *envp = NULL;
232-
Py_ssize_t arg_num, num_fds_to_keep;
453+
Py_ssize_t arg_num;
233454

234455
if (!PyArg_ParseTuple(
235456
args, "OOOOOOiiiiiiiiiiO:fork_exec",
@@ -245,9 +466,12 @@ subprocess_fork_exec(PyObject* self, PyObject *args)
245466
PyErr_SetString(PyExc_ValueError, "errpipe_write must be >= 3");
246467
return NULL;
247468
}
248-
num_fds_to_keep = PySequence_Length(py_fds_to_keep);
249-
if (num_fds_to_keep < 0) {
250-
PyErr_SetString(PyExc_ValueError, "bad fds_to_keep");
469+
if (PySequence_Length(py_fds_to_keep) < 0) {
470+
PyErr_SetString(PyExc_ValueError, "cannot get length of fds_to_keep");
471+
return NULL;
472+
}
473+
if (_sanity_check_python_fd_sequence(py_fds_to_keep)) {
474+
PyErr_SetString(PyExc_ValueError, "bad value(s) in fds_to_keep");
251475
return NULL;
252476
}
253477

@@ -353,8 +577,7 @@ subprocess_fork_exec(PyObject* self, PyObject *args)
353577
p2cread, p2cwrite, c2pread, c2pwrite,
354578
errread, errwrite, errpipe_read, errpipe_write,
355579
close_fds, restore_signals, call_setsid,
356-
num_fds_to_keep, py_fds_to_keep,
357-
preexec_fn, preexec_fn_args_tuple);
580+
py_fds_to_keep, preexec_fn, preexec_fn_args_tuple);
358581
_exit(255);
359582
return NULL; /* Dead code to avoid a potential compiler warning. */
360583
}

configure

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6147,7 +6147,7 @@ poll.h sys/devpoll.h sys/epoll.h sys/poll.h \
61476147
sys/audioio.h sys/xattr.h sys/bsdtty.h sys/event.h sys/file.h sys/loadavg.h \
61486148
sys/lock.h sys/mkdev.h sys/modem.h \
61496149
sys/param.h sys/select.h sys/sendfile.h sys/socket.h sys/statvfs.h \
6150-
sys/stat.h sys/termio.h sys/time.h \
6150+
sys/stat.h sys/syscall.h sys/termio.h sys/time.h \
61516151
sys/times.h sys/types.h sys/uio.h sys/un.h sys/utsname.h sys/wait.h pty.h \
61526152
libutil.h sys/resource.h netpacket/packet.h sysexits.h bluetooth.h \
61536153
bluetooth/bluetooth.h linux/tipc.h spawn.h util.h

configure.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1337,7 +1337,7 @@ poll.h sys/devpoll.h sys/epoll.h sys/poll.h \
13371337
sys/audioio.h sys/xattr.h sys/bsdtty.h sys/event.h sys/file.h sys/loadavg.h \
13381338
sys/lock.h sys/mkdev.h sys/modem.h \
13391339
sys/param.h sys/select.h sys/sendfile.h sys/socket.h sys/statvfs.h \
1340-
sys/stat.h sys/termio.h sys/time.h \
1340+
sys/stat.h sys/syscall.h sys/termio.h sys/time.h \
13411341
sys/times.h sys/types.h sys/uio.h sys/un.h sys/utsname.h sys/wait.h pty.h \
13421342
libutil.h sys/resource.h netpacket/packet.h sysexits.h bluetooth.h \
13431343
bluetooth/bluetooth.h linux/tipc.h spawn.h util.h)

0 commit comments

Comments
 (0)