|
8 | 8 | #include <sys/random.h>
|
9 | 9 | #endif
|
10 | 10 | #include <sys/syscall.h>
|
| 11 | +#include <sys/wait.h> |
11 | 12 | #include <limits.h>
|
12 | 13 | #include <errno.h>
|
13 | 14 | #include <dirent.h>
|
|
30 | 31 | #ifndef SYS_pidfd_send_signal
|
31 | 32 | # define SYS_pidfd_send_signal 424
|
32 | 33 | #endif
|
| 34 | +#ifndef SYS_pidfd_open |
| 35 | +# define SYS_pidfd_open 434 |
| 36 | +#endif |
33 | 37 | #ifndef SYS_clone3
|
34 | 38 | # define SYS_clone3 435
|
35 | 39 | # define CLONE_PIDFD 0x00001000
|
@@ -148,24 +152,62 @@ CAMLprim value caml_eio_pidfd_send_signal(value v_pidfd, value v_signal) {
|
148 | 152 | CAMLreturn(Val_unit);
|
149 | 153 | }
|
150 | 154 |
|
| 155 | +static int pidfd_open(pid_t pid, unsigned int flags) { |
| 156 | + return syscall(SYS_pidfd_open, pid, flags); |
| 157 | +} |
| 158 | + |
| 159 | +/* Like clone3, but falls back to fork if not supported. |
| 160 | + Also, raises exceptions rather than returning an error. */ |
| 161 | +static pid_t clone3_with_fallback(struct clone_args *cl_args) { |
| 162 | + int *pidfd = (int *)(uintptr_t) cl_args->pidfd; |
| 163 | + pid_t child_pid = syscall(SYS_clone3, cl_args, sizeof(struct clone_args)); |
| 164 | + |
| 165 | + if (child_pid >= 0) |
| 166 | + return child_pid; /* Success! */ |
| 167 | + |
| 168 | + if (errno != ENOSYS && errno != EPERM) { |
| 169 | + uerror("clone3", Nothing); /* Unknown error */ |
| 170 | + } |
| 171 | + |
| 172 | + /* Probably Docker's security policy is blocking clone3. Fall back to forking. */ |
| 173 | + |
| 174 | + child_pid = fork(); |
| 175 | + if (child_pid == 0) { |
| 176 | + /* We are the child */ |
| 177 | + return 0; |
| 178 | + } else if (child_pid < 0) { |
| 179 | + uerror("fork", Nothing); |
| 180 | + } |
| 181 | + |
| 182 | + *pidfd = pidfd_open(child_pid, 0); /* Is automatically close-on-exec */ |
| 183 | + if (*pidfd < 0) { |
| 184 | + int e = errno; |
| 185 | + kill(child_pid, SIGKILL); |
| 186 | + waitpid(child_pid, NULL, 0); |
| 187 | + errno = e; |
| 188 | + uerror("pidfd_open", Nothing); |
| 189 | + } |
| 190 | + |
| 191 | + return child_pid; |
| 192 | +} |
| 193 | + |
151 | 194 | CAMLprim value caml_eio_clone3(value v_errors, value v_actions) {
|
152 | 195 | CAMLparam1(v_actions);
|
153 | 196 | CAMLlocal1(v_result);
|
154 | 197 | pid_t child_pid;
|
155 | 198 | int pidfd = -1; /* Is automatically close-on-exec */
|
156 | 199 | struct clone_args cl_args = {
|
157 | 200 | .flags = CLONE_PIDFD,
|
158 |
| - .pidfd = (uint64_t) &pidfd, |
| 201 | + .pidfd = (uintptr_t) &pidfd, |
159 | 202 | .exit_signal = SIGCHLD, /* Needed for wait4 to work if we exit before exec */
|
160 |
| - .stack = (uint64_t) NULL, /* Use copy-on-write parent stack */ |
| 203 | + .stack = (uintptr_t) NULL, /* Use copy-on-write parent stack */ |
161 | 204 | .stack_size = 0,
|
162 | 205 | };
|
163 | 206 |
|
164 |
| - child_pid = syscall(SYS_clone3, &cl_args, sizeof(struct clone_args)); |
| 207 | + child_pid = clone3_with_fallback(&cl_args); |
165 | 208 | if (child_pid == 0) {
|
| 209 | + /* Run child actions (doesn't return) */ |
166 | 210 | eio_unix_run_fork_actions(Int_val(v_errors), v_actions);
|
167 |
| - } else if (child_pid < 0) { |
168 |
| - uerror("clone3", Nothing); |
169 | 211 | }
|
170 | 212 |
|
171 | 213 | v_result = caml_alloc_tuple(2);
|
|
0 commit comments