Changeset View
Standalone View
lib/libc/gen/posix_spawn.c
Show First 20 Lines • Show All 188 Lines • ▼ Show 20 Lines | process_file_actions(const posix_spawn_file_actions_t fa) | ||||
STAILQ_FOREACH(fae, &fa->fa_list, fae_list) { | STAILQ_FOREACH(fae, &fa->fa_list, fae_list) { | ||||
error = process_file_actions_entry(fae); | error = process_file_actions_entry(fae); | ||||
if (error) | if (error) | ||||
return (error); | return (error); | ||||
} | } | ||||
return (0); | return (0); | ||||
} | } | ||||
struct posix_spawn_args { | |||||
const char *path; | |||||
const posix_spawn_file_actions_t *fa; | |||||
const posix_spawnattr_t *sa; | |||||
char * const * argv; | |||||
char * const * envp; | |||||
int use_env_path; | |||||
int error; | |||||
}; | |||||
#if defined(__i386__) || defined(__amd64__) | |||||
#define _NEED_RFORK_THREAD | |||||
#define _RFORK_THREAD_STACK_SIZE 192 | |||||
kib: You do not need both defines.
On amd64, there is so called red zone, an area in the stack… | |||||
Done Inline ActionsAh, that's good to know- 192 was a rough approximation based on measured-once... do_posix_spawn -> process_spawnattr -> (syscalls)/sigismember do_posix_spawn -> process_file_actions -> process_file_actions_entry -> (syscalls) So accounting for 3/4-deep, perhaps 768 as a conservative estimate? Even 1k wouldn't exactly hurt anything, that's 'small potatoes' as far as posix_spawn memory requirements go, I would think. kevans: Ah, that's good to know- 192 was a rough approximation based on measured-once... | |||||
Not Done Inline ActionsVarious exported functions such as _execve() are called, which might cause rtld and/or libthr to be involved and stack usage to go up greatly. In principle, it's possible to do the necessary work with only unexported aliases and/or do preparations like getenv("PATH") in the parent process. jilles: Various exported functions such as _execve() are called, which might cause rtld and/or libthr… | |||||
Done Inline ActionsHmm... at least most of this should be successfully avoiding libthr, as it's "in an undefined state after the vfork" according to it. What specifically would you propose here? Switching _execve over to the unwrapped version and using an even more conservative 1k/2k/4k for the stack size to catch any edge cases? kevans: Hmm... at least most of this should be successfully avoiding libthr, as it's "in an undefined… | |||||
Done Inline ActionsAlternatively, how bad of an idea would it be to fetch the stack pointer, subtract past the red zone, and use that for the rfork_thread stack? This is only x86 that has this problem, so it's really tempting. kevans: Alternatively, how bad of an idea would it be to fetch the stack pointer, subtract past the red… | |||||
Not Done Inline ActionsI would go forward with 2K or even 4K stack and claim that the issue is resolved. kib: I would go forward with 2K or even 4K stack and claim that the issue is resolved. | |||||
Not Done Inline ActionsWhat could be a problem is if the child is killed forcibly while holding a lock, since this would likely cause the parent to deadlock later. This could happen a signal is sent to the process group for which the parent has a handler that does little and the default action is to terminate the process. (Getting this to happen with SIGKILL would be very unlikely since it is hard to send a signal to the child process only.) With rtld, this can happen only with the first posix_spawn call using particular functionality in the process, and libthr seems to be successfully avoided. This seems rare enough not to block this review on it. jilles: What could be a problem is if the child is killed forcibly while holding a lock, since this… | |||||
#endif | |||||
static int | static int | ||||
_posix_spawn_thr(void *data) | |||||
{ | |||||
struct posix_spawn_args *psa; | |||||
char * const *envp; | |||||
psa = data; | |||||
if (psa->sa != NULL) { | |||||
psa->error = process_spawnattr(*psa->sa); | |||||
if (psa->error) | |||||
_exit(127); | |||||
} | |||||
if (psa->fa != NULL) { | |||||
psa->error = process_file_actions(*psa->fa); | |||||
if (psa->error) | |||||
_exit(127); | |||||
} | |||||
envp = psa->envp != NULL ? psa->envp : environ; | |||||
if (psa->use_env_path) | |||||
_execvpe(psa->path, psa->argv, envp); | |||||
else | |||||
_execve(psa->path, psa->argv, envp); | |||||
psa->error = errno; | |||||
Done Inline ActionsDo you mean "must not return"? jilles: Do you mean "must not return"? | |||||
Done Inline ActionsHmmm... I caught that once, but it looks like I had only fixed it locally in the diff that I uploaded (61840)... fixed for next update. kevans: Hmmm... I caught that once, but it looks like I had only fixed it locally in the diff that I… | |||||
/* This is called in such a way that it must not return. */ | |||||
_exit(127); | |||||
} | |||||
static int | |||||
do_posix_spawn(pid_t *pid, const char *path, | do_posix_spawn(pid_t *pid, const char *path, | ||||
const posix_spawn_file_actions_t *fa, | const posix_spawn_file_actions_t *fa, | ||||
const posix_spawnattr_t *sa, | const posix_spawnattr_t *sa, | ||||
char * const argv[], char * const envp[], int use_env_path) | char * const argv[], char * const envp[], int use_env_path) | ||||
{ | { | ||||
struct posix_spawn_args psa; | |||||
pid_t p; | pid_t p; | ||||
volatile int error = 0; | #ifdef _NEED_RFORK_THREAD | ||||
char *stack; | |||||
stack = malloc(_RFORK_THREAD_STACK_SIZE); | |||||
if (stack == NULL) | |||||
return (ENOMEM); | |||||
#endif | |||||
psa.path = path; | |||||
psa.fa = fa; | |||||
psa.sa = sa; | |||||
psa.argv = argv; | |||||
psa.envp = envp; | |||||
psa.use_env_path = use_env_path; | |||||
psa.error = 0; | |||||
/* | |||||
Done Inline ActionsIt might be wise to fall back to vfork() on EINVAL. kib: It might be wise to fall back to vfork() on EINVAL. | |||||
* Passing RFSPAWN to rfork(2) gives us effectively a vfork that drops | |||||
* non-ignored signal handlers. We'll fall back to the slightly less | |||||
Not Done Inline ActionsPerhaps explain that this happens on old kernels. kib: Perhaps explain that this happens on old kernels. | |||||
* ideal vfork(2) if we get an EINVAL from rfork -- this should only | |||||
* happen with newer libc on older kernel that doesn't accept | |||||
Done Inline ActionsI'm surprised that this works at all on (among others) amd64: the child pops the return address and overwrites it with new return addresses for calls it makes; how can the parent's return (the ret instruction in rfork.S from SYS.h) go to the right place? Note that vfork has a hand-written stub that deals with this problem. On architectures such as sparc64, arm and mips where subroutine return addresses are stored in a register and system call stubs do not use the stack, this should not be an issue. jilles: I'm surprised that this works at all on (among others) amd64: the child pops the return address… | |||||
Done Inline ActionsIndeed, perhaps rfork_thread(3) should be used. I hope that it still works. kib: Indeed, perhaps rfork_thread(3) should be used. I hope that it still works. | |||||
Done Inline ActionsI'll double check my test system and rework this kevans: I'll double check my test system and rework this | |||||
Done Inline ActionsUsing rfork_thread(3) to implement something vfork-like does not seem a violation of its deprecation. The deprecation is about using rfork_thread(3) to create threads. rfork_thread(3)'s API is deficient in specifying the stack: it only takes an initial stack pointer, where I would expect a base and a size. Perhaps because of that or because it is deprecated, it is only available on i386 and amd64. Using a separate stack for the child process does avoid GCC bugs like https://gcc.gnu.org/bugzilla/show_bug.cgi?id=21161. jilles: Using rfork_thread(3) to implement something vfork-like does not seem a violation of its… | |||||
Done Inline ActionsBy 'hope' I mean that we do not corrupt userspace registers in the syscall for rfork_thread() to work. Note that fast syscall on amd64 does not restore all registers as an optimization. Fortunately, child return from fork sets PCB_FULL_IRET in cpu_fork(), so all registers are restored. From my reading of the rfork_thread() code, it does not matter for parent. But I did not verified it by a test. kib: By 'hope' I mean that we do not corrupt userspace registers in the syscall for rfork_thread()… | |||||
Done Inline Actionsrfork_thread(RFPROC | RFFDG | RFMEM, ...) appears to work still (on stable/11 amd64 and head amd64). rfork_thread.S only depends on %rsi being preserved in the child, but vfork.S depends on %rsi being preserved in both parent and child. The fast return path of fast_syscall additionally preserves %rdi and %rsi, calling that a bonus in comments. I don't think it is a bonus -- vfork.S depends on it and has no good alternative location to store the parent's return address (thread local storage? yuck). jilles: rfork_thread(RFPROC | RFFDG | RFMEM, ...) appears to work still (on stable/11 amd64 and head… | |||||
Done Inline ActionsFrom my reading, the child dependencies are %rsi, %r12, and %rbx. But they are handler by iret path. kib: From my reading, the child dependencies are %rsi, %r12, and %rbx. But they are handler by iret… | |||||
* RFSPAWN. | |||||
*/ | |||||
#ifdef _NEED_RFORK_THREAD | |||||
Not Done Inline ActionsIn a threaded process, this will call libthr's __thr_sigprocmask which leaves SIGCANCEL/SIGTHR unmasked. Perhaps this should be __sigprocmask which is an unexported weak alias for __sys_sigprocmask. jilles: In a threaded process, this will call libthr's `__thr_sigprocmask` which leaves… | |||||
/* | |||||
* x86 stores the return address on the stack, so rfork(2) cannot work | |||||
* as-is because the child would clobber the return address om the | |||||
* parent. Because of this, we must use rfork_thread instead while | |||||
* almost every other arch stores the return address in a register. | |||||
*/ | |||||
p = rfork_thread(RFSPAWN, stack + _RFORK_THREAD_STACK_SIZE, | |||||
_posix_spawn_thr, &psa); | |||||
free(stack); | |||||
#else | |||||
p = rfork(RFSPAWN); | |||||
if (p == 0) | |||||
/* _posix_spawn_thr does not return */ | |||||
_posix_spawn_thr(&psa); | |||||
#endif | |||||
/* | |||||
* The above block should leave us in a state where we've either | |||||
* succeeded and we're ready to process the results, or we need to | |||||
* fallback to vfork() if the kernel didn't like RFSPAWN. | |||||
*/ | |||||
if (p == -1 && errno == EINVAL) { | |||||
p = vfork(); | p = vfork(); | ||||
switch (p) { | if (p == 0) | ||||
case -1: | /* _posix_spawn_thr does not return */ | ||||
return (errno); | _posix_spawn_thr(&psa); | ||||
case 0: | |||||
if (sa != NULL) { | |||||
error = process_spawnattr(*sa); | |||||
if (error) | |||||
_exit(127); | |||||
} | } | ||||
if (fa != NULL) { | if (p == -1) | ||||
error = process_file_actions(*fa); | return (errno); | ||||
if (error) | if (psa.error != 0) | ||||
_exit(127); | /* Failed; ready to reap */ | ||||
} | |||||
if (use_env_path) | |||||
_execvpe(path, argv, envp != NULL ? envp : environ); | |||||
else | |||||
_execve(path, argv, envp != NULL ? envp : environ); | |||||
error = errno; | |||||
_exit(127); | |||||
default: | |||||
if (error != 0) | |||||
_waitpid(p, NULL, WNOHANG); | _waitpid(p, NULL, WNOHANG); | ||||
else if (pid != NULL) | else if (pid != NULL) | ||||
/* exec succeeded */ | |||||
*pid = p; | *pid = p; | ||||
return (error); | return (psa.error); | ||||
} | |||||
} | } | ||||
int | int | ||||
posix_spawn(pid_t *pid, const char *path, | posix_spawn(pid_t *pid, const char *path, | ||||
const posix_spawn_file_actions_t *fa, | const posix_spawn_file_actions_t *fa, | ||||
const posix_spawnattr_t *sa, | const posix_spawnattr_t *sa, | ||||
char * const argv[], char * const envp[]) | char * const argv[], char * const envp[]) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 242 Lines • Show Last 20 Lines |
You do not need both defines.
On amd64, there is so called red zone, an area in the stack below %rsp that compiler is free to use. Red zone size is 128 bytes, which leaves 192 - 128 = 64 bytes to use until formal overflow. IMO it is too optimistic.