Index: sys/amd64/linux32/linux32_machdep.c =================================================================== --- sys/amd64/linux32/linux32_machdep.c +++ sys/amd64/linux32/linux32_machdep.c @@ -150,15 +150,7 @@ args->argp, args->envp); free(path, M_TEMP); if (error == 0) - error = kern_execve(td, &eargs, NULL); - if (error == 0) - /* Linux process can execute FreeBSD one, do not attempt - * to create emuldata for such process using - * linux_proc_init, this leads to a panic on KASSERT - * because such process has p->p_emuldata == NULL. - */ - if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) - error = linux_proc_init(td, 0, 0); + error = linux_common_execve(td, &eargs); return (error); } @@ -456,8 +448,14 @@ linux_set_upcall_kse(struct thread *td, register_t stack) { - td->td_frame->tf_rsp = stack; + if (stack) + td->td_frame->tf_rsp = stack; + /* + * The newly created Linux thread returns + * to the user space by the same path that a parent do. + */ + td->td_frame->tf_rax = 0; return (0); } Index: sys/amd64/linux32/linux32_sysvec.c =================================================================== --- sys/amd64/linux32/linux32_sysvec.c +++ sys/amd64/linux32/linux32_sysvec.c @@ -130,6 +130,7 @@ static eventhandler_tag linux_exit_tag; static eventhandler_tag linux_exec_tag; +static eventhandler_tag linux_thread_dtor_tag; /* * Linux syscalls return negative errno's, we do positive and map them @@ -1125,14 +1126,14 @@ linux_ioctl_register_handler(*lihp); SET_FOREACH(ldhp, linux_device_handler_set) linux_device_register_handler(*ldhp); - mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); - sx_init(&emul_shared_lock, "emuldata->shared lock"); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, NULL, 1000); linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, NULL, 1000); + linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, + linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); linux_szplatform = roundup(strlen(linux_platform) + 1, sizeof(char *)); linux_osd_jail_register(); @@ -1158,11 +1159,10 @@ linux_ioctl_unregister_handler(*lihp); SET_FOREACH(ldhp, linux_device_handler_set) linux_device_unregister_handler(*ldhp); - mtx_destroy(&emul_lock); - sx_destroy(&emul_shared_lock); mtx_destroy(&futex_mtx); EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); + EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); linux_osd_jail_deregister(); if (bootverbose) printf("Linux ELF exec handler removed\n"); Index: sys/compat/linux/check_error.d =================================================================== --- sys/compat/linux/check_error.d +++ sys/compat/linux/check_error.d @@ -36,8 +36,8 @@ */ linuxulator*:dummy::not_implemented, -linuxulator*:emul:proc_exit:child_clear_tid_error, -linuxulator*:emul:proc_exit:futex_failed, +linuxulator*:emul:linux_thread_detach:child_clear_tid_error, +linuxulator*:emul:linux_thread_detach:futex_failed, linuxulator*:emul:linux_schedtail:copyout_error, linuxulator*:futex:futex_get:error, linuxulator*:futex:futex_sleep:requeue_error, Index: sys/compat/linux/check_internal_locks.d =================================================================== --- sys/compat/linux/check_internal_locks.d +++ sys/compat/linux/check_internal_locks.d @@ -41,14 +41,9 @@ BEGIN { - check["emul_lock"] = 0; - check["emul_shared_rlock"] = 0; - check["emul_shared_wlock"] = 0; check["futex_mtx"] = 0; } -linuxulator*:locks:emul_lock:locked, -linuxulator*:locks:emul_shared_wlock:locked, linuxulator*:locks:futex_mtx:locked /check[probefunc] > 0/ { @@ -57,9 +52,6 @@ stack(); } -linuxulator*:locks:emul_lock:locked, -linuxulator*:locks:emul_shared_rlock:locked, -linuxulator*:locks:emul_shared_wlock:locked, linuxulator*:locks:futex_mtx:locked { ++check[probefunc]; @@ -69,9 +61,6 @@ spec[probefunc] = speculation(); } -linuxulator*:locks:emul_lock:unlock, -linuxulator*:locks:emul_shared_rlock:unlock, -linuxulator*:locks:emul_shared_wlock:unlock, linuxulator*:locks:futex_mtx:unlock /check[probefunc] == 0/ { @@ -82,9 +71,6 @@ stack(); } -linuxulator*:locks:emul_lock:unlock, -linuxulator*:locks:emul_shared_rlock:unlock, -linuxulator*:locks:emul_shared_wlock:unlock, linuxulator*:locks:futex_mtx:unlock { discard(spec[probefunc]); @@ -95,27 +81,6 @@ /* Timeout handling */ tick-10s -/spec["emul_lock"] != 0 && timestamp - ts["emul_lock"] >= 9999999000/ -{ - commit(spec["emul_lock"]); - spec["emul_lock"] = 0; -} - -tick-10s -/spec["emul_shared_wlock"] != 0 && timestamp - ts["emul_shared_wlock"] >= 9999999000/ -{ - commit(spec["emul_shared_wlock"]); - spec["emul_shared_wlock"] = 0; -} - -tick-10s -/spec["emul_shared_rlock"] != 0 && timestamp - ts["emul_shared_rlock"] >= 9999999000/ -{ - commit(spec["emul_shared_rlock"]); - spec["emul_shared_rlock"] = 0; -} - -tick-10s /spec["futex_mtx"] != 0 && timestamp - ts["futex_mtx"] >= 9999999000/ { commit(spec["futex_mtx"]); Index: sys/compat/linux/linux_emul.h =================================================================== --- sys/compat/linux/linux_emul.h +++ sys/compat/linux/linux_emul.h @@ -1,5 +1,6 @@ /*- * Copyright (c) 2006 Roman Divacky + * Copyright (c) 2013 Dmitry Chagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,91 +32,34 @@ #ifndef _LINUX_EMUL_H_ #define _LINUX_EMUL_H_ -#define EMUL_SHARED_HASXSTAT 0x01 - -struct linux_emuldata_shared { - int refs; - int flags; - int xstat; - pid_t group_pid; - - LIST_HEAD(, linux_emuldata) threads; /* head of list of linux threads */ -}; - /* * modeled after similar structure in NetBSD * this will be extended as we need more functionality */ struct linux_emuldata { - pid_t pid; - int *child_set_tid; /* in clone(): Child's TID to set on clone */ int *child_clear_tid;/* in clone(): Child's TID to clear on exit */ - struct linux_emuldata_shared *shared; - int pdeath_signal; /* parent death signal */ int flags; /* different emuldata flags */ + int em_tid; /* thread id */ struct linux_robust_list_head *robust_futexes; - - LIST_ENTRY(linux_emuldata) threads; /* list of linux threads */ }; -struct linux_emuldata *em_find(struct proc *, int locked); - -/* - * DTrace probes for locks should be fired after locking and before releasing - * to prevent races (to provide data/function stability in dtrace, see the - * output of "dtrace -v ..." and the corresponding dtrace docs). - */ -#define EMUL_LOCK(l) do { \ - mtx_lock(l); \ - LIN_SDT_PROBE1(locks, emul_lock, \ - locked, l); \ - } while (0) -#define EMUL_UNLOCK(l) do { \ - LIN_SDT_PROBE1(locks, emul_lock, \ - unlock, l); \ - mtx_unlock(l); \ - } while (0) - -#define EMUL_SHARED_RLOCK(l) do { \ - sx_slock(l); \ - LIN_SDT_PROBE1(locks, emul_shared_rlock, \ - locked, l); \ - } while (0) -#define EMUL_SHARED_RUNLOCK(l) do { \ - LIN_SDT_PROBE1(locks, emul_shared_rlock, \ - unlock, l); \ - sx_sunlock(l); \ - } while (0) -#define EMUL_SHARED_WLOCK(l) do { \ - sx_xlock(l); \ - LIN_SDT_PROBE1(locks, emul_shared_wlock, \ - locked, l); \ - } while (0) -#define EMUL_SHARED_WUNLOCK(l) do { \ - LIN_SDT_PROBE1(locks, emul_shared_wlock, \ - unlock, l); \ - sx_xunlock(l); \ - } while (0) - -/* for em_find use */ -#define EMUL_DOLOCK 1 -#define EMUL_DONTLOCK 0 +struct linux_emuldata *em_find(struct thread *); /* emuldata flags */ #define LINUX_XDEPR_REQUEUEOP 0x00000001 /* uses deprecated futex REQUEUE op*/ +#define LINUX_THREAD_DETACHED 0x00000002 -int linux_proc_init(struct thread *, pid_t, int); +void linux_proc_init(struct thread *, struct thread *, int); void linux_proc_exit(void *, struct proc *); void linux_schedtail(struct thread *); void linux_proc_exec(void *, struct proc *, struct image_params *); -void linux_kill_threads(struct thread *, int); - -extern struct sx emul_shared_lock; -extern struct mtx emul_lock; +void linux_thread_dtor(void *arg __unused, struct thread *); +void linux_thread_detach(struct thread *); +int linux_common_execve(struct thread *, struct image_args *); #endif /* !_LINUX_EMUL_H_ */ Index: sys/compat/linux/linux_emul.c =================================================================== --- sys/compat/linux/linux_emul.c +++ sys/compat/linux/linux_emul.c @@ -1,5 +1,6 @@ /*- * Copyright (c) 2006 Roman Divacky + * Copyright (c) 2013 Dmitry Chagin * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -58,6 +60,7 @@ #include #include #include +#include /** * Special DTrace provider for the linuxulator. @@ -73,33 +76,21 @@ LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); /** - * Special DTrace module "locks", it covers some linuxulator internal - * locks. - */ -LIN_SDT_PROBE_DEFINE1(locks, emul_lock, locked, "struct mtx *"); -LIN_SDT_PROBE_DEFINE1(locks, emul_lock, unlock, "struct mtx *"); -LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, locked, "struct sx *"); -LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, unlock, "struct sx *"); -LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, locked, "struct sx *"); -LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, unlock, "struct sx *"); - -/** * DTrace probes in this module. */ -LIN_SDT_PROBE_DEFINE2(emul, em_find, entry, "struct proc *", "int"); +LIN_SDT_PROBE_DEFINE1(emul, em_find, entry, "struct thread *"); LIN_SDT_PROBE_DEFINE0(emul, em_find, return); -LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", "pid_t", - "int"); +LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", + "struct thread *", "int"); LIN_SDT_PROBE_DEFINE0(emul, proc_init, create_thread); LIN_SDT_PROBE_DEFINE0(emul, proc_init, fork); LIN_SDT_PROBE_DEFINE0(emul, proc_init, exec); LIN_SDT_PROBE_DEFINE0(emul, proc_init, return); LIN_SDT_PROBE_DEFINE1(emul, proc_exit, entry, "struct proc *"); -LIN_SDT_PROBE_DEFINE0(emul, proc_exit, futex_failed); -LIN_SDT_PROBE_DEFINE3(emul, proc_exit, reparent, "pid_t", "pid_t", - "struct proc *"); -LIN_SDT_PROBE_DEFINE1(emul, proc_exit, child_clear_tid_error, "int"); -LIN_SDT_PROBE_DEFINE0(emul, proc_exit, return); +LIN_SDT_PROBE_DEFINE1(emul, linux_thread_detach, entry, "struct thread *"); +LIN_SDT_PROBE_DEFINE0(emul, linux_thread_detach, futex_failed); +LIN_SDT_PROBE_DEFINE1(emul, linux_thread_detach, child_clear_tid_error, "int"); +LIN_SDT_PROBE_DEFINE0(emul, linux_thread_detach, return); LIN_SDT_PROBE_DEFINE2(emul, proc_exec, entry, "struct proc *", "struct image_params *"); LIN_SDT_PROBE_DEFINE0(emul, proc_exec, return); @@ -108,284 +99,211 @@ LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, return); LIN_SDT_PROBE_DEFINE1(emul, linux_set_tid_address, entry, "int *"); LIN_SDT_PROBE_DEFINE0(emul, linux_set_tid_address, return); -LIN_SDT_PROBE_DEFINE2(emul, linux_kill_threads, entry, "struct thread *", - "int"); -LIN_SDT_PROBE_DEFINE1(emul, linux_kill_threads, kill, "pid_t"); -LIN_SDT_PROBE_DEFINE0(emul, linux_kill_threads, return); - -struct sx emul_shared_lock; -struct mtx emul_lock; -/* this returns locked reference to the emuldata entry (if found) */ +/* + * This returns reference to the emuldata entry (if found) + * + * Hold PROC_LOCK when referencing emuldata from other threads. + */ struct linux_emuldata * -em_find(struct proc *p, int locked) +em_find(struct thread *td) { struct linux_emuldata *em; - LIN_SDT_PROBE2(emul, em_find, entry, p, locked); - - if (locked == EMUL_DOLOCK) - EMUL_LOCK(&emul_lock); + LIN_SDT_PROBE1(emul, em_find, entry, td); - em = p->p_emuldata; - - if (em == NULL && locked == EMUL_DOLOCK) - EMUL_UNLOCK(&emul_lock); + em = td->td_emuldata; LIN_SDT_PROBE1(emul, em_find, return, em); + return (em); } -int -linux_proc_init(struct thread *td, pid_t child, int flags) +void +linux_proc_init(struct thread *td, struct thread *newtd, int flags) { - struct linux_emuldata *em, *p_em; - struct proc *p; + struct linux_emuldata *em; - LIN_SDT_PROBE3(emul, proc_init, entry, td, child, flags); + LIN_SDT_PROBE3(emul, proc_init, entry, td, newtd, flags); - if (child != 0) { - /* fork or create a thread */ - em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO); - em->pid = child; + if (newtd != NULL) { + /* non-exec call */ + em = malloc(sizeof(*em), M_TEMP, M_WAITOK | M_ZERO); em->pdeath_signal = 0; em->flags = 0; em->robust_futexes = NULL; if (flags & LINUX_CLONE_THREAD) { - /* handled later in the code */ LIN_SDT_PROBE0(emul, proc_init, create_thread); - } else { - struct linux_emuldata_shared *s; + em->em_tid = newtd->td_tid; + } else { LIN_SDT_PROBE0(emul, proc_init, fork); - s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO); - s->refs = 1; - s->group_pid = child; - - LIST_INIT(&s->threads); - em->shared = s; + em->em_tid = newtd->td_proc->p_pid; } + newtd->td_emuldata = em; } else { /* exec */ LIN_SDT_PROBE0(emul, proc_init, exec); /* lookup the old one */ - em = em_find(td->td_proc, EMUL_DOLOCK); + em = em_find(td); KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n")); + + em->em_tid = td->td_proc->p_pid; } em->child_clear_tid = NULL; em->child_set_tid = NULL; - /* - * allocate the shared struct only in clone()/fork cases in the case - * of clone() td = calling proc and child = pid of the newly created - * proc - */ - if (child != 0) { - if (flags & LINUX_CLONE_THREAD) { - /* lookup the parent */ - /* - * we dont have to lock the p_em because - * its waiting for us in linux_clone so - * there is no chance of it changing the - * p_em->shared address - */ - p_em = em_find(td->td_proc, EMUL_DONTLOCK); - KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n")); - em->shared = p_em->shared; - EMUL_SHARED_WLOCK(&emul_shared_lock); - em->shared->refs++; - EMUL_SHARED_WUNLOCK(&emul_shared_lock); - } else { - /* - * handled earlier to avoid malloc(M_WAITOK) with - * rwlock held - */ - } + LIN_SDT_PROBE0(emul, proc_init, return); +} - EMUL_SHARED_WLOCK(&emul_shared_lock); - LIST_INSERT_HEAD(&em->shared->threads, em, threads); - EMUL_SHARED_WUNLOCK(&emul_shared_lock); +void +linux_proc_exit(void *arg __unused, struct proc *p) +{ + struct thread *td = curthread; - p = pfind(child); - KASSERT(p != NULL, ("process not found in proc_init\n")); - p->p_emuldata = em; + if (__predict_false(SV_CURPROC_ABI() != SV_ABI_LINUX)) { + LIN_SDT_PROBE1(emul, proc_exit, entry, p); + (p->p_sysent->sv_thread_detach)(td); + } +} + +int +linux_common_execve(struct thread *td, struct image_args *eargs) +{ + struct linux_emuldata *em; + struct proc *p; + int error; + + p = td->td_proc; + (p->p_sysent->sv_thread_detach)(td); + + error = kern_execve(td, eargs, NULL); + + if (p->p_flag & P_HADTHREADS) { + PROC_LOCK(p); + /* + * Unlike FreeBSD anyway upgrade to SINGLE_EXIT state + * to force other threads to suicide. + */ + thread_single(p, SINGLE_EXIT); PROC_UNLOCK(p); - } else - EMUL_UNLOCK(&emul_lock); + } + if (error) + return (error); - LIN_SDT_PROBE0(emul, proc_init, return); + /* + * In a case of transition from Linux binary execing to + * FreeBSD binary we destroy linux emuldata thread entry. + */ + if (SV_CURPROC_ABI() != SV_ABI_LINUX) { + PROC_LOCK(p); + em = em_find(td); + KASSERT(em != NULL, ("proc_exec: emuldata not found.\n")); + td->td_emuldata = NULL; + PROC_UNLOCK(p); + + free(em, M_TEMP); + } return (0); } -void -linux_proc_exit(void *arg __unused, struct proc *p) +void +linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) { - struct linux_emuldata *em; - int error, shared_flags, shared_xstat; - struct thread *td = FIRST_THREAD_IN_PROC(p); - int *child_clear_tid; - struct proc *q, *nq; - - if (__predict_true(p->p_sysent != &elf_linux_sysvec)) - return; + struct thread *td = curthread; - LIN_SDT_PROBE1(emul, proc_exit, entry, p); + /* + * In a case of execing to linux binary we create linux + * emuldata thread entry. + */ + if (__predict_false((imgp->sysent->sv_flags & SV_ABI_MASK) == + SV_ABI_LINUX)) { + LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp); + if (SV_PROC_ABI(p) == SV_ABI_LINUX) + linux_proc_init(td, NULL, 0); + else + linux_proc_init(td, td, 0); - release_futexes(p); + LIN_SDT_PROBE0(emul, proc_exec, return); + } +} - /* find the emuldata */ - em = em_find(p, EMUL_DOLOCK); +void +linux_thread_detach(struct thread *td) +{ + struct linux_sys_futex_args cup; + struct linux_emuldata *em; + int *child_clear_tid; + int null = 0; + int error; - KASSERT(em != NULL, ("proc_exit: emuldata not found.\n")); + LIN_SDT_PROBE1(emul, linux_thread_detach, entry, td); - /* reparent all procs that are not a thread leader to initproc */ - if (em->shared->group_pid != p->p_pid) { - LIN_SDT_PROBE3(emul, proc_exit, reparent, - em->shared->group_pid, p->p_pid, p); + em = em_find(td); + KASSERT(em != NULL, ("thread_detach: emuldata not found.\n")); - child_clear_tid = em->child_clear_tid; - EMUL_UNLOCK(&emul_lock); - sx_xlock(&proctree_lock); - wakeup(initproc); - PROC_LOCK(p); - proc_reparent(p, initproc); - p->p_sigparent = SIGCHLD; - PROC_UNLOCK(p); - sx_xunlock(&proctree_lock); - } else { - child_clear_tid = em->child_clear_tid; - EMUL_UNLOCK(&emul_lock); + if (em->flags & LINUX_THREAD_DETACHED) { + LINUX_CTR1(exit, "thread(%d) already detached", em->em_tid); + return; } - EMUL_SHARED_WLOCK(&emul_shared_lock); - shared_flags = em->shared->flags; - shared_xstat = em->shared->xstat; - LIST_REMOVE(em, threads); + em->flags |= LINUX_THREAD_DETACHED; + LINUX_CTR1(exit, "thread detach(%d)", em->em_tid); - em->shared->refs--; - if (em->shared->refs == 0) { - EMUL_SHARED_WUNLOCK(&emul_shared_lock); - free(em->shared, M_LINUX); - } else - EMUL_SHARED_WUNLOCK(&emul_shared_lock); + release_futexes(td, em); - if ((shared_flags & EMUL_SHARED_HASXSTAT) != 0) - p->p_xstat = shared_xstat; + child_clear_tid = em->child_clear_tid; if (child_clear_tid != NULL) { - struct linux_sys_futex_args cup; - int null = 0; + LINUX_CTR2(exit, "thread detach(%d) %p", + em->em_tid, child_clear_tid); + error = copyout(&null, child_clear_tid, sizeof(null)); if (error) { - LIN_SDT_PROBE1(emul, proc_exit, + LIN_SDT_PROBE1(emul, linux_thread_detach, child_clear_tid_error, error); - free(em, M_LINUX); - - LIN_SDT_PROBE0(emul, proc_exit, return); + LIN_SDT_PROBE0(emul, linux_thread_detach, return); return; } - /* futexes stuff */ cup.uaddr = child_clear_tid; cup.op = LINUX_FUTEX_WAKE; cup.val = 0x7fffffff; /* Awake everyone */ cup.timeout = NULL; cup.uaddr2 = NULL; cup.val3 = 0; - error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup); + error = linux_sys_futex(td, &cup); /* * this cannot happen at the moment and if this happens it * probably means there is a user space bug */ if (error) { - LIN_SDT_PROBE0(emul, proc_exit, futex_failed); - printf(LMSG("futex stuff in proc_exit failed.\n")); + LIN_SDT_PROBE0(emul, linux_thread_detach, futex_failed); + printf(LMSG("futex stuff in thread_detach failed.\n")); } } - /* clean the stuff up */ - free(em, M_LINUX); - - /* this is a little weird but rewritten from exit1() */ - sx_xlock(&proctree_lock); - q = LIST_FIRST(&p->p_children); - for (; q != NULL; q = nq) { - nq = LIST_NEXT(q, p_sibling); - if (q->p_flag & P_WEXIT) - continue; - if (__predict_false(q->p_sysent != &elf_linux_sysvec)) - continue; - em = em_find(q, EMUL_DOLOCK); - KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid)); - PROC_LOCK(q); - if ((q->p_flag & P_WEXIT) == 0 && em->pdeath_signal != 0) { - kern_psignal(q, em->pdeath_signal); - } - PROC_UNLOCK(q); - EMUL_UNLOCK(&emul_lock); - } - sx_xunlock(&proctree_lock); - - LIN_SDT_PROBE0(emul, proc_exit, return); + LIN_SDT_PROBE0(emul, linux_thread_detach, return); } -/* - * This is used in a case of transition from FreeBSD binary execing to linux binary - * in this case we create linux emuldata proc entry with the pid of the currently running - * process. - */ -void -linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp) +void +linux_thread_dtor(void *arg __unused, struct thread *td) { - if (__predict_false(imgp->sysent == &elf_linux_sysvec)) { - LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp); - } - if (__predict_false(imgp->sysent == &elf_linux_sysvec - && p->p_sysent != &elf_linux_sysvec)) - linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0); - if (__predict_false((p->p_sysent->sv_flags & SV_ABI_MASK) == - SV_ABI_LINUX)) - /* Kill threads regardless of imgp->sysent value */ - linux_kill_threads(FIRST_THREAD_IN_PROC(p), SIGKILL); - if (__predict_false(imgp->sysent != &elf_linux_sysvec - && p->p_sysent == &elf_linux_sysvec)) { - struct linux_emuldata *em; - - /* - * XXX:There's a race because here we assign p->p_emuldata NULL - * but the process is still counted as linux one for a short - * time so some other process might reference it and try to - * access its p->p_emuldata and panicing on a NULL reference. - */ - em = em_find(p, EMUL_DONTLOCK); - - KASSERT(em != NULL, ("proc_exec: emuldata not found.\n")); - - EMUL_SHARED_WLOCK(&emul_shared_lock); - LIST_REMOVE(em, threads); - - PROC_LOCK(p); - p->p_emuldata = NULL; - PROC_UNLOCK(p); + struct linux_emuldata *em; - em->shared->refs--; - if (em->shared->refs == 0) { - EMUL_SHARED_WUNLOCK(&emul_shared_lock); - free(em->shared, M_LINUX); - } else - EMUL_SHARED_WUNLOCK(&emul_shared_lock); + em = em_find(td); + if (em == NULL) + return; + td->td_emuldata = NULL; - free(em, M_LINUX); - } + LINUX_CTR1(exit, "thread dtor(%d)", em->em_tid); - if (__predict_false(imgp->sysent == &elf_linux_sysvec)) { - LIN_SDT_PROBE0(emul, proc_exec, return); - } + free(em, M_TEMP); } void @@ -396,30 +314,28 @@ int error = 0; int *child_set_tid; - p = td->td_proc; - - LIN_SDT_PROBE1(emul, linux_schedtail, entry, p); + LIN_SDT_PROBE1(emul, linux_schedtail, entry, td); - /* find the emuldata */ - em = em_find(p, EMUL_DOLOCK); + p = td->td_proc; + em = em_find(td); KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n")); child_set_tid = em->child_set_tid; - EMUL_UNLOCK(&emul_lock); if (child_set_tid != NULL) { - error = copyout(&p->p_pid, (int *)child_set_tid, - sizeof(p->p_pid)); + error = copyout(&em->em_tid, (int *)child_set_tid, + sizeof(em->em_tid)); + LINUX_CTR4(clone, "schedtail(%d) %p stored %d error %d", + td->td_tid, child_set_tid, em->em_tid, error); if (error != 0) { LIN_SDT_PROBE1(emul, linux_schedtail, copyout_error, error); } - } + } else + LINUX_CTR1(clone, "schedtail(%d)", em->em_tid); LIN_SDT_PROBE0(emul, linux_schedtail, return); - - return; } int @@ -429,45 +345,16 @@ LIN_SDT_PROBE1(emul, linux_set_tid_address, entry, args->tidptr); - /* find the emuldata */ - em = em_find(td->td_proc, EMUL_DOLOCK); - + em = em_find(td); KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n")); em->child_clear_tid = args->tidptr; - td->td_retval[0] = td->td_proc->p_pid; - EMUL_UNLOCK(&emul_lock); + td->td_retval[0] = em->em_tid; - LIN_SDT_PROBE0(emul, linux_set_tid_address, return); - return 0; -} - -void -linux_kill_threads(struct thread *td, int sig) -{ - struct linux_emuldata *em, *td_em, *tmp_em; - struct proc *sp; - - LIN_SDT_PROBE2(emul, linux_kill_threads, entry, td, sig); - - td_em = em_find(td->td_proc, EMUL_DONTLOCK); + LINUX_CTR3(set_tid_address, "tidptr(%d) %p, returns %d", + em->em_tid, args->tidptr, td->td_retval[0]); - KASSERT(td_em != NULL, ("linux_kill_threads: emuldata not found.\n")); - - EMUL_SHARED_RLOCK(&emul_shared_lock); - LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) { - if (em->pid == td_em->pid) - continue; - - sp = pfind(em->pid); - if ((sp->p_flag & P_WEXIT) == 0) - kern_psignal(sp, sig); - PROC_UNLOCK(sp); - - LIN_SDT_PROBE1(emul, linux_kill_threads, kill, em->pid); - } - EMUL_SHARED_RUNLOCK(&emul_shared_lock); - - LIN_SDT_PROBE0(emul, linux_kill_threads, return); + LIN_SDT_PROBE0(emul, linux_set_tid_address, return); + return (0); } Index: sys/compat/linux/linux_fork.c =================================================================== --- sys/compat/linux/linux_fork.c +++ sys/compat/linux/linux_fork.c @@ -34,15 +34,21 @@ #include #include #include +#include #include #include #include +#include #include -#include +#include #include #include #include +#include +#include +#include + #ifdef COMPAT_LINUX32 #include #include @@ -50,18 +56,10 @@ #include #include #endif -#include #include #include #include - -/* DTrace init */ -LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); - -/* Linuxulator-global DTrace probes */ -LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); -LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); - +#include int linux_fork(struct thread *td, struct linux_fork_args *args) @@ -79,14 +77,11 @@ != 0) return (error); - td->td_retval[0] = p2->p_pid; - td->td_retval[1] = 0; + td2 = FIRST_THREAD_IN_PROC(p2); - error = linux_proc_init(td, td->td_retval[0], 0); - if (error) - return (error); + linux_proc_init(td, td2, 0); - td2 = FIRST_THREAD_IN_PROC(p2); + td->td_retval[0] = p2->p_pid; /* * Make this runnable after we are finished with it. @@ -116,17 +111,16 @@ NULL, 0)) != 0) return (error); - td->td_retval[0] = p2->p_pid; - error = linux_proc_init(td, td->td_retval[0], 0); - if (error) - return (error); + td2 = FIRST_THREAD_IN_PROC(p2); + + linux_proc_init(td, td2, 0); PROC_LOCK(p2); p2->p_flag |= P_PPWAIT; PROC_UNLOCK(p2); - td2 = FIRST_THREAD_IN_PROC(p2); + td->td_retval[0] = p2->p_pid; /* * Make this runnable after we are finished with it. @@ -145,8 +139,8 @@ return (0); } -int -linux_clone(struct thread *td, struct linux_clone_args *args) +static int +linux_clone_proc(struct thread *td, struct linux_clone_args *args) { int error, ff = RFPROC | RFSTOPPED; struct proc *p2; @@ -183,22 +177,6 @@ if (!(args->flags & (LINUX_CLONE_FILES | LINUX_CLONE_FS))) ff |= RFFDG; - /* - * Attempt to detect when linux_clone(2) is used for creating - * kernel threads. Unfortunately despite the existence of the - * CLONE_THREAD flag, version of linuxthreads package used in - * most popular distros as of beginning of 2005 doesn't make - * any use of it. Therefore, this detection relies on - * empirical observation that linuxthreads sets certain - * combination of flags, so that we can make more or less - * precise detection and notify the FreeBSD kernel that several - * processes are in fact part of the same threading group, so - * that special treatment is necessary for signal delivery - * between those processes and fd locking. - */ - if ((args->flags & 0xffffff00) == LINUX_THREADING_FLAGS) - ff |= RFTHREAD; - if (args->flags & LINUX_CLONE_PARENT_SETTID) if (args->parent_tidptr == NULL) return (EINVAL); @@ -207,29 +185,13 @@ if (error) return (error); - if (args->flags & (LINUX_CLONE_PARENT | LINUX_CLONE_THREAD)) { - sx_xlock(&proctree_lock); - PROC_LOCK(p2); - proc_reparent(p2, td->td_proc->p_pptr); - PROC_UNLOCK(p2); - sx_xunlock(&proctree_lock); - } + td2 = FIRST_THREAD_IN_PROC(p2); /* create the emuldata */ - error = linux_proc_init(td, p2->p_pid, args->flags); - /* reference it - no need to check this */ - em = em_find(p2, EMUL_DOLOCK); - KASSERT(em != NULL, ("clone: emuldata not found.")); - /* and adjust it */ - - if (args->flags & LINUX_CLONE_THREAD) { -#ifdef notyet - PROC_LOCK(p2); - p2->p_pgrp = td->td_proc->p_pgrp; - PROC_UNLOCK(p2); -#endif - exit_signal = 0; - } + linux_proc_init(td, td2, args->flags); + + em = em_find(td2); + KASSERT(em != NULL, ("clone_proc: emuldata not found.\n")); if (args->flags & LINUX_CLONE_CHILD_SETTID) em->child_set_tid = args->child_tidptr; @@ -241,8 +203,6 @@ else em->child_clear_tid = NULL; - EMUL_UNLOCK(&emul_lock); - if (args->flags & LINUX_CLONE_PARENT_SETTID) { error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid)); @@ -253,14 +213,12 @@ PROC_LOCK(p2); p2->p_sigparent = exit_signal; PROC_UNLOCK(p2); - td2 = FIRST_THREAD_IN_PROC(p2); /* * In a case of stack = NULL, we are supposed to COW calling process * stack. This is what normal fork() does, so we just keep tf_rsp arg * intact. */ - if (args->stack) - linux_set_upcall_kse(td2, PTROUT(args->stack)); + linux_set_upcall_kse(td2, PTROUT(args->stack)); if (args->flags & LINUX_CLONE_SETTLS) linux_set_cloned_tls(td2, args->tls); @@ -271,6 +229,7 @@ "stack %p sig = %d"), (int)p2->p_pid, args->stack, exit_signal); #endif + if (args->flags & LINUX_CLONE_VFORK) { PROC_LOCK(p2); p2->p_flag |= P_PPWAIT; @@ -286,7 +245,6 @@ thread_unlock(td2); td->td_retval[0] = p2->p_pid; - td->td_retval[1] = 0; if (args->flags & LINUX_CLONE_VFORK) { /* wait for the children to exit, ie. emulate vfork */ @@ -299,15 +257,156 @@ return (0); } -int -linux_exit(struct thread *td, struct linux_exit_args *args) +static int +linux_clone_thread(struct thread *td, struct linux_clone_args *args) { + struct linux_emuldata *em; + struct thread *newtd; + struct proc *p; + int error; + +#ifdef DEBUG + if (ldebug(clone)) { + printf(ARGS(clone, "thread: flags %x, stack %p, parent tid: %p, " + "child tid: %p"), (unsigned)args->flags, + args->stack, args->parent_tidptr, args->child_tidptr); + } +#endif + + LINUX_CTR4(clone, "thread(%d) flags %x ptid %p ctid %p", + td->td_tid, (unsigned)args->flags, + args->parent_tidptr, args->child_tidptr); + + if (args->flags & LINUX_CLONE_PARENT_SETTID) + if (args->parent_tidptr == NULL) + return (EINVAL); + + /* Threads should be created with own stack */ + if (args->stack == NULL) + return (EINVAL); + + p = td->td_proc; + + /* Initialize our td */ + error = kern_thr_alloc(p, 0, &newtd); + if (error) + return (error); + + cpu_set_upcall(newtd, td); + + bzero(&newtd->td_startzero, + __rangeof(struct thread, td_startzero, td_endzero)); + bcopy(&td->td_startcopy, &newtd->td_startcopy, + __rangeof(struct thread, td_startcopy, td_endcopy)); + + newtd->td_proc = p; + newtd->td_ucred = crhold(td->td_ucred); + + /* create the emuldata */ + linux_proc_init(td, newtd, args->flags); + + em = em_find(newtd); + KASSERT(em != NULL, ("clone_thread: emuldata not found.\n")); + + if (args->flags & LINUX_CLONE_SETTLS) + linux_set_cloned_tls(newtd, args->tls); + + if (args->flags & LINUX_CLONE_CHILD_SETTID) + em->child_set_tid = args->child_tidptr; + else + em->child_set_tid = NULL; + + if (args->flags & LINUX_CLONE_CHILD_CLEARTID) + em->child_clear_tid = args->child_tidptr; + else + em->child_clear_tid = NULL; + + cpu_thread_clean(newtd); + + linux_set_upcall_kse(newtd, PTROUT(args->stack)); + + PROC_LOCK(p); + p->p_flag |= P_HADTHREADS; + newtd->td_sigmask = td->td_sigmask; + bcopy(p->p_comm, newtd->td_name, sizeof(newtd->td_name)); + + if (args->flags & LINUX_CLONE_PARENT) + thread_link(newtd, p->p_pptr); + else + thread_link(newtd, p); + + thread_lock(td); + /* let the scheduler know about these things. */ + sched_fork_thread(td, newtd); + thread_unlock(td); + if (P_SHOULDSTOP(p)) + newtd->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK; + PROC_UNLOCK(p); + + tidhash_add(newtd); #ifdef DEBUG - if (ldebug(exit)) - printf(ARGS(exit, "%d"), args->rval); + if (ldebug(clone)) + printf(ARGS(clone, "successful clone to %d, stack %p"), + (int)newtd->td_tid, args->stack); #endif - exit1(td, W_EXITCODE(args->rval, 0)); - /* NOTREACHED */ + LINUX_CTR2(clone, "thread(%d) successful clone to %d", + td->td_tid, newtd->td_tid); + + if (args->flags & LINUX_CLONE_PARENT_SETTID) { + error = copyout(&newtd->td_tid, args->parent_tidptr, + sizeof(newtd->td_tid)); + if (error) + printf(LMSG("clone_thread: copyout failed!")); + } + + /* + * Make this runnable after we are finished with it. + */ + thread_lock(newtd); + TD_SET_CAN_RUN(newtd); + sched_add(newtd, SRQ_BORING); + thread_unlock(newtd); + + td->td_retval[0] = newtd->td_tid; + + return (0); +} + +int +linux_clone(struct thread *td, struct linux_clone_args *args) +{ + + if (args->flags & LINUX_CLONE_THREAD) + return (linux_clone_thread(td, args)); + else + return (linux_clone_proc(td, args)); +} + +int +linux_exit(struct thread *td, struct linux_exit_args *args) +{ + struct linux_emuldata *em; + struct proc *p; + + em = em_find(td); + KASSERT(em != NULL, ("exit: emuldata not found.\n")); + + LINUX_CTR2(exit, "thread(%d) (%d)", em->em_tid, args->rval); + + linux_thread_detach(td); + + p = td->td_proc; + + /* + * XXX. A race condition possible when last two threads of a process + * exits via pthread_exit(). We should hold of p_mtx before + * checking p_flag. + */ + if (p->p_flag & P_HADTHREADS) + return (kern_thr_exit(td)); + else + exit1(td, W_EXITCODE(args->rval, 0)); + /* NOTREACHED */ } Index: sys/compat/linux/linux_futex.h =================================================================== --- sys/compat/linux/linux_futex.h +++ sys/compat/linux/linux_futex.h @@ -76,6 +76,7 @@ #define FUTEX_TID_MASK 0x3fffffff #define FUTEX_BITSET_MATCH_ANY 0xffffffff -void release_futexes(struct proc *); +void release_futexes(struct thread *, + struct linux_emuldata *); #endif /* !_LINUX_FUTEX_H */ Index: sys/compat/linux/linux_futex.c =================================================================== --- sys/compat/linux/linux_futex.c +++ sys/compat/linux/linux_futex.c @@ -70,10 +70,6 @@ /* DTrace init */ LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); -/* Linuxulator-global DTrace probes */ -LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); -LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); - /** * Futex part for the special DTrace module "locks". */ @@ -174,8 +170,8 @@ "struct linux_get_robust_list_args *"); LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, copyout_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, linux_get_robust_list, return, "int"); -LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, "struct proc *", - "uint32_t *", "unsigned int"); +LIN_SDT_PROBE_DEFINE3(futex, handle_futex_death, entry, + "struct linux_emuldata *", "uint32_t *", "unsigned int"); LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, copyin_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, handle_futex_death, return, "int"); LIN_SDT_PROBE_DEFINE3(futex, fetch_robust_entry, entry, @@ -183,7 +179,8 @@ "unsigned int *"); LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, copyin_error, "int"); LIN_SDT_PROBE_DEFINE1(futex, fetch_robust_entry, return, "int"); -LIN_SDT_PROBE_DEFINE1(futex, release_futexes, entry, "struct proc *"); +LIN_SDT_PROBE_DEFINE2(futex, release_futexes, entry, "struct thread *", + "struct linux_emuldata *"); LIN_SDT_PROBE_DEFINE1(futex, release_futexes, copyin_error, "int"); LIN_SDT_PROBE_DEFINE0(futex, release_futexes, return); @@ -976,7 +973,7 @@ * Glibc versions prior to 2.3.3 fall back to FUTEX_WAKE when * FUTEX_REQUEUE returned EINVAL. */ - em = em_find(td->td_proc, EMUL_DONTLOCK); + em = em_find(td); if ((em->flags & LINUX_XDEPR_REQUEUEOP) == 0) { linux_msg(td, "linux_sys_futex: " @@ -1035,9 +1032,8 @@ return (EINVAL); } - em = em_find(td->td_proc, EMUL_DOLOCK); + em = em_find(td); em->robust_futexes = args->head; - EMUL_UNLOCK(&emul_lock); LIN_SDT_PROBE1(futex, linux_set_robust_list, return, 0); return (0); @@ -1049,29 +1045,30 @@ struct linux_emuldata *em; struct linux_robust_list_head *head; l_size_t len = sizeof(struct linux_robust_list_head); + struct thread *td2; int error = 0; LIN_SDT_PROBE2(futex, linux_get_robust_list, entry, td, args); if (!args->pid) { - em = em_find(td->td_proc, EMUL_DONTLOCK); + em = em_find(td); + KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); head = em->robust_futexes; } else { - struct proc *p; - - p = pfind(args->pid); - if (p == NULL) { + td2 = tdfind(args->pid, -1); + if (td2 == NULL) { LIN_SDT_PROBE1(futex, linux_get_robust_list, return, ESRCH); return (ESRCH); } - em = em_find(p, EMUL_DONTLOCK); + em = em_find(td2); + KASSERT(em != NULL, ("get_robust_list: emuldata notfound.\n")); /* XXX: ptrace? */ if (priv_check(td, PRIV_CRED_SETUID) || priv_check(td, PRIV_CRED_SETEUID) || - p_candebug(td, p)) { - PROC_UNLOCK(p); + p_candebug(td, td2->td_proc)) { + PROC_UNLOCK(td2->td_proc); LIN_SDT_PROBE1(futex, linux_get_robust_list, return, EPERM); @@ -1079,7 +1076,7 @@ } head = em->robust_futexes; - PROC_UNLOCK(p); + PROC_UNLOCK(td2->td_proc); } error = copyout(&len, args->len, sizeof(l_size_t)); @@ -1101,13 +1098,14 @@ } static int -handle_futex_death(struct proc *p, uint32_t *uaddr, unsigned int pi) +handle_futex_death(struct linux_emuldata *em, uint32_t *uaddr, + unsigned int pi) { uint32_t uval, nval, mval; struct futex *f; int error; - LIN_SDT_PROBE3(futex, handle_futex_death, entry, p, uaddr, pi); + LIN_SDT_PROBE3(futex, handle_futex_death, entry, em, uaddr, pi); retry: error = copyin(uaddr, &uval, 4); @@ -1116,7 +1114,7 @@ LIN_SDT_PROBE1(futex, handle_futex_death, return, EFAULT); return (EFAULT); } - if ((uval & FUTEX_TID_MASK) == p->p_pid) { + if ((uval & FUTEX_TID_MASK) == em->em_tid) { mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED; nval = casuword32(uaddr, uval, mval); @@ -1173,18 +1171,16 @@ /* This walks the list of robust futexes releasing them. */ void -release_futexes(struct proc *p) +release_futexes(struct thread *td, struct linux_emuldata *em) { struct linux_robust_list_head *head = NULL; struct linux_robust_list *entry, *next_entry, *pending; unsigned int limit = 2048, pi, next_pi, pip; - struct linux_emuldata *em; l_long futex_offset; int rc, error; - LIN_SDT_PROBE1(futex, release_futexes, entry, p); + LIN_SDT_PROBE2(futex, release_futexes, entry, td, em); - em = em_find(p, EMUL_DONTLOCK); head = em->robust_futexes; if (head == NULL) { @@ -1214,7 +1210,7 @@ rc = fetch_robust_entry(&next_entry, PTRIN(&entry->next), &next_pi); if (entry != pending) - if (handle_futex_death(p, + if (handle_futex_death(em, (uint32_t *)((caddr_t)entry + futex_offset), pi)) { LIN_SDT_PROBE0(futex, release_futexes, return); return; @@ -1234,7 +1230,7 @@ } if (pending) - handle_futex_death(p, (uint32_t *)((caddr_t)pending + futex_offset), pip); + handle_futex_death(em, (uint32_t *)((caddr_t)pending + futex_offset), pip); LIN_SDT_PROBE0(futex, release_futexes, return); } Index: sys/compat/linux/linux_misc.h =================================================================== --- sys/compat/linux/linux_misc.h +++ sys/compat/linux/linux_misc.h @@ -121,5 +121,6 @@ int options, struct rusage *ru); int linux_set_upcall_kse(struct thread *td, register_t stack); int linux_set_cloned_tls(struct thread *td, void *desc); +struct thread *linux_tdfind(struct thread *, lwpid_t, pid_t); #endif /* _LINUX_MISC_H_ */ Index: sys/compat/linux/linux_misc.c =================================================================== --- sys/compat/linux/linux_misc.c +++ sys/compat/linux/linux_misc.c @@ -53,7 +53,6 @@ #include #include #include -#include #include #include #include @@ -84,7 +83,6 @@ #include #endif -#include #include #include #include @@ -93,17 +91,6 @@ #include #include -/* DTrace init */ -LIN_SDT_PROVIDER_DECLARE(LINUX_DTRACE); - -/* Linuxulator-global DTrace probes */ -LIN_SDT_PROBE_DECLARE(locks, emul_lock, locked); -LIN_SDT_PROBE_DECLARE(locks, emul_lock, unlock); -LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, locked); -LIN_SDT_PROBE_DECLARE(locks, emul_shared_rlock, unlock); -LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, locked); -LIN_SDT_PROBE_DECLARE(locks, emul_shared_wlock, unlock); - int stclohz; /* Statistics clock frequency */ static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { @@ -1297,7 +1284,9 @@ linux_sched_setscheduler(struct thread *td, struct linux_sched_setscheduler_args *args) { - struct sched_setscheduler_args bsd; + struct sched_param sched_param; + struct thread *tdt; + int error, policy; #ifdef DEBUG if (ldebug(sched_setscheduler)) @@ -1307,39 +1296,51 @@ switch (args->policy) { case LINUX_SCHED_OTHER: - bsd.policy = SCHED_OTHER; + policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: - bsd.policy = SCHED_FIFO; + policy = SCHED_FIFO; break; case LINUX_SCHED_RR: - bsd.policy = SCHED_RR; + policy = SCHED_RR; break; default: return (EINVAL); } - bsd.pid = args->pid; - bsd.param = (struct sched_param *)args->param; - return (sys_sched_setscheduler(td, &bsd)); + error = copyin(args->param, &sched_param, sizeof(sched_param)); + if (error) + return (error); + + tdt = linux_tdfind(td, args->pid, -1); + if (tdt == NULL) + return (ESRCH); + + error = kern_sched_setscheduler(td, tdt, policy, &sched_param); + PROC_UNLOCK(tdt->td_proc); + return (error); } int linux_sched_getscheduler(struct thread *td, struct linux_sched_getscheduler_args *args) { - struct sched_getscheduler_args bsd; - int error; + struct thread *tdt; + int error, policy; #ifdef DEBUG if (ldebug(sched_getscheduler)) printf(ARGS(sched_getscheduler, "%d"), args->pid); #endif - bsd.pid = args->pid; - error = sys_sched_getscheduler(td, &bsd); + tdt = linux_tdfind(td, args->pid, -1); + if (tdt == NULL) + return (ESRCH); + + error = kern_sched_getscheduler(td, tdt, &policy); + PROC_UNLOCK(tdt->td_proc); - switch (td->td_retval[0]) { + switch (policy) { case SCHED_OTHER: td->td_retval[0] = LINUX_SCHED_OTHER; break; @@ -1350,7 +1351,6 @@ td->td_retval[0] = LINUX_SCHED_RR; break; } - return (error); } @@ -1476,20 +1476,12 @@ int linux_getpid(struct thread *td, struct linux_getpid_args *args) { - struct linux_emuldata *em; #ifdef DEBUG if (ldebug(getpid)) printf(ARGS(getpid, "")); #endif - - if (linux_use26(td)) { - em = em_find(td->td_proc, EMUL_DONTLOCK); - KASSERT(em != NULL, ("getpid: emuldata not found.\n")); - td->td_retval[0] = em->shared->group_pid; - } else { - td->td_retval[0] = td->td_proc->p_pid; - } + td->td_retval[0] = td->td_proc->p_pid; return (0); } @@ -1497,13 +1489,18 @@ int linux_gettid(struct thread *td, struct linux_gettid_args *args) { + struct linux_emuldata *em; #ifdef DEBUG if (ldebug(gettid)) printf(ARGS(gettid, "")); #endif - td->td_retval[0] = td->td_proc->p_pid; + em = em_find(td); + KASSERT(em != NULL, ("gettid: emuldata not found.\n")); + + td->td_retval[0] = em->em_tid; + return (0); } @@ -1511,50 +1508,15 @@ int linux_getppid(struct thread *td, struct linux_getppid_args *args) { - struct linux_emuldata *em; - struct proc *p, *pp; #ifdef DEBUG if (ldebug(getppid)) printf(ARGS(getppid, "")); #endif - if (!linux_use26(td)) { - PROC_LOCK(td->td_proc); - td->td_retval[0] = td->td_proc->p_pptr->p_pid; - PROC_UNLOCK(td->td_proc); - return (0); - } - - em = em_find(td->td_proc, EMUL_DONTLOCK); - - KASSERT(em != NULL, ("getppid: process emuldata not found.\n")); - - /* find the group leader */ - p = pfind(em->shared->group_pid); - - if (p == NULL) { -#ifdef DEBUG - printf(LMSG("parent process not found.\n")); -#endif - return (0); - } - - pp = p->p_pptr; /* switch to parent */ - PROC_LOCK(pp); - PROC_UNLOCK(p); - - /* if its also linux process */ - if (pp->p_sysent == &elf_linux_sysvec) { - em = em_find(pp, EMUL_DONTLOCK); - KASSERT(em != NULL, ("getppid: parent emuldata not found.\n")); - - td->td_retval[0] = em->shared->group_pid; - } else - td->td_retval[0] = pp->p_pid; - - PROC_UNLOCK(pp); - + PROC_LOCK(td->td_proc); + td->td_retval[0] = td->td_proc->p_pptr->p_pid; + PROC_UNLOCK(td->td_proc); return (0); } @@ -1659,22 +1621,16 @@ int linux_exit_group(struct thread *td, struct linux_exit_group_args *args) { - struct linux_emuldata *em; #ifdef DEBUG if (ldebug(exit_group)) printf(ARGS(exit_group, "%i"), args->error_code); #endif - em = em_find(td->td_proc, EMUL_DONTLOCK); - if (em->shared->refs > 1) { - EMUL_SHARED_WLOCK(&emul_shared_lock); - em->shared->flags |= EMUL_SHARED_HASXSTAT; - em->shared->xstat = W_EXITCODE(args->error_code, 0); - EMUL_SHARED_WUNLOCK(&emul_shared_lock); - if (linux_use26(td)) - linux_kill_threads(td, SIGKILL); - } + LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, + args->error_code); + + linux_thread_detach(td); /* * XXX: we should send a signal to the parent if @@ -1682,8 +1638,7 @@ * as it doesnt occur often. */ exit1(td, W_EXITCODE(args->error_code, 0)); - - return (0); + /* NOTREACHED */ } #define _LINUX_CAPABILITY_VERSION 0x19980330 @@ -1799,16 +1754,14 @@ case LINUX_PR_SET_PDEATHSIG: if (!LINUX_SIG_VALID(args->arg2)) return (EINVAL); - em = em_find(p, EMUL_DOLOCK); + em = em_find(td); KASSERT(em != NULL, ("prctl: emuldata not found.\n")); em->pdeath_signal = args->arg2; - EMUL_UNLOCK(&emul_lock); break; case LINUX_PR_GET_PDEATHSIG: - em = em_find(p, EMUL_DOLOCK); + em = em_find(td); KASSERT(em != NULL, ("prctl: emuldata not found.\n")); pdeath_signal = em->pdeath_signal; - EMUL_UNLOCK(&emul_lock); error = copyout(&pdeath_signal, (void *)(register_t)args->arg2, sizeof(pdeath_signal)); @@ -1879,7 +1832,6 @@ { struct sched_param sched_param; struct thread *tdt; - struct proc *p; int error; #ifdef DEBUG @@ -1891,24 +1843,12 @@ if (error) return (error); - if (uap->pid == 0) { - tdt = td; - p = tdt->td_proc; - PROC_LOCK(p); - } else { - p = pfind(uap->pid); - if (p == NULL) - return (ESRCH); - /* - * XXX. Scheduling parameters are in fact per-thread - * attributes in Linux. Temporarily use the first - * thread in proc. The same for get_param(). - */ - tdt = FIRST_THREAD_IN_PROC(p); - } + tdt = linux_tdfind(td, uap->pid, -1); + if (tdt == NULL) + return (ESRCH); error = kern_sched_setparam(td, tdt, &sched_param); - PROC_UNLOCK(p); + PROC_UNLOCK(tdt->td_proc); return (error); } @@ -1918,7 +1858,6 @@ { struct sched_param sched_param; struct thread *tdt; - struct proc *p; int error; #ifdef DEBUG @@ -1926,19 +1865,12 @@ printf(ARGS(sched_getparam, "%d, *"), uap->pid); #endif - if (uap->pid == 0) { - tdt = td; - p = tdt->td_proc; - PROC_LOCK(p); - } else { - p = pfind(uap->pid); - if (p == NULL) - return (ESRCH); - tdt = FIRST_THREAD_IN_PROC(p); - } + tdt = linux_tdfind(td, uap->pid, -1); + if (tdt == NULL) + return (ESRCH); error = kern_sched_getparam(td, tdt, &sched_param); - PROC_UNLOCK(p); + PROC_UNLOCK(tdt->td_proc); if (error == 0) error = copyout(&sched_param, uap->param, sizeof(sched_param)); @@ -1953,6 +1885,7 @@ struct linux_sched_getaffinity_args *args) { int error; + struct thread *tdt; struct cpuset_getaffinity_args cga; #ifdef DEBUG @@ -1963,9 +1896,14 @@ if (args->len < sizeof(cpuset_t)) return (EINVAL); + tdt = linux_tdfind(td, args->pid, -1); + if (tdt == NULL) + return (ESRCH); + + PROC_UNLOCK(tdt->td_proc); cga.level = CPU_LEVEL_WHICH; - cga.which = CPU_WHICH_PID; - cga.id = args->pid; + cga.which = CPU_WHICH_TID; + cga.id = tdt->td_tid; cga.cpusetsize = sizeof(cpuset_t); cga.mask = (cpuset_t *) args->user_mask_ptr; @@ -1983,6 +1921,7 @@ struct linux_sched_setaffinity_args *args) { struct cpuset_setaffinity_args csa; + struct thread *tdt; #ifdef DEBUG if (ldebug(sched_setaffinity)) @@ -1992,9 +1931,14 @@ if (args->len < sizeof(cpuset_t)) return (EINVAL); + tdt = linux_tdfind(td, args->pid, -1); + if (tdt == NULL) + return (ESRCH); + + PROC_UNLOCK(tdt->td_proc); csa.level = CPU_LEVEL_WHICH; - csa.which = CPU_WHICH_PID; - csa.id = args->pid; + csa.which = CPU_WHICH_TID; + csa.id = tdt->td_tid; csa.cpusetsize = sizeof(cpuset_t); csa.mask = (cpuset_t *) args->user_mask_ptr; @@ -2008,25 +1952,54 @@ struct timespec ts; struct l_timespec lts; struct thread *tdt; - struct proc *p; int error; - if (uap->pid == 0) { - tdt = td; - p = tdt->td_proc; - PROC_LOCK(p); - } else { - p = pfind(uap->pid); - if (p == NULL) - return (ESRCH); - tdt = FIRST_THREAD_IN_PROC(p); - } + tdt = linux_tdfind(td, uap->pid, -1); + if (tdt == NULL) + return (ESRCH); error = kern_sched_rr_get_interval_td(td, tdt, &ts); - PROC_UNLOCK(p); + PROC_UNLOCK(tdt->td_proc); if (error != 0) return (error); lts.tv_sec = ts.tv_sec; lts.tv_nsec = ts.tv_nsec; return (copyout(<s, uap->interval, sizeof(lts))); } + +/* + * In case when the Linux thread is the initial thread in + * the thread group thread id is equal to the process id. + * Glibc depends on this magic (assert in pthread_getattr_np.c). + */ +struct thread * +linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) +{ + struct linux_emuldata *em; + struct thread *tdt; + struct proc *p; + + tdt = NULL; + if (tid == 0 || tid == td->td_tid) { + tdt = td; + PROC_LOCK(tdt->td_proc); + } else if (tid > PID_MAX) + tdt = tdfind(tid, pid); + else { + /* + * Initial thread where the tid equal to the pid. + */ + p = pfind(tid); + if (p) { + FOREACH_THREAD_IN_PROC(p, tdt) { + em = em_find(tdt); + if (tid == em->em_tid) + return (tdt); + } + PROC_UNLOCK(p); + } + return (NULL); + } + + return (tdt); +} Index: sys/compat/linux/linux_signal.c =================================================================== --- sys/compat/linux/linux_signal.c +++ sys/compat/linux/linux_signal.c @@ -53,6 +53,10 @@ #include #include #include +#include + +static int linux_do_tkill(struct thread *td, struct thread *tdt, + ksiginfo_t *ksi); void linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss) @@ -537,56 +541,21 @@ } static int -linux_do_tkill(struct thread *td, l_int tgid, l_int pid, l_int signum) +linux_do_tkill(struct thread *td, struct thread *tdt, ksiginfo_t *ksi) { - struct proc *proc = td->td_proc; - struct linux_emuldata *em; struct proc *p; - ksiginfo_t ksi; int error; - AUDIT_ARG_SIGNUM(signum); - AUDIT_ARG_PID(pid); - - /* - * Allow signal 0 as a means to check for privileges - */ - if (!LINUX_SIG_VALID(signum) && signum != 0) - return (EINVAL); - - if (signum > 0 && signum <= LINUX_SIGTBLSZ) - signum = linux_to_bsd_signal[_SIG_IDX(signum)]; - - if ((p = pfind(pid)) == NULL) { - if ((p = zpfind(pid)) == NULL) - return (ESRCH); - } - + p = tdt->td_proc; + AUDIT_ARG_SIGNUM(ksi->ksi_signo); + AUDIT_ARG_PID(p->p_pid); AUDIT_ARG_PROCESS(p); - error = p_cansignal(td, p, signum); - if (error != 0 || signum == 0) - goto out; - error = ESRCH; - em = em_find(p, EMUL_DONTLOCK); - - if (em == NULL) { -#ifdef DEBUG - printf("emuldata not found in do_tkill.\n"); -#endif - goto out; - } - if (tgid > 0 && em->shared->group_pid != tgid) + error = p_cansignal(td, p, ksi->ksi_signo); + if (error != 0 || ksi->ksi_signo == 0) goto out; - ksiginfo_init(&ksi); - ksi.ksi_signo = signum; - ksi.ksi_code = LINUX_SI_TKILL; - ksi.ksi_errno = 0; - ksi.ksi_pid = proc->p_pid; - ksi.ksi_uid = proc->p_ucred->cr_ruid; - - error = pksignal(p, ksi.ksi_signo, &ksi); + tdksignal(tdt, ksi->ksi_signo, ksi); out: PROC_UNLOCK(p); @@ -596,20 +565,53 @@ int linux_tgkill(struct thread *td, struct linux_tgkill_args *args) { + struct thread *tdt; + ksiginfo_t ksi; + int sig; #ifdef DEBUG if (ldebug(tgkill)) - printf(ARGS(tgkill, "%d, %d, %d"), args->tgid, args->pid, args->sig); + printf(ARGS(tgkill, "%d, %d, %d"), + args->tgid, args->pid, args->sig); #endif + if (args->pid <= 0 || args->tgid <=0) return (EINVAL); - return (linux_do_tkill(td, args->tgid, args->pid, args->sig)); + /* + * Allow signal 0 as a means to check for privileges + */ + if (!LINUX_SIG_VALID(args->sig) && args->sig != 0) + return (EINVAL); + + if (args->sig > 0 && args->sig <= LINUX_SIGTBLSZ) + sig = linux_to_bsd_signal[_SIG_IDX(args->sig)]; + else + sig = args->sig; + + tdt = linux_tdfind(td, args->pid, args->tgid); + if (tdt == NULL) + return (ESRCH); + + ksiginfo_init(&ksi); + ksi.ksi_signo = sig; + ksi.ksi_code = LINUX_SI_TKILL; + ksi.ksi_errno = 0; + ksi.ksi_pid = td->td_proc->p_pid; + ksi.ksi_uid = td->td_proc->p_ucred->cr_ruid; + return (linux_do_tkill(td, tdt, &ksi)); } +/* + * Deprecated since 2.5.75. Replaced by tgkill(). + */ int linux_tkill(struct thread *td, struct linux_tkill_args *args) { + struct thread *tdt; + ksiginfo_t ksi; + int sig; + #ifdef DEBUG if (ldebug(tkill)) printf(ARGS(tkill, "%i, %i"), args->tid, args->sig); @@ -617,7 +619,25 @@ if (args->tid <= 0) return (EINVAL); - return (linux_do_tkill(td, 0, args->tid, args->sig)); + if (!LINUX_SIG_VALID(args->sig)) + return (EINVAL); + + if (args->sig > 0 && args->sig <= LINUX_SIGTBLSZ) + sig = linux_to_bsd_signal[_SIG_IDX(args->sig)]; + else + sig = args->sig; + + tdt = linux_tdfind(td, args->tid, -1); + if (tdt == NULL) + return (ESRCH); + + ksiginfo_init(&ksi); + ksi.ksi_signo = sig; + ksi.ksi_code = LINUX_SI_TKILL; + ksi.ksi_errno = 0; + ksi.ksi_pid = td->td_proc->p_pid; + ksi.ksi_uid = td->td_proc->p_ucred->cr_ruid; + return (linux_do_tkill(td, tdt, &ksi)); } void Index: sys/compat/linux/stats_timing.d =================================================================== --- sys/compat/linux/stats_timing.d +++ sys/compat/linux/stats_timing.d @@ -39,7 +39,6 @@ * possible for a given application * - graph of longest running (CPU-time!) function in total * - may help finding problem cases in the kernel code - * - timing statistics for the emul_lock * - graph of longest held (CPU-time!) locks */ Index: sys/i386/linux/linux_machdep.c =================================================================== --- sys/i386/linux/linux_machdep.c +++ sys/i386/linux/linux_machdep.c @@ -141,15 +141,7 @@ args->argp, args->envp); free(newpath, M_TEMP); if (error == 0) - error = kern_execve(td, &eargs, NULL); - if (error == 0) - /* linux process can exec fbsd one, dont attempt - * to create emuldata for such process using - * linux_proc_init, this leads to a panic on KASSERT - * because such process has p->p_emuldata == NULL - */ - if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) - error = linux_proc_init(td, 0, 0); + error = linux_common_execve(td, &eargs); return (error); } @@ -360,8 +352,14 @@ linux_set_upcall_kse(struct thread *td, register_t stack) { - td->td_frame->tf_esp = stack; + if (stack) + td->td_frame->tf_esp = stack; + /* + * The newly created Linux thread returns + * to the user space by the same path that a parent do. + */ + td->td_frame->tf_eax = 0; return (0); } Index: sys/i386/linux/linux_sysvec.c =================================================================== --- sys/i386/linux/linux_sysvec.c +++ sys/i386/linux/linux_sysvec.c @@ -116,6 +116,7 @@ static eventhandler_tag linux_exit_tag; static eventhandler_tag linux_exec_tag; +static eventhandler_tag linux_thread_dtor_tag; /* * Linux syscalls return negative errno's, we do positive and map them @@ -1099,14 +1100,14 @@ linux_ioctl_register_handler(*lihp); SET_FOREACH(ldhp, linux_device_handler_set) linux_device_register_handler(*ldhp); - mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); - sx_init(&emul_shared_lock, "emuldata->shared lock"); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, NULL, 1000); linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, NULL, 1000); + linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, + linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); linux_get_machine(&linux_platform); linux_szplatform = roundup(strlen(linux_platform) + 1, sizeof(char *)); @@ -1133,11 +1134,10 @@ linux_ioctl_unregister_handler(*lihp); SET_FOREACH(ldhp, linux_device_handler_set) linux_device_unregister_handler(*ldhp); - mtx_destroy(&emul_lock); - sx_destroy(&emul_shared_lock); mtx_destroy(&futex_mtx); EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); + EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); linux_osd_jail_deregister(); if (bootverbose) printf("Linux ELF exec handler removed\n");