Changeset View
Standalone View
sys/kern/kern_jail.c
Show All 27 Lines | |||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_compat.h" | #include "opt_compat.h" | ||||
#include "opt_ddb.h" | #include "opt_ddb.h" | ||||
#include "opt_inet.h" | #include "opt_inet.h" | ||||
#include "opt_inet6.h" | #include "opt_inet6.h" | ||||
#include "opt_pax.h" | |||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/types.h> | #include <sys/types.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/errno.h> | #include <sys/errno.h> | ||||
#include <sys/sysproto.h> | #include <sys/sysproto.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/osd.h> | #include <sys/osd.h> | ||||
#include <sys/pax.h> | |||||
#include <sys/priv.h> | #include <sys/priv.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/taskqueue.h> | #include <sys/taskqueue.h> | ||||
#include <sys/fcntl.h> | #include <sys/fcntl.h> | ||||
#include <sys/jail.h> | #include <sys/jail.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/racct.h> | #include <sys/racct.h> | ||||
Show All 40 Lines | |||||
#define _PR_IP_SADDRSEL PR_IP6_SADDRSEL | #define _PR_IP_SADDRSEL PR_IP6_SADDRSEL | ||||
#else | #else | ||||
#define _PR_IP_SADDRSEL 0 | #define _PR_IP_SADDRSEL 0 | ||||
#endif | #endif | ||||
#endif | #endif | ||||
/* prison0 describes what is "real" about the system. */ | /* prison0 describes what is "real" about the system. */ | ||||
struct prison prison0 = { | struct prison prison0 = { | ||||
.pr_id = 0, | .pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children) | ||||
.pr_name = "0", | |||||
.pr_ref = 1, | |||||
.pr_uref = 1, | |||||
.pr_path = "/", | |||||
.pr_securelevel = -1, | |||||
.pr_devfs_rsnum = 0, | |||||
.pr_childmax = JAIL_MAX, | |||||
.pr_hostuuid = DEFAULT_HOSTUUID, | |||||
.pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children), | |||||
#ifdef VIMAGE | |||||
.pr_flags = PR_HOST|PR_VNET|_PR_IP_SADDRSEL, | |||||
#else | |||||
.pr_flags = PR_HOST|_PR_IP_SADDRSEL, | |||||
#endif | |||||
.pr_allow = PR_ALLOW_ALL, | |||||
}; | }; | ||||
MTX_SYSINIT(prison0, &prison0.pr_mtx, "jail mutex", MTX_DEF); | |||||
/* allprison, allprison_racct and lastprid are protected by allprison_lock. */ | /* allprison, allprison_racct and lastprid are protected by allprison_lock. */ | ||||
struct sx allprison_lock; | struct sx allprison_lock; | ||||
rwatson: I find myself wondering if it's time for a new prison_init_prison0() sysinit that can take care of mutex initialisation, etc. | |||||
Not Done Inline ActionsIs that a problem a patch implementing ASLR needs to solve? lattera-gmail.com: Is that a problem a patch implementing ASLR needs to solve? | |||||
Not Done Inline ActionsIt's a problem that the ASLR patch introduces: before ASLR, there isn't an issue with multiple independent sysinits for prison0. rwatson: It's a problem that the ASLR patch introduces: before ASLR, there isn't an issue with multiple… | |||||
Not Done Inline ActionsWhy is this a problem? op: Why is this a problem? | |||||
Not Done Inline ActionsIf there is avoidable non-atomicity in initialising kernel data structures, it's nice to avoid it. In this case, it might be that we are not yet ready to initialise prison0's ASLR state until well after we are ready to initialise its mutex; on the other hand, it would be nice to think that prison0 state is fully initialised before something would need to use its mutex. This is an inevitable source of difficulty in bootstrapping the system, but maintaining congruence between bootstrap initialisation of special-case structures (e.g., prison0) and later instances of the same structure (e.g., every other prison structure) is a reasonable goal. rwatson: If there is avoidable non-atomicity in initialising kernel data structures, it's nice to avoid it. | |||||
Not Done Inline ActionsIn an older version of pax_init_prison(struct prison *pr) we acquired the prison lock, and because of this it had to run after prison0's mutex initialization. Currently we don't acquire the prison lock, so I think it's fine if we move PaX's prison0 initialization before prison0's mutex initialization in the sysinit priority list. op: In an older version of pax_init_prison(struct prison *pr) we acquired the prison lock, and because… | |||||
Not Done Inline ActionsI was sort of pondering a new sysinit (very vaguely) along the lines of: static void mtx_init(&prison0) pax_init(&prison0) } The interesting question is what sysinit ordering to give it: I guess a new SI_SUB_PRISON could fall after SI_SUB_PAX such that PAX's internal state is initialised before the first prison structure uses it? rwatson: I was sort of pondering a new sysinit (very vaguely) along the lines of:
static void… | |||||
SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); | SX_SYSINIT(allprison_lock, &allprison_lock, "allprison"); | ||||
struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); | struct prisonlist allprison = TAILQ_HEAD_INITIALIZER(allprison); | ||||
LIST_HEAD(, prison_racct) allprison_racct; | LIST_HEAD(, prison_racct) allprison_racct; | ||||
int lastprid = 0; | int lastprid = 0; | ||||
static void prison0_init(void *data); | |||||
static int do_jail_attach(struct thread *td, struct prison *pr); | static int do_jail_attach(struct thread *td, struct prison *pr); | ||||
static void prison_complete(void *context, int pending); | static void prison_complete(void *context, int pending); | ||||
static void prison_deref(struct prison *pr, int flags); | static void prison_deref(struct prison *pr, int flags); | ||||
static char *prison_path(struct prison *pr1, struct prison *pr2); | static char *prison_path(struct prison *pr1, struct prison *pr2); | ||||
static void prison_remove_one(struct prison *pr); | static void prison_remove_one(struct prison *pr); | ||||
#ifdef RACCT | #ifdef RACCT | ||||
static void prison_racct_attach(struct prison *pr); | static void prison_racct_attach(struct prison *pr); | ||||
static void prison_racct_modify(struct prison *pr); | static void prison_racct_modify(struct prison *pr); | ||||
static void prison_racct_detach(struct prison *pr); | static void prison_racct_detach(struct prison *pr); | ||||
#endif | #endif | ||||
#ifdef INET | #ifdef INET | ||||
static int _prison_check_ip4(const struct prison *, const struct in_addr *); | static int _prison_check_ip4(const struct prison *, const struct in_addr *); | ||||
static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4); | static int prison_restrict_ip4(struct prison *pr, struct in_addr *newip4); | ||||
Not Done Inline ActionsYou define this as a sysinit here and ... imp: You define this as a sysinit here and ... | |||||
#endif | #endif | ||||
#ifdef INET6 | #ifdef INET6 | ||||
static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6); | static int _prison_check_ip6(struct prison *pr, struct in6_addr *ia6); | ||||
static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6); | static int prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6); | ||||
#endif | #endif | ||||
SYSINIT(prison0_sysinit, SI_SUB_PRISON, SI_ORDER_MIDDLE, prison0_init, NULL); | |||||
/* Flags for prison_deref */ | /* Flags for prison_deref */ | ||||
#define PD_DEREF 0x01 | #define PD_DEREF 0x01 | ||||
#define PD_DEUREF 0x02 | #define PD_DEUREF 0x02 | ||||
#define PD_LOCKED 0x04 | #define PD_LOCKED 0x04 | ||||
#define PD_LIST_SLOCKED 0x08 | #define PD_LIST_SLOCKED 0x08 | ||||
#define PD_LIST_XLOCKED 0x10 | #define PD_LIST_XLOCKED 0x10 | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 80 Lines • ▼ Show 20 Lines | |||||
#define JAIL_DEFAULT_DEVFS_RSNUM 0 | #define JAIL_DEFAULT_DEVFS_RSNUM 0 | ||||
static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; | static unsigned jail_default_allow = JAIL_DEFAULT_ALLOW; | ||||
static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; | static int jail_default_enforce_statfs = JAIL_DEFAULT_ENFORCE_STATFS; | ||||
static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM; | static int jail_default_devfs_rsnum = JAIL_DEFAULT_DEVFS_RSNUM; | ||||
#if defined(INET) || defined(INET6) | #if defined(INET) || defined(INET6) | ||||
static unsigned jail_max_af_ips = 255; | static unsigned jail_max_af_ips = 255; | ||||
#endif | #endif | ||||
#ifdef INET | #ifdef INET | ||||
Not Done Inline ActionsRemove blank line. rwatson: Remove blank line. | |||||
static int | static int | ||||
qcmp_v4(const void *ip1, const void *ip2) | qcmp_v4(const void *ip1, const void *ip2) | ||||
{ | { | ||||
in_addr_t iaa, iab; | in_addr_t iaa, iab; | ||||
/* | /* | ||||
* We need to compare in HBO here to get the list sorted as expected | * We need to compare in HBO here to get the list sorted as expected | ||||
* by the result of the code. Sorting NBO addresses gives you | * by the result of the code. Sorting NBO addresses gives you | ||||
Show All 31 Lines | if (ia6a->s6_addr[i] > ia6b->s6_addr[i]) | ||||
rc = 1; | rc = 1; | ||||
else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) | else if (ia6a->s6_addr[i] < ia6b->s6_addr[i]) | ||||
rc = -1; | rc = -1; | ||||
} | } | ||||
return (rc); | return (rc); | ||||
} | } | ||||
#endif | #endif | ||||
static void | |||||
prison0_init(void *data) | |||||
{ | |||||
mtx_init(&prison0.pr_mtx, "jail mutex", NULL, MTX_DEF); | |||||
prison0.pr_id = 0; | |||||
prison0.pr_name[0] = '0'; | |||||
prison0.pr_ref = 1; | |||||
prison0.pr_uref = 1; | |||||
prison0.pr_path[0] = '/'; | |||||
prison0.pr_securelevel = -1; | |||||
prison0.pr_devfs_rsnum = 0; | |||||
prison0.pr_childmax = JAIL_MAX; | |||||
strlcpy(prison0.pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN); | |||||
#ifdef VIMAGE | |||||
prison0.pr_flags = PR_HOST|PR_VNET|_PR_IP_SADDRSEL; | |||||
#else | |||||
prison0.pr_flags = PR_HOST|_PR_IP_SADDRSEL; | |||||
#endif | |||||
prison0.pr_allow = PR_ALLOW_ALL; | |||||
#ifdef PAX_ASLR | |||||
pax_init_prison(&prison0); | |||||
#endif | |||||
} | |||||
/* | /* | ||||
* struct jail_args { | * struct jail_args { | ||||
* struct jail *jail; | * struct jail *jail; | ||||
* }; | * }; | ||||
*/ | */ | ||||
int | int | ||||
sys_jail(struct thread *td, struct jail_args *uap) | sys_jail(struct thread *td, struct jail_args *uap) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 999 Lines • ▼ Show 20 Lines | #endif | ||||
* Allocate a dedicated cpuset for each jail. | * Allocate a dedicated cpuset for each jail. | ||||
* Unlike other initial settings, this may return an error. | * Unlike other initial settings, this may return an error. | ||||
*/ | */ | ||||
error = cpuset_create_root(ppr, &pr->pr_cpuset); | error = cpuset_create_root(ppr, &pr->pr_cpuset); | ||||
if (error) { | if (error) { | ||||
prison_deref(pr, PD_LIST_XLOCKED); | prison_deref(pr, PD_LIST_XLOCKED); | ||||
goto done_releroot; | goto done_releroot; | ||||
} | } | ||||
Not Done Inline Actions... explicitly call it here. Why? imp: ... explicitly call it here. Why? | |||||
Not Done Inline ActionsThe sysinit is for jail 0. This call is for when new (child) jails are spun up. lattera-gmail.com: The sysinit is for jail 0. This call is for when new (child) jails are spun up. | |||||
Not Done Inline ActionsThis is fine. They call it for new prisons and this code does not get executed for prison0. mjg: This is fine. They call it for new prisons and this code does not get executed for prison0. | |||||
#ifdef PAX_ASLR | |||||
pax_init_prison(pr); | |||||
#endif | |||||
Not Done Inline ActionsRemove blank line. rwatson: Remove blank line. | |||||
Not Done Inline ActionsRemove blank line. rwatson: Remove blank line. | |||||
mtx_lock(&pr->pr_mtx); | mtx_lock(&pr->pr_mtx); | ||||
/* | /* | ||||
* New prisons do not yet have a reference, because we do not | * New prisons do not yet have a reference, because we do not | ||||
* want others to see the incomplete prison once the | * want others to see the incomplete prison once the | ||||
* allprison_lock is downgraded. | * allprison_lock is downgraded. | ||||
*/ | */ | ||||
} else { | } else { | ||||
created = 0; | created = 0; | ||||
▲ Show 20 Lines • Show All 3,356 Lines • Show Last 20 Lines |
I find myself wondering if it's time for a new prison_init_prison0() sysinit that can take care of mutex initialisation, etc.