Index: sys/kern/kern_jail.c
===================================================================
--- sys/kern/kern_jail.c
+++ sys/kern/kern_jail.c
@@ -45,6 +45,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -508,6 +509,206 @@
 	return (error);
 }
 
+/*
+ * Network address lists (pr_ip4, pr_ip6) allocation for jails.  The addresses
+ * are accessed locklessly by the network stack, thus need to be protected by
+ * the network epoch.
+ *
+ * Every list is allocated with a struct epoch_context in front of the
+ * address array.  Callers only see a pointer to the array itself;
+ * prison_ip_free() steps back to that header to schedule the deferred free.
+ */
+static void *
+prison_ip_alloc(const sa_family_t af, uint32_t cnt, int flags)
+{
+	void *mem;
+
+	mem = malloc(sizeof(struct epoch_context) + cnt *
+	    (af == AF_INET ? sizeof(struct in_addr) : sizeof(struct in6_addr)),
+	    M_PRISON, flags);
+	return (mem != NULL ? (epoch_context_t)mem + 1 : NULL);
+}
+
+/* Epoch callback: ctx is the start of a prison_ip_alloc() allocation. */
+static void
+prison_ip_free_deferred(epoch_context_t ctx)
+{
+
+	free(ctx, M_PRISON);
+}
+
+/*
+ * Hand a list obtained from prison_ip_alloc() to NET_EPOCH_CALL() so that it
+ * is freed by prison_ip_free_deferred().  A NULL list is ignored.
+ */
+static void
+prison_ip_free(void *mem)
+{
+
+	if (mem != NULL)
+		NET_EPOCH_CALL(prison_ip_free_deferred,
+		    (epoch_context_t)mem - 1);
+}
+
+/*
+ * Install a new address list and count for the given family and pass the
+ * previous list to prison_ip_free().  The prison mutex must be held.
+ */
+static void
+prison_ip_set(struct prison *pr, const sa_family_t af, void *new,
+    uint32_t newcnt)
+{
+	uint32_t *cnt;
+	void **mem, *old;
+
+	mtx_assert(&pr->pr_mtx, MA_OWNED);
+
+	cnt = af == AF_INET ? &pr->pr_ip4s : &pr->pr_ip6s;
+	mem = af == AF_INET ? (void *)&pr->pr_ip4 : (void *)&pr->pr_ip6;
+
+	/*
+	 * prison_check_ip[46]_locked first reads the count and then processes
+	 * through the list.  To prevent out of bounds read, we make sure
+	 * the number read would be <= length of the list.
+	 *
+	 * NOTE(review): when shrinking, a reader that loaded the old count
+	 * before the stores below can still load the new, shorter list
+	 * afterwards.  Confirm whether that window needs closing, e.g. by
+	 * keeping the count inside the allocation.
+	 */
+	old = *mem;
+	if (newcnt >= *cnt) {
+		ck_pr_store_ptr(mem, new);
+		ck_pr_store_32(cnt, newcnt);
+	} else {
+		ck_pr_store_32(cnt, newcnt);
+		ck_pr_store_ptr(mem, new);
+	}
+	prison_ip_free(old);
+}
+
+/*
+ * Restrict a prison's IP address list with its parent's, possibly replacing
+ * it.  Return true if the replacement buffer was used (or would have been).
+ *
+ * new is an optional caller-supplied buffer from prison_ip_alloc(); when it
+ * is NULL a buffer is allocated here with M_NOWAIT.  True is returned when
+ * the caller's buffer was installed, or when that allocation failed and the
+ * caller has to come back with a buffer.
+ */
+static bool
+prison_ip_restrict(struct prison *pr, const sa_family_t af, void *new)
+{
+	const unsigned ip_flag = af == AF_INET ? PR_IP4_USER : PR_IP6_USER;
+	const size_t size = af == AF_INET ?
+	    sizeof(struct in_addr) : sizeof(struct in6_addr);
+	int (*const cmp)(const void *, const void *) = af == AF_INET ?
+	    prison_qcmp_v4 : prison_qcmp_v6;
+	struct prison *ppr;
+	uint32_t ips;
+	bool alloced;
+
+#define	PR_IPS(pr)	(af == AF_INET ? (pr)->pr_ip4s : (pr)->pr_ip6s)
+#define	PR_IP(pr, idx)	(af == AF_INET ? \
+	    (void *)(&(pr)->pr_ip4[idx]) : \
+	    (void *)(&(pr)->pr_ip6[idx]))
+
+	mtx_assert(&pr->pr_mtx, MA_OWNED);
+
+	ppr = pr->pr_parent;
+	/*
+	 * Due to epoch-synchronized access to the IP address lists we always
+	 * allocate a new list even if the old one has enough space.  We could
+	 * atomically update an IPv4 address inside a list, but that would
+	 * screw up sorting, and in case of IPv6 we can't even atomically write
+	 * one.
+	 */
+	ips = (pr->pr_flags & ip_flag) ? PR_IPS(pr) : PR_IPS(ppr);
+	if (ips == 0) {
+		prison_ip_set(pr, af, NULL, 0);
+		return (false);
+	}
+	if (new == NULL) {
+		new = prison_ip_alloc(af, ips, M_NOWAIT);
+		if (new == NULL)
+			return (true);
+		alloced = true;
+	} else
+		alloced = false;
+	if (!(pr->pr_flags & ip_flag)) {
+		/* This has no user settings, so just copy the parent's list. */
+		bcopy(PR_IP(ppr, 0), new, ips * size);
+	} else {
+		/* Remove addresses that aren't in the parent. */
+		void *p = new;
+		int i;
+
+		ips = 0; /* how many we copied to new */
+
+		for (int pi = 0; pi < PR_IPS(ppr); pi++)
+			if (cmp(PR_IP(pr, 0), PR_IP(ppr, pi)) == 0) {
+				/* Found our main address in parent. */
+				bcopy(PR_IP(pr, 0), p, size);
+				p = (char *)p + size;
+				ips++;
+				break;
+			}
+		/*
+		 * The main address is unsorted and has been dealt with above,
+		 * kept or dropped.  Always start the merge at the sorted part;
+		 * a dropped main address fed into it could advance pi past
+		 * parent entries that later addresses still match.
+		 */
+		i = 1;	/* index in PR_IP(pr) */
+		for (int pi = 1; i < PR_IPS(pr); ) {
+			/*
+			 * Check against main, which is unsorted.  An empty
+			 * parent list is NULL and has no main address.
+			 */
+			if (PR_IPS(ppr) > 0 &&
+			    cmp(PR_IP(pr, i), PR_IP(ppr, 0)) == 0) {
+				/* Matches parent's main address. */
+				bcopy(PR_IP(pr, i), p, size);
+				p = (char *)p + size;
+				i++;
+				ips++;
+				continue;
+			}
+			/* The rest are sorted. */
+			switch (pi >= PR_IPS(ppr) ? -1 :
+			    cmp(PR_IP(pr, i), PR_IP(ppr, pi))) {
+			case -1:
+				i++;
+				break;
+			case 0:
+				bcopy(PR_IP(pr, i), p, size);
+				p = (char *)p + size;
+				i++;
+				pi++;
+				ips++;
+				break;
+			case 1:
+				pi++;
+				break;
+			}
+		}
+		if (ips == 0) {
+			if (alloced)
+				prison_ip_free(new);
+			new = NULL;
+		}
+	}
+	prison_ip_set(pr, af, new, ips);
+	/*
+	 * A list allocated here is not the caller's buffer, so do not report it
+	 * as used: callers that passed no buffer take true as a redo request.
+	 */
+	return (new != NULL && !alloced);
+#undef PR_IPS
+#undef PR_IP
+}
+
+
 int
 kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 {
@@ -813,7 +1014,7 @@
 				vfs_opterror(opts, "too many IPv4 addresses");
 				goto done_errmsg;
 			}
-			ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
+			ip4 = prison_ip_alloc(AF_INET, ip4s, M_WAITOK);
 			bcopy(op, ip4, ip4s * sizeof(*ip4));
 			/*
 			 * IP addresses are all sorted but ip[0] to preserve
@@ -870,7 +1071,7 @@
 				vfs_opterror(opts, "too many IPv6 addresses");
 				goto done_errmsg;
 			}
-			ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
+			ip6 = prison_ip_alloc(AF_INET6, ip6s, M_WAITOK);
 			bcopy(op, ip6, ip6s * sizeof(*ip6));
 			if (ip6s > 1)
 				qsort(ip6 + 1, ip6s - 1, sizeof(*ip6),
@@ -1255,9 +1456,8 @@
 			pr->pr_flags |= ppr->pr_flags & PR_IP4;
 			if (ppr->pr_ip4 != NULL) {
 				pr->pr_ip4s = ppr->pr_ip4s;
-				pr->pr_ip4 = malloc(pr->pr_ip4s *
-				    sizeof(struct in_addr), M_PRISON,
-				    M_WAITOK);
+				pr->pr_ip4 = prison_ip_alloc(AF_INET,
+				    pr->pr_ip4s, M_WAITOK);
 				bcopy(ppr->pr_ip4, pr->pr_ip4,
 				    pr->pr_ip4s * sizeof(*pr->pr_ip4));
 			}
@@ -1270,9 +1470,8 @@
 			pr->pr_flags |= ppr->pr_flags & PR_IP6;
 			if (ppr->pr_ip6 != NULL) {
 				pr->pr_ip6s = ppr->pr_ip6s;
-				pr->pr_ip6 = malloc(pr->pr_ip6s *
-				    sizeof(struct in6_addr), M_PRISON,
-				    M_WAITOK);
+				pr->pr_ip6 = prison_ip_alloc(AF_INET6,
+				    pr->pr_ip6s, M_WAITOK);
 				bcopy(ppr->pr_ip6, pr->pr_ip6,
 				    pr->pr_ip6s * sizeof(*pr->pr_ip6));
 			}
@@ -1592,9 +1791,7 @@
 	redo_ip4 = 0;
 	if (pr_flags & PR_IP4_USER) {
 		pr->pr_flags |= PR_IP4;
-		free(pr->pr_ip4, M_PRISON);
-		pr->pr_ip4s = ip4s;
-		pr->pr_ip4 = ip4;
+		prison_ip_set(pr, AF_INET, ip4, ip4s);
 		ip4 = NULL;
 		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 #ifdef VIMAGE
@@ -1603,7 +1800,7 @@
 				continue;
 			}
 #endif
-			if (prison_restrict_ip4(tpr, NULL)) {
+			if (prison_ip_restrict(tpr, AF_INET, NULL)) {
 				redo_ip4 = 1;
 				descend = 0;
 			}
@@ -1614,9 +1811,7 @@
 	redo_ip6 = 0;
 	if (pr_flags & PR_IP6_USER) {
 		pr->pr_flags |= PR_IP6;
-		free(pr->pr_ip6, M_PRISON);
-		pr->pr_ip6s = ip6s;
-		pr->pr_ip6 = ip6;
+		prison_ip_set(pr, AF_INET6, ip6, ip6s);
 		ip6 = NULL;
 		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
 #ifdef VIMAGE
@@ -1625,7 +1820,7 @@
 				continue;
 			}
 #endif
-			if (prison_restrict_ip6(tpr, NULL)) {
+			if (prison_ip_restrict(tpr, AF_INET6, NULL)) {
 				redo_ip6 = 1;
 				descend = 0;
 			}
@@ -1770,7 +1965,7 @@
 #ifdef INET
 	while (redo_ip4) {
 		ip4s = pr->pr_ip4s;
-		ip4 = malloc(ip4s * sizeof(*ip4), M_PRISON, M_WAITOK);
+		ip4 = prison_ip_alloc(AF_INET, ip4s, M_WAITOK);
 		mtx_lock(&pr->pr_mtx);
 		redo_ip4 = 0;
 		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
@@ -1780,7 +1975,7 @@
 				continue;
 			}
 #endif
-			if (prison_restrict_ip4(tpr, ip4)) {
+			if (prison_ip_restrict(tpr, AF_INET, ip4)) {
 				if (ip4 != NULL)
 					ip4 = NULL;
 				else
@@ -1793,7 +1988,7 @@
 #ifdef INET6
 	while (redo_ip6) {
 		ip6s = pr->pr_ip6s;
-		ip6 = malloc(ip6s * sizeof(*ip6), M_PRISON, M_WAITOK);
+		ip6 = prison_ip_alloc(AF_INET6, ip6s, M_WAITOK);
 		mtx_lock(&pr->pr_mtx);
 		redo_ip6 = 0;
 		FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
@@ -1803,7 +1998,7 @@
 				continue;
 			}
 #endif
-			if (prison_restrict_ip6(tpr, ip6)) {
+			if (prison_ip_restrict(tpr, AF_INET6, ip6)) {
 				if (ip6 != NULL)
 					ip6 = NULL;
 				else
@@ -1889,10 +2084,10 @@
 	}
  done_free:
 #ifdef INET
-	free(ip4, M_PRISON);
+	prison_ip_free(ip4);
 #endif
 #ifdef INET6
-	free(ip6, M_PRISON);
+	prison_ip_free(ip6);
 #endif
 	if (g_path != NULL)
 		free(g_path, M_TEMP);
@@ -2852,10 +3047,10 @@
 			vrele(rpr->pr_root);
 		mtx_destroy(&rpr->pr_mtx);
 #ifdef INET
-		free(rpr->pr_ip4, M_PRISON);
+		prison_ip_free(rpr->pr_ip4);
 #endif
 #ifdef INET6
-		free(rpr->pr_ip6, M_PRISON);
+		prison_ip_free(rpr->pr_ip6);
 #endif
 		if (rpr->pr_cpuset != NULL)
 			cpuset_rel(rpr->pr_cpuset);
Index: sys/netinet/in_jail.c
===================================================================
--- sys/netinet/in_jail.c
+++ sys/netinet/in_jail.c
@@ -43,6 +43,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -90,90 +91,6 @@
 	return (0);
 }
 
-/*
- * Restrict a prison's IP address list with its parent's, possibly replacing
- * it.  Return true if the replacement buffer was used (or would have been).
- */
-int
-prison_restrict_ip4(struct prison *pr, struct in_addr *newip4)
-{
-	int ii, ij, used;
-	struct prison *ppr;
-
-	ppr = pr->pr_parent;
-	if (!(pr->pr_flags & PR_IP4_USER)) {
-		/* This has no user settings, so just copy the parent's list. */
-		if (pr->pr_ip4s < ppr->pr_ip4s) {
-			/*
-			 * There's no room for the parent's list.  Use the
-			 * new list buffer, which is assumed to be big enough
-			 * (if it was passed).  If there's no buffer, try to
-			 * allocate one.
-			 */
-			used = 1;
-			if (newip4 == NULL) {
-				newip4 = malloc(ppr->pr_ip4s * sizeof(*newip4),
-				    M_PRISON, M_NOWAIT);
-				if (newip4 != NULL)
-					used = 0;
-			}
-			if (newip4 != NULL) {
-				bcopy(ppr->pr_ip4, newip4,
-				    ppr->pr_ip4s * sizeof(*newip4));
-				free(pr->pr_ip4, M_PRISON);
-				pr->pr_ip4 = newip4;
-				pr->pr_ip4s = ppr->pr_ip4s;
-			}
-			return (used);
-		}
-		pr->pr_ip4s = ppr->pr_ip4s;
-		if (pr->pr_ip4s > 0)
-			bcopy(ppr->pr_ip4, pr->pr_ip4,
-			    pr->pr_ip4s * sizeof(*newip4));
-		else if (pr->pr_ip4 != NULL) {
-			free(pr->pr_ip4, M_PRISON);
-			pr->pr_ip4 = NULL;
-		}
-	} else if (pr->pr_ip4s > 0) {
-		/* Remove addresses that aren't in the parent. */
-		for (ij = 0; ij < ppr->pr_ip4s; ij++)
-			if (pr->pr_ip4[0].s_addr == ppr->pr_ip4[ij].s_addr)
-				break;
-		if (ij < ppr->pr_ip4s)
-			ii = 1;
-		else {
-			bcopy(pr->pr_ip4 + 1, pr->pr_ip4,
-			    --pr->pr_ip4s * sizeof(*pr->pr_ip4));
-			ii = 0;
-		}
-		for (ij = 1; ii < pr->pr_ip4s; ) {
-			if (pr->pr_ip4[ii].s_addr == ppr->pr_ip4[0].s_addr) {
-				ii++;
-				continue;
-			}
-			switch (ij >= ppr->pr_ip4s ? -1 :
-				prison_qcmp_v4(&pr->pr_ip4[ii], &ppr->pr_ip4[ij])) {
-			case -1:
-				bcopy(pr->pr_ip4 + ii + 1, pr->pr_ip4 + ii,
-				    (--pr->pr_ip4s - ii) * sizeof(*pr->pr_ip4));
-				break;
-			case 0:
-				ii++;
-				ij++;
-				break;
-			case 1:
-				ij++;
-				break;
-			}
-		}
-		if (pr->pr_ip4s == 0) {
-			free(pr->pr_ip4, M_PRISON);
-			pr->pr_ip4 = NULL;
-		}
-	}
-	return (0);
-}
-
 /*
  * Pass back primary IPv4 address of this jail.
  *
@@ -376,22 +293,33 @@
 int
 prison_check_ip4_locked(const struct prison *pr, const struct in_addr *ia)
 {
-	int i, a, z, d;
+	const struct in_addr *ip4;
+	int ip4s, i, a, z, d;
+
+	MPASS(mtx_owned(&pr->pr_mtx) || in_epoch(net_epoch_preempt));
+
+	if (!(pr->pr_flags & PR_IP4))
+		return (0);
+
+	ip4s = ck_pr_load_32(&pr->pr_ip4s);
+	ip4 = ck_pr_load_ptr(&pr->pr_ip4);
+	if (ip4 == NULL)
+		return (EAFNOSUPPORT);
 
 	/*
 	 * Check the primary IP.
 	 */
-	if (pr->pr_ip4[0].s_addr == ia->s_addr)
+	if (ip4[0].s_addr == ia->s_addr)
 		return (0);
 
 	/*
 	 * All the other IPs are sorted so we can do a binary search.
 	 */
 	a = 0;
-	z = pr->pr_ip4s - 2;
+	z = ip4s - 2;
 	while (a <= z) {
 		i = (a + z) / 2;
-		d = prison_qcmp_v4(&pr->pr_ip4[i+1], ia);
+		d = prison_qcmp_v4(&ip4[i+1], ia);
 		if (d > 0)
 			z = i - 1;
 		else if (d < 0)
Index: sys/netinet6/in6_jail.c
===================================================================
--- sys/netinet6/in6_jail.c
+++ sys/netinet6/in6_jail.c
@@ -43,6 +43,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -84,88 +85,6 @@
 	return (rc);
 }
 
-int
-prison_restrict_ip6(struct prison *pr, struct in6_addr *newip6)
-{
-	int ii, ij, used;
-	struct prison *ppr;
-
-	ppr = pr->pr_parent;
-	if (!(pr->pr_flags & PR_IP6_USER)) {
-		/* This has no user settings, so just copy the parent's list. */
-		if (pr->pr_ip6s < ppr->pr_ip6s) {
-			/*
-			 * There's no room for the parent's list.  Use the
-			 * new list buffer, which is assumed to be big enough
-			 * (if it was passed).  If there's no buffer, try to
-			 * allocate one.
-			 */
-			used = 1;
-			if (newip6 == NULL) {
-				newip6 = malloc(ppr->pr_ip6s * sizeof(*newip6),
-				    M_PRISON, M_NOWAIT);
-				if (newip6 != NULL)
-					used = 0;
-			}
-			if (newip6 != NULL) {
-				bcopy(ppr->pr_ip6, newip6,
-				    ppr->pr_ip6s * sizeof(*newip6));
-				free(pr->pr_ip6, M_PRISON);
-				pr->pr_ip6 = newip6;
-				pr->pr_ip6s = ppr->pr_ip6s;
-			}
-			return (used);
-		}
-		pr->pr_ip6s = ppr->pr_ip6s;
-		if (pr->pr_ip6s > 0)
-			bcopy(ppr->pr_ip6, pr->pr_ip6,
-			    pr->pr_ip6s * sizeof(*newip6));
-		else if (pr->pr_ip6 != NULL) {
-			free(pr->pr_ip6, M_PRISON);
-			pr->pr_ip6 = NULL;
-		}
-	} else if (pr->pr_ip6s > 0) {
-		/* Remove addresses that aren't in the parent. */
-		for (ij = 0; ij < ppr->pr_ip6s; ij++)
-			if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0],
-			    &ppr->pr_ip6[ij]))
-				break;
-		if (ij < ppr->pr_ip6s)
-			ii = 1;
-		else {
-			bcopy(pr->pr_ip6 + 1, pr->pr_ip6,
-			    --pr->pr_ip6s * sizeof(*pr->pr_ip6));
-			ii = 0;
-		}
-		for (ij = 1; ii < pr->pr_ip6s; ) {
-			if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[ii],
-			    &ppr->pr_ip6[0])) {
-				ii++;
-				continue;
-			}
-			switch (ij >= ppr->pr_ip6s ? -1 :
-				prison_qcmp_v6(&pr->pr_ip6[ii], &ppr->pr_ip6[ij])) {
-			case -1:
-				bcopy(pr->pr_ip6 + ii + 1, pr->pr_ip6 + ii,
-				    (--pr->pr_ip6s - ii) * sizeof(*pr->pr_ip6));
-				break;
-			case 0:
-				ii++;
-				ij++;
-				break;
-			case 1:
-				ij++;
-				break;
-			}
-		}
-		if (pr->pr_ip6s == 0) {
-			free(pr->pr_ip6, M_PRISON);
-			pr->pr_ip6 = NULL;
-		}
-	}
-	return 0;
-}
-
 /*
  * Pass back primary IPv6 address for this jail.
  *
@@ -362,22 +281,33 @@
 int
 prison_check_ip6_locked(const struct prison *pr, const struct in6_addr *ia6)
 {
-	int i, a, z, d;
+	const struct in6_addr *ip6;
+	int ip6s, i, a, z, d;
+
+	MPASS(mtx_owned(&pr->pr_mtx) || in_epoch(net_epoch_preempt));
+
+	if (!(pr->pr_flags & PR_IP6))
+		return (0);
+
+	ip6s = ck_pr_load_32(&pr->pr_ip6s);
+	ip6 = ck_pr_load_ptr(&pr->pr_ip6);
+	if (ip6 == NULL)
+		return (EAFNOSUPPORT);
 
 	/*
 	 * Check the primary IP.
 	 */
-	if (IN6_ARE_ADDR_EQUAL(&pr->pr_ip6[0], ia6))
+	if (IN6_ARE_ADDR_EQUAL(&ip6[0], ia6))
 		return (0);
 
 	/*
 	 * All the other IPs are sorted so we can do a binary search.
 	 */
 	a = 0;
-	z = pr->pr_ip6s - 2;
+	z = ip6s - 2;
 	while (a <= z) {
 		i = (a + z) / 2;
-		d = prison_qcmp_v6(&pr->pr_ip6[i+1], ia6);
+		d = prison_qcmp_v6(&ip6[i+1], ia6);
 		if (d > 0)
 			z = i - 1;
 		else if (d < 0)
Index: sys/sys/jail.h
===================================================================
--- sys/sys/jail.h
+++ sys/sys/jail.h
@@ -177,8 +177,8 @@
 	struct cpuset	*pr_cpuset;			/* (p) cpuset */
 	struct vnet	*pr_vnet;			/* (c) network stack */
 	struct vnode	*pr_root;			/* (c) vnode to rdir */
-	int		 pr_ip4s;			/* (p) number of v4 IPs */
-	int		 pr_ip6s;			/* (p) number of v6 IPs */
+	uint32_t	 pr_ip4s;			/* (p) number of v4 IPs */
+	uint32_t	 pr_ip6s;			/* (p) number of v6 IPs */
 	struct in_addr	*pr_ip4;			/* (p) v4 IPs of jail */
 	struct in6_addr	*pr_ip6;			/* (p) v6 IPs of jail */
 	struct prison_racct *pr_prison_racct;		/* (c) racct jail proxy */
@@ -439,7 +439,6 @@
 int prison_check_ip4(const struct ucred *, const struct in_addr *);
 int prison_check_ip4_locked(const struct prison *, const struct in_addr *);
 int prison_saddrsel_ip4(struct ucred *, struct in_addr *);
-int prison_restrict_ip4(struct prison *, struct in_addr *);
 int prison_qcmp_v4(const void *, const void *);
 #ifdef INET6
 int prison_equal_ip6(struct prison *, struct prison *);
@@ -449,7 +448,6 @@
 int prison_check_ip6(const struct ucred *, const struct in6_addr *);
 int prison_check_ip6_locked(const struct prison *, const struct in6_addr *);
 int prison_saddrsel_ip6(struct ucred *, struct in6_addr *);
-int prison_restrict_ip6(struct prison *, struct in6_addr *);
 int prison_qcmp_v6(const void *, const void *);
 #endif
 int prison_check_af(struct ucred *cred, int af);