Changeset View
Standalone View
sys/kern/uipc_usrreq.c
Show First 20 Lines • Show All 772 Lines • ▼ Show 20 Lines | uipc_detach(struct socket *so) | ||||
KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); | KASSERT(unp != NULL, ("uipc_detach: unp == NULL")); | ||||
vp = NULL; | vp = NULL; | ||||
vplock = NULL; | vplock = NULL; | ||||
local_unp_rights = 0; | local_unp_rights = 0; | ||||
UNP_LINK_WLOCK(); | UNP_LINK_WLOCK(); | ||||
LIST_REMOVE(unp, unp_link); | LIST_REMOVE(unp, unp_link); | ||||
if (unp->unp_gcflag & UNPGC_DEAD) | |||||
LIST_REMOVE(unp, unp_dead); | |||||
unp->unp_gencnt = ++unp_gencnt; | unp->unp_gencnt = ++unp_gencnt; | ||||
--unp_count; | --unp_count; | ||||
UNP_LINK_WUNLOCK(); | UNP_LINK_WUNLOCK(); | ||||
UNP_PCB_UNLOCK_ASSERT(unp); | UNP_PCB_UNLOCK_ASSERT(unp); | ||||
restart: | restart: | ||||
if ((vp = unp->unp_vnode) != NULL) { | if ((vp = unp->unp_vnode) != NULL) { | ||||
vplock = mtx_pool_find(mtxpool_sleep, vp); | vplock = mtx_pool_find(mtxpool_sleep, vp); | ||||
▲ Show 20 Lines • Show All 1,687 Lines • ▼ Show 20 Lines | |||||
} | } | ||||
/* | /* | ||||
* unp_defer indicates whether additional work has been defered for a future | * unp_defer indicates whether additional work has been defered for a future | ||||
* pass through unp_gc(). It is thread local and does not require explicit | * pass through unp_gc(). It is thread local and does not require explicit | ||||
* synchronization. | * synchronization. | ||||
*/ | */ | ||||
static int unp_marked; | static int unp_marked; | ||||
static int unp_unreachable; | |||||
static void | static void | ||||
unp_accessable(struct filedescent **fdep, int fdcount) | unp_remove_dead_ref(struct filedescent **fdep, int fdcount) | ||||
{ | { | ||||
struct unpcb *unp; | struct unpcb *unp; | ||||
struct file *fp; | struct file *fp; | ||||
int i; | int i; | ||||
for (i = 0; i < fdcount; i++) { | for (i = 0; i < fdcount; i++) { | ||||
fp = fdep[i]->fde_file; | fp = fdep[i]->fde_file; | ||||
if ((unp = fptounp(fp)) == NULL) | if ((unp = fptounp(fp)) == NULL) | ||||
continue; | continue; | ||||
if (unp->unp_gcflag & UNPGC_REF) | if (!(unp->unp_gcflag & UNPGC_DEAD)) | ||||
markj: The preferred style is an explicit comparison with 0, i.e., (unp->unp_gcflag & UNPGC_DEAD) == 0. | |||||
continue; | continue; | ||||
unp->unp_gcflag &= ~UNPGC_DEAD; | unp->unp_gcrefs--; | ||||
Not Done Inline ActionsIt looks like this update is serialized by virtue of the fact that it is only ever done by the one taskqueue thread. We should add an assertion to that effect. markj: It looks like this update is serialized by virtue of the fact that it is only ever done by the… | |||||
Done Inline ActionsYes, combined with the fact that the link lock is held as a reader by the gc task, while all other paths that update these fields hold it as a writer. jah: Yes, combined with the fact that the link lock is held as a reader by the gc task, while all… | |||||
Done Inline ActionsDumb question: what's the best way to check that a specific taskqueue callout is active on the current thread? I'd naively either use taskqueue_member(taskqueue_thread, curthread) or stash off curthread in unp_gc and check it in these callbacks for INVARIANTS builds, but is there a better way? jah: Dumb question: what's the best way to check that a specific taskqueue callout is active on the… | |||||
Not Done Inline ActionsI was thinking that checking taskqueue_member(curthread) would be fine. I don't really have a better suggestion. markj: I was thinking that checking taskqueue_member(curthread) would be fine. I don't really have a… | |||||
unp->unp_gcflag |= UNPGC_REF; | |||||
unp_marked++; | |||||
} | } | ||||
} | } | ||||
static void | static void | ||||
unp_gc_process(struct unpcb *unp) | unp_restore_undead_ref(struct filedescent **fdep, int fdcount) | ||||
{ | { | ||||
struct socket *so, *soa; | struct unpcb *unp; | ||||
struct file *fp; | struct file *fp; | ||||
int i; | |||||
/* Already processed. */ | for (i = 0; i < fdcount; i++) { | ||||
if (unp->unp_gcflag & UNPGC_SCANNED) | fp = fdep[i]->fde_file; | ||||
return; | if ((unp = fptounp(fp)) == NULL) | ||||
fp = unp->unp_file; | continue; | ||||
if (!(unp->unp_gcflag & UNPGC_DEAD)) | |||||
/* | continue; | ||||
* Check for a socket potentially in a cycle. It must be in a | unp->unp_gcrefs++; | ||||
* queue as indicated by msgcount, and this must equal the file | unp_marked++; | ||||
Not Done Inline ActionsDitto regarding an assertion that these updates aren't racy. markj: Ditto regarding an assertion that these updates aren't racy. | |||||
* reference count. Note that when msgcount is 0 the file is NULL. | |||||
*/ | |||||
if ((unp->unp_gcflag & UNPGC_REF) == 0 && fp && | |||||
unp->unp_msgcount != 0 && fp->f_count == unp->unp_msgcount) { | |||||
unp->unp_gcflag |= UNPGC_DEAD; | |||||
unp_unreachable++; | |||||
return; | |||||
} | } | ||||
} | |||||
static void | |||||
unp_gc_scan(struct unpcb *unp, void (*op)(struct filedescent **, int)) | |||||
{ | |||||
struct socket *so, *soa; | |||||
so = unp->unp_socket; | so = unp->unp_socket; | ||||
SOCK_LOCK(so); | SOCK_LOCK(so); | ||||
if (SOLISTENING(so)) { | if (SOLISTENING(so)) { | ||||
/* | /* | ||||
* Mark all sockets in our accept queue. | * Mark all sockets in our accept queue. | ||||
*/ | */ | ||||
TAILQ_FOREACH(soa, &so->sol_comp, so_list) { | TAILQ_FOREACH(soa, &so->sol_comp, so_list) { | ||||
if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) | if (sotounpcb(soa)->unp_gcflag & UNPGC_IGNORE_RIGHTS) | ||||
continue; | continue; | ||||
SOCKBUF_LOCK(&soa->so_rcv); | SOCKBUF_LOCK(&soa->so_rcv); | ||||
unp_scan(soa->so_rcv.sb_mb, unp_accessable); | unp_scan(soa->so_rcv.sb_mb, op); | ||||
SOCKBUF_UNLOCK(&soa->so_rcv); | SOCKBUF_UNLOCK(&soa->so_rcv); | ||||
} | } | ||||
} else { | } else { | ||||
/* | /* | ||||
* Mark all sockets we reference with RIGHTS. | * Mark all sockets we reference with RIGHTS. | ||||
*/ | */ | ||||
if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) { | if ((unp->unp_gcflag & UNPGC_IGNORE_RIGHTS) == 0) { | ||||
SOCKBUF_LOCK(&so->so_rcv); | SOCKBUF_LOCK(&so->so_rcv); | ||||
unp_scan(so->so_rcv.sb_mb, unp_accessable); | unp_scan(so->so_rcv.sb_mb, op); | ||||
SOCKBUF_UNLOCK(&so->so_rcv); | SOCKBUF_UNLOCK(&so->so_rcv); | ||||
} | } | ||||
} | } | ||||
SOCK_UNLOCK(so); | SOCK_UNLOCK(so); | ||||
unp->unp_gcflag |= UNPGC_SCANNED; | |||||
} | } | ||||
static int unp_recycled; | static int unp_recycled; | ||||
SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, | SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, | ||||
"Number of unreachable sockets claimed by the garbage collector."); | "Number of unreachable sockets claimed by the garbage collector."); | ||||
static int unp_taskcount; | static int unp_taskcount; | ||||
SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, | SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, | ||||
"Number of times the garbage collector has run."); | "Number of times the garbage collector has run."); | ||||
SYSCTL_UINT(_net_local, OID_AUTO, sockcount, CTLFLAG_RD, &unp_count, 0, | |||||
"Number of active local sockets."); | |||||
static void | static void | ||||
unp_gc(__unused void *arg, int pending) | unp_gc(__unused void *arg, int pending) | ||||
{ | { | ||||
struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead, | struct unp_head *heads[] = { &unp_dhead, &unp_shead, &unp_sphead, | ||||
NULL }; | NULL }; | ||||
struct unp_head **head; | struct unp_head **head; | ||||
struct unp_head unp_deadhead; /* List of potentially-dead sockets. */ | |||||
struct file *f, **unref; | struct file *f, **unref; | ||||
struct unpcb *unp; | struct unpcb *unp, *unptmp; | ||||
int i, total; | int i, total, unp_unreachable; | ||||
LIST_INIT(&unp_deadhead); | |||||
unp_taskcount++; | unp_taskcount++; | ||||
UNP_LINK_RLOCK(); | UNP_LINK_RLOCK(); | ||||
/* | /* | ||||
* First clear all gc flags from previous runs, apart from | * First clear all gc state from previous runs, apart from | ||||
* UNPGC_IGNORE_RIGHTS. | * UNPGC_IGNORE_RIGHTS. | ||||
*/ | */ | ||||
Not Done Inline ActionsThis comment is a bit stale, there is no state to clear anymore. markj: This comment is a bit stale, there is no state to clear anymore. | |||||
unp_unreachable = 0; | |||||
for (head = heads; *head != NULL; head++) | for (head = heads; *head != NULL; head++) | ||||
LIST_FOREACH(unp, *head, unp_link) | LIST_FOREACH(unp, *head, unp_link) { | ||||
unp->unp_gcflag = | unp->unp_gcflag = | ||||
jahAuthorUnsubmitted Done Inline ActionsThis can be turned into a KASSERT on !(unp_gcflag & ~UNPGC_IGNORE_RIGHTS). Otherwise it can unnecessarily dirty a lot of cachelines. jah: This can be turned into a KASSERT on !(unp_gcflag & ~UNPGC_IGNORE_RIGHTS). Otherwise it can… | |||||
markjUnsubmitted Done Inline ActionsSorry, can you explain why? uipc_dispose is called before the socket is removed from the global lists. markj: Sorry, can you explain why? uipc_dispose is called before the socket is removed from the global… | |||||
jahAuthorUnsubmitted Done Inline ActionsWe were unconditionally writing unp_gcflag in a cacheline-aligned struct for all active sockets. IMO it seems reasonably likely that the portion of still-still active sockets that have had unp_dispose() called on them will be fairly small, just as it seems likely the portion of potentially-cyclic sockets will be small. jah: We were unconditionally writing unp_gcflag in a cacheline-aligned struct for all active sockets. | |||||
jahAuthorUnsubmitted Done Inline Actions"still-still active" -> "still-active". Haven't had my coffee yet. jah: "still-still active" -> "still-active". Haven't had my coffee yet.
Also worth noting that… | |||||
markjUnsubmitted Done Inline ActionsI see now. My concern was about correctness, but it was because I had missed the "~" in the assertion condition. Sorry for the noise. markj: I see now. My concern was about correctness, but it was because I had missed the "~" in the… | |||||
(unp->unp_gcflag & UNPGC_IGNORE_RIGHTS); | (unp->unp_gcflag & UNPGC_IGNORE_RIGHTS); | ||||
f = unp->unp_file; | |||||
/* | /* | ||||
* Scan marking all reachable sockets with UNPGC_REF. Once a socket | * Check for a socket potentially in a cycle. It must | ||||
* is reachable all of the sockets it references are reachable. | * be in a queue as indicated by msgcount, and this | ||||
* must equal the file reference count. Note that when | |||||
* msgcount is 0 the file is NULL. | |||||
*/ | |||||
Not Done Inline ActionsTo be pedantic, we are looking for unreachable sockets in a cycle. markj: To be pedantic, we are looking for unreachable sockets in a cycle. | |||||
if (f != NULL && unp->unp_msgcount != 0 && | |||||
f->f_count == unp->unp_msgcount) { | |||||
LIST_INSERT_HEAD(&unp_deadhead, unp, unp_dead); | |||||
unp->unp_gcflag |= UNPGC_DEAD; | |||||
unp->unp_gcrefs = unp->unp_msgcount; | |||||
unp_unreachable++; | |||||
} | |||||
} | |||||
/* | |||||
* Only one potentially unreachable socket cannot form a cycle. | |||||
* If unp_unreachable is 0, then the dead list traversals below | |||||
* will immediately fall through. | |||||
*/ | |||||
Not Done Inline ActionsI can't quite see why this is true. Suppose I create a socket pair <s1, s2> and send s2 over s1 to s2. Then I close s1 and s2. s2's receive buffer holds an internalized reference to s2 and the socket is orphaned so we need GC to clean it up, but unp_unreachable will be 1. Of course, as soon as a second socket is leaked that way we will find and dispose of both of them, so it should not really be a problem in practice. markj: I can't quite see why this is true. Suppose I create a socket pair <s1, s2> and send s2 over s1… | |||||
Done Inline ActionsI think that's right. I very nearly deleted this check when writing the patch anyway, because it's not very clean. So let's just get rid of it. jah: I think that's right. I very nearly deleted this check when writing the patch anyway, because… | |||||
if (unp_unreachable == 1) { | |||||
LIST_FIRST(&unp_deadhead)->unp_gcflag &= ~UNPGC_DEAD; | |||||
UNP_LINK_RUNLOCK(); | |||||
return; | |||||
} | |||||
/* | |||||
* Scan all sockets previously marked as potentially being in a cycle | |||||
* and remove the references each socket holds on any UNPGC_DEAD | |||||
* sockets in its queue. After this step, all remaining references on | |||||
* sockets marked UNPGC_DEAD should not be part of any cycle. | |||||
*/ | |||||
LIST_FOREACH(unp, &unp_deadhead, unp_dead) | |||||
unp_gc_scan(unp, unp_remove_dead_ref); | |||||
/* | |||||
* If a socket still has a non-negative refcount, it cannot be in a | |||||
* cycle. In this case increment refcount of all children iteratively. | |||||
* Stop the scan once we do a complete loop without discovering | * Stop the scan once we do a complete loop without discovering | ||||
* a new reachable socket. | * a new reachable socket. | ||||
*/ | */ | ||||
do { | do { | ||||
unp_unreachable = 0; | |||||
unp_marked = 0; | unp_marked = 0; | ||||
for (head = heads; *head != NULL; head++) | LIST_FOREACH_SAFE(unp, &unp_deadhead, unp_dead, unptmp) | ||||
LIST_FOREACH(unp, *head, unp_link) | if (unp->unp_gcrefs > 0) { | ||||
unp_gc_process(unp); | unp->unp_gcflag &= ~UNPGC_DEAD; | ||||
LIST_REMOVE(unp, unp_dead); | |||||
KASSERT(unp_unreachable > 0, | |||||
("unp_gc: unp_unreachable underflow.")); | |||||
unp_unreachable--; | |||||
unp_gc_scan(unp, unp_restore_undead_ref); | |||||
} | |||||
} while (unp_marked); | } while (unp_marked); | ||||
UNP_LINK_RUNLOCK(); | UNP_LINK_RUNLOCK(); | ||||
if (unp_unreachable == 0) | if (unp_unreachable == 0) | ||||
return; | return; | ||||
/* | /* | ||||
* Allocate space for a local list of dead unpcbs. | * Allocate space for a local array of dead unpcbs. | ||||
* TODO: can this path be simplified by instead using the local | |||||
* dead list at unp_deadhead, after taking out references | |||||
* on the file object and/or unpcb and dropping the link lock? | |||||
*/ | */ | ||||
unref = malloc(unp_unreachable * sizeof(struct file *), | unref = malloc(unp_unreachable * sizeof(struct file *), | ||||
M_TEMP, M_WAITOK); | M_TEMP, M_WAITOK); | ||||
/* | /* | ||||
* Iterate looking for sockets which have been specifically marked | * Iterate looking for sockets which have been specifically marked | ||||
* as as unreachable and store them locally. | * as unreachable and store them locally. | ||||
*/ | */ | ||||
UNP_LINK_RLOCK(); | UNP_LINK_RLOCK(); | ||||
for (total = 0, head = heads; *head != NULL; head++) | total = 0; | ||||
LIST_FOREACH(unp, *head, unp_link) | LIST_FOREACH(unp, &unp_deadhead, unp_dead) { | ||||
if ((unp->unp_gcflag & UNPGC_DEAD) != 0) { | unp->unp_gcflag &= ~UNPGC_DEAD; | ||||
Not Done Inline ActionsPerhaps assert that UNPGC_DEAD is set? markj: Perhaps assert that UNPGC_DEAD is set? | |||||
f = unp->unp_file; | f = unp->unp_file; | ||||
if (unp->unp_msgcount == 0 || f == NULL || | if (unp->unp_msgcount == 0 || f == NULL || | ||||
f->f_count != unp->unp_msgcount || | f->f_count != unp->unp_msgcount || | ||||
!fhold(f)) | !fhold(f)) | ||||
continue; | continue; | ||||
unref[total++] = f; | unref[total++] = f; | ||||
KASSERT(total <= unp_unreachable, | KASSERT(total <= unp_unreachable, | ||||
("unp_gc: incorrect unreachable count.")); | ("unp_gc: incorrect unreachable count.")); | ||||
} | } | ||||
UNP_LINK_RUNLOCK(); | UNP_LINK_RUNLOCK(); | ||||
/* | /* | ||||
* Now flush all sockets, free'ing rights. This will free the | * Now flush all sockets, free'ing rights. This will free the | ||||
* struct files associated with these sockets but leave each socket | * struct files associated with these sockets but leave each socket | ||||
* with one remaining ref. | * with one remaining ref. | ||||
*/ | */ | ||||
for (i = 0; i < total; i++) { | for (i = 0; i < total; i++) { | ||||
▲ Show 20 Lines • Show All 230 Lines • Show Last 20 Lines |
The preferred style is an explicit comparison with 0, i.e., (unp->unp_gcflag & UNPGC_DEAD) == 0.