Index: sys/netinet/in_pcb.h
===================================================================
--- sys/netinet/in_pcb.h
+++ sys/netinet/in_pcb.h
@@ -45,6 +45,7 @@
 #include
 
 #ifdef _KERNEL
+#include
 #include
 #include
 #include
@@ -245,6 +246,8 @@
 		struct route inpu_route;
 		struct route_in6 inpu_route6;
 	} inp_rtu;
+	ebr_entry_t inp_ebr_entry;
+
 #define inp_route inp_rtu.inpu_route
 #define inp_route6 inp_rtu.inpu_route6
 };
@@ -408,6 +411,7 @@
 	 * Global lock protecting global inpcb list, inpcb count, etc.
 	 */
 	struct rwlock ipi_list_lock;
+	ebr_epoch_t ipi_epoch;
 };
 
 #ifdef _KERNEL
@@ -492,7 +496,26 @@
 
 #endif /* _KERNEL */
 
+/*
+ * Epoch (EBR) read-side helpers for an inpcbinfo.
+ *
+ * INP_INFO_EBR_RLOCK() stores the result of ebr_epoch_read_lock() in a
+ * variable named "cookie", which the calling function must declare itself;
+ * INP_INFO_EBR_RUNLOCK() hands that same variable to
+ * ebr_epoch_read_unlock() and does not use its "info" argument.
+ */
+#define INP_INFO_EBR_RLOCK(info) do {	\
+	cookie = ebr_epoch_read_lock((info)->ipi_epoch);	\
+} while (0)
+
+#define INP_INFO_EBR_RUNLOCK(info) do {	\
+	ebr_epoch_read_unlock(cookie);	\
+} while (0)
+
+#define INP_INFO_EBR_SYNCHRONIZE(info)	\
+	ebr_epoch_synchronize((info)->ipi_epoch)
+
 #define INP_INFO_LOCK_INIT(ipi, d) \
 	rw_init_flags(&(ipi)->ipi_lock, (d), RW_RECURSE)
 #define INP_INFO_LOCK_DESTROY(ipi) rw_destroy(&(ipi)->ipi_lock)
 #define INP_INFO_RLOCK(ipi) rw_rlock(&(ipi)->ipi_lock)
Index: sys/netinet/in_pcb.c
===================================================================
--- sys/netinet/in_pcb.c
+++ sys/netinet/in_pcb.c
@@ -227,9 +227,12 @@
 	INP_INFO_LOCK_INIT(pcbinfo, name);
 	INP_HASH_LOCK_INIT(pcbinfo, "pcbinfohash");	/* XXXRW: argument? */
 	INP_LIST_LOCK_INIT(pcbinfo, "pcbinfolist");
+	pcbinfo->ipi_epoch = ebr_epoch_alloc(5);
+
 #ifdef VIMAGE
 	pcbinfo->ipi_vnet = curvnet;
 #endif
+
 	pcbinfo->ipi_listhead = listhead;
 	LIST_INIT(pcbinfo->ipi_listhead);
 	pcbinfo->ipi_count = 0;
@@ -292,6 +295,11 @@
 	inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
 	if (inp == NULL)
 		return (ENOBUFS);
+	if ((inp->inp_ebr_entry = ebr_epoch_entry_alloc(M_NOWAIT)) == NULL) {
+		uma_zfree(pcbinfo->ipi_zone, inp);
+		return (ENOBUFS);
+	}
+
 	bzero(inp, inp_zero_size);
 	inp->inp_pcbinfo = pcbinfo;
 	inp->inp_socket = so;
@@ -332,6 +340,7 @@
 	refcount_init(&inp->inp_refcount, 1);	/* Reference from inpcbinfo */
 	INP_LIST_WUNLOCK(pcbinfo);
 #if defined(IPSEC) || defined(MAC)
+
 out:
 	if (error != 0) {
 		crfree(inp->inp_cred);
@@ -1172,6 +1181,48 @@
 	refcount_acquire(&inp->inp_refcount);
 }
 
+/*
+ * Deferred-free callback for in_pcb_safe_free(): releases the inpcb's EBR
+ * entry and returns the inpcb (passed as the cookie) to its pcbinfo's zone.
+ */
+static void
+inp_deferred_free(void *cookie)
+{
+	struct inpcb *inp = cookie;
+	struct inpcbinfo *pcbinfo;
+
+	pcbinfo = inp->inp_pcbinfo;
+	ebr_epoch_entry_free(inp->inp_ebr_entry);
+	uma_zfree(pcbinfo->ipi_zone, inp);
+}
+
+/*
+ * Release the memory of an inpcb whose last reference has been dropped.
+ * Threads flagged TDP_ITHREAD hand the inpcb to ebr_epoch_defer() with
+ * inp_deferred_free() as the callback; every other caller runs
+ * INP_INFO_EBR_SYNCHRONIZE() first and then frees the EBR entry and the
+ * inpcb directly.  The epoch is taken from the inpcb's own pcbinfo rather
+ * than from a fixed protocol's pcbinfo.
+ */
+static inline void
+in_pcb_safe_free(struct inpcb *inp)
+{
+	struct inpcbinfo *pcbinfo;
+
+	pcbinfo = inp->inp_pcbinfo;
+	if (curthread->td_pflags & TDP_ITHREAD) {
+		ebr_epoch_entry_init(pcbinfo->ipi_epoch, inp->inp_ebr_entry,
+		    inp, false);
+		ebr_epoch_defer(pcbinfo->ipi_epoch, inp->inp_ebr_entry,
+		    inp_deferred_free);
+		return;
+	}
+
+	INP_INFO_EBR_SYNCHRONIZE(pcbinfo);
+	ebr_epoch_entry_free(inp->inp_ebr_entry);
+	uma_zfree(pcbinfo->ipi_zone, inp);
+}
+
 /*
  * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to
  * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we
@@ -1187,8 +1238,6 @@
 int
 in_pcbrele_rlocked(struct inpcb *inp)
 {
-	struct inpcbinfo *pcbinfo;
-
 	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
 
 	INP_RLOCK_ASSERT(inp);
@@ -1208,15 +1257,13 @@
 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
 
 	INP_RUNLOCK(inp);
-	pcbinfo = inp->inp_pcbinfo;
-	uma_zfree(pcbinfo->ipi_zone, inp);
+	in_pcb_safe_free(inp);
 	return (1);
 }
 
 int
 in_pcbrele_wlocked(struct inpcb *inp)
 {
-	struct inpcbinfo *pcbinfo;
 
 	KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__));
 
@@ -1237,8 +1284,7 @@
 	KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
 
 	INP_WUNLOCK(inp);
-	pcbinfo = inp->inp_pcbinfo;
-	uma_zfree(pcbinfo->ipi_zone, inp);
+	in_pcb_safe_free(inp);
 	return (1);
 }
 
Index: sys/netinet/tcp_timer.c
===================================================================
--- sys/netinet/tcp_timer.c
+++ sys/netinet/tcp_timer.c
@@ -311,13 +311,14 @@
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
+	void *cookie;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
-	INP_INFO_RLOCK(&V_tcbinfo);
+	INP_INFO_EBR_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
@@ -325,14 +326,14 @@
 	if (callout_pending(&tp->t_timers->tt_2msl) ||
 	    !callout_active(&tp->t_timers->tt_2msl)) {
 		INP_WUNLOCK(tp->t_inpcb);
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_2msl);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
@@ -355,7 +356,7 @@
 	 */
 	if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
 		INP_WUNLOCK(inp);
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
@@ -383,7 +384,7 @@
 
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
-	INP_INFO_RUNLOCK(&V_tcbinfo);
+	INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
@@ -393,27 +394,28 @@
 	struct tcpcb *tp = xtp;
 	struct tcptemp *t_template;
 	struct inpcb *inp;
+	void *cookie;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
-	INP_INFO_RLOCK(&V_tcbinfo);
+	INP_INFO_EBR_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_keep) ||
 	    !callout_active(&tp->t_timers->tt_keep)) {
 		INP_WUNLOCK(inp);
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_keep);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
@@ -468,7 +470,7 @@
 #endif
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	INP_WUNLOCK(inp);
-	INP_INFO_RUNLOCK(&V_tcbinfo);
+	INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 	return;
 
@@ -484,7 +486,7 @@
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	if (tp != NULL)
 		INP_WUNLOCK(tp->t_inpcb);
-	INP_INFO_RUNLOCK(&V_tcbinfo);
+	INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
@@ -493,27 +495,28 @@
 {
 	struct tcpcb *tp = xtp;
 	struct inpcb *inp;
+	void *cookie;
 	CURVNET_SET(tp->t_vnet);
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
-	INP_INFO_RLOCK(&V_tcbinfo);
+	INP_INFO_EBR_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_persist) ||
 	    !callout_active(&tp->t_timers->tt_persist)) {
 		INP_WUNLOCK(inp);
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_persist);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
@@ -563,7 +566,7 @@
 	TCP_PROBE2(debug__user, tp, PRU_SLOWTIMO);
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
-	INP_INFO_RUNLOCK(&V_tcbinfo);
+	INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 
@@ -575,27 +578,28 @@
 	int rexmt;
 	int headlocked;
 	struct inpcb *inp;
+	void *cookie;
 #ifdef TCPDEBUG
 	int ostate;
 
 	ostate = tp->t_state;
 #endif
 
-	INP_INFO_RLOCK(&V_tcbinfo);
+	INP_INFO_EBR_RLOCK(&V_tcbinfo);
 	inp = tp->t_inpcb;
 	KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
 	INP_WLOCK(inp);
 	if (callout_pending(&tp->t_timers->tt_rexmt) ||
 	    !callout_active(&tp->t_timers->tt_rexmt)) {
 		INP_WUNLOCK(inp);
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
 	callout_deactivate(&tp->t_timers->tt_rexmt);
 	if ((inp->inp_flags & INP_DROPPED) != 0) {
 		INP_WUNLOCK(inp);
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 		CURVNET_RESTORE();
 		return;
 	}
@@ -622,7 +626,7 @@
 		headlocked = 1;
 		goto out;
 	}
-	INP_INFO_RUNLOCK(&V_tcbinfo);
+	INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 	headlocked = 0;
 	if (tp->t_state == TCPS_SYN_SENT) {
 		/*
@@ -821,7 +825,7 @@
 	if (tp != NULL)
 		INP_WUNLOCK(inp);
 	if (headlocked)
-		INP_INFO_RUNLOCK(&V_tcbinfo);
+		INP_INFO_EBR_RUNLOCK(&V_tcbinfo);
 	CURVNET_RESTORE();
 }
 