Index: sys/netpfil/pf/if_pfsync.c =================================================================== --- sys/netpfil/pf/if_pfsync.c +++ sys/netpfil/pf/if_pfsync.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include @@ -106,6 +107,8 @@ sizeof(struct pfsync_header) + \ sizeof(struct pfsync_subheader) ) +struct pfsync_bucket; + struct pfsync_pkt { struct ip *ip; struct in_addr src; @@ -164,7 +167,7 @@ }; static void pfsync_q_ins(struct pf_state *, int, bool); -static void pfsync_q_del(struct pf_state *, bool); +static void pfsync_q_del(struct pf_state *, bool, struct pfsync_bucket *); static void pfsync_update_state(struct pf_state *); @@ -183,6 +186,35 @@ struct mbuf *pd_m; }; +struct pfsync_sofct; + +struct pfsync_bucket +{ + struct mtx b_mtx; + + int b_id; + struct pfsync_softc *b_sc; + int b_flags; +#define PFSYNCF_BUCKET_PUSH 0x00000001 + + size_t b_len; + TAILQ_HEAD(, pf_state) b_qs[PFSYNC_S_COUNT]; + + /* Only in Queue 0 */ + TAILQ_HEAD(, pfsync_upd_req_item) b_upd_req_list; + TAILQ_HEAD(, pfsync_deferral) b_deferrals; + + u_int b_deferred; + void *b_plus; + size_t b_pluslen; + /* End only in Queue 0 */ + + struct ifaltq b_snd; + + struct callout b_tmo; + void *b_pfsync_swi_cookie; +}; + struct pfsync_softc { /* Configuration */ struct ifnet *sc_ifp; @@ -192,20 +224,12 @@ uint32_t sc_flags; #define PFSYNCF_OK 0x00000001 #define PFSYNCF_DEFER 0x00000002 -#define PFSYNCF_PUSH 0x00000004 uint8_t sc_maxupdates; struct ip sc_template; - struct callout sc_tmo; struct mtx sc_mtx; /* Queued data */ - size_t sc_len; - TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; - TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; - TAILQ_HEAD(, pfsync_deferral) sc_deferrals; - u_int sc_deferred; - void *sc_plus; - size_t sc_pluslen; + struct pfsync_bucket *sc_buckets; /* Bulk update info */ struct mtx sc_bulk_mtx; @@ -223,6 +247,10 @@ #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) +#define PFSYNC_BUCKET_LOCK(b) mtx_lock(&(b)->b_mtx) +#define PFSYNC_BUCKET_UNLOCK(b) mtx_unlock(&(b)->b_mtx) +#define PFSYNC_BUCKET_ASSERT(b) mtx_assert(&(b)->b_mtx, MA_OWNED) + #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) @@ -231,15 +259,14 @@ static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); VNET_DEFINE_STATIC(struct pfsync_softc *, pfsyncif) = NULL; #define V_pfsyncif VNET(pfsyncif) -VNET_DEFINE_STATIC(void *, pfsync_swi_cookie) = NULL; -#define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) VNET_DEFINE_STATIC(struct pfsyncstats, pfsyncstats); #define V_pfsyncstats VNET(pfsyncstats) VNET_DEFINE_STATIC(int, pfsync_carp_adj) = CARP_MAXSKEW; #define V_pfsync_carp_adj VNET(pfsync_carp_adj) static void pfsync_timeout(void *); -static void pfsync_push(struct pfsync_softc *); +static void pfsync_push(struct pfsync_bucket *); +static void pfsync_push_all(struct pfsync_softc *); static void pfsyncintr(void *); static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, void *); @@ -249,12 +276,16 @@ static int pfsync_init(void); static void pfsync_uninit(void); +static unsigned long pfsync_buckets; + SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(pfsyncstats), pfsyncstats, "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); +SYSCTL_ULONG(_net_pfsync, OID_AUTO, pfsync_buckets, CTLFLAG_RDTUN, + &pfsync_buckets, 0, "Number of pfsync hash buckets"); static int pfsync_clone_create(struct if_clone *, int, caddr_t); static void pfsync_clone_destroy(struct ifnet *); @@ -273,7 +304,7 @@ static void pfsync_update_state_req(struct pf_state *); static void pfsync_drop(struct pfsync_softc *); -static void pfsync_sendout(int); +static void pfsync_sendout(int, int); static void pfsync_send_plus(void *, size_t); static void pfsync_bulk_start(void); @@ -285,6 +316,9 @@ #ifdef IPSEC static void pfsync_update_net_tdb(struct pfsync_tdb *); #endif +static struct pfsync_bucket *pfsync_get_bucket(struct pfsync_softc *, + struct pf_state *); + #define PFSYNC_MAX_BULKTRIES 12 @@ -296,21 +330,16 @@ { struct pfsync_softc *sc; struct ifnet *ifp; - int q; + int error; + int c, q; if (unit != 0) return (EINVAL); - sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); - sc->sc_flags |= PFSYNCF_OK; - - for (q = 0; q < PFSYNC_S_COUNT; q++) - TAILQ_INIT(&sc->sc_qs[q]); + if (! pfsync_buckets) + pfsync_buckets = mp_ncpus * 2; - TAILQ_INIT(&sc->sc_upd_req_list); - TAILQ_INIT(&sc->sc_deferrals); - - sc->sc_len = PFSYNC_MINPKT; + sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); sc->sc_maxupdates = 128; ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); @@ -318,17 +347,16 @@ free(sc, M_PFSYNC); return (ENOSPC); } + if_initname(ifp, pfsyncname, unit); ifp->if_softc = sc; ifp->if_ioctl = pfsyncioctl; ifp->if_output = pfsyncoutput; ifp->if_type = IFT_PFSYNC; - ifp->if_snd.ifq_maxlen = ifqmaxlen; ifp->if_hdrlen = sizeof(struct pfsync_header); ifp->if_mtu = ETHERMTU; mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); - callout_init(&sc->sc_tmo, 1); callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); @@ -336,6 +364,29 @@ bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); + sc->sc_buckets = mallocarray(pfsync_buckets, sizeof(*sc->sc_buckets), M_PFSYNC, M_ZERO | M_WAITOK); + for (c = 0; c < pfsync_buckets; c++) { + mtx_init(&sc->sc_buckets[c].b_mtx, pfsyncname, NULL, MTX_DEF); + + sc->sc_buckets[c].b_id = c; + sc->sc_buckets[c].b_sc = sc; + sc->sc_buckets[c].b_len = PFSYNC_MINPKT; + + for (q = 0; q < PFSYNC_S_COUNT; q++) + TAILQ_INIT(&sc->sc_buckets[c].b_qs[q]); + + TAILQ_INIT(&sc->sc_buckets[c].b_upd_req_list); + TAILQ_INIT(&sc->sc_buckets[c].b_deferrals); + + error = swi_add(NULL, pfsyncname, pfsyncintr, &sc->sc_buckets[c], + SWI_NET, INTR_MPSAFE, &sc->sc_buckets[c].b_pfsync_swi_cookie); + KASSERT(error == 0, ("swi_add failed")); + + callout_init(&sc->sc_buckets[c].b_tmo, 1); + + sc->sc_buckets[c].b_snd.ifq_maxlen = ifqmaxlen; + } + V_pfsyncif = sc; return (0); @@ -345,29 +396,33 @@ pfsync_clone_destroy(struct ifnet *ifp) { struct pfsync_softc *sc = ifp->if_softc; + int c; - /* - * At this stage, everything should have already been - * cleared by pfsync_uninit(), and we have only to - * drain callouts. - */ - while (sc->sc_deferred > 0) { - struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); - - TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); - sc->sc_deferred--; - if (callout_stop(&pd->pd_tmo) > 0) { - pf_release_state(pd->pd_st); - m_freem(pd->pd_m); - free(pd, M_PFSYNC); - } else { - pd->pd_refs++; - callout_drain(&pd->pd_tmo); - free(pd, M_PFSYNC); + for (c = 0; c < pfsync_buckets; c++) { + /* + * At this stage, everything should have already been + * cleared by pfsync_uninit(), and we have only to + * drain callouts. + */ + while (sc->sc_buckets[c].b_deferred > 0) { + struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_buckets[c].b_deferrals); + + TAILQ_REMOVE(&sc->sc_buckets[c].b_deferrals, pd, pd_entry); + sc->sc_buckets[c].b_deferred--; + if (callout_stop(&pd->pd_tmo) > 0) { + pf_release_state(pd->pd_st); + m_freem(pd->pd_m); + free(pd, M_PFSYNC); + } else { + pd->pd_refs++; + callout_drain(&pd->pd_tmo); + free(pd, M_PFSYNC); + } } + + callout_drain(&sc->sc_buckets[c].b_tmo); } - callout_drain(&sc->sc_tmo); callout_drain(&sc->sc_bulkfail_tmo); callout_drain(&sc->sc_bulk_tmo); @@ -383,6 +438,12 @@ pfsync_multicast_cleanup(sc); mtx_destroy(&sc->sc_mtx); mtx_destroy(&sc->sc_bulk_mtx); + + for (c = 0; c < pfsync_buckets; c++) { + swi_remove(sc->sc_buckets[c].b_pfsync_swi_cookie); + } + + free(sc->sc_buckets, M_PFSYNC); free(sc, M_PFSYNC); V_pfsyncif = NULL; @@ -546,7 +607,7 @@ st->state_flags &= ~PFSTATE_NOSYNC; if (st->state_flags & PFSTATE_ACK) { pfsync_q_ins(st, PFSYNC_S_IACK, true); - pfsync_push(sc); + pfsync_push_all(sc); } } st->state_flags &= ~PFSTATE_ACK; @@ -785,9 +846,7 @@ continue; if (st->state_flags & PFSTATE_ACK) { - PFSYNC_LOCK(V_pfsyncif); pfsync_undefer_state(st, 0); - PFSYNC_UNLOCK(V_pfsyncif); } PF_STATE_UNLOCK(st); } @@ -876,9 +935,7 @@ } if (st->state_flags & PFSTATE_ACK) { - PFSYNC_LOCK(sc); pfsync_undefer_state(st, 1); - PFSYNC_UNLOCK(sc); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) @@ -912,9 +969,7 @@ pfsync_update_state(st); PF_STATE_UNLOCK(st); - PFSYNC_LOCK(sc); - pfsync_push(sc); - PFSYNC_UNLOCK(sc); + pfsync_push_all(sc); continue; } PF_STATE_UNLOCK(st); @@ -960,16 +1015,14 @@ st = pf_find_state_byid(up->id, up->creatorid); if (st == NULL) { /* We don't have this state. Ask for it. */ - PFSYNC_LOCK(sc); + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); pfsync_request_update(up->creatorid, up->id); - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); continue; } if (st->state_flags & PFSTATE_ACK) { - PFSYNC_LOCK(sc); pfsync_undefer_state(st, 1); - PFSYNC_UNLOCK(sc); } if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) @@ -1003,9 +1056,7 @@ pfsync_update_state(st); PF_STATE_UNLOCK(st); - PFSYNC_LOCK(sc); - pfsync_push(sc); - PFSYNC_UNLOCK(sc); + pfsync_push_all(sc); continue; } PF_STATE_UNLOCK(st); @@ -1283,6 +1334,7 @@ struct ifreq *ifr = (struct ifreq *)data; struct pfsyncreq pfsyncr; int error; + int c; switch (cmd) { case SIOCSIFFLAGS: @@ -1303,10 +1355,12 @@ ifr->ifr_mtu > sc->sc_sync_if->if_mtu) return (EINVAL); if (ifr->ifr_mtu < ifp->if_mtu) { - PFSYNC_LOCK(sc); - if (sc->sc_len > PFSYNC_MINPKT) - pfsync_sendout(1); - PFSYNC_UNLOCK(sc); + for (c = 0; c < pfsync_buckets; c++) { + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); + if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT) + pfsync_sendout(1, c); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); + } } ifp->if_mtu = ifr->ifr_mtu; break; @@ -1379,12 +1433,16 @@ break; } - if (sc->sc_len > PFSYNC_MINPKT && - (sifp->if_mtu < sc->sc_ifp->if_mtu || - (sc->sc_sync_if != NULL && - sifp->if_mtu < sc->sc_sync_if->if_mtu) || - sifp->if_mtu < MCLBYTES - sizeof(struct ip))) - pfsync_sendout(1); + for (c = 0; c < pfsync_buckets; c++) { + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); + if (sc->sc_buckets[c].b_len > PFSYNC_MINPKT && + (sifp->if_mtu < sc->sc_ifp->if_mtu || + (sc->sc_sync_if != NULL && + sifp->if_mtu < sc->sc_sync_if->if_mtu) || + sifp->if_mtu < MCLBYTES - sizeof(struct ip))) + pfsync_sendout(1, c); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); + } if (imo->imo_membership) pfsync_multicast_cleanup(sc); @@ -1421,8 +1479,10 @@ sc->sc_flags &= ~PFSYNCF_OK; if (V_pf_status.debug >= PF_DEBUG_MISC) printf("pfsync: requesting bulk update\n"); - pfsync_request_update(0, 0); PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); + pfsync_request_update(0, 0); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); PFSYNC_BLOCK(sc); sc->sc_ureq_sent = time_uptime; callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, @@ -1483,33 +1543,34 @@ { struct pf_state *st, *next; struct pfsync_upd_req_item *ur; - int q; + int c, q; - for (q = 0; q < PFSYNC_S_COUNT; q++) { - if (TAILQ_EMPTY(&sc->sc_qs[q])) - continue; + for (c = 0; c < pfsync_buckets; c++) { + for (q = 0; q < PFSYNC_S_COUNT; q++) { + if (TAILQ_EMPTY(&sc->sc_buckets[c].b_qs[q])) + continue; - TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { - KASSERT(st->sync_state == q, - ("%s: st->sync_state == q", - __func__)); - st->sync_state = PFSYNC_S_NONE; - pf_release_state(st); + TAILQ_FOREACH_SAFE(st, &sc->sc_buckets[c].b_qs[q], sync_list, next) { + KASSERT(st->sync_state == q, + ("%s: st->sync_state == q", + __func__)); + st->sync_state = PFSYNC_S_NONE; + pf_release_state(st); + } + TAILQ_INIT(&sc->sc_buckets[c].b_qs[q]); } - TAILQ_INIT(&sc->sc_qs[q]); + sc->sc_buckets[c].b_len = PFSYNC_MINPKT; + sc->sc_buckets[c].b_plus = NULL; } - while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { - TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); + while ((ur = TAILQ_FIRST(&sc->sc_buckets[c].b_upd_req_list)) != NULL) { + TAILQ_REMOVE(&sc->sc_buckets[c].b_upd_req_list, ur, ur_entry); free(ur, M_PFSYNC); } - - sc->sc_plus = NULL; - sc->sc_len = PFSYNC_MINPKT; } static void -pfsync_sendout(int schedswi) +pfsync_sendout(int schedswi, int c) { struct pfsync_softc *sc = V_pfsyncif; struct ifnet *ifp = sc->sc_ifp; @@ -1523,23 +1584,23 @@ int q, count = 0; KASSERT(sc != NULL, ("%s: null sc", __func__)); - KASSERT(sc->sc_len > PFSYNC_MINPKT, - ("%s: sc_len %zu", __func__, sc->sc_len)); - PFSYNC_LOCK_ASSERT(sc); + KASSERT(sc->sc_buckets[c].b_len > PFSYNC_MINPKT, + ("%s: sc_len %zu", __func__, sc->sc_buckets[c].b_len)); + PFSYNC_BUCKET_ASSERT(&sc->sc_buckets[c]); if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { pfsync_drop(sc); return; } - m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR); + m = m_get2(max_linkhdr + sc->sc_buckets[c].b_len, M_NOWAIT, MT_DATA, M_PKTHDR); if (m == NULL) { if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); V_pfsyncstats.pfsyncs_onomem++; return; } m->m_data += max_linkhdr; - m->m_len = m->m_pkthdr.len = sc->sc_len; + m->m_len = m->m_pkthdr.len = sc->sc_buckets[c].b_len; /* build the ip header */ ip = (struct ip *)m->m_data; @@ -1555,19 +1616,19 @@ offset += sizeof(*ph); ph->version = PFSYNC_VERSION; - ph->len = htons(sc->sc_len - sizeof(*ip)); + ph->len = htons(sc->sc_buckets[c].b_len - sizeof(*ip)); bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); /* walk the queues */ for (q = 0; q < PFSYNC_S_COUNT; q++) { - if (TAILQ_EMPTY(&sc->sc_qs[q])) + if (TAILQ_EMPTY(&sc->sc_buckets[c].b_qs[q])) continue; subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; - TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) { + TAILQ_FOREACH_SAFE(st, &sc->sc_buckets[c].b_qs[q], sync_list, st_next) { KASSERT(st->sync_state == q, ("%s: st->sync_state == q", __func__)); @@ -1581,7 +1642,7 @@ pf_release_state(st); count++; } - TAILQ_INIT(&sc->sc_qs[q]); + TAILQ_INIT(&sc->sc_buckets[c].b_qs[q]); bzero(subh, sizeof(*subh)); subh->action = pfsync_qs[q].action; @@ -1589,13 +1650,13 @@ V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; } - if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { + if (!TAILQ_EMPTY(&sc->sc_buckets[c].b_upd_req_list)) { subh = (struct pfsync_subheader *)(m->m_data + offset); offset += sizeof(*subh); count = 0; - while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { - TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); + while ((ur = TAILQ_FIRST(&sc->sc_buckets[c].b_upd_req_list)) != NULL) { + TAILQ_REMOVE(&sc->sc_buckets[c].b_upd_req_list, ur, ur_entry); bcopy(&ur->ur_msg, m->m_data + offset, sizeof(ur->ur_msg)); @@ -1611,11 +1672,11 @@ } /* has someone built a custom region for us to add? */ - if (sc->sc_plus != NULL) { - bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); - offset += sc->sc_pluslen; + if (sc->sc_buckets[c].b_plus != NULL) { + bcopy(sc->sc_buckets[c].b_plus, m->m_data + offset, sc->sc_buckets[c].b_pluslen); + offset += sc->sc_buckets[c].b_pluslen; - sc->sc_plus = NULL; + sc->sc_buckets[c].b_plus = NULL; } subh = (struct pfsync_subheader *)(m->m_data + offset); @@ -1629,36 +1690,37 @@ /* we're done, let's put it on the wire */ if (ifp->if_bpf) { m->m_data += sizeof(*ip); - m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); + m->m_len = m->m_pkthdr.len = sc->sc_buckets[c].b_len - sizeof(*ip); BPF_MTAP(ifp, m); m->m_data -= sizeof(*ip); - m->m_len = m->m_pkthdr.len = sc->sc_len; + m->m_len = m->m_pkthdr.len = sc->sc_buckets[c].b_len; } if (sc->sc_sync_if == NULL) { - sc->sc_len = PFSYNC_MINPKT; + sc->sc_buckets[c].b_len = PFSYNC_MINPKT; m_freem(m); return; } if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); - sc->sc_len = PFSYNC_MINPKT; + sc->sc_buckets[c].b_len = PFSYNC_MINPKT; - if (!_IF_QFULL(&sc->sc_ifp->if_snd)) - _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); + if (!_IF_QFULL(&sc->sc_buckets[c].b_snd)) + _IF_ENQUEUE(&sc->sc_buckets[c].b_snd, m); else { m_freem(m); if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); } if (schedswi) - swi_sched(V_pfsync_swi_cookie, 0); + swi_sched(sc->sc_buckets[c].b_pfsync_swi_cookie, 0); } static void pfsync_insert_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); if (st->state_flags & PFSTATE_NOSYNC) return; @@ -1672,12 +1734,12 @@ KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); - PFSYNC_LOCK(sc); - if (sc->sc_len == PFSYNC_MINPKT) - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); + PFSYNC_BUCKET_LOCK(b); + if (b->b_len == PFSYNC_MINPKT) + callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); pfsync_q_ins(st, PFSYNC_S_INS, true); - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(b); st->sync_updates = 0; } @@ -1687,6 +1749,7 @@ { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); if (m->m_flags & (M_BCAST|M_MCAST)) return (0); @@ -1699,13 +1762,13 @@ return (0); } - if (sc->sc_deferred >= 128) - pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); + if (b->b_deferred >= 128) + pfsync_undefer(TAILQ_FIRST(&b->b_deferrals), 0); pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); if (pd == NULL) return (0); - sc->sc_deferred++; + b->b_deferred++; m->m_flags |= M_SKIP_FIREWALL; st->state_flags |= PFSTATE_ACK; @@ -1716,11 +1779,11 @@ pf_ref_state(st); pd->pd_m = m; - TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); - callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); + TAILQ_INSERT_TAIL(&b->b_deferrals, pd, pd_entry); + callout_init_mtx(&pd->pd_tmo, &b->b_mtx, CALLOUT_RETURNUNLOCKED); callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); - pfsync_push(sc); + pfsync_push(b); return (1); } @@ -1731,11 +1794,12 @@ struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_state *st = pd->pd_st; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_ASSERT(b); - TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); - sc->sc_deferred--; + TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); + b->b_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ free(pd, M_PFSYNC); pf_release_state(st); @@ -1743,8 +1807,8 @@ if (drop) m_freem(m); else { - _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); - pfsync_push(sc); + _IF_ENQUEUE(&b->b_snd, m); + pfsync_push(b); } } @@ -1755,13 +1819,14 @@ struct pfsync_softc *sc = pd->pd_sc; struct mbuf *m = pd->pd_m; struct pf_state *st = pd->pd_st; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_ASSERT(b); CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); - TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); - sc->sc_deferred--; + TAILQ_REMOVE(&b->b_deferrals, pd, pd_entry); + b->b_deferred--; pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ if (pd->pd_refs == 0) free(pd, M_PFSYNC); @@ -1779,40 +1844,52 @@ { struct pfsync_softc *sc = V_pfsyncif; struct pfsync_deferral *pd; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_LOCK(b); - TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { + TAILQ_FOREACH(pd, &b->b_deferrals, pd_entry) { if (pd->pd_st == st) { if (callout_stop(&pd->pd_tmo) > 0) pfsync_undefer(pd, drop); + + PFSYNC_BUCKET_UNLOCK(b); return; } } + PFSYNC_BUCKET_UNLOCK(b); panic("%s: unable to find deferred state", __func__); } +static struct pfsync_bucket* +pfsync_get_bucket(struct pfsync_softc *sc, struct pf_state *st) +{ + int c = PF_IDHASH(st) % pfsync_buckets; + return &sc->sc_buckets[c]; +} + static void pfsync_update_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; bool sync = false, ref = true; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PF_STATE_LOCK_ASSERT(st); - PFSYNC_LOCK(sc); + PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state(st, 0); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) - pfsync_q_del(st, true); - PFSYNC_UNLOCK(sc); + pfsync_q_del(st, true, b); + PFSYNC_BUCKET_UNLOCK(b); return; } - if (sc->sc_len == PFSYNC_MINPKT) - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); + if (b->b_len == PFSYNC_MINPKT) + callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); switch (st->sync_state) { case PFSYNC_S_UPD_C: @@ -1828,7 +1905,7 @@ break; case PFSYNC_S_IACK: - pfsync_q_del(st, false); + pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ @@ -1842,9 +1919,9 @@ } if (sync || (time_uptime - st->pfsync_time) < 2) - pfsync_push(sc); + pfsync_push(b); - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(b); } static void @@ -1854,14 +1931,14 @@ struct pfsync_upd_req_item *item; size_t nlen = sizeof(struct pfsync_upd_req); - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_ASSERT(&sc->sc_buckets[0]); /* * This code does a bit to prevent multiple update requests for the * same state being generated. It searches current subheader queue, * but it doesn't lookup into queue of already packed datagrams. */ - TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) + TAILQ_FOREACH(item, &sc->sc_buckets[0].b_upd_req_list, ur_entry) if (item->ur_msg.id == id && item->ur_msg.creatorid == creatorid) return; @@ -1873,18 +1950,18 @@ item->ur_msg.id = id; item->ur_msg.creatorid = creatorid; - if (TAILQ_EMPTY(&sc->sc_upd_req_list)) + if (TAILQ_EMPTY(&sc->sc_buckets[0].b_upd_req_list)) nlen += sizeof(struct pfsync_subheader); - if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { - pfsync_sendout(1); + if (sc->sc_buckets[0].b_len + nlen > sc->sc_ifp->if_mtu) { + pfsync_sendout(1, 0); nlen = sizeof(struct pfsync_subheader) + sizeof(struct pfsync_upd_req); } - TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); - sc->sc_len += nlen; + TAILQ_INSERT_TAIL(&sc->sc_buckets[0].b_upd_req_list, item, ur_entry); + sc->sc_buckets[0].b_len += nlen; } static void @@ -1892,27 +1969,28 @@ { struct pfsync_softc *sc = V_pfsyncif; bool ref = true; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); PF_STATE_LOCK_ASSERT(st); - PFSYNC_LOCK(sc); + PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) - pfsync_q_del(st, true); - PFSYNC_UNLOCK(sc); + pfsync_q_del(st, true, b); + PFSYNC_BUCKET_UNLOCK(b); return; } switch (st->sync_state) { case PFSYNC_S_UPD_C: case PFSYNC_S_IACK: - pfsync_q_del(st, false); + pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ case PFSYNC_S_NONE: pfsync_q_ins(st, PFSYNC_S_UPD, ref); - pfsync_push(sc); + pfsync_push(b); break; case PFSYNC_S_INS: @@ -1925,38 +2003,39 @@ panic("%s: unexpected sync state %d", __func__, st->sync_state); } - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_delete_state(struct pf_state *st) { struct pfsync_softc *sc = V_pfsyncif; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); bool ref = true; - PFSYNC_LOCK(sc); + PFSYNC_BUCKET_LOCK(b); if (st->state_flags & PFSTATE_ACK) pfsync_undefer_state(st, 1); if (st->state_flags & PFSTATE_NOSYNC) { if (st->sync_state != PFSYNC_S_NONE) - pfsync_q_del(st, true); - PFSYNC_UNLOCK(sc); + pfsync_q_del(st, true, b); + PFSYNC_BUCKET_UNLOCK(b); return; } - if (sc->sc_len == PFSYNC_MINPKT) - callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); + if (b->b_len == PFSYNC_MINPKT) + callout_reset(&b->b_tmo, 1 * hz, pfsync_timeout, b); switch (st->sync_state) { case PFSYNC_S_INS: /* We never got to tell the world so just forget about it. */ - pfsync_q_del(st, true); + pfsync_q_del(st, true, b); break; case PFSYNC_S_UPD_C: case PFSYNC_S_UPD: case PFSYNC_S_IACK: - pfsync_q_del(st, false); + pfsync_q_del(st, false, b); ref = false; /* FALLTHROUGH */ @@ -1968,13 +2047,12 @@ panic("%s: unexpected sync state %d", __func__, st->sync_state); } - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(b); } static void pfsync_clear_states(u_int32_t creatorid, const char *ifname) { - struct pfsync_softc *sc = V_pfsyncif; struct { struct pfsync_subheader subh; struct pfsync_clr clr; @@ -1989,9 +2067,7 @@ strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); r.clr.creatorid = creatorid; - PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); - PFSYNC_UNLOCK(sc); } static void @@ -1999,48 +2075,48 @@ { struct pfsync_softc *sc = V_pfsyncif; size_t nlen = pfsync_qs[q].len; + struct pfsync_bucket *b = pfsync_get_bucket(sc, st); - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_ASSERT(b); KASSERT(st->sync_state == PFSYNC_S_NONE, ("%s: st->sync_state %u", __func__, st->sync_state)); - KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", - sc->sc_len)); + KASSERT(b->b_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", + b->b_len)); - if (TAILQ_EMPTY(&sc->sc_qs[q])) + if (TAILQ_EMPTY(&b->b_qs[q])) nlen += sizeof(struct pfsync_subheader); - if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { - pfsync_sendout(1); + if (b->b_len + nlen > sc->sc_ifp->if_mtu) { + pfsync_sendout(1, b->b_id); nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; } - sc->sc_len += nlen; - TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); + b->b_len += nlen; + TAILQ_INSERT_TAIL(&b->b_qs[q], st, sync_list); st->sync_state = q; if (ref) pf_ref_state(st); } static void -pfsync_q_del(struct pf_state *st, bool unref) +pfsync_q_del(struct pf_state *st, bool unref, struct pfsync_bucket *b) { - struct pfsync_softc *sc = V_pfsyncif; int q = st->sync_state; - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_ASSERT(b); KASSERT(st->sync_state != PFSYNC_S_NONE, ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); - sc->sc_len -= pfsync_qs[q].len; - TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); + b->b_len -= pfsync_qs[q].len; + TAILQ_REMOVE(&b->b_qs[q], st, sync_list); st->sync_state = PFSYNC_S_NONE; if (unref) pf_release_state(st); - if (TAILQ_EMPTY(&sc->sc_qs[q])) - sc->sc_len -= sizeof(struct pfsync_subheader); + if (TAILQ_EMPTY(&b->b_qs[q])) + b->b_len -= sizeof(struct pfsync_subheader); } static void @@ -2094,7 +2170,7 @@ } for (; s; s = LIST_NEXT(s, entry)) { - +#if 0 if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < sizeof(struct pfsync_state)) { /* We've filled a packet. */ @@ -2106,7 +2182,7 @@ pfsync_bulk_update, sc); goto full; } - +#endif if (s->sync_state == PFSYNC_S_NONE && s->timeout < PFTM_MAX && s->pfsync_time <= sc->sc_ureq_received) { @@ -2119,8 +2195,9 @@ /* We're done. */ pfsync_bulk_status(PFSYNC_BUS_END); - +#if 0 full: +#endif CURVNET_RESTORE(); } @@ -2144,9 +2221,7 @@ r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); r.bus.status = status; - PFSYNC_LOCK(sc); pfsync_send_plus(&r, sizeof(r)); - PFSYNC_UNLOCK(sc); } static void @@ -2162,9 +2237,9 @@ /* Try again */ callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, V_pfsyncif); - PFSYNC_LOCK(sc); + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); pfsync_request_update(0, 0); - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); } else { /* Pretend like the transfer was ok. */ sc->sc_ureq_sent = 0; @@ -2187,54 +2262,69 @@ { struct pfsync_softc *sc = V_pfsyncif; - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[0]); - if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) - pfsync_sendout(1); + if (sc->sc_buckets[0].b_len + pluslen > sc->sc_ifp->if_mtu) + pfsync_sendout(1, 0); - sc->sc_plus = plus; - sc->sc_len += (sc->sc_pluslen = pluslen); + sc->sc_buckets[0].b_plus = plus; + sc->sc_buckets[0].b_len += (sc->sc_buckets[0].b_pluslen = pluslen); - pfsync_sendout(1); + pfsync_sendout(1, 0); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[0]); } static void pfsync_timeout(void *arg) { - struct pfsync_softc *sc = arg; + struct pfsync_bucket *b = arg; + struct pfsync_softc *sc = b->b_sc; CURVNET_SET(sc->sc_ifp->if_vnet); - PFSYNC_LOCK(sc); - pfsync_push(sc); - PFSYNC_UNLOCK(sc); + PFSYNC_BUCKET_LOCK(b); + pfsync_push(b); + PFSYNC_BUCKET_UNLOCK(b); CURVNET_RESTORE(); } static void -pfsync_push(struct pfsync_softc *sc) +pfsync_push(struct pfsync_bucket *b) { - PFSYNC_LOCK_ASSERT(sc); + PFSYNC_BUCKET_ASSERT(b); - sc->sc_flags |= PFSYNCF_PUSH; - swi_sched(V_pfsync_swi_cookie, 0); + b->b_flags |= PFSYNCF_BUCKET_PUSH; + swi_sched(b->b_pfsync_swi_cookie, 0); +} + +static void +pfsync_push_all(struct pfsync_softc *sc) +{ + int c; + + for (c = 0; c < pfsync_buckets; c++) { + PFSYNC_BUCKET_LOCK(&sc->sc_buckets[c]); + pfsync_push(&sc->sc_buckets[c]); + PFSYNC_BUCKET_UNLOCK(&sc->sc_buckets[c]); + } } static void pfsyncintr(void *arg) { - struct pfsync_softc *sc = arg; + struct pfsync_bucket *b = arg; + struct pfsync_softc *sc = b->b_sc; struct mbuf *m, *n; CURVNET_SET(sc->sc_ifp->if_vnet); - PFSYNC_LOCK(sc); - if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { - pfsync_sendout(0); - sc->sc_flags &= ~PFSYNCF_PUSH; + PFSYNC_BUCKET_LOCK(b); + if ((b->b_flags & PFSYNCF_BUCKET_PUSH) && b->b_len > PFSYNC_MINPKT) { + pfsync_sendout(0, b->b_id); + b->b_flags &= ~PFSYNCF_BUCKET_PUSH; } - _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); - PFSYNC_UNLOCK(sc); + _IF_DEQUEUE_ALL(&b->b_snd, m); + PFSYNC_BUCKET_UNLOCK(b); for (; m != NULL; m = n) { @@ -2362,16 +2452,9 @@ static void vnet_pfsync_init(const void *unused __unused) { - int error; V_pfsync_cloner = if_clone_simple(pfsyncname, pfsync_clone_create, pfsync_clone_destroy, 1); - error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif, - SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); - if (error) { - if_clone_detach(V_pfsync_cloner); - log(LOG_INFO, "swi_add() failed in %s\n", __func__); - } pfsync_pointers_init(); } @@ -2385,7 +2468,6 @@ pfsync_pointers_uninit(); if_clone_detach(V_pfsync_cloner); - swi_remove(V_pfsync_swi_cookie); } VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH,