Index: head/sys/net/pfvar.h =================================================================== --- head/sys/net/pfvar.h +++ head/sys/net/pfvar.h @@ -835,7 +835,6 @@ struct pf_ruleset *, struct pf_pdesc *, int); extern pflog_packet_t *pflog_packet_ptr; -#define V_pf_end_threads VNET(pf_end_threads) #endif /* _KERNEL */ #define PFSYNC_FLAG_SRCNODE 0x04 @@ -1520,6 +1519,7 @@ #define V_pf_state_scrub_z VNET(pf_state_scrub_z) extern void pf_purge_thread(void *); +extern void pf_unload_vnet_purge(void); extern void pf_intr(void *); extern void pf_purge_expired_src_nodes(void); @@ -1661,7 +1661,9 @@ #define V_pfi_all VNET(pfi_all) void pfi_initialize(void); +void pfi_initialize_vnet(void); void pfi_cleanup(void); +void pfi_cleanup_vnet(void); void pfi_kif_ref(struct pfi_kif *); void pfi_kif_unref(struct pfi_kif *); struct pfi_kif *pfi_kif_find(const char *); Index: head/sys/netpfil/pf/if_pflog.c =================================================================== --- head/sys/netpfil/pf/if_pflog.c +++ head/sys/netpfil/pf/if_pflog.c @@ -91,19 +91,22 @@ static void pflogstart(struct ifnet *); static int pflog_clone_create(struct if_clone *, int, caddr_t); static void pflog_clone_destroy(struct ifnet *); -static struct if_clone *pflog_cloner; static const char pflogname[] = "pflog"; -struct ifnet *pflogifs[PFLOGIFS_MAX]; /* for fast access */ +static VNET_DEFINE(struct if_clone *, pflog_cloner); +#define V_pflog_cloner VNET(pflog_cloner) + +VNET_DEFINE(struct ifnet *, pflogifs[PFLOGIFS_MAX]); /* for fast access */ +#define V_pflogifs VNET(pflogifs) static void -pflogattach(int npflog) +pflogattach(int npflog __unused) { int i; for (i = 0; i < PFLOGIFS_MAX; i++) - pflogifs[i] = NULL; - pflog_cloner = if_clone_simple(pflogname, pflog_clone_create, + V_pflogifs[i] = NULL; + V_pflog_cloner = if_clone_simple(pflogname, pflog_clone_create, pflog_clone_destroy, 1); } @@ -130,7 +133,7 @@ bpfattach(ifp, DLT_PFLOG, PFLOG_HDRLEN); - pflogifs[unit] = ifp; + V_pflogifs[unit] = ifp; 
return (0); } @@ -141,8 +144,8 @@ int i; for (i = 0; i < PFLOGIFS_MAX; i++) - if (pflogifs[i] == ifp) - pflogifs[i] = NULL; + if (V_pflogifs[i] == ifp) + V_pflogifs[i] = NULL; bpfdetach(ifp); if_detach(ifp); @@ -206,7 +209,7 @@ if (kif == NULL || m == NULL || rm == NULL || pd == NULL) return ( 1); - if ((ifn = pflogifs[rm->logif]) == NULL || !ifn->if_bpf) + if ((ifn = V_pflogifs[rm->logif]) == NULL || !ifn->if_bpf) return (0); bzero(&hdr, sizeof(hdr)); @@ -259,6 +262,24 @@ return (0); } +static void +vnet_pflog_init(const void *unused __unused) +{ + + pflogattach(1); +} +VNET_SYSINIT(vnet_pflog_init, SI_SUB_PSEUDO, SI_ORDER_ANY, + vnet_pflog_init, NULL); + +static void +vnet_pflog_uninit(const void *unused __unused) +{ + + if_clone_detach(V_pflog_cloner); +} +VNET_SYSUNINIT(vnet_pflog_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND, + vnet_pflog_uninit, NULL); + static int pflog_modevent(module_t mod, int type, void *data) { @@ -266,7 +287,6 @@ switch (type) { case MOD_LOAD: - pflogattach(1); PF_RULES_WLOCK(); pflog_packet_ptr = pflog_packet; PF_RULES_WUNLOCK(); @@ -275,10 +295,9 @@ PF_RULES_WLOCK(); pflog_packet_ptr = NULL; PF_RULES_WUNLOCK(); - if_clone_detach(pflog_cloner); break; default: - error = EINVAL; + error = EOPNOTSUPP; break; } Index: head/sys/netpfil/pf/pf.c =================================================================== --- head/sys/netpfil/pf/pf.c +++ head/sys/netpfil/pf/pf.c @@ -299,7 +299,7 @@ int in4_cksum(struct mbuf *m, u_int8_t nxt, int off, int len); -VNET_DECLARE(int, pf_end_threads); +extern int pf_end_threads; VNET_DEFINE(struct pf_limit, pf_limits[PF_LIMIT_MAX]); @@ -1421,51 +1421,25 @@ } void -pf_purge_thread(void *v) +pf_purge_thread(void *unused __unused) { + VNET_ITERATOR_DECL(vnet_iter); u_int idx = 0; - CURVNET_SET((struct vnet *)v); - for (;;) { PF_RULES_RLOCK(); rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftm", hz / 10); + PF_RULES_RUNLOCK(); - if (V_pf_end_threads) { - /* - * To cleanse up all kifs and rules we need - * two runs: 
first one clears reference flags, - * then pf_purge_expired_states() doesn't - * raise them, and then second run frees. - */ - PF_RULES_RUNLOCK(); - pf_purge_unlinked_rules(); - pfi_kif_purge(); - - /* - * Now purge everything. - */ - pf_purge_expired_states(0, pf_hashmask); - pf_purge_expired_fragments(); - pf_purge_expired_src_nodes(); - - /* - * Now all kifs & rules should be unreferenced, - * thus should be successfully freed. - */ - pf_purge_unlinked_rules(); - pfi_kif_purge(); - /* - * Announce success and exit. - */ - PF_RULES_RLOCK(); - V_pf_end_threads++; - PF_RULES_RUNLOCK(); + if (pf_end_threads) { /* exit before taking any vnet lock */ + pf_end_threads++; wakeup(pf_purge_thread); kproc_exit(0); } - PF_RULES_RUNLOCK(); + VNET_LIST_RLOCK(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); /* Process 1/interval fraction of the state table every run. */ idx = pf_purge_expired_states(idx, pf_hashmask / @@ -1483,11 +1457,42 @@ pf_purge_unlinked_rules(); pfi_kif_purge(); } + CURVNET_RESTORE(); + } + VNET_LIST_RUNLOCK(); } /* not reached */ - CURVNET_RESTORE(); } +void +pf_unload_vnet_purge(void) +{ + + /* + * To cleanse up all kifs and rules we need + * two runs: first one clears reference flags, + * then pf_purge_expired_states() doesn't + * raise them, and then second run frees. + */ + pf_purge_unlinked_rules(); + pfi_kif_purge(); + + /* + * Now purge everything. + */ + pf_purge_expired_states(0, pf_hashmask); + pf_purge_expired_fragments(); + pf_purge_expired_src_nodes(); + + /* + * Now all kifs & rules should be unreferenced, + * thus should be successfully freed. 
+ */ + pf_purge_unlinked_rules(); + pfi_kif_purge(); +} + + u_int32_t pf_state_expires(const struct pf_state *state) { Index: head/sys/netpfil/pf/pf_if.c =================================================================== --- head/sys/netpfil/pf/pf_if.c +++ head/sys/netpfil/pf/pf_if.c @@ -58,6 +58,9 @@ #define V_pfi_update VNET(pfi_update) #define PFI_BUFFER_MAX 0x10000 +VNET_DECLARE(int, pf_vnet_active); +#define V_pf_vnet_active VNET(pf_vnet_active) + static VNET_DEFINE(struct pfr_addr *, pfi_buffer); static VNET_DEFINE(int, pfi_buffer_cnt); static VNET_DEFINE(int, pfi_buffer_max); @@ -108,7 +111,7 @@ MTX_DEF); void -pfi_initialize(void) +pfi_initialize_vnet(void) { struct ifg_group *ifg; struct ifnet *ifp; @@ -129,6 +132,11 @@ TAILQ_FOREACH(ifp, &V_ifnet, if_link) pfi_attach_ifnet(ifp); IFNET_RUNLOCK(); +} + +void +pfi_initialize(void) +{ pfi_attach_cookie = EVENTHANDLER_REGISTER(ifnet_arrival_event, pfi_attach_ifnet_event, NULL, EVENTHANDLER_PRI_ANY); @@ -145,31 +153,44 @@ } void -pfi_cleanup(void) +pfi_cleanup_vnet(void) { - struct pfi_kif *p; + struct pfi_kif *kif; - EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie); - EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie); - EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie); - EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie); - EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie); - EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie); + PF_RULES_WASSERT(); V_pfi_all = NULL; - while ((p = RB_MIN(pfi_ifhead, &V_pfi_ifs))) { - RB_REMOVE(pfi_ifhead, &V_pfi_ifs, p); - free(p, PFI_MTYPE); + while ((kif = RB_MIN(pfi_ifhead, &V_pfi_ifs))) { + RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif); + if (kif->pfik_group) + kif->pfik_group->ifg_pf_kif = NULL; + if (kif->pfik_ifp) + kif->pfik_ifp->if_pf_kif = NULL; + free(kif, PFI_MTYPE); } - while ((p = LIST_FIRST(&V_pfi_unlinked_kifs))) { - LIST_REMOVE(p, pfik_list); - free(p, PFI_MTYPE); + 
mtx_lock(&pfi_unlnkdkifs_mtx); + while ((kif = LIST_FIRST(&V_pfi_unlinked_kifs))) { + LIST_REMOVE(kif, pfik_list); + free(kif, PFI_MTYPE); } + mtx_unlock(&pfi_unlnkdkifs_mtx); free(V_pfi_buffer, PFI_MTYPE); } +void +pfi_cleanup(void) +{ + + EVENTHANDLER_DEREGISTER(ifnet_arrival_event, pfi_attach_cookie); + EVENTHANDLER_DEREGISTER(ifnet_departure_event, pfi_detach_cookie); + EVENTHANDLER_DEREGISTER(group_attach_event, pfi_attach_group_cookie); + EVENTHANDLER_DEREGISTER(group_change_event, pfi_change_group_cookie); + EVENTHANDLER_DEREGISTER(group_detach_event, pfi_detach_group_cookie); + EVENTHANDLER_DEREGISTER(ifaddr_event, pfi_ifaddr_event_cookie); +} + struct pfi_kif * pfi_kif_find(const char *kif_name) { @@ -668,7 +689,7 @@ bzero(pfs->bcounters, sizeof(pfs->bcounters)); } TAILQ_FOREACH(ifgm, &ifg_members, ifgm_next) { - if (ifgm->ifgm_ifp == NULL) + if (ifgm->ifgm_ifp == NULL || ifgm->ifgm_ifp->if_pf_kif == NULL) continue; p = (struct pfi_kif *)ifgm->ifgm_ifp->if_pf_kif; @@ -780,6 +801,11 @@ { CURVNET_SET(ifp->if_vnet); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. */ + CURVNET_RESTORE(); + return; + } pfi_attach_ifnet(ifp); #ifdef ALTQ PF_RULES_WLOCK(); @@ -794,7 +820,15 @@ { struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif; + if (kif == NULL) + return; + CURVNET_SET(ifp->if_vnet); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. */ + CURVNET_RESTORE(); + return; + } PF_RULES_WLOCK(); V_pfi_update++; pfi_kif_update(kif); @@ -813,6 +847,11 @@ { CURVNET_SET((struct vnet *)arg); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. */ + CURVNET_RESTORE(); + return; + } pfi_attach_ifgroup(ifg); CURVNET_RESTORE(); } @@ -822,9 +861,14 @@ { struct pfi_kif *kif; - kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); - CURVNET_SET((struct vnet *)arg); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. 
*/ + CURVNET_RESTORE(); + return; + } + + kif = malloc(sizeof(*kif), PFI_MTYPE, M_WAITOK); PF_RULES_WLOCK(); V_pfi_update++; kif = pfi_kif_attach(kif, gname); @@ -838,7 +882,15 @@ { struct pfi_kif *kif = (struct pfi_kif *)ifg->ifg_pf_kif; + if (kif == NULL) + return; + CURVNET_SET((struct vnet *)arg); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. */ + CURVNET_RESTORE(); + return; + } PF_RULES_WLOCK(); V_pfi_update++; @@ -851,8 +903,15 @@ static void pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp) { + if (ifp->if_pf_kif == NULL) + return; CURVNET_SET(ifp->if_vnet); + if (V_pf_vnet_active == 0) { + /* Avoid teardown race in the least expensive way. */ + CURVNET_RESTORE(); + return; + } PF_RULES_WLOCK(); if (ifp && ifp->if_pf_kif) { V_pfi_update++; Index: head/sys/netpfil/pf/pf_ioctl.c =================================================================== --- head/sys/netpfil/pf/pf_ioctl.c +++ head/sys/netpfil/pf/pf_ioctl.c @@ -87,7 +87,6 @@ #include #endif -static int pfattach(void); static struct pf_pool *pf_get_pool(char *, u_int32_t, u_int8_t, u_int32_t, u_int8_t, u_int8_t, u_int8_t); @@ -189,7 +188,16 @@ static volatile VNET_DEFINE(int, pf_pfil_hooked); #define V_pf_pfil_hooked VNET(pf_pfil_hooked) -VNET_DEFINE(int, pf_end_threads); + +/* + * We need a flag that is neither hooked nor running to know when + * the VNET is "valid". We primarily need this to control (global) + * external event, e.g., eventhandlers. 
+ */ +VNET_DEFINE(int, pf_vnet_active); +#define V_pf_vnet_active VNET(pf_vnet_active) + +int pf_end_threads; struct rwlock pf_rules_lock; struct sx pf_ioctl_lock; @@ -204,17 +212,14 @@ /* pflog */ pflog_packet_t *pflog_packet_ptr = NULL; -static int -pfattach(void) +static void +pfattach_vnet(void) { u_int32_t *my_timeout = V_pf_default_rule.timeout; - int error; - if (IS_DEFAULT_VNET(curvnet)) - pf_mtag_initialize(); pf_initialize(); pfr_initialize(); - pfi_initialize(); + pfi_initialize_vnet(); pf_normalize_init(); V_pf_limits[PF_LIMIT_STATES].limit = PFSTATE_HIWAT; @@ -276,18 +281,13 @@ for (int i = 0; i < SCNT_MAX; i++) V_pf_status.scounters[i] = counter_u64_alloc(M_WAITOK); - if ((error = kproc_create(pf_purge_thread, curvnet, NULL, 0, 0, - "pf purge")) != 0) - /* XXXGL: leaked all above. */ - return (error); - if ((error = swi_add(NULL, "pf send", pf_intr, curvnet, SWI_NET, - INTR_MPSAFE, &V_pf_swi_cookie)) != 0) + if (swi_add(NULL, "pf send", pf_intr, curvnet, SWI_NET, + INTR_MPSAFE, &V_pf_swi_cookie) != 0) /* XXXGL: leaked all above. 
*/ - return (error); - - return (0); + return; } + static struct pf_pool * pf_get_pool(char *anchor, u_int32_t ticket, u_int8_t rule_action, u_int32_t rule_number, u_int8_t r_last, u_int8_t active, @@ -3480,21 +3480,6 @@ u_int32_t t[5]; char nn = '\0'; - V_pf_status.running = 0; - - counter_u64_free(V_pf_default_rule.states_cur); - counter_u64_free(V_pf_default_rule.states_tot); - counter_u64_free(V_pf_default_rule.src_nodes); - - for (int i = 0; i < PFRES_MAX; i++) - counter_u64_free(V_pf_status.counters[i]); - for (int i = 0; i < LCNT_MAX; i++) - counter_u64_free(V_pf_status.lcounters[i]); - for (int i = 0; i < FCNT_MAX; i++) - counter_u64_free(V_pf_status.fcounters[i]); - for (int i = 0; i < SCNT_MAX; i++) - counter_u64_free(V_pf_status.scounters[i]); - do { if ((error = pf_begin_rules(&t[0], PF_RULESET_SCRUB, &nn)) != 0) { @@ -3546,6 +3531,20 @@ /* status does not use malloced mem so no need to cleanup */ /* fingerprints and interfaces have their own cleanup code */ + + /* Free counters last as we updated them during shutdown. 
*/ + counter_u64_free(V_pf_default_rule.states_cur); + counter_u64_free(V_pf_default_rule.states_tot); + counter_u64_free(V_pf_default_rule.src_nodes); + + for (int i = 0; i < PFRES_MAX; i++) + counter_u64_free(V_pf_status.counters[i]); + for (int i = 0; i < LCNT_MAX; i++) + counter_u64_free(V_pf_status.lcounters[i]); + for (int i = 0; i < FCNT_MAX; i++) + counter_u64_free(V_pf_status.fcounters[i]); + for (int i = 0; i < SCNT_MAX; i++) + counter_u64_free(V_pf_status.scounters[i]); } while(0); return (error); @@ -3697,39 +3696,55 @@ return (0); } -static int -pf_load(void) +static void +pf_load_vnet(void) { - int error; - - VNET_ITERATOR_DECL(vnet_iter); - VNET_LIST_RLOCK(); - VNET_FOREACH(vnet_iter) { - CURVNET_SET(vnet_iter); - V_pf_pfil_hooked = 0; - V_pf_end_threads = 0; - TAILQ_INIT(&V_pf_tags); - TAILQ_INIT(&V_pf_qids); - CURVNET_RESTORE(); - } - VNET_LIST_RUNLOCK(); + /* + * VNET_SYSINIT runs this once per vnet with curvnet already + * set, so initialize only the current vnet. Walking the whole + * vnet list here would re-initialize the tag and queue-id + * lists of vnets that are already active whenever a new vnet + * is created. + */ + V_pf_pfil_hooked = 0; + TAILQ_INIT(&V_pf_tags); + TAILQ_INIT(&V_pf_qids); + pfattach_vnet(); + V_pf_vnet_active = 1; +} + +static int +pf_load(void) +{ + int error; + rw_init(&pf_rules_lock, "pf rulesets"); sx_init(&pf_ioctl_lock, "pf ioctl"); + pf_mtag_initialize(); + pf_dev = make_dev(&pf_cdevsw, 0, 0, 0, 0600, PF_NAME); - if ((error = pfattach()) != 0) + if (pf_dev == NULL) + return (ENOMEM); + + pf_end_threads = 0; + error = kproc_create(pf_purge_thread, NULL, NULL, 0, 0, "pf purge"); + if (error != 0) return (error); + pfi_initialize(); + return (0); } -static int -pf_unload(void) +static void +pf_unload_vnet(void) { - int error = 0; + int error; + V_pf_vnet_active = 0; V_pf_status.running = 0; swi_remove(V_pf_swi_cookie); error = dehook_pf(); @@ -3740,30 +3755,73 @@ * a message like 'No such process'. 
*/ printf("%s : pfil unregisteration fail\n", __FUNCTION__); - return error; + return; } + + pf_unload_vnet_purge(); + PF_RULES_WLOCK(); shutdown_pf(); - V_pf_end_threads = 1; - while (V_pf_end_threads < 2) { - wakeup_one(pf_purge_thread); - rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0); - } PF_RULES_WUNLOCK(); + pf_normalize_cleanup(); - pfi_cleanup(); + PF_RULES_WLOCK(); + pfi_cleanup_vnet(); + PF_RULES_WUNLOCK(); pfr_cleanup(); pf_osfp_flush(); pf_cleanup(); if (IS_DEFAULT_VNET(curvnet)) pf_mtag_cleanup(); - destroy_dev(pf_dev); +} + +static int +pf_unload(void) +{ + int error = 0; + + /* + * rw_sleep() must be entered with pf_rules_lock held; the lock + * is released while sleeping, which lets the purge thread run. + */ + PF_RULES_WLOCK(); + pf_end_threads = 1; + while (pf_end_threads < 2) { + wakeup_one(pf_purge_thread); + rw_sleep(pf_purge_thread, &pf_rules_lock, 0, "pftmo", 0); + } + PF_RULES_WUNLOCK(); + + if (pf_dev != NULL) + destroy_dev(pf_dev); + + pfi_cleanup(); + rw_destroy(&pf_rules_lock); sx_destroy(&pf_ioctl_lock); return (error); } +static void +vnet_pf_init(void *unused __unused) +{ + + pf_load_vnet(); +} +VNET_SYSINIT(vnet_pf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, + vnet_pf_init, NULL); + +static void +vnet_pf_uninit(const void *unused __unused) +{ + + pf_unload_vnet(); +} +VNET_SYSUNINIT(vnet_pf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, + vnet_pf_uninit, NULL); + + static int pf_modevent(module_t mod, int type, void *data) { @@ -3796,5 +3854,5 @@ 0 }; -DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_FIRST); +DECLARE_MODULE(pf, pf_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND); MODULE_VERSION(pf, PF_MODVER);