Index: sys/netipsec/ipsec.h
===================================================================
--- sys/netipsec/ipsec.h
+++ sys/netipsec/ipsec.h
@@ -219,6 +219,9 @@
 	uint64_t ips_out_inval;		/* output: generic error */
 	uint64_t ips_out_bundlesa;	/* output: bundled SA processed */
 
+	uint64_t ips_spdcache_hits;	/* SP cache hits */
+	uint64_t ips_spdcache_misses;	/* SP cache misses */
+
 	uint64_t ips_mbcoalesced;	/* mbufs coalesced during clone */
 	uint64_t ips_clcoalesced;	/* clusters coalesced during clone */
 	uint64_t ips_clcopied;		/* clusters copied during clone */
Index: sys/netipsec/key.c
===================================================================
--- sys/netipsec/key.c
+++ sys/netipsec/key.c
@@ -173,6 +173,84 @@
 #define	SPHASH_HASHVAL(id)	(key_u32hash(id) & V_sphash_mask)
 #define	SPHASH_HASH(id)		&V_sphashtbl[SPHASH_HASHVAL(id)]
 
+/* SPD cache */
+struct spdcache_entry {
+	struct secpolicyindex spidx;	/* secpolicyindex */
+	struct secpolicy *sp;		/* cached policy to be used */
+
+	LIST_ENTRY(spdcache_entry) chain;
+};
+LIST_HEAD(spdcache_entry_list, spdcache_entry);
+
+#define	SPDCACHE_MAX_ENTRIES_PER_HASH	8
+
+static VNET_DEFINE(u_int, key_spdcache_maxentries) = 0;
+#define	V_key_spdcache_maxentries	VNET(key_spdcache_maxentries)
+static VNET_DEFINE(u_int, key_spdcache_threshold) = 32;
+#define	V_key_spdcache_threshold	VNET(key_spdcache_threshold)
+static VNET_DEFINE(unsigned long, spd_size) = 0;
+#define	V_spd_size			VNET(spd_size)
+
+#define	SPDCACHE_ENABLED()	(V_key_spdcache_maxentries != 0)
+#define	SPDCACHE_ACTIVE() \
+	(SPDCACHE_ENABLED() && V_spd_size >= V_key_spdcache_threshold)
+
+static VNET_DEFINE(struct spdcache_entry_list *, spdcachehashtbl);
+static VNET_DEFINE(u_long, spdcachehash_mask);
+#define	V_spdcachehashtbl	VNET(spdcachehashtbl)
+#define	V_spdcachehash_mask	VNET(spdcachehash_mask)
+
+#define	SPDCACHE_HASHVAL(idx)	(key_spidxhash(idx) & V_spdcachehash_mask)
+
+static uint32_t
+key_spidxhash(const struct secpolicyindex *spidx)
+{
+	uint32_t hval;
+
+	hval = fnv_32_buf(&spidx->ul_proto, sizeof(spidx->ul_proto),
+	    FNV1_32_INIT);
+	switch (spidx->dst.sa.sa_family) {
+#ifdef INET
+	case AF_INET:
+		hval = fnv_32_buf(&spidx->src.sin.sin_addr,
+		    sizeof(in_addr_t), hval);
+		hval = fnv_32_buf(&spidx->dst.sin.sin_addr,
+		    sizeof(in_addr_t), hval);
+		break;
+#endif
+#ifdef INET6
+	case AF_INET6:
+		hval = fnv_32_buf(&spidx->src.sin6.sin6_addr,
+		    sizeof(struct in6_addr), hval);
+		hval = fnv_32_buf(&spidx->dst.sin6.sin6_addr,
+		    sizeof(struct in6_addr), hval);
+		break;
+#endif
+	default:
+		hval = 0;
+		ipseclog((LOG_DEBUG, "%s: unknown address family %d\n",
+		    __func__, spidx->dst.sa.sa_family));
+	}
+	return (hval);
+}
+
+/* Each cache chain (bucket) is protected by its own mutex */
+static VNET_DEFINE(struct mtx *, spdcache_lock);
+#define	V_spdcache_lock		VNET(spdcache_lock)
+
+/*
+ * The spdcache_no_policy entry is never dereferenced: it is only a sentinel
+ * used to cache a "computed policy entry which says no match".
+ */
+static VNET_DEFINE(struct secpolicy, spdcache_no_policy);
+#define	V_spdcache_no_policy	(&VNET(spdcache_no_policy))
+
+#define	SPDCACHE_LOCK_INIT(a) \
+	mtx_init(&V_spdcache_lock[a], "spdcache", \
+	    "fast ipsec SPD cache", MTX_DEF|MTX_DUPOK)
+#define	SPDCACHE_LOCK_DESTROY(a)	mtx_destroy(&V_spdcache_lock[a])
+#define	SPDCACHE_LOCK(a)	mtx_lock(&V_spdcache_lock[a])
+#define	SPDCACHE_UNLOCK(a)	mtx_unlock(&V_spdcache_lock[a])
+
 /* SAD */
 TAILQ_HEAD(secashead_queue, secashead);
 LIST_HEAD(secashead_list, secashead);
@@ -463,6 +541,17 @@
 SYSCTL_INT(_net_key, KEYCTL_PREFERED_OLDSA, preferred_oldsa,
 	CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(key_preferred_oldsa), 0, "");
 
+static SYSCTL_NODE(_net_key, OID_AUTO, spdcache, CTLFLAG_RW, 0, "SPD cache");
+
+SYSCTL_UINT(_net_key_spdcache, OID_AUTO, maxentries,
+	CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(key_spdcache_maxentries), 0,
+	"Maximum number of entries in the SPD cache"
+	" (power of 2, 0 to disable)");
+
+SYSCTL_UINT(_net_key_spdcache, OID_AUTO, threshold,
+	CTLFLAG_VNET | CTLFLAG_RDTUN, &VNET_NAME(key_spdcache_threshold), 0,
+	"Number of SPs that make the SPD cache active");
+
 #define	__LIST_CHAINED(elm) \
 	(!((elm)->chain.le_next == NULL && (elm)->chain.le_prev == NULL))
@@ -473,6 +562,7 @@
 MALLOC_DEFINE(M_IPSEC_MISC, "ipsec-misc", "ipsec miscellaneous");
 MALLOC_DEFINE(M_IPSEC_SAQ, "ipsec-saq", "ipsec sa acquire");
 MALLOC_DEFINE(M_IPSEC_SAR, "ipsec-reg", "ipsec sa acquire");
+MALLOC_DEFINE(M_IPSEC_SPDCACHE, "ipsec-spdcache", "ipsec SPD cache");
 
 static VNET_DEFINE(uma_zone_t, key_lft_zone);
 #define	V_key_lft_zone		VNET(key_lft_zone)
@@ -574,6 +664,7 @@
 #endif
 static void key_unlink(struct secpolicy *);
+static struct secpolicy *key_do_allocsp(struct secpolicyindex *spidx, u_int dir);
 static struct secpolicy *key_getsp(struct secpolicyindex *);
 static struct secpolicy *key_getspbyid(u_int32_t);
 static struct mbuf *key_gather_mbuf(struct mbuf *,
@@ -694,6 +785,15 @@
 static struct mbuf *key_setkey(struct seckey *, uint16_t);
 static int xform_init(struct secasvar *, u_short);
 
+static void spdcache_init(void);
+static void spdcache_clear(void);
+static struct spdcache_entry *spdcache_entry_alloc(
+	const struct secpolicyindex *spidx,
+	struct secpolicy *policy);
+static void spdcache_entry_free(struct spdcache_entry *entry);
+static void spdcache_destroy(void);
+
 #define	DBG_IPSEC_INITREF(t, p)	do {				\
 	refcount_init(&(p)->refcnt, 1);				\
 	KEYDBG(KEY_STAMP,					\
@@ -799,14 +899,8 @@
 	return (0);
 }
 
-/*
- * allocating a SP for OUTBOUND or INBOUND packet.
- * Must call key_freesp() later.
- * OUT:	NULL:	not found
- *	others:	found and return the pointer.
- */
 struct secpolicy *
-key_allocsp(struct secpolicyindex *spidx, u_int dir)
+key_do_allocsp(struct secpolicyindex *spidx, u_int dir)
 {
 	SPTREE_RLOCK_TRACKER;
 	struct secpolicy *sp;
@@ -823,7 +917,79 @@
 		}
 	}
 	SPTREE_RUNLOCK();
+	return (sp);
+}
+
+/*
+ * allocating a SP for OUTBOUND or INBOUND packet.
+ * Must call key_freesp() later.
+ * OUT:	NULL:	not found
+ *	others:	found and return the pointer.
+ */
+struct secpolicy *
+key_allocsp(struct secpolicyindex *spidx, u_int dir)
+{
+	struct spdcache_entry *entry, *lastentry, *tmpentry;
+	struct secpolicy *sp;
+	uint32_t hashv;
+	int nb_entries;
+
+	if (!SPDCACHE_ACTIVE()) {
+		sp = key_do_allocsp(spidx, dir);
+		goto out;
+	}
+
+	hashv = SPDCACHE_HASHVAL(spidx);
+	SPDCACHE_LOCK(hashv);
+	nb_entries = 0;
+	LIST_FOREACH_SAFE(entry, &V_spdcachehashtbl[hashv], chain, tmpentry) {
+		/* Remove outdated entries */
+		if (entry->sp != V_spdcache_no_policy &&
+		    entry->sp->state == IPSEC_SPSTATE_DEAD) {
+			LIST_REMOVE(entry, chain);
+			spdcache_entry_free(entry);
+			continue;
+		}
+
+		nb_entries++;
+
+		if (!key_cmpspidx_exactly(&entry->spidx, spidx)) {
+			lastentry = entry;
+			continue;
+		}
+
+		if (entry->sp == V_spdcache_no_policy) {
+			sp = NULL;
+		} else {
+			sp = entry->sp;
+			SP_ADDREF(sp);
+		}
+
+		IPSECSTAT_INC(ips_spdcache_hits);
+
+		SPDCACHE_UNLOCK(hashv);
+		goto out;
+	}
+
+	IPSECSTAT_INC(ips_spdcache_misses);
+
+	sp = key_do_allocsp(spidx, dir);
+	entry = spdcache_entry_alloc(spidx,
+	    sp == NULL ? V_spdcache_no_policy : sp);
+	if (entry != NULL) {
+		if (nb_entries >= SPDCACHE_MAX_ENTRIES_PER_HASH) {
+			LIST_REMOVE(lastentry, chain);
+			spdcache_entry_free(lastentry);
+		}
+
+		LIST_INSERT_HEAD(&V_spdcachehashtbl[hashv], entry, chain);
+	}
+
+	SPDCACHE_UNLOCK(hashv);
+
+out:
 	if (sp != NULL) {	/* found a SPD entry */
 		sp->lastused = time_second;
 		KEYDBG(IPSEC_STAMP,
@@ -1107,9 +1273,12 @@
 	}
 	sp->state = IPSEC_SPSTATE_DEAD;
 	TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain);
+	V_spd_size--;
 	LIST_REMOVE(sp, idhash);
 	V_sp_genid++;
 	SPTREE_WUNLOCK();
+	if (SPDCACHE_ENABLED())
+		spdcache_clear();
 	key_freesp(&sp);
 }
@@ -1132,6 +1301,7 @@
 done:
 	LIST_INSERT_HEAD(SPHASH_HASH(newsp->id), newsp, idhash);
 	newsp->state = IPSEC_SPSTATE_ALIVE;
+	V_spd_size++;
 	V_sp_genid++;
 }
@@ -1207,9 +1377,12 @@
 		spp[i]->state = IPSEC_SPSTATE_DEAD;
 		TAILQ_REMOVE(&V_sptree_ifnet[spp[i]->spidx.dir],
 		    spp[i], chain);
+		V_spd_size--;
 		LIST_REMOVE(spp[i], idhash);
 	}
 	SPTREE_WUNLOCK();
+	if (SPDCACHE_ENABLED())
+		spdcache_clear();
 
 	for (i = 0; i < count; i++) {
 		m = key_setdumpsp(spp[i], SADB_X_SPDDELETE, 0, 0);
@@ -1939,6 +2112,8 @@
 	}
 	key_insertsp(newsp);
 	SPTREE_WUNLOCK();
+	if (SPDCACHE_ENABLED())
+		spdcache_clear();
 	KEYDBG(KEY_STAMP,
 	    printf("%s: SP(%p)\n", __func__, newsp));
@@ -2393,7 +2568,10 @@
 		LIST_REMOVE(sp, idhash);
 	}
 	V_sp_genid++;
+	V_spd_size = 0;
 	SPTREE_WUNLOCK();
+	if (SPDCACHE_ENABLED())
+		spdcache_clear();
 	sp = TAILQ_FIRST(&drainq);
 	while (sp != NULL) {
 		nextsp = TAILQ_NEXT(sp, chain);
@@ -4070,7 +4248,8 @@
 
 	if (spidx0->prefs != spidx1->prefs
	    || spidx0->prefd != spidx1->prefd
-	    || spidx0->ul_proto != spidx1->ul_proto)
+	    || spidx0->ul_proto != spidx1->ul_proto
+	    || spidx0->dir != spidx1->dir)
 		return 0;
 
 	return key_sockaddrcmp(&spidx0->src.sa, &spidx1->src.sa, 1) == 0 &&
@@ -4338,12 +4517,15 @@
 			continue;
 		}
 		TAILQ_REMOVE(&V_sptree[sp->spidx.dir], sp, chain);
+		V_spd_size--;
 		LIST_REMOVE(sp, idhash);
 		sp->state = IPSEC_SPSTATE_DEAD;
 		sp = nextsp;
 	}
 	V_sp_genid++;
 	SPTREE_WUNLOCK();
+	if (SPDCACHE_ENABLED())
+		spdcache_clear();
 
 	sp = LIST_FIRST(&drainq);
 	while (sp != NULL) {
@@ -8067,6 +8249,95 @@
 }
 
 void
+spdcache_init(void)
+{
+	int i;
+
+	TUNABLE_INT_FETCH("net.key.spdcache.maxentries",
+	    &V_key_spdcache_maxentries);
+	TUNABLE_INT_FETCH("net.key.spdcache.threshold",
+	    &V_key_spdcache_threshold);
+
+	if (V_key_spdcache_maxentries) {
+		V_key_spdcache_maxentries = MAX(V_key_spdcache_maxentries,
+		    SPDCACHE_MAX_ENTRIES_PER_HASH);
+		V_spdcachehashtbl = hashinit(V_key_spdcache_maxentries /
+		    SPDCACHE_MAX_ENTRIES_PER_HASH,
+		    M_IPSEC_SPDCACHE, &V_spdcachehash_mask);
+
+		V_spdcache_lock = malloc(sizeof(struct mtx) *
+		    (V_spdcachehash_mask + 1),
+		    M_IPSEC_SPDCACHE, M_WAITOK|M_ZERO);
+
+		for (i = 0; i < V_spdcachehash_mask + 1; ++i) {
+			LIST_INIT(&V_spdcachehashtbl[i]);
+			SPDCACHE_LOCK_INIT(i);
+		}
+	}
+}
+
+struct spdcache_entry *
+spdcache_entry_alloc(const struct secpolicyindex *spidx, struct secpolicy *sp)
+{
+	struct spdcache_entry *entry;
+
+	entry = malloc(sizeof(struct spdcache_entry),
+	    M_IPSEC_SPDCACHE, M_NOWAIT|M_ZERO);
+	if (entry == NULL)
+		return (NULL);
+
+	if (sp != V_spdcache_no_policy)
+		SP_ADDREF(sp);
+
+	entry->spidx = *spidx;
+	entry->sp = sp;
+
+	return (entry);
+}
+
+void
+spdcache_entry_free(struct spdcache_entry *entry)
+{
+
+	if (entry->sp != V_spdcache_no_policy)
+		key_freesp(&entry->sp);
+	free(entry, M_IPSEC_SPDCACHE);
+}
+
+void
+spdcache_clear(void)
+{
+	struct spdcache_entry *entry;
+	int i;
+
+	for (i = 0; i < V_spdcachehash_mask + 1; ++i) {
+		SPDCACHE_LOCK(i);
+		while (!LIST_EMPTY(&V_spdcachehashtbl[i])) {
+			entry = LIST_FIRST(&V_spdcachehashtbl[i]);
+			LIST_REMOVE(entry, chain);
+			spdcache_entry_free(entry);
+		}
+		SPDCACHE_UNLOCK(i);
+	}
+}
+
+void
+spdcache_destroy(void)
+{
+	int i;
+
+	if (SPDCACHE_ENABLED()) {
+		spdcache_clear();
+		hashdestroy(V_spdcachehashtbl, M_IPSEC_SPDCACHE,
+		    V_spdcachehash_mask);
+
+		for (i = 0; i < V_spdcachehash_mask + 1; ++i)
+			SPDCACHE_LOCK_DESTROY(i);
+
+		free(V_spdcache_lock, M_IPSEC_SPDCACHE);
+	}
+}
+
+void
 key_init(void)
 {
 	int i;
@@ -8090,6 +8361,8 @@
 	V_acqseqhashtbl = hashinit(ACQHASH_NHASH, M_IPSEC_SAQ,
 	    &V_acqseqhash_mask);
 
+	spdcache_init();
+
 	for (i = 0; i <= SADB_SATYPE_MAX; i++)
 		LIST_INIT(&V_regtree[i]);
@@ -8145,6 +8418,7 @@
 	for (i = 0; i < V_sphash_mask + 1; i++)
 		LIST_INIT(&V_sphashtbl[i]);
 	SPTREE_WUNLOCK();
+	spdcache_destroy();
 
 	sp = TAILQ_FIRST(&drainq);
 	while (sp != NULL) {
Index: usr.bin/netstat/ipsec.c
===================================================================
--- usr.bin/netstat/ipsec.c
+++ usr.bin/netstat/ipsec.c
@@ -191,6 +191,8 @@
 
 #define	p(f, m) if (ipsecstat->f || sflag <= 1) \
 	xo_emit(m, (uintmax_t)ipsecstat->f, plural(ipsecstat->f))
+#define	p2(f, m) if (ipsecstat->f || sflag <= 1) \
+	xo_emit(m, (uintmax_t)ipsecstat->f, plurales(ipsecstat->f))
 
 	p(ips_in_polvio, "\t{:dropped-policy-violation/%ju} "
 	    "{N:/inbound packet%s violated process security policy}\n");
@@ -210,6 +212,10 @@
 	    "{N:/invalid outbound packet%s}\n");
 	p(ips_out_bundlesa, "\t{:send-bundled-sa/%ju} "
 	    "{N:/outbound packet%s with bundled SAs}\n");
+	p(ips_spdcache_hits, "\t{:spdcache-hits/%ju} "
+	    "{N:/spd cache hit%s}\n");
+	p2(ips_spdcache_misses, "\t{:spdcache-misses/%ju} "
+	    "{N:/spd cache miss%s}\n");
 	p(ips_mbcoalesced, "\t{:mbufs-coalesced-during-clone/%ju} "
 	    "{N:/mbuf%s coalesced during clone}\n");
 	p(ips_clcoalesced, "\t{:clusters-coalesced-during-clone/%ju} "
@@ -218,6 +224,7 @@
 	    "{N:/cluster%s copied during clone}\n");
 	p(ips_mbinserted, "\t{:mbufs-inserted/%ju} "
 	    "{N:/mbuf%s inserted during makespace}\n");
+#undef p2
 #undef p
 	xo_close_container("ipsec-statistics");
 }
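
Usage note (illustrative, not part of the diff): both new sysctls are CTLFLAG_RDTUN, so they can only be set as loader tunables. The cache stays compiled out of the fast path while net.key.spdcache.maxentries is 0, and even when enabled it only becomes active once the SPD holds at least net.key.spdcache.threshold policies (default 32). With a patched kernel, a setup along these lines enables the cache and lets the new counters be observed; the specific values are only examples:

	# /boot/loader.conf
	net.key.spdcache.maxentries="4096"	# power of 2; 0 keeps the cache disabled
	net.key.spdcache.threshold="128"	# optional, defaults to 32

	# after boot
	sysctl net.key.spdcache
	netstat -s -p ipsec | grep 'spd cache'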