Changeset View
Standalone View
sys/netinet/tcp_hostcache.c
Show First 20 Lines • Show All 115 Lines • ▼ Show 20 Lines | |||||
#define V_tcp_hostcache VNET(tcp_hostcache) | #define V_tcp_hostcache VNET(tcp_hostcache) | ||||
VNET_DEFINE_STATIC(struct callout, tcp_hc_callout); | VNET_DEFINE_STATIC(struct callout, tcp_hc_callout); | ||||
#define V_tcp_hc_callout VNET(tcp_hc_callout) | #define V_tcp_hc_callout VNET(tcp_hc_callout) | ||||
/*
 * Forward declarations of the file-local helpers: entry lookup/insert,
 * the sysctl handlers (list, histo, purgenow) and the purge routines.
 */
static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *); | static struct hc_metrics *tcp_hc_lookup(struct in_conninfo *); | ||||
static struct hc_metrics *tcp_hc_insert(struct in_conninfo *); | static struct hc_metrics *tcp_hc_insert(struct in_conninfo *); | ||||
static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS); | static int sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS); | ||||
static int sysctl_tcp_hc_histo(SYSCTL_HANDLER_ARGS); | |||||
static int sysctl_tcp_hc_purgenow(SYSCTL_HANDLER_ARGS); | static int sysctl_tcp_hc_purgenow(SYSCTL_HANDLER_ARGS); | ||||
static void tcp_hc_purge_internal(int); | static void tcp_hc_purge_internal(int); | ||||
static void tcp_hc_purge(void *); | static void tcp_hc_purge(void *); | ||||
/* Parent sysctl node "hostcache" under _net_inet_tcp for the knobs below. */
static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, | static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, | ||||
CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, | ||||
"TCP Host cache"); | "TCP Host cache"); | ||||
Show All 31 Lines | SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, purge, CTLFLAG_VNET | CTLFLAG_RW, | ||||
&VNET_NAME(tcp_hostcache.purgeall), 0, | &VNET_NAME(tcp_hostcache.purgeall), 0, | ||||
"Expire all entires on next purge run"); | "Expire all entires on next purge run"); | ||||
/* "list": read-only string sysctl served by sysctl_tcp_hc_list(). */
SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list, | SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, list, | ||||
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, | CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, | ||||
0, 0, sysctl_tcp_hc_list, "A", | 0, 0, sysctl_tcp_hc_list, "A", | ||||
"List of all hostcache entries"); | "List of all hostcache entries"); | ||||
/* "histo": read-only string sysctl served by sysctl_tcp_hc_histo(). */
SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, histo, | |||||
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_SKIP | CTLFLAG_MPSAFE, | |||||
0, 0, sysctl_tcp_hc_histo, "A", | |||||
"Print a histogram of hostcache hashbucket utilization"); | |||||
/* "purgenow": read-write integer sysctl served by sysctl_tcp_hc_purgenow(). */
SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, purgenow, | SYSCTL_PROC(_net_inet_tcp_hostcache, OID_AUTO, purgenow, | ||||
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, | ||||
NULL, 0, sysctl_tcp_hc_purgenow, "I", | NULL, 0, sysctl_tcp_hc_purgenow, "I", | ||||
"Immediately purge all entries"); | "Immediately purge all entries"); | ||||
/* Declares the M_HOSTCACHE malloc type, named "hostcache". */
static MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache"); | static MALLOC_DEFINE(M_HOSTCACHE, "hostcache", "TCP hostcache"); | ||||
#define HOSTCACHE_HASH(ip) \ | #define HOSTCACHE_HASH(ip) \ | ||||
▲ Show 20 Lines • Show All 206 Lines • ▼ Show 20 Lines | if (hc_head->hch_length >= V_tcp_hostcache.bucket_limit || | ||||
* Just give up if this bucket row is empty and we don't have | * Just give up if this bucket row is empty and we don't have | ||||
* anything to replace. | * anything to replace. | ||||
*/ | */ | ||||
if (hc_entry == NULL) { | if (hc_entry == NULL) { | ||||
THC_UNLOCK(&hc_head->hch_mtx); | THC_UNLOCK(&hc_head->hch_mtx); | ||||
return NULL; | return NULL; | ||||
} | } | ||||
TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q); | TAILQ_REMOVE(&hc_head->hch_bucket, hc_entry, rmx_q); | ||||
KASSERT(V_tcp_hostcache.hashbase[hash].hch_length > 0 && | |||||
V_tcp_hostcache.hashbase[hash].hch_length <= | |||||
V_tcp_hostcache.bucket_limit, | |||||
("tcp_hostcache: bucket length range violated at %u: %u", | |||||
hash, V_tcp_hostcache.hashbase[hash].hch_length)); | |||||
V_tcp_hostcache.hashbase[hash].hch_length--; | V_tcp_hostcache.hashbase[hash].hch_length--; | ||||
atomic_subtract_int(&V_tcp_hostcache.cache_count, 1); | atomic_subtract_int(&V_tcp_hostcache.cache_count, 1); | ||||
TCPSTAT_INC(tcps_hc_bucketoverflow); | TCPSTAT_INC(tcps_hc_bucketoverflow); | ||||
#if 0 | #if 0 | ||||
uma_zfree(V_tcp_hostcache.zone, hc_entry); | uma_zfree(V_tcp_hostcache.zone, hc_entry); | ||||
#endif | #endif | ||||
} else { | } else { | ||||
/* | /* | ||||
Show All 18 Lines | #endif | ||||
hc_entry->rmx_head = hc_head; | hc_entry->rmx_head = hc_head; | ||||
hc_entry->rmx_expire = V_tcp_hostcache.expire; | hc_entry->rmx_expire = V_tcp_hostcache.expire; | ||||
/* | /* | ||||
* Put it upfront. | * Put it upfront. | ||||
*/ | */ | ||||
TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q); | TAILQ_INSERT_HEAD(&hc_head->hch_bucket, hc_entry, rmx_q); | ||||
V_tcp_hostcache.hashbase[hash].hch_length++; | V_tcp_hostcache.hashbase[hash].hch_length++; | ||||
KASSERT(V_tcp_hostcache.hashbase[hash].hch_length < | |||||
V_tcp_hostcache.bucket_limit, | |||||
("tcp_hostcache: bucket length too high at %u: %u", | |||||
hash, V_tcp_hostcache.hashbase[hash].hch_length)); | |||||
atomic_add_int(&V_tcp_hostcache.cache_count, 1); | atomic_add_int(&V_tcp_hostcache.cache_count, 1); | ||||
TCPSTAT_INC(tcps_hc_added); | TCPSTAT_INC(tcps_hc_added); | ||||
return hc_entry; | return hc_entry; | ||||
} | } | ||||
/* | /* | ||||
* External function: look up an entry in the hostcache and fill out the | * External function: look up an entry in the hostcache and fill out the | ||||
▲ Show 20 Lines • Show All 257 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
#undef msec | #undef msec | ||||
error = sbuf_finish(&sb); | error = sbuf_finish(&sb); | ||||
sbuf_delete(&sb); | sbuf_delete(&sb); | ||||
return(error); | return(error); | ||||
} | } | ||||
/* | /* | ||||
* Sysctl function: prints a histogram of the hostcache hashbucket | |||||
* utilization. | |||||
*/ | |||||
static int | |||||
sysctl_tcp_hc_histo(SYSCTL_HANDLER_ARGS) | |||||
{ | |||||
const int linesize = 50; | |||||
struct sbuf sb; | |||||
int i, error; | |||||
int *histo; | |||||
u_int hch_length; | |||||
tuexen: I guess this needs to be unsigned int, since hch_length is unsigned. | |||||
if (jailed_without_vnet(curthread->td_ucred) != 0) | |||||
return (EPERM); | |||||
histo = (int *)malloc(sizeof(int) * (V_tcp_hostcache.bucket_limit + 1), | |||||
Done Inline ActionsIs this necessary if you would move the sbuf_new_for_sysctl() call down to be just before the first sbuf_printf() call? tuexen: Is this necessary if you would move the sbuf_new_for_sysctl() call down to be just before the… | |||||
M_TEMP, M_NOWAIT|M_ZERO); | |||||
if (histo == NULL) | |||||
return(ENOMEM); | |||||
for (i = 0; i < V_tcp_hostcache.hashsize; i++) { | |||||
Done Inline ActionsI think you need V_tcp_hostcache.bucket_limit + 1 here. tuexen: I think you need V_tcp_hostcache.bucket_limit + 1 here. | |||||
Done Inline ActionsIndeed. rscheff: Indeed. | |||||
hch_length = V_tcp_hostcache.hashbase[i].hch_length; | |||||
KASSERT(hch_length <= V_tcp_hostcache.bucket_limit, | |||||
("tcp_hostcache: bucket limit exceeded at %u: %u", | |||||
i, hch_length)); | |||||
histo[hch_length]++; | |||||
} | |||||
Done Inline ActionsCan't this be moved down after the computation of the histogram? tuexen: Can't this be moved down after the computation of the histogram? | |||||
/* Use a buffer for 16 lines */ | |||||
sbuf_new_for_sysctl(&sb, NULL, 16 * linesize, req); | |||||
Done Inline ActionsThe check needs to be <= instead of <. For consistency you might want to get the mutex. tuexen: The check needs to be <= instead of <. For consistency you might want to get the mutex. | |||||
Done Inline ActionsI want to avoid the cost of the LOCK/UNLOCK, at the cost of absolute accuracy. Similar to the hostcache.list - which will dump the contents of each hashindex at different times... The idea for this histogram is more to give a sense of the hash usage distribution (which doesn't need to be perfectly accurate). E.g. the jenkins_hash32 offers the possibility to change a salt. Or at some future point, a dynamic resizing (hash array width vs. depth) could be thought about. rscheff: I want to avoid the cost of the LOCK/UNLOCK, at the cost of absolute accuracy. Similar to the… | |||||
sbuf_printf(&sb, "\nLength\tCount\n"); | |||||
Done Inline ActionsWhat using a KASSERT V_tcp_hostcache.hashbase[i].hch_length <= V_tcp_hostcache.bucket_limit and then just update the array. tuexen: What using a KASSERT V_tcp_hostcache.hashbase[i].hch_length <= V_tcp_hostcache.bucket_limit and… | |||||
Done Inline ActionsSince this is an advisory output only, I rather hint at a non-catastrophic state (the bucket should no longer grow - but that could be monitored over time, if detected), than stopping the system. If such a invalid state needs to be frozen and analyzed, there are other ways to achieve this already. rscheff: Since this is an advisory output only, I rather hint at a non-catastrophic state (the bucket… | |||||
Done Inline ActionsMy point is that I consider the occurrence of hch_length > V_tcp_hostcache.bucket_limit a bug in the kernel. So I would put a message on the console, not include it in the sysctl output... tuexen: My point is that I consider the occurrence of hch_length > V_tcp_hostcache.bucket_limit a bug… | |||||
for (i = 0; i <= V_tcp_hostcache.bucket_limit; i++) { | |||||
sbuf_printf(&sb, "%u\t%u\n", i, histo[i]); | |||||
} | |||||
error = sbuf_finish(&sb); | |||||
sbuf_delete(&sb); | |||||
free(histo, M_TEMP); | |||||
return(error); | |||||
} | |||||
Done Inline ActionsYou need <= instead of <. tuexen: You need <= instead of <. | |||||
/* | |||||
* Caller has to make sure the curvnet is set properly. | * Caller has to make sure the curvnet is set properly. | ||||
*/ | */ | ||||
static void | static void | ||||
tcp_hc_purge_internal(int all) | tcp_hc_purge_internal(int all) | ||||
{ | { | ||||
struct hc_metrics *hc_entry, *hc_next; | struct hc_metrics *hc_entry, *hc_next; | ||||
int i; | int i; | ||||
for (i = 0; i < V_tcp_hostcache.hashsize; i++) { | for (i = 0; i < V_tcp_hostcache.hashsize; i++) { | ||||
THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); | THC_LOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); | ||||
TAILQ_FOREACH_SAFE(hc_entry, | TAILQ_FOREACH_SAFE(hc_entry, | ||||
&V_tcp_hostcache.hashbase[i].hch_bucket, rmx_q, hc_next) { | &V_tcp_hostcache.hashbase[i].hch_bucket, rmx_q, hc_next) { | ||||
KASSERT(V_tcp_hostcache.hashbase[i].hch_length > 0 && | |||||
V_tcp_hostcache.hashbase[i].hch_length <= | |||||
V_tcp_hostcache.bucket_limit, | |||||
("tcp_hostcache: bucket langth out of range at %u: %u", | |||||
i, V_tcp_hostcache.hashbase[i].hch_length)); | |||||
if (all || hc_entry->rmx_expire <= 0) { | if (all || hc_entry->rmx_expire <= 0) { | ||||
TAILQ_REMOVE(&V_tcp_hostcache.hashbase[i].hch_bucket, | TAILQ_REMOVE(&V_tcp_hostcache.hashbase[i].hch_bucket, | ||||
hc_entry, rmx_q); | hc_entry, rmx_q); | ||||
uma_zfree(V_tcp_hostcache.zone, hc_entry); | uma_zfree(V_tcp_hostcache.zone, hc_entry); | ||||
V_tcp_hostcache.hashbase[i].hch_length--; | V_tcp_hostcache.hashbase[i].hch_length--; | ||||
atomic_subtract_int(&V_tcp_hostcache.cache_count, 1); | atomic_subtract_int(&V_tcp_hostcache.cache_count, 1); | ||||
} else | } else | ||||
hc_entry->rmx_expire -= V_tcp_hostcache.prune; | hc_entry->rmx_expire -= V_tcp_hostcache.prune; | ||||
} | } | ||||
Not Done Inline ActionsThis part of the condition is always true, since hch_length is an unsigned int. tuexen: This part of the condition is always true, since hch_length is an unsigned int. | |||||
Not Done Inline ActionsI would move the KASSERT up, just before it is decremented and check that it is positive and not exceeding the limit. tuexen: I would move the KASSERT up, just before it is decremented and check that it is positive and… | |||||
THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); | THC_UNLOCK(&V_tcp_hostcache.hashbase[i].hch_mtx); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Expire and purge (old|all) entries in the tcp_hostcache. Runs | * Expire and purge (old|all) entries in the tcp_hostcache. Runs | ||||
* periodically from the callout. | * periodically from the callout. | ||||
*/ | */ | ||||
Show All 38 Lines |
I guess this needs to be unsigned int, since hch_length is unsigned.