Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/siftr.c
Show First 20 Lines • Show All 173 Lines • ▼ Show 20 Lines | struct pkt_node { | ||||
struct timeval tval; | struct timeval tval; | ||||
/* Direction pkt is travelling. */ | /* Direction pkt is travelling. */ | ||||
enum { | enum { | ||||
DIR_IN = 0, | DIR_IN = 0, | ||||
DIR_OUT = 1, | DIR_OUT = 1, | ||||
} direction; | } direction; | ||||
/* IP version pkt_node relates to; either INP_IPV4 or INP_IPV6. */ | /* IP version pkt_node relates to; either INP_IPV4 or INP_IPV6. */ | ||||
uint8_t ipver; | uint8_t ipver; | ||||
/* Hash of the pkt which triggered the log message. */ | |||||
uint32_t hash; | |||||
/* Local/foreign IP address. */ | /* Local/foreign IP address. */ | ||||
#ifdef SIFTR_IPV6 | #ifdef SIFTR_IPV6 | ||||
uint32_t ip_laddr[4]; | uint32_t ip_laddr[4]; | ||||
uint32_t ip_faddr[4]; | uint32_t ip_faddr[4]; | ||||
#else | #else | ||||
uint8_t ip_laddr[4]; | uint8_t ip_laddr[4]; | ||||
uint8_t ip_faddr[4]; | uint8_t ip_faddr[4]; | ||||
#endif | #endif | ||||
▲ Show 20 Lines • Show All 73 Lines • ▼ Show 20 Lines | struct siftr_stats | ||||
uint32_t nskip_out_dejavu; | uint32_t nskip_out_dejavu; | ||||
}; | }; | ||||
DPCPU_DEFINE_STATIC(struct siftr_stats, ss); | DPCPU_DEFINE_STATIC(struct siftr_stats, ss); | ||||
static volatile unsigned int siftr_exit_pkt_manager_thread = 0; | static volatile unsigned int siftr_exit_pkt_manager_thread = 0; | ||||
static unsigned int siftr_enabled = 0; | static unsigned int siftr_enabled = 0; | ||||
static unsigned int siftr_pkts_per_log = 1; | static unsigned int siftr_pkts_per_log = 1; | ||||
static unsigned int siftr_generate_hashes = 0; | |||||
static uint16_t siftr_port_filter = 0; | static uint16_t siftr_port_filter = 0; | ||||
/* static unsigned int siftr_binary_log = 0; */ | /* static unsigned int siftr_binary_log = 0; */ | ||||
static char siftr_logfile[PATH_MAX] = "/var/log/siftr.log"; | static char siftr_logfile[PATH_MAX] = "/var/log/siftr.log"; | ||||
static char siftr_logfile_shadow[PATH_MAX] = "/var/log/siftr.log"; | static char siftr_logfile_shadow[PATH_MAX] = "/var/log/siftr.log"; | ||||
static u_long siftr_hashmask; | static u_long siftr_hashmask; | ||||
STAILQ_HEAD(pkthead, pkt_node) pkt_queue = STAILQ_HEAD_INITIALIZER(pkt_queue); | STAILQ_HEAD(pkthead, pkt_node) pkt_queue = STAILQ_HEAD_INITIALIZER(pkt_queue); | ||||
LIST_HEAD(listhead, flow_hash_node) *counter_hash; | LIST_HEAD(listhead, flow_hash_node) *counter_hash; | ||||
static int wait_for_pkt; | static int wait_for_pkt; | ||||
Show All 23 Lines | SYSCTL_PROC(_net_inet_siftr, OID_AUTO, logfile, | ||||
CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &siftr_logfile_shadow, | CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &siftr_logfile_shadow, | ||||
sizeof(siftr_logfile_shadow), &siftr_sysctl_logfile_name_handler, "A", | sizeof(siftr_logfile_shadow), &siftr_sysctl_logfile_name_handler, "A", | ||||
"file to save siftr log messages to"); | "file to save siftr log messages to"); | ||||
SYSCTL_UINT(_net_inet_siftr, OID_AUTO, ppl, CTLFLAG_RW, | SYSCTL_UINT(_net_inet_siftr, OID_AUTO, ppl, CTLFLAG_RW, | ||||
&siftr_pkts_per_log, 1, | &siftr_pkts_per_log, 1, | ||||
"number of packets between generating a log message"); | "number of packets between generating a log message"); | ||||
SYSCTL_UINT(_net_inet_siftr, OID_AUTO, genhashes, CTLFLAG_RW, | |||||
&siftr_generate_hashes, 0, | |||||
"enable packet hash generation"); | |||||
SYSCTL_U16(_net_inet_siftr, OID_AUTO, port_filter, CTLFLAG_RW, | SYSCTL_U16(_net_inet_siftr, OID_AUTO, port_filter, CTLFLAG_RW, | ||||
&siftr_port_filter, 0, | &siftr_port_filter, 0, | ||||
"enable packet filter on a TCP port"); | "enable packet filter on a TCP port"); | ||||
/* XXX: TODO | /* XXX: TODO | ||||
SYSCTL_UINT(_net_inet_siftr, OID_AUTO, binary, CTLFLAG_RW, | SYSCTL_UINT(_net_inet_siftr, OID_AUTO, binary, CTLFLAG_RW, | ||||
&siftr_binary_log, 0, | &siftr_binary_log, 0, | ||||
"write log files in binary instead of ascii"); | "write log files in binary instead of ascii"); | ||||
▲ Show 20 Lines • Show All 118 Lines • ▼ Show 20 Lines | if (pkt_node->ipver == INP_IPV6) { /* IPv6 packet */ | ||||
pkt_node->ip_laddr[2] = ntohl(pkt_node->ip_laddr[2]); | pkt_node->ip_laddr[2] = ntohl(pkt_node->ip_laddr[2]); | ||||
pkt_node->ip_faddr[0] = ntohl(pkt_node->ip_faddr[0]); | pkt_node->ip_faddr[0] = ntohl(pkt_node->ip_faddr[0]); | ||||
pkt_node->ip_faddr[1] = ntohl(pkt_node->ip_faddr[1]); | pkt_node->ip_faddr[1] = ntohl(pkt_node->ip_faddr[1]); | ||||
pkt_node->ip_faddr[2] = ntohl(pkt_node->ip_faddr[2]); | pkt_node->ip_faddr[2] = ntohl(pkt_node->ip_faddr[2]); | ||||
/* Construct an IPv6 log message. */ | /* Construct an IPv6 log message. */ | ||||
log_buf->ae_bytesused = snprintf(log_buf->ae_data, | log_buf->ae_bytesused = snprintf(log_buf->ae_data, | ||||
MAX_LOG_MSG_LEN, | MAX_LOG_MSG_LEN, | ||||
"%c,0x%08x,%zd.%06ld,%x:%x:%x:%x:%x:%x:%x:%x,%u,%x:%x:%x:" | "%c,%zd.%06ld,%x:%x:%x:%x:%x:%x:%x:%x,%u,%x:%x:%x:" | ||||
"%x:%x:%x:%x:%x,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u," | "%x:%x:%x:%x:%x,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u," | ||||
"%u,%u,%u,%u,%u,%u,%u,%u,%u,%u\n", | "%u,%u,%u,%u,%u,%u,%u,%u,%u,%u\n", | ||||
direction[pkt_node->direction], | direction[pkt_node->direction], | ||||
pkt_node->hash, | |||||
pkt_node->tval.tv_sec, | pkt_node->tval.tv_sec, | ||||
pkt_node->tval.tv_usec, | pkt_node->tval.tv_usec, | ||||
UPPER_SHORT(pkt_node->ip_laddr[0]), | UPPER_SHORT(pkt_node->ip_laddr[0]), | ||||
LOWER_SHORT(pkt_node->ip_laddr[0]), | LOWER_SHORT(pkt_node->ip_laddr[0]), | ||||
UPPER_SHORT(pkt_node->ip_laddr[1]), | UPPER_SHORT(pkt_node->ip_laddr[1]), | ||||
LOWER_SHORT(pkt_node->ip_laddr[1]), | LOWER_SHORT(pkt_node->ip_laddr[1]), | ||||
UPPER_SHORT(pkt_node->ip_laddr[2]), | UPPER_SHORT(pkt_node->ip_laddr[2]), | ||||
LOWER_SHORT(pkt_node->ip_laddr[2]), | LOWER_SHORT(pkt_node->ip_laddr[2]), | ||||
Show All 39 Lines | if (pkt_node->ipver == INP_IPV6) { /* IPv6 packet */ | ||||
pkt_node->ip_faddr[1] = SECOND_OCTET(pkt_node->ip_faddr[3]); | pkt_node->ip_faddr[1] = SECOND_OCTET(pkt_node->ip_faddr[3]); | ||||
pkt_node->ip_faddr[2] = THIRD_OCTET(pkt_node->ip_faddr[3]); | pkt_node->ip_faddr[2] = THIRD_OCTET(pkt_node->ip_faddr[3]); | ||||
pkt_node->ip_faddr[3] = FOURTH_OCTET(pkt_node->ip_faddr[3]); | pkt_node->ip_faddr[3] = FOURTH_OCTET(pkt_node->ip_faddr[3]); | ||||
#endif /* SIFTR_IPV6 */ | #endif /* SIFTR_IPV6 */ | ||||
/* Construct an IPv4 log message. */ | /* Construct an IPv4 log message. */ | ||||
log_buf->ae_bytesused = snprintf(log_buf->ae_data, | log_buf->ae_bytesused = snprintf(log_buf->ae_data, | ||||
MAX_LOG_MSG_LEN, | MAX_LOG_MSG_LEN, | ||||
"%c,0x%08x,%jd.%06ld,%u.%u.%u.%u,%u,%u.%u.%u.%u,%u,%u,%u," | "%c,%jd.%06ld,%u.%u.%u.%u,%u,%u.%u.%u.%u,%u,%u,%u," | ||||
"%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u\n", | "%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u,%u\n", | ||||
direction[pkt_node->direction], | direction[pkt_node->direction], | ||||
pkt_node->hash, | |||||
(intmax_t)pkt_node->tval.tv_sec, | (intmax_t)pkt_node->tval.tv_sec, | ||||
pkt_node->tval.tv_usec, | pkt_node->tval.tv_usec, | ||||
pkt_node->ip_laddr[0], | pkt_node->ip_laddr[0], | ||||
pkt_node->ip_laddr[1], | pkt_node->ip_laddr[1], | ||||
pkt_node->ip_laddr[2], | pkt_node->ip_laddr[2], | ||||
pkt_node->ip_laddr[3], | pkt_node->ip_laddr[3], | ||||
ntohs(pkt_node->tcp_localport), | ntohs(pkt_node->tcp_localport), | ||||
pkt_node->ip_faddr[0], | pkt_node->ip_faddr[0], | ||||
▲ Show 20 Lines • Show All 101 Lines • ▼ Show 20 Lines | siftr_pkt_manager_thread(void *arg) | ||||
} | } | ||||
mtx_unlock(&siftr_pkt_mgr_mtx); | mtx_unlock(&siftr_pkt_mgr_mtx); | ||||
/* Calls wakeup on this thread's struct thread ptr. */ | /* Calls wakeup on this thread's struct thread ptr. */ | ||||
kthread_exit(); | kthread_exit(); | ||||
} | } | ||||
static uint32_t | |||||
hash_pkt(struct mbuf *m, uint32_t offset) | |||||
{ | |||||
uint32_t hash; | |||||
hash = 0; | |||||
while (m != NULL && offset > m->m_len) { | |||||
/* | /* | ||||
* The IP packet payload does not start in this mbuf, so | |||||
* need to figure out which mbuf it starts in and what offset | |||||
* into the mbuf's data region the payload starts at. | |||||
*/ | |||||
offset -= m->m_len; | |||||
m = m->m_next; | |||||
} | |||||
while (m != NULL) { | |||||
/* Ensure there is data in the mbuf */ | |||||
if ((m->m_len - offset) > 0) | |||||
hash = hash32_buf(m->m_data + offset, | |||||
m->m_len - offset, hash); | |||||
m = m->m_next; | |||||
offset = 0; | |||||
} | |||||
return (hash); | |||||
} | |||||
/* | |||||
* Check if a given mbuf has the SIFTR mbuf tag. If it does, log the fact that | * Check if a given mbuf has the SIFTR mbuf tag. If it does, log the fact that | ||||
* it's a reinjected packet and return. If it doesn't, tag the mbuf and return. | * it's a reinjected packet and return. If it doesn't, tag the mbuf and return. | ||||
* Return value >0 means the caller should skip processing this mbuf. | * Return value >0 means the caller should skip processing this mbuf. | ||||
*/ | */ | ||||
static inline int | static inline int | ||||
siftr_chkreinject(struct mbuf *m, int dir, struct siftr_stats *ss) | siftr_chkreinject(struct mbuf *m, int dir, struct siftr_stats *ss) | ||||
{ | { | ||||
if (m_tag_locate(m, PACKET_COOKIE_SIFTR, PACKET_TAG_SIFTR, NULL) | if (m_tag_locate(m, PACKET_COOKIE_SIFTR, PACKET_TAG_SIFTR, NULL) | ||||
▲ Show 20 Lines • Show All 248 Lines • ▼ Show 20 Lines | if (dir == PFIL_IN) | ||||
ss->nskip_in_malloc++; | ss->nskip_in_malloc++; | ||||
else | else | ||||
ss->nskip_out_malloc++; | ss->nskip_out_malloc++; | ||||
goto inp_unlock; | goto inp_unlock; | ||||
} | } | ||||
siftr_siftdata(pn, inp, tp, INP_IPV4, dir, inp_locally_locked); | siftr_siftdata(pn, inp, tp, INP_IPV4, dir, inp_locally_locked); | ||||
if (siftr_generate_hashes) { | |||||
if ((*m)->m_pkthdr.csum_flags & CSUM_TCP) { | |||||
/* | |||||
* For outbound packets, the TCP checksum isn't | |||||
* calculated yet. This is a problem for our packet | |||||
* hashing as the receiver will calc a different hash | |||||
* to ours if we don't include the correct TCP checksum | |||||
* in the bytes being hashed. To work around this | |||||
* problem, we manually calc the TCP checksum here in | |||||
* software. We unset the CSUM_TCP flag so the lower | |||||
* layers don't recalc it. | |||||
*/ | |||||
(*m)->m_pkthdr.csum_flags &= ~CSUM_TCP; | |||||
/* | |||||
* Calculate the TCP checksum in software and assign | |||||
* to correct TCP header field, which will follow the | |||||
* packet mbuf down the stack. The trick here is that | |||||
* tcp_output() sets th->th_sum to the checksum of the | |||||
* pseudo header for us already. Because of the nature | |||||
* of the checksumming algorithm, we can sum over the | |||||
* entire IP payload (i.e. TCP header and data), which | |||||
* will include the already calculated pseudo header | |||||
* checksum, thus giving us the complete TCP checksum. | |||||
* | |||||
* To put it in simple terms, if checksum(1,2,3,4)=10, | |||||
* then checksum(1,2,3,4,5) == checksum(10,5). | |||||
* This property is what allows us to "cheat" and | |||||
* checksum only the IP payload which has the TCP | |||||
* th_sum field populated with the pseudo header's | |||||
* checksum, and not need to futz around checksumming | |||||
* pseudo header bytes and TCP header/data in one hit. | |||||
* Refer to RFC 1071 for more info. | |||||
* | |||||
* NB: in_cksum_skip(struct mbuf *m, int len, int skip) | |||||
* in_cksum_skip 2nd argument is NOT the number of | |||||
* bytes to read from the mbuf at "skip" bytes offset | |||||
* from the start of the mbuf (very counter intuitive!). | |||||
* The number of bytes to read is calculated internally | |||||
* by the function as len-skip i.e. to sum over the IP | |||||
* payload (TCP header + data) bytes, it is INCORRECT | |||||
* to call the function like this: | |||||
* in_cksum_skip(at, ip->ip_len - offset, offset) | |||||
* Rather, it should be called like this: | |||||
* in_cksum_skip(at, ip->ip_len, offset) | |||||
* which means read "ip->ip_len - offset" bytes from | |||||
* the mbuf cluster "at" at offset "offset" bytes from | |||||
* the beginning of the "at" mbuf's data pointer. | |||||
*/ | |||||
th->th_sum = in_cksum_skip(*m, ntohs(ip->ip_len), | |||||
ip_hl); | |||||
} | |||||
/* | |||||
* XXX: Having to calculate the checksum in software and then | |||||
* hash over all bytes is really inefficient. Would be nice to | |||||
* find a way to create the hash and checksum in the same pass | |||||
* over the bytes. | |||||
*/ | |||||
pn->hash = hash_pkt(*m, ip_hl); | |||||
} | |||||
mtx_lock(&siftr_pkt_queue_mtx); | mtx_lock(&siftr_pkt_queue_mtx); | ||||
STAILQ_INSERT_TAIL(&pkt_queue, pn, nodes); | STAILQ_INSERT_TAIL(&pkt_queue, pn, nodes); | ||||
mtx_unlock(&siftr_pkt_queue_mtx); | mtx_unlock(&siftr_pkt_queue_mtx); | ||||
goto ret; | goto ret; | ||||
inp_unlock: | inp_unlock: | ||||
if (inp_locally_locked) | if (inp_locally_locked) | ||||
▲ Show 20 Lines • Show All 585 Lines • Show Last 20 Lines |