Changeset View
Changeset View
Standalone View
Standalone View
sys/netinet/ip_divert.c
Show First 20 Lines • Show All 105 Lines • ▼ Show 20 Lines | |||||
* On reinjection, processing in ip_input() and ip_output() | * On reinjection, processing in ip_input() and ip_output() | ||||
* will be exactly the same as for the original packet, except that | * will be exactly the same as for the original packet, except that | ||||
* packet filter processing will start at the rule number after the one | * packet filter processing will start at the rule number after the one | ||||
* written in the sin_port (ipfw does not allow a rule #0, so sin_port=0 | * written in the sin_port (ipfw does not allow a rule #0, so sin_port=0 | ||||
* will apply the entire ruleset to the packet). | * will apply the entire ruleset to the packet). | ||||
*/ | */ | ||||
/* Internal variables. */ | /* Internal variables. */ | ||||
VNET_DEFINE_STATIC(struct inpcbhead, divcb); | |||||
VNET_DEFINE_STATIC(struct inpcbinfo, divcbinfo); | VNET_DEFINE_STATIC(struct inpcbinfo, divcbinfo); | ||||
#define V_divcb VNET(divcb) | |||||
#define V_divcbinfo VNET(divcbinfo) | #define V_divcbinfo VNET(divcbinfo) | ||||
static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ | static u_long div_sendspace = DIVSNDQ; /* XXX sysctl ? */ | ||||
static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ | static u_long div_recvspace = DIVRCVQ; /* XXX sysctl ? */ | ||||
static eventhandler_tag ip_divert_event_tag; | static eventhandler_tag ip_divert_event_tag; | ||||
static int div_output_inbound(int fmaily, struct socket *so, struct mbuf *m, | static int div_output_inbound(int fmaily, struct socket *so, struct mbuf *m, | ||||
Show All 23 Lines | |||||
div_init(void) | div_init(void) | ||||
{ | { | ||||
/* | /* | ||||
* XXX We don't use the hash list for divert IP, but it's easier to | * XXX We don't use the hash list for divert IP, but it's easier to | ||||
* allocate one-entry hash lists than it is to check all over the | * allocate one-entry hash lists than it is to check all over the | ||||
* place for hashbase == NULL. | * place for hashbase == NULL. | ||||
*/ | */ | ||||
in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb", | in_pcbinfo_init(&V_divcbinfo, "div", 1, 1, "divcb", div_inpcb_init); | ||||
div_inpcb_init, IPI_HASHFIELDS_NONE); | |||||
} | } | ||||
static void | static void | ||||
div_destroy(void *unused __unused) | div_destroy(void *unused __unused) | ||||
{ | { | ||||
in_pcbinfo_destroy(&V_divcbinfo); | in_pcbinfo_destroy(&V_divcbinfo); | ||||
} | } | ||||
Show All 9 Lines | |||||
{ | { | ||||
struct mbuf *m = *mp; | struct mbuf *m = *mp; | ||||
KMOD_IPSTAT_INC(ips_noproto); | KMOD_IPSTAT_INC(ips_noproto); | ||||
m_freem(m); | m_freem(m); | ||||
return (IPPROTO_DONE); | return (IPPROTO_DONE); | ||||
} | } | ||||
static bool | |||||
div_port_match(const struct inpcb *inp, void *v) | |||||
{ | |||||
uint16_t nport = *(uint16_t *)v; | |||||
return (inp->inp_lport == nport); | |||||
} | |||||
/* | /* | ||||
* Divert a packet by passing it up to the divert socket at port 'port'. | * Divert a packet by passing it up to the divert socket at port 'port'. | ||||
* | * | ||||
* Setup generic address and protocol structures for div_input routine, | * Setup generic address and protocol structures for div_input routine, | ||||
* then pass them along with mbuf chain. | * then pass them along with mbuf chain. | ||||
*/ | */ | ||||
static void | static void | ||||
divert_packet(struct mbuf *m, bool incoming) | divert_packet(struct mbuf *m, bool incoming) | ||||
{ | { | ||||
struct ip *ip; | struct ip *ip; | ||||
struct inpcb *inp; | struct inpcb *inp; | ||||
struct socket *sa; | struct socket *sa; | ||||
u_int16_t nport; | u_int16_t nport; | ||||
struct sockaddr_in divsrc; | struct sockaddr_in divsrc; | ||||
struct inpcb_iterator inpi = INP_ITERATOR(&V_divcbinfo, | |||||
INPLOOKUP_RLOCKPCB, div_port_match, &nport); | |||||
struct m_tag *mtag; | struct m_tag *mtag; | ||||
NET_EPOCH_ASSERT(); | NET_EPOCH_ASSERT(); | ||||
mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); | mtag = m_tag_locate(m, MTAG_IPFW_RULE, 0, NULL); | ||||
if (mtag == NULL) { | if (mtag == NULL) { | ||||
m_freem(m); | m_freem(m); | ||||
return; | return; | ||||
▲ Show 20 Lines • Show All 89 Lines • ▼ Show 20 Lines | if (m->m_pkthdr.rcvif) { | ||||
* (see div_output for the other half of this.) | * (see div_output for the other half of this.) | ||||
*/ | */ | ||||
strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname, | strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname, | ||||
sizeof(divsrc.sin_zero)); | sizeof(divsrc.sin_zero)); | ||||
} | } | ||||
/* Put packet on socket queue, if any */ | /* Put packet on socket queue, if any */ | ||||
sa = NULL; | sa = NULL; | ||||
/* nport is inp_next's context. */ | |||||
nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info)); | nport = htons((u_int16_t)(((struct ipfw_rule_ref *)(mtag+1))->info)); | ||||
CK_LIST_FOREACH(inp, &V_divcb, inp_list) { | while ((inp = inp_next(&inpi)) != NULL) { | ||||
/* XXX why does only one socket match? */ | |||||
if (inp->inp_lport == nport) { | |||||
INP_RLOCK(inp); | |||||
if (__predict_false(inp->inp_flags2 & INP_FREED)) { | |||||
INP_RUNLOCK(inp); | |||||
continue; | |||||
} | |||||
sa = inp->inp_socket; | sa = inp->inp_socket; | ||||
SOCKBUF_LOCK(&sa->so_rcv); | SOCKBUF_LOCK(&sa->so_rcv); | ||||
if (sbappendaddr_locked(&sa->so_rcv, | if (sbappendaddr_locked(&sa->so_rcv, | ||||
(struct sockaddr *)&divsrc, m, | (struct sockaddr *)&divsrc, m, NULL) == 0) { | ||||
(struct mbuf *)0) == 0) { | |||||
soroverflow_locked(sa); | soroverflow_locked(sa); | ||||
sa = NULL; /* force mbuf reclaim below */ | sa = NULL; /* force mbuf reclaim below */ | ||||
} else | } else | ||||
sorwakeup_locked(sa); | sorwakeup_locked(sa); | ||||
/* XXX why does only one socket match? */ | |||||
INP_RUNLOCK(inp); | INP_RUNLOCK(inp); | ||||
break; | break; | ||||
} | } | ||||
} | |||||
if (sa == NULL) { | if (sa == NULL) { | ||||
m_freem(m); | m_freem(m); | ||||
KMOD_IPSTAT_INC(ips_noproto); | KMOD_IPSTAT_INC(ips_noproto); | ||||
KMOD_IPSTAT_DEC(ips_delivered); | KMOD_IPSTAT_DEC(ips_delivered); | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
▲ Show 20 Lines • Show All 277 Lines • ▼ Show 20 Lines | div_attach(struct socket *so, int proto, struct thread *td) | ||||
if (td != NULL) { | if (td != NULL) { | ||||
error = priv_check(td, PRIV_NETINET_DIVERT); | error = priv_check(td, PRIV_NETINET_DIVERT); | ||||
if (error) | if (error) | ||||
return (error); | return (error); | ||||
} | } | ||||
error = soreserve(so, div_sendspace, div_recvspace); | error = soreserve(so, div_sendspace, div_recvspace); | ||||
if (error) | if (error) | ||||
return error; | return error; | ||||
INP_INFO_WLOCK(&V_divcbinfo); | |||||
error = in_pcballoc(so, &V_divcbinfo); | error = in_pcballoc(so, &V_divcbinfo); | ||||
if (error) { | if (error) | ||||
INP_INFO_WUNLOCK(&V_divcbinfo); | |||||
return error; | return error; | ||||
} | |||||
inp = (struct inpcb *)so->so_pcb; | inp = (struct inpcb *)so->so_pcb; | ||||
INP_INFO_WUNLOCK(&V_divcbinfo); | |||||
inp->inp_ip_p = proto; | inp->inp_ip_p = proto; | ||||
inp->inp_vflag |= INP_IPV4; | inp->inp_vflag |= INP_IPV4; | ||||
inp->inp_flags |= INP_HDRINCL; | inp->inp_flags |= INP_HDRINCL; | ||||
INP_WUNLOCK(inp); | INP_WUNLOCK(inp); | ||||
return 0; | return 0; | ||||
} | } | ||||
static void | static void | ||||
div_detach(struct socket *so) | div_detach(struct socket *so) | ||||
{ | { | ||||
struct inpcb *inp; | struct inpcb *inp; | ||||
inp = sotoinpcb(so); | inp = sotoinpcb(so); | ||||
KASSERT(inp != NULL, ("div_detach: inp == NULL")); | KASSERT(inp != NULL, ("div_detach: inp == NULL")); | ||||
INP_INFO_WLOCK(&V_divcbinfo); | |||||
INP_WLOCK(inp); | INP_WLOCK(inp); | ||||
in_pcbdetach(inp); | in_pcbdetach(inp); | ||||
in_pcbfree(inp); | in_pcbfree(inp); | ||||
INP_INFO_WUNLOCK(&V_divcbinfo); | |||||
} | } | ||||
static int | static int | ||||
div_bind(struct socket *so, struct sockaddr *nam, struct thread *td) | div_bind(struct socket *so, struct sockaddr *nam, struct thread *td) | ||||
{ | { | ||||
struct inpcb *inp; | struct inpcb *inp; | ||||
int error; | int error; | ||||
inp = sotoinpcb(so); | inp = sotoinpcb(so); | ||||
KASSERT(inp != NULL, ("div_bind: inp == NULL")); | KASSERT(inp != NULL, ("div_bind: inp == NULL")); | ||||
/* in_pcbbind assumes that nam is a sockaddr_in | /* in_pcbbind assumes that nam is a sockaddr_in | ||||
* and in_pcbbind requires a valid address. Since divert | * and in_pcbbind requires a valid address. Since divert | ||||
* sockets don't we need to make sure the address is | * sockets don't we need to make sure the address is | ||||
* filled in properly. | * filled in properly. | ||||
* XXX -- divert should not be abusing in_pcbind | * XXX -- divert should not be abusing in_pcbind | ||||
* and should probably have its own family. | * and should probably have its own family. | ||||
*/ | */ | ||||
if (nam->sa_family != AF_INET) | if (nam->sa_family != AF_INET) | ||||
return EAFNOSUPPORT; | return EAFNOSUPPORT; | ||||
if (nam->sa_len != sizeof(struct sockaddr_in)) | if (nam->sa_len != sizeof(struct sockaddr_in)) | ||||
return EINVAL; | return EINVAL; | ||||
((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY; | ((struct sockaddr_in *)nam)->sin_addr.s_addr = INADDR_ANY; | ||||
INP_INFO_WLOCK(&V_divcbinfo); | |||||
INP_WLOCK(inp); | INP_WLOCK(inp); | ||||
INP_HASH_WLOCK(&V_divcbinfo); | INP_HASH_WLOCK(&V_divcbinfo); | ||||
error = in_pcbbind(inp, nam, td->td_ucred); | error = in_pcbbind(inp, nam, td->td_ucred); | ||||
INP_HASH_WUNLOCK(&V_divcbinfo); | INP_HASH_WUNLOCK(&V_divcbinfo); | ||||
INP_WUNLOCK(inp); | INP_WUNLOCK(inp); | ||||
INP_INFO_WUNLOCK(&V_divcbinfo); | |||||
return error; | return error; | ||||
} | } | ||||
static int | static int | ||||
div_shutdown(struct socket *so) | div_shutdown(struct socket *so) | ||||
{ | { | ||||
struct inpcb *inp; | struct inpcb *inp; | ||||
Show All 22 Lines | div_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, | ||||
/* Send packet */ | /* Send packet */ | ||||
return div_output(so, m, (struct sockaddr_in *)nam, control); | return div_output(so, m, (struct sockaddr_in *)nam, control); | ||||
} | } | ||||
static int | static int | ||||
div_pcblist(SYSCTL_HANDLER_ARGS) | div_pcblist(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
struct inpcb_iterator inpi = INP_ALL_ITERATOR(&V_divcbinfo, | |||||
INPLOOKUP_RLOCKPCB); | |||||
struct xinpgen xig; | struct xinpgen xig; | ||||
struct epoch_tracker et; | |||||
struct inpcb *inp; | struct inpcb *inp; | ||||
int error; | int error; | ||||
if (req->newptr != 0) | if (req->newptr != 0) | ||||
return EPERM; | return EPERM; | ||||
if (req->oldptr == 0) { | if (req->oldptr == 0) { | ||||
int n; | int n; | ||||
Show All 11 Lines | div_pcblist(SYSCTL_HANDLER_ARGS) | ||||
xig.xig_len = sizeof xig; | xig.xig_len = sizeof xig; | ||||
xig.xig_count = V_divcbinfo.ipi_count; | xig.xig_count = V_divcbinfo.ipi_count; | ||||
xig.xig_gen = V_divcbinfo.ipi_gencnt; | xig.xig_gen = V_divcbinfo.ipi_gencnt; | ||||
xig.xig_sogen = so_gencnt; | xig.xig_sogen = so_gencnt; | ||||
error = SYSCTL_OUT(req, &xig, sizeof xig); | error = SYSCTL_OUT(req, &xig, sizeof xig); | ||||
if (error) | if (error) | ||||
return error; | return error; | ||||
NET_EPOCH_ENTER(et); | while ((inp = inp_next(&inpi)) != NULL) { | ||||
for (inp = CK_LIST_FIRST(V_divcbinfo.ipi_listhead); | |||||
inp != NULL; | |||||
inp = CK_LIST_NEXT(inp, inp_list)) { | |||||
INP_RLOCK(inp); | |||||
if (inp->inp_gencnt <= xig.xig_gen) { | if (inp->inp_gencnt <= xig.xig_gen) { | ||||
struct xinpcb xi; | struct xinpcb xi; | ||||
in_pcbtoxinpcb(inp, &xi); | in_pcbtoxinpcb(inp, &xi); | ||||
INP_RUNLOCK(inp); | |||||
error = SYSCTL_OUT(req, &xi, sizeof xi); | error = SYSCTL_OUT(req, &xi, sizeof xi); | ||||
} else | if (error) { | ||||
INP_RUNLOCK(inp); | INP_RUNLOCK(inp); | ||||
break; | |||||
} | } | ||||
NET_EPOCH_EXIT(et); | } | ||||
} | |||||
if (!error) { | if (!error) { | ||||
/* | /* | ||||
* Give the user an updated idea of our state. | * Give the user an updated idea of our state. | ||||
* If the generation differs from what we told | * If the generation differs from what we told | ||||
* her before, she knows that something happened | * her before, she knows that something happened | ||||
* while we were processing this request, and it | * while we were processing this request, and it | ||||
* might be necessary to retry. | * might be necessary to retry. | ||||
▲ Show 20 Lines • Show All 110 Lines • Show Last 20 Lines |