Index: sbin/ipfw/ipfw.8 =================================================================== --- sbin/ipfw/ipfw.8 +++ sbin/ipfw/ipfw.8 @@ -4757,15 +4757,12 @@ rule which matches the first fragment of the packet. .Pp Packets diverted to userland, and then reinserted by a userland process -may lose various packet attributes. -The packet source interface name -will be preserved if it is shorter than 8 bytes and the userland process -saves and reuses the sockaddr_in +may lose various packet attributes and tags. +An application using a divert socket should reuse the received sockaddr struct (as does -.Xr natd 8 ) ; -otherwise, it may be lost. -If a packet is reinserted in this manner, later rules may be incorrectly -applied, making the order of +.Xr natd 8 ) +to make sure the packet's direction and FIB will be preserved. +Otherwise later rules may be incorrectly applied, making the location of .Cm divert rules in the rule sequence very important. .Pp Index: share/man/man4/divert.4 =================================================================== --- share/man/man4/divert.4 +++ share/man/man4/divert.4 @@ -61,23 +61,45 @@ .Xr recv 2 , or .Xr recvfrom 2 . -In the latter case, the address returned will have its port set to -some tag supplied by the packet diverter, (usually the ipfw rule number) -and the IP address set to the (first) address of -the interface on which the packet was received (if the packet -was incoming) or +.Pp +.Xr recvfrom 2 +will return in +.Pa from +a +.Vt struct sockaddr_div : +.Pp +.Bd -literal +struct sockaddr_div { + uint8_t sdiv_len; + sa_family_t sdiv_family; + in_port_t sdiv_port; + struct in_addr sdiv_addr; + + char sdiv_ifnam[IFNAMSIZ]; + int sdiv_fib; +}; +.Ed +.Pp +.Pa sdiv_port +will be set to a tag supplied by the packet diverter in host byte order (the +current rule number for +.Xr ipfw 8 ). 
+.Pa sdiv_addr +will be set to the (first) IP address of the interface on which the packet was +received if the packet was incoming, or .Dv INADDR_ANY -(if the packet was outgoing). -The interface name (if defined -for the packet) will be placed in the 8 bytes following the address, -if it fits. +if the packet was outgoing. +.Pa sdiv_ifnam +will be set to the name of the receiving interface if available. +.Pa sdiv_fib +will be set to the routing FIB assigned to the packet in host byte order +(usually 0). .Sh WRITING PACKETS Writing to a divert socket is similar to writing to a raw IP socket; the packet is injected ``as is'' into the normal kernel IP packet processing using -.Xr sendto 2 -and minimal error checking is done. -Packets are distinguished as either incoming or outgoing. +.Xr sendto 2 . +Minimal error checking is done. If .Xr sendto 2 is used with a destination IP address of @@ -167,6 +189,59 @@ .Dv INADDR_ANY that was not associated with any interface. .El +.Sh EXAMPLES +The program below will receive packets from the divert socket, print out some +information, and send it back into the divert socket. +See +.Xr ipfw 8 +on how to create a divert socket. +.Bd -literal -compact +#include +#include + +#include + +#include +#include +#include + +int +main(int argc, char **argv) +{ + int fd = socket(PF_INET, SOCK_RAW, IPPROTO_DIVERT); + if (fd == -1) + err(1, "socket()"); + + struct sockaddr_in sockin; + sockin.sin_family = AF_INET; + sockin.sin_addr.s_addr = htonl(INADDR_ANY); + sockin.sin_port = htons(9875); + if (bind(fd, (struct sockaddr*) &sockin, sizeof(sockin)) == -1) + err(1, "bind(%d)", 9875); + + while (1) { + unsigned char buffer[256]; + struct sockaddr_div sdiv; + socklen_t sdivlen = sizeof(sdiv); + + ssize_t sl = recvfrom(fd, &buffer, sizeof(buffer), + 0 /*flags*/, (struct sockaddr *) &sdiv, &sdivlen); + if (sl == -1) + err(1, "recvfrom()"); + + printf("%s packet on %s using FIB %d, snaplen %d\n", + (sdiv.sdiv_addr.s_addr == htonl(INADDR_ANY)? 
"out":"in"), + (*sdiv.sdiv_ifnam? sdiv.sdiv_ifnam:"-"), + sdiv.sdiv_fib, sl); + + ssize_t n = sendto(fd, &buffer, sl, 0, (struct sockaddr*) &sdiv, sdivlen); + if (n == -1) + err(1, "sendto(divert, %d bytes)", sl); + else if (n != sl) + errx(1, "sendto(%d bytes): only wrote %d bytes", sl, n); + } +} +.Ed .Sh SEE ALSO .Xr bind 2 , .Xr recvfrom 2 , Index: sys/netinet/ip_divert.h =================================================================== --- sys/netinet/ip_divert.h +++ sys/netinet/ip_divert.h @@ -40,18 +40,31 @@ /* * divert has no custom kernel-userland API. * - * All communication occurs through a sockaddr_in socket where + * All communication occurs through a sockaddr_divert socket where * * kernel-->userland - * sin_port = matching rule, host format; - * sin_addr = IN: first address of the incoming interface; + * sdiv_port = matching rule, host format + * sdiv_addr = IN: first address of the incoming interface; * OUT: INADDR_ANY - * sin_zero = if fits, the interface name (max 7 bytes + NUL) + * sdiv_ifnam = the interface name + * sdiv_fib = FIB from packet * * userland->kernel - * sin_port = restart-rule - 1, host order - * (we restart at sin_port + 1) - * sin_addr = IN: address of the incoming interface; + * sdiv_port = restart-rule - 1, host order + * (we restart at sdiv_port + 1) + * sdiv_addr = IN: address of the incoming interface * OUT: INADDR_ANY + * sdiv_ifnam = don't care + * sdiv_fib = FIB to set on packet */ + +struct sockaddr_div { + uint8_t sdiv_len; + sa_family_t sdiv_family; + in_port_t sdiv_port; + struct in_addr sdiv_addr; + + char sdiv_ifnam[IFNAMSIZ]; // incoming interface; or "\0" + int sdiv_fib; // routing fib +}; #endif /* _NETINET_IP_DIVERT_H_ */ Index: sys/netinet/ip_divert.c =================================================================== --- sys/netinet/ip_divert.c +++ sys/netinet/ip_divert.c @@ -72,6 +72,8 @@ #ifdef SCTP #include #endif +#include +#include #include /* @@ -92,22 +94,23 @@ * the packet filter) and information on 
the matching filter rule for * subsequent reinjection. The divert_port is used to put the packet * on the corresponding divert socket, while the rule number is passed - * up (at least partially) as the sin_port in the struct sockaddr. + * up as the sdiv_port in the struct sockaddr. * - * Packets written to the divert socket carry in sin_addr a - * destination address, and in sin_port the number of the filter rule + * Packets written to the divert socket carry in sdiv_addr a + * destination address, and in sdiv_port the number of the filter rule * after which to continue processing. * If the destination address is INADDR_ANY, the packet is treated as * as outgoing and sent to ip_output(); otherwise it is treated as - * incoming and sent to ip_input(). - * Further, sin_zero carries some information on the interface, - * which can be used in the reinject -- see comments in the code. + * incoming and sent to ip_input(). sdiv_ifnam and sdiv_fib are set up + * with the interface name the packet came in on, and the FIB number on the + * packet. * * On reinjection, processing in ip_input() and ip_output() * will be exactly the same as for the original packet, except that * packet filter processing will start at the rule number after the one - * written in the sin_port (ipfw does not allow a rule #0, so sin_port=0 - * will apply the entire ruleset to the packet). + * written in the sdiv_port (ipfw does not allow a rule #0, so sdiv_port=0 + * will apply the entire ruleset to the packet). sdiv_fib is set on the + * outgoing packet. */ /* Internal variables. 
*/ @@ -123,7 +126,7 @@ static eventhandler_tag ip_divert_event_tag; static int div_output_inbound(int fmaily, struct socket *so, struct mbuf *m, - struct sockaddr_in *sin); + struct sockaddr_div *sdiv); static int div_output_outbound(int family, struct socket *so, struct mbuf *m); /* @@ -194,7 +197,7 @@ struct inpcb *inp; struct socket *sa; u_int16_t nport; - struct sockaddr_in divsrc; + struct sockaddr_div divsrc; struct m_tag *mtag; NET_EPOCH_ASSERT(); @@ -222,10 +225,12 @@ } #endif bzero(&divsrc, sizeof(divsrc)); - divsrc.sin_len = sizeof(divsrc); - divsrc.sin_family = AF_INET; - /* record matching rule, in host format */ - divsrc.sin_port = ((struct ipfw_rule_ref *)(mtag+1))->rulenum; + divsrc.sdiv_len = sizeof(divsrc); + divsrc.sdiv_family = AF_INET; + /* record matching rule, in host byte order */ + divsrc.sdiv_port = ((struct ipfw_rule_ref *)(mtag+1))->rulenum; + /* record fib for packet */ + divsrc.sdiv_fib = M_GETFIB(m); /* * Record receive interface address, if any. * But only for incoming packets. @@ -242,7 +247,7 @@ CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_INET) continue; - divsrc.sin_addr = + divsrc.sdiv_addr = ((struct sockaddr_in *) ifa->ifa_addr)->sin_addr; break; } @@ -252,13 +257,6 @@ */ if (m->m_pkthdr.rcvif) { /* - * Hide the actual interface name in there in the - * sin_zero array. XXX This needs to be moved to a - * different sockaddr type for divert, e.g. - * sockaddr_div with multiple fields like - * sockaddr_dl. Presently we have only 7 bytes - * but that will do for now as most interfaces - * are 4 or less + 2 or less bytes for unit. * There is probably a faster way of doing this, * possibly taking it from the sockaddr_dl on the iface. * This solves the problem of a P2P link and a LAN interface @@ -269,8 +267,8 @@ * this iface name will come along for the ride. * (see div_output for the other half of this.) 
*/ - strlcpy(divsrc.sin_zero, m->m_pkthdr.rcvif->if_xname, - sizeof(divsrc.sin_zero)); + strlcpy(divsrc.sdiv_ifnam, m->m_pkthdr.rcvif->if_xname, + sizeof(divsrc.sdiv_ifnam)); } /* Put packet on socket queue, if any */ @@ -308,7 +306,7 @@ * the interface with that address. */ static int -div_output(struct socket *so, struct mbuf *m, struct sockaddr_in *sin, +div_output(struct socket *so, struct mbuf *m, struct sockaddr_div *sdiv, struct mbuf *control) { struct epoch_tracker et; @@ -318,8 +316,7 @@ int error, family; /* - * An mbuf may hasn't come from userland, but we pretend - * that it has. + * An mbuf may not have come from userland, but we pretend that it has. */ m->m_pkthdr.rcvif = NULL; m->m_nextpkt = NULL; @@ -342,7 +339,7 @@ dt = (struct ipfw_rule_ref *)(mtag+1); /* Loopback avoidance and state recovery */ - if (sin) { + if (sdiv) { int i; /* set the starting point. We provide a non-zero slot, @@ -351,19 +348,18 @@ */ dt->slot = 1; /* dummy, chain_id is invalid */ dt->chain_id = 0; - dt->rulenum = sin->sin_port+1; /* host format ? */ + dt->rulenum = sdiv->sdiv_port+1; /* host byte order */ dt->rule_id = 0; /* XXX: broken for IPv6 */ /* - * Find receive interface with the given name, stuffed - * (if it exists) in the sin_zero[] field. - * The name is user supplied data so don't trust its size - * or that it is zero terminated. + * Find receive interface with the given name. The name is user + * supplied data so don't trust its size or that it is zero + * terminated. 
*/ - for (i = 0; i < sizeof(sin->sin_zero) && sin->sin_zero[i]; i++) + for (i = 0; i < sizeof(sdiv->sdiv_ifnam) && sdiv->sdiv_ifnam[i]; i++) ; - if ( i > 0 && i < sizeof(sin->sin_zero)) - m->m_pkthdr.rcvif = ifunit(sin->sin_zero); + if (i > 0 && i < sizeof(sdiv->sdiv_ifnam)) + m->m_pkthdr.rcvif = ifunit(sdiv->sdiv_ifnam); } ip = mtod(m, struct ip *); @@ -381,12 +377,12 @@ /* Reinject packet into the system as incoming or outgoing */ NET_EPOCH_ENTER(et); - if (!sin || sin->sin_addr.s_addr == 0) { + if (!sdiv || sdiv->sdiv_addr.s_addr == 0) { dt->info |= IPFW_IS_DIVERT | IPFW_INFO_OUT; error = div_output_outbound(family, so, m); } else { dt->info |= IPFW_IS_DIVERT | IPFW_INFO_IN; - error = div_output_inbound(family, so, m, sin); + error = div_output_inbound(family, so, m, sdiv); } NET_EPOCH_EXIT(et); @@ -505,7 +501,7 @@ */ static int div_output_inbound(int family, struct socket *so, struct mbuf *m, - struct sockaddr_in *sin) + struct sockaddr_div *sdiv) { const struct ip *ip; struct ifaddr *ifa; @@ -517,14 +513,14 @@ * there are no distractions for ifa_ifwithaddr. */ - /* XXX: broken for IPv6 */ - bzero(sin->sin_zero, sizeof(sin->sin_zero)); - sin->sin_port = 0; - ifa = ifa_ifwithaddr((struct sockaddr *) sin); - if (ifa == NULL) - return (EADDRNOTAVAIL); - m->m_pkthdr.rcvif = ifa->ifa_ifp; - } + /* XXX: broken for IPv6 */ + bzero(sdiv->sdiv_ifnam, sizeof(sdiv->sdiv_ifnam)); + sdiv->sdiv_port = 0; + ifa = ifa_ifwithaddr((struct sockaddr *) sdiv); + if (ifa == NULL) + return (EADDRNOTAVAIL); + m->m_pkthdr.rcvif = ifa->ifa_ifp; + } #ifdef MAC mac_socket_create_mbuf(so, m); #endif @@ -611,7 +607,7 @@ * and in_pcbbind requires a valid address. Since divert * sockets don't we need to make sure the address is * filled in properly. - * XXX -- divert should not be abusing in_pcbind + * XXX -- divert should not be abusing in_pcbbind * and should probably have its own family. 
*/ if (nam->sa_family != AF_INET) @@ -654,7 +650,7 @@ } /* Send packet */ - return div_output(so, m, (struct sockaddr_in *)nam, control); + return div_output(so, m, (struct sockaddr_div *)nam, control); } static void