diff --git a/lib/libc/sys/socket.2 b/lib/libc/sys/socket.2 --- a/lib/libc/sys/socket.2 +++ b/lib/libc/sys/socket.2 @@ -28,7 +28,7 @@ .\" From: @(#)socket.2 8.1 (Berkeley) 6/4/93 .\" $FreeBSD$ .\" -.Dd August 26, 2022 +.Dd August 30, 2022 .Dt SOCKET 2 .Os .Sh NAME @@ -60,6 +60,7 @@ PF_UNIX Host-internal protocols, PF_INET Internet version 4 protocols, PF_INET6 Internet version 6 protocols, +PF_DIVERT Firewall packet diversion/re-injection, PF_ROUTE Internal routing protocol, PF_KEY Internal key-management function, PF_NETGRAPH Netgraph sockets, @@ -283,6 +284,7 @@ .Xr accept 2 , .Xr bind 2 , .Xr connect 2 , +.Xr divert 4 , .Xr getpeername 2 , .Xr getsockname 2 , .Xr getsockopt 2 , diff --git a/share/examples/netgraph/ngctl b/share/examples/netgraph/ngctl --- a/share/examples/netgraph/ngctl +++ b/share/examples/netgraph/ngctl @@ -31,10 +31,10 @@ quit Exit program + -# Now let's create a ng_ksocket(4) node, in the family PF_INET, -# of type SOCK_RAW, and protocol IPPROTO_DIVERT: +# Now let's create a ng_ksocket(4) node, in the family PF_DIVERT, +# of type SOCK_RAW: - + mkpeer ksocket foo inet/raw/divert + + mkpeer ksocket foo divert/raw/0 # Note that ``foo'' is the hook name on the socket node, which can be # anything. 
The ``inet/raw/divert'' is the hook name on the ksocket diff --git a/share/man/man4/divert.4 b/share/man/man4/divert.4 --- a/share/man/man4/divert.4 +++ b/share/man/man4/divert.4 @@ -1,6 +1,6 @@ .\" $FreeBSD$ .\" -.Dd December 17, 2004 +.Dd August 30, 2022 .Dt DIVERT 4 .Os .Sh NAME @@ -11,7 +11,7 @@ .In sys/socket.h .In netinet/in.h .Ft int -.Fn socket PF_INET SOCK_RAW IPPROTO_DIVERT +.Fn socket PF_DIVERT SOCK_RAW 0 .Pp To enable support for divert sockets, place the following lines in the kernel configuration file: @@ -30,24 +30,30 @@ ipdivert_load="YES" .Ed .Sh DESCRIPTION -Divert sockets are similar to raw IP sockets, except that they -can be bound to a specific +Divert sockets allow intercepting and re-injecting packets flowing through +the +.Xr ipfw 4 +firewall. +A divert socket can be bound to a specific .Nm port via the .Xr bind 2 system call. -The IP address in the bind is ignored; only the port -number is significant. +The sockaddr argument shall be sockaddr_in with sin_port set to the +desired value. +Note that the +.Nm +port has nothing to do with TCP/UDP ports. +It is just a cookie number that allows differentiating between different +divert points in the +.Xr ipfw 4 +ruleset. A divert socket bound to a divert port will receive all packets diverted -to that port by some (here unspecified) kernel mechanism(s). -Packets may also be written to a divert port, in which case they -re-enter kernel IP packet processing. +to that port by +.Xr ipfw 4 . +Packets may also be written to a divert port, in which case they re-enter +firewall processing at the next rule. .Pp -Divert sockets are normally used in conjunction with -.Fx Ns 's -packet filtering implementation and the -.Xr ipfw 8 -program. By reading from and writing to a divert socket, matching packets can be passed through an arbitrary ``filter'' as they travel through the host machine, special routing tricks can be done, etc. 
@@ -154,7 +160,9 @@ Otherwise, all header fields are unchanged (and therefore in network order). .Pp Binding to port numbers less than 1024 requires super-user access, as does -creating a socket of type SOCK_RAW. +creating a +.Nm +socket. .Sh ERRORS Writing to a divert socket can return these errors, along with the usual errors possible when writing raw packets: diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -509,6 +509,17 @@ struct socket *so; int error; + /* + * XXX: divert(4) historically abused PF_INET. Keep this compatibility + * shim until all applications have been updated. + */ + if (__predict_false(dom == PF_INET && type == SOCK_RAW && + proto == IPPROTO_DIVERT)) { + dom = PF_DIVERT; + printf("%s uses obsolete way to create divert(4) socket\n", + td->td_proc->p_comm); + } + if (proto) prp = pffindproto(dom, proto, type); else diff --git a/sys/netgraph/ng_ksocket.c b/sys/netgraph/ng_ksocket.c --- a/sys/netgraph/ng_ksocket.c +++ b/sys/netgraph/ng_ksocket.c @@ -121,6 +121,7 @@ { "inet", PF_INET }, { "inet6", PF_INET6 }, { "atm", PF_ATM }, + { "divert", PF_DIVERT }, { NULL, -1 }, }; @@ -147,7 +148,6 @@ { "ah", IPPROTO_AH, PF_INET }, { "swipe", IPPROTO_SWIPE, PF_INET }, { "encap", IPPROTO_ENCAP, PF_INET }, - { "divert", IPPROTO_DIVERT, PF_INET }, { "pim", IPPROTO_PIM, PF_INET }, { NULL, -1 }, }; diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c --- a/sys/netinet/in_mcast.c +++ b/sys/netinet/in_mcast.c @@ -1751,13 +1751,9 @@ INP_WLOCK(inp); imo = inp->inp_moptions; - /* - * If socket is neither of type SOCK_RAW or SOCK_DGRAM, - * or is a divert socket, reject it. - */ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { + /* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. 
*/ + if (inp->inp_socket->so_proto->pr_type != SOCK_RAW && + inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } @@ -2717,13 +2713,9 @@ error = 0; - /* - * If socket is neither of type SOCK_RAW or SOCK_DGRAM, - * or is a divert socket, reject it. - */ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) + /* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. */ + if (inp->inp_socket->so_proto->pr_type != SOCK_RAW && + inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) return (EOPNOTSUPP); switch (sopt->sopt_name) { diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -36,7 +36,7 @@ #include "opt_inet6.h" #include "opt_sctp.h" #ifndef INET -#error "IPDIVERT requires INET" +#error "IPDIVERT requires INET" /* XXX! */ #endif #include @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include @@ -716,7 +717,6 @@ static struct protosw div_protosw = { .pr_type = SOCK_RAW, - .pr_protocol = IPPROTO_DIVERT, .pr_flags = PR_ATOMIC|PR_ADDR, .pr_attach = div_attach, .pr_bind = div_bind, @@ -729,6 +729,13 @@ .pr_sosetlabel = in_pcbsosetlabel }; +static struct domain divertdomain = { + .dom_family = PF_DIVERT, + .dom_name = "divert", + .dom_nprotosw = 1, + .dom_protosw = { &div_protosw }, +}; + static int div_modevent(module_t mod, int type, void *unused) { @@ -736,12 +743,7 @@ switch (type) { case MOD_LOAD: - /* - * Protocol will be initialized by pf_proto_register(). - */ - err = protosw_register(&inetdomain, &div_protosw); - if (err != 0) - return (err); + domain_add(&divertdomain); ip_divert_ptr = divert_packet; break; case MOD_QUIESCE: @@ -763,6 +765,9 @@ * XXXRW: Note that there is a slight race here, as a new * socket open request could be spinning on the lock and then * we destroy the lock. 
+ * + * XXXGL: One more reason this code is incorrect is that it + * checks only the current vnet. */ INP_INFO_WLOCK(&V_divcbinfo); if (V_divcbinfo.ipi_count != 0) { @@ -771,7 +776,7 @@ break; } ip_divert_ptr = NULL; - err = protosw_unregister(&div_protosw); + domain_remove(&divertdomain); INP_INFO_WUNLOCK(&V_divcbinfo); #ifndef VIMAGE div_destroy(NULL); diff --git a/sys/netinet6/in6_mcast.c b/sys/netinet6/in6_mcast.c --- a/sys/netinet6/in6_mcast.c +++ b/sys/netinet6/in6_mcast.c @@ -1772,13 +1772,9 @@ INP_WLOCK(inp); im6o = inp->in6p_moptions; - /* - * If socket is neither of type SOCK_RAW or SOCK_DGRAM, - * or is a divert socket, reject it. - */ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { + /* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. */ + if (inp->inp_socket->so_proto->pr_type != SOCK_RAW && + inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) { INP_WUNLOCK(inp); return (EOPNOTSUPP); } @@ -2655,13 +2651,9 @@ error = 0; - /* - * If socket is neither of type SOCK_RAW or SOCK_DGRAM, - * or is a divert socket, reject it. - */ - if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || - (inp->inp_socket->so_proto->pr_type != SOCK_RAW && - inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) + /* If socket is neither of type SOCK_RAW or SOCK_DGRAM, reject it. 
*/ + if (inp->inp_socket->so_proto->pr_type != SOCK_RAW && + inp->inp_socket->so_proto->pr_type != SOCK_DGRAM) return (EOPNOTSUPP); switch (sopt->sopt_name) { diff --git a/sys/sys/socket.h b/sys/sys/socket.h --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -268,7 +268,8 @@ #define AF_INET_SDP 40 /* OFED Socket Direct Protocol ipv4 */ #define AF_INET6_SDP 42 /* OFED Socket Direct Protocol ipv6 */ #define AF_HYPERV 43 /* HyperV sockets */ -#define AF_MAX 43 +#define AF_DIVERT 44 /* divert(4) */ +#define AF_MAX 44 /* * When allocating a new AF_ constant, please only allocate * even numbered constants for FreeBSD until 134 as odd numbered AF_ @@ -393,6 +394,7 @@ #define PF_NETLINK AF_NETLINK #define PF_INET_SDP AF_INET_SDP #define PF_INET6_SDP AF_INET6_SDP +#define PF_DIVERT AF_DIVERT #define PF_MAX AF_MAX diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c --- a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -109,15 +109,14 @@ case IPPROTO_UDP: mibvar = "net.inet.udp.pcblist"; break; - case IPPROTO_DIVERT: - mibvar = "net.inet.divert.pcblist"; - break; default: mibvar = "net.inet.raw.pcblist"; break; } if (strncmp(name, "sdp", 3) == 0) mibvar = "net.inet.sdp.pcblist"; + else if (strncmp(name, "divert", 6) == 0) + mibvar = "net.inet.divert.pcblist"; len = 0; if (sysctlbyname(mibvar, 0, &len, 0, 0) < 0) { if (errno != ENOENT) @@ -272,7 +271,7 @@ so = &inp->xi_socket; /* Ignore sockets for protocols other than the desired one. */ - if (so->xso_protocol != proto) + if (proto != 0 && so->xso_protocol != proto) continue; /* Ignore PCBs which were freed during copyout. 
*/ diff --git a/usr.bin/netstat/main.c b/usr.bin/netstat/main.c --- a/usr.bin/netstat/main.c +++ b/usr.bin/netstat/main.c @@ -101,7 +101,7 @@ NULL, NULL, "sdp", 1, IPPROTO_TCP }, #endif { N_DIVCBINFO, -1, 1, protopr, - NULL, NULL, "divert", 1, IPPROTO_DIVERT }, + NULL, NULL, "divert", 1, 0 }, { N_RIPCBINFO, N_IPSTAT, 1, protopr, ip_stats, NULL, "ip", 1, IPPROTO_RAW }, { N_RIPCBINFO, N_ICMPSTAT, 1, protopr,