Index: sbin/ifconfig/ifgre.c =================================================================== --- sbin/ifconfig/ifgre.c +++ sbin/ifconfig/ifgre.c @@ -32,43 +32,43 @@ #include #include #include - -#include -#include - -#include #include #include -#include #include +#include #include -#include #include -#include +#include #include -#include #include "ifconfig.h" +#define GREBITS "\020\01ENABLE_CSUM\02ENABLE_SEQ" + static void gre_status(int s); static void gre_status(int s) { - int grekey = 0; + uint32_t opts = 0; - ifr.ifr_data = (caddr_t)&grekey; + ifr.ifr_data = (caddr_t)&opts; if (ioctl(s, GREGKEY, &ifr) == 0) - if (grekey != 0) - printf("\tgrekey: %d\n", grekey); + if (opts != 0) + printf("\tgrekey: 0x%x (%u)\n", opts, opts); + opts = 0; + if (ioctl(s, GREGOPTS, &ifr) != 0 || opts == 0) + return; + printb("\toptions", opts, GREBITS); + putchar('\n'); } static void setifgrekey(const char *val, int dummy __unused, int s, const struct afswtch *afp) { - uint32_t grekey = atol(val); + uint32_t grekey = strtoul(val, NULL, 0); /* unsigned parse: strtol() would clamp keys above LONG_MAX */ strncpy(ifr.ifr_name, name, sizeof (ifr.ifr_name)); ifr.ifr_data = (caddr_t)&grekey; @@ -76,8 +76,35 @@ warn("ioctl (set grekey)"); } +static void +setifgreopts(const char *val, int d, int s, const struct afswtch *afp) +{ + uint32_t opts; + + ifr.ifr_data = (caddr_t)&opts; + if (ioctl(s, GREGOPTS, &ifr) == -1) { + warn("ioctl(GREGOPTS)"); + return; + } + + if (d < 0) + opts &= ~(-d); /* negative d requests clearing flag -d */ + else + opts |= d; + + if (ioctl(s, GRESOPTS, &ifr) == -1) { + warn("ioctl(GRESOPTS)"); + return; + } +} + + static struct cmd gre_cmds[] = { DEF_CMD_ARG("grekey", setifgrekey), + DEF_CMD("enable_csum", GRE_ENABLE_CSUM, setifgreopts), + DEF_CMD("-enable_csum",-GRE_ENABLE_CSUM,setifgreopts), + DEF_CMD("enable_seq", GRE_ENABLE_SEQ, setifgreopts), + DEF_CMD("-enable_seq",-GRE_ENABLE_SEQ, setifgreopts), }; static struct afswtch af_gre = { .af_name = "af_gre", Index: share/man/man4/Makefile =================================================================== --- 
share/man/man4/Makefile +++ share/man/man4/Makefile @@ -252,6 +252,7 @@ malo.4 \ mcd.4 \ md.4 \ + me.4 \ mem.4 \ meteor.4 \ mfi.4 \ Index: share/man/man4/gre.4 =================================================================== --- share/man/man4/gre.4 +++ share/man/man4/gre.4 @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd June 20, 2008 +.Dd October 24, 2014 .Dt GRE 4 .Os .Sh NAME @@ -68,162 +68,30 @@ .Cm destroy subcommands. .Pp -This driver currently supports the following modes of operation: -.Bl -tag -width indent -.It "GRE encapsulation (IP protocol number 47)" -Encapsulated datagrams are -prepended an outer datagram and a GRE header. +This driver corresponds to RFC 2784. +Encapsulated datagrams are prepended an outer datagram and a GRE header. The GRE header specifies the type of the encapsulated datagram and thus allows for tunneling other protocols than IP. GRE mode is also the default tunnel mode on Cisco routers. -This is also the default mode of operation of the -.Nm -interfaces. -As part of the GRE mode, .Nm also supports Cisco WCCP protocol, both version 1 and version 2. -Since there is no reliable way to distinguish between WCCP versions, it -should be configured manually using the -.Cm link2 -flag. -If the -.Cm link2 -flag is not set (default), then WCCP version 1 is selected. -.It "MOBILE encapsulation (IP protocol number 55)" -Datagrams are -encapsulated into IP, but with a shorter encapsulation. -The original -IP header is modified and the modifications are inserted between the -so modified header and the original payload. -Like -.Xr gif 4 , -only for IP-in-IP encapsulation. -.El .Pp The .Nm -interfaces support a number of -.Xr ioctl 2 Ns s , -such as: -.Bl -tag -width ".Dv GRESADDRS" -.It Dv GRESADDRS -Set the IP address of the local tunnel end. -This is the source address -set by or displayed by -.Xr ifconfig 8 -for the -.Nm -interface. -.It Dv GRESADDRD -Set the IP address of the remote tunnel end. 
-This is the destination address -set by or displayed by -.Xr ifconfig 8 -for the -.Nm -interface. -.It Dv GREGADDRS -Query the IP address that is set for the local tunnel end. -This is the -address the encapsulation header carries as local address (i.e., the real -address of the tunnel start point). -.It Dv GREGADDRD -Query the IP address that is set for the remote tunnel end. -This is the -address the encapsulated packets are sent to (i.e., the real address of -the remote tunnel endpoint). -.It Dv GRESPROTO -Set the operation mode to the specified IP protocol value. -The -protocol is passed to the interface in -.Po Vt "struct ifreq" Pc Ns Li -> Ns Va ifr_flags . -The operation mode can also be given as -.Pp -.Bl -tag -width ".Cm -link0" -compact -.It Cm link0 -.Dv IPPROTO_GRE -.It Cm -link0 -.Dv IPPROTO_MOBILE -.El -.Pp -to -.Xr ifconfig 8 . -.Pp -The -.Cm link1 -flag is not used to choose encapsulation, but to modify the -internal route search for the remote tunnel endpoint, see the -.Sx BUGS -section below. -.It Dv GREGPROTO -Query operation mode. -.It Dv GRESKEY +interfaces support a number of additional parameters to the +.Xr ifconfig 8 : +.Bl -tag -width "enable_csum" +.It Ar grekey Set the GRE key used for outgoing packets. A value of 0 disables the key option. -.It Dv GREGKEY -Get the GRE key currently used for outgoing packets. -0 means no outgoing key. +.It Ar enable_csum +Enables checksum calculation for outgoing packets. +.It Ar enable_seq +Enables use of sequence number field in the GRE header for outgoing packets. .El -.Pp -Note that the IP addresses of the tunnel endpoints may be the same as the -ones defined with -.Xr ifconfig 8 -for the interface (as if IP is encapsulated), but need not be. 
.Sh EXAMPLES -Configuration example: -.Bd -literal -Host X-- Host A ----------------tunnel---------- Cisco D------Host E - \\ | - \\ / - +------Host B----------Host C----------+ -.Ed .Pp -On host A -.Pq Fx : -.Bd -literal -offset indent -route add default B -ifconfig greN create -ifconfig greN A D netmask 0xffffffff linkX up -ifconfig greN tunnel A D -route add E D -.Ed -.Pp -On Host D (Cisco): -.Bd -literal -offset indent -Interface TunnelX - ip unnumbered D ! e.g. address from Ethernet interface - tunnel source D ! e.g. address from Ethernet interface - tunnel destination A -ip route C -ip route A mask C -ip route X mask tunnelX -.Ed -.Pp -OR -.Pp -On Host D -.Pq Fx : -.Bd -literal -offset indent -route add default C -ifconfig greN create -ifconfig greN D A -ifconfig greN tunnel D A -.Ed -.Pp -If all goes well, you should see packets flowing ;-) -.Pp -If you want to reach Host A over the tunnel (from Host D (Cisco)), then -you have to have an alias on Host A for e.g.\& the Ethernet interface like: -.Pp -.Dl "ifconfig alias Y" -.Pp -and on the Cisco: -.Pp -.Dl "ip route Y mask tunnelX" -.Pp -A similar setup can be used to create a link between two private networks -(for example in the 192.168 subnet) over the Internet: .Bd -literal 192.168.1.* --- Router A -------tunnel-------- Router B --- 192.168.2.* \\ / @@ -238,29 +106,22 @@ On router A: .Bd -literal -offset indent ifconfig greN create -ifconfig greN 192.168.1.1 192.168.2.1 link1 -ifconfig greN tunnel A B +ifconfig greN inet 192.168.1.1 192.168.2.1 +ifconfig greN inet tunnel A B route add -net 192.168.2 -netmask 255.255.255.0 192.168.2.1 .Ed .Pp On router B: .Bd -literal -offset indent ifconfig greN create -ifconfig greN 192.168.2.1 192.168.1.1 link1 -ifconfig greN tunnel B A +ifconfig greN inet 192.168.2.1 192.168.1.1 +ifconfig greN inet tunnel B A route add -net 192.168.1 -netmask 255.255.255.0 192.168.1.1 .Ed -.Pp -Note that this is a safe situation where the -.Cm link1 -flag (as discussed in the -.Sx BUGS 
-section below) may (and probably should) be set. .Sh NOTES The MTU of .Nm interfaces is set to 1476 by default, to match the value used by Cisco routers. -If grekey is set this is lowered to 1472. This may not be an optimal value, depending on the link between the two tunnel endpoints. It can be adjusted via @@ -268,25 +129,8 @@ .Pp For correct operation, the .Nm -device needs a route to the destination that is less specific than the -one over the tunnel. -(Basically, there needs to be a route to the decapsulating host that -does not run over the tunnel, as this would be a loop.) -If the addresses are ambiguous, doing the -.Nm ifconfig Cm tunnel -step before the -.Xr ifconfig 8 -call to set the -.Nm -IP addresses will help to find a route outside the tunnel. -.Pp -In order to tell -.Xr ifconfig 8 -to actually mark the interface as -.Dq up , -the keyword -.Cm up -must be given last on its command line. +device needs a route to the decapsulating host that does not run over the tunnel, +as this would be a loop. .Pp The kernel must be set to forward datagrams by setting the .Va net.inet.ip.forwarding @@ -296,41 +140,20 @@ .Xr gif 4 , .Xr inet 4 , .Xr ip 4 , +.Xr me 4 , .Xr netintro 4 , -.\" Xr options 4 , .Xr protocols 5 , .Xr ifconfig 8 , .Xr sysctl 8 .Pp -A description of GRE encapsulation can be found in RFC 1701 and RFC 1702. -.Pp -A description of MOBILE encapsulation can be found in RFC 2004. +A description of GRE encapsulation can be found in RFC 2784 and RFC 2890. .Sh AUTHORS +.An Andrey V. Elsukov Aq Mt ae@FreeBSD.org .An Heiko W.Rupp Aq Mt hwr@pilhuhn.de .Sh BUGS -The -.Fn compute_route -code in -.Pa if_gre.c -toggles the last bit of the -IP-address to provoke the search for a less specific route than the -one directly over the tunnel to prevent loops. -This is possibly not the best solution. -.Pp -To avoid the address munging described above, turn on the -.Cm link1 -flag on the -.Xr ifconfig 8 -command line. 
-This implies that the GRE packet destination and the ifconfig remote host -are not the same IP addresses, and that the GRE destination does not route -over the -.Nm -interface itself. .Pp The current implementation uses the key only for outgoing packets. Incoming packets with a different key or without a key will be treated as if they would belong to this interface. .Pp -RFC1701 is not fully supported, however all unsupported features have been -deprecated in RFC2784. +The sequence number field is also used only for outgoing packets. Index: share/man/man4/me.4 =================================================================== --- /dev/null +++ share/man/man4/me.4 @@ -0,0 +1,85 @@ +.\" Copyright (c) Andrey V. Elsukov +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd October 29, 2014 +.Dt ME 4 +.Os +.Sh NAME +.Nm me +.Nd encapsulating network device +.Sh SYNOPSIS +To compile the +driver into the kernel, place the following line in the kernel +configuration file: +.Bd -ragged -offset indent +.Cd "device me" +.Ed +.Pp +Alternatively, to load the +driver as a module at boot time, place the following line in +.Xr loader.conf 5 : +.Bd -literal -offset indent +if_me_load="YES" +.Ed +.Sh DESCRIPTION +The +.Nm +network interface pseudo device encapsulates datagrams +into IP. +These encapsulated datagrams are routed to a destination host, +where they are decapsulated and further routed to their final destination. +.Pp +.Nm +interfaces are dynamically created and destroyed with the +.Xr ifconfig 8 +.Cm create +and +.Cm destroy +subcommands. +.Pp +This driver corresponds to RFC 2004. +Datagrams are encapsulated into IP with a shorter encapsulation. +The original +IP header is modified and the modifications are inserted between the +so modified header and the original payload. +The protocol number 55 is used for outer header. +.Sh NOTES +.Pp +For correct operation, the +.Nm +device needs a route to the decapsulating host that does not run over the tunnel, +as this would be a loop. +.Sh SEE ALSO +.Xr gif 4 , +.Xr gre 4 , +.Xr inet 4 , +.Xr ip 4 , +.Xr netintro 4 , +.Xr protocols 5 , +.Xr ifconfig 8 , +.Xr sysctl 8 +.Sh AUTHORS +.An Andrey V. 
Elsukov Aq Mt ae@FreeBSD.org Index: sys/conf/NOTES =================================================================== --- sys/conf/NOTES +++ sys/conf/NOTES @@ -879,12 +879,15 @@ # The `gif' device implements IPv6 over IP4 tunneling, # IPv4 over IPv6 tunneling, IPv4 over IPv4 tunneling and # IPv6 over IPv6 tunneling. -# The `gre' device implements two types of IP4 over IP4 tunneling: -# GRE and MOBILE, as specified in the RFC1701 and RFC2004. +# The `gre' device implements GRE (Generic Routing Encapsulation) tunneling, +# as specified in the RFC 2784 and RFC 2890. +# The `me' device implements Minimal Encapsulation within IPv4 as +# specified in the RFC 2004. # The XBONEHACK option allows the same pair of addresses to be configured on # multiple gif interfaces. device gif device gre +device me options XBONEHACK # The `faith' device captures packets sent to it and diverts them Index: sys/conf/files =================================================================== --- sys/conf/files +++ sys/conf/files @@ -3234,11 +3234,12 @@ net/if_fwsubr.c optional fwip net/if_gif.c optional gif inet | gif inet6 | \ netgraph_gif inet | netgraph_gif inet6 -net/if_gre.c optional gre inet +net/if_gre.c optional gre inet | gre inet6 net/if_iso88025subr.c optional token net/if_lagg.c optional lagg net/if_loop.c optional loop net/if_llatbl.c standard +net/if_me.c optional me inet net/if_media.c standard net/if_mib.c standard net/if_spppfr.c optional sppp | netgraph_sppp @@ -3473,6 +3474,7 @@ netinet6/in6_rmx.c optional inet6 netinet6/in6_src.c optional inet6 netinet6/ip6_forward.c optional inet6 +netinet6/ip6_gre.c optional gre inet6 netinet6/ip6_id.c optional inet6 netinet6/ip6_input.c optional inet6 netinet6/ip6_mroute.c optional mrouting inet6 Index: sys/modules/Makefile =================================================================== --- sys/modules/Makefile +++ sys/modules/Makefile @@ -146,8 +146,9 @@ if_edsc \ if_epair \ if_faith \ - if_gif \ + ${_if_gif} \ ${_if_gre} \ + 
${_if_me} \ if_lagg \ ${_if_ndis} \ if_stf \ @@ -403,10 +404,12 @@ defined(ALL_MODULES) _carp= carp _toecore= toecore +_if_gif= if_gif +_if_gre= if_gre .endif .if ${MK_INET_SUPPORT} != "no" || defined(ALL_MODULES) -_if_gre= if_gre +_if_me= if_me _ipdivert= ipdivert _ipfw= ipfw .endif Index: sys/modules/if_gre/Makefile =================================================================== --- sys/modules/if_gre/Makefile +++ sys/modules/if_gre/Makefile @@ -1,8 +1,17 @@ # $FreeBSD$ .PATH: ${.CURDIR}/../../net ${.CURDIR}/../../netinet ${.CURDIR}/../../netinet6 +.include "${.CURDIR}/../../conf/kern.opts.mk" KMOD= if_gre -SRCS= if_gre.c ip_gre.c opt_inet.h opt_inet6.h +SRCS= if_gre.c opt_inet.h opt_inet6.h + +.if ${MK_INET_SUPPORT} != "no" +SRCS+= ip_gre.c +.endif + +.if ${MK_INET6_SUPPORT} != "no" +SRCS+= ip6_gre.c +.endif .include Index: sys/modules/if_me/Makefile =================================================================== --- /dev/null +++ sys/modules/if_me/Makefile @@ -0,0 +1,8 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../net + +KMOD= if_me +SRCS= if_me.c + +.include Index: sys/net/if_gre.h =================================================================== --- sys/net/if_gre.h +++ sys/net/if_gre.h @@ -1,8 +1,6 @@ -/* $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ */ -/* $FreeBSD$ */ - /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. + * Copyright (c) 2014 Andrey V. Elsukov * All rights reserved * * This code is derived from software contributed to The NetBSD Foundation @@ -28,166 +26,102 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
+ * + * $NetBSD: if_gre.h,v 1.13 2003/11/10 08:51:52 wiz Exp $ + * $FreeBSD$ */ -#ifndef _NET_IF_GRE_H -#define _NET_IF_GRE_H +#ifndef _NET_IF_GRE_H_ +#define _NET_IF_GRE_H_ -#include #ifdef _KERNEL -#include - -/* - * Version of the WCCP, need to be configured manually since - * header for version 2 is the same but IP payload is prepended - * with additional 4-bytes field. - */ -typedef enum { - WCCP_V1 = 0, - WCCP_V2 -} wccp_ver_t; - -struct gre_softc { - struct ifnet *sc_ifp; - LIST_ENTRY(gre_softc) sc_list; - int gre_unit; - int gre_flags; - u_int gre_fibnum; /* use this fib for envelopes */ - struct in_addr g_src; /* source address of gre packets */ - struct in_addr g_dst; /* destination address of gre packets */ - struct route route; /* routing entry that determines, where a - encapsulated packet should go */ - u_char g_proto; /* protocol of encapsulator */ - - const struct encaptab *encap; /* encapsulation cookie */ - - uint32_t key; /* key included in outgoing GRE packets */ - /* zero means none */ - - wccp_ver_t wccp_ver; /* version of the WCCP */ -}; -#define GRE2IFP(sc) ((sc)->sc_ifp) - - -struct gre_h { - u_int16_t flags; /* GRE flags */ - u_int16_t ptype; /* protocol type of payload typically - Ether protocol type*/ - uint32_t options[0]; /* optional options */ -/* - * from here on: fields are optional, presence indicated by flags - * - u_int_16 checksum checksum (one-complements of GRE header - and payload - Present if (ck_pres | rt_pres == 1). - Valid if (ck_pres == 1). - u_int_16 offset offset from start of routing filed to - first octet of active SRE (see below). - Present if (ck_pres | rt_pres == 1). - Valid if (rt_pres == 1). - u_int_32 key inserted by encapsulator e.g. for - authentication - Present if (key_pres ==1 ). - u_int_32 seq_num Sequence number to allow for packet order - Present if (seq_pres ==1 ). 
- struct gre_sre[] routing Routing fileds (see below) - Present if (rt_pres == 1) - */ +/* GRE header according to RFC 2784 and RFC 2890 */ +struct grehdr { + uint16_t gre_flags; /* GRE flags */ +#define GRE_FLAGS_CP 0x8000 /* checksum present */ +#define GRE_FLAGS_KP 0x2000 /* key present */ +#define GRE_FLAGS_SP 0x1000 /* sequence present */ +#define GRE_FLAGS_MASK (GRE_FLAGS_CP|GRE_FLAGS_KP|GRE_FLAGS_SP) + uint16_t gre_proto; /* protocol type */ + uint32_t gre_opts[0]; /* optional fields */ } __packed; +#ifdef INET struct greip { - struct ip gi_i; - struct gre_h gi_g; + struct ip gi_ip; + struct grehdr gi_gre; } __packed; +#endif -#define gi_pr gi_i.ip_p -#define gi_len gi_i.ip_len -#define gi_src gi_i.ip_src -#define gi_dst gi_i.ip_dst -#define gi_ptype gi_g.ptype -#define gi_flags gi_g.flags -#define gi_options gi_g.options - -#define GRE_CP 0x8000 /* Checksum Present */ -#define GRE_RP 0x4000 /* Routing Present */ -#define GRE_KP 0x2000 /* Key Present */ -#define GRE_SP 0x1000 /* Sequence Present */ -#define GRE_SS 0x0800 /* Strict Source Route */ +#ifdef INET6 +struct greip6 { + struct ip6_hdr gi6_ip6; + struct grehdr gi6_gre; +} __packed; +#endif + +struct gre_softc { + struct ifnet *gre_ifp; + LIST_ENTRY(gre_softc) gre_list; + struct rmlock gre_lock; + int gre_family; /* AF of delivery header */ + uint32_t gre_iseq; + uint32_t gre_oseq; + uint32_t gre_key; + uint32_t gre_options; + uint32_t gre_mtu; + u_int gre_fibnum; + u_int gre_hlen; /* header size */ + union { + void *hdr; +#ifdef INET + struct greip *gihdr; +#endif +#ifdef INET6 + struct greip6 *gi6hdr; +#endif + } gre_uhdr; + const struct encaptab *gre_ecookie; +}; +#define GRE2IFP(sc) ((sc)->gre_ifp) +#define GRE_LOCK_INIT(sc) rm_init(&(sc)->gre_lock, "gre softc") +#define GRE_LOCK_DESTROY(sc) rm_destroy(&(sc)->gre_lock) +#define GRE_RLOCK_TRACKER struct rm_priotracker gre_tracker +#define GRE_RLOCK(sc) rm_rlock(&(sc)->gre_lock, &gre_tracker) +#define GRE_RUNLOCK(sc) rm_runlock(&(sc)->gre_lock, 
&gre_tracker) +#define GRE_RLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_RLOCKED) +#define GRE_WLOCK(sc) rm_wlock(&(sc)->gre_lock) +#define GRE_WUNLOCK(sc) rm_wunlock(&(sc)->gre_lock) +#define GRE_WLOCK_ASSERT(sc) rm_assert(&(sc)->gre_lock, RA_WLOCKED) + +#define gre_hdr gre_uhdr.hdr +#define gre_gihdr gre_uhdr.gihdr +#define gre_gi6hdr gre_uhdr.gi6hdr +#define gre_oip gre_gihdr->gi_ip +#define gre_oip6 gre_gi6hdr->gi6_ip6 /* * CISCO uses special type for GRE tunnel created as part of WCCP * connection, while in fact those packets are just IPv4 encapsulated * into GRE. */ -#define WCCP_PROTOCOL_TYPE 0x883E - -/* - * gre_sre defines a Source route Entry. These are needed if packets - * should be routed over more than one tunnel hop by hop - */ -struct gre_sre { - u_int16_t sre_family; /* address family */ - u_char sre_offset; /* offset to first octet of active entry */ - u_char sre_length; /* number of octets in the SRE. - sre_lengthl==0 -> last entry. */ - u_char *sre_rtinfo; /* the routing information */ -}; - -struct greioctl { - int unit; - struct in_addr addr; -}; - -/* for mobile encaps */ - -struct mobile_h { - u_int16_t proto; /* protocol and S-bit */ - u_int16_t hcrc; /* header checksum */ - u_int32_t odst; /* original destination address */ - u_int32_t osrc; /* original source addr, if S-bit set */ -} __packed; - -struct mobip_h { - struct ip mi; - struct mobile_h mh; -} __packed; - - -#define MOB_H_SIZ_S (sizeof(struct mobile_h) - sizeof(u_int32_t)) -#define MOB_H_SIZ_L (sizeof(struct mobile_h)) -#define MOB_H_SBIT 0x0080 - -#define GRE_TTL 30 - +#define ETHERTYPE_WCCP 0x883E #endif /* _KERNEL */ -/* - * ioctls needed to manipulate the interface - */ - #define GRESADDRS _IOW('i', 101, struct ifreq) #define GRESADDRD _IOW('i', 102, struct ifreq) #define GREGADDRS _IOWR('i', 103, struct ifreq) #define GREGADDRD _IOWR('i', 104, struct ifreq) #define GRESPROTO _IOW('i' , 105, struct ifreq) #define GREGPROTO _IOWR('i', 106, struct ifreq) -#define GREGKEY 
_IOWR('i', 107, struct ifreq) -#define GRESKEY _IOW('i', 108, struct ifreq) -#ifdef _KERNEL -LIST_HEAD(gre_softc_head, gre_softc); -VNET_DECLARE(struct gre_softc_head, gre_softc_list); -#define V_gre_softc_list VNET(gre_softc_list) - -VNET_DECLARE(struct mtx, gre_mtx); -#define V_gre_mtx VNET(gre_mtx) -#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \ - MTX_DEF) -#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx) -#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx) -#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx) +#define GREGKEY _IOWR('i', 107, struct ifreq) +#define GRESKEY _IOW('i', 108, struct ifreq) +#define GREGOPTS _IOWR('i', 109, struct ifreq) +#define GRESOPTS _IOW('i', 110, struct ifreq) + +#define GRE_ENABLE_CSUM 0x0001 +#define GRE_ENABLE_SEQ 0x0002 +#define GRE_OPTMASK (GRE_ENABLE_CSUM|GRE_ENABLE_SEQ) -u_int16_t gre_in_cksum(u_int16_t *, u_int); -#endif /* _KERNEL */ - -#endif +#endif /* _NET_IF_GRE_H_ */ Index: sys/net/if_gre.c =================================================================== --- sys/net/if_gre.c +++ sys/net/if_gre.c @@ -1,8 +1,6 @@ -/* $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ -/* $FreeBSD$ */ - /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. + * Copyright (c) 2014 Andrey V. Elsukov * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -30,16 +28,12 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. + * + * $NetBSD: if_gre.c,v 1.49 2003/12/11 00:22:29 itojun Exp $ */ -/* - * Encapsulate L3 protocols into IP - * See RFC 2784 (successor of RFC 1701 and 1702) for more details. - * If_gre is compatible with Cisco GRE tunnels, so you can - * have a NetBSD box as the other end of a tunnel interface of a Cisco - * router. See gre(4) for more details. 
- * Also supported: IP in IP encaps (proto 55) as of RFC 2004 - */ +#include +__FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" @@ -47,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -54,9 +49,12 @@ #include #include #include +#include #include #include +#include #include +#include #include #include @@ -64,85 +62,71 @@ #include #include #include -#include +#include #include -#ifdef INET #include +#ifdef INET #include #include #include -#include #include -#include -#else -#error "Huh? if_gre without inet?" #endif -#include +#ifdef INET6 +#include +#include +#include +#include +#endif +#include +#include #include -/* - * It is not easy to calculate the right value for a GRE MTU. - * We leave this task to the admin and use the same default that - * other vendors use. - */ -#define GREMTU 1476 - -#define MTAG_COOKIE_GRE 1307983903 -#define MTAG_GRE_NESTING 1 -struct mtag_gre_nesting { - uint16_t count; - uint16_t max; - struct ifnet *ifp[]; -}; - -/* - * gre_mtx protects all global variables in if_gre.c. - * XXX: gre_softc data not protected yet. 
- */ -VNET_DEFINE(struct mtx, gre_mtx); -VNET_DEFINE(struct gre_softc_head, gre_softc_list); +#include +#include +#define GREMTU 1500 static const char grename[] = "gre"; static MALLOC_DEFINE(M_GRE, grename, "Generic Routing Encapsulation"); +static VNET_DEFINE(struct mtx, gre_mtx); +#define V_gre_mtx VNET(gre_mtx) +#define GRE_LIST_LOCK_INIT(x) mtx_init(&V_gre_mtx, "gre_mtx", NULL, \ + MTX_DEF) +#define GRE_LIST_LOCK_DESTROY(x) mtx_destroy(&V_gre_mtx) +#define GRE_LIST_LOCK(x) mtx_lock(&V_gre_mtx) +#define GRE_LIST_UNLOCK(x) mtx_unlock(&V_gre_mtx) + +static VNET_DEFINE(LIST_HEAD(, gre_softc), gre_softc_list); +#define V_gre_softc_list VNET(gre_softc_list) +static struct sx gre_ioctl_sx; +SX_SYSINIT(gre_ioctl_sx, &gre_ioctl_sx, "gre_ioctl"); static int gre_clone_create(struct if_clone *, int, caddr_t); static void gre_clone_destroy(struct ifnet *); static VNET_DEFINE(struct if_clone *, gre_cloner); #define V_gre_cloner VNET(gre_cloner) +static void gre_qflush(struct ifnet *); +static int gre_transmit(struct ifnet *, struct mbuf *); static int gre_ioctl(struct ifnet *, u_long, caddr_t); static int gre_output(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); -static int gre_compute_route(struct gre_softc *sc); +static void gre_updatehdr(struct gre_softc *); +static int gre_set_tunnel(struct ifnet *, struct sockaddr *, + struct sockaddr *); +static void gre_delete_tunnel(struct ifnet *); +int gre_input(struct mbuf **, int *, int); #ifdef INET -extern struct domain inetdomain; -static const struct protosw in_gre_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_GRE, - .pr_flags = PR_ATOMIC|PR_ADDR, - .pr_input = gre_input, - .pr_output = rip_output, - .pr_ctlinput = rip_ctlinput, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; -static const struct protosw in_mobile_protosw = { - .pr_type = SOCK_RAW, - .pr_domain = &inetdomain, - .pr_protocol = IPPROTO_MOBILE, - .pr_flags = PR_ATOMIC|PR_ADDR, 
- .pr_input = gre_mobile_input, - .pr_output = rip_output, - .pr_ctlinput = rip_ctlinput, - .pr_ctloutput = rip_ctloutput, - .pr_usrreqs = &rip_usrreqs -}; +extern int in_gre_attach(struct gre_softc *); +extern int in_gre_output(struct mbuf *, int, int); +#endif +#ifdef INET6 +extern int in6_gre_attach(struct gre_softc *); +extern int in6_gre_output(struct mbuf *, int, int); #endif SYSCTL_DECL(_net_link); @@ -159,6 +143,7 @@ */ #define MAX_GRE_NEST 1 #endif + static VNET_DEFINE(int, max_gre_nesting) = MAX_GRE_NEST; #define V_max_gre_nesting VNET(max_gre_nesting) SYSCTL_INT(_net_link_gre, OID_AUTO, max_nesting, CTLFLAG_RW | CTLFLAG_VNET, @@ -191,34 +176,22 @@ struct gre_softc *sc; sc = malloc(sizeof(struct gre_softc), M_GRE, M_WAITOK | M_ZERO); - + sc->gre_fibnum = curthread->td_proc->p_fibnum; GRE2IFP(sc) = if_alloc(IFT_TUNNEL); - if (GRE2IFP(sc) == NULL) { - free(sc, M_GRE); - return (ENOSPC); - } - + GRE_LOCK_INIT(sc); GRE2IFP(sc)->if_softc = sc; if_initname(GRE2IFP(sc), grename, unit); - GRE2IFP(sc)->if_snd.ifq_maxlen = ifqmaxlen; - GRE2IFP(sc)->if_addrlen = 0; - GRE2IFP(sc)->if_hdrlen = 24; /* IP + GRE */ - GRE2IFP(sc)->if_mtu = GREMTU; + GRE2IFP(sc)->if_mtu = sc->gre_mtu = GREMTU; GRE2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; GRE2IFP(sc)->if_output = gre_output; GRE2IFP(sc)->if_ioctl = gre_ioctl; - sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY; - sc->g_proto = IPPROTO_GRE; - GRE2IFP(sc)->if_flags |= IFF_LINK0; - sc->encap = NULL; - sc->gre_fibnum = curthread->td_proc->p_fibnum; - sc->wccp_ver = WCCP_V1; - sc->key = 0; + GRE2IFP(sc)->if_transmit = gre_transmit; + GRE2IFP(sc)->if_qflush = gre_qflush; if_attach(GRE2IFP(sc)); bpfattach(GRE2IFP(sc), DLT_NULL, sizeof(u_int32_t)); GRE_LIST_LOCK(); - LIST_INSERT_HEAD(&V_gre_softc_list, sc, sc_list); + LIST_INSERT_HEAD(&V_gre_softc_list, sc, gre_list); GRE_LIST_UNLOCK(); return (0); } @@ -226,687 +199,749 @@ static void gre_clone_destroy(struct ifnet *ifp) { - struct gre_softc *sc = ifp->if_softc; + struct 
gre_softc *sc; + sx_xlock(&gre_ioctl_sx); + sc = ifp->if_softc; + gre_delete_tunnel(ifp); GRE_LIST_LOCK(); - LIST_REMOVE(sc, sc_list); + LIST_REMOVE(sc, gre_list); GRE_LIST_UNLOCK(); - -#ifdef INET - if (sc->encap != NULL) - encap_detach(sc->encap); -#endif bpfdetach(ifp); if_detach(ifp); + ifp->if_softc = NULL; + sx_xunlock(&gre_ioctl_sx); + if_free(ifp); + GRE_LOCK_DESTROY(sc); free(sc, M_GRE); } -/* - * The output routine. Takes a packet and encapsulates it in the protocol - * given by sc->g_proto. See also RFC 1701 and RFC 2004 - */ static int -gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, - struct route *ro) +gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { - int error = 0; - struct gre_softc *sc = ifp->if_softc; - struct greip *gh; - struct ip *ip; - struct m_tag *mtag; - struct mtag_gre_nesting *gt; - size_t len; - u_short gre_ip_id = 0; - uint8_t gre_ip_tos = 0; - u_int16_t etype = 0; - struct mobile_h mob_h; - u_int32_t af; - int extra = 0, max; - - /* - * gre may cause infinite recursion calls when misconfigured. High - * nesting level may cause stack exhaustion. We'll prevent this by - * detecting loops and by introducing upper limit. - */ - mtag = m_tag_locate(m, MTAG_COOKIE_GRE, MTAG_GRE_NESTING, NULL); - if (mtag != NULL) { - struct ifnet **ifp2; - - gt = (struct mtag_gre_nesting *)(mtag + 1); - gt->count++; - if (gt->count > min(gt->max, V_max_gre_nesting)) { - printf("%s: hit maximum recursion limit %u on %s\n", - __func__, gt->count - 1, ifp->if_xname); - m_freem(m); - error = EIO; /* is there better errno? */ - goto end; - } - - ifp2 = gt->ifp; - for (max = gt->count - 1; max > 0; max--) { - if (*ifp2 == ifp) - break; - ifp2++; - } - if (*ifp2 == ifp) { - printf("%s: detected loop with nexting %u on %s\n", - __func__, gt->count-1, ifp->if_xname); - m_freem(m); - error = EIO; /* is there better errno? 
*/ - goto end; - } - *ifp2 = ifp; + GRE_RLOCK_TRACKER; + struct ifreq *ifr = (struct ifreq *)data; + struct sockaddr *src, *dst; + struct gre_softc *sc; +#ifdef INET + struct sockaddr_in *sin = NULL; +#endif +#ifdef INET6 + struct sockaddr_in6 *sin6 = NULL; +#endif + uint32_t opt; + int error; - } else { - /* - * Given that people should NOT increase max_gre_nesting beyond - * their real needs, we allocate once per packet rather than - * allocating an mtag once per passing through gre. - * - * Note: the sysctl does not actually check for saneness, so we - * limit the maximum numbers of possible recursions here. - */ - max = imin(V_max_gre_nesting, 256); - /* If someone sets the sysctl <= 0, we want at least 1. */ - max = imax(max, 1); - len = sizeof(struct mtag_gre_nesting) + - max * sizeof(struct ifnet *); - mtag = m_tag_alloc(MTAG_COOKIE_GRE, MTAG_GRE_NESTING, len, - M_NOWAIT); - if (mtag == NULL) { - m_freem(m); - error = ENOMEM; - goto end; - } - gt = (struct mtag_gre_nesting *)(mtag + 1); - bzero(gt, len); - gt->count = 1; - gt->max = max; - *gt->ifp = ifp; - m_tag_prepend(m, mtag); + switch (cmd) { + case SIOCSIFMTU: + /* XXX: */ + if (ifr->ifr_mtu < 576) + return (EINVAL); + break; + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + case SIOCSIFFLAGS: + case SIOCADDMULTI: + case SIOCDELMULTI: + return (0); + case GRESADDRS: + case GRESADDRD: + case GREGADDRS: + case GREGADDRD: + case GRESPROTO: + case GREGPROTO: + return (EOPNOTSUPP); } - - if (!((ifp->if_flags & IFF_UP) && - (ifp->if_drv_flags & IFF_DRV_RUNNING)) || - sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) { - m_freem(m); - error = ENETDOWN; + src = dst = NULL; + sx_xlock(&gre_ioctl_sx); + sc = ifp->if_softc; + if (sc == NULL) { + error = ENXIO; goto end; } - - gh = NULL; - ip = NULL; - - /* BPF writes need to be handled specially. 
*/ - if (dst->sa_family == AF_UNSPEC) - bcopy(dst->sa_data, &af, sizeof(af)); - else - af = dst->sa_family; - - if (bpf_peers_present(ifp->if_bpf)) - bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m); - - if ((ifp->if_flags & IFF_MONITOR) != 0) { - m_freem(m); - error = ENETDOWN; + error = 0; + switch (cmd) { + case SIOCSIFMTU: + GRE_WLOCK(sc); + sc->gre_mtu = ifr->ifr_mtu; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); goto end; - } - - m->m_flags &= ~(M_BCAST|M_MCAST); - - if (sc->g_proto == IPPROTO_MOBILE) { - if (af == AF_INET) { - struct mbuf *m0; - int msiz; - - ip = mtod(m, struct ip *); - - /* - * RFC2004 specifies that fragmented diagrams shouldn't - * be encapsulated. - */ - if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); - error = EINVAL; /* is there better errno? */ - goto end; - } - memset(&mob_h, 0, MOB_H_SIZ_L); - mob_h.proto = (ip->ip_p) << 8; - mob_h.odst = ip->ip_dst.s_addr; - ip->ip_dst.s_addr = sc->g_dst.s_addr; - - /* - * If the packet comes from our host, we only change - * the destination address in the IP header. 
- * Else we also need to save and change the source - */ - if (in_hosteq(ip->ip_src, sc->g_src)) { - msiz = MOB_H_SIZ_S; - } else { - mob_h.proto |= MOB_H_SBIT; - mob_h.osrc = ip->ip_src.s_addr; - ip->ip_src.s_addr = sc->g_src.s_addr; - msiz = MOB_H_SIZ_L; - } - mob_h.proto = htons(mob_h.proto); - mob_h.hcrc = gre_in_cksum((u_int16_t *)&mob_h, msiz); - - if ((m->m_data - msiz) < m->m_pktdat) { - m0 = m_gethdr(M_NOWAIT, MT_DATA); - if (m0 == NULL) { - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); - error = ENOBUFS; - goto end; - } - m0->m_next = m; - m->m_data += sizeof(struct ip); - m->m_len -= sizeof(struct ip); - m0->m_pkthdr.len = m->m_pkthdr.len + msiz; - m0->m_len = msiz + sizeof(struct ip); - m0->m_data += max_linkhdr; - memcpy(mtod(m0, caddr_t), (caddr_t)ip, - sizeof(struct ip)); - m = m0; - } else { /* we have some space left in the old one */ - m->m_data -= msiz; - m->m_len += msiz; - m->m_pkthdr.len += msiz; - bcopy(ip, mtod(m, caddr_t), - sizeof(struct ip)); - } - ip = mtod(m, struct ip *); - memcpy((caddr_t)(ip + 1), &mob_h, (unsigned)msiz); - ip->ip_len = htons(ntohs(ip->ip_len) + msiz); - } else { /* AF_INET */ - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); - error = EINVAL; - goto end; - } - } else if (sc->g_proto == IPPROTO_GRE) { - switch (af) { - case AF_INET: - ip = mtod(m, struct ip *); - gre_ip_tos = ip->ip_tos; - gre_ip_id = ip->ip_id; - if (sc->wccp_ver == WCCP_V2) { - extra = sizeof(uint32_t); - etype = WCCP_PROTOCOL_TYPE; - } else { - etype = ETHERTYPE_IP; - } + case SIOCSIFPHYADDR: +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: +#endif + error = EINVAL; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + src = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in_aliasreq *)data)->ifra_dstaddr); break; +#endif #ifdef INET6 - case AF_INET6: - gre_ip_id = ip_newid(); - etype = ETHERTYPE_IPV6; + case SIOCSIFPHYADDR_IN6: + src = (struct sockaddr *) + &(((struct 
in6_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr *) + &(((struct in6_aliasreq *)data)->ifra_dstaddr); break; #endif default: - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); error = EAFNOSUPPORT; goto end; } - - /* Reserve space for GRE header + optional GRE key */ - int hdrlen = sizeof(struct greip) + extra; - if (sc->key) - hdrlen += sizeof(uint32_t); - M_PREPEND(m, hdrlen, M_NOWAIT); - } else { - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - m_freem(m); - error = EINVAL; - goto end; - } - - if (m == NULL) { /* mbuf allocation failed */ - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - error = ENOBUFS; - goto end; - } - - M_SETFIB(m, sc->gre_fibnum); /* The envelope may use a different FIB */ - - gh = mtod(m, struct greip *); - if (sc->g_proto == IPPROTO_GRE) { - uint32_t *options = gh->gi_options; - - memset((void *)gh, 0, sizeof(struct greip) + extra); - gh->gi_ptype = htons(etype); - gh->gi_flags = 0; - - /* Add key option */ - if (sc->key) - { - gh->gi_flags |= htons(GRE_KP); - *(options++) = htonl(sc->key); - } - } - - gh->gi_pr = sc->g_proto; - if (sc->g_proto != IPPROTO_MOBILE) { - gh->gi_src = sc->g_src; - gh->gi_dst = sc->g_dst; - ((struct ip*)gh)->ip_v = IPPROTO_IPV4; - ((struct ip*)gh)->ip_hl = (sizeof(struct ip)) >> 2; - ((struct ip*)gh)->ip_ttl = GRE_TTL; - ((struct ip*)gh)->ip_tos = gre_ip_tos; - ((struct ip*)gh)->ip_id = gre_ip_id; - gh->gi_len = htons(m->m_pkthdr.len); - } - - if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); - if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); - /* - * Send it off and with IP_FORWARD flag to prevent it from - * overwriting the ip_id again. ip_id is already set to the - * ip_id of the encapsulated packet. 
- */ - error = ip_output(m, NULL, &sc->route, IP_FORWARDING, - (struct ip_moptions *)NULL, (struct inpcb *)NULL); - end: - if (error) - if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); - return (error); -} - -static int -gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) -{ - struct ifreq *ifr = (struct ifreq *)data; - struct in_aliasreq *aifr = (struct in_aliasreq *)data; - struct gre_softc *sc = ifp->if_softc; - struct sockaddr_in si; - struct sockaddr *sa = NULL; - int error, adj; - struct sockaddr_in sp, sm, dp, dm; - uint32_t key; - - error = 0; - adj = 0; + /* sa_family must be equal */ + if (src->sa_family != dst->sa_family || + src->sa_len != dst->sa_len) + goto end; - switch (cmd) { - case SIOCSIFADDR: - ifp->if_flags |= IFF_UP; - break; - case SIOCSIFFLAGS: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFFLAGS)) != 0) - break; - if ((ifr->ifr_flags & IFF_LINK0) != 0) - sc->g_proto = IPPROTO_GRE; - else - sc->g_proto = IPPROTO_MOBILE; - if ((ifr->ifr_flags & IFF_LINK2) != 0) - sc->wccp_ver = WCCP_V2; - else - sc->wccp_ver = WCCP_V1; - goto recompute; - case SIOCSIFMTU: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFMTU)) != 0) - break; - if (ifr->ifr_mtu < 576) { - error = EINVAL; - break; - } - ifp->if_mtu = ifr->ifr_mtu; - break; - case SIOCGIFMTU: - ifr->ifr_mtu = GRE2IFP(sc)->if_mtu; - break; - case SIOCADDMULTI: - /* - * XXXRW: Isn't this priv_checkr() redundant to the ifnet - * layer check? 
- */ - if ((error = priv_check(curthread, PRIV_NET_ADDMULTI)) != 0) - break; - if (ifr == 0) { - error = EAFNOSUPPORT; - break; - } - switch (ifr->ifr_addr.sa_family) { + /* validate sa_len */ + switch (src->sa_family) { #ifdef INET case AF_INET: + if (src->sa_len != sizeof(struct sockaddr_in)) + goto end; break; #endif #ifdef INET6 case AF_INET6: + if (src->sa_len != sizeof(struct sockaddr_in6)) + goto end; break; #endif default: error = EAFNOSUPPORT; - break; + goto end; } - break; - case SIOCDELMULTI: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_DELIFGROUP)) != 0) - break; - if (ifr == 0) { - error = EAFNOSUPPORT; - break; + /* check sa_family looks sane for the cmd */ + error = EAFNOSUPPORT; + switch (cmd) { +#ifdef INET + case SIOCSIFPHYADDR: + if (src->sa_family == AF_INET) + break; + goto end; +#endif +#ifdef INET6 + case SIOCSIFPHYADDR_IN6: + if (src->sa_family == AF_INET6) + break; + goto end; +#endif } - switch (ifr->ifr_addr.sa_family) { + error = EADDRNOTAVAIL; + switch (src->sa_family) { #ifdef INET case AF_INET: + if (satosin(src)->sin_addr.s_addr == INADDR_ANY || + satosin(dst)->sin_addr.s_addr == INADDR_ANY) + goto end; break; #endif #ifdef INET6 case AF_INET6: - break; + if (IN6_IS_ADDR_UNSPECIFIED(&satosin6(src)->sin6_addr) + || + IN6_IS_ADDR_UNSPECIFIED(&satosin6(dst)->sin6_addr)) + goto end; + /* + * Check validity of the scope zone ID of the + * addresses, and convert it into the kernel + * internal form if necessary. + */ + error = sa6_embedscope(satosin6(src), 0); + if (error != 0) + goto end; + error = sa6_embedscope(satosin6(dst), 0); + if (error != 0) + goto end; #endif - default: - error = EAFNOSUPPORT; - break; - } - break; - case GRESPROTO: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? 
- */ - if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) - break; - sc->g_proto = ifr->ifr_flags; - switch (sc->g_proto) { - case IPPROTO_GRE: - ifp->if_flags |= IFF_LINK0; - break; - case IPPROTO_MOBILE: - ifp->if_flags &= ~IFF_LINK0; - break; - default: - error = EPROTONOSUPPORT; - break; - } - goto recompute; - case GREGPROTO: - ifr->ifr_flags = sc->g_proto; - break; - case GRESADDRS: - case GRESADDRD: - error = priv_check(curthread, PRIV_NET_GRE); - if (error) - return (error); - /* - * set tunnel endpoints, compute a less specific route - * to the remote end and mark if as up - */ - sa = &ifr->ifr_addr; - if (cmd == GRESADDRS) - sc->g_src = (satosin(sa))->sin_addr; - if (cmd == GRESADDRD) - sc->g_dst = (satosin(sa))->sin_addr; - recompute: -#ifdef INET - if (sc->encap != NULL) { - encap_detach(sc->encap); - sc->encap = NULL; - } -#endif - if ((sc->g_src.s_addr != INADDR_ANY) && - (sc->g_dst.s_addr != INADDR_ANY)) { - bzero(&sp, sizeof(sp)); - bzero(&sm, sizeof(sm)); - bzero(&dp, sizeof(dp)); - bzero(&dm, sizeof(dm)); - sp.sin_len = sm.sin_len = dp.sin_len = dm.sin_len = - sizeof(struct sockaddr_in); - sp.sin_family = sm.sin_family = dp.sin_family = - dm.sin_family = AF_INET; - sp.sin_addr = sc->g_src; - dp.sin_addr = sc->g_dst; - sm.sin_addr.s_addr = dm.sin_addr.s_addr = - INADDR_BROADCAST; -#ifdef INET - sc->encap = encap_attach(AF_INET, sc->g_proto, - sintosa(&sp), sintosa(&sm), sintosa(&dp), - sintosa(&dm), (sc->g_proto == IPPROTO_GRE) ? 
- &in_gre_protosw : &in_mobile_protosw, sc); - if (sc->encap == NULL) - printf("%s: unable to attach encap\n", - if_name(GRE2IFP(sc))); -#endif - if (sc->route.ro_rt != 0) /* free old route */ - RTFREE(sc->route.ro_rt); - if (gre_compute_route(sc) == 0) - ifp->if_drv_flags |= IFF_DRV_RUNNING; - else - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - } + }; + error = gre_set_tunnel(ifp, src, dst); break; - case GREGADDRS: - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_src.s_addr; - sa = sintosa(&si); - error = prison_if(curthread->td_ucred, sa); - if (error != 0) - break; - ifr->ifr_addr = *sa; - break; - case GREGADDRD: - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_dst.s_addr; - sa = sintosa(&si); - error = prison_if(curthread->td_ucred, sa); - if (error != 0) - break; - ifr->ifr_addr = *sa; - break; - case SIOCSIFPHYADDR: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? - */ - if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) - break; - if (aifr->ifra_addr.sin_family != AF_INET || - aifr->ifra_dstaddr.sin_family != AF_INET) { - error = EAFNOSUPPORT; - break; - } - if (aifr->ifra_addr.sin_len != sizeof(si) || - aifr->ifra_dstaddr.sin_len != sizeof(si)) { - error = EINVAL; - break; - } - sc->g_src = aifr->ifra_addr.sin_addr; - sc->g_dst = aifr->ifra_dstaddr.sin_addr; - goto recompute; case SIOCDIFPHYADDR: - /* - * XXXRW: Isn't this priv_check() redundant to the ifnet - * layer check? 
- */ - if ((error = priv_check(curthread, PRIV_NET_SETIFPHYS)) != 0) - break; - sc->g_src.s_addr = INADDR_ANY; - sc->g_dst.s_addr = INADDR_ANY; - goto recompute; + gre_delete_tunnel(ifp); + break; case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: #ifdef INET6 case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: #endif - if (sc->g_src.s_addr == INADDR_ANY) { + if (sc->gre_family == 0) { error = EADDRNOTAVAIL; break; } - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - si.sin_addr.s_addr = sc->g_src.s_addr; - error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); - if (error != 0) + GRE_RLOCK(sc); + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + if (sc->gre_family != AF_INET) { + error = EADDRNOTAVAIL; + break; + } + sin = (struct sockaddr_in *)&ifr->ifr_addr; + memset(sin, 0, sizeof(*sin)); + sin->sin_family = AF_INET; + sin->sin_len = sizeof(*sin); break; - bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); - break; - case SIOCGIFPDSTADDR: -#ifdef INET6 - case SIOCGIFPDSTADDR_IN6: #endif - if (sc->g_dst.s_addr == INADDR_ANY) { - error = EADDRNOTAVAIL; +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + if (sc->gre_family != AF_INET6) { + error = EADDRNOTAVAIL; + break; + } + sin6 = (struct sockaddr_in6 *) + &(((struct in6_ifreq *)data)->ifr_addr); + memset(sin6, 0, sizeof(*sin6)); + sin6->sin6_family = AF_INET6; + sin6->sin6_len = sizeof(*sin6); break; +#endif + } + if (error == 0) { + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + sin->sin_addr = sc->gre_oip.ip_src; + break; + case SIOCGIFPDSTADDR: + sin->sin_addr = sc->gre_oip.ip_dst; + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + sin6->sin6_addr = sc->gre_oip6.ip6_src; + break; + case SIOCGIFPDSTADDR_IN6: + sin6->sin6_addr = sc->gre_oip6.ip6_dst; + break; +#endif + } } - memset(&si, 0, sizeof(si)); - si.sin_family = AF_INET; - si.sin_len = sizeof(struct sockaddr_in); - 
si.sin_addr.s_addr = sc->g_dst.s_addr; - error = prison_if(curthread->td_ucred, (struct sockaddr *)&si); + GRE_RUNLOCK(sc); if (error != 0) break; - bcopy(&si, &ifr->ifr_addr, sizeof(ifr->ifr_addr)); + switch (cmd) { +#ifdef INET + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin); + if (error != 0) + memset(sin, 0, sizeof(*sin)); + break; +#endif +#ifdef INET6 + case SIOCGIFPSRCADDR_IN6: + case SIOCGIFPDSTADDR_IN6: + error = prison_if(curthread->td_ucred, + (struct sockaddr *)sin6); + if (error == 0) + error = sa6_recoverscope(sin6); + if (error != 0) + memset(sin6, 0, sizeof(*sin6)); +#endif + } break; case GRESKEY: - error = priv_check(curthread, PRIV_NET_GRE); - if (error) - break; - error = copyin(ifr->ifr_data, &key, sizeof(key)); - if (error) + if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) break; - /* adjust MTU for option header */ - if (key == 0 && sc->key != 0) /* clear */ - adj += sizeof(key); - else if (key != 0 && sc->key == 0) /* set */ - adj -= sizeof(key); - - if (ifp->if_mtu + adj < 576) { - error = EINVAL; + if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0) break; + if (sc->gre_key != opt) { + GRE_WLOCK(sc); + sc->gre_key = opt; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); } - ifp->if_mtu += adj; - sc->key = key; break; case GREGKEY: - error = copyout(&sc->key, ifr->ifr_data, sizeof(sc->key)); + error = copyout(&sc->gre_key, ifr->ifr_data, sizeof(sc->gre_key)); + break; + case GRESOPTS: + if ((error = priv_check(curthread, PRIV_NET_GRE)) != 0) + break; + if ((error = copyin(ifr->ifr_data, &opt, sizeof(opt))) != 0) + break; + if (opt & ~GRE_OPTMASK) + error = EINVAL; + else { + if (sc->gre_options != opt) { + GRE_WLOCK(sc); + sc->gre_options = opt; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); + } + } break; + case GREGOPTS: + error = copyout(&sc->gre_options, ifr->ifr_data, + sizeof(sc->gre_options)); + break; default: error = EINVAL; break; } - +end: + 
sx_xunlock(&gre_ioctl_sx); return (error); } -/* - * computes a route to our destination that is not the one - * which would be taken by ip_output(), as this one will loop back to - * us. If the interface is p2p as a--->b, then a routing entry exists - * If we now send a packet to b (e.g. ping b), this will come down here - * gets src=a, dst=b tacked on and would from ip_output() sent back to - * if_gre. - * Goal here is to compute a route to b that is less specific than - * a-->b. We know that this one exists as in normal operation we have - * at least a default route which matches. - */ -static int -gre_compute_route(struct gre_softc *sc) +static void +gre_updatehdr(struct gre_softc *sc) { - struct route *ro; + struct grehdr *gh = NULL; + uint32_t *opts; + uint16_t flags; + + GRE_WLOCK_ASSERT(sc); + switch (sc->gre_family) { +#ifdef INET + case AF_INET: + sc->gre_hlen = sizeof(struct greip); + sc->gre_oip.ip_v = IPPROTO_IPV4; + sc->gre_oip.ip_hl = sizeof(struct ip) >> 2; + sc->gre_oip.ip_p = IPPROTO_GRE; + gh = &sc->gre_gihdr->gi_gre; + break; +#endif +#ifdef INET6 + case AF_INET6: + sc->gre_hlen = sizeof(struct greip6); + sc->gre_oip6.ip6_vfc = IPV6_VERSION; + sc->gre_oip6.ip6_nxt = IPPROTO_GRE; + gh = &sc->gre_gi6hdr->gi6_gre; + break; +#endif + default: + return; + } + flags = 0; + opts = gh->gre_opts; + if (sc->gre_options & GRE_ENABLE_CSUM) { + flags |= GRE_FLAGS_CP; + sc->gre_hlen += 2 * sizeof(uint16_t); + *opts++ = 0; + } + if (sc->gre_key != 0) { + flags |= GRE_FLAGS_KP; + sc->gre_hlen += sizeof(uint32_t); + *opts++ = htonl(sc->gre_key); + } + if (sc->gre_options & GRE_ENABLE_SEQ) { + flags |= GRE_FLAGS_SP; + sc->gre_hlen += sizeof(uint32_t); + *opts++ = 0; + } else + sc->gre_oseq = 0; + gh->gre_flags = htons(flags); + GRE2IFP(sc)->if_mtu = sc->gre_mtu - sc->gre_hlen; +} - ro = &sc->route; +static void +gre_detach(struct gre_softc *sc) +{ - memset(ro, 0, sizeof(struct route)); - ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; - 
ro->ro_dst.sa_family = AF_INET; - ro->ro_dst.sa_len = sizeof(ro->ro_dst); + sx_assert(&gre_ioctl_sx, SA_XLOCKED); + if (sc->gre_ecookie != NULL) + encap_detach(sc->gre_ecookie); + sc->gre_ecookie = NULL; +} - /* - * toggle last bit, so our interface is not found, but a less - * specific route. I'd rather like to specify a shorter mask, - * but this is not possible. Should work though. XXX - * XXX MRT Use a different FIB for the tunnel to solve this problem. - */ - if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) { - ((struct sockaddr_in *)&ro->ro_dst)->sin_addr.s_addr ^= - htonl(0x01); - } - -#ifdef DIAGNOSTIC - printf("%s: searching for a route to %s", if_name(GRE2IFP(sc)), - inet_ntoa(((struct sockaddr_in *)&ro->ro_dst)->sin_addr)); +static int +gre_set_tunnel(struct ifnet *ifp, struct sockaddr *src, + struct sockaddr *dst) +{ + struct gre_softc *sc, *tsc; +#ifdef INET6 + struct ip6_hdr *ip6; #endif +#ifdef INET + struct ip *ip; +#endif + void *hdr; + int error; - rtalloc_fib(ro, sc->gre_fibnum); + sx_assert(&gre_ioctl_sx, SA_XLOCKED); + GRE_LIST_LOCK(); + sc = ifp->if_softc; + LIST_FOREACH(tsc, &V_gre_softc_list, gre_list) { + if (tsc == sc || tsc->gre_family != src->sa_family) + continue; +#ifdef INET + if (tsc->gre_family == AF_INET && + tsc->gre_oip.ip_src.s_addr == + satosin(src)->sin_addr.s_addr && + tsc->gre_oip.ip_dst.s_addr == + satosin(dst)->sin_addr.s_addr) { + GRE_LIST_UNLOCK(); + return (EADDRNOTAVAIL); + } +#endif +#ifdef INET6 + if (tsc->gre_family == AF_INET6 && + IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_src, + &satosin6(src)->sin6_addr) && + IN6_ARE_ADDR_EQUAL(&tsc->gre_oip6.ip6_dst, + &satosin6(dst)->sin6_addr)) { + GRE_LIST_UNLOCK(); + return (EADDRNOTAVAIL); + } +#endif + } + GRE_LIST_UNLOCK(); - /* - * check if this returned a route at all and this route is no - * recursion to ourself - */ - if (ro->ro_rt == NULL || ro->ro_rt->rt_ifp->if_softc == sc) { -#ifdef DIAGNOSTIC - if (ro->ro_rt == NULL) - printf(" - no route found!\n"); - else - printf(" - 
route loops back to ourself!\n"); + switch (src->sa_family) { +#ifdef INET + case AF_INET: + hdr = ip = malloc(sizeof(struct greip) + + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); + ip->ip_src = satosin(src)->sin_addr; + ip->ip_dst = satosin(dst)->sin_addr; + break; #endif - return EADDRNOTAVAIL; +#ifdef INET6 + case AF_INET6: + hdr = ip6 = malloc(sizeof(struct greip6) + + 3 * sizeof(uint32_t), M_GRE, M_WAITOK | M_ZERO); + ip6->ip6_src = satosin6(src)->sin6_addr; + ip6->ip6_dst = satosin6(dst)->sin6_addr; + break; +#endif + default: + return (EAFNOSUPPORT); } + if (sc->gre_family != src->sa_family) + gre_detach(sc); + GRE_WLOCK(sc); + if (sc->gre_family != 0) + free(sc->gre_hdr, M_GRE); + sc->gre_family = src->sa_family; + sc->gre_hdr = hdr; + sc->gre_oseq = 0; + sc->gre_iseq = UINT32_MAX; + gre_updatehdr(sc); + GRE_WUNLOCK(sc); + switch (src->sa_family) { +#ifdef INET + case AF_INET: + error = in_gre_attach(sc); + break; +#endif +#ifdef INET6 + case AF_INET6: + error = in6_gre_attach(sc); + break; +#endif + } + if (error == 0) + ifp->if_drv_flags |= IFF_DRV_RUNNING; + return (error); +} + +static void +gre_delete_tunnel(struct ifnet *ifp) +{ + struct gre_softc *sc = ifp->if_softc; + int family; + + GRE_WLOCK(sc); + family = sc->gre_family; + sc->gre_family = 0; + GRE_WUNLOCK(sc); + if (family != 0) { + gre_detach(sc); + free(sc->gre_hdr, M_GRE); + } + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; +} + +int +gre_input(struct mbuf **mp, int *offp, int proto) +{ + struct gre_softc *sc; + struct grehdr *gh; + struct ifnet *ifp; + struct mbuf *m; + uint32_t *opts, key; + uint16_t flags; + int hlen, isr, af; + + m = *mp; + sc = encap_getarg(m); + KASSERT(sc != NULL, ("encap_getarg returned NULL")); + + ifp = GRE2IFP(sc); + gh = (struct grehdr *)mtodo(m, *offp); + flags = ntohs(gh->gre_flags); + if (flags & ~GRE_FLAGS_MASK) + goto drop; + opts = gh->gre_opts; + hlen = 2 * sizeof(uint16_t); + if (flags & GRE_FLAGS_CP) { + /* reserved1 field must be zero */ + if (((uint16_t 
*)opts)[1] != 0) + goto drop; + if (in_cksum_skip(m, m->m_pkthdr.len, *offp) != 0) + goto drop; + hlen += 2 * sizeof(uint16_t); + opts++; + } + if (flags & GRE_FLAGS_KP) { + key = ntohl(*opts); + hlen += sizeof(uint32_t); + opts++; + } else + key = 0; /* - * now change it back - else ip_output will just drop - * the route and search one to this interface ... - */ - if ((GRE2IFP(sc)->if_flags & IFF_LINK1) == 0) - ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = sc->g_dst; - -#ifdef DIAGNOSTIC - printf(", choosing %s with gateway %s", if_name(ro->ro_rt->rt_ifp), - inet_ntoa(((struct sockaddr_in *)(ro->ro_rt->rt_gateway))->sin_addr)); - printf("\n"); + if (sc->gre_key != 0 && (key != sc->gre_key || key != 0)) + goto drop; + */ + if (flags & GRE_FLAGS_SP) { + /* seq = ntohl(*opts); */ + hlen += sizeof(uint32_t); + } + switch (ntohs(gh->gre_proto)) { + case ETHERTYPE_WCCP: + /* + * For WCCP, skip an additional 4 bytes when the GRE header is + * not immediately followed by an IP header. + */ + if (flags == 0 && (*(uint8_t *)gh->gre_opts & 0xF0) != 0x40) + hlen += sizeof(uint32_t); + /* FALLTHROUGH */ + case ETHERTYPE_IP: + isr = NETISR_IP; + af = AF_INET; + break; + case ETHERTYPE_IPV6: + isr = NETISR_IPV6; + af = AF_INET6; + break; + default: + goto drop; + } + m_adj(m, *offp + hlen); + m_clrprotoflags(m); + m->m_pkthdr.rcvif = ifp; + M_SETFIB(m, sc->gre_fibnum); +#ifdef MAC + mac_ifnet_create_mbuf(ifp, m); +#endif + BPF_MTAP2(ifp, &af, sizeof(af), m); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + if ((ifp->if_flags & IFF_MONITOR) != 0) + m_freem(m); + else + netisr_dispatch(isr, m); + return (IPPROTO_DONE); +drop: + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + m_freem(m); + return (IPPROTO_DONE); +} + +#define MTAG_GRE 1307983903 +/* + * Walk the MTAG_GRE tags already attached to the mbuf. Return EIO when + * this interface has tagged the packet before (loop) or when the number + * of tags exceeds V_max_gre_nesting; otherwise attach a new tag that + * records ifp (ENOMEM if the tag cannot be allocated) and return 0. + */ +static int +gre_check_nesting(struct ifnet *ifp, struct mbuf *m) +{ + struct m_tag *mtag; + int count; + + count = 1; + mtag = NULL; + /* + * Continue the search after the previous match; restarting from + * NULL would return the first tag on every iteration and never end. + */ + while ((mtag = m_tag_locate(m, MTAG_GRE, 0, mtag)) != NULL) { +
if (*(struct ifnet **)(mtag + 1) == ifp) { + log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname); + return (EIO); + } + count++; + } + if (count > V_max_gre_nesting) { + log(LOG_NOTICE, + "%s: if_output recursively called too many times(%d)\n", + ifp->if_xname, count); + return (EIO); + } + mtag = m_tag_alloc(MTAG_GRE, 0, sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + *(struct ifnet **)(mtag + 1) = ifp; + m_tag_prepend(m, mtag); + return (0); +} + +static int +gre_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) +{ + uint32_t af; + int error; + +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error != 0) + goto drop; #endif + if ((ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0) { + error = ENETDOWN; + goto drop; + } - return 0; + error = gre_check_nesting(ifp, m); + if (error != 0) + goto drop; + + m->m_flags &= ~(M_BCAST|M_MCAST); + if (dst->sa_family == AF_UNSPEC) + bcopy(dst->sa_data, &af, sizeof(af)); + else + af = dst->sa_family; + BPF_MTAP2(ifp, &af, sizeof(af), m); + m->m_pkthdr.csum_data = af; /* save af for if_transmit */ + return (ifp->if_transmit(ifp, m)); +drop: + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (error); } -/* - * do a checksum of a buffer - much like in_cksum, which operates on - * mbufs. 
- */ -u_int16_t -gre_in_cksum(u_int16_t *p, u_int len) +static void +gre_setseqn(struct grehdr *gh, uint32_t seq) { - u_int32_t sum = 0; - int nwords = len >> 1; + uint32_t *opts; + uint16_t flags; - while (nwords-- != 0) - sum += *p++; + opts = gh->gre_opts; + flags = ntohs(gh->gre_flags); + KASSERT((flags & GRE_FLAGS_SP) != 0, + ("gre_setseqn called, but GRE_FLAGS_SP isn't set ")); + if (flags & GRE_FLAGS_CP) + opts++; + if (flags & GRE_FLAGS_KP) + opts++; + *opts = htonl(seq); +} + +static int +gre_transmit(struct ifnet *ifp, struct mbuf *m) +{ + GRE_RLOCK_TRACKER; + struct gre_softc *sc; + struct grehdr *gh; + uint32_t iaf, oaf, oseq; + int error, hlen, olen, plen; + int want_seq, want_csum; + + plen = 0; + sc = ifp->if_softc; + if (sc == NULL) { + error = ENETDOWN; + m_freem(m); + goto drop; + } + GRE_RLOCK(sc); + if (sc->gre_family == 0) { + GRE_RUNLOCK(sc); + error = ENETDOWN; + m_freem(m); + goto drop; + } + iaf = m->m_pkthdr.csum_data; + oaf = sc->gre_family; + hlen = sc->gre_hlen; + want_seq = (sc->gre_options & GRE_ENABLE_SEQ) != 0; + if (want_seq) + oseq = sc->gre_oseq++; /* XXX */ + want_csum = (sc->gre_options & GRE_ENABLE_CSUM) != 0; + M_SETFIB(m, sc->gre_fibnum); + M_PREPEND(m, hlen, M_NOWAIT); + if (m == NULL) { + GRE_RUNLOCK(sc); + error = ENOBUFS; + goto drop; + } + bcopy(sc->gre_hdr, mtod(m, void *), hlen); + GRE_RUNLOCK(sc); + switch (oaf) { +#ifdef INET + case AF_INET: + olen = sizeof(struct ip); + break; +#endif +#ifdef INET6 + case AF_INET6: + olen = sizeof(struct ip6_hdr); + break; +#endif + default: + error = ENETDOWN; + goto drop; + } + gh = (struct grehdr *)mtodo(m, olen); + switch (iaf) { +#ifdef INET + case AF_INET: + gh->gre_proto = htons(ETHERTYPE_IP); + break; +#endif +#ifdef INET6 + case AF_INET6: + gh->gre_proto = htons(ETHERTYPE_IPV6); + break; +#endif + default: + error = ENETDOWN; + goto drop; + } + if (want_seq) + gre_setseqn(gh, oseq); + if (want_csum) { + *(uint16_t *)gh->gre_opts = in_cksum_skip(m, + m->m_pkthdr.len, olen); 
+ } + plen = m->m_pkthdr.len - hlen; + switch (oaf) { +#ifdef INET + case AF_INET: + error = in_gre_output(m, iaf, hlen); + break; +#endif +#ifdef INET6 + case AF_INET6: + error = in6_gre_output(m, iaf, hlen); + break; +#endif + default: + m_freem(m); + error = ENETDOWN; + }; +drop: + if (error) + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + else { + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); + } + return (error); +} + +static void +gre_qflush(struct ifnet *ifp __unused) +{ - if (len & 1) { - union { - u_short w; - u_char c[2]; - } u; - u.c[0] = *(u_char *)p; - u.c[1] = 0; - sum += u.w; - } - - /* end-around-carry */ - sum = (sum >> 16) + (sum & 0xffff); - sum += (sum >> 16); - return (~sum); } static int Index: sys/net/if_me.h =================================================================== --- /dev/null +++ sys/net/if_me.h @@ -0,0 +1,65 @@ +/*- + * Copyright (c) 2014 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _NET_IF_ME_H_ +#define _NET_IF_ME_H_ + +#ifdef _KERNEL +/* Minimal forwarding header RFC 2004 */ +struct mobhdr { + uint8_t mob_proto; /* protocol */ + uint8_t mob_flags; /* flags */ +#define MOB_FLAGS_SP 0x80 /* source present */ + uint16_t mob_csum; /* header checksum */ + struct in_addr mob_dst; /* original destination address */ + struct in_addr mob_src; /* original source addr (optional) */ +} __packed; + +struct me_softc { + struct ifnet *me_ifp; + LIST_ENTRY(me_softc) me_list; + struct rmlock me_lock; + u_int me_fibnum; + const struct encaptab *me_ecookie; + struct in_addr me_src; + struct in_addr me_dst; +}; +#define ME2IFP(sc) ((sc)->me_ifp) +#define ME_READY(sc) ((sc)->me_src.s_addr != 0) +#define ME_LOCK_INIT(sc) rm_init(&(sc)->me_lock, "me softc") +#define ME_LOCK_DESTROY(sc) rm_destroy(&(sc)->me_lock) +#define ME_RLOCK_TRACKER struct rm_priotracker me_tracker +#define ME_RLOCK(sc) rm_rlock(&(sc)->me_lock, &me_tracker) +#define ME_RUNLOCK(sc) rm_runlock(&(sc)->me_lock, &me_tracker) +#define ME_RLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_RLOCKED) +#define ME_WLOCK(sc) rm_wlock(&(sc)->me_lock) +#define ME_WUNLOCK(sc) rm_wunlock(&(sc)->me_lock) +#define ME_WLOCK_ASSERT(sc) rm_assert(&(sc)->me_lock, RA_WLOCKED) + +#endif /* _KERNEL */ +#endif /* _NET_IF_ME_H_ */ Index: sys/net/if_me.c =================================================================== --- /dev/null +++ sys/net/if_me.c @@ -0,0 +1,616 @@ +/*- + * 
Copyright (c) 2014 Andrey V. Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#define MEMTU 1500 +static const char mename[] = "me"; +static MALLOC_DEFINE(M_IFME, mename, "Minimal Encapsulation for IP"); +static VNET_DEFINE(struct mtx, me_mtx); +#define V_me_mtx VNET(me_mtx) +#define ME_LIST_LOCK_INIT(x) mtx_init(&V_me_mtx, "me_mtx", NULL, MTX_DEF) +#define ME_LIST_LOCK_DESTROY(x) mtx_destroy(&V_me_mtx) +#define ME_LIST_LOCK(x) mtx_lock(&V_me_mtx) +#define ME_LIST_UNLOCK(x) mtx_unlock(&V_me_mtx) + +static VNET_DEFINE(LIST_HEAD(, me_softc), me_softc_list); +#define V_me_softc_list VNET(me_softc_list) +static struct sx me_ioctl_sx; +SX_SYSINIT(me_ioctl_sx, &me_ioctl_sx, "me_ioctl"); + +static int me_clone_create(struct if_clone *, int, caddr_t); +static void me_clone_destroy(struct ifnet *); +static VNET_DEFINE(struct if_clone *, me_cloner); +#define V_me_cloner VNET(me_cloner) + +static void me_qflush(struct ifnet *); +static int me_transmit(struct ifnet *, struct mbuf *); +static int me_ioctl(struct ifnet *, u_long, caddr_t); +static int me_output(struct ifnet *, struct mbuf *, + const struct sockaddr *, struct route *); +static int me_input(struct mbuf **, int *, int); + +static int me_set_tunnel(struct ifnet *, struct sockaddr_in *, + struct sockaddr_in *); +static void me_delete_tunnel(struct ifnet *); + +SYSCTL_DECL(_net_link); +static SYSCTL_NODE(_net_link, IFT_TUNNEL, me, CTLFLAG_RW, 0, + "Minimal Encapsulation for IP (RFC 2004)"); +#ifndef MAX_ME_NEST +#define MAX_ME_NEST 1 +#endif + +static VNET_DEFINE(int, max_me_nesting) = MAX_ME_NEST; +#define V_max_me_nesting VNET(max_me_nesting) +SYSCTL_INT(_net_link_me, OID_AUTO, max_nesting, 
CTLFLAG_RW | CTLFLAG_VNET, + &VNET_NAME(max_me_nesting), 0, "Max nested tunnels"); + +extern struct domain inetdomain; +static const struct protosw in_mobile_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inetdomain, + .pr_protocol = IPPROTO_MOBILE, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = me_input, + .pr_output = rip_output, + .pr_ctlinput = rip_ctlinput, + .pr_ctloutput = rip_ctloutput, + .pr_usrreqs = &rip_usrreqs +}; + +static void +vnet_me_init(const void *unused __unused) +{ + LIST_INIT(&V_me_softc_list); + ME_LIST_LOCK_INIT(); + V_me_cloner = if_clone_simple(mename, me_clone_create, + me_clone_destroy, 0); +} +VNET_SYSINIT(vnet_me_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_me_init, NULL); + +static void +vnet_me_uninit(const void *unused __unused) +{ + + if_clone_detach(V_me_cloner); + ME_LIST_LOCK_DESTROY(); +} +VNET_SYSUNINIT(vnet_me_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY, + vnet_me_uninit, NULL); + +static int +me_clone_create(struct if_clone *ifc, int unit, caddr_t params) +{ + struct me_softc *sc; + + sc = malloc(sizeof(struct me_softc), M_IFME, M_WAITOK | M_ZERO); + sc->me_fibnum = curthread->td_proc->p_fibnum; + ME2IFP(sc) = if_alloc(IFT_TUNNEL); + ME_LOCK_INIT(sc); + ME2IFP(sc)->if_softc = sc; + if_initname(ME2IFP(sc), mename, unit); + + ME2IFP(sc)->if_mtu = MEMTU - sizeof(struct mobhdr); + ME2IFP(sc)->if_flags = IFF_POINTOPOINT|IFF_MULTICAST; + ME2IFP(sc)->if_output = me_output; + ME2IFP(sc)->if_ioctl = me_ioctl; + ME2IFP(sc)->if_transmit = me_transmit; + ME2IFP(sc)->if_qflush = me_qflush; + if_attach(ME2IFP(sc)); + bpfattach(ME2IFP(sc), DLT_NULL, sizeof(u_int32_t)); + ME_LIST_LOCK(); + LIST_INSERT_HEAD(&V_me_softc_list, sc, me_list); + ME_LIST_UNLOCK(); + return (0); +} + +static void +me_clone_destroy(struct ifnet *ifp) +{ + struct me_softc *sc; + + sx_xlock(&me_ioctl_sx); + sc = ifp->if_softc; + me_delete_tunnel(ifp); + ME_LIST_LOCK(); + LIST_REMOVE(sc, me_list); + ME_LIST_UNLOCK(); + bpfdetach(ifp); + 
if_detach(ifp); + ifp->if_softc = NULL; + sx_xunlock(&me_ioctl_sx); + + if_free(ifp); + ME_LOCK_DESTROY(sc); + free(sc, M_IFME); +} + +static int +me_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) +{ + ME_RLOCK_TRACKER; + struct ifreq *ifr = (struct ifreq *)data; + struct sockaddr_in *src, *dst; + struct me_softc *sc; + int error; + + switch (cmd) { + case SIOCSIFMTU: + if (ifr->ifr_mtu < 576) + return (EINVAL); + ifp->if_mtu = ifr->ifr_mtu - sizeof(struct mobhdr); + return (0); + case SIOCSIFADDR: + ifp->if_flags |= IFF_UP; + case SIOCSIFFLAGS: + case SIOCADDMULTI: + case SIOCDELMULTI: + return (0); + } + sx_xlock(&me_ioctl_sx); + sc = ifp->if_softc; + if (sc == NULL) { + error = ENXIO; + goto end; + } + error = 0; + switch (cmd) { + case SIOCSIFPHYADDR: + src = (struct sockaddr_in *) + &(((struct in_aliasreq *)data)->ifra_addr); + dst = (struct sockaddr_in *) + &(((struct in_aliasreq *)data)->ifra_dstaddr); + if (src->sin_family != dst->sin_family || + src->sin_family != AF_INET || + src->sin_len != dst->sin_len || + src->sin_len != sizeof(struct sockaddr_in)) { + error = EINVAL; + break; + } + if (src->sin_addr.s_addr == INADDR_ANY || + dst->sin_addr.s_addr == INADDR_ANY) { + error = EADDRNOTAVAIL; + break; + } + error = me_set_tunnel(ifp, src, dst); + break; + case SIOCDIFPHYADDR: + me_delete_tunnel(ifp); + break; + case SIOCGIFPSRCADDR: + case SIOCGIFPDSTADDR: + ME_RLOCK(sc); + if (!ME_READY(sc)) { + error = EADDRNOTAVAIL; + ME_RUNLOCK(sc); + break; + } + src = (struct sockaddr_in *)&ifr->ifr_addr; + memset(src, 0, sizeof(*src)); + src->sin_family = AF_INET; + src->sin_len = sizeof(*src); + switch (cmd) { + case SIOCGIFPSRCADDR: + src->sin_addr = sc->me_src; + break; + case SIOCGIFPDSTADDR: + src->sin_addr = sc->me_dst; + break; + } + ME_RUNLOCK(sc); + error = prison_if(curthread->td_ucred, sintosa(src)); + if (error != 0) + memset(src, 0, sizeof(*src)); + break; + default: + error = EINVAL; + break; + } +end: + sx_xunlock(&me_ioctl_sx); + return (error); 
+} + +static int +me_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +{ + ME_RLOCK_TRACKER; + struct me_softc *sc; + struct ip *ip; + int ret; + + sc = (struct me_softc *)arg; + if ((ME2IFP(sc)->if_flags & IFF_UP) == 0) + return (0); + + M_ASSERTPKTHDR(m); + + if (m->m_pkthdr.len < sizeof(struct ip) + sizeof(struct mobhdr) - + sizeof(struct in_addr)) + return (0); + + ret = 0; + ME_RLOCK(sc); + if (ME_READY(sc)) { + ip = mtod(m, struct ip *); + if (sc->me_src.s_addr == ip->ip_dst.s_addr && + sc->me_dst.s_addr == ip->ip_src.s_addr) + ret = 32 * 2; + } + ME_RUNLOCK(sc); + return (ret); +} + +static int +me_set_tunnel(struct ifnet *ifp, struct sockaddr_in *src, + struct sockaddr_in *dst) +{ + struct me_softc *sc, *tsc; + + sx_assert(&me_ioctl_sx, SA_XLOCKED); + ME_LIST_LOCK(); + sc = ifp->if_softc; + LIST_FOREACH(tsc, &V_me_softc_list, me_list) { + if (tsc == sc || !ME_READY(tsc)) + continue; + if (tsc->me_src.s_addr == src->sin_addr.s_addr && + tsc->me_dst.s_addr == dst->sin_addr.s_addr) { + ME_LIST_UNLOCK(); + return (EADDRNOTAVAIL); + } + } + ME_LIST_UNLOCK(); + + ME_WLOCK(sc); + sc->me_dst = dst->sin_addr; + sc->me_src = src->sin_addr; + ME_WUNLOCK(sc); + + if (sc->me_ecookie == NULL) + sc->me_ecookie = encap_attach_func(AF_INET, IPPROTO_MOBILE, + me_encapcheck, &in_mobile_protosw, sc); + if (sc->me_ecookie != NULL) + ifp->if_drv_flags |= IFF_DRV_RUNNING; + return (0); +} + +static void +me_delete_tunnel(struct ifnet *ifp) +{ + struct me_softc *sc = ifp->if_softc; + + sx_assert(&me_ioctl_sx, SA_XLOCKED); + if (sc->me_ecookie != NULL) + encap_detach(sc->me_ecookie); + sc->me_ecookie = NULL; + ME_WLOCK(sc); + sc->me_src.s_addr = 0; + sc->me_dst.s_addr = 0; + ME_WUNLOCK(sc); + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; +} + +static uint16_t +me_in_cksum(uint16_t *p, int nwords) +{ + uint32_t sum = 0; + + while (nwords-- > 0) + sum += *p++; + sum = (sum >> 16) + (sum & 0xffff); + sum += (sum >> 16); + return (~sum); +} + +int +me_input(struct mbuf **mp, 
int *offp, int proto) +{ + struct me_softc *sc; + struct mobhdr *mh; + struct ifnet *ifp; + struct mbuf *m; + struct ip *ip; + int hlen; + + m = *mp; + sc = encap_getarg(m); + KASSERT(sc != NULL, ("encap_getarg returned NULL")); + + ifp = ME2IFP(sc); + /* checks for short packets */ + hlen = sizeof(struct mobhdr); + if (m->m_pkthdr.len < sizeof(struct ip) + hlen) + hlen -= sizeof(struct in_addr); + if (m->m_len < sizeof(struct ip) + hlen) + m = m_pullup(m, sizeof(struct ip) + hlen); + if (m == NULL) + goto drop; + mh = (struct mobhdr *)mtodo(m, sizeof(struct ip)); + /* check for wrong flags */ + if (mh->mob_flags & (~MOB_FLAGS_SP)) { + m_freem(m); + goto drop; + } + if (mh->mob_flags) { + if (hlen != sizeof(struct mobhdr)) { + m_freem(m); + goto drop; + } + } else + hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); + /* check mobile header checksum */ + if (me_in_cksum((uint16_t *)mh, hlen / sizeof(uint16_t)) != 0) { + m_freem(m); + goto drop; + } +#ifdef MAC + mac_ifnet_create_mbuf(ifp, m); +#endif + ip = mtod(m, struct ip *); + ip->ip_dst = mh->mob_dst; + ip->ip_p = mh->mob_proto; + ip->ip_sum = 0; + ip->ip_len = htons(m->m_pkthdr.len - hlen); + if (mh->mob_flags) + ip->ip_src = mh->mob_src; + memmove(mtodo(m, hlen), ip, sizeof(struct ip)); + m_adj(m, hlen); + m_clrprotoflags(m); + m->m_pkthdr.rcvif = ifp; + m->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED | CSUM_IP_VALID); + M_SETFIB(m, sc->me_fibnum); + hlen = AF_INET; + BPF_MTAP2(ifp, &hlen, sizeof(hlen), m); + if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); + if ((ifp->if_flags & IFF_MONITOR) != 0) + m_freem(m); + else + netisr_dispatch(NETISR_IP, m); + return (IPPROTO_DONE); +drop: + if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); + return (IPPROTO_DONE); +} + +#define MTAG_ME 1414491977 +static int +me_check_nesting(struct ifnet *ifp, struct mbuf *m) +{ + struct m_tag *mtag; + int count; + + count = 1; + mtag = NULL; /* search cursor: each lookup resumes after it */ + while ((mtag = m_tag_locate(m, MTAG_ME, 
0, mtag)) != NULL) { + if (*(struct ifnet **)(mtag + 1) == ifp) { + log(LOG_NOTICE, "%s: loop detected\n", ifp->if_xname); + return (EIO); + } + count++; + } + if (count > V_max_me_nesting) { + log(LOG_NOTICE, + "%s: if_output recursively called too many times(%d)\n", + ifp->if_xname, count); + return (EIO); + } + mtag = m_tag_alloc(MTAG_ME, 0, sizeof(struct ifnet *), M_NOWAIT); + if (mtag == NULL) + return (ENOMEM); + *(struct ifnet **)(mtag + 1) = ifp; + m_tag_prepend(m, mtag); + return (0); +} + +static int +me_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, + struct route *ro) +{ + uint32_t af; + int error; + +#ifdef MAC + error = mac_ifnet_check_transmit(ifp, m); + if (error != 0) + goto drop; +#endif + if ((ifp->if_flags & IFF_MONITOR) != 0 || + (ifp->if_flags & IFF_UP) == 0) { + error = ENETDOWN; + goto drop; + } + + error = me_check_nesting(ifp, m); + if (error != 0) + goto drop; + + m->m_flags &= ~(M_BCAST|M_MCAST); + if (dst->sa_family == AF_UNSPEC) + bcopy(dst->sa_data, &af, sizeof(af)); + else + af = dst->sa_family; + if (af != AF_INET) { + error = EAFNOSUPPORT; + goto drop; + } + BPF_MTAP2(ifp, &af, sizeof(af), m); + return (ifp->if_transmit(ifp, m)); +drop: + m_freem(m); + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + return (error); +} + +static int +me_transmit(struct ifnet *ifp, struct mbuf *m) +{ + ME_RLOCK_TRACKER; + struct mobhdr mh; + struct me_softc *sc; + struct ip *ip; + int error, hlen, plen; + + sc = ifp->if_softc; + if (sc == NULL) { + error = ENETDOWN; + m_freem(m); + goto drop; + } + if (m->m_len < sizeof(struct ip)) + m = m_pullup(m, sizeof(struct ip)); + if (m == NULL) { + error = ENOBUFS; + goto drop; + } + ip = mtod(m, struct ip *); + /* Fragmented datagrams shouldn't be encapsulated */ + if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { + error = EINVAL; + m_freem(m); + goto drop; + } + mh.mob_proto = ip->ip_p; + mh.mob_src = ip->ip_src; + mh.mob_dst = ip->ip_dst; + ME_RLOCK(sc); + if (!ME_READY(sc)) { + 
ME_RUNLOCK(sc); + error = ENETDOWN; + m_freem(m); + goto drop; + } + if (in_hosteq(sc->me_src, ip->ip_src)) { + hlen = sizeof(struct mobhdr) - sizeof(struct in_addr); + mh.mob_flags = 0; + } else { + hlen = sizeof(struct mobhdr); + mh.mob_flags = MOB_FLAGS_SP; + } + plen = m->m_pkthdr.len; + ip->ip_src = sc->me_src; + ip->ip_dst = sc->me_dst; + M_SETFIB(m, sc->me_fibnum); + ME_RUNLOCK(sc); + M_PREPEND(m, hlen, M_NOWAIT); + if (m == NULL) { + error = ENOBUFS; + goto drop; + } + if (m->m_len < sizeof(struct ip) + hlen) + m = m_pullup(m, sizeof(struct ip) + hlen); + if (m == NULL) { + error = ENOBUFS; + goto drop; + } + memmove(mtod(m, void *), mtodo(m, hlen), sizeof(struct ip)); + ip = mtod(m, struct ip *); + ip->ip_len = htons(m->m_pkthdr.len); + ip->ip_p = IPPROTO_MOBILE; + ip->ip_sum = 0; + mh.mob_csum = 0; + mh.mob_csum = me_in_cksum((uint16_t *)&mh, hlen / sizeof(uint16_t)); + bcopy(&mh, mtodo(m, sizeof(struct ip)), hlen); + error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); +drop: + if (error) + if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); + else { + if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); + if_inc_counter(ifp, IFCOUNTER_OBYTES, plen); + } + return (error); +} + +static void +me_qflush(struct ifnet *ifp __unused) +{ + +} + +static int +memodevent(module_t mod, int type, void *data) +{ + + switch (type) { + case MOD_LOAD: + case MOD_UNLOAD: + break; + default: + return (EOPNOTSUPP); + } + return (0); +} + +static moduledata_t me_mod = { + "if_me", + memodevent, + 0 +}; + +DECLARE_MODULE(if_me, me_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); +MODULE_VERSION(if_me, 1); Index: sys/netinet/in_gif.c =================================================================== --- sys/netinet/in_gif.c +++ sys/netinet/in_gif.c @@ -68,9 +68,6 @@ #include -static int gif_validate4(const struct ip *, struct gif_softc *, - struct ifnet *); - extern struct domain inetdomain; struct protosw in_gif_protosw = { .pr_type = SOCK_RAW, @@ -162,14 +159,20 @@ } /* - * validate outer 
address. + * we know that we are in IFF_UP, outer address available, and outer family + * matched the physical addr family. see gif_encapcheck(). */ -static int -gif_validate4(const struct ip *ip, struct gif_softc *sc, struct ifnet *ifp) +int +in_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { + struct ip *ip; + struct gif_softc *sc; + /* sanity check done in caller */ + sc = (struct gif_softc *)arg; GIF_RLOCK_ASSERT(sc); + ip = mtod(m, struct ip *); /* check for address match */ if (sc->gif_iphdr->ip_src.s_addr != ip->ip_dst.s_addr || sc->gif_iphdr->ip_dst.s_addr != ip->ip_src.s_addr) @@ -186,7 +189,7 @@ } /* ingress filters on outer source */ - if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) { + if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) { struct sockaddr_in sin; struct rtentry *rt; @@ -197,7 +200,7 @@ /* XXX MRT check for the interface we would use on output */ rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, sc->gif_fibnum); - if (!rt || rt->rt_ifp != ifp) { + if (!rt || rt->rt_ifp != m->m_pkthdr.rcvif) { if (rt) RTFREE_LOCKED(rt); return (0); @@ -207,26 +210,6 @@ return (32 * 2); } -/* - * we know that we are in IFF_UP, outer address available, and outer family - * matched the physical addr family. see gif_encapcheck(). - */ -int -in_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) -{ - struct ip ip; - struct gif_softc *sc; - struct ifnet *ifp; - - /* sanity check done in caller */ - sc = (struct gif_softc *)arg; - GIF_RLOCK_ASSERT(sc); - - m_copydata(m, 0, sizeof(ip), (caddr_t)&ip); - ifp = ((m->m_flags & M_PKTHDR) != 0) ? 
m->m_pkthdr.rcvif : NULL; - return (gif_validate4(&ip, sc, ifp)); -} - int in_gif_attach(struct gif_softc *sc) { Index: sys/netinet/ip_gre.h =================================================================== --- sys/netinet/ip_gre.h +++ /dev/null @@ -1,36 +0,0 @@ -/* $NetBSD: ip_gre.h,v 1.5 2002/06/09 16:33:40 itojun Exp $ */ -/* $FreeBSD$ */ - -/*- - * Copyright (c) 1998 The NetBSD Foundation, Inc. - * All rights reserved. - * - * This code is derived from software contributed to The NetBSD Foundation - * by Heiko W.Rupp - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. 
- */ - -#ifdef _KERNEL -int gre_input(struct mbuf **, int *, int); -int gre_mobile_input(struct mbuf **, int *, int); -#endif /* _KERNEL */ Index: sys/netinet/ip_gre.c =================================================================== --- sys/netinet/ip_gre.c +++ sys/netinet/ip_gre.c @@ -1,7 +1,6 @@ -/* $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ */ - /*- * Copyright (c) 1998 The NetBSD Foundation, Inc. + * Copyright (c) 2014 Andrey V. Elsukov * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation @@ -29,12 +28,8 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - */ - -/* - * deencapsulate tunneled packets and send them on - * output half is in net/if_gre.[ch] - * This currently handles IPPROTO_GRE, IPPROTO_MOBILE + * + * $NetBSD: ip_gre.c,v 1.29 2003/09/05 23:02:43 itojun Exp $ */ #include @@ -52,281 +47,131 @@ #include #include #include -#include -#include +#include +#include +#include #include #include #include -#include -#include #include +#include -#ifdef INET #include #include #include #include +#include #include -#include #include -#else -#error "ip_gre requires INET" + +#ifdef INET6 +#include #endif /* Needs IP headers. 
*/ #include - #include -#if 1 -void gre_inet_ntoa(struct in_addr in); /* XXX */ -#endif +extern struct domain inetdomain; +extern int gre_input(struct mbuf **, int *, int); -static struct gre_softc *gre_lookup(struct mbuf *, u_int8_t); +int in_gre_attach(struct gre_softc *); +int in_gre_output(struct mbuf *, int, int); -static struct mbuf *gre_input2(struct mbuf *, int, u_char); +static const struct protosw in_gre_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inetdomain, + .pr_protocol = IPPROTO_GRE, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = gre_input, + .pr_output = rip_output, + .pr_ctlinput = rip_ctlinput, + .pr_ctloutput = rip_ctloutput, + .pr_usrreqs = &rip_usrreqs +}; + +#define GRE_TTL 30 +VNET_DEFINE(int, ip_gre_ttl) = GRE_TTL; +#define V_ip_gre_ttl VNET(ip_gre_ttl) +SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, grettl, CTLFLAG_RW, + &VNET_NAME(ip_gre_ttl), 0, ""); -/* - * De-encapsulate a packet and feed it back through ip input (this - * routine is called whenever IP gets a packet with proto type - * IPPROTO_GRE and a local destination address). - * This really is simple - */ -int -gre_input(struct mbuf **mp, int *offp, int proto) +static int +in_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { - struct mbuf *m; - int off; - - m = *mp; - off = *offp; - *mp = NULL; + GRE_RLOCK_TRACKER; + struct gre_softc *sc; + struct ip *ip; - m = gre_input2(m, off, proto); + sc = (struct gre_softc *)arg; + if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0) + return (0); + M_ASSERTPKTHDR(m); /* - * If no matching tunnel that is up is found. We inject - * the mbuf to raw ip socket to see if anyone picks it up. + * We expect that payload contains at least IPv4 + * or IPv6 packet. */ - if (m != NULL) { - *mp = m; - rip_input(mp, offp, proto); - } - return (IPPROTO_DONE); -} + if (m->m_pkthdr.len < sizeof(struct greip) + sizeof(struct ip)) + return (0); -/* - * Decapsulate. Does the real work and is called from gre_input() - * (above). 
Returns an mbuf back if packet is not yet processed, - * and NULL if it needs no further processing. proto is the protocol - * number of the "calling" foo_input() routine. - */ -static struct mbuf * -gre_input2(struct mbuf *m ,int hlen, u_char proto) -{ - struct greip *gip; - int isr; - struct gre_softc *sc; - u_int16_t flags; - u_int32_t af; + GRE_RLOCK(sc); + if (sc->gre_family == 0) + goto bad; - if ((sc = gre_lookup(m, proto)) == NULL) { - /* No matching tunnel or tunnel is down. */ - return (m); - } - - if (m->m_len < sizeof(*gip)) { - m = m_pullup(m, sizeof(*gip)); - if (m == NULL) - return (NULL); - } - gip = mtod(m, struct greip *); - - if_inc_counter(GRE2IFP(sc), IFCOUNTER_IPACKETS, 1); - if_inc_counter(GRE2IFP(sc), IFCOUNTER_IBYTES, m->m_pkthdr.len); - - switch (proto) { - case IPPROTO_GRE: - hlen += sizeof(struct gre_h); - - /* process GRE flags as packet can be of variable len */ - flags = ntohs(gip->gi_flags); - - /* Checksum & Offset are present */ - if ((flags & GRE_CP) | (flags & GRE_RP)) - hlen += 4; - /* We don't support routing fields (variable length) */ - if (flags & GRE_RP) - return (m); - if (flags & GRE_KP) - hlen += 4; - if (flags & GRE_SP) - hlen += 4; - - switch (ntohs(gip->gi_ptype)) { /* ethertypes */ - case WCCP_PROTOCOL_TYPE: - if (sc->wccp_ver == WCCP_V2) - hlen += 4; - /* FALLTHROUGH */ - case ETHERTYPE_IP: /* shouldn't need a schednetisr(), */ - isr = NETISR_IP;/* as we are in ip_input */ - af = AF_INET; - break; -#ifdef INET6 - case ETHERTYPE_IPV6: - isr = NETISR_IPV6; - af = AF_INET6; - break; -#endif - default: - /* Others not yet supported. */ - return (m); - } - break; - default: - /* Others not yet supported. 
*/ - return (m); - } + KASSERT(sc->gre_family == AF_INET, + ("wrong gre_family: %d", sc->gre_family)); - if (hlen > m->m_pkthdr.len) { - m_freem(m); - return (NULL); - } - /* Unlike NetBSD, in FreeBSD m_adj() adjusts m->m_pkthdr.len as well */ - m_adj(m, hlen); - - if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) { - bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m); - } - - if ((GRE2IFP(sc)->if_flags & IFF_MONITOR) != 0) { - m_freem(m); - return(NULL); - } - - m->m_pkthdr.rcvif = GRE2IFP(sc); - m_clrprotoflags(m); - netisr_queue(isr, m); - - /* Packet is done, no further processing needed. */ - return (NULL); + ip = mtod(m, struct ip *); + if (sc->gre_oip.ip_src.s_addr != ip->ip_dst.s_addr || + sc->gre_oip.ip_dst.s_addr != ip->ip_src.s_addr) + goto bad; + + GRE_RUNLOCK(sc); + return (32 * 2); +bad: + GRE_RUNLOCK(sc); + return (0); } -/* - * input routine for IPPRPOTO_MOBILE - * This is a little bit diffrent from the other modes, as the - * encapsulating header was not prepended, but instead inserted - * between IP header and payload - */ - int -gre_mobile_input(struct mbuf **mp, int *offp, int proto) +in_gre_output(struct mbuf *m, int af, int hlen) { - struct ip *ip; - struct mobip_h *mip; - struct mbuf *m; - struct gre_softc *sc; - int msiz; - - m = *mp; - if ((sc = gre_lookup(m, IPPROTO_MOBILE)) == NULL) { - /* No matching tunnel or tunnel is down. 
*/ - m_freem(m); - return (IPPROTO_DONE); - } - - if (m->m_len < sizeof(*mip)) { - m = m_pullup(m, sizeof(*mip)); - if (m == NULL) - return (IPPROTO_DONE); - } - ip = mtod(m, struct ip *); - mip = mtod(m, struct mobip_h *); - - if_inc_counter(GRE2IFP(sc), IFCOUNTER_IPACKETS, 1); - if_inc_counter(GRE2IFP(sc), IFCOUNTER_IBYTES, m->m_pkthdr.len); - - if (ntohs(mip->mh.proto) & MOB_H_SBIT) { - msiz = MOB_H_SIZ_L; - mip->mi.ip_src.s_addr = mip->mh.osrc; - } else - msiz = MOB_H_SIZ_S; - - if (m->m_len < (ip->ip_hl << 2) + msiz) { - m = m_pullup(m, (ip->ip_hl << 2) + msiz); - if (m == NULL) - return (IPPROTO_DONE); - ip = mtod(m, struct ip *); - mip = mtod(m, struct mobip_h *); - } - - mip->mi.ip_dst.s_addr = mip->mh.odst; - mip->mi.ip_p = (ntohs(mip->mh.proto) >> 8); - - if (gre_in_cksum((u_int16_t *)&mip->mh, msiz) != 0) { - m_freem(m); - return (IPPROTO_DONE); - } - - bcopy((caddr_t)(ip) + (ip->ip_hl << 2) + msiz, (caddr_t)(ip) + - (ip->ip_hl << 2), m->m_len - msiz - (ip->ip_hl << 2)); - m->m_len -= msiz; - m->m_pkthdr.len -= msiz; - - /* - * On FreeBSD, rip_input() supplies us with ip->ip_len - * decreased by the lengh of IP header, however, ip_input() - * expects it to be full size of IP packet, so adjust accordingly. - */ - ip->ip_len = htons(ntohs(ip->ip_len) + sizeof(struct ip) - msiz); - - ip->ip_sum = 0; - ip->ip_sum = in_cksum(m, (ip->ip_hl << 2)); - - if (bpf_peers_present(GRE2IFP(sc)->if_bpf)) { - u_int32_t af = AF_INET; - bpf_mtap2(GRE2IFP(sc)->if_bpf, &af, sizeof(af), m); - } + struct greip *gi; - if ((GRE2IFP(sc)->if_flags & IFF_MONITOR) != 0) { - m_freem(m); - return (IPPROTO_DONE); + gi = mtod(m, struct greip *); + switch (af) { + case AF_INET: + /* + * gre_transmit() has used M_PREPEND() that doesn't guarantee + * m_data is contiguous more than hlen bytes. Use m_copydata() + * here to avoid m_pullup(). 
+ */ + m_copydata(m, hlen + offsetof(struct ip, ip_tos), + sizeof(u_char), &gi->gi_ip.ip_tos); + m_copydata(m, hlen + offsetof(struct ip, ip_id), + sizeof(u_short), (caddr_t)&gi->gi_ip.ip_id); + break; +#ifdef INET6 + case AF_INET6: + gi->gi_ip.ip_tos = 0; /* XXX */ + gi->gi_ip.ip_id = ip_newid(); + break; +#endif } - - m->m_pkthdr.rcvif = GRE2IFP(sc); - - netisr_queue(NETISR_IP, m); - return (IPPROTO_DONE); + gi->gi_ip.ip_ttl = V_ip_gre_ttl; + gi->gi_ip.ip_len = htons(m->m_pkthdr.len); + return (ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL)); } -/* - * Find the gre interface associated with our src/dst/proto set. - * - * XXXRW: Need some sort of drain/refcount mechanism so that the softc - * reference remains valid after it's returned from gre_lookup(). Right - * now, I'm thinking it should be reference-counted with a gre_dropref() - * when the caller is done with the softc. This is complicated by how - * to handle destroying the gre softc; probably using a gre_drain() in - * in_gre.c during destroy. 
- */ -static struct gre_softc * -gre_lookup(struct mbuf *m, u_int8_t proto) +int +in_gre_attach(struct gre_softc *sc) { - struct ip *ip = mtod(m, struct ip *); - struct gre_softc *sc; - - GRE_LIST_LOCK(); - for (sc = LIST_FIRST(&V_gre_softc_list); sc != NULL; - sc = LIST_NEXT(sc, sc_list)) { - if ((sc->g_dst.s_addr == ip->ip_src.s_addr) && - (sc->g_src.s_addr == ip->ip_dst.s_addr) && - (sc->g_proto == proto) && - ((GRE2IFP(sc)->if_flags & IFF_UP) != 0)) { - GRE_LIST_UNLOCK(); - return (sc); - } - } - GRE_LIST_UNLOCK(); - return (NULL); + KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL")); + sc->gre_ecookie = encap_attach_func(AF_INET, IPPROTO_GRE, + in_gre_encapcheck, &in_gre_protosw, sc); + if (sc->gre_ecookie == NULL) + return (EEXIST); + return (0); } Index: sys/netinet6/in6_gif.c =================================================================== --- sys/netinet6/in6_gif.c +++ sys/netinet6/in6_gif.c @@ -81,9 +81,6 @@ SYSCTL_VNET_INT(_net_inet6_ip6, IPV6CTL_GIF_HLIM, gifhlim, CTLFLAG_RW, &VNET_NAME(ip6_gif_hlim), 0, ""); -static int gif_validate6(const struct ip6_hdr *, struct gif_softc *, - struct ifnet *); - extern struct domain inet6domain; struct protosw in6_gif_protosw = { .pr_type = SOCK_RAW, @@ -173,14 +170,20 @@ } /* - * validate outer address. + * we know that we are in IFF_UP, outer address available, and outer family + * matched the physical addr family. see gif_encapcheck(). */ -static int -gif_validate6(const struct ip6_hdr *ip6, struct gif_softc *sc, - struct ifnet *ifp) +int +in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) { + struct ip6_hdr *ip6; + struct gif_softc *sc; + /* sanity check done in caller */ + sc = (struct gif_softc *)arg; GIF_RLOCK_ASSERT(sc); + + ip6 = mtod(m, struct ip6_hdr *); /* * Check for address match. Note that the check is for an incoming * packet. 
We should compare the *source* address in our configuration @@ -193,7 +196,7 @@ /* martian filters on outer source - done in ip6_input */ /* ingress filters on outer source */ - if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0 && ifp) { + if ((GIF2IFP(sc)->if_flags & IFF_LINK2) == 0) { struct sockaddr_in6 sin6; struct rtentry *rt; @@ -205,7 +208,7 @@ rt = in6_rtalloc1((struct sockaddr *)&sin6, 0, 0UL, sc->gif_fibnum); - if (!rt || rt->rt_ifp != ifp) { + if (!rt || rt->rt_ifp != m->m_pkthdr.rcvif) { if (rt) RTFREE_LOCKED(rt); return (0); @@ -216,26 +219,6 @@ return (128 * 2); } -/* - * we know that we are in IFF_UP, outer address available, and outer family - * matched the physical addr family. see gif_encapcheck(). - */ -int -in6_gif_encapcheck(const struct mbuf *m, int off, int proto, void *arg) -{ - struct ip6_hdr ip6; - struct gif_softc *sc; - struct ifnet *ifp; - - /* sanity check done in caller */ - sc = (struct gif_softc *)arg; - GIF_RLOCK_ASSERT(sc); - - m_copydata(m, 0, sizeof(ip6), (caddr_t)&ip6); - ifp = ((m->m_flags & M_PKTHDR) != 0) ? m->m_pkthdr.rcvif : NULL; - return (gif_validate6(&ip6, sc, ifp)); -} - int in6_gif_attach(struct gif_softc *sc) { Index: sys/netinet6/in6_proto.c =================================================================== --- sys/netinet6/in6_proto.c +++ sys/netinet6/in6_proto.c @@ -332,6 +332,17 @@ { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_GRE, + .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, + .pr_input = encap6_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + .pr_init = encap_init, + .pr_usrreqs = &rip6_usrreqs +}, +{ + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, .pr_protocol = IPPROTO_PIM, .pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR, .pr_input = encap6_input, Index: sys/netinet6/ip6_gre.c =================================================================== --- /dev/null +++ sys/netinet6/ip6_gre.c @@ -0,0 +1,151 @@ +/*- + * Copyright (c) 2014 Andrey V. 
Elsukov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#ifdef INET +#include +#include +#endif +#include +#include +#include +#include +#include + +extern struct domain inet6domain; +extern int gre_input(struct mbuf **, int *, int); + +int in6_gre_attach(struct gre_softc *); +int in6_gre_output(struct mbuf *, int, int); + +struct protosw in6_gre_protosw = { + .pr_type = SOCK_RAW, + .pr_domain = &inet6domain, + .pr_protocol = IPPROTO_GRE, + .pr_flags = PR_ATOMIC|PR_ADDR, + .pr_input = gre_input, + .pr_output = rip6_output, + .pr_ctloutput = rip6_ctloutput, + .pr_usrreqs = &rip6_usrreqs +}; + +VNET_DEFINE(int, ip6_gre_hlim) = IPV6_DEFHLIM; +#define V_ip6_gre_hlim VNET(ip6_gre_hlim) + +SYSCTL_DECL(_net_inet6_ip6); +SYSCTL_VNET_INT(_net_inet6_ip6, OID_AUTO, grehlim, CTLFLAG_RW, + &VNET_NAME(ip6_gre_hlim), 0, ""); + +static int +in6_gre_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +{ + GRE_RLOCK_TRACKER; + struct gre_softc *sc; + struct ip6_hdr *ip6; + + sc = (struct gre_softc *)arg; + if ((GRE2IFP(sc)->if_flags & IFF_UP) == 0) + return (0); + + M_ASSERTPKTHDR(m); + /* + * We expect that payload contains at least IPv4 + * or IPv6 packet. 
+ */ + if (m->m_pkthdr.len < sizeof(struct greip6) + +#ifdef INET + sizeof(struct ip)) +#else + sizeof(struct ip6_hdr)) +#endif + return (0); + + GRE_RLOCK(sc); + if (sc->gre_family == 0) + goto bad; + + KASSERT(sc->gre_family == AF_INET6, + ("wrong gre_family: %d", sc->gre_family)); + + ip6 = mtod(m, struct ip6_hdr *); + if (!IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_src, &ip6->ip6_dst) || + !IN6_ARE_ADDR_EQUAL(&sc->gre_oip6.ip6_dst, &ip6->ip6_src)) + goto bad; + + GRE_RUNLOCK(sc); + return (128 * 2); +bad: + GRE_RUNLOCK(sc); + return (0); +} + +int +in6_gre_output(struct mbuf *m, int af, int hlen) +{ + struct greip6 *gi6; + + gi6 = mtod(m, struct greip6 *); + gi6->gi6_ip6.ip6_hlim = V_ip6_gre_hlim; + return (ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL, NULL)); +} + +int +in6_gre_attach(struct gre_softc *sc) +{ + + KASSERT(sc->gre_ecookie == NULL, ("gre_ecookie isn't NULL")); + sc->gre_ecookie = encap_attach_func(AF_INET6, IPPROTO_GRE, + in6_gre_encapcheck, &in6_gre_protosw, sc); + if (sc->gre_ecookie == NULL) + return (EEXIST); + return (0); +}