Index: projects/vnet/sys/contrib/ipfilter/netinet/ip_fil.h =================================================================== --- projects/vnet/sys/contrib/ipfilter/netinet/ip_fil.h (revision 302198) +++ projects/vnet/sys/contrib/ipfilter/netinet/ip_fil.h (revision 302199) @@ -1,2000 +1,1999 @@ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. * * @(#)ip_fil.h 1.35 6/5/96 * $FreeBSD$ * Id: ip_fil.h,v 2.170.2.51 2007/10/10 09:48:03 darrenr Exp $ */ #ifndef __IP_FIL_H__ #define __IP_FIL_H__ #include "netinet/ip_compat.h" #include "netinet/ipf_rb.h" #if NETBSD_GE_REV(104040000) # include #endif #if defined(BSD) && defined(_KERNEL) # if NETBSD_LT_REV(399000000) || defined(__osf__) || FREEBSD_LT_REV(500043) # include # else # include # endif #endif #if !defined(linux) || !defined(_KERNEL) # include #endif #ifndef SOLARIS # define SOLARIS (defined(sun) && (defined(__svr4__) || defined(__SVR4))) #endif #ifndef __P # ifdef __STDC__ # define __P(x) x # else # define __P(x) () # endif #endif #if defined(__STDC__) || defined(__GNUC__) || defined(_AIX51) # define SIOCADAFR _IOW('r', 60, struct ipfobj) # define SIOCRMAFR _IOW('r', 61, struct ipfobj) # define SIOCSETFF _IOW('r', 62, u_int) # define SIOCGETFF _IOR('r', 63, u_int) # define SIOCGETFS _IOWR('r', 64, struct ipfobj) # define SIOCIPFFL _IOWR('r', 65, int) # define SIOCIPFFB _IOR('r', 66, int) # define SIOCADIFR _IOW('r', 67, struct ipfobj) # define SIOCRMIFR _IOW('r', 68, struct ipfobj) # define SIOCSWAPA _IOR('r', 69, u_int) # define SIOCINAFR _IOW('r', 70, struct ipfobj) # define SIOCINIFR _IOW('r', 71, struct ipfobj) # define SIOCFRENB _IOW('r', 72, u_int) # define SIOCFRSYN _IOW('r', 73, u_int) # define SIOCFRZST _IOWR('r', 74, struct ipfobj) # define SIOCZRLST _IOWR('r', 75, struct ipfobj) # define SIOCAUTHW _IOWR('r', 76, struct ipfobj) # define SIOCAUTHR _IOWR('r', 77, struct ipfobj) # define SIOCSTAT1 _IOWR('r', 78, struct ipfobj) # define SIOCSTLCK _IOWR('r', 79, u_int) # define SIOCSTPUT _IOWR('r', 80, struct ipfobj) # define SIOCSTGET _IOWR('r', 81, struct ipfobj) # define SIOCSTGSZ _IOWR('r', 82, struct ipfobj) # define SIOCSTAT2 _IOWR('r', 83, struct ipfobj) # define SIOCSETLG _IOWR('r', 84, int) # define SIOCGETLG _IOWR('r', 85, int) # define SIOCFUNCL _IOWR('r', 86, struct ipfunc_resolve) # define SIOCIPFGETNEXT _IOWR('r', 87, struct ipfobj) # define SIOCIPFGET _IOWR('r', 88, struct ipfobj) # define SIOCIPFSET _IOWR('r', 89, struct ipfobj) # define SIOCIPFL6 _IOWR('r', 90, int) # define SIOCIPFITER _IOWR('r', 91, struct ipfobj) # define SIOCGENITER _IOWR('r', 92, struct ipfobj) # define SIOCGTABL _IOWR('r', 93, struct ipfobj) # define SIOCIPFDELTOK _IOWR('r', 94, int) # define SIOCLOOKUPITER _IOWR('r', 95, struct ipfobj) # define SIOCGTQTAB _IOWR('r', 96, struct ipfobj) # define SIOCMATCHFLUSH _IOWR('r', 97, struct ipfobj) # define SIOCIPFINTERROR _IOR('r', 98, int) #else # define SIOCADAFR _IOW(r, 60, struct ipfobj) # define SIOCRMAFR _IOW(r, 61, struct ipfobj) # define SIOCSETFF _IOW(r, 62, u_int) # define SIOCGETFF _IOR(r, 63, u_int) # define SIOCGETFS _IOWR(r, 64, struct ipfobj) # define SIOCIPFFL _IOWR(r, 65, int) # define SIOCIPFFB _IOR(r, 66, int) # define SIOCADIFR _IOW(r, 67, struct ipfobj) # define SIOCRMIFR _IOW(r, 68, struct ipfobj) # define SIOCSWAPA _IOR(r, 69, u_int) # define SIOCINAFR _IOW(r, 70, struct ipfobj) # define SIOCINIFR _IOW(r, 71, struct ipfobj) # define SIOCFRENB _IOW(r, 72, u_int) # define SIOCFRSYN _IOW(r, 73, u_int) # define SIOCFRZST _IOWR(r, 74, struct ipfobj) # define SIOCZRLST _IOWR(r, 75, struct ipfobj) # define SIOCAUTHW _IOWR(r, 76, struct ipfobj) # define SIOCAUTHR _IOWR(r, 77, struct ipfobj) # define SIOCSTAT1 _IOWR(r, 78, struct ipfobj) # define SIOCSTLCK _IOWR(r, 79, u_int) # define SIOCSTPUT _IOWR(r, 80, struct ipfobj) # define SIOCSTGET _IOWR(r, 81, struct ipfobj) # define SIOCSTGSZ _IOWR(r, 82, struct ipfobj) # define SIOCSTAT2 _IOWR(r, 83, struct ipfobj) # define SIOCSETLG _IOWR(r, 84, int) # define SIOCGETLG _IOWR(r, 85, int) # define SIOCFUNCL _IOWR(r, 86, struct ipfunc_resolve) # define SIOCIPFGETNEXT _IOWR(r, 87, struct ipfobj) # define SIOCIPFGET _IOWR(r, 88, struct ipfobj) # define SIOCIPFSET _IOWR(r, 89, struct ipfobj) # define SIOCIPFL6 _IOWR(r, 90, int) # define SIOCIPFITER _IOWR(r, 91, struct ipfobj) # define SIOCGENITER _IOWR(r, 92, struct ipfobj) # define SIOCGTABL _IOWR(r, 93, struct ipfobj) # define SIOCIPFDELTOK _IOWR(r, 94, int) # define SIOCLOOKUPITER _IOWR(r, 95, struct ipfobj) # define SIOCGTQTAB _IOWR(r, 96, struct ipfobj) # define SIOCMATCHFLUSH _IOWR(r, 97, struct ipfobj) # define SIOCIPFINTERROR _IOR(r, 98, int) #endif #define SIOCADDFR SIOCADAFR #define SIOCDELFR SIOCRMAFR #define SIOCINSFR SIOCINAFR #define SIOCATHST SIOCSTAT1 #define SIOCGFRST SIOCSTAT2 struct ipscan; struct ifnet; struct ipf_main_softc_s; typedef int (* lookupfunc_t) __P((struct ipf_main_softc_s *, void *, int, void *, u_int)); /* * i6addr is used as a container for both IPv4 and IPv6 addresses, as well * as other types of objects, depending on its qualifier. */ #ifdef USE_INET6 typedef union i6addr { u_32_t i6[4]; struct in_addr in4; struct in6_addr in6; void *vptr[2]; lookupfunc_t lptr[2]; struct { u_short type; u_short subtype; int name; } i6un; } i6addr_t; #else typedef union i6addr { u_32_t i6[4]; struct in_addr in4; void *vptr[2]; lookupfunc_t lptr[2]; struct { u_short type; u_short subtype; int name; } i6un; } i6addr_t; #endif #define in4_addr in4.s_addr #define iplookupnum i6[1] #define iplookupname i6un.name #define iplookuptype i6un.type #define iplookupsubtype i6un.subtype /* * NOTE: These DO overlap the above on 64bit systems and this IS recognised. */ #define iplookupptr vptr[0] #define iplookupfunc lptr[1] #define I60(x) (((u_32_t *)(x))[0]) #define I61(x) (((u_32_t *)(x))[1]) #define I62(x) (((u_32_t *)(x))[2]) #define I63(x) (((u_32_t *)(x))[3]) #define HI60(x) ntohl(((u_32_t *)(x))[0]) #define HI61(x) ntohl(((u_32_t *)(x))[1]) #define HI62(x) ntohl(((u_32_t *)(x))[2]) #define HI63(x) ntohl(((u_32_t *)(x))[3]) #define IP6_EQ(a,b) ((I63(a) == I63(b)) && (I62(a) == I62(b)) && \ (I61(a) == I61(b)) && (I60(a) == I60(b))) #define IP6_NEQ(a,b) ((I63(a) != I63(b)) || (I62(a) != I62(b)) || \ (I61(a) != I61(b)) || (I60(a) != I60(b))) #define IP6_ISZERO(a) ((I60(a) | I61(a) | I62(a) | I63(a)) == 0) #define IP6_NOTZERO(a) ((I60(a) | I61(a) | I62(a) | I63(a)) != 0) #define IP6_ISONES(a) ((I63(a) == 0xffffffff) && (I62(a) == 0xffffffff) && \ (I61(a) == 0xffffffff) && (I60(a) == 0xffffffff)) #define IP6_GT(a,b) (ntohl(HI60(a)) > ntohl(HI60(b)) || \ (HI60(a) == HI60(b) && \ (ntohl(HI61(a)) > ntohl(HI61(b)) || \ (HI61(a) == HI61(b) && \ (ntohl(HI62(a)) > ntohl(HI62(b)) || \ (HI62(a) == HI62(b) && \ ntohl(HI63(a)) > ntohl(HI63(b)))))))) #define IP6_LT(a,b) (ntohl(HI60(a)) < ntohl(HI60(b)) || \ (HI60(a) == HI60(b) && \ (ntohl(HI61(a)) < ntohl(HI61(b)) || \ (HI61(a) == HI61(b) && \ (ntohl(HI62(a)) < ntohl(HI62(b)) || \ (HI62(a) == HI62(b) && \ ntohl(HI63(a)) < ntohl(HI63(b)))))))) #define NLADD(n,x) htonl(ntohl(n) + (x)) #define IP6_INC(a) \ do { u_32_t *_i6 = (u_32_t *)(a); \ _i6[3] = NLADD(_i6[3], 1); \ if (_i6[3] == 0) { \ _i6[2] = NLADD(_i6[2], 1); \ if (_i6[2] == 0) { \ _i6[1] = NLADD(_i6[1], 1); \ if (_i6[1] == 0) { \ _i6[0] = NLADD(_i6[0], 1); \ } \ } \ } \ } while (0) #define IP6_ADD(a,x,d) \ do { i6addr_t *_s = (i6addr_t *)(a); \ i6addr_t *_d = (i6addr_t *)(d); \ _d->i6[0] = NLADD(_s->i6[0], x); \ if (ntohl(_d->i6[0]) < ntohl(_s->i6[0])) { \ _d->i6[1] = NLADD(_d->i6[1], 1); \ if (ntohl(_d->i6[1]) < ntohl(_s->i6[1])) { \ _d->i6[2] = NLADD(_d->i6[2], 1); \ if (ntohl(_d->i6[2]) < ntohl(_s->i6[2])) { \ _d->i6[3] = NLADD(_d->i6[3], 1); \ } \ } \ } \ } while (0) #define IP6_AND(a,b,d) do { i6addr_t *_s1 = (i6addr_t *)(a); \ i6addr_t *_s2 = (i6addr_t *)(b); \ i6addr_t *_d = (i6addr_t *)(d); \ _d->i6[0] = _s1->i6[0] & _s2->i6[0]; \ _d->i6[1] = _s1->i6[1] & _s2->i6[1]; \ _d->i6[2] = _s1->i6[2] & _s2->i6[2]; \ _d->i6[3] = _s1->i6[3] & _s2->i6[3]; \ } while (0) #define IP6_ANDASSIGN(a,m) \ do { i6addr_t *_d = (i6addr_t *)(a); \ i6addr_t *_m = (i6addr_t *)(m); \ _d->i6[0] &= _m->i6[0]; \ _d->i6[1] &= _m->i6[1]; \ _d->i6[2] &= _m->i6[2]; \ _d->i6[3] &= _m->i6[3]; \ } while (0) #define IP6_MASKEQ(a,m,b) \ (((I60(a) & I60(m)) == I60(b)) && \ ((I61(a) & I61(m)) == I61(b)) && \ ((I62(a) & I62(m)) == I62(b)) && \ ((I63(a) & I63(m)) == I63(b))) #define IP6_MASKNEQ(a,m,b) \ (((I60(a) & I60(m)) != I60(b)) || \ ((I61(a) & I61(m)) != I61(b)) || \ ((I62(a) & I62(m)) != I62(b)) || \ ((I63(a) & I63(m)) != I63(b))) #define IP6_MERGE(a,b,c) \ do { i6addr_t *_d, *_s1, *_s2; \ _d = (i6addr_t *)(a); \ _s1 = (i6addr_t *)(b); \ _s2 = (i6addr_t *)(c); \ _d->i6[0] |= _s1->i6[0] & ~_s2->i6[0]; \ _d->i6[1] |= _s1->i6[1] & ~_s2->i6[1]; \ _d->i6[2] |= _s1->i6[2] & ~_s2->i6[2]; \ _d->i6[3] |= _s1->i6[3] & ~_s2->i6[3]; \ } while (0) #define IP6_MASK(a,b,c) \ do { i6addr_t *_d, *_s1, *_s2; \ _d = (i6addr_t *)(a); \ _s1 = (i6addr_t *)(b); \ _s2 = (i6addr_t *)(c); \ _d->i6[0] = _s1->i6[0] & ~_s2->i6[0]; \ _d->i6[1] = _s1->i6[1] & ~_s2->i6[1]; \ _d->i6[2] = _s1->i6[2] & ~_s2->i6[2]; \ _d->i6[3] = _s1->i6[3] & ~_s2->i6[3]; \ } while (0) #define IP6_SETONES(a) \ do { i6addr_t *_d = (i6addr_t *)(a); \ _d->i6[0] = 0xffffffff; \ _d->i6[1] = 0xffffffff; \ _d->i6[2] = 0xffffffff; \ _d->i6[3] = 0xffffffff; \ } while (0) typedef union ipso_u { u_short ipso_ripso[2]; u_32_t ipso_doi; } ipso_t; typedef struct fr_ip { u_32_t fi_v:4; /* IP version */ u_32_t fi_xx:4; /* spare */ u_32_t fi_tos:8; /* IP packet TOS */ u_32_t fi_ttl:8; /* IP packet TTL */ u_32_t fi_p:8; /* IP packet protocol */ u_32_t fi_optmsk; /* bitmask composed from IP options */ i6addr_t fi_src; /* source address from packet */ i6addr_t fi_dst; /* destination address from packet */ ipso_t fi_ipso; /* IP security options */ u_32_t fi_flx; /* packet flags */ u_32_t fi_tcpmsk; /* TCP options set/reset */ u_32_t fi_ports[2]; /* TCP ports */ u_char fi_tcpf; /* TCP flags */ u_char fi_sensitivity; u_char fi_xxx[2]; /* pad */ } fr_ip_t; /* * For use in fi_flx */ #define FI_TCPUDP 0x0001 /* TCP/UCP implied comparison*/ #define FI_OPTIONS 0x0002 #define FI_FRAG 0x0004 #define FI_SHORT 0x0008 #define FI_NATED 0x0010 #define FI_MULTICAST 0x0020 #define FI_BROADCAST 0x0040 #define FI_MBCAST 0x0080 #define FI_STATE 0x0100 #define FI_BADNAT 0x0200 #define FI_BAD 0x0400 #define FI_OOW 0x0800 /* Out of state window, else match */ #define FI_ICMPERR 0x1000 #define FI_FRAGBODY 0x2000 #define FI_BADSRC 0x4000 #define FI_LOWTTL 0x8000 #define FI_CMP 0x5cfe3 /* Not FI_FRAG,FI_NATED,FI_FRAGTAIL */ #define FI_ICMPCMP 0x0003 /* Flags we can check for ICMP error packets */ #define FI_WITH 0x5effe /* Not FI_TCPUDP */ #define FI_V6EXTHDR 0x10000 #define FI_COALESCE 0x20000 #define FI_NEWNAT 0x40000 #define FI_ICMPQUERY 0x80000 #define FI_ENCAP 0x100000 /* encap/decap with NAT */ #define FI_AH 0x200000 /* AH header present */ #define FI_DOCKSUM 0x10000000 /* Proxy wants L4 recalculation */ #define FI_NOCKSUM 0x20000000 /* don't do a L4 checksum validation */ #define FI_NOWILD 0x40000000 /* Do not do wildcard searches */ #define FI_IGNORE 0x80000000 #define fi_secmsk fi_ipso.ipso_ripso[0] #define fi_auth fi_ipso.ipso_ripso[1] #define fi_doi fi_ipso.ipso_doi #define fi_saddr fi_src.in4.s_addr #define fi_daddr fi_dst.in4.s_addr #define fi_srcnum fi_src.iplookupnum #define fi_dstnum fi_dst.iplookupnum #define fi_srcname fi_src.iplookupname #define fi_dstname fi_dst.iplookupname #define fi_srctype fi_src.iplookuptype #define fi_dsttype fi_dst.iplookuptype #define fi_srcsubtype fi_src.iplookupsubtype #define fi_dstsubtype fi_dst.iplookupsubtype #define fi_srcptr fi_src.iplookupptr #define fi_dstptr fi_dst.iplookupptr #define fi_srcfunc fi_src.iplookupfunc #define fi_dstfunc fi_dst.iplookupfunc /* * These are both used by the state and NAT code to indicate that one port or * the other should be treated as a wildcard. * NOTE: When updating, check bit masks in ip_state.h and update there too. */ #define SI_W_SPORT 0x00000100 #define SI_W_DPORT 0x00000200 #define SI_WILDP (SI_W_SPORT|SI_W_DPORT) #define SI_W_SADDR 0x00000400 #define SI_W_DADDR 0x00000800 #define SI_WILDA (SI_W_SADDR|SI_W_DADDR) #define SI_NEWFR 0x00001000 #define SI_CLONE 0x00002000 #define SI_CLONED 0x00004000 #define SI_NEWCLONE 0x00008000 typedef struct { u_short fda_ports[2]; u_char fda_tcpf; /* TCP header flags (SYN, ACK, etc) */ } frdat_t; typedef enum fr_breasons_e { FRB_BLOCKED = 0, FRB_LOGFAIL = 1, FRB_PPSRATE = 2, FRB_JUMBO = 3, FRB_MAKEFRIP = 4, FRB_STATEADD = 5, FRB_UPDATEIPID = 6, FRB_LOGFAIL2 = 7, FRB_DECAPFRIP = 8, FRB_AUTHNEW = 9, FRB_AUTHCAPTURE = 10, FRB_COALESCE = 11, FRB_PULLUP = 12, FRB_AUTHFEEDBACK = 13, FRB_BADFRAG = 14, FRB_NATV4 = 15, FRB_NATV6 = 16, } fr_breason_t; #define FRB_MAX_VALUE 16 typedef enum ipf_cksum_e { FI_CK_BAD = -1, FI_CK_NEEDED = 0, FI_CK_SUMOK = 1, FI_CK_L4PART = 2, FI_CK_L4FULL = 4 } ipf_cksum_t; typedef struct fr_info { void *fin_main_soft; void *fin_ifp; /* interface packet is `on' */ struct frentry *fin_fr; /* last matching rule */ int fin_out; /* in or out ? 1 == out, 0 == in */ fr_ip_t fin_fi; /* IP Packet summary */ frdat_t fin_dat; /* TCP/UDP ports, ICMP code/type */ int fin_dlen; /* length of data portion of packet */ int fin_plen; u_32_t fin_rule; /* rule # last matched */ u_short fin_hlen; /* length of IP header in bytes */ char fin_group[FR_GROUPLEN]; /* group number, -1 for none */ void *fin_dp; /* start of data past IP header */ /* * Fields after fin_dp aren't used for compression of log records. * fin_fi contains the IP version (fin_family) * fin_rule isn't included because adding a new rule can change it but * not change fin_fr. fin_rule is the rule number reported. * It isn't necessary to include fin_crc because that is checked * for explicitly, before calling bcmp. */ u_32_t fin_crc; /* Simple calculation for logging */ int fin_family; /* AF_INET, etc. */ int fin_icode; /* ICMP error to return */ int fin_mtu; /* MTU input for ICMP need-frag */ int fin_rev; /* state only: 1 = reverse */ int fin_ipoff; /* # bytes from buffer start to hdr */ u_32_t fin_id; /* IP packet id field */ u_short fin_l4hlen; /* length of L4 header, if known */ u_short fin_off; int fin_depth; /* Group nesting depth */ int fin_error; /* Error code to return */ ipf_cksum_t fin_cksum; /* -1 = bad, 1 = good, 0 = not done */ fr_breason_t fin_reason; /* why auto blocked */ u_int fin_pktnum; void *fin_nattag; struct frdest *fin_dif; struct frdest *fin_tif; union { ip_t *fip_ip; #ifdef USE_INET6 ip6_t *fip_ip6; #endif } fin_ipu; mb_t **fin_mp; /* pointer to pointer to mbuf */ mb_t *fin_m; /* pointer to mbuf */ #ifdef MENTAT mb_t *fin_qfm; /* pointer to mblk where pkt starts */ void *fin_qpi; char fin_ifname[LIFNAMSIZ]; #endif #ifdef __sgi void *fin_hbuf; #endif void *fin_fraghdr; /* pointer to start of ipv6 frag hdr */ } fr_info_t; #define fin_ip fin_ipu.fip_ip #define fin_ip6 fin_ipu.fip_ip6 #define fin_v fin_fi.fi_v #define fin_p fin_fi.fi_p #define fin_flx fin_fi.fi_flx #define fin_optmsk fin_fi.fi_optmsk #define fin_secmsk fin_fi.fi_secmsk #define fin_doi fin_fi.fi_doi #define fin_auth fin_fi.fi_auth #define fin_src fin_fi.fi_src.in4 #define fin_saddr fin_fi.fi_saddr #define fin_dst fin_fi.fi_dst.in4 #define fin_daddr fin_fi.fi_daddr #define fin_data fin_fi.fi_ports #define fin_sport fin_fi.fi_ports[0] #define fin_dport fin_fi.fi_ports[1] #define fin_tcpf fin_fi.fi_tcpf #define fin_src6 fin_fi.fi_src #define fin_dst6 fin_fi.fi_dst #define fin_srcip6 fin_fi.fi_src.in6 #define fin_dstip6 fin_fi.fi_dst.in6 #define IPF_IN 0 #define IPF_OUT 1 typedef struct frentry *(*ipfunc_t) __P((fr_info_t *, u_32_t *)); typedef int (*ipfuncinit_t) __P((struct ipf_main_softc_s *, struct frentry *)); typedef struct ipfunc_resolve { char ipfu_name[32]; ipfunc_t ipfu_addr; ipfuncinit_t ipfu_init; ipfuncinit_t ipfu_fini; } ipfunc_resolve_t; /* * Size for compares on fr_info structures */ #define FI_CSIZE offsetof(fr_info_t, fin_icode) #define FI_LCSIZE offsetof(fr_info_t, fin_dp) /* * Size for copying cache fr_info structure */ #define FI_COPYSIZE offsetof(fr_info_t, fin_dp) /* * Structure for holding IPFilter's tag information */ #define IPFTAG_LEN 16 typedef struct { union { u_32_t iptu_num[4]; char iptu_tag[IPFTAG_LEN]; } ipt_un; int ipt_not; } ipftag_t; #define ipt_tag ipt_un.iptu_tag #define ipt_num ipt_un.iptu_num /* * Structure to define address for pool lookups. */ typedef struct { u_char adf_len; sa_family_t adf_family; u_char adf_xxx[2]; i6addr_t adf_addr; } addrfamily_t; RBI_LINK(ipf_rb, host_node_s); typedef struct host_node_s { RBI_FIELD(ipf_rb) hn_entry; addrfamily_t hn_addr; int hn_active; } host_node_t; typedef RBI_HEAD(ipf_rb, host_node_s) ipf_rb_head_t; typedef struct host_track_s { ipf_rb_head_t ht_root; int ht_max_nodes; int ht_max_per_node; int ht_netmask; int ht_cur_nodes; } host_track_t; typedef enum fr_dtypes_e { FRD_NORMAL = 0, FRD_DSTLIST } fr_dtypes_t; /* * This structure is used to hold information about the next hop for where * to forward a packet. */ typedef struct frdest { void *fd_ptr; addrfamily_t fd_addr; fr_dtypes_t fd_type; int fd_name; int fd_local; } frdest_t; #define fd_ip6 fd_addr.adf_addr #define fd_ip fd_ip6.in4 typedef enum fr_ctypes_e { FR_NONE = 0, FR_EQUAL, FR_NEQUAL, FR_LESST, FR_GREATERT, FR_LESSTE, FR_GREATERTE, FR_OUTRANGE, FR_INRANGE, FR_INCRANGE } fr_ctypes_t; /* * This structure holds information about a port comparison. */ typedef struct frpcmp { fr_ctypes_t frp_cmp; /* data for port comparisons */ u_32_t frp_port; /* top port for <> and >< */ u_32_t frp_top; /* top port for <> and >< */ } frpcmp_t; /* * Structure containing all the relevant TCP things that can be checked in * a filter rule. */ typedef struct frtuc { u_char ftu_tcpfm; /* tcp flags mask */ u_char ftu_tcpf; /* tcp flags */ frpcmp_t ftu_src; frpcmp_t ftu_dst; } frtuc_t; #define ftu_scmp ftu_src.frp_cmp #define ftu_dcmp ftu_dst.frp_cmp #define ftu_sport ftu_src.frp_port #define ftu_dport ftu_dst.frp_port #define ftu_stop ftu_src.frp_top #define ftu_dtop ftu_dst.frp_top #define FR_TCPFMAX 0x3f typedef enum fr_atypes_e { FRI_NONE = -1, /* For LHS of NAT */ FRI_NORMAL = 0, /* Normal address */ FRI_DYNAMIC, /* dynamic address */ FRI_LOOKUP, /* address is a pool # */ FRI_RANGE, /* address/mask is a range */ FRI_NETWORK, /* network address from if */ FRI_BROADCAST, /* broadcast address from if */ FRI_PEERADDR, /* Peer address for P-to-P */ FRI_NETMASKED, /* network address with netmask from if */ FRI_SPLIT, /* For NAT compatibility */ FRI_INTERFACE /* address is based on interface name */ } fr_atypes_t; /* * This structure makes up what is considered to be the IPFilter specific * matching components of a filter rule, as opposed to the data structures * used to define the result which are in frentry_t and not here. */ typedef struct fripf { fr_ip_t fri_ip; fr_ip_t fri_mip; /* mask structure */ u_short fri_icmpm; /* data for ICMP packets (mask) */ u_short fri_icmp; frtuc_t fri_tuc; fr_atypes_t fri_satype; /* addres type */ fr_atypes_t fri_datype; /* addres type */ int fri_sifpidx; /* doing dynamic addressing */ int fri_difpidx; /* index into fr_ifps[] to use when */ } fripf_t; #define fri_dlookup fri_mip.fi_dst #define fri_slookup fri_mip.fi_src #define fri_dstnum fri_mip.fi_dstnum #define fri_srcnum fri_mip.fi_srcnum #define fri_dstname fri_mip.fi_dstname #define fri_srcname fri_mip.fi_srcname #define fri_dstptr fri_mip.fi_dstptr #define fri_srcptr fri_mip.fi_srcptr typedef enum fr_rtypes_e { FR_T_NONE = 0, FR_T_IPF, /* IPF structures */ FR_T_BPFOPC, /* BPF opcode */ FR_T_CALLFUNC, /* callout to function in fr_func only */ FR_T_COMPIPF, /* compiled C code */ FR_T_IPFEXPR, /* IPF expression */ FR_T_BUILTIN = 0x40000000, /* rule is in kernel space */ FR_T_IPF_BUILTIN, FR_T_BPFOPC_BUILTIN, FR_T_CALLFUNC_BUILTIN, FR_T_COMPIPF_BUILTIN, FR_T_IPFEXPR_BUILTIN } fr_rtypes_t; typedef struct frentry * (* frentfunc_t) __P((fr_info_t *)); typedef struct frentry { ipfmutex_t fr_lock; struct frentry *fr_next; struct frentry **fr_pnext; struct frgroup *fr_grp; struct frgroup *fr_grphead; struct frgroup *fr_icmpgrp; struct ipscan *fr_isc; struct frentry *fr_dnext; /* 2 fr_die linked list pointers */ struct frentry **fr_pdnext; void *fr_ifas[4]; void *fr_ptr; /* for use with fr_arg */ int fr_comment; /* text comment for rule */ int fr_size; /* size of this structure */ int fr_ref; /* reference count */ int fr_statecnt; /* state count - for limit rules */ u_32_t fr_die; /* only used on loading the rule */ u_int fr_cksum; /* checksum on filter rules for performance */ /* * The line number from a file is here because we need to be able to * match the rule generated with ``grep rule ipf.conf | ipf -rf -'' * with the rule loaded using ``ipf -f ipf.conf'' - thus it can't be * on the other side of fr_func. */ int fr_flineno; /* line number from conf file */ /* * These are only incremented when a packet matches this rule and * it is the last match */ U_QUAD_T fr_hits; U_QUAD_T fr_bytes; /* * For PPS rate limiting * fr_lpu is used to always have the same size for this field, * allocating 64bits for seconds and 32bits for milliseconds. */ union { struct timeval frp_lastpkt; char frp_bytes[12]; } fr_lpu; int fr_curpps; union { void *fru_data; char *fru_caddr; fripf_t *fru_ipf; frentfunc_t fru_func; } fr_dun; /* * Fields after this may not change whilst in the kernel. */ ipfunc_t fr_func; /* call this function */ int fr_dsize; int fr_pps; fr_rtypes_t fr_type; u_32_t fr_flags; /* per-rule flags && options (see below) */ u_32_t fr_logtag; /* user defined log tag # */ u_32_t fr_collect; /* collection number */ u_int fr_arg; /* misc. numeric arg for rule */ u_int fr_loglevel; /* syslog log facility + priority */ u_char fr_family; u_char fr_icode; /* return ICMP code */ int fr_group; /* group to which this rule belongs */ int fr_grhead; /* group # which this rule starts */ int fr_ifnames[4]; int fr_isctag; int fr_rpc; /* XID Filtering */ ipftag_t fr_nattag; frdest_t fr_tifs[2]; /* "to"/"reply-to" interface */ frdest_t fr_dif; /* duplicate packet interface */ /* * These are all options related to stateful filtering */ host_track_t fr_srctrack; int fr_nostatelog; int fr_statemax; /* max reference count */ int fr_icmphead; /* ICMP group for state options */ u_int fr_age[2]; /* non-TCP state timeouts */ /* * How big is the name buffer at the end? */ int fr_namelen; char fr_names[1]; } frentry_t; #define fr_lastpkt fr_lpu.frp_lastpkt #define fr_caddr fr_dun.fru_caddr #define fr_data fr_dun.fru_data #define fr_dfunc fr_dun.fru_func #define fr_ipf fr_dun.fru_ipf #define fr_ip fr_ipf->fri_ip #define fr_mip fr_ipf->fri_mip #define fr_icmpm fr_ipf->fri_icmpm #define fr_icmp fr_ipf->fri_icmp #define fr_tuc fr_ipf->fri_tuc #define fr_satype fr_ipf->fri_satype #define fr_datype fr_ipf->fri_datype #define fr_sifpidx fr_ipf->fri_sifpidx #define fr_difpidx fr_ipf->fri_difpidx #define fr_proto fr_ip.fi_p #define fr_mproto fr_mip.fi_p #define fr_ttl fr_ip.fi_ttl #define fr_mttl fr_mip.fi_ttl #define fr_tos fr_ip.fi_tos #define fr_mtos fr_mip.fi_tos #define fr_tcpfm fr_tuc.ftu_tcpfm #define fr_tcpf fr_tuc.ftu_tcpf #define fr_scmp fr_tuc.ftu_scmp #define fr_dcmp fr_tuc.ftu_dcmp #define fr_dport fr_tuc.ftu_dport #define fr_sport fr_tuc.ftu_sport #define fr_stop fr_tuc.ftu_stop #define fr_dtop fr_tuc.ftu_dtop #define fr_dst fr_ip.fi_dst.in4 #define fr_dst6 fr_ip.fi_dst #define fr_daddr fr_ip.fi_dst.in4.s_addr #define fr_src fr_ip.fi_src.in4 #define fr_src6 fr_ip.fi_src #define fr_saddr fr_ip.fi_src.in4.s_addr #define fr_dmsk fr_mip.fi_dst.in4 #define fr_dmsk6 fr_mip.fi_dst #define fr_dmask fr_mip.fi_dst.in4.s_addr #define fr_smsk fr_mip.fi_src.in4 #define fr_smsk6 fr_mip.fi_src #define fr_smask fr_mip.fi_src.in4.s_addr #define fr_dstnum fr_ip.fi_dstnum #define fr_srcnum fr_ip.fi_srcnum #define fr_dlookup fr_ip.fi_dst #define fr_slookup fr_ip.fi_src #define fr_dstname fr_ip.fi_dstname #define fr_srcname fr_ip.fi_srcname #define fr_dsttype fr_ip.fi_dsttype #define fr_srctype fr_ip.fi_srctype #define fr_dstsubtype fr_ip.fi_dstsubtype #define fr_srcsubtype fr_ip.fi_srcsubtype #define fr_dstptr fr_mip.fi_dstptr #define fr_srcptr fr_mip.fi_srcptr #define fr_dstfunc fr_mip.fi_dstfunc #define fr_srcfunc fr_mip.fi_srcfunc #define fr_optbits fr_ip.fi_optmsk #define fr_optmask fr_mip.fi_optmsk #define fr_secbits fr_ip.fi_secmsk #define fr_secmask fr_mip.fi_secmsk #define fr_authbits fr_ip.fi_auth #define fr_authmask fr_mip.fi_auth #define fr_doi fr_ip.fi_doi #define fr_doimask fr_mip.fi_doi #define fr_flx fr_ip.fi_flx #define fr_mflx fr_mip.fi_flx #define fr_ifa fr_ifas[0] #define fr_oifa fr_ifas[2] #define fr_tif fr_tifs[0] #define fr_rif fr_tifs[1] #define FR_NOLOGTAG 0 #define FR_CMPSIZ (sizeof(struct frentry) - \ offsetof(struct frentry, fr_func)) #define FR_NAME(_f, _n) (_f)->fr_names + (_f)->_n /* * fr_flags */ #define FR_BLOCK 0x00001 /* do not allow packet to pass */ #define FR_PASS 0x00002 /* allow packet to pass */ #define FR_AUTH 0x00003 /* use authentication */ #define FR_PREAUTH 0x00004 /* require preauthentication */ #define FR_ACCOUNT 0x00005 /* Accounting rule */ #define FR_SKIP 0x00006 /* skip rule */ #define FR_DECAPSULATE 0x00008 /* decapsulate rule */ #define FR_CALL 0x00009 /* call rule */ #define FR_CMDMASK 0x0000f #define FR_LOG 0x00010 /* Log */ #define FR_LOGB 0x00011 /* Log-fail */ #define FR_LOGP 0x00012 /* Log-pass */ #define FR_LOGMASK (FR_LOG|FR_CMDMASK) #define FR_CALLNOW 0x00020 /* call another function (fr_func) if matches */ #define FR_NOTSRCIP 0x00040 #define FR_NOTDSTIP 0x00080 #define FR_QUICK 0x00100 /* match & stop processing list */ #define FR_KEEPFRAG 0x00200 /* keep fragment information */ #define FR_KEEPSTATE 0x00400 /* keep `connection' state information */ #define FR_FASTROUTE 0x00800 /* bypass normal routing */ #define FR_RETRST 0x01000 /* Return TCP RST packet - reset connection */ #define FR_RETICMP 0x02000 /* Return ICMP unreachable packet */ #define FR_FAKEICMP 0x03000 /* Return ICMP unreachable with fake source */ #define FR_OUTQUE 0x04000 /* outgoing packets */ #define FR_INQUE 0x08000 /* ingoing packets */ #define FR_LOGBODY 0x10000 /* Log the body */ #define FR_LOGFIRST 0x20000 /* Log the first byte if state held */ #define FR_LOGORBLOCK 0x40000 /* block the packet if it can't be logged */ #define FR_STLOOSE 0x80000 /* loose state checking */ #define FR_FRSTRICT 0x100000 /* strict frag. cache */ #define FR_STSTRICT 0x200000 /* strict keep state */ #define FR_NEWISN 0x400000 /* new ISN for outgoing TCP */ #define FR_NOICMPERR 0x800000 /* do not match ICMP errors in state */ #define FR_STATESYNC 0x1000000 /* synchronize state to slave */ #define FR_COPIED 0x2000000 /* copied from user space */ #define FR_INACTIVE 0x4000000 /* only used when flush'ing rules */ #define FR_NOMATCH 0x8000000 /* no match occured */ /* 0x10000000 FF_LOGPASS */ /* 0x20000000 FF_LOGBLOCK */ /* 0x40000000 FF_LOGNOMATCH */ /* 0x80000000 FF_BLOCKNONIP */ #define FR_RETMASK (FR_RETICMP|FR_RETRST|FR_FAKEICMP) #define FR_ISBLOCK(x) (((x) & FR_CMDMASK) == FR_BLOCK) #define FR_ISPASS(x) (((x) & FR_CMDMASK) == FR_PASS) #define FR_ISAUTH(x) (((x) & FR_CMDMASK) == FR_AUTH) #define FR_ISPREAUTH(x) (((x) & FR_CMDMASK) == FR_PREAUTH) #define FR_ISACCOUNT(x) (((x) & FR_CMDMASK) == FR_ACCOUNT) #define FR_ISSKIP(x) (((x) & FR_CMDMASK) == FR_SKIP) #define FR_ISDECAPS(x) (((x) & FR_CMDMASK) == FR_DECAPSULATE) #define FR_ISNOMATCH(x) ((x) & FR_NOMATCH) #define FR_INOUT (FR_INQUE|FR_OUTQUE) /* * recognized flags for SIOCGETFF and SIOCSETFF, and get put in fr_flags */ #define FF_LOGPASS 0x10000000 #define FF_LOGBLOCK 0x20000000 #define FF_LOGNOMATCH 0x40000000 #define FF_LOGGING (FF_LOGPASS|FF_LOGBLOCK|FF_LOGNOMATCH) #define FF_BLOCKNONIP 0x80000000 /* Solaris2 Only */ /* * Structure that passes information on what/how to flush to the kernel. */ typedef struct ipfflush { int ipflu_how; int ipflu_arg; } ipfflush_t; /* * */ typedef struct ipfgetctl { u_int ipfg_min; /* min value */ u_int ipfg_current; /* current value */ u_int ipfg_max; /* max value */ u_int ipfg_default; /* default value */ u_int ipfg_steps; /* value increments */ char ipfg_name[40]; /* tag name for this control */ } ipfgetctl_t; typedef struct ipfsetctl { int ipfs_which; /* 0 = min 1 = current 2 = max 3 = default */ u_int ipfs_value; /* min value */ char ipfs_name[40]; /* tag name for this control */ } ipfsetctl_t; /* * Some of the statistics below are in their own counters, but most are kept * in this single structure so that they can all easily be collected and * copied back as required. */ typedef struct ipf_statistics { u_long fr_icmp_coalesce; u_long fr_tcp_frag; u_long fr_tcp_pullup; u_long fr_tcp_short; u_long fr_tcp_small; u_long fr_tcp_bad_flags; u_long fr_udp_pullup; u_long fr_ip_freed; u_long fr_v6_ah_bad; u_long fr_v6_bad; u_long fr_v6_badfrag; u_long fr_v6_dst_bad; u_long fr_v6_esp_pullup; u_long fr_v6_ext_short; u_long fr_v6_ext_pullup; u_long fr_v6_ext_hlen; u_long fr_v6_frag_bad; u_long fr_v6_frag_pullup; u_long fr_v6_frag_size; u_long fr_v6_gre_pullup; u_long fr_v6_icmp6_pullup; u_long fr_v6_rh_bad; u_long fr_v6_badttl; /* TTL in packet doesn't reach minimum */ u_long fr_v4_ah_bad; u_long fr_v4_ah_pullup; u_long fr_v4_esp_pullup; u_long fr_v4_cipso_bad; u_long fr_v4_cipso_tlen; u_long fr_v4_gre_frag; u_long fr_v4_gre_pullup; u_long fr_v4_icmp_frag; u_long fr_v4_icmp_pullup; u_long fr_v4_badttl; /* TTL in packet doesn't reach minimum */ u_long fr_v4_badsrc; /* source received doesn't match route */ u_long fr_l4_badcksum; /* layer 4 header checksum failure */ u_long fr_badcoalesces; u_long fr_pass; /* packets allowed */ u_long fr_block; /* packets denied */ u_long fr_nom; /* packets which don't match any rule */ u_long fr_short; /* packets which are short */ u_long fr_ppkl; /* packets allowed and logged */ u_long fr_bpkl; /* packets denied and logged */ u_long fr_npkl; /* packets unmatched and logged */ u_long fr_ret; /* packets for which a return is sent */ u_long fr_acct; /* packets for which counting was performed */ u_long fr_bnfr; /* bad attempts to allocate fragment state */ u_long fr_nfr; /* new fragment state kept */ u_long fr_cfr; /* add new fragment state but complete pkt */ u_long fr_bads; /* bad attempts to allocate packet state */ u_long fr_ads; /* new packet state kept */ u_long fr_chit; /* cached hit */ u_long fr_cmiss; /* cached miss */ u_long fr_tcpbad; /* TCP checksum check failures */ u_long fr_pull[2]; /* good and bad pullup attempts */ u_long fr_bad; /* bad IP packets to the filter */ u_long fr_ipv6; /* IPv6 packets in/out */ u_long fr_ppshit; /* dropped because of pps ceiling */ u_long fr_ipud; /* IP id update failures */ u_long fr_blocked[FRB_MAX_VALUE + 1]; } ipf_statistics_t; /* * Log structure. Each packet header logged is prepended by one of these. * Following this in the log records read from the device will be an ipflog * structure which is then followed by any packet data. */ typedef struct iplog { u_32_t ipl_magic; u_int ipl_count; u_32_t ipl_seqnum; struct timeval ipl_time; size_t ipl_dsize; struct iplog *ipl_next; } iplog_t; #define ipl_sec ipl_time.tv_sec #define ipl_usec ipl_time.tv_usec #define IPL_MAGIC 0x49504c4d /* 'IPLM' */ #define IPL_MAGIC_NAT 0x49504c4e /* 'IPLN' */ #define IPL_MAGIC_STATE 0x49504c53 /* 'IPLS' */ #define IPLOG_SIZE sizeof(iplog_t) typedef struct ipflog { #if (defined(NetBSD) && (NetBSD <= 1991011) && (NetBSD >= 199603)) || \ (defined(OpenBSD) && (OpenBSD >= 199603)) #else u_int fl_unit; #endif u_32_t fl_rule; u_32_t fl_flags; u_32_t fl_lflags; u_32_t fl_logtag; ipftag_t fl_nattag; u_short fl_plen; /* extra data after hlen */ u_short fl_loglevel; /* syslog log level */ char fl_group[FR_GROUPLEN]; u_char fl_hlen; /* length of IP headers saved */ u_char fl_dir; u_char fl_breason; /* from fin_reason */ u_char fl_family; /* address family of packet logged */ char fl_ifname[LIFNAMSIZ]; } ipflog_t; #ifndef IPF_LOGGING # define IPF_LOGGING 0 #endif #ifndef IPF_DEFAULT_PASS # define IPF_DEFAULT_PASS FR_PASS #endif #define DEFAULT_IPFLOGSIZE 32768 #ifndef IPFILTER_LOGSIZE # define IPFILTER_LOGSIZE DEFAULT_IPFLOGSIZE #else # if IPFILTER_LOGSIZE < 8192 # error IPFILTER_LOGSIZE too small. Must be >= 8192 # endif #endif #define IPF_OPTCOPY 0x07ff00 /* bit mask of copied options */ /* * Device filenames for reading log information. Use ipf on Solaris2 because * ipl is already a name used by something else. */ #ifndef IPL_NAME # if SOLARIS # define IPL_NAME "/dev/ipf" # else # define IPL_NAME "/dev/ipl" # endif #endif /* * Pathnames for various IP Filter control devices. Used by LKM * and userland, so defined here. */ #define IPNAT_NAME "/dev/ipnat" #define IPSTATE_NAME "/dev/ipstate" #define IPAUTH_NAME "/dev/ipauth" #define IPSYNC_NAME "/dev/ipsync" #define IPSCAN_NAME "/dev/ipscan" #define IPLOOKUP_NAME "/dev/iplookup" #define IPL_LOGIPF 0 /* Minor device #'s for accessing logs */ #define IPL_LOGNAT 1 #define IPL_LOGSTATE 2 #define IPL_LOGAUTH 3 #define IPL_LOGSYNC 4 #define IPL_LOGSCAN 5 #define IPL_LOGLOOKUP 6 #define IPL_LOGCOUNT 7 #define IPL_LOGMAX 7 #define IPL_LOGSIZE IPL_LOGMAX + 1 #define IPL_LOGALL -1 #define IPL_LOGNONE -2 /* * For SIOCGETFS */ typedef struct friostat { ipf_statistics_t f_st[2]; frentry_t *f_ipf[2][2]; frentry_t *f_acct[2][2]; frentry_t *f_auth; struct frgroup *f_groups[IPL_LOGSIZE][2]; u_long f_froute[2]; u_long f_log_ok; u_long f_log_fail; u_long f_rb_no_mem; u_long f_rb_node_max; u_32_t f_ticks; int f_locks[IPL_LOGSIZE]; int f_defpass; /* default pass - from fr_pass */ int f_active; /* 1 or 0 - active rule set */ int f_running; /* 1 if running, else 0 */ int f_logging; /* 1 if enabled, else 0 */ int f_features; char f_version[32]; /* version string */ } friostat_t; #define f_fin f_ipf[0] #define f_fout f_ipf[1] #define f_acctin f_acct[0] #define f_acctout f_acct[1] #define IPF_FEAT_LKM 0x001 #define IPF_FEAT_LOG 0x002 #define IPF_FEAT_LOOKUP 0x004 #define IPF_FEAT_BPF 0x008 #define IPF_FEAT_COMPILED 0x010 #define IPF_FEAT_CKSUM 0x020 #define IPF_FEAT_SYNC 0x040 #define IPF_FEAT_SCAN 0x080 #define IPF_FEAT_IPV6 0x100 typedef struct optlist { u_short ol_val; int ol_bit; } optlist_t; /* * Group list structure. */ typedef struct frgroup { struct frgroup *fg_next; struct frentry *fg_head; struct frentry *fg_start; struct frgroup **fg_set; u_32_t fg_flags; int fg_ref; char fg_name[FR_GROUPLEN]; } frgroup_t; #define FG_NAME(g) (*(g)->fg_name == '\0' ? "" : (g)->fg_name) /* * Used by state and NAT tables */ typedef struct icmpinfo { u_short ici_id; u_short ici_seq; u_char ici_type; } icmpinfo_t; typedef struct udpinfo { u_short us_sport; u_short us_dport; } udpinfo_t; typedef struct tcpdata { u_32_t td_end; u_32_t td_maxend; u_32_t td_maxwin; u_32_t td_winscale; u_32_t td_maxseg; int td_winflags; } tcpdata_t; #define TCP_WSCALE_MAX 14 #define TCP_WSCALE_SEEN 0x00000001 #define TCP_WSCALE_FIRST 0x00000002 #define TCP_SACK_PERMIT 0x00000004 typedef struct tcpinfo { u_32_t ts_sport; u_32_t ts_dport; tcpdata_t ts_data[2]; } tcpinfo_t; /* * Structures to define a GRE header as seen in a packet. */ struct grebits { #if defined(sparc) u_32_t grb_ver:3; u_32_t grb_flags:3; u_32_t grb_A:1; u_32_t grb_recur:1; u_32_t grb_s:1; u_32_t grb_S:1; u_32_t grb_K:1; u_32_t grb_R:1; u_32_t grb_C:1; #else u_32_t grb_C:1; u_32_t grb_R:1; u_32_t grb_K:1; u_32_t grb_S:1; u_32_t grb_s:1; u_32_t grb_recur:1; u_32_t grb_A:1; u_32_t grb_flags:3; u_32_t grb_ver:3; #endif u_short grb_ptype; }; typedef struct grehdr { union { struct grebits gru_bits; u_short gru_flags; } gr_un; u_short gr_len; u_short gr_call; } grehdr_t; #define gr_flags gr_un.gru_flags #define gr_bits gr_un.gru_bits #define gr_ptype gr_bits.grb_ptype #define gr_C gr_bits.grb_C #define gr_R gr_bits.grb_R #define gr_K gr_bits.grb_K #define gr_S gr_bits.grb_S #define gr_s gr_bits.grb_s #define gr_recur gr_bits.grb_recur #define gr_A gr_bits.grb_A #define gr_ver gr_bits.grb_ver /* * GRE information tracked by "keep state" */ typedef struct greinfo { u_short gs_call[2]; u_short gs_flags; u_short gs_ptype; } greinfo_t; #define GRE_REV(x) ((ntohs(x) >> 13) & 7) /* * Format of an Authentication header */ typedef struct authhdr { u_char ah_next; u_char ah_plen; u_short ah_reserved; u_32_t ah_spi; u_32_t ah_seq; /* Following the sequence number field is 0 or more bytes of */ /* authentication data, as specified by ah_plen - RFC 2402. */ } authhdr_t; /* * Timeout tail queue list member */ typedef struct ipftqent { struct ipftqent **tqe_pnext; struct ipftqent *tqe_next; struct ipftq *tqe_ifq; void *tqe_parent; /* pointer back to NAT/state struct */ u_32_t tqe_die; /* when this entriy is to die */ u_32_t tqe_touched; int tqe_flags; int tqe_state[2]; /* current state of this entry */ } ipftqent_t; #define TQE_RULEBASED 0x00000001 #define TQE_DELETE 0x00000002 /* * Timeout tail queue head for IPFilter */ typedef struct ipftq { ipfmutex_t ifq_lock; u_int ifq_ttl; ipftqent_t *ifq_head; ipftqent_t **ifq_tail; struct ipftq *ifq_next; struct ipftq **ifq_pnext; int ifq_ref; u_int ifq_flags; } ipftq_t; #define IFQF_USER 0x01 /* User defined aging */ #define IFQF_DELETE 0x02 /* Marked for deletion */ #define IFQF_PROXY 0x04 /* Timeout queue in use by a proxy */ #define IPFTQ_INIT(x,y,z) do { \ (x)->ifq_ttl = (y); \ (x)->ifq_head = NULL; \ (x)->ifq_ref = 1; \ (x)->ifq_tail = &(x)->ifq_head; \ MUTEX_INIT(&(x)->ifq_lock, (z)); \ } while (0) #define IPF_HZ_MULT 1 #define IPF_HZ_DIVIDE 2 /* How many times a second ipfilter */ /* checks its timeout queues. */ #define IPF_TTLVAL(x) (((x) / IPF_HZ_MULT) * IPF_HZ_DIVIDE) typedef int (*ipftq_delete_fn_t)(struct ipf_main_softc_s *, void *); /* * Object structure description. For passing through in ioctls. */ typedef struct ipfobj { u_32_t ipfo_rev; /* IPFilter version number */ u_32_t ipfo_size; /* size of object at ipfo_ptr */ void *ipfo_ptr; /* pointer to object */ int ipfo_type; /* type of object being pointed to */ int ipfo_offset; /* bytes from ipfo_ptr where to start */ int ipfo_retval; /* return value */ u_char ipfo_xxxpad[28]; /* reserved for future use */ } ipfobj_t; #define IPFOBJ_FRENTRY 0 /* struct frentry */ #define IPFOBJ_IPFSTAT 1 /* struct friostat */ #define IPFOBJ_IPFINFO 2 /* struct fr_info */ #define IPFOBJ_AUTHSTAT 3 /* struct fr_authstat */ #define IPFOBJ_FRAGSTAT 4 /* struct ipfrstat */ #define IPFOBJ_IPNAT 5 /* struct ipnat */ #define IPFOBJ_NATSTAT 6 /* struct natstat */ #define IPFOBJ_STATESAVE 7 /* struct ipstate_save */ #define IPFOBJ_NATSAVE 8 /* struct nat_save */ #define IPFOBJ_NATLOOKUP 9 /* struct natlookup */ #define IPFOBJ_IPSTATE 10 /* struct ipstate */ #define IPFOBJ_STATESTAT 11 /* struct ips_stat */ #define IPFOBJ_FRAUTH 12 /* struct frauth */ #define IPFOBJ_TUNEABLE 13 /* struct ipftune */ #define IPFOBJ_NAT 14 /* struct nat */ #define IPFOBJ_IPFITER 15 /* struct ipfruleiter */ #define IPFOBJ_GENITER 16 /* struct ipfgeniter */ #define IPFOBJ_GTABLE 17 /* struct ipftable */ #define IPFOBJ_LOOKUPITER 18 /* struct ipflookupiter */ #define IPFOBJ_STATETQTAB 19 /* struct ipftq * NSTATES */ #define IPFOBJ_IPFEXPR 20 #define IPFOBJ_PROXYCTL 21 /* strct ap_ctl */ #define IPFOBJ_FRIPF 22 /* structfripf */ #define IPFOBJ_COUNT 23 /* How many #defines are above this? */ typedef union ipftunevalptr { void *ipftp_void; u_long *ipftp_long; u_int *ipftp_int; u_short *ipftp_short; u_char *ipftp_char; u_long ipftp_offset; } ipftunevalptr_t; typedef union ipftuneval { u_long ipftu_long; u_int ipftu_int; u_short ipftu_short; u_char ipftu_char; } ipftuneval_t; struct ipftuneable; typedef int (* ipftunefunc_t) __P((struct ipf_main_softc_s *, struct ipftuneable *, ipftuneval_t *)); typedef struct ipftuneable { ipftunevalptr_t ipft_una; const char *ipft_name; u_long ipft_min; u_long ipft_max; int ipft_sz; int ipft_flags; struct ipftuneable *ipft_next; ipftunefunc_t ipft_func; } ipftuneable_t; #define ipft_addr ipft_una.ipftp_void #define ipft_plong ipft_una.ipftp_long #define ipft_pint ipft_una.ipftp_int #define ipft_pshort ipft_una.ipftp_short #define ipft_pchar ipft_una.ipftp_char #define IPFT_RDONLY 1 /* read-only */ #define IPFT_WRDISABLED 2 /* write when disabled only */ typedef struct ipftune { void *ipft_cookie; ipftuneval_t ipft_un; u_long ipft_min; u_long ipft_max; int ipft_sz; int ipft_flags; char ipft_name[80]; } ipftune_t; #define ipft_vlong ipft_un.ipftu_long #define ipft_vint ipft_un.ipftu_int #define ipft_vshort ipft_un.ipftu_short #define ipft_vchar ipft_un.ipftu_char /* * Hash table header */ #define IPFHASH(x,y) typedef struct { \ ipfrwlock_t ipfh_lock; \ struct x *ipfh_head; \ } y /* ** HPUX Port */ #ifdef __hpux /* HP-UX locking sequence deadlock detection module lock MAJOR ID */ # define IPF_SMAJ 0 /* temp assignment XXX, not critical */ #endif #if !defined(CDEV_MAJOR) && defined (__FreeBSD_version) && \ (__FreeBSD_version >= 220000) # define CDEV_MAJOR 79 #endif /* * Post NetBSD 1.2 has the PFIL interface for packet filters. This turns * on those hooks. We don't need any special mods in non-IP Filter code * with this! */ #if (defined(NetBSD) && (NetBSD > 199609) && (NetBSD <= 1991011)) || \ (defined(NetBSD1_2) && NetBSD1_2 > 1) || \ (defined(__FreeBSD__) && (__FreeBSD_version >= 500043)) # if (defined(NetBSD) && NetBSD >= 199905) # define PFIL_HOOKS # endif # ifdef PFIL_HOOKS # define NETBSD_PF # endif #endif #ifdef _KERNEL # define FR_VERBOSE(verb_pr) # define FR_DEBUG(verb_pr) #else extern void ipfkdebug __P((char *, ...)); extern void ipfkverbose __P((char *, ...)); # define FR_VERBOSE(verb_pr) ipfkverbose verb_pr # define FR_DEBUG(verb_pr) ipfkdebug verb_pr #endif /* * */ typedef struct ipfruleiter { int iri_inout; char iri_group[FR_GROUPLEN]; int iri_active; int iri_nrules; int iri_v; /* No longer used (compatibility) */ frentry_t *iri_rule; } ipfruleiter_t; /* * Values for iri_inout */ #define F_IN 0 #define F_OUT 1 #define F_ACIN 2 #define F_ACOUT 3 typedef struct ipfgeniter { int igi_type; int igi_nitems; void *igi_data; } ipfgeniter_t; #define IPFGENITER_IPF 0 #define IPFGENITER_NAT 1 #define IPFGENITER_IPNAT 2 #define IPFGENITER_FRAG 3 #define IPFGENITER_AUTH 4 #define IPFGENITER_STATE 5 #define IPFGENITER_NATFRAG 6 #define IPFGENITER_HOSTMAP 7 #define IPFGENITER_LOOKUP 8 typedef struct ipftable { int ita_type; void *ita_table; } ipftable_t; #define IPFTABLE_BUCKETS 1 #define IPFTABLE_BUCKETS_NATIN 2 #define IPFTABLE_BUCKETS_NATOUT 3 typedef struct ipf_v4_masktab_s { u_32_t imt4_active[33]; int imt4_masks[33]; int imt4_max; } ipf_v4_masktab_t; typedef struct ipf_v6_masktab_s { i6addr_t imt6_active[129]; int imt6_masks[129]; int imt6_max; } ipf_v6_masktab_t; /* * */ typedef struct ipftoken { struct ipftoken *ipt_next; struct ipftoken **ipt_pnext; void *ipt_ctx; void *ipt_data; u_long ipt_die; int ipt_type; int ipt_uid; int ipt_subtype; int ipt_ref; int ipt_complete; } ipftoken_t; /* * */ typedef struct ipfexp { int ipfe_cmd; int ipfe_not; int ipfe_narg; int ipfe_size; int ipfe_arg0[1]; } ipfexp_t; /* * Currently support commands (ipfe_cmd) * 32bits is split up follows: * aabbcccc * aa = 0 = packet matching, 1 = meta data matching * bb = IP protocol number * cccc = command */ #define IPF_EXP_IP_PR 0x00000001 #define IPF_EXP_IP_ADDR 0x00000002 #define IPF_EXP_IP_SRCADDR 0x00000003 #define IPF_EXP_IP_DSTADDR 0x00000004 #define IPF_EXP_IP6_ADDR 0x00000005 #define IPF_EXP_IP6_SRCADDR 0x00000006 #define IPF_EXP_IP6_DSTADDR 0x00000007 #define IPF_EXP_TCP_FLAGS 0x00060001 #define IPF_EXP_TCP_PORT 0x00060002 #define IPF_EXP_TCP_SPORT 0x00060003 #define IPF_EXP_TCP_DPORT 0x00060004 #define IPF_EXP_UDP_PORT 0x00110002 #define IPF_EXP_UDP_SPORT 0x00110003 #define IPF_EXP_UDP_DPORT 0x00110004 #define IPF_EXP_IDLE_GT 0x01000001 #define IPF_EXP_TCP_STATE 0x01060002 #define IPF_EXP_END 0xffffffff #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ #define FIVE_DAYS (5 * ONE_DAY) typedef struct ipf_main_softc_s { struct ipf_main_softc_s *ipf_next; ipfmutex_t ipf_rw; ipfmutex_t ipf_timeoutlock; ipfrwlock_t ipf_mutex; ipfrwlock_t ipf_frag; ipfrwlock_t ipf_global; ipfrwlock_t ipf_tokens; ipfrwlock_t ipf_state; ipfrwlock_t ipf_nat; ipfrwlock_t ipf_natfrag; ipfrwlock_t ipf_poolrw; int ipf_dynamic_softc; int ipf_refcnt; int ipf_running; int ipf_flags; int ipf_active; int ipf_control_forwarding; int ipf_update_ipid; int ipf_chksrc; /* causes a system crash if enabled */ int ipf_pass; int ipf_minttl; int ipf_icmpminfragmtu; int ipf_interror; /* Should be in a struct that is per */ /* thread or process. Does not belong */ /* here but there's a lot more work */ /* in doing that properly. For now, */ /* it is squatting. */ u_int ipf_tcpidletimeout; u_int ipf_tcpclosewait; u_int ipf_tcplastack; u_int ipf_tcptimewait; u_int ipf_tcptimeout; u_int ipf_tcpsynsent; u_int ipf_tcpsynrecv; u_int ipf_tcpclosed; u_int ipf_tcphalfclosed; u_int ipf_udptimeout; u_int ipf_udpacktimeout; u_int ipf_icmptimeout; u_int ipf_icmpacktimeout; u_int ipf_iptimeout; u_long ipf_ticks; u_long ipf_userifqs; u_long ipf_rb_no_mem; u_long ipf_rb_node_max; u_long ipf_frouteok[2]; ipftuneable_t *ipf_tuners; void *ipf_frag_soft; void *ipf_nat_soft; void *ipf_state_soft; void *ipf_auth_soft; void *ipf_proxy_soft; void *ipf_sync_soft; void *ipf_lookup_soft; void *ipf_log_soft; struct frgroup *ipf_groups[IPL_LOGSIZE][2]; frentry_t *ipf_rules[2][2]; frentry_t *ipf_acct[2][2]; frentry_t *ipf_rule_explist[2]; ipftoken_t *ipf_token_head; ipftoken_t **ipf_token_tail; #if defined(__FreeBSD_version) && (__FreeBSD_version >= 300000) && \ defined(_KERNEL) struct callout ipf_slow_ch; #endif #if defined(linux) && defined(_KERNEL) struct timer_list ipf_timer; #endif #if NETBSD_GE_REV(104040000) struct callout ipf_slow_ch; #endif #if SOLARIS # if SOLARIS2 >= 7 timeout_id_t ipf_slow_ch; # else int ipf_slow_ch; # endif #endif #if defined(_KERNEL) # if SOLARIS struct pollhead ipf_poll_head[IPL_LOGSIZE]; void *ipf_dip; # if defined(INSTANCES) int ipf_get_loopback; u_long ipf_idnum; net_handle_t ipf_nd_v4; net_handle_t ipf_nd_v6; hook_t *ipf_hk_v4_in; hook_t *ipf_hk_v4_out; hook_t *ipf_hk_v4_nic; hook_t *ipf_hk_v6_in; hook_t *ipf_hk_v6_out; hook_t *ipf_hk_v6_nic; hook_t *ipf_hk_loop_v4_in; hook_t *ipf_hk_loop_v4_out; hook_t *ipf_hk_loop_v6_in; hook_t *ipf_hk_loop_v6_out; # endif # else # if defined(linux) && defined(_KERNEL) struct poll_table_struct ipf_selwait[IPL_LOGSIZE]; wait_queue_head_t iplh_linux[IPL_LOGSIZE]; # else struct selinfo ipf_selwait[IPL_LOGSIZE]; # endif # endif #endif void *ipf_slow; ipf_statistics_t ipf_stats[2]; u_char ipf_iss_secret[32]; u_short ipf_ip_id; } ipf_main_softc_t; #define IPFERROR(_e) do { softc->ipf_interror = (_e); \ DT1(user_error, int, _e); \ } while (0) #ifndef _KERNEL extern int ipf_check __P((void *, struct ip *, int, void *, int, mb_t **)); -extern int (*ipf_checkp) __P((ip_t *, int, void *, int, mb_t **)); extern struct ifnet *get_unit __P((char *, int)); extern char *get_ifname __P((struct ifnet *)); extern int ipfioctl __P((ipf_main_softc_t *, int, ioctlcmd_t, caddr_t, int)); extern void m_freem __P((mb_t *)); extern size_t msgdsize __P((mb_t *)); extern int bcopywrap __P((void *, void *, size_t)); extern void ip_fillid(struct ip *); #else /* #ifndef _KERNEL */ # if defined(__NetBSD__) && defined(PFIL_HOOKS) extern void ipfilterattach __P((int)); # endif extern int ipl_enable __P((void)); extern int ipl_disable __P((void)); # ifdef MENTAT extern int ipf_check __P((void *, struct ip *, int, void *, int, void *, mblk_t **)); # if SOLARIS extern void ipf_prependmbt(fr_info_t *, mblk_t *); # if SOLARIS2 >= 7 extern int ipfioctl __P((dev_t, int, intptr_t, int, cred_t *, int *)); # else extern int ipfioctl __P((dev_t, int, int *, int, cred_t *, int *)); # endif # endif # ifdef __hpux extern int ipfioctl __P((dev_t, int, caddr_t, int)); extern int ipf_select __P((dev_t, int)); # endif extern int ipf_qout __P((queue_t *, mblk_t *)); # else /* MENTAT */ extern int ipf_check __P((void *, struct ip *, int, void *, int, mb_t **)); extern int (*fr_checkp) __P((ip_t *, int, void *, int, mb_t **)); extern size_t mbufchainlen __P((mb_t *)); # ifdef __sgi # include extern int ipfioctl __P((dev_t, int, caddr_t, int, cred_t *, int *)); extern int ipfilter_sgi_attach __P((void)); extern void ipfilter_sgi_detach __P((void)); extern void ipfilter_sgi_intfsync __P((void)); # else # ifdef IPFILTER_LKM extern int ipf_identify __P((char *)); # endif # if BSDOS_GE_REV(199510) || FREEBSD_GE_REV(220000) || \ (defined(NetBSD) && (NetBSD >= 199511)) || defined(__OpenBSD__) # if defined(__NetBSD__) || BSDOS_GE_REV(199701) || \ defined(__OpenBSD__) || FREEBSD_GE_REV(300000) # if (__FreeBSD_version >= 500024) # if (__FreeBSD_version >= 502116) extern int ipfioctl __P((struct cdev*, u_long, caddr_t, int, struct thread *)); # else extern int ipfioctl __P((dev_t, u_long, caddr_t, int, struct thread *)); # endif /* __FreeBSD_version >= 502116 */ # else # if NETBSD_GE_REV(499001000) extern int ipfioctl __P((dev_t, u_long, void *, int, struct lwp *)); # else # if NETBSD_GE_REV(399001400) extern int ipfioctl __P((dev_t, u_long, caddr_t, int, struct lwp *)); # else extern int ipfioctl __P((dev_t, u_long, caddr_t, int, struct proc *)); # endif # endif # endif /* __FreeBSD_version >= 500024 */ # else extern int ipfioctl __P((dev_t, int, caddr_t, int, struct proc *)); # endif # else # ifdef linux extern int ipfioctl __P((struct inode *, struct file *, u_int, u_long)); # else extern int ipfioctl __P((dev_t, int, caddr_t, int)); # endif # endif /* (_BSDI_VERSION >= 199510) */ # endif /* __ sgi */ # endif /* MENTAT */ # if defined(__FreeBSD_version) extern int ipf_pfil_hook __P((void)); extern int ipf_pfil_unhook __P((void)); extern void ipf_event_reg __P((void)); extern void ipf_event_dereg __P((void)); # endif # if defined(INSTANCES) extern ipf_main_softc_t *ipf_find_softc __P((u_long)); extern int ipf_set_loopback __P((ipf_main_softc_t *, ipftuneable_t *, ipftuneval_t *)); # endif #endif /* #ifndef _KERNEL */ extern char *memstr __P((const char *, char *, size_t, size_t)); extern int count4bits __P((u_32_t)); #ifdef USE_INET6 extern int count6bits __P((u_32_t *)); #endif extern int frrequest __P((ipf_main_softc_t *, int, ioctlcmd_t, caddr_t, int, int)); extern char *getifname __P((struct ifnet *)); extern int ipfattach __P((ipf_main_softc_t *)); extern int ipfdetach __P((ipf_main_softc_t *)); extern u_short ipf_cksum __P((u_short *, int)); extern int copyinptr __P((ipf_main_softc_t *, void *, void *, size_t)); extern int copyoutptr __P((ipf_main_softc_t *, void *, void *, size_t)); extern int ipf_fastroute __P((mb_t *, mb_t **, fr_info_t *, frdest_t *)); extern int ipf_inject __P((fr_info_t *, mb_t *)); extern int ipf_inobj __P((ipf_main_softc_t *, void *, ipfobj_t *, void *, int)); extern int ipf_inobjsz __P((ipf_main_softc_t *, void *, void *, int , int)); extern int ipf_ioctlswitch __P((ipf_main_softc_t *, int, void *, ioctlcmd_t, int, int, void *)); extern int ipf_ipf_ioctl __P((ipf_main_softc_t *, caddr_t, ioctlcmd_t, int, int, void *)); extern int ipf_ipftune __P((ipf_main_softc_t *, ioctlcmd_t, void *)); extern int ipf_matcharray_load __P((ipf_main_softc_t *, caddr_t, ipfobj_t *, int **)); extern int ipf_matcharray_verify __P((int *, int)); extern int ipf_outobj __P((ipf_main_softc_t *, void *, void *, int)); extern int ipf_outobjk __P((ipf_main_softc_t *, ipfobj_t *, void *)); extern int ipf_outobjsz __P((ipf_main_softc_t *, void *, void *, int, int)); extern void *ipf_pullup __P((mb_t *, fr_info_t *, int)); extern int ipf_resolvedest __P((ipf_main_softc_t *, char *, struct frdest *, int)); extern int ipf_resolvefunc __P((ipf_main_softc_t *, void *)); extern void *ipf_resolvenic __P((ipf_main_softc_t *, char *, int)); extern int ipf_send_icmp_err __P((int, fr_info_t *, int)); extern int ipf_send_reset __P((fr_info_t *)); #if (defined(__FreeBSD_version) && (__FreeBSD_version < 501000)) || \ !defined(_KERNEL) || defined(linux) #endif extern void ipf_apply_timeout __P((ipftq_t *, u_int)); extern ipftq_t *ipf_addtimeoutqueue __P((ipf_main_softc_t *, ipftq_t **, u_int)); extern void ipf_deletequeueentry __P((ipftqent_t *)); extern int ipf_deletetimeoutqueue __P((ipftq_t *)); extern void ipf_freetimeoutqueue __P((ipf_main_softc_t *, ipftq_t *)); extern void ipf_movequeue __P((u_long, ipftqent_t *, ipftq_t *, ipftq_t *)); extern void ipf_queueappend __P((u_long, ipftqent_t *, ipftq_t *, void *)); extern void ipf_queueback __P((u_long, ipftqent_t *)); extern int ipf_queueflush __P((ipf_main_softc_t *, ipftq_delete_fn_t, ipftq_t *, ipftq_t *, u_int *, int, int)); extern void ipf_queuefront __P((ipftqent_t *)); extern int ipf_settimeout_tcp __P((ipftuneable_t *, ipftuneval_t *, ipftq_t *)); extern int ipf_checkv4sum __P((fr_info_t *)); extern int ipf_checkl4sum __P((fr_info_t *)); extern int ipf_ifpfillv4addr __P((int, struct sockaddr_in *, struct sockaddr_in *, struct in_addr *, struct in_addr *)); extern int ipf_coalesce __P((fr_info_t *)); #ifdef USE_INET6 extern int ipf_checkv6sum __P((fr_info_t *)); extern int ipf_ifpfillv6addr __P((int, struct sockaddr_in6 *, struct sockaddr_in6 *, i6addr_t *, i6addr_t *)); #endif extern int ipf_tune_add __P((ipf_main_softc_t *, ipftuneable_t *)); extern int ipf_tune_add_array __P((ipf_main_softc_t *, ipftuneable_t *)); extern int ipf_tune_del __P((ipf_main_softc_t *, ipftuneable_t *)); extern int ipf_tune_del_array __P((ipf_main_softc_t *, ipftuneable_t *)); extern int ipf_tune_array_link __P((ipf_main_softc_t *, ipftuneable_t *)); extern int ipf_tune_array_unlink __P((ipf_main_softc_t *, ipftuneable_t *)); extern ipftuneable_t *ipf_tune_array_copy __P((void *, size_t, ipftuneable_t *)); extern int ipf_pr_pullup __P((fr_info_t *, int)); extern int ipf_flush __P((ipf_main_softc_t *, minor_t, int)); extern frgroup_t *ipf_group_add __P((ipf_main_softc_t *, char *, void *, u_32_t, minor_t, int)); extern void ipf_group_del __P((ipf_main_softc_t *, frgroup_t *, frentry_t *)); extern int ipf_derefrule __P((ipf_main_softc_t *, frentry_t **)); extern frgroup_t *ipf_findgroup __P((ipf_main_softc_t *, char *, minor_t, int, frgroup_t ***)); extern int ipf_log_init __P((void)); extern int ipf_log_bytesused __P((ipf_main_softc_t *, int)); extern int ipf_log_canread __P((ipf_main_softc_t *, int)); extern int ipf_log_clear __P((ipf_main_softc_t *, minor_t)); extern u_long ipf_log_failures __P((ipf_main_softc_t *, int)); extern int ipf_log_read __P((ipf_main_softc_t *, minor_t, uio_t *)); extern int ipf_log_items __P((ipf_main_softc_t *, int, fr_info_t *, void **, size_t *, int *, int)); extern u_long ipf_log_logok __P((ipf_main_softc_t *, int)); extern void ipf_log_unload __P((ipf_main_softc_t *)); extern int ipf_log_pkt __P((fr_info_t *, u_int)); extern frentry_t *ipf_acctpkt __P((fr_info_t *, u_32_t *)); extern u_short fr_cksum __P((fr_info_t *, ip_t *, int, void *)); extern void ipf_deinitialise __P((ipf_main_softc_t *)); extern int ipf_deliverlocal __P((ipf_main_softc_t *, int, void *, i6addr_t *)); extern frentry_t *ipf_dstgrpmap __P((fr_info_t *, u_32_t *)); extern void ipf_fixskip __P((frentry_t **, frentry_t *, int)); extern void ipf_forgetifp __P((ipf_main_softc_t *, void *)); extern frentry_t *ipf_getrulen __P((ipf_main_softc_t *, int, char *, u_32_t)); extern int ipf_ifpaddr __P((ipf_main_softc_t *, int, int, void *, i6addr_t *, i6addr_t *)); extern void ipf_inet_mask_add __P((int, ipf_v4_masktab_t *)); extern void ipf_inet_mask_del __P((int, ipf_v4_masktab_t *)); #ifdef USE_INET6 extern void ipf_inet6_mask_add __P((int, i6addr_t *, ipf_v6_masktab_t *)); extern void ipf_inet6_mask_del __P((int, i6addr_t *, ipf_v6_masktab_t *)); #endif extern int ipf_initialise __P((void)); extern int ipf_lock __P((caddr_t, int *)); extern int ipf_makefrip __P((int, ip_t *, fr_info_t *)); extern int ipf_matchtag __P((ipftag_t *, ipftag_t *)); extern int ipf_matchicmpqueryreply __P((int, icmpinfo_t *, struct icmp *, int)); extern u_32_t ipf_newisn __P((fr_info_t *)); extern u_int ipf_pcksum __P((fr_info_t *, int, u_int)); extern void ipf_rule_expire __P((ipf_main_softc_t *)); extern int ipf_scanlist __P((fr_info_t *, u_32_t)); extern frentry_t *ipf_srcgrpmap __P((fr_info_t *, u_32_t *)); extern int ipf_tcpudpchk __P((fr_ip_t *, frtuc_t *)); extern int ipf_verifysrc __P((fr_info_t *fin)); extern int ipf_zerostats __P((ipf_main_softc_t *, char *)); extern int ipf_getnextrule __P((ipf_main_softc_t *, ipftoken_t *, void *)); extern int ipf_sync __P((ipf_main_softc_t *, void *)); extern int ipf_token_deref __P((ipf_main_softc_t *, ipftoken_t *)); extern void ipf_token_expire __P((ipf_main_softc_t *)); extern ipftoken_t *ipf_token_find __P((ipf_main_softc_t *, int, int, void *)); extern int ipf_token_del __P((ipf_main_softc_t *, int, int, void *)); extern void ipf_token_mark_complete __P((ipftoken_t *)); extern int ipf_genericiter __P((ipf_main_softc_t *, void *, int, void *)); #ifdef IPFILTER_LOOKUP extern void *ipf_resolvelookup __P((int, u_int, u_int, lookupfunc_t *)); #endif extern u_32_t ipf_random __P((void)); extern int ipf_main_load __P((void)); extern void *ipf_main_soft_create __P((void *)); extern void ipf_main_soft_destroy __P((ipf_main_softc_t *)); extern int ipf_main_soft_init __P((ipf_main_softc_t *)); extern int ipf_main_soft_fini __P((ipf_main_softc_t *)); extern int ipf_main_unload __P((void)); extern int ipf_load_all __P((void)); extern int ipf_unload_all __P((void)); extern void ipf_destroy_all __P((ipf_main_softc_t *)); extern ipf_main_softc_t *ipf_create_all __P((void *)); extern int ipf_init_all __P((ipf_main_softc_t *)); extern int ipf_fini_all __P((ipf_main_softc_t *)); extern void ipf_log_soft_destroy __P((ipf_main_softc_t *, void *)); extern void *ipf_log_soft_create __P((ipf_main_softc_t *)); extern int ipf_log_soft_init __P((ipf_main_softc_t *, void *)); extern int ipf_log_soft_fini __P((ipf_main_softc_t *, void *)); extern int ipf_log_main_load __P((void)); extern int ipf_log_main_unload __P((void)); extern char ipfilter_version[]; #ifdef USE_INET6 extern int icmptoicmp6types[ICMP_MAXTYPE+1]; extern int icmptoicmp6unreach[ICMP_MAX_UNREACH]; extern int icmpreplytype6[ICMP6_MAXTYPE + 1]; #endif #ifdef IPFILTER_COMPAT extern int ipf_in_compat __P((ipf_main_softc_t *, ipfobj_t *, void *,int)); extern int ipf_out_compat __P((ipf_main_softc_t *, ipfobj_t *, void *)); #endif extern int icmpreplytype4[ICMP_MAXTYPE + 1]; extern int ipf_ht_node_add __P((ipf_main_softc_t *, host_track_t *, int, i6addr_t *)); extern int ipf_ht_node_del __P((host_track_t *, int, i6addr_t *)); extern void ipf_rb_ht_flush __P((host_track_t *)); extern void ipf_rb_ht_freenode __P((host_node_t *, void *)); extern void ipf_rb_ht_init __P((host_track_t *)); #endif /* __IP_FIL_H__ */ Index: projects/vnet/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c =================================================================== --- projects/vnet/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c (revision 302198) +++ projects/vnet/sys/contrib/ipfilter/netinet/ip_fil_freebsd.c (revision 302199) @@ -1,1465 +1,1459 @@ /* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. */ #if !defined(lint) static const char sccsid[] = "@(#)ip_fil.c 2.41 6/5/96 (C) 1993-2000 Darren Reed"; static const char rcsid[] = "@(#)$Id$"; #endif #if defined(KERNEL) || defined(_KERNEL) # undef KERNEL # undef _KERNEL # define KERNEL 1 # define _KERNEL 1 #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 400000) && \ !defined(KLD_MODULE) && !defined(IPFILTER_LKM) # include "opt_inet6.h" #endif #if defined(__FreeBSD_version) && (__FreeBSD_version >= 440000) && \ !defined(KLD_MODULE) && !defined(IPFILTER_LKM) # include "opt_random_ip_id.h" #endif #include #include #include #include # include # include #include #include # include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) #include #endif # include # include # include #if !defined(__hpux) # include #endif #include # include # include #include # include # include #include #include #include #include #include #include #include #include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 800000) #include #else #define CURVNET_SET(arg) #define CURVNET_RESTORE() #endif #if defined(__osf__) # include #endif #include #include #include #include "netinet/ip_compat.h" #ifdef USE_INET6 # include #endif #include "netinet/ip_fil.h" #include "netinet/ip_nat.h" #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" #include "netinet/ip_auth.h" #include "netinet/ip_sync.h" #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #ifdef IPFILTER_SCAN #include "netinet/ip_scan.h" #endif #include "netinet/ip_pool.h" # include #include #ifdef CSUM_DATA_VALID #include #endif extern int ip_optcopy __P((struct ip *, struct ip *)); # ifdef IPFILTER_M_IPFILTER MALLOC_DEFINE(M_IPFILTER, "ipfilter", "IP Filter packet filter data structures"); # endif -static int (*ipf_savep) __P((void *, ip_t *, int, void *, int, struct mbuf **)); static int ipf_send_ip __P((fr_info_t *, mb_t *)); static void ipf_timer_func __P((void *arg)); -int ipf_locks_done = 0; -ipf_main_softc_t ipfmain; +VNET_DEFINE(ipf_main_softc_t, ipfmain); +#define V_ipfmain VNET(ipfmain) # include # if defined(NETBSD_PF) # include # endif /* NETBSD_PF */ -/* - * We provide the ipf_checkp name just to minimize changes later. - */ -int (*ipf_checkp) __P((void *, ip_t *ip, int hlen, void *ifp, int out, mb_t **mp)); - static eventhandler_tag ipf_arrivetag, ipf_departtag, ipf_clonetag; -static void ipf_ifevent(void *arg); +static void ipf_ifevent(void *arg, struct ifnet *ifp); -static void ipf_ifevent(arg) +static void ipf_ifevent(arg, ifp) void *arg; + struct ifnet *ifp; { - ipf_sync(arg, NULL); + + CURVNET_SET(ifp->if_vnet); + ipf_sync(&V_ipfmain, NULL); + CURVNET_RESTORE(); } static int ipf_check_wrapper(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) { struct ip *ip = mtod(*mp, struct ip *); int rv; /* * IPFilter expects evreything in network byte order */ #if (__FreeBSD_version < 1000019) ip->ip_len = htons(ip->ip_len); ip->ip_off = htons(ip->ip_off); #endif - rv = ipf_check(&ipfmain, ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT), + CURVNET_SET(ifp->if_vnet); + rv = ipf_check(&V_ipfmain, ip, ip->ip_hl << 2, ifp, (dir == PFIL_OUT), mp); + CURVNET_RESTORE(); #if (__FreeBSD_version < 1000019) if ((rv == 0) && (*mp != NULL)) { ip = mtod(*mp, struct ip *); ip->ip_len = ntohs(ip->ip_len); ip->ip_off = ntohs(ip->ip_off); } #endif return rv; } # ifdef USE_INET6 # include static int ipf_check_wrapper6(void *arg, struct mbuf **mp, struct ifnet *ifp, int dir) { - return (ipf_check(&ipfmain, mtod(*mp, struct ip *), - sizeof(struct ip6_hdr), ifp, (dir == PFIL_OUT), mp)); + int error; + + CURVNET_SET(ifp->if_vnet); + error = ipf_check(&V_ipfmain, mtod(*mp, struct ip *), + sizeof(struct ip6_hdr), ifp, (dir == PFIL_OUT), mp); + CURVNET_RESTORE(); + return (error); } # endif #if defined(IPFILTER_LKM) int ipf_identify(s) char *s; { if (strcmp(s, "ipl") == 0) return 1; return 0; } #endif /* IPFILTER_LKM */ static void ipf_timer_func(arg) void *arg; { ipf_main_softc_t *softc = arg; SPL_INT(s); SPL_NET(s); READ_ENTER(&softc->ipf_global); if (softc->ipf_running > 0) ipf_slowtimer(softc); if (softc->ipf_running == -1 || softc->ipf_running == 1) { #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, hz/2); #endif callout_init(&softc->ipf_slow_ch, 1); callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT, ipf_timer_func, softc); } RWLOCK_EXIT(&softc->ipf_global); SPL_X(s); } int ipfattach(softc) ipf_main_softc_t *softc; { #ifdef USE_SPL int s; #endif SPL_NET(s); if (softc->ipf_running > 0) { SPL_X(s); return EBUSY; } if (ipf_init_all(softc) < 0) { SPL_X(s); return EIO; } - if (ipf_checkp != ipf_check) { - ipf_savep = ipf_checkp; - ipf_checkp = ipf_check; - } - - bzero((char *)ipfmain.ipf_selwait, sizeof(ipfmain.ipf_selwait)); + bzero((char *)V_ipfmain.ipf_selwait, sizeof(V_ipfmain.ipf_selwait)); softc->ipf_running = 1; if (softc->ipf_control_forwarding & 1) V_ipforwarding = 1; SPL_X(s); #if 0 softc->ipf_slow_ch = timeout(ipf_timer_func, softc, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT); #endif callout_init(&softc->ipf_slow_ch, 1); callout_reset(&softc->ipf_slow_ch, (hz / IPF_HZ_DIVIDE) * IPF_HZ_MULT, ipf_timer_func, softc); return 0; } /* * Disable the filter by removing the hooks from the IP input/output * stream. */ int ipfdetach(softc) ipf_main_softc_t *softc; { #ifdef USE_SPL int s; #endif if (softc->ipf_control_forwarding & 2) V_ipforwarding = 0; SPL_NET(s); #if 0 if (softc->ipf_slow_ch.callout != NULL) untimeout(ipf_timer_func, softc, softc->ipf_slow_ch); bzero(&softc->ipf_slow, sizeof(softc->ipf_slow)); #endif callout_drain(&softc->ipf_slow_ch); -#ifndef NETBSD_PF - if (ipf_checkp != NULL) - ipf_checkp = ipf_savep; - ipf_savep = NULL; -#endif - ipf_fini_all(softc); softc->ipf_running = -2; SPL_X(s); return 0; } /* * Filter ioctl interface. */ int ipfioctl(dev, cmd, data, mode , p) struct thread *p; # define p_cred td_ucred # define p_uid td_ucred->cr_ruid struct cdev *dev; ioctlcmd_t cmd; caddr_t data; int mode; { int error = 0, unit = 0; SPL_INT(s); #if (BSD >= 199306) if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE)) { - ipfmain.ipf_interror = 130001; + V_ipfmain.ipf_interror = 130001; return EPERM; } #endif unit = GET_MINOR(dev); if ((IPL_LOGMAX < unit) || (unit < 0)) { - ipfmain.ipf_interror = 130002; + V_ipfmain.ipf_interror = 130002; return ENXIO; } - if (ipfmain.ipf_running <= 0) { + if (V_ipfmain.ipf_running <= 0) { if (unit != IPL_LOGIPF && cmd != SIOCIPFINTERROR) { - ipfmain.ipf_interror = 130003; + V_ipfmain.ipf_interror = 130003; return EIO; } if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET && cmd != SIOCIPFSET && cmd != SIOCFRENB && cmd != SIOCGETFS && cmd != SIOCGETFF && cmd != SIOCIPFINTERROR) { - ipfmain.ipf_interror = 130004; + V_ipfmain.ipf_interror = 130004; return EIO; } } SPL_NET(s); CURVNET_SET(TD_TO_VNET(p)); - error = ipf_ioctlswitch(&ipfmain, unit, data, cmd, mode, p->p_uid, p); + error = ipf_ioctlswitch(&V_ipfmain, unit, data, cmd, mode, p->p_uid, p); CURVNET_RESTORE(); if (error != -1) { SPL_X(s); return error; } SPL_X(s); return error; } /* * ipf_send_reset - this could conceivably be a call to tcp_respond(), but that * requires a large amount of setting up and isn't any more efficient. */ int ipf_send_reset(fin) fr_info_t *fin; { struct tcphdr *tcp, *tcp2; int tlen = 0, hlen; struct mbuf *m; #ifdef USE_INET6 ip6_t *ip6; #endif ip_t *ip; tcp = fin->fin_dp; if (tcp->th_flags & TH_RST) return -1; /* feedback loop */ if (ipf_checkl4sum(fin) == -1) return -1; tlen = fin->fin_dlen - (TCP_OFF(tcp) << 2) + ((tcp->th_flags & TH_SYN) ? 1 : 0) + ((tcp->th_flags & TH_FIN) ? 1 : 0); #ifdef USE_INET6 hlen = (fin->fin_v == 6) ? sizeof(ip6_t) : sizeof(ip_t); #else hlen = sizeof(ip_t); #endif #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) return -1; if (sizeof(*tcp2) + hlen > MLEN) { if (!(MCLGET(m, M_NOWAIT))) { FREE_MB_T(m); return -1; } } m->m_len = sizeof(*tcp2) + hlen; #if (BSD >= 199103) m->m_data += max_linkhdr; m->m_pkthdr.len = m->m_len; m->m_pkthdr.rcvif = (struct ifnet *)0; #endif ip = mtod(m, struct ip *); bzero((char *)ip, hlen); #ifdef USE_INET6 ip6 = (ip6_t *)ip; #endif tcp2 = (struct tcphdr *)((char *)ip + hlen); tcp2->th_sport = tcp->th_dport; tcp2->th_dport = tcp->th_sport; if (tcp->th_flags & TH_ACK) { tcp2->th_seq = tcp->th_ack; tcp2->th_flags = TH_RST; tcp2->th_ack = 0; } else { tcp2->th_seq = 0; tcp2->th_ack = ntohl(tcp->th_seq); tcp2->th_ack += tlen; tcp2->th_ack = htonl(tcp2->th_ack); tcp2->th_flags = TH_RST|TH_ACK; } TCP_X2_A(tcp2, 0); TCP_OFF_A(tcp2, sizeof(*tcp2) >> 2); tcp2->th_win = tcp->th_win; tcp2->th_sum = 0; tcp2->th_urp = 0; #ifdef USE_INET6 if (fin->fin_v == 6) { ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_hlim = 0; ip6->ip6_src = fin->fin_dst6.in6; ip6->ip6_dst = fin->fin_src6.in6; tcp2->th_sum = in6_cksum(m, IPPROTO_TCP, sizeof(*ip6), sizeof(*tcp2)); return ipf_send_ip(fin, m); } #endif ip->ip_p = IPPROTO_TCP; ip->ip_len = htons(sizeof(struct tcphdr)); ip->ip_src.s_addr = fin->fin_daddr; ip->ip_dst.s_addr = fin->fin_saddr; tcp2->th_sum = in_cksum(m, hlen + sizeof(*tcp2)); ip->ip_len = htons(hlen + sizeof(*tcp2)); return ipf_send_ip(fin, m); } /* * ip_len must be in network byte order when called. */ static int ipf_send_ip(fin, m) fr_info_t *fin; mb_t *m; { fr_info_t fnew; ip_t *ip, *oip; int hlen; ip = mtod(m, ip_t *); bzero((char *)&fnew, sizeof(fnew)); fnew.fin_main_soft = fin->fin_main_soft; IP_V_A(ip, fin->fin_v); switch (fin->fin_v) { case 4 : oip = fin->fin_ip; hlen = sizeof(*oip); fnew.fin_v = 4; fnew.fin_p = ip->ip_p; fnew.fin_plen = ntohs(ip->ip_len); IP_HL_A(ip, sizeof(*oip) >> 2); ip->ip_tos = oip->ip_tos; ip->ip_id = fin->fin_ip->ip_id; #if defined(FreeBSD) && (__FreeBSD_version > 460000) ip->ip_off = htons(path_mtu_discovery ? IP_DF : 0); #else ip->ip_off = 0; #endif ip->ip_ttl = V_ip_defttl; ip->ip_sum = 0; break; #ifdef USE_INET6 case 6 : { ip6_t *ip6 = (ip6_t *)ip; ip6->ip6_vfc = 0x60; ip6->ip6_hlim = IPDEFTTL; hlen = sizeof(*ip6); fnew.fin_p = ip6->ip6_nxt; fnew.fin_v = 6; fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen; break; } #endif default : return EINVAL; } #ifdef IPSEC m->m_pkthdr.rcvif = NULL; #endif fnew.fin_ifp = fin->fin_ifp; fnew.fin_flx = FI_NOCKSUM; fnew.fin_m = m; fnew.fin_ip = ip; fnew.fin_mp = &m; fnew.fin_hlen = hlen; fnew.fin_dp = (char *)ip + hlen; (void) ipf_makefrip(hlen, ip, &fnew); return ipf_fastroute(m, &m, &fnew, NULL); } int ipf_send_icmp_err(type, fin, dst) int type; fr_info_t *fin; int dst; { int err, hlen, xtra, iclen, ohlen, avail, code; struct in_addr dst4; struct icmp *icmp; struct mbuf *m; i6addr_t dst6; void *ifp; #ifdef USE_INET6 ip6_t *ip6; #endif ip_t *ip, *ip2; if ((type < 0) || (type >= ICMP_MAXTYPE)) return -1; code = fin->fin_icode; #ifdef USE_INET6 #if 0 /* XXX Fix an off by one error: s/>/>=/ was: if ((code < 0) || (code > sizeof(icmptoicmp6unreach)/sizeof(int))) Fix obtained from NetBSD ip_fil_netbsd.c r1.4: */ #endif if ((code < 0) || (code >= sizeof(icmptoicmp6unreach)/sizeof(int))) return -1; #endif if (ipf_checkl4sum(fin) == -1) return -1; #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) return -1; avail = MHLEN; xtra = 0; hlen = 0; ohlen = 0; dst4.s_addr = 0; ifp = fin->fin_ifp; if (fin->fin_v == 4) { if ((fin->fin_p == IPPROTO_ICMP) && !(fin->fin_flx & FI_SHORT)) switch (ntohs(fin->fin_data[0]) >> 8) { case ICMP_ECHO : case ICMP_TSTAMP : case ICMP_IREQ : case ICMP_MASKREQ : break; default : FREE_MB_T(m); return 0; } if (dst == 0) { - if (ipf_ifpaddr(&ipfmain, 4, FRI_NORMAL, ifp, + if (ipf_ifpaddr(&V_ipfmain, 4, FRI_NORMAL, ifp, &dst6, NULL) == -1) { FREE_MB_T(m); return -1; } dst4 = dst6.in4; } else dst4.s_addr = fin->fin_daddr; hlen = sizeof(ip_t); ohlen = fin->fin_hlen; iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen; if (fin->fin_hlen < fin->fin_plen) xtra = MIN(fin->fin_dlen, 8); else xtra = 0; } #ifdef USE_INET6 else if (fin->fin_v == 6) { hlen = sizeof(ip6_t); ohlen = sizeof(ip6_t); iclen = hlen + offsetof(struct icmp, icmp_ip) + ohlen; type = icmptoicmp6types[type]; if (type == ICMP6_DST_UNREACH) code = icmptoicmp6unreach[code]; if (iclen + max_linkhdr + fin->fin_plen > avail) { if (!(MCLGET(m, M_NOWAIT))) { FREE_MB_T(m); return -1; } avail = MCLBYTES; } xtra = MIN(fin->fin_plen, avail - iclen - max_linkhdr); xtra = MIN(xtra, IPV6_MMTU - iclen); if (dst == 0) { - if (ipf_ifpaddr(&ipfmain, 6, FRI_NORMAL, ifp, + if (ipf_ifpaddr(&V_ipfmain, 6, FRI_NORMAL, ifp, &dst6, NULL) == -1) { FREE_MB_T(m); return -1; } } else dst6 = fin->fin_dst6; } #endif else { FREE_MB_T(m); return -1; } avail -= (max_linkhdr + iclen); if (avail < 0) { FREE_MB_T(m); return -1; } if (xtra > avail) xtra = avail; iclen += xtra; m->m_data += max_linkhdr; m->m_pkthdr.rcvif = (struct ifnet *)0; m->m_pkthdr.len = iclen; m->m_len = iclen; ip = mtod(m, ip_t *); icmp = (struct icmp *)((char *)ip + hlen); ip2 = (ip_t *)&icmp->icmp_ip; icmp->icmp_type = type; icmp->icmp_code = fin->fin_icode; icmp->icmp_cksum = 0; #ifdef icmp_nextmtu if (type == ICMP_UNREACH && fin->fin_icode == ICMP_UNREACH_NEEDFRAG) { if (fin->fin_mtu != 0) { icmp->icmp_nextmtu = htons(fin->fin_mtu); } else if (ifp != NULL) { icmp->icmp_nextmtu = htons(GETIFMTU_4(ifp)); } else { /* make up a number... */ icmp->icmp_nextmtu = htons(fin->fin_plen - 20); } } #endif bcopy((char *)fin->fin_ip, (char *)ip2, ohlen); #ifdef USE_INET6 ip6 = (ip6_t *)ip; if (fin->fin_v == 6) { ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow; ip6->ip6_plen = htons(iclen - hlen); ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 0; ip6->ip6_src = dst6.in6; ip6->ip6_dst = fin->fin_src6.in6; if (xtra > 0) bcopy((char *)fin->fin_ip + ohlen, (char *)&icmp->icmp_ip + ohlen, xtra); icmp->icmp_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), iclen - hlen); } else #endif { ip->ip_p = IPPROTO_ICMP; ip->ip_src.s_addr = dst4.s_addr; ip->ip_dst.s_addr = fin->fin_saddr; if (xtra > 0) bcopy((char *)fin->fin_ip + ohlen, (char *)&icmp->icmp_ip + ohlen, xtra); icmp->icmp_cksum = ipf_cksum((u_short *)icmp, sizeof(*icmp) + 8); ip->ip_len = htons(iclen); ip->ip_p = IPPROTO_ICMP; } err = ipf_send_ip(fin, m); return err; } /* * m0 - pointer to mbuf where the IP packet starts * mpp - pointer to the mbuf pointer that is the start of the mbuf chain */ int ipf_fastroute(m0, mpp, fin, fdp) mb_t *m0, **mpp; fr_info_t *fin; frdest_t *fdp; { register struct ip *ip, *mhip; register struct mbuf *m = *mpp; int len, off, error = 0, hlen, code; struct ifnet *ifp, *sifp; struct sockaddr_in dst; struct nhop4_extended nh4; int has_nhop = 0; u_long fibnum = 0; u_short ip_off; frdest_t node; frentry_t *fr; #ifdef M_WRITABLE /* * HOT FIX/KLUDGE: * * If the mbuf we're about to send is not writable (because of * a cluster reference, for example) we'll need to make a copy * of it since this routine modifies the contents. * * If you have non-crappy network hardware that can transmit data * from the mbuf, rather than making a copy, this is gonna be a * problem. */ if (M_WRITABLE(m) == 0) { m0 = m_dup(m, M_NOWAIT); if (m0 != NULL) { FREE_MB_T(m); m = m0; *mpp = m; } else { error = ENOBUFS; FREE_MB_T(m); goto done; } } #endif #ifdef USE_INET6 if (fin->fin_v == 6) { /* * currently "to " and "to :ip#" are not supported * for IPv6 */ return ip6_output(m, NULL, NULL, 0, NULL, NULL, NULL); } #endif hlen = fin->fin_hlen; ip = mtod(m0, struct ip *); ifp = NULL; /* * Route packet. */ bzero(&dst, sizeof (dst)); dst.sin_family = AF_INET; dst.sin_addr = ip->ip_dst; dst.sin_len = sizeof(dst); fr = fin->fin_fr; if ((fr != NULL) && !(fr->fr_flags & FR_KEEPSTATE) && (fdp != NULL) && (fdp->fd_type == FRD_DSTLIST)) { if (ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL, &node) == 0) fdp = &node; } if (fdp != NULL) ifp = fdp->fd_ptr; else ifp = fin->fin_ifp; if ((ifp == NULL) && ((fr == NULL) || !(fr->fr_flags & FR_FASTROUTE))) { error = -2; goto bad; } if ((fdp != NULL) && (fdp->fd_ip.s_addr != 0)) dst.sin_addr = fdp->fd_ip; fibnum = M_GETFIB(m0); if (fib4_lookup_nh_ext(fibnum, dst.sin_addr, NHR_REF, 0, &nh4) != 0) { if (in_localaddr(ip->ip_dst)) error = EHOSTUNREACH; else error = ENETUNREACH; goto bad; } has_nhop = 1; if (ifp == NULL) ifp = nh4.nh_ifp; if (nh4.nh_flags & NHF_GATEWAY) dst.sin_addr = nh4.nh_addr; /* * For input packets which are being "fastrouted", they won't * go back through output filtering and miss their chance to get * NAT'd and counted. Duplicated packets aren't considered to be * part of the normal packet stream, so do not NAT them or pass * them through stateful checking, etc. */ if ((fdp != &fr->fr_dif) && (fin->fin_out == 0)) { sifp = fin->fin_ifp; fin->fin_ifp = ifp; fin->fin_out = 1; (void) ipf_acctpkt(fin, NULL); fin->fin_fr = NULL; if (!fr || !(fr->fr_flags & FR_RETMASK)) { u_32_t pass; (void) ipf_state_check(fin, &pass); } switch (ipf_nat_checkout(fin, NULL)) { case 0 : break; case 1 : ip->ip_sum = 0; break; case -1 : error = -1; goto bad; break; } fin->fin_ifp = sifp; fin->fin_out = 0; } else ip->ip_sum = 0; /* * If small enough for interface, can just send directly. */ if (ntohs(ip->ip_len) <= ifp->if_mtu) { if (!ip->ip_sum) ip->ip_sum = in_cksum(m, hlen); error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst, NULL ); goto done; } /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. */ ip_off = ntohs(ip->ip_off); if (ip_off & IP_DF) { error = EMSGSIZE; goto bad; } len = (ifp->if_mtu - hlen) &~ 7; if (len < 8) { error = EMSGSIZE; goto bad; } { int mhlen, firstlen = len; struct mbuf **mnext = &m->m_act; /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. */ m0 = m; mhlen = sizeof (struct ip); for (off = hlen + len; off < ntohs(ip->ip_len); off += len) { #ifdef MGETHDR MGETHDR(m, M_NOWAIT, MT_HEADER); #else MGET(m, M_NOWAIT, MT_HEADER); #endif if (m == NULL) { m = m0; error = ENOBUFS; goto bad; } m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); bcopy((char *)ip, (char *)mhip, sizeof(*ip)); if (hlen > sizeof (struct ip)) { mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); IP_HL_A(mhip, mhlen >> 2); } m->m_len = mhlen; mhip->ip_off = ((off - hlen) >> 3) + ip_off; if (off + len >= ntohs(ip->ip_len)) len = ntohs(ip->ip_len) - off; else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); *mnext = m; m->m_next = m_copy(m0, off, len); if (m->m_next == 0) { error = ENOBUFS; /* ??? */ goto sendorfree; } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = NULL; mhip->ip_off = htons((u_short)mhip->ip_off); mhip->ip_sum = 0; mhip->ip_sum = in_cksum(m, mhlen); mnext = &m->m_act; } /* * Update first fragment by trimming what's been copied out * and updating header, then send each fragment (in order). */ m_adj(m0, hlen + firstlen - ip->ip_len); ip->ip_len = htons((u_short)(hlen + firstlen)); ip->ip_off = htons((u_short)IP_MF); ip->ip_sum = 0; ip->ip_sum = in_cksum(m0, hlen); sendorfree: for (m = m0; m; m = m0) { m0 = m->m_act; m->m_act = 0; if (error == 0) error = (*ifp->if_output)(ifp, m, (struct sockaddr *)&dst, NULL ); else FREE_MB_T(m); } } done: if (!error) - ipfmain.ipf_frouteok[0]++; + V_ipfmain.ipf_frouteok[0]++; else - ipfmain.ipf_frouteok[1]++; + V_ipfmain.ipf_frouteok[1]++; if (has_nhop) fib4_free_nh_ext(fibnum, &nh4); return 0; bad: if (error == EMSGSIZE) { sifp = fin->fin_ifp; code = fin->fin_icode; fin->fin_icode = ICMP_UNREACH_NEEDFRAG; fin->fin_ifp = ifp; (void) ipf_send_icmp_err(ICMP_UNREACH, fin, 1); fin->fin_ifp = sifp; fin->fin_icode = code; } FREE_MB_T(m); goto done; } int ipf_verifysrc(fin) fr_info_t *fin; { struct nhop4_basic nh4; if (fib4_lookup_nh_basic(0, fin->fin_src, 0, 0, &nh4) != 0) return (0); return (fin->fin_ifp == nh4.nh_ifp); } /* * return the first IP Address associated with an interface */ int ipf_ifpaddr(softc, v, atype, ifptr, inp, inpmask) ipf_main_softc_t *softc; int v, atype; void *ifptr; i6addr_t *inp, *inpmask; { #ifdef USE_INET6 struct in6_addr *inp6 = NULL; #endif struct sockaddr *sock, *mask; struct sockaddr_in *sin; struct ifaddr *ifa; struct ifnet *ifp; if ((ifptr == NULL) || (ifptr == (void *)-1)) return -1; sin = NULL; ifp = ifptr; if (v == 4) inp->in4.s_addr = 0; #ifdef USE_INET6 else if (v == 6) bzero((char *)inp, sizeof(*inp)); #endif ifa = TAILQ_FIRST(&ifp->if_addrhead); sock = ifa->ifa_addr; while (sock != NULL && ifa != NULL) { sin = (struct sockaddr_in *)sock; if ((v == 4) && (sin->sin_family == AF_INET)) break; #ifdef USE_INET6 if ((v == 6) && (sin->sin_family == AF_INET6)) { inp6 = &((struct sockaddr_in6 *)sin)->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(inp6) && !IN6_IS_ADDR_LOOPBACK(inp6)) break; } #endif ifa = TAILQ_NEXT(ifa, ifa_link); if (ifa != NULL) sock = ifa->ifa_addr; } if (ifa == NULL || sin == NULL) return -1; mask = ifa->ifa_netmask; if (atype == FRI_BROADCAST) sock = ifa->ifa_broadaddr; else if (atype == FRI_PEERADDR) sock = ifa->ifa_dstaddr; if (sock == NULL) return -1; #ifdef USE_INET6 if (v == 6) { return ipf_ifpfillv6addr(atype, (struct sockaddr_in6 *)sock, (struct sockaddr_in6 *)mask, inp, inpmask); } #endif return ipf_ifpfillv4addr(atype, (struct sockaddr_in *)sock, (struct sockaddr_in *)mask, &inp->in4, &inpmask->in4); } u_32_t ipf_newisn(fin) fr_info_t *fin; { u_32_t newiss; newiss = arc4random(); return newiss; } INLINE int ipf_checkv4sum(fin) fr_info_t *fin; { #ifdef CSUM_DATA_VALID int manual = 0; u_short sum; ip_t *ip; mb_t *m; if ((fin->fin_flx & FI_NOCKSUM) != 0) return 0; if ((fin->fin_flx & FI_SHORT) != 0) return 1; if (fin->fin_cksum != FI_CK_NEEDED) return (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1; m = fin->fin_m; if (m == NULL) { manual = 1; goto skipauto; } ip = fin->fin_ip; if ((m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)) == CSUM_IP_CHECKED) { fin->fin_cksum = FI_CK_BAD; fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_csum_ip_checked, fr_info_t *, fin, u_int, m->m_pkthdr.csum_flags & (CSUM_IP_CHECKED|CSUM_IP_VALID)); return -1; } if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { /* Depending on the driver, UDP may have zero checksum */ if (fin->fin_p == IPPROTO_UDP && (fin->fin_flx & (FI_FRAG|FI_SHORT|FI_BAD)) == 0) { udphdr_t *udp = fin->fin_dp; if (udp->uh_sum == 0) { /* * we're good no matter what the hardware * checksum flags and csum_data say (handling * of csum_data for zero UDP checksum is not * consistent across all drivers) */ fin->fin_cksum = 1; return 0; } } if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) sum = m->m_pkthdr.csum_data; else sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htonl(m->m_pkthdr.csum_data + fin->fin_dlen + fin->fin_p)); sum ^= 0xffff; if (sum != 0) { fin->fin_cksum = FI_CK_BAD; fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_sum, fr_info_t *, fin, u_int, sum); } else { fin->fin_cksum = FI_CK_SUMOK; return 0; } } else { if (m->m_pkthdr.csum_flags == CSUM_DELAY_DATA) { fin->fin_cksum = FI_CK_L4FULL; return 0; } else if (m->m_pkthdr.csum_flags == CSUM_TCP || m->m_pkthdr.csum_flags == CSUM_UDP) { fin->fin_cksum = FI_CK_L4PART; return 0; } else if (m->m_pkthdr.csum_flags == CSUM_IP) { fin->fin_cksum = FI_CK_L4PART; return 0; } else { manual = 1; } } skipauto: if (manual != 0) { if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_manual, fr_info_t *, fin, u_int, manual); return -1; } } #else if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv4sum_checkl4sum, fr_info_t *, fin, u_int, -1); return -1; } #endif return 0; } #ifdef USE_INET6 INLINE int ipf_checkv6sum(fin) fr_info_t *fin; { if ((fin->fin_flx & FI_NOCKSUM) != 0) { DT(ipf_checkv6sum_fi_nocksum); return 0; } if ((fin->fin_flx & FI_SHORT) != 0) { DT(ipf_checkv6sum_fi_short); return 1; } if (fin->fin_cksum != FI_CK_NEEDED) { DT(ipf_checkv6sum_fi_ck_needed); return (fin->fin_cksum > FI_CK_NEEDED) ? 0 : -1; } if (ipf_checkl4sum(fin) == -1) { fin->fin_flx |= FI_BAD; DT2(ipf_fi_bad_checkv6sum_checkl4sum, fr_info_t *, fin, u_int, -1); return -1; } return 0; } #endif /* USE_INET6 */ size_t mbufchainlen(m0) struct mbuf *m0; { size_t len; if ((m0->m_flags & M_PKTHDR) != 0) { len = m0->m_pkthdr.len; } else { struct mbuf *m; for (m = m0, len = 0; m != NULL; m = m->m_next) len += m->m_len; } return len; } /* ------------------------------------------------------------------------ */ /* Function: ipf_pullup */ /* Returns: NULL == pullup failed, else pointer to protocol header */ /* Parameters: xmin(I)- pointer to buffer where data packet starts */ /* fin(I) - pointer to packet information */ /* len(I) - number of bytes to pullup */ /* */ /* Attempt to move at least len bytes (from the start of the buffer) into a */ /* single buffer for ease of access. Operating system native functions are */ /* used to manage buffers - if necessary. If the entire packet ends up in */ /* a single buffer, set the FI_COALESCE flag even though ipf_coalesce() has */ /* not been called. Both fin_ip and fin_dp are updated before exiting _IF_ */ /* and ONLY if the pullup succeeds. */ /* */ /* We assume that 'xmin' is a pointer to a buffer that is part of the chain */ /* of buffers that starts at *fin->fin_mp. */ /* ------------------------------------------------------------------------ */ void * ipf_pullup(xmin, fin, len) mb_t *xmin; fr_info_t *fin; int len; { int dpoff, ipoff; mb_t *m = xmin; char *ip; if (m == NULL) return NULL; ip = (char *)fin->fin_ip; if ((fin->fin_flx & FI_COALESCE) != 0) return ip; ipoff = fin->fin_ipoff; if (fin->fin_dp != NULL) dpoff = (char *)fin->fin_dp - (char *)ip; else dpoff = 0; if (M_LEN(m) < len) { mb_t *n = *fin->fin_mp; /* * Assume that M_PKTHDR is set and just work with what is left * rather than check.. * Should not make any real difference, anyway. */ if (m != n) { /* * Record the mbuf that points to the mbuf that we're * about to go to work on so that we can update the * m_next appropriately later. */ for (; n->m_next != m; n = n->m_next) ; } else { n = NULL; } #ifdef MHLEN if (len > MHLEN) #else if (len > MLEN) #endif { #ifdef HAVE_M_PULLDOWN if (m_pulldown(m, 0, len, NULL) == NULL) m = NULL; #else FREE_MB_T(*fin->fin_mp); m = NULL; n = NULL; #endif } else { m = m_pullup(m, len); } if (n != NULL) n->m_next = m; if (m == NULL) { /* * When n is non-NULL, it indicates that m pointed to * a sub-chain (tail) of the mbuf and that the head * of this chain has not yet been free'd. */ if (n != NULL) { FREE_MB_T(*fin->fin_mp); } *fin->fin_mp = NULL; fin->fin_m = NULL; return NULL; } if (n == NULL) *fin->fin_mp = m; while (M_LEN(m) == 0) { m = m->m_next; } fin->fin_m = m; ip = MTOD(m, char *) + ipoff; fin->fin_ip = (ip_t *)ip; if (fin->fin_dp != NULL) fin->fin_dp = (char *)fin->fin_ip + dpoff; if (fin->fin_fraghdr != NULL) fin->fin_fraghdr = (char *)ip + ((char *)fin->fin_fraghdr - (char *)fin->fin_ip); } if (len == fin->fin_plen) fin->fin_flx |= FI_COALESCE; return ip; } int ipf_inject(fin, m) fr_info_t *fin; mb_t *m; { int error = 0; if (fin->fin_out == 0) { netisr_dispatch(NETISR_IP, m); } else { fin->fin_ip->ip_len = ntohs(fin->fin_ip->ip_len); fin->fin_ip->ip_off = ntohs(fin->fin_ip->ip_off); error = ip_output(m, NULL, NULL, IP_FORWARDING, NULL, NULL); } return error; } int ipf_pfil_unhook(void) { #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011) struct pfil_head *ph_inet; # ifdef USE_INET6 struct pfil_head *ph_inet6; # endif #endif #ifdef NETBSD_PF ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); if (ph_inet != NULL) pfil_remove_hook((void *)ipf_check_wrapper, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); # ifdef USE_INET6 ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); if (ph_inet6 != NULL) pfil_remove_hook((void *)ipf_check_wrapper6, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); # endif #endif return (0); } int ipf_pfil_hook(void) { #if defined(NETBSD_PF) && (__FreeBSD_version >= 500011) struct pfil_head *ph_inet; # ifdef USE_INET6 struct pfil_head *ph_inet6; # endif #endif # ifdef NETBSD_PF ph_inet = pfil_head_get(PFIL_TYPE_AF, AF_INET); # ifdef USE_INET6 ph_inet6 = pfil_head_get(PFIL_TYPE_AF, AF_INET6); # endif if (ph_inet == NULL # ifdef USE_INET6 && ph_inet6 == NULL # endif ) { return ENODEV; } if (ph_inet != NULL) pfil_add_hook((void *)ipf_check_wrapper, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet); # ifdef USE_INET6 if (ph_inet6 != NULL) pfil_add_hook((void *)ipf_check_wrapper6, NULL, PFIL_IN|PFIL_OUT|PFIL_WAITOK, ph_inet6); # endif # endif return (0); } void ipf_event_reg(void) { ipf_arrivetag = EVENTHANDLER_REGISTER(ifnet_arrival_event, \ - ipf_ifevent, &ipfmain, \ + ipf_ifevent, NULL, \ EVENTHANDLER_PRI_ANY); ipf_departtag = EVENTHANDLER_REGISTER(ifnet_departure_event, \ - ipf_ifevent, &ipfmain, \ + ipf_ifevent, NULL, \ EVENTHANDLER_PRI_ANY); ipf_clonetag = EVENTHANDLER_REGISTER(if_clone_event, ipf_ifevent, \ - &ipfmain, EVENTHANDLER_PRI_ANY); + NULL, EVENTHANDLER_PRI_ANY); } void ipf_event_dereg(void) { if (ipf_arrivetag != NULL) { EVENTHANDLER_DEREGISTER(ifnet_arrival_event, ipf_arrivetag); } if (ipf_departtag != NULL) { EVENTHANDLER_DEREGISTER(ifnet_departure_event, ipf_departtag); } if (ipf_clonetag != NULL) { EVENTHANDLER_DEREGISTER(if_clone_event, ipf_clonetag); } } u_32_t ipf_random() { return arc4random(); } u_int ipf_pcksum(fin, hlen, sum) fr_info_t *fin; int hlen; u_int sum; { struct mbuf *m; u_int sum2; int off; m = fin->fin_m; off = (char *)fin->fin_dp - (char *)fin->fin_ip; m->m_data += hlen; m->m_len -= hlen; sum2 = in_cksum(fin->fin_m, fin->fin_plen - off); m->m_len += hlen; m->m_data -= hlen; /* * Both sum and sum2 are partial sums, so combine them together. */ sum += ~sum2 & 0xffff; while (sum > 0xffff) sum = (sum & 0xffff) + (sum >> 16); sum2 = ~sum & 0xffff; return sum2; } Index: projects/vnet/sys/contrib/ipfilter/netinet/ip_nat.c =================================================================== --- projects/vnet/sys/contrib/ipfilter/netinet/ip_nat.c (revision 302198) +++ projects/vnet/sys/contrib/ipfilter/netinet/ip_nat.c (revision 302199) @@ -1,8596 +1,8591 @@ /* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. * * See the IPFILTER.LICENCE file for details on licencing. */ #if defined(KERNEL) || defined(_KERNEL) # undef KERNEL # undef _KERNEL # define KERNEL 1 # define _KERNEL 1 #endif #include #include #include #include #include #if defined(_KERNEL) && \ (defined(__NetBSD_Version) && (__NetBSD_Version >= 399002000)) # include #endif #if !defined(_KERNEL) # include # include # include # define KERNEL # ifdef _OpenBSD__ struct file; # endif # include # undef KERNEL #endif #if defined(_KERNEL) && \ defined(__FreeBSD_version) && (__FreeBSD_version >= 220000) # include # include #else # include #endif #if !defined(AIX) # include #endif #if !defined(linux) # include #endif #include #if defined(_KERNEL) # include # if !defined(__SVR4) && !defined(__svr4__) # include # endif #endif #if defined(__SVR4) || defined(__svr4__) # include # include # ifdef KERNEL # include # endif # include # include #endif #if __FreeBSD_version >= 300000 # include #endif #include #if __FreeBSD_version >= 300000 # include #endif #ifdef sun # include #endif #include #include #include #ifdef RFC1825 # include # include extern struct ifnet vpnif; #endif #if !defined(linux) # include #endif #include #include #include #include "netinet/ip_compat.h" #include #include "netinet/ipl.h" #include "netinet/ip_fil.h" #include "netinet/ip_nat.h" #include "netinet/ip_frag.h" #include "netinet/ip_state.h" #include "netinet/ip_proxy.h" #include "netinet/ip_lookup.h" #include "netinet/ip_dstlist.h" #include "netinet/ip_sync.h" #if FREEBSD_GE_REV(300000) # include #endif #ifdef HAS_SYS_MD5_H # include #else # include "md5.h" #endif /* END OF INCLUDES */ #undef SOCKADDR_IN #define SOCKADDR_IN struct sockaddr_in #if !defined(lint) static const char sccsid[] = "@(#)ip_nat.c 1.11 6/5/96 (C) 1995 Darren Reed"; static const char rcsid[] = "@(#)$FreeBSD$"; /* static const char rcsid[] = "@(#)$Id: ip_nat.c,v 2.195.2.102 2007/10/16 10:08:10 darrenr Exp $"; */ #endif #define NATFSUM(n,v,f) ((v) == 4 ? (n)->f.in4.s_addr : (n)->f.i6[0] + \ (n)->f.i6[1] + (n)->f.i6[2] + (n)->f.i6[3]) #define NBUMP(x) softn->(x)++ #define NBUMPD(x, y) do { \ softn->x.y++; \ DT(y); \ } while (0) #define NBUMPSIDE(y,x) softn->ipf_nat_stats.ns_side[y].x++ #define NBUMPSIDED(y,x) do { softn->ipf_nat_stats.ns_side[y].x++; \ DT(x); } while (0) #define NBUMPSIDEX(y,x,z) \ do { softn->ipf_nat_stats.ns_side[y].x++; \ DT(z); } while (0) #define NBUMPSIDEDF(y,x)do { softn->ipf_nat_stats.ns_side[y].x++; \ DT1(x, fr_info_t *, fin); } while (0) -frentry_t ipfnatblock; - static ipftuneable_t ipf_nat_tuneables[] = { /* nat */ { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_lock) }, "nat_lock", 0, 1, stsizeof(ipf_nat_softc_t, ipf_nat_lock), IPFT_RDONLY, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_sz) }, "nat_table_size", 1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_table_sz), 0, NULL, ipf_nat_rehash }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_max) }, "nat_table_max", 1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_table_max), 0, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_maprules_sz) }, "nat_rules_size", 1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_maprules_sz), 0, NULL, ipf_nat_rehash_rules }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_rdrrules_sz) }, "rdr_rules_size", 1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_rdrrules_sz), 0, NULL, ipf_nat_rehash_rules }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_hostmap_sz) }, "hostmap_size", 1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_hostmap_sz), 0, NULL, ipf_nat_hostmap_rehash }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_maxbucket) }, "nat_maxbucket",1, 0x7fffffff, stsizeof(ipf_nat_softc_t, ipf_nat_maxbucket), 0, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_logging) }, "nat_logging", 0, 1, stsizeof(ipf_nat_softc_t, ipf_nat_logging), 0, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_doflush) }, "nat_doflush", 0, 1, stsizeof(ipf_nat_softc_t, ipf_nat_doflush), 0, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_wm_low) }, "nat_table_wm_low", 1, 99, stsizeof(ipf_nat_softc_t, ipf_nat_table_wm_low), 0, NULL, NULL }, { { (void *)offsetof(ipf_nat_softc_t, ipf_nat_table_wm_high) }, "nat_table_wm_high", 2, 100, stsizeof(ipf_nat_softc_t, ipf_nat_table_wm_high), 0, NULL, NULL }, { { 0 }, NULL, 0, 0, 0, 0, NULL, NULL } }; /* ======================================================================== */ /* How the NAT is organised and works. */ /* */ /* Inside (interface y) NAT Outside (interface x) */ /* -------------------- -+- ------------------------------------- */ /* Packet going | out, processsed by ipf_nat_checkout() for x */ /* ------------> | ------------> */ /* src=10.1.1.1 | src=192.1.1.1 */ /* | */ /* | in, processed by ipf_nat_checkin() for x */ /* <------------ | <------------ */ /* dst=10.1.1.1 | dst=192.1.1.1 */ /* -------------------- -+- ------------------------------------- */ /* ipf_nat_checkout() - changes ip_src and if required, sport */ /* - creates a new mapping, if required. */ /* ipf_nat_checkin() - changes ip_dst and if required, dport */ /* */ /* In the NAT table, internal source is recorded as "in" and externally */ /* seen as "out". */ /* ======================================================================== */ #if SOLARIS && !defined(INSTANCES) extern int pfil_delayed_copy; #endif static int ipf_nat_flush_entry __P((ipf_main_softc_t *, void *)); static int ipf_nat_getent __P((ipf_main_softc_t *, caddr_t, int)); static int ipf_nat_getsz __P((ipf_main_softc_t *, caddr_t, int)); static int ipf_nat_putent __P((ipf_main_softc_t *, caddr_t, int)); static void ipf_nat_addmap __P((ipf_nat_softc_t *, ipnat_t *)); static void ipf_nat_addrdr __P((ipf_nat_softc_t *, ipnat_t *)); static int ipf_nat_builddivertmp __P((ipf_nat_softc_t *, ipnat_t *)); static int ipf_nat_clearlist __P((ipf_main_softc_t *, ipf_nat_softc_t *)); static int ipf_nat_cmp_rules __P((ipnat_t *, ipnat_t *)); static int ipf_nat_decap __P((fr_info_t *, nat_t *)); static void ipf_nat_delrule __P((ipf_main_softc_t *, ipf_nat_softc_t *, ipnat_t *, int)); static int ipf_nat_extraflush __P((ipf_main_softc_t *, ipf_nat_softc_t *, int)); static int ipf_nat_finalise __P((fr_info_t *, nat_t *)); static int ipf_nat_flushtable __P((ipf_main_softc_t *, ipf_nat_softc_t *)); static int ipf_nat_getnext __P((ipf_main_softc_t *, ipftoken_t *, ipfgeniter_t *, ipfobj_t *)); static int ipf_nat_gettable __P((ipf_main_softc_t *, ipf_nat_softc_t *, char *)); static hostmap_t *ipf_nat_hostmap __P((ipf_nat_softc_t *, ipnat_t *, struct in_addr, struct in_addr, struct in_addr, u_32_t)); static int ipf_nat_icmpquerytype __P((int)); static int ipf_nat_iterator __P((ipf_main_softc_t *, ipftoken_t *, ipfgeniter_t *, ipfobj_t *)); static int ipf_nat_match __P((fr_info_t *, ipnat_t *)); static int ipf_nat_matcharray __P((nat_t *, int *, u_long)); static int ipf_nat_matchflush __P((ipf_main_softc_t *, ipf_nat_softc_t *, caddr_t)); static void ipf_nat_mssclamp __P((tcphdr_t *, u_32_t, fr_info_t *, u_short *)); static int ipf_nat_newmap __P((fr_info_t *, nat_t *, natinfo_t *)); static int ipf_nat_newdivert __P((fr_info_t *, nat_t *, natinfo_t *)); static int ipf_nat_newrdr __P((fr_info_t *, nat_t *, natinfo_t *)); static int ipf_nat_newrewrite __P((fr_info_t *, nat_t *, natinfo_t *)); static int ipf_nat_nextaddr __P((fr_info_t *, nat_addr_t *, u_32_t *, u_32_t *)); static int ipf_nat_nextaddrinit __P((ipf_main_softc_t *, char *, nat_addr_t *, int, void *)); static int ipf_nat_resolverule __P((ipf_main_softc_t *, ipnat_t *)); static int ipf_nat_ruleaddrinit __P((ipf_main_softc_t *, ipf_nat_softc_t *, ipnat_t *)); static void ipf_nat_rule_fini __P((ipf_main_softc_t *, ipnat_t *)); static int ipf_nat_rule_init __P((ipf_main_softc_t *, ipf_nat_softc_t *, ipnat_t *)); static int ipf_nat_siocaddnat __P((ipf_main_softc_t *, ipf_nat_softc_t *, ipnat_t *, int)); static void ipf_nat_siocdelnat __P((ipf_main_softc_t *, ipf_nat_softc_t *, ipnat_t *, int)); static void ipf_nat_tabmove __P((ipf_nat_softc_t *, nat_t *)); /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_main_load */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: Nil */ /* */ /* The only global NAT structure that needs to be initialised is the filter */ /* rule that is used with blocking packets. */ /* ------------------------------------------------------------------------ */ int ipf_nat_main_load() { - bzero((char *)&ipfnatblock, sizeof(ipfnatblock)); - ipfnatblock.fr_flags = FR_BLOCK|FR_QUICK; - ipfnatblock.fr_ref = 1; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_main_unload */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: Nil */ /* */ /* A null-op function that exists as a placeholder so that the flow in */ /* other functions is obvious. */ /* ------------------------------------------------------------------------ */ int ipf_nat_main_unload() { return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_soft_create */ /* Returns: void * - NULL = failure, else pointer to NAT context */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Allocate the initial soft context structure for NAT and populate it with */ /* some default values. Creating the tables is left until we call _init so */ /* that sizes can be changed before we get under way. */ /* ------------------------------------------------------------------------ */ void * ipf_nat_soft_create(softc) ipf_main_softc_t *softc; { ipf_nat_softc_t *softn; KMALLOC(softn, ipf_nat_softc_t *); if (softn == NULL) return NULL; bzero((char *)softn, sizeof(*softn)); softn->ipf_nat_tune = ipf_tune_array_copy(softn, sizeof(ipf_nat_tuneables), ipf_nat_tuneables); if (softn->ipf_nat_tune == NULL) { ipf_nat_soft_destroy(softc, softn); return NULL; } if (ipf_tune_array_link(softc, softn->ipf_nat_tune) == -1) { ipf_nat_soft_destroy(softc, softn); return NULL; } softn->ipf_nat_list_tail = &softn->ipf_nat_list; softn->ipf_nat_table_max = NAT_TABLE_MAX; softn->ipf_nat_table_sz = NAT_TABLE_SZ; softn->ipf_nat_maprules_sz = NAT_SIZE; softn->ipf_nat_rdrrules_sz = RDR_SIZE; softn->ipf_nat_hostmap_sz = HOSTMAP_SIZE; softn->ipf_nat_doflush = 0; #ifdef IPFILTER_LOG softn->ipf_nat_logging = 1; #else softn->ipf_nat_logging = 0; #endif softn->ipf_nat_defage = DEF_NAT_AGE; softn->ipf_nat_defipage = IPF_TTLVAL(60); softn->ipf_nat_deficmpage = IPF_TTLVAL(3); softn->ipf_nat_table_wm_high = 99; softn->ipf_nat_table_wm_low = 90; return softn; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_soft_destroy */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* ------------------------------------------------------------------------ */ void ipf_nat_soft_destroy(softc, arg) ipf_main_softc_t *softc; void *arg; { ipf_nat_softc_t *softn = arg; if (softn->ipf_nat_tune != NULL) { ipf_tune_array_unlink(softc, softn->ipf_nat_tune); KFREES(softn->ipf_nat_tune, sizeof(ipf_nat_tuneables)); softn->ipf_nat_tune = NULL; } KFREE(softn); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_init */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Initialise all of the NAT locks, tables and other structures. */ /* ------------------------------------------------------------------------ */ int ipf_nat_soft_init(softc, arg) ipf_main_softc_t *softc; void *arg; { ipf_nat_softc_t *softn = arg; ipftq_t *tq; int i; KMALLOCS(softn->ipf_nat_table[0], nat_t **, \ sizeof(nat_t *) * softn->ipf_nat_table_sz); if (softn->ipf_nat_table[0] != NULL) { bzero((char *)softn->ipf_nat_table[0], softn->ipf_nat_table_sz * sizeof(nat_t *)); } else { return -1; } KMALLOCS(softn->ipf_nat_table[1], nat_t **, \ sizeof(nat_t *) * softn->ipf_nat_table_sz); if (softn->ipf_nat_table[1] != NULL) { bzero((char *)softn->ipf_nat_table[1], softn->ipf_nat_table_sz * sizeof(nat_t *)); } else { return -2; } KMALLOCS(softn->ipf_nat_map_rules, ipnat_t **, \ sizeof(ipnat_t *) * softn->ipf_nat_maprules_sz); if (softn->ipf_nat_map_rules != NULL) { bzero((char *)softn->ipf_nat_map_rules, softn->ipf_nat_maprules_sz * sizeof(ipnat_t *)); } else { return -3; } KMALLOCS(softn->ipf_nat_rdr_rules, ipnat_t **, \ sizeof(ipnat_t *) * softn->ipf_nat_rdrrules_sz); if (softn->ipf_nat_rdr_rules != NULL) { bzero((char *)softn->ipf_nat_rdr_rules, softn->ipf_nat_rdrrules_sz * sizeof(ipnat_t *)); } else { return -4; } KMALLOCS(softn->ipf_hm_maptable, hostmap_t **, \ sizeof(hostmap_t *) * softn->ipf_nat_hostmap_sz); if (softn->ipf_hm_maptable != NULL) { bzero((char *)softn->ipf_hm_maptable, sizeof(hostmap_t *) * softn->ipf_nat_hostmap_sz); } else { return -5; } softn->ipf_hm_maplist = NULL; KMALLOCS(softn->ipf_nat_stats.ns_side[0].ns_bucketlen, u_int *, softn->ipf_nat_table_sz * sizeof(u_int)); if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen == NULL) { return -6; } bzero((char *)softn->ipf_nat_stats.ns_side[0].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); KMALLOCS(softn->ipf_nat_stats.ns_side[1].ns_bucketlen, u_int *, softn->ipf_nat_table_sz * sizeof(u_int)); if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen == NULL) { return -7; } bzero((char *)softn->ipf_nat_stats.ns_side[1].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); if (softn->ipf_nat_maxbucket == 0) { for (i = softn->ipf_nat_table_sz; i > 0; i >>= 1) softn->ipf_nat_maxbucket++; softn->ipf_nat_maxbucket *= 2; } ipf_sttab_init(softc, softn->ipf_nat_tcptq); /* * Increase this because we may have "keep state" following this too * and packet storms can occur if this is removed too quickly. */ softn->ipf_nat_tcptq[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcplastack; softn->ipf_nat_tcptq[IPF_TCP_NSTATES - 1].ifq_next = &softn->ipf_nat_udptq; IPFTQ_INIT(&softn->ipf_nat_udptq, softn->ipf_nat_defage, "nat ipftq udp tab"); softn->ipf_nat_udptq.ifq_next = &softn->ipf_nat_udpacktq; IPFTQ_INIT(&softn->ipf_nat_udpacktq, softn->ipf_nat_defage, "nat ipftq udpack tab"); softn->ipf_nat_udpacktq.ifq_next = &softn->ipf_nat_icmptq; IPFTQ_INIT(&softn->ipf_nat_icmptq, softn->ipf_nat_deficmpage, "nat icmp ipftq tab"); softn->ipf_nat_icmptq.ifq_next = &softn->ipf_nat_icmpacktq; IPFTQ_INIT(&softn->ipf_nat_icmpacktq, softn->ipf_nat_defage, "nat icmpack ipftq tab"); softn->ipf_nat_icmpacktq.ifq_next = &softn->ipf_nat_iptq; IPFTQ_INIT(&softn->ipf_nat_iptq, softn->ipf_nat_defipage, "nat ip ipftq tab"); softn->ipf_nat_iptq.ifq_next = &softn->ipf_nat_pending; IPFTQ_INIT(&softn->ipf_nat_pending, 1, "nat pending ipftq tab"); softn->ipf_nat_pending.ifq_next = NULL; for (i = 0, tq = softn->ipf_nat_tcptq; i < IPF_TCP_NSTATES; i++, tq++) { if (tq->ifq_ttl < softn->ipf_nat_deficmpage) tq->ifq_ttl = softn->ipf_nat_deficmpage; #ifdef LARGE_NAT else if (tq->ifq_ttl > softn->ipf_nat_defage) tq->ifq_ttl = softn->ipf_nat_defage; #endif } /* * Increase this because we may have "keep state" following * this too and packet storms can occur if this is removed * too quickly. */ softn->ipf_nat_tcptq[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcplastack; MUTEX_INIT(&softn->ipf_nat_new, "ipf nat new mutex"); MUTEX_INIT(&softn->ipf_nat_io, "ipf nat io mutex"); softn->ipf_nat_inited = 1; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_soft_fini */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Free all memory used by NAT structures allocated at runtime. */ /* ------------------------------------------------------------------------ */ int ipf_nat_soft_fini(softc, arg) ipf_main_softc_t *softc; void *arg; { ipf_nat_softc_t *softn = arg; ipftq_t *ifq, *ifqnext; (void) ipf_nat_clearlist(softc, softn); (void) ipf_nat_flushtable(softc, softn); /* * Proxy timeout queues are not cleaned here because although they * exist on the NAT list, ipf_proxy_unload is called after unload * and the proxies actually are responsible for them being created. * Should the proxy timeouts have their own list? There's no real * justification as this is the only complication. */ for (ifq = softn->ipf_nat_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; if (ipf_deletetimeoutqueue(ifq) == 0) ipf_freetimeoutqueue(softc, ifq); } if (softn->ipf_nat_table[0] != NULL) { KFREES(softn->ipf_nat_table[0], sizeof(nat_t *) * softn->ipf_nat_table_sz); softn->ipf_nat_table[0] = NULL; } if (softn->ipf_nat_table[1] != NULL) { KFREES(softn->ipf_nat_table[1], sizeof(nat_t *) * softn->ipf_nat_table_sz); softn->ipf_nat_table[1] = NULL; } if (softn->ipf_nat_map_rules != NULL) { KFREES(softn->ipf_nat_map_rules, sizeof(ipnat_t *) * softn->ipf_nat_maprules_sz); softn->ipf_nat_map_rules = NULL; } if (softn->ipf_nat_rdr_rules != NULL) { KFREES(softn->ipf_nat_rdr_rules, sizeof(ipnat_t *) * softn->ipf_nat_rdrrules_sz); softn->ipf_nat_rdr_rules = NULL; } if (softn->ipf_hm_maptable != NULL) { KFREES(softn->ipf_hm_maptable, sizeof(hostmap_t *) * softn->ipf_nat_hostmap_sz); softn->ipf_hm_maptable = NULL; } if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side[0].ns_bucketlen, sizeof(u_int) * softn->ipf_nat_table_sz); softn->ipf_nat_stats.ns_side[0].ns_bucketlen = NULL; } if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side[1].ns_bucketlen, sizeof(u_int) * softn->ipf_nat_table_sz); softn->ipf_nat_stats.ns_side[1].ns_bucketlen = NULL; } if (softn->ipf_nat_inited == 1) { softn->ipf_nat_inited = 0; ipf_sttab_destroy(softn->ipf_nat_tcptq); MUTEX_DESTROY(&softn->ipf_nat_new); MUTEX_DESTROY(&softn->ipf_nat_io); MUTEX_DESTROY(&softn->ipf_nat_udptq.ifq_lock); MUTEX_DESTROY(&softn->ipf_nat_udpacktq.ifq_lock); MUTEX_DESTROY(&softn->ipf_nat_icmptq.ifq_lock); MUTEX_DESTROY(&softn->ipf_nat_icmpacktq.ifq_lock); MUTEX_DESTROY(&softn->ipf_nat_iptq.ifq_lock); MUTEX_DESTROY(&softn->ipf_nat_pending.ifq_lock); } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_setlock */ /* Returns: Nil */ /* Parameters: arg(I) - pointer to soft state information */ /* tmp(I) - new lock value */ /* */ /* Set the "lock status" of NAT to the value in tmp. */ /* ------------------------------------------------------------------------ */ void ipf_nat_setlock(arg, tmp) void *arg; int tmp; { ipf_nat_softc_t *softn = arg; softn->ipf_nat_lock = tmp; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_addrdr */ /* Returns: Nil */ /* Parameters: n(I) - pointer to NAT rule to add */ /* */ /* Adds a redirect rule to the hash table of redirect rules and the list of */ /* loaded NAT rules. Updates the bitmask indicating which netmasks are in */ /* use by redirect rules. */ /* ------------------------------------------------------------------------ */ static void ipf_nat_addrdr(softn, n) ipf_nat_softc_t *softn; ipnat_t *n; { ipnat_t **np; u_32_t j; u_int hv; u_int rhv; int k; if (n->in_odstatype == FRI_NORMAL) { k = count4bits(n->in_odstmsk); ipf_inet_mask_add(k, &softn->ipf_nat_rdr_mask); j = (n->in_odstaddr & n->in_odstmsk); rhv = NAT_HASH_FN(j, 0, 0xffffffff); } else { ipf_inet_mask_add(0, &softn->ipf_nat_rdr_mask); j = 0; rhv = 0; } hv = rhv % softn->ipf_nat_rdrrules_sz; np = softn->ipf_nat_rdr_rules + hv; while (*np != NULL) np = &(*np)->in_rnext; n->in_rnext = NULL; n->in_prnext = np; n->in_hv[0] = hv; n->in_use++; *np = n; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_addmap */ /* Returns: Nil */ /* Parameters: n(I) - pointer to NAT rule to add */ /* */ /* Adds a NAT map rule to the hash table of rules and the list of loaded */ /* NAT rules. Updates the bitmask indicating which netmasks are in use by */ /* redirect rules. */ /* ------------------------------------------------------------------------ */ static void ipf_nat_addmap(softn, n) ipf_nat_softc_t *softn; ipnat_t *n; { ipnat_t **np; u_32_t j; u_int hv; u_int rhv; int k; if (n->in_osrcatype == FRI_NORMAL) { k = count4bits(n->in_osrcmsk); ipf_inet_mask_add(k, &softn->ipf_nat_map_mask); j = (n->in_osrcaddr & n->in_osrcmsk); rhv = NAT_HASH_FN(j, 0, 0xffffffff); } else { ipf_inet_mask_add(0, &softn->ipf_nat_map_mask); j = 0; rhv = 0; } hv = rhv % softn->ipf_nat_maprules_sz; np = softn->ipf_nat_map_rules + hv; while (*np != NULL) np = &(*np)->in_mnext; n->in_mnext = NULL; n->in_pmnext = np; n->in_hv[1] = rhv; n->in_use++; *np = n; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_delrdr */ /* Returns: Nil */ /* Parameters: n(I) - pointer to NAT rule to delete */ /* */ /* Removes a redirect rule from the hash table of redirect rules. */ /* ------------------------------------------------------------------------ */ void ipf_nat_delrdr(softn, n) ipf_nat_softc_t *softn; ipnat_t *n; { if (n->in_odstatype == FRI_NORMAL) { int k = count4bits(n->in_odstmsk); ipf_inet_mask_del(k, &softn->ipf_nat_rdr_mask); } else { ipf_inet_mask_del(0, &softn->ipf_nat_rdr_mask); } if (n->in_rnext) n->in_rnext->in_prnext = n->in_prnext; *n->in_prnext = n->in_rnext; n->in_use--; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_delmap */ /* Returns: Nil */ /* Parameters: n(I) - pointer to NAT rule to delete */ /* */ /* Removes a NAT map rule from the hash table of NAT map rules. */ /* ------------------------------------------------------------------------ */ void ipf_nat_delmap(softn, n) ipf_nat_softc_t *softn; ipnat_t *n; { if (n->in_osrcatype == FRI_NORMAL) { int k = count4bits(n->in_osrcmsk); ipf_inet_mask_del(k, &softn->ipf_nat_map_mask); } else { ipf_inet_mask_del(0, &softn->ipf_nat_map_mask); } if (n->in_mnext != NULL) n->in_mnext->in_pmnext = n->in_pmnext; *n->in_pmnext = n->in_mnext; n->in_use--; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_hostmap */ /* Returns: struct hostmap* - NULL if no hostmap could be created, */ /* else a pointer to the hostmapping to use */ /* Parameters: np(I) - pointer to NAT rule */ /* real(I) - real IP address */ /* map(I) - mapped IP address */ /* port(I) - destination port number */ /* Write Locks: ipf_nat */ /* */ /* Check if an ip address has already been allocated for a given mapping */ /* that is not doing port based translation. If is not yet allocated, then */ /* create a new entry if a non-NULL NAT rule pointer has been supplied. */ /* ------------------------------------------------------------------------ */ static struct hostmap * ipf_nat_hostmap(softn, np, src, dst, map, port) ipf_nat_softc_t *softn; ipnat_t *np; struct in_addr src; struct in_addr dst; struct in_addr map; u_32_t port; { hostmap_t *hm; u_int hv, rhv; hv = (src.s_addr ^ dst.s_addr); hv += src.s_addr; hv += dst.s_addr; rhv = hv; hv %= softn->ipf_nat_hostmap_sz; for (hm = softn->ipf_hm_maptable[hv]; hm; hm = hm->hm_hnext) if ((hm->hm_osrcip.s_addr == src.s_addr) && (hm->hm_odstip.s_addr == dst.s_addr) && ((np == NULL) || (np == hm->hm_ipnat)) && ((port == 0) || (port == hm->hm_port))) { softn->ipf_nat_stats.ns_hm_addref++; hm->hm_ref++; return hm; } if (np == NULL) { softn->ipf_nat_stats.ns_hm_nullnp++; return NULL; } KMALLOC(hm, hostmap_t *); if (hm) { hm->hm_next = softn->ipf_hm_maplist; hm->hm_pnext = &softn->ipf_hm_maplist; if (softn->ipf_hm_maplist != NULL) softn->ipf_hm_maplist->hm_pnext = &hm->hm_next; softn->ipf_hm_maplist = hm; hm->hm_hnext = softn->ipf_hm_maptable[hv]; hm->hm_phnext = softn->ipf_hm_maptable + hv; if (softn->ipf_hm_maptable[hv] != NULL) softn->ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext; softn->ipf_hm_maptable[hv] = hm; hm->hm_ipnat = np; np->in_use++; hm->hm_osrcip = src; hm->hm_odstip = dst; hm->hm_nsrcip = map; hm->hm_ndstip.s_addr = 0; hm->hm_ref = 1; hm->hm_port = port; hm->hm_hv = rhv; hm->hm_v = 4; softn->ipf_nat_stats.ns_hm_new++; } else { softn->ipf_nat_stats.ns_hm_newfail++; } return hm; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_hostmapdel */ /* Returns: Nil */ /* Parameters: hmp(I) - pointer to hostmap structure pointer */ /* Write Locks: ipf_nat */ /* */ /* Decrement the references to this hostmap structure by one. If this */ /* reaches zero then remove it and free it. */ /* ------------------------------------------------------------------------ */ void ipf_nat_hostmapdel(softc, hmp) ipf_main_softc_t *softc; struct hostmap **hmp; { struct hostmap *hm; hm = *hmp; *hmp = NULL; hm->hm_ref--; if (hm->hm_ref == 0) { ipf_nat_rule_deref(softc, &hm->hm_ipnat); if (hm->hm_hnext) hm->hm_hnext->hm_phnext = hm->hm_phnext; *hm->hm_phnext = hm->hm_hnext; if (hm->hm_next) hm->hm_next->hm_pnext = hm->hm_pnext; *hm->hm_pnext = hm->hm_next; KFREE(hm); } } /* ------------------------------------------------------------------------ */ /* Function: ipf_fix_outcksum */ /* Returns: Nil */ /* Parameters: fin(I) - pointer to packet information */ /* sp(I) - location of 16bit checksum to update */ /* n((I) - amount to adjust checksum by */ /* */ /* Adjusts the 16bit checksum by "n" for packets going out. */ /* ------------------------------------------------------------------------ */ void ipf_fix_outcksum(cksum, sp, n, partial) int cksum; u_short *sp; u_32_t n, partial; { u_short sumshort; u_32_t sum1; if (n == 0) return; if (cksum == 4) { *sp = 0; return; } if (cksum == 2) { sum1 = partial; sum1 = (sum1 & 0xffff) + (sum1 >> 16); *sp = htons(sum1); return; } sum1 = (~ntohs(*sp)) & 0xffff; sum1 += (n); sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ sum1 = (sum1 >> 16) + (sum1 & 0xffff); sumshort = ~(u_short)sum1; *(sp) = htons(sumshort); } /* ------------------------------------------------------------------------ */ /* Function: ipf_fix_incksum */ /* Returns: Nil */ /* Parameters: fin(I) - pointer to packet information */ /* sp(I) - location of 16bit checksum to update */ /* n((I) - amount to adjust checksum by */ /* */ /* Adjusts the 16bit checksum by "n" for packets going in. */ /* ------------------------------------------------------------------------ */ void ipf_fix_incksum(cksum, sp, n, partial) int cksum; u_short *sp; u_32_t n, partial; { u_short sumshort; u_32_t sum1; if (n == 0) return; if (cksum == 4) { *sp = 0; return; } if (cksum == 2) { sum1 = partial; sum1 = (sum1 & 0xffff) + (sum1 >> 16); *sp = htons(sum1); return; } sum1 = (~ntohs(*sp)) & 0xffff; sum1 += ~(n) & 0xffff; sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ sum1 = (sum1 >> 16) + (sum1 & 0xffff); sumshort = ~(u_short)sum1; *(sp) = htons(sumshort); } /* ------------------------------------------------------------------------ */ /* Function: ipf_fix_datacksum */ /* Returns: Nil */ /* Parameters: sp(I) - location of 16bit checksum to update */ /* n((I) - amount to adjust checksum by */ /* */ /* Fix_datacksum is used *only* for the adjustments of checksums in the */ /* data section of an IP packet. */ /* */ /* The only situation in which you need to do this is when NAT'ing an */ /* ICMP error message. Such a message, contains in its body the IP header */ /* of the original IP packet, that causes the error. */ /* */ /* You can't use fix_incksum or fix_outcksum in that case, because for the */ /* kernel the data section of the ICMP error is just data, and no special */ /* processing like hardware cksum or ntohs processing have been done by the */ /* kernel on the data section. */ /* ------------------------------------------------------------------------ */ void ipf_fix_datacksum(sp, n) u_short *sp; u_32_t n; { u_short sumshort; u_32_t sum1; if (n == 0) return; sum1 = (~ntohs(*sp)) & 0xffff; sum1 += (n); sum1 = (sum1 >> 16) + (sum1 & 0xffff); /* Again */ sum1 = (sum1 >> 16) + (sum1 & 0xffff); sumshort = ~(u_short)sum1; *(sp) = htons(sumshort); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_ioctl */ /* Returns: int - 0 == success, != 0 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* data(I) - pointer to ioctl data */ /* cmd(I) - ioctl command integer */ /* mode(I) - file mode bits used with open */ /* uid(I) - uid of calling process */ /* ctx(I) - pointer used as key for finding context */ /* */ /* Processes an ioctl call made to operate on the IP Filter NAT device. */ /* ------------------------------------------------------------------------ */ int ipf_nat_ioctl(softc, data, cmd, mode, uid, ctx) ipf_main_softc_t *softc; ioctlcmd_t cmd; caddr_t data; int mode, uid; void *ctx; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; int error = 0, ret, arg, getlock; ipnat_t *nat, *nt, *n; ipnat_t natd; SPL_INT(s); #if BSD_GE_YEAR(199306) && defined(_KERNEL) # if NETBSD_GE_REV(399002000) if ((mode & FWRITE) && kauth_authorize_network(curlwp->l_cred, KAUTH_NETWORK_FIREWALL, KAUTH_REQ_NETWORK_FIREWALL_FW, NULL, NULL, NULL)) # else # if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034) if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) # else if ((securelevel >= 3) && (mode & FWRITE)) # endif # endif { IPFERROR(60001); return EPERM; } #endif #if defined(__osf__) && defined(_KERNEL) getlock = 0; #else getlock = (mode & NAT_LOCKHELD) ? 0 : 1; #endif n = NULL; nt = NULL; nat = NULL; if ((cmd == (ioctlcmd_t)SIOCADNAT) || (cmd == (ioctlcmd_t)SIOCRMNAT) || (cmd == (ioctlcmd_t)SIOCPURGENAT)) { if (mode & NAT_SYSSPACE) { bcopy(data, (char *)&natd, sizeof(natd)); nat = &natd; error = 0; } else { bzero(&natd, sizeof(natd)); error = ipf_inobj(softc, data, NULL, &natd, IPFOBJ_IPNAT); if (error != 0) goto done; if (natd.in_size < sizeof(ipnat_t)) { error = EINVAL; goto done; } KMALLOCS(nt, ipnat_t *, natd.in_size); if (nt == NULL) { IPFERROR(60070); error = ENOMEM; goto done; } bzero(nt, natd.in_size); error = ipf_inobjsz(softc, data, nt, IPFOBJ_IPNAT, natd.in_size); if (error) goto done; nat = nt; } /* * For add/delete, look to see if the NAT entry is * already present */ nat->in_flags &= IPN_USERFLAGS; if ((nat->in_redir & NAT_MAPBLK) == 0) { if (nat->in_osrcatype == FRI_NORMAL || nat->in_osrcatype == FRI_NONE) nat->in_osrcaddr &= nat->in_osrcmsk; if (nat->in_odstatype == FRI_NORMAL || nat->in_odstatype == FRI_NONE) nat->in_odstaddr &= nat->in_odstmsk; if ((nat->in_flags & (IPN_SPLIT|IPN_SIPRANGE)) == 0) { if (nat->in_nsrcatype == FRI_NORMAL) nat->in_nsrcaddr &= nat->in_nsrcmsk; if (nat->in_ndstatype == FRI_NORMAL) nat->in_ndstaddr &= nat->in_ndstmsk; } } error = ipf_nat_rule_init(softc, softn, nat); if (error != 0) goto done; MUTEX_ENTER(&softn->ipf_nat_io); for (n = softn->ipf_nat_list; n != NULL; n = n->in_next) if (ipf_nat_cmp_rules(nat, n) == 0) break; } switch (cmd) { #ifdef IPFILTER_LOG case SIOCIPFFB : { int tmp; if (!(mode & FWRITE)) { IPFERROR(60002); error = EPERM; } else { tmp = ipf_log_clear(softc, IPL_LOGNAT); error = BCOPYOUT(&tmp, data, sizeof(tmp)); if (error != 0) { IPFERROR(60057); error = EFAULT; } } break; } case SIOCSETLG : if (!(mode & FWRITE)) { IPFERROR(60003); error = EPERM; } else { error = BCOPYIN(data, &softn->ipf_nat_logging, sizeof(softn->ipf_nat_logging)); if (error != 0) error = EFAULT; } break; case SIOCGETLG : error = BCOPYOUT(&softn->ipf_nat_logging, data, sizeof(softn->ipf_nat_logging)); if (error != 0) { IPFERROR(60004); error = EFAULT; } break; case FIONREAD : arg = ipf_log_bytesused(softc, IPL_LOGNAT); error = BCOPYOUT(&arg, data, sizeof(arg)); if (error != 0) { IPFERROR(60005); error = EFAULT; } break; #endif case SIOCADNAT : if (!(mode & FWRITE)) { IPFERROR(60006); error = EPERM; } else if (n != NULL) { natd.in_flineno = n->in_flineno; (void) ipf_outobj(softc, data, &natd, IPFOBJ_IPNAT); IPFERROR(60007); error = EEXIST; } else if (nt == NULL) { IPFERROR(60008); error = ENOMEM; } if (error != 0) { MUTEX_EXIT(&softn->ipf_nat_io); break; } if (nat != nt) bcopy((char *)nat, (char *)nt, sizeof(*n)); error = ipf_nat_siocaddnat(softc, softn, nt, getlock); MUTEX_EXIT(&softn->ipf_nat_io); if (error == 0) { nat = NULL; nt = NULL; } break; case SIOCRMNAT : case SIOCPURGENAT : if (!(mode & FWRITE)) { IPFERROR(60009); error = EPERM; n = NULL; } else if (n == NULL) { IPFERROR(60010); error = ESRCH; } if (error != 0) { MUTEX_EXIT(&softn->ipf_nat_io); break; } if (cmd == (ioctlcmd_t)SIOCPURGENAT) { error = ipf_outobjsz(softc, data, n, IPFOBJ_IPNAT, n->in_size); if (error) { MUTEX_EXIT(&softn->ipf_nat_io); goto done; } n->in_flags |= IPN_PURGE; } ipf_nat_siocdelnat(softc, softn, n, getlock); MUTEX_EXIT(&softn->ipf_nat_io); n = NULL; break; case SIOCGNATS : { natstat_t *nsp = &softn->ipf_nat_stats; nsp->ns_side[0].ns_table = softn->ipf_nat_table[0]; nsp->ns_side[1].ns_table = softn->ipf_nat_table[1]; nsp->ns_list = softn->ipf_nat_list; nsp->ns_maptable = softn->ipf_hm_maptable; nsp->ns_maplist = softn->ipf_hm_maplist; nsp->ns_nattab_sz = softn->ipf_nat_table_sz; nsp->ns_nattab_max = softn->ipf_nat_table_max; nsp->ns_rultab_sz = softn->ipf_nat_maprules_sz; nsp->ns_rdrtab_sz = softn->ipf_nat_rdrrules_sz; nsp->ns_hostmap_sz = softn->ipf_nat_hostmap_sz; nsp->ns_instances = softn->ipf_nat_instances; nsp->ns_ticks = softc->ipf_ticks; #ifdef IPFILTER_LOGGING nsp->ns_log_ok = ipf_log_logok(softc, IPF_LOGNAT); nsp->ns_log_fail = ipf_log_failures(softc, IPF_LOGNAT); #else nsp->ns_log_ok = 0; nsp->ns_log_fail = 0; #endif error = ipf_outobj(softc, data, nsp, IPFOBJ_NATSTAT); break; } case SIOCGNATL : { natlookup_t nl; error = ipf_inobj(softc, data, NULL, &nl, IPFOBJ_NATLOOKUP); if (error == 0) { void *ptr; if (getlock) { READ_ENTER(&softc->ipf_nat); } switch (nl.nl_v) { case 4 : ptr = ipf_nat_lookupredir(&nl); break; #ifdef USE_INET6 case 6 : ptr = ipf_nat6_lookupredir(&nl); break; #endif default: ptr = NULL; break; } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (ptr != NULL) { error = ipf_outobj(softc, data, &nl, IPFOBJ_NATLOOKUP); } else { IPFERROR(60011); error = ESRCH; } } break; } case SIOCIPFFL : /* old SIOCFLNAT & SIOCCNATL */ if (!(mode & FWRITE)) { IPFERROR(60012); error = EPERM; break; } if (getlock) { WRITE_ENTER(&softc->ipf_nat); } error = BCOPYIN(data, &arg, sizeof(arg)); if (error != 0) { IPFERROR(60013); error = EFAULT; } else { if (arg == 0) ret = ipf_nat_flushtable(softc, softn); else if (arg == 1) ret = ipf_nat_clearlist(softc, softn); else ret = ipf_nat_extraflush(softc, softn, arg); ipf_proxy_flush(softc->ipf_proxy_soft, arg); } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (error == 0) { error = BCOPYOUT(&ret, data, sizeof(ret)); } break; case SIOCMATCHFLUSH : if (!(mode & FWRITE)) { IPFERROR(60014); error = EPERM; break; } if (getlock) { WRITE_ENTER(&softc->ipf_nat); } error = ipf_nat_matchflush(softc, softn, data); if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } break; case SIOCPROXY : error = ipf_proxy_ioctl(softc, data, cmd, mode, ctx); break; case SIOCSTLCK : if (!(mode & FWRITE)) { IPFERROR(60015); error = EPERM; } else { error = ipf_lock(data, &softn->ipf_nat_lock); } break; case SIOCSTPUT : if ((mode & FWRITE) != 0) { error = ipf_nat_putent(softc, data, getlock); } else { IPFERROR(60016); error = EACCES; } break; case SIOCSTGSZ : if (softn->ipf_nat_lock) { error = ipf_nat_getsz(softc, data, getlock); } else { IPFERROR(60017); error = EACCES; } break; case SIOCSTGET : if (softn->ipf_nat_lock) { error = ipf_nat_getent(softc, data, getlock); } else { IPFERROR(60018); error = EACCES; } break; case SIOCGENITER : { ipfgeniter_t iter; ipftoken_t *token; ipfobj_t obj; error = ipf_inobj(softc, data, &obj, &iter, IPFOBJ_GENITER); if (error != 0) break; SPL_SCHED(s); token = ipf_token_find(softc, iter.igi_type, uid, ctx); if (token != NULL) { error = ipf_nat_iterator(softc, token, &iter, &obj); WRITE_ENTER(&softc->ipf_tokens); ipf_token_deref(softc, token); RWLOCK_EXIT(&softc->ipf_tokens); } SPL_X(s); break; } case SIOCIPFDELTOK : error = BCOPYIN(data, &arg, sizeof(arg)); if (error == 0) { SPL_SCHED(s); error = ipf_token_del(softc, arg, uid, ctx); SPL_X(s); } else { IPFERROR(60019); error = EFAULT; } break; case SIOCGTQTAB : error = ipf_outobj(softc, data, softn->ipf_nat_tcptq, IPFOBJ_STATETQTAB); break; case SIOCGTABL : error = ipf_nat_gettable(softc, softn, data); break; default : IPFERROR(60020); error = EINVAL; break; } done: if (nat != NULL) ipf_nat_rule_fini(softc, nat); if (nt != NULL) KFREES(nt, nt->in_size); return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_siocaddnat */ /* Returns: int - 0 == success, != 0 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* n(I) - pointer to new NAT rule */ /* np(I) - pointer to where to insert new NAT rule */ /* getlock(I) - flag indicating if lock on is held */ /* Mutex Locks: ipf_nat_io */ /* */ /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ /* from information passed to the kernel, then add it to the appropriate */ /* NAT rule table(s). */ /* ------------------------------------------------------------------------ */ static int ipf_nat_siocaddnat(softc, softn, n, getlock) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; ipnat_t *n; int getlock; { int error = 0; if (ipf_nat_resolverule(softc, n) != 0) { IPFERROR(60022); return ENOENT; } if ((n->in_age[0] == 0) && (n->in_age[1] != 0)) { IPFERROR(60023); return EINVAL; } if (n->in_redir == (NAT_DIVERTUDP|NAT_MAP)) { /* * Prerecord whether or not the destination of the divert * is local or not to the interface the packet is going * to be sent out. */ n->in_dlocal = ipf_deliverlocal(softc, n->in_v[1], n->in_ifps[1], &n->in_ndstip6); } if (getlock) { WRITE_ENTER(&softc->ipf_nat); } n->in_next = NULL; n->in_pnext = softn->ipf_nat_list_tail; *n->in_pnext = n; softn->ipf_nat_list_tail = &n->in_next; n->in_use++; if (n->in_redir & NAT_REDIRECT) { n->in_flags &= ~IPN_NOTDST; switch (n->in_v[0]) { case 4 : ipf_nat_addrdr(softn, n); break; #ifdef USE_INET6 case 6 : ipf_nat6_addrdr(softn, n); break; #endif default : break; } ATOMIC_INC32(softn->ipf_nat_stats.ns_rules_rdr); } if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { n->in_flags &= ~IPN_NOTSRC; switch (n->in_v[0]) { case 4 : ipf_nat_addmap(softn, n); break; #ifdef USE_INET6 case 6 : ipf_nat6_addmap(softn, n); break; #endif default : break; } ATOMIC_INC32(softn->ipf_nat_stats.ns_rules_map); } if (n->in_age[0] != 0) n->in_tqehead[0] = ipf_addtimeoutqueue(softc, &softn->ipf_nat_utqe, n->in_age[0]); if (n->in_age[1] != 0) n->in_tqehead[1] = ipf_addtimeoutqueue(softc, &softn->ipf_nat_utqe, n->in_age[1]); MUTEX_INIT(&n->in_lock, "ipnat rule lock"); n = NULL; ATOMIC_INC32(softn->ipf_nat_stats.ns_rules); #if SOLARIS && !defined(INSTANCES) pfil_delayed_copy = 0; #endif if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); /* WRITE */ } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_ruleaddrinit */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* n(I) - pointer to NAT rule */ /* */ /* Initialise all of the NAT address structures in a NAT rule. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_ruleaddrinit(softc, softn, n) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; ipnat_t *n; { int idx, error; if ((n->in_ndst.na_atype == FRI_LOOKUP) && (n->in_ndst.na_type != IPLT_DSTLIST)) { IPFERROR(60071); return EINVAL; } if ((n->in_nsrc.na_atype == FRI_LOOKUP) && (n->in_nsrc.na_type != IPLT_DSTLIST)) { IPFERROR(60069); return EINVAL; } if (n->in_redir == NAT_BIMAP) { n->in_ndstaddr = n->in_osrcaddr; n->in_ndstmsk = n->in_osrcmsk; n->in_odstaddr = n->in_nsrcaddr; n->in_odstmsk = n->in_nsrcmsk; } if (n->in_redir & NAT_REDIRECT) idx = 1; else idx = 0; /* * Initialise all of the address fields. */ error = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_osrc, 1, n->in_ifps[idx]); if (error != 0) return error; error = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_odst, 1, n->in_ifps[idx]); if (error != 0) return error; error = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_nsrc, 1, n->in_ifps[idx]); if (error != 0) return error; error = ipf_nat_nextaddrinit(softc, n->in_names, &n->in_ndst, 1, n->in_ifps[idx]); if (error != 0) return error; if (n->in_redir & NAT_DIVERTUDP) ipf_nat_builddivertmp(softn, n); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_resolvrule */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* n(I) - pointer to NAT rule */ /* */ /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ /* from information passed to the kernel, then add it to the appropriate */ /* NAT rule table(s). */ /* ------------------------------------------------------------------------ */ static int ipf_nat_resolverule(softc, n) ipf_main_softc_t *softc; ipnat_t *n; { char *base; base = n->in_names; n->in_ifps[0] = ipf_resolvenic(softc, base + n->in_ifnames[0], n->in_v[0]); if (n->in_ifnames[1] == -1) { n->in_ifnames[1] = n->in_ifnames[0]; n->in_ifps[1] = n->in_ifps[0]; } else { n->in_ifps[1] = ipf_resolvenic(softc, base + n->in_ifnames[1], n->in_v[1]); } if (n->in_plabel != -1) { if (n->in_redir & NAT_REDIRECT) n->in_apr = ipf_proxy_lookup(softc->ipf_proxy_soft, n->in_pr[0], base + n->in_plabel); else n->in_apr = ipf_proxy_lookup(softc->ipf_proxy_soft, n->in_pr[1], base + n->in_plabel); if (n->in_apr == NULL) return -1; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_siocdelnat */ /* Returns: int - 0 == success, != 0 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* n(I) - pointer to new NAT rule */ /* getlock(I) - flag indicating if lock on is held */ /* Mutex Locks: ipf_nat_io */ /* */ /* Handle SIOCADNAT. Resolve and calculate details inside the NAT rule */ /* from information passed to the kernel, then add it to the appropriate */ /* NAT rule table(s). */ /* ------------------------------------------------------------------------ */ static void ipf_nat_siocdelnat(softc, softn, n, getlock) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; ipnat_t *n; int getlock; { #ifdef IPF_NAT6 int i; #endif if (getlock) { WRITE_ENTER(&softc->ipf_nat); } ipf_nat_delrule(softc, softn, n, 1); if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); /* READ/WRITE */ } } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_getsz */ /* Returns: int - 0 == success, != 0 is the error value. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* data(I) - pointer to natget structure with kernel */ /* pointer get the size of. */ /* getlock(I) - flag indicating whether or not the caller */ /* holds a lock on ipf_nat */ /* */ /* Handle SIOCSTGSZ. */ /* Return the size of the nat list entry to be copied back to user space. */ /* The size of the entry is stored in the ng_sz field and the enture natget */ /* structure is copied back to the user. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_getsz(softc, data, getlock) ipf_main_softc_t *softc; caddr_t data; int getlock; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; ap_session_t *aps; nat_t *nat, *n; natget_t ng; int error; error = BCOPYIN(data, &ng, sizeof(ng)); if (error != 0) { IPFERROR(60024); return EFAULT; } if (getlock) { READ_ENTER(&softc->ipf_nat); } nat = ng.ng_ptr; if (!nat) { nat = softn->ipf_nat_instances; ng.ng_sz = 0; /* * Empty list so the size returned is 0. Simple. */ if (nat == NULL) { if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } error = BCOPYOUT(&ng, data, sizeof(ng)); if (error != 0) { IPFERROR(60025); return EFAULT; } return 0; } } else { /* * Make sure the pointer we're copying from exists in the * current list of entries. Security precaution to prevent * copying of random kernel data. */ for (n = softn->ipf_nat_instances; n; n = n->nat_next) if (n == nat) break; if (n == NULL) { if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } IPFERROR(60026); return ESRCH; } } /* * Incluse any space required for proxy data structures. */ ng.ng_sz = sizeof(nat_save_t); aps = nat->nat_aps; if (aps != NULL) { ng.ng_sz += sizeof(ap_session_t) - 4; if (aps->aps_data != 0) ng.ng_sz += aps->aps_psiz; } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } error = BCOPYOUT(&ng, data, sizeof(ng)); if (error != 0) { IPFERROR(60027); return EFAULT; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_getent */ /* Returns: int - 0 == success, != 0 is the error value. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* data(I) - pointer to natget structure with kernel pointer*/ /* to NAT structure to copy out. */ /* getlock(I) - flag indicating whether or not the caller */ /* holds a lock on ipf_nat */ /* */ /* Handle SIOCSTGET. */ /* Copies out NAT entry to user space. Any additional data held for a */ /* proxy is also copied, as to is the NAT rule which was responsible for it */ /* ------------------------------------------------------------------------ */ static int ipf_nat_getent(softc, data, getlock) ipf_main_softc_t *softc; caddr_t data; int getlock; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; int error, outsize; ap_session_t *aps; nat_save_t *ipn, ipns; nat_t *n, *nat; error = ipf_inobj(softc, data, NULL, &ipns, IPFOBJ_NATSAVE); if (error != 0) return error; if ((ipns.ipn_dsize < sizeof(ipns)) || (ipns.ipn_dsize > 81920)) { IPFERROR(60028); return EINVAL; } KMALLOCS(ipn, nat_save_t *, ipns.ipn_dsize); if (ipn == NULL) { IPFERROR(60029); return ENOMEM; } if (getlock) { READ_ENTER(&softc->ipf_nat); } ipn->ipn_dsize = ipns.ipn_dsize; nat = ipns.ipn_next; if (nat == NULL) { nat = softn->ipf_nat_instances; if (nat == NULL) { if (softn->ipf_nat_instances == NULL) { IPFERROR(60030); error = ENOENT; } goto finished; } } else { /* * Make sure the pointer we're copying from exists in the * current list of entries. Security precaution to prevent * copying of random kernel data. */ for (n = softn->ipf_nat_instances; n; n = n->nat_next) if (n == nat) break; if (n == NULL) { IPFERROR(60031); error = ESRCH; goto finished; } } ipn->ipn_next = nat->nat_next; /* * Copy the NAT structure. */ bcopy((char *)nat, &ipn->ipn_nat, sizeof(*nat)); /* * If we have a pointer to the NAT rule it belongs to, save that too. */ if (nat->nat_ptr != NULL) bcopy((char *)nat->nat_ptr, (char *)&ipn->ipn_ipnat, ipn->ipn_ipnat.in_size); /* * If we also know the NAT entry has an associated filter rule, * save that too. */ if (nat->nat_fr != NULL) bcopy((char *)nat->nat_fr, (char *)&ipn->ipn_fr, sizeof(ipn->ipn_fr)); /* * Last but not least, if there is an application proxy session set * up for this NAT entry, then copy that out too, including any * private data saved along side it by the proxy. */ aps = nat->nat_aps; outsize = ipn->ipn_dsize - sizeof(*ipn) + sizeof(ipn->ipn_data); if (aps != NULL) { char *s; if (outsize < sizeof(*aps)) { IPFERROR(60032); error = ENOBUFS; goto finished; } s = ipn->ipn_data; bcopy((char *)aps, s, sizeof(*aps)); s += sizeof(*aps); outsize -= sizeof(*aps); if ((aps->aps_data != NULL) && (outsize >= aps->aps_psiz)) bcopy(aps->aps_data, s, aps->aps_psiz); else { IPFERROR(60033); error = ENOBUFS; } } if (error == 0) { if (getlock) { READ_ENTER(&softc->ipf_nat); getlock = 0; } error = ipf_outobjsz(softc, data, ipn, IPFOBJ_NATSAVE, ipns.ipn_dsize); } finished: if (getlock) { READ_ENTER(&softc->ipf_nat); } if (ipn != NULL) { KFREES(ipn, ipns.ipn_dsize); } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_putent */ /* Returns: int - 0 == success, != 0 is the error value. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* data(I) - pointer to natget structure with NAT */ /* structure information to load into the kernel */ /* getlock(I) - flag indicating whether or not a write lock */ /* on is already held. */ /* */ /* Handle SIOCSTPUT. */ /* Loads a NAT table entry from user space, including a NAT rule, proxy and */ /* firewall rule data structures, if pointers to them indicate so. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_putent(softc, data, getlock) ipf_main_softc_t *softc; caddr_t data; int getlock; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; nat_save_t ipn, *ipnn; ap_session_t *aps; nat_t *n, *nat; frentry_t *fr; fr_info_t fin; ipnat_t *in; int error; error = ipf_inobj(softc, data, NULL, &ipn, IPFOBJ_NATSAVE); if (error != 0) return error; /* * Initialise early because of code at junkput label. */ n = NULL; in = NULL; aps = NULL; nat = NULL; ipnn = NULL; fr = NULL; /* * New entry, copy in the rest of the NAT entry if it's size is more * than just the nat_t structure. */ if (ipn.ipn_dsize > sizeof(ipn)) { if (ipn.ipn_dsize > 81920) { IPFERROR(60034); error = ENOMEM; goto junkput; } KMALLOCS(ipnn, nat_save_t *, ipn.ipn_dsize); if (ipnn == NULL) { IPFERROR(60035); return ENOMEM; } bzero(ipnn, ipn.ipn_dsize); error = ipf_inobjsz(softc, data, ipnn, IPFOBJ_NATSAVE, ipn.ipn_dsize); if (error != 0) { goto junkput; } } else ipnn = &ipn; KMALLOC(nat, nat_t *); if (nat == NULL) { IPFERROR(60037); error = ENOMEM; goto junkput; } bcopy((char *)&ipnn->ipn_nat, (char *)nat, sizeof(*nat)); switch (nat->nat_v[0]) { case 4: #ifdef USE_INET6 case 6 : #endif break; default : IPFERROR(60061); error = EPROTONOSUPPORT; goto junkput; /*NOTREACHED*/ } /* * Initialize all these so that ipf_nat_delete() doesn't cause a crash. */ bzero((char *)nat, offsetof(struct nat, nat_tqe)); nat->nat_tqe.tqe_pnext = NULL; nat->nat_tqe.tqe_next = NULL; nat->nat_tqe.tqe_ifq = NULL; nat->nat_tqe.tqe_parent = nat; /* * Restore the rule associated with this nat session */ in = ipnn->ipn_nat.nat_ptr; if (in != NULL) { KMALLOCS(in, ipnat_t *, ipnn->ipn_ipnat.in_size); nat->nat_ptr = in; if (in == NULL) { IPFERROR(60038); error = ENOMEM; goto junkput; } bcopy((char *)&ipnn->ipn_ipnat, (char *)in, ipnn->ipn_ipnat.in_size); in->in_use = 1; in->in_flags |= IPN_DELETE; ATOMIC_INC32(softn->ipf_nat_stats.ns_rules); if (ipf_nat_resolverule(softc, in) != 0) { IPFERROR(60039); error = ESRCH; goto junkput; } } /* * Check that the NAT entry doesn't already exist in the kernel. * * For NAT_OUTBOUND, we're lookup for a duplicate MAP entry. To do * this, we check to see if the inbound combination of addresses and * ports is already known. Similar logic is applied for NAT_INBOUND. * */ bzero((char *)&fin, sizeof(fin)); fin.fin_v = nat->nat_v[0]; fin.fin_p = nat->nat_pr[0]; fin.fin_rev = nat->nat_rev; fin.fin_ifp = nat->nat_ifps[0]; fin.fin_data[0] = ntohs(nat->nat_ndport); fin.fin_data[1] = ntohs(nat->nat_nsport); switch (nat->nat_dir) { case NAT_OUTBOUND : case NAT_DIVERTOUT : if (getlock) { READ_ENTER(&softc->ipf_nat); } fin.fin_v = nat->nat_v[1]; if (nat->nat_v[1] == 4) { n = ipf_nat_inlookup(&fin, nat->nat_flags, fin.fin_p, nat->nat_ndstip, nat->nat_nsrcip); #ifdef USE_INET6 } else if (nat->nat_v[1] == 6) { n = ipf_nat6_inlookup(&fin, nat->nat_flags, fin.fin_p, &nat->nat_ndst6.in6, &nat->nat_nsrc6.in6); #endif } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (n != NULL) { IPFERROR(60040); error = EEXIST; goto junkput; } break; case NAT_INBOUND : case NAT_DIVERTIN : if (getlock) { READ_ENTER(&softc->ipf_nat); } if (fin.fin_v == 4) { n = ipf_nat_outlookup(&fin, nat->nat_flags, fin.fin_p, nat->nat_ndstip, nat->nat_nsrcip); #ifdef USE_INET6 } else if (fin.fin_v == 6) { n = ipf_nat6_outlookup(&fin, nat->nat_flags, fin.fin_p, &nat->nat_ndst6.in6, &nat->nat_nsrc6.in6); #endif } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (n != NULL) { IPFERROR(60041); error = EEXIST; goto junkput; } break; default : IPFERROR(60042); error = EINVAL; goto junkput; } /* * Restore ap_session_t structure. Include the private data allocated * if it was there. */ aps = nat->nat_aps; if (aps != NULL) { KMALLOC(aps, ap_session_t *); nat->nat_aps = aps; if (aps == NULL) { IPFERROR(60043); error = ENOMEM; goto junkput; } bcopy(ipnn->ipn_data, (char *)aps, sizeof(*aps)); if (in != NULL) aps->aps_apr = in->in_apr; else aps->aps_apr = NULL; if (aps->aps_psiz != 0) { if (aps->aps_psiz > 81920) { IPFERROR(60044); error = ENOMEM; goto junkput; } KMALLOCS(aps->aps_data, void *, aps->aps_psiz); if (aps->aps_data == NULL) { IPFERROR(60045); error = ENOMEM; goto junkput; } bcopy(ipnn->ipn_data + sizeof(*aps), aps->aps_data, aps->aps_psiz); } else { aps->aps_psiz = 0; aps->aps_data = NULL; } } /* * If there was a filtering rule associated with this entry then * build up a new one. */ fr = nat->nat_fr; if (fr != NULL) { if ((nat->nat_flags & SI_NEWFR) != 0) { KMALLOC(fr, frentry_t *); nat->nat_fr = fr; if (fr == NULL) { IPFERROR(60046); error = ENOMEM; goto junkput; } ipnn->ipn_nat.nat_fr = fr; fr->fr_ref = 1; (void) ipf_outobj(softc, data, ipnn, IPFOBJ_NATSAVE); bcopy((char *)&ipnn->ipn_fr, (char *)fr, sizeof(*fr)); fr->fr_ref = 1; fr->fr_dsize = 0; fr->fr_data = NULL; fr->fr_type = FR_T_NONE; MUTEX_NUKE(&fr->fr_lock); MUTEX_INIT(&fr->fr_lock, "nat-filter rule lock"); } else { if (getlock) { READ_ENTER(&softc->ipf_nat); } for (n = softn->ipf_nat_instances; n; n = n->nat_next) if (n->nat_fr == fr) break; if (n != NULL) { MUTEX_ENTER(&fr->fr_lock); fr->fr_ref++; MUTEX_EXIT(&fr->fr_lock); } if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (n == NULL) { IPFERROR(60047); error = ESRCH; goto junkput; } } } if (ipnn != &ipn) { KFREES(ipnn, ipn.ipn_dsize); ipnn = NULL; } if (getlock) { WRITE_ENTER(&softc->ipf_nat); } if (fin.fin_v == 4) error = ipf_nat_finalise(&fin, nat); #ifdef USE_INET6 else error = ipf_nat6_finalise(&fin, nat); #endif if (getlock) { RWLOCK_EXIT(&softc->ipf_nat); } if (error == 0) return 0; IPFERROR(60048); error = ENOMEM; junkput: if (fr != NULL) { (void) ipf_derefrule(softc, &fr); } if ((ipnn != NULL) && (ipnn != &ipn)) { KFREES(ipnn, ipn.ipn_dsize); } if (nat != NULL) { if (aps != NULL) { if (aps->aps_data != NULL) { KFREES(aps->aps_data, aps->aps_psiz); } KFREE(aps); } if (in != NULL) { if (in->in_apr) ipf_proxy_deref(in->in_apr); KFREES(in, in->in_size); } KFREE(nat); } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_delete */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* nat(I) - pointer to NAT structure to delete */ /* logtype(I) - type of LOG record to create before deleting */ /* Write Lock: ipf_nat */ /* */ /* Delete a nat entry from the various lists and table. If NAT logging is */ /* enabled then generate a NAT log record for this event. */ /* ------------------------------------------------------------------------ */ void ipf_nat_delete(softc, nat, logtype) ipf_main_softc_t *softc; struct nat *nat; int logtype; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; int madeorphan = 0, bkt, removed = 0; nat_stat_side_t *nss; struct ipnat *ipn; if (logtype != 0 && softn->ipf_nat_logging != 0) ipf_nat_log(softc, softn, nat, logtype); /* * Take it as a general indication that all the pointers are set if * nat_pnext is set. */ if (nat->nat_pnext != NULL) { removed = 1; bkt = nat->nat_hv[0] % softn->ipf_nat_table_sz; nss = &softn->ipf_nat_stats.ns_side[0]; nss->ns_bucketlen[bkt]--; if (nss->ns_bucketlen[bkt] == 0) { nss->ns_inuse--; } bkt = nat->nat_hv[1] % softn->ipf_nat_table_sz; nss = &softn->ipf_nat_stats.ns_side[1]; nss->ns_bucketlen[bkt]--; if (nss->ns_bucketlen[bkt] == 0) { nss->ns_inuse--; } *nat->nat_pnext = nat->nat_next; if (nat->nat_next != NULL) { nat->nat_next->nat_pnext = nat->nat_pnext; nat->nat_next = NULL; } nat->nat_pnext = NULL; *nat->nat_phnext[0] = nat->nat_hnext[0]; if (nat->nat_hnext[0] != NULL) { nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; nat->nat_hnext[0] = NULL; } nat->nat_phnext[0] = NULL; *nat->nat_phnext[1] = nat->nat_hnext[1]; if (nat->nat_hnext[1] != NULL) { nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; nat->nat_hnext[1] = NULL; } nat->nat_phnext[1] = NULL; if ((nat->nat_flags & SI_WILDP) != 0) { ATOMIC_DEC32(softn->ipf_nat_stats.ns_wilds); } madeorphan = 1; } if (nat->nat_me != NULL) { *nat->nat_me = NULL; nat->nat_me = NULL; nat->nat_ref--; ASSERT(nat->nat_ref >= 0); } if (nat->nat_tqe.tqe_ifq != NULL) { /* * No call to ipf_freetimeoutqueue() is made here, they are * garbage collected in ipf_nat_expire(). */ (void) ipf_deletequeueentry(&nat->nat_tqe); } if (nat->nat_sync) { ipf_sync_del_nat(softc->ipf_sync_soft, nat->nat_sync); nat->nat_sync = NULL; } if (logtype == NL_EXPIRE) softn->ipf_nat_stats.ns_expire++; MUTEX_ENTER(&nat->nat_lock); /* * NL_DESTROY should only be passed in when we've got nat_ref >= 2. * This happens when a nat'd packet is blocked and we want to throw * away the NAT session. */ if (logtype == NL_DESTROY) { if (nat->nat_ref > 2) { nat->nat_ref -= 2; MUTEX_EXIT(&nat->nat_lock); if (removed) softn->ipf_nat_stats.ns_orphans++; return; } } else if (nat->nat_ref > 1) { nat->nat_ref--; MUTEX_EXIT(&nat->nat_lock); if (madeorphan == 1) softn->ipf_nat_stats.ns_orphans++; return; } ASSERT(nat->nat_ref >= 0); MUTEX_EXIT(&nat->nat_lock); nat->nat_ref = 0; if (madeorphan == 0) softn->ipf_nat_stats.ns_orphans--; /* * At this point, nat_ref can be either 0 or -1 */ softn->ipf_nat_stats.ns_proto[nat->nat_pr[0]]--; if (nat->nat_fr != NULL) { (void) ipf_derefrule(softc, &nat->nat_fr); } if (nat->nat_hm != NULL) { ipf_nat_hostmapdel(softc, &nat->nat_hm); } /* * If there is an active reference from the nat entry to its parent * rule, decrement the rule's reference count and free it too if no * longer being used. */ ipn = nat->nat_ptr; nat->nat_ptr = NULL; if (ipn != NULL) { ipn->in_space++; ipf_nat_rule_deref(softc, &ipn); } if (nat->nat_aps != NULL) { ipf_proxy_free(softc, nat->nat_aps); nat->nat_aps = NULL; } MUTEX_DESTROY(&nat->nat_lock); softn->ipf_nat_stats.ns_active--; /* * If there's a fragment table entry too for this nat entry, then * dereference that as well. This is after nat_lock is released * because of Tru64. */ ipf_frag_natforget(softc, (void *)nat); KFREE(nat); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_flushtable */ /* Returns: int - number of NAT rules deleted */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* Write Lock: ipf_nat */ /* */ /* Deletes all currently active NAT sessions. In deleting each NAT entry a */ /* log record should be emitted in ipf_nat_delete() if NAT logging is */ /* enabled. */ /* ------------------------------------------------------------------------ */ /* * nat_flushtable - clear the NAT table of all mapping entries. */ static int ipf_nat_flushtable(softc, softn) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; { nat_t *nat; int j = 0; /* * ALL NAT mappings deleted, so lets just make the deletions * quicker. */ if (softn->ipf_nat_table[0] != NULL) bzero((char *)softn->ipf_nat_table[0], sizeof(softn->ipf_nat_table[0]) * softn->ipf_nat_table_sz); if (softn->ipf_nat_table[1] != NULL) bzero((char *)softn->ipf_nat_table[1], sizeof(softn->ipf_nat_table[1]) * softn->ipf_nat_table_sz); while ((nat = softn->ipf_nat_instances) != NULL) { ipf_nat_delete(softc, nat, NL_FLUSH); j++; } return j; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_clearlist */ /* Returns: int - number of NAT/RDR rules deleted */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* */ /* Delete all rules in the current list of rules. There is nothing elegant */ /* about this cleanup: simply free all entries on the list of rules and */ /* clear out the tables used for hashed NAT rule lookups. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_clearlist(softc, softn) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; { ipnat_t *n; int i = 0; if (softn->ipf_nat_map_rules != NULL) { bzero((char *)softn->ipf_nat_map_rules, sizeof(*softn->ipf_nat_map_rules) * softn->ipf_nat_maprules_sz); } if (softn->ipf_nat_rdr_rules != NULL) { bzero((char *)softn->ipf_nat_rdr_rules, sizeof(*softn->ipf_nat_rdr_rules) * softn->ipf_nat_rdrrules_sz); } while ((n = softn->ipf_nat_list) != NULL) { ipf_nat_delrule(softc, softn, n, 0); i++; } #if SOLARIS && !defined(INSTANCES) pfil_delayed_copy = 1; #endif return i; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_delrule */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* np(I) - pointer to NAT rule to delete */ /* purge(I) - 1 == allow purge, 0 == prevent purge */ /* Locks: WRITE(ipf_nat) */ /* */ /* Preventing "purge" from occuring is allowed because when all of the NAT */ /* rules are being removed, allowing the "purge" to walk through the list */ /* of NAT sessions, possibly multiple times, would be a large performance */ /* hit, on the order of O(N^2). */ /* ------------------------------------------------------------------------ */ static void ipf_nat_delrule(softc, softn, np, purge) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; ipnat_t *np; int purge; { if (np->in_pnext != NULL) { *np->in_pnext = np->in_next; if (np->in_next != NULL) np->in_next->in_pnext = np->in_pnext; if (softn->ipf_nat_list_tail == &np->in_next) softn->ipf_nat_list_tail = np->in_pnext; } if ((purge == 1) && ((np->in_flags & IPN_PURGE) != 0)) { nat_t *next; nat_t *nat; for (next = softn->ipf_nat_instances; (nat = next) != NULL;) { next = nat->nat_next; if (nat->nat_ptr == np) ipf_nat_delete(softc, nat, NL_PURGE); } } if ((np->in_flags & IPN_DELETE) == 0) { if (np->in_redir & NAT_REDIRECT) { switch (np->in_v[0]) { case 4 : ipf_nat_delrdr(softn, np); break; #ifdef USE_INET6 case 6 : ipf_nat6_delrdr(softn, np); break; #endif } } if (np->in_redir & (NAT_MAPBLK|NAT_MAP)) { switch (np->in_v[0]) { case 4 : ipf_nat_delmap(softn, np); break; #ifdef USE_INET6 case 6 : ipf_nat6_delmap(softn, np); break; #endif } } } np->in_flags |= IPN_DELETE; ipf_nat_rule_deref(softc, &np); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_newmap */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT entry */ /* ni(I) - pointer to structure with misc. information needed */ /* to create new NAT entry. */ /* */ /* Given an empty NAT structure, populate it with new information about a */ /* new NAT session, as defined by the matching NAT rule. */ /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ /* to the new IP address for the translation. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_newmap(fin, nat, ni) fr_info_t *fin; nat_t *nat; natinfo_t *ni; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short st_port, dport, sport, port, sp, dp; struct in_addr in, inb; hostmap_t *hm; u_32_t flags; u_32_t st_ip; ipnat_t *np; nat_t *natl; int l; /* * If it's an outbound packet which doesn't match any existing * record, then create a new port */ l = 0; hm = NULL; np = ni->nai_np; st_ip = np->in_snip; st_port = np->in_spnext; flags = nat->nat_flags; if (flags & IPN_ICMPQUERY) { sport = fin->fin_data[1]; dport = 0; } else { sport = htons(fin->fin_data[0]); dport = htons(fin->fin_data[1]); } /* * Do a loop until we either run out of entries to try or we find * a NAT mapping that isn't currently being used. This is done * because the change to the source is not (usually) being fixed. */ do { port = 0; in.s_addr = htonl(np->in_snip); if (l == 0) { /* * Check to see if there is an existing NAT * setup for this IP address pair. */ hm = ipf_nat_hostmap(softn, np, fin->fin_src, fin->fin_dst, in, 0); if (hm != NULL) in.s_addr = hm->hm_nsrcip.s_addr; } else if ((l == 1) && (hm != NULL)) { ipf_nat_hostmapdel(softc, &hm); } in.s_addr = ntohl(in.s_addr); nat->nat_hm = hm; if ((np->in_nsrcmsk == 0xffffffff) && (np->in_spnext == 0)) { if (l > 0) { NBUMPSIDEX(1, ns_exhausted, ns_exhausted_1); return -1; } } if (np->in_redir == NAT_BIMAP && np->in_osrcmsk == np->in_nsrcmsk) { /* * map the address block in a 1:1 fashion */ in.s_addr = np->in_nsrcaddr; in.s_addr |= fin->fin_saddr & ~np->in_osrcmsk; in.s_addr = ntohl(in.s_addr); } else if (np->in_redir & NAT_MAPBLK) { if ((l >= np->in_ppip) || ((l > 0) && !(flags & IPN_TCPUDP))) { NBUMPSIDEX(1, ns_exhausted, ns_exhausted_2); return -1; } /* * map-block - Calculate destination address. */ in.s_addr = ntohl(fin->fin_saddr); in.s_addr &= ntohl(~np->in_osrcmsk); inb.s_addr = in.s_addr; in.s_addr /= np->in_ippip; in.s_addr &= ntohl(~np->in_nsrcmsk); in.s_addr += ntohl(np->in_nsrcaddr); /* * Calculate destination port. */ if ((flags & IPN_TCPUDP) && (np->in_ppip != 0)) { port = ntohs(sport) + l; port %= np->in_ppip; port += np->in_ppip * (inb.s_addr % np->in_ippip); port += MAPBLK_MINPORT; port = htons(port); } } else if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0xffffffff)) { i6addr_t in6; /* * 0/32 - use the interface's IP address. */ if ((l > 0) || ipf_ifpaddr(softc, 4, FRI_NORMAL, fin->fin_ifp, &in6, NULL) == -1) { NBUMPSIDEX(1, ns_new_ifpaddr, ns_new_ifpaddr_1); return -1; } in.s_addr = ntohl(in6.in4.s_addr); } else if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0)) { /* * 0/0 - use the original source address/port. */ if (l > 0) { NBUMPSIDEX(1, ns_exhausted, ns_exhausted_3); return -1; } in.s_addr = ntohl(fin->fin_saddr); } else if ((np->in_nsrcmsk != 0xffffffff) && (np->in_spnext == 0) && ((l > 0) || (hm == NULL))) np->in_snip++; natl = NULL; if ((flags & IPN_TCPUDP) && ((np->in_redir & NAT_MAPBLK) == 0) && (np->in_flags & IPN_AUTOPORTMAP)) { /* * "ports auto" (without map-block) */ if ((l > 0) && (l % np->in_ppip == 0)) { if ((l > np->in_ppip) && np->in_nsrcmsk != 0xffffffff) np->in_snip++; } if (np->in_ppip != 0) { port = ntohs(sport); port += (l % np->in_ppip); port %= np->in_ppip; port += np->in_ppip * (ntohl(fin->fin_saddr) % np->in_ippip); port += MAPBLK_MINPORT; port = htons(port); } } else if (((np->in_redir & NAT_MAPBLK) == 0) && (flags & IPN_TCPUDPICMP) && (np->in_spnext != 0)) { /* * Standard port translation. Select next port. */ if (np->in_flags & IPN_SEQUENTIAL) { port = np->in_spnext; } else { port = ipf_random() % (np->in_spmax - np->in_spmin + 1); port += np->in_spmin; } port = htons(port); np->in_spnext++; if (np->in_spnext > np->in_spmax) { np->in_spnext = np->in_spmin; if (np->in_nsrcmsk != 0xffffffff) np->in_snip++; } } if (np->in_flags & IPN_SIPRANGE) { if (np->in_snip > ntohl(np->in_nsrcmsk)) np->in_snip = ntohl(np->in_nsrcaddr); } else { if ((np->in_nsrcmsk != 0xffffffff) && ((np->in_snip + 1) & ntohl(np->in_nsrcmsk)) > ntohl(np->in_nsrcaddr)) np->in_snip = ntohl(np->in_nsrcaddr) + 1; } if ((port == 0) && (flags & (IPN_TCPUDPICMP|IPN_ICMPQUERY))) port = sport; /* * Here we do a lookup of the connection as seen from * the outside. If an IP# pair already exists, try * again. So if you have A->B becomes C->B, you can * also have D->E become C->E but not D->B causing * another C->B. Also take protocol and ports into * account when determining whether a pre-existing * NAT setup will cause an external conflict where * this is appropriate. */ inb.s_addr = htonl(in.s_addr); sp = fin->fin_data[0]; dp = fin->fin_data[1]; fin->fin_data[0] = fin->fin_data[1]; fin->fin_data[1] = ntohs(port); natl = ipf_nat_inlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), (u_int)fin->fin_p, fin->fin_dst, inb); fin->fin_data[0] = sp; fin->fin_data[1] = dp; /* * Has the search wrapped around and come back to the * start ? */ if ((natl != NULL) && (np->in_spnext != 0) && (st_port == np->in_spnext) && (np->in_snip != 0) && (st_ip == np->in_snip)) { NBUMPSIDED(1, ns_wrap); return -1; } l++; } while (natl != NULL); /* Setup the NAT table */ nat->nat_osrcip = fin->fin_src; nat->nat_nsrcaddr = htonl(in.s_addr); nat->nat_odstip = fin->fin_dst; nat->nat_ndstip = fin->fin_dst; if (nat->nat_hm == NULL) nat->nat_hm = ipf_nat_hostmap(softn, np, fin->fin_src, fin->fin_dst, nat->nat_nsrcip, 0); if (flags & IPN_TCPUDP) { nat->nat_osport = sport; nat->nat_nsport = port; /* sport */ nat->nat_odport = dport; nat->nat_ndport = dport; ((tcphdr_t *)fin->fin_dp)->th_sport = port; } else if (flags & IPN_ICMPQUERY) { nat->nat_oicmpid = fin->fin_data[1]; ((icmphdr_t *)fin->fin_dp)->icmp_id = port; nat->nat_nicmpid = port; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_newrdr */ /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ /* allow rule to be moved if IPN_ROUNDR is set. */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT entry */ /* ni(I) - pointer to structure with misc. information needed */ /* to create new NAT entry. */ /* */ /* ni.nai_ip is passed in uninitialised and must be set, in host byte order,*/ /* to the new IP address for the translation. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_newrdr(fin, nat, ni) fr_info_t *fin; nat_t *nat; natinfo_t *ni; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short nport, dport, sport; struct in_addr in, inb; u_short sp, dp; hostmap_t *hm; u_32_t flags; ipnat_t *np; nat_t *natl; int move; move = 1; hm = NULL; in.s_addr = 0; np = ni->nai_np; flags = nat->nat_flags; if (flags & IPN_ICMPQUERY) { dport = fin->fin_data[1]; sport = 0; } else { sport = htons(fin->fin_data[0]); dport = htons(fin->fin_data[1]); } /* TRACE sport, dport */ /* * If the matching rule has IPN_STICKY set, then we want to have the * same rule kick in as before. Why would this happen? If you have * a collection of rdr rules with "round-robin sticky", the current * packet might match a different one to the previous connection but * we want the same destination to be used. */ if (((np->in_flags & (IPN_ROUNDR|IPN_SPLIT)) != 0) && ((np->in_flags & IPN_STICKY) != 0)) { hm = ipf_nat_hostmap(softn, NULL, fin->fin_src, fin->fin_dst, in, (u_32_t)dport); if (hm != NULL) { in.s_addr = ntohl(hm->hm_ndstip.s_addr); np = hm->hm_ipnat; ni->nai_np = np; move = 0; ipf_nat_hostmapdel(softc, &hm); } } /* * Otherwise, it's an inbound packet. Most likely, we don't * want to rewrite source ports and source addresses. Instead, * we want to rewrite to a fixed internal address and fixed * internal port. */ if (np->in_flags & IPN_SPLIT) { in.s_addr = np->in_dnip; inb.s_addr = htonl(in.s_addr); if ((np->in_flags & (IPN_ROUNDR|IPN_STICKY)) == IPN_STICKY) { hm = ipf_nat_hostmap(softn, NULL, fin->fin_src, fin->fin_dst, inb, (u_32_t)dport); if (hm != NULL) { in.s_addr = hm->hm_ndstip.s_addr; move = 0; } } if (hm == NULL || hm->hm_ref == 1) { if (np->in_ndstaddr == htonl(in.s_addr)) { np->in_dnip = ntohl(np->in_ndstmsk); move = 0; } else { np->in_dnip = ntohl(np->in_ndstaddr); } } if (hm != NULL) ipf_nat_hostmapdel(softc, &hm); } else if ((np->in_ndstaddr == 0) && (np->in_ndstmsk == 0xffffffff)) { i6addr_t in6; /* * 0/32 - use the interface's IP address. */ if (ipf_ifpaddr(softc, 4, FRI_NORMAL, fin->fin_ifp, &in6, NULL) == -1) { NBUMPSIDEX(0, ns_new_ifpaddr, ns_new_ifpaddr_2); return -1; } in.s_addr = ntohl(in6.in4.s_addr); } else if ((np->in_ndstaddr == 0) && (np->in_ndstmsk== 0)) { /* * 0/0 - use the original destination address/port. */ in.s_addr = ntohl(fin->fin_daddr); } else if (np->in_redir == NAT_BIMAP && np->in_ndstmsk == np->in_odstmsk) { /* * map the address block in a 1:1 fashion */ in.s_addr = np->in_ndstaddr; in.s_addr |= fin->fin_daddr & ~np->in_ndstmsk; in.s_addr = ntohl(in.s_addr); } else { in.s_addr = ntohl(np->in_ndstaddr); } if ((np->in_dpnext == 0) || ((flags & NAT_NOTRULEPORT) != 0)) nport = dport; else { /* * Whilst not optimized for the case where * pmin == pmax, the gain is not significant. */ if (((np->in_flags & IPN_FIXEDDPORT) == 0) && (np->in_odport != np->in_dtop)) { nport = ntohs(dport) - np->in_odport + np->in_dpmax; nport = htons(nport); } else { nport = htons(np->in_dpnext); np->in_dpnext++; if (np->in_dpnext > np->in_dpmax) np->in_dpnext = np->in_dpmin; } } /* * When the redirect-to address is set to 0.0.0.0, just * assume a blank `forwarding' of the packet. We don't * setup any translation for this either. */ if (in.s_addr == 0) { if (nport == dport) { NBUMPSIDED(0, ns_xlate_null); return -1; } in.s_addr = ntohl(fin->fin_daddr); } /* * Check to see if this redirect mapping already exists and if * it does, return "failure" (allowing it to be created will just * cause one or both of these "connections" to stop working.) */ inb.s_addr = htonl(in.s_addr); sp = fin->fin_data[0]; dp = fin->fin_data[1]; fin->fin_data[1] = fin->fin_data[0]; fin->fin_data[0] = ntohs(nport); natl = ipf_nat_outlookup(fin, flags & ~(SI_WILDP|NAT_SEARCH), (u_int)fin->fin_p, inb, fin->fin_src); fin->fin_data[0] = sp; fin->fin_data[1] = dp; if (natl != NULL) { DT2(ns_new_xlate_exists, fr_info_t *, fin, nat_t *, natl); NBUMPSIDE(0, ns_xlate_exists); return -1; } inb.s_addr = htonl(in.s_addr); nat->nat_ndstaddr = htonl(in.s_addr); nat->nat_odstip = fin->fin_dst; nat->nat_nsrcip = fin->fin_src; nat->nat_osrcip = fin->fin_src; if ((nat->nat_hm == NULL) && ((np->in_flags & IPN_STICKY) != 0)) nat->nat_hm = ipf_nat_hostmap(softn, np, fin->fin_src, fin->fin_dst, inb, (u_32_t)dport); if (flags & IPN_TCPUDP) { nat->nat_odport = dport; nat->nat_ndport = nport; nat->nat_osport = sport; nat->nat_nsport = sport; ((tcphdr_t *)fin->fin_dp)->th_dport = nport; } else if (flags & IPN_ICMPQUERY) { nat->nat_oicmpid = fin->fin_data[1]; ((icmphdr_t *)fin->fin_dp)->icmp_id = nport; nat->nat_nicmpid = nport; } return move; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_add */ /* Returns: nat_t* - NULL == failure to create new NAT structure, */ /* else pointer to new NAT structure */ /* Parameters: fin(I) - pointer to packet information */ /* np(I) - pointer to NAT rule */ /* natsave(I) - pointer to where to store NAT struct pointer */ /* flags(I) - flags describing the current packet */ /* direction(I) - direction of packet (in/out) */ /* Write Lock: ipf_nat */ /* */ /* Attempts to create a new NAT entry. Does not actually change the packet */ /* in any way. */ /* */ /* This fucntion is in three main parts: (1) deal with creating a new NAT */ /* structure for a "MAP" rule (outgoing NAT translation); (2) deal with */ /* creating a new NAT structure for a "RDR" rule (incoming NAT translation) */ /* and (3) building that structure and putting it into the NAT table(s). */ /* */ /* NOTE: natsave should NOT be used top point back to an ipstate_t struct */ /* as it can result in memory being corrupted. */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_add(fin, np, natsave, flags, direction) fr_info_t *fin; ipnat_t *np; nat_t **natsave; u_int flags; int direction; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; hostmap_t *hm = NULL; nat_t *nat, *natl; natstat_t *nsp; u_int nflags; natinfo_t ni; int move; nsp = &softn->ipf_nat_stats; if ((nsp->ns_active * 100 / softn->ipf_nat_table_max) > softn->ipf_nat_table_wm_high) { softn->ipf_nat_doflush = 1; } if (nsp->ns_active >= softn->ipf_nat_table_max) { NBUMPSIDED(fin->fin_out, ns_table_max); return NULL; } move = 1; nflags = np->in_flags & flags; nflags &= NAT_FROMRULE; ni.nai_np = np; ni.nai_dport = 0; ni.nai_sport = 0; /* Give me a new nat */ KMALLOC(nat, nat_t *); if (nat == NULL) { NBUMPSIDED(fin->fin_out, ns_memfail); /* * Try to automatically tune the max # of entries in the * table allowed to be less than what will cause kmem_alloc() * to fail and try to eliminate panics due to out of memory * conditions arising. */ if ((softn->ipf_nat_table_max > softn->ipf_nat_table_sz) && (nsp->ns_active > 100)) { softn->ipf_nat_table_max = nsp->ns_active - 100; printf("table_max reduced to %d\n", softn->ipf_nat_table_max); } return NULL; } if (flags & IPN_ICMPQUERY) { /* * In the ICMP query NAT code, we translate the ICMP id fields * to make them unique. This is indepedent of the ICMP type * (e.g. in the unlikely event that a host sends an echo and * an tstamp request with the same id, both packets will have * their ip address/id field changed in the same way). */ /* The icmp_id field is used by the sender to identify the * process making the icmp request. (the receiver justs * copies it back in its response). So, it closely matches * the concept of source port. We overlay sport, so we can * maximally reuse the existing code. */ ni.nai_sport = fin->fin_data[1]; ni.nai_dport = 0; } bzero((char *)nat, sizeof(*nat)); nat->nat_flags = flags; nat->nat_redir = np->in_redir; nat->nat_dir = direction; nat->nat_pr[0] = fin->fin_p; nat->nat_pr[1] = fin->fin_p; /* * Search the current table for a match and create a new mapping * if there is none found. */ if (np->in_redir & NAT_DIVERTUDP) { move = ipf_nat_newdivert(fin, nat, &ni); } else if (np->in_redir & NAT_REWRITE) { move = ipf_nat_newrewrite(fin, nat, &ni); } else if (direction == NAT_OUTBOUND) { /* * We can now arrange to call this for the same connection * because ipf_nat_new doesn't protect the code path into * this function. */ natl = ipf_nat_outlookup(fin, nflags, (u_int)fin->fin_p, fin->fin_src, fin->fin_dst); if (natl != NULL) { KFREE(nat); nat = natl; goto done; } move = ipf_nat_newmap(fin, nat, &ni); } else { /* * NAT_INBOUND is used for redirects rules */ natl = ipf_nat_inlookup(fin, nflags, (u_int)fin->fin_p, fin->fin_src, fin->fin_dst); if (natl != NULL) { KFREE(nat); nat = natl; goto done; } move = ipf_nat_newrdr(fin, nat, &ni); } if (move == -1) goto badnat; np = ni.nai_np; nat->nat_mssclamp = np->in_mssclamp; nat->nat_me = natsave; nat->nat_fr = fin->fin_fr; nat->nat_rev = fin->fin_rev; nat->nat_ptr = np; nat->nat_dlocal = np->in_dlocal; if ((np->in_apr != NULL) && ((nat->nat_flags & NAT_SLAVE) == 0)) { if (ipf_proxy_new(fin, nat) == -1) { NBUMPSIDED(fin->fin_out, ns_appr_fail); goto badnat; } } nat->nat_ifps[0] = np->in_ifps[0]; if (np->in_ifps[0] != NULL) { COPYIFNAME(np->in_v[0], np->in_ifps[0], nat->nat_ifnames[0]); } nat->nat_ifps[1] = np->in_ifps[1]; if (np->in_ifps[1] != NULL) { COPYIFNAME(np->in_v[1], np->in_ifps[1], nat->nat_ifnames[1]); } if (ipf_nat_finalise(fin, nat) == -1) { goto badnat; } np->in_use++; if ((move == 1) && (np->in_flags & IPN_ROUNDR)) { if ((np->in_redir & (NAT_REDIRECT|NAT_MAP)) == NAT_REDIRECT) { ipf_nat_delrdr(softn, np); ipf_nat_addrdr(softn, np); } else if ((np->in_redir & (NAT_REDIRECT|NAT_MAP)) == NAT_MAP) { ipf_nat_delmap(softn, np); ipf_nat_addmap(softn, np); } } if (flags & SI_WILDP) nsp->ns_wilds++; nsp->ns_proto[nat->nat_pr[0]]++; goto done; badnat: DT2(ns_badnatnew, fr_info_t *, fin, nat_t *, nat); NBUMPSIDE(fin->fin_out, ns_badnatnew); if ((hm = nat->nat_hm) != NULL) ipf_nat_hostmapdel(softc, &hm); KFREE(nat); nat = NULL; done: if (nat != NULL && np != NULL) np->in_hits++; if (natsave != NULL) *natsave = nat; return nat; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_finalise */ /* Returns: int - 0 == sucess, -1 == failure */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT entry */ /* Write Lock: ipf_nat */ /* */ /* This is the tail end of constructing a new NAT entry and is the same */ /* for both IPv4 and IPv6. */ /* ------------------------------------------------------------------------ */ /*ARGSUSED*/ static int ipf_nat_finalise(fin, nat) fr_info_t *fin; nat_t *nat; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t sum1, sum2, sumd; frentry_t *fr; u_32_t flags; #if SOLARIS && defined(_KERNEL) && (SOLARIS2 >= 6) && defined(ICK_M_CTL_MAGIC) qpktinfo_t *qpi = fin->fin_qpi; #endif flags = nat->nat_flags; switch (nat->nat_pr[0]) { case IPPROTO_ICMP : sum1 = LONG_SUM(ntohs(nat->nat_oicmpid)); sum2 = LONG_SUM(ntohs(nat->nat_nicmpid)); CALC_SUMD(sum1, sum2, sumd); nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); break; default : sum1 = LONG_SUM(ntohl(nat->nat_osrcaddr) + \ ntohs(nat->nat_osport)); sum2 = LONG_SUM(ntohl(nat->nat_nsrcaddr) + \ ntohs(nat->nat_nsport)); CALC_SUMD(sum1, sum2, sumd); nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); sum1 = LONG_SUM(ntohl(nat->nat_odstaddr) + \ ntohs(nat->nat_odport)); sum2 = LONG_SUM(ntohl(nat->nat_ndstaddr) + \ ntohs(nat->nat_ndport)); CALC_SUMD(sum1, sum2, sumd); nat->nat_sumd[0] += (sumd & 0xffff) + (sumd >> 16); break; } /* * Compute the partial checksum, just in case. * This is only ever placed into outbound packets so care needs * to be taken over which pair of addresses are used. */ if (nat->nat_dir == NAT_OUTBOUND) { sum1 = LONG_SUM(ntohl(nat->nat_nsrcaddr)); sum1 += LONG_SUM(ntohl(nat->nat_ndstaddr)); } else { sum1 = LONG_SUM(ntohl(nat->nat_osrcaddr)); sum1 += LONG_SUM(ntohl(nat->nat_odstaddr)); } sum1 += nat->nat_pr[1]; nat->nat_sumd[1] = (sum1 & 0xffff) + (sum1 >> 16); sum1 = LONG_SUM(ntohl(nat->nat_osrcaddr)); sum2 = LONG_SUM(ntohl(nat->nat_nsrcaddr)); CALC_SUMD(sum1, sum2, sumd); nat->nat_ipsumd = (sumd & 0xffff) + (sumd >> 16); sum1 = LONG_SUM(ntohl(nat->nat_odstaddr)); sum2 = LONG_SUM(ntohl(nat->nat_ndstaddr)); CALC_SUMD(sum1, sum2, sumd); nat->nat_ipsumd += (sumd & 0xffff) + (sumd >> 16); nat->nat_v[0] = 4; nat->nat_v[1] = 4; if ((nat->nat_ifps[0] != NULL) && (nat->nat_ifps[0] != (void *)-1)) { nat->nat_mtu[0] = GETIFMTU_4(nat->nat_ifps[0]); } if ((nat->nat_ifps[1] != NULL) && (nat->nat_ifps[1] != (void *)-1)) { nat->nat_mtu[1] = GETIFMTU_4(nat->nat_ifps[1]); } if ((nat->nat_flags & SI_CLONE) == 0) nat->nat_sync = ipf_sync_new(softc, SMC_NAT, fin, nat); if (ipf_nat_insert(softc, softn, nat) == 0) { if (softn->ipf_nat_logging) ipf_nat_log(softc, softn, nat, NL_NEW); fr = nat->nat_fr; if (fr != NULL) { MUTEX_ENTER(&fr->fr_lock); fr->fr_ref++; MUTEX_EXIT(&fr->fr_lock); } return 0; } NBUMPSIDED(fin->fin_out, ns_unfinalised); /* * nat_insert failed, so cleanup time... */ if (nat->nat_sync != NULL) ipf_sync_del_nat(softc->ipf_sync_soft, nat->nat_sync); return -1; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_insert */ /* Returns: int - 0 == sucess, -1 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to NAT structure */ /* Write Lock: ipf_nat */ /* */ /* Insert a NAT entry into the hash tables for searching and add it to the */ /* list of active NAT entries. Adjust global counters when complete. */ /* ------------------------------------------------------------------------ */ int ipf_nat_insert(softc, softn, nat) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; nat_t *nat; { u_int hv0, hv1; u_int sp, dp; ipnat_t *in; /* * Try and return an error as early as possible, so calculate the hash * entry numbers first and then proceed. */ if ((nat->nat_flags & (SI_W_SPORT|SI_W_DPORT)) == 0) { if ((nat->nat_flags & IPN_TCPUDP) != 0) { sp = nat->nat_osport; dp = nat->nat_odport; } else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) { sp = 0; dp = nat->nat_oicmpid; } else { sp = 0; dp = 0; } hv0 = NAT_HASH_FN(nat->nat_osrcaddr, sp, 0xffffffff); hv0 = NAT_HASH_FN(nat->nat_odstaddr, hv0 + dp, 0xffffffff); /* * TRACE nat_osrcaddr, nat_osport, nat_odstaddr, * nat_odport, hv0 */ if ((nat->nat_flags & IPN_TCPUDP) != 0) { sp = nat->nat_nsport; dp = nat->nat_ndport; } else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) { sp = 0; dp = nat->nat_nicmpid; } else { sp = 0; dp = 0; } hv1 = NAT_HASH_FN(nat->nat_nsrcaddr, sp, 0xffffffff); hv1 = NAT_HASH_FN(nat->nat_ndstaddr, hv1 + dp, 0xffffffff); /* * TRACE nat_nsrcaddr, nat_nsport, nat_ndstaddr, * nat_ndport, hv1 */ } else { hv0 = NAT_HASH_FN(nat->nat_osrcaddr, 0, 0xffffffff); hv0 = NAT_HASH_FN(nat->nat_odstaddr, hv0, 0xffffffff); /* TRACE nat_osrcaddr, nat_odstaddr, hv0 */ hv1 = NAT_HASH_FN(nat->nat_nsrcaddr, 0, 0xffffffff); hv1 = NAT_HASH_FN(nat->nat_ndstaddr, hv1, 0xffffffff); /* TRACE nat_nsrcaddr, nat_ndstaddr, hv1 */ } nat->nat_hv[0] = hv0; nat->nat_hv[1] = hv1; MUTEX_INIT(&nat->nat_lock, "nat entry lock"); in = nat->nat_ptr; nat->nat_ref = nat->nat_me ? 2 : 1; nat->nat_ifnames[0][LIFNAMSIZ - 1] = '\0'; nat->nat_ifps[0] = ipf_resolvenic(softc, nat->nat_ifnames[0], 4); if (nat->nat_ifnames[1][0] != '\0') { nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; nat->nat_ifps[1] = ipf_resolvenic(softc, nat->nat_ifnames[1], 4); } else if (in->in_ifnames[1] != -1) { char *name; name = in->in_names + in->in_ifnames[1]; if (name[1] != '\0' && name[0] != '-' && name[0] != '*') { (void) strncpy(nat->nat_ifnames[1], nat->nat_ifnames[0], LIFNAMSIZ); nat->nat_ifnames[1][LIFNAMSIZ - 1] = '\0'; nat->nat_ifps[1] = nat->nat_ifps[0]; } } if ((nat->nat_ifps[0] != NULL) && (nat->nat_ifps[0] != (void *)-1)) { nat->nat_mtu[0] = GETIFMTU_4(nat->nat_ifps[0]); } if ((nat->nat_ifps[1] != NULL) && (nat->nat_ifps[1] != (void *)-1)) { nat->nat_mtu[1] = GETIFMTU_4(nat->nat_ifps[1]); } return ipf_nat_hashtab_add(softc, softn, nat); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_hashtab_add */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to NAT structure */ /* */ /* Handle the insertion of a NAT entry into the table/list. */ /* ------------------------------------------------------------------------ */ int ipf_nat_hashtab_add(softc, softn, nat) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; nat_t *nat; { nat_t **natp; u_int hv0; u_int hv1; hv0 = nat->nat_hv[0] % softn->ipf_nat_table_sz; hv1 = nat->nat_hv[1] % softn->ipf_nat_table_sz; if (nat->nat_dir == NAT_INBOUND || nat->nat_dir == NAT_DIVERTIN) { u_int swap; swap = hv0; hv0 = hv1; hv1 = swap; } if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen[hv0] >= softn->ipf_nat_maxbucket) { DT1(ns_bucket_max_0, int, softn->ipf_nat_stats.ns_side[0].ns_bucketlen[hv0]); NBUMPSIDE(0, ns_bucket_max); return -1; } if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen[hv1] >= softn->ipf_nat_maxbucket) { DT1(ns_bucket_max_1, int, softn->ipf_nat_stats.ns_side[1].ns_bucketlen[hv1]); NBUMPSIDE(1, ns_bucket_max); return -1; } /* * The ordering of operations in the list and hash table insertion * is very important. The last operation for each task should be * to update the top of the list, after all the "nexts" have been * done so that walking the list while it is being done does not * find strange pointers. * * Global list of NAT instances */ nat->nat_next = softn->ipf_nat_instances; nat->nat_pnext = &softn->ipf_nat_instances; if (softn->ipf_nat_instances) softn->ipf_nat_instances->nat_pnext = &nat->nat_next; softn->ipf_nat_instances = nat; /* * Inbound hash table. */ natp = &softn->ipf_nat_table[0][hv0]; nat->nat_phnext[0] = natp; nat->nat_hnext[0] = *natp; if (*natp) { (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; } else { NBUMPSIDE(0, ns_inuse); } *natp = nat; NBUMPSIDE(0, ns_bucketlen[hv0]); /* * Outbound hash table. */ natp = &softn->ipf_nat_table[1][hv1]; nat->nat_phnext[1] = natp; nat->nat_hnext[1] = *natp; if (*natp) (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; else { NBUMPSIDE(1, ns_inuse); } *natp = nat; NBUMPSIDE(1, ns_bucketlen[hv1]); ipf_nat_setqueue(softc, softn, nat); if (nat->nat_dir & NAT_OUTBOUND) { NBUMPSIDE(1, ns_added); } else { NBUMPSIDE(0, ns_added); } softn->ipf_nat_stats.ns_active++; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_icmperrorlookup */ /* Returns: nat_t* - point to matching NAT structure */ /* Parameters: fin(I) - pointer to packet information */ /* dir(I) - direction of packet (in/out) */ /* */ /* Check if the ICMP error message is related to an existing TCP, UDP or */ /* ICMP query nat entry. It is assumed that the packet is already of the */ /* the required length. */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_icmperrorlookup(fin, dir) fr_info_t *fin; int dir; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; int flags = 0, type, minlen; icmphdr_t *icmp, *orgicmp; nat_stat_side_t *nside; tcphdr_t *tcp = NULL; u_short data[2]; nat_t *nat; ip_t *oip; u_int p; icmp = fin->fin_dp; type = icmp->icmp_type; nside = &softn->ipf_nat_stats.ns_side[fin->fin_out]; /* * Does it at least have the return (basic) IP header ? * Only a basic IP header (no options) should be with an ICMP error * header. Also, if it's not an error type, then return. */ if ((fin->fin_hlen != sizeof(ip_t)) || !(fin->fin_flx & FI_ICMPERR)) { ATOMIC_INCL(nside->ns_icmp_basic); return NULL; } /* * Check packet size */ oip = (ip_t *)((char *)fin->fin_dp + 8); minlen = IP_HL(oip) << 2; if ((minlen < sizeof(ip_t)) || (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen)) { ATOMIC_INCL(nside->ns_icmp_size); return NULL; } /* * Is the buffer big enough for all of it ? It's the size of the IP * header claimed in the encapsulated part which is of concern. It * may be too big to be in this buffer but not so big that it's * outside the ICMP packet, leading to TCP deref's causing problems. * This is possible because we don't know how big oip_hl is when we * do the pullup early in ipf_check() and thus can't gaurantee it is * all here now. */ #ifdef ipf_nat_KERNEL { mb_t *m; m = fin->fin_m; # if defined(MENTAT) if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)m->b_wptr) { ATOMIC_INCL(nside->ns_icmp_mbuf); return NULL; } # else if ((char *)oip + fin->fin_dlen - ICMPERR_ICMPHLEN > (char *)fin->fin_ip + M_LEN(m)) { ATOMIC_INCL(nside->ns_icmp_mbuf); return NULL; } # endif } #endif if (fin->fin_daddr != oip->ip_src.s_addr) { ATOMIC_INCL(nside->ns_icmp_address); return NULL; } p = oip->ip_p; if (p == IPPROTO_TCP) flags = IPN_TCP; else if (p == IPPROTO_UDP) flags = IPN_UDP; else if (p == IPPROTO_ICMP) { orgicmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); /* see if this is related to an ICMP query */ if (ipf_nat_icmpquerytype(orgicmp->icmp_type)) { data[0] = fin->fin_data[0]; data[1] = fin->fin_data[1]; fin->fin_data[0] = 0; fin->fin_data[1] = orgicmp->icmp_id; flags = IPN_ICMPERR|IPN_ICMPQUERY; /* * NOTE : dir refers to the direction of the original * ip packet. By definition the icmp error * message flows in the opposite direction. */ if (dir == NAT_INBOUND) nat = ipf_nat_inlookup(fin, flags, p, oip->ip_dst, oip->ip_src); else nat = ipf_nat_outlookup(fin, flags, p, oip->ip_dst, oip->ip_src); fin->fin_data[0] = data[0]; fin->fin_data[1] = data[1]; return nat; } } if (flags & IPN_TCPUDP) { minlen += 8; /* + 64bits of data to get ports */ /* TRACE (fin,minlen) */ if (fin->fin_plen < ICMPERR_IPICMPHLEN + minlen) { ATOMIC_INCL(nside->ns_icmp_short); return NULL; } data[0] = fin->fin_data[0]; data[1] = fin->fin_data[1]; tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); fin->fin_data[0] = ntohs(tcp->th_dport); fin->fin_data[1] = ntohs(tcp->th_sport); if (dir == NAT_INBOUND) { nat = ipf_nat_inlookup(fin, flags, p, oip->ip_dst, oip->ip_src); } else { nat = ipf_nat_outlookup(fin, flags, p, oip->ip_dst, oip->ip_src); } fin->fin_data[0] = data[0]; fin->fin_data[1] = data[1]; return nat; } if (dir == NAT_INBOUND) nat = ipf_nat_inlookup(fin, 0, p, oip->ip_dst, oip->ip_src); else nat = ipf_nat_outlookup(fin, 0, p, oip->ip_dst, oip->ip_src); return nat; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_icmperror */ /* Returns: nat_t* - point to matching NAT structure */ /* Parameters: fin(I) - pointer to packet information */ /* nflags(I) - NAT flags for this packet */ /* dir(I) - direction of packet (in/out) */ /* */ /* Fix up an ICMP packet which is an error message for an existing NAT */ /* session. This will correct both packet header data and checksums. */ /* */ /* This should *ONLY* be used for incoming ICMP error packets to make sure */ /* a NAT'd ICMP packet gets correctly recognised. */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_icmperror(fin, nflags, dir) fr_info_t *fin; u_int *nflags; int dir; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t sum1, sum2, sumd, sumd2; struct in_addr a1, a2, a3, a4; int flags, dlen, odst; icmphdr_t *icmp; u_short *csump; tcphdr_t *tcp; nat_t *nat; ip_t *oip; void *dp; if ((fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { NBUMPSIDED(fin->fin_out, ns_icmp_short); return NULL; } /* * ipf_nat_icmperrorlookup() will return NULL for `defective' packets. */ if ((fin->fin_v != 4) || !(nat = ipf_nat_icmperrorlookup(fin, dir))) { NBUMPSIDED(fin->fin_out, ns_icmp_notfound); return NULL; } tcp = NULL; csump = NULL; flags = 0; sumd2 = 0; *nflags = IPN_ICMPERR; icmp = fin->fin_dp; oip = (ip_t *)&icmp->icmp_ip; dp = (((char *)oip) + (IP_HL(oip) << 2)); if (oip->ip_p == IPPROTO_TCP) { tcp = (tcphdr_t *)dp; csump = (u_short *)&tcp->th_sum; flags = IPN_TCP; } else if (oip->ip_p == IPPROTO_UDP) { udphdr_t *udp; udp = (udphdr_t *)dp; tcp = (tcphdr_t *)dp; csump = (u_short *)&udp->uh_sum; flags = IPN_UDP; } else if (oip->ip_p == IPPROTO_ICMP) flags = IPN_ICMPQUERY; dlen = fin->fin_plen - ((char *)dp - (char *)fin->fin_ip); /* * Need to adjust ICMP header to include the real IP#'s and * port #'s. Only apply a checksum change relative to the * IP address change as it will be modified again in ipf_nat_checkout * for both address and port. Two checksum changes are * necessary for the two header address changes. Be careful * to only modify the checksum once for the port # and twice * for the IP#. */ /* * Step 1 * Fix the IP addresses in the offending IP packet. You also need * to adjust the IP header checksum of that offending IP packet. * * Normally, you would expect that the ICMP checksum of the * ICMP error message needs to be adjusted as well for the * IP address change in oip. * However, this is a NOP, because the ICMP checksum is * calculated over the complete ICMP packet, which includes the * changed oip IP addresses and oip->ip_sum. However, these * two changes cancel each other out (if the delta for * the IP address is x, then the delta for ip_sum is minus x), * so no change in the icmp_cksum is necessary. * * Inbound ICMP * ------------ * MAP rule, SRC=a,DST=b -> SRC=c,DST=b * - response to outgoing packet (a,b)=>(c,b) (OIP_SRC=c,OIP_DST=b) * - OIP_SRC(c)=nat_newsrcip, OIP_DST(b)=nat_newdstip *=> OIP_SRC(c)=nat_oldsrcip, OIP_DST(b)=nat_olddstip * * RDR rule, SRC=a,DST=b -> SRC=a,DST=c * - response to outgoing packet (c,a)=>(b,a) (OIP_SRC=b,OIP_DST=a) * - OIP_SRC(b)=nat_olddstip, OIP_DST(a)=nat_oldsrcip *=> OIP_SRC(b)=nat_newdstip, OIP_DST(a)=nat_newsrcip * * REWRITE out rule, SRC=a,DST=b -> SRC=c,DST=d * - response to outgoing packet (a,b)=>(c,d) (OIP_SRC=c,OIP_DST=d) * - OIP_SRC(c)=nat_newsrcip, OIP_DST(d)=nat_newdstip *=> OIP_SRC(c)=nat_oldsrcip, OIP_DST(d)=nat_olddstip * * REWRITE in rule, SRC=a,DST=b -> SRC=c,DST=d * - response to outgoing packet (d,c)=>(b,a) (OIP_SRC=b,OIP_DST=a) * - OIP_SRC(b)=nat_olddstip, OIP_DST(a)=nat_oldsrcip *=> OIP_SRC(b)=nat_newdstip, OIP_DST(a)=nat_newsrcip * * Outbound ICMP * ------------- * MAP rule, SRC=a,DST=b -> SRC=c,DST=b * - response to incoming packet (b,c)=>(b,a) (OIP_SRC=b,OIP_DST=a) * - OIP_SRC(b)=nat_olddstip, OIP_DST(a)=nat_oldsrcip *=> OIP_SRC(b)=nat_newdstip, OIP_DST(a)=nat_newsrcip * * RDR rule, SRC=a,DST=b -> SRC=a,DST=c * - response to incoming packet (a,b)=>(a,c) (OIP_SRC=a,OIP_DST=c) * - OIP_SRC(a)=nat_newsrcip, OIP_DST(c)=nat_newdstip *=> OIP_SRC(a)=nat_oldsrcip, OIP_DST(c)=nat_olddstip * * REWRITE out rule, SRC=a,DST=b -> SRC=c,DST=d * - response to incoming packet (d,c)=>(b,a) (OIP_SRC=c,OIP_DST=d) * - OIP_SRC(c)=nat_olddstip, OIP_DST(d)=nat_oldsrcip *=> OIP_SRC(b)=nat_newdstip, OIP_DST(a)=nat_newsrcip * * REWRITE in rule, SRC=a,DST=b -> SRC=c,DST=d * - response to incoming packet (a,b)=>(c,d) (OIP_SRC=b,OIP_DST=a) * - OIP_SRC(b)=nat_newsrcip, OIP_DST(a)=nat_newdstip *=> OIP_SRC(a)=nat_oldsrcip, OIP_DST(c)=nat_olddstip */ if (((fin->fin_out == 0) && ((nat->nat_redir & NAT_MAP) != 0)) || ((fin->fin_out == 1) && ((nat->nat_redir & NAT_REDIRECT) != 0))) { a1.s_addr = ntohl(nat->nat_osrcaddr); a4.s_addr = ntohl(oip->ip_src.s_addr); a3.s_addr = ntohl(nat->nat_odstaddr); a2.s_addr = ntohl(oip->ip_dst.s_addr); oip->ip_src.s_addr = htonl(a1.s_addr); oip->ip_dst.s_addr = htonl(a3.s_addr); odst = 1; } else { a1.s_addr = ntohl(nat->nat_ndstaddr); a2.s_addr = ntohl(oip->ip_dst.s_addr); a3.s_addr = ntohl(nat->nat_nsrcaddr); a4.s_addr = ntohl(oip->ip_src.s_addr); oip->ip_dst.s_addr = htonl(a3.s_addr); oip->ip_src.s_addr = htonl(a1.s_addr); odst = 0; } sum1 = 0; sum2 = 0; sumd = 0; CALC_SUMD(a2.s_addr, a3.s_addr, sum1); CALC_SUMD(a4.s_addr, a1.s_addr, sum2); sumd = sum2 + sum1; if (sumd != 0) ipf_fix_datacksum(&oip->ip_sum, sumd); sumd2 = sumd; sum1 = 0; sum2 = 0; /* * Fix UDP pseudo header checksum to compensate for the * IP address change. */ if (((flags & IPN_TCPUDP) != 0) && (dlen >= 4)) { u_32_t sum3, sum4, sumt; /* * Step 2 : * For offending TCP/UDP IP packets, translate the ports as * well, based on the NAT specification. Of course such * a change may be reflected in the ICMP checksum as well. * * Since the port fields are part of the TCP/UDP checksum * of the offending IP packet, you need to adjust that checksum * as well... except that the change in the port numbers should * be offset by the checksum change. However, the TCP/UDP * checksum will also need to change if there has been an * IP address change. */ if (odst == 1) { sum1 = ntohs(nat->nat_osport); sum4 = ntohs(tcp->th_sport); sum3 = ntohs(nat->nat_odport); sum2 = ntohs(tcp->th_dport); tcp->th_sport = htons(sum1); tcp->th_dport = htons(sum3); } else { sum1 = ntohs(nat->nat_ndport); sum2 = ntohs(tcp->th_dport); sum3 = ntohs(nat->nat_nsport); sum4 = ntohs(tcp->th_sport); tcp->th_dport = htons(sum3); tcp->th_sport = htons(sum1); } CALC_SUMD(sum4, sum1, sumt); sumd += sumt; CALC_SUMD(sum2, sum3, sumt); sumd += sumt; if (sumd != 0 || sumd2 != 0) { /* * At this point, sumd is the delta to apply to the * TCP/UDP header, given the changes in both the IP * address and the ports and sumd2 is the delta to * apply to the ICMP header, given the IP address * change delta that may need to be applied to the * TCP/UDP checksum instead. * * If we will both the IP and TCP/UDP checksums * then the ICMP checksum changes by the address * delta applied to the TCP/UDP checksum. If we * do not change the TCP/UDP checksum them we * apply the delta in ports to the ICMP checksum. */ if (oip->ip_p == IPPROTO_UDP) { if ((dlen >= 8) && (*csump != 0)) { ipf_fix_datacksum(csump, sumd); } else { CALC_SUMD(sum1, sum4, sumd2); CALC_SUMD(sum3, sum2, sumt); sumd2 += sumt; } } else if (oip->ip_p == IPPROTO_TCP) { if (dlen >= 18) { ipf_fix_datacksum(csump, sumd); } else { CALC_SUMD(sum1, sum4, sumd2); CALC_SUMD(sum3, sum2, sumt); sumd2 += sumt; } } if (sumd2 != 0) { sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); sumd2 = (sumd2 & 0xffff) + (sumd2 >> 16); ipf_fix_incksum(0, &icmp->icmp_cksum, sumd2, 0); } } } else if (((flags & IPN_ICMPQUERY) != 0) && (dlen >= 8)) { icmphdr_t *orgicmp; /* * XXX - what if this is bogus hl and we go off the end ? * In this case, ipf_nat_icmperrorlookup() will have * returned NULL. */ orgicmp = (icmphdr_t *)dp; if (odst == 1) { if (orgicmp->icmp_id != nat->nat_osport) { /* * Fix ICMP checksum (of the offening ICMP * query packet) to compensate the change * in the ICMP id of the offending ICMP * packet. * * Since you modify orgicmp->icmp_id with * a delta (say x) and you compensate that * in origicmp->icmp_cksum with a delta * minus x, you don't have to adjust the * overall icmp->icmp_cksum */ sum1 = ntohs(orgicmp->icmp_id); sum2 = ntohs(nat->nat_oicmpid); CALC_SUMD(sum1, sum2, sumd); orgicmp->icmp_id = nat->nat_oicmpid; ipf_fix_datacksum(&orgicmp->icmp_cksum, sumd); } } /* nat_dir == NAT_INBOUND is impossible for icmp queries */ } return nat; } /* * MAP-IN MAP-OUT RDR-IN RDR-OUT * osrc X == src == src X * odst X == dst == dst X * nsrc == dst X X == dst * ndst == src X X == src * MAP = NAT_OUTBOUND, RDR = NAT_INBOUND */ /* * NB: these lookups don't lock access to the list, it assumed that it has * already been done! */ /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_inlookup */ /* Returns: nat_t* - NULL == no match, */ /* else pointer to matching NAT entry */ /* Parameters: fin(I) - pointer to packet information */ /* flags(I) - NAT flags for this packet */ /* p(I) - protocol for this packet */ /* src(I) - source IP address */ /* mapdst(I) - destination IP address */ /* */ /* Lookup a nat entry based on the mapped destination ip address/port and */ /* real source address/port. We use this lookup when receiving a packet, */ /* we're looking for a table entry, based on the destination address. */ /* */ /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ /* */ /* NOTE: IT IS ASSUMED THAT IS ONLY HELD WITH A READ LOCK WHEN */ /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ /* */ /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ /* the packet is of said protocol */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_inlookup(fin, flags, p, src, mapdst) fr_info_t *fin; u_int flags, p; struct in_addr src , mapdst; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short sport, dport; grehdr_t *gre; ipnat_t *ipn; u_int sflags; nat_t *nat; int nflags; u_32_t dst; void *ifp; u_int hv, rhv; ifp = fin->fin_ifp; gre = NULL; dst = mapdst.s_addr; sflags = flags & NAT_TCPUDPICMP; switch (p) { case IPPROTO_TCP : case IPPROTO_UDP : sport = htons(fin->fin_data[0]); dport = htons(fin->fin_data[1]); break; case IPPROTO_ICMP : if (flags & IPN_ICMPERR) { sport = fin->fin_data[1]; dport = 0; } else { dport = fin->fin_data[1]; sport = 0; } break; default : sport = 0; dport = 0; break; } if ((flags & SI_WILDP) != 0) goto find_in_wild_ports; rhv = NAT_HASH_FN(dst, dport, 0xffffffff); rhv = NAT_HASH_FN(src.s_addr, rhv + sport, 0xffffffff); hv = rhv % softn->ipf_nat_table_sz; nat = softn->ipf_nat_table[1][hv]; /* TRACE dst, dport, src, sport, hv, nat */ for (; nat; nat = nat->nat_hnext[1]) { if (nat->nat_ifps[0] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) continue; } if (nat->nat_pr[0] != p) continue; switch (nat->nat_dir) { case NAT_INBOUND : case NAT_DIVERTIN : if (nat->nat_v[0] != 4) continue; if (nat->nat_osrcaddr != src.s_addr || nat->nat_odstaddr != dst) continue; if ((nat->nat_flags & IPN_TCPUDP) != 0) { if (nat->nat_osport != sport) continue; if (nat->nat_odport != dport) continue; } else if (p == IPPROTO_ICMP) { if (nat->nat_osport != dport) { continue; } } break; case NAT_DIVERTOUT : if (nat->nat_dlocal) continue; case NAT_OUTBOUND : if (nat->nat_v[1] != 4) continue; if (nat->nat_dlocal) continue; if (nat->nat_dlocal) continue; if (nat->nat_ndstaddr != src.s_addr || nat->nat_nsrcaddr != dst) continue; if ((nat->nat_flags & IPN_TCPUDP) != 0) { if (nat->nat_ndport != sport) continue; if (nat->nat_nsport != dport) continue; } else if (p == IPPROTO_ICMP) { if (nat->nat_osport != dport) { continue; } } break; } if ((nat->nat_flags & IPN_TCPUDP) != 0) { ipn = nat->nat_ptr; if ((ipn != NULL) && (nat->nat_aps != NULL)) if (ipf_proxy_match(fin, nat) != 0) continue; } if ((nat->nat_ifps[0] == NULL) && (ifp != NULL)) { nat->nat_ifps[0] = ifp; nat->nat_mtu[0] = GETIFMTU_4(ifp); } return nat; } /* * So if we didn't find it but there are wildcard members in the hash * table, go back and look for them. We do this search and update here * because it is modifying the NAT table and we want to do this only * for the first packet that matches. The exception, of course, is * for "dummy" (FI_IGNORE) lookups. */ find_in_wild_ports: if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) { NBUMPSIDEX(0, ns_lookup_miss, ns_lookup_miss_0); return NULL; } if (softn->ipf_nat_stats.ns_wilds == 0 || (fin->fin_flx & FI_NOWILD)) { NBUMPSIDEX(0, ns_lookup_nowild, ns_lookup_nowild_0); return NULL; } RWLOCK_EXIT(&softc->ipf_nat); hv = NAT_HASH_FN(dst, 0, 0xffffffff); hv = NAT_HASH_FN(src.s_addr, hv, softn->ipf_nat_table_sz); WRITE_ENTER(&softc->ipf_nat); nat = softn->ipf_nat_table[1][hv]; /* TRACE dst, src, hv, nat */ for (; nat; nat = nat->nat_hnext[1]) { if (nat->nat_ifps[0] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[0])) continue; } if (nat->nat_pr[0] != fin->fin_p) continue; switch (nat->nat_dir & (NAT_INBOUND|NAT_OUTBOUND)) { case NAT_INBOUND : if (nat->nat_v[0] != 4) continue; if (nat->nat_osrcaddr != src.s_addr || nat->nat_odstaddr != dst) continue; break; case NAT_OUTBOUND : if (nat->nat_v[1] != 4) continue; if (nat->nat_ndstaddr != src.s_addr || nat->nat_nsrcaddr != dst) continue; break; } nflags = nat->nat_flags; if (!(nflags & (NAT_TCPUDP|SI_WILDP))) continue; if (ipf_nat_wildok(nat, (int)sport, (int)dport, nflags, NAT_INBOUND) == 1) { if ((fin->fin_flx & FI_IGNORE) != 0) break; if ((nflags & SI_CLONE) != 0) { nat = ipf_nat_clone(fin, nat); if (nat == NULL) break; } else { MUTEX_ENTER(&softn->ipf_nat_new); softn->ipf_nat_stats.ns_wilds--; MUTEX_EXIT(&softn->ipf_nat_new); } if (nat->nat_dir == NAT_INBOUND) { if (nat->nat_osport == 0) { nat->nat_osport = sport; nat->nat_nsport = sport; } if (nat->nat_odport == 0) { nat->nat_odport = dport; nat->nat_ndport = dport; } } else if (nat->nat_dir == NAT_OUTBOUND) { if (nat->nat_osport == 0) { nat->nat_osport = dport; nat->nat_nsport = dport; } if (nat->nat_odport == 0) { nat->nat_odport = sport; nat->nat_ndport = sport; } } if ((nat->nat_ifps[0] == NULL) && (ifp != NULL)) { nat->nat_ifps[0] = ifp; nat->nat_mtu[0] = GETIFMTU_4(ifp); } nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); ipf_nat_tabmove(softn, nat); break; } } MUTEX_DOWNGRADE(&softc->ipf_nat); if (nat == NULL) { NBUMPSIDE(0, ns_lookup_miss); } return nat; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_tabmove */ /* Returns: Nil */ /* Parameters: softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to NAT structure */ /* Write Lock: ipf_nat */ /* */ /* This function is only called for TCP/UDP NAT table entries where the */ /* original was placed in the table without hashing on the ports and we now */ /* want to include hashing on port numbers. */ /* ------------------------------------------------------------------------ */ static void ipf_nat_tabmove(softn, nat) ipf_nat_softc_t *softn; nat_t *nat; { u_int hv0, hv1, rhv0, rhv1; natstat_t *nsp; nat_t **natp; if (nat->nat_flags & SI_CLONE) return; nsp = &softn->ipf_nat_stats; /* * Remove the NAT entry from the old location */ if (nat->nat_hnext[0]) nat->nat_hnext[0]->nat_phnext[0] = nat->nat_phnext[0]; *nat->nat_phnext[0] = nat->nat_hnext[0]; nsp->ns_side[0].ns_bucketlen[nat->nat_hv[0] % softn->ipf_nat_table_sz]--; if (nat->nat_hnext[1]) nat->nat_hnext[1]->nat_phnext[1] = nat->nat_phnext[1]; *nat->nat_phnext[1] = nat->nat_hnext[1]; nsp->ns_side[1].ns_bucketlen[nat->nat_hv[1] % softn->ipf_nat_table_sz]--; /* * Add into the NAT table in the new position */ rhv0 = NAT_HASH_FN(nat->nat_osrcaddr, nat->nat_osport, 0xffffffff); rhv0 = NAT_HASH_FN(nat->nat_odstaddr, rhv0 + nat->nat_odport, 0xffffffff); rhv1 = NAT_HASH_FN(nat->nat_nsrcaddr, nat->nat_nsport, 0xffffffff); rhv1 = NAT_HASH_FN(nat->nat_ndstaddr, rhv1 + nat->nat_ndport, 0xffffffff); hv0 = rhv0 % softn->ipf_nat_table_sz; hv1 = rhv1 % softn->ipf_nat_table_sz; if (nat->nat_dir == NAT_INBOUND || nat->nat_dir == NAT_DIVERTIN) { u_int swap; swap = hv0; hv0 = hv1; hv1 = swap; } /* TRACE nat_osrcaddr, nat_osport, nat_odstaddr, nat_odport, hv0 */ /* TRACE nat_nsrcaddr, nat_nsport, nat_ndstaddr, nat_ndport, hv1 */ nat->nat_hv[0] = rhv0; natp = &softn->ipf_nat_table[0][hv0]; if (*natp) (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; nat->nat_phnext[0] = natp; nat->nat_hnext[0] = *natp; *natp = nat; nsp->ns_side[0].ns_bucketlen[hv0]++; nat->nat_hv[1] = rhv1; natp = &softn->ipf_nat_table[1][hv1]; if (*natp) (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; nat->nat_phnext[1] = natp; nat->nat_hnext[1] = *natp; *natp = nat; nsp->ns_side[1].ns_bucketlen[hv1]++; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_outlookup */ /* Returns: nat_t* - NULL == no match, */ /* else pointer to matching NAT entry */ /* Parameters: fin(I) - pointer to packet information */ /* flags(I) - NAT flags for this packet */ /* p(I) - protocol for this packet */ /* src(I) - source IP address */ /* dst(I) - destination IP address */ /* rw(I) - 1 == write lock on held, 0 == read lock. */ /* */ /* Lookup a nat entry based on the source 'real' ip address/port and */ /* destination address/port. We use this lookup when sending a packet out, */ /* we're looking for a table entry, based on the source address. */ /* */ /* NOTE: THE PACKET BEING CHECKED (IF FOUND) HAS A MAPPING ALREADY. */ /* */ /* NOTE: IT IS ASSUMED THAT IS ONLY HELD WITH A READ LOCK WHEN */ /* THIS FUNCTION IS CALLED WITH NAT_SEARCH SET IN nflags. */ /* */ /* flags -> relevant are IPN_UDP/IPN_TCP/IPN_ICMPQUERY that indicate if */ /* the packet is of said protocol */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_outlookup(fin, flags, p, src, dst) fr_info_t *fin; u_int flags, p; struct in_addr src , dst; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_short sport, dport; u_int sflags; ipnat_t *ipn; nat_t *nat; void *ifp; u_int hv; ifp = fin->fin_ifp; sflags = flags & IPN_TCPUDPICMP; sport = 0; dport = 0; switch (p) { case IPPROTO_TCP : case IPPROTO_UDP : sport = htons(fin->fin_data[0]); dport = htons(fin->fin_data[1]); break; case IPPROTO_ICMP : if (flags & IPN_ICMPERR) sport = fin->fin_data[1]; else dport = fin->fin_data[1]; break; default : break; } if ((flags & SI_WILDP) != 0) goto find_out_wild_ports; hv = NAT_HASH_FN(src.s_addr, sport, 0xffffffff); hv = NAT_HASH_FN(dst.s_addr, hv + dport, softn->ipf_nat_table_sz); nat = softn->ipf_nat_table[0][hv]; /* TRACE src, sport, dst, dport, hv, nat */ for (; nat; nat = nat->nat_hnext[0]) { if (nat->nat_ifps[1] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) continue; } if (nat->nat_pr[1] != p) continue; switch (nat->nat_dir) { case NAT_INBOUND : case NAT_DIVERTIN : if (nat->nat_v[1] != 4) continue; if (nat->nat_ndstaddr != src.s_addr || nat->nat_nsrcaddr != dst.s_addr) continue; if ((nat->nat_flags & IPN_TCPUDP) != 0) { if (nat->nat_ndport != sport) continue; if (nat->nat_nsport != dport) continue; } else if (p == IPPROTO_ICMP) { if (nat->nat_osport != dport) { continue; } } break; case NAT_OUTBOUND : case NAT_DIVERTOUT : if (nat->nat_v[0] != 4) continue; if (nat->nat_osrcaddr != src.s_addr || nat->nat_odstaddr != dst.s_addr) continue; if ((nat->nat_flags & IPN_TCPUDP) != 0) { if (nat->nat_odport != dport) continue; if (nat->nat_osport != sport) continue; } else if (p == IPPROTO_ICMP) { if (nat->nat_osport != dport) { continue; } } break; } ipn = nat->nat_ptr; if ((ipn != NULL) && (nat->nat_aps != NULL)) if (ipf_proxy_match(fin, nat) != 0) continue; if ((nat->nat_ifps[1] == NULL) && (ifp != NULL)) { nat->nat_ifps[1] = ifp; nat->nat_mtu[1] = GETIFMTU_4(ifp); } return nat; } /* * So if we didn't find it but there are wildcard members in the hash * table, go back and look for them. We do this search and update here * because it is modifying the NAT table and we want to do this only * for the first packet that matches. The exception, of course, is * for "dummy" (FI_IGNORE) lookups. */ find_out_wild_ports: if (!(flags & NAT_TCPUDP) || !(flags & NAT_SEARCH)) { NBUMPSIDEX(1, ns_lookup_miss, ns_lookup_miss_1); return NULL; } if (softn->ipf_nat_stats.ns_wilds == 0 || (fin->fin_flx & FI_NOWILD)) { NBUMPSIDEX(1, ns_lookup_nowild, ns_lookup_nowild_1); return NULL; } RWLOCK_EXIT(&softc->ipf_nat); hv = NAT_HASH_FN(src.s_addr, 0, 0xffffffff); hv = NAT_HASH_FN(dst.s_addr, hv, softn->ipf_nat_table_sz); WRITE_ENTER(&softc->ipf_nat); nat = softn->ipf_nat_table[0][hv]; for (; nat; nat = nat->nat_hnext[0]) { if (nat->nat_ifps[1] != NULL) { if ((ifp != NULL) && (ifp != nat->nat_ifps[1])) continue; } if (nat->nat_pr[1] != fin->fin_p) continue; switch (nat->nat_dir & (NAT_INBOUND|NAT_OUTBOUND)) { case NAT_INBOUND : if (nat->nat_v[1] != 4) continue; if (nat->nat_ndstaddr != src.s_addr || nat->nat_nsrcaddr != dst.s_addr) continue; break; case NAT_OUTBOUND : if (nat->nat_v[0] != 4) continue; if (nat->nat_osrcaddr != src.s_addr || nat->nat_odstaddr != dst.s_addr) continue; break; } if (!(nat->nat_flags & (NAT_TCPUDP|SI_WILDP))) continue; if (ipf_nat_wildok(nat, (int)sport, (int)dport, nat->nat_flags, NAT_OUTBOUND) == 1) { if ((fin->fin_flx & FI_IGNORE) != 0) break; if ((nat->nat_flags & SI_CLONE) != 0) { nat = ipf_nat_clone(fin, nat); if (nat == NULL) break; } else { MUTEX_ENTER(&softn->ipf_nat_new); softn->ipf_nat_stats.ns_wilds--; MUTEX_EXIT(&softn->ipf_nat_new); } if (nat->nat_dir == NAT_OUTBOUND) { if (nat->nat_osport == 0) { nat->nat_osport = sport; nat->nat_nsport = sport; } if (nat->nat_odport == 0) { nat->nat_odport = dport; nat->nat_ndport = dport; } } else if (nat->nat_dir == NAT_INBOUND) { if (nat->nat_osport == 0) { nat->nat_osport = dport; nat->nat_nsport = dport; } if (nat->nat_odport == 0) { nat->nat_odport = sport; nat->nat_ndport = sport; } } if ((nat->nat_ifps[1] == NULL) && (ifp != NULL)) { nat->nat_ifps[1] = ifp; nat->nat_mtu[1] = GETIFMTU_4(ifp); } nat->nat_flags &= ~(SI_W_DPORT|SI_W_SPORT); ipf_nat_tabmove(softn, nat); break; } } MUTEX_DOWNGRADE(&softc->ipf_nat); if (nat == NULL) { NBUMPSIDE(1, ns_lookup_miss); } return nat; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_lookupredir */ /* Returns: nat_t* - NULL == no match, */ /* else pointer to matching NAT entry */ /* Parameters: np(I) - pointer to description of packet to find NAT table */ /* entry for. */ /* */ /* Lookup the NAT tables to search for a matching redirect */ /* The contents of natlookup_t should imitate those found in a packet that */ /* would be translated - ie a packet coming in for RDR or going out for MAP.*/ /* We can do the lookup in one of two ways, imitating an inbound or */ /* outbound packet. By default we assume outbound, unless IPN_IN is set. */ /* For IN, the fields are set as follows: */ /* nl_real* = source information */ /* nl_out* = destination information (translated) */ /* For an out packet, the fields are set like this: */ /* nl_in* = source information (untranslated) */ /* nl_out* = destination information (translated) */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_lookupredir(np) natlookup_t *np; { fr_info_t fi; nat_t *nat; bzero((char *)&fi, sizeof(fi)); if (np->nl_flags & IPN_IN) { fi.fin_data[0] = ntohs(np->nl_realport); fi.fin_data[1] = ntohs(np->nl_outport); } else { fi.fin_data[0] = ntohs(np->nl_inport); fi.fin_data[1] = ntohs(np->nl_outport); } if (np->nl_flags & IPN_TCP) fi.fin_p = IPPROTO_TCP; else if (np->nl_flags & IPN_UDP) fi.fin_p = IPPROTO_UDP; else if (np->nl_flags & (IPN_ICMPERR|IPN_ICMPQUERY)) fi.fin_p = IPPROTO_ICMP; /* * We can do two sorts of lookups: * - IPN_IN: we have the `real' and `out' address, look for `in'. * - default: we have the `in' and `out' address, look for `real'. */ if (np->nl_flags & IPN_IN) { if ((nat = ipf_nat_inlookup(&fi, np->nl_flags, fi.fin_p, np->nl_realip, np->nl_outip))) { np->nl_inip = nat->nat_odstip; np->nl_inport = nat->nat_odport; } } else { /* * If nl_inip is non null, this is a lookup based on the real * ip address. Else, we use the fake. */ if ((nat = ipf_nat_outlookup(&fi, np->nl_flags, fi.fin_p, np->nl_inip, np->nl_outip))) { if ((np->nl_flags & IPN_FINDFORWARD) != 0) { fr_info_t fin; bzero((char *)&fin, sizeof(fin)); fin.fin_p = nat->nat_pr[0]; fin.fin_data[0] = ntohs(nat->nat_ndport); fin.fin_data[1] = ntohs(nat->nat_nsport); if (ipf_nat_inlookup(&fin, np->nl_flags, fin.fin_p, nat->nat_ndstip, nat->nat_nsrcip) != NULL) { np->nl_flags &= ~IPN_FINDFORWARD; } } np->nl_realip = nat->nat_ndstip; np->nl_realport = nat->nat_ndport; } } return nat; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_match */ /* Returns: int - 0 == no match, 1 == match */ /* Parameters: fin(I) - pointer to packet information */ /* np(I) - pointer to NAT rule */ /* */ /* Pull the matching of a packet against a NAT rule out of that complex */ /* loop inside ipf_nat_checkin() and lay it out properly in its own function. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_match(fin, np) fr_info_t *fin; ipnat_t *np; { ipf_main_softc_t *softc = fin->fin_main_soft; frtuc_t *ft; int match; match = 0; switch (np->in_osrcatype) { case FRI_NORMAL : match = ((fin->fin_saddr & np->in_osrcmsk) != np->in_osrcaddr); break; case FRI_LOOKUP : match = (*np->in_osrcfunc)(softc, np->in_osrcptr, 4, &fin->fin_saddr, fin->fin_plen); break; } match ^= ((np->in_flags & IPN_NOTSRC) != 0); if (match) return 0; match = 0; switch (np->in_odstatype) { case FRI_NORMAL : match = ((fin->fin_daddr & np->in_odstmsk) != np->in_odstaddr); break; case FRI_LOOKUP : match = (*np->in_odstfunc)(softc, np->in_odstptr, 4, &fin->fin_daddr, fin->fin_plen); break; } match ^= ((np->in_flags & IPN_NOTDST) != 0); if (match) return 0; ft = &np->in_tuc; if (!(fin->fin_flx & FI_TCPUDP) || (fin->fin_flx & (FI_SHORT|FI_FRAGBODY))) { if (ft->ftu_scmp || ft->ftu_dcmp) return 0; return 1; } return ipf_tcpudpchk(&fin->fin_fi, ft); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_update */ /* Returns: Nil */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT structure */ /* */ /* Updates the lifetime of a NAT table entry for non-TCP packets. Must be */ /* called with fin_rev updated - i.e. after calling ipf_nat_proto(). */ /* */ /* This *MUST* be called after ipf_nat_proto() as it expects fin_rev to */ /* already be set. */ /* ------------------------------------------------------------------------ */ void ipf_nat_update(fin, nat) fr_info_t *fin; nat_t *nat; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; ipftq_t *ifq, *ifq2; ipftqent_t *tqe; ipnat_t *np = nat->nat_ptr; tqe = &nat->nat_tqe; ifq = tqe->tqe_ifq; /* * We allow over-riding of NAT timeouts from NAT rules, even for * TCP, however, if it is TCP and there is no rule timeout set, * then do not update the timeout here. */ if (np != NULL) { np->in_bytes[fin->fin_rev] += fin->fin_plen; ifq2 = np->in_tqehead[fin->fin_rev]; } else { ifq2 = NULL; } if (nat->nat_pr[0] == IPPROTO_TCP && ifq2 == NULL) { (void) ipf_tcp_age(&nat->nat_tqe, fin, softn->ipf_nat_tcptq, 0, 2); } else { if (ifq2 == NULL) { if (nat->nat_pr[0] == IPPROTO_UDP) ifq2 = fin->fin_rev ? &softn->ipf_nat_udpacktq : &softn->ipf_nat_udptq; else if (nat->nat_pr[0] == IPPROTO_ICMP || nat->nat_pr[0] == IPPROTO_ICMPV6) ifq2 = fin->fin_rev ? &softn->ipf_nat_icmpacktq: &softn->ipf_nat_icmptq; else ifq2 = &softn->ipf_nat_iptq; } ipf_movequeue(softc->ipf_ticks, tqe, ifq, ifq2); } } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_checkout */ /* Returns: int - -1 == packet failed NAT checks so block it, */ /* 0 == no packet translation occurred, */ /* 1 == packet was successfully translated. */ /* Parameters: fin(I) - pointer to packet information */ /* passp(I) - pointer to filtering result flags */ /* */ /* Check to see if an outcoming packet should be changed. ICMP packets are */ /* first checked to see if they match an existing entry (if an error), */ /* otherwise a search of the current NAT table is made. If neither results */ /* in a match then a search for a matching NAT rule is made. Create a new */ /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ /* packet header(s) as required. */ /* ------------------------------------------------------------------------ */ int ipf_nat_checkout(fin, passp) fr_info_t *fin; u_32_t *passp; { ipnat_t *np = NULL, *npnext; struct ifnet *ifp, *sifp; ipf_main_softc_t *softc; ipf_nat_softc_t *softn; icmphdr_t *icmp = NULL; tcphdr_t *tcp = NULL; int rval, natfailed; u_int nflags = 0; u_32_t ipa, iph; int natadd = 1; frentry_t *fr; nat_t *nat; if (fin->fin_v == 6) { #ifdef USE_INET6 return ipf_nat6_checkout(fin, passp); #else return 0; #endif } softc = fin->fin_main_soft; softn = softc->ipf_nat_soft; if (softn->ipf_nat_lock != 0) return 0; if (softn->ipf_nat_stats.ns_rules == 0 && softn->ipf_nat_instances == NULL) return 0; natfailed = 0; fr = fin->fin_fr; sifp = fin->fin_ifp; if (fr != NULL) { ifp = fr->fr_tifs[fin->fin_rev].fd_ptr; if ((ifp != NULL) && (ifp != (void *)-1)) fin->fin_ifp = ifp; } ifp = fin->fin_ifp; if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { switch (fin->fin_p) { case IPPROTO_TCP : nflags = IPN_TCP; break; case IPPROTO_UDP : nflags = IPN_UDP; break; case IPPROTO_ICMP : icmp = fin->fin_dp; /* * This is an incoming packet, so the destination is * the icmp_id and the source port equals 0 */ if ((fin->fin_flx & FI_ICMPQUERY) != 0) nflags = IPN_ICMPQUERY; break; default : break; } if ((nflags & IPN_TCPUDP)) tcp = fin->fin_dp; } ipa = fin->fin_saddr; READ_ENTER(&softc->ipf_nat); if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && (nat = ipf_nat_icmperror(fin, &nflags, NAT_OUTBOUND))) /*EMPTY*/; else if ((fin->fin_flx & FI_FRAG) && (nat = ipf_frag_natknown(fin))) natadd = 0; else if ((nat = ipf_nat_outlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, fin->fin_src, fin->fin_dst))) { nflags = nat->nat_flags; } else if (fin->fin_off == 0) { u_32_t hv, msk, nmsk = 0; /* * If there is no current entry in the nat table for this IP#, * create one for it (if there is a matching rule). */ maskloop: msk = softn->ipf_nat_map_active_masks[nmsk]; iph = ipa & msk; hv = NAT_HASH_FN(iph, 0, softn->ipf_nat_maprules_sz); retry_roundrobin: for (np = softn->ipf_nat_map_rules[hv]; np; np = npnext) { npnext = np->in_mnext; if ((np->in_ifps[1] && (np->in_ifps[1] != ifp))) continue; if (np->in_v[0] != 4) continue; if (np->in_pr[1] && (np->in_pr[1] != fin->fin_p)) continue; if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) continue; if (np->in_flags & IPN_FILTER) { switch (ipf_nat_match(fin, np)) { case 0 : continue; case -1 : rval = -1; goto outmatchfail; case 1 : default : break; } } else if ((ipa & np->in_osrcmsk) != np->in_osrcaddr) continue; if ((fr != NULL) && !ipf_matchtag(&np->in_tag, &fr->fr_nattag)) continue; if (np->in_plabel != -1) { if (((np->in_flags & IPN_FILTER) == 0) && (np->in_odport != fin->fin_data[1])) continue; if (ipf_proxy_ok(fin, tcp, np) == 0) continue; } if (np->in_flags & IPN_NO) { np->in_hits++; break; } MUTEX_ENTER(&softn->ipf_nat_new); /* * If we've matched a round-robin rule but it has * moved in the list since we got it, start over as * this is now no longer correct. */ if (npnext != np->in_mnext) { if ((np->in_flags & IPN_ROUNDR) != 0) { MUTEX_EXIT(&softn->ipf_nat_new); goto retry_roundrobin; } npnext = np->in_mnext; } nat = ipf_nat_add(fin, np, NULL, nflags, NAT_OUTBOUND); MUTEX_EXIT(&softn->ipf_nat_new); if (nat != NULL) { natfailed = 0; break; } natfailed = -1; } if ((np == NULL) && (nmsk < softn->ipf_nat_map_max)) { nmsk++; goto maskloop; } } if (nat != NULL) { rval = ipf_nat_out(fin, nat, natadd, nflags); if (rval == 1) { MUTEX_ENTER(&nat->nat_lock); ipf_nat_update(fin, nat); nat->nat_bytes[1] += fin->fin_plen; nat->nat_pkts[1]++; fin->fin_pktnum = nat->nat_pkts[1]; MUTEX_EXIT(&nat->nat_lock); } } else rval = natfailed; outmatchfail: RWLOCK_EXIT(&softc->ipf_nat); switch (rval) { case -1 : if (passp != NULL) { DT1(frb_natv4out, fr_info_t *, fin); NBUMPSIDED(1, ns_drop); *passp = FR_BLOCK; fin->fin_reason = FRB_NATV4; } fin->fin_flx |= FI_BADNAT; NBUMPSIDED(1, ns_badnat); break; case 0 : NBUMPSIDE(1, ns_ignored); break; case 1 : NBUMPSIDE(1, ns_translated); break; } fin->fin_ifp = sifp; return rval; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_out */ /* Returns: int - -1 == packet failed NAT checks so block it, */ /* 1 == packet was successfully translated. */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT structure */ /* natadd(I) - flag indicating if it is safe to add frag cache */ /* nflags(I) - NAT flags set for this packet */ /* */ /* Translate a packet coming "out" on an interface. */ /* ------------------------------------------------------------------------ */ int ipf_nat_out(fin, nat, natadd, nflags) fr_info_t *fin; nat_t *nat; int natadd; u_32_t nflags; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; icmphdr_t *icmp; tcphdr_t *tcp; ipnat_t *np; int skip; int i; tcp = NULL; icmp = NULL; np = nat->nat_ptr; if ((natadd != 0) && (fin->fin_flx & FI_FRAG) && (np != NULL)) (void) ipf_frag_natnew(softc, fin, 0, nat); /* * Fix up checksums, not by recalculating them, but * simply computing adjustments. * This is only done for STREAMS based IP implementations where the * checksum has already been calculated by IP. In all other cases, * IPFilter is called before the checksum needs calculating so there * is no call to modify whatever is in the header now. */ if (nflags == IPN_ICMPERR) { u_32_t s1, s2, sumd, msumd; s1 = LONG_SUM(ntohl(fin->fin_saddr)); if (nat->nat_dir == NAT_OUTBOUND) { s2 = LONG_SUM(ntohl(nat->nat_nsrcaddr)); } else { s2 = LONG_SUM(ntohl(nat->nat_odstaddr)); } CALC_SUMD(s1, s2, sumd); msumd = sumd; s1 = LONG_SUM(ntohl(fin->fin_daddr)); if (nat->nat_dir == NAT_OUTBOUND) { s2 = LONG_SUM(ntohl(nat->nat_ndstaddr)); } else { s2 = LONG_SUM(ntohl(nat->nat_osrcaddr)); } CALC_SUMD(s1, s2, sumd); msumd += sumd; ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, msumd, 0); } #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(linux) || defined(BRIDGE_IPF) || defined(__FreeBSD__) else { /* * Strictly speaking, this isn't necessary on BSD * kernels because they do checksum calculation after * this code has run BUT if ipfilter is being used * to do NAT as a bridge, that code doesn't exist. */ switch (nat->nat_dir) { case NAT_OUTBOUND : ipf_fix_outcksum(fin->fin_cksum & FI_CK_L4PART, &fin->fin_ip->ip_sum, nat->nat_ipsumd, 0); break; case NAT_INBOUND : ipf_fix_incksum(fin->fin_cksum & FI_CK_L4PART, &fin->fin_ip->ip_sum, nat->nat_ipsumd, 0); break; default : break; } } #endif /* * Address assignment is after the checksum modification because * we are using the address in the packet for determining the * correct checksum offset (the ICMP error could be coming from * anyone...) */ switch (nat->nat_dir) { case NAT_OUTBOUND : fin->fin_ip->ip_src = nat->nat_nsrcip; fin->fin_saddr = nat->nat_nsrcaddr; fin->fin_ip->ip_dst = nat->nat_ndstip; fin->fin_daddr = nat->nat_ndstaddr; break; case NAT_INBOUND : fin->fin_ip->ip_src = nat->nat_odstip; fin->fin_saddr = nat->nat_ndstaddr; fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_nsrcaddr; break; case NAT_DIVERTIN : { mb_t *m; skip = ipf_nat_decap(fin, nat); if (skip <= 0) { NBUMPSIDED(1, ns_decap_fail); return -1; } m = fin->fin_m; #if defined(MENTAT) && defined(_KERNEL) m->b_rptr += skip; #else m->m_data += skip; m->m_len -= skip; # ifdef M_PKTHDR if (m->m_flags & M_PKTHDR) m->m_pkthdr.len -= skip; # endif #endif MUTEX_ENTER(&nat->nat_lock); ipf_nat_update(fin, nat); MUTEX_EXIT(&nat->nat_lock); fin->fin_flx |= FI_NATED; if (np != NULL && np->in_tag.ipt_num[0] != 0) fin->fin_nattag = &np->in_tag; return 1; /* NOTREACHED */ } case NAT_DIVERTOUT : { u_32_t s1, s2, sumd; udphdr_t *uh; ip_t *ip; mb_t *m; m = M_DUP(np->in_divmp); if (m == NULL) { NBUMPSIDED(1, ns_divert_dup); return -1; } ip = MTOD(m, ip_t *); ip_fillid(ip); s2 = ntohs(ip->ip_id); s1 = ip->ip_len; ip->ip_len = ntohs(ip->ip_len); ip->ip_len += fin->fin_plen; ip->ip_len = htons(ip->ip_len); s2 += ntohs(ip->ip_len); CALC_SUMD(s1, s2, sumd); uh = (udphdr_t *)(ip + 1); uh->uh_ulen += fin->fin_plen; uh->uh_ulen = htons(uh->uh_ulen); #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(linux) || defined(BRIDGE_IPF) || defined(__FreeBSD__) ipf_fix_outcksum(0, &ip->ip_sum, sumd, 0); #endif PREP_MB_T(fin, m); fin->fin_src = ip->ip_src; fin->fin_dst = ip->ip_dst; fin->fin_ip = ip; fin->fin_plen += sizeof(ip_t) + 8; /* UDP + IPv4 hdr */ fin->fin_dlen += sizeof(ip_t) + 8; /* UDP + IPv4 hdr */ nflags &= ~IPN_TCPUDPICMP; break; } default : break; } if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { u_short *csump; if ((nat->nat_nsport != 0) && (nflags & IPN_TCPUDP)) { tcp = fin->fin_dp; switch (nat->nat_dir) { case NAT_OUTBOUND : tcp->th_sport = nat->nat_nsport; fin->fin_data[0] = ntohs(nat->nat_nsport); tcp->th_dport = nat->nat_ndport; fin->fin_data[1] = ntohs(nat->nat_ndport); break; case NAT_INBOUND : tcp->th_sport = nat->nat_odport; fin->fin_data[0] = ntohs(nat->nat_odport); tcp->th_dport = nat->nat_osport; fin->fin_data[1] = ntohs(nat->nat_osport); break; } } if ((nat->nat_nsport != 0) && (nflags & IPN_ICMPQUERY)) { icmp = fin->fin_dp; icmp->icmp_id = nat->nat_nicmpid; } csump = ipf_nat_proto(fin, nat, nflags); /* * The above comments do not hold for layer 4 (or higher) * checksums... */ if (csump != NULL) { if (nat->nat_dir == NAT_OUTBOUND) ipf_fix_outcksum(fin->fin_cksum, csump, nat->nat_sumd[0], nat->nat_sumd[1] + fin->fin_dlen); else ipf_fix_incksum(fin->fin_cksum, csump, nat->nat_sumd[0], nat->nat_sumd[1] + fin->fin_dlen); } } ipf_sync_update(softc, SMC_NAT, fin, nat->nat_sync); /* ------------------------------------------------------------- */ /* A few quick notes: */ /* Following are test conditions prior to calling the */ /* ipf_proxy_check routine. */ /* */ /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ /* with a redirect rule, we attempt to match the packet's */ /* source port against in_dport, otherwise we'd compare the */ /* packet's destination. */ /* ------------------------------------------------------------- */ if ((np != NULL) && (np->in_apr != NULL)) { i = ipf_proxy_check(fin, nat); if (i == 0) { i = 1; } else if (i == -1) { NBUMPSIDED(1, ns_ipf_proxy_fail); } } else { i = 1; } fin->fin_flx |= FI_NATED; return i; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_checkin */ /* Returns: int - -1 == packet failed NAT checks so block it, */ /* 0 == no packet translation occurred, */ /* 1 == packet was successfully translated. */ /* Parameters: fin(I) - pointer to packet information */ /* passp(I) - pointer to filtering result flags */ /* */ /* Check to see if an incoming packet should be changed. ICMP packets are */ /* first checked to see if they match an existing entry (if an error), */ /* otherwise a search of the current NAT table is made. If neither results */ /* in a match then a search for a matching NAT rule is made. Create a new */ /* NAT entry if a we matched a NAT rule. Lastly, actually change the */ /* packet header(s) as required. */ /* ------------------------------------------------------------------------ */ int ipf_nat_checkin(fin, passp) fr_info_t *fin; u_32_t *passp; { ipf_main_softc_t *softc; ipf_nat_softc_t *softn; u_int nflags, natadd; ipnat_t *np, *npnext; int rval, natfailed; struct ifnet *ifp; struct in_addr in; icmphdr_t *icmp; tcphdr_t *tcp; u_short dport; nat_t *nat; u_32_t iph; softc = fin->fin_main_soft; softn = softc->ipf_nat_soft; if (softn->ipf_nat_lock != 0) return 0; if (softn->ipf_nat_stats.ns_rules == 0 && softn->ipf_nat_instances == NULL) return 0; tcp = NULL; icmp = NULL; dport = 0; natadd = 1; nflags = 0; natfailed = 0; ifp = fin->fin_ifp; if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { switch (fin->fin_p) { case IPPROTO_TCP : nflags = IPN_TCP; break; case IPPROTO_UDP : nflags = IPN_UDP; break; case IPPROTO_ICMP : icmp = fin->fin_dp; /* * This is an incoming packet, so the destination is * the icmp_id and the source port equals 0 */ if ((fin->fin_flx & FI_ICMPQUERY) != 0) { nflags = IPN_ICMPQUERY; dport = icmp->icmp_id; } break; default : break; } if ((nflags & IPN_TCPUDP)) { tcp = fin->fin_dp; dport = fin->fin_data[1]; } } in = fin->fin_dst; READ_ENTER(&softc->ipf_nat); if ((fin->fin_p == IPPROTO_ICMP) && !(nflags & IPN_ICMPQUERY) && (nat = ipf_nat_icmperror(fin, &nflags, NAT_INBOUND))) /*EMPTY*/; else if ((fin->fin_flx & FI_FRAG) && (nat = ipf_frag_natknown(fin))) natadd = 0; else if ((nat = ipf_nat_inlookup(fin, nflags|NAT_SEARCH, (u_int)fin->fin_p, fin->fin_src, in))) { nflags = nat->nat_flags; } else if (fin->fin_off == 0) { u_32_t hv, msk, rmsk = 0; /* * If there is no current entry in the nat table for this IP#, * create one for it (if there is a matching rule). */ maskloop: msk = softn->ipf_nat_rdr_active_masks[rmsk]; iph = in.s_addr & msk; hv = NAT_HASH_FN(iph, 0, softn->ipf_nat_rdrrules_sz); retry_roundrobin: /* TRACE (iph,msk,rmsk,hv,softn->ipf_nat_rdrrules_sz) */ for (np = softn->ipf_nat_rdr_rules[hv]; np; np = npnext) { npnext = np->in_rnext; if (np->in_ifps[0] && (np->in_ifps[0] != ifp)) continue; if (np->in_v[0] != 4) continue; if (np->in_pr[0] && (np->in_pr[0] != fin->fin_p)) continue; if ((np->in_flags & IPN_RF) && !(np->in_flags & nflags)) continue; if (np->in_flags & IPN_FILTER) { switch (ipf_nat_match(fin, np)) { case 0 : continue; case -1 : rval = -1; goto inmatchfail; case 1 : default : break; } } else { if ((in.s_addr & np->in_odstmsk) != np->in_odstaddr) continue; if (np->in_odport && ((np->in_dtop < dport) || (dport < np->in_odport))) continue; } if (np->in_plabel != -1) { if (!ipf_proxy_ok(fin, tcp, np)) { continue; } } if (np->in_flags & IPN_NO) { np->in_hits++; break; } MUTEX_ENTER(&softn->ipf_nat_new); /* * If we've matched a round-robin rule but it has * moved in the list since we got it, start over as * this is now no longer correct. */ if (npnext != np->in_rnext) { if ((np->in_flags & IPN_ROUNDR) != 0) { MUTEX_EXIT(&softn->ipf_nat_new); goto retry_roundrobin; } npnext = np->in_rnext; } nat = ipf_nat_add(fin, np, NULL, nflags, NAT_INBOUND); MUTEX_EXIT(&softn->ipf_nat_new); if (nat != NULL) { natfailed = 0; break; } natfailed = -1; } if ((np == NULL) && (rmsk < softn->ipf_nat_rdr_max)) { rmsk++; goto maskloop; } } if (nat != NULL) { rval = ipf_nat_in(fin, nat, natadd, nflags); if (rval == 1) { MUTEX_ENTER(&nat->nat_lock); ipf_nat_update(fin, nat); nat->nat_bytes[0] += fin->fin_plen; nat->nat_pkts[0]++; fin->fin_pktnum = nat->nat_pkts[0]; MUTEX_EXIT(&nat->nat_lock); } } else rval = natfailed; inmatchfail: RWLOCK_EXIT(&softc->ipf_nat); switch (rval) { case -1 : if (passp != NULL) { DT1(frb_natv4in, fr_info_t *, fin); NBUMPSIDED(0, ns_drop); *passp = FR_BLOCK; fin->fin_reason = FRB_NATV4; } fin->fin_flx |= FI_BADNAT; NBUMPSIDED(0, ns_badnat); break; case 0 : NBUMPSIDE(0, ns_ignored); break; case 1 : NBUMPSIDE(0, ns_translated); break; } return rval; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_in */ /* Returns: int - -1 == packet failed NAT checks so block it, */ /* 1 == packet was successfully translated. */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT structure */ /* natadd(I) - flag indicating if it is safe to add frag cache */ /* nflags(I) - NAT flags set for this packet */ /* Locks Held: ipf_nat(READ) */ /* */ /* Translate a packet coming "in" on an interface. */ /* ------------------------------------------------------------------------ */ int ipf_nat_in(fin, nat, natadd, nflags) fr_info_t *fin; nat_t *nat; int natadd; u_32_t nflags; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t sumd, ipsumd, sum1, sum2; icmphdr_t *icmp; tcphdr_t *tcp; ipnat_t *np; int skip; int i; tcp = NULL; np = nat->nat_ptr; fin->fin_fr = nat->nat_fr; if (np != NULL) { if ((natadd != 0) && (fin->fin_flx & FI_FRAG)) (void) ipf_frag_natnew(softc, fin, 0, nat); /* ------------------------------------------------------------- */ /* A few quick notes: */ /* Following are test conditions prior to calling the */ /* ipf_proxy_check routine. */ /* */ /* A NULL tcp indicates a non TCP/UDP packet. When dealing */ /* with a map rule, we attempt to match the packet's */ /* source port against in_dport, otherwise we'd compare the */ /* packet's destination. */ /* ------------------------------------------------------------- */ if (np->in_apr != NULL) { i = ipf_proxy_check(fin, nat); if (i == -1) { NBUMPSIDED(0, ns_ipf_proxy_fail); return -1; } } } ipf_sync_update(softc, SMC_NAT, fin, nat->nat_sync); ipsumd = nat->nat_ipsumd; /* * Fix up checksums, not by recalculating them, but * simply computing adjustments. * Why only do this for some platforms on inbound packets ? * Because for those that it is done, IP processing is yet to happen * and so the IPv4 header checksum has not yet been evaluated. * Perhaps it should always be done for the benefit of things like * fast forwarding (so that it doesn't need to be recomputed) but with * header checksum offloading, perhaps it is a moot point. */ switch (nat->nat_dir) { case NAT_INBOUND : if ((fin->fin_flx & FI_ICMPERR) == 0) { fin->fin_ip->ip_src = nat->nat_nsrcip; fin->fin_saddr = nat->nat_nsrcaddr; } else { sum1 = nat->nat_osrcaddr; sum2 = nat->nat_nsrcaddr; CALC_SUMD(sum1, sum2, sumd); ipsumd -= sumd; } fin->fin_ip->ip_dst = nat->nat_ndstip; fin->fin_daddr = nat->nat_ndstaddr; #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(__osf__) || defined(linux) ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, ipsumd, 0); #endif break; case NAT_OUTBOUND : if ((fin->fin_flx & FI_ICMPERR) == 0) { fin->fin_ip->ip_src = nat->nat_odstip; fin->fin_saddr = nat->nat_odstaddr; } else { sum1 = nat->nat_odstaddr; sum2 = nat->nat_ndstaddr; CALC_SUMD(sum1, sum2, sumd); ipsumd -= sumd; } fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_osrcaddr; #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(__osf__) || defined(linux) ipf_fix_incksum(0, &fin->fin_ip->ip_sum, ipsumd, 0); #endif break; case NAT_DIVERTIN : { udphdr_t *uh; ip_t *ip; mb_t *m; m = M_DUP(np->in_divmp); if (m == NULL) { NBUMPSIDED(0, ns_divert_dup); return -1; } ip = MTOD(m, ip_t *); ip_fillid(ip); sum1 = ntohs(ip->ip_len); ip->ip_len = ntohs(ip->ip_len); ip->ip_len += fin->fin_plen; ip->ip_len = htons(ip->ip_len); uh = (udphdr_t *)(ip + 1); uh->uh_ulen += fin->fin_plen; uh->uh_ulen = htons(uh->uh_ulen); sum2 = ntohs(ip->ip_id) + ntohs(ip->ip_len); sum2 += ntohs(ip->ip_off) & IP_DF; CALC_SUMD(sum1, sum2, sumd); #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(__osf__) || defined(linux) ipf_fix_outcksum(0, &ip->ip_sum, sumd, 0); #endif PREP_MB_T(fin, m); fin->fin_ip = ip; fin->fin_plen += sizeof(ip_t) + 8; /* UDP + new IPv4 hdr */ fin->fin_dlen += sizeof(ip_t) + 8; /* UDP + old IPv4 hdr */ nflags &= ~IPN_TCPUDPICMP; break; } case NAT_DIVERTOUT : { mb_t *m; skip = ipf_nat_decap(fin, nat); if (skip <= 0) { NBUMPSIDED(0, ns_decap_fail); return -1; } m = fin->fin_m; #if defined(MENTAT) && defined(_KERNEL) m->b_rptr += skip; #else m->m_data += skip; m->m_len -= skip; # ifdef M_PKTHDR if (m->m_flags & M_PKTHDR) m->m_pkthdr.len -= skip; # endif #endif ipf_nat_update(fin, nat); nflags &= ~IPN_TCPUDPICMP; fin->fin_flx |= FI_NATED; if (np != NULL && np->in_tag.ipt_num[0] != 0) fin->fin_nattag = &np->in_tag; return 1; /* NOTREACHED */ } } if (nflags & IPN_TCPUDP) tcp = fin->fin_dp; if (!(fin->fin_flx & FI_SHORT) && (fin->fin_off == 0)) { u_short *csump; if ((nat->nat_odport != 0) && (nflags & IPN_TCPUDP)) { switch (nat->nat_dir) { case NAT_INBOUND : tcp->th_sport = nat->nat_nsport; fin->fin_data[0] = ntohs(nat->nat_nsport); tcp->th_dport = nat->nat_ndport; fin->fin_data[1] = ntohs(nat->nat_ndport); break; case NAT_OUTBOUND : tcp->th_sport = nat->nat_odport; fin->fin_data[0] = ntohs(nat->nat_odport); tcp->th_dport = nat->nat_osport; fin->fin_data[1] = ntohs(nat->nat_osport); break; } } if ((nat->nat_odport != 0) && (nflags & IPN_ICMPQUERY)) { icmp = fin->fin_dp; icmp->icmp_id = nat->nat_nicmpid; } csump = ipf_nat_proto(fin, nat, nflags); /* * The above comments do not hold for layer 4 (or higher) * checksums... */ if (csump != NULL) { if (nat->nat_dir == NAT_OUTBOUND) ipf_fix_incksum(0, csump, nat->nat_sumd[0], 0); else ipf_fix_outcksum(0, csump, nat->nat_sumd[0], 0); } } fin->fin_flx |= FI_NATED; if (np != NULL && np->in_tag.ipt_num[0] != 0) fin->fin_nattag = &np->in_tag; return 1; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_proto */ /* Returns: u_short* - pointer to transport header checksum to update, */ /* NULL if the transport protocol is not recognised */ /* as needing a checksum update. */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT structure */ /* nflags(I) - NAT flags set for this packet */ /* */ /* Return the pointer to the checksum field for each protocol so understood.*/ /* If support for making other changes to a protocol header is required, */ /* that is not strictly 'address' translation, such as clamping the MSS in */ /* TCP down to a specific value, then do it from here. */ /* ------------------------------------------------------------------------ */ u_short * ipf_nat_proto(fin, nat, nflags) fr_info_t *fin; nat_t *nat; u_int nflags; { icmphdr_t *icmp; u_short *csump; tcphdr_t *tcp; udphdr_t *udp; csump = NULL; if (fin->fin_out == 0) { fin->fin_rev = (nat->nat_dir & NAT_OUTBOUND); } else { fin->fin_rev = ((nat->nat_dir & NAT_OUTBOUND) == 0); } switch (fin->fin_p) { case IPPROTO_TCP : tcp = fin->fin_dp; if ((nflags & IPN_TCP) != 0) csump = &tcp->th_sum; /* * Do a MSS CLAMPING on a SYN packet, * only deal IPv4 for now. */ if ((nat->nat_mssclamp != 0) && (tcp->th_flags & TH_SYN) != 0) ipf_nat_mssclamp(tcp, nat->nat_mssclamp, fin, csump); break; case IPPROTO_UDP : udp = fin->fin_dp; if ((nflags & IPN_UDP) != 0) { if (udp->uh_sum != 0) csump = &udp->uh_sum; } break; case IPPROTO_ICMP : icmp = fin->fin_dp; if ((nflags & IPN_ICMPQUERY) != 0) { if (icmp->icmp_cksum != 0) csump = &icmp->icmp_cksum; } break; #ifdef USE_INET6 case IPPROTO_ICMPV6 : { struct icmp6_hdr *icmp6 = (struct icmp6_hdr *)fin->fin_dp; icmp6 = fin->fin_dp; if ((nflags & IPN_ICMPQUERY) != 0) { if (icmp6->icmp6_cksum != 0) csump = &icmp6->icmp6_cksum; } break; } #endif } return csump; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_expire */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* Check all of the timeout queues for entries at the top which need to be */ /* expired. */ /* ------------------------------------------------------------------------ */ void ipf_nat_expire(softc) ipf_main_softc_t *softc; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; ipftq_t *ifq, *ifqnext; ipftqent_t *tqe, *tqn; int i; SPL_INT(s); SPL_NET(s); WRITE_ENTER(&softc->ipf_nat); for (ifq = softn->ipf_nat_tcptq, i = 0; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { if (tqe->tqe_die > softc->ipf_ticks) break; tqn = tqe->tqe_next; ipf_nat_delete(softc, tqe->tqe_parent, NL_EXPIRE); } } for (ifq = softn->ipf_nat_utqe; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); i++) { if (tqe->tqe_die > softc->ipf_ticks) break; tqn = tqe->tqe_next; ipf_nat_delete(softc, tqe->tqe_parent, NL_EXPIRE); } } for (ifq = softn->ipf_nat_utqe; ifq != NULL; ifq = ifqnext) { ifqnext = ifq->ifq_next; if (((ifq->ifq_flags & IFQF_DELETE) != 0) && (ifq->ifq_ref == 0)) { ipf_freetimeoutqueue(softc, ifq); } } if (softn->ipf_nat_doflush != 0) { ipf_nat_extraflush(softc, softn, 2); softn->ipf_nat_doflush = 0; } RWLOCK_EXIT(&softc->ipf_nat); SPL_X(s); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_sync */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* ifp(I) - pointer to network interface */ /* */ /* Walk through all of the currently active NAT sessions, looking for those */ /* which need to have their translated address updated. */ /* ------------------------------------------------------------------------ */ void ipf_nat_sync(softc, ifp) ipf_main_softc_t *softc; void *ifp; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t sum1, sum2, sumd; i6addr_t in; ipnat_t *n; nat_t *nat; void *ifp2; int idx; SPL_INT(s); if (softc->ipf_running <= 0) return; /* * Change IP addresses for NAT sessions for any protocol except TCP * since it will break the TCP connection anyway. The only rules * which will get changed are those which are "map ... -> 0/32", * where the rule specifies the address is taken from the interface. */ SPL_NET(s); WRITE_ENTER(&softc->ipf_nat); if (softc->ipf_running <= 0) { RWLOCK_EXIT(&softc->ipf_nat); return; } for (nat = softn->ipf_nat_instances; nat; nat = nat->nat_next) { if ((nat->nat_flags & IPN_TCP) != 0) continue; n = nat->nat_ptr; if (n != NULL) { if (n->in_v[1] == 4) { if (n->in_redir & NAT_MAP) { if ((n->in_nsrcaddr != 0) || (n->in_nsrcmsk != 0xffffffff)) continue; } else if (n->in_redir & NAT_REDIRECT) { if ((n->in_ndstaddr != 0) || (n->in_ndstmsk != 0xffffffff)) continue; } } #ifdef USE_INET6 if (n->in_v[1] == 4) { if (n->in_redir & NAT_MAP) { if (!IP6_ISZERO(&n->in_nsrcaddr) || !IP6_ISONES(&n->in_nsrcmsk)) continue; } else if (n->in_redir & NAT_REDIRECT) { if (!IP6_ISZERO(&n->in_ndstaddr) || !IP6_ISONES(&n->in_ndstmsk)) continue; } } #endif } if (((ifp == NULL) || (ifp == nat->nat_ifps[0]) || (ifp == nat->nat_ifps[1]))) { nat->nat_ifps[0] = GETIFP(nat->nat_ifnames[0], nat->nat_v[0]); if ((nat->nat_ifps[0] != NULL) && (nat->nat_ifps[0] != (void *)-1)) { nat->nat_mtu[0] = GETIFMTU_4(nat->nat_ifps[0]); } if (nat->nat_ifnames[1][0] != '\0') { nat->nat_ifps[1] = GETIFP(nat->nat_ifnames[1], nat->nat_v[1]); } else { nat->nat_ifps[1] = nat->nat_ifps[0]; } if ((nat->nat_ifps[1] != NULL) && (nat->nat_ifps[1] != (void *)-1)) { nat->nat_mtu[1] = GETIFMTU_4(nat->nat_ifps[1]); } ifp2 = nat->nat_ifps[0]; if (ifp2 == NULL) continue; /* * Change the map-to address to be the same as the * new one. */ sum1 = NATFSUM(nat, nat->nat_v[1], nat_nsrc6); if (ipf_ifpaddr(softc, nat->nat_v[0], FRI_NORMAL, ifp2, &in, NULL) != -1) { if (nat->nat_v[0] == 4) nat->nat_nsrcip = in.in4; } sum2 = NATFSUM(nat, nat->nat_v[1], nat_nsrc6); if (sum1 == sum2) continue; /* * Readjust the checksum adjustment to take into * account the new IP#. */ CALC_SUMD(sum1, sum2, sumd); /* XXX - dont change for TCP when solaris does * hardware checksumming. */ sumd += nat->nat_sumd[0]; nat->nat_sumd[0] = (sumd & 0xffff) + (sumd >> 16); nat->nat_sumd[1] = nat->nat_sumd[0]; } } for (n = softn->ipf_nat_list; (n != NULL); n = n->in_next) { char *base = n->in_names; if ((ifp == NULL) || (n->in_ifps[0] == ifp)) n->in_ifps[0] = ipf_resolvenic(softc, base + n->in_ifnames[0], n->in_v[0]); if ((ifp == NULL) || (n->in_ifps[1] == ifp)) n->in_ifps[1] = ipf_resolvenic(softc, base + n->in_ifnames[1], n->in_v[1]); if (n->in_redir & NAT_REDIRECT) idx = 1; else idx = 0; if (((ifp == NULL) || (n->in_ifps[idx] == ifp)) && (n->in_ifps[idx] != NULL && n->in_ifps[idx] != (void *)-1)) { ipf_nat_nextaddrinit(softc, n->in_names, &n->in_osrc, 0, n->in_ifps[idx]); ipf_nat_nextaddrinit(softc, n->in_names, &n->in_odst, 0, n->in_ifps[idx]); ipf_nat_nextaddrinit(softc, n->in_names, &n->in_nsrc, 0, n->in_ifps[idx]); ipf_nat_nextaddrinit(softc, n->in_names, &n->in_ndst, 0, n->in_ifps[idx]); } } RWLOCK_EXIT(&softc->ipf_nat); SPL_X(s); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_icmpquerytype */ /* Returns: int - 1 == success, 0 == failure */ /* Parameters: icmptype(I) - ICMP type number */ /* */ /* Tests to see if the ICMP type number passed is a query/response type or */ /* not. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_icmpquerytype(icmptype) int icmptype; { /* * For the ICMP query NAT code, it is essential that both the query * and the reply match on the NAT rule. Because the NAT structure * does not keep track of the icmptype, and a single NAT structure * is used for all icmp types with the same src, dest and id, we * simply define the replies as queries as well. The funny thing is, * altough it seems silly to call a reply a query, this is exactly * as it is defined in the IPv4 specification */ switch (icmptype) { case ICMP_ECHOREPLY: case ICMP_ECHO: /* route aedvertisement/solliciation is currently unsupported: */ /* it would require rewriting the ICMP data section */ case ICMP_TSTAMP: case ICMP_TSTAMPREPLY: case ICMP_IREQ: case ICMP_IREQREPLY: case ICMP_MASKREQ: case ICMP_MASKREPLY: return 1; default: return 0; } } /* ------------------------------------------------------------------------ */ /* Function: nat_log */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to NAT structure */ /* action(I) - action related to NAT structure being performed */ /* */ /* Creates a NAT log entry. */ /* ------------------------------------------------------------------------ */ void ipf_nat_log(softc, softn, nat, action) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; struct nat *nat; u_int action; { #ifdef IPFILTER_LOG # ifndef LARGE_NAT struct ipnat *np; int rulen; # endif struct natlog natl; void *items[1]; size_t sizes[1]; int types[1]; bcopy((char *)&nat->nat_osrc6, (char *)&natl.nl_osrcip, sizeof(natl.nl_osrcip)); bcopy((char *)&nat->nat_nsrc6, (char *)&natl.nl_nsrcip, sizeof(natl.nl_nsrcip)); bcopy((char *)&nat->nat_odst6, (char *)&natl.nl_odstip, sizeof(natl.nl_odstip)); bcopy((char *)&nat->nat_ndst6, (char *)&natl.nl_ndstip, sizeof(natl.nl_ndstip)); natl.nl_bytes[0] = nat->nat_bytes[0]; natl.nl_bytes[1] = nat->nat_bytes[1]; natl.nl_pkts[0] = nat->nat_pkts[0]; natl.nl_pkts[1] = nat->nat_pkts[1]; natl.nl_odstport = nat->nat_odport; natl.nl_osrcport = nat->nat_osport; natl.nl_nsrcport = nat->nat_nsport; natl.nl_ndstport = nat->nat_ndport; natl.nl_p[0] = nat->nat_pr[0]; natl.nl_p[1] = nat->nat_pr[1]; natl.nl_v[0] = nat->nat_v[0]; natl.nl_v[1] = nat->nat_v[1]; natl.nl_type = nat->nat_redir; natl.nl_action = action; natl.nl_rule = -1; bcopy(nat->nat_ifnames[0], natl.nl_ifnames[0], sizeof(nat->nat_ifnames[0])); bcopy(nat->nat_ifnames[1], natl.nl_ifnames[1], sizeof(nat->nat_ifnames[1])); # ifndef LARGE_NAT if (nat->nat_ptr != NULL) { for (rulen = 0, np = softn->ipf_nat_list; np != NULL; np = np->in_next, rulen++) if (np == nat->nat_ptr) { natl.nl_rule = rulen; break; } } # endif items[0] = &natl; sizes[0] = sizeof(natl); types[0] = 0; (void) ipf_log_items(softc, IPL_LOGNAT, NULL, items, sizes, types, 1); #endif } #if defined(__OpenBSD__) /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_ifdetach */ /* Returns: Nil */ /* Parameters: ifp(I) - pointer to network interface */ /* */ /* Compatibility interface for OpenBSD to trigger the correct updating of */ /* interface references within IPFilter. */ /* ------------------------------------------------------------------------ */ void ipf_nat_ifdetach(ifp) void *ifp; { ipf_main_softc_t *softc; softc = ipf_get_softc(0); ipf_sync(ifp); return; } #endif /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_rule_deref */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* inp(I) - pointer to pointer to NAT rule */ /* Write Locks: ipf_nat */ /* */ /* Dropping the refernce count for a rule means that whatever held the */ /* pointer to this rule (*inp) is no longer interested in it and when the */ /* reference count drops to zero, any resources allocated for the rule can */ /* be released and the rule itself free'd. */ /* ------------------------------------------------------------------------ */ void ipf_nat_rule_deref(softc, inp) ipf_main_softc_t *softc; ipnat_t **inp; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; ipnat_t *n; n = *inp; *inp = NULL; n->in_use--; if (n->in_use > 0) return; if (n->in_apr != NULL) ipf_proxy_deref(n->in_apr); ipf_nat_rule_fini(softc, n); if (n->in_redir & NAT_REDIRECT) { if ((n->in_flags & IPN_PROXYRULE) == 0) { ATOMIC_DEC32(softn->ipf_nat_stats.ns_rules_rdr); } } if (n->in_redir & (NAT_MAP|NAT_MAPBLK)) { if ((n->in_flags & IPN_PROXYRULE) == 0) { ATOMIC_DEC32(softn->ipf_nat_stats.ns_rules_map); } } if (n->in_tqehead[0] != NULL) { if (ipf_deletetimeoutqueue(n->in_tqehead[0]) == 0) { ipf_freetimeoutqueue(softc, n->in_tqehead[1]); } } if (n->in_tqehead[1] != NULL) { if (ipf_deletetimeoutqueue(n->in_tqehead[1]) == 0) { ipf_freetimeoutqueue(softc, n->in_tqehead[1]); } } if ((n->in_flags & IPN_PROXYRULE) == 0) { ATOMIC_DEC32(softn->ipf_nat_stats.ns_rules); } MUTEX_DESTROY(&n->in_lock); KFREES(n, n->in_size); #if SOLARIS && !defined(INSTANCES) if (softn->ipf_nat_stats.ns_rules == 0) pfil_delayed_copy = 1; #endif } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_deref */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* natp(I) - pointer to pointer to NAT table entry */ /* */ /* Decrement the reference counter for this NAT table entry and free it if */ /* there are no more things using it. */ /* */ /* IF nat_ref == 1 when this function is called, then we have an orphan nat */ /* structure *because* it only gets called on paths _after_ nat_ref has been*/ /* incremented. If nat_ref == 1 then we shouldn't decrement it here */ /* because nat_delete() will do that and send nat_ref to -1. */ /* */ /* Holding the lock on nat_lock is required to serialise nat_delete() being */ /* called from a NAT flush ioctl with a deref happening because of a packet.*/ /* ------------------------------------------------------------------------ */ void ipf_nat_deref(softc, natp) ipf_main_softc_t *softc; nat_t **natp; { nat_t *nat; nat = *natp; *natp = NULL; MUTEX_ENTER(&nat->nat_lock); if (nat->nat_ref > 1) { nat->nat_ref--; ASSERT(nat->nat_ref >= 0); MUTEX_EXIT(&nat->nat_lock); return; } MUTEX_EXIT(&nat->nat_lock); WRITE_ENTER(&softc->ipf_nat); ipf_nat_delete(softc, nat, NL_EXPIRE); RWLOCK_EXIT(&softc->ipf_nat); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_clone */ /* Returns: ipstate_t* - NULL == cloning failed, */ /* else pointer to new state structure */ /* Parameters: fin(I) - pointer to packet information */ /* is(I) - pointer to master state structure */ /* Write Lock: ipf_nat */ /* */ /* Create a "duplcate" state table entry from the master. */ /* ------------------------------------------------------------------------ */ nat_t * ipf_nat_clone(fin, nat) fr_info_t *fin; nat_t *nat; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; frentry_t *fr; nat_t *clone; ipnat_t *np; KMALLOC(clone, nat_t *); if (clone == NULL) { NBUMPSIDED(fin->fin_out, ns_clone_nomem); return NULL; } bcopy((char *)nat, (char *)clone, sizeof(*clone)); MUTEX_NUKE(&clone->nat_lock); clone->nat_rev = fin->fin_rev; clone->nat_aps = NULL; /* * Initialize all these so that ipf_nat_delete() doesn't cause a crash. */ clone->nat_tqe.tqe_pnext = NULL; clone->nat_tqe.tqe_next = NULL; clone->nat_tqe.tqe_ifq = NULL; clone->nat_tqe.tqe_parent = clone; clone->nat_flags &= ~SI_CLONE; clone->nat_flags |= SI_CLONED; if (clone->nat_hm) clone->nat_hm->hm_ref++; if (ipf_nat_insert(softc, softn, clone) == -1) { KFREE(clone); NBUMPSIDED(fin->fin_out, ns_insert_fail); return NULL; } np = clone->nat_ptr; if (np != NULL) { if (softn->ipf_nat_logging) ipf_nat_log(softc, softn, clone, NL_CLONE); np->in_use++; } fr = clone->nat_fr; if (fr != NULL) { MUTEX_ENTER(&fr->fr_lock); fr->fr_ref++; MUTEX_EXIT(&fr->fr_lock); } /* * Because the clone is created outside the normal loop of things and * TCP has special needs in terms of state, initialise the timeout * state of the new NAT from here. */ if (clone->nat_pr[0] == IPPROTO_TCP) { (void) ipf_tcp_age(&clone->nat_tqe, fin, softn->ipf_nat_tcptq, clone->nat_flags, 2); } clone->nat_sync = ipf_sync_new(softc, SMC_NAT, fin, clone); if (softn->ipf_nat_logging) ipf_nat_log(softc, softn, clone, NL_CLONE); return clone; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_wildok */ /* Returns: int - 1 == packet's ports match wildcards */ /* 0 == packet's ports don't match wildcards */ /* Parameters: nat(I) - NAT entry */ /* sport(I) - source port */ /* dport(I) - destination port */ /* flags(I) - wildcard flags */ /* dir(I) - packet direction */ /* */ /* Use NAT entry and packet direction to determine which combination of */ /* wildcard flags should be used. */ /* ------------------------------------------------------------------------ */ int ipf_nat_wildok(nat, sport, dport, flags, dir) nat_t *nat; int sport, dport, flags, dir; { /* * When called by dir is set to * nat_inlookup NAT_INBOUND (0) * nat_outlookup NAT_OUTBOUND (1) * * We simply combine the packet's direction in dir with the original * "intended" direction of that NAT entry in nat->nat_dir to decide * which combination of wildcard flags to allow. */ switch ((dir << 1) | (nat->nat_dir & (NAT_INBOUND|NAT_OUTBOUND))) { case 3: /* outbound packet / outbound entry */ if (((nat->nat_osport == sport) || (flags & SI_W_SPORT)) && ((nat->nat_odport == dport) || (flags & SI_W_DPORT))) return 1; break; case 2: /* outbound packet / inbound entry */ if (((nat->nat_osport == dport) || (flags & SI_W_SPORT)) && ((nat->nat_odport == sport) || (flags & SI_W_DPORT))) return 1; break; case 1: /* inbound packet / outbound entry */ if (((nat->nat_osport == dport) || (flags & SI_W_SPORT)) && ((nat->nat_odport == sport) || (flags & SI_W_DPORT))) return 1; break; case 0: /* inbound packet / inbound entry */ if (((nat->nat_osport == sport) || (flags & SI_W_SPORT)) && ((nat->nat_odport == dport) || (flags & SI_W_DPORT))) return 1; break; default: break; } return(0); } /* ------------------------------------------------------------------------ */ /* Function: nat_mssclamp */ /* Returns: Nil */ /* Parameters: tcp(I) - pointer to TCP header */ /* maxmss(I) - value to clamp the TCP MSS to */ /* fin(I) - pointer to packet information */ /* csump(I) - pointer to TCP checksum */ /* */ /* Check for MSS option and clamp it if necessary. If found and changed, */ /* then the TCP header checksum will be updated to reflect the change in */ /* the MSS. */ /* ------------------------------------------------------------------------ */ static void ipf_nat_mssclamp(tcp, maxmss, fin, csump) tcphdr_t *tcp; u_32_t maxmss; fr_info_t *fin; u_short *csump; { u_char *cp, *ep, opt; int hlen, advance; u_32_t mss, sumd; hlen = TCP_OFF(tcp) << 2; if (hlen > sizeof(*tcp)) { cp = (u_char *)tcp + sizeof(*tcp); ep = (u_char *)tcp + hlen; while (cp < ep) { opt = cp[0]; if (opt == TCPOPT_EOL) break; else if (opt == TCPOPT_NOP) { cp++; continue; } if (cp + 1 >= ep) break; advance = cp[1]; if ((cp + advance > ep) || (advance <= 0)) break; switch (opt) { case TCPOPT_MAXSEG: if (advance != 4) break; mss = cp[2] * 256 + cp[3]; if (mss > maxmss) { cp[2] = maxmss / 256; cp[3] = maxmss & 0xff; CALC_SUMD(mss, maxmss, sumd); ipf_fix_outcksum(0, csump, sumd, 0); } break; default: /* ignore unknown options */ break; } cp += advance; } } } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_setqueue */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* nat(I)- pointer to NAT structure */ /* Locks: ipf_nat (read or write) */ /* */ /* Put the NAT entry on its default queue entry, using rev as a helped in */ /* determining which queue it should be placed on. */ /* ------------------------------------------------------------------------ */ void ipf_nat_setqueue(softc, softn, nat) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; nat_t *nat; { ipftq_t *oifq, *nifq; int rev = nat->nat_rev; if (nat->nat_ptr != NULL) nifq = nat->nat_ptr->in_tqehead[rev]; else nifq = NULL; if (nifq == NULL) { switch (nat->nat_pr[0]) { case IPPROTO_UDP : nifq = &softn->ipf_nat_udptq; break; case IPPROTO_ICMP : nifq = &softn->ipf_nat_icmptq; break; case IPPROTO_TCP : nifq = softn->ipf_nat_tcptq + nat->nat_tqe.tqe_state[rev]; break; default : nifq = &softn->ipf_nat_iptq; break; } } oifq = nat->nat_tqe.tqe_ifq; /* * If it's currently on a timeout queue, move it from one queue to * another, else put it on the end of the newly determined queue. */ if (oifq != NULL) ipf_movequeue(softc->ipf_ticks, &nat->nat_tqe, oifq, nifq); else ipf_queueappend(softc->ipf_ticks, &nat->nat_tqe, nifq, nat); return; } /* ------------------------------------------------------------------------ */ /* Function: nat_getnext */ /* Returns: int - 0 == ok, else error */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to ipftoken structure */ /* itp(I) - pointer to ipfgeniter_t structure */ /* */ /* Fetch the next nat/ipnat structure pointer from the linked list and */ /* copy it out to the storage space pointed to by itp_data. The next item */ /* in the list to look at is put back in the ipftoken struture. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_getnext(softc, t, itp, objp) ipf_main_softc_t *softc; ipftoken_t *t; ipfgeniter_t *itp; ipfobj_t *objp; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; hostmap_t *hm, *nexthm = NULL, zerohm; ipnat_t *ipn, *nextipnat = NULL, zeroipn; nat_t *nat, *nextnat = NULL, zeronat; int error = 0; void *nnext; if (itp->igi_nitems != 1) { IPFERROR(60075); return ENOSPC; } READ_ENTER(&softc->ipf_nat); switch (itp->igi_type) { case IPFGENITER_HOSTMAP : hm = t->ipt_data; if (hm == NULL) { nexthm = softn->ipf_hm_maplist; } else { nexthm = hm->hm_next; } if (nexthm != NULL) { ATOMIC_INC32(nexthm->hm_ref); t->ipt_data = nexthm; } else { bzero(&zerohm, sizeof(zerohm)); nexthm = &zerohm; t->ipt_data = NULL; } nnext = nexthm->hm_next; break; case IPFGENITER_IPNAT : ipn = t->ipt_data; if (ipn == NULL) { nextipnat = softn->ipf_nat_list; } else { nextipnat = ipn->in_next; } if (nextipnat != NULL) { ATOMIC_INC32(nextipnat->in_use); t->ipt_data = nextipnat; } else { bzero(&zeroipn, sizeof(zeroipn)); nextipnat = &zeroipn; t->ipt_data = NULL; } nnext = nextipnat->in_next; break; case IPFGENITER_NAT : nat = t->ipt_data; if (nat == NULL) { nextnat = softn->ipf_nat_instances; } else { nextnat = nat->nat_next; } if (nextnat != NULL) { MUTEX_ENTER(&nextnat->nat_lock); nextnat->nat_ref++; MUTEX_EXIT(&nextnat->nat_lock); t->ipt_data = nextnat; } else { bzero(&zeronat, sizeof(zeronat)); nextnat = &zeronat; t->ipt_data = NULL; } nnext = nextnat->nat_next; break; default : RWLOCK_EXIT(&softc->ipf_nat); IPFERROR(60055); return EINVAL; } RWLOCK_EXIT(&softc->ipf_nat); objp->ipfo_ptr = itp->igi_data; switch (itp->igi_type) { case IPFGENITER_HOSTMAP : error = COPYOUT(nexthm, objp->ipfo_ptr, sizeof(*nexthm)); if (error != 0) { IPFERROR(60049); error = EFAULT; } if (hm != NULL) { WRITE_ENTER(&softc->ipf_nat); ipf_nat_hostmapdel(softc, &hm); RWLOCK_EXIT(&softc->ipf_nat); } break; case IPFGENITER_IPNAT : objp->ipfo_size = nextipnat->in_size; objp->ipfo_type = IPFOBJ_IPNAT; error = ipf_outobjk(softc, objp, nextipnat); if (ipn != NULL) { WRITE_ENTER(&softc->ipf_nat); ipf_nat_rule_deref(softc, &ipn); RWLOCK_EXIT(&softc->ipf_nat); } break; case IPFGENITER_NAT : objp->ipfo_size = sizeof(nat_t); objp->ipfo_type = IPFOBJ_NAT; error = ipf_outobjk(softc, objp, nextnat); if (nat != NULL) ipf_nat_deref(softc, &nat); break; } if (nnext == NULL) ipf_token_mark_complete(t); return error; } /* ------------------------------------------------------------------------ */ /* Function: nat_extraflush */ /* Returns: int - 0 == success, -1 == failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* which(I) - how to flush the active NAT table */ /* Write Locks: ipf_nat */ /* */ /* Flush nat tables. Three actions currently defined: */ /* which == 0 : flush all nat table entries */ /* which == 1 : flush TCP connections which have started to close but are */ /* stuck for some reason. */ /* which == 2 : flush TCP connections which have been idle for a long time, */ /* starting at > 4 days idle and working back in successive half-*/ /* days to at most 12 hours old. If this fails to free enough */ /* slots then work backwards in half hour slots to 30 minutes. */ /* If that too fails, then work backwards in 30 second intervals */ /* for the last 30 minutes to at worst 30 seconds idle. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_extraflush(softc, softn, which) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; int which; { nat_t *nat, **natp; ipftqent_t *tqn; ipftq_t *ifq; int removed; SPL_INT(s); removed = 0; SPL_NET(s); switch (which) { case 0 : softn->ipf_nat_stats.ns_flush_all++; /* * Style 0 flush removes everything... */ for (natp = &softn->ipf_nat_instances; ((nat = *natp) != NULL); ) { ipf_nat_delete(softc, nat, NL_FLUSH); removed++; } break; case 1 : softn->ipf_nat_stats.ns_flush_closing++; /* * Since we're only interested in things that are closing, * we can start with the appropriate timeout queue. */ for (ifq = softn->ipf_nat_tcptq + IPF_TCPS_CLOSE_WAIT; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; tqn != NULL; ) { nat = tqn->tqe_parent; tqn = tqn->tqe_next; if (nat->nat_pr[0] != IPPROTO_TCP || nat->nat_pr[1] != IPPROTO_TCP) break; ipf_nat_delete(softc, nat, NL_EXPIRE); removed++; } } /* * Also need to look through the user defined queues. */ for (ifq = softn->ipf_nat_utqe; ifq != NULL; ifq = ifq->ifq_next) { for (tqn = ifq->ifq_head; tqn != NULL; ) { nat = tqn->tqe_parent; tqn = tqn->tqe_next; if (nat->nat_pr[0] != IPPROTO_TCP || nat->nat_pr[1] != IPPROTO_TCP) continue; if ((nat->nat_tcpstate[0] > IPF_TCPS_ESTABLISHED) && (nat->nat_tcpstate[1] > IPF_TCPS_ESTABLISHED)) { ipf_nat_delete(softc, nat, NL_EXPIRE); removed++; } } } break; /* * Args 5-11 correspond to flushing those particular states * for TCP connections. */ case IPF_TCPS_CLOSE_WAIT : case IPF_TCPS_FIN_WAIT_1 : case IPF_TCPS_CLOSING : case IPF_TCPS_LAST_ACK : case IPF_TCPS_FIN_WAIT_2 : case IPF_TCPS_TIME_WAIT : case IPF_TCPS_CLOSED : softn->ipf_nat_stats.ns_flush_state++; tqn = softn->ipf_nat_tcptq[which].ifq_head; while (tqn != NULL) { nat = tqn->tqe_parent; tqn = tqn->tqe_next; ipf_nat_delete(softc, nat, NL_FLUSH); removed++; } break; default : if (which < 30) break; softn->ipf_nat_stats.ns_flush_timeout++; /* * Take a large arbitrary number to mean the number of seconds * for which which consider to be the maximum value we'll allow * the expiration to be. */ which = IPF_TTLVAL(which); for (natp = &softn->ipf_nat_instances; ((nat = *natp) != NULL); ) { if (softc->ipf_ticks - nat->nat_touched > which) { ipf_nat_delete(softc, nat, NL_FLUSH); removed++; } else natp = &nat->nat_next; } break; } if (which != 2) { SPL_X(s); return removed; } softn->ipf_nat_stats.ns_flush_queue++; /* * Asked to remove inactive entries because the table is full, try * again, 3 times, if first attempt failed with a different criteria * each time. The order tried in must be in decreasing age. * Another alternative is to implement random drop and drop N entries * at random until N have been freed up. */ if (softc->ipf_ticks - softn->ipf_nat_last_force_flush > IPF_TTLVAL(5)) { softn->ipf_nat_last_force_flush = softc->ipf_ticks; removed = ipf_queueflush(softc, ipf_nat_flush_entry, softn->ipf_nat_tcptq, softn->ipf_nat_utqe, &softn->ipf_nat_stats.ns_active, softn->ipf_nat_table_sz, softn->ipf_nat_table_wm_low); } SPL_X(s); return removed; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_flush_entry */ /* Returns: 0 - always succeeds */ /* Parameters: softc(I) - pointer to soft context main structure */ /* entry(I) - pointer to NAT entry */ /* Write Locks: ipf_nat */ /* */ /* This function is a stepping stone between ipf_queueflush() and */ /* nat_dlete(). It is used so we can provide a uniform interface via the */ /* ipf_queueflush() function. Since the nat_delete() function returns void */ /* we translate that to mean it always succeeds in deleting something. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_flush_entry(softc, entry) ipf_main_softc_t *softc; void *entry; { ipf_nat_delete(softc, entry, NL_FLUSH); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_iterator */ /* Returns: int - 0 == ok, else error */ /* Parameters: softc(I) - pointer to soft context main structure */ /* token(I) - pointer to ipftoken structure */ /* itp(I) - pointer to ipfgeniter_t structure */ /* obj(I) - pointer to data description structure */ /* */ /* This function acts as a handler for the SIOCGENITER ioctls that use a */ /* generic structure to iterate through a list. There are three different */ /* linked lists of NAT related information to go through: NAT rules, active */ /* NAT mappings and the NAT fragment cache. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_iterator(softc, token, itp, obj) ipf_main_softc_t *softc; ipftoken_t *token; ipfgeniter_t *itp; ipfobj_t *obj; { int error; if (itp->igi_data == NULL) { IPFERROR(60052); return EFAULT; } switch (itp->igi_type) { case IPFGENITER_HOSTMAP : case IPFGENITER_IPNAT : case IPFGENITER_NAT : error = ipf_nat_getnext(softc, token, itp, obj); break; case IPFGENITER_NATFRAG : error = ipf_frag_nat_next(softc, token, itp); break; default : IPFERROR(60053); error = EINVAL; break; } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_setpending */ /* Returns: Nil */ /* Parameters: softc(I) - pointer to soft context main structure */ /* nat(I) - pointer to NAT structure */ /* Locks: ipf_nat (read or write) */ /* */ /* Put the NAT entry on to the pending queue - this queue has a very short */ /* lifetime where items are put that can't be deleted straight away because */ /* of locking issues but we want to delete them ASAP, anyway. In calling */ /* this function, it is assumed that the owner (if there is one, as shown */ /* by nat_me) is no longer interested in it. */ /* ------------------------------------------------------------------------ */ void ipf_nat_setpending(softc, nat) ipf_main_softc_t *softc; nat_t *nat; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; ipftq_t *oifq; oifq = nat->nat_tqe.tqe_ifq; if (oifq != NULL) ipf_movequeue(softc->ipf_ticks, &nat->nat_tqe, oifq, &softn->ipf_nat_pending); else ipf_queueappend(softc->ipf_ticks, &nat->nat_tqe, &softn->ipf_nat_pending, nat); if (nat->nat_me != NULL) { *nat->nat_me = NULL; nat->nat_me = NULL; nat->nat_ref--; ASSERT(nat->nat_ref >= 0); } } /* ------------------------------------------------------------------------ */ /* Function: nat_newrewrite */ /* Returns: int - -1 == error, 0 == success (no move), 1 == success and */ /* allow rule to be moved if IPN_ROUNDR is set. */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT entry */ /* ni(I) - pointer to structure with misc. information needed */ /* to create new NAT entry. */ /* Write Lock: ipf_nat */ /* */ /* This function is responsible for setting up an active NAT session where */ /* we are changing both the source and destination parameters at the same */ /* time. The loop in here works differently to elsewhere - each iteration */ /* is responsible for changing a single parameter that can be incremented. */ /* So one pass may increase the source IP#, next source port, next dest. IP#*/ /* and the last destination port for a total of 4 iterations to try each. */ /* This is done to try and exhaustively use the translation space available.*/ /* ------------------------------------------------------------------------ */ static int ipf_nat_newrewrite(fin, nat, nai) fr_info_t *fin; nat_t *nat; natinfo_t *nai; { int src_search = 1; int dst_search = 1; fr_info_t frnat; u_32_t flags; u_short swap; ipnat_t *np; nat_t *natl; int l = 0; int changed; natl = NULL; changed = -1; np = nai->nai_np; flags = nat->nat_flags; bcopy((char *)fin, (char *)&frnat, sizeof(*fin)); nat->nat_hm = NULL; do { changed = -1; /* TRACE (l, src_search, dst_search, np) */ if ((src_search == 0) && (np->in_spnext == 0) && (dst_search == 0) && (np->in_dpnext == 0)) { if (l > 0) return -1; } /* * Find a new source address */ if (ipf_nat_nextaddr(fin, &np->in_nsrc, &frnat.fin_saddr, &frnat.fin_saddr) == -1) { return -1; } if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0xffffffff)) { src_search = 0; if (np->in_stepnext == 0) np->in_stepnext = 1; } else if ((np->in_nsrcaddr == 0) && (np->in_nsrcmsk == 0)) { src_search = 0; if (np->in_stepnext == 0) np->in_stepnext = 1; } else if (np->in_nsrcmsk == 0xffffffff) { src_search = 0; if (np->in_stepnext == 0) np->in_stepnext = 1; } else if (np->in_nsrcmsk != 0xffffffff) { if (np->in_stepnext == 0 && changed == -1) { np->in_snip++; np->in_stepnext++; changed = 0; } } if ((flags & IPN_TCPUDPICMP) != 0) { if (np->in_spnext != 0) frnat.fin_data[0] = np->in_spnext; /* * Standard port translation. Select next port. */ if ((flags & IPN_FIXEDSPORT) != 0) { np->in_stepnext = 2; } else if ((np->in_stepnext == 1) && (changed == -1) && (natl != NULL)) { np->in_spnext++; np->in_stepnext++; changed = 1; if (np->in_spnext > np->in_spmax) np->in_spnext = np->in_spmin; } } else { np->in_stepnext = 2; } np->in_stepnext &= 0x3; /* * Find a new destination address */ /* TRACE (fin, np, l, frnat) */ if (ipf_nat_nextaddr(fin, &np->in_ndst, &frnat.fin_daddr, &frnat.fin_daddr) == -1) return -1; if ((np->in_ndstaddr == 0) && (np->in_ndstmsk == 0xffffffff)) { dst_search = 0; if (np->in_stepnext == 2) np->in_stepnext = 3; } else if ((np->in_ndstaddr == 0) && (np->in_ndstmsk == 0)) { dst_search = 0; if (np->in_stepnext == 2) np->in_stepnext = 3; } else if (np->in_ndstmsk == 0xffffffff) { dst_search = 0; if (np->in_stepnext == 2) np->in_stepnext = 3; } else if (np->in_ndstmsk != 0xffffffff) { if ((np->in_stepnext == 2) && (changed == -1) && (natl != NULL)) { changed = 2; np->in_stepnext++; np->in_dnip++; } } if ((flags & IPN_TCPUDPICMP) != 0) { if (np->in_dpnext != 0) frnat.fin_data[1] = np->in_dpnext; /* * Standard port translation. Select next port. */ if ((flags & IPN_FIXEDDPORT) != 0) { np->in_stepnext = 0; } else if (np->in_stepnext == 3 && changed == -1) { np->in_dpnext++; np->in_stepnext++; changed = 3; if (np->in_dpnext > np->in_dpmax) np->in_dpnext = np->in_dpmin; } } else { if (np->in_stepnext == 3) np->in_stepnext = 0; } /* TRACE (frnat) */ /* * Here we do a lookup of the connection as seen from * the outside. If an IP# pair already exists, try * again. So if you have A->B becomes C->B, you can * also have D->E become C->E but not D->B causing * another C->B. Also take protocol and ports into * account when determining whether a pre-existing * NAT setup will cause an external conflict where * this is appropriate. * * fin_data[] is swapped around because we are doing a * lookup of the packet is if it were moving in the opposite * direction of the one we are working with now. */ if (flags & IPN_TCPUDP) { swap = frnat.fin_data[0]; frnat.fin_data[0] = frnat.fin_data[1]; frnat.fin_data[1] = swap; } if (fin->fin_out == 1) { natl = ipf_nat_inlookup(&frnat, flags & ~(SI_WILDP|NAT_SEARCH), (u_int)frnat.fin_p, frnat.fin_dst, frnat.fin_src); } else { natl = ipf_nat_outlookup(&frnat, flags & ~(SI_WILDP|NAT_SEARCH), (u_int)frnat.fin_p, frnat.fin_dst, frnat.fin_src); } if (flags & IPN_TCPUDP) { swap = frnat.fin_data[0]; frnat.fin_data[0] = frnat.fin_data[1]; frnat.fin_data[1] = swap; } /* TRACE natl, in_stepnext, l */ if ((natl != NULL) && (l > 8)) /* XXX 8 is arbitrary */ return -1; np->in_stepnext &= 0x3; l++; changed = -1; } while (natl != NULL); nat->nat_osrcip = fin->fin_src; nat->nat_odstip = fin->fin_dst; nat->nat_nsrcip = frnat.fin_src; nat->nat_ndstip = frnat.fin_dst; if ((flags & IPN_TCPUDP) != 0) { nat->nat_osport = htons(fin->fin_data[0]); nat->nat_odport = htons(fin->fin_data[1]); nat->nat_nsport = htons(frnat.fin_data[0]); nat->nat_ndport = htons(frnat.fin_data[1]); } else if ((flags & IPN_ICMPQUERY) != 0) { nat->nat_oicmpid = fin->fin_data[1]; nat->nat_nicmpid = frnat.fin_data[1]; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: nat_newdivert */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT entry */ /* ni(I) - pointer to structure with misc. information needed */ /* to create new NAT entry. */ /* Write Lock: ipf_nat */ /* */ /* Create a new NAT divert session as defined by the NAT rule. This is */ /* somewhat different to other NAT session creation routines because we */ /* do not iterate through either port numbers or IP addresses, searching */ /* for a unique mapping, however, a complimentary duplicate check is made. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_newdivert(fin, nat, nai) fr_info_t *fin; nat_t *nat; natinfo_t *nai; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; fr_info_t frnat; ipnat_t *np; nat_t *natl; int p; np = nai->nai_np; bcopy((char *)fin, (char *)&frnat, sizeof(*fin)); nat->nat_pr[0] = 0; nat->nat_osrcaddr = fin->fin_saddr; nat->nat_odstaddr = fin->fin_daddr; frnat.fin_saddr = htonl(np->in_snip); frnat.fin_daddr = htonl(np->in_dnip); if ((nat->nat_flags & IPN_TCPUDP) != 0) { nat->nat_osport = htons(fin->fin_data[0]); nat->nat_odport = htons(fin->fin_data[1]); } else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) { nat->nat_oicmpid = fin->fin_data[1]; } if (np->in_redir & NAT_DIVERTUDP) { frnat.fin_data[0] = np->in_spnext; frnat.fin_data[1] = np->in_dpnext; frnat.fin_flx |= FI_TCPUDP; p = IPPROTO_UDP; } else { frnat.fin_flx &= ~FI_TCPUDP; p = IPPROTO_IPIP; } if (fin->fin_out == 1) { natl = ipf_nat_inlookup(&frnat, 0, p, frnat.fin_dst, frnat.fin_src); } else { natl = ipf_nat_outlookup(&frnat, 0, p, frnat.fin_dst, frnat.fin_src); } if (natl != NULL) { NBUMPSIDED(fin->fin_out, ns_divert_exist); return -1; } nat->nat_nsrcaddr = frnat.fin_saddr; nat->nat_ndstaddr = frnat.fin_daddr; if ((nat->nat_flags & IPN_TCPUDP) != 0) { nat->nat_nsport = htons(frnat.fin_data[0]); nat->nat_ndport = htons(frnat.fin_data[1]); } else if ((nat->nat_flags & IPN_ICMPQUERY) != 0) { nat->nat_nicmpid = frnat.fin_data[1]; } nat->nat_pr[fin->fin_out] = fin->fin_p; nat->nat_pr[1 - fin->fin_out] = p; if (np->in_redir & NAT_REDIRECT) nat->nat_dir = NAT_DIVERTIN; else nat->nat_dir = NAT_DIVERTOUT; return 0; } /* ------------------------------------------------------------------------ */ /* Function: nat_builddivertmp */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: softn(I) - pointer to NAT context structure */ /* np(I) - pointer to a NAT rule */ /* */ /* For divert rules, a skeleton packet representing what will be prepended */ /* to the real packet is created. Even though we don't have the full */ /* packet here, a checksum is calculated that we update later when we */ /* fill in the final details. At present a 0 checksum for UDP is being set */ /* here because it is expected that divert will be used for localhost. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_builddivertmp(softn, np) ipf_nat_softc_t *softn; ipnat_t *np; { udphdr_t *uh; size_t len; ip_t *ip; if ((np->in_redir & NAT_DIVERTUDP) != 0) len = sizeof(ip_t) + sizeof(udphdr_t); else len = sizeof(ip_t); ALLOC_MB_T(np->in_divmp, len); if (np->in_divmp == NULL) { NBUMPD(ipf_nat_stats, ns_divert_build); return -1; } /* * First, the header to get the packet diverted to the new destination */ ip = MTOD(np->in_divmp, ip_t *); IP_V_A(ip, 4); IP_HL_A(ip, 5); ip->ip_tos = 0; if ((np->in_redir & NAT_DIVERTUDP) != 0) ip->ip_p = IPPROTO_UDP; else ip->ip_p = IPPROTO_IPIP; ip->ip_ttl = 255; ip->ip_off = 0; ip->ip_sum = 0; ip->ip_len = htons(len); ip->ip_id = 0; ip->ip_src.s_addr = htonl(np->in_snip); ip->ip_dst.s_addr = htonl(np->in_dnip); ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip)); if (np->in_redir & NAT_DIVERTUDP) { uh = (udphdr_t *)(ip + 1); uh->uh_sum = 0; uh->uh_ulen = 8; uh->uh_sport = htons(np->in_spnext); uh->uh_dport = htons(np->in_dpnext); } return 0; } #define MINDECAP (sizeof(ip_t) + sizeof(udphdr_t) + sizeof(ip_t)) /* ------------------------------------------------------------------------ */ /* Function: nat_decap */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to current NAT session */ /* */ /* This function is responsible for undoing a packet's encapsulation in the */ /* reverse of an encap/divert rule. After removing the outer encapsulation */ /* it is necessary to call ipf_makefrip() again so that the contents of 'fin'*/ /* match the "new" packet as it may still be used by IPFilter elsewhere. */ /* We use "dir" here as the basis for some of the expectations about the */ /* outer header. If we return an error, the goal is to leave the original */ /* packet information undisturbed - this falls short at the end where we'd */ /* need to back a backup copy of "fin" - expensive. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_decap(fin, nat) fr_info_t *fin; nat_t *nat; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; char *hdr; int hlen; int skip; mb_t *m; if ((fin->fin_flx & FI_ICMPERR) != 0) { /* * ICMP packets don't get decapsulated, instead what we need * to do is change the ICMP reply from including (in the data * portion for errors) the encapsulated packet that we sent * out to something that resembles the original packet prior * to encapsulation. This isn't done here - all we're doing * here is changing the outer address to ensure that it gets * targetted back to the correct system. */ if (nat->nat_dir & NAT_OUTBOUND) { u_32_t sum1, sum2, sumd; sum1 = ntohl(fin->fin_daddr); sum2 = ntohl(nat->nat_osrcaddr); CALC_SUMD(sum1, sum2, sumd); fin->fin_ip->ip_dst = nat->nat_osrcip; fin->fin_daddr = nat->nat_osrcaddr; #if !defined(_KERNEL) || defined(MENTAT) || defined(__sgi) || \ defined(__osf__) || defined(linux) ipf_fix_outcksum(0, &fin->fin_ip->ip_sum, sumd, 0); #endif } return 0; } m = fin->fin_m; skip = fin->fin_hlen; switch (nat->nat_dir) { case NAT_DIVERTIN : case NAT_DIVERTOUT : if (fin->fin_plen < MINDECAP) return -1; skip += sizeof(udphdr_t); break; case NAT_ENCAPIN : case NAT_ENCAPOUT : if (fin->fin_plen < (skip + sizeof(ip_t))) return -1; break; default : return -1; /* NOTREACHED */ } /* * The aim here is to keep the original packet details in "fin" for * as long as possible so that returning with an error is for the * original packet and there is little undoing work to do. */ if (M_LEN(m) < skip + sizeof(ip_t)) { if (ipf_pr_pullup(fin, skip + sizeof(ip_t)) == -1) return -1; } hdr = MTOD(fin->fin_m, char *); fin->fin_ip = (ip_t *)(hdr + skip); hlen = IP_HL(fin->fin_ip) << 2; if (ipf_pr_pullup(fin, skip + hlen) == -1) { NBUMPSIDED(fin->fin_out, ns_decap_pullup); return -1; } fin->fin_hlen = hlen; fin->fin_dlen -= skip; fin->fin_plen -= skip; fin->fin_ipoff += skip; if (ipf_makefrip(hlen, (ip_t *)hdr, fin) == -1) { NBUMPSIDED(fin->fin_out, ns_decap_bad); return -1; } return skip; } /* ------------------------------------------------------------------------ */ /* Function: nat_nextaddr */ /* Returns: int - -1 == bad input (no new address), */ /* 0 == success and dst has new address */ /* Parameters: fin(I) - pointer to packet information */ /* na(I) - how to generate new address */ /* old(I) - original address being replaced */ /* dst(O) - where to put the new address */ /* Write Lock: ipf_nat */ /* */ /* This function uses the contents of the "na" structure, in combination */ /* with "old" to produce a new address to store in "dst". Not all of the */ /* possible uses of "na" will result in a new address. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_nextaddr(fin, na, old, dst) fr_info_t *fin; nat_addr_t *na; u_32_t *old, *dst; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; u_32_t amin, amax, new; i6addr_t newip; int error; new = 0; amin = na->na_addr[0].in4.s_addr; switch (na->na_atype) { case FRI_RANGE : amax = na->na_addr[1].in4.s_addr; break; case FRI_NETMASKED : case FRI_DYNAMIC : case FRI_NORMAL : /* * Compute the maximum address by adding the inverse of the * netmask to the minimum address. */ amax = ~na->na_addr[1].in4.s_addr; amax |= amin; break; case FRI_LOOKUP : break; case FRI_BROADCAST : case FRI_PEERADDR : case FRI_NETWORK : default : return -1; } error = -1; if (na->na_atype == FRI_LOOKUP) { if (na->na_type == IPLT_DSTLIST) { error = ipf_dstlist_select_node(fin, na->na_ptr, dst, NULL); } else { NBUMPSIDE(fin->fin_out, ns_badnextaddr); } } else if (na->na_atype == IPLT_NONE) { /* * 0/0 as the new address means leave it alone. */ if (na->na_addr[0].in4.s_addr == 0 && na->na_addr[1].in4.s_addr == 0) { new = *old; /* * 0/32 means get the interface's address */ } else if (na->na_addr[0].in4.s_addr == 0 && na->na_addr[1].in4.s_addr == 0xffffffff) { if (ipf_ifpaddr(softc, 4, na->na_atype, fin->fin_ifp, &newip, NULL) == -1) { NBUMPSIDED(fin->fin_out, ns_ifpaddrfail); return -1; } new = newip.in4.s_addr; } else { new = htonl(na->na_nextip); } *dst = new; error = 0; } else { NBUMPSIDE(fin->fin_out, ns_badnextaddr); } return error; } /* ------------------------------------------------------------------------ */ /* Function: nat_nextaddrinit */ /* Returns: int - 0 == success, else error number */ /* Parameters: softc(I) - pointer to soft context main structure */ /* na(I) - NAT address information for generating new addr*/ /* initial(I) - flag indicating if it is the first call for */ /* this "na" structure. */ /* ifp(I) - network interface to derive address */ /* information from. */ /* */ /* This function is expected to be called in two scenarious: when a new NAT */ /* rule is loaded into the kernel and when the list of NAT rules is sync'd */ /* up with the valid network interfaces (possibly due to them changing.) */ /* To distinguish between these, the "initial" parameter is used. If it is */ /* 1 then this indicates the rule has just been reloaded and 0 for when we */ /* are updating information. This difference is important because in */ /* instances where we are not updating address information associated with */ /* a network interface, we don't want to disturb what the "next" address to */ /* come out of ipf_nat_nextaddr() will be. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_nextaddrinit(softc, base, na, initial, ifp) ipf_main_softc_t *softc; char *base; nat_addr_t *na; int initial; void *ifp; { switch (na->na_atype) { case FRI_LOOKUP : if (na->na_subtype == 0) { na->na_ptr = ipf_lookup_res_num(softc, IPL_LOGNAT, na->na_type, na->na_num, &na->na_func); } else if (na->na_subtype == 1) { na->na_ptr = ipf_lookup_res_name(softc, IPL_LOGNAT, na->na_type, base + na->na_num, &na->na_func); } if (na->na_func == NULL) { IPFERROR(60060); return ESRCH; } if (na->na_ptr == NULL) { IPFERROR(60056); return ESRCH; } break; case FRI_DYNAMIC : case FRI_BROADCAST : case FRI_NETWORK : case FRI_NETMASKED : case FRI_PEERADDR : if (ifp != NULL) (void )ipf_ifpaddr(softc, 4, na->na_atype, ifp, &na->na_addr[0], &na->na_addr[1]); break; case FRI_SPLIT : case FRI_RANGE : if (initial) na->na_nextip = ntohl(na->na_addr[0].in4.s_addr); break; case FRI_NONE : na->na_addr[0].in4.s_addr &= na->na_addr[1].in4.s_addr; return 0; case FRI_NORMAL : na->na_addr[0].in4.s_addr &= na->na_addr[1].in4.s_addr; break; default : IPFERROR(60054); return EINVAL; } if (initial && (na->na_atype == FRI_NORMAL)) { if (na->na_addr[0].in4.s_addr == 0) { if ((na->na_addr[1].in4.s_addr == 0xffffffff) || (na->na_addr[1].in4.s_addr == 0)) { return 0; } } if (na->na_addr[1].in4.s_addr == 0xffffffff) { na->na_nextip = ntohl(na->na_addr[0].in4.s_addr); } else { na->na_nextip = ntohl(na->na_addr[0].in4.s_addr) + 1; } } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_matchflush */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* nat(I) - pointer to current NAT session */ /* */ /* ------------------------------------------------------------------------ */ static int ipf_nat_matchflush(softc, softn, data) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; caddr_t data; { int *array, flushed, error; nat_t *nat, *natnext; ipfobj_t obj; error = ipf_matcharray_load(softc, data, &obj, &array); if (error != 0) return error; flushed = 0; for (nat = softn->ipf_nat_instances; nat != NULL; nat = natnext) { natnext = nat->nat_next; if (ipf_nat_matcharray(nat, array, softc->ipf_ticks) == 0) { ipf_nat_delete(softc, nat, NL_FLUSH); flushed++; } } obj.ipfo_retval = flushed; error = BCOPYOUT(&obj, data, sizeof(obj)); KFREES(array, array[0] * sizeof(*array)); return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_matcharray */ /* Returns: int - -1 == error, 0 == success */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to current NAT session */ /* */ /* ------------------------------------------------------------------------ */ static int ipf_nat_matcharray(nat, array, ticks) nat_t *nat; int *array; u_long ticks; { int i, n, *x, e, p; e = 0; n = array[0]; x = array + 1; for (; n > 0; x += 3 + x[2]) { if (x[0] == IPF_EXP_END) break; e = 0; n -= x[2] + 3; if (n < 0) break; p = x[0] >> 16; if (p != 0 && p != nat->nat_pr[1]) break; switch (x[0]) { case IPF_EXP_IP_PR : for (i = 0; !e && i < x[2]; i++) { e |= (nat->nat_pr[1] == x[i + 3]); } break; case IPF_EXP_IP_SRCADDR : if (nat->nat_v[0] == 4) { for (i = 0; !e && i < x[2]; i++) { e |= ((nat->nat_osrcaddr & x[i + 4]) == x[i + 3]); } } if (nat->nat_v[1] == 4) { for (i = 0; !e && i < x[2]; i++) { e |= ((nat->nat_nsrcaddr & x[i + 4]) == x[i + 3]); } } break; case IPF_EXP_IP_DSTADDR : if (nat->nat_v[0] == 4) { for (i = 0; !e && i < x[2]; i++) { e |= ((nat->nat_odstaddr & x[i + 4]) == x[i + 3]); } } if (nat->nat_v[1] == 4) { for (i = 0; !e && i < x[2]; i++) { e |= ((nat->nat_ndstaddr & x[i + 4]) == x[i + 3]); } } break; case IPF_EXP_IP_ADDR : for (i = 0; !e && i < x[2]; i++) { if (nat->nat_v[0] == 4) { e |= ((nat->nat_osrcaddr & x[i + 4]) == x[i + 3]); } if (nat->nat_v[1] == 4) { e |= ((nat->nat_nsrcaddr & x[i + 4]) == x[i + 3]); } if (nat->nat_v[0] == 4) { e |= ((nat->nat_odstaddr & x[i + 4]) == x[i + 3]); } if (nat->nat_v[1] == 4) { e |= ((nat->nat_ndstaddr & x[i + 4]) == x[i + 3]); } } break; #ifdef USE_INET6 case IPF_EXP_IP6_SRCADDR : if (nat->nat_v[0] == 6) { for (i = 0; !e && i < x[3]; i++) { e |= IP6_MASKEQ(&nat->nat_osrc6, x + i + 7, x + i + 3); } } if (nat->nat_v[1] == 6) { for (i = 0; !e && i < x[3]; i++) { e |= IP6_MASKEQ(&nat->nat_nsrc6, x + i + 7, x + i + 3); } } break; case IPF_EXP_IP6_DSTADDR : if (nat->nat_v[0] == 6) { for (i = 0; !e && i < x[3]; i++) { e |= IP6_MASKEQ(&nat->nat_odst6, x + i + 7, x + i + 3); } } if (nat->nat_v[1] == 6) { for (i = 0; !e && i < x[3]; i++) { e |= IP6_MASKEQ(&nat->nat_ndst6, x + i + 7, x + i + 3); } } break; case IPF_EXP_IP6_ADDR : for (i = 0; !e && i < x[3]; i++) { if (nat->nat_v[0] == 6) { e |= IP6_MASKEQ(&nat->nat_osrc6, x + i + 7, x + i + 3); } if (nat->nat_v[0] == 6) { e |= IP6_MASKEQ(&nat->nat_odst6, x + i + 7, x + i + 3); } if (nat->nat_v[1] == 6) { e |= IP6_MASKEQ(&nat->nat_nsrc6, x + i + 7, x + i + 3); } if (nat->nat_v[1] == 6) { e |= IP6_MASKEQ(&nat->nat_ndst6, x + i + 7, x + i + 3); } } break; #endif case IPF_EXP_UDP_PORT : case IPF_EXP_TCP_PORT : for (i = 0; !e && i < x[2]; i++) { e |= (nat->nat_nsport == x[i + 3]) || (nat->nat_ndport == x[i + 3]); } break; case IPF_EXP_UDP_SPORT : case IPF_EXP_TCP_SPORT : for (i = 0; !e && i < x[2]; i++) { e |= (nat->nat_nsport == x[i + 3]); } break; case IPF_EXP_UDP_DPORT : case IPF_EXP_TCP_DPORT : for (i = 0; !e && i < x[2]; i++) { e |= (nat->nat_ndport == x[i + 3]); } break; case IPF_EXP_TCP_STATE : for (i = 0; !e && i < x[2]; i++) { e |= (nat->nat_tcpstate[0] == x[i + 3]) || (nat->nat_tcpstate[1] == x[i + 3]); } break; case IPF_EXP_IDLE_GT : e |= (ticks - nat->nat_touched > x[3]); break; } e ^= x[1]; if (!e) break; } return e; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_gettable */ /* Returns: int - 0 = success, else error */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* data(I) - pointer to ioctl data */ /* */ /* This function handles ioctl requests for tables of nat information. */ /* At present the only table it deals with is the hash bucket statistics. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_gettable(softc, softn, data) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; char *data; { ipftable_t table; int error; error = ipf_inobj(softc, data, NULL, &table, IPFOBJ_GTABLE); if (error != 0) return error; switch (table.ita_type) { case IPFTABLE_BUCKETS_NATIN : error = COPYOUT(softn->ipf_nat_stats.ns_side[0].ns_bucketlen, table.ita_table, softn->ipf_nat_table_sz * sizeof(u_int)); break; case IPFTABLE_BUCKETS_NATOUT : error = COPYOUT(softn->ipf_nat_stats.ns_side[1].ns_bucketlen, table.ita_table, softn->ipf_nat_table_sz * sizeof(u_int)); break; default : IPFERROR(60058); return EINVAL; } if (error != 0) { IPFERROR(60059); error = EFAULT; } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_settimeout */ /* Returns: int - 0 = success, else failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to tunable */ /* p(I) - pointer to new tuning data */ /* */ /* Apply the timeout change to the NAT timeout queues. */ /* ------------------------------------------------------------------------ */ int ipf_nat_settimeout(softc, t, p) struct ipf_main_softc_s *softc; ipftuneable_t *t; ipftuneval_t *p; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; if (!strncmp(t->ipft_name, "tcp_", 4)) return ipf_settimeout_tcp(t, p, softn->ipf_nat_tcptq); if (!strcmp(t->ipft_name, "udp_timeout")) { ipf_apply_timeout(&softn->ipf_nat_udptq, p->ipftu_int); } else if (!strcmp(t->ipft_name, "udp_ack_timeout")) { ipf_apply_timeout(&softn->ipf_nat_udpacktq, p->ipftu_int); } else if (!strcmp(t->ipft_name, "icmp_timeout")) { ipf_apply_timeout(&softn->ipf_nat_icmptq, p->ipftu_int); } else if (!strcmp(t->ipft_name, "icmp_ack_timeout")) { ipf_apply_timeout(&softn->ipf_nat_icmpacktq, p->ipftu_int); } else if (!strcmp(t->ipft_name, "ip_timeout")) { ipf_apply_timeout(&softn->ipf_nat_iptq, p->ipftu_int); } else { IPFERROR(60062); return ESRCH; } return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_rehash */ /* Returns: int - 0 = success, else failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to tunable */ /* p(I) - pointer to new tuning data */ /* */ /* To change the size of the basic NAT table, we need to first allocate the */ /* new tables (lest it fails and we've got nowhere to store all of the NAT */ /* sessions currently active) and then walk through the entire list and */ /* insert them into the table. There are two tables here: an inbound one */ /* and an outbound one. Each NAT entry goes into each table once. */ /* ------------------------------------------------------------------------ */ int ipf_nat_rehash(softc, t, p) ipf_main_softc_t *softc; ipftuneable_t *t; ipftuneval_t *p; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; nat_t **newtab[2], *nat, **natp; u_int *bucketlens[2]; u_int maxbucket; u_int newsize; int error; u_int hv; int i; newsize = p->ipftu_int; /* * In case there is nothing to do... */ if (newsize == softn->ipf_nat_table_sz) return 0; newtab[0] = NULL; newtab[1] = NULL; bucketlens[0] = NULL; bucketlens[1] = NULL; /* * 4 tables depend on the NAT table size: the inbound looking table, * the outbound lookup table and the hash chain length for each. */ KMALLOCS(newtab[0], nat_t **, newsize * sizeof(nat_t *)); if (newtab[0] == NULL) { error = 60063; goto badrehash; } KMALLOCS(newtab[1], nat_t **, newsize * sizeof(nat_t *)); if (newtab[1] == NULL) { error = 60064; goto badrehash; } KMALLOCS(bucketlens[0], u_int *, newsize * sizeof(u_int)); if (bucketlens[0] == NULL) { error = 60065; goto badrehash; } KMALLOCS(bucketlens[1], u_int *, newsize * sizeof(u_int)); if (bucketlens[1] == NULL) { error = 60066; goto badrehash; } /* * Recalculate the maximum length based on the new size. */ for (maxbucket = 0, i = newsize; i > 0; i >>= 1) maxbucket++; maxbucket *= 2; bzero((char *)newtab[0], newsize * sizeof(nat_t *)); bzero((char *)newtab[1], newsize * sizeof(nat_t *)); bzero((char *)bucketlens[0], newsize * sizeof(u_int)); bzero((char *)bucketlens[1], newsize * sizeof(u_int)); WRITE_ENTER(&softc->ipf_nat); if (softn->ipf_nat_table[0] != NULL) { KFREES(softn->ipf_nat_table[0], softn->ipf_nat_table_sz * sizeof(*softn->ipf_nat_table[0])); } softn->ipf_nat_table[0] = newtab[0]; if (softn->ipf_nat_table[1] != NULL) { KFREES(softn->ipf_nat_table[1], softn->ipf_nat_table_sz * sizeof(*softn->ipf_nat_table[1])); } softn->ipf_nat_table[1] = newtab[1]; if (softn->ipf_nat_stats.ns_side[0].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side[0].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); } softn->ipf_nat_stats.ns_side[0].ns_bucketlen = bucketlens[0]; if (softn->ipf_nat_stats.ns_side[1].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side[1].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); } softn->ipf_nat_stats.ns_side[1].ns_bucketlen = bucketlens[1]; #ifdef USE_INET6 if (softn->ipf_nat_stats.ns_side6[0].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side6[0].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); } softn->ipf_nat_stats.ns_side6[0].ns_bucketlen = bucketlens[0]; if (softn->ipf_nat_stats.ns_side6[1].ns_bucketlen != NULL) { KFREES(softn->ipf_nat_stats.ns_side6[1].ns_bucketlen, softn->ipf_nat_table_sz * sizeof(u_int)); } softn->ipf_nat_stats.ns_side6[1].ns_bucketlen = bucketlens[1]; #endif softn->ipf_nat_maxbucket = maxbucket; softn->ipf_nat_table_sz = newsize; /* * Walk through the entire list of NAT table entries and put them * in the new NAT table, somewhere. Because we have a new table, * we need to restart the counter of how many chains are in use. */ softn->ipf_nat_stats.ns_side[0].ns_inuse = 0; softn->ipf_nat_stats.ns_side[1].ns_inuse = 0; #ifdef USE_INET6 softn->ipf_nat_stats.ns_side6[0].ns_inuse = 0; softn->ipf_nat_stats.ns_side6[1].ns_inuse = 0; #endif for (nat = softn->ipf_nat_instances; nat != NULL; nat = nat->nat_next) { nat->nat_hnext[0] = NULL; nat->nat_phnext[0] = NULL; hv = nat->nat_hv[0] % softn->ipf_nat_table_sz; natp = &softn->ipf_nat_table[0][hv]; if (*natp) { (*natp)->nat_phnext[0] = &nat->nat_hnext[0]; } else { NBUMPSIDE(0, ns_inuse); } nat->nat_phnext[0] = natp; nat->nat_hnext[0] = *natp; *natp = nat; NBUMPSIDE(0, ns_bucketlen[hv]); nat->nat_hnext[1] = NULL; nat->nat_phnext[1] = NULL; hv = nat->nat_hv[1] % softn->ipf_nat_table_sz; natp = &softn->ipf_nat_table[1][hv]; if (*natp) { (*natp)->nat_phnext[1] = &nat->nat_hnext[1]; } else { NBUMPSIDE(1, ns_inuse); } nat->nat_phnext[1] = natp; nat->nat_hnext[1] = *natp; *natp = nat; NBUMPSIDE(1, ns_bucketlen[hv]); } RWLOCK_EXIT(&softc->ipf_nat); return 0; badrehash: if (bucketlens[1] != NULL) { KFREES(bucketlens[0], newsize * sizeof(u_int)); } if (bucketlens[0] != NULL) { KFREES(bucketlens[0], newsize * sizeof(u_int)); } if (newtab[0] != NULL) { KFREES(newtab[0], newsize * sizeof(nat_t *)); } if (newtab[1] != NULL) { KFREES(newtab[1], newsize * sizeof(nat_t *)); } IPFERROR(error); return ENOMEM; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_rehash_rules */ /* Returns: int - 0 = success, else failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to tunable */ /* p(I) - pointer to new tuning data */ /* */ /* All of the NAT rules hang off of a hash table that is searched with a */ /* hash on address after the netmask is applied. There is a different table*/ /* for both inbound rules (rdr) and outbound (map.) The resizing will only */ /* affect one of these two tables. */ /* ------------------------------------------------------------------------ */ int ipf_nat_rehash_rules(softc, t, p) ipf_main_softc_t *softc; ipftuneable_t *t; ipftuneval_t *p; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; ipnat_t **newtab, *np, ***old, **npp; u_int newsize; u_int mask; u_int hv; newsize = p->ipftu_int; /* * In case there is nothing to do... */ if (newsize == *t->ipft_pint) return 0; /* * All inbound rules have the NAT_REDIRECT bit set in in_redir and * all outbound rules have either NAT_MAP or MAT_MAPBLK set. * This if statement allows for some more generic code to be below, * rather than two huge gobs of code that almost do the same thing. */ if (t->ipft_pint == &softn->ipf_nat_rdrrules_sz) { old = &softn->ipf_nat_rdr_rules; mask = NAT_REDIRECT; } else { old = &softn->ipf_nat_map_rules; mask = NAT_MAP|NAT_MAPBLK; } KMALLOCS(newtab, ipnat_t **, newsize * sizeof(ipnat_t *)); if (newtab == NULL) { IPFERROR(60067); return ENOMEM; } bzero((char *)newtab, newsize * sizeof(ipnat_t *)); WRITE_ENTER(&softc->ipf_nat); if (*old != NULL) { KFREES(*old, *t->ipft_pint * sizeof(ipnat_t **)); } *old = newtab; *t->ipft_pint = newsize; for (np = softn->ipf_nat_list; np != NULL; np = np->in_next) { if ((np->in_redir & mask) == 0) continue; if (np->in_redir & NAT_REDIRECT) { np->in_rnext = NULL; hv = np->in_hv[0] % newsize; for (npp = newtab + hv; *npp != NULL; ) npp = &(*npp)->in_rnext; np->in_prnext = npp; *npp = np; } if (np->in_redir & NAT_MAP) { np->in_mnext = NULL; hv = np->in_hv[1] % newsize; for (npp = newtab + hv; *npp != NULL; ) npp = &(*npp)->in_mnext; np->in_pmnext = npp; *npp = np; } } RWLOCK_EXIT(&softc->ipf_nat); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_hostmap_rehash */ /* Returns: int - 0 = success, else failure */ /* Parameters: softc(I) - pointer to soft context main structure */ /* t(I) - pointer to tunable */ /* p(I) - pointer to new tuning data */ /* */ /* Allocate and populate a new hash table that will contain a reference to */ /* all of the active IP# translations currently in place. */ /* ------------------------------------------------------------------------ */ int ipf_nat_hostmap_rehash(softc, t, p) ipf_main_softc_t *softc; ipftuneable_t *t; ipftuneval_t *p; { ipf_nat_softc_t *softn = softc->ipf_nat_soft; hostmap_t *hm, **newtab; u_int newsize; u_int hv; newsize = p->ipftu_int; /* * In case there is nothing to do... */ if (newsize == *t->ipft_pint) return 0; KMALLOCS(newtab, hostmap_t **, newsize * sizeof(hostmap_t *)); if (newtab == NULL) { IPFERROR(60068); return ENOMEM; } bzero((char *)newtab, newsize * sizeof(hostmap_t *)); WRITE_ENTER(&softc->ipf_nat); if (softn->ipf_hm_maptable != NULL) { KFREES(softn->ipf_hm_maptable, softn->ipf_nat_hostmap_sz * sizeof(hostmap_t *)); } softn->ipf_hm_maptable = newtab; softn->ipf_nat_hostmap_sz = newsize; for (hm = softn->ipf_hm_maplist; hm != NULL; hm = hm->hm_next) { hv = hm->hm_hv % softn->ipf_nat_hostmap_sz; hm->hm_hnext = softn->ipf_hm_maptable[hv]; hm->hm_phnext = softn->ipf_hm_maptable + hv; if (softn->ipf_hm_maptable[hv] != NULL) softn->ipf_hm_maptable[hv]->hm_phnext = &hm->hm_hnext; softn->ipf_hm_maptable[hv] = hm; } RWLOCK_EXIT(&softc->ipf_nat); return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_add_tq */ /* Parameters: softc(I) - pointer to soft context main structure */ /* */ /* ------------------------------------------------------------------------ */ ipftq_t * ipf_nat_add_tq(softc, ttl) ipf_main_softc_t *softc; int ttl; { ipf_nat_softc_t *softs = softc->ipf_nat_soft; return ipf_addtimeoutqueue(softc, &softs->ipf_nat_utqe, ttl); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_uncreate */ /* Returns: Nil */ /* Parameters: fin(I) - pointer to packet information */ /* */ /* This function is used to remove a NAT entry from the NAT table when we */ /* decide that the create was actually in error. It is thus assumed that */ /* fin_flx will have both FI_NATED and FI_NATNEW set. Because we're dealing */ /* with the translated packet (not the original), we have to reverse the */ /* lookup. Although doing the lookup is expensive (relatively speaking), it */ /* is not anticipated that this will be a frequent occurance for normal */ /* traffic patterns. */ /* ------------------------------------------------------------------------ */ void ipf_nat_uncreate(fin) fr_info_t *fin; { ipf_main_softc_t *softc = fin->fin_main_soft; ipf_nat_softc_t *softn = softc->ipf_nat_soft; int nflags; nat_t *nat; switch (fin->fin_p) { case IPPROTO_TCP : nflags = IPN_TCP; break; case IPPROTO_UDP : nflags = IPN_UDP; break; default : nflags = 0; break; } WRITE_ENTER(&softc->ipf_nat); if (fin->fin_out == 0) { nat = ipf_nat_outlookup(fin, nflags, (u_int)fin->fin_p, fin->fin_dst, fin->fin_src); } else { nat = ipf_nat_inlookup(fin, nflags, (u_int)fin->fin_p, fin->fin_src, fin->fin_dst); } if (nat != NULL) { NBUMPSIDE(fin->fin_out, ns_uncreate[0]); ipf_nat_delete(softc, nat, NL_DESTROY); } else { NBUMPSIDE(fin->fin_out, ns_uncreate[1]); } RWLOCK_EXIT(&softc->ipf_nat); } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_cmp_rules */ /* Returns: int - 0 == success, else rules do not match. */ /* Parameters: n1(I) - first rule to compare */ /* n2(I) - first rule to compare */ /* */ /* Compare two rules using pointers to each rule. A straight bcmp will not */ /* work as some fields (such as in_dst, in_pkts) actually do change once */ /* the rule has been loaded into the kernel. Whilst this function returns */ /* various non-zero returns, they're strictly to aid in debugging. Use of */ /* this function should simply care if the result is zero or not. */ /* ------------------------------------------------------------------------ */ static int ipf_nat_cmp_rules(n1, n2) ipnat_t *n1, *n2; { if (n1->in_size != n2->in_size) return 1; if (bcmp((char *)&n1->in_v, (char *)&n2->in_v, offsetof(ipnat_t, in_ndst) - offsetof(ipnat_t, in_v)) != 0) return 2; if (bcmp((char *)&n1->in_tuc, (char *)&n2->in_tuc, n1->in_size - offsetof(ipnat_t, in_tuc)) != 0) return 3; if (n1->in_ndst.na_atype != n2->in_ndst.na_atype) return 5; if (n1->in_ndst.na_function != n2->in_ndst.na_function) return 6; if (bcmp((char *)&n1->in_ndst.na_addr, (char *)&n2->in_ndst.na_addr, sizeof(n1->in_ndst.na_addr))) return 7; if (n1->in_nsrc.na_atype != n2->in_nsrc.na_atype) return 8; if (n1->in_nsrc.na_function != n2->in_nsrc.na_function) return 9; if (bcmp((char *)&n1->in_nsrc.na_addr, (char *)&n2->in_nsrc.na_addr, sizeof(n1->in_nsrc.na_addr))) return 10; if (n1->in_odst.na_atype != n2->in_odst.na_atype) return 11; if (n1->in_odst.na_function != n2->in_odst.na_function) return 12; if (bcmp((char *)&n1->in_odst.na_addr, (char *)&n2->in_odst.na_addr, sizeof(n1->in_odst.na_addr))) return 13; if (n1->in_osrc.na_atype != n2->in_osrc.na_atype) return 14; if (n1->in_osrc.na_function != n2->in_osrc.na_function) return 15; if (bcmp((char *)&n1->in_osrc.na_addr, (char *)&n2->in_osrc.na_addr, sizeof(n1->in_osrc.na_addr))) return 16; return 0; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_rule_init */ /* Returns: int - 0 == success, else rules do not match. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* softn(I) - pointer to NAT context structure */ /* n(I) - first rule to compare */ /* */ /* ------------------------------------------------------------------------ */ static int ipf_nat_rule_init(softc, softn, n) ipf_main_softc_t *softc; ipf_nat_softc_t *softn; ipnat_t *n; { int error = 0; if ((n->in_flags & IPN_SIPRANGE) != 0) n->in_nsrcatype = FRI_RANGE; if ((n->in_flags & IPN_DIPRANGE) != 0) n->in_ndstatype = FRI_RANGE; if ((n->in_flags & IPN_SPLIT) != 0) n->in_ndstatype = FRI_SPLIT; if ((n->in_redir & (NAT_MAP|NAT_REWRITE|NAT_DIVERTUDP)) != 0) n->in_spnext = n->in_spmin; if ((n->in_redir & (NAT_REWRITE|NAT_DIVERTUDP)) != 0) { n->in_dpnext = n->in_dpmin; } else if (n->in_redir == NAT_REDIRECT) { n->in_dpnext = n->in_dpmin; } n->in_stepnext = 0; switch (n->in_v[0]) { case 4 : error = ipf_nat_ruleaddrinit(softc, softn, n); if (error != 0) return error; break; #ifdef USE_INET6 case 6 : error = ipf_nat6_ruleaddrinit(softc, softn, n); if (error != 0) return error; break; #endif default : break; } if (n->in_redir == (NAT_DIVERTUDP|NAT_MAP)) { /* * Prerecord whether or not the destination of the divert * is local or not to the interface the packet is going * to be sent out. */ n->in_dlocal = ipf_deliverlocal(softc, n->in_v[1], n->in_ifps[1], &n->in_ndstip6); } return error; } /* ------------------------------------------------------------------------ */ /* Function: ipf_nat_rule_fini */ /* Returns: int - 0 == success, else rules do not match. */ /* Parameters: softc(I) - pointer to soft context main structure */ /* n(I) - rule to work on */ /* */ /* This function is used to release any objects that were referenced during */ /* the rule initialisation. This is useful both when free'ing the rule and */ /* when handling ioctls that need to initialise these fields but not */ /* actually use them after the ioctl processing has finished. */ /* ------------------------------------------------------------------------ */ static void ipf_nat_rule_fini(softc, n) ipf_main_softc_t *softc; ipnat_t *n; { if (n->in_odst.na_atype == FRI_LOOKUP && n->in_odst.na_ptr != NULL) ipf_lookup_deref(softc, n->in_odst.na_type, n->in_odst.na_ptr); if (n->in_osrc.na_atype == FRI_LOOKUP && n->in_osrc.na_ptr != NULL) ipf_lookup_deref(softc, n->in_osrc.na_type, n->in_osrc.na_ptr); if (n->in_ndst.na_atype == FRI_LOOKUP && n->in_ndst.na_ptr != NULL) ipf_lookup_deref(softc, n->in_ndst.na_type, n->in_ndst.na_ptr); if (n->in_nsrc.na_atype == FRI_LOOKUP && n->in_nsrc.na_ptr != NULL) ipf_lookup_deref(softc, n->in_nsrc.na_type, n->in_nsrc.na_ptr); if (n->in_divmp != NULL) FREE_MB_T(n->in_divmp); } Index: projects/vnet/sys/contrib/ipfilter/netinet/ip_rpcb_pxy.c =================================================================== --- projects/vnet/sys/contrib/ipfilter/netinet/ip_rpcb_pxy.c (revision 302198) +++ projects/vnet/sys/contrib/ipfilter/netinet/ip_rpcb_pxy.c (revision 302199) @@ -1,1473 +1,1475 @@ /* * Copyright (C) 2002-2012 by Ryan Beasley * * See the IPFILTER.LICENCE file for details on licencing. */ /* * Overview: * This is an in-kernel application proxy for Sun's RPCBIND (nee portmap) * protocol as defined in RFC1833. It is far from complete, mostly * lacking in less-likely corner cases, but it's definitely functional. * * Invocation: * rdr /32 port -> port udp proxy rpcbu * * If the host running IP Filter is the same as the RPC server, it's * perfectly legal for both the internal and external addresses and ports * to match. * * When triggered by appropriate IP NAT rules, this proxy works by * examining data contained in received packets. Requests and replies are * modified, NAT and state table entries created, etc., as necessary. */ /* * TODO / NOTES * * o Must implement locking to protect proxy session data. * o Fragmentation isn't supported. * o Only supports UDP. * o Doesn't support multiple RPC records in a single request. * o Errors should be more fine-grained. (e.g., malloc failure vs. * illegal RPCB request / reply) * o Even with the limit on the total amount of recorded transactions, * should there be a timeout on transaction removal? * o There is a potential collision between cloning, wildcard NAT and * state entries. There should be an appr_getport routine for * to avoid this. * o The enclosed hack of STREAMS support is pretty sick and most likely * broken. * * $Id$ */ #define IPF_RPCB_PROXY /* * Function prototypes */ void ipf_p_rpcb_main_load __P((void)); void ipf_p_rpcb_main_unload __P((void)); int ipf_p_rpcb_new __P((void *, fr_info_t *, ap_session_t *, nat_t *)); void ipf_p_rpcb_del __P((ipf_main_softc_t *, ap_session_t *)); int ipf_p_rpcb_in __P((void *, fr_info_t *, ap_session_t *, nat_t *)); int ipf_p_rpcb_out __P((void *, fr_info_t *, ap_session_t *, nat_t *)); static void ipf_p_rpcb_flush __P((rpcb_session_t *)); static int ipf_p_rpcb_decodereq __P((fr_info_t *, nat_t *, rpcb_session_t *, rpc_msg_t *)); static int ipf_p_rpcb_skipauth __P((rpc_msg_t *, xdr_auth_t *, u_32_t **)); static int ipf_p_rpcb_insert __P((rpcb_session_t *, rpcb_xact_t *)); static int ipf_p_rpcb_xdrrpcb __P((rpc_msg_t *, u_32_t *, rpcb_args_t *)); static int ipf_p_rpcb_getuaddr __P((rpc_msg_t *, xdr_uaddr_t *, u_32_t **)); static u_int ipf_p_rpcb_atoi __P((char *)); static int ipf_p_rpcb_modreq __P((fr_info_t *, nat_t *, rpc_msg_t *, mb_t *, u_int)); static int ipf_p_rpcb_decoderep __P((fr_info_t *, nat_t *, rpcb_session_t *, rpc_msg_t *, rpcb_xact_t **)); static rpcb_xact_t * ipf_p_rpcb_lookup __P((rpcb_session_t *, u_32_t)); static void ipf_p_rpcb_deref __P((rpcb_session_t *, rpcb_xact_t *)); static int ipf_p_rpcb_getproto __P((rpc_msg_t *, xdr_proto_t *, u_32_t **)); static int ipf_p_rpcb_getnat __P((fr_info_t *, nat_t *, u_int, u_int)); static int ipf_p_rpcb_modv3 __P((fr_info_t *, nat_t *, rpc_msg_t *, mb_t *, u_int)); static int ipf_p_rpcb_modv4 __P((fr_info_t *, nat_t *, rpc_msg_t *, mb_t *, u_int)); static void ipf_p_rpcb_fixlen __P((fr_info_t *, int)); /* * Global variables */ static frentry_t rpcbfr; /* Skeleton rule for reference by entities this proxy creates. */ -static int rpcbcnt; /* Upper bound of allocated RPCB sessions. */ +static VNET_DEFINE(int, rpcbcnt); +#define V_rpcbcnt VNET(rpcbcnt) + /* Upper bound of allocated RPCB sessions. */ /* XXX rpcbcnt still requires locking. */ static int rpcb_proxy_init = 0; /* * Since rpc_msg contains only pointers, one should use this macro as a * handy way to get to the goods. (In case you're wondering about the name, * this started as BYTEREF -> BREF -> B.) */ #define B(r) (u_32_t)ntohl(*(r)) /* * Public subroutines */ /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_main_load */ /* Returns: void */ /* Parameters: (void) */ /* */ /* Initialize the filter rule entry and session limiter. */ /* -------------------------------------------------------------------- */ void ipf_p_rpcb_main_load() { - rpcbcnt = 0; + V_rpcbcnt = 0; bzero((char *)&rpcbfr, sizeof(rpcbfr)); rpcbfr.fr_ref = 1; rpcbfr.fr_flags = FR_PASS|FR_QUICK|FR_KEEPSTATE; MUTEX_INIT(&rpcbfr.fr_lock, "ipf Sun RPCB proxy rule lock"); rpcb_proxy_init = 1; } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_main_unload */ /* Returns: void */ /* Parameters: (void) */ /* */ /* Destroy rpcbfr's mutex to avoid a lock leak. */ /* -------------------------------------------------------------------- */ void ipf_p_rpcb_main_unload() { if (rpcb_proxy_init == 1) { MUTEX_DESTROY(&rpcbfr.fr_lock); rpcb_proxy_init = 0; } } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_new */ /* Returns: int - -1 == failure, 0 == success */ /* Parameters: fin(I) - pointer to packet information */ /* aps(I) - pointer to proxy session structure */ /* nat(I) - pointer to NAT session structure */ /* */ /* Allocate resources for per-session proxy structures. */ /* -------------------------------------------------------------------- */ int ipf_p_rpcb_new(arg, fin, aps, nat) void *arg; fr_info_t *fin; ap_session_t *aps; nat_t *nat; { rpcb_session_t *rs; nat = nat; /* LINT */ if (fin->fin_v != 4) return -1; KMALLOC(rs, rpcb_session_t *); if (rs == NULL) return(-1); bzero((char *)rs, sizeof(*rs)); MUTEX_INIT(&rs->rs_rxlock, "ipf Sun RPCB proxy session lock"); aps->aps_data = rs; return(0); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_del */ /* Returns: void */ /* Parameters: aps(I) - pointer to proxy session structure */ /* */ /* Free up a session's list of RPCB requests. */ /* -------------------------------------------------------------------- */ void ipf_p_rpcb_del(softc, aps) ipf_main_softc_t *softc; ap_session_t *aps; { rpcb_session_t *rs; rs = (rpcb_session_t *)aps->aps_data; MUTEX_ENTER(&rs->rs_rxlock); ipf_p_rpcb_flush(rs); MUTEX_EXIT(&rs->rs_rxlock); MUTEX_DESTROY(&rs->rs_rxlock); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_in */ /* Returns: int - APR_ERR(1) == drop the packet, */ /* APR_ERR(2) == kill the proxy session, */ /* else change in packet length (in bytes) */ /* Parameters: fin(I) - pointer to packet information */ /* ip(I) - pointer to packet header */ /* aps(I) - pointer to proxy session structure */ /* nat(I) - pointer to NAT session structure */ /* */ /* Given a presumed RPCB request, perform some minor tests and pass off */ /* for decoding. Also pass packet off for a rewrite if necessary. */ /* -------------------------------------------------------------------- */ int ipf_p_rpcb_in(arg, fin, aps, nat) void *arg; fr_info_t *fin; ap_session_t *aps; nat_t *nat; { rpc_msg_t rpcmsg, *rm; rpcb_session_t *rs; u_int off, dlen; mb_t *m; int rv; /* Disallow fragmented or illegally short packets. */ if ((fin->fin_flx & (FI_FRAG|FI_SHORT)) != 0) return(APR_ERR(1)); /* Perform basic variable initialization. */ rs = (rpcb_session_t *)aps->aps_data; m = fin->fin_m; off = (char *)fin->fin_dp - (char *)fin->fin_ip; off += sizeof(udphdr_t) + fin->fin_ipoff; dlen = fin->fin_dlen - sizeof(udphdr_t); /* Disallow packets outside legal range for supported requests. */ if ((dlen < RPCB_REQMIN) || (dlen > RPCB_REQMAX)) return(APR_ERR(1)); /* Copy packet over to convenience buffer. */ rm = &rpcmsg; bzero((char *)rm, sizeof(*rm)); COPYDATA(m, off, dlen, (caddr_t)&rm->rm_msgbuf); rm->rm_buflen = dlen; /* Send off to decode request. */ rv = ipf_p_rpcb_decodereq(fin, nat, rs, rm); switch(rv) { case -1: return(APR_ERR(1)); /*NOTREACHED*/ break; case 0: break; case 1: rv = ipf_p_rpcb_modreq(fin, nat, rm, m, off); break; default: /*CONSTANTCONDITION*/ IPF_PANIC(1, ("illegal rv %d (ipf_p_rpcb_req)", rv)); } return(rv); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_out */ /* Returns: int - APR_ERR(1) == drop the packet, */ /* APR_ERR(2) == kill the proxy session, */ /* else change in packet length (in bytes) */ /* Parameters: fin(I) - pointer to packet information */ /* ip(I) - pointer to packet header */ /* aps(I) - pointer to proxy session structure */ /* nat(I) - pointer to NAT session structure */ /* */ /* Given a presumed RPCB reply, perform some minor tests and pass off */ /* for decoding. If the message indicates a successful request with */ /* valid addressing information, create NAT and state structures to */ /* allow direct communication between RPC client and server. */ /* -------------------------------------------------------------------- */ int ipf_p_rpcb_out(arg, fin, aps, nat) void *arg; fr_info_t *fin; ap_session_t *aps; nat_t *nat; { rpc_msg_t rpcmsg, *rm; rpcb_session_t *rs; rpcb_xact_t *rx; u_int off, dlen; int rv, diff; mb_t *m; /* Disallow fragmented or illegally short packets. */ if ((fin->fin_flx & (FI_FRAG|FI_SHORT)) != 0) return(APR_ERR(1)); /* Perform basic variable initialization. */ rs = (rpcb_session_t *)aps->aps_data; rx = NULL; m = fin->fin_m; off = (char *)fin->fin_dp - (char *)fin->fin_ip; off += sizeof(udphdr_t) + fin->fin_ipoff; dlen = fin->fin_dlen - sizeof(udphdr_t); diff = 0; /* Disallow packets outside legal range for supported requests. */ if ((dlen < RPCB_REPMIN) || (dlen > RPCB_REPMAX)) return(APR_ERR(1)); /* Copy packet over to convenience buffer. */ rm = &rpcmsg; bzero((char *)rm, sizeof(*rm)); COPYDATA(m, off, dlen, (caddr_t)&rm->rm_msgbuf); rm->rm_buflen = dlen; rx = NULL; /* XXX gcc */ /* Send off to decode reply. */ rv = ipf_p_rpcb_decoderep(fin, nat, rs, rm, &rx); switch(rv) { case -1: /* Bad packet */ if (rx != NULL) { MUTEX_ENTER(&rs->rs_rxlock); ipf_p_rpcb_deref(rs, rx); MUTEX_EXIT(&rs->rs_rxlock); } return(APR_ERR(1)); /*NOTREACHED*/ break; case 0: /* Negative reply / request rejected */ break; case 1: /* Positive reply */ /* * With the IP address embedded in a GETADDR(LIST) reply, * we'll need to rewrite the packet in the very possible * event that the internal & external addresses aren't the * same. (i.e., this box is either a router or rpcbind * only listens on loopback.) */ if (nat->nat_odstaddr != nat->nat_ndstaddr) { if (rx->rx_type == RPCB_RES_STRING) diff = ipf_p_rpcb_modv3(fin, nat, rm, m, off); else if (rx->rx_type == RPCB_RES_LIST) diff = ipf_p_rpcb_modv4(fin, nat, rm, m, off); } break; default: /*CONSTANTCONDITION*/ IPF_PANIC(1, ("illegal rv %d (ipf_p_rpcb_decoderep)", rv)); } if (rx != NULL) { MUTEX_ENTER(&rs->rs_rxlock); /* XXX Gross hack - I'm overloading the reference * counter to deal with both threads and retransmitted * requests. One deref signals that this thread is * finished with rx, and the other signals that we've * processed its reply. */ ipf_p_rpcb_deref(rs, rx); ipf_p_rpcb_deref(rs, rx); MUTEX_EXIT(&rs->rs_rxlock); } return(diff); } /* * Private support subroutines */ /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_flush */ /* Returns: void */ /* Parameters: rs(I) - pointer to RPCB session structure */ /* */ /* Simply flushes the list of outstanding transactions, if any. */ /* -------------------------------------------------------------------- */ static void ipf_p_rpcb_flush(rs) rpcb_session_t *rs; { rpcb_xact_t *r1, *r2; r1 = rs->rs_rxlist; if (r1 == NULL) return; while (r1 != NULL) { r2 = r1; r1 = r1->rx_next; KFREE(r2); } } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_decodereq */ /* Returns: int - -1 == bad request or critical failure, */ /* 0 == request successfully decoded, */ /* 1 == request successfully decoded; requires */ /* address rewrite/modification */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT session structure */ /* rs(I) - pointer to RPCB session structure */ /* rm(I) - pointer to RPC message structure */ /* */ /* Take a presumed RPCB request, decode it, and store the results in */ /* the transaction list. If the internal target address needs to be */ /* modified, store its location in ptr. */ /* WARNING: It's the responsibility of the caller to make sure there */ /* is enough room in rs_buf for the basic RPC message "preamble". */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_decodereq(fin, nat, rs, rm) fr_info_t *fin; nat_t *nat; rpcb_session_t *rs; rpc_msg_t *rm; { rpcb_args_t *ra; u_32_t xdr, *p; rpc_call_t *rc; rpcb_xact_t rx; int mod; p = (u_32_t *)rm->rm_msgbuf; mod = 0; bzero((char *)&rx, sizeof(rx)); rc = &rm->rm_call; rm->rm_xid = p; rx.rx_xid = B(p++); /* Record this message's XID. */ /* Parse out and test the RPC header. */ if ((B(p++) != RPCB_CALL) || (B(p++) != RPCB_MSG_VERSION) || (B(p++) != RPCB_PROG)) return(-1); /* Record the RPCB version and procedure. */ rc->rc_vers = p++; rc->rc_proc = p++; /* Bypass RPC authentication stuff. */ if (ipf_p_rpcb_skipauth(rm, &rc->rc_authcred, &p) != 0) return(-1); if (ipf_p_rpcb_skipauth(rm, &rc->rc_authverf, &p) != 0) return(-1); /* Compare RPCB version and procedure numbers. */ switch(B(rc->rc_vers)) { case 2: /* This proxy only supports PMAP_GETPORT. */ if (B(rc->rc_proc) != RPCB_GETPORT) return(-1); /* Portmap requests contain four 4 byte parameters. */ if (RPCB_BUF_EQ(rm, p, 16) == 0) return(-1); p += 2; /* Skip requested program and version numbers. */ /* Sanity check the requested protocol. */ xdr = B(p); if (!(xdr == IPPROTO_UDP || xdr == IPPROTO_TCP)) return(-1); rx.rx_type = RPCB_RES_PMAP; rx.rx_proto = xdr; break; case 3: case 4: /* GETADDRLIST is exclusive to v4; GETADDR for v3 & v4 */ switch(B(rc->rc_proc)) { case RPCB_GETADDR: rx.rx_type = RPCB_RES_STRING; rx.rx_proto = (u_int)fin->fin_p; break; case RPCB_GETADDRLIST: if (B(rc->rc_vers) != 4) return(-1); rx.rx_type = RPCB_RES_LIST; break; default: return(-1); } ra = &rc->rc_rpcbargs; /* Decode the 'struct rpcb' request. */ if (ipf_p_rpcb_xdrrpcb(rm, p, ra) != 0) return(-1); /* Are the target address & port valid? */ if ((ra->ra_maddr.xu_ip != nat->nat_ndstaddr) || (ra->ra_maddr.xu_port != nat->nat_ndport)) return(-1); /* Do we need to rewrite this packet? */ if ((nat->nat_ndstaddr != nat->nat_odstaddr) || (nat->nat_ndport != nat->nat_odport)) mod = 1; break; default: return(-1); } MUTEX_ENTER(&rs->rs_rxlock); if (ipf_p_rpcb_insert(rs, &rx) != 0) { MUTEX_EXIT(&rs->rs_rxlock); return(-1); } MUTEX_EXIT(&rs->rs_rxlock); return(mod); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_skipauth */ /* Returns: int -- -1 == illegal auth parameters (lengths) */ /* 0 == valid parameters, pointer advanced */ /* Parameters: rm(I) - pointer to RPC message structure */ /* auth(I) - pointer to RPC auth structure */ /* buf(IO) - pointer to location within convenience buffer */ /* */ /* Record auth data length & location of auth data, then advance past */ /* it. */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_skipauth(rm, auth, buf) rpc_msg_t *rm; xdr_auth_t *auth; u_32_t **buf; { u_32_t *p, xdr; p = *buf; /* Make sure we have enough space for expected fixed auth parms. */ if (RPCB_BUF_GEQ(rm, p, 8) == 0) return(-1); p++; /* We don't care about auth_flavor. */ auth->xa_string.xs_len = p; xdr = B(p++); /* Length of auth_data */ /* Test for absurdity / illegality of auth_data length. */ if ((XDRALIGN(xdr) < xdr) || (RPCB_BUF_GEQ(rm, p, XDRALIGN(xdr)) == 0)) return(-1); auth->xa_string.xs_str = (char *)p; p += XDRALIGN(xdr); /* Advance our location. */ *buf = (u_32_t *)p; return(0); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_insert */ /* Returns: int -- -1 == list insertion failed, */ /* 0 == item successfully added */ /* Parameters: rs(I) - pointer to RPCB session structure */ /* rx(I) - pointer to RPCB transaction structure */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_insert(rs, rx) rpcb_session_t *rs; rpcb_xact_t *rx; { rpcb_xact_t *rxp; rxp = ipf_p_rpcb_lookup(rs, rx->rx_xid); if (rxp != NULL) { ++rxp->rx_ref; return(0); } - if (rpcbcnt == RPCB_MAXREQS) + if (V_rpcbcnt == RPCB_MAXREQS) return(-1); KMALLOC(rxp, rpcb_xact_t *); if (rxp == NULL) return(-1); bcopy((char *)rx, (char *)rxp, sizeof(*rx)); if (rs->rs_rxlist != NULL) rs->rs_rxlist->rx_pnext = &rxp->rx_next; rxp->rx_pnext = &rs->rs_rxlist; rxp->rx_next = rs->rs_rxlist; rs->rs_rxlist = rxp; rxp->rx_ref = 1; - ++rpcbcnt; + ++V_rpcbcnt; return(0); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_xdrrpcb */ /* Returns: int -- -1 == failure to properly decode the request */ /* 0 == rpcb successfully decoded */ /* Parameters: rs(I) - pointer to RPCB session structure */ /* p(I) - pointer to location within session buffer */ /* rpcb(O) - pointer to rpcb (xdr type) structure */ /* */ /* Decode a XDR encoded rpcb structure and record its contents in rpcb */ /* within only the context of TCP/UDP over IP networks. */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_xdrrpcb(rm, p, ra) rpc_msg_t *rm; u_32_t *p; rpcb_args_t *ra; { if (!RPCB_BUF_GEQ(rm, p, 20)) return(-1); /* Bypass target program & version. */ p += 2; /* Decode r_netid. Must be "tcp" or "udp". */ if (ipf_p_rpcb_getproto(rm, &ra->ra_netid, &p) != 0) return(-1); /* Decode r_maddr. */ if (ipf_p_rpcb_getuaddr(rm, &ra->ra_maddr, &p) != 0) return(-1); /* Advance to r_owner and make sure it's empty. */ if (!RPCB_BUF_EQ(rm, p, 4) || (B(p) != 0)) return(-1); return(0); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_getuaddr */ /* Returns: int -- -1 == illegal string, */ /* 0 == string parsed; contents recorded */ /* Parameters: rm(I) - pointer to RPC message structure */ /* xu(I) - pointer to universal address structure */ /* p(IO) - pointer to location within message buffer */ /* */ /* Decode the IP address / port at p and record them in xu. */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_getuaddr(rm, xu, p) rpc_msg_t *rm; xdr_uaddr_t *xu; u_32_t **p; { char *c, *i, *b, *pp; u_int d, dd, l, t; char uastr[24]; /* Test for string length. */ if (!RPCB_BUF_GEQ(rm, *p, 4)) return(-1); xu->xu_xslen = (*p)++; xu->xu_xsstr = (char *)*p; /* Length check */ l = B(xu->xu_xslen); if (l < 11 || l > 23 || !RPCB_BUF_GEQ(rm, *p, XDRALIGN(l))) return(-1); /* Advance p */ *(char **)p += XDRALIGN(l); /* Copy string to local buffer & terminate C style */ bcopy(xu->xu_xsstr, uastr, l); uastr[l] = '\0'; i = (char *)&xu->xu_ip; pp = (char *)&xu->xu_port; /* * Expected format: a.b.c.d.e.f where [a-d] correspond to bytes of * an IP address and [ef] are the bytes of a L4 port. */ if (!(ISDIGIT(uastr[0]) && ISDIGIT(uastr[l-1]))) return(-1); b = uastr; for (c = &uastr[1], d = 0, dd = 0; c < &uastr[l-1]; c++) { if (ISDIGIT(*c)) { dd = 0; continue; } if (*c == '.') { if (dd != 0) return(-1); /* Check for ASCII byte. */ *c = '\0'; t = ipf_p_rpcb_atoi(b); if (t > 255) return(-1); /* Aim b at beginning of the next byte. */ b = c + 1; /* Switch off IP addr vs port parsing. */ if (d < 4) i[d++] = t & 0xff; else pp[d++ - 4] = t & 0xff; dd = 1; continue; } return(-1); } if (d != 5) /* String must contain exactly 5 periods. */ return(-1); /* Handle the last byte (port low byte) */ t = ipf_p_rpcb_atoi(b); if (t > 255) return(-1); pp[d - 4] = t & 0xff; return(0); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_atoi (XXX should be generic for all proxies) */ /* Returns: int -- integer representation of supplied string */ /* Parameters: ptr(I) - input string */ /* */ /* Simple version of atoi(3) ripped from ip_rcmd_pxy.c. */ /* -------------------------------------------------------------------- */ static u_int ipf_p_rpcb_atoi(ptr) char *ptr; { register char *s = ptr, c; register u_int i = 0; while (((c = *s++) != '\0') && ISDIGIT(c)) { i *= 10; i += c - '0'; } return i; } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_modreq */ /* Returns: int -- change in datagram length */ /* APR_ERR(2) - critical failure */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT session */ /* rm(I) - pointer to RPC message structure */ /* m(I) - pointer to mbuf chain */ /* off(I) - current offset within mbuf chain */ /* */ /* When external and internal addresses differ, we rewrite the former */ /* with the latter. (This is exclusive to protocol versions 3 & 4). */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_modreq(fin, nat, rm, m, off) fr_info_t *fin; nat_t *nat; rpc_msg_t *rm; mb_t *m; u_int off; { u_int len, xlen, pos, bogo; rpcb_args_t *ra; char uaddr[24]; udphdr_t *udp; char *i, *p; int diff; ra = &rm->rm_call.rc_rpcbargs; i = (char *)&nat->nat_odstaddr; p = (char *)&nat->nat_odport; /* Form new string. */ bzero(uaddr, sizeof(uaddr)); /* Just in case we need padding. */ #if defined(SNPRINTF) && defined(_KERNEL) SNPRINTF(uaddr, sizeof(uaddr), #else (void) sprintf(uaddr, #endif "%u.%u.%u.%u.%u.%u", i[0] & 0xff, i[1] & 0xff, i[2] & 0xff, i[3] & 0xff, p[0] & 0xff, p[1] & 0xff); len = strlen(uaddr); xlen = XDRALIGN(len); /* Determine mbuf offset to start writing to. */ pos = (char *)ra->ra_maddr.xu_xslen - rm->rm_msgbuf; off += pos; /* Write new string length. */ bogo = htonl(len); COPYBACK(m, off, 4, (caddr_t)&bogo); off += 4; /* Write new string. */ COPYBACK(m, off, xlen, uaddr); off += xlen; /* Write in zero r_owner. */ bogo = 0; COPYBACK(m, off, 4, (caddr_t)&bogo); /* Determine difference in data lengths. */ diff = xlen - XDRALIGN(B(ra->ra_maddr.xu_xslen)); /* * If our new string has a different length, make necessary * adjustments. */ if (diff != 0) { udp = fin->fin_dp; udp->uh_ulen = htons(ntohs(udp->uh_ulen) + diff); fin->fin_plen += diff; fin->fin_ip->ip_len = htons(fin->fin_plen); fin->fin_dlen += diff; /* XXX Storage lengths. */ } return(diff); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_decoderep */ /* Returns: int - -1 == bad request or critical failure, */ /* 0 == valid, negative reply */ /* 1 == vaddlid, positive reply; needs no changes */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT session structure */ /* rs(I) - pointer to RPCB session structure */ /* rm(I) - pointer to RPC message structure */ /* rxp(O) - pointer to RPCB transaction structure */ /* */ /* Take a presumed RPCB reply, extract the XID, search for the original */ /* request information, and determine whether the request was accepted */ /* or rejected. With a valid accepted reply, go ahead and create NAT */ /* and state entries, and finish up by rewriting the packet as */ /* required. */ /* */ /* WARNING: It's the responsibility of the caller to make sure there */ /* is enough room in rs_buf for the basic RPC message "preamble". */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_decoderep(fin, nat, rs, rm, rxp) fr_info_t *fin; nat_t *nat; rpcb_session_t *rs; rpc_msg_t *rm; rpcb_xact_t **rxp; { rpcb_listp_t *rl; rpcb_entry_t *re; rpcb_xact_t *rx; u_32_t xdr, *p; rpc_resp_t *rr; int rv, cnt; p = (u_32_t *)rm->rm_msgbuf; bzero((char *)&rx, sizeof(rx)); rr = &rm->rm_resp; rm->rm_xid = p; xdr = B(p++); /* Record this message's XID. */ /* Lookup XID */ MUTEX_ENTER(&rs->rs_rxlock); if ((rx = ipf_p_rpcb_lookup(rs, xdr)) == NULL) { MUTEX_EXIT(&rs->rs_rxlock); return(-1); } ++rx->rx_ref; /* per thread reference */ MUTEX_EXIT(&rs->rs_rxlock); *rxp = rx; /* Test call vs reply */ if (B(p++) != RPCB_REPLY) return(-1); /* Test reply_stat */ switch(B(p++)) { case RPCB_MSG_DENIED: return(0); case RPCB_MSG_ACCEPTED: break; default: return(-1); } /* Bypass RPC authentication stuff. */ if (ipf_p_rpcb_skipauth(rm, &rr->rr_authverf, &p) != 0) return(-1); /* Test accept status */ if (!RPCB_BUF_GEQ(rm, p, 4)) return(-1); if (B(p++) != 0) return(0); /* Parse out the expected reply */ switch(rx->rx_type) { case RPCB_RES_PMAP: /* There must be only one 4 byte argument. */ if (!RPCB_BUF_EQ(rm, p, 4)) return(-1); rr->rr_v2 = p; xdr = B(rr->rr_v2); /* Reply w/ a 0 port indicates service isn't registered */ if (xdr == 0) return(0); /* Is the value sane? */ if (xdr > 65535) return(-1); /* Create NAT & state table entries. */ if (ipf_p_rpcb_getnat(fin, nat, rx->rx_proto, (u_int)xdr) != 0) return(-1); break; case RPCB_RES_STRING: /* Expecting a XDR string; need 4 bytes for length */ if (!RPCB_BUF_GEQ(rm, p, 4)) return(-1); rr->rr_v3.xu_str.xs_len = p++; rr->rr_v3.xu_str.xs_str = (char *)p; xdr = B(rr->rr_v3.xu_xslen); /* A null string indicates an unregistered service */ if ((xdr == 0) && RPCB_BUF_EQ(rm, p, 0)) return(0); /* Decode the target IP address / port. */ if (ipf_p_rpcb_getuaddr(rm, &rr->rr_v3, &p) != 0) return(-1); /* Validate the IP address and port contained. */ if (nat->nat_odstaddr != rr->rr_v3.xu_ip) return(-1); /* Create NAT & state table entries. */ if (ipf_p_rpcb_getnat(fin, nat, rx->rx_proto, (u_int)rr->rr_v3.xu_port) != 0) return(-1); break; case RPCB_RES_LIST: if (!RPCB_BUF_GEQ(rm, p, 4)) return(-1); /* rpcb_entry_list_ptr */ switch(B(p)) { case 0: return(0); /*NOTREACHED*/ break; case 1: break; default: return(-1); } rl = &rr->rr_v4; rl->rl_list = p++; cnt = 0; for(;;) { re = &rl->rl_entries[rl->rl_cnt]; if (ipf_p_rpcb_getuaddr(rm, &re->re_maddr, &p) != 0) return(-1); if (ipf_p_rpcb_getproto(rm, &re->re_netid, &p) != 0) return(-1); /* re_semantics & re_pfamily length */ if (!RPCB_BUF_GEQ(rm, p, 12)) return(-1); p++; /* Skipping re_semantics. */ xdr = B(p++); if ((xdr != 4) || strncmp((char *)p, "inet", 4)) return(-1); p++; if (ipf_p_rpcb_getproto(rm, &re->re_proto, &p) != 0) return(-1); if (!RPCB_BUF_GEQ(rm, p, 4)) return(-1); re->re_more = p; if (B(re->re_more) > 1) /* 0,1 only legal values */ return(-1); ++rl->rl_cnt; ++cnt; if (B(re->re_more) == 0) break; /* Replies in max out at 2; TCP and/or UDP */ if (cnt > 2) return(-1); p++; } for(rl->rl_cnt = 0; rl->rl_cnt < cnt; rl->rl_cnt++) { re = &rl->rl_entries[rl->rl_cnt]; rv = ipf_p_rpcb_getnat(fin, nat, re->re_proto.xp_proto, (u_int)re->re_maddr.xu_port); if (rv != 0) return(-1); } break; default: /*CONSTANTCONDITION*/ IPF_PANIC(1, ("illegal rx_type %d", rx->rx_type)); } return(1); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_lookup */ /* Returns: rpcb_xact_t * - NULL == no matching record, */ /* else pointer to relevant entry */ /* Parameters: rs(I) - pointer to RPCB session */ /* xid(I) - XID to look for */ /* -------------------------------------------------------------------- */ static rpcb_xact_t * ipf_p_rpcb_lookup(rs, xid) rpcb_session_t *rs; u_32_t xid; { rpcb_xact_t *rx; if (rs->rs_rxlist == NULL) return(NULL); for (rx = rs->rs_rxlist; rx != NULL; rx = rx->rx_next) if (rx->rx_xid == xid) break; return(rx); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_deref */ /* Returns: (void) */ /* Parameters: rs(I) - pointer to RPCB session */ /* rx(I) - pointer to RPC transaction struct to remove */ /* force(I) - indicates to delete entry regardless of */ /* reference count */ /* Locking: rs->rs_rxlock must be held write only */ /* */ /* Free the RPCB transaction record rx from the chain of entries. */ /* -------------------------------------------------------------------- */ static void ipf_p_rpcb_deref(rs, rx) rpcb_session_t *rs; rpcb_xact_t *rx; { rs = rs; /* LINT */ if (rx == NULL) return; if (--rx->rx_ref != 0) return; if (rx->rx_next != NULL) rx->rx_next->rx_pnext = rx->rx_pnext; *rx->rx_pnext = rx->rx_next; KFREE(rx); - --rpcbcnt; + --V_rpcbcnt; } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_getproto */ /* Returns: int - -1 == illegal protocol/netid, */ /* 0 == legal protocol/netid */ /* Parameters: rm(I) - pointer to RPC message structure */ /* xp(I) - pointer to netid structure */ /* p(IO) - pointer to location within packet buffer */ /* */ /* Decode netid/proto stored at p and record its numeric value. */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_getproto(rm, xp, p) rpc_msg_t *rm; xdr_proto_t *xp; u_32_t **p; { u_int len; /* Must have 4 bytes for length & 4 bytes for "tcp" or "udp". */ if (!RPCB_BUF_GEQ(rm, p, 8)) return(-1); xp->xp_xslen = (*p)++; xp->xp_xsstr = (char *)*p; /* Test the string length. */ len = B(xp->xp_xslen); if (len != 3) return(-1); /* Test the actual string & record the protocol accordingly. */ if (!strncmp((char *)xp->xp_xsstr, "tcp\0", 4)) xp->xp_proto = IPPROTO_TCP; else if (!strncmp((char *)xp->xp_xsstr, "udp\0", 4)) xp->xp_proto = IPPROTO_UDP; else { return(-1); } /* Advance past the string. */ (*p)++; return(0); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_getnat */ /* Returns: int -- -1 == failed to create table entries, */ /* 0 == success */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT table entry */ /* proto(I) - transport protocol for new entries */ /* port(I) - new port to use w/ wildcard table entries */ /* */ /* Create state and NAT entries to handle an anticipated connection */ /* attempt between RPC client and server. */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_getnat(fin, nat, proto, port) fr_info_t *fin; nat_t *nat; u_int proto; u_int port; { ipf_main_softc_t *softc = fin->fin_main_soft; ipnat_t *ipn, ipnat; tcphdr_t tcp; ipstate_t *is; fr_info_t fi; nat_t *natl; int nflags; ipn = nat->nat_ptr; /* Generate dummy fr_info */ bcopy((char *)fin, (char *)&fi, sizeof(fi)); fi.fin_out = 0; fi.fin_p = proto; fi.fin_sport = 0; fi.fin_dport = port & 0xffff; fi.fin_flx |= FI_IGNORE; fi.fin_saddr = nat->nat_osrcaddr; fi.fin_daddr = nat->nat_odstaddr; bzero((char *)&tcp, sizeof(tcp)); tcp.th_dport = htons(port); if (proto == IPPROTO_TCP) { tcp.th_win = htons(8192); TCP_OFF_A(&tcp, sizeof(tcphdr_t) >> 2); fi.fin_dlen = sizeof(tcphdr_t); tcp.th_flags = TH_SYN; nflags = NAT_TCP; } else { fi.fin_dlen = sizeof(udphdr_t); nflags = NAT_UDP; } nflags |= SI_W_SPORT|NAT_SEARCH; fi.fin_dp = &tcp; fi.fin_plen = fi.fin_hlen + fi.fin_dlen; /* * Search for existing NAT & state entries. Pay close attention to * mutexes / locks grabbed from lookup routines, as not doing so could * lead to bad things. * * If successful, fr_stlookup returns with ipf_state locked. We have * no use for this lock, so simply unlock it if necessary. */ is = ipf_state_lookup(&fi, &tcp, NULL); if (is != NULL) { RWLOCK_EXIT(&softc->ipf_state); } RWLOCK_EXIT(&softc->ipf_nat); WRITE_ENTER(&softc->ipf_nat); natl = ipf_nat_inlookup(&fi, nflags, proto, fi.fin_src, fi.fin_dst); if ((natl != NULL) && (is != NULL)) { MUTEX_DOWNGRADE(&softc->ipf_nat); return(0); } /* Slightly modify the following structures for actual use in creating * NAT and/or state entries. We're primarily concerned with stripping * flags that may be detrimental to the creation process or simply * shouldn't be associated with a table entry. */ fi.fin_fr = &rpcbfr; fi.fin_flx &= ~FI_IGNORE; nflags &= ~NAT_SEARCH; if (natl == NULL) { #ifdef USE_MUTEXES ipf_nat_softc_t *softn = softc->ipf_nat_soft; #endif /* XXX Since we're just copying the original ipn contents * back, would we be better off just sending a pointer to * the 'temp' copy off to nat_new instead? */ /* Generate template/bogus NAT rule. */ bcopy((char *)ipn, (char *)&ipnat, sizeof(ipnat)); ipn->in_flags = nflags & IPN_TCPUDP; ipn->in_apr = NULL; ipn->in_pr[0] = proto; ipn->in_pr[1] = proto; ipn->in_dpmin = fi.fin_dport; ipn->in_dpmax = fi.fin_dport; ipn->in_dpnext = fi.fin_dport; ipn->in_space = 1; ipn->in_ippip = 1; if (ipn->in_flags & IPN_FILTER) { ipn->in_scmp = 0; ipn->in_dcmp = 0; } ipn->in_plabel = -1; /* Create NAT entry. return NULL if this fails. */ MUTEX_ENTER(&softn->ipf_nat_new); natl = ipf_nat_add(&fi, ipn, NULL, nflags|SI_CLONE|NAT_SLAVE, NAT_INBOUND); MUTEX_EXIT(&softn->ipf_nat_new); bcopy((char *)&ipnat, (char *)ipn, sizeof(ipnat)); if (natl == NULL) { MUTEX_DOWNGRADE(&softc->ipf_nat); return(-1); } natl->nat_ptr = ipn; fi.fin_saddr = natl->nat_nsrcaddr; fi.fin_daddr = natl->nat_ndstaddr; ipn->in_use++; (void) ipf_nat_proto(&fi, natl, nflags); MUTEX_ENTER(&natl->nat_lock); ipf_nat_update(&fi, natl); MUTEX_EXIT(&natl->nat_lock); } MUTEX_DOWNGRADE(&softc->ipf_nat); if (is == NULL) { /* Create state entry. Return NULL if this fails. */ fi.fin_flx |= FI_NATED; fi.fin_flx &= ~FI_STATE; nflags &= NAT_TCPUDP; nflags |= SI_W_SPORT|SI_CLONE; if (ipf_state_add(softc, &fi, NULL, nflags) != 0) { /* * XXX nat_delete is private to ip_nat.c. Should * check w/ Darren about this one. * * nat_delete(natl, NL_EXPIRE); */ return(-1); } } return(0); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_modv3 */ /* Returns: int -- change in packet length */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT session */ /* rm(I) - pointer to RPC message structure */ /* m(I) - pointer to mbuf chain */ /* off(I) - offset within mbuf chain */ /* */ /* Write a new universal address string to this packet, adjusting */ /* lengths as necessary. */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_modv3(fin, nat, rm, m, off) fr_info_t *fin; nat_t *nat; rpc_msg_t *rm; mb_t *m; u_int off; { u_int len, xlen, pos, bogo; rpc_resp_t *rr; char uaddr[24]; char *i, *p; int diff; rr = &rm->rm_resp; i = (char *)&nat->nat_ndstaddr; p = (char *)&rr->rr_v3.xu_port; /* Form new string. */ bzero(uaddr, sizeof(uaddr)); /* Just in case we need padding. */ #if defined(SNPRINTF) && defined(_KERNEL) SNPRINTF(uaddr, sizeof(uaddr), #else (void) sprintf(uaddr, #endif "%u.%u.%u.%u.%u.%u", i[0] & 0xff, i[1] & 0xff, i[2] & 0xff, i[3] & 0xff, p[0] & 0xff, p[1] & 0xff); len = strlen(uaddr); xlen = XDRALIGN(len); /* Determine mbuf offset to write to. */ pos = (char *)rr->rr_v3.xu_xslen - rm->rm_msgbuf; off += pos; /* Write new string length. */ bogo = htonl(len); COPYBACK(m, off, 4, (caddr_t)&bogo); off += 4; /* Write new string. */ COPYBACK(m, off, xlen, uaddr); /* Determine difference in data lengths. */ diff = xlen - XDRALIGN(B(rr->rr_v3.xu_xslen)); /* * If our new string has a different length, make necessary * adjustments. */ if (diff != 0) ipf_p_rpcb_fixlen(fin, diff); return(diff); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_modv4 */ /* Returns: int -- change in packet length */ /* Parameters: fin(I) - pointer to packet information */ /* nat(I) - pointer to NAT session */ /* rm(I) - pointer to RPC message structure */ /* m(I) - pointer to mbuf chain */ /* off(I) - offset within mbuf chain */ /* */ /* Write new rpcb_entry list, adjusting lengths as necessary. */ /* -------------------------------------------------------------------- */ static int ipf_p_rpcb_modv4(fin, nat, rm, m, off) fr_info_t *fin; nat_t *nat; rpc_msg_t *rm; mb_t *m; u_int off; { u_int len, xlen, pos, bogo; rpcb_listp_t *rl; rpcb_entry_t *re; rpc_resp_t *rr; char uaddr[24]; int diff, cnt; char *i, *p; diff = 0; rr = &rm->rm_resp; rl = &rr->rr_v4; i = (char *)&nat->nat_ndstaddr; /* Determine mbuf offset to write to. */ re = &rl->rl_entries[0]; pos = (char *)re->re_maddr.xu_xslen - rm->rm_msgbuf; off += pos; for (cnt = 0; cnt < rl->rl_cnt; cnt++) { re = &rl->rl_entries[cnt]; p = (char *)&re->re_maddr.xu_port; /* Form new string. */ bzero(uaddr, sizeof(uaddr)); /* Just in case we need padding. */ #if defined(SNPRINTF) && defined(_KERNEL) SNPRINTF(uaddr, sizeof(uaddr), #else (void) sprintf(uaddr, #endif "%u.%u.%u.%u.%u.%u", i[0] & 0xff, i[1] & 0xff, i[2] & 0xff, i[3] & 0xff, p[0] & 0xff, p[1] & 0xff); len = strlen(uaddr); xlen = XDRALIGN(len); /* Write new string length. */ bogo = htonl(len); COPYBACK(m, off, 4, (caddr_t)&bogo); off += 4; /* Write new string. */ COPYBACK(m, off, xlen, uaddr); off += xlen; /* Record any change in length. */ diff += xlen - XDRALIGN(B(re->re_maddr.xu_xslen)); /* If the length changed, copy back the rest of this entry. */ len = ((char *)re->re_more + 4) - (char *)re->re_netid.xp_xslen; if (diff != 0) { COPYBACK(m, off, len, (caddr_t)re->re_netid.xp_xslen); } off += len; } /* * If our new string has a different length, make necessary * adjustments. */ if (diff != 0) ipf_p_rpcb_fixlen(fin, diff); return(diff); } /* -------------------------------------------------------------------- */ /* Function: ipf_p_rpcb_fixlen */ /* Returns: (void) */ /* Parameters: fin(I) - pointer to packet information */ /* len(I) - change in packet length */ /* */ /* Adjust various packet related lengths held in structure and packet */ /* header fields. */ /* -------------------------------------------------------------------- */ static void ipf_p_rpcb_fixlen(fin, len) fr_info_t *fin; int len; { udphdr_t *udp; udp = fin->fin_dp; udp->uh_ulen = htons(ntohs(udp->uh_ulen) + len); fin->fin_plen += len; fin->fin_ip->ip_len = htons(fin->fin_plen); fin->fin_dlen += len; } #undef B Index: projects/vnet/sys/contrib/ipfilter/netinet/ip_rules.c =================================================================== --- projects/vnet/sys/contrib/ipfilter/netinet/ip_rules.c (revision 302198) +++ projects/vnet/sys/contrib/ipfilter/netinet/ip_rules.c (revision 302199) @@ -1,264 +1,265 @@ /* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. * * Redistribution and use in source and binary forms are permitted * provided that this notice is preserved and due credit is given * to the original author and the contributors. */ #include #include #include #include #if defined(__FreeBSD_version) && (__FreeBSD_version >= 40000) # if defined(_KERNEL) # include # else # include # endif #endif #if defined(__NetBSD_Version__) && (__NetBSD_Version__ >= 399000000) #else # if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__sgi) # include # endif #endif #include #include #if !defined(__SVR4) && !defined(__svr4__) && !defined(__hpux) # include #endif #if defined(__FreeBSD__) && (__FreeBSD_version > 220000) # include #else # include #endif /* FreeBSD */ #include #include #include #include #include #include "netinet/ip_compat.h" #include "netinet/ip_fil.h" #include "netinet/ip_rules.h" #ifndef _KERNEL # include #endif /* _KERNEL */ #ifdef IPFILTER_COMPILED -extern ipf_main_softc_t ipfmain; +VNET_DECLARE(ipf_main_softc_t, ipfmain); +#define V_ipfmain VNET(ipfmain) static u_long in_rule__0[] = { 0, 0, 0, 0, 0, 0, 0, 0x8070d88, 0, 0, 0, 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0xffffffff, 0x1b0, 0x1, 0, 0, 0, 0x2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x40000000, 0x8002, 0, 0, 0, 0xffff, 0, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffffffff, 0, 0, 0, 0 }; static u_long out_rule__0[] = { 0, 0, 0, 0, 0, 0, 0, 0x8070d88, 0, 0, 0, 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0xffffffff, 0x1b0, 0x1, 0, 0, 0, 0x3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x40000000, 0x4002, 0, 0, 0, 0xffff, 0, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, 0xffffffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffffffff, 0, 0, 0, 0 }; frentry_t *ipf_rules_in_[1] = { (frentry_t *)&in_rule__0 }; /* XXX This file (ip_rules.c) is not part of the ipfilter tarball, it is XXX generated by the ipfilter build process. Unfortunately the build XXX process did not generate the following lines so they are added XXX by hand here. This is a bit of a hack but it works for now. Future XXX imports/merges of ipfilter may generate this so the following will XXX need to be removed following some future merge. XXX */ frentry_t *ipf_rules_out_[1] = { (frentry_t *)&out_rule__0 }; frentry_t *ipfrule_match_in_(fin, passp) fr_info_t *fin; u_32_t *passp; { frentry_t *fr = NULL; fr = (frentry_t *)&in_rule__0; return fr; } frentry_t *ipfrule_match_out_(fin, passp) fr_info_t *fin; u_32_t *passp; { frentry_t *fr = NULL; fr = (frentry_t *)&out_rule__0; return fr; } static frentry_t ipfrule_out_; int ipfrule_add_out_() { int i, j, err = 0, max; frentry_t *fp; max = sizeof(ipf_rules_out_)/sizeof(frentry_t *); for (i = 0; i < max; i++) { fp = ipf_rules_out_[i]; fp->fr_next = NULL; for (j = i + 1; j < max; j++) if (strncmp(fp->fr_names + fp->fr_group, ipf_rules_out_[j]->fr_names + ipf_rules_out_[j]->fr_group, FR_GROUPLEN) == 0) { if (ipf_rules_out_[j] != NULL) ipf_rules_out_[j]->fr_pnext = &fp->fr_next; fp->fr_pnext = &ipf_rules_out_[j]; fp->fr_next = ipf_rules_out_[j]; break; } } fp = &ipfrule_out_; bzero((char *)fp, sizeof(*fp)); fp->fr_type = FR_T_CALLFUNC_BUILTIN; fp->fr_flags = FR_OUTQUE|FR_NOMATCH; fp->fr_data = (void *)ipf_rules_out_[0]; fp->fr_dsize = sizeof(ipf_rules_out_[0]); fp->fr_family = AF_INET; fp->fr_func = (ipfunc_t)ipfrule_match_out_; - err = frrequest(&ipfmain, IPL_LOGIPF, SIOCADDFR, (caddr_t)fp, - ipfmain.ipf_active, 0); + err = frrequest(&V_ipfmain, IPL_LOGIPF, SIOCADDFR, (caddr_t)fp, + V_ipfmain.ipf_active, 0); return err; } int ipfrule_remove_out_() { int err = 0, i; frentry_t *fp; /* * Try to remove the outbound rule. */ if (ipfrule_out_.fr_ref > 0) { err = EBUSY; } else { i = sizeof(ipf_rules_out_)/sizeof(frentry_t *) - 1; for (; i >= 0; i--) { fp = ipf_rules_out_[i]; if (fp->fr_ref > 1) { err = EBUSY; break; } } } if (err == 0) - err = frrequest(&ipfmain, IPL_LOGIPF, SIOCDELFR, + err = frrequest(&V_ipfmain, IPL_LOGIPF, SIOCDELFR, (caddr_t)&ipfrule_out_, - ipfmain.ipf_active, 0); + V_ipfmain.ipf_active, 0); if (err) return err; return err; } static frentry_t ipfrule_in_; int ipfrule_add_in_() { int i, j, err = 0, max; frentry_t *fp; max = sizeof(ipf_rules_in_)/sizeof(frentry_t *); for (i = 0; i < max; i++) { fp = ipf_rules_in_[i]; fp->fr_next = NULL; for (j = i + 1; j < max; j++) if (strncmp(fp->fr_names + fp->fr_group, ipf_rules_in_[j]->fr_names + ipf_rules_in_[j]->fr_group, FR_GROUPLEN) == 0) { if (ipf_rules_in_[j] != NULL) ipf_rules_in_[j]->fr_pnext = &fp->fr_next; fp->fr_pnext = &ipf_rules_in_[j]; fp->fr_next = ipf_rules_in_[j]; break; } } fp = &ipfrule_in_; bzero((char *)fp, sizeof(*fp)); fp->fr_type = FR_T_CALLFUNC_BUILTIN; fp->fr_flags = FR_INQUE|FR_NOMATCH; fp->fr_data = (void *)ipf_rules_in_[0]; fp->fr_dsize = sizeof(ipf_rules_in_[0]); fp->fr_family = AF_INET; fp->fr_func = (ipfunc_t)ipfrule_match_in_; - err = frrequest(&ipfmain, IPL_LOGIPF, SIOCADDFR, (caddr_t)fp, - ipfmain.ipf_active, 0); + err = frrequest(&V_ipfmain, IPL_LOGIPF, SIOCADDFR, (caddr_t)fp, + V_ipfmain.ipf_active, 0); return err; } int ipfrule_remove_in_() { int err = 0, i; frentry_t *fp; /* * Try to remove the inbound rule. */ if (ipfrule_in_.fr_ref > 0) { err = EBUSY; } else { i = sizeof(ipf_rules_in_)/sizeof(frentry_t *) - 1; for (; i >= 0; i--) { fp = ipf_rules_in_[i]; if (fp->fr_ref > 1) { err = EBUSY; break; } } } if (err == 0) - err = frrequest(&ipfmain, IPL_LOGIPF, SIOCDELFR, + err = frrequest(&V_ipfmain, IPL_LOGIPF, SIOCDELFR, (caddr_t)&ipfrule_in_, - ipfmain.ipf_active, 0); + V_ipfmain.ipf_active, 0); if (err) return err; return err; } int ipfrule_add() { int err; err = ipfrule_add_out_(); if (err != 0) return err; err = ipfrule_add_in_(); if (err != 0) return err; return 0; } int ipfrule_remove() { int err; err = ipfrule_remove_out_(); if (err != 0) return err; err = ipfrule_remove_in_(); if (err != 0) return err; return 0; } #endif /* IPFILTER_COMPILED */ Index: projects/vnet/sys/contrib/ipfilter/netinet/mlfk_ipl.c =================================================================== --- projects/vnet/sys/contrib/ipfilter/netinet/mlfk_ipl.c (revision 302198) +++ projects/vnet/sys/contrib/ipfilter/netinet/mlfk_ipl.c (revision 302199) @@ -1,568 +1,611 @@ /* $FreeBSD$ */ /* * Copyright (C) 2012 by Darren Reed. * * $FreeBSD$ * See the IPFILTER.LICENCE file for details on licencing. */ #include #include #include #include #include #include #include #include #if __FreeBSD_version >= 500000 # include +# include #endif #include #include #include #include "netinet/ipl.h" #include "netinet/ip_compat.h" #include "netinet/ip_fil.h" #include "netinet/ip_state.h" #include "netinet/ip_nat.h" #include "netinet/ip_auth.h" #include "netinet/ip_frag.h" #include "netinet/ip_sync.h" -extern ipf_main_softc_t ipfmain; +VNET_DECLARE(ipf_main_softc_t, ipfmain); +#define V_ipfmain VNET(ipfmain) #if __FreeBSD_version >= 502116 static struct cdev *ipf_devs[IPL_LOGSIZE]; #else static dev_t ipf_devs[IPL_LOGSIZE]; #endif static int sysctl_ipf_int ( SYSCTL_HANDLER_ARGS ); static int ipf_modload(void); static int ipf_modunload(void); static int ipf_fbsd_sysctl_create(ipf_main_softc_t*); static int ipf_fbsd_sysctl_destroy(ipf_main_softc_t*); #if (__FreeBSD_version >= 500024) # if (__FreeBSD_version >= 502116) static int ipfopen __P((struct cdev*, int, int, struct thread *)); static int ipfclose __P((struct cdev*, int, int, struct thread *)); # else static int ipfopen __P((dev_t, int, int, struct thread *)); static int ipfclose __P((dev_t, int, int, struct thread *)); # endif /* __FreeBSD_version >= 502116 */ #else static int ipfopen __P((dev_t, int, int, struct proc *)); static int ipfclose __P((dev_t, int, int, struct proc *)); #endif #if (__FreeBSD_version >= 502116) static int ipfread __P((struct cdev*, struct uio *, int)); static int ipfwrite __P((struct cdev*, struct uio *, int)); #else static int ipfread __P((dev_t, struct uio *, int)); static int ipfwrite __P((dev_t, struct uio *, int)); #endif /* __FreeBSD_version >= 502116 */ SYSCTL_DECL(_net_inet); #define SYSCTL_IPF(parent, nbr, name, access, ptr, val, descr) \ - SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|access, \ - ptr, val, sysctl_ipf_int, "I", descr); + SYSCTL_OID(parent, nbr, name, CTLTYPE_INT|CTLFLAG_VNET|access, \ + ptr, val, sysctl_ipf_int, "I", descr) #define SYSCTL_DYN_IPF(parent, nbr, name, access,ptr, val, descr) \ - SYSCTL_ADD_OID(&ipf_clist, SYSCTL_STATIC_CHILDREN(parent), nbr, name, \ - CTLFLAG_DYN|CTLTYPE_INT|access, ptr, val, sysctl_ipf_int, "I", descr) -static struct sysctl_ctx_list ipf_clist; + SYSCTL_ADD_OID(&V_ipf_clist, SYSCTL_STATIC_CHILDREN(parent), nbr, name, \ + CTLFLAG_DYN|CTLTYPE_INT|CTLFLAG_VNET|access, ptr, val, sysctl_ipf_int, "I", descr) +static VNET_DEFINE(struct sysctl_ctx_list, ipf_clist); +#define V_ipf_clist VNET(ipf_clist) #define CTLFLAG_OFF 0x00800000 /* IPFilter must be disabled */ #define CTLFLAG_RWO (CTLFLAG_RW|CTLFLAG_OFF) SYSCTL_NODE(_net_inet, OID_AUTO, ipf, CTLFLAG_RW, 0, "IPF"); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &ipfmain.ipf_flags, 0, "IPF flags"); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_pass, CTLFLAG_RW, &ipfmain.ipf_pass, 0, "default pass/block"); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &ipfmain.ipf_active, 0, "IPF is active"); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_flags, CTLFLAG_RW, &VNET_NAME(ipfmain.ipf_flags), 0, "IPF flags"); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, ipf_pass, CTLFLAG_RW, &VNET_NAME(ipfmain.ipf_pass), 0, "default pass/block"); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_active, CTLFLAG_RD, &VNET_NAME(ipfmain.ipf_active), 0, "IPF is active"); SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpidletimeout, CTLFLAG_RWO, - &ipfmain.ipf_tcpidletimeout, 0, "TCP idle timeout in seconds"); + &VNET_NAME(ipfmain.ipf_tcpidletimeout), 0, "TCP idle timeout in seconds"); SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcphalfclosed, CTLFLAG_RWO, - &ipfmain.ipf_tcphalfclosed, 0, "timeout for half closed TCP sessions"); + &VNET_NAME(ipfmain.ipf_tcphalfclosed), 0, "timeout for half closed TCP sessions"); SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosewait, CTLFLAG_RWO, - &ipfmain.ipf_tcpclosewait, 0, "timeout for TCP sessions in closewait status"); + &VNET_NAME(ipfmain.ipf_tcpclosewait), 0, "timeout for TCP sessions in closewait status"); SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcplastack, CTLFLAG_RWO, - &ipfmain.ipf_tcplastack, 0, "timeout for TCP sessions in last ack status"); + &VNET_NAME(ipfmain.ipf_tcplastack), 0, "timeout for TCP sessions in last ack status"); SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcptimeout, CTLFLAG_RWO, - &ipfmain.ipf_tcptimeout, 0, ""); + &VNET_NAME(ipfmain.ipf_tcptimeout), 0, ""); SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_tcpclosed, CTLFLAG_RWO, - &ipfmain.ipf_tcpclosed, 0, ""); + &VNET_NAME(ipfmain.ipf_tcpclosed), 0, ""); SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udptimeout, CTLFLAG_RWO, - &ipfmain.ipf_udptimeout, 0, "UDP timeout"); + &VNET_NAME(ipfmain.ipf_udptimeout), 0, "UDP timeout"); SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_udpacktimeout, CTLFLAG_RWO, - &ipfmain.ipf_udpacktimeout, 0, ""); + &VNET_NAME(ipfmain.ipf_udpacktimeout), 0, ""); SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_icmptimeout, CTLFLAG_RWO, - &ipfmain.ipf_icmptimeout, 0, "ICMP timeout"); + &VNET_NAME(ipfmain.ipf_icmptimeout), 0, "ICMP timeout"); SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_running, CTLFLAG_RD, - &ipfmain.ipf_running, 0, "IPF is running"); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &ipfmain.ipf_chksrc, 0, ""); -SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &ipfmain.ipf_minttl, 0, ""); + &VNET_NAME(ipfmain.ipf_running), 0, "IPF is running"); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_chksrc, CTLFLAG_RW, &VNET_NAME(ipfmain.ipf_chksrc), 0, ""); +SYSCTL_IPF(_net_inet_ipf, OID_AUTO, fr_minttl, CTLFLAG_RW, &VNET_NAME(ipfmain.ipf_minttl), 0, ""); #define CDEV_MAJOR 79 #include #if __FreeBSD_version >= 500043 # include static int ipfpoll(struct cdev *dev, int events, struct thread *td); static struct cdevsw ipf_cdevsw = { #if __FreeBSD_version >= 502103 .d_version = D_VERSION, .d_flags = 0, /* D_NEEDGIANT - Should be SMP safe */ #endif .d_open = ipfopen, .d_close = ipfclose, .d_read = ipfread, .d_write = ipfwrite, .d_ioctl = ipfioctl, .d_poll = ipfpoll, .d_name = "ipf", #if __FreeBSD_version < 600000 .d_maj = CDEV_MAJOR, #endif }; #else static int ipfpoll(dev_t dev, int events, struct proc *td); static struct cdevsw ipf_cdevsw = { /* open */ ipfopen, /* close */ ipfclose, /* read */ ipfread, /* write */ ipfwrite, /* ioctl */ ipfioctl, /* poll */ ipfpoll, /* mmap */ nommap, /* strategy */ nostrategy, /* name */ "ipf", /* maj */ CDEV_MAJOR, /* dump */ nodump, /* psize */ nopsize, /* flags */ 0, # if (__FreeBSD_version < 500043) /* bmaj */ -1, # endif # if (__FreeBSD_version >= 430000) /* kqfilter */ NULL # endif }; #endif static char *ipf_devfiles[] = { IPL_NAME, IPNAT_NAME, IPSTATE_NAME, IPAUTH_NAME, IPSYNC_NAME, IPSCAN_NAME, IPLOOKUP_NAME, NULL }; static int ipfilter_modevent(module_t mod, int type, void *unused) { int error = 0; switch (type) { case MOD_LOAD : error = ipf_modload(); break; case MOD_UNLOAD : error = ipf_modunload(); break; default: error = EINVAL; break; } return error; } +static void +vnet_ipf_init(void) +{ + char *defpass; + int error; + + if (ipf_create_all(&V_ipfmain) == NULL) + return; + + if (ipf_fbsd_sysctl_create(&V_ipfmain) != 0) { + ipf_destroy_all(&V_ipfmain); + return; + } + + error = ipfattach(&V_ipfmain); + if (error) { + (void)ipf_fbsd_sysctl_destroy(&V_ipfmain); + ipf_destroy_all(&V_ipfmain); + return; + } + + if (FR_ISPASS(V_ipfmain.ipf_pass)) + defpass = "pass"; + else if (FR_ISBLOCK(V_ipfmain.ipf_pass)) + defpass = "block"; + else + defpass = "no-match -> block"; + + if (IS_DEFAULT_VNET(curvnet)) + printf("%s initialized. Default = %s all, Logging = %s%s\n", + ipfilter_version, defpass, +#ifdef IPFILTER_LOG + "enabled", +#else + "disabled", +#endif +#ifdef IPFILTER_COMPILED + " (COMPILED)" +#else + "" +#endif + ); +} +VNET_SYSINIT(vnet_ipf_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, + vnet_ipf_init, NULL); + static int ipf_modload() { - char *defpass, *c, *str; + char *c, *str; int i, j, error; if (ipf_load_all() != 0) return EIO; - if (ipf_create_all(&ipfmain) == NULL) - return EIO; - - if (ipf_fbsd_sysctl_create(&ipfmain) != 0) - return EIO; - - error = ipfattach(&ipfmain); - if (error) - return error; - for (i = 0; i < IPL_LOGSIZE; i++) ipf_devs[i] = NULL; - for (i = 0; (str = ipf_devfiles[i]); i++) { c = NULL; for(j = strlen(str); j > 0; j--) if (str[j] == '/') { c = str + j + 1; break; } if (!c) c = str; ipf_devs[i] = make_dev(&ipf_cdevsw, i, 0, 0, 0600, "%s", c); } error = ipf_pfil_hook(); if (error != 0) return error; ipf_event_reg(); - if (FR_ISPASS(ipfmain.ipf_pass)) - defpass = "pass"; - else if (FR_ISBLOCK(ipfmain.ipf_pass)) - defpass = "block"; - else - defpass = "no-match -> block"; - - printf("%s initialized. Default = %s all, Logging = %s%s\n", - ipfilter_version, defpass, -#ifdef IPFILTER_LOG - "enabled", -#else - "disabled", -#endif -#ifdef IPFILTER_COMPILED - " (COMPILED)" -#else - "" -#endif - ); return 0; } +static void +vnet_ipf_uninit(void) +{ + if (V_ipfmain.ipf_refcnt) + return; + + if (ipf_fbsd_sysctl_destroy(&V_ipfmain) != 0) + return; + + if (V_ipfmain.ipf_running >= 0) { + if (ipfdetach(&V_ipfmain) != 0) + return; + + ipf_fbsd_sysctl_destroy(&V_ipfmain); + ipf_destroy_all(&V_ipfmain); + } + + V_ipfmain.ipf_running = -2; +} +VNET_SYSUNINIT(vnet_ipf_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_THIRD, + vnet_ipf_uninit, NULL); + static int ipf_modunload() { int error, i; - if (ipfmain.ipf_refcnt) - return EBUSY; + ipf_event_dereg(); - if (ipf_fbsd_sysctl_destroy(&ipfmain) != 0) - return EIO; - error = ipf_pfil_unhook(); if (error != 0) return error; - if (ipfmain.ipf_running >= 0) { - error = ipfdetach(&ipfmain); - if (error != 0) - return error; - - ipf_fbsd_sysctl_destroy(&ipfmain); - ipf_destroy_all(&ipfmain); - ipf_unload_all(); - } else - error = 0; - - ipfmain.ipf_running = -2; - for (i = 0; ipf_devfiles[i]; i++) { if (ipf_devs[i] != NULL) destroy_dev(ipf_devs[i]); } + ipf_unload_all(); + printf("%s unloaded\n", ipfilter_version); return error; } static moduledata_t ipfiltermod = { "ipfilter", ipfilter_modevent, 0 }; -DECLARE_MODULE(ipfilter, ipfiltermod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); +DECLARE_MODULE(ipfilter, ipfiltermod, SI_SUB_PROTO_FIREWALL, SI_ORDER_SECOND); #ifdef MODULE_VERSION MODULE_VERSION(ipfilter, 1); #endif #ifdef SYSCTL_IPF int sysctl_ipf_int ( SYSCTL_HANDLER_ARGS ) { int error = 0; if (arg1) error = SYSCTL_OUT(req, arg1, sizeof(int)); else error = SYSCTL_OUT(req, &arg2, sizeof(int)); if (error || !req->newptr) return (error); if (!arg1) error = EPERM; else { - if ((oidp->oid_kind & CTLFLAG_OFF) && (ipfmain.ipf_running > 0)) + if ((oidp->oid_kind & CTLFLAG_OFF) && (V_ipfmain.ipf_running > 0)) error = EBUSY; else error = SYSCTL_IN(req, arg1, sizeof(int)); } return (error); } #endif static int #if __FreeBSD_version >= 500043 ipfpoll(struct cdev *dev, int events, struct thread *td) #else ipfpoll(dev_t dev, int events, struct proc *td) #endif { int unit = GET_MINOR(dev); int revents; if (unit < 0 || unit > IPL_LOGMAX) return 0; revents = 0; + CURVNET_SET(TD_TO_VNET(td)); switch (unit) { case IPL_LOGIPF : case IPL_LOGNAT : case IPL_LOGSTATE : #ifdef IPFILTER_LOG - if ((events & (POLLIN | POLLRDNORM)) && ipf_log_canread(&ipfmain, unit)) + if ((events & (POLLIN | POLLRDNORM)) && ipf_log_canread(&V_ipfmain, unit)) revents |= events & (POLLIN | POLLRDNORM); #endif break; case IPL_LOGAUTH : - if ((events & (POLLIN | POLLRDNORM)) && ipf_auth_waiting(&ipfmain)) + if ((events & (POLLIN | POLLRDNORM)) && ipf_auth_waiting(&V_ipfmain)) revents |= events & (POLLIN | POLLRDNORM); break; case IPL_LOGSYNC : - if ((events & (POLLIN | POLLRDNORM)) && ipf_sync_canread(&ipfmain)) + if ((events & (POLLIN | POLLRDNORM)) && ipf_sync_canread(&V_ipfmain)) revents |= events & (POLLIN | POLLRDNORM); - if ((events & (POLLOUT | POLLWRNORM)) && ipf_sync_canwrite(&ipfmain)) + if ((events & (POLLOUT | POLLWRNORM)) && ipf_sync_canwrite(&V_ipfmain)) revents |= events & (POLLOUT | POLLWRNORM); break; case IPL_LOGSCAN : case IPL_LOGLOOKUP : default : break; } if ((revents == 0) && ((events & (POLLIN|POLLRDNORM)) != 0)) - selrecord(td, &ipfmain.ipf_selwait[unit]); + selrecord(td, &V_ipfmain.ipf_selwait[unit]); + CURVNET_RESTORE(); return revents; } /* * routines below for saving IP headers to buffer */ static int ipfopen(dev, flags #if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) , devtype, p) int devtype; # if (__FreeBSD_version >= 500024) struct thread *p; # else struct proc *p; # endif /* __FreeBSD_version >= 500024 */ #else ) #endif #if (__FreeBSD_version >= 502116) struct cdev *dev; #else dev_t dev; #endif int flags; { int unit = GET_MINOR(dev); int error; if (IPL_LOGMAX < unit) error = ENXIO; else { switch (unit) { case IPL_LOGIPF : case IPL_LOGNAT : case IPL_LOGSTATE : case IPL_LOGAUTH : case IPL_LOGLOOKUP : case IPL_LOGSYNC : #ifdef IPFILTER_SCAN case IPL_LOGSCAN : #endif error = 0; break; default : error = ENXIO; break; } } return error; } static int ipfclose(dev, flags #if ((BSD >= 199506) || (__FreeBSD_version >= 220000)) , devtype, p) int devtype; # if (__FreeBSD_version >= 500024) struct thread *p; # else struct proc *p; # endif /* __FreeBSD_version >= 500024 */ #else ) #endif #if (__FreeBSD_version >= 502116) struct cdev *dev; #else dev_t dev; #endif int flags; { int unit = GET_MINOR(dev); if (IPL_LOGMAX < unit) unit = ENXIO; else unit = 0; return unit; } /* * ipfread/ipflog * both of these must operate with at least splnet() lest they be * called during packet processing and cause an inconsistancy to appear in * the filter lists. */ #if (BSD >= 199306) static int ipfread(dev, uio, ioflag) int ioflag; #else static int ipfread(dev, uio) #endif #if (__FreeBSD_version >= 502116) struct cdev *dev; #else dev_t dev; #endif struct uio *uio; { + int error; int unit = GET_MINOR(dev); if (unit < 0) return ENXIO; - if (ipfmain.ipf_running < 1) + CURVNET_SET(CRED_TO_VNET(dev->si_cred)); + if (V_ipfmain.ipf_running < 1) { + CURVNET_RESTORE(); return EIO; + } - if (unit == IPL_LOGSYNC) - return ipf_sync_read(&ipfmain, uio); + if (unit == IPL_LOGSYNC) { + error = ipf_sync_read(&V_ipfmain, uio); + CURVNET_RESTORE(); + return error; + } #ifdef IPFILTER_LOG - return ipf_log_read(&ipfmain, unit, uio); + error = ipf_log_read(&V_ipfmain, unit, uio); #else - return ENXIO; + error = ENXIO; #endif + CURVNET_RESTORE(); + return error; } /* * ipfwrite * both of these must operate with at least splnet() lest they be * called during packet processing and cause an inconsistancy to appear in * the filter lists. */ #if (BSD >= 199306) static int ipfwrite(dev, uio, ioflag) int ioflag; #else static int ipfwrite(dev, uio) #endif #if (__FreeBSD_version >= 502116) struct cdev *dev; #else dev_t dev; #endif struct uio *uio; { + int error; - if (ipfmain.ipf_running < 1) + CURVNET_SET(CRED_TO_VNET(dev->si_cred)); + if (V_ipfmain.ipf_running < 1) { + CURVNET_RESTORE(); return EIO; + } - if (GET_MINOR(dev) == IPL_LOGSYNC) - return ipf_sync_write(&ipfmain, uio); + if (GET_MINOR(dev) == IPL_LOGSYNC) { + error = ipf_sync_write(&V_ipfmain, uio); + CURVNET_RESTORE(); + return error; + } return ENXIO; } static int ipf_fbsd_sysctl_create(main_softc) ipf_main_softc_t *main_softc; { ipf_nat_softc_t *nat_softc; ipf_state_softc_t *state_softc; ipf_auth_softc_t *auth_softc; ipf_frag_softc_t *frag_softc; nat_softc = main_softc->ipf_nat_soft; state_softc = main_softc->ipf_state_soft; auth_softc = main_softc->ipf_auth_soft; frag_softc = main_softc->ipf_frag_soft; - sysctl_ctx_init(&ipf_clist); + sysctl_ctx_init(&V_ipf_clist); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_defnatage", CTLFLAG_RWO, &nat_softc->ipf_nat_defage, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_statesize", CTLFLAG_RWO, &state_softc->ipf_state_size, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_statemax", CTLFLAG_RWO, &state_softc->ipf_state_max, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "ipf_nattable_max", CTLFLAG_RWO, &nat_softc->ipf_nat_table_max, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "ipf_nattable_sz", CTLFLAG_RWO, &nat_softc->ipf_nat_table_sz, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "ipf_natrules_sz", CTLFLAG_RWO, &nat_softc->ipf_nat_maprules_sz, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "ipf_rdrrules_sz", CTLFLAG_RWO, &nat_softc->ipf_nat_rdrrules_sz, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "ipf_hostmap_sz", CTLFLAG_RWO, &nat_softc->ipf_nat_hostmap_sz, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_authsize", CTLFLAG_RWO, &auth_softc->ipf_auth_size, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_authused", CTLFLAG_RD, &auth_softc->ipf_auth_used, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_defaultauthage", CTLFLAG_RW, &auth_softc->ipf_auth_defaultage, 0, ""); SYSCTL_DYN_IPF(_net_inet_ipf, OID_AUTO, "fr_ipfrttl", CTLFLAG_RW, &frag_softc->ipfr_ttl, 0, ""); return 0; } static int ipf_fbsd_sysctl_destroy(main_softc) ipf_main_softc_t *main_softc; { - if (sysctl_ctx_free(&ipf_clist)) { + if (sysctl_ctx_free(&V_ipf_clist)) { printf("sysctl_ctx_free failed"); return(ENOTEMPTY); } return 0; }