Index: sys/netinet/in.h =================================================================== --- sys/netinet/in.h +++ sys/netinet/in.h @@ -637,6 +637,8 @@ #define IPCTL_GIF_TTL 16 /* default TTL for gif encap packet */ #define IPCTL_INTRDQMAXLEN 17 /* max length of direct netisr queue */ #define IPCTL_INTRDQDROPS 18 /* number of direct netisr q drops */ +#define IPCTL_ICMPERRLEN 19 /* length of IP packet to be copied + back by ICMP error message */ #endif /* __BSD_VISIBLE */ Index: sys/netinet/ip_input.c =================================================================== --- sys/netinet/ip_input.c +++ sys/netinet/ip_input.c @@ -118,6 +118,36 @@ &VNET_NAME(ipsendredirects), 0, "Enable sending IP redirects"); +/* + * Allow configuring length of data copied back + * inside ICMP error packets. Minimum is 8, maximum value + * is determined by mbuf structure size. + */ +#define RFC_ICMP_ERROR_MIN_LEN 8 +#define ICMP_ERROR_MAX_LEN (MHLEN) +VNET_DEFINE(int, icmperrlen) = RFC_ICMP_ERROR_MIN_LEN; +static int +sysctl_icmperrlen_handle(SYSCTL_HANDLER_ARGS) +{ + int rc, val; + + val = VNET(icmperrlen); + rc = sysctl_handle_int(oidp, &val, 0, req); + if (rc != 0 || req->newptr == NULL) + return (rc); + + if ((val < RFC_ICMP_ERROR_MIN_LEN) || + (val > ICMP_ERROR_MAX_LEN)) + return (EINVAL); + + VNET(icmperrlen) = val; + + return (0); +} +SYSCTL_PROC(_net_inet_ip, IPCTL_ICMPERRLEN, icmperrlen, CTLTYPE_INT | CTLFLAG_RWTUN | + CTLFLAG_MPSAFE, NULL, 0, sysctl_icmperrlen_handle, "I", + "Length of IP packet to be reported back by ICMP error message"); + /* * XXX - Setting ip_checkinterface mostly implements the receive side of * the Strong ES model described in RFC 1122, but since the routing table @@ -957,7 +987,7 @@ { struct ip *ip = mtod(m, struct ip *); struct in_ifaddr *ia; - struct mbuf *mcopy; + struct mbuf *mcopy, *mcopy2; struct sockaddr_in *sin; struct in_addr dest; struct route ro; @@ -995,31 +1025,24 @@ * Save the IP header and at most 8 bytes of the payload, * in case we 
need to generate an ICMP message to the src. * - * XXX this can be optimized a lot by saving the data in a local - * buffer on the stack (72 bytes at most), and only allocating the - * mbuf if really necessary. The vast majority of the packets - * are forwarded without having to send an ICMP back (either - * because unnecessary, or because rate limited), so we are - * really we are wasting a lot of work here. - * * We don't use m_copym() because it might return a reference * to a shared cluster. Both this function and ip_output() * assume exclusive access to the IP header in `m', so any * data in a cluster may change before we reach icmp_error(). */ - mcopy = m_gethdr(M_NOWAIT, m->m_type); - if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { + mcopy = __builtin_alloca(MPKTHSIZE + VNET(icmperrlen)); /* NOTE(review): VNET(icmperrlen) is read here and again below when m_len is computed; if the sysctl handler can run in between, m_len could exceed this buffer, so consider reading it once into a local. Also verify the sense of the !m_init() test on the next line against m_init()'s return convention. */ + if (mcopy != NULL && !m_init(mcopy, M_NOWAIT, m->m_type, M_PKTHDR) && + !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { /* * It's probably ok if the pkthdr dup fails (because * the deep copy of the tag chain failed), but for now * be conservative and just discard the copy since * code below may some day want the tags. 
*/ - m_free(mcopy); mcopy = NULL; } if (mcopy != NULL) { - mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); + mcopy->m_len = min(VNET(icmperrlen), ntohs(ip->ip_len)); mcopy->m_pkthdr.len = mcopy->m_len; m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); } @@ -1032,7 +1055,6 @@ if ((error = IPSEC_FORWARD(ipv4, m)) != 0) { /* mbuf consumed by IPsec */ RO_NHFREE(&ro); - m_freem(mcopy); if (error != EINPROGRESS) IPSTAT_INC(ips_cantforward); return; @@ -1088,8 +1110,6 @@ if (type) IPSTAT_INC(ips_redirectsent); else { - if (mcopy) - m_freem(mcopy); return; } } @@ -1134,10 +1154,22 @@ case ENOBUFS: case EACCES: /* ipfw denied packet */ - m_freem(mcopy); return; } - icmp_error(mcopy, type, code, dest.s_addr, mtu); + + /* Create real mbuf before sending it to icmp_error. NOTE(review): when m_dup_pkthdr() fails below, mcopy2 is set to NULL without m_free(), unlike the pre-patch handling of mcopy; this looks like an mbuf leak, confirm. */ + mcopy2 = m_gethdr(M_NOWAIT, mcopy->m_type); + if (mcopy2 != NULL && !m_dup_pkthdr(mcopy2, mcopy, M_NOWAIT)) + mcopy2 = NULL; + if (mcopy2 != NULL) { + mcopy2->m_len = mcopy->m_len; + mcopy2->m_pkthdr.len = mcopy2->m_len; + m_copydata(mcopy, 0, mcopy->m_len, mtod(mcopy2, caddr_t)); + } + if (mcopy2 == NULL) + return; + + icmp_error(mcopy2, type, code, dest.s_addr, mtu); } #define CHECK_SO_CT(sp, ct) \