Add optional RFC 1191 path MTU lookup to IPv4 output.

When the net.inet.ip.rfc1191 sysctl is non-zero, ip_output() passes the MTU it
has selected through ip_calcpmtu(), which consults the hostcache for non-TCP
packets.  The patch also adds the IP_MMTU constant to ip.h and an
icps_pmtuchg statistics field to icmp_var.h.

NOTE(review): the header names on the #include lines of the first ip_output.c
hunk are missing from this copy of the patch and must be restored before it
is applied; the added one is presumably <netinet/tcp_var.h>, for
tcp_hc_getmtu() (TODO confirm).

Index: sys/netinet/icmp_var.h
===================================================================
--- sys/netinet/icmp_var.h
+++ sys/netinet/icmp_var.h
@@ -56,6 +56,7 @@
 	u_long	icps_bmcasttstamp;	/* b/mcast tstamp requests dropped */
 	u_long	icps_badaddr;		/* bad return address */
 	u_long	icps_noroute;		/* no route back */
+	u_long	icps_pmtuchg;		/* path MTU changes */
 };
 
 #ifdef _KERNEL
Index: sys/netinet/ip.h
===================================================================
--- sys/netinet/ip.h
+++ sys/netinet/ip.h
@@ -213,6 +213,7 @@
 #define	IPFRAGTTL	60		/* time to live for frags, slowhz */
 #define	IPTTLDEC	1		/* subtracted when forwarding */
 #define	IP_MSS		576		/* default maximum segment size */
+#define	IP_MMTU		68		/* Minimal MTU from RFC 1191 */
 
 /*
  * This is the real IPv4 pseudo header, used for computing the TCP and UDP
Index: sys/netinet/ip_output.c
===================================================================
--- sys/netinet/ip_output.c
+++ sys/netinet/ip_output.c
@@ -85,6 +85,8 @@
 #include
 #include
+#include
+
 
 #include
 #include
 
@@ -105,6 +107,10 @@
 	&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
 #endif
 
+VNET_DEFINE(int, rfc1191);
+SYSCTL_INT(_net_inet_ip, OID_AUTO, rfc1191, CTLFLAG_VNET | CTLFLAG_RW,
+    &VNET_NAME(rfc1191), 0, "Enable RFC1191 PMTU support");
+
 static void	ip_mloopback(struct ifnet *, const struct mbuf *, int);
 
 extern int in_mcast_loop;
@@ -304,6 +310,51 @@
 	ro->ro_flags |= (nh_flags & NHF_GATEWAY) ? RT_HAS_GW : 0;
 }
 
+/*
+ * Calculates the IPv4 path MTU for @dst from the hostcache data, with @nh_mtu
+ * as both the fall-back value and the upper bound.
+ *
+ * Note that @nh_mtu is a valid fall-back MTU if there is no PMTU entry in the
+ * hostcache.  It does not need to come from a nexthop object, as is the case
+ * when this function is called from ip_output() for a broadcast destination.
+ *
+ * @proto is the IP protocol number of the packet; the hostcache is skipped
+ * for IPPROTO_TCP.
+ *
+ * Returns @nh_mtu if the hostcache is skipped or has no entry; otherwise the
+ * cached PMTU, raised to at least IP_MMTU and then capped at @nh_mtu.  The
+ * result never exceeds @nh_mtu, so a zero @nh_mtu stays visible to the caller.
+ */
+static uint32_t
+ip_calcpmtu(const struct in_addr dst, uint32_t nh_mtu, u_int proto)
+{
+	struct in_conninfo inc;
+	uint32_t hc_mtu;
+
+	/*
+	 * TCP is known to react to PMTU changes, so skip the hostcache.
+	 * XXX: Either add a list of known good protocols or an inpcb flag
+	 * filled by the PMTU aware protocol.
+	 */
+	if (proto == IPPROTO_TCP)
+		return (nh_mtu);
+
+	bzero(&inc, sizeof(inc));
+	inc.inc_faddr = dst;
+	hc_mtu = tcp_hc_getmtu(&inc);
+	if (hc_mtu == 0)
+		return (nh_mtu);
+
+	/*
+	 * XXX: a cached value below IP_MMTU is extremely unlikely, because
+	 * IP_MMTU is the minimal unfragmented packet size (RFC 791).
+	 */
+	if (__predict_false(hc_mtu < IP_MMTU))
+		hc_mtu = IP_MMTU;
+
+	return (min(hc_mtu, nh_mtu));
+}
+
 /*
  * IP output.  The packet in mbuf chain m contains a skeletal IP
  * header (with len, off, ttl, proto, tos, src, dst).
@@ -326,7 +377,6 @@
 	struct ifnet *ifp = NULL;	/* keep compiler happy */
 	struct mbuf *m0;
 	int hlen = sizeof (struct ip);
-	int mtu = 0;
 	int error = 0;
 	int vlan_pcp = -1;
 	struct sockaddr_in *dst, sin;
@@ -335,7 +385,7 @@
 	struct in_addr src;
 	int isbroadcast;
 	uint16_t ip_len, ip_off;
-	uint32_t fibnum;
+	uint32_t fibnum, mtu = 0;
 #if defined(IPSEC) || defined(IPSEC_SUPPORT)
 	int no_route_but_check_spd = 0;
 #endif
@@ -550,6 +600,9 @@
 			    in_ifaddr_broadcast(dst->sin_addr, ia)));
 	}
 
+	/* Determine the path MTU. */
+	if (V_rfc1191)
+		mtu = ip_calcpmtu(ip->ip_dst, mtu, ip->ip_p);
 	/* Catch a possible divide by zero later. */
 	KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (nh_flags=0x%08x) ifp=%p",
 	    __func__, mtu, ro,
Index: sys/netinet/ip_var.h
===================================================================
--- sys/netinet/ip_var.h
+++ sys/netinet/ip_var.h
@@ -180,6 +180,7 @@
 struct sockopt;
 struct inpcbinfo;
 
+VNET_DECLARE(int, rfc1191);
 VNET_DECLARE(int, ip_defttl);			/* default IP ttl */
 VNET_DECLARE(int, ipforwarding);		/* ip forwarding */
 VNET_DECLARE(int, ipsendredirects);
@@ -195,6 +196,7 @@
 VNET_DECLARE(int, drop_redirect);
 extern struct	pr_usrreqs rip_usrreqs;
 
+#define	V_rfc1191		VNET(rfc1191)
 #define	V_ip_id			VNET(ip_id)
 #define	V_ip_defttl		VNET(ip_defttl)
 #define	V_ipforwarding		VNET(ipforwarding)