Index: stable/2.1/sys/netinet/igmp.c =================================================================== --- stable/2.1/sys/netinet/igmp.c (revision 10582) +++ stable/2.1/sys/netinet/igmp.c (revision 10583) @@ -1,646 +1,627 @@ /* * Copyright (c) 1988 Stephen Deering. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)igmp.c 8.1 (Berkeley) 7/19/93 - * $Id: igmp.c,v 1.10 1995/05/16 01:28:29 davidg Exp $ + * $Id: igmp.c,v 1.12 1995/06/13 17:51:05 wollman Exp $ */ /* * Internet Group Management Protocol (IGMP) routines. * * Written by Steve Deering, Stanford, May 1988. * Modified by Rosen Sharma, Stanford, Aug 1994. + * Modified by Bill Fenner, Xerox PARC, Feb 1995. * - * MULTICAST 1.4 + * MULTICAST Revision: 3.3.1.2 */ #include #include #include #include #include #include /* XXX needed for sysctl.h */ #include /* XXX needed for sysctl.h */ #include #include #include #include #include #include #include #include #include #include struct igmpstat igmpstat; -static int igmp_timers_are_running = 0; +static int igmp_timers_are_running; static u_long igmp_all_hosts_group; -static struct router_info *Head = 0; +static u_long igmp_local_group; +static u_long igmp_local_group_mask; +static struct router_info *Head; static void igmp_sendpkt(struct in_multi *, int); static void igmp_sendleave(struct in_multi *); void igmp_init() { /* * To avoid byte-swapping the same value over and over again. 
*/ igmp_all_hosts_group = htonl(INADDR_ALLHOSTS_GROUP); + igmp_local_group = htonl(0xe0000000); /* 224.0.0.0 */ + igmp_local_group_mask = htonl(0xffffff00); /* ........^ */ + + igmp_timers_are_running = 0; + Head = (struct router_info *) 0; } int fill_rti(inm) struct in_multi *inm; { register struct router_info *rti = Head; #ifdef IGMP_DEBUG printf("[igmp.c, _fill_rti] --> entering \n"); #endif while (rti) { - if (rti->ifp == inm->inm_ifp){ /* ? is it ok to compare */ - /* pointers */ + if (rti->ifp == inm->inm_ifp) { inm->inm_rti = rti; #ifdef IGMP_DEBUG printf("[igmp.c, _fill_rti] --> found old entry \n"); #endif - if (rti->type == IGMP_OLD_ROUTER) + if (rti->type == IGMP_OLD_ROUTER) return IGMP_HOST_MEMBERSHIP_REPORT; else return IGMP_HOST_NEW_MEMBERSHIP_REPORT; } rti = rti->next; } MALLOC(rti, struct router_info *, sizeof *rti, M_MRTABLE, M_NOWAIT); rti->ifp = inm->inm_ifp; rti->type = IGMP_NEW_ROUTER; rti->time = IGMP_AGE_THRESHOLD; rti->next = Head; - Head = rti; + Head = rti; inm->inm_rti = rti; #ifdef IGMP_DEBUG printf("[igmp.c, _fill_rti] --> created new entry \n"); #endif return IGMP_HOST_NEW_MEMBERSHIP_REPORT; } struct router_info * find_rti(ifp) struct ifnet *ifp; { register struct router_info *rti = Head; #ifdef IGMP_DEBUG printf("[igmp.c, _find_rti] --> entering \n"); #endif while (rti) { - if (rti->ifp == ifp){ /* ? 
is it ok to compare pointers */ + if (rti->ifp == ifp) { #ifdef IGMP_DEBUG printf("[igmp.c, _find_rti] --> found old entry \n"); #endif return rti; } rti = rti->next; } MALLOC(rti, struct router_info *, sizeof *rti, M_MRTABLE, M_NOWAIT); rti->ifp = ifp; rti->type = IGMP_NEW_ROUTER; rti->time = IGMP_AGE_THRESHOLD; rti->next = Head; Head = rti; #ifdef IGMP_DEBUG printf("[igmp.c, _find_rti] --> created an entry \n"); #endif return rti; } void igmp_input(m, iphlen) register struct mbuf *m; register int iphlen; { register struct igmp *igmp; register struct ip *ip; register int igmplen; register struct ifnet *ifp = m->m_pkthdr.rcvif; register int minlen; register struct in_multi *inm; register struct in_ifaddr *ia; struct in_multistep step; struct router_info *rti; - + int timer; /** timer value in the igmp query header **/ ++igmpstat.igps_rcv_total; ip = mtod(m, struct ip *); igmplen = ip->ip_len; /* * Validate lengths */ if (igmplen < IGMP_MINLEN) { ++igmpstat.igps_rcv_tooshort; m_freem(m); return; } minlen = iphlen + IGMP_MINLEN; if ((m->m_flags & M_EXT || m->m_len < minlen) && (m = m_pullup(m, minlen)) == 0) { ++igmpstat.igps_rcv_tooshort; return; } /* * Validate checksum */ m->m_data += iphlen; m->m_len -= iphlen; igmp = mtod(m, struct igmp *); if (in_cksum(m, igmplen)) { ++igmpstat.igps_rcv_badsum; m_freem(m); return; } m->m_data -= iphlen; m->m_len += iphlen; ip = mtod(m, struct ip *); timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE; rti = find_rti(ifp); switch (igmp->igmp_type) { case IGMP_HOST_MEMBERSHIP_QUERY: ++igmpstat.igps_rcv_queries; if (ifp->if_flags & IFF_LOOPBACK) break; if (igmp->igmp_code == 0) { rti->type = IGMP_OLD_ROUTER; rti->time = 0; /* ** Do exactly as RFC 1112 says */ if (ip->ip_dst.s_addr != igmp_all_hosts_group) { ++igmpstat.igps_rcv_badqueries; m_freem(m); return; } /* * Start the timers in all of our membership records for * the interface on which the query arrived, except those - * that are already running and those that belong to 
the - * "all-hosts" group. + * that are already running and those that belong to a + * "local" group (224.0.0.X). */ IN_FIRST_MULTI(step, inm); while (inm != NULL) { - if (inm->inm_ifp == ifp + if (inm->inm_ifp == ifp && inm->inm_timer == 0 - && inm->inm_addr.s_addr - != igmp_all_hosts_group) { + && ((inm->inm_addr.s_addr + & igmp_local_group_mask) + != igmp_local_group)) { inm->inm_state = IGMP_DELAYING_MEMBER; inm->inm_timer = IGMP_RANDOM_DELAY( IGMP_MAX_HOST_REPORT_DELAY * PR_FASTHZ ); igmp_timers_are_running = 1; } IN_NEXT_MULTI(step, inm); } } else { /* ** New Router */ - - if (ip->ip_dst.s_addr != igmp_all_hosts_group) { - if (!(m->m_flags & M_MCAST)) { - ++igmpstat.igps_rcv_badqueries; - m_freem(m); - return; - } + + if (!(m->m_flags & M_MCAST)) { + ++igmpstat.igps_rcv_badqueries; + m_freem(m); + return; } - if (ip->ip_dst.s_addr == igmp_all_hosts_group) { - - /* - * - Start the timers in all of our membership records - * for the interface on which the query arrived - * excl. those that belong to the "all-hosts" group. - * - For timers already running check if they need to - * be reset. - * - Use the igmp->igmp_code filed as the maximum - * delay possible - */ - IN_FIRST_MULTI(step, inm); - while (inm != NULL){ - switch(inm->inm_state){ + + /* + * - Start the timers in all of our membership records + * that the query applies to for the interface on + * which the query arrived excl. those that belong + * to a "local" group (224.0.0.X) + * - For timers already running check if they need to + * be reset. 
+ * - Use the igmp->igmp_code field as the maximum + * delay possible + */ + IN_FIRST_MULTI(step, inm); + while (inm != NULL) { + if (inm->inm_ifp == ifp && + (inm->inm_addr.s_addr & igmp_local_group_mask) != + igmp_local_group && + (ip->ip_dst.s_addr == igmp_all_hosts_group || + ip->ip_dst.s_addr == inm->inm_addr.s_addr)) { + switch(inm->inm_state) { case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: - if (inm->inm_ifp == ifp && - inm->inm_addr.s_addr != - igmp_all_hosts_group) { inm->inm_timer = IGMP_RANDOM_DELAY(timer); igmp_timers_are_running = 1; inm->inm_state = IGMP_DELAYING_MEMBER; - } - break; + break; case IGMP_DELAYING_MEMBER: - if (inm->inm_ifp == ifp && - (inm->inm_timer > timer) && - inm->inm_addr.s_addr != - igmp_all_hosts_group) { + if (inm->inm_timer > timer) { inm->inm_timer = IGMP_RANDOM_DELAY(timer); igmp_timers_are_running = 1; inm->inm_state = IGMP_DELAYING_MEMBER; } break; case IGMP_SLEEPING_MEMBER: inm->inm_state = IGMP_AWAKENING_MEMBER; break; } - IN_NEXT_MULTI(step, inm); - } - } else { - /* - ** group specific query - */ - - IN_FIRST_MULTI(step, inm); - while (inm != NULL) { - if (inm->inm_addr.s_addr == ip->ip_dst.s_addr) { - switch(inm->inm_state ){ - case IGMP_IDLE_MEMBER: - case IGMP_LAZY_MEMBER: - case IGMP_AWAKENING_MEMBER: - inm->inm_state = IGMP_DELAYING_MEMBER; - if (inm->inm_ifp == ifp ) { - inm->inm_timer = IGMP_RANDOM_DELAY(timer); - igmp_timers_are_running = 1; - inm->inm_state = IGMP_DELAYING_MEMBER; - } - break; - case IGMP_DELAYING_MEMBER: - inm->inm_state = IGMP_DELAYING_MEMBER; - if (inm->inm_ifp == ifp && - (inm->inm_timer > timer) ) { - inm->inm_timer = IGMP_RANDOM_DELAY(timer); - igmp_timers_are_running = 1; - inm->inm_state = IGMP_DELAYING_MEMBER; - } - break; - case IGMP_SLEEPING_MEMBER: - inm->inm_state = IGMP_AWAKENING_MEMBER; - break; - } } IN_NEXT_MULTI(step, inm); } - } } + break; case IGMP_HOST_MEMBERSHIP_REPORT: + /* + * an old report + */ ++igmpstat.igps_rcv_reports; if (ifp->if_flags 
& IFF_LOOPBACK) break; if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || igmp->igmp_group.s_addr != ip->ip_dst.s_addr) { ++igmpstat.igps_rcv_badreports; m_freem(m); return; } /* * KLUDGE: if the IP source address of the report has an * unspecified (i.e., zero) subnet number, as is allowed for * a booting host, replace it with the correct subnet number * so that a process-level multicast routing demon can * determine which subnet it arrived from. This is necessary * to compensate for the lack of any way for a process to * determine the arrival interface of an incoming packet. */ if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0) { IFP_TO_IA(ifp, ia); if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet); } /* * If we belong to the group being reported, stop * our timer for that group. */ IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm); - if (inm != NULL) { - inm->inm_timer = 0; - ++igmpstat.igps_rcv_ourreports; - } if (inm != NULL) { inm->inm_timer = 0; ++igmpstat.igps_rcv_ourreports; - + switch(inm->inm_state){ case IGMP_IDLE_MEMBER: case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: case IGMP_SLEEPING_MEMBER: inm->inm_state = IGMP_SLEEPING_MEMBER; break; case IGMP_DELAYING_MEMBER: - /** check this out - this was if (oldrouter) **/ if (inm->inm_rti->type == IGMP_OLD_ROUTER) inm->inm_state = IGMP_LAZY_MEMBER; - else inm->inm_state = IGMP_SLEEPING_MEMBER; + else + inm->inm_state = IGMP_SLEEPING_MEMBER; break; } } - + break; case IGMP_HOST_NEW_MEMBERSHIP_REPORT: /* - * an new report + * a new report */ - ++igmpstat.igps_rcv_reports; + /* + * We can get confused and think there's someone + * else out there if we are a multicast router. + * For fast leave to work, we have to know that + * we are the only member. 
+ */ + IFP_TO_IA(ifp, ia); + if (ia && ip->ip_src.s_addr == IA_SIN(ia)->sin_addr.s_addr) + break; + + ++igmpstat.igps_rcv_reports; + if (ifp->if_flags & IFF_LOOPBACK) break; - + if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || igmp->igmp_group.s_addr != ip->ip_dst.s_addr) { ++igmpstat.igps_rcv_badreports; m_freem(m); return; } - + /* * KLUDGE: if the IP source address of the report has an * unspecified (i.e., zero) subnet number, as is allowed for * a booting host, replace it with the correct subnet number * so that a process-level multicast routing demon can * determine which subnet it arrived from. This is necessary * to compensate for the lack of any way for a process to * determine the arrival interface of an incoming packet. */ if ((ntohl(ip->ip_src.s_addr) & IN_CLASSA_NET) == 0) { +/* #ifndef MROUTING XXX - I don't think the ifdef is necessary */ IFP_TO_IA(ifp, ia); +/* #endif */ if (ia) ip->ip_src.s_addr = htonl(ia->ia_subnet); } - + /* * If we belong to the group being reported, stop * our timer for that group. */ IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm); if (inm != NULL) { inm->inm_timer = 0; ++igmpstat.igps_rcv_ourreports; - + switch(inm->inm_state){ case IGMP_DELAYING_MEMBER: case IGMP_IDLE_MEMBER: inm->inm_state = IGMP_LAZY_MEMBER; break; case IGMP_AWAKENING_MEMBER: inm->inm_state = IGMP_LAZY_MEMBER; break; case IGMP_LAZY_MEMBER: case IGMP_SLEEPING_MEMBER: break; } } } /* * Pass all valid IGMP packets up to any process(es) listening * on a raw IGMP socket. 
*/ rip_input(m); } void igmp_joingroup(inm) struct in_multi *inm; { - register int s = splnet(); + int s = splnet(); inm->inm_state = IGMP_IDLE_MEMBER; - if (inm->inm_addr.s_addr == igmp_all_hosts_group || - inm->inm_ifp->if_flags & IFF_LOOPBACK) + if ((inm->inm_addr.s_addr & igmp_local_group_mask) == igmp_local_group + || inm->inm_ifp->if_flags & IFF_LOOPBACK) inm->inm_timer = 0; else { igmp_sendpkt(inm,fill_rti(inm)); inm->inm_timer = IGMP_RANDOM_DELAY( IGMP_MAX_HOST_REPORT_DELAY*PR_FASTHZ); inm->inm_state = IGMP_DELAYING_MEMBER; igmp_timers_are_running = 1; } splx(s); } void igmp_leavegroup(inm) struct in_multi *inm; { - /* - * No action required on leaving a group. - */ - switch(inm->inm_state){ + switch(inm->inm_state) { case IGMP_DELAYING_MEMBER: case IGMP_IDLE_MEMBER: - if (!(inm->inm_addr.s_addr == igmp_all_hosts_group || - inm->inm_ifp->if_flags & IFF_LOOPBACK)) + if (((inm->inm_addr.s_addr & igmp_local_group_mask) + != igmp_local_group) + && !(inm->inm_ifp->if_flags & IFF_LOOPBACK)) if (inm->inm_rti->type != IGMP_OLD_ROUTER) igmp_sendleave(inm); break; case IGMP_LAZY_MEMBER: case IGMP_AWAKENING_MEMBER: case IGMP_SLEEPING_MEMBER: break; } } void igmp_fasttimo() { register struct in_multi *inm; - register int s; struct in_multistep step; + int s; /* * Quick check to see if any work needs to be done, in order * to minimize the overhead of fasttimo processing. 
*/ + if (!igmp_timers_are_running) return; s = splnet(); igmp_timers_are_running = 0; IN_FIRST_MULTI(step, inm); while (inm != NULL) { if (inm->inm_timer == 0) { /* do nothing */ } else if (--inm->inm_timer == 0) { if (inm->inm_state == IGMP_DELAYING_MEMBER) { if (inm->inm_rti->type == IGMP_OLD_ROUTER) igmp_sendpkt(inm, IGMP_HOST_MEMBERSHIP_REPORT); else igmp_sendpkt(inm, IGMP_HOST_NEW_MEMBERSHIP_REPORT); inm->inm_state = IGMP_IDLE_MEMBER; } } else { igmp_timers_are_running = 1; } IN_NEXT_MULTI(step, inm); } splx(s); } void igmp_slowtimo() { int s = splnet(); register struct router_info *rti = Head; #ifdef IGMP_DEBUG printf("[igmp.c,_slowtimo] -- > entering \n"); #endif while (rti) { rti->time ++; if (rti->time >= IGMP_AGE_THRESHOLD){ rti->type = IGMP_NEW_ROUTER; rti->time = IGMP_AGE_THRESHOLD; } rti = rti->next; } -#ifdef IGMP_DEBUG +#ifdef IGMP_DEBUG printf("[igmp.c,_slowtimo] -- > exiting \n"); #endif splx(s); } static void igmp_sendpkt(inm, type) struct in_multi *inm; int type; { struct mbuf *m; struct igmp *igmp; struct ip *ip; struct ip_moptions *imo; MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m == NULL) return; MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_DONTWAIT); if (!imo) { m_free(m); return; } m->m_pkthdr.rcvif = loif; m->m_pkthdr.len = sizeof(struct ip) + IGMP_MINLEN; MH_ALIGN(m, IGMP_MINLEN + sizeof(struct ip)); m->m_data += sizeof(struct ip); m->m_len = IGMP_MINLEN; igmp = mtod(m, struct igmp *); igmp->igmp_type = type; igmp->igmp_code = 0; igmp->igmp_group = inm->inm_addr; igmp->igmp_cksum = 0; igmp->igmp_cksum = in_cksum(m, IGMP_MINLEN); m->m_data -= sizeof(struct ip); m->m_len += sizeof(struct ip); ip = mtod(m, struct ip *); ip->ip_tos = 0; ip->ip_len = sizeof(struct ip) + IGMP_MINLEN; ip->ip_off = 0; ip->ip_p = IPPROTO_IGMP; ip->ip_src.s_addr = INADDR_ANY; ip->ip_dst = igmp->igmp_group; imo->imo_multicast_ifp = inm->inm_ifp; imo->imo_multicast_ttl = 1; + imo->imo_multicast_vif = -1; /* * Request loopback of the report if we are acting 
as a multicast * router, so that the process-level routing demon can hear it. */ imo->imo_multicast_loop = (ip_mrouter != NULL); ip_output(m, (struct mbuf *)0, (struct route *)0, 0, imo); FREE(imo, M_IPMOPTS); ++igmpstat.igps_snd_reports; - } static void igmp_sendleave(inm) struct in_multi *inm; { igmp_sendpkt(inm, IGMP_HOST_LEAVE_MESSAGE); } int igmp_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, size_t newlen) { /* All sysctl names at this level are terminal. */ if (namelen != 1) return ENOTDIR; /* XXX overloaded */ switch(name[0]) { case IGMPCTL_STATS: - return sysctl_rdstruct(oldp, oldlenp, newp, &igmpstat, + return sysctl_rdstruct(oldp, oldlenp, newp, &igmpstat, sizeof igmpstat); default: return ENOPROTOOPT; } } Index: stable/2.1/sys/netinet/igmp.h =================================================================== --- stable/2.1/sys/netinet/igmp.h (revision 10582) +++ stable/2.1/sys/netinet/igmp.h (revision 10583) @@ -1,93 +1,106 @@ /* * Copyright (c) 1988 Stephen Deering. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)igmp.h 8.1 (Berkeley) 6/10/93 - * $Id: igmp.h,v 1.4 1994/09/06 22:42:17 wollman Exp $ + * $Id: igmp.h,v 1.6 1995/06/13 17:51:06 wollman Exp $ */ #ifndef _NETINET_IGMP_H_ #define _NETINET_IGMP_H_ /* * Internet Group Management Protocol (IGMP) definitions. * * Written by Steve Deering, Stanford, May 1988. * - * MULTICAST 1.2 + * MULTICAST Revision: 3.3.1.2 */ /* * IGMP packet format. */ struct igmp { u_char igmp_type; /* version & type of IGMP message */ - u_char igmp_code; /* unused, should be zero */ + u_char igmp_code; /* subtype for routing msgs */ u_short igmp_cksum; /* IP-style checksum */ struct in_addr igmp_group; /* group address being reported */ }; /* (zero for queries) */ #define IGMP_MINLEN 8 -#define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* message types, incl. 
version */ -#define IGMP_HOST_MEMBERSHIP_REPORT 0x12 -#define IGMP_DVMRP 0x13 /* for experimental multicast */ - /* routing protocol */ -#define IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16 -#define IGMP_HOST_LEAVE_MESSAGE 0x17 -#define IGMP_MTRACE 0x1f /* mcast traceroute messages */ -#define IGMP_MTRACE_RESP 0x1e /* traceroute resp. (to sender) */ +/* + * Message types, including version number. + */ +#define IGMP_HOST_MEMBERSHIP_QUERY 0x11 /* Host membership query */ +#define IGMP_HOST_MEMBERSHIP_REPORT 0x12 /* Old membership report */ +#define IGMP_DVMRP 0x13 /* DVMRP routing message */ +#define IGMP_PIM 0x14 /* PIM routing message */ -#define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */ +#define IGMP_HOST_NEW_MEMBERSHIP_REPORT 0x16 /* New membership report */ + +#define IGMP_HOST_LEAVE_MESSAGE 0x17 /* Leave-group message */ + +#define IGMP_MTRACE_RESP 0x1e /* traceroute resp. (to sender) */ +#define IGMP_MTRACE 0x1f /* mcast traceroute messages */ + +#define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */ + /* query (in seconds) */ + #define IGMP_TIMER_SCALE 10 /* denotes that the igmp->timer filed */ - /*specifies time in 10th os seconds */ + /*specifies time in tenths of seconds */ +/* + * States for the IGMPv2 state table + */ #define IGMP_DELAYING_MEMBER 1 #define IGMP_IDLE_MEMBER 2 -#define IGMP_LAZY_MEMBER 3 -#define IGMP_SLEEPING_MEMBER 4 -#define IGMP_AWAKENING_MEMBER 5 +#define IGMP_LAZY_MEMBER 3 +#define IGMP_SLEEPING_MEMBER 4 +#define IGMP_AWAKENING_MEMBER 5 - +/* + * We must remember whether the querier is an old or a new router. + */ #define IGMP_OLD_ROUTER 0 #define IGMP_NEW_ROUTER 1 -#define IGMP_AGE_THRESHOLD 540 +/* + * Revert to new router if we haven't heard from an old router in + * this amount of time. 
+ */ +#define IGMP_AGE_THRESHOLD 540 -#ifdef IGMP_STATES -static char *tostate[]={"","DELAYING_MEMBER","IDLE","LAZY","SLEEPING", - "AWAKENING" }; -#endif /* IGMP_STATES */ #endif /* _NETINET_IGMP_H_ */ Index: stable/2.1/sys/netinet/igmp_var.h =================================================================== --- stable/2.1/sys/netinet/igmp_var.h (revision 10582) +++ stable/2.1/sys/netinet/igmp_var.h (revision 10583) @@ -1,92 +1,92 @@ /* * Copyright (c) 1988 Stephen Deering. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)igmp_var.h 8.1 (Berkeley) 7/19/93 - * $Id: igmp_var.h,v 1.4 1994/09/06 22:42:17 wollman Exp $ + * $Id: igmp_var.h,v 1.6 1995/06/13 17:51:07 wollman Exp $ */ #ifndef _NETINET_IGMP_VAR_H_ #define _NETINET_IGMP_VAR_H_ /* * Internet Group Management Protocol (IGMP), * implementation-specific definitions. * * Written by Steve Deering, Stanford, May 1988. * - * MULTICAST 1.1 + * MULTICAST Revision: 3.3.1.1 */ struct igmpstat { - u_long igps_rcv_total; /* total IGMP messages received */ - u_long igps_rcv_tooshort; /* received with too few bytes */ - u_long igps_rcv_badsum; /* received with bad checksum */ - u_long igps_rcv_queries; /* received membership queries */ - u_long igps_rcv_badqueries; /* received invalid queries */ - u_long igps_rcv_reports; /* received membership reports */ - u_long igps_rcv_badreports; /* received invalid reports */ - u_long igps_rcv_ourreports; /* received reports for our groups */ - u_long igps_snd_reports; /* sent membership reports */ + u_int igps_rcv_total; /* total IGMP messages received */ + u_int igps_rcv_tooshort; /* received with too few bytes */ + u_int igps_rcv_badsum; /* received with bad checksum */ + u_int igps_rcv_queries; /* received membership queries */ + u_int igps_rcv_badqueries; /* received invalid queries */ + u_int igps_rcv_reports; /* received membership reports */ + u_int igps_rcv_badreports; /* received invalid reports */ + u_int igps_rcv_ourreports; /* received reports for our groups */ + u_int 
igps_snd_reports; /* sent membership reports */ }; #ifdef KERNEL extern struct igmpstat igmpstat; #define IGMP_RANDOM_DELAY(X) (random() % (X) + 1) void igmp_init __P((void)); void igmp_input __P((struct mbuf *, int)); void igmp_joingroup __P((struct in_multi *)); void igmp_leavegroup __P((struct in_multi *)); void igmp_fasttimo __P((void)); void igmp_slowtimo __P((void)); int igmp_sysctl(int *, u_int, void *, size_t *, void *, size_t); #endif /* * Names for IGMP sysctl objects */ #define IGMPCTL_STATS 1 /* statistics (read-only) */ #define IGMPCTL_MAXID 2 #define IGMPCTL_NAMES { \ { 0, 0 }, \ { "stats", CTLTYPE_STRUCT }, \ } #endif Index: stable/2.1/sys/netinet/in.h =================================================================== --- stable/2.1/sys/netinet/in.h (revision 10582) +++ stable/2.1/sys/netinet/in.h (revision 10583) @@ -1,261 +1,263 @@ /* * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.h 8.3 (Berkeley) 1/3/94 - * $Id: in.h,v 1.7 1995/02/14 23:04:50 wollman Exp $ + * $Id: in.h,v 1.10 1995/06/13 17:51:08 wollman Exp $ */ #ifndef _NETINET_IN_H_ #define _NETINET_IN_H_ /* * Constants and structures defined by the internet system, * Per RFC 790, September 1981, and numerous additions. */ /* * Protocols */ #define IPPROTO_IP 0 /* dummy for IP */ #define IPPROTO_ICMP 1 /* control message protocol */ #define IPPROTO_IGMP 2 /* group mgmt protocol */ #define IPPROTO_GGP 3 /* gateway^2 (deprecated) */ +#define IPPROTO_IPIP 4 /* IP encapsulation in IP */ #define IPPROTO_TCP 6 /* tcp */ #define IPPROTO_EGP 8 /* exterior gateway protocol */ #define IPPROTO_PUP 12 /* pup */ #define IPPROTO_UDP 17 /* user datagram protocol */ #define IPPROTO_IDP 22 /* xns idp */ #define IPPROTO_TP 29 /* tp-4 w/ class negotiation */ #define IPPROTO_RSVP 46 /* resource reservation */ #define IPPROTO_EON 80 /* ISO cnlp */ #define IPPROTO_ENCAP 98 /* encapsulation header */ #define IPPROTO_RAW 255 /* raw IP packet */ #define IPPROTO_MAX 256 /* * Local port number conventions: * Ports < IPPORT_RESERVED are reserved for * privileged processes (e.g. root). 
* Ports > IPPORT_USERRESERVED are reserved * for servers, not necessarily privileged. */ #define IPPORT_RESERVED 1024 #define IPPORT_USERRESERVED 5000 /* * Internet address (a structure for historical reasons) */ struct in_addr { u_long s_addr; }; /* * Definitions of bits in internet address integers. * On subnets, the decomposition of addresses to host and net parts * is done according to subnet mask, not the masks here. */ #define IN_CLASSA(i) (((long)(i) & 0x80000000) == 0) #define IN_CLASSA_NET 0xff000000 #define IN_CLASSA_NSHIFT 24 #define IN_CLASSA_HOST 0x00ffffff #define IN_CLASSA_MAX 128 #define IN_CLASSB(i) (((long)(i) & 0xc0000000) == 0x80000000) #define IN_CLASSB_NET 0xffff0000 #define IN_CLASSB_NSHIFT 16 #define IN_CLASSB_HOST 0x0000ffff #define IN_CLASSB_MAX 65536 #define IN_CLASSC(i) (((long)(i) & 0xe0000000) == 0xc0000000) #define IN_CLASSC_NET 0xffffff00 #define IN_CLASSC_NSHIFT 8 #define IN_CLASSC_HOST 0x000000ff #define IN_CLASSD(i) (((long)(i) & 0xf0000000) == 0xe0000000) #define IN_CLASSD_NET 0xf0000000 /* These ones aren't really */ #define IN_CLASSD_NSHIFT 28 /* net and host fields, but */ #define IN_CLASSD_HOST 0x0fffffff /* routing needn't know. */ #define IN_MULTICAST(i) IN_CLASSD(i) #define IN_EXPERIMENTAL(i) (((long)(i) & 0xf0000000) == 0xf0000000) #define IN_BADCLASS(i) (((long)(i) & 0xf0000000) == 0xf0000000) #define INADDR_ANY (u_long)0x00000000 #define INADDR_BROADCAST (u_long)0xffffffff /* must be masked */ #ifndef KERNEL #define INADDR_NONE 0xffffffff /* -1 return */ #endif #define INADDR_UNSPEC_GROUP (u_long)0xe0000000 /* 224.0.0.0 */ #define INADDR_ALLHOSTS_GROUP (u_long)0xe0000001 /* 224.0.0.1 */ #define INADDR_MAX_LOCAL_GROUP (u_long)0xe00000ff /* 224.0.0.255 */ #define IN_LOOPBACKNET 127 /* official! */ /* * Socket address, internet style. */ struct sockaddr_in { u_char sin_len; u_char sin_family; u_short sin_port; struct in_addr sin_addr; char sin_zero[8]; }; /* * Structure used to describe IP options. 
* Used to store options internally, to pass them to a process, * or to restore options retrieved earlier. * The ip_dst is used for the first-hop gateway when using a source route * (this gets put into the header proper). */ struct ip_opts { struct in_addr ip_dst; /* first hop, 0 w/o src rt */ char ip_opts[40]; /* actually variable in size */ }; /* * Options for use with [gs]etsockopt at the IP level. * First word of comment is data type; bool is stored in int. */ #define IP_OPTIONS 1 /* buf/ip_opts; set/get IP options */ #define IP_HDRINCL 2 /* int; header is included with data */ #define IP_TOS 3 /* int; IP type of service and preced. */ #define IP_TTL 4 /* int; IP time to live */ #define IP_RECVOPTS 5 /* bool; receive all IP opts w/dgram */ #define IP_RECVRETOPTS 6 /* bool; receive IP opts for response */ #define IP_RECVDSTADDR 7 /* bool; receive IP dst addr w/dgram */ #define IP_RETOPTS 8 /* ip_opts; set/get IP options */ #define IP_MULTICAST_IF 9 /* u_char; set/get IP multicast i/f */ #define IP_MULTICAST_TTL 10 /* u_char; set/get IP multicast ttl */ #define IP_MULTICAST_LOOP 11 /* u_char; set/get IP multicast loopback */ #define IP_ADD_MEMBERSHIP 12 /* ip_mreq; add an IP group membership */ #define IP_DROP_MEMBERSHIP 13 /* ip_mreq; drop an IP group membership */ #define IP_MULTICAST_VIF 14 /* set/get IP mcast virt. iface */ #define IP_RSVP_ON 15 /* enable RSVP in kernel */ #define IP_RSVP_OFF 16 /* disable RSVP in kernel */ +#define IP_RSVP_VIF_ON 17 /* set RSVP per-vif socket */ +#define IP_RSVP_VIF_OFF 18 /* unset RSVP per-vif socket */ - /* * Defaults and limits for options */ #define IP_DEFAULT_MULTICAST_TTL 1 /* normally limit m'casts to 1 hop */ #define IP_DEFAULT_MULTICAST_LOOP 1 /* normally hear sends if a member */ -#define IP_MAX_MEMBERSHIPS 20 /* per socket; must fit in one mbuf */ +#define IP_MAX_MEMBERSHIPS 20 /* per socket */ /* * Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP. 
*/ struct ip_mreq { struct in_addr imr_multiaddr; /* IP multicast address of group */ struct in_addr imr_interface; /* local IP address of interface */ }; /* * Definitions for inet sysctl operations. * * Third level is protocol number. * Fourth level is desired variable within that protocol. */ #define IPPROTO_MAXID (IPPROTO_IDP + 1) /* don't list to IPPROTO_MAX */ #define CTL_IPPROTO_NAMES { \ { "ip", CTLTYPE_NODE }, \ { "icmp", CTLTYPE_NODE }, \ { "igmp", CTLTYPE_NODE }, \ { "ggp", CTLTYPE_NODE }, \ { 0, 0 }, \ { 0, 0 }, \ { "tcp", CTLTYPE_NODE }, \ { 0, 0 }, \ { "egp", CTLTYPE_NODE }, \ { 0, 0 }, \ { 0, 0 }, \ { 0, 0 }, \ { "pup", CTLTYPE_NODE }, \ { 0, 0 }, \ { 0, 0 }, \ { 0, 0 }, \ { 0, 0 }, \ { "udp", CTLTYPE_NODE }, \ { 0, 0 }, \ { 0, 0 }, \ { 0, 0 }, \ { 0, 0 }, \ { "idp", CTLTYPE_NODE }, \ } /* * Names for IP sysctl objects */ #define IPCTL_FORWARDING 1 /* act as router */ #define IPCTL_SENDREDIRECTS 2 /* may send redirects when forwarding */ #define IPCTL_DEFTTL 3 /* default TTL */ #ifdef notyet #define IPCTL_DEFMTU 4 /* default MTU */ #endif #define IPCTL_RTEXPIRE 5 /* cloned route expiration time */ #define IPCTL_RTMINEXPIRE 6 /* min value for expiration time */ #define IPCTL_RTMAXCACHE 7 /* trigger level for dynamic expire */ #define IPCTL_SOURCEROUTE 8 /* may perform source routes */ #define IPCTL_MAXID 9 #define IPCTL_NAMES { \ { 0, 0 }, \ { "forwarding", CTLTYPE_INT }, \ { "redirect", CTLTYPE_INT }, \ { "ttl", CTLTYPE_INT }, \ { "mtu", CTLTYPE_INT }, \ { "rtexpire", CTLTYPE_INT }, \ { "rtminexpire", CTLTYPE_INT }, \ { "rtmaxcache", CTLTYPE_INT }, \ { "sourceroute", CTLTYPE_INT }, \ } #ifdef KERNEL struct ifnet; struct mbuf; /* forward declarations for Standard C */ int in_broadcast __P((struct in_addr, struct ifnet *)); int in_canforward __P((struct in_addr)); int in_cksum __P((struct mbuf *, int)); int in_localaddr __P((struct in_addr)); u_long in_netof __P((struct in_addr)); void in_socktrim __P((struct sockaddr_in *)); char *inet_ntoa __P((struct 
in_addr)); /* in libkern */ #endif #endif Index: stable/2.1/sys/netinet/in_proto.c =================================================================== --- stable/2.1/sys/netinet/in_proto.c (revision 10582) +++ stable/2.1/sys/netinet/in_proto.c (revision 10583) @@ -1,213 +1,214 @@ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)in_proto.c 8.1 (Berkeley) 6/10/93 - * $Id: in_proto.c,v 1.14 1995/05/11 00:13:17 wollman Exp $ + * $Id: in_proto.c,v 1.17 1995/06/26 16:11:51 wollman Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef TCPDEBUG #include #endif #include #include /* * TCP/IP protocol family: IP, ICMP, UDP, TCP. */ #ifdef NSIP void idpip_input(), nsip_ctlinput(); #endif #ifdef TPIP void tpip_input(), tpip_ctlinput(), tp_ctloutput(); int tp_init(), tp_slowtimo(), tp_drain(), tp_usrreq(); #endif #ifdef EON void eoninput(), eonctlinput(), eonprotoinit(); #endif /* EON */ -void multiencap_decap(struct mbuf *); +void rsvp_input(struct mbuf *, int); +void ipip_input(struct mbuf *, int); extern struct domain inetdomain; struct protosw inetsw[] = { { 0, &inetdomain, 0, 0, 0, ip_output, 0, 0, 0, ip_init, 0, ip_slowtimo, ip_drain, ip_sysctl }, { SOCK_DGRAM, &inetdomain, IPPROTO_UDP, PR_ATOMIC|PR_ADDR, udp_input, 0, udp_ctlinput, ip_ctloutput, udp_usrreq, udp_init, 0, 0, 0, udp_sysctl }, { SOCK_STREAM, &inetdomain, IPPROTO_TCP, PR_CONNREQUIRED|PR_IMPLOPCL|PR_WANTRCVD, tcp_input, 0, tcp_ctlinput, tcp_ctloutput, tcp_usrreq, tcp_init, tcp_fasttimo, tcp_slowtimo, tcp_drain, tcp_sysctl }, { SOCK_RAW, &inetdomain, IPPROTO_RAW, PR_ATOMIC|PR_ADDR, rip_input, rip_output, 0, rip_ctloutput, rip_usrreq, 0, 0, 0, 0, 
}, { SOCK_RAW, &inetdomain, IPPROTO_ICMP, PR_ATOMIC|PR_ADDR, icmp_input, rip_output, 0, rip_ctloutput, rip_usrreq, 0, 0, 0, 0, icmp_sysctl }, { SOCK_RAW, &inetdomain, IPPROTO_IGMP, PR_ATOMIC|PR_ADDR, igmp_input, rip_output, 0, rip_ctloutput, rip_usrreq, igmp_init, igmp_fasttimo, igmp_slowtimo, 0, igmp_sysctl }, { SOCK_RAW, &inetdomain, IPPROTO_RSVP, PR_ATOMIC|PR_ADDR, - rip_input, rip_output, 0, rip_ctloutput, + rsvp_input, rip_output, 0, rip_ctloutput, rip_usrreq, 0, 0, 0, 0, }, -{ SOCK_RAW, &inetdomain, IPPROTO_ENCAP, PR_ATOMIC|PR_ADDR, - multiencap_decap, rip_output, 0, rip_ctloutput, +{ SOCK_RAW, &inetdomain, IPPROTO_IPIP, PR_ATOMIC|PR_ADDR, + ipip_input, rip_output, 0, rip_ctloutput, rip_usrreq, 0, 0, 0, 0, }, #ifdef TPIP { SOCK_SEQPACKET,&inetdomain, IPPROTO_TP, PR_CONNREQUIRED|PR_WANTRCVD, tpip_input, 0, tpip_ctlinput, tp_ctloutput, tp_usrreq, tp_init, 0, tp_slowtimo, tp_drain, }, #endif /* EON (ISO CLNL over IP) */ #ifdef EON { SOCK_RAW, &inetdomain, IPPROTO_EON, 0, eoninput, 0, eonctlinput, 0, 0, eonprotoinit, 0, 0, 0, }, #endif #ifdef NSIP { SOCK_RAW, &inetdomain, IPPROTO_IDP, PR_ATOMIC|PR_ADDR, idpip_input, rip_output, nsip_ctlinput, 0, rip_usrreq, 0, 0, 0, 0, }, #endif /* raw wildcard */ { SOCK_RAW, &inetdomain, 0, PR_ATOMIC|PR_ADDR, rip_input, rip_output, 0, rip_ctloutput, rip_usrreq, rip_init, 0, 0, 0, }, }; extern int in_inithead(void **, int); struct domain inetdomain = - { AF_INET, "internet", 0, 0, 0, + { AF_INET, "internet", 0, 0, 0, inetsw, &inetsw[sizeof(inetsw)/sizeof(inetsw[0])], 0, in_inithead, 32, sizeof(struct sockaddr_in) }; DOMAIN_SET(inet); #include "imp.h" #if NIMP > 0 extern struct domain impdomain; int rimp_output(), hostslowtimo(); struct protosw impsw[] = { { SOCK_RAW, &impdomain, 0, PR_ATOMIC|PR_ADDR, 0, rimp_output, 0, 0, rip_usrreq, 0, 0, hostslowtimo, 0, }, }; struct domain impdomain = { AF_IMPLINK, "imp", 0, 0, 0, impsw, &impsw[sizeof (impsw)/sizeof(impsw[0])] }; DOMAIN_SET(imp); #endif #if 0 #include "hy.h" #if NHY > 0 /* * 
HYPERchannel protocol family: raw interface. */ int rhy_output(); extern struct domain hydomain; struct protosw hysw[] = { { SOCK_RAW, &hydomain, 0, PR_ATOMIC|PR_ADDR, 0, rhy_output, 0, 0, rip_usrreq, 0, 0, 0, 0, }, }; struct domain hydomain = { AF_HYLINK, "hy", 0, 0, 0, hysw, &hysw[sizeof (hysw)/sizeof(hysw[0])] }; DOMAIN_SET(hy); #endif #endif Index: stable/2.1/sys/netinet/ip_input.c =================================================================== --- stable/2.1/sys/netinet/ip_input.c (revision 10582) +++ stable/2.1/sys/netinet/ip_input.c (revision 10583) @@ -1,1251 +1,1269 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 - * $Id: ip_input.c,v 1.22 1995/05/30 08:09:44 rgrimes Exp $ + * $Id: ip_input.c,v 1.25 1995/07/09 14:29:46 davidg Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +int rsvp_on = 0; +int ip_rsvp_on; struct socket *ip_rsvpd; #ifndef IPFORWARDING #ifdef GATEWAY #define IPFORWARDING 1 /* forward IP packets not for us */ #else /* GATEWAY */ #define IPFORWARDING 0 /* don't forward IP packets not for us */ #endif /* GATEWAY */ #endif /* IPFORWARDING */ #ifndef IPSENDREDIRECTS #define IPSENDREDIRECTS 1 #endif int ipforwarding = IPFORWARDING; int ipsendredirects = IPSENDREDIRECTS; int ip_defttl = IPDEFTTL; int ip_dosourceroute = 0; #ifdef DIAGNOSTIC int ipprintfs = 0; #endif extern struct domain inetdomain; extern struct protosw inetsw[]; u_char ip_protox[IPPROTO_MAX]; int ipqmaxlen = IFQ_MAXLEN; struct in_ifaddr *in_ifaddr; /* first inet address */ struct ifqueue ipintrq; struct ipstat ipstat; struct ipq ipq; /* * We need to save the IP options in case a protocol wants to respond * to an incoming packet over the same route if the packet got here * using IP source routing. This allows connection establishment and * maintenance when the remote end is on a network that is not known * to us. 
*/ int ip_nhops = 0; static struct ip_srcrt { struct in_addr dst; /* final destination */ char nop; /* one NOP to align */ char srcopt[IPOPT_OFFSET + 1]; /* OPTVAL, OLEN and OFFSET */ struct in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)]; } ip_srcrt; static void save_rte __P((u_char *, struct in_addr)); /* * IP initialization: fill in IP protocol switch table. * All protocols not implemented in kernel go to raw IP protocol handler. */ void ip_init() { register struct protosw *pr; register int i; pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); if (pr == 0) panic("ip_init"); for (i = 0; i < IPPROTO_MAX; i++) ip_protox[i] = pr - inetsw; for (pr = inetdomain.dom_protosw; pr < inetdomain.dom_protoswNPROTOSW; pr++) if (pr->pr_domain->dom_family == PF_INET && pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) ip_protox[pr->pr_protocol] = pr - inetsw; ipq.next = ipq.prev = &ipq; ip_id = time.tv_sec & 0xffff; ipintrq.ifq_maxlen = ipqmaxlen; } struct sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET }; struct route ipforward_rt; /* * Ip input routine. Checksum and byte swap header. If fragmented * try to reassemble. Process options. Pass to next level. */ void ipintr(void) { register struct ip *ip; register struct mbuf *m; register struct ipq *fp; register struct in_ifaddr *ia; int hlen, s; next: /* * Get next datagram off input queue and get IP header * in first mbuf. */ s = splimp(); IF_DEQUEUE(&ipintrq, m); splx(s); if (m == 0) return; #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("ipintr no HDR"); #endif /* * If no IP addresses have been set yet but the interfaces * are receiving, can't do anything with incoming packets yet. 
*/ if (in_ifaddr == NULL) goto bad; ipstat.ips_total++; if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == 0) { ipstat.ips_toosmall++; goto next; } ip = mtod(m, struct ip *); if (ip->ip_v != IPVERSION) { ipstat.ips_badvers++; goto bad; } hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ ipstat.ips_badhlen++; goto bad; } if (hlen > m->m_len) { if ((m = m_pullup(m, hlen)) == 0) { ipstat.ips_badhlen++; goto next; } ip = mtod(m, struct ip *); } ip->ip_sum = in_cksum(m, hlen); if (ip->ip_sum) { ipstat.ips_badsum++; goto bad; } /* * Convert fields to host representation. */ NTOHS(ip->ip_len); if (ip->ip_len < hlen) { ipstat.ips_badlen++; goto bad; } NTOHS(ip->ip_id); NTOHS(ip->ip_off); /* * Check that the amount of data in the buffers * is as at least much as the IP header would have us expect. * Trim mbufs if longer than we expect. * Drop packet if shorter than we expect. */ if (m->m_pkthdr.len < ip->ip_len) { ipstat.ips_tooshort++; goto bad; } if (m->m_pkthdr.len > ip->ip_len) { if (m->m_len == m->m_pkthdr.len) { m->m_len = ip->ip_len; m->m_pkthdr.len = ip->ip_len; } else m_adj(m, ip->ip_len - m->m_pkthdr.len); } /* * IpHack's section. * Right now when no processing on packet has done * and it is still fresh out of network we do our black * deals with it. * - Firewall: deny/allow * - Wrap: fake packet's addr/port * - Encapsulate: put it in another IP and send out. */ if (ip_fw_chk_ptr!=NULL) if (!(*ip_fw_chk_ptr)(m,ip,m->m_pkthdr.rcvif,ip_fw_chain) ) { goto next; } /* * Process options and, if not destined for us, * ship it on. ip_dooptions returns 1 when an * error was detected (causing an icmp message * to be sent and the original packet to be freed). 
*/ ip_nhops = 0; /* for source routed packets */ if (hlen > sizeof (struct ip) && ip_dooptions(m)) goto next; /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no - * matter if it is destined to another node, or whether it is + * matter if it is destined to another node, or whether it is * a multicast one, RSVP wants it! and prevents it from being forwarded * anywhere else. Also checks if the rsvp daemon is running before * grabbing the packet. */ - if (ip_rsvpd != NULL && ip->ip_p==IPPROTO_RSVP) + if (rsvp_on && ip->ip_p==IPPROTO_RSVP) goto ours; /* * Check our list of addresses, to see if the packet is for us. */ for (ia = in_ifaddr; ia; ia = ia->ia_next) { #define satosin(sa) ((struct sockaddr_in *)(sa)) if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr) goto ours; if ( #ifdef DIRECTED_BROADCAST ia->ia_ifp == m->m_pkthdr.rcvif && #endif (ia->ia_ifp->if_flags & IFF_BROADCAST)) { u_long t; if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == ip->ip_dst.s_addr) goto ours; if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr) goto ours; /* * Look for all-0's host part (old broadcast addr), * either for subnet or net. */ t = ntohl(ip->ip_dst.s_addr); if (t == ia->ia_subnet) goto ours; if (t == ia->ia_net) goto ours; } } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct in_multi *inm; if (ip_mrouter) { /* * If we are acting as a multicast router, all * incoming multicast packets are passed to the * kernel-level multicast forwarding function. * The packet is returned (relatively) intact; if * ip_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. * * (The IP ident field is put in the same byte order * as expected when ip_mforward() is called from * ip_output().) 
*/ ip->ip_id = htons(ip->ip_id); if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) { ipstat.ips_cantforward++; m_freem(m); goto next; } ip->ip_id = ntohs(ip->ip_id); /* * The process-level routing demon needs to receive * all multicast IGMP packets, whether or not this * host belongs to their destination groups. */ if (ip->ip_p == IPPROTO_IGMP) goto ours; ipstat.ips_forward++; } /* * See if we belong to the destination multicast group on the * arrival interface. */ IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm); if (inm == NULL) { ipstat.ips_cantforward++; m_freem(m); goto next; } goto ours; } if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) goto ours; if (ip->ip_dst.s_addr == INADDR_ANY) goto ours; /* * Not for us; forward if possible and desirable. */ if (ipforwarding == 0) { ipstat.ips_cantforward++; m_freem(m); } else ip_forward(m, 0); goto next; ours: /* * If packet came to us we count it... - * This way we count all incoming packets which has + * This way we count all incoming packets which has * not been forwarded... - * Do not convert ip_len to host byte order when + * Do not convert ip_len to host byte order when * counting,ppl already made it for us before.. */ if (ip_acct_cnt_ptr!=NULL) (*ip_acct_cnt_ptr)(ip,m->m_pkthdr.rcvif,ip_acct_chain,0); /* * If offset or IP_MF are set, must reassemble. * Otherwise, nothing need be done. * (We could look in the reassembly queue to see * if the packet was previously fragmented, * but it's not worth the time; just let them time out.) */ if (ip->ip_off &~ IP_DF) { if (m->m_flags & M_EXT) { /* XXX */ if ((m = m_pullup(m, sizeof (struct ip))) == 0) { ipstat.ips_toosmall++; goto next; } ip = mtod(m, struct ip *); } /* * Look for queue of fragments * of this datagram. 
*/ for (fp = ipq.next; fp != &ipq; fp = fp->next) if (ip->ip_id == fp->ipq_id && ip->ip_src.s_addr == fp->ipq_src.s_addr && ip->ip_dst.s_addr == fp->ipq_dst.s_addr && ip->ip_p == fp->ipq_p) goto found; fp = 0; found: /* * Adjust ip_len to not reflect header, * set ip_mff if more fragments are expected, * convert offset of this to bytes. */ ip->ip_len -= hlen; ((struct ipasfrag *)ip)->ipf_mff &= ~1; if (ip->ip_off & IP_MF) ((struct ipasfrag *)ip)->ipf_mff |= 1; ip->ip_off <<= 3; /* * If datagram marked as having more fragments * or if this is not the first fragment, * attempt reassembly; if it succeeds, proceed. */ if (((struct ipasfrag *)ip)->ipf_mff & 1 || ip->ip_off) { ipstat.ips_fragments++; ip = ip_reass((struct ipasfrag *)ip, fp); if (ip == 0) goto next; ipstat.ips_reassembled++; m = dtom(ip); } else if (fp) ip_freef(fp); } else ip->ip_len -= hlen; /* * Switch out to protocol's input routine. */ ipstat.ips_delivered++; (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); goto next; bad: m_freem(m); goto next; } NETISR_SET(NETISR_IP, ipintr); /* * Take incoming datagram fragment and try to * reassemble it into whole datagram. If a chain for * reassembly of this datagram already exists, then it * is given as fp; otherwise have to make a chain. */ struct ip * ip_reass(ip, fp) register struct ipasfrag *ip; register struct ipq *fp; { register struct mbuf *m = dtom(ip); register struct ipasfrag *q; struct mbuf *t; int hlen = ip->ip_hl << 2; int i, next; /* * Presence of header sizes in mbufs * would confuse code below. */ m->m_data += hlen; m->m_len -= hlen; /* * If first fragment to arrive, create a reassembly queue. 
*/ if (fp == 0) { if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL) goto dropfrag; fp = mtod(t, struct ipq *); insque(fp, &ipq); fp->ipq_ttl = IPFRAGTTL; fp->ipq_p = ip->ip_p; fp->ipq_id = ip->ip_id; fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp; fp->ipq_src = ((struct ip *)ip)->ip_src; fp->ipq_dst = ((struct ip *)ip)->ip_dst; q = (struct ipasfrag *)fp; goto insert; } /* * Find a segment which begins after this one does. */ for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) if (q->ip_off > ip->ip_off) break; /* * If there is a preceding segment, it may provide some of * our data already. If so, drop the data from the incoming * segment. If it provides all of our data, drop us. */ if (q->ipf_prev != (struct ipasfrag *)fp) { i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off; if (i > 0) { if (i >= ip->ip_len) goto dropfrag; m_adj(dtom(ip), i); ip->ip_off += i; ip->ip_len -= i; } } /* * While we overlap succeeding segments trim them or, * if they are completely covered, dequeue them. */ while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) { i = (ip->ip_off + ip->ip_len) - q->ip_off; if (i < q->ip_len) { q->ip_len -= i; q->ip_off += i; m_adj(dtom(q), i); break; } q = q->ipf_next; m_freem(dtom(q->ipf_prev)); ip_deq(q->ipf_prev); } insert: /* * Stick new segment in its place; * check for complete reassembly. */ ip_enq(ip, q->ipf_prev); next = 0; for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) { if (q->ip_off != next) return (0); next += q->ip_len; } if (q->ipf_prev->ipf_mff & 1) return (0); /* * Reassembly is complete; concatenate fragments. */ q = fp->ipq_next; m = dtom(q); t = m->m_next; m->m_next = 0; m_cat(m, t); q = q->ipf_next; while (q != (struct ipasfrag *)fp) { t = dtom(q); q = q->ipf_next; m_cat(m, t); } /* * Create header for new ip packet by * modifying header of first packet; * dequeue and discard fragment reassembly header. * Make header visible. 
*/ ip = fp->ipq_next; ip->ip_len = next; ip->ipf_mff &= ~1; ((struct ip *)ip)->ip_src = fp->ipq_src; ((struct ip *)ip)->ip_dst = fp->ipq_dst; remque(fp); (void) m_free(dtom(fp)); m = dtom(ip); m->m_len += (ip->ip_hl << 2); m->m_data -= (ip->ip_hl << 2); /* some debugging cruft by sklower, below, will go away soon */ if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */ register int plen = 0; for (t = m; m; m = m->m_next) plen += m->m_len; t->m_pkthdr.len = plen; } return ((struct ip *)ip); dropfrag: ipstat.ips_fragdropped++; m_freem(m); return (0); } /* * Free a fragment reassembly header and all * associated datagrams. */ void ip_freef(fp) struct ipq *fp; { register struct ipasfrag *q, *p; for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) { p = q->ipf_next; ip_deq(q); m_freem(dtom(q)); } remque(fp); (void) m_free(dtom(fp)); } /* * Put an ip fragment on a reassembly chain. * Like insque, but pointers in middle of structure. */ void ip_enq(p, prev) register struct ipasfrag *p, *prev; { p->ipf_prev = prev; p->ipf_next = prev->ipf_next; prev->ipf_next->ipf_prev = p; prev->ipf_next = p; } /* * To ip_enq as remque is to insque. */ void ip_deq(p) register struct ipasfrag *p; { p->ipf_prev->ipf_next = p->ipf_next; p->ipf_next->ipf_prev = p->ipf_prev; } /* * IP timer processing; * if a timer expires on a reassembly * queue, discard it. */ void ip_slowtimo() { register struct ipq *fp; int s = splnet(); fp = ipq.next; if (fp == 0) { splx(s); return; } while (fp != &ipq) { --fp->ipq_ttl; fp = fp->next; if (fp->prev->ipq_ttl == 0) { ipstat.ips_fragtimeout++; ip_freef(fp->prev); } } splx(s); } /* * Drain off all datagram fragments. */ void ip_drain() { while (ipq.next != &ipq) { ipstat.ips_fragdropped++; ip_freef(ipq.next); } } /* * Do option processing on a datagram, * possibly discarding it if bad options are encountered, * or forwarding it if source-routed. * Returns 1 if packet has been forwarded/freed, * 0 if the packet should be processed further. 
*/
/*
 * Implementation notes:
 *  - m is the datagram; its first mbuf holds the IP header and options.
 *  - Options are walked one at a time; cnt is the number of option bytes
 *    left and optlen the length of the current option.
 *  - On a malformed option the code jumps to "bad", which hands the
 *    packet to icmp_error() with the current type/code and returns 1.
 *  - Length arithmetic is done in signed int on purpose: mixing optlen
 *    with a bare sizeof would convert to unsigned and let a short option
 *    wrap around to a huge bound.
 */
int
ip_dooptions(m)
	struct mbuf *m;
{
	register struct ip *ip = mtod(m, struct ip *);
	register u_char *cp;
	register struct ip_timestamp *ipt;
	register struct in_ifaddr *ia;
	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
	struct in_addr *sin, dst;
	n_time ntime;

	dst = ip->ip_dst;
	cp = (u_char *)(ip + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = cp[IPOPT_OPTVAL];
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			optlen = cp[IPOPT_OLEN];
			if (optlen <= 0 || optlen > cnt) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
		}
		switch (opt) {

		default:
			break;

		/*
		 * Source routing with record.
		 * Find interface with current destination address.
		 * If none on this machine then drop if strictly routed,
		 * or do nothing if loosely routed.
		 * Record interface address and bring up next address
		 * component.  If strictly routed make sure next
		 * address is on directly accessible net.
		 */
		case IPOPT_LSRR:
		case IPOPT_SSRR:
			/*
			 * The pointer octet is read next, so the option
			 * must be long enough to contain it.
			 */
			if (optlen < IPOPT_OFFSET + 1) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			ipaddr.sin_addr = ip->ip_dst;
			ia = (struct in_ifaddr *)
				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
			if (ia == 0) {
				if (opt == IPOPT_SSRR) {
					type = ICMP_UNREACH;
					code = ICMP_UNREACH_SRCFAIL;
					goto bad;
				}
				/*
				 * Loose routing, and not at next destination
				 * yet; nothing to do except forward.
				 */
				break;
			}
			off--;			/* 0 origin */
			/* signed compare: see the note at the top */
			if (off > optlen - (int)sizeof(struct in_addr)) {
				/*
				 * End of source route.  Should be for us.
				 */
				save_rte(cp, ip->ip_src);
				break;
			}

			if (!ip_dosourceroute) {
				/* room for "255.255.255.255" plus the NUL */
				char buf[4*sizeof "123"];
				/* copy: the second inet_ntoa() call below
				 * presumably reuses the same buffer */
				strcpy(buf, inet_ntoa(ip->ip_dst));
				log(LOG_WARNING,
				    "attempted source route from %s to %s\n",
				    inet_ntoa(ip->ip_src), buf);
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_SRCFAIL;
				goto bad;
			}

			/*
			 * locate outgoing interface
			 */
			(void)memcpy(&ipaddr.sin_addr, cp + off,
			    sizeof(ipaddr.sin_addr));

			if (opt == IPOPT_SSRR) {
#define	INA	struct in_ifaddr *
#define	SA	struct sockaddr *
				if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
					ia = (INA)ifa_ifwithnet((SA)&ipaddr);
			} else
				ia = ip_rtaddr(ipaddr.sin_addr);
			if (ia == 0) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_SRCFAIL;
				goto bad;
			}
			ip->ip_dst = ipaddr.sin_addr;
			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
			    sizeof(struct in_addr));
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			/*
			 * Let ip_intr's mcast routing check handle mcast pkts
			 */
			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
			break;

		case IPOPT_RR:
			/* same minimum-length rule as for source routes */
			if (optlen < IPOPT_OFFSET + 1) {
				code = &cp[IPOPT_OLEN] - (u_char *)ip;
				goto bad;
			}
			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
				goto bad;
			}
			/*
			 * If no space remains, ignore.
			 */
			off--;			/* 0 origin */
			if (off > optlen - (int)sizeof(struct in_addr))
				break;
			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
			    sizeof(ipaddr.sin_addr));
			/*
			 * locate outgoing interface; if we're the destination,
			 * use the incoming interface (should be same).
			 */
			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
				type = ICMP_UNREACH;
				code = ICMP_UNREACH_HOST;
				goto bad;
			}
			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
			    sizeof(struct in_addr));
			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
			break;

		case IPOPT_TS:
			code = cp - (u_char *)ip;
			ipt = (struct ip_timestamp *)cp;
			/*
			 * RFC 791: the smallest legal pointer is 5.  A smaller
			 * one would make the stores below land in the option
			 * header or in front of it.
			 */
			if (ipt->ipt_len < 5 || ipt->ipt_ptr < 5)
				goto bad;
			if (ipt->ipt_ptr > ipt->ipt_len - (int)sizeof (long)) {
				/* no room left: bump the overflow counter */
				if (++ipt->ipt_oflw == 0)
					goto bad;
				break;
			}
			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
			switch (ipt->ipt_flg) {

			case IPOPT_TS_TSONLY:
				break;

			case IPOPT_TS_TSANDADDR:
				if (ipt->ipt_ptr + sizeof(n_time) +
				    sizeof(struct in_addr) > ipt->ipt_len)
					goto bad;
				ipaddr.sin_addr = dst;
				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
							    m->m_pkthdr.rcvif);
				if (ia == 0)
					continue;	/* next option */
				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
				    sizeof(struct in_addr));
				ipt->ipt_ptr += sizeof(struct in_addr);
				break;

			case IPOPT_TS_PRESPEC:
				if (ipt->ipt_ptr + sizeof(n_time) +
				    sizeof(struct in_addr) > ipt->ipt_len)
					goto bad;
				(void)memcpy(&ipaddr.sin_addr, sin,
				    sizeof(struct in_addr));
				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
					continue;	/* next option */
				ipt->ipt_ptr += sizeof(struct in_addr);
				break;

			default:
				goto bad;
			}
			ntime = iptime();
			(void)memcpy(cp + ipt->ipt_ptr - 1, &ntime,
			    sizeof(n_time));
			ipt->ipt_ptr += sizeof(n_time);
		}
	}
	if (forward) {
		ip_forward(m, 1);
		return (1);
	}
	return (0);
bad:
	ip->ip_len -= ip->ip_hl << 2;   /* XXX icmp_error adds in hdr length */
	icmp_error(m, type, code, 0, 0);
	ipstat.ips_badoptions++;
	return (1);
}

/*
 * Given address of next destination (final or next hop),
 * return internet address info of interface to be used to get there.
*/
/*
 * The lookup goes through the single cached route ipforward_rt: the
 * cache is reused when it already points at dst, otherwise the old route
 * is released and a new one is allocated.  Returns a null pointer when
 * no route could be found.
 */
struct in_ifaddr *
ip_rtaddr(dst)
	 struct in_addr dst;
{
	register struct sockaddr_in *sin;

	sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;

	if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
		if (ipforward_rt.ro_rt) {
			RTFREE(ipforward_rt.ro_rt);
			ipforward_rt.ro_rt = 0;
		}
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = dst;

		rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
	}
	if (ipforward_rt.ro_rt == 0)
		return ((struct in_ifaddr *)0);
	return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
}

/*
 * Save incoming source route for use in replies,
 * to be picked up later by ip_srcroute if the receiver is interested.
 *
 * option points at the start of the source-route option, dst is stored
 * as the final destination of the saved route.  Options too large for
 * ip_srcrt are silently ignored.
 */
void
save_rte(option, dst)
	u_char *option;
	struct in_addr dst;
{
	unsigned olen;

	olen = option[IPOPT_OLEN];
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf("save_rte: olen %d\n", olen);
#endif
	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
		return;
	(void)memcpy(ip_srcrt.srcopt, option, olen);
	/* addresses that follow the three header octets of the option */
	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
	ip_srcrt.dst = dst;
}

/*
 * Retrieve incoming source route for use in replies,
 * in the same form used by setsockopt.
 * The first hop is placed before the options, will be removed later.
 *
 * Returns a newly allocated mbuf, or a null pointer when no route was
 * saved (ip_nhops == 0) or no mbuf is available.
 */
struct mbuf *
ip_srcroute()
{
	register struct in_addr *p, *q;
	register struct mbuf *m;

	if (ip_nhops == 0)
		return ((struct mbuf *)0);
	m = m_get(M_DONTWAIT, MT_SOOPTS);
	if (m == 0)
		return ((struct mbuf *)0);

#define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))

	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
	    OPTSIZ;
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
#endif

	/*
	 * First save first hop for return route
	 */
	p = &ip_srcrt.route[ip_nhops - 1];
	*(mtod(m, struct in_addr *)) = *p--;
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr));
#endif

	/*
	 * Copy option fields and padding (nop) to mbuf.
	 */
	ip_srcrt.nop = IPOPT_NOP;
	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
	    &ip_srcrt.nop, OPTSIZ);
	q = (struct in_addr *)(mtod(m, caddr_t) +
	    sizeof(struct in_addr) + OPTSIZ);
#undef OPTSIZ
	/*
	 * Record return path as an IP source route,
	 * reversing the path (pointers are now aligned).
	 */
	while (p >= ip_srcrt.route) {
#ifdef DIAGNOSTIC
		if (ipprintfs)
			printf(" %lx", ntohl(q->s_addr));
#endif
		*q++ = *p--;
	}
	/*
	 * Last hop goes to final destination.
	 */
	*q = ip_srcrt.dst;
#ifdef DIAGNOSTIC
	if (ipprintfs)
		printf(" %lx\n", ntohl(q->s_addr));
#endif
	return (m);
}

/*
 * Strip out IP options, at higher
 * level protocol in the kernel.
 * The data following the options is moved up directly behind the fixed
 * header, the mbuf lengths are reduced by the option length and ip_hl is
 * reset to the size of an option-less header.
 * XXX the second argument (mopt) is never used and nothing is returned.
 */
void
ip_stripoptions(m, mopt)
	register struct mbuf *m;
	struct mbuf *mopt;
{
	register int i;
	struct ip *ip = mtod(m, struct ip *);
	register caddr_t opts;
	int olen;

	olen = (ip->ip_hl<<2) - sizeof (struct ip);
	opts = (caddr_t)(ip + 1);
	/* bytes in this mbuf that follow the options */
	i = m->m_len - (sizeof (struct ip) + olen);
	bcopy(opts + olen, opts, (unsigned)i);
	m->m_len -= olen;
	if (m->m_flags & M_PKTHDR)
		m->m_pkthdr.len -= olen;
	ip->ip_hl = sizeof(struct ip) >> 2;
}

/*
 * Error numbers indexed by PRC_* command code (PRC_NCMDS entries);
 * a 0 entry means no error is mapped for that code.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		0,		0,
	ENOPROTOOPT
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
*/ void ip_forward(m, srcrt) struct mbuf *m; int srcrt; { register struct ip *ip = mtod(m, struct ip *); register struct sockaddr_in *sin; register struct rtentry *rt; int error, type = 0, code = 0; struct mbuf *mcopy; n_long dest; struct ifnet *destifp; dest = 0; #ifdef DIAGNOSTIC if (ipprintfs) printf("forward: src %lx dst %lx ttl %x\n", ip->ip_src.s_addr, ip->ip_dst.s_addr, ip->ip_ttl); #endif if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) { ipstat.ips_cantforward++; m_freem(m); return; } HTONS(ip->ip_id); if (ip->ip_ttl <= IPTTLDEC) { icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0); return; } ip->ip_ttl -= IPTTLDEC; sin = (struct sockaddr_in *)&ipforward_rt.ro_dst; if ((rt = ipforward_rt.ro_rt) == 0 || ip->ip_dst.s_addr != sin->sin_addr.s_addr) { if (ipforward_rt.ro_rt) { RTFREE(ipforward_rt.ro_rt); ipforward_rt.ro_rt = 0; } sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); sin->sin_addr = ip->ip_dst; rtalloc_ign(&ipforward_rt, RTF_PRCLONING); if (ipforward_rt.ro_rt == 0) { icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0); return; } rt = ipforward_rt.ro_rt; } /* * Save at most 64 bytes of the packet in case * we need to generate an ICMP message to the src. */ mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64)); /* * If forwarding packet using same interface that it came in on, * perhaps should send a redirect to sender to shortcut a hop. * Only send redirect if source is sending directly to us, * and if packet was not source routed (or has any options). * Also, don't send redirect if forwarding using a default route * or a route modified by a redirect. 
*/ #define satosin(sa) ((struct sockaddr_in *)(sa)) if (rt->rt_ifp == m->m_pkthdr.rcvif && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && satosin(rt_key(rt))->sin_addr.s_addr != 0 && ipsendredirects && !srcrt) { #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) u_long src = ntohl(ip->ip_src.s_addr); if (RTA(rt) && (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { if (rt->rt_flags & RTF_GATEWAY) dest = satosin(rt->rt_gateway)->sin_addr.s_addr; else dest = ip->ip_dst.s_addr; /* Router requirements says to only send host redirects */ type = ICMP_REDIRECT; code = ICMP_REDIRECT_HOST; #ifdef DIAGNOSTIC if (ipprintfs) printf("redirect (%d) to %lx\n", code, (u_long)dest); #endif } } error = ip_output(m, (struct mbuf *)0, &ipforward_rt, IP_FORWARDING #ifdef DIRECTED_BROADCAST | IP_ALLOWBROADCAST #endif , 0); if (error) ipstat.ips_cantforward++; else { ipstat.ips_forward++; if (type) ipstat.ips_redirectsent++; else { if (mcopy) m_freem(mcopy); return; } } if (mcopy == NULL) return; destifp = NULL; switch (error) { case 0: /* forwarded, but need redirect */ /* type, code set above */ break; case ENETUNREACH: /* shouldn't happen, checked above */ case EHOSTUNREACH: case ENETDOWN: case EHOSTDOWN: default: type = ICMP_UNREACH; code = ICMP_UNREACH_HOST; break; case EMSGSIZE: type = ICMP_UNREACH; code = ICMP_UNREACH_NEEDFRAG; if (ipforward_rt.ro_rt) destifp = ipforward_rt.ro_rt->rt_ifp; ipstat.ips_cantfrag++; break; case ENOBUFS: type = ICMP_SOURCEQUENCH; code = 0; break; } icmp_error(mcopy, type, code, dest, destifp); } int ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen) int *name; u_int namelen; void *oldp; size_t *oldlenp; void *newp; size_t newlen; { /* All sysctl names at this level are terminal. 
*/ if (namelen != 1) return (ENOTDIR); switch (name[0]) { case IPCTL_FORWARDING: return (sysctl_int(oldp, oldlenp, newp, newlen, &ipforwarding)); case IPCTL_SENDREDIRECTS: return (sysctl_int(oldp, oldlenp, newp, newlen, &ipsendredirects)); case IPCTL_DEFTTL: return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl)); case IPCTL_SOURCEROUTE: - return (sysctl_int(oldp, oldlenp, newp, newlen, + return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_dosourceroute)); #ifdef notyet case IPCTL_DEFMTU: return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu)); #endif case IPCTL_RTEXPIRE: - return (sysctl_int(oldp, oldlenp, newp, newlen, + return (sysctl_int(oldp, oldlenp, newp, newlen, &rtq_reallyold)); case IPCTL_RTMINEXPIRE: - return (sysctl_int(oldp, oldlenp, newp, newlen, + return (sysctl_int(oldp, oldlenp, newp, newlen, &rtq_minreallyold)); case IPCTL_RTMAXCACHE: return (sysctl_int(oldp, oldlenp, newp, newlen, &rtq_toomany)); default: return (EOPNOTSUPP); } /* NOTREACHED */ } int ip_rsvp_init(struct socket *so) { if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) return EOPNOTSUPP; if (ip_rsvpd != NULL) return EADDRINUSE; ip_rsvpd = so; + /* + * This may seem silly, but we need to be sure we don't over-increment + * the RSVP counter, in case something slips up. + */ + if (!ip_rsvp_on) { + ip_rsvp_on = 1; + rsvp_on++; + } return 0; } int ip_rsvp_done(void) { ip_rsvpd = NULL; + /* + * This may seem silly, but we need to be sure we don't over-decrement + * the RSVP counter, in case something slips up. + */ + if (ip_rsvp_on) { + ip_rsvp_on = 0; + rsvp_on--; + } return 0; } Index: stable/2.1/sys/netinet/ip_mroute.c =================================================================== --- stable/2.1/sys/netinet/ip_mroute.c (revision 10582) +++ stable/2.1/sys/netinet/ip_mroute.c (revision 10583) @@ -1,1925 +1,2283 @@ /* * IP multicast forwarding procedures * * Written by David Waitzman, BBN Labs, August 1988. 
* Modified by Steve Deering, Stanford, February 1989. * Modified by Mark J. Steiglitz, Stanford, May, 1991 * Modified by Van Jacobson, LBL, January 1993 * Modified by Ajit Thyagarajan, PARC, August 1993 + * Modified by Bill Fenner, PARC, April 1995 * - * MROUTING 1.8 + * MROUTING Revision: 3.5 + * $Id: ip_mroute.c,v 1.22 1995/08/23 18:20:15 wollman Exp $ */ #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #ifndef NTOHL #if BYTE_ORDER != BIG_ENDIAN #define NTOHL(d) ((d) = ntohl((d))) #define NTOHS(d) ((d) = ntohs((u_short)(d))) #define HTONL(d) ((d) = htonl((d))) #define HTONS(d) ((d) = htons((u_short)(d))) #else #define NTOHL(d) #define NTOHS(d) #define HTONL(d) #define HTONS(d) #endif #endif #ifndef MROUTING /* * Dummy routines and globals used when multicast routing is not compiled in. */ -u_int ip_mrtproto = 0; struct socket *ip_mrouter = NULL; +u_int ip_mrtproto = 0; struct mrtstat mrtstat; +u_int rsvpdebug = 0; - int -_ip_mrouter_cmd(cmd, so, m) +_ip_mrouter_set(cmd, so, m) int cmd; struct socket *so; struct mbuf *m; { return(EOPNOTSUPP); } -int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = _ip_mrouter_cmd; +int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = _ip_mrouter_set; + int +_ip_mrouter_get(cmd, so, m) + int cmd; + struct socket *so; + struct mbuf **m; +{ + return(EOPNOTSUPP); +} + +int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = _ip_mrouter_get; + +int _ip_mrouter_done() { return(0); } int (*ip_mrouter_done)(void) = _ip_mrouter_done; int _ip_mforward(ip, ifp, m, imo) struct ip *ip; struct ifnet *ifp; struct mbuf *m; struct ip_moptions *imo; { return(0); } int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *) = _ip_mforward; int _mrt_ioctl(int req, caddr_t data, struct proc *p) { return EOPNOTSUPP; } int (*mrt_ioctl)(int, 
caddr_t, struct proc *) = _mrt_ioctl; -void multiencap_decap(struct mbuf *m) { /* XXX must fixup manually */ +void +rsvp_input(m, iphlen) /* XXX must fixup manually */ + struct mbuf *m; + int iphlen; +{ + /* Can still get packets with rsvp_on = 0 if there is a local member + * of the group to which the RSVP packet is addressed. But in this + * case we want to throw the packet away. + */ + if (!rsvp_on) { + m_freem(m); + return; + } + + if (ip_rsvpd != NULL) { + if (rsvpdebug) + printf("rsvp_input: Sending packet up old-style socket\n"); rip_input(m); + return; + } + /* Drop the packet */ + m_freem(m); } +void ipip_input(struct mbuf *m) { /* XXX must fixup manually */ + rip_input(m); +} + int (*legal_vif_num)(int) = 0; +/* + * This should never be called, since IP_MULTICAST_VIF should fail, but + * just in case it does get called, the code a little lower in ip_output + * will assign the packet a local address. + */ +u_long +_ip_mcast_src(int vifi) { return INADDR_ANY; } +u_long (*ip_mcast_src)(int) = _ip_mcast_src; + +int +ip_rsvp_vif_init(so, m) + struct socket *so; + struct mbuf *m; +{ + return(EINVAL); +} + +int +ip_rsvp_vif_done(so, m) + struct socket *so; + struct mbuf *m; +{ + return(EINVAL); +} + +void +ip_rsvp_force_done(so) + struct socket *so; +{ + return; +} + #else /* MROUTING */ +#define M_HASCL(m) ((m)->m_flags & M_EXT) + #define INSIZ sizeof(struct in_addr) #define same(a1, a2) \ (bcmp((caddr_t)(a1), (caddr_t)(a2), INSIZ) == 0) #define MT_MRTABLE MT_RTABLE /* since nothing else uses it */ /* * Globals. All but ip_mrouter and ip_mrtproto could be static, * except for netstat or debugging purposes. 
*/ #ifndef MROUTE_LKM struct socket *ip_mrouter = NULL; struct mrtstat mrtstat; int ip_mrtproto = IGMP_DVMRP; /* for netstat only */ #else /* MROUTE_LKM */ extern struct mrtstat mrtstat; extern int ip_mrtproto; #endif #define NO_RTE_FOUND 0x1 #define RTE_FOUND 0x2 struct mbuf *mfctable[MFCTBLSIZ]; +u_char nexpire[MFCTBLSIZ]; struct vif viftable[MAXVIFS]; u_int mrtdebug = 0; /* debug level */ +#define DEBUG_MFC 0x02 +#define DEBUG_FORWARD 0x04 +#define DEBUG_EXPIRE 0x08 +#define DEBUG_XMIT 0x10 u_int tbfdebug = 0; /* tbf debug level */ +u_int rsvpdebug = 0; /* rsvp debug level */ -u_long timeout_val = 0; /* count of outstanding upcalls */ +#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ +#define UPCALL_EXPIRE 6 /* number of timeouts */ /* * Define the token bucket filter structures - * tbftable -> each vif has one of these for storing info - * qtable -> each interface has an associated queue of pkts + * tbftable -> each vif has one of these for storing info */ struct tbf tbftable[MAXVIFS]; -struct pkt_queue qtable[MAXVIFS][MAXQSIZE]; +#define TBF_REPROCESS (hz / 100) /* 100x / second */ /* * 'Interfaces' associated with decapsulator (so we can tell * packets that went through it from ones that get reflected * by a broken gateway). These interfaces are never linked into * the system ifnet list & no routes point to them. I.e., packets * can't be sent this way. They only exist as a placeholder for * multicast source verification. */ struct ifnet multicast_decap_if[MAXVIFS]; #define ENCAP_TTL 64 -#define ENCAP_PROTO 4 +#define ENCAP_PROTO IPPROTO_IPIP /* 4 */ /* prototype IP hdr for encapsulated packets */ struct ip multicast_encap_iphdr = { #if BYTE_ORDER == LITTLE_ENDIAN sizeof(struct ip) >> 2, IPVERSION, #else IPVERSION, sizeof(struct ip) >> 2, #endif 0, /* tos */ sizeof(struct ip), /* total length */ 0, /* id */ 0, /* frag offset */ - ENCAP_TTL, ENCAP_PROTO, + ENCAP_TTL, ENCAP_PROTO, 0, /* checksum */ }; /* * Private variables. 
*/ static vifi_t numvifs = 0; static void (*encap_oldrawip)() = 0; +static int have_encap_tunnel = 0; /* - * one-back cache used by multiencap_decap to locate a tunnel's vif + * one-back cache used by ipip_input to locate a tunnel's vif * given a datagram's src ip address. */ static u_long last_encap_src; static struct vif *last_encap_vif; -static u_long nethash_fc(u_long, u_long); -static struct mfc *mfcfind(u_long, u_long); -int get_sg_cnt(struct sioc_sg_req *); -int get_vif_cnt(struct sioc_vif_req *); -int get_vifs(caddr_t); +static int get_sg_cnt(struct sioc_sg_req *); +static int get_vif_cnt(struct sioc_vif_req *); +int ip_mrouter_init(struct socket *, struct mbuf *); static int add_vif(struct vifctl *); static int del_vif(vifi_t *); static int add_mfc(struct mfcctl *); -static int del_mfc(struct delmfcctl *); -static void cleanup_cache(void *); -static int ip_mdq(struct mbuf *, struct ifnet *, u_long, struct mfc *, - struct ip_moptions *); +static int del_mfc(struct mfcctl *); +static int get_version(struct mbuf *); +static int get_assert(struct mbuf *); +static int set_assert(int *); +static void expire_upcalls(void *); +static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, + vifi_t); static void phyint_send(struct ip *, struct vif *, struct mbuf *); -static void srcrt_send(struct ip *, struct vif *, struct mbuf *); static void encap_send(struct ip *, struct vif *, struct mbuf *); -void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long, - struct ip_moptions *); -void tbf_queue(struct vif *, struct mbuf *, struct ip *, struct ip_moptions *); -void tbf_process_q(struct vif *); -void tbf_dequeue(struct vif *, int); -void tbf_reprocess_q(void *); -int tbf_dq_sel(struct vif *, struct ip *); -void tbf_send_packet(struct vif *, struct mbuf *, struct ip_moptions *); -void tbf_update_tokens(struct vif *); +static void tbf_control(struct vif *, struct mbuf *, struct ip *, u_long); +static void tbf_queue(struct vif *, struct mbuf *); +static void 
tbf_process_q(struct vif *); +static void tbf_reprocess_q(void *); +static int tbf_dq_sel(struct vif *, struct ip *); +static void tbf_send_packet(struct vif *, struct mbuf *); +static void tbf_update_tokens(struct vif *); static int priority(struct vif *, struct ip *); -static int ip_mrouter_init(struct socket *); -void multiencap_decap(struct mbuf *m); +void multiencap_decap(struct mbuf *); /* - * A simple hash function: returns MFCHASHMOD of the low-order octet of - * the argument's network or subnet number and the multicast group assoc. + * whether or not special PIM assert processing is enabled. */ -static u_long -nethash_fc(m,n) - register u_long m; - register u_long n; -{ - struct in_addr in1; - struct in_addr in2; +static int pim_assert; +/* + * Rate limit for assert notification messages, in usec + */ +#define ASSERT_MSG_TIME 3000000 - in1.s_addr = m; - m = in_netof(in1); - while ((m & 0xff) == 0) m >>= 8; - - in2.s_addr = n; - n = in_netof(in2); - while ((n & 0xff) == 0) n >>= 8; - - return (MFCHASHMOD(m) ^ MFCHASHMOD(n)); -} - /* - * this is a direct-mapped cache used to speed the mapping from a - * datagram source address to the associated multicast route. Note - * that unlike mrttable, the hash is on IP address, not IP net number. + * Hash function for a source, group entry */ -#define MFCHASHSIZ 1024 -#define MFCHASH(a, g) ((((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ - ((g) >> 20) ^ ((g) >> 10) ^ (g)) & (MFCHASHSIZ-1)) -struct mfc *mfchash[MFCHASHSIZ]; +#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ + ((g) >> 20) ^ ((g) >> 10) ^ (g)) /* * Find a route for a given origin IP address and Multicast group address * Type of service parameter to be added in the future!!! 
*/ + #define MFCFIND(o, g, rt) { \ - register u_int _mrhasho = o; \ - register u_int _mrhashg = g; \ - _mrhasho = MFCHASH(_mrhasho, _mrhashg); \ + register struct mbuf *_mb_rt = mfctable[MFCHASH(o,g)]; \ + register struct mfc *_rt = NULL; \ + rt = NULL; \ ++mrtstat.mrts_mfc_lookups; \ - rt = mfchash[_mrhasho]; \ - if ((rt == NULL) || \ - ((o & rt->mfc_originmask.s_addr) != rt->mfc_origin.s_addr) || \ - (g != rt->mfc_mcastgrp.s_addr)) \ - if ((rt = mfcfind(o, g)) != NULL) \ - mfchash[_mrhasho] = rt; \ + while (_mb_rt) { \ + _rt = mtod(_mb_rt, struct mfc *); \ + if ((_rt->mfc_origin.s_addr == o) && \ + (_rt->mfc_mcastgrp.s_addr == g) && \ + (_mb_rt->m_act == NULL)) { \ + rt = _rt; \ + break; \ + } \ + _mb_rt = _mb_rt->m_next; \ + } \ + if (rt == NULL) { \ + ++mrtstat.mrts_mfc_misses; \ + } \ } -/* - * Find route by examining hash table entries - */ -static struct mfc * -mfcfind(origin, mcastgrp) - u_long origin; - u_long mcastgrp; -{ - register struct mbuf *mb_rt; - register struct mfc *rt; - register u_long hash; - hash = nethash_fc(origin, mcastgrp); - for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { - rt = mtod(mb_rt, struct mfc *); - if (((origin & rt->mfc_originmask.s_addr) == rt->mfc_origin.s_addr) && - (mcastgrp == rt->mfc_mcastgrp.s_addr) && - (mb_rt->m_act == NULL)) - return (rt); - } - mrtstat.mrts_mfc_misses++; - return NULL; -} - /* * Macros to compute elapsed time efficiently * Borrowed from Van Jacobson's scheduling code */ #define TV_DELTA(a, b, delta) { \ register int xxs; \ \ delta = (a).tv_usec - (b).tv_usec; \ if ((xxs = (a).tv_sec - (b).tv_sec)) { \ switch (xxs) { \ case 2: \ delta += 1000000; \ /* fall through */ \ case 1: \ delta += 1000000; \ break; \ default: \ delta += (1000000 * xxs); \ } \ } \ } #define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) +#ifdef UPCALL_TIMING +u_long upcall_data[51]; +static void collate(struct timeval *); +#endif /* UPCALL_TIMING */ + + /* - * 
Handle DVMRP setsockopt commands to modify the multicast routing tables. + * Handle MRT setsockopt commands to modify the multicast routing tables. */ int -X_ip_mrouter_cmd(cmd, so, m) +X_ip_mrouter_set(cmd, so, m) int cmd; struct socket *so; struct mbuf *m; { - if (cmd != DVMRP_INIT && so != ip_mrouter) return EACCES; + if (cmd != MRT_INIT && so != ip_mrouter) return EACCES; switch (cmd) { - case DVMRP_INIT: return ip_mrouter_init(so); - case DVMRP_DONE: return ip_mrouter_done(); - case DVMRP_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); - case DVMRP_DEL_VIF: return del_vif (mtod(m, vifi_t *)); - case DVMRP_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); - case DVMRP_DEL_MFC: return del_mfc (mtod(m, struct delmfcctl *)); + case MRT_INIT: return ip_mrouter_init(so, m); + case MRT_DONE: return ip_mrouter_done(); + case MRT_ADD_VIF: return add_vif (mtod(m, struct vifctl *)); + case MRT_DEL_VIF: return del_vif (mtod(m, vifi_t *)); + case MRT_ADD_MFC: return add_mfc (mtod(m, struct mfcctl *)); + case MRT_DEL_MFC: return del_mfc (mtod(m, struct mfcctl *)); + case MRT_ASSERT: return set_assert(mtod(m, int *)); default: return EOPNOTSUPP; } } #ifndef MROUTE_LKM -int (*ip_mrouter_cmd)(int, struct socket *, struct mbuf *) = X_ip_mrouter_cmd; +int (*ip_mrouter_set)(int, struct socket *, struct mbuf *) = X_ip_mrouter_set; #endif /* + * Handle MRT getsockopt commands + */ +int +X_ip_mrouter_get(cmd, so, m) + int cmd; + struct socket *so; + struct mbuf **m; +{ + struct mbuf *mb; + + if (so != ip_mrouter) return EACCES; + + *m = mb = m_get(M_WAIT, MT_SOOPTS); + + switch (cmd) { + case MRT_VERSION: return get_version(mb); + case MRT_ASSERT: return get_assert(mb); + default: return EOPNOTSUPP; + } +} + +#ifndef MROUTE_LKM +int (*ip_mrouter_get)(int, struct socket *, struct mbuf **) = X_ip_mrouter_get; +#endif + +/* * Handle ioctl commands to obtain information from the cache */ int X_mrt_ioctl(cmd, data) int cmd; caddr_t data; { int error = 0; switch (cmd) { - case 
(SIOCGETVIFINF): /* Read Virtual Interface (m/cast) */ - return (get_vifs(data)); - break; - case (SIOCGETVIFCNT): - return (get_vif_cnt((struct sioc_vif_req *)data)); - break; - case (SIOCGETSGCNT): - return (get_sg_cnt((struct sioc_sg_req *)data)); - break; + case (SIOCGETVIFCNT): + return (get_vif_cnt((struct sioc_vif_req *)data)); + break; + case (SIOCGETSGCNT): + return (get_sg_cnt((struct sioc_sg_req *)data)); + break; default: - return (EINVAL); - break; + return (EINVAL); + break; } return error; } #ifndef MROUTE_LKM int (*mrt_ioctl)(int, caddr_t, struct proc *) = X_mrt_ioctl; #endif /* - * returns the packet count for the source group provided + * returns the packet, byte, rpf-failure count for the source group provided */ -int +static int get_sg_cnt(req) register struct sioc_sg_req *req; { register struct mfc *rt; int s; s = splnet(); MFCFIND(req->src.s_addr, req->grp.s_addr, rt); splx(s); - if (rt != NULL) - req->count = rt->mfc_pkt_cnt; - else - req->count = 0xffffffff; + if (rt != NULL) { + req->pktcnt = rt->mfc_pkt_cnt; + req->bytecnt = rt->mfc_byte_cnt; + req->wrong_if = rt->mfc_wrong_if; + } else + req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; return 0; } /* - * returns the input and output packet counts on the interface provided + * returns the input and output packet and byte counts on the vif provided */ -int +static int get_vif_cnt(req) register struct sioc_vif_req *req; { register vifi_t vifi = req->vifi; + if (vifi >= numvifs) return EINVAL; + req->icount = viftable[vifi].v_pkt_in; req->ocount = viftable[vifi].v_pkt_out; + req->ibytes = viftable[vifi].v_bytes_in; + req->obytes = viftable[vifi].v_bytes_out; return 0; } -int -get_vifs(data) - char *data; -{ - struct vif_conf *vifc = (struct vif_conf *)data; - struct vif_req *vifrp, vifr; - int space, error=0; - - vifi_t vifi; - int s; - - space = vifc->vifc_len; - vifrp = vifc->vifc_req; - - s = splnet(); - vifc->vifc_num=numvifs; - - for (vifi = 0; vifi < numvifs; vifi++, vifrp++) { 
- if (viftable[vifi].v_lcl_addr.s_addr != 0) { - vifr.v_flags=viftable[vifi].v_flags; - vifr.v_threshold=viftable[vifi].v_threshold; - vifr.v_lcl_addr=viftable[vifi].v_lcl_addr; - vifr.v_rmt_addr=viftable[vifi].v_rmt_addr; - strncpy(vifr.v_if_name,viftable[vifi].v_ifp->if_name,IFNAMSIZ); - if ((space -= sizeof(vifr)) < 0) { - splx(s); - return(ENOSPC); - } - error = copyout((caddr_t)&vifr,(caddr_t)vifrp,(u_int)(sizeof vifr)); - if (error) { - splx(s); - return(error); - } - } - } - splx(s); - return 0; -} /* * Enable multicast routing */ -static int -ip_mrouter_init(so) +int +ip_mrouter_init(so, m) struct socket *so; + struct mbuf *m; { + int *v; + int i; + + if (mrtdebug) + log(LOG_DEBUG,"ip_mrouter_init: so_type = %d, pr_protocol = %d", + so->so_type, so->so_proto->pr_protocol); + if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_IGMP) return EOPNOTSUPP; + if (!m || (m->m_len != sizeof(int *))) + return ENOPROTOOPT; + + v = mtod(m, int *); + if (*v != 1) + return ENOPROTOOPT; + if (ip_mrouter != NULL) return EADDRINUSE; ip_mrouter = so; + bzero((caddr_t)mfctable, sizeof(mfctable)); + bzero((caddr_t)nexpire, sizeof(nexpire)); + + pim_assert = 0; + + timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); + if (mrtdebug) - log(LOG_DEBUG, "ip_mrouter_init\n"); + log(LOG_DEBUG, "ip_mrouter_init"); return 0; } /* * Disable multicast routing */ int X_ip_mrouter_done() { vifi_t vifi; int i; struct ifnet *ifp; struct ifreq ifr; struct mbuf *mb_rt; + struct mfc *rt; struct mbuf *m; struct rtdetq *rte; int s; s = splnet(); /* * For each phyint in use, disable promiscuous reception of all IP * multicasts. 
*/ for (vifi = 0; vifi < numvifs; vifi++) { if (viftable[vifi].v_lcl_addr.s_addr != 0 && !(viftable[vifi].v_flags & VIFF_TUNNEL)) { ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; ifp = viftable[vifi].v_ifp; (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); } } - bzero((caddr_t)qtable, sizeof(qtable)); bzero((caddr_t)tbftable, sizeof(tbftable)); bzero((caddr_t)viftable, sizeof(viftable)); numvifs = 0; + pim_assert = 0; + untimeout(expire_upcalls, (caddr_t)NULL); + /* - * Check if any outstanding timeouts remain + * Free all multicast forwarding cache entries. */ - if (timeout_val != 0) - for (i = 0; i < MFCTBLSIZ; i++) { - mb_rt = mfctable[i]; - while (mb_rt) { - if ( mb_rt->m_act != NULL) { - untimeout(cleanup_cache, (caddr_t)mb_rt); - while (mb_rt->m_act) { - m = mb_rt->m_act; - mb_rt->m_act = m->m_act; - rte = mtod(m, struct rtdetq *); - m_freem(rte->m); - m_free(m); - } - timeout_val--; + for (i = 0; i < MFCTBLSIZ; i++) { + mb_rt = mfctable[i]; + while (mb_rt) { + if (mb_rt->m_act != NULL) { + while (mb_rt->m_act) { + m = mb_rt->m_act; + mb_rt->m_act = m->m_act; + rte = mtod(m, struct rtdetq *); + m_freem(rte->m); + m_free(m); } - mb_rt = mb_rt->m_next; } - if (timeout_val == 0) - break; + mb_rt = m_free(mb_rt); } + } - /* - * Free all multicast forwarding cache entries. - */ - for (i = 0; i < MFCTBLSIZ; i++) - m_freem(mfctable[i]); - bzero((caddr_t)mfctable, sizeof(mfctable)); - bzero((caddr_t)mfchash, sizeof(mfchash)); /* * Reset de-encapsulation cache */ last_encap_src = NULL; last_encap_vif = NULL; - + have_encap_tunnel = 0; + ip_mrouter = NULL; splx(s); if (mrtdebug) - log(LOG_DEBUG, "ip_mrouter_done\n"); + log(LOG_DEBUG, "ip_mrouter_done"); return 0; } #ifndef MROUTE_LKM int (*ip_mrouter_done)(void) = X_ip_mrouter_done; #endif +static int +get_version(mb) + struct mbuf *mb; +{ + int *v; + + v = mtod(mb, int *); + + *v = 0x0305; /* XXX !!!! 
*/ + mb->m_len = sizeof(int); + + return 0; +} + /* + * Set PIM assert processing global + */ +static int +set_assert(i) + int *i; +{ + if ((*i != 1) && (*i != 0)) + return EINVAL; + + pim_assert = *i; + + return 0; +} + +/* + * Get PIM assert processing global + */ +static int +get_assert(m) + struct mbuf *m; +{ + int *i; + + i = mtod(m, int *); + + *i = pim_assert; + + return 0; +} + +/* * Add a vif to the vif table */ static int add_vif(vifcp) register struct vifctl *vifcp; { register struct vif *vifp = viftable + vifcp->vifc_vifi; static struct sockaddr_in sin = {sizeof sin, AF_INET}; struct ifaddr *ifa; struct ifnet *ifp; struct ifreq ifr; int error, s; struct tbf *v_tbf = tbftable + vifcp->vifc_vifi; if (vifcp->vifc_vifi >= MAXVIFS) return EINVAL; if (vifp->v_lcl_addr.s_addr != 0) return EADDRINUSE; /* Find the interface with an address in AF_INET family */ sin.sin_addr = vifcp->vifc_lcl_addr; ifa = ifa_ifwithaddr((struct sockaddr *)&sin); if (ifa == 0) return EADDRNOTAVAIL; ifp = ifa->ifa_ifp; if (vifcp->vifc_flags & VIFF_TUNNEL) { if ((vifcp->vifc_flags & VIFF_SRCRT) == 0) { - if (encap_oldrawip == 0) { - extern struct protosw inetsw[]; - register u_char pr = ip_protox[ENCAP_PROTO]; - - encap_oldrawip = inetsw[pr].pr_input; - inetsw[pr].pr_input = multiencap_decap; - for (s = 0; s < MAXVIFS; ++s) { - multicast_decap_if[s].if_name = "mdecap"; - multicast_decap_if[s].if_unit = s; + /* + * An encapsulating tunnel is wanted. Tell ipip_input() to + * start paying attention to encapsulated packets. 
+ */ + if (have_encap_tunnel == 0) { + have_encap_tunnel = 1; + for (s = 0; s < MAXVIFS; ++s) { + multicast_decap_if[s].if_name = "mdecap"; + multicast_decap_if[s].if_unit = s; + } } - } - ifp = &multicast_decap_if[vifcp->vifc_vifi]; + /* + * Set interface to fake encapsulator interface + */ + ifp = &multicast_decap_if[vifcp->vifc_vifi]; + /* + * Prepare cached route entry + */ + bzero(&vifp->v_route, sizeof(vifp->v_route)); } else { - ifp = 0; + log(LOG_ERR, "Source routed tunnels not supported."); + return EOPNOTSUPP; } } else { /* Make sure the interface supports multicast */ if ((ifp->if_flags & IFF_MULTICAST) == 0) return EOPNOTSUPP; /* Enable promiscuous reception of all IP multicasts from the if */ ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; s = splnet(); error = (*ifp->if_ioctl)(ifp, SIOCADDMULTI, (caddr_t)&ifr); splx(s); if (error) return error; } s = splnet(); /* define parameters for the tbf structure */ vifp->v_tbf = v_tbf; - vifp->v_tbf->q_len = 0; - vifp->v_tbf->n_tok = 0; - vifp->v_tbf->last_pkt_t = 0; + GET_TIME(vifp->v_tbf->tbf_last_pkt_t); + vifp->v_tbf->tbf_n_tok = 0; + vifp->v_tbf->tbf_q_len = 0; + vifp->v_tbf->tbf_max_q_len = MAXQSIZE; + vifp->v_tbf->tbf_q = vifp->v_tbf->tbf_t = NULL; vifp->v_flags = vifcp->vifc_flags; vifp->v_threshold = vifcp->vifc_threshold; vifp->v_lcl_addr = vifcp->vifc_lcl_addr; vifp->v_rmt_addr = vifcp->vifc_rmt_addr; vifp->v_ifp = ifp; - vifp->v_rate_limit= vifcp->vifc_rate_limit; + /* scaling up here allows division by 1024 in critical code */ + vifp->v_rate_limit= vifcp->vifc_rate_limit * 1024 / 1000; + vifp->v_rsvp_on = 0; + vifp->v_rsvpd = NULL; /* initialize per vif pkt counters */ vifp->v_pkt_in = 0; vifp->v_pkt_out = 0; + vifp->v_bytes_in = 0; + vifp->v_bytes_out = 0; splx(s); /* Adjust numvifs up if the vifi is higher than numvifs */ if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; if (mrtdebug) - 
log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d\n", - vifcp->vifc_vifi, + log(LOG_DEBUG, "add_vif #%d, lcladdr %x, %s %x, thresh %x, rate %d", + vifcp->vifc_vifi, ntohl(vifcp->vifc_lcl_addr.s_addr), (vifcp->vifc_flags & VIFF_TUNNEL) ? "rmtaddr" : "mask", ntohl(vifcp->vifc_rmt_addr.s_addr), vifcp->vifc_threshold, - vifcp->vifc_rate_limit); + vifcp->vifc_rate_limit); return 0; } /* * Delete a vif from the vif table */ static int del_vif(vifip) vifi_t *vifip; { register struct vif *vifp = viftable + *vifip; register vifi_t vifi; + register struct mbuf *m; struct ifnet *ifp; struct ifreq ifr; int s; if (*vifip >= numvifs) return EINVAL; if (vifp->v_lcl_addr.s_addr == 0) return EADDRNOTAVAIL; s = splnet(); if (!(vifp->v_flags & VIFF_TUNNEL)) { ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_family = AF_INET; ((struct sockaddr_in *)&(ifr.ifr_addr))->sin_addr.s_addr = INADDR_ANY; ifp = vifp->v_ifp; (*ifp->if_ioctl)(ifp, SIOCDELMULTI, (caddr_t)&ifr); } if (vifp == last_encap_vif) { last_encap_vif = 0; last_encap_src = 0; } - bzero((caddr_t)qtable[*vifip], - sizeof(qtable[*vifip])); + /* + * Free packets queued at the interface + */ + while (vifp->v_tbf->tbf_q) { + m = vifp->v_tbf->tbf_q; + vifp->v_tbf->tbf_q = m->m_act; + m_freem(m); + } + bzero((caddr_t)vifp->v_tbf, sizeof(*(vifp->v_tbf))); bzero((caddr_t)vifp, sizeof (*vifp)); /* Adjust numvifs down */ for (vifi = numvifs; vifi > 0; vifi--) if (viftable[vifi-1].v_lcl_addr.s_addr != 0) break; numvifs = vifi; splx(s); if (mrtdebug) - log(LOG_DEBUG, "del_vif %d, numvifs %d\n", *vifip, numvifs); + log(LOG_DEBUG, "del_vif %d, numvifs %d", *vifip, numvifs); return 0; } /* * Add an mfc entry */ static int add_mfc(mfccp) struct mfcctl *mfccp; { struct mfc *rt; - struct mfc *rt1 = 0; register struct mbuf *mb_rt; - struct mbuf *prev_mb_rt; u_long hash; struct mbuf *mb_ntry; struct rtdetq *rte; register u_short nstl; int s; int i; - rt = mfcfind(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); + 
MFCFIND(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr, rt); /* If an entry already exists, just update the fields */ if (rt) { - if (mrtdebug) - log(LOG_DEBUG,"add_mfc update o %x g %x m %x p %x\n", + if (mrtdebug & DEBUG_MFC) + log(LOG_DEBUG,"add_mfc update o %x g %x p %x", ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr), - ntohl(mfccp->mfcc_originmask.s_addr), mfccp->mfcc_parent); s = splnet(); rt->mfc_parent = mfccp->mfcc_parent; for (i = 0; i < numvifs; i++) - VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; splx(s); return 0; } - /* + /* * Find the entry for which the upcall was made and update */ s = splnet(); - hash = nethash_fc(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); - for (prev_mb_rt = mb_rt = mfctable[hash], nstl = 0; - mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { + hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); + for (mb_rt = mfctable[hash], nstl = 0; mb_rt; mb_rt = mb_rt->m_next) { rt = mtod(mb_rt, struct mfc *); - if (((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) - == mfccp->mfcc_origin.s_addr) && + if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && (mb_rt->m_act != NULL)) { + + if (nstl++) + log(LOG_ERR, "add_mfc %s o %x g %x p %x dbx %x", + "multiple kernel entries", + ntohl(mfccp->mfcc_origin.s_addr), + ntohl(mfccp->mfcc_mcastgrp.s_addr), + mfccp->mfcc_parent, mb_rt->m_act); - if (!nstl++) { - if (mrtdebug) - log(LOG_DEBUG,"add_mfc o %x g %x m %x p %x dbg %x\n", - ntohl(mfccp->mfcc_origin.s_addr), - ntohl(mfccp->mfcc_mcastgrp.s_addr), - ntohl(mfccp->mfcc_originmask.s_addr), - mfccp->mfcc_parent, mb_rt->m_act); + if (mrtdebug & DEBUG_MFC) + log(LOG_DEBUG,"add_mfc o %x g %x p %x dbg %x", + ntohl(mfccp->mfcc_origin.s_addr), + ntohl(mfccp->mfcc_mcastgrp.s_addr), + mfccp->mfcc_parent, mb_rt->m_act); - rt->mfc_origin = mfccp->mfcc_origin; - rt->mfc_originmask 
= mfccp->mfcc_originmask; - rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; - rt->mfc_parent = mfccp->mfcc_parent; - for (i = 0; i < numvifs; i++) - VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); - /* initialize pkt counters per src-grp */ - rt->mfc_pkt_cnt = 0; - rt1 = rt; - } + rt->mfc_origin = mfccp->mfcc_origin; + rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; + rt->mfc_parent = mfccp->mfcc_parent; + for (i = 0; i < numvifs; i++) + rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; + /* initialize pkt counters per src-grp */ + rt->mfc_pkt_cnt = 0; + rt->mfc_byte_cnt = 0; + rt->mfc_wrong_if = 0; + rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; - /* prevent cleanup of cache entry */ - untimeout(cleanup_cache, (caddr_t)mb_rt); - timeout_val--; + rt->mfc_expire = 0; /* Don't clean this guy up */ + nexpire[hash]--; /* free packets Qed at the end of this entry */ while (mb_rt->m_act) { mb_ntry = mb_rt->m_act; rte = mtod(mb_ntry, struct rtdetq *); - ip_mdq(rte->m, rte->ifp, rte->tunnel_src, - rt1, rte->imo); +/* #ifdef RSVP_ISI */ + ip_mdq(rte->m, rte->ifp, rt, -1); +/* #endif */ mb_rt->m_act = mb_ntry->m_act; m_freem(rte->m); +#ifdef UPCALL_TIMING + collate(&(rte->t)); +#endif /* UPCALL_TIMING */ m_free(mb_ntry); } - - /* - * If more than one entry was created for a single upcall - * delete that entry - */ - if (nstl > 1) { - MFREE(mb_rt, prev_mb_rt->m_next); - mb_rt = prev_mb_rt; - } } } /* * It is possible that an entry is being inserted without an upcall */ if (nstl == 0) { - if (mrtdebug) - log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x m %x p %x\n", + if (mrtdebug & DEBUG_MFC) + log(LOG_DEBUG,"add_mfc no upcall h %d o %x g %x p %x", hash, ntohl(mfccp->mfcc_origin.s_addr), ntohl(mfccp->mfcc_mcastgrp.s_addr), - ntohl(mfccp->mfcc_originmask.s_addr), mfccp->mfcc_parent); - - for (prev_mb_rt = mb_rt = mfctable[hash]; - mb_rt; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { - + + for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { + rt = mtod(mb_rt, struct mfc *); - if 
(((rt->mfc_origin.s_addr & mfccp->mfcc_originmask.s_addr) - == mfccp->mfcc_origin.s_addr) && + if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { rt->mfc_origin = mfccp->mfcc_origin; - rt->mfc_originmask = mfccp->mfcc_originmask; rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; rt->mfc_parent = mfccp->mfcc_parent; for (i = 0; i < numvifs; i++) - VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; /* initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; + rt->mfc_byte_cnt = 0; + rt->mfc_wrong_if = 0; + rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; + if (rt->mfc_expire) + nexpire[hash]--; + rt->mfc_expire = 0; } } if (mb_rt == NULL) { /* no upcall, so make a new entry */ MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); if (mb_rt == NULL) { splx(s); return ENOBUFS; } - + rt = mtod(mb_rt, struct mfc *); - + /* insert new entry at head of hash chain */ rt->mfc_origin = mfccp->mfcc_origin; - rt->mfc_originmask = mfccp->mfcc_originmask; rt->mfc_mcastgrp = mfccp->mfcc_mcastgrp; rt->mfc_parent = mfccp->mfcc_parent; for (i = 0; i < numvifs; i++) - VIFM_COPY(mfccp->mfcc_ttls[i], rt->mfc_ttls[i]); + rt->mfc_ttls[i] = mfccp->mfcc_ttls[i]; /* initialize pkt counters per src-grp */ rt->mfc_pkt_cnt = 0; - + rt->mfc_byte_cnt = 0; + rt->mfc_wrong_if = 0; + rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; + rt->mfc_expire = 0; + /* link into table */ mb_rt->m_next = mfctable[hash]; mfctable[hash] = mb_rt; mb_rt->m_act = NULL; } } splx(s); return 0; } +#ifdef UPCALL_TIMING /* + * collect delay statistics on the upcalls + */ +static void collate(t) +register struct timeval *t; +{ + register u_long d; + register struct timeval tp; + register u_long delta; + + GET_TIME(tp); + + if (TV_LT(*t, tp)) + { + TV_DELTA(tp, *t, delta); + + d = delta >> 10; + if (d > 50) + d = 50; + + ++upcall_data[d]; + } +} +#endif /* UPCALL_TIMING */ + +/* * Delete an mfc entry */ static int 
del_mfc(mfccp) - struct delmfcctl *mfccp; + struct mfcctl *mfccp; { struct in_addr origin; struct in_addr mcastgrp; struct mfc *rt; struct mbuf *mb_rt; - struct mbuf *prev_mb_rt; + struct mbuf **nptr; u_long hash; - struct mfc **cmfc; - struct mfc **cmfcend; - int s; + int s, i; origin = mfccp->mfcc_origin; mcastgrp = mfccp->mfcc_mcastgrp; - hash = nethash_fc(origin.s_addr, mcastgrp.s_addr); + hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); - if (mrtdebug) - log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x\n", + if (mrtdebug & DEBUG_MFC) + log(LOG_DEBUG,"del_mfc orig %x mcastgrp %x", ntohl(origin.s_addr), ntohl(mcastgrp.s_addr)); - for (prev_mb_rt = mb_rt = mfctable[hash] - ; mb_rt - ; prev_mb_rt = mb_rt, mb_rt = mb_rt->m_next) { + s = splnet(); + + nptr = &mfctable[hash]; + while ((mb_rt = *nptr) != NULL) { rt = mtod(mb_rt, struct mfc *); if (origin.s_addr == rt->mfc_origin.s_addr && mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && mb_rt->m_act == NULL) break; + + nptr = &mb_rt->m_next; } if (mb_rt == NULL) { - return ESRCH; + splx(s); + return EADDRNOTAVAIL; } - s = splnet(); + MFREE(mb_rt, *nptr); - cmfc = mfchash; - cmfcend = cmfc + MFCHASHSIZ; - for ( ; cmfc < cmfcend; ++cmfc) - if (*cmfc == rt) - *cmfc = 0; - - if (prev_mb_rt != mb_rt) { /* if moved past head of list */ - MFREE(mb_rt, prev_mb_rt->m_next); - } else /* delete head of list, it is in the table */ - mfctable[hash] = m_free(mb_rt); - splx(s); return 0; } /* + * Send a message to mrouted on the multicast routing socket + */ +static int +socket_send(s, mm, src) + struct socket *s; + struct mbuf *mm; + struct sockaddr_in *src; +{ + if (s) { + if (sbappendaddr(&s->so_rcv, + (struct sockaddr *)src, + mm, (struct mbuf *)0) != 0) { + sorwakeup(s); + return 0; + } + } + m_freem(mm); + return -1; +} + +/* * IP multicast forwarding function. 
This function assumes that the packet * pointed to by "ip" has arrived on (or is about to be sent to) the interface * pointed to by "ifp", and the packet is to be relayed to other networks * that have members of the packet's destination IP multicast group. * - * The packet is returned unscathed to the caller, unless it is tunneled - * or erroneous, in which case a non-zero return value tells the caller to + * The packet is returned unscathed to the caller, unless it is + * erroneous, in which case a non-zero return value tells the caller to * discard it. */ #define IP_HDR_LEN 20 /* # bytes of fixed IP header (excluding options) */ #define TUNNEL_LEN 12 /* # bytes of IP option for tunnel encapsulation */ int X_ip_mforward(ip, ifp, m, imo) register struct ip *ip; struct ifnet *ifp; struct mbuf *m; struct ip_moptions *imo; { - register struct mfc *rt; + register struct mfc *rt = 0; /* XXX uninit warning */ register u_char *ipoptions; - u_long tunnel_src; static struct sockproto k_igmpproto = { AF_INET, IPPROTO_IGMP }; static struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; - static struct sockaddr_in k_igmpdst = { sizeof k_igmpdst, AF_INET }; + static int srctun = 0; register struct mbuf *mm; - register struct ip *k_data; int s; + vifi_t vifi; + struct vif *vifp; - if (mrtdebug > 1) - log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x (%s%d)\n", - ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp, - ifp->if_name, ifp->if_unit); + if (mrtdebug & DEBUG_FORWARD) + log(LOG_DEBUG, "ip_mforward: src %x, dst %x, ifp %x", + ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), ifp); if (ip->ip_hl < (IP_HDR_LEN + TUNNEL_LEN) >> 2 || (ipoptions = (u_char *)(ip + 1))[1] != IPOPT_LSRR ) { /* - * Packet arrived via a physical interface. + * Packet arrived via a physical interface or + * an encapsulated tunnel. */ - tunnel_src = 0; } else { /* * Packet arrived through a source-route tunnel. 
- * - * A source-route tunneled packet has a single NOP option and a - * two-element - * loose-source-and-record-route (LSRR) option immediately following - * the fixed-size part of the IP header. At this point in processing, - * the IP header should contain the following IP addresses: - * - * original source - in the source address field - * destination group - in the destination address field - * remote tunnel end-point - in the first element of LSRR - * one of this host's addrs - in the second element of LSRR - * - * NOTE: RFC-1075 would have the original source and remote tunnel - * end-point addresses swapped. However, that could cause - * delivery of ICMP error messages to innocent applications - * on intermediate routing hosts! Therefore, we hereby - * change the spec. + * Source-route tunnels are no longer supported. */ + if ((srctun++ % 1000) == 0) + log(LOG_ERR, "ip_mforward: received source-routed packet from %x", + ntohl(ip->ip_src.s_addr)); - /* - * Verify that the tunnel options are well-formed. - */ - if (ipoptions[0] != IPOPT_NOP || - ipoptions[2] != 11 || /* LSRR option length */ - ipoptions[3] != 12 || /* LSRR address pointer */ - (tunnel_src = *(u_long *)(&ipoptions[4])) == 0) { - mrtstat.mrts_bad_tunnel++; - if (mrtdebug) - log(LOG_DEBUG, - "ip_mforward: bad tunnel from %u (%x %x %x %x %x %x)\n", - ntohl(ip->ip_src.s_addr), - ipoptions[0], ipoptions[1], ipoptions[2], ipoptions[3], - *(u_long *)(&ipoptions[4]), *(u_long *)(&ipoptions[8])); - return 1; - } + return 1; + } - /* - * Delete the tunnel options from the packet. 
- */ - ovbcopy((caddr_t)(ipoptions + TUNNEL_LEN), (caddr_t)ipoptions, - (unsigned)(m->m_len - (IP_HDR_LEN + TUNNEL_LEN))); - m->m_len -= TUNNEL_LEN; - ip->ip_len -= TUNNEL_LEN; - ip->ip_hl -= TUNNEL_LEN >> 2; - - ifp = 0; + if ((imo) && ((vifi = imo->imo_multicast_vif) < numvifs)) { + if (ip->ip_ttl < 255) + ip->ip_ttl++; /* compensate for -1 in *_send routines */ + if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { + vifp = viftable + vifi; + printf("Sending IPPROTO_RSVP from %x to %x on vif %d (%s%s%d)\n", + ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr), vifi, + (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", + vifp->v_ifp->if_name, vifp->v_ifp->if_unit); + } + return (ip_mdq(m, ifp, rt, vifi)); } + if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { + printf("Warning: IPPROTO_RSVP from %x to %x without vif option\n", + ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr)); + if(!imo) + printf("In fact, no options were specified at all\n"); + } /* * Don't forward a packet with time-to-live of zero or one, * or a packet destined to a local-only group. 
*/ if (ip->ip_ttl <= 1 || ntohl(ip->ip_dst.s_addr) <= INADDR_MAX_LOCAL_GROUP) - return (int)tunnel_src; + return 0; /* * Determine forwarding vifs from the forwarding cache table */ s = splnet(); MFCFIND(ip->ip_src.s_addr, ip->ip_dst.s_addr, rt); /* Entry exists, so forward if necessary */ if (rt != NULL) { splx(s); - return (ip_mdq(m, ifp, tunnel_src, rt, imo)); - } - - else { + return (ip_mdq(m, ifp, rt, -1)); + } else { /* * If we don't have a route for packet's origin, * Make a copy of the packet & * send message to routing daemon */ register struct mbuf *mb_rt; register struct mbuf *mb_ntry; register struct mbuf *mb0; register struct rtdetq *rte; register struct mbuf *rte_m; register u_long hash; + register int npkts; +#ifdef UPCALL_TIMING + struct timeval tp; + GET_TIME(tp); +#endif + mrtstat.mrts_no_route++; - if (mrtdebug) - log(LOG_DEBUG, "ip_mforward: no rte s %x g %x\n", + if (mrtdebug & (DEBUG_FORWARD | DEBUG_MFC)) + log(LOG_DEBUG, "ip_mforward: no rte s %x g %x", ntohl(ip->ip_src.s_addr), ntohl(ip->ip_dst.s_addr)); + /* + * Allocate mbufs early so that we don't do extra work if we are + * just going to fail anyway. + */ + MGET(mb_ntry, M_DONTWAIT, MT_DATA); + if (mb_ntry == NULL) { + splx(s); + return ENOBUFS; + } + mb0 = m_copy(m, 0, M_COPYALL); + if (mb0 == NULL) { + m_free(mb_ntry); + splx(s); + return ENOBUFS; + } + /* is there an upcall waiting for this packet? 
*/ - hash = nethash_fc(ip->ip_src.s_addr, ip->ip_dst.s_addr); + hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); for (mb_rt = mfctable[hash]; mb_rt; mb_rt = mb_rt->m_next) { rt = mtod(mb_rt, struct mfc *); - if (((ip->ip_src.s_addr & rt->mfc_originmask.s_addr) == - rt->mfc_origin.s_addr) && + if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && (mb_rt->m_act != NULL)) break; } if (mb_rt == NULL) { + int hlen = ip->ip_hl << 2; + int i; + struct igmpmsg *im; + /* no upcall, so make a new entry */ MGET(mb_rt, M_DONTWAIT, MT_MRTABLE); if (mb_rt == NULL) { + m_free(mb_ntry); + m_freem(mb0); splx(s); return ENOBUFS; } + /* Make a copy of the header to send to the user level process */ + mm = m_copy(m, 0, hlen); + if (mm && (M_HASCL(mm) || mm->m_len < hlen)) + mm = m_pullup(mm, hlen); + if (mm == NULL) { + m_free(mb_ntry); + m_freem(mb0); + m_free(mb_rt); + splx(s); + return ENOBUFS; + } + /* + * Send message to routing daemon to install + * a route into the kernel table + */ + k_igmpsrc.sin_addr = ip->ip_src; + + im = mtod(mm, struct igmpmsg *); + im->im_msgtype = IGMPMSG_NOCACHE; + im->im_mbz = 0; + + mrtstat.mrts_upcalls++; + + if (socket_send(ip_mrouter, mm, &k_igmpsrc) < 0) { + log(LOG_WARNING, "ip_mforward: ip_mrouter socket queue full"); + ++mrtstat.mrts_upq_sockfull; + m_free(mb_ntry); + m_freem(mb0); + m_free(mb_rt); + splx(s); + return ENOBUFS; + } + rt = mtod(mb_rt, struct mfc *); /* insert new entry at head of hash chain */ rt->mfc_origin.s_addr = ip->ip_src.s_addr; - rt->mfc_originmask.s_addr = (u_long)0xffffffff; rt->mfc_mcastgrp.s_addr = ip->ip_dst.s_addr; + rt->mfc_expire = UPCALL_EXPIRE; + nexpire[hash]++; + for (i = 0; i < numvifs; i++) + rt->mfc_ttls[i] = 0; + rt->mfc_parent = -1; /* link into table */ - hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); mb_rt->m_next = mfctable[hash]; mfctable[hash] = mb_rt; mb_rt->m_act = NULL; - } + rte_m = mb_rt; + } else { + /* determine if q has 
overflowed */ + for (rte_m = mb_rt, npkts = 0; rte_m->m_act; rte_m = rte_m->m_act) + npkts++; - /* determine if q has overflowed */ - for (rte_m = mb_rt, hash = 0; rte_m->m_act; rte_m = rte_m->m_act) - hash++; - - if (hash > MAX_UPQ) { - mrtstat.mrts_upq_ovflw++; - splx(s); - return 0; + if (npkts > MAX_UPQ) { + mrtstat.mrts_upq_ovflw++; + m_free(mb_ntry); + m_freem(mb0); + splx(s); + return 0; + } } - /* add this packet and timing, ifp info to m_act */ - MGET(mb_ntry, M_DONTWAIT, MT_DATA); - if (mb_ntry == NULL) { - splx(s); - return ENOBUFS; - } - mb_ntry->m_act = NULL; rte = mtod(mb_ntry, struct rtdetq *); - mb0 = m_copy(m, 0, M_COPYALL); - if (mb0 == NULL) { - splx(s); - return ENOBUFS; - } - rte->m = mb0; rte->ifp = ifp; - rte->tunnel_src = tunnel_src; - rte->imo = imo; +#ifdef UPCALL_TIMING + rte->t = tp; +#endif - rte_m->m_act = mb_ntry; + /* Add this entry to the end of the queue */ + rte_m->m_act = mb_ntry; splx(s); - if (hash == 0) { - /* - * Send message to routing daemon to install - * a route into the kernel table - */ - k_igmpsrc.sin_addr = ip->ip_src; - k_igmpdst.sin_addr = ip->ip_dst; - - mm = m_copy(m, 0, M_COPYALL); - if (mm == NULL) { - splx(s); - return ENOBUFS; - } - - k_data = mtod(mm, struct ip *); - k_data->ip_p = 0; - - mrtstat.mrts_upcalls++; - - rip_ip_input(mm, ip_mrouter, (struct sockaddr *)&k_igmpsrc); - - /* set timer to cleanup entry if upcall is lost */ - timeout(cleanup_cache, (caddr_t)mb_rt, 100); - timeout_val++; - } - return 0; - } + } } #ifndef MROUTE_LKM int (*ip_mforward)(struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *) = X_ip_mforward; #endif /* * Clean up the cache entry if upcall is not serviced */ static void -cleanup_cache(xmb_rt) - void *xmb_rt; +expire_upcalls(void *unused) { - struct mbuf *mb_rt = xmb_rt; - struct mfc *rt; - u_long hash; - struct mbuf *prev_m0; - struct mbuf *m0; - struct mbuf *m; + struct mbuf *mb_rt, *m, **nptr; struct rtdetq *rte; + struct mfc *mfc; + int i; int s; - rt = 
mtod(mb_rt, struct mfc *); - hash = nethash_fc(rt->mfc_origin.s_addr, rt->mfc_mcastgrp.s_addr); - - if (mrtdebug) - log(LOG_DEBUG, "ip_mforward: cleanup ipm %d h %d s %x g %x\n", - ip_mrouter, hash, ntohl(rt->mfc_origin.s_addr), - ntohl(rt->mfc_mcastgrp.s_addr)); - - mrtstat.mrts_cache_cleanups++; - - /* - * determine entry to be cleaned up in cache table - */ s = splnet(); - for (prev_m0 = m0 = mfctable[hash]; m0; prev_m0 = m0, m0 = m0->m_next) - if (m0 == mb_rt) - break; + for (i = 0; i < MFCTBLSIZ; i++) { + if (nexpire[i] == 0) + continue; + nptr = &mfctable[i]; + for (mb_rt = *nptr; mb_rt != NULL; mb_rt = *nptr) { + mfc = mtod(mb_rt, struct mfc *); - /* - * drop all the packets - * free the mbuf with the pkt, if, timing info - */ - while (mb_rt->m_act) { - m = mb_rt->m_act; - mb_rt->m_act = m->m_act; + /* + * Skip real cache entries + * Make sure it wasn't marked to not expire (shouldn't happen) + * If it expires now + */ + if (mb_rt->m_act != NULL && + mfc->mfc_expire != 0 && + --mfc->mfc_expire == 0) { + if (mrtdebug & DEBUG_EXPIRE) + log(LOG_DEBUG, "expire_upcalls: expiring (%x %x)", + ntohl(mfc->mfc_origin.s_addr), + ntohl(mfc->mfc_mcastgrp.s_addr)); + /* + * drop all the packets + * free the mbuf with the pkt, if, timing info + */ + while (mb_rt->m_act) { + m = mb_rt->m_act; + mb_rt->m_act = m->m_act; + + rte = mtod(m, struct rtdetq *); + m_freem(rte->m); + m_free(m); + } + ++mrtstat.mrts_cache_cleanups; + nexpire[i]--; - rte = mtod(m, struct rtdetq *); - m_freem(rte->m); - m_free(m); + MFREE(mb_rt, *nptr); + } else { + nptr = &mb_rt->m_next; + } + } } - - /* - * Delete the entry from the cache - */ - if (prev_m0 != m0) { /* if moved past head of list */ - MFREE(m0, prev_m0->m_next); - } else /* delete head of list, it is in the table */ - mfctable[hash] = m_free(m0); - - timeout_val--; splx(s); + timeout(expire_upcalls, (caddr_t)NULL, EXPIRE_TIMEOUT); } /* * Packet forwarding routine once entry in the cache is made */ static int -ip_mdq(m, ifp, 
tunnel_src, rt, imo) +ip_mdq(m, ifp, rt, xmt_vif) register struct mbuf *m; register struct ifnet *ifp; - register u_long tunnel_src; register struct mfc *rt; - register struct ip_moptions *imo; + register vifi_t xmt_vif; { register struct ip *ip = mtod(m, struct ip *); register vifi_t vifi; register struct vif *vifp; + register struct mbuf *tmp; + register int plen = ntohs(ip->ip_len); +/* + * Macro to send packet on vif. Since RSVP packets don't get counted on + * input, they shouldn't get counted on output, so statistics keeping is + * seperate. + */ +#define MC_SEND(ip,vifp,m) { \ + if ((vifp)->v_flags & VIFF_TUNNEL) \ + encap_send((ip), (vifp), (m)); \ + else \ + phyint_send((ip), (vifp), (m)); \ +} + /* + * If xmt_vif is not -1, send on only the requested vif. + * + * (since vifi_t is u_short, -1 becomes MAXUSHORT, which > numvifs.) + */ + if (xmt_vif < numvifs) { + MC_SEND(ip, viftable + xmt_vif, m); + return 1; + } + + /* * Don't forward if it didn't arrive from the parent vif for its origin. - * Notes: v_ifp is zero for src route tunnels, multicast_decap_if - * for encapsulated tunnels and a real ifnet for non-tunnels so - * the first part of the if catches wrong physical interface or - * tunnel type; v_rmt_addr is zero for non-tunneled packets so - * the 2nd part catches both packets that arrive via a tunnel - * that shouldn't and packets that arrive via the wrong tunnel. 
*/ vifi = rt->mfc_parent; - if (viftable[vifi].v_ifp != ifp || - (ifp == 0 && viftable[vifi].v_rmt_addr.s_addr != tunnel_src)) { + if ((vifi >= numvifs) || (viftable[vifi].v_ifp != ifp)) { /* came in the wrong interface */ - if (mrtdebug) - log(LOG_DEBUG, "wrong if: ifp %x vifi %d\n", - ifp, vifi); + if (mrtdebug & DEBUG_FORWARD) + log(LOG_DEBUG, "wrong if: ifp %x vifi %d vififp %x", + ifp, vifi, viftable[vifi].v_ifp); ++mrtstat.mrts_wrong_if; - return (int)tunnel_src; + ++rt->mfc_wrong_if; + /* + * If we are doing PIM assert processing, and we are forwarding + * packets on this interface, and it is a broadcast medium + * interface (and not a tunnel), send a message to the routing daemon. + */ + if (pim_assert && rt->mfc_ttls[vifi] && + (ifp->if_flags & IFF_BROADCAST) && + !(viftable[vifi].v_flags & VIFF_TUNNEL)) { + struct sockaddr_in k_igmpsrc; + struct mbuf *mm; + struct igmpmsg *im; + int hlen = ip->ip_hl << 2; + struct timeval now; + register u_long delta; + + GET_TIME(now); + + TV_DELTA(rt->mfc_last_assert, now, delta); + + if (delta > ASSERT_MSG_TIME) { + mm = m_copy(m, 0, hlen); + if (mm && (M_HASCL(mm) || mm->m_len < hlen)) + mm = m_pullup(mm, hlen); + if (mm == NULL) { + return ENOBUFS; + } + + rt->mfc_last_assert = now; + + im = mtod(mm, struct igmpmsg *); + im->im_msgtype = IGMPMSG_WRONGVIF; + im->im_mbz = 0; + im->im_vif = vifi; + + k_igmpsrc.sin_addr = im->im_src; + + socket_send(ip_mrouter, mm, &k_igmpsrc); + } + } + return 0; } - /* increment the interface and s-g counters */ - viftable[vifi].v_pkt_in++; + /* If I sourced this packet, it counts as output, else it was input. */ + if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { + viftable[vifi].v_pkt_out++; + viftable[vifi].v_bytes_out += plen; + } else { + viftable[vifi].v_pkt_in++; + viftable[vifi].v_bytes_in += plen; + } rt->mfc_pkt_cnt++; + rt->mfc_byte_cnt += plen; /* * For each vif, decide if a copy of the packet should be forwarded. 
* Forward if: * - the ttl exceeds the vif's threshold * - there are group members downstream on interface */ -#define MC_SEND(ip,vifp,m) { \ - (vifp)->v_pkt_out++; \ - if ((vifp)->v_flags & VIFF_SRCRT) \ - srcrt_send((ip), (vifp), (m)); \ - else if ((vifp)->v_flags & VIFF_TUNNEL) \ - encap_send((ip), (vifp), (m)); \ - else \ - phyint_send((ip), (vifp), (m)); \ - } - -/* If no options or the imo_multicast_vif option is 0, don't do this part - */ - if ((imo != NULL) && - (( vifi = imo->imo_multicast_vif - 1) < numvifs) /*&& (vifi>=0)*/) - { - MC_SEND(ip,viftable+vifi,m); - return (1); /* make sure we are done: No more physical sends */ - } - for (vifp = viftable, vifi = 0; vifi < numvifs; vifp++, vifi++) if ((rt->mfc_ttls[vifi] > 0) && - (ip->ip_ttl > rt->mfc_ttls[vifi])) + (ip->ip_ttl > rt->mfc_ttls[vifi])) { + vifp->v_pkt_out++; + vifp->v_bytes_out += plen; MC_SEND(ip, vifp, m); + } return 0; } -/* check if a vif number is legal/ok. This is used by ip_output, to export - * numvifs there, +/* + * check if a vif number is legal/ok. This is used by ip_output, to export + * numvifs there, */ int X_legal_vif_num(vif) int vif; -{ if (vif>=0 && vif<=numvifs) +{ + if (vif >= 0 && vif < numvifs) return(1); else return(0); } #ifndef MROUTE_LKM int (*legal_vif_num)(int) = X_legal_vif_num; #endif -static void -phyint_send(ip, vifp, m) - struct ip *ip; - struct vif *vifp; - struct mbuf *m; +/* + * Return the local address used by this vif + */ +u_long +X_ip_mcast_src(vifi) + int vifi; { - register struct mbuf *mb_copy; - int hlen = ip->ip_hl << 2; - register struct ip_moptions *imo; - - if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) - return; - - /* - * Make sure the header isn't in an cluster, because the sharing - * in clusters defeats the whole purpose of making the copy above. 
- */ - mb_copy = m_pullup(mb_copy, hlen); - if (mb_copy == NULL) - return; - - MALLOC(imo, struct ip_moptions *, sizeof *imo, M_IPMOPTS, M_NOWAIT); - if (imo == NULL) { - m_freem(mb_copy); - return; - } - - imo->imo_multicast_ifp = vifp->v_ifp; - imo->imo_multicast_ttl = ip->ip_ttl - 1; - imo->imo_multicast_loop = 1; - - if (vifp->v_rate_limit <= 0) - tbf_send_packet(vifp, mb_copy, imo); + if (vifi >= 0 && vifi < numvifs) + return viftable[vifi].v_lcl_addr.s_addr; else - tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len, - imo); + return INADDR_ANY; } +#ifndef MROUTE_LKM +u_long (*ip_mcast_src)(int) = X_ip_mcast_src; +#endif + static void -srcrt_send(ip, vifp, m) +phyint_send(ip, vifp, m) struct ip *ip; struct vif *vifp; struct mbuf *m; { - struct mbuf *mb_copy, *mb_opts; - int hlen = ip->ip_hl << 2; - register struct ip *ip_copy; - u_char *cp; + register struct mbuf *mb_copy; + register int hlen = ip->ip_hl << 2; /* - * Make sure that adding the tunnel options won't exceed the - * maximum allowed number of option bytes. + * Make a new reference to the packet; make sure that + * the IP header is actually copied, not just referenced, + * so that ip_output() only scribbles on the copy. */ - if (ip->ip_hl > (60 - TUNNEL_LEN) >> 2) { - mrtstat.mrts_cant_tunnel++; - if (mrtdebug) - log(LOG_DEBUG, "srcrt_send: no room for tunnel options, from %u\n", - ntohl(ip->ip_src.s_addr)); + mb_copy = m_copy(m, 0, M_COPYALL); + if (mb_copy && (M_HASCL(mb_copy) || mb_copy->m_len < hlen)) + mb_copy = m_pullup(mb_copy, hlen); + if (mb_copy == NULL) return; - } - if ((mb_copy = m_copy(m, 0, M_COPYALL)) == NULL) - return; - - MGETHDR(mb_opts, M_DONTWAIT, MT_HEADER); - if (mb_opts == NULL) { - m_freem(mb_copy); - return; - } - /* - * 'Delete' the base ip header from the mb_copy chain - */ - mb_copy->m_len -= hlen; - mb_copy->m_data += hlen; - /* - * Make mb_opts be the new head of the packet chain. 
- * Any options of the packet were left in the old packet chain head - */ - mb_opts->m_next = mb_copy; - mb_opts->m_len = hlen + TUNNEL_LEN; - mb_opts->m_data += MSIZE - mb_opts->m_len; - mb_opts->m_pkthdr.len = mb_copy->m_pkthdr.len + TUNNEL_LEN; - /* - * Copy the base ip header from the mb_copy chain to the new head mbuf - */ - ip_copy = mtod(mb_opts, struct ip *); - bcopy((caddr_t)ip_copy, mtod(mb_opts, caddr_t), hlen); - ip_copy->ip_ttl--; - ip_copy->ip_dst = vifp->v_rmt_addr; /* remote tunnel end-point */ - /* - * Adjust the ip header length to account for the tunnel options. - */ - ip_copy->ip_hl += TUNNEL_LEN >> 2; - ip_copy->ip_len += TUNNEL_LEN; - /* - * Add the NOP and LSRR after the base ip header - */ - cp = mtod(mb_opts, u_char *) + IP_HDR_LEN; - *cp++ = IPOPT_NOP; - *cp++ = IPOPT_LSRR; - *cp++ = 11; /* LSRR option length */ - *cp++ = 8; /* LSSR pointer to second element */ - *(u_long*)cp = vifp->v_lcl_addr.s_addr; /* local tunnel end-point */ - cp += 4; - *(u_long*)cp = ip->ip_dst.s_addr; /* destination group */ - if (vifp->v_rate_limit <= 0) - tbf_send_packet(vifp, mb_opts, 0); + tbf_send_packet(vifp, mb_copy); else - tbf_control(vifp, mb_opts, - mtod(mb_opts, struct ip *), ip_copy->ip_len, 0); + tbf_control(vifp, mb_copy, mtod(mb_copy, struct ip *), ip->ip_len); } static void encap_send(ip, vifp, m) register struct ip *ip; register struct vif *vifp; register struct mbuf *m; { register struct mbuf *mb_copy; register struct ip *ip_copy; int hlen = ip->ip_hl << 2; register int i, len = ip->ip_len; /* * copy the old packet & pullup it's IP header into the * new mbuf so we can modify it. Try to fill the new * mbuf since if we don't the ethernet driver will. 
*/ MGET(mb_copy, M_DONTWAIT, MT_DATA); if (mb_copy == NULL) return; mb_copy->m_data += 16; mb_copy->m_len = sizeof(multicast_encap_iphdr); if ((mb_copy->m_next = m_copy(m, 0, M_COPYALL)) == NULL) { m_freem(mb_copy); return; } i = MHLEN - M_LEADINGSPACE(mb_copy); if (i > len) i = len; mb_copy = m_pullup(mb_copy, i); if (mb_copy == NULL) return; mb_copy->m_pkthdr.len = len + sizeof(multicast_encap_iphdr); /* * fill in the encapsulating IP header. */ ip_copy = mtod(mb_copy, struct ip *); *ip_copy = multicast_encap_iphdr; ip_copy->ip_id = htons(ip_id++); ip_copy->ip_len += len; ip_copy->ip_src = vifp->v_lcl_addr; ip_copy->ip_dst = vifp->v_rmt_addr; /* * turn the encapsulated IP header back into a valid one. */ ip = (struct ip *)((caddr_t)ip_copy + sizeof(multicast_encap_iphdr)); --ip->ip_ttl; HTONS(ip->ip_len); HTONS(ip->ip_off); ip->ip_sum = 0; #if defined(LBL) && !defined(ultrix) ip->ip_sum = ~oc_cksum((caddr_t)ip, ip->ip_hl << 2, 0); #else mb_copy->m_data += sizeof(multicast_encap_iphdr); ip->ip_sum = in_cksum(mb_copy, ip->ip_hl << 2); mb_copy->m_data -= sizeof(multicast_encap_iphdr); #endif if (vifp->v_rate_limit <= 0) - tbf_send_packet(vifp, mb_copy, 0); + tbf_send_packet(vifp, mb_copy); else - tbf_control(vifp, mb_copy, ip, ip_copy->ip_len, 0); + tbf_control(vifp, mb_copy, ip, ip_copy->ip_len); } /* * De-encapsulate a packet and feed it back through ip input (this * routine is called whenever IP gets a packet with proto type * ENCAP_PROTO and a local destination address). 
*/ void #ifdef MROUTE_LKM -X_multiencap_decap(m) +X_ipip_input(m) #else -multiencap_decap(m) +ipip_input(m, iphlen) #endif - register struct mbuf *m; + register struct mbuf *m; + int iphlen; { struct ifnet *ifp = m->m_pkthdr.rcvif; register struct ip *ip = mtod(m, struct ip *); register int hlen = ip->ip_hl << 2; register int s; register struct ifqueue *ifq; register struct vif *vifp; - if (ip->ip_p != ENCAP_PROTO) { - rip_input(m); - return; + if (!have_encap_tunnel) { + rip_input(m); + return; } /* * dump the packet if it's not to a multicast destination or if * we don't have an encapsulating tunnel with the source. * Note: This code assumes that the remote site IP address * uniquely identifies the tunnel (i.e., that this site has * at most one tunnel with the remote site). */ if (! IN_MULTICAST(ntohl(((struct ip *)((char *)ip + hlen))->ip_dst.s_addr))) { ++mrtstat.mrts_bad_tunnel; m_freem(m); return; } if (ip->ip_src.s_addr != last_encap_src) { register struct vif *vife; - + vifp = viftable; vife = vifp + numvifs; last_encap_src = ip->ip_src.s_addr; last_encap_vif = 0; for ( ; vifp < vife; ++vifp) if (vifp->v_rmt_addr.s_addr == ip->ip_src.s_addr) { if ((vifp->v_flags & (VIFF_TUNNEL|VIFF_SRCRT)) == VIFF_TUNNEL) last_encap_vif = vifp; break; } } if ((vifp = last_encap_vif) == 0) { last_encap_src = 0; mrtstat.mrts_cant_tunnel++; /*XXX*/ m_freem(m); if (mrtdebug) - log(LOG_DEBUG, "ip_mforward: no tunnel with %x\n", + log(LOG_DEBUG, "ip_mforward: no tunnel with %x", ntohl(ip->ip_src.s_addr)); return; } ifp = vifp->v_ifp; if (hlen > IP_HDR_LEN) ip_stripoptions(m, (struct mbuf *) 0); m->m_data += IP_HDR_LEN; m->m_len -= IP_HDR_LEN; m->m_pkthdr.len -= IP_HDR_LEN; m->m_pkthdr.rcvif = ifp; ifq = &ipintrq; s = splimp(); if (IF_QFULL(ifq)) { IF_DROP(ifq); m_freem(m); } else { IF_ENQUEUE(ifq, m); /* * normally we would need a "schednetisr(NETISR_IP)" * here but we were called by ip_input and it is going * to loop back & try to dequeue the packet we just * queued as soon as 
we return so we avoid the * unnecessary software interrrupt. */ } splx(s); } /* * Token bucket filter module */ -void -tbf_control(vifp, m, ip, p_len, imo) + +static void +tbf_control(vifp, m, ip, p_len) register struct vif *vifp; register struct mbuf *m; register struct ip *ip; register u_long p_len; - struct ip_moptions *imo; { + register struct tbf *t = vifp->v_tbf; + + if (p_len > MAX_BKT_SIZE) { + /* drop if packet is too large */ + mrtstat.mrts_pkt2large++; + m_freem(m); + return; + } + tbf_update_tokens(vifp); - /* if there are enough tokens, + /* if there are enough tokens, * and the queue is empty, * send this packet out */ - if (vifp->v_tbf->q_len == 0) { - if (p_len <= vifp->v_tbf->n_tok) { - vifp->v_tbf->n_tok -= p_len; - tbf_send_packet(vifp, m, imo); - } else if (p_len > MAX_BKT_SIZE) { - /* drop if packet is too large */ - mrtstat.mrts_pkt2large++; - m_freem(m); - return; + if (t->tbf_q_len == 0) { + /* queue empty, send packet if enough tokens */ + if (p_len <= t->tbf_n_tok) { + t->tbf_n_tok -= p_len; + tbf_send_packet(vifp, m); } else { /* queue packet and timeout till later */ - tbf_queue(vifp, m, ip, imo); - timeout(tbf_reprocess_q, (caddr_t)vifp, 1); + tbf_queue(vifp, m); + timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); } - } else if (vifp->v_tbf->q_len < MAXQSIZE) { + } else if (t->tbf_q_len < t->tbf_max_q_len) { /* finite queue length, so queue pkts and process queue */ - tbf_queue(vifp, m, ip, imo); + tbf_queue(vifp, m); tbf_process_q(vifp); } else { /* queue length too much, try to dq and queue and process */ if (!tbf_dq_sel(vifp, ip)) { mrtstat.mrts_q_overflow++; m_freem(m); return; } else { - tbf_queue(vifp, m, ip, imo); + tbf_queue(vifp, m); tbf_process_q(vifp); } } return; } -/* +/* * adds a packet to the queue at the interface */ -void -tbf_queue(vifp, m, ip, imo) +static void +tbf_queue(vifp, m) register struct vif *vifp; register struct mbuf *m; - register struct ip *ip; - struct ip_moptions *imo; { - register u_long ql; - 
register int index = (vifp - viftable); register int s = splnet(); + register struct tbf *t = vifp->v_tbf; - ql = vifp->v_tbf->q_len; + if (t->tbf_t == NULL) { + /* Queue was empty */ + t->tbf_q = m; + } else { + /* Insert at tail */ + t->tbf_t->m_act = m; + } - qtable[index][ql].pkt_m = m; - qtable[index][ql].pkt_len = (mtod(m, struct ip *))->ip_len; - qtable[index][ql].pkt_ip = ip; - qtable[index][ql].pkt_imo = imo; + /* Set new tail pointer */ + t->tbf_t = m; - vifp->v_tbf->q_len++; +#ifdef DIAGNOSTIC + /* Make sure we didn't get fed a bogus mbuf */ + if (m->m_act) + panic("tbf_queue: m_act"); +#endif + m->m_act = NULL; + + t->tbf_q_len++; + splx(s); } -/* +/* * processes the queue at the interface */ -void +static void tbf_process_q(vifp) register struct vif *vifp; { - register struct pkt_queue pkt_1; - register int index = (vifp - viftable); + register struct mbuf *m; + register int len; register int s = splnet(); + register struct tbf *t = vifp->v_tbf; /* loop through the queue at the interface and send as many packets * as possible */ - while (vifp->v_tbf->q_len > 0) { - /* locate the first packet */ - pkt_1.pkt_len = ((qtable[index][0]).pkt_len); - pkt_1.pkt_m = (qtable[index][0]).pkt_m; - pkt_1.pkt_ip = (qtable[index][0]).pkt_ip; - pkt_1.pkt_imo = (qtable[index][0]).pkt_imo; + while (t->tbf_q_len > 0) { + m = t->tbf_q; + len = mtod(m, struct ip *)->ip_len; + /* determine if the packet can be sent */ - if (pkt_1.pkt_len <= vifp->v_tbf->n_tok) { + if (len <= t->tbf_n_tok) { /* if so, - * reduce no of tokens, dequeue the queue, + * reduce no of tokens, dequeue the packet, * send the packet. 
*/ - vifp->v_tbf->n_tok -= pkt_1.pkt_len; + t->tbf_n_tok -= len; - tbf_dequeue(vifp, 0); + t->tbf_q = m->m_act; + if (--t->tbf_q_len == 0) + t->tbf_t = NULL; - tbf_send_packet(vifp, pkt_1.pkt_m, pkt_1.pkt_imo); + m->m_act = NULL; + tbf_send_packet(vifp, m); } else break; } splx(s); } -/* - * removes the jth packet from the queue at the interface - */ -void -tbf_dequeue(vifp,j) - register struct vif *vifp; - register int j; -{ - register u_long index = vifp - viftable; - register int i; - - for (i=j+1; i <= vifp->v_tbf->q_len - 1; i++) { - qtable[index][i-1].pkt_m = qtable[index][i].pkt_m; - qtable[index][i-1].pkt_len = qtable[index][i].pkt_len; - qtable[index][i-1].pkt_ip = qtable[index][i].pkt_ip; - qtable[index][i-1].pkt_imo = qtable[index][i].pkt_imo; - } - qtable[index][i-1].pkt_m = NULL; - qtable[index][i-1].pkt_len = NULL; - qtable[index][i-1].pkt_ip = NULL; - qtable[index][i-1].pkt_imo = NULL; - - vifp->v_tbf->q_len--; - - if (tbfdebug > 1) - log(LOG_DEBUG, "tbf_dequeue: vif# %d qlen %d\n",vifp-viftable, i-1); -} - -void +static void tbf_reprocess_q(xvifp) void *xvifp; { register struct vif *vifp = xvifp; - if (ip_mrouter == NULL) + if (ip_mrouter == NULL) return; tbf_update_tokens(vifp); tbf_process_q(vifp); - if (vifp->v_tbf->q_len) - timeout(tbf_reprocess_q, (caddr_t)vifp, 1); + if (vifp->v_tbf->tbf_q_len) + timeout(tbf_reprocess_q, (caddr_t)vifp, TBF_REPROCESS); } /* function that will selectively discard a member of the queue - * based on the precedence value and the priority obtained through - * a lookup table - not yet implemented accurately! 
+ * based on the precedence value and the priority */ -int +static int tbf_dq_sel(vifp, ip) register struct vif *vifp; register struct ip *ip; { register int i; register int s = splnet(); register u_int p; + register struct mbuf *m, *last; + register struct mbuf **np; + register struct tbf *t = vifp->v_tbf; p = priority(vifp, ip); - for(i=vifp->v_tbf->q_len-1;i >= 0;i--) { - if (p > priority(vifp, qtable[vifp-viftable][i].pkt_ip)) { - m_freem(qtable[vifp-viftable][i].pkt_m); - tbf_dequeue(vifp,i); + np = &t->tbf_q; + last = NULL; + while ((m = *np) != NULL) { + if (p > priority(vifp, mtod(m, struct ip *))) { + *np = m->m_act; + /* If we're removing the last packet, fix the tail pointer */ + if (m == t->tbf_t) + t->tbf_t = last; + m_freem(m); + /* it's impossible for the queue to be empty, but + * we check anyway. */ + if (--t->tbf_q_len == 0) + t->tbf_t = NULL; splx(s); mrtstat.mrts_drop_sel++; return(1); } + np = &m->m_act; + last = m; } splx(s); return(0); } -void -tbf_send_packet(vifp, m, imo) +static void +tbf_send_packet(vifp, m) register struct vif *vifp; register struct mbuf *m; - struct ip_moptions *imo; { + struct ip_moptions imo; int error; int s = splnet(); - /* if source route tunnels */ - if (vifp->v_flags & VIFF_SRCRT) { - error = ip_output(m, (struct mbuf *)0, (struct route *)0, - IP_FORWARDING, imo); - if (mrtdebug > 1) - log(LOG_DEBUG, "srcrt_send on vif %d err %d\n", vifp-viftable, error); - } else if (vifp->v_flags & VIFF_TUNNEL) { + if (vifp->v_flags & VIFF_TUNNEL) { /* If tunnel options */ ip_output(m, (struct mbuf *)0, (struct route *)0, - IP_FORWARDING, imo); + IP_FORWARDING, (struct ip_moptions *)0); } else { - /* if physical interface option, extract the options and then send */ + imo.imo_multicast_ifp = vifp->v_ifp; + imo.imo_multicast_ttl = mtod(m, struct ip *)->ip_ttl - 1; + imo.imo_multicast_loop = 1; + imo.imo_multicast_vif = -1; + error = ip_output(m, (struct mbuf *)0, (struct route *)0, - IP_FORWARDING, imo); - FREE(imo, M_IPMOPTS); 
+ IP_FORWARDING, &imo); - if (mrtdebug > 1) - log(LOG_DEBUG, "phyint_send on vif %d err %d\n", vifp-viftable, error); + if (mrtdebug & DEBUG_XMIT) + log(LOG_DEBUG, "phyint_send on vif %d err %d", vifp-viftable, error); } splx(s); } /* determine the current time and then * the elapsed time (between the last time and time now) * in milliseconds & update the no. of tokens in the bucket */ -void +static void tbf_update_tokens(vifp) register struct vif *vifp; { struct timeval tp; - register u_long t; - register u_long elapsed; + register u_long tm; register int s = splnet(); + register struct tbf *t = vifp->v_tbf; GET_TIME(tp); - t = tp.tv_sec*1000 + tp.tv_usec/1000; + TV_DELTA(tp, t->tbf_last_pkt_t, tm); - elapsed = (t - vifp->v_tbf->last_pkt_t) * vifp->v_rate_limit /8; - vifp->v_tbf->n_tok += elapsed; - vifp->v_tbf->last_pkt_t = t; + /* + * This formula is actually + * "time in seconds" * "bytes/second". + * + * (tm / 1000000) * (v_rate_limit * 1000 * (1000/1024) / 8) + * + * The (1000/1024) was introduced in add_vif to optimize + * this divide into a shift. 
+ */ + t->tbf_n_tok += tm * vifp->v_rate_limit / 1024 / 8; + t->tbf_last_pkt_t = tp; - if (vifp->v_tbf->n_tok > MAX_BKT_SIZE) - vifp->v_tbf->n_tok = MAX_BKT_SIZE; + if (t->tbf_n_tok > MAX_BKT_SIZE) + t->tbf_n_tok = MAX_BKT_SIZE; splx(s); } static int priority(vifp, ip) register struct vif *vifp; register struct ip *ip; { - register u_long graddr; register int prio; - /* temporary hack; will add general packet classifier some day */ + /* temporary hack; may add general packet classifier some day */ - prio = 50; /* default priority */ + /* + * The UDP port space is divided up into four priority ranges: + * [0, 16384) : unclassified - lowest priority + * [16384, 32768) : audio - highest priority + * [32768, 49152) : whiteboard - medium priority + * [49152, 65536) : video - low priority + */ + if (ip->ip_p == IPPROTO_UDP) { + struct udphdr *udp = (struct udphdr *)(((char *)ip) + (ip->ip_hl << 2)); + switch (ntohs(udp->uh_dport) & 0xc000) { + case 0x4000: + prio = 70; + break; + case 0x8000: + prio = 60; + break; + case 0xc000: + prio = 55; + break; + default: + prio = 50; + break; + } + if (tbfdebug > 1) + log(LOG_DEBUG, "port %x prio%d", ntohs(udp->uh_dport), prio); + } else { + prio = 50; + } + return prio; +} - /* check for source route options and add option length to get dst */ - if (vifp->v_flags & VIFF_SRCRT) - graddr = ntohl((ip+8)->ip_dst.s_addr); - else - graddr = ntohl(ip->ip_dst.s_addr); +/* + * End of token bucket filter modifications + */ - switch (graddr & 0xf) { - case 0x0: break; - case 0x1: if (graddr == 0xe0020001) prio = 65; /* MBone Audio */ - break; - case 0x2: break; - case 0x3: break; - case 0x4: break; - case 0x5: break; - case 0x6: break; - case 0x7: break; - case 0x8: break; - case 0x9: break; - case 0xa: if (graddr == 0xe000010a) prio = 85; /* IETF Low Audio 1 */ - break; - case 0xb: if (graddr == 0xe000010b) prio = 75; /* IETF Audio 1 */ - break; - case 0xc: if (graddr == 0xe000010c) prio = 60; /* IETF Video 1 */ - break; - case 0xd: if 
(graddr == 0xe000010d) prio = 80; /* IETF Low Audio 2 */ - break; - case 0xe: if (graddr == 0xe000010e) prio = 70; /* IETF Audio 2 */ - break; - case 0xf: if (graddr == 0xe000010f) prio = 55; /* IETF Video 2 */ - break; +int +ip_rsvp_vif_init(so, m) + struct socket *so; + struct mbuf *m; +{ + int i; + register int s; + + if (rsvpdebug) + printf("ip_rsvp_vif_init: so_type = %d, pr_protocol = %d\n", + so->so_type, so->so_proto->pr_protocol); + + if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) + return EOPNOTSUPP; + + /* Check mbuf. */ + if (m == NULL || m->m_len != sizeof(int)) { + return EINVAL; } + i = *(mtod(m, int *)); + + if (rsvpdebug) + printf("ip_rsvp_vif_init: vif = %d rsvp_on = %d\n",i,rsvp_on); + + s = splnet(); - if (tbfdebug > 1) log(LOG_DEBUG, "graddr%x prio%d\n", graddr, prio); + /* Check vif. */ + if (!legal_vif_num(i)) { + splx(s); + return EADDRNOTAVAIL; + } - return prio; + /* Check if socket is available. */ + if (viftable[i].v_rsvpd != NULL) { + splx(s); + return EADDRINUSE; + } + + viftable[i].v_rsvpd = so; + /* This may seem silly, but we need to be sure we don't over-increment + * the RSVP counter, in case something slips up. + */ + if (!viftable[i].v_rsvp_on) { + viftable[i].v_rsvp_on = 1; + rsvp_on++; + } + + splx(s); + return 0; } -/* - * End of token bucket filter modifications - */ +int +ip_rsvp_vif_done(so, m) + struct socket *so; + struct mbuf *m; +{ + int i; + register int s; + + if (rsvpdebug) + printf("ip_rsvp_vif_done: so_type = %d, pr_protocol = %d\n", + so->so_type, so->so_proto->pr_protocol); + + if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) + return EOPNOTSUPP; + + /* Check mbuf. */ + if (m == NULL || m->m_len != sizeof(int)) { + return EINVAL; + } + i = *(mtod(m, int *)); + + s = splnet(); + + /* Check vif. 
*/ + if (!legal_vif_num(i)) { + splx(s); + return EADDRNOTAVAIL; + } + if (rsvpdebug) + printf("ip_rsvp_vif_done: v_rsvpd = %x so = %x\n", + viftable[i].v_rsvpd, so); + + viftable[i].v_rsvpd = NULL; + /* This may seem silly, but we need to be sure we don't over-decrement + * the RSVP counter, in case something slips up. + */ + if (viftable[i].v_rsvp_on) { + viftable[i].v_rsvp_on = 0; + rsvp_on--; + } + + splx(s); + return 0; +} + +void +ip_rsvp_force_done(so) + struct socket *so; +{ + int vifi; + register int s; + + /* Don't bother if it is not the right type of socket. */ + if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) + return; + + s = splnet(); + + /* The socket may be attached to more than one vif...this + * is perfectly legal. + */ + for (vifi = 0; vifi < numvifs; vifi++) { + if (viftable[vifi].v_rsvpd == so) { + viftable[vifi].v_rsvpd = NULL; + /* This may seem silly, but we need to be sure we don't + * over-decrement the RSVP counter, in case something slips up. + */ + if (viftable[vifi].v_rsvp_on) { + viftable[vifi].v_rsvp_on = 0; + rsvp_on--; + } + } + } + + splx(s); + return; +} + +void +rsvp_input(m, iphlen) + struct mbuf *m; + int iphlen; +{ + int vifi; + register struct ip *ip = mtod(m, struct ip *); + static struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET }; + register int s; + struct ifnet *ifp; + + if (rsvpdebug) + printf("rsvp_input: rsvp_on %d\n",rsvp_on); + + /* Can still get packets with rsvp_on = 0 if there is a local member + * of the group to which the RSVP packet is addressed. But in this + * case we want to throw the packet away. + */ + if (!rsvp_on) { + m_freem(m); + return; + } + + /* If the old-style non-vif-associated socket is set, then use + * it and ignore the new ones. 
+ */ + if (ip_rsvpd != NULL) { + if (rsvpdebug) + printf("rsvp_input: Sending packet up old-style socket\n"); + rip_input(m); + return; + } + + s = splnet(); + + if (rsvpdebug) + printf("rsvp_input: check vifs\n"); + +#ifdef DIAGNOSTIC + if (!(m->m_flags & M_PKTHDR)) + panic("rsvp_input no hdr"); +#endif + + ifp = m->m_pkthdr.rcvif; + /* Find which vif the packet arrived on. */ + for (vifi = 0; vifi < numvifs; vifi++) { + if (viftable[vifi].v_ifp == ifp) + break; + } + + if (vifi == numvifs) { + /* Can't find vif packet arrived on. Drop packet. */ + if (rsvpdebug) + printf("rsvp_input: Can't find vif for packet...dropping it.\n"); + m_freem(m); + splx(s); + return; + } + + if (rsvpdebug) + printf("rsvp_input: check socket\n"); + + if (viftable[vifi].v_rsvpd == NULL) { + /* drop packet, since there is no specific socket for this + * interface */ + if (rsvpdebug) + printf("rsvp_input: No socket defined for vif %d\n",vifi); + m_freem(m); + splx(s); + return; + } + rsvp_src.sin_addr = ip->ip_src; + + if (rsvpdebug && m) + printf("rsvp_input: m->m_len = %d, sbspace() = %d\n", + m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); + + if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) + if (rsvpdebug) + printf("rsvp_input: Failed to append to socket\n"); + else + if (rsvpdebug) + printf("rsvp_input: send packet up\n"); + + splx(s); +} + #ifdef MROUTE_LKM #include #include #include #include MOD_MISC("ip_mroute_mod") static int ip_mroute_mod_handle(struct lkm_table *lkmtp, int cmd) { int i; struct lkm_misc *args = lkmtp->private.lkm_misc; int err = 0; switch(cmd) { static int (*old_ip_mrouter_cmd)(); static int (*old_ip_mrouter_done)(); static int (*old_ip_mforward)(); static int (*old_mrt_ioctl)(); static void (*old_proto4_input)(); static int (*old_legal_vif_num)(); extern struct protosw inetsw[]; case LKM_E_LOAD: if(lkmexists(lkmtp) || ip_mrtproto) return(EEXIST); old_ip_mrouter_cmd = ip_mrouter_cmd; ip_mrouter_cmd = X_ip_mrouter_cmd; old_ip_mrouter_done = 
ip_mrouter_done; ip_mrouter_done = X_ip_mrouter_done; old_ip_mforward = ip_mforward; ip_mforward = X_ip_mforward; old_mrt_ioctl = mrt_ioctl; mrt_ioctl = X_mrt_ioctl; old_proto4_input = inetsw[ip_protox[ENCAP_PROTO]].pr_input; - inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_multiencap_decap; + inetsw[ip_protox[ENCAP_PROTO]].pr_input = X_ipip_input; old_legal_vif_num = legal_vif_num; legal_vif_num = X_legal_vif_num; ip_mrtproto = IGMP_DVMRP; printf("\nIP multicast routing loaded\n"); break; case LKM_E_UNLOAD: if (ip_mrouter) return EINVAL; ip_mrouter_cmd = old_ip_mrouter_cmd; ip_mrouter_done = old_ip_mrouter_done; ip_mforward = old_ip_mforward; mrt_ioctl = old_mrt_ioctl; inetsw[ip_protox[ENCAP_PROTO]].pr_input = old_proto4_input; legal_vif_num = old_legal_vif_num; ip_mrtproto = 0; break; default: err = EINVAL; break; } return(err); } int ip_mroute_mod(struct lkm_table *lkmtp, int cmd, int ver) { DISPATCH(lkmtp, cmd, ver, ip_mroute_mod_handle, ip_mroute_mod_handle, nosys); } #endif /* MROUTE_LKM */ #endif /* MROUTING */ - - Index: stable/2.1/sys/netinet/ip_mroute.h =================================================================== --- stable/2.1/sys/netinet/ip_mroute.h (revision 10582) +++ stable/2.1/sys/netinet/ip_mroute.h (revision 10583) @@ -1,255 +1,258 @@ /* * Copyright (c) 1989 Stephen Deering. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Stephen Deering of Stanford University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_mroute.h 8.1 (Berkeley) 6/10/93 - * $Id: ip_mroute.h,v 1.6 1995/03/16 18:14:59 bde Exp $ + * $Id: ip_mroute.h,v 1.9 1995/08/23 18:20:17 wollman Exp $ */ #ifndef _NETINET_IP_MROUTE_H_ #define _NETINET_IP_MROUTE_H_ /* - * Definitions for the kernel part of DVMRP, - * a Distance-Vector Multicast Routing Protocol. - * (See RFC-1075.) + * Definitions for IP multicast forwarding. * * Written by David Waitzman, BBN Labs, August 1988. * Modified by Steve Deering, Stanford, February 1989. * Modified by Ajit Thyagarajan, PARC, August 1993. * Modified by Ajit Thyagarajan, PARC, August 1994. 
* - * MROUTING 1.5 + * MROUTING Revision: 3.3.1.3 */ /* - * DVMRP-specific setsockopt commands. + * Multicast Routing set/getsockopt commands. */ -#define DVMRP_INIT 100 /* initialize forwarder */ -#define DVMRP_DONE 101 /* shut down forwarder */ -#define DVMRP_ADD_VIF 102 /* create virtual interface */ -#define DVMRP_DEL_VIF 103 /* delete virtual interface */ -#define DVMRP_ADD_MFC 104 /* insert forwarding cache entry */ -#define DVMRP_DEL_MFC 105 /* delete forwarding cache entry */ +#define MRT_INIT 100 /* initialize forwarder */ +#define MRT_DONE 101 /* shut down forwarder */ +#define MRT_ADD_VIF 102 /* create virtual interface */ +#define MRT_DEL_VIF 103 /* delete virtual interface */ +#define MRT_ADD_MFC 104 /* insert forwarding cache entry */ +#define MRT_DEL_MFC 105 /* delete forwarding cache entry */ +#define MRT_VERSION 106 /* get kernel version number */ +#define MRT_ASSERT 107 /* enable PIM assert processing */ + #define GET_TIME(t) microtime(&t) /* * Types and macros for handling bitmaps with one bit per virtual interface. */ #define MAXVIFS 32 typedef u_long vifbitmap_t; typedef u_short vifi_t; /* type of a vif index */ +#define ALL_VIFS (vifi_t)-1 #define VIFM_SET(n, m) ((m) |= (1 << (n))) #define VIFM_CLR(n, m) ((m) &= ~(1 << (n))) #define VIFM_ISSET(n, m) ((m) & (1 << (n))) #define VIFM_CLRALL(m) ((m) = 0x00000000) #define VIFM_COPY(mfrom, mto) ((mto) = (mfrom)) #define VIFM_SAME(m1, m2) ((m1) == (m2)) /* - * Argument structure for DVMRP_ADD_VIF. - * (DVMRP_DEL_VIF takes a single vifi_t argument.) + * Argument structure for MRT_ADD_VIF. + * (MRT_DEL_VIF takes a single vifi_t argument.) 
*/ struct vifctl { - vifi_t vifc_vifi; /* the index of the vif to be added */ - u_char vifc_flags; /* VIFF_ flags defined below */ - u_char vifc_threshold; /* min ttl required to forward on vif */ - u_int vifc_rate_limit; /* max tate */ + vifi_t vifc_vifi; /* the index of the vif to be added */ + u_char vifc_flags; /* VIFF_ flags defined below */ + u_char vifc_threshold; /* min ttl required to forward on vif */ + u_int vifc_rate_limit; /* max rate */ struct in_addr vifc_lcl_addr; /* local interface address */ struct in_addr vifc_rmt_addr; /* remote address (tunnels only) */ }; #define VIFF_TUNNEL 0x1 /* vif represents a tunnel end-point */ -#define VIFF_SRCRT 0x2 /* tunnel uses IP source routing */ +#define VIFF_SRCRT 0x2 /* tunnel uses IP source routing */ /* - * Argument structure for DVMRP_ADD_MFC + * Argument structure for MRT_ADD_MFC and MRT_DEL_MFC * (mfcc_tos to be added at a future point) */ struct mfcctl { - struct in_addr mfcc_origin; /* subnet origin of mcasts */ + struct in_addr mfcc_origin; /* ip origin of mcasts */ struct in_addr mfcc_mcastgrp; /* multicast group associated*/ - struct in_addr mfcc_originmask; /* subnet mask for origin */ vifi_t mfcc_parent; /* incoming vif */ u_char mfcc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ }; /* - * Argument structure for DVMRP_DEL_MFC - */ -struct delmfcctl { - struct in_addr mfcc_origin; /* subnet origin of multicasts */ - struct in_addr mfcc_mcastgrp; /* multicast group assoc. w/ origin */ -}; - -/* - * Argument structure used by RSVP daemon to get vif information - */ -struct vif_req { - u_char v_flags; /* VIFF_ flags defined above */ - u_char v_threshold; /* min ttl required to forward on vif */ - struct in_addr v_lcl_addr; /* local interface address */ - struct in_addr v_rmt_addr; - char v_if_name[IFNAMSIZ]; /* if name */ -}; - -struct vif_conf { - u_int vifc_len; - u_int vifc_num; - struct vif_req *vifc_req; -}; - -/* * The kernel's multicast routing statistics. 
*/ struct mrtstat { u_long mrts_mfc_lookups; /* # forw. cache hash table hits */ u_long mrts_mfc_misses; /* # forw. cache hash table misses */ u_long mrts_upcalls; /* # calls to mrouted */ u_long mrts_no_route; /* no route for packet's origin */ u_long mrts_bad_tunnel; /* malformed tunnel options */ u_long mrts_cant_tunnel; /* no room for tunnel options */ u_long mrts_wrong_if; /* arrived on wrong interface */ u_long mrts_upq_ovflw; /* upcall Q overflow */ u_long mrts_cache_cleanups; /* # entries with no upcalls */ u_long mrts_drop_sel; /* pkts dropped selectively */ u_long mrts_q_overflow; /* pkts dropped - Q overflow */ u_long mrts_pkt2large; /* pkts dropped - size > BKT SIZE */ + u_long mrts_upq_sockfull; /* upcalls dropped - socket full */ }; /* * Argument structure used by mrouted to get src-grp pkt counts */ struct sioc_sg_req { struct in_addr src; struct in_addr grp; - u_long count; + u_long pktcnt; + u_long bytecnt; + u_long wrong_if; }; /* * Argument structure used by mrouted to get vif pkt counts */ struct sioc_vif_req { - vifi_t vifi; - u_long icount; - u_long ocount; + vifi_t vifi; /* vif number */ + u_long icount; /* Input packet count on vif */ + u_long ocount; /* Output packet count on vif */ + u_long ibytes; /* Input byte count on vif */ + u_long obytes; /* Output byte count on vif */ }; + - #ifdef KERNEL +/* + * The kernel's virtual-interface structure. + */ struct vif { u_char v_flags; /* VIFF_ flags defined above */ u_char v_threshold; /* min ttl required to forward on vif*/ u_int v_rate_limit; /* max rate */ struct tbf *v_tbf; /* token bucket structure at intf. 
*/ struct in_addr v_lcl_addr; /* local interface address */ struct in_addr v_rmt_addr; /* remote address (tunnels only) */ struct ifnet *v_ifp; /* pointer to interface */ u_long v_pkt_in; /* # pkts in on interface */ u_long v_pkt_out; /* # pkts out on interface */ + u_long v_bytes_in; /* # bytes in on interface */ + u_long v_bytes_out; /* # bytes out on interface */ + struct route v_route; /* cached route if this is a tunnel */ + u_int v_rsvp_on; /* RSVP listening on this vif */ + struct socket *v_rsvpd; /* RSVP daemon socket */ }; /* - * The kernel's multicast forwarding cache entry structure - * (A field for the type of service (mfc_tos) is to be added + * The kernel's multicast forwarding cache entry structure + * (A field for the type of service (mfc_tos) is to be added * at a future point) */ struct mfc { - struct in_addr mfc_origin; /* subnet origin of mcasts */ + struct in_addr mfc_origin; /* IP origin of mcasts */ struct in_addr mfc_mcastgrp; /* multicast group associated*/ - struct in_addr mfc_originmask; /* subnet mask for origin */ vifi_t mfc_parent; /* incoming vif */ u_char mfc_ttls[MAXVIFS]; /* forwarding ttls on vifs */ u_long mfc_pkt_cnt; /* pkt count for src-grp */ + u_long mfc_byte_cnt; /* byte count for src-grp */ + u_long mfc_wrong_if; /* wrong if for src-grp */ + int mfc_expire; /* time to clean entry up */ + struct timeval mfc_last_assert; /* last time I sent an assert*/ }; /* + * Struct used to communicate from kernel to multicast router + * note the convenient similarity to an IP packet + */ +struct igmpmsg { + u_long unused1; + u_long unused2; + u_char im_msgtype; /* what type of message */ +#define IGMPMSG_NOCACHE 1 +#define IGMPMSG_WRONGVIF 2 + u_char im_mbz; /* must be zero */ + u_char im_vif; /* vif rec'd on */ + u_char unused3; + struct in_addr im_src, im_dst; +}; + +/* * Argument structure used for pkt info. 
while upcall is made */ struct rtdetq { - struct mbuf *m; - struct ifnet *ifp; - u_long tunnel_src; - struct ip_moptions *imo; + struct mbuf *m; /* A copy of the packet */ + struct ifnet *ifp; /* Interface pkt came in on */ + vifi_t xmt_vif; /* Saved copy of imo_multicast_vif */ +#ifdef UPCALL_TIMING + struct timeval t; /* Timestamp */ +#endif /* UPCALL_TIMING */ }; #define MFCTBLSIZ 256 #if (MFCTBLSIZ & (MFCTBLSIZ - 1)) == 0 /* from sys:route.h */ #define MFCHASHMOD(h) ((h) & (MFCTBLSIZ - 1)) #else #define MFCHASHMOD(h) ((h) % MFCTBLSIZ) #endif #define MAX_UPQ 4 /* max. no of pkts in upcall Q */ /* - * Token Bucket filter code + * Token Bucket filter code */ #define MAX_BKT_SIZE 10000 /* 10K bytes size */ #define MAXQSIZE 10 /* max # of pkts in queue */ /* - * queue structure at each vif - */ -struct pkt_queue -{ - u_long pkt_len; /* length of packet in queue */ - struct mbuf *pkt_m; /* pointer to packet mbuf */ - struct ip *pkt_ip; /* pointer to ip header */ - struct ip_moptions *pkt_imo; /* IP multicast options assoc. with pkt */ -}; - -/* * the token bucket filter at each vif */ struct tbf { - u_long last_pkt_t; /* arr. time of last pkt */ - u_long n_tok; /* no of tokens in bucket */ - u_long q_len; /* length of queue at this vif */ + struct timeval tbf_last_pkt_t; /* arr. time of last pkt */ + u_long tbf_n_tok; /* no of tokens in bucket */ + u_long tbf_q_len; /* length of queue at this vif */ + u_long tbf_max_q_len; /* max. 
queue length */ + struct mbuf *tbf_q; /* Packet queue */ + struct mbuf *tbf_t; /* tail-insertion pointer */ }; -extern int (*ip_mrouter_cmd) __P((int, struct socket *, struct mbuf *)); +extern int (*ip_mrouter_set) __P((int, struct socket *, struct mbuf *)); +extern int (*ip_mrouter_get) __P((int, struct socket *, struct mbuf **)); extern int (*ip_mrouter_done) __P((void)); extern int (*mrt_ioctl) __P((int, caddr_t, struct proc *)); #endif /* KERNEL */ #endif /* _NETINET_IP_MROUTE_H_ */ Index: stable/2.1/sys/netinet/ip_output.c =================================================================== --- stable/2.1/sys/netinet/ip_output.c (revision 10582) +++ stable/2.1/sys/netinet/ip_output.c (revision 10583) @@ -1,1152 +1,1157 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_output.c 8.3 (Berkeley) 1/21/94 - * $Id: ip_output.c,v 1.19 1995/05/30 08:09:49 rgrimes Exp $ + * $Id: ip_output.c,v 1.23 1995/07/26 18:05:13 wollman Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef vax #include #endif u_short ip_id; static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *)); static void ip_mloopback __P((struct ifnet *, struct mbuf *, struct sockaddr_in *)); /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. 
*/ int ip_output(m0, opt, ro, flags, imo) struct mbuf *m0; struct mbuf *opt; struct route *ro; int flags; struct ip_moptions *imo; { register struct ip *ip, *mhip; register struct ifnet *ifp; register struct mbuf *m = m0; register int hlen = sizeof (struct ip); int len, off, error = 0; struct route iproute; struct sockaddr_in *dst; struct in_ifaddr *ia; #ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("ip_output no HDR"); #endif if (opt) { m = ip_insertoptions(m, opt, &len); hlen = len; } ip = mtod(m, struct ip *); /* * Fill in IP header. */ if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) { ip->ip_v = IPVERSION; ip->ip_off &= IP_DF; ip->ip_id = htons(ip_id++); ip->ip_hl = hlen >> 2; ipstat.ips_localout++; } else { hlen = ip->ip_hl << 2; } /* * Route packet. */ if (ro == 0) { ro = &iproute; bzero((caddr_t)ro, sizeof (*ro)); } dst = (struct sockaddr_in *)&ro->ro_dst; /* * If there is a cached route, * check that it is to the same destination * and is still up. If not, free it and try again. */ if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || dst->sin_addr.s_addr != ip->ip_dst.s_addr)) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if (ro->ro_rt == 0) { dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); dst->sin_addr = ip->ip_dst; } /* * If routing to interface only, * short circuit routing lookup. */ #define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) #define sintosa(sin) ((struct sockaddr *)(sin)) if (flags & IP_ROUTETOIF) { if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 && (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) { ipstat.ips_noroute++; error = ENETUNREACH; goto bad; } ifp = ia->ia_ifp; ip->ip_ttl = 1; } else { /* * If this is the case, we probably don't want to allocate * a protocol-cloned route since we didn't get one from the * ULP. This lets TCP do its thing, while not burdening * forwarding or ICMP with the overhead of cloning a route. 
* Of course, we still want to do any cloning requested by * the link layer, as this is probably required in all cases * for correct operation (as it is for ARP). */ if (ro->ro_rt == 0) rtalloc_ign(ro, RTF_PRCLONING); if (ro->ro_rt == 0) { ipstat.ips_noroute++; error = EHOSTUNREACH; goto bad; } ia = ifatoia(ro->ro_rt->rt_ifa); ifp = ro->ro_rt->rt_ifp; ro->ro_rt->rt_use++; if (ro->ro_rt->rt_flags & RTF_GATEWAY) dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway; } if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { struct in_multi *inm; m->m_flags |= M_MCAST; /* * IP destination address is multicast. Make sure "dst" * still points to the address in "ro". (It may have been * changed to point to a gateway address, above.) */ dst = (struct sockaddr_in *)&ro->ro_dst; /* * See if the caller provided any multicast options */ if (imo != NULL) { ip->ip_ttl = imo->imo_multicast_ttl; if (imo->imo_multicast_ifp != NULL) ifp = imo->imo_multicast_ifp; + if (imo->imo_multicast_vif != -1) + ip->ip_src.s_addr = + ip_mcast_src(imo->imo_multicast_vif); } else ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL; /* * Confirm that the outgoing interface supports multicast. */ - if ((ifp->if_flags & IFF_MULTICAST) == 0) { - ipstat.ips_noroute++; - error = ENETUNREACH; - goto bad; + if ((imo == NULL) || (imo->imo_multicast_vif == -1)) { + if ((ifp->if_flags & IFF_MULTICAST) == 0) { + ipstat.ips_noroute++; + error = ENETUNREACH; + goto bad; + } } /* * If source address not specified yet, use address * of outgoing interface. */ if (ip->ip_src.s_addr == INADDR_ANY) { register struct in_ifaddr *ia; for (ia = in_ifaddr; ia; ia = ia->ia_next) if (ia->ia_ifp == ifp) { ip->ip_src = IA_SIN(ia)->sin_addr; break; } } IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm); if (inm != NULL && (imo == NULL || imo->imo_multicast_loop)) { /* * If we belong to the destination multicast group * on the outgoing interface, and the caller did not * forbid loopback, loop back a copy. 
*/ ip_mloopback(ifp, m, dst); } else { /* * If we are acting as a multicast router, perform * multicast forwarding as if the packet had just * arrived on the interface to which we are about * to send. The multicast forwarding function * recursively calls this function, using the * IP_FORWARDING flag to prevent infinite recursion. * * Multicasts that are looped back by ip_mloopback(), * above, will be forwarded by the ip_input() routine, * if necessary. */ if (ip_mrouter && (flags & IP_FORWARDING) == 0) { /* * Check if rsvp daemon is running. If not, don't * set ip_moptions. This ensures that the packet * is multicast and not just sent down one link * as prescribed by rsvpd. */ - if (ip_rsvpd == NULL) + if (!rsvp_on) imo = NULL; if (ip_mforward(ip, ifp, m, imo) != 0) { m_freem(m); goto done; } } } /* * Multicasts with a time-to-live of zero may be looped- * back, above, but must not be transmitted on a network. * Also, multicasts addressed to the loopback interface * are not sent -- the above call to ip_mloopback() will * loop back a copy if this host actually belongs to the * destination group on the loopback interface. */ if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) { m_freem(m); goto done; } goto sendit; } #ifndef notdef /* * If source address not specified yet, use address * of outgoing interface. */ if (ip->ip_src.s_addr == INADDR_ANY) ip->ip_src = IA_SIN(ia)->sin_addr; #endif /* * Verify that we have any chance at all of being able to queue * the packet or packet fragments */ if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >= ifp->if_snd.ifq_maxlen) { error = ENOBUFS; goto bad; } /* * Look for broadcast address and * and verify user is allowed to send * such a packet. 
*/ if (in_broadcast(dst->sin_addr, ifp)) { if ((ifp->if_flags & IFF_BROADCAST) == 0) { error = EADDRNOTAVAIL; goto bad; } if ((flags & IP_ALLOWBROADCAST) == 0) { error = EACCES; goto bad; } /* don't allow broadcast messages to be fragmented */ if ((u_short)ip->ip_len > ifp->if_mtu) { error = EMSGSIZE; goto bad; } m->m_flags |= M_BCAST; } else m->m_flags &= ~M_BCAST; sendit: /* * If small enough for interface, can just send directly. */ if ((u_short)ip->ip_len <= ifp->if_mtu) { ip->ip_len = htons((u_short)ip->ip_len); ip->ip_off = htons((u_short)ip->ip_off); ip->ip_sum = 0; ip->ip_sum = in_cksum(m, hlen); error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro->ro_rt); goto done; } /* * Too large for interface; fragment if possible. * Must be able to put at least 8 bytes per fragment. */ if (ip->ip_off & IP_DF) { error = EMSGSIZE; ipstat.ips_cantfrag++; goto bad; } len = (ifp->if_mtu - hlen) &~ 7; if (len < 8) { error = EMSGSIZE; goto bad; } { int mhlen, firstlen = len; struct mbuf **mnext = &m->m_nextpkt; /* * Loop through length of segment after first fragment, * make new header and copy data of each part and link onto chain. */ m0 = m; mhlen = sizeof (struct ip); for (off = hlen + len; off < (u_short)ip->ip_len; off += len) { MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m == 0) { error = ENOBUFS; ipstat.ips_odropped++; goto sendorfree; } m->m_data += max_linkhdr; mhip = mtod(m, struct ip *); *mhip = *ip; if (hlen > sizeof (struct ip)) { mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip); mhip->ip_hl = mhlen >> 2; } m->m_len = mhlen; mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF); if (ip->ip_off & IP_MF) mhip->ip_off |= IP_MF; if (off + len >= (u_short)ip->ip_len) len = (u_short)ip->ip_len - off; else mhip->ip_off |= IP_MF; mhip->ip_len = htons((u_short)(len + mhlen)); m->m_next = m_copy(m0, off, len); if (m->m_next == 0) { (void) m_free(m); error = ENOBUFS; /* ??? 
*/ ipstat.ips_odropped++; goto sendorfree; } m->m_pkthdr.len = mhlen + len; m->m_pkthdr.rcvif = (struct ifnet *)0; mhip->ip_off = htons((u_short)mhip->ip_off); mhip->ip_sum = 0; mhip->ip_sum = in_cksum(m, mhlen); *mnext = m; mnext = &m->m_nextpkt; ipstat.ips_ofragments++; } /* * Update first fragment by trimming what's been copied out * and updating header, then send each fragment (in order). */ m = m0; m_adj(m, hlen + firstlen - (u_short)ip->ip_len); m->m_pkthdr.len = hlen + firstlen; ip->ip_len = htons((u_short)m->m_pkthdr.len); ip->ip_off = htons((u_short)(ip->ip_off | IP_MF)); ip->ip_sum = 0; ip->ip_sum = in_cksum(m, hlen); sendorfree: for (m = m0; m; m = m0) { m0 = m->m_nextpkt; m->m_nextpkt = 0; if (error == 0) error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, ro->ro_rt); else m_freem(m); } if (error == 0) ipstat.ips_fragmented++; } done: if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt) RTFREE(ro->ro_rt); /* * Count outgoing packet,here we count both our packets and * those we forward. * Here we want to convert ip_len to host byte order when counting * so we set 3rd arg to 1. * This is locally generated packet so it has not * incoming interface. */ if (ip_acct_cnt_ptr!=NULL) (*ip_acct_cnt_ptr)(ip,NULL,ip_acct_chain,1); return (error); bad: m_freem(m0); goto done; } /* * Insert IP options into preformed packet. * Adjust IP destination as required for IP source routing, * as indicated by a non-zero in_addr at the start of the options. 
*/ static struct mbuf * ip_insertoptions(m, opt, phlen) register struct mbuf *m; struct mbuf *opt; int *phlen; { register struct ipoption *p = mtod(opt, struct ipoption *); struct mbuf *n; register struct ip *ip = mtod(m, struct ip *); unsigned optlen; optlen = opt->m_len - sizeof(p->ipopt_dst); if (optlen + (u_short)ip->ip_len > IP_MAXPACKET) return (m); /* XXX should fail */ if (p->ipopt_dst.s_addr) ip->ip_dst = p->ipopt_dst; if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) { MGETHDR(n, M_DONTWAIT, MT_HEADER); if (n == 0) return (m); n->m_pkthdr.len = m->m_pkthdr.len + optlen; m->m_len -= sizeof(struct ip); m->m_data += sizeof(struct ip); n->m_next = m; m = n; m->m_len = optlen + sizeof(struct ip); m->m_data += max_linkhdr; (void)memcpy(mtod(m, void *), ip, sizeof(struct ip)); } else { m->m_data -= optlen; m->m_len += optlen; m->m_pkthdr.len += optlen; ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); } ip = mtod(m, struct ip *); (void)memcpy(ip + 1, p->ipopt_list, (unsigned)optlen); *phlen = sizeof(struct ip) + optlen; ip->ip_len += optlen; return (m); } /* * Copy options from ip to jp, * omitting those not copied during fragmentation. */ int ip_optcopy(ip, jp) struct ip *ip, *jp; { register u_char *cp, *dp; int opt, optlen, cnt; cp = (u_char *)(ip + 1); dp = (u_char *)(jp + 1); cnt = (ip->ip_hl << 2) - sizeof (struct ip); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[0]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) { /* Preserve for IP mcast tunnel's LSRR alignment. */ *dp++ = IPOPT_NOP; optlen = 1; continue; } else optlen = cp[IPOPT_OLEN]; /* bogus lengths should have been caught by ip_dooptions */ if (optlen > cnt) optlen = cnt; if (IPOPT_COPIED(opt)) { (void)memcpy(dp, cp, (unsigned)optlen); dp += optlen; } } for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++) *dp++ = IPOPT_EOL; return (optlen); } /* * IP socket option processing. 
*/ int ip_ctloutput(op, so, level, optname, mp) int op; struct socket *so; int level, optname; struct mbuf **mp; { register struct inpcb *inp = sotoinpcb(so); register struct mbuf *m = *mp; register int optval = 0; int error = 0; if (level != IPPROTO_IP) { error = EINVAL; if (op == PRCO_SETOPT && *mp) (void) m_free(*mp); } else switch (op) { case PRCO_SETOPT: switch (optname) { case IP_OPTIONS: #ifdef notyet case IP_RETOPTS: return (ip_pcbopts(optname, &inp->inp_options, m)); #else return (ip_pcbopts(&inp->inp_options, m)); #endif case IP_TOS: case IP_TTL: case IP_RECVOPTS: case IP_RECVRETOPTS: case IP_RECVDSTADDR: if (m == 0 || m->m_len != sizeof(int)) error = EINVAL; else { optval = *mtod(m, int *); switch (optname) { case IP_TOS: inp->inp_ip.ip_tos = optval; break; case IP_TTL: inp->inp_ip.ip_ttl = optval; break; #define OPTSET(bit) \ if (optval) \ inp->inp_flags |= bit; \ else \ inp->inp_flags &= ~bit; case IP_RECVOPTS: OPTSET(INP_RECVOPTS); break; case IP_RECVRETOPTS: OPTSET(INP_RECVRETOPTS); break; case IP_RECVDSTADDR: OPTSET(INP_RECVDSTADDR); break; } } break; #undef OPTSET case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: error = ip_setmoptions(optname, &inp->inp_moptions, m); break; default: error = ENOPROTOOPT; break; } if (m) (void)m_free(m); break; case PRCO_GETOPT: switch (optname) { case IP_OPTIONS: case IP_RETOPTS: *mp = m = m_get(M_WAIT, MT_SOOPTS); if (inp->inp_options) { m->m_len = inp->inp_options->m_len; (void)memcpy(mtod(m, void *), mtod(inp->inp_options, void *), (unsigned)m->m_len); } else m->m_len = 0; break; case IP_TOS: case IP_TTL: case IP_RECVOPTS: case IP_RECVRETOPTS: case IP_RECVDSTADDR: *mp = m = m_get(M_WAIT, MT_SOOPTS); m->m_len = sizeof(int); switch (optname) { case IP_TOS: optval = inp->inp_ip.ip_tos; break; case IP_TTL: optval = inp->inp_ip.ip_ttl; break; #define OPTBIT(bit) (inp->inp_flags & bit ? 
1 : 0) case IP_RECVOPTS: optval = OPTBIT(INP_RECVOPTS); break; case IP_RECVRETOPTS: optval = OPTBIT(INP_RECVRETOPTS); break; case IP_RECVDSTADDR: optval = OPTBIT(INP_RECVDSTADDR); break; } *mtod(m, int *) = optval; break; case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: error = ip_getmoptions(optname, inp->inp_moptions, mp); break; default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Set up IP options in pcb for insertion in output packets. * Store in mbuf with pointer in pcbopt, adding pseudo-option * with destination address if source routed. */ int #ifdef notyet ip_pcbopts(optname, pcbopt, m) int optname; #else ip_pcbopts(pcbopt, m) #endif struct mbuf **pcbopt; register struct mbuf *m; { register cnt, optlen; register u_char *cp; u_char opt; /* turn off any old options */ if (*pcbopt) (void)m_free(*pcbopt); *pcbopt = 0; if (m == (struct mbuf *)0 || m->m_len == 0) { /* * Only turning off any previous options. */ if (m) (void)m_free(m); return (0); } #ifndef vax if (m->m_len % sizeof(long)) goto bad; #endif /* * IP first-hop destination address will be stored before * actual options; move other options back * and clear it when none present. */ if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN]) goto bad; cnt = m->m_len; m->m_len += sizeof(struct in_addr); cp = mtod(m, u_char *) + sizeof(struct in_addr); ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt); bzero(mtod(m, caddr_t), sizeof(struct in_addr)); for (; cnt > 0; cnt -= optlen, cp += optlen) { opt = cp[IPOPT_OPTVAL]; if (opt == IPOPT_EOL) break; if (opt == IPOPT_NOP) optlen = 1; else { optlen = cp[IPOPT_OLEN]; if (optlen <= IPOPT_OLEN || optlen > cnt) goto bad; } switch (opt) { default: break; case IPOPT_LSRR: case IPOPT_SSRR: /* * user process specifies route as: * ->A->B->C->D * D must be our final destination (but we can't * check that since we may not have connected yet). 
* A is first hop destination, which doesn't appear in * actual IP option, but is stored before the options. */ if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr)) goto bad; m->m_len -= sizeof(struct in_addr); cnt -= sizeof(struct in_addr); optlen -= sizeof(struct in_addr); cp[IPOPT_OLEN] = optlen; /* * Move first hop before start of options. */ bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t), sizeof(struct in_addr)); /* * Then copy rest of options back * to close up the deleted entry. */ ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] + sizeof(struct in_addr)), (caddr_t)&cp[IPOPT_OFFSET+1], (unsigned)cnt + sizeof(struct in_addr)); break; } } if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr)) goto bad; *pcbopt = m; return (0); bad: (void)m_free(m); return (EINVAL); } /* * Set the IP multicast options in response to user setsockopt(). */ int ip_setmoptions(optname, imop, m) int optname; struct ip_moptions **imop; struct mbuf *m; { register int error = 0; u_char loop; register int i; struct in_addr addr; register struct ip_mreq *mreq; register struct ifnet *ifp; register struct ip_moptions *imo = *imop; struct route ro; register struct sockaddr_in *dst; int s; if (imo == NULL) { /* * No multicast option buffer attached to the pcb; * allocate one and initialize to default values. 
*/ imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); if (imo == NULL) return (ENOBUFS); *imop = imo; imo->imo_multicast_ifp = NULL; - imo->imo_multicast_vif = 0; + imo->imo_multicast_vif = -1; imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; imo->imo_num_memberships = 0; } switch (optname) { /* store an index number for the vif you wanna use in the send */ case IP_MULTICAST_VIF: if (!legal_vif_num) { error = EOPNOTSUPP; break; } if (m == NULL || m->m_len != sizeof(int)) { error = EINVAL; break; } i = *(mtod(m, int *)); - if (!legal_vif_num(i)) { + if (!legal_vif_num(i) && (i != -1)) { error = EINVAL; break; } imo->imo_multicast_vif = i; break; case IP_MULTICAST_IF: /* * Select the interface for outgoing multicast packets. */ if (m == NULL || m->m_len != sizeof(struct in_addr)) { error = EINVAL; break; } addr = *(mtod(m, struct in_addr *)); /* * INADDR_ANY is used to remove a previous selection. * When no interface is selected, a default one is * chosen every time a multicast packet is sent. */ if (addr.s_addr == INADDR_ANY) { imo->imo_multicast_ifp = NULL; break; } /* * The selected interface is identified by its local * IP address. Find the interface and confirm that * it supports multicasting. */ s = splimp(); INADDR_TO_IFP(addr, ifp); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; break; } imo->imo_multicast_ifp = ifp; splx(s); break; case IP_MULTICAST_TTL: /* * Set the IP time-to-live for outgoing multicast packets. */ if (m == NULL || m->m_len != 1) { error = EINVAL; break; } imo->imo_multicast_ttl = *(mtod(m, u_char *)); break; case IP_MULTICAST_LOOP: /* * Set the loopback flag for outgoing multicast packets. * Must be zero or one. */ if (m == NULL || m->m_len != 1 || (loop = *(mtod(m, u_char *))) > 1) { error = EINVAL; break; } imo->imo_multicast_loop = loop; break; case IP_ADD_MEMBERSHIP: /* * Add a multicast group membership. 
* Group must be a valid IP multicast address. */ if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { error = EINVAL; break; } mreq = mtod(m, struct ip_mreq *); if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) { error = EINVAL; break; } s = splimp(); /* * If no interface address was provided, use the interface of * the route to the given multicast address. */ if (mreq->imr_interface.s_addr == INADDR_ANY) { - ro.ro_rt = NULL; + bzero((caddr_t)&ro, sizeof(ro)); dst = (struct sockaddr_in *)&ro.ro_dst; dst->sin_len = sizeof(*dst); dst->sin_family = AF_INET; dst->sin_addr = mreq->imr_multiaddr; rtalloc(&ro); if (ro.ro_rt == NULL) { error = EADDRNOTAVAIL; splx(s); break; } ifp = ro.ro_rt->rt_ifp; rtfree(ro.ro_rt); } else { INADDR_TO_IFP(mreq->imr_interface, ifp); } /* * See if we found an interface, and confirm that it * supports multicast. */ if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { error = EADDRNOTAVAIL; splx(s); break; } /* * See if the membership already exists or if all the * membership slots are full. */ for (i = 0; i < imo->imo_num_memberships; ++i) { if (imo->imo_membership[i]->inm_ifp == ifp && imo->imo_membership[i]->inm_addr.s_addr == mreq->imr_multiaddr.s_addr) break; } if (i < imo->imo_num_memberships) { error = EADDRINUSE; splx(s); break; } if (i == IP_MAX_MEMBERSHIPS) { error = ETOOMANYREFS; splx(s); break; } /* * Everything looks good; add a new record to the multicast * address list for the given interface. */ if ((imo->imo_membership[i] = in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) { error = ENOBUFS; splx(s); break; } ++imo->imo_num_memberships; splx(s); break; case IP_DROP_MEMBERSHIP: /* * Drop a multicast group membership. * Group must be a valid IP multicast address. 
*/ if (m == NULL || m->m_len != sizeof(struct ip_mreq)) { error = EINVAL; break; } mreq = mtod(m, struct ip_mreq *); if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) { error = EINVAL; break; } s = splimp(); /* * If an interface address was specified, get a pointer * to its ifnet structure. */ if (mreq->imr_interface.s_addr == INADDR_ANY) ifp = NULL; else { INADDR_TO_IFP(mreq->imr_interface, ifp); if (ifp == NULL) { error = EADDRNOTAVAIL; splx(s); break; } } /* * Find the membership in the membership array. */ for (i = 0; i < imo->imo_num_memberships; ++i) { if ((ifp == NULL || imo->imo_membership[i]->inm_ifp == ifp) && imo->imo_membership[i]->inm_addr.s_addr == mreq->imr_multiaddr.s_addr) break; } if (i == imo->imo_num_memberships) { error = EADDRNOTAVAIL; splx(s); break; } /* * Give up the multicast address record to which the * membership points. */ in_delmulti(imo->imo_membership[i]); /* * Remove the gap in the membership array. */ for (++i; i < imo->imo_num_memberships; ++i) imo->imo_membership[i-1] = imo->imo_membership[i]; --imo->imo_num_memberships; splx(s); break; default: error = EOPNOTSUPP; break; } /* * If all options have default values, no need to keep the mbuf. */ if (imo->imo_multicast_ifp == NULL && - imo->imo_multicast_vif == 0 && + imo->imo_multicast_vif == -1 && imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL && imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP && imo->imo_num_memberships == 0) { free(*imop, M_IPMOPTS); *imop = NULL; } return (error); } /* * Return the IP multicast options in response to user getsockopt(). 
*/ int ip_getmoptions(optname, imo, mp) int optname; register struct ip_moptions *imo; register struct mbuf **mp; { u_char *ttl; u_char *loop; struct in_addr *addr; struct in_ifaddr *ia; *mp = m_get(M_WAIT, MT_SOOPTS); switch (optname) { - case IP_MULTICAST_VIF: + case IP_MULTICAST_VIF: if (imo != NULL) *(mtod(*mp, int *)) = imo->imo_multicast_vif; else - *(mtod(*mp, int *)) = 7890; + *(mtod(*mp, int *)) = -1; (*mp)->m_len = sizeof(int); return(0); case IP_MULTICAST_IF: addr = mtod(*mp, struct in_addr *); (*mp)->m_len = sizeof(struct in_addr); if (imo == NULL || imo->imo_multicast_ifp == NULL) addr->s_addr = INADDR_ANY; else { IFP_TO_IA(imo->imo_multicast_ifp, ia); addr->s_addr = (ia == NULL) ? INADDR_ANY : IA_SIN(ia)->sin_addr.s_addr; } return (0); case IP_MULTICAST_TTL: ttl = mtod(*mp, u_char *); (*mp)->m_len = 1; *ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL : imo->imo_multicast_ttl; return (0); case IP_MULTICAST_LOOP: loop = mtod(*mp, u_char *); (*mp)->m_len = 1; *loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP : imo->imo_multicast_loop; return (0); default: return (EOPNOTSUPP); } } /* * Discard the IP multicast options. */ void ip_freemoptions(imo) register struct ip_moptions *imo; { register int i; if (imo != NULL) { for (i = 0; i < imo->imo_num_memberships; ++i) in_delmulti(imo->imo_membership[i]); free(imo, M_IPMOPTS); } } /* * Routine called from ip_output() to loop back a copy of an IP multicast * packet to the input queue of a specified interface. Note that this * calls the output routine of the loopback "driver", but with an interface * pointer that might NOT be a loopback interface -- evil, but easier than * replicating that code here. */ static void ip_mloopback(ifp, m, dst) struct ifnet *ifp; register struct mbuf *m; register struct sockaddr_in *dst; { register struct ip *ip; struct mbuf *copym; copym = m_copy(m, 0, M_COPYALL); if (copym != NULL) { /* * We don't bother to fragment if the IP length is greater * than the interface's MTU. 
Can this possibly matter? */ ip = mtod(copym, struct ip *); ip->ip_len = htons((u_short)ip->ip_len); ip->ip_off = htons((u_short)ip->ip_off); ip->ip_sum = 0; ip->ip_sum = in_cksum(copym, ip->ip_hl << 2); (void) looutput(ifp, copym, (struct sockaddr *)dst, NULL); } } Index: stable/2.1/sys/netinet/ip_var.h =================================================================== --- stable/2.1/sys/netinet/ip_var.h (revision 10582) +++ stable/2.1/sys/netinet/ip_var.h (revision 10583) @@ -1,205 +1,210 @@ /* * Copyright (c) 1982, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_var.h 8.1 (Berkeley) 6/10/93 - * $Id: ip_var.h,v 1.9 1995/03/16 18:15:01 bde Exp $ + * $Id: ip_var.h,v 1.13 1995/07/26 18:05:16 wollman Exp $ */ #ifndef _NETINET_IP_VAR_H_ #define _NETINET_IP_VAR_H_ /* * Overlay for ip header used by other protocols (tcp, udp). */ struct ipovly { caddr_t ih_next, ih_prev; /* for protocol sequence q's */ u_char ih_x1; /* (unused) */ u_char ih_pr; /* protocol */ short ih_len; /* protocol length */ struct in_addr ih_src; /* source internet address */ struct in_addr ih_dst; /* destination internet address */ }; /* * Ip reassembly queue structure. Each fragment * being reassembled is attached to one of these structures. * They are timed out after ipq_ttl drops to 0, and may also * be reclaimed if memory becomes tight. */ struct ipq { struct ipq *next,*prev; /* to other reass headers */ u_char ipq_ttl; /* time for reass q to live */ u_char ipq_p; /* protocol of this fragment */ u_short ipq_id; /* sequence id for reassembly */ struct ipasfrag *ipq_next,*ipq_prev; /* to ip headers of fragments */ struct in_addr ipq_src,ipq_dst; }; /* * Ip header, when holding a fragment. 
* * Note: ipf_next must be at same offset as ipq_next above */ struct ipasfrag { -#if BYTE_ORDER == LITTLE_ENDIAN +#if BYTE_ORDER == LITTLE_ENDIAN u_char ip_hl:4, ip_v:4; #endif -#if BYTE_ORDER == BIG_ENDIAN +#if BYTE_ORDER == BIG_ENDIAN u_char ip_v:4, ip_hl:4; #endif u_char ipf_mff; /* XXX overlays ip_tos: use low bit * to avoid destroying tos; * copied from (ip_off&IP_MF) */ short ip_len; u_short ip_id; short ip_off; u_char ip_ttl; u_char ip_p; u_short ip_sum; struct ipasfrag *ipf_next; /* next fragment */ struct ipasfrag *ipf_prev; /* previous fragment */ }; /* * Structure stored in mbuf in inpcb.ip_options * and passed to ip_output when ip options are in use. * The actual length of the options (including ipopt_dst) * is in m_len. */ #define MAX_IPOPTLEN 40 struct ipoption { struct in_addr ipopt_dst; /* first-hop dst if source routed */ char ipopt_list[MAX_IPOPTLEN]; /* options proper */ }; /* * Structure attached to inpcb.ip_moptions and * passed to ip_output when IP multicast options are in use. */ struct ip_moptions { struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */ - u_long imo_multicast_vif; /* vif num outgoing multicasts */ u_char imo_multicast_ttl; /* TTL for outgoing multicasts */ u_char imo_multicast_loop; /* 1 => hear sends if a member */ u_short imo_num_memberships; /* no. 
memberships this socket */ struct in_multi *imo_membership[IP_MAX_MEMBERSHIPS]; + u_long imo_multicast_vif; /* vif num outgoing multicasts */ }; struct ipstat { u_long ips_total; /* total packets received */ u_long ips_badsum; /* checksum bad */ u_long ips_tooshort; /* packet too short */ u_long ips_toosmall; /* not enough data */ u_long ips_badhlen; /* ip header length < data size */ u_long ips_badlen; /* ip length < ip header length */ u_long ips_fragments; /* fragments received */ u_long ips_fragdropped; /* frags dropped (dups, out of space) */ u_long ips_fragtimeout; /* fragments timed out */ u_long ips_forward; /* packets forwarded */ u_long ips_cantforward; /* packets rcvd for unreachable dest */ u_long ips_redirectsent; /* packets forwarded on same net */ u_long ips_noproto; /* unknown or unsupported protocol */ u_long ips_delivered; /* datagrams delivered to upper level*/ u_long ips_localout; /* total ip packets generated here */ u_long ips_odropped; /* lost packets due to nobufs, etc. */ u_long ips_reassembled; /* total packets reassembled ok */ u_long ips_fragmented; /* datagrams sucessfully fragmented */ u_long ips_ofragments; /* output fragments created */ u_long ips_cantfrag; /* don't fragment flag was set, etc. */ u_long ips_badoptions; /* error in option processing */ u_long ips_noroute; /* packets discarded due to no route */ u_long ips_badvers; /* ip version != 4 */ u_long ips_rawout; /* total raw ip packets generated */ }; #ifdef KERNEL /* flags passed to ip_output as last parameter */ #define IP_FORWARDING 0x1 /* most of ip header exists */ #define IP_RAWOUTPUT 0x2 /* raw ip header exists */ #define IP_ROUTETOIF SO_DONTROUTE /* bypass routing tables */ #define IP_ALLOWBROADCAST SO_BROADCAST /* can send broadcast packets */ extern struct ipstat ipstat; extern struct ipq ipq; /* ip reass. 
queue */ extern u_short ip_id; /* ip packet ctr, for ids */ extern int ip_defttl; /* default IP ttl */ extern u_char ip_protox[]; extern struct socket *ip_rsvpd; /* reservation protocol daemon */ extern struct socket *ip_mrouter; /* multicast routing daemon */ extern int (*legal_vif_num) __P((int)); +extern u_long (*ip_mcast_src) __P((int)); +extern int rsvp_on; int ip_ctloutput __P((int, struct socket *, int, int, struct mbuf **)); void ip_deq __P((struct ipasfrag *)); int ip_dooptions __P((struct mbuf *)); void ip_drain __P((void)); void ip_enq __P((struct ipasfrag *, struct ipasfrag *)); void ip_forward __P((struct mbuf *, int)); void ip_freef __P((struct ipq *)); void ip_freemoptions __P((struct ip_moptions *)); int ip_getmoptions __P((int, struct ip_moptions *, struct mbuf **)); void ip_init __P((void)); extern int (*ip_mforward) __P((struct ip *, struct ifnet *, struct mbuf *, struct ip_moptions *)); int ip_optcopy __P((struct ip *, struct ip *)); int ip_output __P((struct mbuf *, struct mbuf *, struct route *, int, struct ip_moptions *)); int ip_pcbopts __P((struct mbuf **, struct mbuf *)); struct ip * ip_reass __P((struct ipasfrag *, struct ipq *)); struct in_ifaddr * ip_rtaddr __P((struct in_addr)); int ip_setmoptions __P((int, struct ip_moptions **, struct mbuf *)); void ip_slowtimo __P((void)); struct mbuf * ip_srcroute __P((void)); void ip_stripoptions __P((struct mbuf *, struct mbuf *)); int ip_sysctl __P((int *, u_int, void *, size_t *, void *, size_t)); void ipintr __P((void)); int rip_ctloutput __P((int, struct socket *, int, int, struct mbuf **)); void rip_init __P((void)); void rip_input __P((struct mbuf *)); int rip_output __P((struct mbuf *, struct socket *, u_long)); int rip_usrreq __P((struct socket *, int, struct mbuf *, struct mbuf *, struct mbuf *)); int ip_rsvp_init __P((struct socket *)); int ip_rsvp_done __P((void)); +int ip_rsvp_vif_init __P((struct socket *, struct mbuf *)); +int ip_rsvp_vif_done __P((struct socket *, struct mbuf *)); 
+void ip_rsvp_force_done __P((struct socket *)); void rip_ip_input __P((struct mbuf *mm, register struct socket *ip_mrouter, struct sockaddr *src)); #endif #endif Index: stable/2.1/sys/netinet/raw_ip.c =================================================================== --- stable/2.1/sys/netinet/raw_ip.c (revision 10582) +++ stable/2.1/sys/netinet/raw_ip.c (revision 10583) @@ -1,461 +1,451 @@ /* * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)raw_ip.c 8.2 (Berkeley) 1/4/94 - * $Id: raw_ip.c,v 1.18 1995/05/12 20:00:21 ache Exp $ + * $Id: raw_ip.c,v 1.21 1995/07/24 16:33:51 wollman Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include struct inpcbhead ripcb; struct inpcbinfo ripcbinfo; /* * Nominal space allocated to a raw ip socket. */ #define RIPSNDQ 8192 #define RIPRCVQ 8192 /* * Raw interface to IP protocol. */ /* * Initialize raw connection block q. */ void rip_init() { LIST_INIT(&ripcb); ripcbinfo.listhead = &ripcb; /* * XXX We don't use the hash list for raw IP, but it's easier * to allocate a one entry hash list than it is to check all * over the place for hashbase == NULL. */ ripcbinfo.hashbase = phashinit(1, M_PCB, &ripcbinfo.hashsize); } struct sockaddr_in ripsrc = { sizeof(ripsrc), AF_INET }; /* * Setup generic address and protocol structures * for raw_input routine, then pass them along with * mbuf chain. 
*/ void rip_input(m) struct mbuf *m; { register struct ip *ip = mtod(m, struct ip *); register struct inpcb *inp; struct socket *last = 0; ripsrc.sin_addr = ip->ip_src; for (inp = ripcb.lh_first; inp != NULL; inp = inp->inp_list.le_next) { if (inp->inp_ip.ip_p && inp->inp_ip.ip_p != ip->ip_p) continue; if (inp->inp_laddr.s_addr && inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; if (inp->inp_faddr.s_addr && inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; if (last) { struct mbuf *n = m_copy(m, 0, (int)M_COPYALL); if (n) { if (sbappendaddr(&last->so_rcv, (struct sockaddr *)&ripsrc, n, (struct mbuf *)0) == 0) /* should notify about lost packet */ m_freem(n); else sorwakeup(last); } } last = inp->inp_socket; } if (last) { if (sbappendaddr(&last->so_rcv, (struct sockaddr *)&ripsrc, m, (struct mbuf *)0) == 0) m_freem(m); else sorwakeup(last); } else { m_freem(m); ipstat.ips_noproto++; ipstat.ips_delivered--; } } -void rip_ip_input(mm, ip_mrouter, src) - struct mbuf *mm; - register struct socket *ip_mrouter; - struct sockaddr *src; -{ - if (ip_mrouter) - { - if (sbappendaddr(&ip_mrouter->so_rcv, src, - mm, (struct mbuf *) 0) == 0) - m_freem(mm); - else - sorwakeup(ip_mrouter); - } - else - { - m_freem(mm); - ipstat.ips_noproto++; - ipstat.ips_delivered--; - } -} - /* * Generate IP header and pass packet to ip_output. * Tack on options user may have setup with control call. */ int rip_output(m, so, dst) register struct mbuf *m; struct socket *so; u_long dst; { register struct ip *ip; register struct inpcb *inp = sotoinpcb(so); struct mbuf *opts; int flags = (so->so_options & SO_DONTROUTE) | IP_ALLOWBROADCAST; /* * If the user handed us a complete IP packet, use it. * Otherwise, allocate an mbuf for a header and fill it in. 
*/ if ((inp->inp_flags & INP_HDRINCL) == 0) { M_PREPEND(m, sizeof(struct ip), M_WAIT); ip = mtod(m, struct ip *); ip->ip_tos = 0; ip->ip_off = 0; ip->ip_p = inp->inp_ip.ip_p; ip->ip_len = m->m_pkthdr.len; ip->ip_src = inp->inp_laddr; ip->ip_dst.s_addr = dst; ip->ip_ttl = MAXTTL; opts = inp->inp_options; } else { ip = mtod(m, struct ip *); if (ip->ip_id == 0) ip->ip_id = htons(ip_id++); opts = NULL; /* XXX prevent ip_output from overwriting header fields */ flags |= IP_RAWOUTPUT; ipstat.ips_rawout++; } return (ip_output(m, opts, &inp->inp_route, flags, inp->inp_moptions)); } /* * Raw IP socket option processing. */ int rip_ctloutput(op, so, level, optname, m) int op; struct socket *so; int level, optname; struct mbuf **m; { register struct inpcb *inp = sotoinpcb(so); register int error; if (level != IPPROTO_IP) { if (op == PRCO_SETOPT && *m) (void)m_free(*m); return (EINVAL); } switch (optname) { case IP_HDRINCL: if (op == PRCO_SETOPT || op == PRCO_GETOPT) { if (m == 0 || *m == 0 || (*m)->m_len < sizeof (int)) return (EINVAL); if (op == PRCO_SETOPT) { if (*mtod(*m, int *)) inp->inp_flags |= INP_HDRINCL; else inp->inp_flags &= ~INP_HDRINCL; (void)m_free(*m); } else { (*m)->m_len = sizeof (int); *mtod(*m, int *) = inp->inp_flags & INP_HDRINCL; } return (0); } break; case IP_FW_ADD: case IP_FW_DEL: case IP_FW_FLUSH: case IP_FW_POLICY: if (ip_fw_ctl_ptr==NULL) { if (*m) (void)m_free(*m); return(EINVAL); } if (op == PRCO_SETOPT) { - error=(*ip_fw_ctl_ptr)(optname, *m); + error=(*ip_fw_ctl_ptr)(optname, *m); if (*m) (void)m_free(*m); } else error=EINVAL; return(error); case IP_ACCT_DEL: case IP_ACCT_ADD: case IP_ACCT_CLR: case IP_ACCT_FLUSH: - case IP_ACCT_ZERO: + case IP_ACCT_ZERO: if (ip_acct_ctl_ptr==NULL) { if (*m) (void)m_free(*m); return(EINVAL); } - + if (op == PRCO_SETOPT) { error=(*ip_acct_ctl_ptr)(optname, *m); if (*m) (void)m_free(*m); } else error=EINVAL; return(error); case IP_RSVP_ON: return ip_rsvp_init(so); break; case IP_RSVP_OFF: return ip_rsvp_done(); 
break; - case DVMRP_INIT: - case DVMRP_DONE: - case DVMRP_ADD_VIF: - case DVMRP_DEL_VIF: - case DVMRP_ADD_MFC: - case DVMRP_DEL_MFC: + case IP_RSVP_VIF_ON: + return ip_rsvp_vif_init(so, *m); + + case IP_RSVP_VIF_OFF: + return ip_rsvp_vif_done(so, *m); + + case MRT_INIT: + case MRT_DONE: + case MRT_ADD_VIF: + case MRT_DEL_VIF: + case MRT_ADD_MFC: + case MRT_DEL_MFC: + case MRT_VERSION: + case MRT_ASSERT: if (op == PRCO_SETOPT) { - error = ip_mrouter_cmd(optname, so, *m); + error = ip_mrouter_set(optname, so, *m); if (*m) (void)m_free(*m); + } else if (op == PRCO_GETOPT) { + error = ip_mrouter_get(optname, so, m); } else error = EINVAL; return (error); } return (ip_ctloutput(op, so, level, optname, m)); } u_long rip_sendspace = RIPSNDQ; u_long rip_recvspace = RIPRCVQ; /*ARGSUSED*/ int rip_usrreq(so, req, m, nam, control) register struct socket *so; int req; struct mbuf *m, *nam, *control; { register int error = 0; register struct inpcb *inp = sotoinpcb(so); switch (req) { case PRU_ATTACH: if (inp) panic("rip_attach"); if ((so->so_state & SS_PRIV) == 0) { error = EACCES; break; } if ((error = soreserve(so, rip_sendspace, rip_recvspace)) || (error = in_pcballoc(so, &ripcbinfo))) break; inp = (struct inpcb *)so->so_pcb; inp->inp_ip.ip_p = (int)nam; break; case PRU_DISCONNECT: if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; break; } /* FALLTHROUGH */ case PRU_ABORT: soisdisconnected(so); /* FALLTHROUGH */ case PRU_DETACH: if (inp == 0) panic("rip_detach"); if (so == ip_mrouter) ip_mrouter_done(); + ip_rsvp_force_done(so); if (so == ip_rsvpd) ip_rsvp_done(); in_pcbdetach(inp); break; case PRU_BIND: { struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); if (nam->m_len != sizeof(*addr)) { error = EINVAL; break; } if ((ifnet == 0) || ((addr->sin_family != AF_INET) && (addr->sin_family != AF_IMPLINK)) || (addr->sin_addr.s_addr && ifa_ifwithaddr((struct sockaddr *)addr) == 0)) { error = EADDRNOTAVAIL; break; } inp->inp_laddr = addr->sin_addr; break; } case 
PRU_CONNECT: { struct sockaddr_in *addr = mtod(nam, struct sockaddr_in *); if (nam->m_len != sizeof(*addr)) { error = EINVAL; break; } if (ifnet == 0) { error = EADDRNOTAVAIL; break; } if ((addr->sin_family != AF_INET) && (addr->sin_family != AF_IMPLINK)) { error = EAFNOSUPPORT; break; } inp->inp_faddr = addr->sin_addr; soisconnected(so); break; } case PRU_CONNECT2: error = EOPNOTSUPP; break; /* * Mark the connection as being incapable of further input. */ case PRU_SHUTDOWN: socantsendmore(so); break; /* * Ship a packet out. The appropriate raw output * routine handles any massaging necessary. */ case PRU_SEND: { register u_long dst; if (so->so_state & SS_ISCONNECTED) { if (nam) { error = EISCONN; break; } dst = inp->inp_faddr.s_addr; } else { if (nam == NULL) { error = ENOTCONN; break; } dst = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr; } error = rip_output(m, so, dst); m = NULL; break; } case PRU_SENSE: /* * stat: don't bother with a blocksize. */ return (0); /* * Not supported. */ case PRU_RCVOOB: case PRU_RCVD: case PRU_LISTEN: case PRU_ACCEPT: case PRU_SENDOOB: error = EOPNOTSUPP; break; case PRU_SOCKADDR: in_setsockaddr(inp, nam); break; case PRU_PEERADDR: in_setpeeraddr(inp, nam); break; default: panic("rip_usrreq"); } if (m != NULL) m_freem(m); return (error); }