Page MenuHomeFreeBSD

No OneTemporary

This file is larger than 256 KB, so syntax highlighting was skipped.
Index: projects/arpv2_merge_1/sys/net/if.c
===================================================================
--- projects/arpv2_merge_1/sys/net/if.c (revision 185838)
+++ projects/arpv2_merge_1/sys/net/if.c (revision 185839)
@@ -1,2882 +1,2884 @@
/*-
* Copyright (c) 1980, 1986, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)if.c 8.5 (Berkeley) 1/9/95
* $FreeBSD$
*/
#include "opt_compat.h"
#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_mac.h"
#include "opt_carp.h"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/malloc.h>
#include <sys/sbuf.h>
#include <sys/bus.h>
#include <sys/mbuf.h>
#include <sys/systm.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/rwlock.h>
#include <sys/sockio.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>
#include <sys/domain.h>
#include <sys/jail.h>
#include <sys/vimage.h>
#include <machine/stdarg.h>
#include <vm/uma.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_clone.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/radix.h>
#include <net/route.h>
#include <net/vnet.h>
-#include <net/if_llatbl.h>
#if defined(INET) || defined(INET6)
/*XXX*/
#include <netinet/in.h>
#include <netinet/in_var.h>
#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#endif
#endif
#ifdef INET
#include <netinet/if_ether.h>
#include <netinet/vinet.h>
#endif
#ifdef DEV_CARP
#include <netinet/ip_carp.h>
#endif
#include <security/mac/mac_framework.h>
/* sysctl nodes: "link" under _net and "generic" under _net_link. */
SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers");
SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management");
/* Log link state change events (read-write sysctl, initialised to 1). */
static int log_link_state_change = 1;
SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW,
&log_link_state_change, 0,
"log interface link state change events");
/*
 * Link-state callback hooks.  Nothing in the visible part of this file
 * assigns them; presumably the bridge STP, netgraph ether and lagg code
 * install them -- TODO confirm against those modules.
 */
void (*bstp_linkstate_p)(struct ifnet *ifp, int state);
void (*ng_ether_link_state_p)(struct ifnet *ifp, int state);
void (*lagg_linkstate_p)(struct ifnet *ifp, int state);
/* Dequeue hook, NULL until installed (presumably by ALTQ -- TODO confirm). */
struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL;
/*
* Forward declarations of the file-local (static) functions.
*
* XXX: Style; these should be sorted alphabetically, and unprototyped
* static functions should be prototyped. Currently they are sorted by
* declaration order.
*/
static void if_attachdomain(void *);
static void if_attachdomain1(struct ifnet *);
static int ifconf(u_long, caddr_t);
static void if_freemulti(struct ifmultiaddr *);
static void if_grow(void);
static void if_init(void *);
static void if_qflush(struct ifnet *);
static void if_route(struct ifnet *, int flag, int fam);
static int if_setflag(struct ifnet *, int, int, int *, int);
static void if_slowtimo(void *);
static int if_transmit(struct ifnet *ifp, struct mbuf *m);
static void if_unroute(struct ifnet *, int flag, int fam);
static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *);
static int if_rtdel(struct radix_node *, void *);
static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *);
static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int);
static void if_start_deferred(void *context, int pending);
static void do_link_state_change(void *, int);
static int if_getgroup(struct ifgroupreq *, struct ifnet *);
static int if_getgroupmembers(struct ifgroupreq *);
#ifdef INET6
/*
* XXX: declared here to avoid including many inet6-related files.
* Should this be generalized?
*/
extern void nd6_setmtu(struct ifnet *);
#endif
/*
 * Global interface state.  The variables inside the #ifdef only exist when
 * VIMAGE_GLOBALS is defined; the rest of this file refers to them through
 * V_-prefixed names (presumably macros from the vimage headers -- confirm).
 */
#ifdef VIMAGE_GLOBALS
struct ifnethead ifnet; /* depend on static init XXX */
struct ifgrouphead ifg_head;
int if_index;
static int if_indexlim;
/* Table of ifnet/cdev by index. Locked with ifnet_lock. */
static struct ifindex_entry *ifindex_table;
static struct knlist ifklist;
#endif
/* Default send-queue length, applied by ifq_attach() when ifq_maxlen is 0. */
int ifqmaxlen = IFQ_MAXLEN;
struct mtx ifnet_lock;
/* Per-type layer 2 allocate/free handlers, indexed by the u_char if_type. */
static if_com_alloc_t *if_com_alloc[256];
static if_com_free_t *if_com_free[256];
static void filt_netdetach(struct knote *kn);
static int filt_netdev(struct knote *kn, long hint);
/* Filter ops installed by netkqfilter() for EVFILT_NETDEV knotes. */
static struct filterops netdev_filtops =
{ 1, NULL, filt_netdetach, filt_netdev };
/*
* System initialization
*
* if_init() and if_slowtimo() are both registered as SYSINIT hooks and are
* invoked with a NULL argument.
*/
SYSINIT(interfaces, SI_SUB_INIT_IF, SI_ORDER_FIRST, if_init, NULL);
SYSINIT(interface_check, SI_SUB_PROTO_IF, SI_ORDER_FIRST, if_slowtimo, NULL);
/* malloc(9) types used for allocations in this file. */
MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals");
MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address");
MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address");
/*
 * Look up the interface stored in the index table under idx.
 *
 * Returns the ifnet pointer recorded for that slot, or NULL when the slot
 * is empty or idx lies beyond the V_if_indexlim slots the table currently
 * has.  The table is read under the ifnet read lock; no reference is
 * taken on the returned ifnet.
 */
struct ifnet *
ifnet_byindex(u_short idx)
{
	INIT_VNET_NET(curvnet);
	struct ifnet *ifp;

	IFNET_RLOCK();
	/* if_grow() sizes the table to V_if_indexlim entries; stay inside. */
	if (idx < V_if_indexlim)
		ifp = V_ifindex_table[idx].ife_ifnet;
	else
		ifp = NULL;
	IFNET_RUNLOCK();
	return (ifp);
}
/*
 * Record ifp in index-table slot idx (NULL clears the slot).
 * The caller must already hold the ifnet write lock; this is asserted.
 */
static void
ifnet_setbyindex(u_short idx, struct ifnet *ifp)
{
	INIT_VNET_NET(curvnet);
	struct ifindex_entry *slot;

	IFNET_WLOCK_ASSERT();
	slot = &V_ifindex_table[idx];
	slot->ife_ifnet = ifp;
}
/*
 * Return the if_addr pointer of the interface registered under idx.
 *
 * Returns NULL when no interface is registered under idx (the slot may be
 * empty even for an index at or below V_if_index), instead of
 * dereferencing a NULL ifnet.  No reference is taken on the returned
 * ifaddr.
 */
struct ifaddr *
ifaddr_byindex(u_short idx)
{
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_RLOCK();
	ifp = ifnet_byindex(idx);
	ifa = (ifp != NULL) ? ifp->if_addr : NULL;
	IFNET_RUNLOCK();
	return (ifa);
}
/*
 * Look up the character device stored in the index table under idx.
 *
 * Returns the cdev pointer recorded for that slot, or NULL when the slot
 * is empty or idx lies beyond the V_if_indexlim slots the table currently
 * has.  The table is read under the ifnet read lock.
 */
struct cdev *
ifdev_byindex(u_short idx)
{
	INIT_VNET_NET(curvnet);
	struct cdev *cdev;

	IFNET_RLOCK();
	/* if_grow() sizes the table to V_if_indexlim entries; stay inside. */
	if (idx < V_if_indexlim)
		cdev = V_ifindex_table[idx].ife_dev;
	else
		cdev = NULL;
	IFNET_RUNLOCK();
	return (cdev);
}
/*
 * Record cdev in index-table slot idx (NULL clears the slot).
 * Takes and releases the ifnet write lock itself.
 */
static void
ifdev_setbyindex(u_short idx, struct cdev *cdev)
{
	INIT_VNET_NET(curvnet);
	struct ifindex_entry *slot;

	IFNET_WLOCK();
	slot = &V_ifindex_table[idx];
	slot->ife_dev = cdev;
	IFNET_WUNLOCK();
}
/* Entry points of the "net" character device, defined below. */
static d_open_t netopen;
static d_close_t netclose;
static d_ioctl_t netioctl;
static d_kqfilter_t netkqfilter;
/*
 * Device switch shared by the device nodes created in if_init() and
 * if_attach(); the device unit number is used as the interface index
 * (unit 0 is the non-interface "network" device).
 */
static struct cdevsw net_cdevsw = {
.d_version = D_VERSION,
.d_flags = D_NEEDGIANT,
.d_open = netopen,
.d_close = netclose,
.d_ioctl = netioctl,
.d_name = "net",
.d_kqfilter = netkqfilter,
};
/*
 * Open handler for the net devices: nothing to set up, always succeeds.
 */
static int
netopen(struct cdev *dev, int flag, int mode, struct thread *td)
{

	return (0);
}
/*
 * Close handler for the net devices: nothing to tear down, always succeeds.
 */
static int
netclose(struct cdev *dev, int flags, int fmt, struct thread *td)
{

	return (0);
}
/*
 * ioctl handler for the net devices.
 *
 * Only ioctls in group 'i' are accepted.  Unit 0 is the special network
 * device and only understands SIOCGIFCONF (plus SIOCGIFCONF32 on amd64);
 * any other unit is treated as an interface index and the request is
 * handed to ifhwioctl().
 *
 * Returns EOPNOTSUPP for unsupported requests (including ENOIOCTL from
 * ifhwioctl()), ENXIO when no interface is registered under the unit,
 * otherwise the handler's result.
 */
static int
netioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
{
	struct ifnet *ifp;
	int error, idx;

	/* only support interface specific ioctls */
	if (IOCGROUP(cmd) != 'i')
		return (EOPNOTSUPP);

	idx = dev2unit(dev);
	if (idx != 0) {
		/* Per-interface device: the unit number is the if_index. */
		ifp = ifnet_byindex(idx);
		if (ifp == NULL)
			return (ENXIO);
		error = ifhwioctl(cmd, ifp, data, td);
		return (error == ENOIOCTL ? EOPNOTSUPP : error);
	}

	/*
	 * special network device, not interface.
	 */
	if (cmd == SIOCGIFCONF)
		return (ifconf(cmd, data)); /* XXX remove cmd */
#ifdef __amd64__
	if (cmd == SIOCGIFCONF32)
		return (ifconf(cmd, data)); /* XXX remove cmd */
#endif
	return (EOPNOTSUPP);
}
static int
netkqfilter(struct cdev *dev, struct knote *kn)
{
INIT_VNET_NET(curvnet);
struct knlist *klist;
struct ifnet *ifp;
int idx;
switch (kn->kn_filter) {
case EVFILT_NETDEV:
kn->kn_fop = &netdev_filtops;
break;
default:
return (EINVAL);
}
idx = dev2unit(dev);
if (idx == 0) {
klist = &V_ifklist;
} else {
ifp = ifnet_byindex(idx);
if (ifp == NULL)
return (1);
klist = &ifp->if_klist;
}
kn->kn_hook = (caddr_t)klist;
knlist_add(klist, kn, 0);
return (0);
}
/*
 * Detach a knote: remove it from the knlist that netkqfilter() stored in
 * kn_hook.
 */
static void
filt_netdetach(struct knote *kn)
{

	knlist_remove((struct knlist *)kn->kn_hook, kn, 0);
}
/*
 * Event filter for EVFILT_NETDEV knotes.
 *
 * A NOTE_EXIT hint marks the knote as NOTE_LINKINV/EOF/oneshot, removes it
 * from its knlist and reports it active.  Any other hint is stored as the
 * current status and, if the knote subscribed to it (kn_sfflags), recorded
 * in kn_fflags.  Returns non-zero when the knote has events to deliver.
 */
static int
filt_netdev(struct knote *kn, long hint)
{
	struct knlist *klist;

	if (hint != NOTE_EXIT) {
		if (hint != 0)
			kn->kn_data = hint; /* current status */
		if (kn->kn_sfflags & hint)
			kn->kn_fflags |= hint;
		return (kn->kn_fflags != 0);
	}

	/*
	 * Currently NOTE_EXIT is abused to indicate device detach.
	 */
	klist = (struct knlist *)kn->kn_hook;
	kn->kn_data = NOTE_LINKINV;
	kn->kn_flags |= (EV_EOF | EV_ONESHOT);
	knlist_remove_inevent(klist, kn);
	return (1);
}
/*
* Network interface utility routines.
*
* Routines with ifa_ifwith* names take sockaddr *'s as
* parameters.
*/
/*
 * One-time setup of the interface layer: initialise the ifnet lock, the
 * interface and group lists, the global knlist and the index table, create
 * the "network" device for index 0, then initialise interface cloning.
 */
/* ARGSUSED*/
static void
if_init(void *dummy __unused)
{
	INIT_VNET_NET(curvnet);
	struct cdev *netdev;

	/* Start with an empty table; if_grow() doubles the limit to 16. */
	V_if_index = 0;
	V_ifindex_table = NULL;
	V_if_indexlim = 8;

	IFNET_LOCK_INIT();
	TAILQ_INIT(&V_ifnet);
	TAILQ_INIT(&V_ifg_head);
	knlist_init(&V_ifklist, NULL, NULL, NULL, NULL);

	if_grow(); /* create initial table */

	netdev = make_dev(&net_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
	    "network");
	ifdev_setbyindex(0, netdev);

	if_clone_init();
}
static void
if_grow(void)
{
INIT_VNET_NET(curvnet);
u_int n;
struct ifindex_entry *e;
V_if_indexlim <<= 1;
n = V_if_indexlim * sizeof(*e);
e = malloc(n, M_IFNET, M_WAITOK | M_ZERO);
if (V_ifindex_table != NULL) {
memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2);
free((caddr_t)V_ifindex_table, M_IFNET);
}
V_ifindex_table = e;
}
/*
 * Allocate a zeroed struct ifnet and assign it an interface index.  If an
 * allocation routine is registered in if_com_alloc[] for the given type,
 * the layer 2 common structure is allocated through it as well.
 *
 * Returns the new ifnet, already entered in the index table, or NULL when
 * the index counter overflows or the layer 2 allocation fails.
 */
struct ifnet*
if_alloc(u_char type)
{
	INIT_VNET_NET(curvnet);
	struct ifnet *ifp;

	ifp = malloc(sizeof(*ifp), M_IFNET, M_WAITOK|M_ZERO);

	/*
	 * Look for the first unused slot at or below V_if_index; when they
	 * are all taken the scan ends one past V_if_index.
	 *
	 * XXX: should be locked!
	 */
	ifp->if_index = 1;
	while (ifp->if_index <= V_if_index &&
	    ifnet_byindex(ifp->if_index) != NULL)
		ifp->if_index++;

	/* Catch if_index overflow. */
	if (ifp->if_index < 1) {
		free(ifp, M_IFNET);
		return (NULL);
	}
	if (V_if_index < ifp->if_index)
		V_if_index = ifp->if_index;
	if (V_if_index >= V_if_indexlim)
		if_grow();

	ifp->if_type = type;
	if (if_com_alloc[type] != NULL) {
		ifp->if_l2com = if_com_alloc[type](type, ifp);
		if (ifp->if_l2com == NULL) {
			free(ifp, M_IFNET);
			return (NULL);
		}
	}

	/* Publish the ifnet in the index table. */
	IFNET_WLOCK();
	ifnet_setbyindex(ifp->if_index, ifp);
	IFNET_WUNLOCK();
	IF_ADDR_LOCK_INIT(ifp);

	return (ifp);
}
/*
* Free the struct ifnet, the associated index, and the layer 2 common
* structure if needed. All the work is done in if_free_type().
*
* Do not add code to this function! Add it to if_free_type().
*/
void
if_free(struct ifnet *ifp)
{
if_free_type(ifp, ifp->if_type);
}
/*
 * Do the actual work of freeing a struct ifnet, associated index, and
 * layer 2 common structure.  This version should only be called by
 * interfaces that switch their type after calling if_alloc().
 *
 * ifp	interface to release; it must be the ifnet registered under
 *	ifp->if_index, otherwise a message is printed and nothing is freed.
 * type	interface type used to select the if_com_free[] handler.
 */
void
if_free_type(struct ifnet *ifp, u_char type)
{
	INIT_VNET_NET(curvnet); /* ifp->if_vnet can be NULL here ! */

	if (ifp != ifnet_byindex(ifp->if_index)) {
		if_printf(ifp, "%s: value was not if_alloced, skipping\n",
		    __func__);
		return;
	}

	IFNET_WLOCK();
	ifnet_setbyindex(ifp->if_index, NULL);

	/* Shrink V_if_index past any now-empty slots at the top. */
	/* XXX: should be locked with if_findindex() */
	while (V_if_index > 0 && ifnet_byindex(V_if_index) == NULL)
		V_if_index--;
	IFNET_WUNLOCK();

	if (if_com_free[type] != NULL)
		if_com_free[type](ifp->if_l2com, type);

	IF_ADDR_LOCK_DESTROY(ifp);
	free(ifp, M_IFNET);
}
/*
 * Initialise an interface send queue: set up its mutex (named after the
 * interface), apply the default maximum length if none was set, reset the
 * ALTQ state and bind the queue to ifp.
 */
void
ifq_attach(struct ifaltq *ifq, struct ifnet *ifp)
{

	mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF);

	if (ifq->ifq_maxlen == 0)
		ifq->ifq_maxlen = ifqmaxlen;

	ifq->altq_ifp = ifp;
	ifq->altq_type = 0;
	ifq->altq_disc = NULL;
	ifq->altq_tbr = NULL;
	/* Only the ALTQF_CANTCHANGE bits survive. */
	ifq->altq_flags &= ALTQF_CANTCHANGE;
}
/*
 * Tear down an interface send queue by destroying the mutex that
 * ifq_attach() initialised.
 */
void
ifq_detach(struct ifaltq *ifq)
{

	mtx_destroy(&ifq->ifq_mtx);
}
/*
* Perform generic interface initialization tasks and attach the interface
* to the list of "active" interfaces.
*
* XXX:
* - The decision to return void and thus require this function to
* succeed is questionable.
* - We do more initialization here than is probably a good idea.
* Some of this should probably move to if_alloc().
* - We should probably do more sanity checking. For instance we don't
* do anything to ensure if_xname is unique or non-empty.
*/
void
if_attach(struct ifnet *ifp)
{
INIT_VNET_NET(curvnet);
unsigned socksize, ifasize;
int namelen, masklen;
struct sockaddr_dl *sdl;
struct ifaddr *ifa;
/* The ifnet must be the one registered under its own index. */
if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index))
panic ("%s: BUG: if_attach called without if_alloc'd input()\n",
ifp->if_xname);
/* Deferred-work tasks, locks and per-interface lists. */
TASK_INIT(&ifp->if_starttask, 0, if_start_deferred, ifp);
TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp);
IF_AFDATA_LOCK_INIT(ifp);
ifp->if_afdata_initialized = 0;
TAILQ_INIT(&ifp->if_addrhead);
TAILQ_INIT(&ifp->if_prefixhead);
TAILQ_INIT(&ifp->if_multiaddrs);
TAILQ_INIT(&ifp->if_groups);
/* Every interface joins the IFG_ALL group. */
if_addgroup(ifp, IFG_ALL);
knlist_init(&ifp->if_klist, NULL, NULL, NULL, NULL);
getmicrotime(&ifp->if_lastchange);
ifp->if_data.ifi_epoch = time_uptime;
ifp->if_data.ifi_datalen = sizeof(struct if_data);
/* Install this file's generic transmit and queue-flush handlers. */
ifp->if_transmit = if_transmit;
ifp->if_qflush = if_qflush;
#ifdef MAC
mac_ifnet_init(ifp);
mac_ifnet_create(ifp);
#endif
/* Create the "net/<xname>" device node plus a "net<index>" alias. */
ifdev_setbyindex(ifp->if_index, make_dev(&net_cdevsw,
ifp->if_index, UID_ROOT, GID_WHEEL, 0600, "%s/%s",
net_cdevsw.d_name, ifp->if_xname));
make_dev_alias(ifdev_byindex(ifp->if_index), "%s%d",
net_cdevsw.d_name, ifp->if_index);
ifq_attach(&ifp->if_snd, ifp);
/*
* create a Link Level name for this device
*/
namelen = strlen(ifp->if_xname);
/*
* Always save enough space for any possible name so we can do
* a rename in place later.
*/
masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ;
socksize = masklen + ifp->if_addrlen;
if (socksize < sizeof(*sdl))
socksize = sizeof(*sdl);
socksize = roundup2(socksize, sizeof(long));
/* One allocation holds the ifaddr, its address and its netmask. */
ifasize = sizeof(*ifa) + 2 * socksize;
ifa = malloc(ifasize, M_IFADDR, M_WAITOK | M_ZERO);
IFA_LOCK_INIT(ifa);
/* The address sockaddr_dl sits directly behind the ifaddr. */
sdl = (struct sockaddr_dl *)(ifa + 1);
sdl->sdl_len = socksize;
sdl->sdl_family = AF_LINK;
bcopy(ifp->if_xname, sdl->sdl_data, namelen);
sdl->sdl_nlen = namelen;
sdl->sdl_index = ifp->if_index;
sdl->sdl_type = ifp->if_type;
ifp->if_addr = ifa;
ifa->ifa_ifp = ifp;
ifa->ifa_rtrequest = link_rtrequest;
ifa->ifa_addr = (struct sockaddr *)sdl;
/* The netmask sockaddr_dl follows socksize bytes after the address. */
sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl);
ifa->ifa_netmask = (struct sockaddr *)sdl;
sdl->sdl_len = masklen;
/* The mask covers exactly the bytes of the interface name. */
while (namelen != 0)
sdl->sdl_data[--namelen] = 0xff;
ifa->ifa_refcnt = 1;
TAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link);
ifp->if_broadcastaddr = NULL; /* reliably crash if used uninitialized */
/* Make the interface visible on the global list. */
IFNET_WLOCK();
TAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link);
IFNET_WUNLOCK();
/* Attach per-domain data right away once domain_init_status reached 2. */
if (domain_init_status >= 2)
if_attachdomain1(ifp);
EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL);
/* Announce the interface. */
rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
if (ifp->if_watchdog != NULL)
if_printf(ifp,
"WARNING: using obsoleted if_watchdog interface\n");
if (ifp->if_flags & IFF_NEEDSGIANT)
if_printf(ifp,
"WARNING: using obsoleted IFF_NEEDSGIANT flag\n");
}
static void
if_attachdomain(void *dummy)
{
INIT_VNET_NET(curvnet);
struct ifnet *ifp;
int s;
s = splnet();
TAILQ_FOREACH(ifp, &V_ifnet, if_link)
if_attachdomain1(ifp);
splx(s);
}
SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND,
if_attachdomain, NULL);
static void
if_attachdomain1(struct ifnet *ifp)
{
struct domain *dp;
int s;
s = splnet();
/*
* Since dp->dom_ifattach calls malloc() with M_WAITOK, we
* cannot lock ifp->if_afdata initialization, entirely.
*/
if (IF_AFDATA_TRYLOCK(ifp) == 0) {
splx(s);
return;
}
if (ifp->if_afdata_initialized >= domain_init_status) {
IF_AFDATA_UNLOCK(ifp);
splx(s);
printf("if_attachdomain called more than once on %s\n",
ifp->if_xname);
return;
}
ifp->if_afdata_initialized = domain_init_status;
IF_AFDATA_UNLOCK(ifp);
/* address family dependent data region */
bzero(ifp->if_afdata, sizeof(ifp->if_afdata));
for (dp = domains; dp; dp = dp->dom_next) {
if (dp->dom_ifattach)
ifp->if_afdata[dp->dom_family] =
(*dp->dom_ifattach)(ifp);
}
splx(s);
}
/*
 * Remove every address except AF_LINK ones from an interface.
 *
 * AF_INET addresses are deleted through in_control(SIOCDIFADDR) and
 * AF_INET6 ones through in6_purgeaddr(); anything else (or an AF_INET
 * address whose in_control() call failed) is unlinked from if_addrhead
 * and released with IFAFREE().
 */
void
if_purgeaddrs(struct ifnet *ifp)
{
	struct ifaddr *ifa, *ifa_next;

	TAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, ifa_next) {
		/* Link-level addresses are left in place here. */
		if (ifa->ifa_addr->sa_family == AF_LINK)
			continue;
#ifdef INET
		/* XXX: Ugly!! ad hoc just for INET */
		if (ifa->ifa_addr->sa_family == AF_INET) {
			struct ifaliasreq req;

			bzero(&req, sizeof(req));
			req.ifra_addr = *ifa->ifa_addr;
			if (ifa->ifa_dstaddr)
				req.ifra_broadaddr = *ifa->ifa_dstaddr;
			if (in_control(NULL, SIOCDIFADDR, (caddr_t)&req, ifp,
			    NULL) == 0)
				continue;
			/* On failure fall through to the generic removal. */
		}
#endif /* INET */
#ifdef INET6
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			in6_purgeaddr(ifa);
			/* ifp_addrhead is already updated */
			continue;
		}
#endif /* INET6 */
		TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
		IFAFREE(ifa);
	}
}
/*
* Remove any multicast network addresses from an interface.
*/
void
if_purgemaddrs(struct ifnet *ifp)
{
struct ifmultiaddr *ifma;
struct ifmultiaddr *next;
IF_ADDR_LOCK(ifp);
TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next)
if_delmulti_locked(ifp, ifma, 1);
IF_ADDR_UNLOCK(ifp);
}
/*
* Detach an interface, removing it from the
* list of "active" interfaces.
*
* If the interface is not found on the global list the function returns
* without doing anything else.
*
* XXXRW: There are some significant questions about event ordering, and
* how to prevent things from starting to use the interface during detach.
*/
void
if_detach(struct ifnet *ifp)
{
INIT_VNET_NET(ifp->if_vnet);
struct ifaddr *ifa;
struct radix_node_head *rnh;
int s, i, j;
struct domain *dp;
struct ifnet *iter;
int found = 0;
/* Unlink from the global list; only proceed if it really was on it. */
IFNET_WLOCK();
TAILQ_FOREACH(iter, &V_ifnet, if_link)
if (iter == ifp) {
TAILQ_REMOVE(&V_ifnet, ifp, if_link);
found = 1;
break;
}
IFNET_WUNLOCK();
if (!found)
return;
/*
* Remove/wait for pending events.
*/
taskqueue_drain(taskqueue_swi, &ifp->if_linktask);
/*
* Remove routes and flush queues.
*/
s = splnet();
if_down(ifp);
#ifdef ALTQ
if (ALTQ_IS_ENABLED(&ifp->if_snd))
altq_disable(&ifp->if_snd);
if (ALTQ_IS_ATTACHED(&ifp->if_snd))
altq_detach(&ifp->if_snd);
#endif
if_purgeaddrs(ifp);
#ifdef INET
in_ifdetach(ifp);
#endif
#ifdef INET6
/*
* Remove all IPv6 kernel structs related to ifp. This should be done
* before removing routing entries below, since IPv6 interface direct
* routes are expected to be removed by the IPv6-specific kernel API.
* Otherwise, the kernel will detect some inconsistency and bark it.
*/
in6_ifdetach(ifp);
#endif
if_purgemaddrs(ifp);
/*
* Remove link ifaddr pointer and maybe decrement if_index.
* Clean up all addresses.
*/
ifp->if_addr = NULL;
/* Destroy the device node created in if_attach() and clear its slot. */
destroy_dev(ifdev_byindex(ifp->if_index));
ifdev_setbyindex(ifp->if_index, NULL);
/* We can now free link ifaddr. */
if (!TAILQ_EMPTY(&ifp->if_addrhead)) {
ifa = TAILQ_FIRST(&ifp->if_addrhead);
TAILQ_REMOVE(&ifp->if_addrhead, ifa, ifa_link);
IFAFREE(ifa);
}
/*
* Delete all remaining routes using this interface
* Unfortunately the only way to do this is to slog through
* the entire routing table looking for routes which point
* to this interface...oh well...
*/
for (i = 1; i <= AF_MAX; i++) {
for (j = 0; j < rt_numfibs; j++) {
if ((rnh = V_rt_tables[j][i]) == NULL)
continue;
RADIX_NODE_HEAD_LOCK(rnh);
(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);
RADIX_NODE_HEAD_UNLOCK(rnh);
}
}
/* Announce that the interface is gone. */
rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL);
/* Let every domain with a detach hook release its if_afdata entry. */
IF_AFDATA_LOCK(ifp);
for (dp = domains; dp; dp = dp->dom_next) {
if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family])
(*dp->dom_ifdetach)(ifp,
ifp->if_afdata[dp->dom_family]);
}
IF_AFDATA_UNLOCK(ifp);
#ifdef MAC
mac_ifnet_destroy(ifp);
#endif /* MAC */
/* Notify kqueue listeners (NOTE_EXIT hint, see filt_netdev()), then drop the list. */
KNOTE_UNLOCKED(&ifp->if_klist, NOTE_EXIT);
knlist_clear(&ifp->if_klist, 0);
knlist_destroy(&ifp->if_klist);
ifq_detach(&ifp->if_snd);
IF_AFDATA_DESTROY(ifp);
splx(s);
}
/*
* Add a group to an interface
*/
int
if_addgroup(struct ifnet *ifp, const char *groupname)
{
INIT_VNET_NET(ifp->if_vnet);
struct ifg_list *ifgl;
struct ifg_group *ifg = NULL;
struct ifg_member *ifgm;
if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' &&
groupname[strlen(groupname) - 1] <= '9')
return (EINVAL);
IFNET_WLOCK();
TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) {
IFNET_WUNLOCK();
return (EEXIST);
}
if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP,
M_NOWAIT)) == NULL) {
IFNET_WUNLOCK();
return (ENOMEM);
}
if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member),
M_TEMP, M_NOWAIT)) == NULL) {
free(ifgl, M_TEMP);
IFNET_WUNLOCK();
return (ENOMEM);
}
TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next)
if (!strcmp(ifg->ifg_group, groupname))
break;
if (ifg == NULL) {
if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group),
M_TEMP, M_NOWAIT)) == NULL) {
free(ifgl, M_TEMP);
free(ifgm, M_TEMP);
IFNET_WUNLOCK();
return (ENOMEM);
}
strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group));
ifg->ifg_refcnt = 0;
TAILQ_INIT(&ifg->ifg_members);
EVENTHANDLER_INVOKE(group_attach_event, ifg);
TAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next);
}
ifg->ifg_refcnt++;
ifgl->ifgl_group = ifg;
ifgm->ifgm_ifp = ifp;
IF_ADDR_LOCK(ifp);
TAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next);
TAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next);
IF_ADDR_UNLOCK(ifp);
IFNET_WUNLOCK();
EVENTHANDLER_INVOKE(group_change_event, groupname);
return (0);
}
/*
* Remove a group from an interface
*/
int
if_delgroup(struct ifnet *ifp, const char *groupname)
{
INIT_VNET_NET(ifp->if_vnet);
struct ifg_list *ifgl;
struct ifg_member *ifgm;
IFNET_WLOCK();
TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next)
if (!strcmp(ifgl->ifgl_group->ifg_group, groupname))
break;
if (ifgl == NULL) {
IFNET_WUNLOCK();
return (ENOENT);
}
IF_ADDR_LOCK(ifp);
TAILQ_REMOVE(&ifp->if_groups, ifgl, ifgl_next);
IF_ADDR_UNLOCK(ifp);
TAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next)
if (ifgm->ifgm_ifp == ifp)
break;
if (ifgm != NULL) {
TAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifgm_next);
free(ifgm, M_TEMP);
}
if (--ifgl->ifgl_group->ifg_refcnt == 0) {
TAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_next);
EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group);
free(ifgl->ifgl_group, M_TEMP);
}
IFNET_WUNLOCK();
free(ifgl, M_TEMP);
EVENTHANDLER_INVOKE(group_change_event, groupname);
return (0);
}
/*
 * Copy the names of all groups an interface belongs to out to the buffer
 * described by the request.
 *
 * When ifgr_len is 0 on entry, only the required buffer size is computed
 * and stored back into ifgr_len.  Otherwise one struct ifg_req per group
 * is copied out to ifgr_groups.  Returns EINVAL if the buffer is too
 * small, or the copyout() error.
 */
static int
if_getgroup(struct ifgroupreq *data, struct ifnet *ifp)
{
	struct ifgroupreq *ifgr = data;
	struct ifg_req ifgrq, *ifgp;
	struct ifg_list *ifgl;
	int len, error;

	if (ifgr->ifgr_len == 0) {
		/* Size query only. */
		IF_ADDR_LOCK(ifp);
		TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
			ifgr->ifgr_len += sizeof(struct ifg_req);
		}
		IF_ADDR_UNLOCK(ifp);
		return (0);
	}

	error = 0;
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	/* XXX: wire */
	IF_ADDR_LOCK(ifp);
	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
		if (len < sizeof(ifgrq)) {
			error = EINVAL;
			break;
		}
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group,
		    sizeof(ifgrq.ifgrq_group));
		error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req));
		if (error)
			break;
		len -= sizeof(ifgrq);
		ifgp++;
	}
	IF_ADDR_UNLOCK(ifp);
	return (error);
}
/*
 * Copy the names of all member interfaces of the group named in
 * ifgr_name out to the buffer described by the request.
 *
 * Returns ENOENT if the group does not exist.  When ifgr_len is 0 on
 * entry, only the required buffer size is computed and stored back into
 * ifgr_len.  Otherwise one struct ifg_req per member is copied out to
 * ifgr_groups; EINVAL is returned if the buffer is too small, or the
 * copyout() error.
 */
static int
if_getgroupmembers(struct ifgroupreq *data)
{
	INIT_VNET_NET(curvnet);
	struct ifgroupreq *ifgr = data;
	struct ifg_req ifgrq, *ifgp;
	struct ifg_member *ifgm;
	struct ifg_group *ifg;
	int len, error;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) {
		if (strcmp(ifg->ifg_group, ifgr->ifgr_name) == 0)
			break;
	}
	if (ifg == NULL) {
		IFNET_RUNLOCK();
		return (ENOENT);
	}

	if (ifgr->ifgr_len == 0) {
		/* Size query only. */
		TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
			ifgr->ifgr_len += sizeof(ifgrq);
		}
		IFNET_RUNLOCK();
		return (0);
	}

	error = 0;
	len = ifgr->ifgr_len;
	ifgp = ifgr->ifgr_groups;
	TAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) {
		if (len < sizeof(ifgrq)) {
			error = EINVAL;
			break;
		}
		bzero(&ifgrq, sizeof ifgrq);
		strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname,
		    sizeof(ifgrq.ifgrq_member));
		error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req));
		if (error)
			break;
		len -= sizeof(ifgrq);
		ifgp++;
	}
	IFNET_RUNLOCK();
	return (error);
}
/*
 * Delete Routes for a Network Interface
 *
 * Tree-walk callback used by if_detach() via rnh->rnh_walktree(): deletes
 * the visited route if it references the detaching interface.
 *
 * Arguments:
 *	rn	pointer to node in the routing table
 *	arg	argument passed to rnh->rnh_walktree() - detaching interface
 *
 * Returns:
 *	0	always; a failed deletion is only logged so that the walk
 *		continues
 */
static int
if_rtdel(struct radix_node *rn, void *arg)
{
	struct rtentry *rt = (struct rtentry *)rn;
	struct ifnet *ifp = arg;
	int err;

	/* Routes over other interfaces are none of our business. */
	if (rt->rt_ifp != ifp)
		return (0);

	/*
	 * Protect (sorta) against walktree recursion problems
	 * with cloned routes
	 */
	if ((rt->rt_flags & RTF_UP) == 0)
		return (0);

	err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
	    rt_mask(rt), rt->rt_flags,
	    (struct rtentry **) NULL, rt->rt_fibnum);
	if (err != 0)
		log(LOG_WARNING, "if_rtdel: error %d\n", err);

	return (0);
}
/*
* XXX: Because sockaddr_dl has deeper structure than the sockaddr
* structs used to represent other address families, it is necessary
* to perform a different comparison.
*
* sa_equal(a1, a2): true when the first a1->sa_len bytes of both
* sockaddrs are identical. Both arguments are dereferenced, so neither
* may be NULL; a1 is evaluated twice.
*
* sa_dl_equal(a1, a2): true when both sockaddr_dl's have the same
* sdl_len and their link-level addresses (LLADDR) match over
* a1's sdl_alen bytes.
*/
#define sa_equal(a1, a2) \
(bcmp((a1), (a2), ((a1))->sa_len) == 0)
#define sa_dl_equal(a1, a2) \
((((struct sockaddr_dl *)(a1))->sdl_len == \
((struct sockaddr_dl *)(a2))->sdl_len) && \
(bcmp(LLADDR((struct sockaddr_dl *)(a1)), \
LLADDR((struct sockaddr_dl *)(a2)), \
((struct sockaddr_dl *)(a1))->sdl_alen) == 0))
/*
* Locate an interface based on a complete address.
*/
/*ARGSUSED*/
struct ifaddr *
ifa_ifwithaddr(struct sockaddr *addr)
{
INIT_VNET_NET(curvnet);
struct ifnet *ifp;
struct ifaddr *ifa;
IFNET_RLOCK();
TAILQ_FOREACH(ifp, &V_ifnet, if_link)
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family)
continue;
if (sa_equal(addr, ifa->ifa_addr))
goto done;
/* IP6 doesn't have broadcast */
if ((ifp->if_flags & IFF_BROADCAST) &&
ifa->ifa_broadaddr &&
ifa->ifa_broadaddr->sa_len != 0 &&
sa_equal(ifa->ifa_broadaddr, addr))
goto done;
}
ifa = NULL;
done:
IFNET_RUNLOCK();
return (ifa);
}
/*
* Locate an interface based on the broadcast address.
*/
/* ARGSUSED */
struct ifaddr *
ifa_ifwithbroadaddr(struct sockaddr *addr)
{
INIT_VNET_NET(curvnet);
struct ifnet *ifp;
struct ifaddr *ifa;
IFNET_RLOCK();
TAILQ_FOREACH(ifp, &V_ifnet, if_link)
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
if (ifa->ifa_addr->sa_family != addr->sa_family)
continue;
if ((ifp->if_flags & IFF_BROADCAST) &&
ifa->ifa_broadaddr &&
ifa->ifa_broadaddr->sa_len != 0 &&
sa_equal(ifa->ifa_broadaddr, addr))
goto done;
}
ifa = NULL;
done:
IFNET_RUNLOCK();
return (ifa);
}
/*
 * Find the address on a point-to-point interface whose destination
 * address equals addr.  Returns NULL when nothing matches.
 */
/*ARGSUSED*/
struct ifaddr *
ifa_ifwithdstaddr(struct sockaddr *addr)
{
	INIT_VNET_NET(curvnet);
	struct ifnet *ifp;
	struct ifaddr *ifa;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		/* Only point-to-point links have a destination address. */
		if (!(ifp->if_flags & IFF_POINTOPOINT))
			continue;
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			if (ifa->ifa_addr->sa_family != addr->sa_family)
				continue;
			if (ifa->ifa_dstaddr == NULL)
				continue;
			if (sa_equal(addr, ifa->ifa_dstaddr))
				goto found;
		}
	}
	ifa = NULL;
found:
	IFNET_RUNLOCK();
	return (ifa);
}
/*
* Find an interface on a specific network. If there are several, the
* one with the most specific netmask is chosen.
*
* Returns the matching ifaddr, or NULL (0) when no interface address
* matches. No reference is taken on the returned ifaddr.
*/
struct ifaddr *
ifa_ifwithnet(struct sockaddr *addr)
{
INIT_VNET_NET(curvnet);
struct ifnet *ifp;
struct ifaddr *ifa;
struct ifaddr *ifa_maybe = (struct ifaddr *) 0;
u_int af = addr->sa_family;
char *addr_data = addr->sa_data, *cplim;
/*
* AF_LINK addresses can be looked up directly by their index number,
* so do that if we can.
*/
if (af == AF_LINK) {
struct sockaddr_dl *sdl = (struct sockaddr_dl *)addr;
if (sdl->sdl_index && sdl->sdl_index <= V_if_index)
return (ifaddr_byindex(sdl->sdl_index));
}
/*
* Scan through each interface, looking for ones that have
* addresses in this address family.
*/
IFNET_RLOCK();
TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
char *cp, *cp2, *cp3;
/*
* The "next" label sits on this continue so that the mask
* comparison below can skip to the next address with a goto.
*/
if (ifa->ifa_addr->sa_family != af)
next: continue;
if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT) {
/*
* This is a bit broken as it doesn't
* take into account that the remote end may
* be a single node in the network we are
* looking for.
* The trouble is that we don't know the
* netmask for the remote end.
*/
if (ifa->ifa_dstaddr != NULL &&
sa_equal(addr, ifa->ifa_dstaddr))
goto done;
} else {
/*
* if we have a special address handler,
* then use it instead of the generic one.
*/
if (ifa->ifa_claim_addr) {
if ((*ifa->ifa_claim_addr)(ifa, addr))
goto done;
continue;
}
/*
* Scan all the bits in the ifa's address.
* If a bit disagrees with what we are
* looking for, mask it with the netmask
* to see if it really matters.
* (A byte at a time)
*/
if (ifa->ifa_netmask == 0)
continue;
cp = addr_data;
cp2 = ifa->ifa_addr->sa_data;
cp3 = ifa->ifa_netmask->sa_data;
/* cplim points one past the last byte of the netmask. */
cplim = ifa->ifa_netmask->sa_len
+ (char *)ifa->ifa_netmask;
while (cp3 < cplim)
if ((*cp++ ^ *cp2++) & *cp3++)
goto next; /* next address! */
/*
* If the netmask of what we just found
* is more specific than what we had before
* (if we had one) then remember the new one
* before continuing to search
* for an even better one.
*/
if (ifa_maybe == 0 ||
rn_refines((caddr_t)ifa->ifa_netmask,
(caddr_t)ifa_maybe->ifa_netmask))
ifa_maybe = ifa;
}
}
}
/* No exact hit: fall back to the best candidate seen (may be NULL). */
ifa = ifa_maybe;
done:
IFNET_RUNLOCK();
return (ifa);
}
/*
 * Find an interface address specific to an interface best matching
 * a given address.
 *
 * Returns the first address on ifp that matches addr: exactly (or by
 * destination address) when the ifaddr has no netmask, by destination
 * address on point-to-point interfaces, or under the netmask otherwise.
 * If nothing matches, the first address of the same family is returned,
 * or NULL when the interface has none or the family is >= AF_MAX.
 */
struct ifaddr *
ifaof_ifpforaddr(struct sockaddr *addr, struct ifnet *ifp)
{
	struct ifaddr *ifa;
	char *cp, *cp2, *cp3;
	char *cplim;
	struct ifaddr *ifa_maybe = 0;
	u_int af = addr->sa_family;

	if (af >= AF_MAX)
		return (0);
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family != af)
			continue;
		/* Remember the first same-family address as the fallback. */
		if (ifa_maybe == 0)
			ifa_maybe = ifa;
		if (ifa->ifa_netmask == 0) {
			if (sa_equal(addr, ifa->ifa_addr) ||
			    (ifa->ifa_dstaddr &&
			    sa_equal(addr, ifa->ifa_dstaddr)))
				goto done;
			continue;
		}
		if (ifp->if_flags & IFF_POINTOPOINT) {
			/*
			 * ifa_dstaddr may be unset; sa_equal() dereferences
			 * both arguments, so guard it as the other lookups
			 * in this file do.
			 */
			if (ifa->ifa_dstaddr != NULL &&
			    sa_equal(addr, ifa->ifa_dstaddr))
				goto done;
		} else {
			/* Compare byte by byte under the netmask. */
			cp = addr->sa_data;
			cp2 = ifa->ifa_addr->sa_data;
			cp3 = ifa->ifa_netmask->sa_data;
			cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask;
			for (; cp3 < cplim; cp3++)
				if ((*cp++ ^ *cp2++) & *cp3)
					break;
			if (cp3 == cplim)
				goto done;
		}
	}
	ifa = ifa_maybe;
done:
	return (ifa);
}
+
+#include <net/route.h>
+#include <net/if_llatbl.h>
/*
 * Default action when installing a route with a Link Level gateway.
 * On RTM_ADD, look up the interface address best matching the route's
 * key, make the route point to it, and forward the request to that
 * address's own ifa_rtrequest handler if it has a different one.
 * This should be moved to /sys/net/link.c eventually.
 */
static void
link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info)
{
	struct ifaddr *ifa, *oifa;
	struct sockaddr *dst;
	struct ifnet *ifp;

	RT_LOCK_ASSERT(rt);

	if (cmd != RTM_ADD)
		return;
	ifa = rt->rt_ifa;
	if (ifa == 0)
		return;
	ifp = ifa->ifa_ifp;
	if (ifp == 0)
		return;
	dst = rt_key(rt);
	if (dst == 0)
		return;

	ifa = ifaof_ifpforaddr(dst, ifp);
	if (ifa == 0)
		return;

	/* Swap the route's ifaddr: reference the new one, drop the old. */
	IFAREF(ifa); /* XXX */
	oifa = rt->rt_ifa;
	rt->rt_ifa = ifa;
	IFAFREE(oifa);
	if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest)
		ifa->ifa_rtrequest(cmd, rt, info);
}
/*
* Mark an interface down and notify protocols of
* the transition.
* NOTE: must be called at splnet or eqivalent.
*/
static void
if_unroute(struct ifnet *ifp, int flag, int fam)
{
struct ifaddr *ifa;
KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP"));
ifp->if_flags &= ~flag;
getmicrotime(&ifp->if_lastchange);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
pfctlinput(PRC_IFDOWN, ifa->ifa_addr);
ifp->if_qflush(ifp);
#ifdef DEV_CARP
if (ifp->if_carp)
carp_carpdev_state(ifp->if_carp);
#endif
rt_ifmsg(ifp);
}
/*
* Mark an interface up and notify protocols of
* the transition.
* NOTE: must be called at splnet or eqivalent.
*/
static void
if_route(struct ifnet *ifp, int flag, int fam)
{
struct ifaddr *ifa;
KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP"));
ifp->if_flags |= flag;
getmicrotime(&ifp->if_lastchange);
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family))
pfctlinput(PRC_IFUP, ifa->ifa_addr);
#ifdef DEV_CARP
if (ifp->if_carp)
carp_carpdev_state(ifp->if_carp);
#endif
rt_ifmsg(ifp);
#ifdef INET6
in6_if_up(ifp);
#endif
}
/*
 * Function-pointer hooks into the vlan code.
 * NOTE(review): presumably assigned by if_vlan when it is present and
 * left null otherwise -- confirm against if_vlan.
 */
void (*vlan_link_state_p)(struct ifnet *, int); /* XXX: private from if_vlan */
void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */
/*
 * Record a change in the interface link state.  To avoid LORs between
 * the driver lock and upper layer locks, as well as possible recursion,
 * nothing is notified from here: the new state is stored on the
 * interface and the link task is queued on taskqueue_swi; the actual
 * work is done in do_link_state_change().
 */
void
if_link_state_change(struct ifnet *ifp, int link_state)
{
	/* Only a real transition is recorded and queued. */
	if (ifp->if_link_state != link_state) {
		ifp->if_link_state = link_state;
		taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask);
	}
}
/*
 * Task handler that propagates the link state currently stored on the
 * interface passed in 'arg': routing socket message, kqueue note, vlan,
 * netgraph, carp, bridge and lagg hooks where attached, and a devctl
 * notification.  A 'pending' count above one is reported as coalesced
 * link state changes.
 */
static void
do_link_state_change(void *arg, int pending)
{
	struct ifnet *ifp = (struct ifnet *)arg;
	int state = ifp->if_link_state;
	int is_up = (state == LINK_STATE_UP);
	int note;

	CURVNET_SET(ifp->if_vnet);

	/* Notify that the link state has changed. */
	rt_ifmsg(ifp);

	/* Translate the link state into the matching kqueue note. */
	switch (state) {
	case LINK_STATE_UP:
		note = NOTE_LINKUP;
		break;
	case LINK_STATE_DOWN:
		note = NOTE_LINKDOWN;
		break;
	default:
		note = NOTE_LINKINV;
		break;
	}
	KNOTE_UNLOCKED(&ifp->if_klist, note);

	if (ifp->if_vlantrunk != NULL)
		(*vlan_link_state_p)(ifp, note);

	if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) &&
	    IFP2AC(ifp)->ac_netgraph != NULL)
		(*ng_ether_link_state_p)(ifp, state);
#ifdef DEV_CARP
	if (ifp->if_carp)
		carp_carpdev_state(ifp->if_carp);
#endif
	if (ifp->if_bridge) {
		KASSERT(bstp_linkstate_p != NULL,("if_bridge bstp not loaded!"));
		(*bstp_linkstate_p)(ifp, state);
	}
	if (ifp->if_lagg) {
		KASSERT(lagg_linkstate_p != NULL,("if_lagg not loaded!"));
		(*lagg_linkstate_p)(ifp, state);
	}

	/* Anything that is not UP is announced as down. */
	devctl_notify("IFNET", ifp->if_xname,
	    is_up ? "LINK_UP" : "LINK_DOWN", NULL);
	if (pending > 1)
		if_printf(ifp, "%d link states coalesced\n", pending);
	if (log_link_state_change)
		log(LOG_NOTICE, "%s: link state changed to %s\n", ifp->if_xname,
		    is_up ? "UP" : "DOWN" );
	CURVNET_RESTORE();
}
/*
 * Public entry point to mark an interface down: clears IFF_UP through
 * if_unroute() for every address family.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_down(struct ifnet *ifp)
{

	if_unroute(ifp, IFF_UP, AF_UNSPEC);
}
/*
 * Public entry point to mark an interface up: sets IFF_UP through
 * if_route() for every address family.
 * NOTE: must be called at splnet or equivalent.
 */
void
if_up(struct ifnet *ifp)
{

	if_route(ifp, IFF_UP, AF_UNSPEC);
}
/*
 * Flush an interface's send queue.  With the queue locked: purge ALTQ
 * when it is enabled, free every packet chained from the queue head,
 * then reset head, tail and length.
 */
static void
if_qflush(struct ifnet *ifp)
{
	struct mbuf *pkt, *next_pkt;
	struct ifaltq *sndq;

	sndq = &ifp->if_snd;
	IFQ_LOCK(sndq);
#ifdef ALTQ
	if (ALTQ_IS_ENABLED(sndq))
		ALTQ_PURGE(sndq);
#endif
	/* Fetch the successor before freeing the current packet. */
	for (pkt = sndq->ifq_head; pkt != 0; pkt = next_pkt) {
		next_pkt = pkt->m_act;
		m_freem(pkt);
	}
	sndq->ifq_head = 0;
	sndq->ifq_tail = 0;
	sndq->ifq_len = 0;
	IFQ_UNLOCK(sndq);
}
/*
 * Interface watchdog timer tick.  For every interface in every vnet,
 * an armed timer (non-zero if_timer) is decremented, and when it reaches
 * zero the interface's watchdog routine, if any, is called.  The routine
 * re-arms itself with timeout() before returning.
 *
 * XXXRW: Note that because timeouts run with Giant, if_watchdog() is called
 * holding Giant.  If we switch to an MPSAFE callout, we likely need to grab
 * Giant before entering if_watchdog() on an IFF_NEEDSGIANT interface.
 */
static void
if_slowtimo(void *arg)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct ifnet *ifp;
	int ospl = splimp();

	IFNET_RLOCK();
	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		INIT_VNET_NET(vnet_iter);
		TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
			/* Timer not armed. */
			if (ifp->if_timer == 0)
				continue;
			/* Armed but not yet expired. */
			if (--ifp->if_timer != 0)
				continue;
			if (ifp->if_watchdog)
				(*ifp->if_watchdog)(ifp);
		}
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();
	IFNET_RUNLOCK();
	splx(ospl);
	timeout(if_slowtimo, (void *)0, hz / IFNET_SLOWHZ);
}
/*
 * Map an interface name to its interface structure pointer.
 * The interface list is walked under the ifnet read lock and names are
 * compared over at most IFNAMSIZ bytes.  Returns the matching ifnet, or
 * the null value the list walk ends with when there is no match.
 */
struct ifnet *
ifunit(const char *name)
{
	INIT_VNET_NET(curvnet);
	struct ifnet *ifp;

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		if (strncmp(name, ifp->if_xname, IFNAMSIZ) != 0)
			continue;
		/* Found it; leave ifp pointing at the match. */
		break;
	}
	IFNET_RUNLOCK();
	return (ifp);
}
/*
* Hardware specific interface ioctls.
*
* Handles the interface-level requests listed in the switch below for ifp.
* 'data' is treated as a struct ifreq (or, for a few requests, as the
* request-specific structure cast from it).  Most set-type requests are
* gated by priv_check() on td before anything is changed.
*
* Returns 0 or an errno value; ENOIOCTL means cmd is not handled here.
*/
static int
ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td)
{
struct ifreq *ifr;
struct ifstat *ifs;
int error = 0;
int new_flags, temp_flags;
size_t namelen, onamelen;
char new_name[IFNAMSIZ];
struct ifaddr *ifa;
struct sockaddr_dl *sdl;
ifr = (struct ifreq *)data;
switch (cmd) {
case SIOCGIFINDEX:
ifr->ifr_index = ifp->if_index;
break;
case SIOCGIFFLAGS:
/*
* Report stack-owned and driver-owned flags together, split into
* the low and high 16-bit halves of the request.
*/
temp_flags = ifp->if_flags | ifp->if_drv_flags;
ifr->ifr_flags = temp_flags & 0xffff;
ifr->ifr_flagshigh = temp_flags >> 16;
break;
case SIOCGIFCAP:
ifr->ifr_reqcap = ifp->if_capabilities;
ifr->ifr_curcap = ifp->if_capenable;
break;
#ifdef MAC
case SIOCGIFMAC:
error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp);
break;
#endif
case SIOCGIFMETRIC:
ifr->ifr_metric = ifp->if_metric;
break;
case SIOCGIFMTU:
ifr->ifr_mtu = ifp->if_mtu;
break;
case SIOCGIFPHYS:
ifr->ifr_phys = ifp->if_physical;
break;
case SIOCSIFFLAGS:
error = priv_check(td, PRIV_NET_SETIFFLAGS);
if (error)
return (error);
/*
* Currently, no driver owned flags pass the IFF_CANTCHANGE
* check, so we don't need special handling here yet.
*/
new_flags = (ifr->ifr_flags & 0xffff) |
(ifr->ifr_flagshigh << 16);
/* Run the up/down transition when IFF_UP is about to flip. */
if (ifp->if_flags & IFF_SMART) {
/* Smart drivers twiddle their own routes */
} else if (ifp->if_flags & IFF_UP &&
(new_flags & IFF_UP) == 0) {
int s = splimp();
if_down(ifp);
splx(s);
} else if (new_flags & IFF_UP &&
(ifp->if_flags & IFF_UP) == 0) {
int s = splimp();
if_up(ifp);
splx(s);
}
/* See if permanently promiscuous mode bit is about to flip */
if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) {
if (new_flags & IFF_PPROMISC)
ifp->if_flags |= IFF_PROMISC;
else if (ifp->if_pcount == 0)
ifp->if_flags &= ~IFF_PROMISC;
log(LOG_INFO, "%s: permanently promiscuous mode %s\n",
ifp->if_xname,
(new_flags & IFF_PPROMISC) ? "enabled" : "disabled");
}
/* Keep the IFF_CANTCHANGE bits as they are; take the rest from the request. */
ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) |
(new_flags &~ IFF_CANTCHANGE);
/* The driver's result is deliberately ignored here. */
if (ifp->if_ioctl) {
IFF_LOCKGIANT(ifp);
(void) (*ifp->if_ioctl)(ifp, cmd, data);
IFF_UNLOCKGIANT(ifp);
}
getmicrotime(&ifp->if_lastchange);
break;
case SIOCSIFCAP:
error = priv_check(td, PRIV_NET_SETIFCAP);
if (error)
return (error);
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
/* Refuse capabilities the interface does not advertise. */
if (ifr->ifr_reqcap & ~ifp->if_capabilities)
return (EINVAL);
IFF_LOCKGIANT(ifp);
error = (*ifp->if_ioctl)(ifp, cmd, data);
IFF_UNLOCKGIANT(ifp);
if (error == 0)
getmicrotime(&ifp->if_lastchange);
break;
#ifdef MAC
case SIOCSIFMAC:
error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp);
break;
#endif
case SIOCSIFNAME:
error = priv_check(td, PRIV_NET_SETIFNAME);
if (error)
return (error);
/* Fetch the new name from user space; reject empty or already-used names. */
error = copyinstr(ifr->ifr_data, new_name, IFNAMSIZ, NULL);
if (error != 0)
return (error);
if (new_name[0] == '\0')
return (EINVAL);
if (ifunit(new_name) != NULL)
return (EEXIST);
/* Announce the departure of the interface. */
rt_ifannouncemsg(ifp, IFAN_DEPARTURE);
EVENTHANDLER_INVOKE(ifnet_departure_event, ifp);
log(LOG_INFO, "%s: changing name to '%s'\n",
ifp->if_xname, new_name);
strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname));
/* Rewrite the name stored in the interface's link-level sockaddr. */
ifa = ifp->if_addr;
IFA_LOCK(ifa);
sdl = (struct sockaddr_dl *)ifa->ifa_addr;
namelen = strlen(new_name);
onamelen = sdl->sdl_nlen;
/*
* Move the address if needed. This is safe because we
* allocate space for a name of length IFNAMSIZ when we
* create this in if_attach().
*/
if (namelen != onamelen) {
bcopy(sdl->sdl_data + onamelen,
sdl->sdl_data + namelen, sdl->sdl_alen);
}
bcopy(new_name, sdl->sdl_data, namelen);
sdl->sdl_nlen = namelen;
/* Rebuild the netmask: clear the old name bytes, then set 0xff over the new name length. */
sdl = (struct sockaddr_dl *)ifa->ifa_netmask;
bzero(sdl->sdl_data, onamelen);
while (namelen != 0)
sdl->sdl_data[--namelen] = 0xff;
IFA_UNLOCK(ifa);
EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp);
/* Announce the return of the interface. */
rt_ifannouncemsg(ifp, IFAN_ARRIVAL);
break;
case SIOCSIFMETRIC:
error = priv_check(td, PRIV_NET_SETIFMETRIC);
if (error)
return (error);
ifp->if_metric = ifr->ifr_metric;
getmicrotime(&ifp->if_lastchange);
break;
case SIOCSIFPHYS:
error = priv_check(td, PRIV_NET_SETIFPHYS);
if (error)
return (error);
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
IFF_LOCKGIANT(ifp);
error = (*ifp->if_ioctl)(ifp, cmd, data);
IFF_UNLOCKGIANT(ifp);
if (error == 0)
getmicrotime(&ifp->if_lastchange);
break;
case SIOCSIFMTU:
{
/* Remembered so a change made by the driver can be detected below. */
u_long oldmtu = ifp->if_mtu;
error = priv_check(td, PRIV_NET_SETIFMTU);
if (error)
return (error);
if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU)
return (EINVAL);
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
IFF_LOCKGIANT(ifp);
error = (*ifp->if_ioctl)(ifp, cmd, data);
IFF_UNLOCKGIANT(ifp);
if (error == 0) {
getmicrotime(&ifp->if_lastchange);
rt_ifmsg(ifp);
}
/*
* If the link MTU changed, do network layer specific procedure.
*/
if (ifp->if_mtu != oldmtu) {
#ifdef INET6
nd6_setmtu(ifp);
#endif
}
break;
}
case SIOCADDMULTI:
case SIOCDELMULTI:
if (cmd == SIOCADDMULTI)
error = priv_check(td, PRIV_NET_ADDMULTI);
else
error = priv_check(td, PRIV_NET_DELMULTI);
if (error)
return (error);
/* Don't allow group membership on non-multicast interfaces. */
if ((ifp->if_flags & IFF_MULTICAST) == 0)
return (EOPNOTSUPP);
/* Don't let users screw up protocols' entries. */
if (ifr->ifr_addr.sa_family != AF_LINK)
return (EINVAL);
if (cmd == SIOCADDMULTI) {
struct ifmultiaddr *ifma;
/*
* Userland is only permitted to join groups once
* via the if_addmulti() KPI, because it cannot hold
* struct ifmultiaddr * between calls. It may also
* lose a race while we check if the membership
* already exists.
*/
IF_ADDR_LOCK(ifp);
ifma = if_findmulti(ifp, &ifr->ifr_addr);
IF_ADDR_UNLOCK(ifp);
if (ifma != NULL)
error = EADDRINUSE;
else
error = if_addmulti(ifp, &ifr->ifr_addr, &ifma);
} else {
error = if_delmulti(ifp, &ifr->ifr_addr);
}
if (error == 0)
getmicrotime(&ifp->if_lastchange);
break;
/* Privileged requests that are passed straight through to the driver. */
case SIOCSIFPHYADDR:
case SIOCDIFPHYADDR:
#ifdef INET6
case SIOCSIFPHYADDR_IN6:
#endif
case SIOCSLIFPHYADDR:
case SIOCSIFMEDIA:
case SIOCSIFGENERIC:
error = priv_check(td, PRIV_NET_HWIOCTL);
if (error)
return (error);
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
IFF_LOCKGIANT(ifp);
error = (*ifp->if_ioctl)(ifp, cmd, data);
IFF_UNLOCKGIANT(ifp);
if (error == 0)
getmicrotime(&ifp->if_lastchange);
break;
case SIOCGIFSTATUS:
/* Start with an empty status string, then let the driver fill it in. */
ifs = (struct ifstat *)data;
ifs->ascii[0] = '\0';
/* FALLTHROUGH */
/* Get-type requests passed to the driver without a privilege check. */
case SIOCGIFPSRCADDR:
case SIOCGIFPDSTADDR:
case SIOCGLIFPHYADDR:
case SIOCGIFMEDIA:
case SIOCGIFGENERIC:
if (ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
IFF_LOCKGIANT(ifp);
error = (*ifp->if_ioctl)(ifp, cmd, data);
IFF_UNLOCKGIANT(ifp);
break;
case SIOCSIFLLADDR:
error = priv_check(td, PRIV_NET_SETLLADDR);
if (error)
return (error);
error = if_setlladdr(ifp,
ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len);
break;
case SIOCAIFGROUP:
{
struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
error = priv_check(td, PRIV_NET_ADDIFGROUP);
if (error)
return (error);
if ((error = if_addgroup(ifp, ifgr->ifgr_group)))
return (error);
break;
}
case SIOCGIFGROUP:
if ((error = if_getgroup((struct ifgroupreq *)ifr, ifp)))
return (error);
break;
case SIOCDIFGROUP:
{
struct ifgroupreq *ifgr = (struct ifgroupreq *)ifr;
error = priv_check(td, PRIV_NET_DELIFGROUP);
if (error)
return (error);
if ((error = if_delgroup(ifp, ifgr->ifgr_group)))
return (error);
break;
}
default:
/* Not an interface-level request; the caller decides what to do next. */
error = ENOIOCTL;
break;
}
return (error);
}
/*
* Interface ioctls.
*
* Dispatch order:
*  1. interface-configuration listing requests go to ifconf();
*  2. clone create/destroy/list and group-member requests are handled
*     by name, without looking up an interface;
*  3. the interface named in the request is looked up with ifunit()
*     (ENXIO when it does not exist) and ifhwioctl() is tried;
*  4. anything ifhwioctl() answers with ENOIOCTL is handed to the
*     socket protocol's pru_control routine.
* Returns 0 or an errno value.
*/
int
ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td)
{
struct ifnet *ifp;
struct ifreq *ifr;
int error;
int oif_flags;
switch (cmd) {
case SIOCGIFCONF:
case OSIOCGIFCONF:
#ifdef __amd64__
case SIOCGIFCONF32:
#endif
return (ifconf(cmd, data));
}
ifr = (struct ifreq *)data;
switch (cmd) {
case SIOCIFCREATE:
case SIOCIFCREATE2:
error = priv_check(td, PRIV_NET_IFCREATE);
if (error)
return (error);
/* Only SIOCIFCREATE2 passes the request's data pointer on to the clone code. */
return (if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name),
cmd == SIOCIFCREATE2 ? ifr->ifr_data : NULL));
case SIOCIFDESTROY:
error = priv_check(td, PRIV_NET_IFDESTROY);
if (error)
return (error);
return if_clone_destroy(ifr->ifr_name);
case SIOCIFGCLONERS:
return (if_clone_list((struct if_clonereq *)data));
case SIOCGIFGMEMB:
return (if_getgroupmembers((struct ifgroupreq *)data));
}
ifp = ifunit(ifr->ifr_name);
if (ifp == 0)
return (ENXIO);
error = ifhwioctl(cmd, ifp, data, td);
if (error != ENOIOCTL)
return (error);
/* Snapshot the flags so an IFF_UP change made by the protocol can be detected below. */
oif_flags = ifp->if_flags;
if (so->so_proto == 0)
return (EOPNOTSUPP);
#ifndef COMPAT_43
error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd,
data,
ifp, td));
#else
{
/* Original command, kept to decide on the post-call fixup. */
int ocmd = cmd;
switch (cmd) {
case SIOCSIFDSTADDR:
case SIOCSIFADDR:
case SIOCSIFBRDADDR:
case SIOCSIFNETMASK:
#if BYTE_ORDER != BIG_ENDIAN
/*
* A zero sa_family with a small sa_len is taken to be the old
* layout: the value found in sa_len is moved to sa_family and
* sa_len is set to 16.
*/
if (ifr->ifr_addr.sa_family == 0 &&
ifr->ifr_addr.sa_len < 16) {
ifr->ifr_addr.sa_family = ifr->ifr_addr.sa_len;
ifr->ifr_addr.sa_len = 16;
}
#else
if (ifr->ifr_addr.sa_len == 0)
ifr->ifr_addr.sa_len = 16;
#endif
break;
/* Map the old get-address commands onto their current equivalents. */
case OSIOCGIFADDR:
cmd = SIOCGIFADDR;
break;
case OSIOCGIFDSTADDR:
cmd = SIOCGIFDSTADDR;
break;
case OSIOCGIFBRDADDR:
cmd = SIOCGIFBRDADDR;
break;
case OSIOCGIFNETMASK:
cmd = SIOCGIFNETMASK;
}
error = ((*so->so_proto->pr_usrreqs->pru_control)(so,
cmd,
data,
ifp, td));
switch (ocmd) {
case OSIOCGIFADDR:
case OSIOCGIFDSTADDR:
case OSIOCGIFBRDADDR:
case OSIOCGIFNETMASK:
/* For the old commands, store the family as a u_short at the start of the returned address. */
*(u_short *)&ifr->ifr_addr = ifr->ifr_addr.sa_family;
}
}
#endif /* COMPAT_43 */
/* If the protocol request flipped IFF_UP, let IPv6 know the interface came up. */
if ((oif_flags ^ ifp->if_flags) & IFF_UP) {
#ifdef INET6
DELAY(100);/* XXX: temporary workaround for fxp issue*/
if (ifp->if_flags & IFF_UP) {
int s = splimp();
in6_if_up(ifp);
splx(s);
}
#endif
}
return (error);
}
/*
 * Common code for reference counted interface flags, as used by
 * ifpromisc() and if_allmulti().
 *
 * 'flag' is the interface flag to manage and '*refcount' its counter;
 * 'onswitch' selects increment (non-zero) or decrement (zero).  'pflag'
 * may name a permanent mode flag, such as IFF_PPROMISC for promiscuous
 * mode (0 if none): while it is set on the interface only the counter
 * is touched.  The flag itself is toggled, and the driver told through
 * SIOCSIFFLAGS, only on the first "on" and the last "off".  If there is
 * no driver ioctl or it fails, counter and flags are restored and the
 * error is returned.
 *
 * Only to be used on stack-owned flags, not driver-owned flags.
 */
static int
if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch)
{
	struct ifreq ifr;
	int error;
	int oldflags, oldcount;

	/* Sanity checks to catch programming errors */
	KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0,
	    ("%s: setting driver-owned flag %d", __func__, flag));

	if (onswitch) {
		KASSERT(*refcount >= 0,
		    ("%s: increment negative refcount %d for flag %d",
		    __func__, *refcount, flag));
	} else {
		KASSERT(*refcount > 0,
		    ("%s: decrement non-positive refcount %d for flag %d",
		    __func__, *refcount, flag));
	}

	/* In case this mode is permanent, just touch refcount */
	if (ifp->if_flags & pflag) {
		if (onswitch)
			(*refcount)++;
		else
			(*refcount)--;
		return (0);
	}

	/* Save ifnet parameters, for if_ioctl() may fail */
	oldcount = *refcount;
	oldflags = ifp->if_flags;

	/*
	 * Adjust the counter.  Unless we are the first user switching
	 * the flag on or the last one switching it off, that is all.
	 */
	if (onswitch) {
		*refcount = oldcount + 1;
		if (oldcount != 0)
			return (0);
		ifp->if_flags |= flag;
	} else {
		*refcount = oldcount - 1;
		if (*refcount != 0)
			return (0);
		ifp->if_flags &= ~flag;
	}

	/* Call down the driver since we've changed interface flags */
	error = EOPNOTSUPP;
	if (ifp->if_ioctl != NULL) {
		ifr.ifr_flags = ifp->if_flags & 0xffff;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		IFF_LOCKGIANT(ifp);
		error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
		IFF_UNLOCKGIANT(ifp);
		if (error == 0) {
			/* Notify userland that interface flags have changed */
			rt_ifmsg(ifp);
			return (0);
		}
	}

	/* Recover after missing driver ioctl or driver error */
	*refcount = oldcount;
	ifp->if_flags = oldflags;
	return (error);
}
/*
 * Switch promiscuous mode on interface ifp on or off according to the
 * truth value of pswitch.  Requests are reference counted through
 * if_setflag(), so only the first "on" and the final "off" actually
 * change anything.  Results are undefined if "on" and "off" requests
 * are not matched.  A message is logged when the IFF_PROMISC bit
 * really changed.  Returns the result of if_setflag().
 */
int
ifpromisc(struct ifnet *ifp, int pswitch)
{
	int error;
	int oldflags = ifp->if_flags;

	error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC,
	    &ifp->if_pcount, pswitch);
	if (error != 0)
		return (error);

	/* If promiscuous mode status has changed, log a message */
	if ((ifp->if_flags ^ oldflags) & IFF_PROMISC) {
		log(LOG_INFO, "%s: promiscuous mode %s\n",
		    ifp->if_xname,
		    (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled");
	}
	return (error);
}
/*
 * Return interface configuration
 * of system.  List may be used
 * in later ioctl's (above) to get
 * other information.
 *
 * 'data' is a struct ifconf (a struct ifconf32 for SIOCGIFCONF32 on
 * amd64) naming a user buffer and its length.  One record is produced
 * per interface address visible to the caller, or one record with a
 * zeroed address for an interface without any.  Records are collected
 * in a fixed-length sbuf and copied out; ifc_len is set to the number
 * of valid bytes.  Returns 0, EINVAL for a non-positive buffer length,
 * ENAMETOOLONG if an interface name does not fit, or the copyout error.
 *
 * An address shorter than struct sockaddr is copied only up to its own
 * sa_len into a zeroed field, so no bytes lying beyond the address are
 * handed to userland.
 */
/*ARGSUSED*/
static int
ifconf(u_long cmd, caddr_t data)
{
	INIT_VNET_NET(curvnet);
	struct ifconf *ifc = (struct ifconf *)data;
#ifdef __amd64__
	struct ifconf32 *ifc32 = (struct ifconf32 *)data;
	struct ifconf ifc_swab;
#endif
	struct ifnet *ifp;
	struct ifaddr *ifa;
	struct ifreq ifr;
	struct sbuf *sb;
	int error, full = 0, valid_len, max_len;

#ifdef __amd64__
	if (cmd == SIOCGIFCONF32) {
		ifc_swab.ifc_len = ifc32->ifc_len;
		ifc_swab.ifc_buf = (caddr_t)(uintptr_t)ifc32->ifc_buf;
		ifc = &ifc_swab;
	}
#endif
	/* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */
	max_len = MAXPHYS - 1;

	/* Prevent hostile input from being able to crash the system */
	if (ifc->ifc_len <= 0)
		return (EINVAL);

again:
	if (ifc->ifc_len <= max_len) {
		max_len = ifc->ifc_len;
		full = 1;
	}
	sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN);
	/* From here on max_len counts the bytes a complete listing needs. */
	max_len = 0;
	valid_len = 0;

	IFNET_RLOCK();		/* could sleep XXX */
	TAILQ_FOREACH(ifp, &V_ifnet, if_link) {
		int addrs;

		/*
		 * Zero the whole request to make sure we don't
		 * disclose the contents of the stack.
		 */
		memset(&ifr, 0, sizeof(ifr));

		if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name))
		    >= sizeof(ifr.ifr_name)) {
			sbuf_delete(sb);
			IFNET_RUNLOCK();
			return (ENAMETOOLONG);
		}

		addrs = 0;
		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			struct sockaddr *sa = ifa->ifa_addr;

			if (jailed(curthread->td_ucred) &&
			    !prison_if(curthread->td_ucred, sa))
				continue;
			addrs++;
#ifdef COMPAT_43
			if (cmd == OSIOCGIFCONF) {
				struct osockaddr *osa =
				    (struct osockaddr *)&ifr.ifr_addr;

				/* Never read past the end of a short address. */
				if (sa->sa_len < sizeof(*sa)) {
					bzero((caddr_t)&ifr.ifr_addr,
					    sizeof(ifr.ifr_addr));
					bcopy(sa, &ifr.ifr_addr, sa->sa_len);
				} else
					ifr.ifr_addr = *sa;
				osa->sa_family = sa->sa_family;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else
#endif
			if (sa->sa_len <= sizeof(*sa)) {
				/* Never read past the end of a short address. */
				if (sa->sa_len < sizeof(*sa)) {
					bzero((caddr_t)&ifr.ifr_addr,
					    sizeof(ifr.ifr_addr));
					bcopy(sa, &ifr.ifr_addr, sa->sa_len);
				} else
					ifr.ifr_addr = *sa;
				sbuf_bcat(sb, &ifr, sizeof(ifr));
				max_len += sizeof(ifr);
			} else {
				/* Long address: name part, then the full address. */
				sbuf_bcat(sb, &ifr,
				    offsetof(struct ifreq, ifr_addr));
				max_len += offsetof(struct ifreq, ifr_addr);
				sbuf_bcat(sb, sa, sa->sa_len);
				max_len += sa->sa_len;
			}

			/* Only whole records count as valid output. */
			if (!sbuf_overflowed(sb))
				valid_len = sbuf_len(sb);
		}
		if (addrs == 0) {
			bzero((caddr_t)&ifr.ifr_addr, sizeof(ifr.ifr_addr));
			sbuf_bcat(sb, &ifr, sizeof(ifr));
			max_len += sizeof(ifr);

			if (!sbuf_overflowed(sb))
				valid_len = sbuf_len(sb);
		}
	}
	IFNET_RUNLOCK();

	/*
	 * If we didn't allocate enough space (uncommon), try again.  If
	 * we have already allocated as much space as we are allowed,
	 * return what we've got.
	 */
	if (valid_len != max_len && !full) {
		sbuf_delete(sb);
		goto again;
	}

	ifc->ifc_len = valid_len;
#ifdef __amd64__
	if (cmd == SIOCGIFCONF32)
		ifc32->ifc_len = valid_len;
#endif
	sbuf_finish(sb);
	error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len);
	sbuf_delete(sb);
	return (error);
}
/*
* Just like ifpromisc(), but for all-multicast-reception mode.
*/
int
if_allmulti(struct ifnet *ifp, int onswitch)
{
return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch));
}
/*
 * Look up 'sa' on the interface's multicast address list.  Link-layer
 * addresses (AF_LINK) are compared with sa_dl_equal(), everything else
 * with sa_equal().  The interface address lock must be held.  Returns
 * the matching entry, or the null value the list walk ends with.
 */
struct ifmultiaddr *
if_findmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;
	int match;

	IF_ADDR_LOCK_ASSERT(ifp);

	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		match = (sa->sa_family == AF_LINK) ?
		    sa_dl_equal(ifma->ifma_addr, sa) :
		    sa_equal(ifma->ifma_addr, sa);
		if (match)
			break;
	}

	return ifma;
}
/*
 * Allocate and initialise a new ifmultiaddr from the passed arguments.
 * Private copies are made of 'sa' and, when given, of 'llsa'.  The new
 * entry is not put on the ifnet multicast address list here; the caller
 * does that along with other setup work (such as notifying the device
 * driver).  The reference count starts at 1.  'mflags' is handed to
 * malloc().  Returns NULL when any allocation fails, with nothing left
 * allocated.
 */
static struct ifmultiaddr *
if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa,
    int mflags)
{
	struct ifmultiaddr *ifma;
	struct sockaddr *addr_copy, *lladdr_copy;

	ifma = malloc(sizeof *ifma, M_IFMADDR, mflags |
	    M_ZERO);
	if (ifma == NULL)
		return (NULL);

	addr_copy = malloc(sa->sa_len, M_IFMADDR, mflags);
	if (addr_copy == NULL)
		goto fail_ifma;
	bcopy(sa, addr_copy, sa->sa_len);

	ifma->ifma_addr = addr_copy;
	ifma->ifma_ifp = ifp;
	ifma->ifma_refcount = 1;
	ifma->ifma_protospec = NULL;
	ifma->ifma_lladdr = NULL;

	/* Optional link-layer address. */
	if (llsa != NULL) {
		lladdr_copy = malloc(llsa->sa_len, M_IFMADDR, mflags);
		if (lladdr_copy == NULL)
			goto fail_addr;
		bcopy(llsa, lladdr_copy, llsa->sa_len);
		ifma->ifma_lladdr = lladdr_copy;
	}
	return (ifma);

fail_addr:
	free(ifma->ifma_addr, M_IFMADDR);
fail_ifma:
	free(ifma, M_IFMADDR);
	return (NULL);
}
/*
 * if_freemulti: release an ifmultiaddr together with its address copy
 * and, when present, its link-layer address copy.  The entry must have
 * no references and no protocol-specific state left.  Reference
 * counting, driver notification, routing messages and release of any
 * dependent link layer state are the caller's business.
 */
static void
if_freemulti(struct ifmultiaddr *ifma)
{
	struct sockaddr *lladdr = ifma->ifma_lladdr;

	KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d",
	    ifma->ifma_refcount));
	KASSERT(ifma->ifma_protospec == NULL,
	    ("if_freemulti: protospec not NULL"));

	if (lladdr != NULL)
		free(lladdr, M_IFMADDR);
	free(ifma->ifma_addr, M_IFMADDR);
	free(ifma, M_IFMADDR);
}
/*
* Register an additional multicast address with a network interface.
*
* - If the address is already present, bump the reference count on the
* address and return.
* - If the address is not link-layer, look up a link layer address.
* - Allocate address structures for one or both addresses, and attach to the
* multicast address list on the interface. If automatically adding a link
* layer address, the protocol address will own a reference to the link
* layer address, to be freed when it is freed.
* - Notify the network device driver of an addition to the multicast address
* list.
*
* 'sa' points to caller-owned memory with the desired multicast address.
*
* 'retifma' will be used to return a pointer to the resulting multicast
* address reference, if desired.
*
* Returns 0 on success, the error from the interface's if_resolvemulti
* routine, or ENOMEM when an address structure cannot be allocated.
*/
int
if_addmulti(struct ifnet *ifp, struct sockaddr *sa,
struct ifmultiaddr **retifma)
{
struct ifmultiaddr *ifma, *ll_ifma;
struct sockaddr *llsa;
int error;
/*
* If the address is already present, return a new reference to it;
* otherwise, allocate storage and set up a new address.
*/
IF_ADDR_LOCK(ifp);
ifma = if_findmulti(ifp, sa);
if (ifma != NULL) {
ifma->ifma_refcount++;
if (retifma != NULL)
*retifma = ifma;
IF_ADDR_UNLOCK(ifp);
return (0);
}
/*
* The address isn't already present; resolve the protocol address
* into a link layer address, and then look that up, bump its
* refcount or allocate an ifma for that also. If 'llsa' was
* returned, we will need to free it later.
*/
llsa = NULL;
ll_ifma = NULL;
if (ifp->if_resolvemulti != NULL) {
error = ifp->if_resolvemulti(ifp, &llsa, sa);
if (error)
goto unlock_out;
}
/*
* Allocate the new address. Don't hook it up yet, as we may also
* need to allocate a link layer multicast address.
*/
ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT);
if (ifma == NULL) {
error = ENOMEM;
goto free_llsa_out;
}
/*
* If a link layer address is found, we'll need to see if it's
* already present in the address list, or allocate is as well.
* When this block finishes, the link layer address will be on the
* list.
*/
if (llsa != NULL) {
ll_ifma = if_findmulti(ifp, llsa);
if (ll_ifma == NULL) {
ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT);
if (ll_ifma == NULL) {
/* Drop the initial reference so if_freemulti()'s refcount assertion holds. */
--ifma->ifma_refcount;
if_freemulti(ifma);
error = ENOMEM;
goto free_llsa_out;
}
TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma,
ifma_link);
} else
ll_ifma->ifma_refcount++;
ifma->ifma_llifma = ll_ifma;
}
/*
* We now have a new multicast address, ifma, and possibly a new or
* referenced link layer address. Add the primary address to the
* ifnet address list.
*/
TAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link);
if (retifma != NULL)
*retifma = ifma;
/*
* Must generate the message while holding the lock so that 'ifma'
* pointer is still valid.
*/
rt_newmaddrmsg(RTM_NEWMADDR, ifma);
IF_ADDR_UNLOCK(ifp);
/*
* We are certain we have added something, so call down to the
* interface to let them know about it.
*/
if (ifp->if_ioctl != NULL) {
IFF_LOCKGIANT(ifp);
(void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0);
IFF_UNLOCKGIANT(ifp);
}
/* The ifma entries hold their own copies; release the resolved address. */
if (llsa != NULL)
free(llsa, M_IFMADDR);
return (0);
/* Error exits: release the resolved address (if any), then the lock. */
free_llsa_out:
if (llsa != NULL)
free(llsa, M_IFMADDR);
unlock_out:
IF_ADDR_UNLOCK(ifp);
return (error);
}
/*
 * Delete a multicast group membership by network-layer group address.
 *
 * Returns ENOENT if the entry could not be found and 0 otherwise.  If
 * ifp no longer exists, results are undefined; with INVARIANTS the
 * interface list is searched for ifp first and its absence is asserted.
 * This entry point should only be used from subsystems which do
 * appropriate locking to hold ifp for the duration of the call.
 * When the last reference went away the driver is told through
 * SIOCDELMULTI, after the address lock has been dropped.
 * Network-layer protocol domains must use if_delmulti_ifma().
 */
int
if_delmulti(struct ifnet *ifp, struct sockaddr *sa)
{
	struct ifmultiaddr *ifma;
	int lastref;
#ifdef INVARIANTS
	struct ifnet *oifp;
	INIT_VNET_NET(ifp->if_vnet);

	IFNET_RLOCK();
	TAILQ_FOREACH(oifp, &V_ifnet, if_link) {
		if (ifp == oifp)
			break;
	}
	if (ifp != oifp)
		ifp = NULL;
	IFNET_RUNLOCK();

	KASSERT(ifp != NULL, ("%s: ifnet went away", __func__));
#endif
	if (ifp == NULL)
		return (ENOENT);

	IF_ADDR_LOCK(ifp);
	ifma = if_findmulti(ifp, sa);
	if (ifma == NULL) {
		IF_ADDR_UNLOCK(ifp);
		return (ENOENT);
	}
	lastref = if_delmulti_locked(ifp, ifma, 0);
	IF_ADDR_UNLOCK(ifp);

	/* Final reference released: let the driver update itself. */
	if (lastref && ifp->if_ioctl != NULL) {
		IFF_LOCKGIANT(ifp);
		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
		IFF_UNLOCKGIANT(ifp);
	}

	return (0);
}
/*
 * Delete a multicast group membership by group membership pointer.
 * Network-layer protocol domains must use this routine.
 *
 * It is safe to call this routine if the ifp disappeared: the address
 * lock is taken, and the driver told through SIOCDELMULTI, only when the
 * membership still points at an interface.  With DIAGNOSTIC the
 * interface list is searched as well, and an interface that is no longer
 * on it is reported and treated as gone.  Callers should hold
 * IFF_LOCKGIANT() to avoid a LOR in case the hardware needs to be
 * reconfigured.
 */
void
if_delmulti_ifma(struct ifmultiaddr *ifma)
{
#ifdef DIAGNOSTIC
	INIT_VNET_NET(curvnet);
#endif
	struct ifnet *ifp;
	int lastref;

	ifp = ifma->ifma_ifp;
#ifdef DIAGNOSTIC
	if (ifp == NULL) {
		printf("%s: ifma_ifp seems to be detached\n", __func__);
	} else {
		struct ifnet *oifp;

		IFNET_RLOCK();
		TAILQ_FOREACH(oifp, &V_ifnet, if_link) {
			if (ifp == oifp)
				break;
		}
		if (ifp != oifp) {
			printf("%s: ifnet %p disappeared\n", __func__, ifp);
			ifp = NULL;
		}
		IFNET_RUNLOCK();
	}
#endif
	/* No ifnet instance: there is no lock to take and no driver to tell. */
	if (ifp == NULL) {
		(void)if_delmulti_locked(ifp, ifma, 0);
		return;
	}

	IF_ADDR_LOCK(ifp);
	lastref = if_delmulti_locked(ifp, ifma, 0);
	IF_ADDR_UNLOCK(ifp);

	/* If the group was left: update the hardware hash filter. */
	if (lastref && ifp->if_ioctl != NULL) {
		IFF_LOCKGIANT(ifp);
		(void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0);
		IFF_UNLOCKGIANT(ifp);
	}
}
/*
* Perform deletion of network-layer and/or link-layer multicast address.
*
* 'ifp' may be NULL; the interface recorded in 'ifma' is what is actually
* used.  When both are set they must agree and the address lock must be
* held.  'detaching' is non-zero when the ifnet is being detached.
*
* Return 0 if the reference count was decremented.
* Return 1 if the final reference was released, indicating that the
* hardware hash filter should be reprogrammed.
*/
static int
if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching)
{
struct ifmultiaddr *ll_ifma;
if (ifp != NULL && ifma->ifma_ifp != NULL) {
KASSERT(ifma->ifma_ifp == ifp,
("%s: inconsistent ifp %p", __func__, ifp));
IF_ADDR_LOCK_ASSERT(ifp);
}
/* From here on, work with the interface the membership itself records. */
ifp = ifma->ifma_ifp;
/*
* If the ifnet is detaching, null out references to ifnet,
* so that upper protocol layers will notice, and not attempt
* to obtain locks for an ifnet which no longer exists. The
* routing socket announcement must happen before the ifnet
* instance is detached from the system.
*/
if (detaching) {
#ifdef DIAGNOSTIC
printf("%s: detaching ifnet instance %p\n", __func__, ifp);
#endif
/*
* ifp may already be nulled out if we are being reentered
* to delete the ll_ifma.
*/
if (ifp != NULL) {
rt_newmaddrmsg(RTM_DELMADDR, ifma);
ifma->ifma_ifp = NULL;
}
}
/* Other references remain: nothing is unlinked or freed. */
if (--ifma->ifma_refcount > 0)
return 0;
/*
* If this ifma is a network-layer ifma, a link-layer ifma may
* have been associated with it. Release it first if so.
*/
ll_ifma = ifma->ifma_llifma;
if (ll_ifma != NULL) {
KASSERT(ifma->ifma_lladdr != NULL,
("%s: llifma w/o lladdr", __func__));
if (detaching)
ll_ifma->ifma_ifp = NULL; /* XXX */
if (--ll_ifma->ifma_refcount == 0) {
if (ifp != NULL) {
TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma,
ifma_link);
}
if_freemulti(ll_ifma);
}
}
/* Unlink from the interface's list (if there still is one) and free. */
if (ifp != NULL)
TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
if_freemulti(ifma);
/*
* The last reference to this instance of struct ifmultiaddr
* was released; the hardware should be notified of this change.
*/
return 1;
}
/*
 * Set the link layer address on an interface.
 *
 * Only certain interface types are supported (ENODEV otherwise), and
 * the length of the address is not allowed to change (EINVAL, as when
 * the interface has no link-level address to update).  When the
 * interface is up it is cycled down and up through the driver's
 * SIOCSIFFLAGS handler so the address filter gets reprogrammed, and
 * with INET arp_ifinit() is called for every AF_INET address.
 * Returns 0 on success.
 */
int
if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len)
{
	struct sockaddr_dl *sdl;
	struct ifaddr *ifa;
	struct ifreq ifr;

	ifa = ifp->if_addr;
	if (ifa == NULL)
		return (EINVAL);
	sdl = (struct sockaddr_dl *)ifa->ifa_addr;
	if (sdl == NULL)
		return (EINVAL);
	/* don't allow length to change */
	if (len != sdl->sdl_alen)
		return (EINVAL);

	/* Reject unsupported interface types before touching the address. */
	switch (ifp->if_type) {
	case IFT_ETHER:
	case IFT_FDDI:
	case IFT_XETHER:
	case IFT_ISO88025:
	case IFT_L2VLAN:
	case IFT_BRIDGE:
	case IFT_ARCNET:
	case IFT_IEEE8023ADLAG:
		break;
	default:
		return (ENODEV);
	}
	bcopy(lladdr, LLADDR(sdl), len);

	/* A down interface needs nothing more. */
	if ((ifp->if_flags & IFF_UP) == 0)
		return (0);

	/*
	 * The interface is already up, so re-init it in order
	 * to reprogram its address filter.
	 */
	if (ifp->if_ioctl) {
		IFF_LOCKGIANT(ifp);
		ifp->if_flags &= ~IFF_UP;
		ifr.ifr_flags = ifp->if_flags & 0xffff;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
		ifp->if_flags |= IFF_UP;
		ifr.ifr_flags = ifp->if_flags & 0xffff;
		ifr.ifr_flagshigh = ifp->if_flags >> 16;
		(*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr);
		IFF_UNLOCKGIANT(ifp);
	}
#ifdef INET
	/*
	 * Also send gratuitous ARPs to notify other nodes about
	 * the address change.
	 */
	TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
		if (ifa->ifa_addr->sa_family == AF_INET)
			arp_ifinit(ifp, ifa);
	}
#endif
	return (0);
}
/*
 * Record the driver name and unit on an interface and build its
 * external name from them: "<name><unit>", or just the name when the
 * unit is IF_DUNIT_NONE.
 *
 * The name argument must be a pointer to storage which will last as
 * long as the interface does.  For physical devices, the result of
 * device_get_name(dev) is a good choice and for pseudo-devices a
 * static string works well.
 */
void
if_initname(struct ifnet *ifp, const char *name, int unit)
{
	ifp->if_dname = name;
	ifp->if_dunit = unit;

	if (unit == IF_DUNIT_NONE)
		strlcpy(ifp->if_xname, name, IFNAMSIZ);
	else
		snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit);
}
/*
 * printf() variant that prefixes the message with the interface's name
 * followed by ": ".  Returns the sum of what the prefix printf() and
 * the message vprintf() returned.
 */
int
if_printf(struct ifnet *ifp, const char * fmt, ...)
{
	va_list ap;
	int prefix_len, body_len;

	prefix_len = printf("%s: ", ifp->if_xname);

	va_start(ap, fmt);
	body_len = vprintf(fmt, ap);
	va_end(ap);

	return (prefix_len + body_len);
}
/*
 * Start output on an interface.
 *
 * When an interface is marked IFF_NEEDSGIANT, its if_start() routine cannot
 * be called without Giant.  However, we often can't acquire the Giant lock
 * at those points; instead, we run it via a task queue that holds Giant via
 * if_start_deferred.  The routine is called directly when the interface
 * does not need Giant or Giant is already owned.
 *
 * XXXRW: We need to make sure that the ifnet isn't fully detached until any
 * outstanding if_start_deferred() tasks that will run after the free.  This
 * probably means waiting in if_detach().
 */
void
if_start(struct ifnet *ifp)
{
	if ((ifp->if_flags & IFF_NEEDSGIANT) == 0 || mtx_owned(&Giant)) {
		(*(ifp)->if_start)(ifp);
		return;
	}

	/* Giant is needed but not held: defer to the Giant task queue. */
	taskqueue_enqueue(taskqueue_swi_giant, &ifp->if_starttask);
}
/*
 * Task handler for deferred starts: with Giant required to be held,
 * call the start routine of the interface passed as 'context'.
 */
static void
if_start_deferred(void *context, int pending)
{
	struct ifnet *ifp = context;

	GIANT_REQUIRED;

	(ifp->if_start)(ifp);
}
/*
 * Backwards compatibility transmit routine for drivers that have not
 * implemented their own: the mbuf is handed to the interface via
 * IFQ_HANDOFF() and the error it reports is returned to the caller.
 */
static int
if_transmit(struct ifnet *ifp, struct mbuf *m)
{
	int status;

	IFQ_HANDOFF(ifp, m, status);
	return (status);
}
/*
 * Hand an mbuf to an interface queue.
 *
 * If the queue is full the drop counter is bumped, the mbuf is freed and
 * 0 is returned.  Otherwise the mbuf is enqueued and 1 is returned.  When
 * ifp is not NULL the interface's output byte count is increased by the
 * packet length plus 'adjust', if_omcasts is incremented for broadcast or
 * multicast mbufs, and if_start() is called unless IFF_DRV_OACTIVE was set
 * when sampled under the queue lock.
 */
int
if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust)
{
	int oactive;

	IF_LOCK(ifq);
	if (_IF_QFULL(ifq)) {
		_IF_DROP(ifq);
		IF_UNLOCK(ifq);
		m_freem(m);
		return (0);
	}

	oactive = 0;
	if (ifp != NULL) {
		ifp->if_obytes += m->m_pkthdr.len + adjust;
		if ((m->m_flags & (M_BCAST|M_MCAST)) != 0)
			ifp->if_omcasts++;
		oactive = ifp->if_drv_flags & IFF_DRV_OACTIVE;
	}
	_IF_ENQUEUE(ifq, m);
	IF_UNLOCK(ifq);

	/* Only poke the driver when it was not already busy transmitting. */
	if (ifp != NULL && oactive == 0)
		if_start(ifp);
	return (1);
}
/*
 * Install the allocate/free callbacks for interface type 'type' in the
 * if_com_alloc[] and if_com_free[] tables.  Both slots are asserted to be
 * empty beforehand.
 */
void
if_register_com_alloc(u_char type,
    if_com_alloc_t *a, if_com_free_t *f)
{

	KASSERT(if_com_alloc[type] == NULL,
	    ("if_register_com_alloc: %d already registered", type));
	KASSERT(if_com_free[type] == NULL,
	    ("if_register_com_alloc: %d free already registered", type));

	if_com_alloc[type] = a;
	if_com_free[type] = f;
}
/*
 * Clear the allocate/free callbacks for interface type 'type' in the
 * if_com_alloc[] and if_com_free[] tables.  Both slots are asserted to be
 * populated beforehand.
 */
void
if_deregister_com_alloc(u_char type)
{

	KASSERT(if_com_alloc[type] != NULL,
	    ("if_deregister_com_alloc: %d not registered", type));
	KASSERT(if_com_free[type] != NULL,
	    ("if_deregister_com_alloc: %d free not registered", type));

	if_com_alloc[type] = NULL;
	if_com_free[type] = NULL;
}
Index: projects/arpv2_merge_1/sys/net/if_llatbl.c
===================================================================
--- projects/arpv2_merge_1/sys/net/if_llatbl.c (revision 185838)
+++ projects/arpv2_merge_1/sys/net/if_llatbl.c (revision 185839)
@@ -1,298 +1,306 @@
/*
* Copyright (c) 2007 Qing Li, Luigi Rizzo, Alessandro Cerri. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/syslog.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
+#include <sys/rwlock.h>
#include <sys/vimage.h>
#include <vm/uma.h>
#include <netinet/in.h>
#include <net/if_llatbl.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/route.h>
#include <netinet/if_ether.h>
#include <netinet6/in6_var.h>
#include <netinet6/nd6.h>
/* Malloc type used for struct lltable (see lltable_init()/lltable_free()). */
MALLOC_DEFINE(M_LLTABLE, "lltable", "link level address tables");
/*
 * List of every link-layer table created by lltable_init().  All accesses
 * in this file are made with the IFNET lock held (read or write).
 */
static SLIST_HEAD(, lltable) lltables = SLIST_HEAD_INITIALIZER(lltables);
/*
 * Defined outside this file; lla_rt_output() calls it to send a gratuitous
 * ARP for published entries.
 */
extern void arprequest(struct ifnet *, struct in_addr *, struct in_addr *,
u_char *);
/*
 * Dump arp state for a specific address family.
 *
 * Walks the global lltables list under IFNET_RLOCK() and calls the
 * llt_dump method of every table whose address family equals 'af',
 * stopping at the first non-zero result.  Returns that result, or 0 when
 * every dump succeeded (or no table matched).
 */
int
lltable_sysctl_dumparp(int af, struct sysctl_req *wr)
{
	struct lltable *tbl;
	int rc;

	rc = 0;
	IFNET_RLOCK();
	SLIST_FOREACH(tbl, &lltables, llt_link) {
		if (tbl->llt_af != af)
			continue;
		rc = tbl->llt_dump(tbl, wr);
		if (rc != 0)
			break;
	}
	IFNET_RUNLOCK();

	return (rc);
}
/*
* Deletes an address from the address table.
* This function is called by the timer functions
* such as arptimer() and nd6_llinfo_timer(), and
* the caller does the locking.
*/
void
llentry_free(struct llentry *lle)
{
- struct lltable *llt = lle->lle_tbl;
+ /*
+ * Take the entry write lock; LLE_FREE_LOCKED() below either hands the
+ * entry to llt_free() or drops one reference and unlocks it.
+ */
+ LLE_WLOCK(lle);
LIST_REMOVE(lle, lle_next);
if (lle->la_hold != NULL)
m_freem(lle->la_hold);
- llt->llt_free(llt, lle);
+ /*
+ * The entry survives this call when other references remain, so do
+ * not leave a pointer to the freed mbuf chain behind (lltable_drain()
+ * clears la_hold the same way).
+ */
+ lle->la_hold = NULL;
+
+ LLE_FREE_LOCKED(lle);
}
/*
 * Free all entries from the given table and free the table itself.
 *
 * The table is first unlinked from the global lltables list, which
 * collects the tables of all interfaces; IFNET_WLOCK() is acquired and
 * released here around that removal.  Every entry then has its timer
 * drained with callout_drain() before being passed to llentry_free().
 * NOTE(review): since the lock is taken here, callers presumably must not
 * already hold IFNET_WLOCK() -- confirm against the call sites.
 */
void
lltable_free(struct lltable *llt)
{
	struct llentry *ent, *ent_next;
	int bucket;

	KASSERT(llt != NULL, ("%s: llt is NULL", __func__));

	IFNET_WLOCK();
	SLIST_REMOVE(&lltables, llt, lltable, llt_link);
	IFNET_WUNLOCK();

	bucket = 0;
	while (bucket < LLTBL_HASHTBL_SIZE) {
		LIST_FOREACH_SAFE(ent, &llt->lle_head[bucket], lle_next,
		    ent_next) {
			callout_drain(&ent->la_timer);
			llentry_free(ent);
		}
		bucket++;
	}

	free(llt, M_LLTABLE);
}
/*
 * Release the held mbuf chain (la_hold) of every entry in every table of
 * address family 'af'.  The entries themselves stay in place; only their
 * la_hold chain is freed and reset to NULL.  The table list is walked
 * under IFNET_RLOCK().
 */
void
lltable_drain(int af)
{
	struct lltable *tbl;
	struct llentry *ent;
	int bucket;

	IFNET_RLOCK();
	SLIST_FOREACH(tbl, &lltables, llt_link) {
		if (tbl->llt_af != af)
			continue;

		for (bucket = 0; bucket < LLTBL_HASHTBL_SIZE; bucket++) {
			LIST_FOREACH(ent, &tbl->lle_head[bucket], lle_next) {
				if (ent->la_hold == NULL)
					continue;
				m_freem(ent->la_hold);
				ent->la_hold = NULL;
			}
		}
	}
	IFNET_RUNLOCK();
}
/*
* Create a new lltable.
*/
struct lltable *
lltable_init(struct ifnet *ifp, int af)
{
struct lltable *llt;
register int i;
llt = malloc(sizeof(struct lltable), M_LLTABLE, M_WAITOK);
if (llt == NULL)
return (NULL);
llt->llt_af = af;
llt->llt_ifp = ifp;
for (i = 0; i < LLTBL_HASHTBL_SIZE; i++)
LIST_INIT(&llt->lle_head[i]);
IFNET_WLOCK();
SLIST_INSERT_HEAD(&lltables, llt, llt_link);
IFNET_WUNLOCK();
return (llt);
}
/*
* Called in route_output when adding/deleting a route to an interface.
+ *
+ * Returns 0 on success. EINVAL is returned for a missing or non-AF_LINK
+ * gateway, an unknown interface index, an unsupported message type, an
+ * invalid proxy-only publish request, or an RTM_DELETE for which the
+ * lookup returns no entry.
*/
int
lla_rt_output(struct rt_msghdr *rtm, struct rt_addrinfo *info)
{
struct sockaddr_dl *dl =
(struct sockaddr_dl *)info->rti_info[RTAX_GATEWAY];
struct sockaddr *dst = (struct sockaddr *)info->rti_info[RTAX_DST];
struct ifnet *ifp;
struct lltable *llt;
struct llentry *lle;
- u_int flags = 0;
+ u_int laflags = 0, flags = 0;
+ int error = 0;
if (dl == NULL || dl->sdl_family != AF_LINK) {
log(LOG_INFO, "%s: invalid dl\n", __func__);
return EINVAL;
}
ifp = ifnet_byindex(dl->sdl_index);
if (ifp == NULL) {
log(LOG_INFO, "%s: invalid ifp (sdl_index %d)\n",
__func__, dl->sdl_index);
return EINVAL;
}
switch (rtm->rtm_type) {
case RTM_ADD:
if (rtm->rtm_flags & RTF_ANNOUNCE) {
flags |= LLE_PUB;
#ifdef INET
if (dst->sa_family == AF_INET &&
((struct sockaddr_inarp *)dst)->sin_other != 0) {
struct rtentry *rt = rtalloc1(dst, 0, 0);
if (rt == NULL || !(rt->rt_flags & RTF_HOST)) {
log(LOG_INFO, "%s: RTM_ADD publish "
"(proxy only) is invalid\n",
__func__);
- rtfree(rt);
+ /* rt is NULL when the route lookup itself failed. */
+ if (rt != NULL)
+ RTFREE(rt);
return EINVAL;
}
- rtfree(rt);
+ RTFREE(rt);
flags |= LLE_PROXY;
}
#endif
}
flags |= LLE_CREATE;
break;
case RTM_DELETE:
flags |= LLE_DELETE;
break;
case RTM_CHANGE:
break;
default:
return EINVAL; /* XXX not implemented yet */
}
/*
* XXXXXXXX:
* REVISE this approach if possible.
*/
- IFNET_WLOCK();
+ IFNET_RLOCK();
SLIST_FOREACH(llt, &lltables, llt_link) {
if (llt->llt_af == dst->sa_family &&
llt->llt_ifp == ifp)
break;
}
- IFNET_WUNLOCK();
+ IFNET_RUNLOCK();
KASSERT(llt != NULL, ("Yep, ugly hacks are bad\n"));
+ /*
+ * Only a create request needs the entry returned write-locked: the
+ * LLE_CREATE branch below modifies it and drops a write lock, every
+ * other path drops a read lock. This must be a bitwise test; a
+ * logical && is true for any non-zero flags.
+ */
+ if (flags & LLE_CREATE)
+ flags |= LLE_EXCLUSIVE;
+
IF_AFDATA_LOCK(ifp);
lle = lla_lookup(llt, flags, dst);
+ IF_AFDATA_UNLOCK(ifp);
if (lle != NULL) {
if (flags & LLE_CREATE) {
/* qing: if we delay the delete, then if a subsequent
* "arp add" on the same host should look up this entry,
* reset the LLE_DELETED flag, and reset the expiration timer
*/
bcopy(LLADDR(dl), &lle->ll_addr, ifp->if_addrlen);
lle->la_flags |= LLE_VALID;
lle->la_flags &= ~LLE_DELETED;
#ifdef INET6
/*
* ND6
*/
if (dst->sa_family == AF_INET6)
lle->ln_state = ND6_LLINFO_REACHABLE;
#endif
/*
* "arp" and "ndp" always sets the (RTF_STATIC | RTF_HOST) flags
*/
+
if (rtm->rtm_rmx.rmx_expire == 0) {
lle->la_flags |= LLE_STATIC;
lle->la_expire = 0;
} else
lle->la_expire = rtm->rtm_rmx.rmx_expire;
+ /* Snapshot the flags: the entry is unlocked before the ARP is sent. */
+ laflags = lle->la_flags;
+ LLE_WUNLOCK(lle);
#ifdef INET
/* gratuious ARP */
- if ((lle->la_flags & LLE_PUB) &&
+ if ((laflags & LLE_PUB) &&
dst->sa_family == AF_INET) {
arprequest(ifp,
&((struct sockaddr_in *)dst)->sin_addr,
&((struct sockaddr_in *)dst)->sin_addr,
- ((lle->la_flags & LLE_PROXY) ?
+ ((laflags & LLE_PROXY) ?
(u_char *)IF_LLADDR(ifp) :
(u_char *)LLADDR(dl)));
}
#endif
- }
+ } else
+ LLE_RUNLOCK(lle);
} else {
- if (flags & LLE_DELETE) {
- IF_AFDATA_UNLOCK(ifp);
- return EINVAL;
- }
+ if (flags & LLE_DELETE)
+ error = EINVAL;
}
- IF_AFDATA_UNLOCK(ifp);
- return 0;
+ return (error);
}
Index: projects/arpv2_merge_1/sys/net/if_llatbl.h
===================================================================
--- projects/arpv2_merge_1/sys/net/if_llatbl.h (revision 185838)
+++ projects/arpv2_merge_1/sys/net/if_llatbl.h (revision 185839)
@@ -1,131 +1,187 @@
/*
* Copyright (c) 2007 Qing Li, Luigi Rizzo, Alessandro Cerri. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#ifndef _NET_IF_LLATBL_H_
#define _NET_IF_LLATBL_H_
+#include <sys/_rwlock.h>
#include <netinet/in.h>
struct ifnet;
struct sysctl_req;
struct rt_msghdr;
struct rt_addrinfo;
struct llentry;
LIST_HEAD(llentries, llentry);
+/*
+ * Code referencing llentry must at least hold
+ * a shared lock
+ */
struct llentry {
LIST_ENTRY(llentry) lle_next; /* linkage in a lltable lle_head[] bucket */
+ struct rwlock lle_lock; /* per-entry lock, see the LLE_*LOCK macros */
struct lltable *lle_tbl; /* owning table; its llt_free releases the entry */
struct llentries *lle_head;
struct mbuf *la_hold; /* held mbuf chain, released with m_freem() */
time_t la_expire; /* expiry time; 0 for LLE_STATIC entries */
uint16_t la_flags; /* LLE_* entry flags */
uint16_t la_asked;
uint16_t la_preempt;
uint16_t ln_byhint;
int16_t ln_state; /* IPv6 has ND6_LLINFO_NOSTATE == -2 */
uint16_t ln_router;
time_t ln_ntick;
+ int lle_refcnt; /* reference count, see LLE_ADDREF/LLE_FREE */
+
union {
uint64_t mac_aligned;
uint16_t mac16[3];
} ll_addr; /* link-layer address, if_addrlen bytes are copied in */
/* XXX af-private? */
union {
struct callout ln_timer_ch;
struct callout la_timer;
} lle_timer; /* reached through the ln_timer_ch / la_timer aliases */
/* NB: struct sockaddr must immediately follow */
};
/*
 * Thin wrappers around the rwlock primitives, all operating on the entry's
 * lle_lock. LLE_LOCK_INIT names the lock "lle" and passes RW_DUPOK;
 * LLE_WLOCK_ASSERT asserts that the lock is held exclusively (RA_WLOCKED).
 */
+#define LLE_WLOCK(lle) rw_wlock(&(lle)->lle_lock)
+#define LLE_RLOCK(lle) rw_rlock(&(lle)->lle_lock)
+#define LLE_WUNLOCK(lle) rw_wunlock(&(lle)->lle_lock)
+#define LLE_RUNLOCK(lle) rw_runlock(&(lle)->lle_lock)
+#define LLE_DOWNGRADE(lle) rw_downgrade(&(lle)->lle_lock)
+#define LLE_TRY_UPGRADE(lle) rw_try_upgrade(&(lle)->lle_lock)
+#define LLE_LOCK_INIT(lle) rw_init_flags(&(lle)->lle_lock, "lle", RW_DUPOK)
+#define LLE_WLOCK_ASSERT(lle) rw_assert(&(lle)->lle_lock, RA_WLOCKED)
+
/*
 * Take one more reference on an entry. The entry's write lock must be
 * held (asserted), and the current count is asserted to be non-negative
 * before it is incremented.
 */
+#define LLE_ADDREF(lle) do { \
+ LLE_WLOCK_ASSERT(lle); \
+ KASSERT((lle)->lle_refcnt >= 0, \
+ ("negative refcnt %d", (lle)->lle_refcnt)); \
+ (lle)->lle_refcnt++; \
+} while (0)
+
+/*
+ * Drop one reference on an entry without freeing it. The entry's write
+ * lock must be held (asserted) and the count must be positive. The
+ * counter is the int lle_refcnt member of struct llentry, so it is
+ * printed with %d.
+ */
+#define LLE_REMREF(lle) do { \
+ LLE_WLOCK_ASSERT(lle); \
+ KASSERT((lle)->lle_refcnt > 0, \
+ ("bogus refcnt %d", (lle)->lle_refcnt)); \
+ (lle)->lle_refcnt--; \
+} while (0)
+
+/*
+ * Release the caller's reference on an entry whose write lock is already
+ * held (asserted, like the other refcount macros). If this is the last
+ * reference the entry is handed to its table's llt_free method; otherwise
+ * the count is decremented and the entry is unlocked. In both cases the
+ * caller's pointer is set to NULL afterwards, so 'lle' must be an lvalue.
+ */
+#define LLE_FREE_LOCKED(lle) do { \
+ LLE_WLOCK_ASSERT(lle); \
+ if ((lle)->lle_refcnt <= 1) \
+ (lle)->lle_tbl->llt_free((lle)->lle_tbl, (lle));\
+ else { \
+ (lle)->lle_refcnt--; \
+ LLE_WUNLOCK(lle); \
+ } \
+ /* guard against invalid refs */ \
+ (lle) = NULL; \
+} while (0)
+
+/*
+ * Release the caller's reference on an unlocked entry: takes the write
+ * lock, then either hands the entry to its table's llt_free method (last
+ * reference) or decrements the count and unlocks it. The caller's pointer
+ * is set to NULL afterwards, so 'lle' must be an lvalue.
+ */
+#define LLE_FREE(lle) do { \
+ LLE_WLOCK(lle); \
+ if ((lle)->lle_refcnt <= 1) \
+ (lle)->lle_tbl->llt_free((lle)->lle_tbl, (lle));\
+ else { \
+ (lle)->lle_refcnt--; \
+ LLE_WUNLOCK(lle); \
+ } \
+ /* guard against invalid refs */ \
+ (lle) = NULL; \
+} while (0)
+
+
#define ln_timer_ch lle_timer.ln_timer_ch
#define la_timer lle_timer.la_timer
/*
 * XXX bad name
 *
 * The protocol (L3) address of an entry is the struct sockaddr stored
 * immediately after the struct llentry. L3_ADDR yields a pointer to it and
 * L3_ADDR_LEN its sa_len. The argument is parenthesised so that pointer
 * expressions such as "p + 1" index correctly.
 */
#define L3_ADDR(lle) ((struct sockaddr *)(&(lle)[1]))
#define L3_ADDR_LEN(lle) (((struct sockaddr *)(&(lle)[1]))->sa_len)
#ifndef LLTBL_HASHTBL_SIZE
#define LLTBL_HASHTBL_SIZE 32 /* default 32 ? */
#endif
#ifndef LLTBL_HASHMASK
#define LLTBL_HASHMASK (LLTBL_HASHTBL_SIZE - 1)
#endif
/*
 * One link-layer address table, created by lltable_init() for a given
 * (interface, address family) pair. The llt_* members are per-family
 * methods that are not assigned by lltable_init().
 */
struct lltable {
SLIST_ENTRY(lltable) llt_link; /* linkage on the global lltables list */
struct llentries lle_head[LLTBL_HASHTBL_SIZE]; /* hash buckets of entries */
int llt_af; /* address family of this table */
struct ifnet *llt_ifp; /* interface this table belongs to */
struct llentry * (*llt_new)(const struct sockaddr *, u_int);
void (*llt_free)(struct lltable *, struct llentry *); /* used by LLE_FREE*() */
struct llentry * (*llt_lookup)(struct lltable *, u_int flags,
const struct sockaddr *l3addr); /* backs lla_lookup() */
int (*llt_rtcheck)(struct ifnet *,
const struct sockaddr *);
int (*llt_dump)(struct lltable *,
struct sysctl_req *); /* called by lltable_sysctl_dumparp() */
};
MALLOC_DECLARE(M_LLTABLE);
/*
* flags to be passed to arplookup.
*/
#define LLE_DELETED 0x0001 /* entry must be deleted */
#define LLE_STATIC 0x0002 /* entry is static */
#define LLE_IFADDR 0x0004 /* entry is interface addr */
#define LLE_VALID 0x0008 /* ll_addr is valid */
#define LLE_PROXY 0x0010 /* proxy entry ??? */
#define LLE_PUB 0x0020 /* publish entry ??? */
-#define LLE_CREATE 0x8000 /* create on a lookup miss */
#define LLE_DELETE 0x4000 /* delete on a lookup - match LLE_IFADDR */
+#define LLE_CREATE 0x8000 /* create on a lookup miss */
+#define LLE_EXCLUSIVE 0x2000 /* return lle xlocked */
/*
 * Hash a key into a bucket index: the key is repeatedly shifted right by
 * eight bits and XORed with itself (for a 32-bit key this folds all four
 * bytes into the low byte), then reduced with 'mask'. Both arguments are
 * parenthesised so that expression arguments are evaluated as a unit.
 */
#define LLATBL_HASH(key, mask) \
	((((((((key) >> 8) ^ (key)) >> 8) ^ (key)) >> 8) ^ (key)) & (mask))
struct lltable *lltable_init(struct ifnet *, int);
void lltable_free(struct lltable *);
void lltable_drain(int);
int lltable_sysctl_dumparp(int, struct sysctl_req *);
void llentry_free(struct llentry *);
/*
 * Generic link layer address lookup function: forwards the request to the
 * table's own llt_lookup method and returns whatever that method returns.
 */
static __inline struct llentry *
lla_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
{
	struct llentry *found;

	found = (*llt->llt_lookup)(llt, flags, l3addr);
	return (found);
}
int lla_rt_output(struct rt_msghdr *, struct rt_addrinfo *);
#endif /* _NET_IF_LLATBL_H_ */
Index: projects/arpv2_merge_1/sys/net/if_var.h
===================================================================
--- projects/arpv2_merge_1/sys/net/if_var.h (revision 185838)
+++ projects/arpv2_merge_1/sys/net/if_var.h (revision 185839)
@@ -1,726 +1,728 @@
/*-
* Copyright (c) 1982, 1986, 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* From: @(#)if.h 8.1 (Berkeley) 6/10/93
* $FreeBSD$
*/
#ifndef _NET_IF_VAR_H_
#define _NET_IF_VAR_H_
/*
* Structures defining a network interface, providing a packet
* transport mechanism (ala level 0 of the PUP protocols).
*
* Each interface accepts output datagrams of a specified maximum
* length, and provides higher level routines with input datagrams
* received from its medium.
*
* Output occurs when the routine if_output is called, with three parameters:
* (*ifp->if_output)(ifp, m, dst, rt)
* Here m is the mbuf chain to be sent and dst is the destination address.
* The output routine encapsulates the supplied datagram if necessary,
* and then transmits it on its medium.
*
* On input, each interface unwraps the data received by it, and either
* places it on the input queue of an internetwork datagram routine
* and posts the associated software interrupt, or passes the datagram to a raw
* packet input routine.
*
* Routines exist for locating interfaces by their addresses
* or for locating an interface on a certain network, as well as more general
* routing and gateway routines maintaining information used to locate
* interfaces. These routines live in the files if.c and route.c
*/
#ifdef __STDC__
/*
* Forward structure declarations for function prototypes [sic].
*/
struct mbuf;
struct thread;
struct rtentry;
struct rt_addrinfo;
struct socket;
struct ether_header;
struct lltable;
struct carp_if;
struct ifvlantrunk;
#endif
#include <sys/queue.h> /* get TAILQ macros */
#ifdef _KERNEL
#include <sys/mbuf.h>
#include <sys/eventhandler.h>
#endif /* _KERNEL */
#include <sys/lock.h> /* XXX */
#include <sys/mutex.h> /* XXX */
#include <sys/event.h> /* XXX */
#include <sys/_task.h>
#define IF_DUNIT_NONE -1
#include <altq/if_altq.h>
TAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */
TAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */
TAILQ_HEAD(ifprefixhead, ifprefix);
TAILQ_HEAD(ifmultihead, ifmultiaddr);
TAILQ_HEAD(ifgrouphead, ifg_group);
/*
* Structure defining a queue for a network interface.
*
* Packets are kept on a singly linked list chained through m_nextpkt and
* manipulated with the IF_* and _IF_* macros defined later in this file.
*/
struct ifqueue {
struct mbuf *ifq_head; /* first packet, NULL when empty */
struct mbuf *ifq_tail; /* last packet, NULL when empty */
int ifq_len; /* number of packets currently queued */
int ifq_maxlen; /* queue is full once ifq_len reaches this */
int ifq_drops; /* drop counter, bumped by _IF_DROP() */
struct mtx ifq_mtx; /* taken by IF_LOCK()/IF_UNLOCK() */
};
/*
* Structure defining a network interface.
*
* (Would like to call this struct ``if'', but C isn't PL/1.)
*/
struct ifnet {
void *if_softc; /* pointer to driver state */
void *if_l2com; /* pointer to protocol bits */
TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
char if_xname[IFNAMSIZ]; /* external name (name + unit) */
const char *if_dname; /* driver name */
int if_dunit; /* unit or IF_DUNIT_NONE */
struct ifaddrhead if_addrhead; /* linked list of addresses per if */
/*
* if_addrhead is the list of all addresses associated to
* an interface.
* Some code in the kernel assumes that first element
* of the list has type AF_LINK, and contains sockaddr_dl
* addresses which store the link-level address and the name
* of the interface.
* However, access to the AF_LINK address through this
* field is deprecated. Use if_addr or ifaddr_byindex() instead.
*/
struct knlist if_klist; /* events attached to this if */
int if_pcount; /* number of promiscuous listeners */
struct carp_if *if_carp; /* carp interface structure */
struct bpf_if *if_bpf; /* packet filter structure */
u_short if_index; /* numeric abbreviation for this if */
short if_timer; /* time 'til if_watchdog called */
struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
int if_flags; /* up/down, broadcast, etc. */
int if_capabilities; /* interface features & capabilities */
int if_capenable; /* enabled features & capabilities */
void *if_linkmib; /* link-type-specific MIB data */
size_t if_linkmiblen; /* length of above data */
struct if_data if_data; /* reached via the if_mtu, if_obytes, ... aliases */
struct ifmultihead if_multiaddrs; /* multicast addresses configured */
int if_amcount; /* number of all-multicast requests */
/* procedure handles */
int (*if_output) /* output routine (enqueue) */
(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
void (*if_input) /* input routine (from h/w driver) */
(struct ifnet *, struct mbuf *);
void (*if_start) /* initiate output routine */
(struct ifnet *);
int (*if_ioctl) /* ioctl routine */
(struct ifnet *, u_long, caddr_t);
void (*if_watchdog) /* timer routine */
(struct ifnet *);
void (*if_init) /* Init routine */
(void *);
int (*if_resolvemulti) /* validate/resolve multicast */
(struct ifnet *, struct sockaddr **, struct sockaddr *);
struct ifaddr *if_addr; /* pointer to link-level address */
void *if_llsoftc; /* link layer softc */
int if_drv_flags; /* driver-managed status flags */
u_int if_spare_flags2; /* spare flags 2 */
struct ifaltq if_snd; /* output queue (includes altq) */
const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */
void *if_bridge; /* bridge glue */
struct label *if_label; /* interface MAC label */
/* these are only used by IPv6 */
struct ifprefixhead if_prefixhead; /* list of prefixes per if */
void *if_afdata[AF_MAX]; /* one slot per address family; presumably indexed by AF_* -- TODO confirm */
int if_afdata_initialized;
struct mtx if_afdata_mtx; /* taken by IF_AFDATA_LOCK()/IF_AFDATA_UNLOCK() */
struct task if_starttask; /* task for IFF_NEEDSGIANT */
struct task if_linktask; /* task for link change events */
struct mtx if_addr_mtx; /* mutex to protect address lists */
LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */
TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
/* protected by if_addr_mtx */
void *if_pf_kif;
void *if_lagg; /* lagg glue */
void *if_pspare[8]; /* multiq/TOE 3; vimage 3; general use 4 */
void (*if_qflush) /* flush any queues */
(struct ifnet *);
int (*if_transmit) /* initiate output routine */
(struct ifnet *, struct mbuf *);
int if_ispare[2]; /* general use 2 */
};
typedef void if_init_f_t(void *);
/*
* XXX These aliases are terribly dangerous because they could apply
* to anything.
*/
#define if_mtu if_data.ifi_mtu
#define if_type if_data.ifi_type
#define if_physical if_data.ifi_physical
#define if_addrlen if_data.ifi_addrlen
#define if_hdrlen if_data.ifi_hdrlen
#define if_metric if_data.ifi_metric
#define if_link_state if_data.ifi_link_state
#define if_baudrate if_data.ifi_baudrate
#define if_hwassist if_data.ifi_hwassist
#define if_ipackets if_data.ifi_ipackets
#define if_ierrors if_data.ifi_ierrors
#define if_opackets if_data.ifi_opackets
#define if_oerrors if_data.ifi_oerrors
#define if_collisions if_data.ifi_collisions
#define if_ibytes if_data.ifi_ibytes
#define if_obytes if_data.ifi_obytes
#define if_imcasts if_data.ifi_imcasts
#define if_omcasts if_data.ifi_omcasts
#define if_iqdrops if_data.ifi_iqdrops
#define if_noproto if_data.ifi_noproto
#define if_lastchange if_data.ifi_lastchange
#define if_rawoutput(if, m, sa) if_output(if, m, sa, (struct rtentry *)NULL)
/* for compatibility with other BSDs */
#define if_addrlist if_addrhead
#define if_list if_link
#define if_name(ifp) ((ifp)->if_xname)
/*
* Locks for address lists on the network interface.
*/
#define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_mtx, \
"if_addr_mtx", NULL, MTX_DEF)
#define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_mtx)
#define IF_ADDR_LOCK(if) mtx_lock(&(if)->if_addr_mtx)
#define IF_ADDR_UNLOCK(if) mtx_unlock(&(if)->if_addr_mtx)
#define IF_ADDR_LOCK_ASSERT(if) mtx_assert(&(if)->if_addr_mtx, MA_OWNED)
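/*
* Output queues (ifp->if_snd) and slow device input queues (*ifp->if_slowq)
* are queues of messages stored on ifqueue structures
* (defined above). Entries are added to and deleted from these structures
* by these macros. The underscore-prefixed forms (_IF_ENQUEUE, _IF_PREPEND,
* _IF_DEQUEUE, _IF_DRAIN) do no locking themselves; the IF_ENQUEUE,
* IF_PREPEND, IF_DEQUEUE and IF_DRAIN forms wrap them between IF_LOCK() and
* IF_UNLOCK() on the queue's ifq_mtx. (The older requirement was that they
* be called with ipl raised to splimp().)
*/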
#define IF_LOCK(ifq) mtx_lock(&(ifq)->ifq_mtx)
#define IF_UNLOCK(ifq) mtx_unlock(&(ifq)->ifq_mtx)
#define IF_LOCK_ASSERT(ifq) mtx_assert(&(ifq)->ifq_mtx, MA_OWNED)
#define _IF_QFULL(ifq) ((ifq)->ifq_len >= (ifq)->ifq_maxlen)
#define _IF_DROP(ifq) ((ifq)->ifq_drops++)
#define _IF_QLEN(ifq) ((ifq)->ifq_len)
#define _IF_ENQUEUE(ifq, m) do { \
(m)->m_nextpkt = NULL; \
if ((ifq)->ifq_tail == NULL) \
(ifq)->ifq_head = m; \
else \
(ifq)->ifq_tail->m_nextpkt = m; \
(ifq)->ifq_tail = m; \
(ifq)->ifq_len++; \
} while (0)
#define IF_ENQUEUE(ifq, m) do { \
IF_LOCK(ifq); \
_IF_ENQUEUE(ifq, m); \
IF_UNLOCK(ifq); \
} while (0)
#define _IF_PREPEND(ifq, m) do { \
(m)->m_nextpkt = (ifq)->ifq_head; \
if ((ifq)->ifq_tail == NULL) \
(ifq)->ifq_tail = (m); \
(ifq)->ifq_head = (m); \
(ifq)->ifq_len++; \
} while (0)
#define IF_PREPEND(ifq, m) do { \
IF_LOCK(ifq); \
_IF_PREPEND(ifq, m); \
IF_UNLOCK(ifq); \
} while (0)
#define _IF_DEQUEUE(ifq, m) do { \
(m) = (ifq)->ifq_head; \
if (m) { \
if (((ifq)->ifq_head = (m)->m_nextpkt) == NULL) \
(ifq)->ifq_tail = NULL; \
(m)->m_nextpkt = NULL; \
(ifq)->ifq_len--; \
} \
} while (0)
#define IF_DEQUEUE(ifq, m) do { \
IF_LOCK(ifq); \
_IF_DEQUEUE(ifq, m); \
IF_UNLOCK(ifq); \
} while (0)
#define _IF_POLL(ifq, m) ((m) = (ifq)->ifq_head)
#define IF_POLL(ifq, m) _IF_POLL(ifq, m)
#define _IF_DRAIN(ifq) do { \
struct mbuf *m; \
for (;;) { \
_IF_DEQUEUE(ifq, m); \
if (m == NULL) \
break; \
m_freem(m); \
} \
} while (0)
#define IF_DRAIN(ifq) do { \
IF_LOCK(ifq); \
_IF_DRAIN(ifq); \
IF_UNLOCK(ifq); \
} while(0)
#ifdef _KERNEL
/* interface address change event */
typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t);
/* new interface arrival event */
typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t);
/* interface departure event */
typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *);
EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t);
/*
* interface groups
*/
struct ifg_group {
char ifg_group[IFNAMSIZ];
u_int ifg_refcnt;
void *ifg_pf_kif;
TAILQ_HEAD(, ifg_member) ifg_members;
TAILQ_ENTRY(ifg_group) ifg_next;
};
struct ifg_member {
TAILQ_ENTRY(ifg_member) ifgm_next;
struct ifnet *ifgm_ifp;
};
struct ifg_list {
struct ifg_group *ifgl_group;
TAILQ_ENTRY(ifg_list) ifgl_next;
};
/* group attach event */
typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *);
EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t);
/* group detach event */
typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *);
EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t);
/* group change event */
typedef void (*group_change_event_handler_t)(void *, const char *);
EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t);
#define IF_AFDATA_LOCK_INIT(ifp) \
- mtx_init(&(ifp)->if_afdata_mtx, "if_afdata", NULL, \
- (MTX_DEF | MTX_RECURSE))
+ mtx_init(&(ifp)->if_afdata_mtx, "if_afdata", NULL, MTX_DEF)
#define IF_AFDATA_LOCK(ifp) mtx_lock(&(ifp)->if_afdata_mtx)
#define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_mtx)
#define IF_AFDATA_UNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_mtx)
#define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_mtx)
+
+#define IF_AFDATA_LOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_mtx, MA_OWNED)
+#define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_mtx, MA_NOTOWNED)
#define IFF_LOCKGIANT(ifp) do { \
if ((ifp)->if_flags & IFF_NEEDSGIANT) \
mtx_lock(&Giant); \
} while (0)
#define IFF_UNLOCKGIANT(ifp) do { \
if ((ifp)->if_flags & IFF_NEEDSGIANT) \
mtx_unlock(&Giant); \
} while (0)
int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp,
int adjust);
#define IF_HANDOFF(ifq, m, ifp) \
if_handoff((struct ifqueue *)ifq, m, ifp, 0)
#define IF_HANDOFF_ADJ(ifq, m, ifp, adj) \
if_handoff((struct ifqueue *)ifq, m, ifp, adj)
void if_start(struct ifnet *);
#define IFQ_ENQUEUE(ifq, m, err) \
do { \
IF_LOCK(ifq); \
if (ALTQ_IS_ENABLED(ifq)) \
ALTQ_ENQUEUE(ifq, m, NULL, err); \
else { \
if (_IF_QFULL(ifq)) { \
m_freem(m); \
(err) = ENOBUFS; \
} else { \
_IF_ENQUEUE(ifq, m); \
(err) = 0; \
} \
} \
if (err) \
(ifq)->ifq_drops++; \
IF_UNLOCK(ifq); \
} while (0)
#define IFQ_DEQUEUE_NOLOCK(ifq, m) \
do { \
if (TBR_IS_ENABLED(ifq)) \
(m) = tbr_dequeue_ptr(ifq, ALTDQ_REMOVE); \
else if (ALTQ_IS_ENABLED(ifq)) \
ALTQ_DEQUEUE(ifq, m); \
else \
_IF_DEQUEUE(ifq, m); \
} while (0)
#define IFQ_DEQUEUE(ifq, m) \
do { \
IF_LOCK(ifq); \
IFQ_DEQUEUE_NOLOCK(ifq, m); \
IF_UNLOCK(ifq); \
} while (0)
#define IFQ_POLL_NOLOCK(ifq, m) \
do { \
if (TBR_IS_ENABLED(ifq)) \
(m) = tbr_dequeue_ptr(ifq, ALTDQ_POLL); \
else if (ALTQ_IS_ENABLED(ifq)) \
ALTQ_POLL(ifq, m); \
else \
_IF_POLL(ifq, m); \
} while (0)
#define IFQ_POLL(ifq, m) \
do { \
IF_LOCK(ifq); \
IFQ_POLL_NOLOCK(ifq, m); \
IF_UNLOCK(ifq); \
} while (0)
#define IFQ_PURGE_NOLOCK(ifq) \
do { \
if (ALTQ_IS_ENABLED(ifq)) { \
ALTQ_PURGE(ifq); \
} else \
_IF_DRAIN(ifq); \
} while (0)
#define IFQ_PURGE(ifq) \
do { \
IF_LOCK(ifq); \
IFQ_PURGE_NOLOCK(ifq); \
IF_UNLOCK(ifq); \
} while (0)
#define IFQ_SET_READY(ifq) \
do { ((ifq)->altq_flags |= ALTQF_READY); } while (0)
#define IFQ_LOCK(ifq) IF_LOCK(ifq)
#define IFQ_UNLOCK(ifq) IF_UNLOCK(ifq)
#define IFQ_LOCK_ASSERT(ifq) IF_LOCK_ASSERT(ifq)
#define IFQ_IS_EMPTY(ifq) ((ifq)->ifq_len == 0)
#define IFQ_INC_LEN(ifq) ((ifq)->ifq_len++)
#define IFQ_DEC_LEN(ifq) (--(ifq)->ifq_len)
#define IFQ_INC_DROPS(ifq) ((ifq)->ifq_drops++)
#define IFQ_SET_MAXLEN(ifq, len) ((ifq)->ifq_maxlen = (len))
/*
* Enqueue mbuf m on the interface's send queue (if_snd) and, on success,
* account for it and start output.
*
* The packet length and flags are copied into locals before the enqueue,
* because IFQ_ENQUEUE() frees the mbuf when the queue is full and the
* queue holds it otherwise. When err comes back 0, if_obytes grows by the
* packet length plus adj, if_omcasts is incremented for M_MCAST mbufs, and
* if_start() is called unless IFF_DRV_OACTIVE is set.
*
* The IFF_DRV_OACTIVE test should really occur in the device driver, not in
* the handoff logic, as that flag is locked by the device driver.
*
* NOTE(review): only M_MCAST is counted here, whereas if_handoff() counts
* both M_BCAST and M_MCAST in if_omcasts -- confirm which is intended.
*/
#define IFQ_HANDOFF_ADJ(ifp, m, adj, err) \
do { \
int len; \
short mflags; \
\
len = (m)->m_pkthdr.len; \
mflags = (m)->m_flags; \
IFQ_ENQUEUE(&(ifp)->if_snd, m, err); \
if ((err) == 0) { \
(ifp)->if_obytes += len + (adj); \
if (mflags & M_MCAST) \
(ifp)->if_omcasts++; \
if (((ifp)->if_drv_flags & IFF_DRV_OACTIVE) == 0) \
if_start(ifp); \
} \
} while (0)
#define IFQ_HANDOFF(ifp, m, err) \
IFQ_HANDOFF_ADJ(ifp, m, 0, err)
#define IFQ_DRV_DEQUEUE(ifq, m) \
do { \
(m) = (ifq)->ifq_drv_head; \
if (m) { \
if (((ifq)->ifq_drv_head = (m)->m_nextpkt) == NULL) \
(ifq)->ifq_drv_tail = NULL; \
(m)->m_nextpkt = NULL; \
(ifq)->ifq_drv_len--; \
} else { \
IFQ_LOCK(ifq); \
IFQ_DEQUEUE_NOLOCK(ifq, m); \
while ((ifq)->ifq_drv_len < (ifq)->ifq_drv_maxlen) { \
struct mbuf *m0; \
IFQ_DEQUEUE_NOLOCK(ifq, m0); \
if (m0 == NULL) \
break; \
m0->m_nextpkt = NULL; \
if ((ifq)->ifq_drv_tail == NULL) \
(ifq)->ifq_drv_head = m0; \
else \
(ifq)->ifq_drv_tail->m_nextpkt = m0; \
(ifq)->ifq_drv_tail = m0; \
(ifq)->ifq_drv_len++; \
} \
IFQ_UNLOCK(ifq); \
} \
} while (0)
#define IFQ_DRV_PREPEND(ifq, m) \
do { \
(m)->m_nextpkt = (ifq)->ifq_drv_head; \
if ((ifq)->ifq_drv_tail == NULL) \
(ifq)->ifq_drv_tail = (m); \
(ifq)->ifq_drv_head = (m); \
(ifq)->ifq_drv_len++; \
} while (0)
#define IFQ_DRV_IS_EMPTY(ifq) \
(((ifq)->ifq_drv_len == 0) && ((ifq)->ifq_len == 0))
#define IFQ_DRV_PURGE(ifq) \
do { \
struct mbuf *m, *n = (ifq)->ifq_drv_head; \
while((m = n) != NULL) { \
n = m->m_nextpkt; \
m_freem(m); \
} \
(ifq)->ifq_drv_head = (ifq)->ifq_drv_tail = NULL; \
(ifq)->ifq_drv_len = 0; \
IFQ_PURGE(ifq); \
} while (0)
/*
* 72 was chosen below because it is the size of a TCP/IP
* header (40) + the minimum mss (32).
*/
#define IF_MINMTU 72
#define IF_MAXMTU 65535
#endif /* _KERNEL */
/*
* The ifaddr structure contains information about one address
* of an interface. They are maintained by the different address families,
* are allocated and attached when an address is set, and are linked
* together so all addresses for an interface can be located.
*
* NOTE: a 'struct ifaddr' is always at the beginning of a larger
* chunk of malloc'ed memory, where we store the three addresses
* (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here.
*/
/*
 * Per-address record.  The reference count (ifa_refcnt) is manipulated by
 * IFAREF()/IFAFREE() with ifa_mtx held.
 */
struct ifaddr {
struct sockaddr *ifa_addr; /* address of interface */
struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */
#define ifa_broadaddr ifa_dstaddr /* broadcast address interface */
struct sockaddr *ifa_netmask; /* used to determine subnet */
struct if_data if_data; /* not all members are meaningful */
struct ifnet *ifa_ifp; /* back-pointer to interface */
TAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */
void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */
(int, struct rtentry *, struct rt_addrinfo *);
u_short ifa_flags; /* mostly rt_flags for cloning */
u_int ifa_refcnt; /* references to this structure */
int ifa_metric; /* cost of going out this interface */
int (*ifa_claim_addr) /* check if an addr goes to this if */
(struct ifaddr *, struct sockaddr *);
struct mtx ifa_mtx; /* mutex behind IFA_LOCK()/IFA_UNLOCK() */
};
#define IFA_ROUTE RTF_UP /* route installed */
/* for compatibility with other BSDs */
#define ifa_list ifa_link
/*
 * Wrappers around the per-address mutex ifa_mtx: initialise it as a
 * default (MTX_DEF) mutex named "ifaddr", acquire it, release it, and
 * destroy it.
 */
#define IFA_LOCK_INIT(ifa) \
mtx_init(&(ifa)->ifa_mtx, "ifaddr", NULL, MTX_DEF)
#define IFA_LOCK(ifa) mtx_lock(&(ifa)->ifa_mtx)
#define IFA_UNLOCK(ifa) mtx_unlock(&(ifa)->ifa_mtx)
#define IFA_DESTROY(ifa) mtx_destroy(&(ifa)->ifa_mtx)
/*
* The prefix structure contains information about one prefix
* of an interface. They are maintained by the different address families,
* are allocated and attached when a prefix or an address is set,
* and are linked together so all prefixes for an interface can be located.
*/
struct ifprefix {
struct sockaddr *ifpr_prefix; /* prefix of interface */
struct ifnet *ifpr_ifp; /* back-pointer to interface */
TAILQ_ENTRY(ifprefix) ifpr_list; /* queue macro glue */
u_char ifpr_plen; /* prefix length in bits */
u_char ifpr_type; /* protocol dependent prefix type */
};
/*
* Multicast address structure. This is analogous to the ifaddr
* structure except that it keeps track of multicast addresses.
*/
struct ifmultiaddr {
TAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */
struct sockaddr *ifma_addr; /* address this membership is for */
struct sockaddr *ifma_lladdr; /* link-layer translation, if any */
struct ifnet *ifma_ifp; /* back-pointer to interface */
u_int ifma_refcount; /* reference count */
void *ifma_protospec; /* protocol-specific state, if any */
struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */
};
#ifdef _KERNEL
/*
 * Drop one reference on (ifa).  The count is checked and decremented with
 * the address mutex held.  When it reaches zero the mutex is destroyed
 * (while still held, so there is no matching unlock) and the structure is
 * released with free(..., M_IFADDR); otherwise the mutex is simply
 * unlocked.  KASSERT fires if the count was already zero on entry.
 */
#define IFAFREE(ifa) \
do { \
IFA_LOCK(ifa); \
KASSERT((ifa)->ifa_refcnt > 0, \
("ifa %p !(ifa_refcnt > 0)", ifa)); \
if (--(ifa)->ifa_refcnt == 0) { \
IFA_DESTROY(ifa); \
free(ifa, M_IFADDR); \
} else \
IFA_UNLOCK(ifa); \
} while (0)
/*
 * Take one additional reference on (ifa); the counter is updated with the
 * address mutex held.
 */
#define IFAREF(ifa)							\
do {									\
	IFA_LOCK(ifa);							\
	(ifa)->ifa_refcnt += 1;						\
	IFA_UNLOCK(ifa);						\
} while (0)
/*
 * Global ifnet_lock and its wrappers.  The lock is initialised as a
 * recursive default mutex (MTX_DEF | MTX_RECURSE).  The "read" variants
 * are plain aliases of the "write" variants, so readers and writers take
 * the same mutex.
 */
extern struct mtx ifnet_lock;
#define IFNET_LOCK_INIT() \
mtx_init(&ifnet_lock, "ifnet", NULL, MTX_DEF | MTX_RECURSE)
#define IFNET_WLOCK() mtx_lock(&ifnet_lock)
#define IFNET_WUNLOCK() mtx_unlock(&ifnet_lock)
#define IFNET_WLOCK_ASSERT() mtx_assert(&ifnet_lock, MA_OWNED)
#define IFNET_RLOCK() IFNET_WLOCK()
#define IFNET_RUNLOCK() IFNET_WUNLOCK()
/*
 * Pairs an interface with a character device.
 * NOTE(review): presumably one entry per interface index, as looked up by
 * ifnet_byindex()/ifdev_byindex() -- confirm against if.c.
 */
struct ifindex_entry {
struct ifnet *ife_ifnet; /* the interface */
struct cdev *ife_dev; /* the associated device */
};
struct ifnet *ifnet_byindex(u_short idx);
/*
* Given the index, ifaddr_byindex() returns the one and only
* link-level ifaddr for the interface. You are not supposed to use
* it to traverse the list of addresses associated to the interface.
*/
struct ifaddr *ifaddr_byindex(u_short idx);
struct cdev *ifdev_byindex(u_short idx);
extern struct ifnethead ifnet;
extern int ifqmaxlen;
extern struct ifnet *loif; /* first loopback interface */
extern int if_index;
int if_addgroup(struct ifnet *, const char *);
int if_delgroup(struct ifnet *, const char *);
int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **);
int if_allmulti(struct ifnet *, int);
struct ifnet* if_alloc(u_char);
void if_attach(struct ifnet *);
int if_delmulti(struct ifnet *, struct sockaddr *);
void if_delmulti_ifma(struct ifmultiaddr *);
void if_detach(struct ifnet *);
void if_purgeaddrs(struct ifnet *);
void if_purgemaddrs(struct ifnet *);
void if_down(struct ifnet *);
struct ifmultiaddr *
if_findmulti(struct ifnet *, struct sockaddr *);
void if_free(struct ifnet *);
void if_free_type(struct ifnet *, u_char);
void if_initname(struct ifnet *, const char *, int);
void if_link_state_change(struct ifnet *, int);
int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3);
int if_setlladdr(struct ifnet *, const u_char *, int);
void if_up(struct ifnet *);
/*void ifinit(void);*/ /* declared in systm.h for main() */
int ifioctl(struct socket *, u_long, caddr_t, struct thread *);
int ifpromisc(struct ifnet *, int);
struct ifnet *ifunit(const char *);
void ifq_attach(struct ifaltq *, struct ifnet *ifp);
void ifq_detach(struct ifaltq *);
struct ifaddr *ifa_ifwithaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithbroadaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithdstaddr(struct sockaddr *);
struct ifaddr *ifa_ifwithnet(struct sockaddr *);
struct ifaddr *ifa_ifwithroute(int, struct sockaddr *, struct sockaddr *);
struct ifaddr *ifa_ifwithroute_fib(int, struct sockaddr *, struct sockaddr *, u_int);
struct ifaddr *ifaof_ifpforaddr(struct sockaddr *, struct ifnet *);
int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen);
typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp);
typedef void if_com_free_t(void *com, u_char type);
void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f);
void if_deregister_com_alloc(u_char type);
/*
 * Apply LLADDR() to the address stored in (ifp)->if_addr->ifa_addr, which
 * is treated as a struct sockaddr_dl.
 */
#define IF_LLADDR(ifp) \
LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr))
#ifdef DEVICE_POLLING
enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS };
typedef void poll_handler_t(struct ifnet *ifp, enum poll_cmd cmd, int count);
int ether_poll_register(poll_handler_t *h, struct ifnet *ifp);
int ether_poll_deregister(struct ifnet *ifp);
#endif /* DEVICE_POLLING */
#endif /* _KERNEL */
#endif /* !_NET_IF_VAR_H_ */
Index: projects/arpv2_merge_1/sys/netinet/if_ether.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet/if_ether.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet/if_ether.c (revision 185839)
@@ -1,782 +1,824 @@
/*-
* Copyright (c) 1982, 1986, 1988, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)if_ether.c 8.1 (Berkeley) 6/10/93
*/
/*
* Ethernet address resolution protocol.
* TODO:
* add "inuse/lock" bit (or ref. count) along with valid bit
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_mac.h"
#include "opt_carp.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/socket.h>
#include <sys/syslog.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/netisr.h>
#include <net/if_llc.h>
#include <net/ethernet.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <net/if_llatbl.h>
#include <netinet/if_ether.h>
#include <netinet/vinet.h>
#include <net/if_arc.h>
#include <net/iso88025.h>
#ifdef DEV_CARP
#include <netinet/ip_carp.h>
#endif
#include <security/mac/mac_framework.h>
/*
 * Cast helpers.  The argument is parenthesised so that an expression
 * argument such as SIN(p + 1) is cast as a whole instead of having the
 * cast bind to its first operand only.
 */
#define SIN(s) ((struct sockaddr_in *)(s))
#define SDL(s) ((struct sockaddr_dl *)(s))
/* IPv4 link-layer table hanging off the interface's AF_INET afdata slot. */
#define LLTABLE(ifp) ((struct lltable *)(ifp)->if_afdata[AF_INET])
SYSCTL_DECL(_net_link_ether);
SYSCTL_NODE(_net_link_ether, PF_INET, inet, CTLFLAG_RW, 0, "");
/* timer values */
#ifdef VIMAGE_GLOBALS
static int arpt_keep; /* once resolved, good for 20 more minutes */
static int arp_maxtries;
int useloopback; /* use loopback interface for local traffic */
static int arp_proxyall;
#endif
SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, max_age,
CTLFLAG_RW, arpt_keep, 0, "ARP entry lifetime in seconds");
static struct ifqueue arpintrq;
SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, maxtries,
CTLFLAG_RW, arp_maxtries, 0,
"ARP resolution attempts before returning error");
SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, useloopback,
CTLFLAG_RW, useloopback, 0,
"Use the loopback interface for local traffic");
SYSCTL_V_INT(V_NET, vnet_inet, _net_link_ether_inet, OID_AUTO, proxyall,
CTLFLAG_RW, arp_proxyall, 0,
"Enable proxy ARP for all suitable requests");
static void arp_init(void);
void arprequest(struct ifnet *,
struct in_addr *, struct in_addr *, u_char *);
static void arpintr(struct mbuf *);
static void arptimer(void *);
#ifdef INET
static void in_arpinput(struct mbuf *);
#endif
#ifdef AF_INET
void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
/*
 * Called by in_ifscrub to remove the entry for an interface address from
 * the link-layer table when the interface address goes away.
 *
 * ifp  - interface whose AF_INET link-layer table is searched
 * addr - IPv4 address, stored unchanged into sin_addr.s_addr
 *
 * The lookup is issued with LLE_DELETE | LLE_IFADDR under
 * IF_AFDATA_LOCK; its result is not examined.
 */
void
arp_ifscrub(struct ifnet *ifp, uint32_t addr)
{
struct sockaddr_in addr4;
- struct llentry *lle;
/* Build the sockaddr_in lookup key for addr. */
bzero((void *)&addr4, sizeof(addr4));
addr4.sin_len = sizeof(addr4);
addr4.sin_family = AF_INET;
addr4.sin_addr.s_addr = addr;
IF_AFDATA_LOCK(ifp);
- lle = lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR),
+ lla_lookup(LLTABLE(ifp), (LLE_DELETE | LLE_IFADDR),
(struct sockaddr *)&addr4);
IF_AFDATA_UNLOCK(ifp);
-#if 0
- if (lle == NULL)
- log(LOG_INFO, "arp_ifscrub: interface address is missing from cache\n");
-#endif
}
#endif
/*
 * Timeout routine for one link-layer entry; arg is the struct llentry the
 * callout was armed with (see the callout_reset() calls that pass
 * arptimer).
 *
 * An entry flagged LLE_DELETED, or whose la_expire has been reached, is
 * released with llentry_free() under IF_AFDATA_LOCK, but only if its
 * callout is active and not pending again.  Otherwise the entry is kept
 * and only LLE_FREE() is applied to it.
 *
 * NOTE(review): expiry is tested against time_second here, whereas
 * la_expire is assigned from time_uptime in arpresolve() and
 * in_arpinput() -- the two clocks look mismatched; confirm.
 */
static void
arptimer(void *arg)
{
struct ifnet *ifp;
struct llentry *lle = (struct llentry *)arg;
if (lle == NULL) {
panic("%s: NULL entry!\n", __func__);
return;
}
ifp = lle->lle_tbl->llt_ifp;
- IF_AFDATA_LOCK(ifp);
if ((lle->la_flags & LLE_DELETED) ||
(time_second >= lle->la_expire)) {
/* NOTE(review): unconditional console printf looks like debug output. */
+ printf("deleting entry\n");
+
+ IF_AFDATA_LOCK(ifp);
if (!callout_pending(&lle->la_timer) &&
(callout_active(&lle->la_timer))) {
(void)llentry_free(lle);
}
+ IF_AFDATA_UNLOCK(ifp);
+ } else {
+ /*
+ * Still valid, just drop our reference
+ */
+ LLE_FREE(lle);
}
- IF_AFDATA_UNLOCK(ifp);
}
/*
 * Build and transmit a broadcast ARP request on ifp.
 *
 *   sip    - sender protocol (IP) address for the ARP header, or NULL to
 *            have one chosen from the interface's AF_INET addresses
 *   tip    - target protocol (IP) address being asked about
 *   enaddr - sender hardware address for the ARP header
 *
 * When sip is NULL the interface address list is walked: the first
 * AF_INET address on the same subnet as tip wins; if none is on the same
 * subnet the last AF_INET address seen is used.  The request is silently
 * dropped when no mbuf can be obtained, and a message is printed when the
 * interface has no AF_INET address at all.
 */
void
arprequest(struct ifnet *ifp, struct in_addr *sip, struct in_addr *tip,
    u_char *enaddr)
{
	struct sockaddr dst;
	struct arphdr *hdr;
	struct mbuf *pkt;

	if (sip == NULL) {
		struct ifaddr *cand;

		/* Caller gave no source: look for a suitable local one. */
		TAILQ_FOREACH(cand, &ifp->if_addrhead, ifa_link) {
			if (cand->ifa_addr == NULL ||
			    cand->ifa_addr->sa_family != AF_INET)
				continue;
			sip = &SIN(cand->ifa_addr)->sin_addr;
			if (((sip->s_addr ^ tip->s_addr) &
			    SIN(cand->ifa_netmask)->sin_addr.s_addr) == 0)
				break;	/* same subnet as the target */
		}
		if (sip == NULL) {
			printf("%s: cannot find matching address\n", __func__);
			return;
		}
	}

	pkt = m_gethdr(M_DONTWAIT, MT_DATA);
	if (pkt == NULL)
		return;

	/* Fixed header plus two protocol and two hardware addresses. */
	pkt->m_len = sizeof(*hdr) + 2*sizeof(struct in_addr) +
	    2*ifp->if_data.ifi_addrlen;
	pkt->m_pkthdr.len = pkt->m_len;
	MH_ALIGN(pkt, pkt->m_len);
	hdr = mtod(pkt, struct arphdr *);
	bzero((caddr_t)hdr, pkt->m_len);
#ifdef MAC
	mac_netinet_arp_send(ifp, pkt);
#endif
	hdr->ar_pro = htons(ETHERTYPE_IP);
	hdr->ar_hln = ifp->if_addrlen;		/* hardware address length */
	hdr->ar_pln = sizeof(struct in_addr);	/* protocol address length */
	hdr->ar_op = htons(ARPOP_REQUEST);
	bcopy((caddr_t)enaddr, (caddr_t)ar_sha(hdr), hdr->ar_hln);
	bcopy((caddr_t)sip, (caddr_t)ar_spa(hdr), hdr->ar_pln);
	bcopy((caddr_t)tip, (caddr_t)ar_tpa(hdr), hdr->ar_pln);

	/* Only family and length are filled in for the AF_ARP pseudo-destination. */
	dst.sa_family = AF_ARP;
	dst.sa_len = 2;
	pkt->m_flags |= M_BCAST;
	(*ifp->if_output)(ifp, pkt, &dst, (struct rtentry *)0);
}
/*
* Resolve an IP address into an ethernet address.
* On input:
* ifp is the interface we use
* rt0 is the route to the final destination (possibly useless)
* m is the mbuf. May be NULL if we don't have a packet.
* dst is the next hop,
* desten is where we want the address.
*
* On success, desten is filled in and the function returns 0;
* If the packet must be held pending resolution, we return EWOULDBLOCK
* On other errors, we return the corresponding error code.
* Note that m_freem() handles NULL.
*/
/*
 * Summary of the visible control flow:
 *  - broadcast and (non-ARCNET) multicast packets are answered directly
 *    without touching the link-layer table;
 *  - otherwise the entry for dst is looked up (created unless the
 *    interface has IFF_NOARP or IFF_STATICARP set);
 *  - a valid, unexpired (or static) entry yields the address in desten
 *    and a return value of 0;
 *  - an unresolved entry holds on to m (replacing any previously held
 *    packet) and, rate-limited through "renew", (re)sends an ARP request.
 */
int
arpresolve(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
struct sockaddr *dst, u_char *desten, struct llentry **lle)
{
INIT_VNET_INET(ifp->if_vnet);
struct llentry *la = 0;
u_int flags;
- int error;
+ int error, renew;
/* NOTE(review): "arpesolve" in the log strings below looks like a typo for "arpresolve". */
+ log(LOG_DEBUG, "arpesolve called\n");
*lle = NULL;
-
if (m != NULL) {
if (m->m_flags & M_BCAST) {
/* broadcast */
(void)memcpy(desten,
ifp->if_broadcastaddr, ifp->if_addrlen);
return (0);
}
if (m->m_flags & M_MCAST && ifp->if_type != IFT_ARCNET) {
/* multicast */
ETHER_MAP_IP_MULTICAST(&SIN(dst)->sin_addr, desten);
return (0);
}
}
flags = (ifp->if_flags & (IFF_NOARP | IFF_STATICARP)) ? 0 : LLE_CREATE;
/* XXXXX
* Since this function returns an llentry, the
* lock is held by the caller.
*/
/*
 * The lookup may be repeated once with LLE_EXCLUSIVE added to flags when
 * the entry has to be modified (see the "if (m)" block below).
 */
+retry:
la = lla_lookup(LLTABLE(ifp), flags, dst);
if (la == NULL) {
if (flags & LLE_CREATE)
log(LOG_DEBUG,
"arpresolve: can't allocate llinfo for %s\n",
inet_ntoa(SIN(dst)->sin_addr));
m_freem(m);
+ log(LOG_DEBUG, "arpesolve: lla_lookup fail\n");
return (EINVAL);
}
- if (la->la_flags & LLE_VALID &&
- (la->la_flags & LLE_STATIC || la->la_expire > time_uptime)) {
+ if ((la->la_flags & LLE_VALID) &&
+ ((la->la_flags & LLE_STATIC) || (la->la_expire > time_uptime))) {
bcopy(&la->ll_addr, desten, ifp->if_addrlen);
/*
* If entry has an expiry time and it is approaching,
* see if we need to send an ARP request within this
* arpt_down interval.
*/
if (!(la->la_flags & LLE_STATIC) &&
time_uptime + la->la_preempt > la->la_expire) {
arprequest(ifp, NULL,
&SIN(dst)->sin_addr, IF_LLADDR(ifp));
la->la_preempt--;
- }
+ }
+ log(LOG_DEBUG, "arpresolve: success\n");
+
/*
 * NOTE(review): the entry is handed back through *lle, but the "done"
 * path below releases its lock before returning -- confirm that callers
 * do not rely on it still being locked.
 */
*lle = la;
- return (0);
- }
-
+ error = 0;
+ goto done;
+ } else
+ log(LOG_DEBUG,
+ "la=%p valid=%d static=%d expire=%ld uptime=%ld\n", la,
+ !!(la->la_flags & LLE_VALID), !!(la->la_flags & LLE_STATIC),
+ la->la_expire, time_uptime);
+
if (la->la_flags & LLE_STATIC) { /* should not happen! */
log(LOG_DEBUG, "arpresolve: ouch, empty static llinfo for %s\n",
inet_ntoa(SIN(dst)->sin_addr));
m_freem(m);
- return (EINVAL);
+ error = EINVAL;
+ goto done;
}
+
/* Send a new request only if none was sent yet or the last one was not sent during the current second. */
+ renew = (la->la_asked == 0 || la->la_expire != time_uptime);
/*
* There is an arptab entry, but no ethernet address
* response yet. Replace the held mbuf with this
* latest one.
*/
if (m) {
/* Modifying la_hold needs the exclusive lock: drop the shared one and look the entry up again. */
+ if ((flags & LLE_EXCLUSIVE) == 0) {
+ flags |= LLE_EXCLUSIVE;
+ LLE_RUNLOCK(la);
+ goto retry;
+ }
if (la->la_hold)
m_freem(la->la_hold);
la->la_hold = m;
/* Nothing more to modify when no request will be sent: fall back to a shared lock. */
+ if (renew == 0 && (flags & LLE_EXCLUSIVE)) {
+ flags &= ~LLE_EXCLUSIVE;
+ LLE_DOWNGRADE(la);
+ }
+
}
/*
* Return EWOULDBLOCK if we have tried less than arp_maxtries. It
* will be masked by ether_output(). Return EHOSTDOWN/EHOSTUNREACH
* if we have already sent arp_maxtries ARP requests. Retransmit the
* ARP request, but not faster than one request per second.
*/
/*
 * NOTE(review): rt0 is dereferenced below without a NULL check -- confirm
 * that every caller passes a route once la_asked reaches V_arp_maxtries.
 */
if (la->la_asked < V_arp_maxtries)
error = EWOULDBLOCK; /* First request. */
else
error =
(rt0->rt_flags & RTF_GATEWAY) ? EHOSTDOWN : EHOSTUNREACH;
- if (la->la_asked == 0 || la->la_expire != time_uptime) {
+ if (renew) {
+ log(LOG_DEBUG,
+ "arpresolve: kicking off new resolve expire=%ld\n",
+ la->la_expire);
/*
 * NOTE(review): when m == NULL the lookup above was made without
 * LLE_EXCLUSIVE, yet this path modifies the entry and releases it with
 * LLE_WUNLOCK() -- looks like a write-unlock of a read-held lock; confirm.
 */
+ LLE_ADDREF(la);
la->la_expire = time_uptime;
callout_reset(&la->la_timer, hz, arptimer, la);
la->la_asked++;
-
+ LLE_WUNLOCK(la);
arprequest(ifp, NULL, &SIN(dst)->sin_addr,
IF_LLADDR(ifp));
+ return (error);
}
- return (EWOULDBLOCK);
/* Common exit: release the entry in whichever mode flags says it is held. */
+done:
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(la);
+ else
+ LLE_RUNLOCK(la);
+ return (error);
}
/*
 * Input handler for ARP packets (registered for NETISR_ARP in arp_init()).
 *
 * Performs the checks common to all protocols -- the fixed header must be
 * contiguous, the hardware type must be one we know, and the complete
 * variable-length header must be contiguous -- then dispatches on the
 * protocol type.  Ownership of m passes to the protocol routine; any
 * packet that is not dispatched is freed here.
 */
static void
arpintr(struct mbuf *m)
{
	struct arphdr *ar;

	if (m->m_len < sizeof(struct arphdr) &&
	    ((m = m_pullup(m, sizeof(struct arphdr))) == NULL)) {
		/* m_pullup() has already released the chain. */
		log(LOG_ERR, "arp: runt packet -- m_pullup failed\n");
		return;
	}
	ar = mtod(m, struct arphdr *);

	if (ntohs(ar->ar_hrd) != ARPHRD_ETHER &&
	    ntohs(ar->ar_hrd) != ARPHRD_IEEE802 &&
	    ntohs(ar->ar_hrd) != ARPHRD_ARCNET &&
	    ntohs(ar->ar_hrd) != ARPHRD_IEEE1394) {
		log(LOG_ERR, "arp: unknown hardware address format (0x%2D)\n",
		    (unsigned char *)&ar->ar_hrd, "");
		m_freem(m);
		return;
	}

	if (m->m_len < arphdr_len(ar)) {
		if ((m = m_pullup(m, arphdr_len(ar))) == NULL) {
			/*
			 * As above, the chain is gone and m is NULL, so
			 * there is nothing left to free.
			 */
			log(LOG_ERR, "arp: runt packet\n");
			return;
		}
		/* The data may have moved; refresh the header pointer. */
		ar = mtod(m, struct arphdr *);
	}

	switch (ntohs(ar->ar_pro)) {
#ifdef INET
	case ETHERTYPE_IP:
		in_arpinput(m);
		return;
#endif
	default:
		break;
	}
	/* Unsupported protocol type: drop the packet. */
	m_freem(m);
}
#ifdef INET
/*
* ARP for Internet protocols on 10 Mb/s Ethernet.
* Algorithm is that given in RFC 826.
* In addition, a sanity check is performed on the sender
* protocol address, to catch impersonators.
* We no longer handle negotiations for use of trailer protocol:
* Formerly, ARP replied for protocol type ETHERTYPE_TRAIL sent
* along with IP replies if we wanted trailers sent to us,
* and also sent them in response to IP replies.
* This allowed either end to announce the desire to receive
* trailer packets.
* We no longer reply to requests for ETHERTYPE_TRAIL protocol either,
* but formerly didn't normally send requests.
*/
static int log_arp_wrong_iface = 1;
static int log_arp_movements = 1;
static int log_arp_permanent_modify = 1;
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_wrong_iface, CTLFLAG_RW,
&log_arp_wrong_iface, 0,
"log arp packets arriving on the wrong interface");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_movements, CTLFLAG_RW,
&log_arp_movements, 0,
"log arp replies from MACs different than the one in the cache");
SYSCTL_INT(_net_link_ether_inet, OID_AUTO, log_arp_permanent_modify, CTLFLAG_RW,
&log_arp_permanent_modify, 0,
"log arp replies from MACs different than the one in the permanent arp entry");
/*
 * Process one received IPv4 ARP packet (called from arpintr()).
 *
 * Outline of the visible flow:
 *  1. find a local in_ifaddr "matching" the packet (target address,
 *     sender address, bridge member, or a fallback address);
 *  2. drop packets that carry our own or the broadcast hardware address;
 *  3. unless the interface is IFF_STATICARP, look up (and possibly
 *     create) the link-layer entry for the sender and refresh it;
 *  4. at "reply", answer ARP requests, reusing m for the reply;
 *  5. at "drop", release the entry lock (if any) and free m.
 */
static void
in_arpinput(struct mbuf *m)
{
struct arphdr *ah;
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct llentry *la = NULL;
struct rtentry *rt;
struct ifaddr *ifa;
struct in_ifaddr *ia;
struct sockaddr sa;
struct in_addr isaddr, itaddr, myaddr;
u_int8_t *enaddr = NULL;
- int op, flag, lock_owned = 0;
+ int op, flags;
+ struct mbuf *m0;
/*
, rif_len;
*/
int req_len;
int bridged = 0, is_bridge = 0;
#ifdef DEV_CARP
int carp_match = 0;
#endif
struct sockaddr_in sin;
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = 0;
INIT_VNET_INET(ifp->if_vnet);
if (ifp->if_bridge)
bridged = 1;
if (ifp->if_type == IFT_BRIDGE)
is_bridge = 1;
/* Make sure the complete IPv4 ARP header is contiguous before parsing it. */
req_len = arphdr_len2(ifp->if_addrlen, sizeof(struct in_addr));
if (m->m_len < req_len && (m = m_pullup(m, req_len)) == NULL) {
log(LOG_ERR, "in_arp: runt packet -- m_pullup failed\n");
return;
}
ah = mtod(m, struct arphdr *);
op = ntohs(ah->ar_op);
(void)memcpy(&isaddr, ar_spa(ah), sizeof (isaddr));
(void)memcpy(&itaddr, ar_tpa(ah), sizeof (itaddr));
/*
* For a bridge, we want to check the address irrespective
* of the receive interface. (This will change slightly
* when we have clusters of interfaces).
* If the interface does not match, but the receiving interface
* is part of carp, we call carp_iamatch to see if this is a
* request for the virtual host ip.
* XXX: This is really ugly!
*/
LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
(ia->ia_ifp == ifp)) &&
itaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
goto match;
#ifdef DEV_CARP
if (ifp->if_carp != NULL &&
carp_iamatch(ifp->if_carp, ia, &isaddr, &enaddr) &&
itaddr.s_addr == ia->ia_addr.sin_addr.s_addr) {
carp_match = 1;
goto match;
}
#endif
}
/* Second pass: does the sender address belong to one of our interfaces? */
LIST_FOREACH(ia, INADDR_HASH(isaddr.s_addr), ia_hash)
if (((bridged && ia->ia_ifp->if_bridge != NULL) ||
(ia->ia_ifp == ifp)) &&
isaddr.s_addr == ia->ia_addr.sin_addr.s_addr)
goto match;
#define BDG_MEMBER_MATCHES_ARP(addr, ifp, ia) \
(ia->ia_ifp->if_bridge == ifp->if_softc && \
!bcmp(IF_LLADDR(ia->ia_ifp), IF_LLADDR(ifp), ifp->if_addrlen) && \
addr == ia->ia_addr.sin_addr.s_addr)
/*
* Check the case when bridge shares its MAC address with
* some of its children, so packets are claimed by bridge
* itself (bridge_input() does it first), but they are really
* meant to be destined to the bridge member.
*/
if (is_bridge) {
LIST_FOREACH(ia, INADDR_HASH(itaddr.s_addr), ia_hash) {
if (BDG_MEMBER_MATCHES_ARP(itaddr.s_addr, ifp, ia)) {
ifp = ia->ia_ifp;
goto match;
}
}
}
#undef BDG_MEMBER_MATCHES_ARP
/*
* No match, use the first inet address on the receive interface
* as a dummy address for the rest of the function.
*/
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link)
if (ifa->ifa_addr->sa_family == AF_INET) {
ia = ifatoia(ifa);
goto match;
}
/*
* If bridging, fall back to using any inet address.
*/
if (!bridged || (ia = TAILQ_FIRST(&V_in_ifaddrhead)) == NULL)
goto drop;
match:
+ log(LOG_DEBUG,"in_arpinput: match\n");
/* enaddr may already have been set by carp_iamatch(); otherwise use the interface's own link-layer address. */
if (!enaddr)
enaddr = (u_int8_t *)IF_LLADDR(ifp);
myaddr = ia->ia_addr.sin_addr;
if (!bcmp(ar_sha(ah), enaddr, ifp->if_addrlen))
goto drop; /* it's from me, ignore it. */
if (!bcmp(ar_sha(ah), ifp->if_broadcastaddr, ifp->if_addrlen)) {
log(LOG_ERR,
"arp: link address is broadcast for IP address %s!\n",
inet_ntoa(isaddr));
goto drop;
}
/*
* Warn if another host is using the same IP address, but only if the
* IP address isn't 0.0.0.0, which is used for DHCP only, in which
* case we suppress the warning to avoid false positive complaints of
* potential misconfiguration.
*/
if (!bridged && isaddr.s_addr == myaddr.s_addr && myaddr.s_addr != 0) {
log(LOG_ERR,
"arp: %*D is using my IP address %s on %s!\n",
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
inet_ntoa(isaddr), ifp->if_xname);
itaddr = myaddr;
goto reply;
}
if (ifp->if_flags & IFF_STATICARP)
goto reply;
bzero(&sin, sizeof(sin));
sin.sin_len = sizeof(struct sockaddr_in);
sin.sin_family = AF_INET;
sin.sin_addr = isaddr;
/*
 * Only create an entry for the sender when the packet was addressed to
 * us.  The lookup is made with LLE_EXCLUSIVE; every exit below that
 * still holds the entry releases it with LLE_WUNLOCK().
 */
- flag = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0;
+ flags = (itaddr.s_addr == myaddr.s_addr) ? LLE_CREATE : 0;
+ flags |= LLE_EXCLUSIVE;
IF_AFDATA_LOCK(ifp);
- lock_owned = 1;
- la = lla_lookup(LLTABLE(ifp), flag, (struct sockaddr *)&sin);
+ la = lla_lookup(LLTABLE(ifp), flags, (struct sockaddr *)&sin);
+ IF_AFDATA_UNLOCK(ifp);
if (la != NULL) {
+ log(LOG_DEBUG, "in_arpinput: la found\n");
/* the following is not an error when doing bridging */
if (!bridged && la->lle_tbl->llt_ifp != ifp
#ifdef DEV_CARP
&& (ifp->if_type != IFT_CARP || !carp_match)
#endif
- ) {
+ ) {
if (log_arp_wrong_iface)
log(LOG_ERR, "arp: %s is on %s "
"but got reply from %*D on %s\n",
inet_ntoa(isaddr),
la->lle_tbl->llt_ifp->if_xname,
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
ifp->if_xname);
goto reply;
}
-
- if (la->la_flags & LLE_VALID &&
+ if ((la->la_flags & LLE_VALID) &&
bcmp(ar_sha(ah), &la->ll_addr, ifp->if_addrlen)) {
/* bcmp() is non-zero here, i.e. the cached hardware address DIFFERS from the sender's. */
+ log(LOG_DEBUG, "LLE_VALID and match\n");
if (la->la_flags & LLE_STATIC) {
log(LOG_ERR,
"arp: %*D attempts to modify permanent "
"entry for %s on %s\n",
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
inet_ntoa(isaddr), ifp->if_xname);
goto reply;
}
if (log_arp_movements) {
log(LOG_INFO, "arp: %s moved from %*D "
"to %*D on %s\n",
inet_ntoa(isaddr),
ifp->if_addrlen,
(u_char *)&la->ll_addr, ":",
ifp->if_addrlen, (u_char *)ar_sha(ah), ":",
ifp->if_xname);
}
}
+
if (ifp->if_addrlen != ah->ar_hln) {
log(LOG_WARNING,
"arp from %*D: addr len: new %d, i/f %d (ignored)",
ifp->if_addrlen, (u_char *) ar_sha(ah), ":",
ah->ar_hln, ifp->if_addrlen);
goto reply;
}
/* Record the sender's hardware address and mark the entry resolved. */
(void)memcpy(&la->ll_addr, ar_sha(ah), ifp->if_addrlen);
la->la_flags |= LLE_VALID;
+ log(LOG_DEBUG, "in_arpinput: la=%p valid set\n", la);
if (!(la->la_flags & LLE_STATIC)) {
/*
 * NOTE(review): bare arpt_keep is used here while the callout_reset()
 * just below uses V_arpt_keep -- looks inconsistent; confirm.
 */
la->la_expire = time_uptime + arpt_keep;
callout_reset(&la->la_timer, hz * V_arpt_keep,
arptimer, la);
}
la->la_asked = 0;
la->la_preempt = V_arp_maxtries;
if (la->la_hold) {
/*
 * A packet was waiting for this resolution: detach it, copy the L3
 * address, drop the entry lock and only then transmit.
 * NOTE(review): the early return below bypasses both "reply" and "drop",
 * so the received ARP mbuf m appears to be neither answered nor freed on
 * this path -- confirm.
 */
- (*ifp->if_output)(ifp, la->la_hold, L3_ADDR(la), NULL);
+ m0 = la->la_hold;
la->la_hold = 0;
+ memcpy(&sa, L3_ADDR(la), sizeof(sa));
+ LLE_WUNLOCK(la);
+
+ (*ifp->if_output)(ifp, m0, &sa, NULL);
+ return;
}
}
reply:
/* Only ARP requests are answered; everything else is dropped. */
if (op != ARPOP_REQUEST)
goto drop;
if (itaddr.s_addr == myaddr.s_addr) {
/* Shortcut.. the receiving interface is the target. */
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
} else {
if (la == NULL) {
if (!V_arp_proxyall)
goto drop;
sin.sin_addr = itaddr;
-
/* XXX MRT use table 0 for arp reply */
rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
if (!rt)
goto drop;
/*
* Don't send proxies for nodes on the same interface
* as this one came out of, or we'll get into a fight
* over who claims what Ether address.
*/
if (rt->rt_ifp == ifp) {
RTFREE_LOCKED(rt);
goto drop;
}
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
(void)memcpy(ar_sha(ah), enaddr, ah->ar_hln);
RTFREE_LOCKED(rt);
/*
* Also check that the node which sent the ARP packet
* is on the interface we expect it to be on. This
* avoids ARP chaos if an interface is connected to the
* wrong network.
*/
sin.sin_addr = isaddr;
/* XXX MRT use table 0 for arp checks */
rt = in_rtalloc1((struct sockaddr *)&sin, 0, 0UL, 0);
if (!rt)
goto drop;
if (rt->rt_ifp != ifp) {
log(LOG_INFO, "arp_proxy: ignoring request"
" from %s via %s, expecting %s\n",
inet_ntoa(isaddr), ifp->if_xname,
rt->rt_ifp->if_xname);
RTFREE_LOCKED(rt);
goto drop;
}
RTFREE_LOCKED(rt);
#ifdef DEBUG_PROXY
printf("arp: proxying for %s\n",
inet_ntoa(itaddr));
#endif
} else {
/*
* Return proxied ARP replies only on the interface
* or bridge cluster where this network resides.
* Otherwise we may conflict with the host we are
* proxying for.
*/
if (la->lle_tbl->llt_ifp != ifp &&
(la->lle_tbl->llt_ifp->if_bridge != ifp->if_bridge ||
ifp->if_bridge == NULL)) {
goto drop;
}
(void)memcpy(ar_tha(ah), ar_sha(ah), ah->ar_hln);
(void)memcpy(ar_sha(ah), &la->ll_addr, ah->ar_hln);
}
}
- if (lock_owned != 0) {
- IF_AFDATA_UNLOCK(ifp);
- lock_owned = 0;
- }
-
/* Done with the entry; release it before building and sending the reply. */
+ if (la)
+ LLE_WUNLOCK(la);
if (itaddr.s_addr == myaddr.s_addr &&
IN_LINKLOCAL(ntohl(itaddr.s_addr))) {
/* RFC 3927 link-local IPv4; always reply by broadcast. */
#ifdef DEBUG_LINKLOCAL
printf("arp: sending reply for link-local addr %s\n",
inet_ntoa(itaddr));
#endif
m->m_flags |= M_BCAST;
m->m_flags &= ~M_MCAST;
} else {
/* default behaviour; never reply by broadcast. */
m->m_flags &= ~(M_BCAST|M_MCAST);
}
/* Turn the received request into the reply in place and send it. */
(void)memcpy(ar_tpa(ah), ar_spa(ah), ah->ar_pln);
(void)memcpy(ar_spa(ah), &itaddr, ah->ar_pln);
ah->ar_op = htons(ARPOP_REPLY);
ah->ar_pro = htons(ETHERTYPE_IP); /* let's be sure! */
m->m_len = sizeof(*ah) + (2 * ah->ar_pln) + (2 * ah->ar_hln);
m->m_pkthdr.len = m->m_len;
sa.sa_family = AF_ARP;
sa.sa_len = 2;
(*ifp->if_output)(ifp, m, &sa, (struct rtentry *)0);
return;
drop:
- if (lock_owned != 0)
- IF_AFDATA_UNLOCK(ifp);
+ if (la)
+ LLE_WUNLOCK(la);
m_freem(m);
}
#endif
/*
 * Announce a newly configured interface address and install a permanent
 * link-layer entry for it.
 *
 * Unless the address is INADDR_ANY, an ARP request with identical sender
 * and target address is sent for it.  Then an entry is created for the
 * address with LLE_CREATE | LLE_IFADDR | LLE_STATIC, and the address's
 * ifa_rtrequest hook is cleared.
 */
void
arp_ifinit(struct ifnet *ifp, struct ifaddr *ifa)
{
struct llentry *lle;
if (ntohl(IA_SIN(ifa)->sin_addr.s_addr) != INADDR_ANY)
arprequest(ifp, &IA_SIN(ifa)->sin_addr,
&IA_SIN(ifa)->sin_addr, IF_LLADDR(ifp));
/*
* interface address is considered static entry
* because the output of the arp utility shows
* that L2 entry as permanent
*/
IF_AFDATA_LOCK(ifp);
lle = lla_lookup(LLTABLE(ifp), (LLE_CREATE | LLE_IFADDR | LLE_STATIC),
(struct sockaddr *)IA_SIN(ifa));
IF_AFDATA_UNLOCK(ifp);
if (lle == NULL)
log(LOG_INFO, "arp_ifinit: cannot create arp "
"entry for interface address\n");
/*
 * NOTE(review): the unlock below is not guarded by the NULL check above,
 * so it is reached with lle == NULL when the lookup fails -- looks like a
 * NULL dereference; confirm and guard with an else.
 */
+ LLE_RUNLOCK(lle);
ifa->ifa_rtrequest = NULL;
}
/*
 * Variant of arp_ifinit() for callers that supply the hardware address
 * explicitly: announce the interface address with an ARP request whose
 * sender and target are both that address (skipped for INADDR_ANY), then
 * clear the address's ifa_rtrequest hook.  No link-layer entry is created.
 */
void
arp_ifinit2(struct ifnet *ifp, struct ifaddr *ifa, u_char *enaddr)
{
	struct in_addr *self = &IA_SIN(ifa)->sin_addr;

	if (ntohl(self->s_addr) != INADDR_ANY)
		arprequest(ifp, self, self, enaddr);

	ifa->ifa_rtrequest = NULL;
}
/*
 * One-time ARP initialisation (run through the SYSINIT below): set the
 * default tunables, prepare the ARP input queue and register arpintr()
 * as the NETISR_ARP handler.
 */
static void
arp_init(void)
{
	INIT_VNET_INET(curvnet);

	/* Input queue: at most 50 packets, protected by its own mutex. */
	mtx_init(&arpintrq.ifq_mtx, "arp_inq", NULL, MTX_DEF);
	arpintrq.ifq_maxlen = 50;

	/* Tunable defaults. */
	V_arp_proxyall = 0;
	V_useloopback = 1;	/* use loopback interface for local traffic */
	V_arp_maxtries = 5;
	V_arpt_keep = 20 * 60;	/* once resolved, good for 20 more minutes */

	/* Start accepting packets only after everything above is set up. */
	netisr_register(NETISR_ARP, arpintr, &arpintrq, 0);
}
SYSINIT(arp, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY, arp_init, 0);
Index: projects/arpv2_merge_1/sys/netinet/in.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet/in.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet/in.c (revision 185839)
@@ -1,1234 +1,1255 @@
/*-
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
* Copyright (C) 2001 WIDE Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in.c 8.4 (Berkeley) 1/9/95
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_carp.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sockio.h>
#include <sys/malloc.h>
#include <sys/priv.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/vinet.h>
/* Forward declarations for the file-local helpers defined further down. */
static int in_mask2len(struct in_addr *);
static void in_len2mask(struct in_addr *, int);
static int in_lifaddr_ioctl(struct socket *, u_long, caddr_t,
struct ifnet *, struct thread *);
static int in_addprefix(struct in_ifaddr *, int);
static int in_scrubprefix(struct in_ifaddr *);
static void in_socktrim(struct sockaddr_in *);
static int in_ifinit(struct ifnet *,
struct in_ifaddr *, struct sockaddr_in *, int);
static void in_purgemaddrs(struct ifnet *);
/*
 * Storage for the two tunables exported below; only defined here as plain
 * globals when VIMAGE_GLOBALS is set.
 */
#ifdef VIMAGE_GLOBALS
static int subnetsarelocal;
static int sameprefixcarponly;
extern struct inpcbinfo ripcbinfo;
#endif
/* Read/write integer sysctls under _net_inet_ip for the tunables above. */
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, subnets_are_local,
CTLFLAG_RW, subnetsarelocal, 0,
"Treat all subnets as directly connected");
SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_ip, OID_AUTO, same_prefix_carp_only,
CTLFLAG_RW, sameprefixcarponly, 0,
"Refuse to create same prefixes on different interfaces");
/*
* Return 1 if an internet address is for a ``local'' host
* (one to which we have a connection). If subnetsarelocal
* is true, this includes other subnets of the local net.
* Otherwise, it includes only the directly-connected (sub)nets.
*/
int
in_localaddr(struct in_addr in)
{
INIT_VNET_INET(curvnet);
register u_long i = ntohl(in.s_addr);
register struct in_ifaddr *ia;
if (V_subnetsarelocal) {
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link)
if ((i & ia->ia_netmask) == ia->ia_net)
return (1);
} else {
TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link)
if ((i & ia->ia_subnetmask) == ia->ia_subnet)
return (1);
}
return (0);
}
/*
* Return 1 if an internet address is for the local host and configured
* on one of its interfaces.
*/
int
in_localip(struct in_addr in)
{
	INIT_VNET_INET(curvnet);
	struct in_ifaddr *cur;
	int found = 0;

	/* Walk only the hash bucket selected by INADDR_HASH for this address. */
	LIST_FOREACH(cur, INADDR_HASH(in.s_addr), ia_hash) {
		if (IA_SIN(cur)->sin_addr.s_addr != in.s_addr)
			continue;
		found = 1;
		break;
	}
	return (found);
}
/*
* Determine whether an IP address is in a reserved set of addresses
* that may not be forwarded, or whether datagrams to that destination
* may be forwarded.
*/
int
in_canforward(struct in_addr in)
{
register u_long i = ntohl(in.s_addr);
register u_long net;
if (IN_EXPERIMENTAL(i) || IN_MULTICAST(i) || IN_LINKLOCAL(i))
return (0);
if (IN_CLASSA(i)) {
net = i & IN_CLASSA_NET;
if (net == 0 || net == (IN_LOOPBACKNET << IN_CLASSA_NSHIFT))
return (0);
}
return (1);
}
/*
* Trim a mask in a sockaddr
*/
static void
in_socktrim(struct sockaddr_in *ap)
{
	char *first = (char *)&ap->sin_addr;
	char *scan;

	/*
	 * Scan sin_addr from its last byte towards its first; sin_len
	 * becomes the offset (plus one) of the last non-zero byte, or
	 * stays 0 when the whole address is zero.
	 */
	ap->sin_len = 0;
	for (scan = (char *)(&ap->sin_addr + 1) - 1; scan >= first; scan--) {
		if (*scan != 0) {
			ap->sin_len = scan - (char *)ap + 1;
			return;
		}
	}
}
/*
 * Convert a netmask into a prefix length.
 *
 * mask: netmask, read byte by byte in memory order.
 * Returns the number of leading one bits, 0 through 32; counting stops
 * at the first zero bit, so later bits are ignored.
 */
static int
in_mask2len(struct in_addr *mask)
{
	const unsigned char *bytes = (const unsigned char *)mask;
	size_t full;
	int bits = 0;

	/* Count the leading bytes that are entirely ones. */
	for (full = 0; full < sizeof(*mask); full++) {
		if (bytes[full] != 0xff)
			break;
	}
	/* Then count the leading one bits of the first partial byte. */
	if (full < sizeof(*mask)) {
		while (bits < 8 && (bytes[full] & (0x80 >> bits)) != 0)
			bits++;
	}
	return ((int)(full * 8) + bits);
}
/*
 * Build a netmask from a prefix length.
 *
 * mask: output; overwritten with `len' leading one bits followed by zeros,
 *       laid out byte by byte in memory order.
 * len:  prefix length.  Values outside 0..32 are clamped so that the
 *       function never writes past *mask nor shifts by a negative count.
 */
static void
in_len2mask(struct in_addr *mask, int len)
{
	const int maxbits = (int)(sizeof(*mask) * 8);
	unsigned char *bytes = (unsigned char *)mask;
	int i;

	if (len < 0)
		len = 0;
	else if (len > maxbits)
		len = maxbits;

	bzero(mask, sizeof(*mask));
	for (i = 0; i < len / 8; i++)
		bytes[i] = 0xff;
	/* Partial byte: keep only the top (len % 8) bits. */
	if (len % 8)
		bytes[i] = (0xff00 >> (len % 8)) & 0xff;
}
/*
* Generic internet control operations (ioctl's).
* Ifp is 0 if not an interface-specific ioctl.
*/
/* ARGSUSED */
/*
 * Handles the IPv4 address ioctls in three passes over `cmd':
 *   1. SIOC[AGD]LIFADDR are handed straight to in_lifaddr_ioctl().
 *   2. The in_ifaddr to operate on is located (or, for the "set"/"add"
 *      commands, allocated and linked) and privileges are checked.
 *   3. The requested operation is carried out.
 * Code after the last switch unlinks and releases `ia'; it is reached by
 * SIOCDIFADDR and by SIOCSIFADDR/SIOCAIFADDR when in_ifinit() fails on an
 * address that was allocated during this call.
 *
 * `data' is interpreted as a struct ifreq or a struct in_aliasreq depending
 * on the command.  Returns 0 or an errno value.
 */
int
in_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp,
struct thread *td)
{
INIT_VNET_INET(curvnet); /* both so and ifp can be NULL here! */
register struct ifreq *ifr = (struct ifreq *)data;
register struct in_ifaddr *ia, *iap;
register struct ifaddr *ifa;
struct in_addr allhosts_addr;
struct in_addr dst;
struct in_ifaddr *oia;
struct in_aliasreq *ifra = (struct in_aliasreq *)data;
struct sockaddr_in oldaddr;
int error, hostIsNew, iaIsNew, maskIsNew, s;
int iaIsFirst;
ia = NULL;
iaIsFirst = 0;
iaIsNew = 0;
allhosts_addr.s_addr = htonl(INADDR_ALLHOSTS_GROUP);
/* Pass 1: the SIOC*LIFADDR family is handled entirely elsewhere. */
switch (cmd) {
case SIOCALIFADDR:
if (td != NULL) {
error = priv_check(td, PRIV_NET_ADDIFADDR);
if (error)
return (error);
}
if (ifp == NULL)
return (EINVAL);
return in_lifaddr_ioctl(so, cmd, data, ifp, td);
case SIOCDLIFADDR:
if (td != NULL) {
error = priv_check(td, PRIV_NET_DELIFADDR);
if (error)
return (error);
}
if (ifp == NULL)
return (EINVAL);
return in_lifaddr_ioctl(so, cmd, data, ifp, td);
case SIOCGLIFADDR:
if (ifp == NULL)
return (EINVAL);
return in_lifaddr_ioctl(so, cmd, data, ifp, td);
}
/*
* Find address for this interface, if it exists.
*
* If an alias address was specified, find that one instead of
* the first one on the interface, if possible.
*/
if (ifp != NULL) {
dst = ((struct sockaddr_in *)&ifr->ifr_addr)->sin_addr;
/* Exact match on (interface, address) via the address hash. */
LIST_FOREACH(iap, INADDR_HASH(dst.s_addr), ia_hash)
if (iap->ia_ifp == ifp &&
iap->ia_addr.sin_addr.s_addr == dst.s_addr) {
ia = iap;
break;
}
/* No exact match: fall back to the first AF_INET address on ifp. */
if (ia == NULL)
TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
iap = ifatoia(ifa);
if (iap->ia_addr.sin_family == AF_INET) {
ia = iap;
break;
}
}
/* Still nothing: whatever gets added will be the first IPv4 address. */
if (ia == NULL)
iaIsFirst = 1;
}
/* Pass 2: validate the request and make sure `ia' is usable. */
switch (cmd) {
case SIOCAIFADDR:
case SIOCDIFADDR:
if (ifp == NULL)
return (EADDRNOTAVAIL);
if (ifra->ifra_addr.sin_family == AF_INET) {
/*
 * Continue along the global address list from `ia' looking for
 * the exact alias; `ia' ends up NULL when there is none.
 */
for (oia = ia; ia; ia = TAILQ_NEXT(ia, ia_link)) {
if (ia->ia_ifp == ifp &&
ia->ia_addr.sin_addr.s_addr ==
ifra->ifra_addr.sin_addr.s_addr)
break;
}
if ((ifp->if_flags & IFF_POINTOPOINT)
&& (cmd == SIOCAIFADDR)
&& (ifra->ifra_dstaddr.sin_addr.s_addr
== INADDR_ANY)) {
return (EDESTADDRREQ);
}
}
if (cmd == SIOCDIFADDR && ia == NULL)
return (EADDRNOTAVAIL);
/* FALLTHROUGH */
case SIOCSIFADDR:
case SIOCSIFNETMASK:
case SIOCSIFDSTADDR:
if (td != NULL) {
error = priv_check(td, (cmd == SIOCDIFADDR) ?
PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR);
if (error)
return (error);
}
if (ifp == NULL)
return (EADDRNOTAVAIL);
if (ia == NULL) {
/* No existing address: allocate a zeroed one and link it in. */
ia = (struct in_ifaddr *)
malloc(sizeof *ia, M_IFADDR, M_WAITOK | M_ZERO);
/* NOTE(review): presumably unreachable with M_WAITOK -- see malloc(9). */
if (ia == NULL)
return (ENOBUFS);
/*
* Protect from ipintr() traversing address list
* while we're modifying it.
*/
s = splnet();
ifa = &ia->ia_ifa;
IFA_LOCK_INIT(ifa);
ifa->ifa_addr = (struct sockaddr *)&ia->ia_addr;
ifa->ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr;
ifa->ifa_netmask = (struct sockaddr *)&ia->ia_sockmask;
ifa->ifa_refcnt = 1;
TAILQ_INSERT_TAIL(&ifp->if_addrhead, ifa, ifa_link);
ia->ia_sockmask.sin_len = 8;
ia->ia_sockmask.sin_family = AF_INET;
if (ifp->if_flags & IFF_BROADCAST) {
ia->ia_broadaddr.sin_len = sizeof(ia->ia_addr);
ia->ia_broadaddr.sin_family = AF_INET;
}
ia->ia_ifp = ifp;
TAILQ_INSERT_TAIL(&V_in_ifaddrhead, ia, ia_link);
splx(s);
/* Remember that a failure below must undo this allocation. */
iaIsNew = 1;
}
break;
case SIOCSIFBRDADDR:
if (td != NULL) {
error = priv_check(td, PRIV_NET_ADDIFADDR);
if (error)
return (error);
}
/* FALLTHROUGH */
case SIOCGIFADDR:
case SIOCGIFNETMASK:
case SIOCGIFDSTADDR:
case SIOCGIFBRDADDR:
/* These commands require an existing address. */
if (ia == NULL)
return (EADDRNOTAVAIL);
break;
}
/* Pass 3: perform the operation. */
switch (cmd) {
case SIOCGIFADDR:
*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_addr;
return (0);
case SIOCGIFBRDADDR:
if ((ifp->if_flags & IFF_BROADCAST) == 0)
return (EINVAL);
*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_broadaddr;
return (0);
case SIOCGIFDSTADDR:
if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
return (EINVAL);
*((struct sockaddr_in *)&ifr->ifr_dstaddr) = ia->ia_dstaddr;
return (0);
case SIOCGIFNETMASK:
*((struct sockaddr_in *)&ifr->ifr_addr) = ia->ia_sockmask;
return (0);
case SIOCSIFDSTADDR:
if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
return (EINVAL);
oldaddr = ia->ia_dstaddr;
ia->ia_dstaddr = *(struct sockaddr_in *)&ifr->ifr_dstaddr;
if (ifp->if_ioctl != NULL) {
IFF_LOCKGIANT(ifp);
error = (*ifp->if_ioctl)(ifp, SIOCSIFDSTADDR,
(caddr_t)ia);
IFF_UNLOCKGIANT(ifp);
if (error) {
/* Driver rejected it: restore the previous destination. */
ia->ia_dstaddr = oldaddr;
return (error);
}
}
if (ia->ia_flags & IFA_ROUTE) {
/*
 * Point ifa_dstaddr at the old destination for the RTM_DELETE,
 * then at the new one for the RTM_ADD.
 */
ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&oldaddr;
rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST);
ia->ia_ifa.ifa_dstaddr =
(struct sockaddr *)&ia->ia_dstaddr;
rtinit(&(ia->ia_ifa), (int)RTM_ADD, RTF_HOST|RTF_UP);
}
return (0);
case SIOCSIFBRDADDR:
if ((ifp->if_flags & IFF_BROADCAST) == 0)
return (EINVAL);
ia->ia_broadaddr = *(struct sockaddr_in *)&ifr->ifr_broadaddr;
return (0);
case SIOCSIFADDR:
error = in_ifinit(ifp, ia,
(struct sockaddr_in *) &ifr->ifr_addr, 1);
/* Failed on a freshly allocated ia: fall out to the removal code. */
if (error != 0 && iaIsNew)
break;
if (error == 0) {
if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0)
in_addmulti(&allhosts_addr, ifp);
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
}
/*
 * NOTE(review): returns 0 even when in_ifinit() failed on an
 * already existing address -- confirm this is intended.
 */
return (0);
case SIOCSIFNETMASK:
ia->ia_sockmask.sin_addr = ifra->ifra_addr.sin_addr;
ia->ia_subnetmask = ntohl(ia->ia_sockmask.sin_addr.s_addr);
return (0);
case SIOCAIFADDR:
maskIsNew = 0;
hostIsNew = 1;
error = 0;
if (ia->ia_addr.sin_family == AF_INET) {
if (ifra->ifra_addr.sin_len == 0) {
/* No address supplied: reuse the current one. */
ifra->ifra_addr = ia->ia_addr;
hostIsNew = 0;
} else if (ifra->ifra_addr.sin_addr.s_addr ==
ia->ia_addr.sin_addr.s_addr)
hostIsNew = 0;
}
if (ifra->ifra_mask.sin_len) {
in_ifscrub(ifp, ia);
ia->ia_sockmask = ifra->ifra_mask;
ia->ia_sockmask.sin_family = AF_INET;
ia->ia_subnetmask =
ntohl(ia->ia_sockmask.sin_addr.s_addr);
maskIsNew = 1;
}
if ((ifp->if_flags & IFF_POINTOPOINT) &&
(ifra->ifra_dstaddr.sin_family == AF_INET)) {
in_ifscrub(ifp, ia);
ia->ia_dstaddr = ifra->ifra_dstaddr;
maskIsNew = 1; /* We lie; but the effect's the same */
}
if (ifra->ifra_addr.sin_family == AF_INET &&
(hostIsNew || maskIsNew))
error = in_ifinit(ifp, ia, &ifra->ifra_addr, 0);
/* Failed on a freshly allocated ia: fall out to the removal code. */
if (error != 0 && iaIsNew)
break;
if ((ifp->if_flags & IFF_BROADCAST) &&
(ifra->ifra_broadaddr.sin_family == AF_INET))
ia->ia_broadaddr = ifra->ifra_broadaddr;
if (error == 0) {
if (iaIsFirst && (ifp->if_flags & IFF_MULTICAST) != 0)
in_addmulti(&allhosts_addr, ifp);
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
}
return (error);
case SIOCDIFADDR:
/*
* in_ifscrub kills the interface route.
*/
in_ifscrub(ifp, ia);
/*
* in_ifadown gets rid of all the rest of
* the routes. This is not quite the right
* thing to do, but at least if we are running
* a routing process they will come back.
*/
in_ifadown(&ia->ia_ifa, 1);
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
error = 0;
break;
default:
/* Anything else is passed to the interface driver, if it has one. */
if (ifp == NULL || ifp->if_ioctl == NULL)
return (EOPNOTSUPP);
IFF_LOCKGIANT(ifp);
error = (*ifp->if_ioctl)(ifp, cmd, data);
IFF_UNLOCKGIANT(ifp);
return (error);
}
/*
* Protect from ipintr() traversing address list while we're modifying
* it.
*/
s = splnet();
/* Unlink `ia' from the per-interface and the global address lists. */
TAILQ_REMOVE(&ifp->if_addrhead, &ia->ia_ifa, ifa_link);
TAILQ_REMOVE(&V_in_ifaddrhead, ia, ia_link);
if (ia->ia_addr.sin_family == AF_INET) {
LIST_REMOVE(ia, ia_hash);
/*
* If this is the last IPv4 address configured on this
* interface, leave the all-hosts group.
* XXX: This is quite ugly because of locking and structure.
*/
oia = NULL;
IFP_TO_IA(ifp, oia);
if (oia == NULL) {
struct in_multi *inm;
IFF_LOCKGIANT(ifp);
IN_MULTI_LOCK();
IN_LOOKUP_MULTI(allhosts_addr, ifp, inm);
if (inm != NULL)
in_delmulti_locked(inm);
IN_MULTI_UNLOCK();
IFF_UNLOCKGIANT(ifp);
}
}
/* Release the reference held on the address. */
IFAFREE(&ia->ia_ifa);
splx(s);
return (error);
}
/*
* SIOC[GAD]LIFADDR.
* SIOCGLIFADDR: get first address. (?!?)
* SIOCGLIFADDR with IFLR_PREFIX:
* get first address that matches the specified prefix.
* SIOCALIFADDR: add the specified address.
* SIOCALIFADDR with IFLR_PREFIX:
* EINVAL since we can't deduce hostid part of the address.
* SIOCDLIFADDR: delete the specified address.
* SIOCDLIFADDR with IFLR_PREFIX:
* delete the first address that matches the specified prefix.
* return values:
* EINVAL on invalid parameters
* EADDRNOTAVAIL on prefix match failed/specified address not found
* other values may be returned from in_control()
*/
/*
 * Back end for SIOCALIFADDR / SIOCGLIFADDR / SIOCDLIFADDR.
 *
 * so, td: passed through unchanged to in_control().
 * cmd:    one of the three SIOC*LIFADDR commands; anything else yields
 *         EOPNOTSUPP.
 * data:   struct if_laddrreq supplied by the caller; filled in on GET.
 * ifp:    interface to operate on.
 *
 * ADD and DELETE are translated into a struct in_aliasreq and forwarded to
 * in_control() as SIOCAIFADDR / SIOCDIFADDR.  Returns 0, EINVAL on malformed
 * requests, EADDRNOTAVAIL when no matching address exists, or whatever
 * in_control() returns.  Panics when data or ifp is NULL.
 */
static int
in_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
    struct ifnet *ifp, struct thread *td)
{
	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
	struct ifaddr *ifa;

	/* sanity checks */
	if (data == NULL || ifp == NULL) {
		panic("invalid argument to in_lifaddr_ioctl");
		/*NOTREACHED*/
	}

	switch (cmd) {
	case SIOCGLIFADDR:
		/* address must be specified on GET with IFLR_PREFIX */
		if ((iflr->flags & IFLR_PREFIX) == 0)
			break;
		/*FALLTHROUGH*/
	case SIOCALIFADDR:
	case SIOCDLIFADDR:
		/* address must be specified on ADD and DELETE */
		if (iflr->addr.ss_family != AF_INET)
			return (EINVAL);
		if (iflr->addr.ss_len != sizeof(struct sockaddr_in))
			return (EINVAL);
		/* XXX need improvement */
		if (iflr->dstaddr.ss_family
		    && iflr->dstaddr.ss_family != AF_INET)
			return (EINVAL);
		if (iflr->dstaddr.ss_family
		    && iflr->dstaddr.ss_len != sizeof(struct sockaddr_in))
			return (EINVAL);
		break;
	default: /*shouldn't happen*/
		return (EOPNOTSUPP);
	}
	if (sizeof(struct in_addr) * 8 < iflr->prefixlen)
		return (EINVAL);

	switch (cmd) {
	case SIOCALIFADDR:
	    {
		struct in_aliasreq ifra;

		if (iflr->flags & IFLR_PREFIX)
			return (EINVAL);

		/* copy args to in_aliasreq, perform ioctl(SIOCAIFADDR). */
		bzero(&ifra, sizeof(ifra));
		bcopy(iflr->iflr_name, ifra.ifra_name,
		    sizeof(ifra.ifra_name));

		bcopy(&iflr->addr, &ifra.ifra_addr, iflr->addr.ss_len);

		if (iflr->dstaddr.ss_family) {	/*XXX*/
			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
			    iflr->dstaddr.ss_len);
		}

		ifra.ifra_mask.sin_family = AF_INET;
		ifra.ifra_mask.sin_len = sizeof(struct sockaddr_in);
		in_len2mask(&ifra.ifra_mask.sin_addr, iflr->prefixlen);

		return (in_control(so, SIOCAIFADDR, (caddr_t)&ifra, ifp, td));
	    }
	case SIOCGLIFADDR:
	case SIOCDLIFADDR:
	    {
		struct in_ifaddr *ia;
		struct in_addr mask, candidate, match;
		struct sockaddr_in *sin;

		bzero(&mask, sizeof(mask));
		bzero(&match, sizeof(match));
		if (iflr->flags & IFLR_PREFIX) {
			/* lookup a prefix rather than address. */
			in_len2mask(&mask, iflr->prefixlen);

			sin = (struct sockaddr_in *)&iflr->addr;
			match.s_addr = sin->sin_addr.s_addr;
			match.s_addr &= mask.s_addr;

			/* if you set extra bits, that's wrong */
			if (match.s_addr != sin->sin_addr.s_addr)
				return (EINVAL);
		} else {
			/* on getting an address, take the 1st match */
			/* on deleting an address, do exact match */
			if (cmd != SIOCGLIFADDR) {
				in_len2mask(&mask, 32);
				sin = (struct sockaddr_in *)&iflr->addr;
				match.s_addr = sin->sin_addr.s_addr;
			}
		}

		TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
			/* Only IPv4 addresses are candidates here. */
			if (ifa->ifa_addr->sa_family != AF_INET)
				continue;
			/* A zero match accepts the first IPv4 address. */
			if (match.s_addr == 0)
				break;
			/*
			 * ifa_addr is itself a pointer to the sockaddr, so
			 * it is cast directly rather than via its address.
			 */
			candidate.s_addr = ((struct sockaddr_in *)
			    ifa->ifa_addr)->sin_addr.s_addr;
			candidate.s_addr &= mask.s_addr;
			if (candidate.s_addr == match.s_addr)
				break;
		}
		if (ifa == NULL)
			return (EADDRNOTAVAIL);
		ia = (struct in_ifaddr *)ifa;

		if (cmd == SIOCGLIFADDR) {
			/* fill in the if_laddrreq structure */
			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin_len);

			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
				    ia->ia_dstaddr.sin_len);
			} else
				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));

			iflr->prefixlen =
			    in_mask2len(&ia->ia_sockmask.sin_addr);

			iflr->flags = 0;	/*XXX*/

			return (0);
		} else {
			struct in_aliasreq ifra;

			/* fill in_aliasreq and do ioctl(SIOCDIFADDR) */
			bzero(&ifra, sizeof(ifra));
			bcopy(iflr->iflr_name, ifra.ifra_name,
			    sizeof(ifra.ifra_name));

			bcopy(&ia->ia_addr, &ifra.ifra_addr,
			    ia->ia_addr.sin_len);
			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
				    ia->ia_dstaddr.sin_len);
			}
			/* The netmask goes into ifra_mask, not ifra_dstaddr. */
			bcopy(&ia->ia_sockmask, &ifra.ifra_mask,
			    ia->ia_sockmask.sin_len);

			return (in_control(so, SIOCDIFADDR, (caddr_t)&ifra,
			    ifp, td));
		}
	    }
	}

	return (EOPNOTSUPP);	/*just for safety*/
}
/*
* Delete any existing route for an interface.
*/
void
in_ifscrub(struct ifnet *ifp, struct in_ifaddr *ia)
{
	/*
	 * Thin wrapper: all the work happens in in_scrubprefix(); its
	 * return value is discarded and ifp is not consulted.
	 */
	(void)in_scrubprefix(ia);
}
/*
* Initialize an interface's internet address
* and routing table entry.
*/
/*
 * ifp:   interface that owns the address.
 * ia:    address record to (re)initialize; its ia_addr is replaced by *sin.
 * sin:   the new address.
 * scrub: when non-zero, in_ifscrub() is called for the previous address
 *        before the new prefix route is added.
 *
 * Returns 0 on success, the driver's SIOCSIFADDR error, or the error from
 * in_addprefix().
 */
static int
in_ifinit(struct ifnet *ifp, struct in_ifaddr *ia, struct sockaddr_in *sin,
int scrub)
{
INIT_VNET_INET(ifp->if_vnet);
register u_long i = ntohl(sin->sin_addr.s_addr);
struct sockaddr_in oldaddr;
int s = splimp(), flags = RTF_UP, error = 0;
/* Swap the address in, keeping the address hash in step with it. */
oldaddr = ia->ia_addr;
if (oldaddr.sin_family == AF_INET)
LIST_REMOVE(ia, ia_hash);
ia->ia_addr = *sin;
if (ia->ia_addr.sin_family == AF_INET)
LIST_INSERT_HEAD(INADDR_HASH(ia->ia_addr.sin_addr.s_addr),
ia, ia_hash);
/*
* Give the interface a chance to initialize
* if this is its first address,
* and to validate the address if necessary.
*/
if (ifp->if_ioctl != NULL) {
IFF_LOCKGIANT(ifp);
error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
IFF_UNLOCKGIANT(ifp);
if (error) {
/* Driver refused the address: put the old one back. */
splx(s);
/* LIST_REMOVE(ia, ia_hash) is done in in_control */
ia->ia_addr = oldaddr;
if (ia->ia_addr.sin_family == AF_INET)
LIST_INSERT_HEAD(INADDR_HASH(
ia->ia_addr.sin_addr.s_addr), ia, ia_hash);
else
/*
* If oldaddr family is not AF_INET (e.g.
* interface has been just created) in_control
* does not call LIST_REMOVE, and we end up
* with bogus ia entries in hash
*/
LIST_REMOVE(ia, ia_hash);
return (error);
}
}
splx(s);
if (scrub) {
/*
 * Temporarily point ifa_addr at the old address while
 * in_ifscrub() runs, then point it back at ia_addr.
 */
ia->ia_ifa.ifa_addr = (struct sockaddr *)&oldaddr;
in_ifscrub(ifp, ia);
ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
}
/* Classful default network mask derived from the address. */
if (IN_CLASSA(i))
ia->ia_netmask = IN_CLASSA_NET;
else if (IN_CLASSB(i))
ia->ia_netmask = IN_CLASSB_NET;
else
ia->ia_netmask = IN_CLASSC_NET;
/*
* The subnet mask usually includes at least the standard network part,
* but may be smaller in the case of supernetting.
* If it is set, we believe it.
*/
if (ia->ia_subnetmask == 0) {
ia->ia_subnetmask = ia->ia_netmask;
ia->ia_sockmask.sin_addr.s_addr = htonl(ia->ia_subnetmask);
} else
ia->ia_netmask &= ia->ia_subnetmask;
ia->ia_net = i & ia->ia_netmask;
ia->ia_subnet = i & ia->ia_subnetmask;
in_socktrim(&ia->ia_sockmask);
#ifdef DEV_CARP
/*
* XXX: carp(4) does not have interface route
*/
if (ifp->if_type == IFT_CARP)
return (0);
#endif
/*
* Add route for the network.
*/
ia->ia_ifa.ifa_metric = ifp->if_metric;
if (ifp->if_flags & IFF_BROADCAST) {
ia->ia_broadaddr.sin_addr.s_addr =
htonl(ia->ia_subnet | ~ia->ia_subnetmask);
ia->ia_netbroadcast.s_addr =
htonl(ia->ia_net | ~ ia->ia_netmask);
} else if (ifp->if_flags & IFF_LOOPBACK) {
ia->ia_dstaddr = ia->ia_addr;
flags |= RTF_HOST;
} else if (ifp->if_flags & IFF_POINTOPOINT) {
/* No destination configured yet: nothing to route to. */
if (ia->ia_dstaddr.sin_family != AF_INET)
return (0);
flags |= RTF_HOST;
}
/* Both paths below return `error'; the first is the failure case. */
if ((error = in_addprefix(ia, flags)) != 0)
return (error);
return (error);
}
/*
 * Evaluates to RTF_HOST when the interface owning address x is a loopback
 * or point-to-point interface, and to 0 otherwise.  #undef'd again after
 * in_scrubprefix().
 */
#define rtinitflags(x) \
((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
? RTF_HOST : 0)
/*
* Check if we have a route for the given prefix already or add one accordingly.
*/
static int
in_addprefix(struct in_ifaddr *target, int flags)
{
	INIT_VNET_INET(curvnet);
	struct in_ifaddr *other;
	struct in_addr want_prefix, want_mask, have;
	int rv;

	/*
	 * Work out the prefix/mask pair to look for: host routes use the
	 * destination address with a zero mask, everything else uses the
	 * masked interface address.
	 */
	if ((flags & RTF_HOST) != 0) {
		want_prefix = target->ia_dstaddr.sin_addr;
		want_mask.s_addr = 0;
	} else {
		want_mask = target->ia_sockmask.sin_addr;
		want_prefix.s_addr =
		    target->ia_addr.sin_addr.s_addr & want_mask.s_addr;
	}

	TAILQ_FOREACH(other, &V_in_ifaddrhead, ia_link) {
		have = other->ia_addr.sin_addr;
		if (!rtinitflags(other)) {
			/* Network-style entry: the mask must agree too. */
			have.s_addr &= other->ia_sockmask.sin_addr.s_addr;
			if (want_mask.s_addr !=
			    other->ia_sockmask.sin_addr.s_addr)
				continue;
		}
		if (want_prefix.s_addr != have.s_addr)
			continue;

		/* Same prefix, but that address does not own the route. */
		if ((other->ia_flags & IFA_ROUTE) == 0)
			continue;

		/*
		 * Another address already installed this prefix route;
		 * nothing to add.  With sameprefixcarponly set this is an
		 * error unless one of the two interfaces is a carp one.
		 */
		if (V_sameprefixcarponly &&
		    target->ia_ifp->if_type != IFT_CARP &&
		    other->ia_ifp->if_type != IFT_CARP)
			return (EEXIST);
		return (0);
	}

	/* Nobody has this prefix route yet, so try to install it. */
	rv = rtinit(&target->ia_ifa, (int)RTM_ADD, flags);
	if (rv == 0)
		target->ia_flags |= IFA_ROUTE;
	return (rv);
}
/* Defined outside this file; in_scrubprefix() below calls it. */
extern void arp_ifscrub(struct ifnet *ifp, uint32_t addr);
/*
* If there is no other address in the system that can serve a route to the
* same prefix, remove the route. Hand over the route to the new address
* otherwise.
*/
static int
in_scrubprefix(struct in_ifaddr *target)
{
	INIT_VNET_INET(curvnet);
	struct in_ifaddr *other;
	struct in_addr prefix, cand;
	int rv;

	/* Nothing to do unless this address owns a route. */
	if (!(target->ia_flags & IFA_ROUTE))
		return (0);

	if (rtinitflags(target)) {
		prefix = target->ia_dstaddr.sin_addr;
	} else {
		prefix.s_addr = target->ia_addr.sin_addr.s_addr &
		    target->ia_sockmask.sin_addr.s_addr;
		/* remove arp cache */
		arp_ifscrub(target->ia_ifp, IA_SIN(target)->sin_addr.s_addr);
	}

	TAILQ_FOREACH(other, &V_in_ifaddrhead, ia_link) {
		if (rtinitflags(other)) {
			cand = other->ia_dstaddr.sin_addr;
		} else {
			cand.s_addr = other->ia_addr.sin_addr.s_addr &
			    other->ia_sockmask.sin_addr.s_addr;
		}
		if (cand.s_addr != prefix.s_addr)
			continue;

		/* Skip addresses that already own a route. */
		if (other->ia_flags & IFA_ROUTE)
			continue;
#ifdef DEV_CARP
		/* XXX: a special case for carp(4) interface */
		if (other->ia_ifp->if_type == IFT_CARP)
			continue;
#endif
		/*
		 * Found another address on the same prefix: move IFA_ROUTE
		 * and the route itself over to it, so that routing daemons
		 * see the delete/add pair.
		 */
		rtinit(&(target->ia_ifa), (int)RTM_DELETE,
		    rtinitflags(target));
		target->ia_flags &= ~IFA_ROUTE;

		rv = rtinit(&other->ia_ifa, (int)RTM_ADD,
		    rtinitflags(other) | RTF_UP);
		if (rv == 0)
			other->ia_flags |= IFA_ROUTE;
		return (rv);
	}

	/* No other address shares the prefix; just drop the route. */
	rtinit(&(target->ia_ifa), (int)RTM_DELETE, rtinitflags(target));
	target->ia_flags &= ~IFA_ROUTE;
	return (0);
}
#undef rtinitflags
/*
* Return 1 if the address might be a local broadcast address.
*/
int
in_broadcast(struct in_addr in, struct ifnet *ifp)
{
	struct ifaddr *cur;
	struct in_ifaddr *ia4;
	u_long haddr;

	if (in.s_addr == INADDR_BROADCAST || in.s_addr == INADDR_ANY)
		return (1);
	if (!(ifp->if_flags & IFF_BROADCAST))
		return (0);
	haddr = ntohl(in.s_addr);

	/*
	 * Compare against every IPv4 address configured on the interface.
	 */
	TAILQ_FOREACH(cur, &ifp->if_addrhead, ifa_link) {
		if (cur->ifa_addr->sa_family != AF_INET)
			continue;
		ia4 = (struct in_ifaddr *)cur;

		/*
		 * Skip entries with an all-ones subnet mask.  These
		 * only exist when an interface gets a secondary
		 * address.
		 */
		if (ia4->ia_subnetmask == (u_long)0xffffffff)
			continue;

		/* Directed broadcast for the subnet or the whole net. */
		if (in.s_addr == ia4->ia_broadaddr.sin_addr.s_addr ||
		    in.s_addr == ia4->ia_netbroadcast.s_addr)
			return (1);

		/* Old-style (host 0) broadcast. */
		if (haddr == ia4->ia_subnet || haddr == ia4->ia_net)
			return (1);
	}
	return (0);
}
/*
* Delete all IPv4 multicast address records, and associated link-layer
* multicast address records, associated with ifp.
*/
static void
in_purgemaddrs(struct ifnet *ifp)
{
	INIT_VNET_INET(ifp->if_vnet);
	struct in_multi *cur;
	struct in_multi *next;

#ifdef DIAGNOSTIC
	printf("%s: purging ifp %p\n", __func__, ifp);
#endif

	IFF_LOCKGIANT(ifp);
	IN_MULTI_LOCK();
	/* The _SAFE variant is used because entries are deleted in the loop. */
	LIST_FOREACH_SAFE(cur, &V_in_multihead, inm_link, next) {
		if (cur->inm_ifp != ifp)
			continue;
		in_delmulti_locked(cur);
	}
	IN_MULTI_UNLOCK();
	IFF_UNLOCKGIANT(ifp);
}
/*
* On interface removal, clean up IPv4 data structures hung off of the ifnet.
*/
void
in_ifdetach(struct ifnet *ifp)
{
	INIT_VNET_INET(ifp->if_vnet);

	/* Purge ifp from the raw-IP and UDP pcb tables. */
	in_pcbpurgeif0(&V_ripcbinfo, ifp);
	in_pcbpurgeif0(&V_udbinfo, ifp);

	/* Then delete the IPv4 multicast records that refer to ifp. */
	in_purgemaddrs(ifp);
}
#include <sys/syslog.h>
#include <net/if_dl.h>
#include <netinet/if_ether.h>
/*
 * IPv4 link-layer table entry: the generic llentry followed by the IPv4
 * address it describes.  in_lltable_new() allocates one of these and hands
 * out a pointer to `base'.
 */
struct in_llentry {
struct llentry base;
struct sockaddr_in l3_addr4;
};
/*
 * Allocate and initialize a link-layer entry for l3addr.
 *
 * Returns a pointer to the embedded generic llentry, or NULL when the
 * allocation fails.  `flags' is not used in this function.
 * (This text is a diff: lines marked '-' / '+' are the old / new revision.)
 */
static struct llentry *
in_lltable_new(const struct sockaddr *l3addr, u_int flags)
{
struct in_llentry *lle;
/* Non-sleeping, zero-filled allocation. */
lle = malloc(sizeof(struct in_llentry), M_LLTABLE, M_DONTWAIT | M_ZERO);
if (lle == NULL) /* NB: caller generates msg */
return NULL;
callout_init(&lle->base.la_timer, CALLOUT_MPSAFE);
/* qing
* For IPv4 this will trigger "arpresolve" to generate
* an ARP request
*/
lle->base.la_expire = time_second; /* mark expired */
lle->l3_addr4 = *(const struct sockaddr_in *)l3addr;
-
+ lle->base.lle_refcnt = 1;
+ LLE_LOCK_INIT(&lle->base);
return &lle->base;
}
/*
* Deletes an address from the address table.
* This function is called by the timer functions
* such as arptimer() and nd6_llinfo_timer(), and
* the caller does the locking.
*/
static void
in_lltable_free(struct lltable *llt, struct llentry *lle)
{
	/* The table argument is not needed to release an IPv4 entry. */
	(void)llt;

	free(lle, M_LLTABLE);
}
/*
 * Check that l3addr is reachable directly through ifp.
 *
 * Looks the address up with rtalloc1(); returns EINVAL (after logging) when
 * no route is found, the route has RTF_GATEWAY set, or the route's interface
 * is not ifp.  Returns 0 otherwise.  The route is released on both paths.
 * (This text is a diff: lines marked '-' / '+' are the old / new revision.)
 */
static int
in_lltable_rtcheck(struct ifnet *ifp, const struct sockaddr *l3addr)
{
struct rtentry *rt;
KASSERT(l3addr->sa_family == AF_INET,
("sin_family %d", l3addr->sa_family));
/* XXX rtalloc1 should take a const param */
rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) {
log(LOG_INFO, "IPv4 address: \"%s\" is not on the network\n",
inet_ntoa(((const struct sockaddr_in *)l3addr)->sin_addr));
if (rt != NULL)
- rtfree(rt);
- return EINVAL;
+ RTFREE_LOCKED(rt);
+ return (EINVAL);
}
- rtfree(rt);
+ RTFREE_LOCKED(rt);
return 0;
}
+/*
+ * Returns NULL if not found or marked for deletion
+ * if found returns lle read locked
+ *
+ */
/*
 * Look up, create or delete the entry for l3addr in llt, driven by `flags':
 *   LLE_CREATE    - allocate and insert an entry when none exists;
 *   LLE_IFADDR    - skip the route check on create and, together with
 *                   LLE_CREATE, fill in the interface's own link-layer
 *                   address and mark the entry valid and static;
 *   LLE_DELETE    - mark an existing entry LLE_DELETED;
 *   LLE_EXCLUSIVE - (new revision) take the write lock on the returned
 *                   entry instead of the read lock.
 * Entries already marked LLE_DELETED are skipped by the search.
 * (This text is a diff: lines marked '-' / '+' are the old / new revision.)
 */
static struct llentry *
in_lltable_lookup(struct lltable *llt, u_int flags, const struct sockaddr *l3addr)
{
const struct sockaddr_in *sin = (const struct sockaddr_in *)l3addr;
struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
struct llentries *lleh;
u_int hashkey;
KASSERT(l3addr->sa_family == AF_INET,
("sin_family %d", l3addr->sa_family));
/* The bucket is selected from the IPv4 address. */
hashkey = sin->sin_addr.s_addr;
lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
LIST_FOREACH(lle, lleh, lle_next) {
if (lle->la_flags & LLE_DELETED)
continue;
if (bcmp(L3_ADDR(lle), l3addr, sizeof(struct sockaddr_in)) == 0)
break;
}
-
if (lle == NULL) {
+#ifdef INVARIANTS
+ if (flags & LLE_DELETE)
+ log(LOG_INFO, "interface address is missing from cache = %p in delete\n", lle);
+#endif
if (!(flags & LLE_CREATE))
return (NULL);
/*
* A route that covers the given address must have
* been installed 1st because we are doing a resolution,
* verify this.
*/
if (!(flags & LLE_IFADDR) &&
in_lltable_rtcheck(ifp, l3addr) != 0)
- return NULL;
+ goto done;
lle = in_lltable_new(l3addr, flags);
if (lle == NULL) {
log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
- return NULL;
+ goto done;
}
/* LLE_CREATE is a request bit and is not stored in the entry. */
lle->la_flags = flags & ~LLE_CREATE;
if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
lle->la_flags |= (LLE_VALID | LLE_STATIC);
}
lle->lle_tbl = llt;
lle->lle_head = lleh;
LIST_INSERT_HEAD(lleh, lle, lle_next);
- } else {
- if (flags & LLE_DELETE)
- lle->la_flags = LLE_DELETED;
+ } else if (flags & LLE_DELETE) {
+ LLE_WLOCK(lle);
+ lle->la_flags = LLE_DELETED;
+ LLE_WUNLOCK(lle);
+#ifdef INVARIANTS
+ log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
+#endif
+ lle = NULL;
}
- return lle;
+ if (lle) {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WLOCK(lle);
+ else
+ LLE_RLOCK(lle);
+ }
+done:
+ return (lle);
}
/*
 * Emit every live entry of llt to the sysctl request wr.
 *
 * Each entry is written as one fixed-size record made of a struct
 * rt_msghdr, a struct sockaddr_inarp and a struct sockaddr_dl.  Entries
 * flagged LLE_DELETED are skipped.
 *
 * Returns 0 on success or the first error reported by SYSCTL_OUT(); the
 * dump stops at that point instead of carrying on with later buckets.
 */
static int
in_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
{
#define	SIN(lle)	((struct sockaddr_in *) L3_ADDR(lle))
	struct ifnet *ifp = llt->llt_ifp;
	struct llentry *lle;
	/* XXX stack use */
	struct {
		struct rt_msghdr	rtm;
		struct sockaddr_inarp	sin;
		struct sockaddr_dl	sdl;
	} arpc;
	int error, i;

	/* XXXXX
	 * current IFNET_RLOCK() is mapped to IFNET_WLOCK()
	 * so it is okay to use this ASSERT, change it when
	 * IFNET lock is finalized
	 */
	IFNET_WLOCK_ASSERT();

	error = 0;
	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
			/* skip deleted entries */
			if (lle->la_flags & LLE_DELETED)
				continue;
			/*
			 * produce a msg made of:
			 *  struct rt_msghdr;
			 *  struct sockaddr_inarp; (IPv4)
			 *  struct sockaddr_dl;
			 */
			bzero(&arpc, sizeof(arpc));
			arpc.rtm.rtm_msglen = sizeof(arpc);
			arpc.sin.sin_family = AF_INET;
			arpc.sin.sin_len = sizeof(arpc.sin);
			arpc.sin.sin_addr.s_addr = SIN(lle)->sin_addr.s_addr;

			/* publish */
			if (lle->la_flags & LLE_PUB) {
				arpc.rtm.rtm_flags |= RTF_ANNOUNCE;
				/* proxy only */
				if (lle->la_flags & LLE_PROXY)
					arpc.sin.sin_other = SIN_PROXY;
			}

			if (lle->la_flags & LLE_VALID) { /* valid MAC */
				struct sockaddr_dl *sdl = &arpc.sdl;

				sdl->sdl_family = AF_LINK;
				sdl->sdl_len = sizeof(*sdl);
				sdl->sdl_alen = ifp->if_addrlen;
				sdl->sdl_index = ifp->if_index;
				sdl->sdl_type = ifp->if_type;
				bcopy(&lle->ll_addr, LLADDR(sdl),
				    ifp->if_addrlen);
			}

			/* Static entries report an expiry of 0. */
			arpc.rtm.rtm_rmx.rmx_expire =
			    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
			arpc.rtm.rtm_flags |= RTF_LLINFO | RTF_HOST;
			if (lle->la_flags & LLE_STATIC)
				arpc.rtm.rtm_flags |= RTF_STATIC;
			arpc.rtm.rtm_index = ifp->if_index;

			error = SYSCTL_OUT(wr, &arpc, sizeof(arpc));
			/*
			 * Leave both loops at once: a plain break would only
			 * exit this bucket and let a later bucket overwrite
			 * the error.
			 */
			if (error)
				return (error);
		}
	}
	return (error);
#undef SIN
}
void *
in_domifattach(struct ifnet *ifp)
{
	struct lltable *llt;

	llt = lltable_init(ifp, AF_INET);
	if (llt == NULL)
		return (NULL);

	/* Install the IPv4 implementations of the table callbacks. */
	llt->llt_new = in_lltable_new;
	llt->llt_free = in_lltable_free;
	llt->llt_rtcheck = in_lltable_rtcheck;
	llt->llt_lookup = in_lltable_lookup;
	llt->llt_dump = in_lltable_dump;
	return (llt);
}
void
in_domifdetach(struct ifnet *ifp __unused, void *aux)
{
	/* aux is the lltable pointer that in_domifattach() returned. */
	lltable_free((struct lltable *)aux);
}
Index: projects/arpv2_merge_1/sys/netinet/ip_output.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet/ip_output.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet/ip_output.c (revision 185839)
@@ -1,1202 +1,1197 @@
/*-
* Copyright (c) 1982, 1986, 1988, 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ip_output.c 8.3 (Berkeley) 1/21/94
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_ipfw.h"
#include "opt_ipsec.h"
#include "opt_mac.h"
#include "opt_mbuf_stress_test.h"
#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/ucred.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/netisr.h>
#include <net/pfil.h>
#include <net/route.h>
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/ip_options.h>
#include <netinet/vinet.h>
#ifdef IPSEC
#include <netinet/ip_ipsec.h>
#include <netipsec/ipsec.h>
#endif /* IPSEC*/
#include <machine/in_cksum.h>
#include <security/mac/mac_framework.h>
/*
 * Debug helper: prints string x, then address a (a struct in_addr in network
 * byte order) in dotted-quad form, then string y.
 * NOTE(review): the expansion evaluates `a' four times and already ends in
 * ';', so it is not safe as the sole body of an unbraced if/else.
 */
#define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\
x, (ntohl(a.s_addr)>>24)&0xFF,\
(ntohl(a.s_addr)>>16)&0xFF,\
(ntohl(a.s_addr)>>8)&0xFF,\
(ntohl(a.s_addr))&0xFF, y);
/* Only defined here as a plain global when VIMAGE_GLOBALS is set. */
#ifdef VIMAGE_GLOBALS
u_short ip_id;
#endif
/* Optional sysctl-tunable knob, compiled in only with MBUF_STRESS_TEST. */
#ifdef MBUF_STRESS_TEST
int mbuf_frag_size = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW,
&mbuf_frag_size, 0, "Fragment outgoing mbufs to this size");
#endif
/* Forward declaration of a file-local helper. */
static void ip_mloopback
(struct ifnet *, struct mbuf *, struct sockaddr_in *, int);
extern struct protosw inetsw[];
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
* The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed.
* In the IP forwarding case, the packet will arrive with options already
* inserted, so must have a NULL opt pointer.
*
* Parameters:
* m     - packet to send; must carry a packet header (M_ASSERTPKTHDR).
* opt   - optional IP options, inserted with ip_insertoptions(); may be NULL.
* ro    - route cache; if NULL a zeroed local one is used and any route
*         entry it picked up is released before returning.
* flags - when IP_FORWARDING or IP_RAWOUTPUT is set, the version, header
*         length and id fields are not filled in here; IP_SENDONES,
*         IP_ROUTETOIF and IP_ALLOWBROADCAST are tested below.
* imo   - multicast options (interface, ttl, loopback, vif); may be NULL.
* inp   - sending inpcb or NULL; when non-NULL its lock is asserted and
*         its FIB number is stamped on the mbuf.
*
* Returns 0 or an errno value (ENETUNREACH, EHOSTUNREACH, ENOBUFS,
* EADDRNOTAVAIL, EACCES, EMSGSIZE, or whatever the hooks, the fragmenter
* or the interface output routine report).
*/
int
ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
struct ip_moptions *imo, struct inpcb *inp)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET(curvnet);
struct ip *ip;
struct ifnet *ifp = NULL; /* keep compiler happy */
struct mbuf *m0;
int hlen = sizeof (struct ip);
int mtu;
int len, error = 0;
struct sockaddr_in *dst = NULL; /* keep compiler happy */
struct in_ifaddr *ia = NULL;
int isbroadcast, sw_csum;
struct route iproute;
struct in_addr odst;
#ifdef IPFIREWALL_FORWARD
struct m_tag *fwd_tag = NULL;
#endif
M_ASSERTPKTHDR(m);
/* No route cache supplied: use the local one (released at done:). */
if (ro == NULL) {
ro = &iproute;
bzero(ro, sizeof (*ro));
}
if (inp != NULL) {
M_SETFIB(m, inp->inp_inc.inc_fibnum);
INP_LOCK_ASSERT(inp);
}
/*
* Insert caller-supplied options; a non-zero len returned through the
* third argument becomes the new header length.
*/
if (opt) {
len = 0;
m = ip_insertoptions(m, opt, &len);
if (len != 0)
hlen = len;
}
ip = mtod(m, struct ip *);
/*
* Fill in IP header. If we are not allowing fragmentation,
* then the ip_id field is meaningless, but we don't set it
* to zero. Doing so causes various problems when devices along
* the path (routers, load balancers, firewalls, etc.) illegally
* disable DF on our packet. Note that a 16-bit counter
* will wrap around in less than 10 seconds at 100 Mbit/s on a
* medium with MTU 1500. See Steven M. Bellovin, "A Technique
* for Counting NATted Hosts", Proc. IMW'02, available at
* <http://www.cs.columbia.edu/~smb/papers/fnat.pdf>.
*/
if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
ip->ip_v = IPVERSION;
ip->ip_hl = hlen >> 2;
ip->ip_id = ip_newid();
V_ipstat.ips_localout++;
} else {
hlen = ip->ip_hl << 2;
}
dst = (struct sockaddr_in *)&ro->ro_dst;
/*
* Re-entered via "goto again" when a packet filter hook changed the
* destination or attached a forwarding tag (see below).
*/
again:
/*
* If there is a cached route,
* check that it is to the same destination
* and is still up. If not, free it and try again.
* The address family should also be checked in case of sharing the
* cache with IPv6.
*/
if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
RTFREE(ro->ro_rt);
ro->ro_rt = (struct rtentry *)NULL;
}
/*
* (Re)initialize the destination sockaddr from the packet, unless a
* forwarding tag already supplied one (IPFIREWALL_FORWARD only).
*/
#ifdef IPFIREWALL_FORWARD
if (ro->ro_rt == NULL && fwd_tag == NULL) {
#else
if (ro->ro_rt == NULL) {
#endif
bzero(dst, sizeof(*dst));
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
dst->sin_addr = ip->ip_dst;
}
/*
* If routing to interface only, short circuit routing lookup.
* The use of an all-ones broadcast address implies this; an
* interface is specified by the broadcast address of an interface,
* or the destination address of a ptp interface.
*/
if (flags & IP_SENDONES) {
if ((ia = ifatoia(ifa_ifwithbroadaddr(sintosa(dst)))) == NULL &&
(ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL) {
V_ipstat.ips_noroute++;
error = ENETUNREACH;
goto bad;
}
ip->ip_dst.s_addr = INADDR_BROADCAST;
dst->sin_addr = ip->ip_dst;
ifp = ia->ia_ifp;
ip->ip_ttl = 1;
isbroadcast = 1;
} else if (flags & IP_ROUTETOIF) {
if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == NULL &&
(ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == NULL) {
V_ipstat.ips_noroute++;
error = ENETUNREACH;
goto bad;
}
ifp = ia->ia_ifp;
ip->ip_ttl = 1;
isbroadcast = in_broadcast(dst->sin_addr, ifp);
} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
imo != NULL && imo->imo_multicast_ifp != NULL) {
/*
* Bypass the normal routing lookup for multicast
* packets if the interface is specified.
*/
ifp = imo->imo_multicast_ifp;
IFP_TO_IA(ifp, ia);
isbroadcast = 0; /* fool gcc */
} else {
/*
* We want to do any cloning requested by the link layer,
* as this is probably required in all cases for correct
* operation (as it is for ARP).
*/
if (ro->ro_rt == NULL)
#ifdef RADIX_MPATH
rtalloc_mpath_fib(ro,
ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
#else
in_rtalloc_ign(ro, 0,
inp ? inp->inp_inc.inc_fibnum : M_GETFIB(m));
#endif
if (ro->ro_rt == NULL) {
V_ipstat.ips_noroute++;
error = EHOSTUNREACH;
goto bad;
}
ia = ifatoia(ro->ro_rt->rt_ifa);
ifp = ro->ro_rt->rt_ifp;
ro->ro_rt->rt_rmx.rmx_pksent++;
/* For gateway routes the link-level destination is the gateway. */
if (ro->ro_rt->rt_flags & RTF_GATEWAY)
dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
if (ro->ro_rt->rt_flags & RTF_HOST)
isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
else
isbroadcast = in_broadcast(dst->sin_addr, ifp);
}
/*
* Calculate MTU. If we have a route that is up, use that,
* otherwise use the interface's MTU.
*/
if (ro->ro_rt != NULL && (ro->ro_rt->rt_flags & (RTF_UP|RTF_HOST))) {
/*
* This case can happen if the user changed the MTU
* of an interface after enabling IP on it. Because
* most netifs don't keep track of routes pointing to
* them, there is no way for one to update all its
* routes when the MTU is changed.
*/
if (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)
ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
mtu = ro->ro_rt->rt_rmx.rmx_mtu;
} else {
mtu = ifp->if_mtu;
}
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
struct in_multi *inm;
m->m_flags |= M_MCAST;
/*
* IP destination address is multicast. Make sure "dst"
* still points to the address in "ro". (It may have been
* changed to point to a gateway address, above.)
*/
dst = (struct sockaddr_in *)&ro->ro_dst;
/*
* See if the caller provided any multicast options
*/
if (imo != NULL) {
ip->ip_ttl = imo->imo_multicast_ttl;
if (imo->imo_multicast_vif != -1)
ip->ip_src.s_addr =
ip_mcast_src ?
ip_mcast_src(imo->imo_multicast_vif) :
INADDR_ANY;
} else
ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
/*
* Confirm that the outgoing interface supports multicast.
*/
if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
if ((ifp->if_flags & IFF_MULTICAST) == 0) {
V_ipstat.ips_noroute++;
error = ENETUNREACH;
goto bad;
}
}
/*
* If source address not specified yet, use address
* of outgoing interface.
*/
if (ip->ip_src.s_addr == INADDR_ANY) {
/* Interface may have no addresses. */
if (ia != NULL)
ip->ip_src = IA_SIN(ia)->sin_addr;
}
IN_MULTI_LOCK();
IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
if (inm != NULL &&
(imo == NULL || imo->imo_multicast_loop)) {
IN_MULTI_UNLOCK();
/*
* If we belong to the destination multicast group
* on the outgoing interface, and the caller did not
* forbid loopback, loop back a copy.
*/
ip_mloopback(ifp, m, dst, hlen);
}
else {
IN_MULTI_UNLOCK();
/*
* If we are acting as a multicast router, perform
* multicast forwarding as if the packet had just
* arrived on the interface to which we are about
* to send. The multicast forwarding function
* recursively calls this function, using the
* IP_FORWARDING flag to prevent infinite recursion.
*
* Multicasts that are looped back by ip_mloopback(),
* above, will be forwarded by the ip_input() routine,
* if necessary.
*/
if (V_ip_mrouter && (flags & IP_FORWARDING) == 0) {
/*
* If rsvp daemon is not running, do not
* set ip_moptions. This ensures that the packet
* is multicast and not just sent down one link
* as prescribed by rsvpd.
*/
if (!V_rsvp_on)
imo = NULL;
if (ip_mforward &&
ip_mforward(ip, ifp, m, imo) != 0) {
m_freem(m);
goto done;
}
}
}
/*
* Multicasts with a time-to-live of zero may be looped-
* back, above, but must not be transmitted on a network.
* Also, multicasts addressed to the loopback interface
* are not sent -- the above call to ip_mloopback() will
* loop back a copy if this host actually belongs to the
* destination group on the loopback interface.
*/
if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
m_freem(m);
goto done;
}
/* Multicast skips the queue-space and broadcast checks below. */
goto sendit;
}
/*
* If the source address is not specified yet, use the address
* of the outgoing interface.
*/
if (ip->ip_src.s_addr == INADDR_ANY) {
/* Interface may have no addresses. */
if (ia != NULL) {
ip->ip_src = IA_SIN(ia)->sin_addr;
}
}
/*
* Verify that we have any chance at all of being able to queue the
* packet or packet fragments, unless ALTQ is enabled on the given
* interface in which case packetdrop should be done by queueing.
*/
#ifdef ALTQ
if ((!ALTQ_IS_ENABLED(&ifp->if_snd)) &&
((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
ifp->if_snd.ifq_maxlen))
#else
if ((ifp->if_snd.ifq_len + ip->ip_len / mtu + 1) >=
ifp->if_snd.ifq_maxlen)
#endif /* ALTQ */
{
error = ENOBUFS;
V_ipstat.ips_odropped++;
/*
* NOTE(review): the drop count divides by ifp->if_mtu while the
* test above divides by mtu -- confirm this is intended.
*/
ifp->if_snd.ifq_drops += (ip->ip_len / ifp->if_mtu + 1);
goto bad;
}
/*
* Look for broadcast address and
* verify user is allowed to send
* such a packet.
*/
if (isbroadcast) {
if ((ifp->if_flags & IFF_BROADCAST) == 0) {
error = EADDRNOTAVAIL;
goto bad;
}
if ((flags & IP_ALLOWBROADCAST) == 0) {
error = EACCES;
goto bad;
}
/* don't allow broadcast messages to be fragmented */
if (ip->ip_len > mtu) {
error = EMSGSIZE;
goto bad;
}
m->m_flags |= M_BCAST;
} else {
m->m_flags &= ~M_BCAST;
}
sendit:
#ifdef IPSEC
/* Return value: 1 -> drop (bad), -1 -> finished (done), else continue. */
switch(ip_ipsec_output(&m, inp, &flags, &error, &ro, &iproute, &dst, &ia, &ifp)) {
case 1:
goto bad;
case -1:
goto done;
case 0:
default:
break; /* Continue with packet processing. */
}
/* Update variables that are affected by ipsec4_output(). */
ip = mtod(m, struct ip *);
hlen = ip->ip_hl << 2;
#endif /* IPSEC */
/* Jump over all PFIL processing if hooks are not active. */
if (!PFIL_HOOKED(&inet_pfil_hook))
goto passout;
/* Run through list of hooks for output packets. */
odst.s_addr = ip->ip_dst.s_addr;
error = pfil_run_hooks(&inet_pfil_hook, &m, ifp, PFIL_OUT, inp);
/* On hook error or a consumed packet (m == NULL) there is nothing to free. */
if (error != 0 || m == NULL)
goto done;
ip = mtod(m, struct ip *);
/* See if destination IP address was changed by packet filter. */
if (odst.s_addr != ip->ip_dst.s_addr) {
m->m_flags |= M_SKIP_FIREWALL;
/* If destination is now ourself drop to ip_input(). */
if (in_localip(ip->ip_dst)) {
m->m_flags |= M_FASTFWD_OURS;
if (m->m_pkthdr.rcvif == NULL)
m->m_pkthdr.rcvif = V_loif;
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
m->m_pkthdr.csum_flags |=
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xffff;
}
m->m_pkthdr.csum_flags |=
CSUM_IP_CHECKED | CSUM_IP_VALID;
error = netisr_queue(NETISR_IP, m);
goto done;
} else
goto again; /* Redo the routing table lookup. */
}
#ifdef IPFIREWALL_FORWARD
/* See if local, if yes, send it to netisr with IP_FASTFWD_OURS. */
if (m->m_flags & M_FASTFWD_OURS) {
if (m->m_pkthdr.rcvif == NULL)
m->m_pkthdr.rcvif = V_loif;
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
m->m_pkthdr.csum_flags |=
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xffff;
}
m->m_pkthdr.csum_flags |=
CSUM_IP_CHECKED | CSUM_IP_VALID;
error = netisr_queue(NETISR_IP, m);
goto done;
}
/* Or forward to some other address? */
fwd_tag = m_tag_find(m, PACKET_TAG_IPFORWARD, NULL);
if (fwd_tag) {
/* The sockaddr_in to forward to is stored right after the tag header. */
dst = (struct sockaddr_in *)&ro->ro_dst;
bcopy((fwd_tag+1), dst, sizeof(struct sockaddr_in));
m->m_flags |= M_SKIP_FIREWALL;
m_tag_delete(m, fwd_tag);
goto again;
}
#endif /* IPFIREWALL_FORWARD */
passout:
/* 127/8 must not appear on wire - RFC1122. */
if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
(ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
V_ipstat.ips_badaddr++;
error = EADDRNOTAVAIL;
goto bad;
}
}
/*
* sw_csum holds the checksum work the interface does not advertise in
* if_hwassist; the delayed data checksum is done here, and only the
* bits the interface handles stay set in the mbuf.
*/
m->m_pkthdr.csum_flags |= CSUM_IP;
sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
if (sw_csum & CSUM_DELAY_DATA) {
in_delayed_cksum(m);
sw_csum &= ~CSUM_DELAY_DATA;
}
m->m_pkthdr.csum_flags &= ifp->if_hwassist;
/*
* If small enough for interface, or the interface will take
* care of the fragmentation for us, we can just send directly.
*/
if (ip->ip_len <= mtu ||
(m->m_pkthdr.csum_flags & ifp->if_hwassist & CSUM_TSO) != 0 ||
((ip->ip_off & IP_DF) == 0 && (ifp->if_hwassist & CSUM_FRAGMENT))) {
/* Header fields go to network byte order just before transmission. */
ip->ip_len = htons(ip->ip_len);
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
ip->ip_sum = in_cksum(m, hlen);
/*
* Record statistics for this interface address.
* With CSUM_TSO the byte/packet count will be slightly
* incorrect because we count the IP+TCP headers only
* once instead of for every generated packet.
*/
if (!(flags & IP_FORWARDING) && ia) {
if (m->m_pkthdr.csum_flags & CSUM_TSO)
ia->ia_ifa.if_opackets +=
m->m_pkthdr.len / m->m_pkthdr.tso_segsz;
else
ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
}
#ifdef MBUF_STRESS_TEST
if (mbuf_frag_size && m->m_pkthdr.len > mbuf_frag_size)
m = m_fragment(m, M_DONTWAIT, mbuf_frag_size);
#endif
/*
* Reset layer specific mbuf flags
* to avoid confusing lower layers.
*/
m->m_flags &= ~(M_PROTOFLAGS);
-
- IF_AFDATA_LOCK(ifp);
error = (*ifp->if_output)(ifp, m,
(struct sockaddr *)dst, ro->ro_rt);
- IF_AFDATA_UNLOCK(ifp);
goto done;
}
/* Balk when DF bit is set or the interface didn't support TSO. */
if ((ip->ip_off & IP_DF) || (m->m_pkthdr.csum_flags & CSUM_TSO)) {
error = EMSGSIZE;
V_ipstat.ips_cantfrag++;
goto bad;
}
/*
* Too large for interface; fragment if possible. If successful,
* on return, m will point to a list of packets to be sent.
*/
error = ip_fragment(ip, &m, mtu, ifp->if_hwassist, sw_csum);
/*
* NOTE(review): ip_fragment() documents that on error m may carry a
* partially built m_nextpkt chain to be freed by the caller; the bad:
* path calls m_freem(m) once -- confirm that releases the whole chain.
*/
if (error)
goto bad;
/*
* Send the fragments in order. After the first output error the
* remaining fragments are freed instead of being sent.
*/
for (; m; m = m0) {
m0 = m->m_nextpkt;
m->m_nextpkt = 0;
if (error == 0) {
/* Record statistics for this interface address. */
if (ia != NULL) {
ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
}
/*
* Reset layer specific mbuf flags
* to avoid confusing lower layers.
*/
m->m_flags &= ~(M_PROTOFLAGS);
- IF_AFDATA_LOCK(ifp);
error = (*ifp->if_output)(ifp, m,
(struct sockaddr *)dst, ro->ro_rt);
- IF_AFDATA_UNLOCK(ifp);
} else
m_freem(m);
}
if (error == 0)
V_ipstat.ips_fragmented++;
/* Common exit: drop the route reference only if the local cache was used. */
done:
if (ro == &iproute && ro->ro_rt) {
RTFREE(ro->ro_rt);
}
return (error);
/* Error exit: free the packet, then leave through done:. */
bad:
m_freem(m);
goto done;
}
/*
* Create a chain of fragments which fit the given mtu. m_frag points to the
* mbuf to be fragmented; on return it points to the chain with the fragments.
* Return 0 if no error. If error, m_frag may contain a partially built
* chain of fragments that should be freed by the caller.
*
* if_hwassist_flags is the hw offload capabilities (see if_data.ifi_hwassist)
* sw_csum contains the delayed checksums flags (e.g., CSUM_DELAY_IP).
*
* ip is the IP header of the packet in *m_frag. Its ip_len and ip_off are
* read as host-order values here and are converted with htons() in every
* fragment (including the first) on the success path.
*
* Returns 0, EMSGSIZE (DF set, or fewer than 8 payload bytes fit in a
* fragment) or ENOBUFS (mbuf allocation or copy failed).
*/
int
ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu,
u_long if_hwassist_flags, int sw_csum)
{
INIT_VNET_INET(curvnet);
int error = 0;
int hlen = ip->ip_hl << 2;
int len = (mtu - hlen) & ~7; /* size of payload in each fragment */
int off;
struct mbuf *m0 = *m_frag; /* the original packet */
int firstlen;
struct mbuf **mnext;
int nfrags;
if (ip->ip_off & IP_DF) { /* Fragmentation not allowed */
V_ipstat.ips_cantfrag++;
return EMSGSIZE;
}
/*
* Must be able to put at least 8 bytes per fragment.
*/
if (len < 8)
return EMSGSIZE;
/*
* If the interface will not calculate checksums on
* fragmented packets, then do it here.
*/
if (m0->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
(if_hwassist_flags & CSUM_IP_FRAGS) == 0) {
in_delayed_cksum(m0);
m0->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
}
if (len > PAGE_SIZE) {
/*
* Fragment large datagrams such that each segment
* contains a multiple of PAGE_SIZE amount of data,
* plus headers. This enables a receiver to perform
* page-flipping zero-copy optimizations.
*
* XXX When does this help given that sender and receiver
* could have different page sizes, and also mtu could
* be less than the receiver's page size ?
*/
int newlen;
struct mbuf *m;
/* off = bytes in the leading mbufs that together still fit in mtu. */
for (m = m0, off = 0; m && (off+m->m_len) <= mtu; m = m->m_next)
off += m->m_len;
/*
* firstlen (off - hlen) must be aligned on an
* 8-byte boundary
*/
if (off < hlen)
goto smart_frag_failure;
off = ((off - hlen) & ~7) + hlen;
newlen = (~PAGE_MASK) & mtu;
if ((newlen + sizeof (struct ip)) > mtu) {
/* we failed, go back the default */
smart_frag_failure:
newlen = len;
off = hlen + len;
}
len = newlen;
} else {
off = hlen + len;
}
/* off now marks the end of the first fragment within the original packet. */
firstlen = off - hlen;
mnext = &m0->m_nextpkt; /* pointer to next packet */
/*
* Loop through length of segment after first fragment,
* make new header and copy data of each part and link onto chain.
* Here, m0 is the original packet, m is the fragment being created.
* The fragments are linked off the m_nextpkt of the original
* packet, which after processing serves as the first fragment.
*/
for (nfrags = 1; off < ip->ip_len; off += len, nfrags++) {
struct ip *mhip; /* ip header on the fragment */
struct mbuf *m;
int mhlen = sizeof (struct ip);
MGETHDR(m, M_DONTWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
V_ipstat.ips_odropped++;
goto done;
}
m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
/*
* In the first mbuf, leave room for the link header, then
* copy the original IP header including options. The payload
* goes into an additional mbuf chain returned by m_copy().
*/
m->m_data += max_linkhdr;
mhip = mtod(m, struct ip *);
*mhip = *ip;
/* Original header had options: let ip_optcopy() build this fragment's. */
if (hlen > sizeof (struct ip)) {
mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
mhip->ip_v = IPVERSION;
mhip->ip_hl = mhlen >> 2;
}
m->m_len = mhlen;
/* XXX do we need to add ip->ip_off below ? */
/* Fragment offset is expressed in 8-byte units. */
mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
if (off + len >= ip->ip_len) { /* last fragment */
len = ip->ip_len - off;
m->m_flags |= M_LASTFRAG;
} else
mhip->ip_off |= IP_MF;
mhip->ip_len = htons((u_short)(len + mhlen));
m->m_next = m_copy(m0, off, len);
if (m->m_next == NULL) { /* copy failed */
m_free(m);
error = ENOBUFS; /* ??? */
V_ipstat.ips_odropped++;
goto done;
}
m->m_pkthdr.len = mhlen + len;
m->m_pkthdr.rcvif = NULL;
#ifdef MAC
mac_netinet_fragment(m0, m);
#endif
m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
mhip->ip_off = htons(mhip->ip_off);
mhip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
mhip->ip_sum = in_cksum(m, mhlen);
/* Append the finished fragment to the m_nextpkt chain. */
*mnext = m;
mnext = &m->m_nextpkt;
}
V_ipstat.ips_ofragments += nfrags;
/* set first marker for fragment chain */
m0->m_flags |= M_FIRSTFRAG | M_FRAG;
/* The fragment count is stored in the first fragment's csum_data field. */
m0->m_pkthdr.csum_data = nfrags;
/*
* Update first fragment by trimming what's been copied out
* and updating header.
*/
/* The argument is negative here, i.e. the trim is taken from the tail. */
m_adj(m0, hlen + firstlen - ip->ip_len);
m0->m_pkthdr.len = hlen + firstlen;
ip->ip_len = htons((u_short)m0->m_pkthdr.len);
ip->ip_off |= IP_MF;
ip->ip_off = htons(ip->ip_off);
ip->ip_sum = 0;
if (sw_csum & CSUM_DELAY_IP)
ip->ip_sum = in_cksum(m0, hlen);
/* Success and error paths both hand the (possibly partial) chain back. */
done:
*m_frag = m0;
return error;
}
/*
 * Finish a transport checksum that was deferred at the upper layer.
 *
 * The checksum is computed over the packet past the IP header and stored
 * at the position given by the header length plus m_pkthdr.csum_data.
 * For CSUM_UDP packets a computed value of zero is stored as 0xffff.
 * If the store position does not lie within the first mbuf, a diagnostic
 * is printed and nothing is written.
 */
void
in_delayed_cksum(struct mbuf *m)
{
	INIT_VNET_INET(curvnet);
	struct ip *iph = mtod(m, struct ip *);
	u_short hdr_off = iph->ip_hl << 2;
	u_short sum = in_cksum_skip(m, iph->ip_len, hdr_off);
	/* csum_data holds the checksum field's offset past the IP header. */
	u_short store_off = hdr_off + m->m_pkthdr.csum_data;

	if (sum == 0 && (m->m_pkthdr.csum_flags & CSUM_UDP) != 0)
		sum = 0xffff;

	if (store_off + sizeof(u_short) <= m->m_len) {
		*(u_short *)(m->m_data + store_off) = sum;
		return;
	}

	/*
	 * XXX
	 * This is not expected to happen; if it does, the right thing
	 * may be to store the checksum in the mbuf further down the
	 * chain that actually holds the field.
	 */
	printf("delayed m_pullup, m->len: %d off: %d p: %d\n",
	    m->m_len, store_off, iph->ip_p);
}
/*
* IP socket option processing.
*
* so is the socket and sopt describes the request (level, name, direction
* and value buffer). Requests at a level other than IPPROTO_IP are
* rejected with EINVAL, except SOL_SOCKET/SO_SETFIB, which copies the
* socket's FIB number into the inpcb. Unknown option names yield
* ENOPROTOOPT. Returns 0 or an errno value.
*/
int
ip_ctloutput(struct socket *so, struct sockopt *sopt)
{
struct inpcb *inp = sotoinpcb(so);
int error, optval;
error = optval = 0;
if (sopt->sopt_level != IPPROTO_IP) {
if ((sopt->sopt_level == SOL_SOCKET) &&
(sopt->sopt_name == SO_SETFIB)) {
inp->inp_inc.inc_fibnum = so->so_fibnum;
return (0);
}
return (EINVAL);
}
switch (sopt->sopt_dir) {
case SOPT_SET:
switch (sopt->sopt_name) {
case IP_OPTIONS:
#ifdef notyet
case IP_RETOPTS:
#endif
{
struct mbuf *m;
/* The options must fit in a single mbuf. */
if (sopt->sopt_valsize > MLEN) {
error = EMSGSIZE;
break;
}
MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
if (m == NULL) {
error = ENOBUFS;
break;
}
m->m_len = sopt->sopt_valsize;
error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
m->m_len);
if (error) {
m_free(m);
break;
}
/* m is handed to ip_pcbopts() and not freed here afterwards. */
INP_WLOCK(inp);
error = ip_pcbopts(inp, sopt->sopt_name, m);
INP_WUNLOCK(inp);
return (error);
}
/* Integer-valued options: copy the value in once, then dispatch. */
case IP_TOS:
case IP_TTL:
case IP_MINTTL:
case IP_RECVOPTS:
case IP_RECVRETOPTS:
case IP_RECVDSTADDR:
case IP_RECVTTL:
case IP_RECVIF:
case IP_FAITH:
case IP_ONESBCAST:
case IP_DONTFRAG:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
break;
/*
* NOTE(review): IP_TOS, IP_TTL and IP_MINTTL are stored without
* taking the inpcb lock here, unlike the OPTSET() flags below --
* confirm this is intended.
*/
switch (sopt->sopt_name) {
case IP_TOS:
inp->inp_ip_tos = optval;
break;
case IP_TTL:
inp->inp_ip_ttl = optval;
break;
case IP_MINTTL:
if (optval > 0 && optval <= MAXTTL)
inp->inp_ip_minttl = optval;
else
error = EINVAL;
break;
/* Set or clear one inp_flags bit, under the inpcb write lock. */
#define OPTSET(bit) do { \
INP_WLOCK(inp); \
if (optval) \
inp->inp_flags |= bit; \
else \
inp->inp_flags &= ~bit; \
INP_WUNLOCK(inp); \
} while (0)
case IP_RECVOPTS:
OPTSET(INP_RECVOPTS);
break;
case IP_RECVRETOPTS:
OPTSET(INP_RECVRETOPTS);
break;
case IP_RECVDSTADDR:
OPTSET(INP_RECVDSTADDR);
break;
case IP_RECVTTL:
OPTSET(INP_RECVTTL);
break;
case IP_RECVIF:
OPTSET(INP_RECVIF);
break;
case IP_FAITH:
OPTSET(INP_FAITH);
break;
case IP_ONESBCAST:
OPTSET(INP_ONESBCAST);
break;
case IP_DONTFRAG:
OPTSET(INP_DONTFRAG);
break;
}
break;
#undef OPTSET
/*
* Multicast socket options are processed by the in_mcast
* module.
*/
case IP_MULTICAST_IF:
case IP_MULTICAST_VIF:
case IP_MULTICAST_TTL:
case IP_MULTICAST_LOOP:
case IP_ADD_MEMBERSHIP:
case IP_DROP_MEMBERSHIP:
case IP_ADD_SOURCE_MEMBERSHIP:
case IP_DROP_SOURCE_MEMBERSHIP:
case IP_BLOCK_SOURCE:
case IP_UNBLOCK_SOURCE:
case IP_MSFILTER:
case MCAST_JOIN_GROUP:
case MCAST_LEAVE_GROUP:
case MCAST_JOIN_SOURCE_GROUP:
case MCAST_LEAVE_SOURCE_GROUP:
case MCAST_BLOCK_SOURCE:
case MCAST_UNBLOCK_SOURCE:
error = inp_setmoptions(inp, sopt);
break;
case IP_PORTRANGE:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
if (error)
break;
/* INP_LOWPORT and INP_HIGHPORT are never left set together. */
INP_WLOCK(inp);
switch (optval) {
case IP_PORTRANGE_DEFAULT:
inp->inp_flags &= ~(INP_LOWPORT);
inp->inp_flags &= ~(INP_HIGHPORT);
break;
case IP_PORTRANGE_HIGH:
inp->inp_flags &= ~(INP_LOWPORT);
inp->inp_flags |= INP_HIGHPORT;
break;
case IP_PORTRANGE_LOW:
inp->inp_flags &= ~(INP_HIGHPORT);
inp->inp_flags |= INP_LOWPORT;
break;
default:
error = EINVAL;
break;
}
INP_WUNLOCK(inp);
break;
#ifdef IPSEC
case IP_IPSEC_POLICY:
{
caddr_t req;
struct mbuf *m;
if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
break;
/*
* NOTE(review): m is not freed here when soopt_mcopyin() fails;
* presumably the callee releases it -- confirm.
*/
if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
break;
req = mtod(m, caddr_t);
error = ipsec4_set_policy(inp, sopt->sopt_name, req,
m->m_len, (sopt->sopt_td != NULL) ?
sopt->sopt_td->td_ucred : NULL);
m_freem(m);
break;
}
#endif /* IPSEC */
default:
error = ENOPROTOOPT;
break;
}
break;
case SOPT_GET:
switch (sopt->sopt_name) {
case IP_OPTIONS:
case IP_RETOPTS:
/* No stored options: report a zero-length value. */
if (inp->inp_options)
error = sooptcopyout(sopt,
mtod(inp->inp_options,
char *),
inp->inp_options->m_len);
else
sopt->sopt_valsize = 0;
break;
/* Integer-valued options: compute optval, then copy it out once. */
case IP_TOS:
case IP_TTL:
case IP_MINTTL:
case IP_RECVOPTS:
case IP_RECVRETOPTS:
case IP_RECVDSTADDR:
case IP_RECVTTL:
case IP_RECVIF:
case IP_PORTRANGE:
case IP_FAITH:
case IP_ONESBCAST:
case IP_DONTFRAG:
switch (sopt->sopt_name) {
case IP_TOS:
optval = inp->inp_ip_tos;
break;
case IP_TTL:
optval = inp->inp_ip_ttl;
break;
case IP_MINTTL:
optval = inp->inp_ip_minttl;
break;
/* 1 if the given inp_flags bit is set, else 0. (Not #undef'd below.) */
#define OPTBIT(bit) (inp->inp_flags & bit ? 1 : 0)
case IP_RECVOPTS:
optval = OPTBIT(INP_RECVOPTS);
break;
case IP_RECVRETOPTS:
optval = OPTBIT(INP_RECVRETOPTS);
break;
case IP_RECVDSTADDR:
optval = OPTBIT(INP_RECVDSTADDR);
break;
case IP_RECVTTL:
optval = OPTBIT(INP_RECVTTL);
break;
case IP_RECVIF:
optval = OPTBIT(INP_RECVIF);
break;
case IP_PORTRANGE:
if (inp->inp_flags & INP_HIGHPORT)
optval = IP_PORTRANGE_HIGH;
else if (inp->inp_flags & INP_LOWPORT)
optval = IP_PORTRANGE_LOW;
else
optval = 0;
break;
case IP_FAITH:
optval = OPTBIT(INP_FAITH);
break;
case IP_ONESBCAST:
optval = OPTBIT(INP_ONESBCAST);
break;
case IP_DONTFRAG:
optval = OPTBIT(INP_DONTFRAG);
break;
}
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
/*
* Multicast socket options are processed by the in_mcast
* module.
*/
case IP_MULTICAST_IF:
case IP_MULTICAST_VIF:
case IP_MULTICAST_TTL:
case IP_MULTICAST_LOOP:
case IP_MSFILTER:
error = inp_getmoptions(inp, sopt);
break;
#ifdef IPSEC
case IP_IPSEC_POLICY:
{
struct mbuf *m = NULL;
caddr_t req = NULL;
size_t len = 0;
/*
* m was just initialized to NULL, so this branch never runs and
* ipsec4_get_policy() is always called with req == NULL, len == 0.
*/
if (m != 0) {
req = mtod(m, caddr_t);
len = m->m_len;
}
error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
if (error == 0)
error = soopt_mcopyout(sopt, m); /* XXX */
/*
* NOTE(review): m is freed only on success; presumably
* soopt_mcopyout() releases it on failure -- confirm.
*/
if (error == 0)
m_freem(m);
break;
}
#endif /* IPSEC */
default:
error = ENOPROTOOPT;
break;
}
break;
}
return (error);
}
/*
 * Loop a copy of an outgoing IP multicast packet back to the input path
 * of the given interface; called from ip_output().
 *
 * The copy is delivered through if_simloop(), i.e. the loopback
 * "driver" path, even though ifp may well NOT be a loopback interface.
 * Ugly, but cheaper than duplicating that code here.
 *
 * ifp  - interface the copy is looped back on.
 * m    - the packet being sent; it is duplicated, never consumed.
 * dst  - destination; only its address family is used.
 * hlen - IP header length in bytes.
 */
static void
ip_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in *dst,
    int hlen)
{
	struct mbuf *dup;
	struct ip *iph;

	/*
	 * A deep copy is required: the header of the copy is rewritten
	 * below in order to generate checksums.
	 */
	dup = m_dup(m, M_DONTWAIT);
	if (dup == NULL)
		return;

	/* Make sure the whole IP header sits in the leading mbuf. */
	if ((dup->m_flags & M_EXT) != 0 || dup->m_len < hlen) {
		dup = m_pullup(dup, hlen);
		if (dup == NULL)
			return;
	}

	/* Finish any deferred data checksum and flag it as verified. */
	if (dup->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
		in_delayed_cksum(dup);
		dup->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
		dup->m_pkthdr.csum_flags |=
		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		dup->m_pkthdr.csum_data = 0xffff;
	}

	/*
	 * No fragmentation is attempted even when the IP length exceeds
	 * the interface MTU. Can this possibly matter?
	 */
	iph = mtod(dup, struct ip *);
	iph->ip_len = htons(iph->ip_len);
	iph->ip_off = htons(iph->ip_off);
	iph->ip_sum = 0;
	iph->ip_sum = in_cksum(dup, hlen);

#if 1 /* XXX */
	if (dst->sin_family != AF_INET) {
		printf("ip_mloopback: bad address family %d\n",
		    dst->sin_family);
		dst->sin_family = AF_INET;
	}
#endif
	if_simloop(ifp, dup, dst->sin_family, 0);
}
Index: projects/arpv2_merge_1/sys/netinet6/icmp6.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet6/icmp6.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet6/icmp6.c (revision 185839)
@@ -1,2831 +1,2828 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $
*/
/*-
* Copyright (c) 1982, 1986, 1988, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include <sys/param.h>
#include <sys/domain.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sx.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/if_dl.h>
+#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/vnet.h>
-#include <net/if_llatbl.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/tcp_var.h>
#include <netinet/vinet.h>
#include <netinet6/in6_ifattach.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/mld6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/vinet6.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/key.h>
#endif
extern struct domain inet6domain;
/*
* These plain-global declarations and definitions are only compiled in
* when VIMAGE_GLOBALS is set.
*/
#ifdef VIMAGE_GLOBALS
extern struct inpcbinfo ripcbinfo;
extern struct inpcbhead ripcb;
extern int icmp6errppslim;
extern int icmp6_nodeinfo;
struct icmp6stat icmp6stat;
static int icmp6errpps_count;
static struct timeval icmp6errppslim_last;
#endif
/* Prototypes for the file-local helpers. */
static void icmp6_errcount(struct icmp6errstat *, int, int);
static int icmp6_rip6_input(struct mbuf **, int);
static int icmp6_ratelimit(const struct in6_addr *, const int, const int);
static const char *icmp6_redirect_diag __P((struct in6_addr *,
struct in6_addr *, struct in6_addr *));
static struct mbuf *ni6_input(struct mbuf *, int);
static struct mbuf *ni6_nametodns(const char *, int, int);
static int ni6_dnsmatch(const char *, int, const char *, int);
static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *,
struct ifnet **, struct in6_addr *));
static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *,
struct ifnet *, int));
static int icmp6_notify_error(struct mbuf **, int, int, int);
/*
* ICMPv6 initialization: zero the V_icmp6errpps_count counter and call
* mld6_init().
*/
void
icmp6_init(void)
{
INIT_VNET_INET6(curvnet);
V_icmp6errpps_count = 0;
mld6_init();
}
/*
 * Bump the per-type/per-code error counter in *stat that matches the
 * given ICMPv6 type and code. Any combination that is not recognized
 * is accounted in icp6errs_unknown.
 */
static void
icmp6_errcount(struct icmp6errstat *stat, int type, int code)
{
	switch (type) {
	case ICMP6_DST_UNREACH:
		if (code == ICMP6_DST_UNREACH_NOROUTE)
			stat->icp6errs_dst_unreach_noroute++;
		else if (code == ICMP6_DST_UNREACH_ADMIN)
			stat->icp6errs_dst_unreach_admin++;
		else if (code == ICMP6_DST_UNREACH_BEYONDSCOPE)
			stat->icp6errs_dst_unreach_beyondscope++;
		else if (code == ICMP6_DST_UNREACH_ADDR)
			stat->icp6errs_dst_unreach_addr++;
		else if (code == ICMP6_DST_UNREACH_NOPORT)
			stat->icp6errs_dst_unreach_noport++;
		else
			break;		/* unrecognized code */
		return;
	case ICMP6_PACKET_TOO_BIG:
		stat->icp6errs_packet_too_big++;
		return;
	case ICMP6_TIME_EXCEEDED:
		if (code == ICMP6_TIME_EXCEED_TRANSIT)
			stat->icp6errs_time_exceed_transit++;
		else if (code == ICMP6_TIME_EXCEED_REASSEMBLY)
			stat->icp6errs_time_exceed_reassembly++;
		else
			break;		/* unrecognized code */
		return;
	case ICMP6_PARAM_PROB:
		if (code == ICMP6_PARAMPROB_HEADER)
			stat->icp6errs_paramprob_header++;
		else if (code == ICMP6_PARAMPROB_NEXTHEADER)
			stat->icp6errs_paramprob_nextheader++;
		else if (code == ICMP6_PARAMPROB_OPTION)
			stat->icp6errs_paramprob_option++;
		else
			break;		/* unrecognized code */
		return;
	case ND_REDIRECT:
		stat->icp6errs_redirect++;
		return;
	default:
		break;
	}

	/* Reached only when no specific counter matched. */
	stat->icp6errs_unknown++;
}
/*
 * Front end to icmp6_error() for offending packets that may lack scope
 * zone information: the zone of ifp is embedded into the packet's source
 * and destination addresses before the error is generated.
 *
 * Nothing is sent when ifp is NULL, when the IPv6 header cannot be made
 * contiguous, or when either in6_setscope() call fails.
 *
 * NOTE(review): the early returns written out below do not free m;
 * confirm who owns the mbuf on those paths.
 */
void
icmp6_error2(struct mbuf *m, int type, int code, int param,
    struct ifnet *ifp)
{
	INIT_VNET_INET6(curvnet);
	struct ip6_hdr *hdr;

	if (ifp == NULL)
		return;

#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
#else
	if (m->m_len < sizeof(struct ip6_hdr)) {
		m = m_pullup(m, sizeof(struct ip6_hdr));
		if (m == NULL)
			return;
	}
#endif

	hdr = mtod(m, struct ip6_hdr *);

	/* Source first, then destination; give up on the first failure. */
	if (in6_setscope(&hdr->ip6_src, ifp, NULL) != 0 ||
	    in6_setscope(&hdr->ip6_dst, ifp, NULL) != 0)
		return;

	icmp6_error(m, type, code, param);
}
/*
* Generate an error packet of type error in response to bad IP6 packet.
*
* m is the offending packet.  type and code are stored in the new ICMPv6
* header; param is stored in icmp6_pptr after htonl().
* On the paths visible here m is either handed to icmp6_reflect() or
* released at the freeit label.  The IP6_EXTHDR_CHECK / m_pullup /
* IP6_EXTHDR_GET failure paths return without touching m again
* (presumably those helpers have already disposed of it -- confirm).
*/
void
icmp6_error(struct mbuf *m, int type, int code, int param)
{
INIT_VNET_INET6(curvnet);
struct ip6_hdr *oip6, *nip6;
struct icmp6_hdr *icmp6;
u_int preplen;
int off;
int nxt;
V_icmp6stat.icp6s_error++;
/* count per-type-code statistics */
icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, type, code);
#ifdef M_DECRYPTED /*not openbsd*/
if (m->m_flags & M_DECRYPTED) {
V_icmp6stat.icp6s_canterror++;
goto freeit;
}
#endif
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), );
#else
if (m->m_len < sizeof(struct ip6_hdr)) {
m = m_pullup(m, sizeof(struct ip6_hdr));
if (m == NULL)
return;
}
#endif
oip6 = mtod(m, struct ip6_hdr *);
/*
* If the destination address of the erroneous packet is a multicast
* address, or the packet was sent using link-layer multicast,
* we should basically suppress sending an error (RFC 2463, Section
* 2.4).
* We have two exceptions (the item e.2 in that section):
* - the Packet Too Big message can be sent for path MTU discovery.
* - the Parameter Problem Message that can be allowed an icmp6 error
* in the option type field. This check has been done in
* ip6_unknown_opt(), so we can just check the type and code.
*/
if ((m->m_flags & (M_BCAST|M_MCAST) ||
IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) &&
(type != ICMP6_PACKET_TOO_BIG &&
(type != ICMP6_PARAM_PROB ||
code != ICMP6_PARAMPROB_OPTION)))
goto freeit;
/*
* RFC 2463, 2.4 (e.5): source address check.
* XXX: the case of anycast source?
*/
if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) ||
IN6_IS_ADDR_MULTICAST(&oip6->ip6_src))
goto freeit;
/*
* If we are about to send ICMPv6 against ICMPv6 error/redirect,
* don't do it.
*/
nxt = -1;
off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt);
if (off >= 0 && nxt == IPPROTO_ICMPV6) {
struct icmp6_hdr *icp;
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), );
icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
#else
IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off,
sizeof(*icp));
if (icp == NULL) {
V_icmp6stat.icp6s_tooshort++;
return;
}
#endif
if (icp->icmp6_type < ICMP6_ECHO_REQUEST ||
icp->icmp6_type == ND_REDIRECT) {
/*
* ICMPv6 error
* Special case: for redirect (which is
* informational) we must not send icmp6 error.
*/
V_icmp6stat.icp6s_canterror++;
goto freeit;
} else {
/* ICMPv6 informational - send the error */
}
} else {
/* non-ICMPv6 - send the error */
}
oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */
/* Finally, do rate limitation check. */
if (icmp6_ratelimit(&oip6->ip6_src, type, code)) {
V_icmp6stat.icp6s_toofreq++;
goto freeit;
}
/*
* OK, ICMP6 can be generated.
*/
/*
* Cap the quoted packet at ICMPV6_PLD_MAXLEN bytes.  The m_adj() count
* is zero or negative here; a negative count presumably trims from the
* tail of the chain (see mbuf(9)).
*/
if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN)
m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len);
/* Make room in front for the new IPv6 and ICMPv6 headers. */
preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
M_PREPEND(m, preplen, M_DONTWAIT);
if (m && m->m_len < preplen)
m = m_pullup(m, preplen);
if (m == NULL) {
nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__));
return;
}
/*
* Seed the new header with the addresses of the offending packet and
* then clear the embedded scope from the quoted (old) header.
* NOTE(review): oip6 was obtained before M_PREPEND/m_pullup above;
* confirm it cannot be stale at this point.
*/
nip6 = mtod(m, struct ip6_hdr *);
nip6->ip6_src = oip6->ip6_src;
nip6->ip6_dst = oip6->ip6_dst;
in6_clearscope(&oip6->ip6_src);
in6_clearscope(&oip6->ip6_dst);
icmp6 = (struct icmp6_hdr *)(nip6 + 1);
icmp6->icmp6_type = type;
icmp6->icmp6_code = code;
icmp6->icmp6_pptr = htonl((u_int32_t)param);
/*
* icmp6_reflect() is designed to be in the input path.
* icmp6_error() can be called from both input and output path,
* and if we are in output path rcvif could contain bogus value.
* clear m->m_pkthdr.rcvif for safety, we should have enough scope
* information in ip header (nip6).
*/
m->m_pkthdr.rcvif = NULL;
V_icmp6stat.icp6s_outhist[type]++;
icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */
return;
freeit:
/*
* If we can't tell whether or not we can generate ICMP6, free it.
*/
m_freem(m);
}
/*
* Process a received ICMP6 message.
*
* *mp is the received packet and *offp the offset of the ICMPv6 header
* within it; proto is not referenced in this function.
* After the length and checksum checks the message is dispatched on its
* type.  Unless a path jumps to freeit or returns early, the original
* packet is finally passed to icmp6_rip6_input() for raw sockets.
* Every return visible here yields IPPROTO_DONE.
*
* Note that the labels deliver, badcode and badlen sit inside the
* default case of the type switch and are reached by goto from the
* other cases.
*/
int
icmp6_input(struct mbuf **mp, int *offp, int proto)
{
INIT_VNET_INET6(curvnet);
INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
struct mbuf *m = *mp, *n;
struct ip6_hdr *ip6, *nip6;
struct icmp6_hdr *icmp6, *nicmp6;
int off = *offp;
int icmp6len = m->m_pkthdr.len - *offp;
int code, sum, noff;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE);
/* m might change if M_LOOP. So, call mtod after this */
#endif
/*
* Locate icmp6 structure in mbuf, and check
* that not corrupted and of at least minimum length
*/
ip6 = mtod(m, struct ip6_hdr *);
if (icmp6len < sizeof(struct icmp6_hdr)) {
V_icmp6stat.icp6s_tooshort++;
goto freeit;
}
/*
* calculate the checksum
*/
#ifndef PULLDOWN_TEST
icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
#else
IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
if (icmp6 == NULL) {
V_icmp6stat.icp6s_tooshort++;
return IPPROTO_DONE;
}
#endif
code = icmp6->icmp6_code;
if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) {
nd6log((LOG_ERR,
"ICMP6 checksum error(%d|%x) %s\n",
icmp6->icmp6_type, sum,
ip6_sprintf(ip6bufs, &ip6->ip6_src)));
V_icmp6stat.icp6s_checksum++;
goto freeit;
}
if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) {
/*
* Deliver very specific ICMP6 type only.
* This is important to deliver TOOBIG. Otherwise PMTUD
* will not work.
*/
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
case ICMP6_PACKET_TOO_BIG:
case ICMP6_TIME_EXCEEDED:
break;
default:
goto freeit;
}
}
/* Global and per-interface input statistics. */
V_icmp6stat.icp6s_inhist[icmp6->icmp6_type]++;
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg);
if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK)
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error);
/*
* For the error types below, code is rewritten from the ICMPv6 code
* to a PRC_* value before jumping to deliver.
*/
switch (icmp6->icmp6_type) {
case ICMP6_DST_UNREACH:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach);
switch (code) {
case ICMP6_DST_UNREACH_NOROUTE:
code = PRC_UNREACH_NET;
break;
case ICMP6_DST_UNREACH_ADMIN:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib);
code = PRC_UNREACH_PROTOCOL; /* is this a good code? */
break;
case ICMP6_DST_UNREACH_ADDR:
code = PRC_HOSTDEAD;
break;
case ICMP6_DST_UNREACH_BEYONDSCOPE:
/* I mean "source address was incorrect." */
code = PRC_PARAMPROB;
break;
case ICMP6_DST_UNREACH_NOPORT:
code = PRC_UNREACH_PORT;
break;
default:
goto badcode;
}
goto deliver;
break;
case ICMP6_PACKET_TOO_BIG:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig);
/* validation is made in icmp6_mtudisc_update */
code = PRC_MSGSIZE;
/*
* Updating the path MTU will be done after examining
* intermediate extension headers.
*/
goto deliver;
break;
case ICMP6_TIME_EXCEEDED:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed);
switch (code) {
case ICMP6_TIME_EXCEED_TRANSIT:
code = PRC_TIMXCEED_INTRANS;
break;
case ICMP6_TIME_EXCEED_REASSEMBLY:
code = PRC_TIMXCEED_REASS;
break;
default:
goto badcode;
}
goto deliver;
break;
case ICMP6_PARAM_PROB:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob);
switch (code) {
case ICMP6_PARAMPROB_NEXTHEADER:
code = PRC_UNREACH_PROTOCOL;
break;
case ICMP6_PARAMPROB_HEADER:
case ICMP6_PARAMPROB_OPTION:
code = PRC_PARAMPROB;
break;
default:
goto badcode;
}
goto deliver;
break;
case ICMP6_ECHO_REQUEST:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo);
if (code != 0)
goto badcode;
/*
* The reply is built from a copy (n); the original m is still
* delivered to raw sockets at the end of the function.
*/
if ((n = m_copy(m, 0, M_COPYALL)) == NULL) {
/* Give up remote */
break;
}
/*
* If the copy's first mbuf uses external storage or does not hold
* the headers contiguously, build a fresh header mbuf carrying only
* IPv6 + ICMPv6 and chain the trimmed copy (n0) behind it.
*/
if ((n->m_flags & M_EXT) != 0
|| n->m_len < off + sizeof(struct icmp6_hdr)) {
struct mbuf *n0 = n;
const int maxlen = sizeof(*nip6) + sizeof(*nicmp6);
int n0len;
MGETHDR(n, M_DONTWAIT, n0->m_type);
n0len = n0->m_pkthdr.len; /* save for use below */
if (n)
M_MOVE_PKTHDR(n, n0);
if (n && maxlen >= MHLEN) {
MCLGET(n, M_DONTWAIT);
if ((n->m_flags & M_EXT) == 0) {
m_free(n);
n = NULL;
}
}
if (n == NULL) {
/* Give up remote */
m_freem(n0);
break;
}
/*
* Copy IPv6 and ICMPv6 only.
*/
nip6 = mtod(n, struct ip6_hdr *);
bcopy(ip6, nip6, sizeof(struct ip6_hdr));
nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
noff = sizeof(struct ip6_hdr);
/* new mbuf contains only ipv6+icmpv6 headers */
n->m_len = noff + sizeof(struct icmp6_hdr);
/*
* Adjust mbuf. ip6_plen will be adjusted in
* ip6_output().
*/
m_adj(n0, off + sizeof(struct icmp6_hdr));
/* recalculate complete packet size */
n->m_pkthdr.len = n0len + (noff - off);
n->m_next = n0;
} else {
/* Headers are usable in place; rewrite the copy directly. */
nip6 = mtod(n, struct ip6_hdr *);
IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off,
sizeof(*nicmp6));
noff = off;
}
nicmp6->icmp6_type = ICMP6_ECHO_REPLY;
nicmp6->icmp6_code = 0;
if (n) {
V_icmp6stat.icp6s_reflect++;
V_icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++;
icmp6_reflect(n, noff);
}
break;
case ICMP6_ECHO_REPLY:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply);
if (code != 0)
goto badcode;
break;
/*
* MLD and neighbor discovery messages below share one pattern: a copy
* is handed to the protocol handler while m stays for raw-socket
* delivery.  If the copy fails, m itself is handed over, m is set to
* NULL, and raw-socket delivery is skipped via freeit.
*/
case MLD_LISTENER_QUERY:
case MLD_LISTENER_REPORT:
if (icmp6len < sizeof(struct mld_hdr))
goto badlen;
if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery);
else
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport);
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
mld6_input(m, off);
m = NULL;
goto freeit;
}
mld6_input(n, off);
/* m stays. */
break;
case MLD_LISTENER_DONE:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone);
if (icmp6len < sizeof(struct mld_hdr)) /* necessary? */
goto badlen;
break; /* nothing to be done in kernel */
case MLD_MTRACE_RESP:
case MLD_MTRACE:
/* XXX: these two are experimental. not officially defined. */
/* XXX: per-interface statistics? */
break; /* just pass it to applications */
case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */
{
/*
* The query flavor is chosen purely by length: exactly an ICMPv6
* header plus 4 bytes is WRU, anything at least as long as
* struct icmp6_nodeinfo is FQDN.
*/
enum { WRU, FQDN } mode;
if (!V_icmp6_nodeinfo)
break;
if (icmp6len == sizeof(struct icmp6_hdr) + 4)
mode = WRU;
else if (icmp6len >= sizeof(struct icmp6_nodeinfo))
mode = FQDN;
else
goto badlen;
#define hostnamelen strlen(V_hostname)
if (mode == FQDN) {
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo),
IPPROTO_DONE);
#endif
/* ni6_input() returns the reply mbuf or NULL. */
n = m_copy(m, 0, M_COPYALL);
if (n)
n = ni6_input(n, off);
/* XXX meaningless if n == NULL */
noff = sizeof(struct ip6_hdr);
} else {
u_char *p;
int maxlen, maxhlen;
/*
* XXX: this combination of flags is pointless,
* but should we keep this for compatibility?
*/
if ((V_icmp6_nodeinfo & 5) != 5)
break;
if (code != 0)
goto badcode;
maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4;
if (maxlen >= MCLBYTES) {
/* Give up remote */
break;
}
MGETHDR(n, M_DONTWAIT, m->m_type);
if (n && maxlen > MHLEN) {
MCLGET(n, M_DONTWAIT);
if ((n->m_flags & M_EXT) == 0) {
m_free(n);
n = NULL;
}
}
if (n && !m_dup_pkthdr(n, m, M_DONTWAIT)) {
/*
* Previous code did a blind M_COPY_PKTHDR
* and said "just for rcvif". If true, then
* we could tolerate the dup failing (due to
* the deep copy of the tag chain). For now
* be conservative and just fail.
*/
m_free(n);
n = NULL;
}
if (n == NULL) {
/* Give up remote */
break;
}
n->m_pkthdr.rcvif = NULL;
n->m_len = 0;
/*
* maxhlen: room left for the hostname after the fixed part of the
* reply, clamped to the hostname length.  hostnamelen expands to
* strlen(V_hostname) and is evaluated under hostname_mtx.
*/
maxhlen = M_TRAILINGSPACE(n) - maxlen;
mtx_lock(&hostname_mtx);
if (maxhlen > hostnamelen)
maxhlen = hostnamelen;
/*
* Copy IPv6 and ICMPv6 only.
*/
nip6 = mtod(n, struct ip6_hdr *);
bcopy(ip6, nip6, sizeof(struct ip6_hdr));
nicmp6 = (struct icmp6_hdr *)(nip6 + 1);
bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr));
p = (u_char *)(nicmp6 + 1);
bzero(p, 4);
bcopy(V_hostname, p + 4, maxhlen); /* meaningless TTL */
mtx_unlock(&hostname_mtx);
noff = sizeof(struct ip6_hdr);
n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
sizeof(struct icmp6_hdr) + 4 + maxhlen;
nicmp6->icmp6_type = ICMP6_WRUREPLY;
nicmp6->icmp6_code = 0;
}
#undef hostnamelen
if (n) {
V_icmp6stat.icp6s_reflect++;
V_icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++;
icmp6_reflect(n, noff);
}
break;
}
case ICMP6_WRUREPLY:
if (code != 0)
goto badcode;
break;
case ND_ROUTER_SOLICIT:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit);
if (code != 0)
goto badcode;
if (icmp6len < sizeof(struct nd_router_solicit))
goto badlen;
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
nd6_rs_input(m, off, icmp6len);
m = NULL;
goto freeit;
}
nd6_rs_input(n, off, icmp6len);
/* m stays. */
break;
case ND_ROUTER_ADVERT:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert);
if (code != 0)
goto badcode;
if (icmp6len < sizeof(struct nd_router_advert))
goto badlen;
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
nd6_ra_input(m, off, icmp6len);
m = NULL;
goto freeit;
}
nd6_ra_input(n, off, icmp6len);
/* m stays. */
break;
case ND_NEIGHBOR_SOLICIT:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit);
if (code != 0)
goto badcode;
if (icmp6len < sizeof(struct nd_neighbor_solicit))
goto badlen;
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
nd6_ns_input(m, off, icmp6len);
m = NULL;
goto freeit;
}
nd6_ns_input(n, off, icmp6len);
/* m stays. */
break;
case ND_NEIGHBOR_ADVERT:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert);
if (code != 0)
goto badcode;
if (icmp6len < sizeof(struct nd_neighbor_advert))
goto badlen;
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
nd6_na_input(m, off, icmp6len);
m = NULL;
goto freeit;
}
nd6_na_input(n, off, icmp6len);
/* m stays. */
break;
case ND_REDIRECT:
icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect);
if (code != 0)
goto badcode;
if (icmp6len < sizeof(struct nd_redirect))
goto badlen;
if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) {
/* give up local */
icmp6_redirect_input(m, off);
m = NULL;
goto freeit;
}
icmp6_redirect_input(n, off);
/* m stays. */
break;
case ICMP6_ROUTER_RENUMBERING:
if (code != ICMP6_ROUTER_RENUMBERING_COMMAND &&
code != ICMP6_ROUTER_RENUMBERING_RESULT)
goto badcode;
if (icmp6len < sizeof(struct icmp6_router_renum))
goto badlen;
break;
default:
nd6log((LOG_DEBUG,
"icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n",
icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst),
m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0));
if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) {
/* ICMPv6 error: MUST deliver it by spec... */
code = PRC_NCMDS;
/* deliver */
} else {
/* ICMPv6 informational: MUST not deliver */
break;
}
/* Shared tail for all error types; also reached by goto. */
deliver:
if (icmp6_notify_error(&m, off, icmp6len, code)) {
/* In this case, m should've been freed. */
return (IPPROTO_DONE);
}
break;
/* Bad code/length: count it, then still deliver to raw sockets. */
badcode:
V_icmp6stat.icp6s_badcode++;
break;
badlen:
V_icmp6stat.icp6s_badlen++;
break;
}
/* deliver the packet to appropriate sockets */
icmp6_rip6_input(&m, *offp);
return IPPROTO_DONE;
freeit:
m_freem(m);
return IPPROTO_DONE;
}
/*
* Notify the upper-layer protocol about a received ICMPv6 error.
*
* *mp is the received packet, off the offset of the ICMPv6 header,
* icmp6len the ICMPv6 length and code the PRC_* code to pass on.
* The function walks the extension headers of the packet quoted inside
* the ICMPv6 error to find the upper-layer protocol (nxt), its offset
* (eoff) and, when a type-0 routing header with segments left is present,
* the final destination.  It then calls the pr_ctlinput handler of that
* protocol, if any.
*
* Returns 0 and stores m back into *mp on success.  Returns -1 on
* failure; at freeit m is released here, and the other -1 returns come
* from IP6_EXTHDR_CHECK / IP6_EXTHDR_GET failures (the caller assumes m
* has been freed in that case).
*/
static int
icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code)
{
INIT_VNET_INET6(curvnet);
struct mbuf *m = *mp;
struct icmp6_hdr *icmp6;
struct ip6_hdr *eip6;
u_int32_t notifymtu;
struct sockaddr_in6 icmp6src, icmp6dst;
/* The error must quote at least a full IPv6 header. */
if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) {
V_icmp6stat.icp6s_tooshort++;
goto freeit;
}
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off,
sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1);
icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
#else
IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
sizeof(*icmp6) + sizeof(struct ip6_hdr));
if (icmp6 == NULL) {
V_icmp6stat.icp6s_tooshort++;
return (-1);
}
#endif
/* eip6: the IPv6 header of the packet quoted in the error. */
eip6 = (struct ip6_hdr *)(icmp6 + 1);
/* Detect the upper level protocol */
{
void (*ctlfunc)(int, struct sockaddr *, void *);
u_int8_t nxt = eip6->ip6_nxt;
int eoff = off + sizeof(struct icmp6_hdr) +
sizeof(struct ip6_hdr);
struct ip6ctlparam ip6cp;
struct in6_addr *finaldst = NULL;
int icmp6type = icmp6->icmp6_type;
struct ip6_frag *fh;
struct ip6_rthdr *rth;
struct ip6_rthdr0 *rth0;
int rthlen;
/*
* Each iteration either advances eoff past one extension header or
* leaves the loop via goto notify / an early return.
*/
while (1) { /* XXX: should avoid infinite loop explicitly? */
struct ip6_ext *eh;
switch (nxt) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
case IPPROTO_AH:
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, 0,
eoff + sizeof(struct ip6_ext), -1);
eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff);
#else
IP6_EXTHDR_GET(eh, struct ip6_ext *, m,
eoff, sizeof(*eh));
if (eh == NULL) {
V_icmp6stat.icp6s_tooshort++;
return (-1);
}
#endif
/* AH counts its length in 4-byte units, the others in 8-byte units. */
if (nxt == IPPROTO_AH)
eoff += (eh->ip6e_len + 2) << 2;
else
eoff += (eh->ip6e_len + 1) << 3;
nxt = eh->ip6e_nxt;
break;
case IPPROTO_ROUTING:
/*
* When the erroneous packet contains a
* routing header, we should examine the
* header to determine the final destination.
* Otherwise, we can't properly update
* information that depends on the final
* destination (e.g. path MTU).
*/
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1);
rth = (struct ip6_rthdr *)
(mtod(m, caddr_t) + eoff);
#else
IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m,
eoff, sizeof(*rth));
if (rth == NULL) {
V_icmp6stat.icp6s_tooshort++;
return (-1);
}
#endif
rthlen = (rth->ip6r_len + 1) << 3;
/*
* XXX: currently there is no
* officially defined type other
* than type-0.
* Note that if the segment left field
* is 0, all intermediate hops must
* have been passed.
*/
if (rth->ip6r_segleft &&
rth->ip6r_type == IPV6_RTHDR_TYPE_0) {
int hops;
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1);
rth0 = (struct ip6_rthdr0 *)
(mtod(m, caddr_t) + eoff);
#else
IP6_EXTHDR_GET(rth0,
struct ip6_rthdr0 *, m,
eoff, rthlen);
if (rth0 == NULL) {
V_icmp6stat.icp6s_tooshort++;
return (-1);
}
#endif
/* just ignore a bogus header */
/* The last address listed in the header is the final destination. */
if ((rth0->ip6r0_len % 2) == 0 &&
(hops = rth0->ip6r0_len/2))
finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1);
}
eoff += rthlen;
nxt = rth->ip6r_nxt;
break;
case IPPROTO_FRAGMENT:
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, 0, eoff +
sizeof(struct ip6_frag), -1);
fh = (struct ip6_frag *)(mtod(m, caddr_t) +
eoff);
#else
IP6_EXTHDR_GET(fh, struct ip6_frag *, m,
eoff, sizeof(*fh));
if (fh == NULL) {
V_icmp6stat.icp6s_tooshort++;
return (-1);
}
#endif
/*
* Data after a fragment header is meaningless
* unless it is the first fragment, but
* we'll go to the notify label for path MTU
* discovery.
*/
if (fh->ip6f_offlg & IP6F_OFF_MASK)
goto notify;
eoff += sizeof(struct ip6_frag);
nxt = fh->ip6f_nxt;
break;
default:
/*
* This case includes ESP and the No Next
* Header. In such cases going to the notify
* label does not have any meaning
* (i.e. ctlfunc will be NULL), but we go
* anyway since we might have to update
* path MTU information.
*/
goto notify;
}
}
notify:
/* Re-derive icmp6 from m before using it again. */
#ifndef PULLDOWN_TEST
icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off);
#else
IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off,
sizeof(*icmp6) + sizeof(struct ip6_hdr));
if (icmp6 == NULL) {
V_icmp6stat.icp6s_tooshort++;
return (-1);
}
#endif
/*
* retrieve parameters from the inner IPv6 header, and convert
* them into sockaddr structures.
* XXX: there is no guarantee that the source or destination
* addresses of the inner packet are in the same scope as
* the addresses of the icmp packet. But there is no other
* way to determine the zone.
*/
eip6 = (struct ip6_hdr *)(icmp6 + 1);
bzero(&icmp6dst, sizeof(icmp6dst));
icmp6dst.sin6_len = sizeof(struct sockaddr_in6);
icmp6dst.sin6_family = AF_INET6;
if (finaldst == NULL)
icmp6dst.sin6_addr = eip6->ip6_dst;
else
icmp6dst.sin6_addr = *finaldst;
if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL))
goto freeit;
bzero(&icmp6src, sizeof(icmp6src));
icmp6src.sin6_len = sizeof(struct sockaddr_in6);
icmp6src.sin6_family = AF_INET6;
icmp6src.sin6_addr = eip6->ip6_src;
if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL))
goto freeit;
icmp6src.sin6_flowinfo =
(eip6->ip6_flow & IPV6_FLOWLABEL_MASK);
/* Bundle everything the protocol handler may need into ip6cp. */
if (finaldst == NULL)
finaldst = &eip6->ip6_dst;
ip6cp.ip6c_m = m;
ip6cp.ip6c_icmp6 = icmp6;
ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1);
ip6cp.ip6c_off = eoff;
ip6cp.ip6c_finaldst = finaldst;
ip6cp.ip6c_src = &icmp6src;
ip6cp.ip6c_nxt = nxt;
/* Packet Too Big additionally feeds the path MTU update. */
if (icmp6type == ICMP6_PACKET_TOO_BIG) {
notifymtu = ntohl(icmp6->icmp6_mtu);
ip6cp.ip6c_cmdarg = (void *)&notifymtu;
icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/
}
ctlfunc = (void (*)(int, struct sockaddr *, void *))
(inet6sw[ip6_protox[nxt]].pr_ctlinput);
if (ctlfunc) {
(void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst,
&ip6cp);
}
}
*mp = m;
return (0);
freeit:
m_freem(m);
return (-1);
}
/*
 * Record the MTU advertised by an ICMPv6 Packet Too Big message in the TCP
 * host cache, keyed by the final destination found in ip6cp.
 *
 * The update is skipped when the message has not been validated by the
 * caller, when the advertised MTU is abnormally small, when the scope of
 * the destination cannot be set, or when the advertised MTU would not
 * lower the value reported by tcp_maxmtu6().
 */
void
icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated)
{
	INIT_VNET_INET6(curvnet);
	struct mbuf *m;		/* will be necessary for scope issue */
	struct in_conninfo inc;
	u_int mtu;

	mtu = ntohl(ip6cp->ip6c_icmp6->icmp6_mtu);

	/*
	 * RFC2460 section 5, last paragraph: even though the minimum link
	 * MTU for IPv6 is IPV6_MMTU, we may see ICMPv6 too big with
	 * mtu < IPV6_MMTU due to a packet translator in the middle, so such
	 * values are deliberately not rejected outright (see ip6_output()
	 * and ip6_getpmtu() "alwaysfrag" case for the special handling).
	 *
	 * We do reject ICMPv6 too big with an abnormally small value.
	 * XXX what is the good definition of "abnormally small"?
	 */
	if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8 ||
	    !validated)
		return;

	/*
	 * In case the suggested mtu is less than IPV6_MMTU, we only need to
	 * remember that it was for the above mentioned "alwaysfrag" case.
	 * Try to be as close to the spec as possible.
	 */
	if (mtu < IPV6_MMTU)
		mtu = IPV6_MMTU - 8;

	/* Build the host cache key: an IPv6 entry for the final destination. */
	bzero(&inc, sizeof(inc));
	inc.inc_flags = 1;	/* IPv6 */
	inc.inc6_faddr = *ip6cp->ip6c_finaldst;

	m = ip6cp->ip6c_m;
	if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL))
		return;

	/* Only ever lower the MTU. */
	if (mtu >= tcp_maxmtu6(&inc, NULL))
		return;

	tcp_hc_updatemtu(&inc, mtu);
	V_icmp6stat.icp6s_pmtuchg++;
}
/*
* Process a Node Information Query packet, based on
* draft-ietf-ipngwg-icmp-name-lookups-07.
*
* Spec incompatibilities:
* - IPv6 Subject address handling
* - IPv4 Subject address handling support missing
* - Proxy reply (answer even if it's not for me)
* - joins NI group address at in6_ifattach() time only, does not cope
* with hostname changes by sethostname(3)
*
* m is the query packet and off the offset of the ICMPv6 header in it.
* On success the query m is freed and a newly allocated reply mbuf is
* returned.  On any failure NULL is returned; at the bad label both m
* and any partially built reply n are freed.
*
* hostnamelen expands to strlen(V_hostname); the uses below evaluate it
* while holding hostname_mtx.
*/
#define hostnamelen strlen(V_hostname)
static struct mbuf *
ni6_input(struct mbuf *m, int off)
{
INIT_VNET_INET6(curvnet);
INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
struct icmp6_nodeinfo *ni6, *nni6;
struct mbuf *n = NULL;
u_int16_t qtype;
int subjlen;
int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
struct ni_reply_fqdn *fqdn;
int addrs; /* for NI_QTYPE_NODEADDR */
struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */
struct in6_addr in6_subj; /* subject address */
struct ip6_hdr *ip6;
int oldfqdn = 0; /* if 1, return pascal string (03 draft) */
char *subj = NULL;
struct in6_ifaddr *ia6 = NULL;
ip6 = mtod(m, struct ip6_hdr *);
#ifndef PULLDOWN_TEST
ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off);
#else
IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6));
if (ni6 == NULL) {
/* m is already reclaimed */
return (NULL);
}
#endif
/*
* Validate IPv6 source address.
* The default configuration MUST be to refuse answering queries from
* global-scope addresses according to RFC4602.
* Notes:
* - it's not very clear what "refuse" means; this implementation
* simply drops it.
* - it's not very easy to identify global-scope (unicast) addresses
* since there are many prefixes for them. It should be safer
* and in practice sufficient to check "all" but loopback and
* link-local (note that site-local unicast was deprecated and
* ULA is defined as global scope-wise)
*/
if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 &&
!IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
!IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src))
goto bad;
/*
* Validate IPv6 destination address.
*
* The Responder must discard the Query without further processing
* unless it is one of the Responder's unicast or anycast addresses, or
* a link-local scope multicast address which the Responder has joined.
* [RFC4602, Section 5.]
*/
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
goto bad;
/* else it's a link-local multicast, fine */
} else { /* unicast or anycast */
if ((ia6 = ip6_getdstifaddr(m)) == NULL)
goto bad; /* XXX impossible */
if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) &&
!(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) {
nd6log((LOG_DEBUG, "ni6_input: ignore node info to "
"a temporary address in %s:%d",
__FILE__, __LINE__));
goto bad;
}
}
/* validate query Subject field. */
/* subjlen: number of bytes following the fixed node information header. */
qtype = ntohs(ni6->ni_qtype);
subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo);
switch (qtype) {
case NI_QTYPE_NOOP:
case NI_QTYPE_SUPTYPES:
/* 07 draft */
if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0)
break;
/* FALLTHROUGH */
case NI_QTYPE_FQDN:
case NI_QTYPE_NODEADDR:
case NI_QTYPE_IPV4ADDR:
switch (ni6->ni_code) {
case ICMP6_NI_SUBJ_IPV6:
#if ICMP6_NI_SUBJ_IPV6 != 0
case 0:
#endif
/*
* backward compatibility - try to accept 03 draft
* format, where no Subject is present.
*/
if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 &&
subjlen == 0) {
oldfqdn++;
break;
}
#if ICMP6_NI_SUBJ_IPV6 != 0
if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
goto bad;
#endif
if (subjlen != sizeof(struct in6_addr))
goto bad;
/*
* Validate Subject address.
*
* Not sure what exactly "address belongs to the node"
* means in the spec, is it just unicast, or what?
*
* At this moment we consider Subject address as
* "belong to the node" if the Subject address equals
* to the IPv6 destination address; validation for
* IPv6 destination address should have done enough
* check for us.
*
* We do not do proxy at this moment.
*/
/* m_pulldown instead of copy? */
m_copydata(m, off + sizeof(struct icmp6_nodeinfo),
subjlen, (caddr_t)&in6_subj);
if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL))
goto bad;
subj = (char *)&in6_subj;
if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj))
break;
/*
* XXX if we are to allow other cases, we should really
* be careful about scope here.
* basically, we should disallow queries toward IPv6
* destination X with subject Y,
* if scope(X) > scope(Y).
* if we allow scope(X) > scope(Y), it will result in
* information leakage across scope boundary.
*/
goto bad;
case ICMP6_NI_SUBJ_FQDN:
/*
* Validate Subject name with gethostname(3).
*
* The behavior may need some debate, since:
* - we are not sure if the node has FQDN as
* hostname (returned by gethostname(3)).
* - the code does wildcard match for truncated names.
* however, we are not sure if we want to perform
* wildcard match, if gethostname(3) side has
* truncated hostname.
*/
/*
* n temporarily holds our own hostname in DNS wire format for the
* comparison; it is freed again before leaving this case.
*/
mtx_lock(&hostname_mtx);
n = ni6_nametodns(V_hostname, hostnamelen, 0);
mtx_unlock(&hostname_mtx);
if (!n || n->m_next || n->m_len == 0)
goto bad;
IP6_EXTHDR_GET(subj, char *, m,
off + sizeof(struct icmp6_nodeinfo), subjlen);
if (subj == NULL)
goto bad;
if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *),
n->m_len)) {
goto bad;
}
m_freem(n);
n = NULL;
break;
case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */
default:
goto bad;
}
break;
}
/* refuse based on configuration. XXX ICMP6_NI_REFUSED? */
switch (qtype) {
case NI_QTYPE_FQDN:
if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0)
goto bad;
break;
case NI_QTYPE_NODEADDR:
case NI_QTYPE_IPV4ADDR:
if ((V_icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0)
goto bad;
break;
}
/* guess reply length */
switch (qtype) {
case NI_QTYPE_NOOP:
break; /* no reply data */
case NI_QTYPE_SUPTYPES:
replylen += sizeof(u_int32_t);
break;
case NI_QTYPE_FQDN:
/* XXX will append an mbuf */
replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
break;
case NI_QTYPE_NODEADDR:
addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj);
if ((replylen += addrs * (sizeof(struct in6_addr) +
sizeof(u_int32_t))) > MCLBYTES)
replylen = MCLBYTES; /* XXX: will truncate pkt later */
break;
case NI_QTYPE_IPV4ADDR:
/* unsupported - should respond with unknown Qtype? */
break;
default:
/*
* XXX: We must return a reply with the ICMP6 code
* `unknown Qtype' in this case. However we regard the case
* as an FQDN query for backward compatibility.
* Older versions set a random value to this field,
* so it rarely varies in the defined qtypes.
* But the mechanism is not reliable...
* maybe we should obsolete older versions.
*/
qtype = NI_QTYPE_FQDN;
/* XXX will append an mbuf */
replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen);
oldfqdn++;
break;
}
/* allocate an mbuf to reply. */
MGETHDR(n, M_DONTWAIT, m->m_type);
if (n == NULL) {
m_freem(m);
return (NULL);
}
M_MOVE_PKTHDR(n, m); /* just for recvif */
if (replylen > MHLEN) {
if (replylen > MCLBYTES) {
/*
* XXX: should we try to allocate more? But MCLBYTES
* is probably much larger than IPV6_MMTU...
*/
goto bad;
}
MCLGET(n, M_DONTWAIT);
if ((n->m_flags & M_EXT) == 0) {
goto bad;
}
}
n->m_pkthdr.len = n->m_len = replylen;
/* copy mbuf header and IPv6 + Node Information base headers */
bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr));
nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1);
bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo));
/* qtype dependent procedure */
switch (qtype) {
case NI_QTYPE_NOOP:
nni6->ni_code = ICMP6_NI_SUCCESS;
nni6->ni_flags = 0;
break;
case NI_QTYPE_SUPTYPES:
{
u_int32_t v;
nni6->ni_code = ICMP6_NI_SUCCESS;
nni6->ni_flags = htons(0x0000); /* raw bitmap */
/* supports NOOP, SUPTYPES, FQDN, and NODEADDR */
v = (u_int32_t)htonl(0x0000000f);
bcopy(&v, nni6 + 1, sizeof(u_int32_t));
break;
}
case NI_QTYPE_FQDN:
nni6->ni_code = ICMP6_NI_SUCCESS;
fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) +
sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo));
nni6->ni_flags = 0; /* XXX: meaningless TTL */
fqdn->ni_fqdn_ttl = 0; /* ditto. */
/*
* XXX do we really have FQDN in variable "hostname"?
*/
/* The encoded name is chained behind the reply header as its own mbuf. */
mtx_lock(&hostname_mtx);
n->m_next = ni6_nametodns(V_hostname, hostnamelen, oldfqdn);
mtx_unlock(&hostname_mtx);
if (n->m_next == NULL)
goto bad;
/* XXX we assume that n->m_next is not a chain */
if (n->m_next->m_next != NULL)
goto bad;
n->m_pkthdr.len += n->m_next->m_len;
break;
case NI_QTYPE_NODEADDR:
{
int lenlim, copied;
nni6->ni_code = ICMP6_NI_SUCCESS;
/*
* Shrink the length to the fixed headers first so that
* M_TRAILINGSPACE() reports the room available for addresses.
*/
n->m_pkthdr.len = n->m_len =
sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo);
lenlim = M_TRAILINGSPACE(n);
copied = ni6_store_addrs(ni6, nni6, ifp, lenlim);
/* XXX: reset mbuf length */
n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) +
sizeof(struct icmp6_nodeinfo) + copied;
break;
}
default:
break; /* XXX impossible! */
}
nni6->ni_type = ICMP6_NI_REPLY;
m_freem(m);
return (n);
bad:
m_freem(m);
if (n)
m_freem(n);
return (NULL);
}
#undef hostnamelen
/*
 * Build an mbuf holding `name' (namelen bytes) as a DNS-encoded string.
 * No compression support.
 *
 * XXX names with less than 2 dots (like "foo" or "foo.section") will be
 * treated as truncated name (two \0 at the end). this is a wild guess.
 *
 * old - if non-zero, return a pascal string (one length byte followed by
 *       the raw name) instead of the DNS encoding.
 *
 * Returns the new mbuf, or NULL if allocation fails, the result does not
 * fit, or a label is empty or 64 bytes or longer.
 */
static struct mbuf *
ni6_nametodns(const char *name, int namelen, int old)
{
	struct mbuf *m;
	const char *nameend, *src, *lend;
	char *dst, *lim;
	int dots, lablen, need, nterm;

	need = old ? namelen + 1 : MCLBYTES;

	/* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */
	MGET(m, M_DONTWAIT, MT_DATA);
	if (m == NULL)
		goto fail;
	if (need > MLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0)
			goto fail;
	}
	m->m_next = NULL;

	if (old) {
		/* Pascal string: length byte, then the name as is. */
		m->m_len = need;
		dst = mtod(m, char *);
		dst[0] = namelen;
		bcopy(name, dst + 1, namelen);
		return m;
	} else {
		m->m_len = 0;
		dst = mtod(m, char *);
		lim = mtod(m, char *) + M_TRAILINGSPACE(m);

		/* if not certain about my name, return empty buffer */
		if (namelen == 0)
			return m;

		nameend = name + namelen;

		/*
		 * guess if it looks like shortened hostname, or FQDN.
		 * shortened hostname needs two trailing "\0".
		 */
		dots = 0;
		for (src = name; src < nameend; src++) {
			if (*src == '.')
				dots++;
		}
		nterm = (dots < 2) ? 2 : 1;

		/* Emit one <length, bytes> pair per dot-separated label. */
		src = name;
		while (dst < lim && src < nameend) {
			lend = src;
			while (lend < nameend && *lend != '\0' &&
			    *lend != '.')
				lend++;
			lablen = lend - src;

			/* result does not fit into mbuf */
			if (dst + lablen + 1 >= lim)
				goto fail;
			/*
			 * DNS label length restriction, RFC1035 page 8.
			 * "lablen == 0" case is included here to avoid
			 * returning 0-length label on "foo..bar".
			 */
			if (lablen <= 0 || lablen >= 64)
				goto fail;

			*dst++ = lablen;
			bcopy(src, dst, lablen);
			dst += lablen;

			/* Step over the label and the dot that ended it. */
			src = lend;
			if (src < nameend && *src == '.')
				src++;
		}

		/* termination */
		if (dst + nterm >= lim)
			goto fail;
		for (; nterm > 0; nterm--)
			*dst++ = '\0';
		m->m_len = dst - mtod(m, char *);
		return m;
	}

	panic("should not reach here");
	/* NOTREACHED */

fail:
	if (m)
		m_freem(m);
	return NULL;
}
/*
 * Compare two DNS-encoded names (a sequence of <length, bytes> labels
 * ending in a zero byte).  Returns 1 when they match and 0 otherwise.
 *
 * A name in truncated form (ending in \0\0) matches any name that starts
 * with the same labels.  No compression support.
 * XXX upper/lowercase match (see RFC2065)
 */
static int
ni6_dnsmatch(const char *a, int alen, const char *b, int blen)
{
	int apos, bpos;		/* offset of the current label in a / b */
	int lablen;
	char la, lb;		/* length bytes of the current labels */

	/* simplest case - need validation? */
	if (alen == blen && bcmp(a, b, alen) == 0)
		return 1;

	/* termination is mandatory */
	if (alen < 2 || blen < 2)
		return 0;
	if (a[alen - 1] != '\0' || b[blen - 1] != '\0')
		return 0;

	/* Keep the final terminator out of the label walk. */
	alen--;
	blen--;

	apos = 0;
	bpos = 0;
	while (apos < alen && bpos < blen) {
		la = a[apos];
		lb = b[bpos];

		if ((signed char)la < 0 || (signed char)lb < 0)
			return 0;
		/* we don't support compression yet */
		if (la >= 64 || lb >= 64)
			return 0;

		/* truncated case */
		if (la == 0 && apos == alen - 1)
			return 1;
		if (lb == 0 && bpos == blen - 1)
			return 1;

		/* An empty label anywhere else is malformed. */
		if (la == 0 || lb == 0)
			return 0;

		if (la != lb)
			return 0;
		lablen = la;

		/* The label body must lie inside both names. */
		if (apos + 1 + lablen > alen || bpos + 1 + lablen > blen)
			return 0;
		if (bcmp(a + apos + 1, b + bpos + 1, lablen) != 0)
			return 0;

		apos += 1 + lablen;
		bpos += 1 + lablen;
	}

	/* Both names must have been consumed completely. */
	return (apos == alen && bpos == blen) ? 1 : 0;
}
/*
 * calculate the number of addresses to be returned in the node info reply.
 *
 * ni6  - the query; ni_flags selects the address scopes that count and
 *        ni_code is checked when NI_NODEADDR_FLAG_ALL is not set
 * m    - not used by this function
 * ifpp - receives the interface that owns subj, when one is found
 * subj - subject address; only consulted without NI_NODEADDR_FLAG_ALL
 *
 * Returns the count for the interface owning subj if there is one,
 * otherwise the sum over all interfaces; 0 for unsupported queries.
 */
static int
ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp,
    struct in6_addr *subj)
{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET6(curvnet);
	struct ifnet *ifp;
	struct in6_ifaddr *ifa6;
	struct ifaddr *ifa;
	int total = 0, perif, subj_found = 0;
	int niflags = ni6->ni_flags;
	int want_all = (niflags & NI_NODEADDR_FLAG_ALL) != 0;
	int scopeflag;

	if (!want_all) {
		/*
		 * XXX: we only support IPv6 subject address for
		 * this Qtype.
		 */
		if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6)
			return (0);
		if (subj == NULL)	/* must be impossible... */
			return (0);
	}

	IFNET_RLOCK();
	TAILQ_FOREACH(ifp, &V_ifnet, if_list) {
		perif = 0;
		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			ifa6 = (struct in6_ifaddr *)ifa;

			/* remember that this interface owns the subject */
			if (!want_all &&
			    IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr))
				subj_found = 1;

			/*
			 * IPv4-mapped addresses can only be returned by a
			 * Node Information proxy, since they represent
			 * addresses of IPv4-only nodes, which perforce do
			 * not implement this protocol.
			 * [icmp-name-lookups-07, Section 5.4]
			 * So we don't support NI_NODEADDR_FLAG_COMPAT in
			 * this function at this moment.
			 */

			/* What do we have to do about ::1? */
			switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
			case IPV6_ADDR_SCOPE_LINKLOCAL:
				scopeflag = NI_NODEADDR_FLAG_LINKLOCAL;
				break;
			case IPV6_ADDR_SCOPE_SITELOCAL:
				scopeflag = NI_NODEADDR_FLAG_SITELOCAL;
				break;
			case IPV6_ADDR_SCOPE_GLOBAL:
				scopeflag = NI_NODEADDR_FLAG_GLOBAL;
				break;
			default:
				continue;
			}
			/* skip scopes the querier did not ask for */
			if ((niflags & scopeflag) == 0)
				continue;

			/*
			 * check if anycast is okay.
			 * XXX: just experimental.  not in the spec.
			 */
			if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
			    (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
				continue; /* we need only unicast addresses */

			/* temporary addresses only when explicitly allowed */
			if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
			    (V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0)
				continue;

			perif++;	/* count the address */
		}

		/* the subject's interface alone determines the answer */
		if (subj_found) {
			*ifpp = ifp;
			IFNET_RUNLOCK();
			return (perif);
		}

		total += perif;
	}
	IFNET_RUNLOCK();

	return (total);
}
static int
ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6,
struct ifnet *ifp0, int resid)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&V_ifnet);
struct in6_ifaddr *ifa6;
struct ifaddr *ifa;
struct ifnet *ifp_dep = NULL;
int copied = 0, allow_deprecated = 0;
u_char *cp = (u_char *)(nni6 + 1);
int niflags = ni6->ni_flags;
u_int32_t ltime;
if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL))
return (0); /* needless to copy */
IFNET_RLOCK();
again:
for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) {
for (ifa = ifp->if_addrlist.tqh_first; ifa;
ifa = ifa->ifa_list.tqe_next) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
ifa6 = (struct in6_ifaddr *)ifa;
if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 &&
allow_deprecated == 0) {
/*
* prefererred address should be put before
* deprecated addresses.
*/
/* record the interface for later search */
if (ifp_dep == NULL)
ifp_dep = ifp;
continue;
} else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 &&
allow_deprecated != 0)
continue; /* we now collect deprecated addrs */
/* What do we have to do about ::1? */
switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) {
case IPV6_ADDR_SCOPE_LINKLOCAL:
if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0)
continue;
break;
case IPV6_ADDR_SCOPE_SITELOCAL:
if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0)
continue;
break;
case IPV6_ADDR_SCOPE_GLOBAL:
if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0)
continue;
break;
default:
continue;
}
/*
* check if anycast is okay.
* XXX: just experimental. not in the spec.
*/
if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 &&
(niflags & NI_NODEADDR_FLAG_ANYCAST) == 0)
continue;
if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
(V_icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) {
continue;
}
/* now we can copy the address */
if (resid < sizeof(struct in6_addr) +
sizeof(u_int32_t)) {
/*
* We give up much more copy.
* Set the truncate flag and return.
*/
nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE;
IFNET_RUNLOCK();
return (copied);
}
/*
* Set the TTL of the address.
* The TTL value should be one of the following
* according to the specification:
*
* 1. The remaining lifetime of a DHCP lease on the
* address, or
* 2. The remaining Valid Lifetime of a prefix from
* which the address was derived through Stateless
* Autoconfiguration.
*
* Note that we currently do not support stateful
* address configuration by DHCPv6, so the former
* case can't happen.
*/
if (ifa6->ia6_lifetime.ia6t_expire == 0)
ltime = ND6_INFINITE_LIFETIME;
else {
if (ifa6->ia6_lifetime.ia6t_expire >
time_second)
ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second);
else
ltime = 0;
}
bcopy(&ltime, cp, sizeof(u_int32_t));
cp += sizeof(u_int32_t);
/* copy the address itself */
bcopy(&ifa6->ia_addr.sin6_addr, cp,
sizeof(struct in6_addr));
in6_clearscope((struct in6_addr *)cp); /* XXX */
cp += sizeof(struct in6_addr);
resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t));
copied += (sizeof(struct in6_addr) + sizeof(u_int32_t));
}
if (ifp0) /* we need search only on the specified IF */
break;
}
if (allow_deprecated == 0 && ifp_dep != NULL) {
ifp = ifp_dep;
allow_deprecated = 1;
goto again;
}
IFNET_RUNLOCK();
return (copied);
}
/*
* Hand a received ICMPv6 message to the raw sockets that want it.
*
* Walks V_ripcb and selects every IPv6 pcb whose next-header value is
* IPPROTO_ICMPV6, whose local/foreign addresses (when specified) equal the
* packet's destination/source, and whose ICMPv6 filter does not block the
* message type.  Each selected socket gets the packet, with everything in
* front of the ICMPv6 header stripped, appended to its receive buffer:
* all but the last selected socket receive a copy, the last one receives
* the original mbuf.  When no socket is selected the mbuf is freed and
* ip6s_delivered is decremented.
*
* mp  - address of the packet mbuf; the packet is consumed on every path
* off - offset of the ICMPv6 header from the start of the packet
*
* Always returns IPPROTO_DONE.
*
* XXX almost dup'ed code with rip6_input.
*/
static int
icmp6_rip6_input(struct mbuf **mp, int off)
{
INIT_VNET_INET(curvnet);
INIT_VNET_INET6(curvnet);
struct mbuf *m = *mp;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct in6pcb *in6p;
/* most recently selected pcb; it is kept read-locked until served */
struct in6pcb *last = NULL;
struct sockaddr_in6 fromsa;
struct icmp6_hdr *icmp6;
struct mbuf *opts = NULL;
#ifndef PULLDOWN_TEST
/* this is assumed to be safe. */
icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off);
#else
IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6));
if (icmp6 == NULL) {
/* m is already reclaimed */
return (IPPROTO_DONE);
}
#endif
/*
* XXX: the address may have embedded scope zone ID, which should be
* hidden from applications.
*/
/* build the source address that is reported to the receiving sockets */
bzero(&fromsa, sizeof(fromsa));
fromsa.sin6_family = AF_INET6;
fromsa.sin6_len = sizeof(struct sockaddr_in6);
fromsa.sin6_addr = ip6->ip6_src;
if (sa6_recoverscope(&fromsa)) {
m_freem(m);
return (IPPROTO_DONE);
}
INP_INFO_RLOCK(&V_ripcbinfo);
LIST_FOREACH(in6p, &V_ripcb, inp_list) {
if ((in6p->inp_vflag & INP_IPV6) == 0)
continue;
if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6)
continue;
if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) &&
!IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst))
continue;
if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) &&
!IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src))
continue;
/* the filter is examined with the pcb read-locked */
INP_RLOCK(in6p);
if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
in6p->in6p_icmp6filt)) {
INP_RUNLOCK(in6p);
continue;
}
/*
* Delivery lags one pcb behind the walk: the previously selected
* pcb is served with a copy here, so that the final one can be
* given the original mbuf after the loop.
*/
if (last) {
struct mbuf *n = NULL;
/*
* Recent network drivers tend to allocate a single
* mbuf cluster, rather than to make a couple of
* mbufs without clusters. Also, since the IPv6 code
* path tries to avoid m_pullup(), it is highly
* probable that we still have an mbuf cluster here
* even though the necessary length can be stored in an
* mbuf's internal buffer.
* Meanwhile, the default size of the receive socket
* buffer for raw sockets is not so large. This means
* the possibility of packet loss is relatively higher
* than before. To avoid this scenario, we copy the
* received data to a separate mbuf that does not use
* a cluster, if possible.
* XXX: it is better to copy the data after stripping
* intermediate headers.
*/
if ((m->m_flags & M_EXT) && m->m_next == NULL &&
m->m_len <= MHLEN) {
MGET(n, M_DONTWAIT, m->m_type);
if (n != NULL) {
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
bcopy(m->m_data, n->m_data,
m->m_len);
n->m_len = m->m_len;
} else {
m_free(n);
n = NULL;
}
}
}
/* fall back to m_copy() when the compact copy was not made */
if (n != NULL ||
(n = m_copy(m, 0, (int)M_COPYALL)) != NULL) {
if (last->in6p_flags & IN6P_CONTROLOPTS)
ip6_savecontrol(last, n, &opts);
/* strip intermediate headers */
m_adj(n, off);
SOCKBUF_LOCK(&last->in6p_socket->so_rcv);
if (sbappendaddr_locked(
&last->in6p_socket->so_rcv,
(struct sockaddr *)&fromsa, n, opts)
== 0) {
/* should notify about lost packet */
m_freem(n);
if (opts) {
m_freem(opts);
}
SOCKBUF_UNLOCK(
&last->in6p_socket->so_rcv);
} else
sorwakeup_locked(last->in6p_socket);
opts = NULL;
}
/* releases the read lock taken when this pcb was selected */
INP_RUNLOCK(last);
}
/* in6p remains read-locked while it is remembered here */
last = in6p;
}
INP_INFO_RUNLOCK(&V_ripcbinfo);
if (last) {
/* the final selected socket consumes the original mbuf */
if (last->in6p_flags & IN6P_CONTROLOPTS)
ip6_savecontrol(last, m, &opts);
/* strip intermediate headers */
m_adj(m, off);
/* avoid using mbuf clusters if possible (see above) */
if ((m->m_flags & M_EXT) && m->m_next == NULL &&
m->m_len <= MHLEN) {
struct mbuf *n;
MGET(n, M_DONTWAIT, m->m_type);
if (n != NULL) {
if (m_dup_pkthdr(n, m, M_NOWAIT)) {
bcopy(m->m_data, n->m_data, m->m_len);
n->m_len = m->m_len;
m_freem(m);
m = n;
} else {
m_freem(n);
n = NULL;
}
}
}
SOCKBUF_LOCK(&last->in6p_socket->so_rcv);
if (sbappendaddr_locked(&last->in6p_socket->so_rcv,
(struct sockaddr *)&fromsa, m, opts) == 0) {
m_freem(m);
if (opts)
m_freem(opts);
SOCKBUF_UNLOCK(&last->in6p_socket->so_rcv);
} else
sorwakeup_locked(last->in6p_socket);
INP_RUNLOCK(last);
} else {
/* nobody wanted the packet */
m_freem(m);
V_ip6stat.ip6s_delivered--;
}
return IPPROTO_DONE;
}
/*
* Reflect the ip6 packet back to the source.
* OFF points to the icmp6 header, counted from the top of the mbuf.
*
* The packet in m is rewritten in place: headers between the IPv6 and the
* ICMPv6 header are removed, the original source becomes the destination,
* a source address is chosen, the IPv6 header fields are reset, the ICMPv6
* checksum is recomputed and the result is passed to ip6_output().
*
* m is not usable by the caller afterwards: it is handed to ip6_output(),
* freed at "bad", or the function returns after a failed m_pullup()
* (presumably m_pullup() has released the chain in that case - confirm).
*/
void
icmp6_reflect(struct mbuf *m, size_t off)
{
INIT_VNET_INET6(curvnet);
struct ip6_hdr *ip6;
struct icmp6_hdr *icmp6;
struct in6_ifaddr *ia;
int plen;
int type, code;
struct ifnet *outif = NULL;
struct in6_addr origdst, *src = NULL;
/* too short to reflect */
if (off < sizeof(struct ip6_hdr)) {
nd6log((LOG_DEBUG,
"sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n",
(u_long)off, (u_long)sizeof(struct ip6_hdr),
__FILE__, __LINE__));
goto bad;
}
/*
* If there are extra headers between IPv6 and ICMPv6, strip
* off that header first.
*/
#ifdef DIAGNOSTIC
if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN)
panic("assumption failed in icmp6_reflect");
#endif
if (off > sizeof(struct ip6_hdr)) {
size_t l;
struct ip6_hdr nip6;
/*
* Save the IPv6 header, trim the l bytes of intermediate headers
* from the front, then write the saved header back so that it sits
* directly in front of the ICMPv6 header.
*/
l = off - sizeof(struct ip6_hdr);
m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6);
m_adj(m, l);
l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
if (m->m_len < l) {
if ((m = m_pullup(m, l)) == NULL)
return;
}
bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6));
} else /* off == sizeof(struct ip6_hdr) */ {
size_t l;
/* make both headers contiguous in the first mbuf */
l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr);
if (m->m_len < l) {
if ((m = m_pullup(m, l)) == NULL)
return;
}
}
/* plen: everything behind the IPv6 header; used for the checksum below */
plen = m->m_pkthdr.len - sizeof(struct ip6_hdr);
ip6 = mtod(m, struct ip6_hdr *);
ip6->ip6_nxt = IPPROTO_ICMPV6;
icmp6 = (struct icmp6_hdr *)(ip6 + 1);
type = icmp6->icmp6_type; /* keep type for statistics */
code = icmp6->icmp6_code; /* ditto. */
origdst = ip6->ip6_dst;
/*
* ip6_input() drops a packet if its src is multicast.
* So, the src is never multicast.
*/
ip6->ip6_dst = ip6->ip6_src;
/*
* If the incoming packet was addressed directly to us (i.e. unicast),
* use dst as the src for the reply.
* The IN6_IFF_NOTREADY case should be VERY rare, but is possible
* (for example) when we encounter an error while forwarding procedure
* destined to a duplicated address of ours.
* Note that ip6_getdstifaddr() may fail if we are in an error handling
* procedure of an outgoing packet of our own, in which case we need
* to search in the ifaddr list.
*/
if (!IN6_IS_ADDR_MULTICAST(&origdst)) {
if ((ia = ip6_getdstifaddr(m))) {
if (!(ia->ia6_flags &
(IN6_IFF_ANYCAST|IN6_IFF_NOTREADY)))
src = &ia->ia_addr.sin6_addr;
} else {
struct sockaddr_in6 d;
bzero(&d, sizeof(d));
d.sin6_family = AF_INET6;
d.sin6_len = sizeof(d);
d.sin6_addr = origdst;
ia = (struct in6_ifaddr *)
ifa_ifwithaddr((struct sockaddr *)&d);
if (ia &&
!(ia->ia6_flags &
(IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) {
src = &ia->ia_addr.sin6_addr;
}
}
}
if (src == NULL) {
int e;
struct sockaddr_in6 sin6;
struct route_in6 ro;
/*
* This case matches to multicasts, our anycast, or unicasts
* that we do not own. Select a source address based on the
* source address of the erroneous packet.
*/
bzero(&sin6, sizeof(sin6));
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(sin6);
sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */
bzero(&ro, sizeof(ro));
src = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, &outif, &e);
/* the route is only a by-product of the selection; drop it */
if (ro.ro_rt)
RTFREE(ro.ro_rt); /* XXX: we could use this */
if (src == NULL) {
char ip6buf[INET6_ADDRSTRLEN];
nd6log((LOG_DEBUG,
"icmp6_reflect: source can't be determined: "
"dst=%s, error=%d\n",
ip6_sprintf(ip6buf, &sin6.sin6_addr), e));
goto bad;
}
}
/* reset the IPv6 header fields for the outgoing packet */
ip6->ip6_src = *src;
ip6->ip6_flow = 0;
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
ip6->ip6_vfc |= IPV6_VERSION;
ip6->ip6_nxt = IPPROTO_ICMPV6;
/* hop limit: selected interface, else receiving interface, else default */
if (outif)
ip6->ip6_hlim = ND_IFINFO(outif)->chlim;
else if (m->m_pkthdr.rcvif) {
/* XXX: This may not be the outgoing interface */
ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim;
} else
ip6->ip6_hlim = V_ip6_defhlim;
/* the checksum field is zeroed before the checksum is computed */
icmp6->icmp6_cksum = 0;
icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6,
sizeof(struct ip6_hdr), plen);
/*
* XXX option handling
*/
m->m_flags &= ~(M_BCAST|M_MCAST);
ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
/* count the message on whatever interface outif names after the send */
if (outif)
icmp6_ifoutstat_inc(outif, type, code);
return;
bad:
m_freem(m);
return;
}
/*
 * icmp6_fasttimo: takes no arguments and does nothing.
 */
void
icmp6_fasttimo(void)
{
}
/*
 * Render the three addresses of a redirect as "(src=... dst=... tgt=...)"
 * for use in log messages.
 *
 * The result lives in a static buffer, so every call overwrites the string
 * returned by the previous one.
 */
static const char *
icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6,
    struct in6_addr *tgt6)
{
	static char diag[1024];
	char srcstr[INET6_ADDRSTRLEN];
	char dststr[INET6_ADDRSTRLEN];
	char tgtstr[INET6_ADDRSTRLEN];
	const char *s, *d, *t;

	/* each address is formatted into its own scratch buffer */
	s = ip6_sprintf(srcstr, src6);
	d = ip6_sprintf(dststr, dst6);
	t = ip6_sprintf(tgtstr, tgt6);

	snprintf(diag, sizeof(diag), "(src=%s dst=%s tgt=%s)", s, d, t);
	return (diag);
}
/*
* Process a received ICMPv6 Redirect.
*
* m   - the packet; it is freed on the "freeit" and "bad" paths
* off - offset of the redirect message from the start of the IPv6 header
*
* Redirects are ignored when V_ip6_forwarding is set or V_icmp6_rediraccept
* is clear.  Otherwise the message is validated (link-local source, hop
* limit 255, source equal to the gateway of the current route to the
* redirected destination, unicast destination, target either link-local or
* equal to the destination, well-formed ND options, matching link-layer
* address length).  A valid redirect updates the neighbor cache through
* nd6_cache_lladdr(), installs a host route through rtredirect() unless the
* target is the destination itself, and notifies the protocols through
* pfctlinput().  Validation failures jumping to "bad" also increment
* icp6s_badredirect.
*
* NOTE(review): m is dereferenced by the mtod() in the declarations before
* the "if (!m)" test below is reached, so that test cannot protect against
* a NULL m.  The early "return" for a missing rcvif does not free m either;
* confirm whether that is intended.
*
* The lines marked "-" are removals shown by the enclosing diff.
*/
void
icmp6_redirect_input(struct mbuf *m, int off)
{
INIT_VNET_INET6(curvnet);
struct ifnet *ifp;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct nd_redirect *nd_rd;
int icmp6len = ntohs(ip6->ip6_plen);
char *lladdr = NULL;
int lladdrlen = 0;
u_char *redirhdr = NULL;
int redirhdrlen = 0;
struct rtentry *rt = NULL;
int is_router;
int is_onlink;
struct in6_addr src6 = ip6->ip6_src;
struct in6_addr redtgt6;
struct in6_addr reddst6;
union nd_opts ndopts;
char ip6buf[INET6_ADDRSTRLEN];
if (!m)
return;
ifp = m->m_pkthdr.rcvif;
if (!ifp)
return;
/* XXX if we are router, we don't update route by icmp6 redirect */
if (V_ip6_forwarding)
goto freeit;
if (!V_icmp6_rediraccept)
goto freeit;
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, icmp6len,);
nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off);
#else
IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len);
if (nd_rd == NULL) {
V_icmp6stat.icp6s_tooshort++;
return;
}
#endif
/* work on local copies of target and destination, scoped to rcvif */
redtgt6 = nd_rd->nd_rd_target;
reddst6 = nd_rd->nd_rd_dst;
if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) ||
in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) {
goto freeit;
}
/* validation */
if (!IN6_IS_ADDR_LINKLOCAL(&src6)) {
nd6log((LOG_ERR,
"ICMP6 redirect sent from %s rejected; "
"must be from linklocal\n",
ip6_sprintf(ip6buf, &src6)));
goto bad;
}
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"ICMP6 redirect sent from %s rejected; "
"hlim=%d (must be 255)\n",
ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim));
goto bad;
}
{
/* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */
struct sockaddr_in6 sin6;
struct in6_addr *gw6;
bzero(&sin6, sizeof(sin6));
sin6.sin6_family = AF_INET6;
sin6.sin6_len = sizeof(struct sockaddr_in6);
bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6));
rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL);
if (rt) {
if (rt->rt_gateway == NULL ||
rt->rt_gateway->sa_family != AF_INET6) {
nd6log((LOG_ERR,
"ICMP6 redirect rejected; no route "
"with inet6 gateway found for redirect dst: %s\n",
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
RTFREE_LOCKED(rt);
goto bad;
}
gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr);
if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) {
nd6log((LOG_ERR,
"ICMP6 redirect rejected; "
"not equal to gw-for-src=%s (must be same): "
"%s\n",
ip6_sprintf(ip6buf, gw6),
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
RTFREE_LOCKED(rt);
goto bad;
}
} else {
nd6log((LOG_ERR,
"ICMP6 redirect rejected; "
"no route found for redirect dst: %s\n",
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
}
/* the route was only needed for the gateway comparison */
RTFREE_LOCKED(rt);
rt = NULL;
}
if (IN6_IS_ADDR_MULTICAST(&reddst6)) {
nd6log((LOG_ERR,
"ICMP6 redirect rejected; "
"redirect dst must be unicast: %s\n",
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
}
/* the two tests are independent, so both flags may end up set */
is_router = is_onlink = 0;
if (IN6_IS_ADDR_LINKLOCAL(&redtgt6))
is_router = 1; /* router case */
if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0)
is_onlink = 1; /* on-link destination case */
if (!is_router && !is_onlink) {
nd6log((LOG_ERR,
"ICMP6 redirect rejected; "
"neither router case nor onlink case: %s\n",
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
}
/* validation passed */
/* what remains behind the fixed redirect header are the ND options */
icmp6len -= sizeof(*nd_rd);
nd6_option_init(nd_rd + 1, icmp6len, &ndopts);
if (nd6_options(&ndopts) < 0) {
nd6log((LOG_INFO, "icmp6_redirect_input: "
"invalid ND option, rejected: %s\n",
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
/* nd6_options have incremented stats */
goto freeit;
}
if (ndopts.nd_opts_tgt_lladdr) {
lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
/* nd_opt_len is shifted left by 3, i.e. multiplied by 8 */
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
}
/* redirhdr and redirhdrlen are recorded here but not read again below */
if (ndopts.nd_opts_rh) {
redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len;
redirhdr = (u_char *)(ndopts.nd_opts_rh + 1); /* xxx */
}
/* option length must equal if_addrlen + 2 rounded up to a multiple of 8 */
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO,
"icmp6_redirect_input: lladdrlen mismatch for %s "
"(if %d, icmp6 packet %d): %s\n",
ip6_sprintf(ip6buf, &redtgt6),
ifp->if_addrlen, lladdrlen - 2,
icmp6_redirect_diag(&src6, &reddst6, &redtgt6)));
goto bad;
}
/* RFC 2461 8.3 */
- IF_AFDATA_LOCK(ifp);
nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT,
is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER);
- IF_AFDATA_UNLOCK(ifp);
if (!is_onlink) { /* better router case. perform rtredirect. */
/* perform rtredirect */
struct sockaddr_in6 sdst;
struct sockaddr_in6 sgw;
struct sockaddr_in6 ssrc;
bzero(&sdst, sizeof(sdst));
bzero(&sgw, sizeof(sgw));
bzero(&ssrc, sizeof(ssrc));
sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6;
sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len =
sizeof(struct sockaddr_in6);
/* gateway = redirect target, destination = redirected dst, source = sender */
bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr));
bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr));
rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw,
(struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST,
(struct sockaddr *)&ssrc);
}
/* finally update cached route in each socket via pfctlinput */
{
struct sockaddr_in6 sdst;
bzero(&sdst, sizeof(sdst));
sdst.sin6_family = AF_INET6;
sdst.sin6_len = sizeof(struct sockaddr_in6);
bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr));
pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst);
#ifdef IPSEC
key_sa_routechange((struct sockaddr *)&sdst);
#endif /* IPSEC */
}
/* success falls through to freeit; "bad" additionally counts the failure */
freeit:
m_freem(m);
return;
bad:
V_icmp6stat.icp6s_badredirect++;
m_freem(m);
}
/*
* Build and send an ICMPv6 Redirect to the sender of the packet m0.
*
* m0 - the packet that triggered the redirect; it is consumed on every
*      path (chained into the redirect as the redirected header option, or
*      freed)
* rt - route for m0's destination; only read here (rt_flags, rt_ifp,
*      rt_gateway)
*
* Nothing is sent when V_ip6_forwarding is clear, when the arguments fail
* the sanity check, when the source is not a neighbor on rt's interface,
* when the destination is multicast, when the rate limit hits, or when no
* usable link-local address is found on the interface.
*
* The redirect carries, space permitting, a target link-layer address
* option and a redirected header option holding as much of m0 as fits.
*
* The lines marked "-" and "+" are the removals/additions shown by the
* enclosing diff.
*
* NOTE(review): sip6 points into m0.  When the "noredhdropt" path frees m0,
* the in6_clearscope(&sip6->...) calls that follow still write through
* sip6 - confirm whether this is a use after free.
* NOTE(review): in the "+" version the safety-check "goto nolladdropt"
* leaves the block without reaching LLE_RUNLOCK(ln) - confirm whether
* nd6_lookup() returns the entry locked.
*/
void
icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt)
{
INIT_VNET_INET6(curvnet);
struct ifnet *ifp; /* my outgoing interface */
struct in6_addr *ifp_ll6;
struct in6_addr *router_ll6;
struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */
struct mbuf *m = NULL; /* newly allocated one */
struct ip6_hdr *ip6; /* m as struct ip6_hdr */
struct nd_redirect *nd_rd;
size_t maxlen;
u_char *p;
struct ifnet *outif = NULL;
struct sockaddr_in6 src_sa;
/* counted before any of the checks below, i.e. also when nothing is sent */
icmp6_errcount(&V_icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0);
/* if we are not router, we don't send icmp6 redirect */
if (!V_ip6_forwarding)
goto fail;
/* sanity check */
if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp))
goto fail;
/*
* Address check:
* the source address must identify a neighbor, and
* the destination address must not be a multicast address
* [RFC 2461, sec 8.2]
*/
sip6 = mtod(m0, struct ip6_hdr *);
bzero(&src_sa, sizeof(src_sa));
src_sa.sin6_family = AF_INET6;
src_sa.sin6_len = sizeof(src_sa);
src_sa.sin6_addr = sip6->ip6_src;
if (nd6_is_addr_neighbor(&src_sa, ifp) == 0)
goto fail;
if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst))
goto fail; /* what should we do here? */
/* rate limit */
if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0))
goto fail;
/*
* Since we are going to append up to 1280 bytes (= IPV6_MMTU),
* we almost always ask for an mbuf cluster for simplicity.
* (MHLEN < IPV6_MMTU is almost always true)
*/
#if IPV6_MMTU >= MCLBYTES
# error assumption failed about IPV6_MMTU and MCLBYTES
#endif
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m && IPV6_MMTU >= MHLEN)
MCLGET(m, M_DONTWAIT);
if (!m)
goto fail;
m->m_pkthdr.rcvif = NULL;
m->m_len = 0;
/* the redirect may use the free space of m, capped at IPV6_MMTU */
maxlen = M_TRAILINGSPACE(m);
maxlen = min(IPV6_MMTU, maxlen);
/* just for safety */
if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) +
((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) {
goto fail;
}
{
/* get ip6 linklocal address for ifp(my outgoing interface). */
struct in6_ifaddr *ia;
if ((ia = in6ifa_ifpforlinklocal(ifp,
IN6_IFF_NOTREADY|
IN6_IFF_ANYCAST)) == NULL)
goto fail;
ifp_ll6 = &ia->ia_addr.sin6_addr;
}
/* get ip6 linklocal address for the router. */
if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) {
struct sockaddr_in6 *sin6;
sin6 = (struct sockaddr_in6 *)rt->rt_gateway;
router_ll6 = &sin6->sin6_addr;
if (!IN6_IS_ADDR_LINKLOCAL(router_ll6))
router_ll6 = (struct in6_addr *)NULL;
} else
router_ll6 = (struct in6_addr *)NULL;
/* ip6 */
ip6 = mtod(m, struct ip6_hdr *);
ip6->ip6_flow = 0;
ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
ip6->ip6_vfc |= IPV6_VERSION;
/* ip6->ip6_plen will be set later */
ip6->ip6_nxt = IPPROTO_ICMPV6;
ip6->ip6_hlim = 255;
/* ip6->ip6_src must be linklocal addr for my outgoing if. */
bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr));
bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr));
/* ND Redirect */
nd_rd = (struct nd_redirect *)(ip6 + 1);
nd_rd->nd_rd_type = ND_REDIRECT;
nd_rd->nd_rd_code = 0;
nd_rd->nd_rd_reserved = 0;
if (rt->rt_flags & RTF_GATEWAY) {
/*
* nd_rd->nd_rd_target must be a link-local address in
* better router cases.
*/
if (!router_ll6)
goto fail;
bcopy(router_ll6, &nd_rd->nd_rd_target,
sizeof(nd_rd->nd_rd_target));
bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
sizeof(nd_rd->nd_rd_dst));
} else {
/* make sure redtgt == reddst */
bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target,
sizeof(nd_rd->nd_rd_target));
bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst,
sizeof(nd_rd->nd_rd_dst));
}
/* p is the write cursor for the options that follow the redirect header */
p = (u_char *)(nd_rd + 1);
if (!router_ll6)
goto nolladdropt;
{
/* target lladdr option */
int len;
struct llentry *ln;
struct nd_opt_hdr *nd_opt;
char *lladdr;
IF_AFDATA_LOCK(ifp);
ln = nd6_lookup(router_ll6, 0, ifp);
- if (!ln) {
- IF_AFDATA_UNLOCK(ifp);
+ IF_AFDATA_UNLOCK(ifp);
+ if (!ln)
goto nolladdropt;
- }
+
len = sizeof(*nd_opt) + ifp->if_addrlen;
len = (len + 7) & ~7; /* round by 8 */
/* safety check */
- if (len + (p - (u_char *)ip6) > maxlen) {
- IF_AFDATA_UNLOCK(ifp);
+ if (len + (p - (u_char *)ip6) > maxlen)
goto nolladdropt;
- }
+
if (ln->la_flags & LLE_VALID) {
nd_opt = (struct nd_opt_hdr *)p;
nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
nd_opt->nd_opt_len = len >> 3;
lladdr = (char *)(nd_opt + 1);
bcopy(&ln->ll_addr, lladdr, ifp->if_addrlen);
p += len;
}
- IF_AFDATA_UNLOCK(ifp);
+ LLE_RUNLOCK(ln);
}
nolladdropt:;
/* lengths so far: headers plus the lladdr option, if one was written */
m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
/* just to be safe */
#ifdef M_DECRYPTED /*not openbsd*/
if (m0->m_flags & M_DECRYPTED)
goto noredhdropt;
#endif
if (p - (u_char *)ip6 > maxlen)
goto noredhdropt;
{
/* redirected header option */
int len;
struct nd_opt_rd_hdr *nd_opt_rh;
/*
* compute the maximum size for icmp6 redirect header option.
* XXX room for auth header?
*/
len = maxlen - (p - (u_char *)ip6);
len &= ~7;
/* This is just for simplicity. */
/* only the first mbuf of m0 is kept; any further mbufs are dropped */
if (m0->m_pkthdr.len != m0->m_len) {
if (m0->m_next) {
m_freem(m0->m_next);
m0->m_next = NULL;
}
m0->m_pkthdr.len = m0->m_len;
}
/*
* Redirected header option spec (RFC2461 4.6.3) talks nothing
* about padding/truncate rule for the original IP packet.
* From the discussion on IPv6imp in Feb 1999,
* the consensus was:
* - "attach as much as possible" is the goal
* - pad if not aligned (original size can be guessed by
* original ip6 header)
* Following code adds the padding if it is simple enough,
* and truncates if not.
*/
if (m0->m_next || m0->m_pkthdr.len != m0->m_len)
panic("assumption failed in %s:%d", __FILE__,
__LINE__);
if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) {
/* not enough room, truncate */
m0->m_pkthdr.len = m0->m_len = len -
sizeof(*nd_opt_rh);
} else {
/* enough room, pad or truncate */
size_t extra;
extra = m0->m_pkthdr.len % 8;
if (extra) {
/* pad if easy enough, truncate if not */
if (8 - extra <= M_TRAILINGSPACE(m0)) {
/* pad */
m0->m_len += (8 - extra);
m0->m_pkthdr.len += (8 - extra);
} else {
/* truncate */
m0->m_pkthdr.len -= extra;
m0->m_len -= extra;
}
}
len = m0->m_pkthdr.len + sizeof(*nd_opt_rh);
m0->m_pkthdr.len = m0->m_len = len -
sizeof(*nd_opt_rh);
}
nd_opt_rh = (struct nd_opt_rd_hdr *)p;
bzero(nd_opt_rh, sizeof(*nd_opt_rh));
nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER;
nd_opt_rh->nd_opt_rh_len = len >> 3;
p += sizeof(*nd_opt_rh);
m->m_pkthdr.len = m->m_len = p - (u_char *)ip6;
/* connect m0 to m */
m_tag_delete_chain(m0, NULL);
m0->m_flags &= ~M_PKTHDR;
m->m_next = m0;
m->m_pkthdr.len = m->m_len + m0->m_len;
/* m0 now belongs to m's chain; clearing it prevents a second free */
m0 = NULL;
}
noredhdropt:;
if (m0) {
m_freem(m0);
m0 = NULL;
}
/* XXX: clear embedded link IDs in the inner header */
in6_clearscope(&sip6->ip6_src);
in6_clearscope(&sip6->ip6_dst);
in6_clearscope(&nd_rd->nd_rd_target);
in6_clearscope(&nd_rd->nd_rd_dst);
ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr));
/* the checksum field is zeroed before the checksum is computed */
nd_rd->nd_rd_cksum = 0;
nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6,
sizeof(*ip6), ntohs(ip6->ip6_plen));
/* send the packet to outside... */
ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL);
if (outif) {
icmp6_ifstat_inc(outif, ifs6_out_msg);
icmp6_ifstat_inc(outif, ifs6_out_redirect);
}
V_icmp6stat.icp6s_outhist[ND_REDIRECT]++;
return;
fail:
/* release whatever is still owned: the new mbuf and/or the original */
if (m)
m_freem(m);
if (m0)
m_freem(m0);
}
/*
* ICMPv6 socket option processing.
*/
int
icmp6_ctloutput(struct socket *so, struct sockopt *sopt)
{
int error = 0;
int optlen;
struct inpcb *inp = sotoinpcb(so);
int level, op, optname;
if (sopt) {
level = sopt->sopt_level;
op = sopt->sopt_dir;
optname = sopt->sopt_name;
optlen = sopt->sopt_valsize;
} else
level = op = optname = optlen = 0;
if (level != IPPROTO_ICMPV6) {
return EINVAL;
}
switch (op) {
case PRCO_SETOPT:
switch (optname) {
case ICMP6_FILTER:
{
struct icmp6_filter ic6f;
if (optlen != sizeof(ic6f)) {
error = EMSGSIZE;
break;
}
error = sooptcopyin(sopt, &ic6f, optlen, optlen);
if (error == 0) {
INP_WLOCK(inp);
*inp->in6p_icmp6filt = ic6f;
INP_WUNLOCK(inp);
}
break;
}
default:
error = ENOPROTOOPT;
break;
}
break;
case PRCO_GETOPT:
switch (optname) {
case ICMP6_FILTER:
{
struct icmp6_filter ic6f;
INP_RLOCK(inp);
ic6f = *inp->in6p_icmp6filt;
INP_RUNLOCK(inp);
error = sooptcopyout(sopt, &ic6f, sizeof(ic6f));
break;
}
default:
error = ENOPROTOOPT;
break;
}
break;
}
return (error);
}
/*
 * Perform rate limit check.
 * Returns 0 if it is okay to send the icmp6 packet.
 * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate
 * limitation.
 *
 * The decision is delegated to ppsratecheck() on the icmp6 error
 * packets-per-second state.
 *
 * XXX per-destination/type check necessary?
 *
 * dst - not used at this moment
 * type - not used at this moment
 * code - not used at this moment
 */
static int
icmp6_ratelimit(const struct in6_addr *dst, const int type,
    const int code)
{
	INIT_VNET_INET6(curvnet);

	/* PPS limit */
	if (ppsratecheck(&V_icmp6errppslim_last, &V_icmp6errpps_count,
	    V_icmp6errppslim))
		return 0;	/* okay to send */

	/* The packet is subject to rate limit */
	return 1;
}
Index: projects/arpv2_merge_1/sys/netinet6/in6.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet6/in6.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet6/in6.c (revision 185839)
@@ -1,2395 +1,2403 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $
*/
/*-
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)in.c 8.2 (Berkeley) 11/15/93
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/if_dl.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <net/if_llatbl.h>
#include <netinet/if_ether.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/mld6_var.h>
#include <netinet6/ip6_mroute.h>
#include <netinet6/in6_ifattach.h>
#include <netinet6/scope6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/vinet6.h>
/* malloc(9) type registered under the name "in6_multi". */
MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "internet multicast address");
/*
 * Definitions of some constant IPv6 addresses.
 */
const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
const struct in6_addr in6addr_nodelocal_allnodes =
IN6ADDR_NODELOCAL_ALLNODES_INIT;
const struct in6_addr in6addr_linklocal_allnodes =
IN6ADDR_LINKLOCAL_ALLNODES_INIT;
const struct in6_addr in6addr_linklocal_allrouters =
IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
/*
 * Constant masks initialized from the IN6MASK<n> macros.
 * NOTE(review): presumably netmasks of the prefix length named by the
 * suffix (in6mask32 is used below as a 32-bit multicast route mask) --
 * confirm against the macro definitions.
 */
const struct in6_addr in6mask0 = IN6MASK0;
const struct in6_addr in6mask32 = IN6MASK32;
const struct in6_addr in6mask64 = IN6MASK64;
const struct in6_addr in6mask96 = IN6MASK96;
const struct in6_addr in6mask128 = IN6MASK128;
/* An AF_INET6 socket address carrying the unspecified address. */
const struct sockaddr_in6 sa6_any =
{ sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 };
/* Forward declarations of file-local helpers. */
static int in6_lifaddr_ioctl __P((struct socket *, u_long, caddr_t,
struct ifnet *, struct thread *));
static int in6_ifinit __P((struct ifnet *, struct in6_ifaddr *,
struct sockaddr_in6 *, int));
static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *);
struct in6_multihead in6_multihead; /* XXX BSS initialization */
/*
 * Function-pointer hook taking an IPv6 address.
 * NOTE(review): presumably installed by the faith(4) driver -- confirm.
 */
int (*faithprefix_p)(struct in6_addr *);
/*
 * Convert an IPv6 netmask into a prefix length.
 *
 * mask - the netmask to examine.
 * lim0 - pointer one past the last mask byte the caller considers valid,
 *        or NULL.  When it is NULL, or lies more than sizeof(*mask) bytes
 *        beyond the start of the mask, the whole address is examined.
 *
 * Returns the number of leading one bits (0..128), or -1 when any bit
 * after the first zero bit is set, i.e. the mask is not contiguous.
 */
int
in6_mask2len(struct in6_addr *mask, u_char *lim0)
{
	u_char *lim = lim0, *p;
	int x = 0, y = 0;

	/*
	 * ignore the scope_id part.  The pointer difference is compared as
	 * an unsigned value, so a limit that precedes the mask is expected
	 * to be clamped to the full address length as well.
	 */
	if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask))
		lim = (u_char *)mask + sizeof(*mask);

	/* Count the leading all-ones bytes. */
	for (p = (u_char *)mask; p < lim && *p == 0xff; p++)
		x++;

	if (p < lim) {
		/* Count the leading one bits of the first partial byte. */
		while (y < 8 && (*p & (0x80 >> y)) != 0)
			y++;

		/*
		 * Every bit after the first zero bit must be clear.  This
		 * also has to hold when the partial byte starts with a zero
		 * bit (y == 0); otherwise a mask such as ff0f:: would be
		 * accepted as a /8.
		 */
		if ((*p & (0x00ff >> y)) != 0)
			return (-1);
		for (p = p + 1; p < lim; p++)
			if (*p != 0)
				return (-1);
	}

	return (x * 8 + y);
}
/*
 * Convert between a generic ifaddr and the in6_ifaddr that embeds it:
 * ifa2ia6() casts the ifaddr pointer, ia62ifa() takes the address of the
 * ia_ifa member.
 */
#define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa))
#define ia62ifa(ia6) (&((ia6)->ia_ifa))
/*
 * Handle IPv6 socket ioctls.
 *
 * Requests are dispatched in stages: multicast-routing counters go to
 * mrt6_ioctl (when set), address-selection policy requests to
 * in6_src_ioctl(), neighbor-discovery requests to nd6_ioctl(), scope
 * requests to scope6_set()/scope6_get()/scope6_get_default(), and
 * SIOC[AGD]LIFADDR to in6_lifaddr_ioctl().  The remaining per-address
 * requests are handled here after looking up the target address on ifp;
 * commands not recognized here are passed to the interface's if_ioctl.
 *
 * td may be NULL, in which case the privilege checks are skipped.
 * Returns 0 on success or an errno value.
 */
int
in6_control(struct socket *so, u_long cmd, caddr_t data,
struct ifnet *ifp, struct thread *td)
{
INIT_VNET_INET6(curvnet);
/* data is viewed as either request layout, depending on cmd. */
struct in6_ifreq *ifr = (struct in6_ifreq *)data;
struct in6_ifaddr *ia = NULL;
struct in6_aliasreq *ifra = (struct in6_aliasreq *)data;
struct sockaddr_in6 *sa6;
int error;
/* Multicast routing counters: only available when mrt6_ioctl is set. */
switch (cmd) {
case SIOCGETSGCNT_IN6:
case SIOCGETMIFCNT_IN6:
return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP);
}
/* Address selection policy table; does not need an interface. */
switch(cmd) {
case SIOCAADDRCTL_POLICY:
case SIOCDADDRCTL_POLICY:
if (td != NULL) {
error = priv_check(td, PRIV_NETINET_ADDRCTRL6);
if (error)
return (error);
}
return (in6_src_ioctl(cmd, data));
}
/* Everything below operates on an interface. */
if (ifp == NULL)
return (EOPNOTSUPP);
/* Neighbor discovery requests; the first group is privileged. */
switch (cmd) {
case SIOCSNDFLUSH_IN6:
case SIOCSPFXFLUSH_IN6:
case SIOCSRTRFLUSH_IN6:
case SIOCSDEFIFACE_IN6:
case SIOCSIFINFO_FLAGS:
if (td != NULL) {
error = priv_check(td, PRIV_NETINET_ND6);
if (error)
return (error);
}
/* FALLTHROUGH */
case OSIOCGIFINFO_IN6:
case SIOCGIFINFO_IN6:
case SIOCSIFINFO_IN6:
case SIOCGDRLST_IN6:
case SIOCGPRLST_IN6:
case SIOCGNBRINFO_IN6:
case SIOCGDEFIFACE_IN6:
return (nd6_ioctl(cmd, data, ifp));
}
/* Prefix ioctls are no longer supported; log and reject them. */
switch (cmd) {
case SIOCSIFPREFIX_IN6:
case SIOCDIFPREFIX_IN6:
case SIOCAIFPREFIX_IN6:
case SIOCCIFPREFIX_IN6:
case SIOCSGIFPREFIX_IN6:
case SIOCGIFPREFIX_IN6:
log(LOG_NOTICE,
"prefix ioctls are now invalidated. "
"please use ifconfig.\n");
return (EOPNOTSUPP);
}
/* Scope zone requests; only setting the scope is privileged. */
switch (cmd) {
case SIOCSSCOPE6:
if (td != NULL) {
error = priv_check(td, PRIV_NETINET_SCOPE6);
if (error)
return (error);
}
return (scope6_set(ifp,
(struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
case SIOCGSCOPE6:
return (scope6_get(ifp,
(struct scope6_id *)ifr->ifr_ifru.ifru_scope_id));
case SIOCGSCOPE6DEF:
return (scope6_get_default((struct scope6_id *)
ifr->ifr_ifru.ifru_scope_id));
}
/* SIOC[AGD]LIFADDR: add and delete are privileged, get is not. */
switch (cmd) {
case SIOCALIFADDR:
if (td != NULL) {
error = priv_check(td, PRIV_NET_ADDIFADDR);
if (error)
return (error);
}
return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
case SIOCDLIFADDR:
if (td != NULL) {
error = priv_check(td, PRIV_NET_DELIFADDR);
if (error)
return (error);
}
/* FALLTHROUGH */
case SIOCGLIFADDR:
return in6_lifaddr_ioctl(so, cmd, data, ifp, td);
}
/*
* Find address for this interface, if it exists.
*
* In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation
* only, and used the first interface address as the target of other
* operations (without checking ifra_addr). This was because netinet
* code/API assumed at most 1 interface address per interface.
* Since IPv6 allows a node to assign multiple addresses
* on a single interface, we almost always look and check the
* presence of ifra_addr, and reject invalid ones here.
* It also decreases duplicated code among SIOC*_IN6 operations.
*/
switch (cmd) {
case SIOCAIFADDR_IN6:
case SIOCSIFPHYADDR_IN6:
sa6 = &ifra->ifra_addr;
break;
case SIOCSIFADDR_IN6:
case SIOCGIFADDR_IN6:
case SIOCSIFDSTADDR_IN6:
case SIOCSIFNETMASK_IN6:
case SIOCGIFDSTADDR_IN6:
case SIOCGIFNETMASK_IN6:
case SIOCDIFADDR_IN6:
case SIOCGIFPSRCADDR_IN6:
case SIOCGIFPDSTADDR_IN6:
case SIOCGIFAFLAG_IN6:
case SIOCSNDFLUSH_IN6:
case SIOCSPFXFLUSH_IN6:
case SIOCSRTRFLUSH_IN6:
case SIOCGIFALIFETIME_IN6:
case SIOCSIFALIFETIME_IN6:
case SIOCGIFSTAT_IN6:
case SIOCGIFSTAT_ICMP6:
sa6 = &ifr->ifr_addr;
break;
default:
sa6 = NULL;
break;
}
/*
 * Normalize the scope of the supplied address (sa6_embedscope() when a
 * scope id was given, in6_setscope() with ifp otherwise), then look the
 * address up on ifp.  ia stays NULL when no usable address was supplied
 * or no match exists.
 */
if (sa6 && sa6->sin6_family == AF_INET6) {
int error = 0;
if (sa6->sin6_scope_id != 0)
error = sa6_embedscope(sa6, 0);
else
error = in6_setscope(&sa6->sin6_addr, ifp, NULL);
if (error != 0)
return (error);
ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr);
} else
ia = NULL;
/*
 * First pass: validate the request and check privileges.  Commands that
 * dereference ia in the second pass are rejected here when ia is NULL.
 */
switch (cmd) {
case SIOCSIFADDR_IN6:
case SIOCSIFDSTADDR_IN6:
case SIOCSIFNETMASK_IN6:
/*
* Since IPv6 allows a node to assign multiple addresses
* on a single interface, SIOCSIFxxx ioctls are deprecated.
*/
/* we decided to obsolete this command (20000704) */
return (EINVAL);
case SIOCDIFADDR_IN6:
/*
* for IPv4, we look for existing in_ifaddr here to allow
* "ifconfig if0 delete" to remove the first IPv4 address on
* the interface. For IPv6, as the spec allows multiple
* interface address from the day one, we consider "remove the
* first one" semantics to be not preferable.
*/
if (ia == NULL)
return (EADDRNOTAVAIL);
/* FALLTHROUGH */
case SIOCAIFADDR_IN6:
/*
* We always require users to specify a valid IPv6 address for
* the corresponding operation.
*/
if (ifra->ifra_addr.sin6_family != AF_INET6 ||
ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6))
return (EAFNOSUPPORT);
if (td != NULL) {
error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ?
PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR);
if (error)
return (error);
}
break;
case SIOCGIFADDR_IN6:
/* This interface is basically deprecated. use SIOCGIFCONF. */
/* FALLTHROUGH */
case SIOCGIFAFLAG_IN6:
case SIOCGIFNETMASK_IN6:
case SIOCGIFDSTADDR_IN6:
case SIOCGIFALIFETIME_IN6:
/* must think again about its semantics */
if (ia == NULL)
return (EADDRNOTAVAIL);
break;
case SIOCSIFALIFETIME_IN6:
{
struct in6_addrlifetime *lt;
if (td != NULL) {
error = priv_check(td, PRIV_NETINET_ALIFETIME6);
if (error)
return (error);
}
if (ia == NULL)
return (EADDRNOTAVAIL);
/* sanity for overflow - beware unsigned */
lt = &ifr->ifr_ifru.ifru_lifetime;
if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME &&
lt->ia6t_vltime + time_second < time_second) {
return EINVAL;
}
if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME &&
lt->ia6t_pltime + time_second < time_second) {
return EINVAL;
}
break;
}
}
/* Second pass: perform the operation. */
switch (cmd) {
case SIOCGIFADDR_IN6:
ifr->ifr_addr = ia->ia_addr;
if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0)
return (error);
break;
case SIOCGIFDSTADDR_IN6:
if ((ifp->if_flags & IFF_POINTOPOINT) == 0)
return (EINVAL);
/*
* XXX: should we check if ifa_dstaddr is NULL and return
* an error?
*/
ifr->ifr_dstaddr = ia->ia_dstaddr;
if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0)
return (error);
break;
case SIOCGIFNETMASK_IN6:
ifr->ifr_addr = ia->ia_prefixmask;
break;
case SIOCGIFAFLAG_IN6:
ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags;
break;
case SIOCGIFSTAT_IN6:
/* ifp was already checked for NULL above; this test is redundant. */
if (ifp == NULL)
return EINVAL;
bzero(&ifr->ifr_ifru.ifru_stat,
sizeof(ifr->ifr_ifru.ifru_stat));
ifr->ifr_ifru.ifru_stat =
*((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat;
break;
case SIOCGIFSTAT_ICMP6:
/* ifp was already checked for NULL above; this test is redundant. */
if (ifp == NULL)
return EINVAL;
bzero(&ifr->ifr_ifru.ifru_icmp6stat,
sizeof(ifr->ifr_ifru.ifru_icmp6stat));
ifr->ifr_ifru.ifru_icmp6stat =
*((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat;
break;
case SIOCGIFALIFETIME_IN6:
/*
 * Report the stored lifetimes, recomputing the absolute expiration
 * times from ia6_updatetime and clamping them to the largest
 * representable time_t.
 */
ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime;
if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
time_t maxexpire;
struct in6_addrlifetime *retlt =
&ifr->ifr_ifru.ifru_lifetime;
/*
* XXX: adjust expiration time assuming time_t is
* signed.
*/
maxexpire = (-1) &
~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
if (ia->ia6_lifetime.ia6t_vltime <
maxexpire - ia->ia6_updatetime) {
retlt->ia6t_expire = ia->ia6_updatetime +
ia->ia6_lifetime.ia6t_vltime;
} else
retlt->ia6t_expire = maxexpire;
}
if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
time_t maxexpire;
struct in6_addrlifetime *retlt =
&ifr->ifr_ifru.ifru_lifetime;
/*
* XXX: adjust expiration time assuming time_t is
* signed.
*/
maxexpire = (-1) &
~((time_t)1 << ((sizeof(maxexpire) * 8) - 1));
if (ia->ia6_lifetime.ia6t_pltime <
maxexpire - ia->ia6_updatetime) {
retlt->ia6t_preferred = ia->ia6_updatetime +
ia->ia6_lifetime.ia6t_pltime;
} else
retlt->ia6t_preferred = maxexpire;
}
break;
case SIOCSIFALIFETIME_IN6:
/* Store the new lifetimes; infinite ones get an absolute time of 0. */
ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime;
/* for sanity */
if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_expire =
time_second + ia->ia6_lifetime.ia6t_vltime;
} else
ia->ia6_lifetime.ia6t_expire = 0;
if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_preferred =
time_second + ia->ia6_lifetime.ia6t_pltime;
} else
ia->ia6_lifetime.ia6t_preferred = 0;
break;
case SIOCAIFADDR_IN6:
{
/* NOTE: this error shadows the function-level variable. */
int i, error = 0;
struct nd_prefixctl pr0;
struct nd_prefix *pr;
/*
* first, make or update the interface address structure,
* and link it to the list.
*/
if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0)
return (error);
if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr))
== NULL) {
/*
* this can happen when the user specify the 0 valid
* lifetime.
*/
break;
}
/*
* then, make the prefix on-link on the interface.
* XXX: we'd rather create the prefix before the address, but
* we need at least one address to install the corresponding
* interface route, so we configure the address first.
*/
/*
* convert mask to prefix length (prefixmask has already
* been validated in in6_update_ifa().
*/
bzero(&pr0, sizeof(pr0));
pr0.ndpr_ifp = ifp;
pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
NULL);
if (pr0.ndpr_plen == 128) {
break; /* we don't need to install a host route. */
}
pr0.ndpr_prefix = ifra->ifra_addr;
/* apply the mask for safety. */
for (i = 0; i < 4; i++) {
pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &=
ifra->ifra_prefixmask.sin6_addr.s6_addr32[i];
}
/*
* XXX: since we don't have an API to set prefix (not address)
* lifetimes, we just use the same lifetimes as addresses.
* The (temporarily) installed lifetimes can be overridden by
* later advertised RAs (when accept_rtadv is non 0), which is
* an intended behavior.
*/
pr0.ndpr_raf_onlink = 1; /* should be configurable? */
pr0.ndpr_raf_auto =
((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0);
pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime;
pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime;
/* add the prefix if not yet. */
if ((pr = nd6_prefix_lookup(&pr0)) == NULL) {
/*
* nd6_prelist_add will install the corresponding
* interface route.
*/
if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0)
return (error);
if (pr == NULL) {
log(LOG_ERR, "nd6_prelist_add succeeded but "
"no prefix\n");
return (EINVAL); /* XXX panic here? */
}
}
/* relate the address to the prefix */
if (ia->ia6_ndpr == NULL) {
ia->ia6_ndpr = pr;
pr->ndpr_refcnt++;
/*
* If this is the first autoconf address from the
* prefix, create a temporary address as well
* (when required).
*/
if ((ia->ia6_flags & IN6_IFF_AUTOCONF) &&
V_ip6_use_tempaddr && pr->ndpr_refcnt == 1) {
int e;
if ((e = in6_tmpifadd(ia, 1, 0)) != 0) {
log(LOG_NOTICE, "in6_control: failed "
"to create a temporary address, "
"errno=%d\n", e);
}
}
}
/*
* this might affect the status of autoconfigured addresses,
* that is, this address might make other addresses detached.
*/
pfxlist_onlink_check();
if (error == 0 && ia)
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
break;
}
case SIOCDIFADDR_IN6:
{
struct nd_prefix *pr;
/*
* If the address being deleted is the only one that owns
* the corresponding prefix, expire the prefix as well.
* XXX: theoretically, we don't have to worry about such
* relationship, since we separate the address management
* and the prefix management. We do this, however, to provide
* as much backward compatibility as possible in terms of
* the ioctl operation.
* Note that in6_purgeaddr() will decrement ndpr_refcnt.
*/
/* Save the prefix first; ia must not be used after the purge. */
pr = ia->ia6_ndpr;
in6_purgeaddr(&ia->ia_ifa);
if (pr && pr->ndpr_refcnt == 0)
prelist_remove(pr);
EVENTHANDLER_INVOKE(ifaddr_event, ifp);
break;
}
default:
/* Not handled here: hand the request to the interface driver. */
if (ifp == NULL || ifp->if_ioctl == 0)
return (EOPNOTSUPP);
return ((*ifp->if_ioctl)(ifp, cmd, data));
}
return (0);
}
/*
 * Update parameters of an IPv6 interface address.
 * If necessary, a new entry is created and linked into address chains.
 * This function is separated from in6_control().
 * XXX: should this be performed under splnet()?
 *
 * ifp   - interface the address belongs to.
 * ifra  - requested address, destination, prefix mask, lifetimes and flags.
 * ia    - existing address to modify, or NULL to create a new one.
 * flags - IN6_IFAUPDATE_DADDELAY requests a random delay before the
 *         multicast joins and DAD of a new address.
 *
 * Returns 0 on success (including the "nothing to do" case of a new
 * address with a zero valid lifetime) or an errno value.  On failure a
 * newly created address is removed again; an existing one is left in
 * place, possibly partially updated.
 */
int
in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
struct in6_ifaddr *ia, int flags)
{
INIT_VNET_INET6(ifp->if_vnet);
INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
int error = 0, hostIsNew = 0, plen = -1;
struct in6_ifaddr *oia;
struct sockaddr_in6 dst6;
struct in6_addrlifetime *lt;
struct in6_multi_mship *imm;
struct in6_multi *in6m_sol;
struct rtentry *rt;
int delay;
char ip6buf[INET6_ADDRSTRLEN];
/* Validate parameters */
if (ifp == NULL || ifra == NULL) /* this maybe redundant */
return (EINVAL);
/*
* The destination address for a p2p link must have a family
* of AF_UNSPEC or AF_INET6.
*/
if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
ifra->ifra_dstaddr.sin6_family != AF_INET6 &&
ifra->ifra_dstaddr.sin6_family != AF_UNSPEC)
return (EAFNOSUPPORT);
/*
* validate ifra_prefixmask. don't check sin6_family, netmask
* does not carry fields other than sin6_len.
*/
if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6))
return (EINVAL);
/*
* Because the IPv6 address architecture is classless, we require
* users to specify a (non 0) prefix length (mask) for a new address.
* We also require the prefix (when specified) mask is valid, and thus
* reject a non-consecutive mask.
*/
if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0)
return (EINVAL);
if (ifra->ifra_prefixmask.sin6_len != 0) {
/* The limit passed is the end of the mask as given by sin6_len. */
plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr,
(u_char *)&ifra->ifra_prefixmask +
ifra->ifra_prefixmask.sin6_len);
if (plen <= 0)
return (EINVAL);
} else {
/*
* In this case, ia must not be NULL. We just use its prefix
* length.
*/
plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);
}
/*
* If the destination address on a p2p interface is specified,
* and the address is a scoped one, validate/set the scope
* zone identifier.
*/
/* Work on a local copy; ifra->ifra_dstaddr itself is not modified. */
dst6 = ifra->ifra_dstaddr;
if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 &&
(dst6.sin6_family == AF_INET6)) {
struct in6_addr in6_tmp;
u_int32_t zoneid;
in6_tmp = dst6.sin6_addr;
if (in6_setscope(&in6_tmp, ifp, &zoneid))
return (EINVAL); /* XXX: should be impossible */
if (dst6.sin6_scope_id != 0) {
if (dst6.sin6_scope_id != zoneid)
return (EINVAL);
} else /* user omit to specify the ID. */
dst6.sin6_scope_id = zoneid;
/* convert into the internal form */
if (sa6_embedscope(&dst6, 0))
return (EINVAL); /* XXX: should be impossible */
}
/*
* The destination address can be specified only for a p2p or a
* loopback interface. If specified, the corresponding prefix length
* must be 128.
*/
if (ifra->ifra_dstaddr.sin6_family == AF_INET6) {
if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) {
/* XXX: noisy message */
nd6log((LOG_INFO, "in6_update_ifa: a destination can "
"be specified for a p2p or a loopback IF only\n"));
return (EINVAL);
}
if (plen != 128) {
nd6log((LOG_INFO, "in6_update_ifa: prefixlen should "
"be 128 when dstaddr is specified\n"));
return (EINVAL);
}
}
/* lifetime consistency check */
lt = &ifra->ifra_lifetime;
if (lt->ia6t_pltime > lt->ia6t_vltime)
return (EINVAL);
if (lt->ia6t_vltime == 0) {
/*
* the following log might be noisy, but this is a typical
* configuration mistake or a tool's bug.
*/
nd6log((LOG_INFO,
"in6_update_ifa: valid lifetime is 0 for %s\n",
ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr)));
if (ia == NULL)
return (0); /* there's nothing to do */
}
/*
* If this is a new address, allocate a new ifaddr and link it
* into chains.
*/
if (ia == NULL) {
hostIsNew = 1;
/*
* When in6_update_ifa() is called in a process of a received
* RA, it is called under an interrupt context. So, we should
* call malloc with M_NOWAIT.
*/
ia = (struct in6_ifaddr *) malloc(sizeof(*ia), M_IFADDR,
M_NOWAIT);
if (ia == NULL)
return (ENOBUFS);
bzero((caddr_t)ia, sizeof(*ia));
LIST_INIT(&ia->ia6_memberships);
/* Initialize the address and masks, and put time stamp */
IFA_LOCK_INIT(&ia->ia_ifa);
ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr;
ia->ia_addr.sin6_family = AF_INET6;
ia->ia_addr.sin6_len = sizeof(ia->ia_addr);
ia->ia6_createtime = time_second;
if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) {
/*
* XXX: some functions expect that ifa_dstaddr is not
* NULL for p2p interfaces.
*/
ia->ia_ifa.ifa_dstaddr =
(struct sockaddr *)&ia->ia_dstaddr;
} else {
ia->ia_ifa.ifa_dstaddr = NULL;
}
ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask;
ia->ia_ifp = ifp;
/* Append to the tail of the global in6_ifaddr chain. */
if ((oia = V_in6_ifaddr) != NULL) {
for ( ; oia->ia_next; oia = oia->ia_next)
continue;
oia->ia_next = ia;
} else
V_in6_ifaddr = ia;
ia->ia_ifa.ifa_refcnt = 1;
/* And to the interface's own address list. */
TAILQ_INSERT_TAIL(&ifp->if_addrlist, &ia->ia_ifa, ifa_list);
}
/* update timestamp */
ia->ia6_updatetime = time_second;
/* set prefix mask */
if (ifra->ifra_prefixmask.sin6_len) {
/*
* We prohibit changing the prefix length of an existing
* address, because
* + such an operation should be rare in IPv6, and
* + the operation would confuse prefix management.
*/
if (ia->ia_prefixmask.sin6_len &&
in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) {
nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an"
" existing (%s) address should not be changed\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
error = EINVAL;
goto unlink;
}
ia->ia_prefixmask = ifra->ifra_prefixmask;
}
/*
* If a new destination address is specified, scrub the old one and
* install the new destination. Note that the interface must be
* p2p or loopback (see the check above.)
*/
if (dst6.sin6_family == AF_INET6 &&
!IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) {
int e;
if ((ia->ia_flags & IFA_ROUTE) != 0 &&
(e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) {
nd6log((LOG_ERR, "in6_update_ifa: failed to remove "
"a route to the old destination: %s\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
/* proceed anyway... */
} else
ia->ia_flags &= ~IFA_ROUTE;
ia->ia_dstaddr = dst6;
}
/*
* Set lifetimes. We do not refer to ia6t_expire and ia6t_preferred
* to see if the address is deprecated or invalidated, but initialize
* these members for applications.
*/
ia->ia6_lifetime = ifra->ifra_lifetime;
if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_expire =
time_second + ia->ia6_lifetime.ia6t_vltime;
} else
ia->ia6_lifetime.ia6t_expire = 0;
if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
ia->ia6_lifetime.ia6t_preferred =
time_second + ia->ia6_lifetime.ia6t_pltime;
} else
ia->ia6_lifetime.ia6t_preferred = 0;
/* reset the interface and routing table appropriately. */
if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0)
goto unlink;
/*
* configure address flags.
*/
ia->ia6_flags = ifra->ifra_flags;
/*
* backward compatibility - if IN6_IFF_DEPRECATED is set from the
* userland, make it deprecated.
*/
if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) {
ia->ia6_lifetime.ia6t_pltime = 0;
ia->ia6_lifetime.ia6t_preferred = time_second;
}
/*
* Make the address tentative before joining multicast addresses,
* so that corresponding MLD responses would not have a tentative
* source address.
*/
ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */
if (hostIsNew && in6if_do_dad(ifp))
ia->ia6_flags |= IN6_IFF_TENTATIVE;
/*
* We are done if we have simply modified an existing address.
*/
if (!hostIsNew)
return (error);
/*
* Beyond this point, we should call in6_purgeaddr upon an error,
* not just go to unlink.
*/
/* Join necessary multicast groups */
in6m_sol = NULL;
if ((ifp->if_flags & IFF_MULTICAST) != 0) {
struct sockaddr_in6 mltaddr, mltmask;
struct in6_addr llsol;
/* join solicited multicast addr for new host id */
bzero(&llsol, sizeof(struct in6_addr));
llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL;
llsol.s6_addr32[1] = 0;
llsol.s6_addr32[2] = htonl(1);
llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3];
llsol.s6_addr8[12] = 0xff;
if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) {
/* XXX: should not happen */
log(LOG_ERR, "in6_update_ifa: "
"in6_setscope failed\n");
goto cleanup;
}
delay = 0;
if ((flags & IN6_IFAUPDATE_DADDELAY)) {
/*
* We need a random delay for DAD on the address
* being configured. It also means delaying
* transmission of the corresponding MLD report to
* avoid report collision.
* [draft-ietf-ipv6-rfc2462bis-02.txt]
*/
delay = arc4random() %
(MAX_RTR_SOLICITATION_DELAY * hz);
}
imm = in6_joingroup(ifp, &llsol, &error, delay);
if (imm == NULL) {
nd6log((LOG_WARNING,
"in6_update_ifa: addmulti failed for "
"%s on %s (errno=%d)\n",
ip6_sprintf(ip6buf, &llsol), if_name(ifp),
error));
in6_purgeaddr((struct ifaddr *)ia);
return (error);
}
LIST_INSERT_HEAD(&ia->ia6_memberships,
imm, i6mm_chain);
/* Remembered so the DAD delay below can exceed the MLD report delay. */
in6m_sol = imm->i6mm_maddr;
bzero(&mltmask, sizeof(mltmask));
mltmask.sin6_len = sizeof(struct sockaddr_in6);
mltmask.sin6_family = AF_INET6;
mltmask.sin6_addr = in6mask32;
#define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */
/*
* join link-local all-nodes address
*/
bzero(&mltaddr, sizeof(mltaddr));
mltaddr.sin6_len = sizeof(struct sockaddr_in6);
mltaddr.sin6_family = AF_INET6;
mltaddr.sin6_addr = in6addr_linklocal_allnodes;
if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) !=
0)
goto cleanup; /* XXX: should not fail */
/*
* XXX: do we really need this automatic routes?
* We should probably reconsider this stuff. Most applications
* actually do not need the routes, since they usually specify
* the outgoing interface.
*/
rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL);
if (rt) {
/* XXX: only works in !SCOPEDROUTING case. */
if (memcmp(&mltaddr.sin6_addr,
&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
MLTMASK_LEN)) {
RTFREE_LOCKED(rt);
rt = NULL;
}
}
if (!rt) {
/* XXX: we need RTF_CLONING to fake nd6_rtrequest */
error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
(struct sockaddr *)&ia->ia_addr,
(struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING,
(struct rtentry **)0);
if (error)
goto cleanup;
} else {
RTFREE_LOCKED(rt);
}
imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
if (!imm) {
nd6log((LOG_WARNING,
"in6_update_ifa: addmulti failed for "
"%s on %s (errno=%d)\n",
ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
if_name(ifp), error));
goto cleanup;
}
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
/*
* join node information group address
*/
#define hostnamelen strlen(V_hostname)
delay = 0;
if ((flags & IN6_IFAUPDATE_DADDELAY)) {
/*
* The spec doesn't say anything about delay for this
* group, but the same logic should apply.
*/
delay = arc4random() %
(MAX_RTR_SOLICITATION_DELAY * hz);
}
/* hostname_mtx is held only while V_hostname is read by in6_nigroup(). */
mtx_lock(&hostname_mtx);
if (in6_nigroup(ifp, V_hostname, hostnamelen,
&mltaddr.sin6_addr) == 0) {
mtx_unlock(&hostname_mtx);
imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error,
delay); /* XXX jinmei */
if (!imm) {
nd6log((LOG_WARNING, "in6_update_ifa: "
"addmulti failed for %s on %s "
"(errno=%d)\n",
ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
if_name(ifp), error));
/* XXX not very fatal, go on... */
} else {
LIST_INSERT_HEAD(&ia->ia6_memberships,
imm, i6mm_chain);
}
} else
mtx_unlock(&hostname_mtx);
#undef hostnamelen
/*
* join interface-local all-nodes address.
* (ff01::1%ifN, and ff01::%ifN/32)
*/
mltaddr.sin6_addr = in6addr_nodelocal_allnodes;
if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL))
!= 0)
goto cleanup; /* XXX: should not fail */
/* XXX: again, do we really need the route? */
rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL);
if (rt) {
if (memcmp(&mltaddr.sin6_addr,
&((struct sockaddr_in6 *)rt_key(rt))->sin6_addr,
MLTMASK_LEN)) {
RTFREE_LOCKED(rt);
rt = NULL;
}
}
if (!rt) {
error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr,
(struct sockaddr *)&ia->ia_addr,
(struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING,
(struct rtentry **)0);
if (error)
goto cleanup;
} else
RTFREE_LOCKED(rt);
imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0);
if (!imm) {
nd6log((LOG_WARNING, "in6_update_ifa: "
"addmulti failed for %s on %s "
"(errno=%d)\n",
ip6_sprintf(ip6buf, &mltaddr.sin6_addr),
if_name(ifp), error));
goto cleanup;
}
LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain);
#undef MLTMASK_LEN
}
/*
* Perform DAD, if needed.
* XXX It may be of use, if we can administratively
* disable DAD.
*/
if (hostIsNew && in6if_do_dad(ifp) &&
((ifra->ifra_flags & IN6_IFF_NODAD) == 0) &&
(ia->ia6_flags & IN6_IFF_TENTATIVE))
{
int mindelay, maxdelay;
delay = 0;
if ((flags & IN6_IFAUPDATE_DADDELAY)) {
/*
* We need to impose a delay before sending an NS
* for DAD. Check if we also needed a delay for the
* corresponding MLD message. If we did, the delay
* should be larger than the MLD delay (this could be
* relaxed a bit, but this simple logic is at least
* safe).
*/
mindelay = 0;
if (in6m_sol != NULL &&
in6m_sol->in6m_state == MLD_REPORTPENDING) {
mindelay = in6m_sol->in6m_timer;
}
maxdelay = MAX_RTR_SOLICITATION_DELAY * hz;
/* Avoid a modulo by zero when the range is empty. */
if (maxdelay - mindelay == 0)
delay = 0;
else {
delay =
(arc4random() % (maxdelay - mindelay)) +
mindelay;
}
}
nd6_dad_start((struct ifaddr *)ia, delay);
}
return (error);
/* Failure before any multicast group was joined. */
unlink:
/*
* XXX: if a change of an existing address failed, keep the entry
* anyway.
*/
if (hostIsNew)
in6_unlink_ifa(ia, ifp);
return (error);
/* Failure after group joins started: purge also leaves the groups. */
cleanup:
in6_purgeaddr(&ia->ia_ifa);
return error;
}
/*
 * Remove an IPv6 address from its interface: stop DAD for it, issue an
 * lla_lookup() with LLE_DELETE | LLE_IFADDR for the address under the
 * interface's AF data lock, leave every multicast group recorded in
 * ia6_memberships, and finally unlink the address via in6_unlink_ifa().
 */
void
in6_purgeaddr(struct ifaddr *ifa)
{
struct ifnet *ifp = ifa->ifa_ifp;
struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa;
- struct llentry *ln = NULL;
struct in6_multi_mship *imm;
/* stop DAD processing */
nd6_dad_stop(ifa);
IF_AFDATA_LOCK(ifp);
- ln = lla_lookup(LLTABLE6(ifp), (LLE_DELETE | LLE_IFADDR),
+ lla_lookup(LLTABLE6(ifp), (LLE_DELETE | LLE_IFADDR),
(struct sockaddr *)&ia->ia_addr);
- if (ln == NULL)
- log(LOG_INFO, "nd6_purgeaddr: interface address is missing from cache\n");
- else
- log(LOG_INFO, "nd6_purgeaddr: ifaddr cache = %p is deleted\n", ln);
IF_AFDATA_UNLOCK(ifp);
-
+
/*
* leave from multicast groups we have joined for the interface
*/
while ((imm = ia->ia6_memberships.lh_first) != NULL) {
LIST_REMOVE(imm, i6mm_chain);
in6_leavegroup(imm);
}
in6_unlink_ifa(ia, ifp);
}
/*
 * Detach an address from the interface's address list and from the
 * global in6_ifaddr chain, drop its reference on the owning prefix, and
 * release the reference that was held for the chain linkage.
 */
static void
in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp)
{
	INIT_VNET_INET6(ifp->if_vnet);
	struct in6_ifaddr *prev;
	int s;

	s = splnet();

	TAILQ_REMOVE(&ifp->if_addrlist, &ia->ia_ifa, ifa_list);

	/* Take the entry out of the singly linked global chain. */
	if (V_in6_ifaddr == ia) {
		V_in6_ifaddr = ia->ia_next;
	} else {
		/* Walk to the predecessor of the entry, if there is one. */
		prev = V_in6_ifaddr;
		while (prev->ia_next != NULL && prev->ia_next != ia)
			prev = prev->ia_next;

		if (prev->ia_next != NULL) {
			prev->ia_next = ia->ia_next;
		} else {
			/* search failed */
			printf("Couldn't unlink in6_ifaddr from in6_ifaddr\n");
		}
	}

	/*
	 * Give back the reference on the base prefix; an address without
	 * one is only reported.
	 */
	if (ia->ia6_ndpr != NULL) {
		ia->ia6_ndpr->ndpr_refcnt--;
		ia->ia6_ndpr = NULL;
	} else {
		nd6log((LOG_NOTICE,
		    "in6_unlink_ifa: autoconf'ed address "
		    "%p has no prefix\n", ia));
	}

	/*
	 * Removing an autoconfigured address may change the status of
	 * other (detached) addresses, so re-run the on-link check.
	 */
	if ((ia->ia6_flags & IN6_IFF_AUTOCONF) != 0)
		pfxlist_onlink_check();

	/*
	 * release another refcnt for the link from in6_ifaddr.
	 * Note that we should decrement the refcnt at least once for all *BSD.
	 */
	IFAFREE(&ia->ia_ifa);

	splx(s);
}
/*
 * Purge every AF_INET6 address configured on the interface, then call
 * in6_ifdetach() for it.
 */
void
in6_purgeif(struct ifnet *ifp)
{
	struct ifaddr *cur, *next;

	cur = TAILQ_FIRST(&ifp->if_addrlist);
	while (cur != NULL) {
		/* Fetch the successor first: purging unlinks the entry. */
		next = TAILQ_NEXT(cur, ifa_list);
		if (cur->ifa_addr->sa_family == AF_INET6)
			in6_purgeaddr(cur);
		cur = next;
	}

	in6_ifdetach(ifp);
}
/*
* SIOC[GAD]LIFADDR.
* SIOCGLIFADDR: get first address. (?)
* SIOCGLIFADDR with IFLR_PREFIX:
* get first address that matches the specified prefix.
* SIOCALIFADDR: add the specified address.
* SIOCALIFADDR with IFLR_PREFIX:
* add the specified prefix, filling hostid part from
* the first link-local address. prefixlen must be <= 64.
* SIOCDLIFADDR: delete the specified address.
* SIOCDLIFADDR with IFLR_PREFIX:
* delete the first address that matches the specified prefix.
* return values:
* EINVAL on invalid parameters
* EADDRNOTAVAIL on prefix match failed/specified address not found
* other values may be returned from in6_control()
*
* NOTE: SIOCALIFADDR (with IFLR_PREFIX set) allows a prefixlen of less than 64.
* This is to accommodate address naming schemes other than RFC2374
* in the future.
* RFC2373 defines interface id to be 64bit, but it allows non-RFC2374
* address encoding scheme. (see figure on page 8)
*/
static int
in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data,
    struct ifnet *ifp, struct thread *td)
{
	/*
	 * Handles SIOCGLIFADDR, SIOCALIFADDR and SIOCDLIFADDR by validating
	 * the if_laddrreq in `data', then either answering directly (GET) or
	 * translating the request into an in6_aliasreq that is handed to
	 * in6_control() (ADD -> SIOCAIFADDR_IN6, DELETE -> SIOCDIFADDR_IN6).
	 *
	 * Returns 0 on success, EINVAL on malformed parameters,
	 * EADDRNOTAVAIL when no matching address exists, EOPNOTSUPP for any
	 * other command, or whatever in6_control()/sa6_recoverscope() return.
	 */
	struct if_laddrreq *iflr = (struct if_laddrreq *)data;
	struct ifaddr *ifa;
	struct sockaddr *sa;

	/* sanity checks */
	if (!data || !ifp) {
		panic("invalid argument to in6_lifaddr_ioctl");
		/* NOTREACHED */
	}

	switch (cmd) {
	case SIOCGLIFADDR:
		/* address must be specified on GET with IFLR_PREFIX */
		if ((iflr->flags & IFLR_PREFIX) == 0)
			break;
		/* FALLTHROUGH */
	case SIOCALIFADDR:
	case SIOCDLIFADDR:
		/* address must be specified on ADD and DELETE */
		sa = (struct sockaddr *)&iflr->addr;
		if (sa->sa_family != AF_INET6)
			return EINVAL;
		if (sa->sa_len != sizeof(struct sockaddr_in6))
			return EINVAL;
		/* XXX need improvement */
		sa = (struct sockaddr *)&iflr->dstaddr;
		if (sa->sa_family && sa->sa_family != AF_INET6)
			return EINVAL;
		if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6))
			return EINVAL;
		break;
	default: /* shouldn't happen */
		return EOPNOTSUPP;
	}
	if (sizeof(struct in6_addr) * 8 < iflr->prefixlen)
		return EINVAL;

	switch (cmd) {
	case SIOCALIFADDR:
	    {
		struct in6_aliasreq ifra;
		struct in6_addr *hostid = NULL;
		int prefixlen;

		if ((iflr->flags & IFLR_PREFIX) != 0) {
			struct sockaddr_in6 *sin6;

			/*
			 * hostid is to fill in the hostid part of the
			 * address.  hostid points to the first link-local
			 * address attached to the interface.
			 */
			ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0);
			if (!ifa)
				return EADDRNOTAVAIL;
			hostid = IFA_IN6(ifa);

			/* prefixlen must be <= 64. */
			if (64 < iflr->prefixlen)
				return EINVAL;
			prefixlen = iflr->prefixlen;

			/* hostid part must be zero. */
			sin6 = (struct sockaddr_in6 *)&iflr->addr;
			if (sin6->sin6_addr.s6_addr32[2] != 0 ||
			    sin6->sin6_addr.s6_addr32[3] != 0) {
				return EINVAL;
			}
		} else
			prefixlen = iflr->prefixlen;

		/* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */
		bzero(&ifra, sizeof(ifra));
		bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name));

		bcopy(&iflr->addr, &ifra.ifra_addr,
		    ((struct sockaddr *)&iflr->addr)->sa_len);
		if (hostid) {
			/* fill in hostid part */
			ifra.ifra_addr.sin6_addr.s6_addr32[2] =
			    hostid->s6_addr32[2];
			ifra.ifra_addr.sin6_addr.s6_addr32[3] =
			    hostid->s6_addr32[3];
		}

		if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */
			bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr,
			    ((struct sockaddr *)&iflr->dstaddr)->sa_len);
			if (hostid) {
				ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] =
				    hostid->s6_addr32[2];
				ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] =
				    hostid->s6_addr32[3];
			}
		}

		ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
		in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen);

		ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX;
		return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td);
	    }
	case SIOCGLIFADDR:
	case SIOCDLIFADDR:
	    {
		struct in6_ifaddr *ia;
		struct in6_addr mask, candidate, match;
		struct sockaddr_in6 *sin6;
		int cmp;

		bzero(&mask, sizeof(mask));
		if (iflr->flags & IFLR_PREFIX) {
			/* lookup a prefix rather than address. */
			in6_prefixlen2mask(&mask, iflr->prefixlen);

			sin6 = (struct sockaddr_in6 *)&iflr->addr;
			bcopy(&sin6->sin6_addr, &match, sizeof(match));
			match.s6_addr32[0] &= mask.s6_addr32[0];
			match.s6_addr32[1] &= mask.s6_addr32[1];
			match.s6_addr32[2] &= mask.s6_addr32[2];
			match.s6_addr32[3] &= mask.s6_addr32[3];

			/* if you set extra bits, that's wrong */
			if (bcmp(&match, &sin6->sin6_addr, sizeof(match)))
				return EINVAL;

			cmp = 1;
		} else {
			if (cmd == SIOCGLIFADDR) {
				/* on getting an address, take the 1st match */
				cmp = 0;	/* XXX */
			} else {
				/* on deleting an address, do exact match */
				in6_prefixlen2mask(&mask, 128);
				sin6 = (struct sockaddr_in6 *)&iflr->addr;
				bcopy(&sin6->sin6_addr, &match, sizeof(match));

				cmp = 1;
			}
		}

		TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
			if (ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			/* cmp == 0: the first AF_INET6 address is the answer */
			if (!cmp)
				break;

			/*
			 * XXX: this is adhoc, but is necessary to allow
			 * a user to specify fe80::/64 (not /10) for a
			 * link-local address.
			 */
			bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate));
			in6_clearscope(&candidate);
			candidate.s6_addr32[0] &= mask.s6_addr32[0];
			candidate.s6_addr32[1] &= mask.s6_addr32[1];
			candidate.s6_addr32[2] &= mask.s6_addr32[2];
			candidate.s6_addr32[3] &= mask.s6_addr32[3];
			if (IN6_ARE_ADDR_EQUAL(&candidate, &match))
				break;
		}
		if (!ifa)
			return EADDRNOTAVAIL;
		ia = ifa2ia6(ifa);

		if (cmd == SIOCGLIFADDR) {
			int error;

			/* fill in the if_laddrreq structure */
			bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len);
			error = sa6_recoverscope(
			    (struct sockaddr_in6 *)&iflr->addr);
			if (error != 0)
				return (error);

			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
				bcopy(&ia->ia_dstaddr, &iflr->dstaddr,
				    ia->ia_dstaddr.sin6_len);
				error = sa6_recoverscope(
				    (struct sockaddr_in6 *)&iflr->dstaddr);
				if (error != 0)
					return (error);
			} else
				bzero(&iflr->dstaddr, sizeof(iflr->dstaddr));

			iflr->prefixlen =
			    in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL);

			iflr->flags = ia->ia6_flags;	/* XXX */

			return 0;
		} else {
			struct in6_aliasreq ifra;

			/* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */
			bzero(&ifra, sizeof(ifra));
			bcopy(iflr->iflr_name, ifra.ifra_name,
			    sizeof(ifra.ifra_name));

			bcopy(&ia->ia_addr, &ifra.ifra_addr,
			    ia->ia_addr.sin6_len);
			if ((ifp->if_flags & IFF_POINTOPOINT) != 0) {
				bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr,
				    ia->ia_dstaddr.sin6_len);
			} else {
				bzero(&ifra.ifra_dstaddr,
				    sizeof(ifra.ifra_dstaddr));
			}
			/*
			 * The prefix mask belongs in ifra_prefixmask; copying
			 * it into ifra_dstaddr would clobber the destination
			 * set just above and leave the mask zeroed.
			 */
			bcopy(&ia->ia_prefixmask, &ifra.ifra_prefixmask,
			    ia->ia_prefixmask.sin6_len);

			ifra.ifra_flags = ia->ia6_flags;
			return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra,
			    ifp, td);
		}
	    }
	}

	return EOPNOTSUPP;	/* just for safety */
}
/*
* Initialize an interface's internet6 address
* and routing table entry.
*/
static int
in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia,
struct sockaddr_in6 *sin6, int newhost)
{
/*
* Stores *sin6 as the address of ia, lets the driver see the new address
* via SIOCSIFADDR when at most one AF_INET6 address is on the interface,
* installs a host route for a /128 address that has an AF_INET6
* destination, and, when newhost is non-zero, creates a permanent
* link-layer table entry for the address.
* Returns 0 or the error from the driver ioctl / route installation.
*/
int error = 0, plen, ifacount = 0;
int s = splimp();
struct ifaddr *ifa;
/*
* Give the interface a chance to initialize
* if this is its first address,
* and to validate the address if necessary.
*/
/* Count the AF_INET6 addresses currently on the interface's list. */
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
ifacount++;
}
ia->ia_addr = *sin6;
if (ifacount <= 1 && ifp->if_ioctl) {
IFF_LOCKGIANT(ifp);
error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia);
IFF_UNLOCKGIANT(ifp);
if (error) {
splx(s);
return (error);
}
}
splx(s);
ia->ia_ifa.ifa_metric = ifp->if_metric;
/* we could do in(6)_socktrim here, but just omit it at this moment. */
/*
* Special case:
* If a new destination address is specified for a point-to-point
* interface, install a route to the destination as an interface
* direct route.
* XXX: the logic below rejects assigning multiple addresses on a p2p
* interface that share the same destination.
*/
/* The #if 0 branch is compiled out; the #else branch below is the live code. */
#if 0 /* QING - verify */
plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
ia->ia_dstaddr.sin6_family == AF_INET6) {
int rtflags = RTF_UP | RTF_HOST;
struct rtentry *rt = NULL, **rtp = NULL;
if (nd6_need_cache(ifp) != 0) {
rtflags |= RTF_LLINFO;
rtp = &rt;
}
error = rtrequest(RTM_ADD,
(struct sockaddr *)&ia->ia_dstaddr,
(struct sockaddr *)&ia->ia_addr,
(struct sockaddr *)&ia->ia_prefixmask,
ia->ia_flags | rtflags, rtp);
if (error != 0)
return (error);
if (rt != NULL) {
struct llinfo_nd6 *ln;
RT_LOCK(rt);
ln = (struct llinfo_nd6 *)rt->rt_llinfo;
if (ln != NULL) {
/*
* Set the state to STALE because we don't
* have to perform address resolution on this
* link.
*/
ln->ln_state = ND6_LLINFO_STALE;
}
RT_REMREF(rt);
RT_UNLOCK(rt);
}
ia->ia_flags |= IFA_ROUTE;
}
if (plen < 128) {
/*
* The RTF_CLONING flag is necessary for in6_is_ifloop_auto().
*/
ia->ia_ifa.ifa_flags |= RTF_CLONING;
}
#else
plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */
/* Only a /128 with an AF_INET6 destination and no route yet gets a host route. */
if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 &&
ia->ia_dstaddr.sin6_family == AF_INET6) {
if ((error = rtinit(&(ia->ia_ifa), (int)RTM_ADD,
RTF_UP | RTF_HOST)) != 0)
return (error);
ia->ia_flags |= IFA_ROUTE;
}
if (plen < 128) {
/*
* The RTF_CLONING flag is necessary for in6_is_ifloop_auto().
*/
ia->ia_ifa.ifa_flags |= RTF_CLONING;
}
#endif
/* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */
if (newhost) {
struct llentry *ln;
IF_AFDATA_LOCK(ifp);
ia->ia_ifa.ifa_rtrequest = NULL;
/* Qing
* we need to report rt_newaddrmsg
*/
/*
* In the new ("+") revision the entry is requested with LLE_EXCLUSIVE,
* the AF data lock is dropped right after the lookup, and the entry is
* released with LLE_WUNLOCK() once its fields have been updated.
*/
- ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR),
+ ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR | LLE_EXCLUSIVE),
(struct sockaddr *)&ia->ia_addr);
+ IF_AFDATA_UNLOCK(ifp);
if (ln) {
ln->la_expire = 0; /* for IPv6 this means permanent */
ln->ln_state = ND6_LLINFO_REACHABLE;
+ LLE_WUNLOCK(ln);
}
- IF_AFDATA_UNLOCK(ifp);
}
return (error);
}
/*
 * Join the multicast group `addr' on `ifp' and return a membership record
 * for it.  On failure NULL is returned and *errorp holds the reason
 * (ENOBUFS when the record itself cannot be allocated).
 */
struct in6_multi_mship *
in6_joingroup(struct ifnet *ifp, struct in6_addr *addr,
    int *errorp, int delay)
{
	struct in6_multi_mship *mship;

	mship = malloc(sizeof(*mship), M_IP6MADDR, M_NOWAIT);
	if (mship == NULL) {
		*errorp = ENOBUFS;
		return NULL;
	}

	mship->i6mm_maddr = in6_addmulti(addr, ifp, errorp, delay);
	if (mship->i6mm_maddr != NULL)
		return mship;

	/* *errorp is already set */
	free(mship, M_IP6MADDR);
	return NULL;
}
int
in6_leavegroup(struct in6_multi_mship *imm)
{
if (imm->i6mm_maddr)
in6_delmulti(imm->i6mm_maddr);
free(imm, M_IP6MADDR);
return 0;
}
/*
 * Return the first link-local IPv6 address on `ifp' whose ia6_flags share
 * no bit with `ignoreflags', or NULL when the interface has none.
 */
struct in6_ifaddr *
in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags)
{
	struct ifaddr *ifa;
	struct in6_ifaddr *ia6;

	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != AF_INET6)
			continue;
		if (!IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa)))
			continue;

		ia6 = (struct in6_ifaddr *)ifa;
		if ((ia6->ia6_flags & ignoreflags) == 0)
			return (ia6);
	}

	return (NULL);
}
/*
 * Return the IPv6 interface address on `ifp' that equals `addr',
 * or NULL when the interface does not carry that address.
 */
struct in6_ifaddr *
in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr)
{
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family == AF_INET6 &&
		    IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa)))
			return ((struct in6_ifaddr *)ifa);
	}

	return (NULL);
}
/*
 * Convert IP6 address to printable (loggable) representation.  Caller
 * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long.
 *
 * The first run of two or more all-zero 16-bit groups is compressed to
 * "::"; leading zeros inside a group are dropped.  Returns ip6buf.
 */
static char digits[] = "0123456789abcdef";

char *
ip6_sprintf(char *ip6buf, const struct in6_addr *addr)
{
	int i;
	char *cp;
	const u_int16_t *a = (const u_int16_t *)addr;
	const u_int8_t *d;
	/* dcolon: 0 = "::" not emitted yet, 1 = inside the run, 2 = run done */
	int dcolon = 0, zero = 0;

	cp = ip6buf;

	for (i = 0; i < 8; i++) {
		if (dcolon == 1) {
			if (*a == 0) {
				if (i == 7)
					*cp++ = ':';
				a++;
				continue;
			} else
				dcolon = 2;
		}
		if (*a == 0) {
			/*
			 * Only look at the following group when there is one:
			 * for the last group (i == 7) a + 1 points past the
			 * end of the address.
			 */
			if (dcolon == 0 && i < 7 && *(a + 1) == 0) {
				if (i == 0)
					*cp++ = ':';
				*cp++ = ':';
				dcolon = 1;
			} else {
				*cp++ = '0';
				*cp++ = ':';
			}
			a++;
			continue;
		}
		d = (const u_int8_t *)a;
		/* Try to eliminate leading zeros in printout like in :0001. */
		zero = 1;
		*cp = digits[*d >> 4];
		if (*cp != '0') {
			zero = 0;
			cp++;
		}
		*cp = digits[*d++ & 0xf];
		if (zero == 0 || (*cp != '0')) {
			zero = 0;
			cp++;
		}
		*cp = digits[*d >> 4];
		if (zero == 0 || (*cp != '0')) {
			zero = 0;
			cp++;
		}
		*cp++ = digits[*d & 0xf];
		*cp++ = ':';
		a++;
	}
	/* Overwrite the trailing separator with the terminator. */
	*--cp = '\0';
	return (ip6buf);
}
int
in6_localaddr(struct in6_addr *in6)
{
INIT_VNET_INET6(curvnet);
struct in6_ifaddr *ia;
if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6))
return 1;
for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr,
&ia->ia_prefixmask.sin6_addr)) {
return 1;
}
}
return (0);
}
int
in6_is_addr_deprecated(struct sockaddr_in6 *sa6)
{
INIT_VNET_INET6(curvnet);
struct in6_ifaddr *ia;
for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
&sa6->sin6_addr) &&
(ia->ia6_flags & IN6_IFF_DEPRECATED) != 0)
return (1); /* true */
/* XXX: do we still have to go thru the rest of the list? */
}
return (0); /* false */
}
/*
 * Return the number of leading bits (0..128) that `src' and `dst' have
 * in common.  The address length of 16 bytes is hard coded.
 */
int
in6_matchlen(struct in6_addr *src, struct in6_addr *dst)
{
	const unsigned char *sp = (const unsigned char *)src;
	const unsigned char *dp = (const unsigned char *)dst;
	unsigned char diff;
	int idx, matched = 0;

	for (idx = 0; idx < 16; idx++) {
		diff = sp[idx] ^ dp[idx];
		if (diff == 0) {
			/* whole byte identical */
			matched += 8;
			continue;
		}
		/* count the equal high-order bits of the first differing byte */
		for (; (diff & 0x80) == 0; diff <<= 1)
			matched++;
		break;
	}

	return matched;
}
/*
 * Return 1 when the first `len' bits of `p1' and `p2' are identical and
 * 0 otherwise.  A `len' outside 0..128 is logged and treated as "not equal".
 * XXX: to be scope conscious
 */
int
in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len)
{
	int nbytes, nbits, shift;

	/* sanity check */
	if (len < 0 || len > 128) {
		log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n",
		    len);
		return (0);
	}

	nbytes = len / 8;
	nbits = len % 8;

	/* whole bytes first */
	if (bcmp(&p1->s6_addr, &p2->s6_addr, nbytes) != 0)
		return (0);
	if (nbits == 0)
		return (1);

	/* then the high-order bits of the partially covered byte */
	shift = 8 - nbits;
	if ((p1->s6_addr[nbytes] >> shift) != (p2->s6_addr[nbytes] >> shift))
		return (0);

	return (1);
}
/*
 * Fill *maskp with a netmask whose first `len' bits are set.
 * A `len' outside 0..128 is logged and *maskp is left untouched.
 */
void
in6_prefixlen2mask(struct in6_addr *maskp, int len)
{
	int full, rest, pos;

	/* sanity check */
	if (len < 0 || len > 128) {
		log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n",
		    len);
		return;
	}

	bzero(maskp, sizeof(*maskp));

	full = len / 8;		/* bytes that are entirely ones */
	rest = len % 8;		/* leading one-bits in the following byte */

	pos = 0;
	while (pos < full)
		maskp->s6_addr[pos++] = 0xff;

	if (rest != 0)
		maskp->s6_addr[full] = (0xff << (8 - rest)) & 0xff;
}
/*
* return the best address out of the same scope. if no address was
* found, return the first valid address from designated IF.
*/
struct in6_ifaddr *
in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst)
{
INIT_VNET_INET6(curvnet);
int dst_scope = in6_addrscope(dst), blen = -1, tlen;
struct ifaddr *ifa;
struct in6_ifaddr *besta = 0;
struct in6_ifaddr *dep[2]; /* last-resort: deprecated */
dep[0] = dep[1] = NULL;
/*
* We first look for addresses in the same scope.
* If there is one, return it.
* If two or more, return one which matches the dst longest.
* If none, return one of global addresses assigned other ifs.
*/
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
continue; /* XXX: is there any case to allow anycast? */
if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
continue; /* don't use this interface */
if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
continue;
if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
if (V_ip6_use_deprecated)
dep[0] = (struct in6_ifaddr *)ifa;
continue;
}
if (dst_scope == in6_addrscope(IFA_IN6(ifa))) {
/*
* call in6_matchlen() as few as possible
*/
if (besta) {
if (blen == -1)
blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst);
tlen = in6_matchlen(IFA_IN6(ifa), dst);
if (tlen > blen) {
blen = tlen;
besta = (struct in6_ifaddr *)ifa;
}
} else
besta = (struct in6_ifaddr *)ifa;
}
}
if (besta)
return (besta);
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST)
continue; /* XXX: is there any case to allow anycast? */
if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY)
continue; /* don't use this interface */
if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED)
continue;
if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) {
if (V_ip6_use_deprecated)
dep[1] = (struct in6_ifaddr *)ifa;
continue;
}
return (struct in6_ifaddr *)ifa;
}
/* use the last-resort values, that are, deprecated addresses */
if (dep[0])
return dep[0];
if (dep[1])
return dep[1];
return NULL;
}
/*
 * Called when an interface becomes IFF_UP: start DAD, after a random
 * delay, on every IPv6 address still marked tentative, then run
 * in6_ifattach() for the interface.
 */
void
in6_if_up(struct ifnet *ifp)
{
	struct ifaddr *ifa;

	TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
		if (ifa->ifa_addr->sa_family != AF_INET6)
			continue;
		if ((((struct in6_ifaddr *)ifa)->ia6_flags &
		    IN6_IFF_TENTATIVE) == 0)
			continue;

		/*
		 * The TENTATIVE flag was likely set by hand
		 * beforehand, implicitly indicating the need for DAD.
		 * We may be able to skip the random delay in this
		 * case, but we impose delays just in case.
		 */
		nd6_dad_start(ifa,
		    arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz));
	}

	/*
	 * special cases, like 6to4, are handled in in6_ifattach
	 */
	in6_ifattach(ifp, NULL);
}
/*
 * Decide whether duplicate address detection should run on `ifp'.
 * Returns 1 when it should and 0 when it must be skipped.
 */
int
in6if_do_dad(struct ifnet *ifp)
{

	if ((ifp->if_flags & IFF_LOOPBACK) != 0)
		return (0);

	switch (ifp->if_type) {
#ifdef IFT_DUMMY
	case IFT_DUMMY:
#endif
	case IFT_FAITH:
		/*
		 * These interfaces do not have the IFF_LOOPBACK flag,
		 * but loop packets back.  We do not have to do DAD on such
		 * interfaces.  We should even omit it, because loop-backed
		 * NS would confuse the DAD procedure.
		 */
		return (0);
	default:
		break;
	}

	/*
	 * Our DAD routine requires the interface up and running.
	 * However, some interfaces can be up before the RUNNING
	 * status.  Additionally, users may try to assign addresses
	 * before the interface becomes up (or running).
	 * We simply skip DAD in such a case as a work around.
	 * XXX: we should rather mark "tentative" on such addresses,
	 * and do DAD after the interface becomes ready.
	 */
	if ((ifp->if_flags & IFF_UP) == 0 ||
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return (0);

	return (1);
}
/*
* Calculate max IPv6 MTU through all the interfaces and store it
* to in6_maxmtu.
*/
void
in6_setmaxmtu(void)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
unsigned long maxmtu = 0;
struct ifnet *ifp;
IFNET_RLOCK();
for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
ifp = TAILQ_NEXT(ifp, if_list)) {
/* this function can be called during ifnet initialization */
if (!ifp->if_afdata[AF_INET6])
continue;
if ((ifp->if_flags & IFF_LOOPBACK) == 0 &&
IN6_LINKMTU(ifp) > maxmtu)
maxmtu = IN6_LINKMTU(ifp);
}
IFNET_RUNLOCK();
if (maxmtu) /* update only when maxmtu is positive */
V_in6_maxmtu = maxmtu;
}
/*
 * Provide the length of interface identifiers to be used for the link attached
 * to the given interface.  The length should be defined in "IPv6 over
 * xxx-link" document.  Note that address architecture might also define
 * the length for a particular set of address prefixes, regardless of the
 * link type.  As clarified in rfc2462bis, those two definitions should be
 * consistent, and those really are as of August 2004.
 *
 * Every link type handled here yields 64; an unrecognized type also yields
 * 64 but is reported on the console first.
 */
int
in6_if2idlen(struct ifnet *ifp)
{

	switch (ifp->if_type) {
	case IFT_ETHER:		/* RFC2464 */
#ifdef IFT_PROPVIRTUAL
	case IFT_PROPVIRTUAL:	/* XXX: no RFC. treat it as ether */
#endif
#ifdef IFT_L2VLAN
	case IFT_L2VLAN:	/* ditto */
#endif
#ifdef IFT_IEEE80211
	case IFT_IEEE80211:	/* ditto */
#endif
#ifdef IFT_MIP
	case IFT_MIP:		/* ditto */
#endif
	case IFT_FDDI:		/* RFC2467 */
	case IFT_ISO88025:	/* RFC2470 (IPv6 over Token Ring) */
	case IFT_PPP:		/* RFC2472 */
	case IFT_ARCNET:	/* RFC2497 */
	case IFT_FRELAY:	/* RFC2590 */
	case IFT_IEEE1394:	/* RFC3146 */
	case IFT_GIF:		/* draft-ietf-v6ops-mech-v2-07 */
	case IFT_LOOP:		/* XXX: is this really correct? */
		return (64);
	default:
		break;
	}

	/*
	 * Unknown link type:
	 * It might be controversial to use the today's common constant
	 * of 64 for these cases unconditionally.  For full compliance,
	 * we should return an error in this case.  On the other hand,
	 * if we simply miss the standard for the link type or a new
	 * standard is defined for a new link type, the IFID length
	 * is very likely to be the common constant.  As a compromise,
	 * we always use the constant, but make an explicit notice
	 * indicating the "unknown" case.
	 */
	printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type);
	return (64);
}
#include <sys/sysctl.h>
/*
* IPv6 link-layer table entry: the generic llentry followed by the
* IPv6 address the entry was created for.
*/
struct in6_llentry {
struct llentry base; /* generic part; in6_lltable_new() returns a pointer to it */
struct sockaddr_in6 l3_addr6; /* copy of the address given to in6_lltable_new() */
};
/*
* Allocate a zeroed in6_llentry for l3addr without sleeping and return a
* pointer to its embedded llentry, or NULL when the allocation fails.
* The `flags' argument is not used in this function.
*/
static struct llentry *
in6_lltable_new(const struct sockaddr *l3addr, u_int flags)
{
struct in6_llentry *lle;
lle = malloc(sizeof(struct in6_llentry), M_LLTABLE,
M_DONTWAIT | M_ZERO);
if (lle == NULL) /* NB: caller generates msg */
return NULL;
callout_init(&lle->base.ln_timer_ch, CALLOUT_MPSAFE);
/* l3addr is taken to be a sockaddr_in6 and copied by value. */
lle->l3_addr6 = *(const struct sockaddr_in6 *)l3addr;
-
/* New revision: the entry starts with one reference and an initialized lock. */
+ lle->base.lle_refcnt = 1;
+ LLE_LOCK_INIT(&lle->base);
return &lle->base;
}
/*
* Deletes an address from the address table.
* This function is called by the timer functions
* such as arptimer() and nd6_llinfo_timer(), and
* the caller does the locking.
*
* Only the entry's memory is released here; `llt' is not used.
* NOTE(review): in6_lltable_new() initializes a lock in the entry with
* LLE_LOCK_INIT(), and nothing here destroys it -- confirm whether a
* matching lock teardown is required before free().
*/
static void
in6_lltable_free(struct lltable *llt, struct llentry *lle)
{
free(lle, M_LLTABLE);
}
static int
in6_lltable_rtcheck(struct ifnet *ifp, const struct sockaddr *l3addr)
{
struct rtentry *rt;
char ip6buf[INET6_ADDRSTRLEN];
KASSERT(l3addr->sa_family == AF_INET6,
("sin_family %d", l3addr->sa_family));
/* XXX rtalloc1 should take a const param */
rt = rtalloc1(__DECONST(struct sockaddr *, l3addr), 0, 0);
if (rt == NULL || (rt->rt_flags & RTF_GATEWAY) || rt->rt_ifp != ifp) {
struct ifaddr *ifa;
/*
* Create an ND6 cache for an IPv6 neighbor
* that is not covered by our own prefix.
*/
/* XXX ifaof_ifpforaddr should take a const param */
ifa = ifaof_ifpforaddr(__DECONST(struct sockaddr *, l3addr), ifp);
if (ifa != NULL) {
if (rt != NULL)
rtfree(rt);
return 0;
}
log(LOG_INFO, "IPv6 address: \"%s\" is not on the network\n",
ip6_sprintf(ip6buf, &((const struct sockaddr_in6 *)l3addr)->sin6_addr));
if (rt != NULL)
rtfree(rt);
return EINVAL;
}
rtfree(rt);
return 0;
}
/*
* Look up (and, with LLE_CREATE, create) the link-layer entry for l3addr
* in table llt.
*
* flags:
*   LLE_CREATE    allocate and insert an entry when none is found
*   LLE_IFADDR    the address is one of our own: skip the route check and
*                 pre-fill the entry with the interface's link-layer address
*   LLE_DELETE    mark a found entry LLE_DELETED
*   LLE_EXCLUSIVE (new revision) return the entry write-locked instead of
*                 read-locked
*
* Returns the entry, or NULL when it is absent and not created, when the
* route check or the allocation fails, or (new revision) after a delete.
*/
static struct llentry *
in6_lltable_lookup(struct lltable *llt, u_int flags,
const struct sockaddr *l3addr)
{
const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)l3addr;
struct ifnet *ifp = llt->llt_ifp;
struct llentry *lle;
struct llentries *lleh;
u_int hashkey;
KASSERT(l3addr->sa_family == AF_INET6,
("sin_family %d", l3addr->sa_family));
/* The last 32 bits of the address select the hash bucket. */
hashkey = sin6->sin6_addr.s6_addr32[3];
lleh = &llt->lle_head[LLATBL_HASH(hashkey, LLTBL_HASHMASK)];
/* Scan the bucket, ignoring entries already marked deleted. */
LIST_FOREACH(lle, lleh, lle_next) {
if (lle->la_flags & LLE_DELETED)
continue;
if (bcmp(L3_ADDR(lle), l3addr, l3addr->sa_len) == 0)
break;
}
if (lle == NULL) {
if (!(flags & LLE_CREATE))
return (NULL);
/*
* A route that covers the given address must have
* been installed 1st because we are doing a resolution,
* verify this.
*/
if (!(flags & LLE_IFADDR) &&
in6_lltable_rtcheck(ifp, l3addr) != 0)
return NULL;
lle = in6_lltable_new(l3addr, flags);
if (lle == NULL) {
log(LOG_INFO, "lla_lookup: new lle malloc failed\n");
return NULL;
}
/* Every request flag except LLE_CREATE is recorded in the entry. */
lle->la_flags = flags & ~LLE_CREATE;
if ((flags & (LLE_CREATE | LLE_IFADDR)) == (LLE_CREATE | LLE_IFADDR)) {
bcopy(IF_LLADDR(ifp), &lle->ll_addr, ifp->if_addrlen);
lle->la_flags |= (LLE_VALID | LLE_STATIC);
}
lle->lle_tbl = llt;
lle->lle_head = lleh;
LIST_INSERT_HEAD(lleh, lle, lle_next);
- } else {
- if (flags & LLE_DELETE)
- lle->la_flags = LLE_DELETED;
/* New revision: the delete is done under the entry's write lock and yields NULL. */
+ } else if (flags & LLE_DELETE) {
+ LLE_WLOCK(lle);
+ lle->la_flags = LLE_DELETED;
+ LLE_WUNLOCK(lle);
+#ifdef INVARIANTS
+ log(LOG_INFO, "ifaddr cache = %p is deleted\n", lle);
+#endif
+ lle = NULL;
}
- return lle;
/* New revision: a returned entry is locked; the caller has to unlock it. */
+ if (lle) {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WLOCK(lle);
+ else
+ LLE_RLOCK(lle);
+ }
+ return (lle);
}
/*
 * Write every live entry of `llt' to the sysctl request `wr'.
 *
 * Each entry is emitted as one fixed-size record made of a struct
 * rt_msghdr, the entry's sockaddr_in6 and a sockaddr_dl (filled in only
 * when the entry holds a valid link-layer address).  Entries marked
 * LLE_DELETED are skipped.
 *
 * Returns 0 on success.  The first SYSCTL_OUT() failure ends the dump and
 * its error is returned, so no further records are attempted after a
 * failed copy-out.
 */
static int
in6_lltable_dump(struct lltable *llt, struct sysctl_req *wr)
{
	struct ifnet *ifp = llt->llt_ifp;
	struct llentry *lle;
	/* XXX stack use */
	struct {
		struct rt_msghdr	rtm;
		struct sockaddr_in6	sin6;
		struct sockaddr_dl	sdl;
	} ndpc;
	int i, error;

	/* XXXXX
	 * current IFNET_RLOCK() is mapped to IFNET_WLOCK()
	 * so it is okay to use this ASSERT, change it when
	 * IFNET lock is finalized
	 */
	IFNET_WLOCK_ASSERT();

	error = 0;
	for (i = 0; i < LLTBL_HASHTBL_SIZE; i++) {
		LIST_FOREACH(lle, &llt->lle_head[i], lle_next) {
			/* skip deleted entries */
			if (lle->la_flags & LLE_DELETED)
				continue;
			/*
			 * produce a msg made of:
			 *  struct rt_msghdr;
			 *  struct sockaddr_in6 (IPv6)
			 *  struct sockaddr_dl;
			 */
			bzero(&ndpc, sizeof(ndpc));
			ndpc.rtm.rtm_msglen = sizeof(ndpc);
			ndpc.sin6.sin6_family = AF_INET6;
			ndpc.sin6.sin6_len = sizeof(ndpc.sin6);
			bcopy(L3_ADDR(lle), &ndpc.sin6, L3_ADDR_LEN(lle));

			/* publish */
			if (lle->la_flags & LLE_PUB)
				ndpc.rtm.rtm_flags |= RTF_ANNOUNCE;

			if (lle->la_flags & LLE_VALID) { /* valid MAC */
				struct sockaddr_dl *sdl = &ndpc.sdl;

				sdl->sdl_family = AF_LINK;
				sdl->sdl_len = sizeof(*sdl);
				sdl->sdl_alen = ifp->if_addrlen;
				sdl->sdl_index = ifp->if_index;
				sdl->sdl_type = ifp->if_type;
				bcopy(&lle->ll_addr, LLADDR(sdl), ifp->if_addrlen);
			}

			ndpc.rtm.rtm_rmx.rmx_expire =
			    lle->la_flags & LLE_STATIC ? 0 : lle->la_expire;
			ndpc.rtm.rtm_flags |= RTF_LLINFO | RTF_HOST;
			if (lle->la_flags & LLE_STATIC)
				ndpc.rtm.rtm_flags |= RTF_STATIC;
			ndpc.rtm.rtm_index = ifp->if_index;

			/*
			 * Leave both loops on failure: a plain `break' would
			 * only end the current bucket, after which later
			 * buckets would still be written and could overwrite
			 * the error.
			 */
			error = SYSCTL_OUT(wr, &ndpc, sizeof(ndpc));
			if (error)
				return (error);
		}
	}
	return error;
}
void *
in6_domifattach(struct ifnet *ifp)
{
struct in6_ifextra *ext;
ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK);
bzero(ext, sizeof(*ext));
ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat),
M_IFADDR, M_WAITOK);
bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat));
ext->icmp6_ifstat =
(struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat),
M_IFADDR, M_WAITOK);
bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat));
ext->nd_ifinfo = nd6_ifattach(ifp);
ext->scope6_id = scope6_ifattach(ifp);
ext->lltable = lltable_init(ifp, AF_INET6);
if (ext->lltable != NULL) {
ext->lltable->llt_new = in6_lltable_new;
ext->lltable->llt_free = in6_lltable_free;
ext->lltable->llt_rtcheck = in6_lltable_rtcheck;
ext->lltable->llt_lookup = in6_lltable_lookup;
ext->lltable->llt_dump = in6_lltable_dump;
}
return ext;
}
/*
 * Tear down the per-interface IPv6 state in `aux' (an in6_ifextra):
 * detach scope and ND information, free the link-layer table, then free
 * the statistics blocks and the container itself.  `ifp' is not used.
 */
void
in6_domifdetach(struct ifnet *ifp, void *aux)
{
	struct in6_ifextra *extra = aux;

	scope6_ifdetach(extra->scope6_id);
	nd6_ifdetach(extra->nd_ifinfo);
	lltable_free(extra->lltable);

	free(extra->in6_ifstat, M_IFADDR);
	free(extra->icmp6_ifstat, M_IFADDR);
	free(extra, M_IFADDR);
}
/*
 * Convert sockaddr_in6 to sockaddr_in.  Original sockaddr_in6 must be
 * v4 mapped addr or v4 compat addr: the IPv4 address is taken from the
 * last 32 bits of the IPv6 address and the port is carried over.
 */
void
in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
{

	bzero(sin, sizeof(*sin));

	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(struct sockaddr_in);
	sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3];
	sin->sin_port = sin6->sin6_port;
}
/* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */
void
in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6)
{
bzero(sin6, sizeof(*sin6));
sin6->sin6_len = sizeof(struct sockaddr_in6);
sin6->sin6_family = AF_INET6;
sin6->sin6_port = sin->sin_port;
sin6->sin6_addr.s6_addr32[0] = 0;
sin6->sin6_addr.s6_addr32[1] = 0;
sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP;
sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr;
}
/* Convert sockaddr_in6 into sockaddr_in. */
void
in6_sin6_2_sin_in_sock(struct sockaddr *nam)
{
struct sockaddr_in *sin_p;
struct sockaddr_in6 sin6;
/*
* Save original sockaddr_in6 addr and convert it
* to sockaddr_in.
*/
sin6 = *(struct sockaddr_in6 *)nam;
sin_p = (struct sockaddr_in *)nam;
in6_sin6_2_sin(sin_p, &sin6);
}
/*
 * Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format.
 * A new sockaddr_in6 is allocated from M_SONAME, the old *nam is freed,
 * and *nam is pointed at the new structure.
 */
void
in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam)
{
	struct sockaddr_in6 *mapped;

	mapped = malloc(sizeof(*mapped), M_SONAME, M_WAITOK);
	in6_sin_2_v4mapsin6((struct sockaddr_in *)*nam, mapped);

	free(*nam, M_SONAME);
	*nam = (struct sockaddr *)mapped;
}
Index: projects/arpv2_merge_1/sys/netinet6/in6_rmx.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet6/in6_rmx.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet6/in6_rmx.c (revision 185839)
@@ -1,504 +1,504 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $KAME: in6_rmx.c,v 1.11 2001/07/26 06:53:16 jinmei Exp $
*/
/*-
* Copyright 1994, 1995 Massachusetts Institute of Technology
*
* Permission to use, copy, modify, and distribute this software and
* its documentation for any purpose and without fee is hereby
* granted, provided that both the above copyright notice and this
* permission notice appear in all copies, that both the above
* copyright notice and this permission notice appear in all
* supporting documentation, and that the name of M.I.T. not be used
* in advertising or publicity pertaining to distribution of the
* software without specific, written prior permission. M.I.T. makes
* no representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied
* warranty.
*
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
/*
* This code does two things necessary for the enhanced TCP metrics to
* function in a useful manner:
* 1) It marks all non-host routes as `cloning', thus ensuring that
* every actual reference to such a route actually gets turned
* into a reference to a host route to the specific destination
* requested.
* 2) When such routes lose all their references, it arranges for them
* to be deleted in some random collection of circumstances, so that
* a large quantity of stale routing data is not kept in kernel memory
* indefinitely. See in6_rtqtimo() below for the exact mechanism.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/sysctl.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/mbuf.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>
#include <sys/callout.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/ip_var.h>
#include <netinet/in_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/nd6.h>
#include <netinet6/vinet6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
extern int in6_inithead(void **head, int off);
#define RTPRF_OURS RTF_PROTO3 /* set on routes we manage */
/*
* Do what we need to do when inserting a route.
*/
/*
* Insert the route in `treenodes' into the radix tree via rn_addroute(),
* after tagging it (RTF_MULTICAST, RTF_LOCAL) and giving it a default MTU.
* If the insertion fails, two conflicts are handled specially (see below).
* Returns the inserted (or already existing, equivalent) node, or NULL.
*/
static struct radix_node *
in6_addroute(void *v_arg, void *n_arg, struct radix_node_head *head,
struct radix_node *treenodes)
{
struct rtentry *rt = (struct rtentry *)treenodes;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)rt_key(rt);
struct radix_node *ret;
if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
rt->rt_flags |= RTF_MULTICAST;
/*
* A little bit of help for both IPv6 output and input:
* For local addresses, we make sure that RTF_LOCAL is set,
* with the thought that this might one day be used to speed up
* ip_input().
*
* We also mark routes to multicast addresses as such, because
* it's easy to do and might be useful (but this is much more
* dubious since it's so easy to inspect the address). (This
* is done above.)
*
* XXX
* should elaborate the code.
*/
if (rt->rt_flags & RTF_HOST) {
if (IN6_ARE_ADDR_EQUAL(&satosin6(rt->rt_ifa->ifa_addr)
->sin6_addr,
&sin6->sin6_addr)) {
rt->rt_flags |= RTF_LOCAL;
}
}
/* Default the route MTU to the link MTU of its interface when unset. */
if (!rt->rt_rmx.rmx_mtu && rt->rt_ifp)
rt->rt_rmx.rmx_mtu = IN6_LINKMTU(rt->rt_ifp);
ret = rn_addroute(v_arg, n_arg, head, treenodes);
if (ret == NULL && rt->rt_flags & RTF_HOST) {
struct rtentry *rt2;
/*
* We are trying to add a host route, but can't.
* Find out if it is because of an
* ARP entry and delete it if so.
*/
rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_CLONING);
if (rt2) {
if (rt2->rt_flags & RTF_LLINFO &&
rt2->rt_flags & RTF_HOST &&
rt2->rt_gateway &&
rt2->rt_gateway->sa_family == AF_LINK) {
/* Remove the conflicting link-layer host route, then retry. */
rtexpunge(rt2);
RTFREE_LOCKED(rt2);
ret = rn_addroute(v_arg, n_arg, head,
treenodes);
} else
RTFREE_LOCKED(rt2);
}
} else if (ret == NULL && rt->rt_flags & RTF_CLONING) {
struct rtentry *rt2;
/*
* We are trying to add a net route, but can't.
* The following case should be allowed, so we'll make a
* special check for this:
* Two IPv6 addresses with the same prefix are assigned
* to a single interface.
* # ifconfig if0 inet6 3ffe:0501::1 prefix 64 alias (*1)
* # ifconfig if0 inet6 3ffe:0501::2 prefix 64 alias (*2)
* In this case, (*1) and (*2) want to add the same
* net route entry, 3ffe:0501:: -> if0.
* This case should not raise an error.
*/
rt2 = rtalloc1((struct sockaddr *)sin6, 0, RTF_CLONING);
if (rt2) {
if ((rt2->rt_flags & (RTF_CLONING|RTF_HOST|RTF_GATEWAY))
== RTF_CLONING
&& rt2->rt_gateway
&& rt2->rt_gateway->sa_family == AF_LINK
&& rt2->rt_ifp == rt->rt_ifp) {
/* Report the existing equivalent route's nodes as the result. */
ret = rt2->rt_nodes;
}
RTFREE_LOCKED(rt2);
}
}
return ret;
}
/*
* This code is the inverse of in6_clsroute: on first reference, if we
* were managing the route, stop doing so and set the expiration timer
* back off again.
*/
static struct radix_node *
in6_matroute(void *v_arg, struct radix_node_head *head)
{
struct radix_node *rn = rn_match(v_arg, head);
struct rtentry *rt = (struct rtentry *)rn;
if (rt && rt->rt_refcnt == 0) { /* this is first reference */
if (rt->rt_flags & RTPRF_OURS) {
rt->rt_flags &= ~RTPRF_OURS;
rt->rt_rmx.rmx_expire = 0;
}
}
return rn;
}
/*
* Tunables for the cached-route expiry logic below, exported read/write
* under the _net_inet6_ip6 sysctl node as rtexpire, rtminexpire and
* rtmaxcache. Their initial values are assigned in in6_inithead().
*/
SYSCTL_DECL(_net_inet6_ip6);
#ifdef VIMAGE_GLOBALS
static int rtq_reallyold6;
static int rtq_minreallyold6;
static int rtq_toomany6;
#endif
SYSCTL_V_INT(V_NET, vnet_inet6, _net_inet6_ip6, IPV6CTL_RTEXPIRE,
rtexpire, CTLFLAG_RW, rtq_reallyold6 , 0, "");
SYSCTL_V_INT(V_NET, vnet_inet6, _net_inet6_ip6, IPV6CTL_RTMINEXPIRE,
rtminexpire, CTLFLAG_RW, rtq_minreallyold6 , 0, "");
SYSCTL_V_INT(V_NET, vnet_inet6, _net_inet6_ip6, IPV6CTL_RTMAXCACHE,
rtmaxcache, CTLFLAG_RW, rtq_toomany6 , 0, "");
/*
 * Radix-tree "close" hook: on last reference drop, mark the route as
 * belonging to us so that it can be timed out.
 *
 * Only routes that are up, are host routes without RTF_LLINFO, and are
 * cloned but not yet flagged RTPRF_OURS are touched; everything else is
 * left alone.
 */
static void
in6_clsroute(struct radix_node *rn, struct radix_node_head *head)
{
	INIT_VNET_INET6(curvnet);
	struct rtentry *rt = (struct rtentry *)rn;

	RT_LOCK_ASSERT(rt);

	/* prophylactic measures */
	if ((rt->rt_flags & RTF_UP) == 0)
		return;

	/* Must be a host route that carries no link-layer info. */
	if ((rt->rt_flags & (RTF_LLINFO | RTF_HOST)) != RTF_HOST)
		return;

	/* Must be a cloned route that we are not already expiring. */
	if ((rt->rt_flags & (RTF_WASCLONED | RTPRF_OURS)) != RTF_WASCLONED)
		return;

	/*
	 * As requested by David Greenman:
	 * If rtq_reallyold6 is 0, just delete the route without
	 * waiting for a timeout cycle to kill it.
	 */
	if (V_rtq_reallyold6 == 0) {
		rtexpunge(rt);
		return;
	}

	rt->rt_flags |= RTPRF_OURS;
	rt->rt_rmx.rmx_expire = time_uptime + V_rtq_reallyold6;
}
/*
* Argument block handed to in6_rtqkill() through the tree walk; it carries
* the walk mode in and the statistics and next wake-up time back out.
*/
struct rtqk_arg {
struct radix_node_head *rnh; /* tree being walked (set by the callers) */
int mode; /* not referenced by the code in this file section */
int updating; /* clamp expiry times to the current rtq_reallyold6 */
int draining; /* delete managed routes even if not yet expired */
int killed; /* out: number of routes successfully deleted */
int found; /* out: number of RTPRF_OURS routes seen */
time_t nextstop; /* in/out: earliest expiry among surviving routes */
};
/*
* Get rid of old routes. When draining, this deletes everything, even when
* the timeout is not expired yet. When updating, this makes sure that
* nothing has a timeout longer than the current value of rtq_reallyold6.
*
* Tree-walk callback: rn is the visited node (treated as a struct rtentry)
* and rock points to the caller's struct rtqk_arg. Only routes flagged
* RTPRF_OURS are considered. Always returns 0.
*/
static int
in6_rtqkill(struct radix_node *rn, void *rock)
{
INIT_VNET_INET6(curvnet);
struct rtqk_arg *ap = rock;
struct rtentry *rt = (struct rtentry *)rn;
int err;
if (rt->rt_flags & RTPRF_OURS) {
ap->found++;
if (ap->draining || rt->rt_rmx.rmx_expire <= time_uptime) {
/* A managed route must have no references left. */
if (rt->rt_refcnt > 0)
panic("rtqkill route really not free");
/*
* NOTE(review): the callers in this file run the walk with the
* radix node head locked; RTF_RNH_LOCKED presumably tells
* rtrequest() not to take that lock again - confirm.
*/
err = rtrequest(RTM_DELETE,
(struct sockaddr *)rt_key(rt),
rt->rt_gateway, rt_mask(rt),
- rt->rt_flags, 0);
+ rt->rt_flags|RTF_RNH_LOCKED, 0);
if (err) {
log(LOG_WARNING, "in6_rtqkill: error %d", err);
} else {
ap->killed++;
}
} else {
/* Not expired yet: optionally shorten its lifetime. */
if (ap->updating
&& (rt->rt_rmx.rmx_expire - time_uptime
> V_rtq_reallyold6)) {
rt->rt_rmx.rmx_expire = time_uptime
+ V_rtq_reallyold6;
}
/* Track the earliest remaining expiry for the next timer run. */
ap->nextstop = lmin(ap->nextstop,
rt->rt_rmx.rmx_expire);
}
}
return 0;
}
/*
* Default interval, in seconds, for the route-expiry timer; copied into
* rtq_timeout6 by in6_inithead(). The expansion is parenthesized so the
* macro stays a single value inside larger expressions (for example
* x / RTQ_TIMEOUT), matching MTUTIMO_DEFAULT below.
*/
#define RTQ_TIMEOUT (60*10) /* run no less than once every ten minutes */
#ifdef VIMAGE_GLOBALS
static int rtq_timeout6; /* current timer interval in seconds */
static struct callout rtq_timer6; /* callout that runs in6_rtqtimo() */
#endif
/*
* Periodic callout that expires managed (RTPRF_OURS) routes.
*
* Walks the tree with in6_rtqkill(); if too many managed routes survive,
* shrinks rtq_reallyold6 to two thirds (never below rtq_minreallyold6) and
* walks again in "updating" mode so existing expiry times are clamped.
* Finally re-arms itself for the earliest remaining expiry.
*
* NOTE(review): rock is used both as the struct vnet pointer given to the
* CURVNET/INIT_VNET macros and as the struct radix_node_head pointer;
* in6_inithead() passes the radix node head. Confirm the vnet macros do not
* dereference their argument in this configuration.
*/
static void
in6_rtqtimo(void *rock)
{
CURVNET_SET_QUIET((struct vnet *) rock);
INIT_VNET_NET((struct vnet *) rock);
INIT_VNET_INET6((struct vnet *) rock);
struct radix_node_head *rnh = rock;
struct rtqk_arg arg;
struct timeval atv;
/* Function-static: remembers the last adjustment across invocations. */
static time_t last_adjusted_timeout = 0;
arg.found = arg.killed = 0;
arg.rnh = rnh;
/* Upper bound for the next run; in6_rtqkill() may pull it earlier. */
arg.nextstop = time_uptime + V_rtq_timeout6;
arg.draining = arg.updating = 0;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
RADIX_NODE_HEAD_UNLOCK(rnh);
/*
* Attempt to be somewhat dynamic about this:
* If there are ``too many'' routes sitting around taking up space,
* then crank down the timeout, and see if we can't make some more
* go away. However, we make sure that we will never adjust more
* than once in rtq_timeout6 seconds, to keep from cranking down too
* hard.
*/
if ((arg.found - arg.killed > V_rtq_toomany6)
&& (time_uptime - last_adjusted_timeout >= V_rtq_timeout6)
&& V_rtq_reallyold6 > V_rtq_minreallyold6) {
V_rtq_reallyold6 = 2*V_rtq_reallyold6 / 3;
if (V_rtq_reallyold6 < V_rtq_minreallyold6) {
V_rtq_reallyold6 = V_rtq_minreallyold6;
}
last_adjusted_timeout = time_uptime;
#ifdef DIAGNOSTIC
log(LOG_DEBUG, "in6_rtqtimo: adjusted rtq_reallyold6 to %d",
V_rtq_reallyold6);
#endif
/* Second pass: clamp expiry times to the reduced limit. */
arg.found = arg.killed = 0;
arg.updating = 1;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
RADIX_NODE_HEAD_UNLOCK(rnh);
}
/* Re-arm for the earliest expiry recorded during the walk(s). */
atv.tv_usec = 0;
atv.tv_sec = arg.nextstop - time_uptime;
callout_reset(&V_rtq_timer6, tvtohz(&atv), in6_rtqtimo, rock);
CURVNET_RESTORE();
}
/*
* Age old PMTUs.
*
* Argument block handed to in6_mtuexpire() through the tree walk.
*/
struct mtuex_arg {
struct radix_node_head *rnh; /* tree being walked (set by in6_mtutimo) */
time_t nextstop; /* in/out: earliest pending expiry seen so far */
};
#ifdef VIMAGE_GLOBALS
static struct callout rtq_mtutimer; /* callout that runs in6_mtutimo() */
#endif
static int
in6_mtuexpire(struct radix_node *rn, void *rock)
{
struct rtentry *rt = (struct rtentry *)rn;
struct mtuex_arg *ap = rock;
/* sanity */
if (!rt)
panic("rt == NULL in in6_mtuexpire");
if (rt->rt_rmx.rmx_expire && !(rt->rt_flags & RTF_PROBEMTU)) {
if (rt->rt_rmx.rmx_expire <= time_uptime) {
rt->rt_flags |= RTF_PROBEMTU;
} else {
ap->nextstop = lmin(ap->nextstop,
rt->rt_rmx.rmx_expire);
}
}
return 0;
}
/* Default interval, in seconds, between PMTU aging runs. */
#define MTUTIMO_DEFAULT (60*1)
/*
* Periodic callout that ages path-MTU information: walks the tree with
* in6_mtuexpire() and re-arms itself for the earliest pending expiry, or
* MTUTIMO_DEFAULT seconds from now when there is nothing earlier.
*
* NOTE(review): as in in6_rtqtimo(), rock is used both as the struct vnet
* pointer given to the CURVNET/INIT_VNET macros and as the radix node head;
* confirm the macros do not dereference it in this configuration.
*/
static void
in6_mtutimo(void *rock)
{
CURVNET_SET_QUIET((struct vnet *) rock);
INIT_VNET_NET((struct vnet *) rock);
INIT_VNET_INET6((struct vnet *) rock);
struct radix_node_head *rnh = rock;
struct mtuex_arg arg;
struct timeval atv;
arg.rnh = rnh;
arg.nextstop = time_uptime + MTUTIMO_DEFAULT;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, in6_mtuexpire, &arg);
RADIX_NODE_HEAD_UNLOCK(rnh);
atv.tv_usec = 0;
atv.tv_sec = arg.nextstop - time_uptime;
/* A negative delay means nextstop is already in the past. */
if (atv.tv_sec < 0) {
printf("invalid mtu expiration time on routing table\n");
arg.nextstop = time_uptime + 30; /* last resort */
atv.tv_sec = 30;
}
callout_reset(&V_rtq_mtutimer, tvtohz(&atv), in6_mtutimo, rock);
CURVNET_RESTORE();
}
#if 0
/*
* Currently compiled out. Walks the AF_INET6 routing table with
* in6_rtqkill() in draining mode, which deletes every managed
* (RTPRF_OURS) route regardless of its expiration time.
*/
void
in6_rtqdrain(void)
{
INIT_VNET_NET(curvnet);
struct radix_node_head *rnh = V_rt_tables[AF_INET6];
struct rtqk_arg arg;
arg.found = arg.killed = 0;
arg.rnh = rnh;
arg.nextstop = 0;
arg.draining = 1;
arg.updating = 0;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, in6_rtqkill, &arg);
RADIX_NODE_HEAD_UNLOCK(rnh);
}
#endif
/*
 * Initialize our routing tree.
 *
 * Returns 0 when rn_inithead() fails and 1 otherwise.
 *
 * XXX MRT When off == 0, we are being called from vfs_export.c
 * so just set up their table and leave. (we know what the correct
 * value should be so just use that).. FIX AFTER RELENG_7 is MFC'd
 * see also comments in in_inithead() vfs_export.c and domain.h
 */
int
in6_inithead(void **head, int off)
{
	INIT_VNET_INET6(curvnet);
	struct radix_node_head *rnh;

	if (rn_inithead(head, offsetof(struct sockaddr_in6, sin6_addr) << 3) == 0)
		return 0;		/* See above */

	if (off != 0) {			/* only do the rest for the real thing */
		/* Expiry tunables; they must be set before the timers start. */
		V_rtq_reallyold6 = 60*60;	/* one hour is ``really old'' */
		V_rtq_minreallyold6 = 10;	/* never automatically crank down to less */
		V_rtq_toomany6 = 128;		/* 128 cached routes is ``too many'' */
		V_rtq_timeout6 = RTQ_TIMEOUT;

		/* Install the IPv6-specific radix hooks. */
		rnh = *head;
		rnh->rnh_addaddr = in6_addroute;
		rnh->rnh_matchaddr = in6_matroute;
		rnh->rnh_close = in6_clsroute;

		callout_init(&V_rtq_timer6, CALLOUT_MPSAFE);
		in6_rtqtimo(rnh);	/* kick off timeout first time */
		callout_init(&V_rtq_mtutimer, CALLOUT_MPSAFE);
		in6_mtutimo(rnh);	/* kick off timeout first time */
	}
	return 1;
}
Index: projects/arpv2_merge_1/sys/netinet6/ip6_input.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet6/ip6_input.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet6/ip6_input.c (revision 185839)
@@ -1,1695 +1,1696 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $KAME: ip6_input.c,v 1.259 2002/01/21 04:58:09 jinmei Exp $
*/
/*-
* Copyright (c) 1982, 1986, 1988, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ip_input.c 8.2 (Berkeley) 1/4/94
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/netisr.h>
#include <net/pfil.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <net/if_llatbl.h>
#ifdef INET
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/vinet.h>
#endif /* INET */
#include <netinet/ip6.h>
#include <netinet6/in6_var.h>
#include <netinet6/ip6_var.h>
#include <netinet/in_pcb.h>
#include <netinet/icmp6.h>
#include <netinet6/scope6_var.h>
#include <netinet6/in6_ifattach.h>
#include <netinet6/nd6.h>
#include <netinet6/vinet6.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netinet6/ip6_ipsec.h>
#include <netipsec/ipsec6.h>
#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
extern struct domain inet6domain;
/*
* Maps an IPv6 next-header value to an index into inet6sw[]; filled in by
* ip6_init() and used by ip6_input() to dispatch to the protocol handler.
*/
u_char ip6_protox[IPPROTO_MAX];
/* Input queue registered with netisr for NETISR_IPV6 in ip6_init(). */
static struct ifqueue ip6intrq;
#ifdef VIMAGE_GLOBALS
static int ip6qmaxlen;
struct in6_ifaddr *in6_ifaddr;
struct ip6stat ip6stat;
#endif
/* Variables defined elsewhere; ip6_init() assigns their initial values. */
extern struct callout in6_tmpaddrtimer_ch;
extern int dad_init;
extern int pmtu_expire;
extern int pmtu_probe;
extern u_long rip6_sendspace;
extern u_long rip6_recvspace;
extern int icmp6errppslim;
extern int icmp6_nodeinfo;
extern int udp6_sendspace;
extern int udp6_recvspace;
#ifdef VIMAGE_GLOBALS
int ip6_forward_srcrt; /* XXX */
int ip6_sourcecheck; /* XXX */
int ip6_sourcecheck_interval; /* XXX */
int ip6_ours_check_algorithm;
#endif
/* Packet-filter hook head for AF_INET6; registered in ip6_init(). */
struct pfil_head inet6_pfil_hook;
/* Forward declarations of file-local helpers. */
static void ip6_init2(void *);
static struct ip6aux *ip6_setdstifaddr(struct mbuf *, struct in6_ifaddr *);
static int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *);
#ifdef PULLDOWN_TEST
static struct mbuf *ip6_pullexthdr(struct mbuf *, size_t, int);
#endif
/*
* IP6 initialization: fill in IP6 protocol switch table.
* All protocols not implemented in kernel go to raw IP6 protocol handler.
*
* Also assigns the initial values of the IPv6, raw IPv6, ICMPv6 and UDPv6
* tunables, registers the pfil hook head and the NETISR_IPV6 input queue,
* and calls the scope6, address-selection, nd6 and frag6 initializers.
* Panics if no raw IPv6 protocol entry can be found.
*/
void
ip6_init(void)
{
INIT_VNET_INET6(curvnet);
struct ip6protosw *pr;
int i;
V_ip6qmaxlen = IFQ_MAXLEN;
V_in6_maxmtu = 0;
#ifdef IP6_AUTO_LINKLOCAL
V_ip6_auto_linklocal = IP6_AUTO_LINKLOCAL;
#else
V_ip6_auto_linklocal = 1; /* enable by default */
#endif
/* Compile-time defaults for forwarding and redirects, unless overridden. */
#ifndef IPV6FORWARDING
#ifdef GATEWAY6
#define IPV6FORWARDING 1 /* forward IP6 packets not for us */
#else
#define IPV6FORWARDING 0 /* don't forward IP6 packets not for us */
#endif /* GATEWAY6 */
#endif /* !IPV6FORWARDING */
#ifndef IPV6_SENDREDIRECTS
#define IPV6_SENDREDIRECTS 1
#endif
V_ip6_forwarding = IPV6FORWARDING; /* act as router? */
V_ip6_sendredirects = IPV6_SENDREDIRECTS;
V_ip6_defhlim = IPV6_DEFHLIM;
V_ip6_defmcasthlim = IPV6_DEFAULT_MULTICAST_HOPS;
V_ip6_accept_rtadv = 0; /* "IPV6FORWARDING ? 0 : 1" is dangerous */
V_ip6_log_interval = 5;
V_ip6_hdrnestlimit = 15; /* How many header options will we process? */
V_ip6_dad_count = 1; /* DupAddrDetectionTransmits */
V_ip6_auto_flowlabel = 1;
V_ip6_use_deprecated = 1;/* allow deprecated addr (RFC2462 5.5.4) */
V_ip6_rr_prune = 5; /* router renumbering prefix
* walk list every 5 sec. */
V_ip6_mcast_pmtu = 0; /* enable pMTU discovery for multicast? */
V_ip6_v6only = 1;
V_ip6_keepfaith = 0;
V_ip6_log_time = (time_t)0L;
#ifdef IPSTEALTH
V_ip6stealth = 0;
#endif
V_nd6_onlink_ns_rfc4861 = 0; /* allow 'on-link' nd6 NS (RFC 4861) */
V_pmtu_expire = 60*10;
V_pmtu_probe = 60*2;
/* raw IP6 parameters */
/*
* Nominal space allocated to a raw ip socket.
*/
#define RIPV6SNDQ 8192
#define RIPV6RCVQ 8192
V_rip6_sendspace = RIPV6SNDQ;
V_rip6_recvspace = RIPV6RCVQ;
/* ICMPV6 parameters */
V_icmp6_rediraccept = 1; /* accept and process redirects */
V_icmp6_redirtimeout = 10 * 60; /* 10 minutes */
V_icmp6errppslim = 100; /* 100pps */
/* control how to respond to NI queries */
V_icmp6_nodeinfo = (ICMP6_NODEINFO_FQDNOK|ICMP6_NODEINFO_NODEADDROK);
/* UDP on IP6 parameters */
V_udp6_sendspace = 9216; /* really max datagram size */
V_udp6_recvspace = 40 * (1024 + sizeof(struct sockaddr_in6));
/* 40 1K datagrams */
V_dad_init = 0;
#ifdef DIAGNOSTIC
/* inet6sw entries are handled as both struct protosw and struct ip6protosw. */
if (sizeof(struct protosw) != sizeof(struct ip6protosw))
panic("sizeof(protosw) != sizeof(ip6protosw)");
#endif
pr = (struct ip6protosw *)pffindproto(PF_INET6, IPPROTO_RAW, SOCK_RAW);
if (pr == 0)
panic("ip6_init");
/* Initialize the entire ip6_protox[] array to IPPROTO_RAW. */
for (i = 0; i < IPPROTO_MAX; i++)
ip6_protox[i] = pr - inet6sw;
/*
* Cycle through IP protocols and put them into the appropriate place
* in ip6_protox[].
*/
for (pr = (struct ip6protosw *)inet6domain.dom_protosw;
pr < (struct ip6protosw *)inet6domain.dom_protoswNPROTOSW; pr++)
if (pr->pr_domain->dom_family == PF_INET6 &&
pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
/* Be careful to only index valid IP protocols. */
if (pr->pr_protocol < IPPROTO_MAX)
ip6_protox[pr->pr_protocol] = pr - inet6sw;
}
/* Initialize packet filter hooks. */
inet6_pfil_hook.ph_type = PFIL_TYPE_AF;
inet6_pfil_hook.ph_af = AF_INET6;
if ((i = pfil_head_register(&inet6_pfil_hook)) != 0)
printf("%s: WARNING: unable to register pfil hook, "
"error %d\n", __func__, i);
/* Set up the input queue and hand it to netisr with ip6_input as handler. */
ip6intrq.ifq_maxlen = V_ip6qmaxlen;
mtx_init(&ip6intrq.ifq_mtx, "ip6_inq", NULL, MTX_DEF);
netisr_register(NETISR_IPV6, ip6_input, &ip6intrq, 0);
scope6_init();
addrsel_policy_init();
nd6_init();
frag6_init();
V_ip6_desync_factor = arc4random() % MAX_TEMP_DESYNC_FACTOR;
}
/*
* Second-stage IPv6 initialization, run through the SYSINIT below.
* Initializes and arms two callouts: the nd6 timer (first run after hz
* ticks) and the temporary-address regeneration timer. The dummy argument
* is unused.
*/
static void
ip6_init2(void *dummy)
{
INIT_VNET_INET6(curvnet);
/* nd6_timer_init */
callout_init(&V_nd6_timer_ch, 0);
callout_reset(&V_nd6_timer_ch, hz, nd6_timer, NULL);
/* timer for regeneration of the randomized ID of temporary addresses */
callout_init(&V_in6_tmpaddrtimer_ch, 0);
/* Delay in ticks: (preferred lifetime - desync factor - regen advance) * hz. */
callout_reset(&V_in6_tmpaddrtimer_ch,
(V_ip6_temp_preferred_lifetime - V_ip6_desync_factor -
V_ip6_temp_regen_advance) * hz,
in6_tmpaddrtimer, NULL);
}
/* cheat */
/* This must be after route_init(), which is now SI_ORDER_THIRD */
SYSINIT(netinet6init2, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, ip6_init2, NULL);
extern struct route_in6 ip6_forward_rt;
void
ip6_input(struct mbuf *m)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
struct ip6_hdr *ip6;
int off = sizeof(struct ip6_hdr), nest;
u_int32_t plen;
u_int32_t rtalert = ~0;
int nxt, ours = 0;
struct ifnet *deliverifp = NULL, *ifp = NULL;
struct in6_addr odst;
int srcrt = 0;
struct llentry *lle = NULL;
struct sockaddr_in6 dst6;
#ifdef IPSEC
/*
* should the inner packet be considered authentic?
* see comment in ah4_input().
* NB: m cannot be NULL when passed to the input routine
*/
m->m_flags &= ~M_AUTHIPHDR;
m->m_flags &= ~M_AUTHIPDGM;
#endif /* IPSEC */
/*
* make sure we don't have onion peering information into m_tag.
*/
ip6_delaux(m);
/*
* mbuf statistics
*/
if (m->m_flags & M_EXT) {
if (m->m_next)
V_ip6stat.ip6s_mext2m++;
else
V_ip6stat.ip6s_mext1++;
} else {
#define M2MMAX (sizeof(V_ip6stat.ip6s_m2m)/sizeof(V_ip6stat.ip6s_m2m[0]))
if (m->m_next) {
if (m->m_flags & M_LOOP) {
V_ip6stat.ip6s_m2m[V_loif[0].if_index]++; /* XXX */
} else if (m->m_pkthdr.rcvif->if_index < M2MMAX)
V_ip6stat.ip6s_m2m[m->m_pkthdr.rcvif->if_index]++;
else
V_ip6stat.ip6s_m2m[0]++;
} else
V_ip6stat.ip6s_m1++;
#undef M2MMAX
}
/* drop the packet if IPv6 operation is disabled on the IF */
if ((ND_IFINFO(m->m_pkthdr.rcvif)->flags & ND6_IFF_IFDISABLED)) {
m_freem(m);
return;
}
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_receive);
V_ip6stat.ip6s_total++;
#ifndef PULLDOWN_TEST
/*
* L2 bridge code and some other code can return mbuf chain
* that does not conform to KAME requirement. too bad.
* XXX: fails to join if interface MTU > MCLBYTES. jumbogram?
*/
if (m && m->m_next != NULL && m->m_pkthdr.len < MCLBYTES) {
struct mbuf *n;
MGETHDR(n, M_DONTWAIT, MT_HEADER);
if (n)
M_MOVE_PKTHDR(n, m);
if (n && n->m_pkthdr.len > MHLEN) {
MCLGET(n, M_DONTWAIT);
if ((n->m_flags & M_EXT) == 0) {
m_freem(n);
n = NULL;
}
}
if (n == NULL) {
m_freem(m);
return; /* ENOBUFS */
}
m_copydata(m, 0, n->m_pkthdr.len, mtod(n, caddr_t));
n->m_len = n->m_pkthdr.len;
m_freem(m);
m = n;
}
IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), /* nothing */);
#endif
if (m->m_len < sizeof(struct ip6_hdr)) {
struct ifnet *inifp;
inifp = m->m_pkthdr.rcvif;
if ((m = m_pullup(m, sizeof(struct ip6_hdr))) == NULL) {
V_ip6stat.ip6s_toosmall++;
in6_ifstat_inc(inifp, ifs6_in_hdrerr);
return;
}
}
ip6 = mtod(m, struct ip6_hdr *);
if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
V_ip6stat.ip6s_badvers++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
goto bad;
}
V_ip6stat.ip6s_nxthist[ip6->ip6_nxt]++;
/*
* Check against address spoofing/corruption.
*/
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src) ||
IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_dst)) {
/*
* XXX: "badscope" is not very suitable for a multicast source.
*/
V_ip6stat.ip6s_badscope++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
if (IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) &&
!(m->m_flags & M_LOOP)) {
/*
* In this case, the packet should come from the loopback
* interface. However, we cannot just check the if_flags,
* because ip6_mloopback() passes the "actual" interface
* as the outgoing/incoming interface.
*/
V_ip6stat.ip6s_badscope++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
#ifdef ALTQ
if (altq_input != NULL && (*altq_input)(m, AF_INET6) == 0) {
/* packet is dropped by traffic conditioner */
return;
}
#endif
/*
* The following check is not documented in specs. A malicious
* party may be able to use IPv4 mapped addr to confuse tcp/udp stack
* and bypass security checks (act as if it was from 127.0.0.1 by using
* IPv6 src ::ffff:127.0.0.1). Be cautious.
*
* This check chokes if we are in an SIIT cloud. As none of BSDs
* support IPv4-less kernel compilation, we cannot support SIIT
* environment at all. So, it makes more sense for us to reject any
* malicious packets for non-SIIT environment, than try to do a
* partial support for SIIT environment.
*/
if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
V_ip6stat.ip6s_badscope++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
#if 0
/*
* Reject packets with IPv4 compatible addresses (auto tunnel).
*
* The code forbids auto tunnel relay case in RFC1933 (the check is
* stronger than RFC1933). We may want to re-enable it if mech-xx
* is revised to forbid relaying case.
*/
if (IN6_IS_ADDR_V4COMPAT(&ip6->ip6_src) ||
IN6_IS_ADDR_V4COMPAT(&ip6->ip6_dst)) {
V_ip6stat.ip6s_badscope++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
#endif
/*
* Run through list of hooks for input packets.
*
* NB: Beware of the destination address changing
* (e.g. by NAT rewriting). When this happens,
* tell ip6_forward to do the right thing.
*/
odst = ip6->ip6_dst;
/* Jump over all PFIL processing if hooks are not active. */
if (!PFIL_HOOKED(&inet6_pfil_hook))
goto passin;
if (pfil_run_hooks(&inet6_pfil_hook, &m, m->m_pkthdr.rcvif, PFIL_IN, NULL))
return;
if (m == NULL) /* consumed by filter */
return;
ip6 = mtod(m, struct ip6_hdr *);
srcrt = !IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst);
passin:
/*
* Disambiguate address scope zones (if there is ambiguity).
* We first make sure that the original source or destination address
* is not in our internal form for scoped addresses. Such addresses
* are not necessarily invalid spec-wise, but we cannot accept them due
* to the usage conflict.
* in6_setscope() then also checks and rejects the cases where src or
* dst are the loopback address and the receiving interface
* is not loopback.
*/
if (in6_clearscope(&ip6->ip6_src) || in6_clearscope(&ip6->ip6_dst)) {
V_ip6stat.ip6s_badscope++; /* XXX */
goto bad;
}
if (in6_setscope(&ip6->ip6_src, m->m_pkthdr.rcvif, NULL) ||
in6_setscope(&ip6->ip6_dst, m->m_pkthdr.rcvif, NULL)) {
V_ip6stat.ip6s_badscope++;
goto bad;
}
/*
* Multicast check
*/
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
struct in6_multi *in6m = 0;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mcast);
/*
* See if we belong to the destination multicast group on the
* arrival interface.
*/
IN6_LOOKUP_MULTI(ip6->ip6_dst, m->m_pkthdr.rcvif, in6m);
if (in6m)
ours = 1;
else if (!ip6_mrouter) {
V_ip6stat.ip6s_notmember++;
V_ip6stat.ip6s_cantforward++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
goto bad;
}
deliverifp = m->m_pkthdr.rcvif;
goto hbhcheck;
}
/*
* Unicast check
*/
bzero(&dst6, sizeof(dst6));
dst6.sin6_family = AF_INET6;
dst6.sin6_len = sizeof(struct sockaddr_in6);
dst6.sin6_addr = ip6->ip6_dst;
ifp = m->m_pkthdr.rcvif;
IF_AFDATA_LOCK(ifp);
lle = lla_lookup(LLTABLE6(ifp), 0,
(struct sockaddr *)&dst6);
+ IF_AFDATA_UNLOCK(ifp);
if ((lle != NULL) && (lle->la_flags & LLE_IFADDR)) {
ours = 1;
deliverifp = ifp;
- IF_AFDATA_UNLOCK(ifp);
+ LLE_RUNLOCK(lle);
goto hbhcheck;
}
- IF_AFDATA_UNLOCK(ifp);
+ LLE_RUNLOCK(lle);
if (ip6_forward_rt.ro_rt != NULL &&
(ip6_forward_rt.ro_rt->rt_flags & RTF_UP) != 0 &&
IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
&((struct sockaddr_in6 *)(&V_ip6_forward_rt.ro_dst))->sin6_addr))
V_ip6stat.ip6s_forward_cachehit++;
else {
struct sockaddr_in6 *dst6;
if (V_ip6_forward_rt.ro_rt) {
/* route is down or destination is different */
V_ip6stat.ip6s_forward_cachemiss++;
RTFREE(V_ip6_forward_rt.ro_rt);
V_ip6_forward_rt.ro_rt = 0;
}
bzero(&V_ip6_forward_rt.ro_dst, sizeof(struct sockaddr_in6));
dst6 = (struct sockaddr_in6 *)&V_ip6_forward_rt.ro_dst;
dst6->sin6_len = sizeof(struct sockaddr_in6);
dst6->sin6_family = AF_INET6;
dst6->sin6_addr = ip6->ip6_dst;
rtalloc((struct route *)&V_ip6_forward_rt);
}
#define rt6_key(r) ((struct sockaddr_in6 *)((r)->rt_nodes->rn_key))
/*
* Accept the packet if the forwarding interface to the destination
* according to the routing table is the loopback interface,
* unless the associated route has a gateway.
* Note that this approach causes to accept a packet if there is a
* route to the loopback interface for the destination of the packet.
* But we think it's even useful in some situations, e.g. when using
* a special daemon which wants to intercept the packet.
*
* XXX: some OSes automatically make a cloned route for the destination
* of an outgoing packet. If the outgoing interface of the packet
* is a loopback one, the kernel would consider the packet to be
* accepted, even if we have no such address assinged on the interface.
* We check the cloned flag of the route entry to reject such cases,
* assuming that route entries for our own addresses are not made by
* cloning (it should be true because in6_addloop explicitly installs
* the host route). However, we might have to do an explicit check
* while it would be less efficient. Or, should we rather install a
* reject route for such a case?
*/
if (V_ip6_forward_rt.ro_rt &&
(V_ip6_forward_rt.ro_rt->rt_flags &
(RTF_HOST|RTF_GATEWAY)) == RTF_HOST &&
#ifdef RTF_WASCLONED
!(V_ip6_forward_rt.ro_rt->rt_flags & RTF_WASCLONED) &&
#endif
#ifdef RTF_CLONED
!(V_ip6_forward_rt.ro_rt->rt_flags & RTF_CLONED) &&
#endif
#if 0
/*
* The check below is redundant since the comparison of
* the destination and the key of the rtentry has
* already done through looking up the routing table.
*/
IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
&rt6_key(V_ip6_forward_rt.ro_rt)->sin6_addr)
#endif
V_ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_LOOP) {
struct in6_ifaddr *ia6 =
(struct in6_ifaddr *)V_ip6_forward_rt.ro_rt->rt_ifa;
/*
* record address information into m_tag.
*/
(void)ip6_setdstifaddr(m, ia6);
/*
* packets to a tentative, duplicated, or somehow invalid
* address must not be accepted.
*/
if (!(ia6->ia6_flags & IN6_IFF_NOTREADY)) {
/* this address is ready */
ours = 1;
deliverifp = ia6->ia_ifp; /* correct? */
/* Count the packet in the ip address stats */
ia6->ia_ifa.if_ipackets++;
ia6->ia_ifa.if_ibytes += m->m_pkthdr.len;
goto hbhcheck;
} else {
char ip6bufs[INET6_ADDRSTRLEN];
char ip6bufd[INET6_ADDRSTRLEN];
/* address is not ready, so discard the packet. */
nd6log((LOG_INFO,
"ip6_input: packet to an unready address %s->%s\n",
ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst)));
goto bad;
}
}
/*
* FAITH (Firewall Aided Internet Translator)
*/
if (V_ip6_keepfaith) {
if (V_ip6_forward_rt.ro_rt && V_ip6_forward_rt.ro_rt->rt_ifp
&& V_ip6_forward_rt.ro_rt->rt_ifp->if_type == IFT_FAITH) {
/* XXX do we need more sanity checks? */
ours = 1;
deliverifp = V_ip6_forward_rt.ro_rt->rt_ifp; /* faith */
goto hbhcheck;
}
}
/*
* Now there is no reason to process the packet if it's not our own
* and we're not a router.
*/
if (!V_ip6_forwarding) {
V_ip6stat.ip6s_cantforward++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
goto bad;
}
hbhcheck:
/*
* record address information into m_tag, if we don't have one yet.
* note that we are unable to record it, if the address is not listed
* as our interface address (e.g. multicast addresses, addresses
* within FAITH prefixes and such).
*/
if (deliverifp && !ip6_getdstifaddr(m)) {
struct in6_ifaddr *ia6;
ia6 = in6_ifawithifp(deliverifp, &ip6->ip6_dst);
if (ia6) {
if (!ip6_setdstifaddr(m, ia6)) {
/*
* XXX maybe we should drop the packet here,
* as we could not provide enough information
* to the upper layers.
*/
}
}
}
/*
* Process Hop-by-Hop options header if it's contained.
* m may be modified in ip6_hopopts_input().
* If a JumboPayload option is included, plen will also be modified.
*/
plen = (u_int32_t)ntohs(ip6->ip6_plen);
if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
struct ip6_hbh *hbh;
if (ip6_hopopts_input(&plen, &rtalert, &m, &off)) {
#if 0 /*touches NULL pointer*/
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
#endif
return; /* m have already been freed */
}
/* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
/*
* if the payload length field is 0 and the next header field
* indicates Hop-by-Hop Options header, then a Jumbo Payload
* option MUST be included.
*/
if (ip6->ip6_plen == 0 && plen == 0) {
/*
* Note that if a valid jumbo payload option is
* contained, ip6_hopopts_input() must set a valid
* (non-zero) payload length to the variable plen.
*/
V_ip6stat.ip6s_badoptions++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_discard);
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_hdrerr);
icmp6_error(m, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_HEADER,
(caddr_t)&ip6->ip6_plen - (caddr_t)ip6);
return;
}
#ifndef PULLDOWN_TEST
/* ip6_hopopts_input() ensures that mbuf is contiguous */
hbh = (struct ip6_hbh *)(ip6 + 1);
#else
IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
sizeof(struct ip6_hbh));
if (hbh == NULL) {
V_ip6stat.ip6s_tooshort++;
return;
}
#endif
nxt = hbh->ip6h_nxt;
/*
* If we are acting as a router and the packet contains a
* router alert option, see if we know the option value.
* Currently, we only support the option value for MLD, in which
* case we should pass the packet to the multicast routing
* daemon.
*/
if (rtalert != ~0 && V_ip6_forwarding) {
switch (rtalert) {
case IP6OPT_RTALERT_MLD:
ours = 1;
break;
default:
/*
* RFC2711 requires unrecognized values must be
* silently ignored.
*/
break;
}
}
} else
nxt = ip6->ip6_nxt;
/*
* Check that the amount of data in the buffers
* is as at least much as the IPv6 header would have us expect.
* Trim mbufs if longer than we expect.
* Drop packet if shorter than we expect.
*/
if (m->m_pkthdr.len - sizeof(struct ip6_hdr) < plen) {
V_ip6stat.ip6s_tooshort++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
goto bad;
}
if (m->m_pkthdr.len > sizeof(struct ip6_hdr) + plen) {
if (m->m_len == m->m_pkthdr.len) {
m->m_len = sizeof(struct ip6_hdr) + plen;
m->m_pkthdr.len = sizeof(struct ip6_hdr) + plen;
} else
m_adj(m, sizeof(struct ip6_hdr) + plen - m->m_pkthdr.len);
}
/*
* Forward if desirable.
*/
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
/*
* If we are acting as a multicast router, all
* incoming multicast packets are passed to the
* kernel-level multicast forwarding function.
* The packet is returned (relatively) intact; if
* ip6_mforward() returns a non-zero value, the packet
* must be discarded, else it may be accepted below.
*/
if (ip6_mrouter && ip6_mforward &&
ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) {
V_ip6stat.ip6s_cantforward++;
m_freem(m);
return;
}
if (!ours) {
m_freem(m);
return;
}
} else if (!ours) {
ip6_forward(m, srcrt);
return;
}
ip6 = mtod(m, struct ip6_hdr *);
/*
* Malicious party may be able to use IPv4 mapped addr to confuse
* tcp/udp stack and bypass security checks (act as if it was from
* 127.0.0.1 by using IPv6 src ::ffff:127.0.0.1). Be cautious.
*
* For SIIT end node behavior, you may want to disable the check.
* However, you will become vulnerable to attacks using IPv4 mapped
* source.
*/
if (IN6_IS_ADDR_V4MAPPED(&ip6->ip6_src) ||
IN6_IS_ADDR_V4MAPPED(&ip6->ip6_dst)) {
V_ip6stat.ip6s_badscope++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_addrerr);
goto bad;
}
/*
* Tell launch routine the next header
*/
V_ip6stat.ip6s_delivered++;
in6_ifstat_inc(deliverifp, ifs6_in_deliver);
nest = 0;
while (nxt != IPPROTO_DONE) {
if (V_ip6_hdrnestlimit && (++nest > V_ip6_hdrnestlimit)) {
V_ip6stat.ip6s_toomanyhdr++;
goto bad;
}
/*
* protection against faulty packet - there should be
* more sanity checks in header chain processing.
*/
if (m->m_pkthdr.len < off) {
V_ip6stat.ip6s_tooshort++;
in6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_truncated);
goto bad;
}
#ifdef IPSEC
/*
* enforce IPsec policy checking if we are seeing last header.
* note that we do not visit this with protocols with pcb layer
* code - like udp/tcp/raw ip.
*/
if (ip6_ipsec_input(m, nxt))
goto bad;
#endif /* IPSEC */
nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt);
}
return;
bad:
m_freem(m);
}
/*
* set/grab in6_ifaddr correspond to IPv6 destination address.
* XXX backward compatibility wrapper
*/
static struct ip6aux *
ip6_setdstifaddr(struct mbuf *m, struct in6_ifaddr *ia6)
{
struct ip6aux *ip6a;
ip6a = ip6_addaux(m);
if (ip6a)
ip6a->ip6a_dstia6 = ia6;
return ip6a; /* NULL if failed to set */
}
/*
 * Fetch the destination in6_ifaddr stored in the packet's ip6aux area.
 * Returns NULL when ip6_findaux() finds no aux area on the packet.
 */
struct in6_ifaddr *
ip6_getdstifaddr(struct mbuf *m)
{
	struct ip6aux *aux = ip6_findaux(m);

	return (aux != NULL ? aux->ip6a_dstia6 : NULL);
}
/*
 * Hop-by-Hop options header processing.
 *
 * Validates that the complete Hop-by-Hop header is accessible, then hands
 * the option area (everything after the fixed struct ip6_hbh part) to
 * ip6_process_hopopts().  If a valid jumbo payload option is included, the
 * real payload length will be stored in *plenp.
 *
 * On success returns 0, advances *offp past the header and stores the mbuf
 * back into *mp.  Returns -1 on any failure.
 *
 * rtalertp - XXX: should be stored more smart way
 */
static int
ip6_hopopts_input(u_int32_t *plenp, u_int32_t *rtalertp,
    struct mbuf **mp, int *offp)
{
	INIT_VNET_INET6(curvnet);
	struct mbuf *m = *mp;
	int off = *offp, hbhlen;
	struct ip6_hbh *hbh;
	u_int8_t *opt;

	/* validation of the length of the header */
#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, sizeof(*hbh), -1);
	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
	hbhlen = (hbh->ip6h_len + 1) << 3;

	/* Re-check with the real length and re-derive the header pointer. */
	IP6_EXTHDR_CHECK(m, off, hbhlen, -1);
	hbh = (struct ip6_hbh *)(mtod(m, caddr_t) + off);
#else
	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m,
	    sizeof(struct ip6_hdr), sizeof(struct ip6_hbh));
	if (hbh == NULL) {
		V_ip6stat.ip6s_tooshort++;
		return -1;
	}
	hbhlen = (hbh->ip6h_len + 1) << 3;
	IP6_EXTHDR_GET(hbh, struct ip6_hbh *, m, sizeof(struct ip6_hdr),
	    hbhlen);
	if (hbh == NULL) {
		V_ip6stat.ip6s_tooshort++;
		return -1;
	}
#endif

	/* Options start right behind the fixed part of the header. */
	opt = (u_int8_t *)hbh + sizeof(struct ip6_hbh);
	if (ip6_process_hopopts(m, opt, hbhlen - (int)sizeof(struct ip6_hbh),
	    rtalertp, plenp) < 0)
		return (-1);

	*offp = off + hbhlen;
	*mp = m;
	return (0);
}
/*
 * Search header for all Hop-by-hop options and process each option.
 * This function is separate from ip6_hopopts_input() in order to
 * handle a case where the sending node itself process its hop-by-hop
 * options header. In such a case, the function is called from ip6_output().
 *
 * The function assumes that hbh header is located right after the IPv6 header
 * (RFC2460 p7), opthead is pointer into data content in m, and opthead to
 * opthead + hbhlen is located in continuous memory region.
 *
 * Arguments:
 *   m        - packet being processed
 *   opthead  - first byte of the option area
 *   hbhlen   - number of option bytes starting at opthead
 *   rtalertp - receives the router alert value (host byte order) if a
 *              router alert option is present
 *   plenp    - receives the jumbo payload length if a jumbo payload option
 *              is present; a non-zero value on entry is treated as
 *              "jumbo option already seen"
 *
 * Returns 0 when every option was accepted, -1 otherwise.  On the "bad"
 * path m is freed here; on the icmp6_error() paths m is not freed here
 * (NOTE(review): presumably icmp6_error() takes ownership of m - confirm).
 */
int
ip6_process_hopopts(struct mbuf *m, u_int8_t *opthead, int hbhlen,
u_int32_t *rtalertp, u_int32_t *plenp)
{
INIT_VNET_INET6(curvnet);
struct ip6_hdr *ip6;
int optlen = 0;
u_int8_t *opt = opthead;
u_int16_t rtalert_val;
u_int32_t jumboplen;
/* offset of the option area from the start of the IPv6 header */
const int erroff = sizeof(struct ip6_hdr) + sizeof(struct ip6_hbh);
/* walk the options; each iteration sets optlen to the size consumed */
for (; hbhlen > 0; hbhlen -= optlen, opt += optlen) {
switch (*opt) {
case IP6OPT_PAD1:
/* single-byte padding: no length field */
optlen = 1;
break;
case IP6OPT_PADN:
/* need at least type + length bytes before reading the length */
if (hbhlen < IP6OPT_MINLEN) {
V_ip6stat.ip6s_toosmall++;
goto bad;
}
optlen = *(opt + 1) + 2;
break;
case IP6OPT_ROUTER_ALERT:
/* XXX may need check for alignment */
if (hbhlen < IP6OPT_RTALERT_LEN) {
V_ip6stat.ip6s_toosmall++;
goto bad;
}
/* the length byte must match the fixed router alert size */
if (*(opt + 1) != IP6OPT_RTALERT_LEN - 2) {
/* XXX stat */
icmp6_error(m, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_HEADER,
erroff + opt + 1 - opthead);
return (-1);
}
optlen = IP6OPT_RTALERT_LEN;
/* copy out byte-wise: the value may be unaligned */
bcopy((caddr_t)(opt + 2), (caddr_t)&rtalert_val, 2);
*rtalertp = ntohs(rtalert_val);
break;
case IP6OPT_JUMBO:
/* XXX may need check for alignment */
if (hbhlen < IP6OPT_JUMBO_LEN) {
V_ip6stat.ip6s_toosmall++;
goto bad;
}
/* the length byte must match the fixed jumbo option size */
if (*(opt + 1) != IP6OPT_JUMBO_LEN - 2) {
/* XXX stat */
icmp6_error(m, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_HEADER,
erroff + opt + 1 - opthead);
return (-1);
}
optlen = IP6OPT_JUMBO_LEN;
/*
* IPv6 packets that have non 0 payload length
* must not contain a jumbo payload option.
*/
ip6 = mtod(m, struct ip6_hdr *);
if (ip6->ip6_plen) {
V_ip6stat.ip6s_badoptions++;
icmp6_error(m, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_HEADER,
erroff + opt - opthead);
return (-1);
}
/*
* We may see jumbolen in unaligned location, so
* we'd need to perform bcopy().
*/
bcopy(opt + 2, &jumboplen, sizeof(jumboplen));
/* network-to-host conversion (spelled htonl here) */
jumboplen = (u_int32_t)htonl(jumboplen);
#if 1
/*
* if there are multiple jumbo payload options,
* *plenp will be non-zero and the packet will be
* rejected.
* the behavior may need some debate in ipngwg -
* multiple options does not make sense, however,
* there's no explicit mention in specification.
*/
if (*plenp != 0) {
V_ip6stat.ip6s_badoptions++;
icmp6_error(m, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_HEADER,
erroff + opt + 2 - opthead);
return (-1);
}
#endif
/*
* jumbo payload length must be larger than 65535.
*/
if (jumboplen <= IPV6_MAXPACKET) {
V_ip6stat.ip6s_badoptions++;
icmp6_error(m, ICMP6_PARAM_PROB,
ICMP6_PARAMPROB_HEADER,
erroff + opt + 2 - opthead);
return (-1);
}
*plenp = jumboplen;
break;
default: /* unknown option */
if (hbhlen < IP6OPT_MINLEN) {
V_ip6stat.ip6s_toosmall++;
goto bad;
}
/*
* ip6_unknown_opt() returns the option's data length, or -1
* after it has already disposed of m.
*/
optlen = ip6_unknown_opt(opt, m,
erroff + opt - opthead);
if (optlen == -1)
return (-1);
/* account for the type and length bytes */
optlen += 2;
break;
}
}
return (0);
bad:
/* truncated option: drop the packet without sending an ICMPv6 error */
m_freem(m);
return (-1);
}
/*
 * Unknown option processing.
 *
 * Acts on the two high-order bits of the option type (IP6OPT_TYPE()):
 * skip the option, silently discard the packet, or report an ICMPv6
 * parameter problem (unconditionally, or only for non-multicast packets).
 *
 * The third argument `off' is the offset from the IPv6 header to the option,
 * which is necessary to return an ICMPv6 error if the IPv6 header and the
 * option header are not contiguous.
 *
 * Returns the option's data length (the byte following the type) when the
 * option is to be skipped, and -1 in every other case; in the -1 cases the
 * mbuf has been freed or handed to icmp6_error().
 */
int
ip6_unknown_opt(u_int8_t *optp, struct mbuf *m, int off)
{
	INIT_VNET_INET6(curvnet);
	struct ip6_hdr *ip6;
	int to_multicast;

	switch (IP6OPT_TYPE(*optp)) {
	case IP6OPT_TYPE_SKIP:		/* ignore the option */
		return ((int)optp[1]);
	case IP6OPT_TYPE_DISCARD:	/* silently discard */
		break;
	case IP6OPT_TYPE_FORCEICMP:	/* send ICMP even if multicasted */
		V_ip6stat.ip6s_badoptions++;
		icmp6_error(m, ICMP6_PARAM_PROB, ICMP6_PARAMPROB_OPTION, off);
		return (-1);
	case IP6OPT_TYPE_ICMP:		/* send ICMP if not multicasted */
		V_ip6stat.ip6s_badoptions++;
		ip6 = mtod(m, struct ip6_hdr *);
		to_multicast = IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) ||
		    (m->m_flags & (M_BCAST|M_MCAST)) != 0;
		if (!to_multicast) {
			icmp6_error(m, ICMP6_PARAM_PROB,
			    ICMP6_PARAMPROB_OPTION, off);
			return (-1);
		}
		break;
	}

	/*
	 * Discard, multicast-suppressed ICMP, or (XXX: NOTREACHED) an
	 * unexpected type value: drop the packet.
	 */
	m_freem(m);
	return (-1);
}
/*
* Create the "control" list for this pcb.
* These functions will not modify mbuf chain at all.
*
* With KAME mbuf chain restriction:
* The routine will be called from upper layer handlers like tcp6_input().
* Thus the routine assumes that the caller (tcp6_input) have already
* called IP6_EXTHDR_CHECK() and all the extension headers are located in the
* very first mbuf on the mbuf chain.
*
* ip6_savecontrol_v4 will handle those options that are possible to be
* set on a v4-mapped socket.
* ip6_savecontrol will directly call ip6_savecontrol_v4 to handle those
* options and handle the v6-only ones itself.
*/
struct mbuf **
ip6_savecontrol_v4(struct inpcb *inp, struct mbuf *m, struct mbuf **mp,
int *v4only)
{
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
#ifdef SO_TIMESTAMP
if ((inp->inp_socket->so_options & SO_TIMESTAMP) != 0) {
struct timeval tv;
microtime(&tv);
*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
SCM_TIMESTAMP, SOL_SOCKET);
if (*mp)
mp = &(*mp)->m_next;
}
#endif
if ((ip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
if (v4only != NULL)
*v4only = 1;
return (mp);
}
#define IS2292(inp, x, y) (((inp)->inp_flags & IN6P_RFC2292) ? (x) : (y))
/* RFC 2292 sec. 5 */
if ((inp->inp_flags & IN6P_PKTINFO) != 0) {
struct in6_pktinfo pi6;
bcopy(&ip6->ip6_dst, &pi6.ipi6_addr, sizeof(struct in6_addr));
in6_clearscope(&pi6.ipi6_addr); /* XXX */
pi6.ipi6_ifindex =
(m && m->m_pkthdr.rcvif) ? m->m_pkthdr.rcvif->if_index : 0;
*mp = sbcreatecontrol((caddr_t) &pi6,
sizeof(struct in6_pktinfo),
IS2292(inp, IPV6_2292PKTINFO, IPV6_PKTINFO), IPPROTO_IPV6);
if (*mp)
mp = &(*mp)->m_next;
}
if ((inp->inp_flags & IN6P_HOPLIMIT) != 0) {
int hlim = ip6->ip6_hlim & 0xff;
*mp = sbcreatecontrol((caddr_t) &hlim, sizeof(int),
IS2292(inp, IPV6_2292HOPLIMIT, IPV6_HOPLIMIT),
IPPROTO_IPV6);
if (*mp)
mp = &(*mp)->m_next;
}
if (v4only != NULL)
*v4only = 0;
return (mp);
}
/*
 * Build the ancillary-data chain for a received packet according to the
 * receive options enabled on the pcb.
 *
 * The options shared with v4-mapped sockets are produced by
 * ip6_savecontrol_v4(); when it reports a non-IPv6 header nothing else is
 * added.  Otherwise this routine appends, in order: traffic class,
 * the hop-by-hop options header, and every destination options / routing
 * header found while walking the extension header chain.
 *
 * in6p - pcb whose in6p_flags select the items to report
 * m    - received packet; its first bytes are the IPv6 header
 * mp   - where to link the first control mbuf; each created message is
 *        chained through m_next
 *
 * Control messages that cannot be created (sbcreatecontrol() returns NULL)
 * are silently omitted.
 */
void
ip6_savecontrol(struct inpcb *in6p, struct mbuf *m, struct mbuf **mp)
{
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	int v4only = 0;

	mp = ip6_savecontrol_v4(in6p, m, mp, &v4only);
	if (v4only)
		return;

	if ((in6p->in6p_flags & IN6P_TCLASS) != 0) {
		u_int32_t flowinfo;
		int tclass;

		/* The traffic class is the 8 bits above the 20-bit flow label. */
		flowinfo = (u_int32_t)ntohl(ip6->ip6_flow & IPV6_FLOWINFO_MASK);
		flowinfo >>= 20;

		tclass = flowinfo & 0xff;
		*mp = sbcreatecontrol((caddr_t) &tclass, sizeof(tclass),
		    IPV6_TCLASS, IPPROTO_IPV6);
		if (*mp)
			mp = &(*mp)->m_next;
	}

	/*
	 * IPV6_HOPOPTS socket option.  Recall that we required super-user
	 * privilege for the option (see ip6_ctloutput), but it might be too
	 * strict, since there might be some hop-by-hop options which can be
	 * returned to normal user.
	 * See also RFC 2292 section 6 (or RFC 3542 section 8).
	 */
	if ((in6p->in6p_flags & IN6P_HOPOPTS) != 0) {
		/*
		 * Check if a hop-by-hop options header is contained in the
		 * received packet, and if so, store the options as ancillary
		 * data. Note that a hop-by-hop options header must be
		 * just after the IPv6 header, which is assured through the
		 * IPv6 input processing.
		 */
		if (ip6->ip6_nxt == IPPROTO_HOPOPTS) {
			struct ip6_hbh *hbh;
			int hbhlen = 0;
#ifdef PULLDOWN_TEST
			struct mbuf *ext;
#endif

#ifndef PULLDOWN_TEST
			hbh = (struct ip6_hbh *)(ip6 + 1);
			hbhlen = (hbh->ip6h_len + 1) << 3;
#else
			ext = ip6_pullexthdr(m, sizeof(struct ip6_hdr),
			    ip6->ip6_nxt);
			if (ext == NULL) {
				V_ip6stat.ip6s_tooshort++;
				return;
			}
			hbh = mtod(ext, struct ip6_hbh *);
			hbhlen = (hbh->ip6h_len + 1) << 3;
			if (hbhlen != ext->m_len) {
				m_freem(ext);
				V_ip6stat.ip6s_tooshort++;
				return;
			}
#endif

			/*
			 * XXX: We copy the whole header even if a
			 * jumbo payload option is included, the option which
			 * is to be removed before returning according to
			 * RFC2292.
			 * Note: this constraint is removed in RFC3542
			 */
			*mp = sbcreatecontrol((caddr_t)hbh, hbhlen,
			    IS2292(in6p, IPV6_2292HOPOPTS, IPV6_HOPOPTS),
			    IPPROTO_IPV6);
			if (*mp)
				mp = &(*mp)->m_next;
#ifdef PULLDOWN_TEST
			m_freem(ext);
#endif
		}
	}

	if ((in6p->in6p_flags & (IN6P_RTHDR | IN6P_DSTOPTS)) != 0) {
		int nxt = ip6->ip6_nxt, off = sizeof(struct ip6_hdr);

		/*
		 * Search for destination options headers or routing
		 * header(s) through the header chain, and stores each
		 * header as ancillary data.
		 * Note that the order of the headers remains in
		 * the chain of ancillary data.
		 */
		while (1) {	/* is explicit loop prevention necessary? */
			struct ip6_ext *ip6e = NULL;
			int elen;
#ifdef PULLDOWN_TEST
			struct mbuf *ext = NULL;
#endif

			/*
			 * if it is not an extension header, don't try to
			 * pull it from the chain.
			 */
			switch (nxt) {
			case IPPROTO_DSTOPTS:
			case IPPROTO_ROUTING:
			case IPPROTO_HOPOPTS:
			case IPPROTO_AH: /* is it possible? */
				break;
			default:
				goto loopend;
			}

#ifndef PULLDOWN_TEST
			/* Stop as soon as the header leaves the first mbuf. */
			if (off + sizeof(*ip6e) > m->m_len)
				goto loopend;
			ip6e = (struct ip6_ext *)(mtod(m, caddr_t) + off);
			if (nxt == IPPROTO_AH)
				elen = (ip6e->ip6e_len + 2) << 2;
			else
				elen = (ip6e->ip6e_len + 1) << 3;
			if (off + elen > m->m_len)
				goto loopend;
#else
			ext = ip6_pullexthdr(m, off, nxt);
			if (ext == NULL) {
				V_ip6stat.ip6s_tooshort++;
				return;
			}
			ip6e = mtod(ext, struct ip6_ext *);
			if (nxt == IPPROTO_AH)
				elen = (ip6e->ip6e_len + 2) << 2;
			else
				elen = (ip6e->ip6e_len + 1) << 3;
			if (elen != ext->m_len) {
				m_freem(ext);
				V_ip6stat.ip6s_tooshort++;
				return;
			}
#endif

			switch (nxt) {
			case IPPROTO_DSTOPTS:
				if (!(in6p->in6p_flags & IN6P_DSTOPTS))
					break;

				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
				    IS2292(in6p,
					IPV6_2292DSTOPTS, IPV6_DSTOPTS),
				    IPPROTO_IPV6);
				if (*mp)
					mp = &(*mp)->m_next;
				break;
			case IPPROTO_ROUTING:
				/*
				 * The flag test must be parenthesized: `!' binds
				 * tighter than `&', so the unparenthesized form
				 * never skipped routing headers the socket had
				 * not asked for.
				 */
				if (!(in6p->in6p_flags & IN6P_RTHDR))
					break;

				*mp = sbcreatecontrol((caddr_t)ip6e, elen,
				    IS2292(in6p, IPV6_2292RTHDR, IPV6_RTHDR),
				    IPPROTO_IPV6);
				if (*mp)
					mp = &(*mp)->m_next;
				break;
			case IPPROTO_HOPOPTS:
			case IPPROTO_AH: /* is it possible? */
				break;

			default:
				/*
				 * other cases have been filtered in the above.
				 * none will visit this case.  here we supply
				 * the code just in case (nxt overwritten or
				 * other cases).
				 */
#ifdef PULLDOWN_TEST
				m_freem(ext);
#endif
				goto loopend;

			}

			/* proceed with the next header. */
			off += elen;
			nxt = ip6e->ip6e_nxt;
			ip6e = NULL;
#ifdef PULLDOWN_TEST
			m_freem(ext);
			ext = NULL;
#endif
		}
	  loopend:
		;
	}
}
#undef IS2292
/*
 * Deliver a path MTU notification to the socket behind in6p.
 *
 * Builds an IPV6_PATHMTU control message carrying *mtu and dst (with the
 * scope recovered by sa6_recoverscope()) and appends it to the socket's
 * receive buffer, waking up the reader on success.  Does nothing when mtu
 * is NULL, when scope recovery fails, or when the control message cannot
 * be created.
 */
void
ip6_notify_pmtu(struct inpcb *in6p, struct sockaddr_in6 *dst, u_int32_t *mtu)
{
	struct socket *so = in6p->inp_socket;
	struct ip6_mtuinfo info;
	struct mbuf *ctl;

	if (mtu == NULL)
		return;

#ifdef DIAGNOSTIC
	if (so == NULL)		/* I believe this is impossible */
		panic("ip6_notify_pmtu: socket is NULL");
#endif

	bzero(&info, sizeof(info));	/* zero-clear for safety */
	info.ip6m_mtu = *mtu;
	info.ip6m_addr = *dst;
	if (sa6_recoverscope(&info.ip6m_addr))
		return;

	ctl = sbcreatecontrol((caddr_t)&info, sizeof(info),
	    IPV6_PATHMTU, IPPROTO_IPV6);
	if (ctl == NULL)
		return;

	if (sbappendaddr(&so->so_rcv, (struct sockaddr *)dst, NULL, ctl)
	    != 0) {
		sorwakeup(so);
	} else {
		/* Could not be queued: release the control message. */
		m_freem(ctl);
		/* XXX: should count statistics */
	}
}
#ifdef PULLDOWN_TEST
/*
 * Copy a single extension header out of the mbuf chain.
 *
 * The header at offset `off' of type `nxt' is copied into a freshly
 * allocated mbuf (with a cluster attached when the header does not fit in
 * a plain mbuf).  Returns that mbuf, whose m_len equals the header length,
 * or NULL on allocation failure or when the header does not fit.
 */
static struct mbuf *
ip6_pullexthdr(struct mbuf *m, size_t off, int nxt)
{
	struct ip6_ext ext_hdr;
	struct mbuf *copy;
	size_t hdrlen;

#ifdef DIAGNOSTIC
	if (nxt != IPPROTO_DSTOPTS && nxt != IPPROTO_ROUTING &&
	    nxt != IPPROTO_HOPOPTS &&
	    nxt != IPPROTO_AH)	/* is it possible? */
		printf("ip6_pullexthdr: invalid nxt=%d\n", nxt);
#endif

	/* Peek at the generic part to learn the full header length. */
	m_copydata(m, off, sizeof(ext_hdr), (caddr_t)&ext_hdr);
	if (nxt == IPPROTO_AH)
		hdrlen = (ext_hdr.ip6e_len + 2) << 2;
	else
		hdrlen = (ext_hdr.ip6e_len + 1) << 3;

	MGET(copy, M_DONTWAIT, MT_DATA);
	if (copy == NULL)
		return NULL;
	if (hdrlen >= MLEN) {
		MCLGET(copy, M_DONTWAIT);
		if ((copy->m_flags & M_EXT) == 0) {
			m_free(copy);
			return NULL;
		}
	}

	copy->m_len = 0;
	if (hdrlen >= M_TRAILINGSPACE(copy)) {
		m_free(copy);
		return NULL;
	}

	m_copydata(m, off, hdrlen, mtod(copy, caddr_t));
	copy->m_len = hdrlen;
	return copy;
}
#endif
/*
 * Get pointer to the "next header" field of the header that precedes the
 * header currently processed (the one located at offset OFF).
 * XXX: This function supposes that
 *	M includes all headers,
 *	the next header field and the header length field of each header
 *	are valid, and
 *	the sum of each header length equals to OFF.
 * Because of these assumptions, this function must be called very
 * carefully. Moreover, it will not be used in the near future when
 * we develop `neater' mechanism to process extension headers.
 *
 * Returns NULL when no header was walked (OFF smaller than the IPv6
 * header size).
 */
char *
ip6_get_prevhdr(struct mbuf *m, int off)
{
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct ip6_ext *ext = NULL;
	int pos, proto;

	/* The header right after the IPv6 header: answer is in ip6 itself. */
	if (off == sizeof(struct ip6_hdr))
		return (&ip6->ip6_nxt);

	/* Walk the extension headers until OFF is reached. */
	proto = ip6->ip6_nxt;
	for (pos = sizeof(struct ip6_hdr); pos < off; proto = ext->ip6e_nxt) {
		ext = (struct ip6_ext *)(mtod(m, caddr_t) + pos);
		if (proto == IPPROTO_FRAGMENT)
			pos += sizeof(struct ip6_frag);
		else if (proto == IPPROTO_AH)
			pos += (ext->ip6e_len + 2) << 2;
		else
			pos += (ext->ip6e_len + 1) << 3;
	}

	return (ext != NULL ? &ext->ip6e_nxt : NULL);
}
/*
 * get next header offset.  m will be retained.
 *
 * Given the header of type `proto' at offset `off', returns the offset of
 * the header that follows it and, if nxtp is not NULL, stores that
 * header's protocol number in *nxtp.  Returns -1 when the packet is too
 * short, when the fragment is not the first one, or when `proto' is not a
 * header this routine can step over.
 */
int
ip6_nexthdr(struct mbuf *m, int off, int proto, int *nxtp)
{
	struct ip6_hdr ip6;
	struct ip6_ext ip6e;
	struct ip6_frag fh;

	/* just in case */
	if (m == NULL)
		panic("ip6_nexthdr: m == NULL");
	if ((m->m_flags & M_PKTHDR) == 0 || m->m_pkthdr.len < off)
		return (-1);

	switch (proto) {
	case IPPROTO_IPV6:
		if (m->m_pkthdr.len < off + sizeof(ip6))
			return (-1);
		m_copydata(m, off, sizeof(ip6), (caddr_t)&ip6);
		if (nxtp != NULL)
			*nxtp = ip6.ip6_nxt;
		return (off + sizeof(ip6));

	case IPPROTO_FRAGMENT:
		/*
		 * terminate parsing if it is not the first fragment,
		 * it does not make sense to parse through it.
		 */
		if (m->m_pkthdr.len < off + sizeof(fh))
			return (-1);
		m_copydata(m, off, sizeof(fh), (caddr_t)&fh);
		/* IP6F_OFF_MASK = 0xfff8(BigEndian), 0xf8ff(LittleEndian) */
		if (fh.ip6f_offlg & IP6F_OFF_MASK)
			return (-1);
		if (nxtp != NULL)
			*nxtp = fh.ip6f_nxt;
		return (off + sizeof(struct ip6_frag));

	case IPPROTO_AH:
	case IPPROTO_HOPOPTS:
	case IPPROTO_ROUTING:
	case IPPROTO_DSTOPTS:
		if (m->m_pkthdr.len < off + sizeof(ip6e))
			return (-1);
		m_copydata(m, off, sizeof(ip6e), (caddr_t)&ip6e);
		if (nxtp != NULL)
			*nxtp = ip6e.ip6e_nxt;
		/* AH counts its length in 4-byte units, the others in 8. */
		if (proto == IPPROTO_AH)
			off += (ip6e.ip6e_len + 2) << 2;
		else
			off += (ip6e.ip6e_len + 1) << 3;
		return (off);

	case IPPROTO_NONE:
	case IPPROTO_ESP:
	case IPPROTO_IPCOMP:
		/* give up */
	default:
		break;
	}

	return (-1);
}
/*
* get offset for the last header in the chain. m will be kept untainted.
*/
int
ip6_lasthdr(struct mbuf *m, int off, int proto, int *nxtp)
{
int newoff;
int nxt;
if (!nxtp) {
nxt = -1;
nxtp = &nxt;
}
while (1) {
newoff = ip6_nexthdr(m, off, proto, nxtp);
if (newoff < 0)
return off;
else if (newoff < off)
return -1; /* invalid */
else if (newoff == off)
return newoff;
off = newoff;
proto = *nxtp;
}
}
/*
 * Return the ip6aux area of the packet, creating it if necessary.
 *
 * The area lives right behind a PACKET_TAG_IPV6_INPUT mbuf tag.  A newly
 * created area is zero-filled.  Returns NULL if a new tag is needed but
 * cannot be allocated.
 */
struct ip6aux *
ip6_addaux(struct mbuf *m)
{
	struct m_tag *tag;

	tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);
	if (tag == NULL) {
		tag = m_tag_get(PACKET_TAG_IPV6_INPUT, sizeof(struct ip6aux),
		    M_NOWAIT);
		if (tag == NULL)
			return (NULL);
		m_tag_prepend(m, tag);
		bzero(tag + 1, sizeof(struct ip6aux));
	}
	return ((struct ip6aux *)(tag + 1));
}
/*
 * Look up the ip6aux area of the packet (the data following its
 * PACKET_TAG_IPV6_INPUT tag).  Returns NULL when the tag is absent.
 */
struct ip6aux *
ip6_findaux(struct mbuf *m)
{
	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);

	if (tag == NULL)
		return (NULL);
	return ((struct ip6aux *)(tag + 1));
}
/*
 * Remove the PACKET_TAG_IPV6_INPUT tag (and with it the ip6aux area) from
 * the packet, if it has one.
 */
void
ip6_delaux(struct mbuf *m)
{
	struct m_tag *tag = m_tag_find(m, PACKET_TAG_IPV6_INPUT, NULL);

	if (tag != NULL)
		m_tag_delete(m, tag);
}
/*
 * System control for IP6
 *
 * Table of PRC_NCMDS errno values, laid out four entries per row.
 * NOTE(review): presumably indexed by PRC_* control command, with 0 meaning
 * that no error is reported for that command - confirm against the users
 * of this table.
 */
u_char inet6ctlerrmap[PRC_NCMDS] = {
0, 0, 0, 0,
0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH,
EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED,
EMSGSIZE, EHOSTUNREACH, 0, 0,
0, 0, 0, 0,
ENOPROTOOPT
};
Index: projects/arpv2_merge_1/sys/netinet6/ip6_mroute.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet6/ip6_mroute.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet6/ip6_mroute.c (revision 185839)
@@ -1,1954 +1,1952 @@
/*-
* Copyright (C) 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $KAME: ip6_mroute.c,v 1.58 2001/12/18 02:36:31 itojun Exp $
*/
/*-
* Copyright (c) 1989 Stephen Deering
* Copyright (c) 1992, 1993
* The Regents of the University of California. All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* Stephen Deering of Stanford University.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ip_mroute.c 8.2 (Berkeley) 11/15/93
* BSDI ip_mroute.c,v 2.10 1996/11/14 00:29:52 jch Exp
*/
/*
* IP multicast forwarding procedures
*
* Written by David Waitzman, BBN Labs, August 1988.
* Modified by Steve Deering, Stanford, February 1989.
* Modified by Mark J. Steiglitz, Stanford, May, 1991
* Modified by Van Jacobson, LBL, January 1993
* Modified by Ajit Thyagarajan, PARC, August 1993
* Modified by Bill Fenner, PARC, April 1994
*
* MROUTING Revision: 3.5.1.2 + PIM-SMv2 (pimd) Support
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/callout.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/signalvar.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/raw_cb.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/icmp6.h>
#include <netinet/vinet.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#include <netinet6/ip6_mroute.h>
#include <netinet6/ip6protosw.h>
#include <netinet6/pim6.h>
#include <netinet6/pim6_var.h>
#include <netinet6/vinet6.h>
/* malloc(9) type used for the allocations released with M_MRTABLE6 below */
static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry");
/* XXX: this is a very common idiom; move to <sys/mbuf.h> ? */
/* non-zero when the mbuf has M_EXT set */
#define M_HASCL(m) ((m)->m_flags & M_EXT)
/* forward declarations of file-local helpers */
static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *);
static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *);
static void pim6_init(void);
static int set_pim6(int *);
static int socket_send __P((struct socket *, struct mbuf *,
struct sockaddr_in6 *));
static int register_send __P((struct ip6_hdr *, struct mif6 *,
struct mbuf *));
extern struct domain inet6domain;
/* XXX: referenced from ip_mroute.c for dynamically loading this code. */
/*
 * Protocol switch entry for PIM over IPv6: a raw-socket protocol whose
 * input goes to pim6_input and whose output and control paths reuse the
 * raw IPv6 (rip6_*) handlers.
 */
struct ip6protosw in6_pim_protosw = {
.pr_type = SOCK_RAW,
.pr_domain = &inet6domain,
.pr_protocol = IPPROTO_PIM,
.pr_flags = PR_ATOMIC|PR_ADDR|PR_LASTHDR,
.pr_input = pim6_input,
.pr_output = rip6_output,
.pr_ctloutput = rip6_ctloutput,
.pr_init = pim6_init,
.pr_usrreqs = &rip6_usrreqs
};
#ifdef VIMAGE_GLOBALS
/* set to the MRT6_INIT command by ip6_mrouter_init(), reset to 0 when done */
static int ip6_mrouter_ver;
#endif
SYSCTL_DECL(_net_inet6);
SYSCTL_DECL(_net_inet6_ip6);
SYSCTL_NODE(_net_inet6, IPPROTO_PIM, pim, CTLFLAG_RW, 0, "PIM");
/* multicast routing statistics, exported through sysctl */
static struct mrt6stat mrt6stat;
SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW,
&mrt6stat, mrt6stat,
"Multicast Routing Statistics (struct mrt6stat, netinet6/ip6_mroute.h)");
#define NO_RTE_FOUND 0x1
#define RTE_FOUND 0x2
/* multicast forwarding cache: hash table of mf6c chains (see MF6CHASH) */
static struct mf6c *mf6ctable[MF6CTBLSIZ];
SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mf6ctable, CTLFLAG_RD,
&mf6ctable, sizeof(mf6ctable), "S,*mf6ctable[MF6CTBLSIZ]",
"Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], "
"netinet6/ip6_mroute.h)");
/* one counter per hash bucket; NOTE(review): presumably pending upcalls - confirm */
static u_char n6expire[MF6CTBLSIZ];
/* multicast interface table, indexed by mifi_t */
static struct mif6 mif6table[MAXMIFS];
SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD,
&mif6table, sizeof(mif6table), "S,vif[MAXMIFS]",
"Multicast Interfaces (struct mif[MAXMIFS], netinet6/ip6_mroute.h)");
#ifdef MRT6DEBUG
#ifdef VIMAGE_GLOBALS
static u_int mrt6debug = 0; /* debug level */
#endif
/* bit flags for the debug level */
#define DEBUG_MFC 0x02
#define DEBUG_FORWARD 0x04
#define DEBUG_EXPIRE 0x08
#define DEBUG_XMIT 0x10
#define DEBUG_REG 0x20
#define DEBUG_PIM 0x40
#endif
static void expire_upcalls(void *);
#define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */
#define UPCALL_EXPIRE 6 /* number of timeouts */
#ifdef INET
#ifdef MROUTING
/* IPv4 multicast routing socket, defined elsewhere */
extern struct socket *ip_mrouter;
#endif
#endif
/*
* 'Interfaces' associated with decapsulator (so we can tell
* packets that went through it from ones that get reflected
* by a broken gateway). Different from IPv4 register_if,
* these interfaces are linked into the system ifnet list,
* because per-interface IPv6 statistics are maintained in
* ifp->if_afdata. But it does not have any routes point
* to them. I.e., packets can't be sent this way. They
* only exist as a placeholder for multicast source
* verification.
*/
static struct ifnet *multicast_register_if6;
#define ENCAP_HOPS 64
/*
* Private variables.
*/
/* number of entries of mif6table in use (upper bound for valid mifi) */
static mifi_t nummifs = 0;
/* index of the register mif; (mifi_t)-1 when there is none */
static mifi_t reg_mif_num = (mifi_t)-1;
static struct pim6stat pim6stat;
SYSCTL_STRUCT(_net_inet6_pim, PIM6CTL_STATS, stats, CTLFLAG_RD,
&pim6stat, pim6stat,
"PIM Statistics (struct pim6stat, netinet6/pim_var.h)");
#ifdef VIMAGE_GLOBALS
/* PIM processing switch: 0 or 1, see set_pim6() */
static int pim6;
#endif
/*
* Hash function for a source, group entry
*
* XORs the four 32-bit words of both addresses and reduces the result
* with MF6CHASHMOD.
*/
#define MF6CHASH(a, g) MF6CHASHMOD((a).s6_addr32[0] ^ (a).s6_addr32[1] ^ \
(a).s6_addr32[2] ^ (a).s6_addr32[3] ^ \
(g).s6_addr32[0] ^ (g).s6_addr32[1] ^ \
(g).s6_addr32[2] ^ (g).s6_addr32[3])
/*
* Find a route for a given origin IPv6 address and Multicast group address.
*
* Sets rt to the first entry in the hash chain whose origin and group match
* and which has no stalled packets (mf6c_stall == NULL), or to NULL when no
* such entry exists.  Updates the lookup/miss counters in mrt6stat.
* o and g are evaluated more than once.
*/
#define MF6CFIND(o, g, rt) do { \
struct mf6c *_rt = mf6ctable[MF6CHASH(o,g)]; \
rt = NULL; \
mrt6stat.mrt6s_mfc_lookups++; \
while (_rt) { \
if (IN6_ARE_ADDR_EQUAL(&_rt->mf6c_origin.sin6_addr, &(o)) && \
IN6_ARE_ADDR_EQUAL(&_rt->mf6c_mcastgrp.sin6_addr, &(g)) && \
(_rt->mf6c_stall == NULL)) { \
rt = _rt; \
break; \
} \
_rt = _rt->mf6c_next; \
} \
if (rt == NULL) { \
mrt6stat.mrt6s_mfc_misses++; \
} \
} while (/*CONSTCOND*/ 0)
/*
* Macros to compute elapsed time efficiently
* Borrowed from Van Jacobson's scheduling code
* XXX: replace with timersub() ?
*
* TV_DELTA stores a - b, expressed in microseconds, into delta.
*/
#define TV_DELTA(a, b, delta) do { \
int xxs; \
\
delta = (a).tv_usec - (b).tv_usec; \
if ((xxs = (a).tv_sec - (b).tv_sec)) { \
switch (xxs) { \
case 2: \
delta += 1000000; \
/* FALLTHROUGH */ \
case 1: \
delta += 1000000; \
break; \
default: \
delta += (1000000 * xxs); \
} \
} \
} while (/*CONSTCOND*/ 0)
/* XXX: replace with timercmp(a, b, <) ? */
/* true when timeval a is earlier than timeval b */
#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \
(a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec)
#ifdef UPCALL_TIMING
#define UPCALL_MAX 50
/* NOTE(review): presumably a histogram of upcall latencies - confirm */
static u_long upcall_data[UPCALL_MAX + 1];
static void collate();
#endif /* UPCALL_TIMING */
/* file-local handlers behind the ioctl and setsockopt entry points */
static int get_sg_cnt(struct sioc_sg_req6 *);
static int get_mif6_cnt(struct sioc_mif_req6 *);
static int ip6_mrouter_init(struct socket *, int, int);
static int add_m6if(struct mif6ctl *);
static int del_m6if(mifi_t *);
static int add_m6fc(struct mf6cctl *);
static int del_m6fc(struct mf6cctl *);
/* callout that runs expire_upcalls() every EXPIRE_TIMEOUT ticks */
static struct callout expire_upcalls_ch;
/* externally visible entry points of this file */
int X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m);
int X_ip6_mrouter_done(void);
int X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt);
int X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt);
int X_mrt6_ioctl(int cmd, caddr_t data);
/*
 * Protocol initialization hook (pr_init) for PIM over IPv6: clears the
 * multicast router version and, when built with MRT6DEBUG, the debug level.
 */
static void
pim6_init(void)
{
	INIT_VNET_INET6(curvnet);

#ifdef MRT6DEBUG
	V_mrt6debug = 0;	/* debug level */
#endif
	V_ip6_mrouter_ver = 0;
}
/*
 * Handle MRT setsockopt commands to modify the multicast routing tables.
 *
 * Only the socket registered as ip6_mrouter may issue commands, except for
 * MRT6_INIT which performs that registration.  Each command copies its
 * argument in from the sockopt and dispatches to the matching handler.
 * Returns 0 or an errno value (EACCES for a foreign socket, EOPNOTSUPP for
 * an unknown command, otherwise whatever the copy-in or handler returned).
 */
int
X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt)
{
	struct mf6cctl mfcc;
	struct mif6ctl mifc;
	mifi_t mifi;
	int optval;
	int error = 0;

	if (so != ip6_mrouter && sopt->sopt_name != MRT6_INIT)
		return (EACCES);

	switch (sopt->sopt_name) {
	case MRT6_INIT:
#ifdef MRT6_OINIT
	case MRT6_OINIT:
#endif
		error = sooptcopyin(sopt, &optval, sizeof(optval),
		    sizeof(optval));
		if (error == 0)
			error = ip6_mrouter_init(so, optval, sopt->sopt_name);
		break;

	case MRT6_DONE:
		error = X_ip6_mrouter_done();
		break;

	case MRT6_ADD_MIF:
		error = sooptcopyin(sopt, &mifc, sizeof(mifc), sizeof(mifc));
		if (error == 0)
			error = add_m6if(&mifc);
		break;

	case MRT6_ADD_MFC:
		error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc));
		if (error == 0)
			error = add_m6fc(&mfcc);
		break;

	case MRT6_DEL_MFC:
		error = sooptcopyin(sopt, &mfcc, sizeof(mfcc), sizeof(mfcc));
		if (error == 0)
			error = del_m6fc(&mfcc);
		break;

	case MRT6_DEL_MIF:
		error = sooptcopyin(sopt, &mifi, sizeof(mifi), sizeof(mifi));
		if (error == 0)
			error = del_m6if(&mifi);
		break;

	case MRT6_PIM:
		error = sooptcopyin(sopt, &optval, sizeof(optval),
		    sizeof(optval));
		if (error == 0)
			error = set_pim6(&optval);
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}
/*
 * Handle MRT getsockopt commands
 *
 * Only the registered multicast routing socket may query (EACCES
 * otherwise).  MRT6_PIM copies out the current PIM switch; any other
 * option name is accepted and returns 0 without copying anything out.
 */
int
X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt)
{
	INIT_VNET_INET6(curvnet);
	int error = 0;

	if (so != ip6_mrouter)
		return (EACCES);

	if (sopt->sopt_name == MRT6_PIM)
		error = sooptcopyout(sopt, &V_pim6, sizeof(V_pim6));

	return (error);
}
/*
 * Handle ioctl commands to obtain information from the cache
 *
 * SIOCGETSGCNT_IN6 reports per (source, group) counters and
 * SIOCGETMIFCNT_IN6 reports per-mif counters; `data' is the matching
 * request structure.  Any other command yields EINVAL.
 */
int
X_mrt6_ioctl(int cmd, caddr_t data)
{
	if (cmd == SIOCGETSGCNT_IN6)
		return (get_sg_cnt((struct sioc_sg_req6 *)data));
	if (cmd == SIOCGETMIFCNT_IN6)
		return (get_mif6_cnt((struct sioc_mif_req6 *)data));
	return (EINVAL);
}
/*
 * returns the packet, byte, rpf-failure count for the source group provided
 *
 * Looks up the forwarding cache entry for (req->src, req->grp) and copies
 * its counters into req.  Returns ESRCH when no entry matches, 0 otherwise.
 */
static int
get_sg_cnt(struct sioc_sg_req6 *req)
{
	struct mf6c *entry;
	int s;

	s = splnet();
	MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, entry);
	splx(s);

	if (entry == NULL)
		return (ESRCH);

	req->pktcnt = entry->mf6c_pkt_cnt;
	req->bytecnt = entry->mf6c_byte_cnt;
	req->wrong_if = entry->mf6c_wrong_if;
	return (0);
}
/*
* returns the input and output packet and byte counts on the mif provided
*/
static int
get_mif6_cnt(struct sioc_mif_req6 *req)
{
mifi_t mifi = req->mifi;
if (mifi >= nummifs)
return (EINVAL);
req->icount = mif6table[mifi].m6_pkt_in;
req->ocount = mif6table[mifi].m6_pkt_out;
req->ibytes = mif6table[mifi].m6_bytes_in;
req->obytes = mif6table[mifi].m6_bytes_out;
return (0);
}
/*
 * Set the PIM switch to *i.  Only 0 and 1 are accepted; anything else
 * returns EINVAL and leaves the switch untouched.
 */
static int
set_pim6(int *i)
{
	INIT_VNET_INET6(curvnet);

	if (*i != 0 && *i != 1)
		return (EINVAL);

	V_pim6 = *i;
	return (0);
}
/*
 * Enable multicast routing
 *
 * Registers `so' as the multicast routing socket, clears the forwarding
 * cache and expiry counters, switches PIM off and starts the upcall expiry
 * callout.
 *
 * so  - must be a raw ICMPv6 socket (EOPNOTSUPP otherwise)
 * v   - requested version, must be 1 (ENOPROTOOPT otherwise)
 * cmd - the sockopt command used, remembered as the router version
 *
 * Returns EADDRINUSE if a routing socket is already registered.
 */
static int
ip6_mrouter_init(struct socket *so, int v, int cmd)
{
	INIT_VNET_INET6(curvnet);

#ifdef MRT6DEBUG
	if (V_mrt6debug)
		log(LOG_DEBUG,
		    "ip6_mrouter_init: so_type = %d, pr_protocol = %d\n",
		    so->so_type, so->so_proto->pr_protocol);
#endif

	if (so->so_type != SOCK_RAW ||
	    so->so_proto->pr_protocol != IPPROTO_ICMPV6)
		return (EOPNOTSUPP);
	if (v != 1)
		return (ENOPROTOOPT);
	if (ip6_mrouter != NULL)
		return (EADDRINUSE);

	ip6_mrouter = so;
	V_ip6_mrouter_ver = cmd;

	/* Start from an empty forwarding cache. */
	bzero((caddr_t)mf6ctable, sizeof(mf6ctable));
	bzero((caddr_t)n6expire, sizeof(n6expire));

	V_pim6 = 0;	/* used for stubbing out/in pim stuff */

	callout_init(&expire_upcalls_ch, 0);
	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
	    expire_upcalls, NULL);

#ifdef MRT6DEBUG
	if (V_mrt6debug)
		log(LOG_DEBUG, "ip6_mrouter_init\n");
#endif

	return (0);
}
/*
 * Disable multicast routing.
 *
 * Turns off all-multicast reception on every non-register mif (unless an
 * IPv4 multicast routing daemon is still registered), clears the mif
 * table, stops the upcall expiry callout, frees every forwarding cache
 * entry together with the packets stalled on it, tears down the register
 * interface and finally forgets the routing socket.  Always returns 0.
 */
int
X_ip6_mrouter_done(void)
{
	INIT_VNET_INET6(curvnet);
	mifi_t mifi;
	int i;
	struct mf6c *rt;
	struct rtdetq *rte;
	int s;

	s = splnet();

	/*
	 * For each phyint in use, disable promiscuous reception of all IPv6
	 * multicasts.
	 */
#ifdef INET
#ifdef MROUTING
	/*
	 * If there is still an IPv4 multicast routing daemon, we leave the
	 * interfaces receiving all multicast packets.
	 * XXX: there may be an interface in which the IPv4 multicast
	 * daemon is not interested...
	 */
	if (!V_ip_mrouter)
#endif
#endif
	{
		for (mifi = 0; mifi < nummifs; mifi++) {
			if (mif6table[mifi].m6_ifp &&
			    !(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
				if_allmulti(mif6table[mifi].m6_ifp, 0);
			}
		}
	}
	bzero((caddr_t)mif6table, sizeof(mif6table));
	nummifs = 0;

	V_pim6 = 0; /* used to stub out/in pim specific code */

	callout_stop(&expire_upcalls_ch);

	/*
	 * Free all multicast forwarding cache entries.
	 */
	for (i = 0; i < MF6CTBLSIZ; i++) {
		rt = mf6ctable[i];
		while (rt) {
			struct mf6c *frt;

			for (rte = rt->mf6c_stall; rte != NULL; ) {
				struct rtdetq *n = rte->next;

				/*
				 * The stalled packet is a whole mbuf chain
				 * (queued from an M_COPYALL copy), so the
				 * entire chain must be released, not just
				 * its first mbuf.
				 */
				m_freem(rte->m);
				free(rte, M_MRTABLE6);
				rte = n;
			}
			frt = rt;
			rt = rt->mf6c_next;
			free(frt, M_MRTABLE6);
		}
	}

	bzero((caddr_t)mf6ctable, sizeof(mf6ctable));

	/*
	 * Reset register interface
	 */
	if (reg_mif_num != (mifi_t)-1 && multicast_register_if6 != NULL) {
		if_detach(multicast_register_if6);
		if_free(multicast_register_if6);
		reg_mif_num = (mifi_t)-1;
		multicast_register_if6 = NULL;
	}

	ip6_mrouter = NULL;
	V_ip6_mrouter_ver = 0;

	splx(s);

#ifdef MRT6DEBUG
	if (V_mrt6debug)
		log(LOG_DEBUG, "ip6_mrouter_done\n");
#endif

	return (0);
}
/*
 * Scratch socket address handed to socket_send() as the "from" address of
 * upcalls; only sin6_addr is rewritten per packet.
 */
static struct sockaddr_in6 sin6 = {
	sizeof(struct sockaddr_in6),	/* first member: length */
	AF_INET6			/* second member: address family */
};
/*
* Add a mif to the mif table
*/
static int
add_m6if(struct mif6ctl *mifcp)
{
INIT_VNET_NET(curvnet);
struct mif6 *mifp;
struct ifnet *ifp;
int error, s;
if (mifcp->mif6c_mifi >= MAXMIFS)
return (EINVAL);
mifp = mif6table + mifcp->mif6c_mifi;
if (mifp->m6_ifp)
return (EADDRINUSE); /* XXX: is it appropriate? */
if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > V_if_index)
return (ENXIO);
ifp = ifnet_byindex(mifcp->mif6c_pifi);
if (mifcp->mif6c_flags & MIFF_REGISTER) {
if (reg_mif_num == (mifi_t)-1) {
ifp = if_alloc(IFT_OTHER);
if_initname(ifp, "register_mif", 0);
ifp->if_flags |= IFF_LOOPBACK;
if_attach(ifp);
multicast_register_if6 = ifp;
reg_mif_num = mifcp->mif6c_mifi;
/*
* it is impossible to guess the ifindex of the
* register interface. So mif6c_pifi is automatically
* calculated.
*/
mifcp->mif6c_pifi = ifp->if_index;
} else {
ifp = multicast_register_if6;
}
} /* if REGISTER */
else {
/* Make sure the interface supports multicast */
if ((ifp->if_flags & IFF_MULTICAST) == 0)
return (EOPNOTSUPP);
s = splnet();
error = if_allmulti(ifp, 1);
splx(s);
if (error)
return (error);
}
s = splnet();
mifp->m6_flags = mifcp->mif6c_flags;
mifp->m6_ifp = ifp;
/* initialize per mif pkt counters */
mifp->m6_pkt_in = 0;
mifp->m6_pkt_out = 0;
mifp->m6_bytes_in = 0;
mifp->m6_bytes_out = 0;
splx(s);
/* Adjust nummifs up if the mifi is higher than nummifs */
if (nummifs <= mifcp->mif6c_mifi)
nummifs = mifcp->mif6c_mifi + 1;
#ifdef MRT6DEBUG
if (V_mrt6debug)
log(LOG_DEBUG,
"add_mif #%d, phyint %s\n",
mifcp->mif6c_mifi,
ifp->if_xname);
#endif
return (0);
}
/*
 * Delete a mif from the mif table.
 *
 * *mifip is the index of the mif to remove.  A physical mif has its
 * all-multicast request dropped; a register mif has the register
 * pseudo-interface detached and freed.  The slot is zeroed and nummifs is
 * lowered to one past the highest slot still in use.
 *
 * Returns 0 on success, or EINVAL when the index is not below nummifs or
 * the slot holds no interface.
 */
static int
del_m6if(mifi_t *mifip)
{
	struct mif6 *mifp;
	mifi_t mifi;
	struct ifnet *ifp;
	int s;

	if (*mifip >= nummifs)
		return (EINVAL);

	/* Form the table pointer only once the index is known to be valid. */
	mifp = mif6table + *mifip;
	if (mifp->m6_ifp == NULL)
		return (EINVAL);

	s = splnet();

	if (!(mifp->m6_flags & MIFF_REGISTER)) {
		/*
		 * XXX: what if there is yet IPv4 multicast daemon
		 * using the interface?
		 */
		ifp = mifp->m6_ifp;

		if_allmulti(ifp, 0);
	} else {
		if (reg_mif_num != (mifi_t)-1 &&
		    multicast_register_if6 != NULL) {
			if_detach(multicast_register_if6);
			if_free(multicast_register_if6);
			reg_mif_num = (mifi_t)-1;
			multicast_register_if6 = NULL;
		}
	}

	bzero((caddr_t)mifp, sizeof(*mifp));

	/* Adjust nummifs down */
	for (mifi = nummifs; mifi > 0; mifi--)
		if (mif6table[mifi - 1].m6_ifp)
			break;
	nummifs = mifi;

	splx(s);

#ifdef MRT6DEBUG
	if (V_mrt6debug)
		log(LOG_DEBUG, "del_m6if %d, nummifs %d\n", *mifip, nummifs);
#endif

	return (0);
}
/*
 * Add (or update) a multicast forwarding cache entry.
 *
 * mfccp supplies the origin, the multicast group, the parent mif and the
 * set of outgoing mifs.  Three cases are handled:
 *   1. MF6CFIND finds an entry: only its parent and ifset are replaced.
 *   2. One or more entries for the pair have packets stalled on them
 *      (an upcall is pending): each is completed, its counters are reset
 *      and the queued packets are forwarded through ip6_mdq() and freed.
 *   3. Otherwise a matching entry in the hash chain is refreshed, or a new
 *      entry is allocated and linked at the head of the chain.
 *
 * Returns 0 on success or ENOBUFS when a new entry cannot be allocated.
 */
static int
add_m6fc(struct mf6cctl *mfccp)
{
	struct mf6c *rt;
	u_long hash;
	struct rtdetq *rte;
	u_short nstl;
	int s;
	char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];

	MF6CFIND(mfccp->mf6cc_origin.sin6_addr,
	    mfccp->mf6cc_mcastgrp.sin6_addr, rt);

	/* If an entry already exists, just update the fields */
	if (rt) {
#ifdef MRT6DEBUG
		if (V_mrt6debug & DEBUG_MFC) {
			/*
			 * The hash has not been computed yet on this path,
			 * so it is not part of the message; the conversions
			 * now match the three arguments supplied.
			 */
			log(LOG_DEBUG,
			    "add_m6fc no upcall o %s g %s p %x\n",
			    ip6_sprintf(ip6bufo,
				&mfccp->mf6cc_origin.sin6_addr),
			    ip6_sprintf(ip6bufg,
				&mfccp->mf6cc_mcastgrp.sin6_addr),
			    mfccp->mf6cc_parent);
		}
#endif

		s = splnet();
		rt->mf6c_parent = mfccp->mf6cc_parent;
		rt->mf6c_ifset = mfccp->mf6cc_ifset;
		splx(s);
		return (0);
	}

	/*
	 * Find the entry for which the upcall was made and update
	 */
	s = splnet();
	hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr,
	    mfccp->mf6cc_mcastgrp.sin6_addr);
	for (rt = mf6ctable[hash], nstl = 0; rt; rt = rt->mf6c_next) {
		if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr,
			&mfccp->mf6cc_origin.sin6_addr) &&
		    IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr,
			&mfccp->mf6cc_mcastgrp.sin6_addr) &&
		    (rt->mf6c_stall != NULL)) {

			/* More than one pending entry is unexpected. */
			if (nstl++)
				log(LOG_ERR,
				    "add_m6fc: %s o %s g %s p %x dbx %p\n",
				    "multiple kernel entries",
				    ip6_sprintf(ip6bufo,
					&mfccp->mf6cc_origin.sin6_addr),
				    ip6_sprintf(ip6bufg,
					&mfccp->mf6cc_mcastgrp.sin6_addr),
				    mfccp->mf6cc_parent, rt->mf6c_stall);

#ifdef MRT6DEBUG
			if (V_mrt6debug & DEBUG_MFC)
				/* mf6c_stall is a pointer: print with %p. */
				log(LOG_DEBUG,
				    "add_m6fc o %s g %s p %x dbg %p\n",
				    ip6_sprintf(ip6bufo,
					&mfccp->mf6cc_origin.sin6_addr),
				    ip6_sprintf(ip6bufg,
					&mfccp->mf6cc_mcastgrp.sin6_addr),
				    mfccp->mf6cc_parent, rt->mf6c_stall);
#endif

			rt->mf6c_origin = mfccp->mf6cc_origin;
			rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp;
			rt->mf6c_parent = mfccp->mf6cc_parent;
			rt->mf6c_ifset = mfccp->mf6cc_ifset;
			/* initialize pkt counters per src-grp */
			rt->mf6c_pkt_cnt = 0;
			rt->mf6c_byte_cnt = 0;
			rt->mf6c_wrong_if = 0;

			rt->mf6c_expire = 0;	/* Don't clean this guy up */
			n6expire[hash]--;

			/* forward, then free, packets Qed on this entry */
			for (rte = rt->mf6c_stall; rte != NULL; ) {
				struct rtdetq *n = rte->next;

				ip6_mdq(rte->m, rte->ifp, rt);
				m_freem(rte->m);
#ifdef UPCALL_TIMING
				collate(&(rte->t));
#endif /* UPCALL_TIMING */
				free(rte, M_MRTABLE6);
				rte = n;
			}
			rt->mf6c_stall = NULL;
		}
	}

	/*
	 * It is possible that an entry is being inserted without an upcall
	 */
	if (nstl == 0) {
#ifdef MRT6DEBUG
		if (V_mrt6debug & DEBUG_MFC)
			/* hash is a u_long: print with %lu. */
			log(LOG_DEBUG,
			    "add_mfc no upcall h %lu o %s g %s p %x\n",
			    hash,
			    ip6_sprintf(ip6bufo,
				&mfccp->mf6cc_origin.sin6_addr),
			    ip6_sprintf(ip6bufg,
				&mfccp->mf6cc_mcastgrp.sin6_addr),
			    mfccp->mf6cc_parent);
#endif

		for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
			if (IN6_ARE_ADDR_EQUAL(&rt->mf6c_origin.sin6_addr,
				&mfccp->mf6cc_origin.sin6_addr) &&
			    IN6_ARE_ADDR_EQUAL(&rt->mf6c_mcastgrp.sin6_addr,
				&mfccp->mf6cc_mcastgrp.sin6_addr)) {

				rt->mf6c_origin = mfccp->mf6cc_origin;
				rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp;
				rt->mf6c_parent = mfccp->mf6cc_parent;
				rt->mf6c_ifset = mfccp->mf6cc_ifset;
				/* initialize pkt counters per src-grp */
				rt->mf6c_pkt_cnt = 0;
				rt->mf6c_byte_cnt = 0;
				rt->mf6c_wrong_if = 0;

				if (rt->mf6c_expire)
					n6expire[hash]--;
				rt->mf6c_expire = 0;

				/*
				 * Stop here so that rt stays non-NULL.
				 * Without this the loop always ran off the
				 * end of the chain and the code below
				 * allocated a duplicate entry for a pair
				 * that had just been updated.
				 */
				break;
			}
		}
		if (rt == NULL) {
			/* no upcall, so make a new entry */
			rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6,
			    M_NOWAIT);
			if (rt == NULL) {
				splx(s);
				return (ENOBUFS);
			}

			/* insert new entry at head of hash chain */
			rt->mf6c_origin = mfccp->mf6cc_origin;
			rt->mf6c_mcastgrp = mfccp->mf6cc_mcastgrp;
			rt->mf6c_parent = mfccp->mf6cc_parent;
			rt->mf6c_ifset = mfccp->mf6cc_ifset;
			/* initialize pkt counters per src-grp */
			rt->mf6c_pkt_cnt = 0;
			rt->mf6c_byte_cnt = 0;
			rt->mf6c_wrong_if = 0;
			rt->mf6c_expire = 0;
			rt->mf6c_stall = NULL;

			/* link into table */
			rt->mf6c_next = mf6ctable[hash];
			mf6ctable[hash] = rt;
		}
	}
	splx(s);
	return (0);
}
#ifdef UPCALL_TIMING
/*
 * Collect delay statistics on the upcalls: when *t lies before the current
 * time, the delta (as produced by TV_DELTA) is shifted right by 10, clamped
 * to UPCALL_MAX and used as the index of the upcall_data[] bucket to bump.
 */
static void
collate(struct timeval *t)
{
	struct timeval now;
	u_long elapsed;
	u_long bucket;

	GET_TIME(now);

	/* Nothing to record unless the stamp is in the past. */
	if (!TV_LT(*t, now))
		return;

	TV_DELTA(now, *t, elapsed);

	bucket = elapsed >> 10;
	if (bucket > UPCALL_MAX)
		bucket = UPCALL_MAX;

	++upcall_data[bucket];
}
#endif /* UPCALL_TIMING */
/*
 * Delete a forwarding cache entry.
 *
 * The entry removed is the first one in the hash chain whose origin and
 * group equal those in *mfccp and which has no packets stalled on it.
 * Returns 0 after unlinking and freeing it, or EADDRNOTAVAIL when no such
 * entry exists.
 */
static int
del_m6fc(struct mf6cctl *mfccp)
{
	struct mf6c *victim;
	struct mf6c **link;
	u_long bucket;
	int spl;

	bucket = MF6CHASH(mfccp->mf6cc_origin.sin6_addr,
	    mfccp->mf6cc_mcastgrp.sin6_addr);

#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_MFC) {
		char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN];

		log(LOG_DEBUG,"del_m6fc orig %s mcastgrp %s\n",
		    ip6_sprintf(ip6bufo, &mfccp->mf6cc_origin.sin6_addr),
		    ip6_sprintf(ip6bufg, &mfccp->mf6cc_mcastgrp.sin6_addr));
	}
#endif

	spl = splnet();

	/* Walk the chain keeping a handle on the link that points at us. */
	for (link = &mf6ctable[bucket]; (victim = *link) != NULL;
	    link = &victim->mf6c_next) {
		if (IN6_ARE_ADDR_EQUAL(&mfccp->mf6cc_origin.sin6_addr,
			&victim->mf6c_origin.sin6_addr) &&
		    IN6_ARE_ADDR_EQUAL(&mfccp->mf6cc_mcastgrp.sin6_addr,
			&victim->mf6c_mcastgrp.sin6_addr) &&
		    victim->mf6c_stall == NULL)
			break;
	}

	if (victim == NULL) {
		splx(spl);
		return (EADDRNOTAVAIL);
	}

	/* Unlink, then release. */
	*link = victim->mf6c_next;
	free(victim, M_MRTABLE6);

	splx(spl);

	return (0);
}
/*
 * Append mbuf chain `mm' to the receive buffer of socket `s', tagged with
 * source address `src', and wake up the reader.
 *
 * Returns 0 when the data was queued.  When `s' is null or sbappendaddr()
 * reports failure (returns 0), `mm' is freed here and -1 is returned, so
 * the caller never has to free `mm' itself.
 */
static int
socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in6 *src)
{
	if (!s ||
	    sbappendaddr(&s->so_rcv, (struct sockaddr *)src, mm,
		(struct mbuf *)0) == 0) {
		m_freem(mm);
		return (-1);
	}

	sorwakeup(s);
	return (0);
}
/*
 * IPv6 multicast forwarding function. This function assumes that the packet
 * pointed to by "ip6" has arrived on (or is about to be sent to) the interface
 * pointed to by "ifp", and the packet is to be relayed to other networks
 * that have members of the packet's destination IPv6 multicast group.
 *
 * The packet is returned unscathed to the caller, unless it is
 * erroneous, in which case a non-zero return value tells the caller to
 * discard it.
 *
 * When a forwarding cache entry exists the packet is handed to ip6_mdq().
 * Otherwise a copy is queued on a (possibly new) incomplete cache entry
 * and, for a new entry, a MRT6MSG_NOCACHE upcall carrying a copy of the
 * IPv6 header is sent to the routing daemon's socket.
 *
 * Returns 0 when the packet was handled (forwarded, queued or silently
 * ignored), the result of ip6_mdq() on a cache hit, ENOBUFS on allocation
 * failure or a full routing socket, and EINVAL for an unknown
 * V_ip6_mrouter_ver.
 *
 * NOTE: this implementation assumes that m->m_pkthdr.rcvif is NULL iff
 * this function is called in the originating context (i.e., not when
 * forwarding a packet from other node).  ip6_output(), which is currently the
 * only function that calls this function is called in the originating context,
 * explicitly ensures this condition.  It is caller's responsibility to ensure
 * that if this function is called from somewhere else in the originating
 * context in the future.
 */
int
X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m)
{
	INIT_VNET_INET6(curvnet);
	struct mf6c *rt;
	struct mif6 *mifp;
	struct mbuf *mm;
	int s;
	mifi_t mifi;
	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];

#ifdef MRT6DEBUG
	if (V_mrt6debug & DEBUG_FORWARD)
		log(LOG_DEBUG, "ip6_mforward: src %s, dst %s, ifindex %d\n",
		    ip6_sprintf(ip6bufs, &ip6->ip6_src),
		    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
		    ifp->if_index);
#endif

	/*
	 * Don't forward a packet with Hop limit of zero or one,
	 * or a packet destined to a local-only group.
	 */
	if (ip6->ip6_hlim <= 1 || IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst) ||
	    IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst))
		return (0);
	ip6->ip6_hlim--;

	/*
	 * Source address check: do not forward packets with unspecified
	 * source. It was discussed in July 2000, on ipngwg mailing list.
	 * This is rather more serious than unicast cases, because some
	 * MLD packets can be sent with the unspecified source address
	 * (although such packets must normally set 1 to the hop limit field).
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src)) {
		V_ip6stat.ip6s_cantforward++;
		/* Rate-limit the log message. */
		if (V_ip6_log_time + V_ip6_log_interval < time_second) {
			V_ip6_log_time = time_second;
			log(LOG_DEBUG,
			    "cannot forward "
			    "from %s to %s nxt %d received on %s\n",
			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
			    ip6_sprintf(ip6bufd, &ip6->ip6_dst),
			    ip6->ip6_nxt,
			    if_name(m->m_pkthdr.rcvif));
		}
		return (0);
	}

	/*
	 * Determine forwarding mifs from the forwarding cache table
	 */
	s = splnet();
	MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt);

	/* Entry exists, so forward if necessary */
	if (rt) {
		splx(s);
		return (ip6_mdq(m, ifp, rt));
	} else {
		/*
		 * If we don't have a route for packet's origin,
		 * Make a copy of the packet &
		 * send message to routing daemon
		 */

		struct mbuf *mb0;
		struct rtdetq *rte;
		u_long hash;
/*		int i, npkts;*/
#ifdef UPCALL_TIMING
		struct timeval tp;

		GET_TIME(tp);
#endif /* UPCALL_TIMING */

		mrt6stat.mrt6s_no_route++;
#ifdef MRT6DEBUG
		if (V_mrt6debug & (DEBUG_FORWARD | DEBUG_MFC))
			log(LOG_DEBUG, "ip6_mforward: no rte s %s g %s\n",
			    ip6_sprintf(ip6bufs, &ip6->ip6_src),
			    ip6_sprintf(ip6bufd, &ip6->ip6_dst));
#endif

		/*
		 * Allocate mbufs early so that we don't do extra work if we
		 * are just going to fail anyway.
		 */
		rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6,
		    M_NOWAIT);
		if (rte == NULL) {
			splx(s);
			return (ENOBUFS);
		}
		mb0 = m_copy(m, 0, M_COPYALL);
		/*
		 * Pullup packet header if needed before storing it,
		 * as other references may modify it in the meantime.
		 */
		if (mb0 &&
		    (M_HASCL(mb0) || mb0->m_len < sizeof(struct ip6_hdr)))
			mb0 = m_pullup(mb0, sizeof(struct ip6_hdr));
		if (mb0 == NULL) {
			free(rte, M_MRTABLE6);
			splx(s);
			return (ENOBUFS);
		}

		/* is there an upcall waiting for this packet? */
		hash = MF6CHASH(ip6->ip6_src, ip6->ip6_dst);
		for (rt = mf6ctable[hash]; rt; rt = rt->mf6c_next) {
			if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_src,
				&rt->mf6c_origin.sin6_addr) &&
			    IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst,
				&rt->mf6c_mcastgrp.sin6_addr) &&
			    (rt->mf6c_stall != NULL))
				break;
		}

		if (rt == NULL) {
			struct mrt6msg *im;
#ifdef MRT6_OINIT
			struct omrt6msg *oim;
#endif

			/* no upcall, so make a new entry */
			rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6,
			    M_NOWAIT);
			if (rt == NULL) {
				free(rte, M_MRTABLE6);
				m_freem(mb0);
				splx(s);
				return (ENOBUFS);
			}
			/*
			 * Make a copy of the header to send to the user
			 * level process
			 */
			mm = m_copy(mb0, 0, sizeof(struct ip6_hdr));

			if (mm == NULL) {
				free(rte, M_MRTABLE6);
				m_freem(mb0);
				free(rt, M_MRTABLE6);
				splx(s);
				return (ENOBUFS);
			}

			/*
			 * Send message to routing daemon
			 */
			sin6.sin6_addr = ip6->ip6_src;

			im = NULL;
#ifdef MRT6_OINIT
			oim = NULL;
#endif
			switch (V_ip6_mrouter_ver) {
#ifdef MRT6_OINIT
			case MRT6_OINIT:
				oim = mtod(mm, struct omrt6msg *);
				oim->im6_msgtype = MRT6MSG_NOCACHE;
				oim->im6_mbz = 0;
				break;
#endif
			case MRT6_INIT:
				im = mtod(mm, struct mrt6msg *);
				im->im6_msgtype = MRT6MSG_NOCACHE;
				im->im6_mbz = 0;
				break;
			default:
				/*
				 * The header copy is never handed to
				 * socket_send() on this path, so it has to
				 * be released here along with the rest.
				 */
				m_freem(mm);
				free(rte, M_MRTABLE6);
				m_freem(mb0);
				free(rt, M_MRTABLE6);
				splx(s);
				return (EINVAL);
			}

#ifdef MRT6DEBUG
			if (V_mrt6debug & DEBUG_FORWARD)
				log(LOG_DEBUG,
				    "getting the iif info in the kernel\n");
#endif

			/* Find the mif the packet arrived on. */
			for (mifp = mif6table, mifi = 0;
			    mifi < nummifs && mifp->m6_ifp != ifp;
			    mifp++, mifi++)
				;

			switch (V_ip6_mrouter_ver) {
#ifdef MRT6_OINIT
			case MRT6_OINIT:
				oim->im6_mif = mifi;
				break;
#endif
			case MRT6_INIT:
				im->im6_mif = mifi;
				break;
			}

			/* socket_send() frees mm itself when it fails. */
			if (socket_send(ip6_mrouter, mm, &sin6) < 0) {
				log(LOG_WARNING, "ip6_mforward: ip6_mrouter "
				    "socket queue full\n");
				mrt6stat.mrt6s_upq_sockfull++;
				free(rte, M_MRTABLE6);
				m_freem(mb0);
				free(rt, M_MRTABLE6);
				splx(s);
				return (ENOBUFS);
			}

			mrt6stat.mrt6s_upcalls++;

			/* insert new entry at head of hash chain */
			bzero(rt, sizeof(*rt));
			rt->mf6c_origin.sin6_family = AF_INET6;
			rt->mf6c_origin.sin6_len = sizeof(struct sockaddr_in6);
			rt->mf6c_origin.sin6_addr = ip6->ip6_src;
			rt->mf6c_mcastgrp.sin6_family = AF_INET6;
			rt->mf6c_mcastgrp.sin6_len = sizeof(struct sockaddr_in6);
			rt->mf6c_mcastgrp.sin6_addr = ip6->ip6_dst;
			rt->mf6c_expire = UPCALL_EXPIRE;
			n6expire[hash]++;
			rt->mf6c_parent = MF6C_INCOMPLETE_PARENT;

			/* link into table */
			rt->mf6c_next = mf6ctable[hash];
			mf6ctable[hash] = rt;
			/* Add this entry to the end of the queue */
			rt->mf6c_stall = rte;
		} else {
			/* determine if q has overflowed */
			struct rtdetq **p;
			int npkts = 0;

			for (p = &rt->mf6c_stall; *p != NULL; p = &(*p)->next)
				if (++npkts > MAX_UPQ6) {
					mrt6stat.mrt6s_upq_ovflw++;
					free(rte, M_MRTABLE6);
					m_freem(mb0);
					splx(s);
					return (0);
				}

			/* Add this entry to the end of the queue */
			*p = rte;
		}

		rte->next = NULL;
		rte->m = mb0;
		rte->ifp = ifp;
#ifdef UPCALL_TIMING
		rte->t = tp;
#endif /* UPCALL_TIMING */

		splx(s);

		return (0);
	}
}
/*
 * Callout handler that ages forwarding cache entries whose upcall has not
 * been answered.  Every entry that still has stalled packets and a
 * non-zero expiry counter has the counter decremented; when it reaches
 * zero the stalled packets are dropped and the entry is unlinked and
 * freed.  The handler re-arms itself with EXPIRE_TIMEOUT before returning.
 */
static void
expire_upcalls(void *unused)
{
	struct rtdetq *pending;
	struct mf6c *entry, **link;
	int bucket;
	int spl;

	spl = splnet();
	for (bucket = 0; bucket < MF6CTBLSIZ; bucket++) {
		/* No entry in this chain is waiting to expire. */
		if (n6expire[bucket] == 0)
			continue;

		link = &mf6ctable[bucket];
		while ((entry = *link) != NULL) {
			pending = entry->mf6c_stall;

			/*
			 * Keep real cache entries (nothing stalled), entries
			 * marked to never expire (shouldn't happen), and
			 * entries whose counter has not yet run out.
			 */
			if (pending == NULL ||
			    entry->mf6c_expire == 0 ||
			    --entry->mf6c_expire != 0) {
				link = &entry->mf6c_next;
				continue;
			}

#ifdef MRT6DEBUG
			if (V_mrt6debug & DEBUG_EXPIRE) {
				char ip6bufo[INET6_ADDRSTRLEN];
				char ip6bufg[INET6_ADDRSTRLEN];

				log(LOG_DEBUG, "expire_upcalls: expiring (%s %s)\n",
				    ip6_sprintf(ip6bufo, &entry->mf6c_origin.sin6_addr),
				    ip6_sprintf(ip6bufg, &entry->mf6c_mcastgrp.sin6_addr));
			}
#endif

			/*
			 * Drop every stalled packet together with its queue
			 * record (pending is known to be non-NULL here).
			 */
			while (pending != NULL) {
				struct rtdetq *following = pending->next;

				m_freem(pending->m);
				free(pending, M_MRTABLE6);
				pending = following;
			}

			mrt6stat.mrt6s_cache_cleanups++;
			n6expire[bucket]--;

			/* Unlink the entry; link already points at its successor. */
			*link = entry->mf6c_next;
			free(entry, M_MRTABLE6);
		}
	}
	splx(spl);

	callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT,
	    expire_upcalls, NULL);
}
/*
* Packet forwarding routine once entry in the cache is made
*/
static int
ip6_mdq(struct mbuf *m, struct ifnet *ifp, struct mf6c *rt)
{
INIT_VNET_INET6(curvnet);
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
mifi_t mifi, iif;
struct mif6 *mifp;
int plen = m->m_pkthdr.len;
struct in6_addr src0, dst0; /* copies for local work */
u_int32_t iszone, idzone, oszone, odzone;
int error = 0;
/*
* Macro to send packet on mif. Since RSVP packets don't get counted on
* input, they shouldn't get counted on output, so statistics keeping is
* separate.
*/
#define MC6_SEND(ip6, mifp, m) do { \
if ((mifp)->m6_flags & MIFF_REGISTER) \
register_send((ip6), (mifp), (m)); \
else \
phyint_send((ip6), (mifp), (m)); \
} while (/*CONSTCOND*/ 0)
/*
* Don't forward if it didn't arrive from the parent mif
* for its origin.
*/
mifi = rt->mf6c_parent;
if ((mifi >= nummifs) || (mif6table[mifi].m6_ifp != ifp)) {
/* came in the wrong interface */
#ifdef MRT6DEBUG
if (V_mrt6debug & DEBUG_FORWARD)
log(LOG_DEBUG,
"wrong if: ifid %d mifi %d mififid %x\n",
ifp->if_index, mifi,
mif6table[mifi].m6_ifp->if_index);
#endif
mrt6stat.mrt6s_wrong_if++;
rt->mf6c_wrong_if++;
/*
* If we are doing PIM processing, and we are forwarding
* packets on this interface, send a message to the
* routing daemon.
*/
/* have to make sure this is a valid mif */
if (mifi < nummifs && mif6table[mifi].m6_ifp)
if (V_pim6 && (m->m_flags & M_LOOP) == 0) {
/*
* Check the M_LOOP flag to avoid an
* unnecessary PIM assert.
* XXX: M_LOOP is an ad-hoc hack...
*/
static struct sockaddr_in6 sin6 =
{ sizeof(sin6), AF_INET6 };
struct mbuf *mm;
struct mrt6msg *im;
#ifdef MRT6_OINIT
struct omrt6msg *oim;
#endif
mm = m_copy(m, 0, sizeof(struct ip6_hdr));
if (mm &&
(M_HASCL(mm) ||
mm->m_len < sizeof(struct ip6_hdr)))
mm = m_pullup(mm, sizeof(struct ip6_hdr));
if (mm == NULL)
return (ENOBUFS);
#ifdef MRT6_OINIT
oim = NULL;
#endif
im = NULL;
switch (V_ip6_mrouter_ver) {
#ifdef MRT6_OINIT
case MRT6_OINIT:
oim = mtod(mm, struct omrt6msg *);
oim->im6_msgtype = MRT6MSG_WRONGMIF;
oim->im6_mbz = 0;
break;
#endif
case MRT6_INIT:
im = mtod(mm, struct mrt6msg *);
im->im6_msgtype = MRT6MSG_WRONGMIF;
im->im6_mbz = 0;
break;
default:
m_freem(mm);
return (EINVAL);
}
for (mifp = mif6table, iif = 0;
iif < nummifs && mifp &&
mifp->m6_ifp != ifp;
mifp++, iif++)
;
switch (V_ip6_mrouter_ver) {
#ifdef MRT6_OINIT
case MRT6_OINIT:
oim->im6_mif = iif;
sin6.sin6_addr = oim->im6_src;
break;
#endif
case MRT6_INIT:
im->im6_mif = iif;
sin6.sin6_addr = im->im6_src;
break;
}
mrt6stat.mrt6s_upcalls++;
if (socket_send(ip6_mrouter, mm, &sin6) < 0) {
#ifdef MRT6DEBUG
if (V_mrt6debug)
log(LOG_WARNING, "mdq, ip6_mrouter socket queue full\n");
#endif
++mrt6stat.mrt6s_upq_sockfull;
return (ENOBUFS);
} /* if socket Q full */
} /* if PIM */
return (0);
} /* if wrong iif */
/* If I sourced this packet, it counts as output, else it was input. */
if (m->m_pkthdr.rcvif == NULL) {
/* XXX: is rcvif really NULL when output?? */
mif6table[mifi].m6_pkt_out++;
mif6table[mifi].m6_bytes_out += plen;
} else {
mif6table[mifi].m6_pkt_in++;
mif6table[mifi].m6_bytes_in += plen;
}
rt->mf6c_pkt_cnt++;
rt->mf6c_byte_cnt += plen;
/*
* For each mif, forward a copy of the packet if there are group
* members downstream on the interface.
*/
src0 = ip6->ip6_src;
dst0 = ip6->ip6_dst;
if ((error = in6_setscope(&src0, ifp, &iszone)) != 0 ||
(error = in6_setscope(&dst0, ifp, &idzone)) != 0) {
V_ip6stat.ip6s_badscope++;
return (error);
}
for (mifp = mif6table, mifi = 0; mifi < nummifs; mifp++, mifi++) {
if (IF_ISSET(mifi, &rt->mf6c_ifset)) {
/*
* check if the outgoing packet is going to break
* a scope boundary.
* XXX For packets through PIM register tunnel
* interface, we believe a routing daemon.
*/
if (!(mif6table[rt->mf6c_parent].m6_flags &
MIFF_REGISTER) &&
!(mif6table[mifi].m6_flags & MIFF_REGISTER)) {
if (in6_setscope(&src0, mif6table[mifi].m6_ifp,
&oszone) ||
in6_setscope(&dst0, mif6table[mifi].m6_ifp,
&odzone) ||
iszone != oszone ||
idzone != odzone) {
V_ip6stat.ip6s_badscope++;
continue;
}
}
mifp->m6_pkt_out++;
mifp->m6_bytes_out += plen;
MC6_SEND(ip6, mifp, m);
}
}
return (0);
}
/*
 * Send a copy of packet `m' out of the physical interface behind `mifp'.
 *
 * ip6 is the packet's IPv6 header.  A private copy of the chain is made so
 * that the caller's packet is left untouched.  A locally originated packet
 * (rcvif == NULL) goes through ip6_output(); a forwarded packet is looped
 * back when IN6_LOOKUP_MULTI finds the group on the outgoing interface and
 * is then handed straight to the interface's if_output routine when it fits
 * the link MTU.  Oversized packets trigger an ICMPv6 "packet too big" only
 * when V_ip6_mcast_pmtu is set; otherwise they are discarded.
 *
 * Nothing is returned: the result stored in `error' is only reported
 * through the MRT6DEBUG log.
 *
 * NOTE(review): the two lines below that start with "- " look like removal
 * markers carried over from the enclosing diff (revision 185838 -> 185839),
 * i.e. the IF_AFDATA lock is no longer taken around if_output in the new
 * revision -- confirm against the applied source.
 */
static void
phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m)
{
INIT_VNET_INET6(curvnet);
struct mbuf *mb_copy;
struct ifnet *ifp = mifp->m6_ifp;
int error = 0;
int s = splnet(); /* needs to protect static "ro" below. */
static struct route_in6 ro;
struct in6_multi *in6m;
struct sockaddr_in6 *dst6;
u_long linkmtu;
/*
* Make a new reference to the packet; make sure that
* the IPv6 header is actually copied, not just referenced,
* so that ip6_output() only scribbles on the copy.
*/
mb_copy = m_copy(m, 0, M_COPYALL);
if (mb_copy &&
(M_HASCL(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr)))
mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr));
if (mb_copy == NULL) {
/* Copy failed: nothing to send, just drop the spl level again. */
splx(s);
return;
}
/* set MCAST flag to the outgoing packet */
mb_copy->m_flags |= M_MCAST;
/*
* If we sourced the packet, call ip6_output since we may divide
* the packet into fragments when the packet is too big for the
* outgoing interface.
* Otherwise, we can simply send the packet to the interface
* sending queue.
*/
if (m->m_pkthdr.rcvif == NULL) {
struct ip6_moptions im6o;
im6o.im6o_multicast_ifp = ifp;
/* XXX: ip6_output will override ip6->ip6_hlim */
im6o.im6o_multicast_hlim = ip6->ip6_hlim;
im6o.im6o_multicast_loop = 1;
error = ip6_output(mb_copy, NULL, &ro,
IPV6_FORWARDING, &im6o, NULL, NULL);
#ifdef MRT6DEBUG
if (V_mrt6debug & DEBUG_XMIT)
log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
mifp - mif6table, error);
#endif
splx(s);
return;
}
/*
* If we belong to the destination multicast group
* on the outgoing interface, loop back a copy.
*/
dst6 = (struct sockaddr_in6 *)&ro.ro_dst;
IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
if (in6m != NULL) {
dst6->sin6_len = sizeof(struct sockaddr_in6);
dst6->sin6_family = AF_INET6;
dst6->sin6_addr = ip6->ip6_dst;
/* The loopback is given the caller's packet `m', not mb_copy. */
ip6_mloopback(ifp, m, (struct sockaddr_in6 *)&ro.ro_dst);
}
/*
* Put the packet into the sending queue of the outgoing interface
* if it would fit in the MTU of the interface.
*/
linkmtu = IN6_LINKMTU(ifp);
/* A link MTU below IPV6_MMTU is also let through to if_output. */
if (mb_copy->m_pkthdr.len <= linkmtu || linkmtu < IPV6_MMTU) {
dst6->sin6_len = sizeof(struct sockaddr_in6);
dst6->sin6_family = AF_INET6;
dst6->sin6_addr = ip6->ip6_dst;
/*
* We just call if_output instead of nd6_output here, since
* we need no ND for a multicast forwarded packet...right?
*/
- IF_AFDATA_LOCK(ifp);
error = (*ifp->if_output)(ifp, mb_copy,
(struct sockaddr *)&ro.ro_dst, NULL);
- IF_AFDATA_UNLOCK(ifp);
#ifdef MRT6DEBUG
if (V_mrt6debug & DEBUG_XMIT)
log(LOG_DEBUG, "phyint_send on mif %d err %d\n",
mifp - mif6table, error);
#endif
} else {
/*
* pMTU discovery is intentionally disabled by default, since
* various router may notify pMTU in multicast, which can be
* a DDoS to a router
*/
if (V_ip6_mcast_pmtu)
icmp6_error(mb_copy, ICMP6_PACKET_TOO_BIG, 0, linkmtu);
else {
#ifdef MRT6DEBUG
if (V_mrt6debug & DEBUG_XMIT) {
char ip6bufs[INET6_ADDRSTRLEN];
char ip6bufd[INET6_ADDRSTRLEN];
log(LOG_DEBUG,
"phyint_send: packet too big on %s o %s "
"g %s size %d(discarded)\n",
if_name(ifp),
ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst),
mb_copy->m_pkthdr.len);
}
#endif /* MRT6DEBUG */
m_freem(mb_copy); /* simply discard the packet */
}
}
splx(s);
}
static int
register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m)
{
struct mbuf *mm;
int i, len = m->m_pkthdr.len;
static struct sockaddr_in6 sin6 = { sizeof(sin6), AF_INET6 };
struct mrt6msg *im6;
#ifdef MRT6DEBUG
if (V_mrt6debug) {
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
log(LOG_DEBUG, "** IPv6 register_send **\n src %s dst %s\n",
ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst));
}
#endif
++pim6stat.pim6s_snd_registers;
/* Make a copy of the packet to send to the user level process */
MGETHDR(mm, M_DONTWAIT, MT_HEADER);
if (mm == NULL)
return (ENOBUFS);
mm->m_pkthdr.rcvif = NULL;
mm->m_data += max_linkhdr;
mm->m_len = sizeof(struct ip6_hdr);
if ((mm->m_next = m_copy(m, 0, M_COPYALL)) == NULL) {
m_freem(mm);
return (ENOBUFS);
}
i = MHLEN - M_LEADINGSPACE(mm);
if (i > len)
i = len;
mm = m_pullup(mm, i);
if (mm == NULL)
return (ENOBUFS);
/* TODO: check it! */
mm->m_pkthdr.len = len + sizeof(struct ip6_hdr);
/*
* Send message to routing daemon
*/
sin6.sin6_addr = ip6->ip6_src;
im6 = mtod(mm, struct mrt6msg *);
im6->im6_msgtype = MRT6MSG_WHOLEPKT;
im6->im6_mbz = 0;
im6->im6_mif = mif - mif6table;
/* iif info is not given for reg. encap.n */
mrt6stat.mrt6s_upcalls++;
if (socket_send(ip6_mrouter, mm, &sin6) < 0) {
#ifdef MRT6DEBUG
if (V_mrt6debug)
log(LOG_WARNING,
"register_send: ip6_mrouter socket queue full\n");
#endif
++mrt6stat.mrt6s_upq_sockfull;
return (ENOBUFS);
}
return (0);
}
/*
 * PIM sparse mode hook.
 * Receives the pim control messages, and passes them up to the listening
 * socket, using rip6_input.
 * The only message processed is the REGISTER pim message; the pim header
 * is stripped off, and the inner packet is looped back in through the
 * register mif's interface with if_simloop().
 *
 * mp:    in/out pointer to the packet.
 * offp:  in/out offset of the PIM header within the packet.
 * proto: protocol number, passed through to rip6_input().
 *
 * Always returns IPPROTO_DONE: on every path the packet has been either
 * freed or handed on, so the caller must not touch it again.
 */
int
pim6_input(struct mbuf **mp, int *offp, int proto)
{
	INIT_VNET_INET6(curvnet);
	struct pim *pim; /* pointer to a pim struct */
	struct ip6_hdr *ip6;
	int pimlen;
	struct mbuf *m = *mp;
	int minlen;
	int off = *offp;

	++pim6stat.pim6s_rcv_total;

	ip6 = mtod(m, struct ip6_hdr *);
	pimlen = m->m_pkthdr.len - *offp;

	/*
	 * Validate lengths
	 */
	if (pimlen < PIM_MINLEN) {
		++pim6stat.pim6s_rcv_tooshort;
#ifdef MRT6DEBUG
		if (V_mrt6debug & DEBUG_PIM)
			log(LOG_DEBUG,"pim6_input: PIM packet too short\n");
#endif
		m_freem(m);
		return (IPPROTO_DONE);
	}

	/*
	 * if the packet is at least as big as a REGISTER, go ahead
	 * and grab the PIM REGISTER header size, to avoid another
	 * possible m_pullup() later.
	 *
	 * PIM_MINLEN       == pimhdr + u_int32 == 8
	 * PIM6_REG_MINLEN  == pimhdr + reghdr + eip6hdr == 4 + 4 + 40
	 */
	minlen = (pimlen >= PIM6_REG_MINLEN) ? PIM6_REG_MINLEN : PIM_MINLEN;

	/*
	 * Make sure that the IP6 and PIM headers in contiguous memory, and
	 * possibly the PIM REGISTER header
	 */
#ifndef PULLDOWN_TEST
	IP6_EXTHDR_CHECK(m, off, minlen, IPPROTO_DONE);
	/* adjust pointer */
	ip6 = mtod(m, struct ip6_hdr *);

	/* adjust mbuf to point to the PIM header */
	pim = (struct pim *)((caddr_t)ip6 + off);
#else
	IP6_EXTHDR_GET(pim, struct pim *, m, off, minlen);
	if (pim == NULL) {
		pim6stat.pim6s_rcv_tooshort++;
		return (IPPROTO_DONE);
	}
#endif

#define PIM6_CHECKSUM
#ifdef PIM6_CHECKSUM
	{
		int cksumlen;

		/*
		 * Validate checksum.
		 * If PIM REGISTER, exclude the data packet
		 */
		if (pim->pim_type == PIM_REGISTER)
			cksumlen = PIM_MINLEN;
		else
			cksumlen = pimlen;

		if (in6_cksum(m, IPPROTO_PIM, off, cksumlen)) {
			++pim6stat.pim6s_rcv_badsum;
#ifdef MRT6DEBUG
			if (V_mrt6debug & DEBUG_PIM)
				log(LOG_DEBUG,
				    "pim6_input: invalid checksum\n");
#endif
			m_freem(m);
			return (IPPROTO_DONE);
		}
	}
#endif /* PIM6_CHECKSUM */

	/* PIM version check */
	if (pim->pim_ver != PIM_VERSION) {
		++pim6stat.pim6s_rcv_badversion;
#ifdef MRT6DEBUG
		log(LOG_ERR,
		    "pim6_input: incorrect version %d, expecting %d\n",
		    pim->pim_ver, PIM_VERSION);
#endif
		m_freem(m);
		return (IPPROTO_DONE);
	}

	if (pim->pim_type == PIM_REGISTER) {
		/*
		 * since this is a REGISTER, we'll make a copy of the register
		 * headers ip6+pim+u_int32_t+encap_ip6, to be passed up to the
		 * routing daemon.
		 */
		static struct sockaddr_in6 dst = { sizeof(dst), AF_INET6 };

		struct mbuf *mcp;
		struct ip6_hdr *eip6;
		u_int32_t *reghdr;
#ifdef MRT6DEBUG
		char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
#endif

		++pim6stat.pim6s_rcv_registers;

		if ((reg_mif_num >= nummifs) || (reg_mif_num == (mifi_t) -1)) {
#ifdef MRT6DEBUG
			if (V_mrt6debug & DEBUG_PIM)
				log(LOG_DEBUG,
				    "pim6_input: register mif not set: %d\n",
				    reg_mif_num);
#endif
			m_freem(m);
			return (IPPROTO_DONE);
		}

		reghdr = (u_int32_t *)(pim + 1);

		/* A null register carries no data; just tell the daemon. */
		if ((ntohl(*reghdr) & PIM_NULL_REGISTER))
			goto pim6_input_to_daemon;

		/*
		 * Validate length
		 */
		if (pimlen < PIM6_REG_MINLEN) {
			++pim6stat.pim6s_rcv_tooshort;
			++pim6stat.pim6s_rcv_badregisters;
#ifdef MRT6DEBUG
			log(LOG_ERR,
			    "pim6_input: register packet size too "
			    "small %d from %s\n",
			    pimlen, ip6_sprintf(ip6bufs, &ip6->ip6_src));
#endif
			m_freem(m);
			return (IPPROTO_DONE);
		}

		eip6 = (struct ip6_hdr *) (reghdr + 1);
#ifdef MRT6DEBUG
		if (V_mrt6debug & DEBUG_PIM)
			log(LOG_DEBUG,
			    "pim6_input[register], eip6: %s -> %s, "
			    "eip6 plen %d\n",
			    ip6_sprintf(ip6bufs, &eip6->ip6_src),
			    ip6_sprintf(ip6bufd, &eip6->ip6_dst),
			    ntohs(eip6->ip6_plen));
#endif

		/* verify the version number of the inner packet */
		if ((eip6->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION) {
			++pim6stat.pim6s_rcv_badregisters;
#ifdef MRT6DEBUG
			log(LOG_DEBUG, "pim6_input: invalid IP version (%d) "
			    "of the inner packet\n",
			    (eip6->ip6_vfc & IPV6_VERSION));
#endif
			m_freem(m);
			/*
			 * The packet has just been freed, so the caller
			 * must be told that processing is finished.
			 * Returning any other protocol number would make it
			 * keep dispatching on the freed mbuf.
			 */
			return (IPPROTO_DONE);
		}

		/* verify the inner packet is destined to a mcast group */
		if (!IN6_IS_ADDR_MULTICAST(&eip6->ip6_dst)) {
			++pim6stat.pim6s_rcv_badregisters;
#ifdef MRT6DEBUG
			if (V_mrt6debug & DEBUG_PIM)
				log(LOG_DEBUG,
				    "pim6_input: inner packet of register "
				    "is not multicast %s\n",
				    ip6_sprintf(ip6bufd, &eip6->ip6_dst));
#endif
			m_freem(m);
			return (IPPROTO_DONE);
		}

		/*
		 * make a copy of the whole header to pass to the daemon later.
		 */
		mcp = m_copy(m, 0, off + PIM6_REG_MINLEN);
		if (mcp == NULL) {
#ifdef MRT6DEBUG
			log(LOG_ERR,
			    "pim6_input: pim register: "
			    "could not copy register head\n");
#endif
			m_freem(m);
			return (IPPROTO_DONE);
		}

		/*
		 * forward the inner ip6 packet; point m_data at the inner ip6.
		 */
		m_adj(m, off + PIM_MINLEN);
#ifdef MRT6DEBUG
		if (V_mrt6debug & DEBUG_PIM) {
			log(LOG_DEBUG,
			    "pim6_input: forwarding decapsulated register: "
			    "src %s, dst %s, mif %d\n",
			    ip6_sprintf(ip6bufs, &eip6->ip6_src),
			    ip6_sprintf(ip6bufd, &eip6->ip6_dst),
			    reg_mif_num);
		}
#endif

		/* The result of the loopback is deliberately not examined. */
		(void)if_simloop(mif6table[reg_mif_num].m6_ifp, m,
		    dst.sin6_family, 0);

		/* prepare the register head to send to the mrouting daemon */
		m = mcp;
	}

	/*
	 * Pass the PIM message up to the daemon; if it is a register message
	 * pass the 'head' only up to the daemon.  This includes the
	 * encapsulator ip6 header, pim header, register header and the
	 * encapsulated ip6 header.
	 */
  pim6_input_to_daemon:
	rip6_input(&m, offp, proto);
	return (IPPROTO_DONE);
}
Index: projects/arpv2_merge_1/sys/netinet6/ip6_output.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet6/ip6_output.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet6/ip6_output.c (revision 185839)
@@ -1,3352 +1,3348 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $KAME: ip6_output.c,v 1.279 2002/01/26 06:12:30 jinmei Exp $
*/
/*-
* Copyright (c) 1982, 1986, 1988, 1990, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)ip_output.c 8.3 (Berkeley) 1/21/94
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/errno.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/ucred.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/pfil.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet6/ip6_var.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp_var.h>
#include <netinet6/nd6.h>
#include <netinet/vinet.h>
#ifdef IPSEC
#include <netipsec/ipsec.h>
#include <netipsec/ipsec6.h>
#include <netipsec/key.h>
#include <netinet6/ip6_ipsec.h>
#endif /* IPSEC */
#include <netinet6/ip6protosw.h>
#include <netinet6/scope6_var.h>
#include <netinet6/vinet6.h>
/* malloc(9) type, private to this file, labelled "internet multicast options". */
static MALLOC_DEFINE(M_IP6MOPTS, "ip6_moptions", "internet multicast options");
/*
 * Scratch set of mbufs used by ip6_output() while it assembles a packet:
 * one mbuf per extension header that may be inserted, plus the mbuf that
 * holds the IPv6 header itself.  A NULL member means "header not present".
 */
struct ip6_exthdrs {
struct mbuf *ip6e_ip6; /* IPv6 header (ip6_output reads it back after ip6_splithdr()) */
struct mbuf *ip6e_hbh; /* hop-by-hop options header */
struct mbuf *ip6e_dest1; /* destination options header, 1st part */
struct mbuf *ip6e_rthdr; /* routing header */
struct mbuf *ip6e_dest2; /* destination options header, 2nd part */
};
static int ip6_pcbopt __P((int, u_char *, int, struct ip6_pktopts **,
struct ucred *, int));
static int ip6_pcbopts __P((struct ip6_pktopts **, struct mbuf *,
struct socket *, struct sockopt *));
static int ip6_getpcbopt(struct ip6_pktopts *, int, struct sockopt *);
static int ip6_setpktopt __P((int, u_char *, int, struct ip6_pktopts *,
struct ucred *, int, int, int));
static int ip6_setmoptions(int, struct ip6_moptions **, struct mbuf *);
static int ip6_getmoptions(int, struct ip6_moptions *, struct mbuf **);
static int ip6_copyexthdr(struct mbuf **, caddr_t, int);
static int ip6_insertfraghdr __P((struct mbuf *, struct mbuf *, int,
struct ip6_frag **));
static int ip6_insert_jumboopt(struct ip6_exthdrs *, u_int32_t);
static int ip6_splithdr(struct mbuf *, struct ip6_exthdrs *);
static int ip6_getpmtu __P((struct route_in6 *, struct route_in6 *,
struct ifnet *, struct in6_addr *, u_long *, int *));
static int copypktopts(struct ip6_pktopts *, struct ip6_pktopts *, int);
/*
 * Make an extension header from option data. hp is the source, and
 * mp is the destination.
 *
 * If hp is non-NULL, it is treated as a struct ip6_ext and
 * (ip6e_len + 1) << 3 bytes starting at hp are copied into a new mbuf
 * by ip6_copyexthdr(), which stores the mbuf through mp.  A NULL hp is
 * a no-op.
 *
 * The macro is not self-contained: it assigns to a variable named
 * "error" and, on failure, jumps to a label named "freehdrs"; both must
 * exist in the calling function.
 */
#define MAKE_EXTHDR(hp, mp) \
do { \
if (hp) { \
struct ip6_ext *eh = (struct ip6_ext *)(hp); \
error = ip6_copyexthdr((mp), (caddr_t)(hp), \
((eh)->ip6e_len + 1) << 3); \
if (error) \
goto freehdrs; \
} \
} while (/*CONSTCOND*/ 0)
/*
 * Form a chain of extension headers.
 * m is the extension header mbuf
 * mp is the previous mbuf in the chain
 * p is a pointer to the "next header" byte of the previous header
 * i is the protocol number of the header held in m.
 *
 * When m is non-NULL the macro:
 *  - copies the current *p into the first byte of m (m's own next-header
 *    field), then stores i into *p, so the previous header now names m;
 *  - re-points p at the first byte of m;
 *  - links m into the mbuf chain right after mp and makes mp refer to m.
 * Note that both p and mp are reassigned, so they must be lvalues.
 *
 * The macro reads a variable named "hdrsplit" from the calling function
 * and panics if it is zero.
 */
#define MAKE_CHAIN(m, mp, p, i)\
do {\
if (m) {\
if (!hdrsplit) \
panic("assumption failed: hdr not split"); \
*mtod((m), u_char *) = *(p);\
*(p) = (i);\
p = mtod((m), u_char *);\
(m)->m_next = (mp)->m_next;\
(mp)->m_next = (m);\
(mp) = (m);\
}\
} while (/*CONSTCOND*/ 0)
/*
 * IP6 output. The packet in mbuf chain m contains a skeletal IP6
 * header (with pri, len, nxt, hlim, src, dst).
 * This function may modify ver and hlim only.
 * The mbuf chain containing the packet will be freed.
 * The mbuf opt, if present, will not be freed.
 *
 * Arguments:
 * m0 - the packet to send.
 * opt - optional packet options (may be NULL). Supplies the extension
 * headers to insert (ip6po_hbh, ip6po_dest1, ip6po_rthdr,
 * ip6po_dest2), the traffic class, the hop limit, the
 * IP6PO_DONTFRAG flag and the minimum-MTU policy.
 * ro - optional route cache. When it is 0 a zeroed local route_in6 is
 * used instead, and any route entry it ends up holding is
 * released at "done". When opt carries a routing header,
 * opt->ip6po_route is used for the first hop.
 * flags - IPV6_UNSPECSRC, IPV6_FORWARDING and IPV6_MINMTU are examined
 * here; the value is also handed to the IPsec routines.
 * im6o - optional multicast options (hop limit, loopback control).
 * ifpp - if non-NULL, set to the outgoing interface.
 * XXX: just for statistics
 * inp - handed to ip6_ipsec_output() and pfil_run_hooks().
 *
 * Returns the value of the local "error" variable: 0 or an errno value.
 * Some discard paths (for example IPsec errors that are deliberately
 * hidden from the user) also return 0.
 *
 * type of "mtu": rt_rmx.rmx_mtu is u_long, ifnet.ifr_mtu is int, and
 * nd_ifinfo.linkmtu is u_int32_t. so we use u_long to hold largest one,
 * which is rt_rmx.rmx_mtu.
 */
int
ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
struct route_in6 *ro, int flags, struct ip6_moptions *im6o,
struct ifnet **ifpp, struct inpcb *inp)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
struct ip6_hdr *ip6, *mhip6;
struct ifnet *ifp, *origifp;
struct mbuf *m = m0;
struct mbuf *mprev = NULL;
int hlen, tlen, len, off;
struct route_in6 ip6route;
struct rtentry *rt = NULL;
struct sockaddr_in6 *dst, src_sa, dst_sa;
struct in6_addr odst;
int error = 0;
struct in6_ifaddr *ia = NULL;
u_long mtu;
int alwaysfrag, dontfrag;
u_int32_t optlen = 0, plen = 0, unfragpartlen = 0;
struct ip6_exthdrs exthdrs;
struct in6_addr finaldst, src0, dst0;
u_int32_t zone;
struct route_in6 *ro_pmtu = NULL;
int hdrsplit = 0;
int needipsec = 0;
#ifdef IPSEC
struct ipsec_output_state state;
struct ip6_rthdr *rh = NULL;
int needipsectun = 0;
int segleft_org = 0;
struct secpolicy *sp = NULL;
#endif /* IPSEC */
ip6 = mtod(m, struct ip6_hdr *);
if (ip6 == NULL) {
printf ("ip6 is NULL");
goto bad;
}
/* remember the original destination; a routing header may replace it */
finaldst = ip6->ip6_dst;
bzero(&exthdrs, sizeof(exthdrs));
if (opt) {
/* Hop-by-Hop options header */
MAKE_EXTHDR(opt->ip6po_hbh, &exthdrs.ip6e_hbh);
/* Destination options header(1st part) */
if (opt->ip6po_rthdr) {
/*
 * Destination options header(1st part)
 * This only makes sense with a routing header.
 * See Section 9.2 of RFC 3542.
 * Disabling this part just for MIP6 convenience is
 * a bad idea. We need to think carefully about a
 * way to make the advanced API coexist with MIP6
 * options, which might automatically be inserted in
 * the kernel.
 */
MAKE_EXTHDR(opt->ip6po_dest1, &exthdrs.ip6e_dest1);
}
/* Routing header */
MAKE_EXTHDR(opt->ip6po_rthdr, &exthdrs.ip6e_rthdr);
/* Destination options header(2nd part) */
MAKE_EXTHDR(opt->ip6po_dest2, &exthdrs.ip6e_dest2);
}
/*
 * IPSec checking which handles several cases.
 * FAST IPSEC: We re-injected the packet.
 */
#ifdef IPSEC
switch(ip6_ipsec_output(&m, inp, &flags, &error, &ifp, &sp))
{
case 1: /* Bad packet */
goto freehdrs;
case -1: /* Do IPSec */
needipsec = 1;
/* FALLTHROUGH */
case 0: /* No IPSec */
default:
break;
}
#endif /* IPSEC */
/*
 * Calculate the total length of the extension header chain.
 * Keep the length of the unfragmentable part for fragmentation.
 */
optlen = 0;
if (exthdrs.ip6e_hbh)
optlen += exthdrs.ip6e_hbh->m_len;
if (exthdrs.ip6e_dest1)
optlen += exthdrs.ip6e_dest1->m_len;
if (exthdrs.ip6e_rthdr)
optlen += exthdrs.ip6e_rthdr->m_len;
unfragpartlen = optlen + sizeof(struct ip6_hdr);
/* NOTE: we don't add AH/ESP length here. do that later. */
if (exthdrs.ip6e_dest2)
optlen += exthdrs.ip6e_dest2->m_len;
/*
 * If we need IPsec, or there is at least one extension header,
 * separate IP6 header from the payload.
 */
if ((needipsec || optlen) && !hdrsplit) {
if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
m = NULL;
goto freehdrs;
}
m = exthdrs.ip6e_ip6;
hdrsplit++;
}
/* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
/* adjust mbuf packet header length */
m->m_pkthdr.len += optlen;
plen = m->m_pkthdr.len - sizeof(*ip6);
/* If this is a jumbo payload, insert a jumbo payload option. */
if (plen > IPV6_MAXPACKET) {
if (!hdrsplit) {
if ((error = ip6_splithdr(m, &exthdrs)) != 0) {
m = NULL;
goto freehdrs;
}
m = exthdrs.ip6e_ip6;
hdrsplit++;
}
/* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
if ((error = ip6_insert_jumboopt(&exthdrs, plen)) != 0)
goto freehdrs;
/* the real length is carried in the jumbo payload option */
ip6->ip6_plen = 0;
} else
ip6->ip6_plen = htons(plen);
/*
 * Concatenate headers and fill in next header fields.
 * Here we have, on "m"
 * IPv6 payload
 * and we insert headers accordingly. Finally, we should be getting:
 * IPv6 hbh dest1 rthdr ah* [esp* dest2 payload]
 *
 * during the header composing process, "m" points to IPv6 header.
 * "mprev" points to an extension header prior to esp.
 */
u_char *nexthdrp = &ip6->ip6_nxt;
mprev = m;
/*
 * we treat dest2 specially. this makes IPsec processing
 * much easier. the goal here is to make mprev point the
 * mbuf prior to dest2.
 *
 * result: IPv6 dest2 payload
 * m and mprev will point to IPv6 header.
 */
if (exthdrs.ip6e_dest2) {
if (!hdrsplit)
panic("assumption failed: hdr not split");
exthdrs.ip6e_dest2->m_next = m->m_next;
m->m_next = exthdrs.ip6e_dest2;
*mtod(exthdrs.ip6e_dest2, u_char *) = ip6->ip6_nxt;
ip6->ip6_nxt = IPPROTO_DSTOPTS;
}
/*
 * result: IPv6 hbh dest1 rthdr dest2 payload
 * m will point to IPv6 header. mprev will point to the
 * extension header prior to dest2 (rthdr in the above case).
 */
MAKE_CHAIN(exthdrs.ip6e_hbh, mprev, nexthdrp, IPPROTO_HOPOPTS);
MAKE_CHAIN(exthdrs.ip6e_dest1, mprev, nexthdrp,
IPPROTO_DSTOPTS);
MAKE_CHAIN(exthdrs.ip6e_rthdr, mprev, nexthdrp,
IPPROTO_ROUTING);
#ifdef IPSEC
if (!needipsec)
goto skip_ipsec2;
/*
 * pointers after IPsec headers are not valid any more.
 * other pointers need a great care too.
 * (IPsec routines should not mangle mbufs prior to AH/ESP)
 */
exthdrs.ip6e_dest2 = NULL;
if (exthdrs.ip6e_rthdr) {
/* zero segleft for the IPsec call; the saved value is restored below */
rh = mtod(exthdrs.ip6e_rthdr, struct ip6_rthdr *);
segleft_org = rh->ip6r_segleft;
rh->ip6r_segleft = 0;
}
bzero(&state, sizeof(state));
state.m = m;
error = ipsec6_output_trans(&state, nexthdrp, mprev, sp, flags,
&needipsectun);
m = state.m;
if (error == EJUSTRETURN) {
/*
 * We had a SP with a level of 'use' and no SA. We
 * will just continue to process the packet without
 * IPsec processing.
 */
;
} else if (error) {
/* mbuf is already reclaimed in ipsec6_output_trans. */
m = NULL;
switch (error) {
case EHOSTUNREACH:
case ENETUNREACH:
case EMSGSIZE:
case ENOBUFS:
case ENOMEM:
break;
default:
printf("[%s:%d] (ipsec): error code %d\n",
__func__, __LINE__, error);
/* FALLTHROUGH */
case ENOENT:
/* don't show these error codes to the user */
error = 0;
break;
}
goto bad;
} else if (!needipsectun) {
/*
 * In the FAST IPSec case we have already
 * re-injected the packet and it has been freed
 * by the ipsec_done() function. So, just clean
 * up after ourselves.
 */
m = NULL;
goto done;
}
if (exthdrs.ip6e_rthdr) {
/* ah6_output doesn't modify mbuf chain */
rh->ip6r_segleft = segleft_org;
}
skip_ipsec2:;
#endif /* IPSEC */
/*
 * If there is a routing header, replace the destination address field
 * with the first hop of the routing header.
 */
if (exthdrs.ip6e_rthdr) {
struct ip6_rthdr *rh =
(struct ip6_rthdr *)(mtod(exthdrs.ip6e_rthdr,
struct ip6_rthdr *));
struct ip6_rthdr0 *rh0;
struct in6_addr *addr;
struct sockaddr_in6 sa;
switch (rh->ip6r_type) {
case IPV6_RTHDR_TYPE_0:
rh0 = (struct ip6_rthdr0 *)rh;
/* the address list starts right after the fixed type 0 header */
addr = (struct in6_addr *)(rh0 + 1);
/*
 * construct a sockaddr_in6 form of
 * the first hop.
 *
 * XXX: we may not have enough
 * information about its scope zone;
 * there is no standard API to pass
 * the information from the
 * application.
 */
bzero(&sa, sizeof(sa));
sa.sin6_family = AF_INET6;
sa.sin6_len = sizeof(sa);
sa.sin6_addr = addr[0];
if ((error = sa6_embedscope(&sa,
V_ip6_use_defzone)) != 0) {
goto bad;
}
ip6->ip6_dst = sa.sin6_addr;
/*
 * shift the remaining addresses up by one slot and put the
 * original destination into the last slot.
 */
bcopy(&addr[1], &addr[0], sizeof(struct in6_addr)
* (rh0->ip6r0_segleft - 1));
addr[rh0->ip6r0_segleft - 1] = finaldst;
/* XXX */
in6_clearscope(addr + rh0->ip6r0_segleft - 1);
break;
default: /* is it possible? */
error = EINVAL;
goto bad;
}
}
/* Source address validation */
if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) &&
(flags & IPV6_UNSPECSRC) == 0) {
error = EOPNOTSUPP;
V_ip6stat.ip6s_badscope++;
goto bad;
}
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_src)) {
error = EOPNOTSUPP;
V_ip6stat.ip6s_badscope++;
goto bad;
}
V_ip6stat.ip6s_localout++;
/*
 * Route packet.
 */
if (ro == 0) {
ro = &ip6route;
bzero((caddr_t)ro, sizeof(*ro));
}
/* ro_pmtu keeps the caller's (or local) route; ro may be redirected below */
ro_pmtu = ro;
if (opt && opt->ip6po_rthdr)
ro = &opt->ip6po_route;
dst = (struct sockaddr_in6 *)&ro->ro_dst;
/*
 * "again" is re-entered from the packet filter code below when a filter
 * has rewritten the destination to a non-local address.
 */
again:
/*
 * if specified, try to fill in the traffic class field.
 * do not override if a non-zero value is already set.
 * we check the diffserv field and the ecn field separately.
 */
if (opt && opt->ip6po_tclass >= 0) {
int mask = 0;
if ((ip6->ip6_flow & htonl(0xfc << 20)) == 0)
mask |= 0xfc;
if ((ip6->ip6_flow & htonl(0x03 << 20)) == 0)
mask |= 0x03;
if (mask != 0)
ip6->ip6_flow |= htonl((opt->ip6po_tclass & mask) << 20);
}
/* fill in or override the hop limit field, if necessary. */
if (opt && opt->ip6po_hlim != -1)
ip6->ip6_hlim = opt->ip6po_hlim & 0xff;
else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
if (im6o != NULL)
ip6->ip6_hlim = im6o->im6o_multicast_hlim;
else
ip6->ip6_hlim = V_ip6_defmcasthlim;
}
#ifdef IPSEC
/*
 * We may re-inject packets into the stack here.
 */
if (needipsec && needipsectun) {
struct ipsec_output_state state;
/*
 * All the extension headers will become inaccessible
 * (since they can be encrypted).
 * Don't panic, we need no more updates to extension headers
 * on inner IPv6 packet (since they are now encapsulated).
 *
 * IPv6 [ESP|AH] IPv6 [extension headers] payload
 */
bzero(&exthdrs, sizeof(exthdrs));
exthdrs.ip6e_ip6 = m;
bzero(&state, sizeof(state));
state.m = m;
state.ro = (struct route *)ro;
state.dst = (struct sockaddr *)dst;
error = ipsec6_output_tunnel(&state, sp, flags);
m = state.m;
ro = (struct route_in6 *)state.ro;
dst = (struct sockaddr_in6 *)state.dst;
if (error == EJUSTRETURN) {
/*
 * We had a SP with a level of 'use' and no SA. We
 * will just continue to process the packet without
 * IPsec processing.
 */
;
} else if (error) {
/* mbuf is already reclaimed in ipsec6_output_tunnel. */
/* NOTE(review): m is cleared twice here; the second store is redundant. */
m0 = m = NULL;
m = NULL;
switch (error) {
case EHOSTUNREACH:
case ENETUNREACH:
case EMSGSIZE:
case ENOBUFS:
case ENOMEM:
break;
default:
printf("[%s:%d] (ipsec): error code %d\n",
__func__, __LINE__, error);
/* FALLTHROUGH */
case ENOENT:
/* don't show these error codes to the user */
error = 0;
break;
}
goto bad;
} else {
/*
 * In the FAST IPSec case we have already
 * re-injected the packet and it has been freed
 * by the ipsec_done() function. So, just clean
 * up after ourselves.
 */
m = NULL;
goto done;
}
/* only reached in the EJUSTRETURN case above */
exthdrs.ip6e_ip6 = m;
}
#endif /* IPSEC */
/* adjust pointer */
ip6 = mtod(m, struct ip6_hdr *);
bzero(&dst_sa, sizeof(dst_sa));
dst_sa.sin6_family = AF_INET6;
dst_sa.sin6_len = sizeof(dst_sa);
dst_sa.sin6_addr = ip6->ip6_dst;
if ((error = in6_selectroute(&dst_sa, opt, im6o, ro,
&ifp, &rt, 0)) != 0) {
switch (error) {
case EHOSTUNREACH:
V_ip6stat.ip6s_noroute++;
break;
case EADDRNOTAVAIL:
default:
break; /* XXX statistics? */
}
if (ifp != NULL)
in6_ifstat_inc(ifp, ifs6_out_discard);
goto bad;
}
if (rt == NULL) {
/*
 * If in6_selectroute() does not return a route entry,
 * dst may not have been updated.
 */
*dst = dst_sa; /* XXX */
}
/*
 * then rt (for unicast) and ifp must be non-NULL valid values.
 */
if ((flags & IPV6_FORWARDING) == 0) {
/* XXX: the FORWARDING flag can be set for mrouting. */
in6_ifstat_inc(ifp, ifs6_out_request);
}
if (rt != NULL) {
ia = (struct in6_ifaddr *)(rt->rt_ifa);
rt->rt_use++;
}
/*
 * The outgoing interface must be in the zone of source and
 * destination addresses. We should use ia_ifp to support the
 * case of sending packets to an address of our own.
 */
if (ia != NULL && ia->ia_ifp)
origifp = ia->ia_ifp;
else
origifp = ifp;
src0 = ip6->ip6_src;
if (in6_setscope(&src0, origifp, &zone))
goto badscope;
bzero(&src_sa, sizeof(src_sa));
src_sa.sin6_family = AF_INET6;
src_sa.sin6_len = sizeof(src_sa);
src_sa.sin6_addr = ip6->ip6_src;
if (sa6_recoverscope(&src_sa) || zone != src_sa.sin6_scope_id)
goto badscope;
dst0 = ip6->ip6_dst;
if (in6_setscope(&dst0, origifp, &zone))
goto badscope;
/* re-initialize to be sure */
bzero(&dst_sa, sizeof(dst_sa));
dst_sa.sin6_family = AF_INET6;
dst_sa.sin6_len = sizeof(dst_sa);
dst_sa.sin6_addr = ip6->ip6_dst;
if (sa6_recoverscope(&dst_sa) || zone != dst_sa.sin6_scope_id) {
goto badscope;
}
/* scope check is done. */
goto routefound;
badscope:
V_ip6stat.ip6s_badscope++;
in6_ifstat_inc(origifp, ifs6_out_discard);
if (error == 0)
error = EHOSTUNREACH; /* XXX */
goto bad;
routefound:
if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
if (opt && opt->ip6po_nextroute.ro_rt) {
/*
 * The nexthop is explicitly specified by the
 * application. We assume the next hop is an IPv6
 * address.
 */
dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
}
else if ((rt->rt_flags & RTF_GATEWAY))
dst = (struct sockaddr_in6 *)rt->rt_gateway;
}
if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
m->m_flags &= ~(M_BCAST | M_MCAST); /* just in case */
} else {
struct in6_multi *in6m;
m->m_flags = (m->m_flags & ~M_BCAST) | M_MCAST;
in6_ifstat_inc(ifp, ifs6_out_mcast);
/*
 * Confirm that the outgoing interface supports multicast.
 */
if (!(ifp->if_flags & IFF_MULTICAST)) {
V_ip6stat.ip6s_noroute++;
in6_ifstat_inc(ifp, ifs6_out_discard);
error = ENETUNREACH;
goto bad;
}
IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m);
if (in6m != NULL &&
(im6o == NULL || im6o->im6o_multicast_loop)) {
/*
 * If we belong to the destination multicast group
 * on the outgoing interface, and the caller did not
 * forbid loopback, loop back a copy.
 */
ip6_mloopback(ifp, m, dst);
} else {
/*
 * If we are acting as a multicast router, perform
 * multicast forwarding as if the packet had just
 * arrived on the interface to which we are about
 * to send. The multicast forwarding function
 * recursively calls this function, using the
 * IPV6_FORWARDING flag to prevent infinite recursion.
 *
 * Multicasts that are looped back by ip6_mloopback(),
 * above, will be forwarded by the ip6_input() routine,
 * if necessary.
 */
if (ip6_mrouter && (flags & IPV6_FORWARDING) == 0) {
/*
 * XXX: ip6_mforward expects that rcvif is NULL
 * when it is called from the originating path.
 * However, it is not always the case, since
 * some versions of MGETHDR() do not
 * initialize the field.
 */
m->m_pkthdr.rcvif = NULL;
if (ip6_mforward(ip6, ifp, m) != 0) {
m_freem(m);
goto done;
}
}
}
/*
 * Multicasts with a hoplimit of zero may be looped back,
 * above, but must not be transmitted on a network.
 * Also, multicasts addressed to the loopback interface
 * are not sent -- the above call to ip6_mloopback() will
 * loop back a copy if this host actually belongs to the
 * destination group on the loopback interface.
 */
if (ip6->ip6_hlim == 0 || (ifp->if_flags & IFF_LOOPBACK) ||
IN6_IS_ADDR_MC_INTFACELOCAL(&ip6->ip6_dst)) {
m_freem(m);
goto done;
}
}
/*
 * Fill the outgoing interface to tell the upper layer
 * to increment per-interface statistics.
 */
if (ifpp)
*ifpp = ifp;
/* Determine path MTU. */
if ((error = ip6_getpmtu(ro_pmtu, ro, ifp, &finaldst, &mtu,
&alwaysfrag)) != 0)
goto bad;
/*
 * The caller of this function may specify to use the minimum MTU
 * in some cases.
 * An advanced API option (IPV6_USE_MIN_MTU) can also override MTU
 * setting. The logic is a bit complicated; by default, unicast
 * packets will follow path MTU while multicast packets will be sent at
 * the minimum MTU. If IP6PO_MINMTU_ALL is specified, all packets
 * including unicast ones will be sent at the minimum MTU. Multicast
 * packets will always be sent at the minimum MTU unless
 * IP6PO_MINMTU_DISABLE is explicitly specified.
 * See RFC 3542 for more details.
 */
if (mtu > IPV6_MMTU) {
if ((flags & IPV6_MINMTU))
mtu = IPV6_MMTU;
else if (opt && opt->ip6po_minmtu == IP6PO_MINMTU_ALL)
mtu = IPV6_MMTU;
else if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) &&
(opt == NULL ||
opt->ip6po_minmtu != IP6PO_MINMTU_DISABLE)) {
mtu = IPV6_MMTU;
}
}
/*
 * clear embedded scope identifiers if necessary.
 * in6_clearscope will touch the addresses only when necessary.
 */
in6_clearscope(&ip6->ip6_src);
in6_clearscope(&ip6->ip6_dst);
/*
 * If the outgoing packet contains a hop-by-hop options header,
 * it must be examined and processed even by the source node.
 * (RFC 2460, section 4.)
 */
if (exthdrs.ip6e_hbh) {
struct ip6_hbh *hbh = mtod(exthdrs.ip6e_hbh, struct ip6_hbh *);
u_int32_t dummy; /* XXX unused */
u_int32_t plen = 0; /* XXX: ip6_process will check the value */
#ifdef DIAGNOSTIC
if ((hbh->ip6h_len + 1) << 3 > exthdrs.ip6e_hbh->m_len)
panic("ip6e_hbh is not continuous");
#endif
/*
 * XXX: if we have to send an ICMPv6 error to the sender,
 * we need the M_LOOP flag since icmp6_error() expects
 * the IPv6 and the hop-by-hop options header are
 * continuous unless the flag is set.
 */
m->m_flags |= M_LOOP;
m->m_pkthdr.rcvif = ifp;
if (ip6_process_hopopts(m, (u_int8_t *)(hbh + 1),
((hbh->ip6h_len + 1) << 3) - sizeof(struct ip6_hbh),
&dummy, &plen) < 0) {
/* m was already freed at this point */
error = EINVAL;/* better error? */
goto done;
}
m->m_flags &= ~M_LOOP; /* XXX */
m->m_pkthdr.rcvif = NULL;
}
/* Jump over all PFIL processing if hooks are not active. */
if (!PFIL_HOOKED(&inet6_pfil_hook))
goto passout;
odst = ip6->ip6_dst;
/* Run through list of hooks for output packets. */
error = pfil_run_hooks(&inet6_pfil_hook, &m, ifp, PFIL_OUT, inp);
if (error != 0 || m == NULL)
goto done;
ip6 = mtod(m, struct ip6_hdr *);
/* See if destination IP address was changed by packet filter. */
if (!IN6_ARE_ADDR_EQUAL(&odst, &ip6->ip6_dst)) {
m->m_flags |= M_SKIP_FIREWALL;
/* If destination is now ourself drop to ip6_input(). */
if (in6_localaddr(&ip6->ip6_dst)) {
if (m->m_pkthdr.rcvif == NULL)
m->m_pkthdr.rcvif = V_loif;
if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
m->m_pkthdr.csum_flags |=
CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
m->m_pkthdr.csum_data = 0xffff;
}
m->m_pkthdr.csum_flags |=
CSUM_IP_CHECKED | CSUM_IP_VALID;
error = netisr_queue(NETISR_IPV6, m);
goto done;
} else
goto again; /* Redo the routing table lookup. */
}
/* XXX: IPFIREWALL_FORWARD */
passout:
/*
 * Send the packet to the outgoing interface.
 * If necessary, do IPv6 fragmentation before sending.
 *
 * the logic here is rather complex:
 * 1: normal case (dontfrag == 0, alwaysfrag == 0)
 * 1-a: send as is if tlen <= path mtu
 * 1-b: fragment if tlen > path mtu
 *
 * 2: if user asks us not to fragment (dontfrag == 1)
 * 2-a: send as is if tlen <= interface mtu
 * 2-b: error if tlen > interface mtu
 *
 * 3: if we always need to attach fragment header (alwaysfrag == 1)
 * always fragment
 *
 * 4: if dontfrag == 1 && alwaysfrag == 1
 * error, as we cannot handle this conflicting request
 */
tlen = m->m_pkthdr.len;
if (opt && (opt->ip6po_flags & IP6PO_DONTFRAG))
dontfrag = 1;
else
dontfrag = 0;
if (dontfrag && alwaysfrag) { /* case 4 */
/* conflicting request - can't transmit */
error = EMSGSIZE;
goto bad;
}
if (dontfrag && tlen > IN6_LINKMTU(ifp)) { /* case 2-b */
/*
 * Even if the DONTFRAG option is specified, we cannot send the
 * packet when the data length is larger than the MTU of the
 * outgoing interface.
 * Notify the error by sending IPV6_PATHMTU ancillary data as
 * well as returning an error code (the latter is not described
 * in the API spec.)
 */
u_int32_t mtu32;
struct ip6ctlparam ip6cp;
mtu32 = (u_int32_t)mtu;
bzero(&ip6cp, sizeof(ip6cp));
ip6cp.ip6c_cmdarg = (void *)&mtu32;
pfctlinput2(PRC_MSGSIZE, (struct sockaddr *)&ro_pmtu->ro_dst,
(void *)&ip6cp);
error = EMSGSIZE;
goto bad;
}
/*
 * transmit packet without fragmentation
 */
if (dontfrag || (!alwaysfrag && tlen <= mtu)) { /* case 1-a and 2-a */
struct in6_ifaddr *ia6;
ip6 = mtod(m, struct ip6_hdr *);
ia6 = in6_ifawithifp(ifp, &ip6->ip6_src);
if (ia6) {
/* Record statistics for this interface address. */
ia6->ia_ifa.if_opackets++;
ia6->ia_ifa.if_obytes += m->m_pkthdr.len;
}
- IF_AFDATA_LOCK(ifp);
error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
- IF_AFDATA_UNLOCK(ifp);
goto done;
}
/*
 * try to fragment the packet. case 1-b and 3
 */
if (mtu < IPV6_MMTU) {
/* path MTU cannot be less than IPV6_MMTU */
error = EMSGSIZE;
in6_ifstat_inc(ifp, ifs6_out_fragfail);
goto bad;
} else if (ip6->ip6_plen == 0) {
/* jumbo payload cannot be fragmented */
error = EMSGSIZE;
in6_ifstat_inc(ifp, ifs6_out_fragfail);
goto bad;
} else {
struct mbuf **mnext, *m_frgpart;
struct ip6_frag *ip6f;
u_int32_t id = htonl(ip6_randomid());
u_char nextproto;
/* free slots remaining in the interface send queue */
int qslots = ifp->if_snd.ifq_maxlen - ifp->if_snd.ifq_len;
/*
 * Too large for the destination or interface;
 * fragment if possible.
 * Must be able to put at least 8 bytes per fragment.
 */
hlen = unfragpartlen;
if (mtu > IPV6_MAXPACKET)
mtu = IPV6_MAXPACKET;
/* per-fragment payload size, rounded down to a multiple of 8 */
len = (mtu - hlen - sizeof(struct ip6_frag)) & ~7;
if (len < 8) {
error = EMSGSIZE;
in6_ifstat_inc(ifp, ifs6_out_fragfail);
goto bad;
}
/*
 * Verify that we have any chance at all of being able to queue
 * the packet or packet fragments
 */
if (qslots <= 0 || ((u_int)qslots * (mtu - hlen)
< tlen /* - hlen */)) {
error = ENOBUFS;
V_ip6stat.ip6s_odropped++;
goto bad;
}
/* fragments are linked through m_nextpkt, starting at the original */
mnext = &m->m_nextpkt;
/*
 * Change the next header field of the last header in the
 * unfragmentable part.
 */
if (exthdrs.ip6e_rthdr) {
nextproto = *mtod(exthdrs.ip6e_rthdr, u_char *);
*mtod(exthdrs.ip6e_rthdr, u_char *) = IPPROTO_FRAGMENT;
} else if (exthdrs.ip6e_dest1) {
nextproto = *mtod(exthdrs.ip6e_dest1, u_char *);
*mtod(exthdrs.ip6e_dest1, u_char *) = IPPROTO_FRAGMENT;
} else if (exthdrs.ip6e_hbh) {
nextproto = *mtod(exthdrs.ip6e_hbh, u_char *);
*mtod(exthdrs.ip6e_hbh, u_char *) = IPPROTO_FRAGMENT;
} else {
nextproto = ip6->ip6_nxt;
ip6->ip6_nxt = IPPROTO_FRAGMENT;
}
/*
 * Loop through length of segment after first fragment,
 * make new header and copy data of each part and link onto
 * chain.
 */
m0 = m;
for (off = hlen; off < tlen; off += len) {
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (!m) {
error = ENOBUFS;
V_ip6stat.ip6s_odropped++;
goto sendorfree;
}
m->m_pkthdr.rcvif = NULL;
m->m_flags = m0->m_flags & M_COPYFLAGS;
*mnext = m;
mnext = &m->m_nextpkt;
m->m_data += max_linkhdr;
mhip6 = mtod(m, struct ip6_hdr *);
*mhip6 = *ip6;
m->m_len = sizeof(*mhip6);
error = ip6_insertfraghdr(m0, m, hlen, &ip6f);
if (error) {
V_ip6stat.ip6s_odropped++;
goto sendorfree;
}
ip6f->ip6f_offlg = htons((u_short)((off - hlen) & ~7));
/* the last fragment may be shorter and carries no "more" flag */
if (off + len >= tlen)
len = tlen - off;
else
ip6f->ip6f_offlg |= IP6F_MORE_FRAG;
mhip6->ip6_plen = htons((u_short)(len + hlen +
sizeof(*ip6f) - sizeof(struct ip6_hdr)));
if ((m_frgpart = m_copy(m0, off, len)) == 0) {
error = ENOBUFS;
V_ip6stat.ip6s_odropped++;
goto sendorfree;
}
m_cat(m, m_frgpart);
m->m_pkthdr.len = len + hlen + sizeof(*ip6f);
m->m_pkthdr.rcvif = NULL;
ip6f->ip6f_reserved = 0;
ip6f->ip6f_ident = id;
ip6f->ip6f_nxt = nextproto;
V_ip6stat.ip6s_ofragments++;
in6_ifstat_inc(ifp, ifs6_out_fragcreat);
}
in6_ifstat_inc(ifp, ifs6_out_fragok);
}
/*
 * Remove leading garbage: m0 is the original, unfragmented packet.
 * Free it, then send every fragment queued behind it via m_nextpkt
 * (or free them all once an error has been recorded).
 */
sendorfree:
m = m0->m_nextpkt;
m0->m_nextpkt = 0;
m_freem(m0);
for (m0 = m; m; m = m0) {
m0 = m->m_nextpkt;
m->m_nextpkt = 0;
if (error == 0) {
/* Record statistics for this interface address. */
if (ia) {
ia->ia_ifa.if_opackets++;
ia->ia_ifa.if_obytes += m->m_pkthdr.len;
}
- IF_AFDATA_LOCK(ifp);
error = nd6_output(ifp, origifp, m, dst, ro->ro_rt);
- IF_AFDATA_UNLOCK(ifp);
} else
m_freem(m);
}
if (error == 0)
V_ip6stat.ip6s_fragmented++;
/*
 * Common exit: release the route entry held by the local ip6route (if it
 * was used as ro or ro_pmtu) and the IPsec policy reference, then return.
 */
done:
if (ro == &ip6route && ro->ro_rt) { /* brace necessary for RTFREE */
RTFREE(ro->ro_rt);
} else if (ro_pmtu == &ip6route && ro_pmtu->ro_rt) {
RTFREE(ro_pmtu->ro_rt);
}
#ifdef IPSEC
if (sp != NULL)
KEY_FREESP(&sp);
#endif
return (error);
/* Error exit taken before the extension headers were chained onto m. */
freehdrs:
m_freem(exthdrs.ip6e_hbh); /* m_freem will check if mbuf is 0 */
m_freem(exthdrs.ip6e_dest1);
m_freem(exthdrs.ip6e_rthdr);
m_freem(exthdrs.ip6e_dest2);
/* FALLTHROUGH */
bad:
if (m)
m_freem(m);
goto done;
}
/*
 * Copy an extension header into a newly allocated mbuf.
 *
 * mp   - receives the new mbuf on success; untouched on failure.
 * hdr  - source bytes; when NULL the mbuf is sized but nothing is copied.
 * hlen - number of bytes to reserve (and copy).
 *
 * A cluster is attached when hlen does not fit in a plain mbuf (MLEN).
 * Returns 0 on success, or ENOBUFS when hlen exceeds MCLBYTES or an
 * allocation fails.
 */
static int
ip6_copyexthdr(struct mbuf **mp, caddr_t hdr, int hlen)
{
	struct mbuf *n;

	/* A single cluster is the largest buffer we are willing to use. */
	if (hlen > MCLBYTES)
		return (ENOBUFS); /* XXX */

	MGET(n, M_DONTWAIT, MT_DATA);
	if (n == NULL)
		return (ENOBUFS);

	if (hlen > MLEN) {
		MCLGET(n, M_DONTWAIT);
		if (!(n->m_flags & M_EXT)) {
			/* no cluster could be attached */
			m_free(n);
			return (ENOBUFS);
		}
	}

	n->m_len = hlen;
	if (hdr != NULL)
		bcopy(hdr, mtod(n, caddr_t), hlen);

	*mp = n;
	return (0);
}
/*
 * Insert jumbo payload option.
 *
 * exthdrs - header set being assembled; exthdrs->ip6e_hbh is created or
 *           extended, and exthdrs->ip6e_ip6->m_pkthdr.len is increased by
 *           the 8 bytes that are added.
 * plen    - payload length before the option is added; the value stored
 *           in the option is plen + 8.
 *
 * Returns 0 on success or ENOBUFS if an mbuf/cluster cannot be obtained
 * or the enlarged hop-by-hop header would not fit in one cluster.
 *
 * Layout of the 8 bytes written at optbuf:
 *   new header:       [next hdr][hdr len = 0][JUMBO][4][32-bit length]
 *   appended to hbh:  [PadN    ][pad len = 0][JUMBO][4][32-bit length]
 * In the "new header" case byte 0 (next header) is not written here; it
 * is filled in when the header is chained by MAKE_CHAIN().
 */
static int
ip6_insert_jumboopt(struct ip6_exthdrs *exthdrs, u_int32_t plen)
{
	struct mbuf *mopt;
	u_char *optbuf;
	u_int32_t v;

#define JUMBOOPTLEN 8 /* length of jumbo payload option and padding */

	/*
	 * If there is no hop-by-hop options header, allocate new one.
	 * If there is one but it doesn't have enough space to store the
	 * jumbo payload option, allocate a cluster to store the whole options.
	 * Otherwise, use it to store the options.
	 */
	if (exthdrs->ip6e_hbh == 0) {
		MGET(mopt, M_DONTWAIT, MT_DATA);
		if (mopt == 0)
			return (ENOBUFS);
		mopt->m_len = JUMBOOPTLEN;
		optbuf = mtod(mopt, u_char *);
		optbuf[1] = 0; /* = ((JUMBOOPTLEN) >> 3) - 1 */
		exthdrs->ip6e_hbh = mopt;
	} else {
		struct ip6_hbh *hbh;

		mopt = exthdrs->ip6e_hbh;
		if (M_TRAILINGSPACE(mopt) < JUMBOOPTLEN) {
			/*
			 * XXX assumption:
			 * - exthdrs->ip6e_hbh is not referenced from places
			 *   other than exthdrs.
			 * - exthdrs->ip6e_hbh is not an mbuf chain.
			 */
			int oldoptlen = mopt->m_len;
			struct mbuf *n;

			/*
			 * XXX: give up if the whole (new) hbh header does
			 * not fit even in an mbuf cluster.
			 */
			if (oldoptlen + JUMBOOPTLEN > MCLBYTES)
				return (ENOBUFS);

			/*
			 * As a consequence, we must always prepare a cluster
			 * at this point.
			 */
			MGET(n, M_DONTWAIT, MT_DATA);
			if (n) {
				MCLGET(n, M_DONTWAIT);
				if ((n->m_flags & M_EXT) == 0) {
					m_freem(n);
					n = NULL;
				}
			}
			if (!n)
				return (ENOBUFS);
			n->m_len = oldoptlen + JUMBOOPTLEN;
			bcopy(mtod(mopt, caddr_t), mtod(n, caddr_t),
			    oldoptlen);
			/* keep optbuf's own pointer type (was caddr_t) */
			optbuf = mtod(n, u_char *) + oldoptlen;
			m_freem(mopt);
			mopt = exthdrs->ip6e_hbh = n;
		} else {
			optbuf = mtod(mopt, u_char *) + mopt->m_len;
			mopt->m_len += JUMBOOPTLEN;
		}

		/*
		 * Two octets of padding put the jumbo option on its
		 * required 4n + 2 alignment.  For N octets of PadN the
		 * Opt Data Len field is N - 2, i.e. 0 here.  A value of 1
		 * would make a receiver treat the jumbo option type byte
		 * as pad data and mis-parse everything after it.
		 */
		optbuf[0] = IP6OPT_PADN;
		optbuf[1] = 0;

		/*
		 * Adjust the header length according to the pad and
		 * the jumbo payload option.
		 */
		hbh = mtod(mopt, struct ip6_hbh *);
		hbh->ip6h_len += (JUMBOOPTLEN >> 3);
	}

	/* fill in the option. */
	optbuf[2] = IP6OPT_JUMBO;
	optbuf[3] = 4;
	v = (u_int32_t)htonl(plen + JUMBOOPTLEN);
	bcopy(&v, &optbuf[4], sizeof(u_int32_t));

	/* finally, adjust the packet header length */
	exthdrs->ip6e_ip6->m_pkthdr.len += JUMBOOPTLEN;

	return (0);
#undef JUMBOOPTLEN
}
/*
* Insert fragment header and copy unfragmentable header portions.
*/
static int
ip6_insertfraghdr(struct mbuf *m0, struct mbuf *m, int hlen,
struct ip6_frag **frghdrp)
{
struct mbuf *n, *mlast;
if (hlen > sizeof(struct ip6_hdr)) {
n = m_copym(m0, sizeof(struct ip6_hdr),
hlen - sizeof(struct ip6_hdr), M_DONTWAIT);
if (n == 0)
return (ENOBUFS);
m->m_next = n;
} else
n = m;
/* Search for the last mbuf of unfragmentable part. */
for (mlast = n; mlast->m_next; mlast = mlast->m_next)
;
if ((mlast->m_flags & M_EXT) == 0 &&
M_TRAILINGSPACE(mlast) >= sizeof(struct ip6_frag)) {
/* use the trailing space of the last mbuf for the fragment hdr */
*frghdrp = (struct ip6_frag *)(mtod(mlast, caddr_t) +
mlast->m_len);
mlast->m_len += sizeof(struct ip6_frag);
m->m_pkthdr.len += sizeof(struct ip6_frag);
} else {
/* allocate a new mbuf for the fragment header */
struct mbuf *mfrg;
MGET(mfrg, M_DONTWAIT, MT_DATA);
if (mfrg == 0)
return (ENOBUFS);
mfrg->m_len = sizeof(struct ip6_frag);
*frghdrp = mtod(mfrg, struct ip6_frag *);
mlast->m_next = mfrg;
}
return (0);
}
/*
 * Work out the MTU to use towards dst.
 *
 * ro_pmtu is the route consulted for the path MTU; when it is a different
 * object than ro it is validated and, if necessary, looked up again here.
 * ifp, if non-NULL, names the outgoing interface; otherwise the interface
 * of the route is used.  The result is stored in *mtup and, when
 * alwaysfragp is non-NULL, *alwaysfragp tells whether a fragment header
 * must be attached regardless of the packet size.
 *
 * Returns 0, or EHOSTUNREACH when neither a route nor an interface is
 * available (*mtup is then 0).
 */
static int
ip6_getpmtu(struct route_in6 *ro_pmtu, struct route_in6 *ro,
    struct ifnet *ifp, struct in6_addr *dst, u_long *mtup,
    int *alwaysfragp)
{
	u_int32_t pathmtu = 0;
	int forcefrag = 0;
	int rv = 0;

	if (ro_pmtu != ro) {
		/* The first hop and the final destination may differ. */
		struct sockaddr_in6 *pmtu_dst;

		pmtu_dst = (struct sockaddr_in6 *)&ro_pmtu->ro_dst;

		/* Throw away a cached route that is down or for another dst. */
		if (ro_pmtu->ro_rt != NULL &&
		    (!(ro_pmtu->ro_rt->rt_flags & RTF_UP) ||
		    !IN6_ARE_ADDR_EQUAL(&pmtu_dst->sin6_addr, dst))) {
			RTFREE(ro_pmtu->ro_rt);
			ro_pmtu->ro_rt = (struct rtentry *)NULL;
		}

		if (ro_pmtu->ro_rt == NULL) {
			bzero(pmtu_dst, sizeof(*pmtu_dst));
			pmtu_dst->sin6_family = AF_INET6;
			pmtu_dst->sin6_len = sizeof(struct sockaddr_in6);
			pmtu_dst->sin6_addr = *dst;

			rtalloc((struct route *)ro_pmtu);
		}
	}

	if (ro_pmtu->ro_rt == NULL) {
		/* No route: fall back to the interface, if we were given one. */
		if (ifp != NULL)
			pathmtu = IN6_LINKMTU(ifp);
		else
			rv = EHOSTUNREACH;	/* XXX */
	} else {
		struct in_conninfo inc;
		u_int32_t linkmtu;

		bzero(&inc, sizeof(inc));
		inc.inc_flags = 1;	/* IPv6 */
		inc.inc6_faddr = *dst;

		if (ifp == NULL)
			ifp = ro_pmtu->ro_rt->rt_ifp;
		linkmtu = IN6_LINKMTU(ifp);

		/* Combine the host cache value with the route metric. */
		pathmtu = tcp_hc_getmtu(&inc);
		if (pathmtu == 0)
			pathmtu = ro_pmtu->ro_rt->rt_rmx.rmx_mtu;
		else
			pathmtu = min(pathmtu, ro_pmtu->ro_rt->rt_rmx.rmx_mtu);

		if (pathmtu == 0) {
			/* Nothing recorded anywhere: use the link MTU. */
			pathmtu = linkmtu;
		} else if (pathmtu < IPV6_MMTU) {
			/*
			 * RFC2460 section 5, last paragraph:
			 * if an ICMPv6 too big message with
			 * mtu < IPV6_MMTU was recorded, transmit packets
			 * sized IPV6_MMTU or smaller, with a fragment
			 * header attached.  (The fragment header is needed
			 * regardless of the packet size, for translators
			 * to identify packets.)
			 */
			forcefrag = 1;
			pathmtu = IPV6_MMTU;
		} else if (pathmtu > linkmtu) {
			/*
			 * The recorded MTU is larger than the MTU of the
			 * interface.  Clamp it to the interface MTU and
			 * store the clamped value back into the route.
			 */
			pathmtu = linkmtu;
			ro_pmtu->ro_rt->rt_rmx.rmx_mtu = pathmtu;
		}
	}

	*mtup = pathmtu;
	if (alwaysfragp != NULL)
		*alwaysfragp = forcefrag;
	return (rv);
}
/*
 * IP6 socket option processing.
 *
 * Handles set (SOPT_SET) and get (SOPT_GET) requests described by "sopt"
 * for the socket "so".  Only requests at level IPPROTO_IPV6 are served;
 * any other level yields EINVAL.  Within a direction the request is
 * dispatched on the option name, and names that are not listed for that
 * direction yield ENOPROTOOPT.
 *
 * Returns 0 on success or an errno value.
 */
int
ip6_ctloutput(struct socket *so, struct sockopt *sopt)
{
int optdatalen, uproto;
void *optdata;
struct inpcb *in6p = sotoinpcb(so);
int error, optval;
int level, op, optname;
int optlen;
struct thread *td;
/* Cache the request parameters in locals. */
level = sopt->sopt_level;
op = sopt->sopt_dir;
optname = sopt->sopt_name;
optlen = sopt->sopt_valsize;
td = sopt->sopt_td;
error = 0;
optval = 0;
uproto = (int)so->so_proto->pr_protocol;
if (level == IPPROTO_IPV6) {
switch (op) {
case SOPT_SET:
switch (optname) {
case IPV6_2292PKTOPTIONS:
#ifdef IPV6_PKTOPTIONS
case IPV6_PKTOPTIONS:
#endif
{
/*
 * Copy the user buffer into an mbuf and let ip6_pcbopts()
 * rebuild the socket's output options from it.
 */
struct mbuf *m;
error = soopt_getm(sopt, &m); /* XXX */
if (error != 0)
break;
error = soopt_mcopyin(sopt, m); /* XXX */
if (error != 0)
break;
error = ip6_pcbopts(&in6p->in6p_outputopts,
m, so, sopt);
m_freem(m); /* XXX */
break;
}
/*
* Use of some Hop-by-Hop options or some
* Destination options, might require special
* privilege. That is, normal applications
* (without special privilege) might be forbidden
* from setting certain options in outgoing packets,
* and might never see certain options in received
* packets. [RFC 2292 Section 6]
* KAME specific note:
* KAME prevents non-privileged users from sending or
* receiving ANY hbh/dst options in order to avoid
* overhead of parsing options in the kernel.
*/
case IPV6_RECVHOPOPTS:
case IPV6_RECVDSTOPTS:
case IPV6_RECVRTHDRDSTOPTS:
if (td != NULL) {
error = priv_check(td,
PRIV_NETINET_SETHDROPTS);
if (error)
break;
}
/* FALLTHROUGH */
case IPV6_UNICAST_HOPS:
case IPV6_HOPLIMIT:
case IPV6_FAITH:
case IPV6_RECVPKTINFO:
case IPV6_RECVHOPLIMIT:
case IPV6_RECVRTHDR:
case IPV6_RECVPATHMTU:
case IPV6_RECVTCLASS:
case IPV6_V6ONLY:
case IPV6_AUTOFLOWLABEL:
/* All of these options take exactly one int. */
if (optlen != sizeof(int)) {
error = EINVAL;
break;
}
error = sooptcopyin(sopt, &optval,
sizeof optval, sizeof optval);
if (error)
break;
switch (optname) {
case IPV6_UNICAST_HOPS:
if (optval < -1 || optval >= 256)
error = EINVAL;
else {
/* -1 = kernel default */
in6p->in6p_hops = optval;
if ((in6p->in6p_vflag &
INP_IPV4) != 0)
in6p->inp_ip_ttl = optval;
}
break;
/*
 * OPTSET sets or clears "bit" in in6p_flags according to optval.
 * OPTSET2292 does the same and additionally sets IN6P_RFC2292.
 * OPTBIT evaluates to 1 if "bit" is set in in6p_flags, else 0.
 */
#define OPTSET(bit) \
do { \
if (optval) \
in6p->in6p_flags |= (bit); \
else \
in6p->in6p_flags &= ~(bit); \
} while (/*CONSTCOND*/ 0)
#define OPTSET2292(bit) \
do { \
in6p->in6p_flags |= IN6P_RFC2292; \
if (optval) \
in6p->in6p_flags |= (bit); \
else \
in6p->in6p_flags &= ~(bit); \
} while (/*CONSTCOND*/ 0)
#define OPTBIT(bit) (in6p->in6p_flags & (bit) ? 1 : 0)
case IPV6_RECVPKTINFO:
/* cannot mix with RFC2292 */
if (OPTBIT(IN6P_RFC2292)) {
error = EINVAL;
break;
}
OPTSET(IN6P_PKTINFO);
break;
case IPV6_HOPLIMIT:
{
struct ip6_pktopts **optp;
/* cannot mix with RFC2292 */
if (OPTBIT(IN6P_RFC2292)) {
error = EINVAL;
break;
}
optp = &in6p->in6p_outputopts;
error = ip6_pcbopt(IPV6_HOPLIMIT,
(u_char *)&optval, sizeof(optval),
optp, (td != NULL) ? td->td_ucred :
NULL, uproto);
break;
}
case IPV6_RECVHOPLIMIT:
/* cannot mix with RFC2292 */
if (OPTBIT(IN6P_RFC2292)) {
error = EINVAL;
break;
}
OPTSET(IN6P_HOPLIMIT);
break;
case IPV6_RECVHOPOPTS:
/* cannot mix with RFC2292 */
if (OPTBIT(IN6P_RFC2292)) {
error = EINVAL;
break;
}
OPTSET(IN6P_HOPOPTS);
break;
case IPV6_RECVDSTOPTS:
/* cannot mix with RFC2292 */
if (OPTBIT(IN6P_RFC2292)) {
error = EINVAL;
break;
}
OPTSET(IN6P_DSTOPTS);
break;
case IPV6_RECVRTHDRDSTOPTS:
/* cannot mix with RFC2292 */
if (OPTBIT(IN6P_RFC2292)) {
error = EINVAL;
break;
}
OPTSET(IN6P_RTHDRDSTOPTS);
break;
case IPV6_RECVRTHDR:
/* cannot mix with RFC2292 */
if (OPTBIT(IN6P_RFC2292)) {
error = EINVAL;
break;
}
OPTSET(IN6P_RTHDR);
break;
case IPV6_FAITH:
OPTSET(IN6P_FAITH);
break;
case IPV6_RECVPATHMTU:
/*
* We ignore this option for TCP
* sockets.
* (RFC3542 leaves this case
* unspecified.)
*/
if (uproto != IPPROTO_TCP)
OPTSET(IN6P_MTU);
break;
case IPV6_V6ONLY:
/*
* make setsockopt(IPV6_V6ONLY)
* available only prior to bind(2).
* see ipng mailing list, Jun 22 2001.
*/
if (in6p->in6p_lport ||
!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr)) {
error = EINVAL;
break;
}
OPTSET(IN6P_IPV6_V6ONLY);
/* Keep the INP_IPV4 version flag consistent with the option. */
if (optval)
in6p->in6p_vflag &= ~INP_IPV4;
else
in6p->in6p_vflag |= INP_IPV4;
break;
case IPV6_RECVTCLASS:
/* cannot mix with RFC2292 XXX */
if (OPTBIT(IN6P_RFC2292)) {
error = EINVAL;
break;
}
OPTSET(IN6P_TCLASS);
break;
case IPV6_AUTOFLOWLABEL:
OPTSET(IN6P_AUTOFLOWLABEL);
break;
}
break;
case IPV6_TCLASS:
case IPV6_DONTFRAG:
case IPV6_USE_MIN_MTU:
case IPV6_PREFER_TEMPADDR:
/* int-valued options stored via ip6_pcbopt(). */
if (optlen != sizeof(optval)) {
error = EINVAL;
break;
}
error = sooptcopyin(sopt, &optval,
sizeof optval, sizeof optval);
if (error)
break;
{
struct ip6_pktopts **optp;
optp = &in6p->in6p_outputopts;
error = ip6_pcbopt(optname,
(u_char *)&optval, sizeof(optval),
optp, (td != NULL) ? td->td_ucred :
NULL, uproto);
break;
}
case IPV6_2292PKTINFO:
case IPV6_2292HOPLIMIT:
case IPV6_2292HOPOPTS:
case IPV6_2292DSTOPTS:
case IPV6_2292RTHDR:
/* RFC 2292 */
if (optlen != sizeof(int)) {
error = EINVAL;
break;
}
error = sooptcopyin(sopt, &optval,
sizeof optval, sizeof optval);
if (error)
break;
switch (optname) {
case IPV6_2292PKTINFO:
OPTSET2292(IN6P_PKTINFO);
break;
case IPV6_2292HOPLIMIT:
OPTSET2292(IN6P_HOPLIMIT);
break;
case IPV6_2292HOPOPTS:
/*
* Check super-user privilege.
* See comments for IPV6_RECVHOPOPTS.
*/
if (td != NULL) {
error = priv_check(td,
PRIV_NETINET_SETHDROPTS);
/* Note: returns directly rather than breaking out. */
if (error)
return (error);
}
OPTSET2292(IN6P_HOPOPTS);
break;
case IPV6_2292DSTOPTS:
if (td != NULL) {
error = priv_check(td,
PRIV_NETINET_SETHDROPTS);
/* Note: returns directly rather than breaking out. */
if (error)
return (error);
}
OPTSET2292(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS); /* XXX */
break;
case IPV6_2292RTHDR:
OPTSET2292(IN6P_RTHDR);
break;
}
break;
case IPV6_PKTINFO:
case IPV6_HOPOPTS:
case IPV6_RTHDR:
case IPV6_DSTOPTS:
case IPV6_RTHDRDSTOPTS:
case IPV6_NEXTHOP:
{
/* new advanced API (RFC3542) */
u_char *optbuf;
u_char optbuf_storage[MCLBYTES];
/* This optlen shadows the function-level optlen. */
int optlen;
struct ip6_pktopts **optp;
/* cannot mix with RFC2292 */
if (OPTBIT(IN6P_RFC2292)) {
error = EINVAL;
break;
}
/*
* We only ensure valsize is not too large
* here. Further validation will be done
* later.
*/
error = sooptcopyin(sopt, optbuf_storage,
sizeof(optbuf_storage), 0);
if (error)
break;
optlen = sopt->sopt_valsize;
optbuf = optbuf_storage;
optp = &in6p->in6p_outputopts;
error = ip6_pcbopt(optname, optbuf, optlen,
optp, (td != NULL) ? td->td_ucred : NULL,
uproto);
break;
}
#undef OPTSET
case IPV6_MULTICAST_IF:
case IPV6_MULTICAST_HOPS:
case IPV6_MULTICAST_LOOP:
case IPV6_JOIN_GROUP:
case IPV6_LEAVE_GROUP:
{
/* Multicast option values must fit in a plain mbuf. */
if (sopt->sopt_valsize > MLEN) {
error = EMSGSIZE;
break;
}
/* XXX */
}
/* FALLTHROUGH */
{
/*
 * Copy the value into a temporary mbuf and pass it to
 * ip6_setmoptions().  sopt_valsize is already known to be
 * at most MLEN here, so the cluster allocation below is not
 * taken on this path.
 */
struct mbuf *m;
if (sopt->sopt_valsize > MCLBYTES) {
error = EMSGSIZE;
break;
}
/* XXX */
MGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT, MT_DATA);
if (m == 0) {
error = ENOBUFS;
break;
}
if (sopt->sopt_valsize > MLEN) {
MCLGET(m, sopt->sopt_td ? M_WAIT : M_DONTWAIT);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
error = ENOBUFS;
break;
}
}
m->m_len = sopt->sopt_valsize;
error = sooptcopyin(sopt, mtod(m, char *),
m->m_len, m->m_len);
if (error) {
(void)m_free(m);
break;
}
error = ip6_setmoptions(sopt->sopt_name,
&in6p->in6p_moptions,
m);
(void)m_free(m);
}
break;
case IPV6_PORTRANGE:
error = sooptcopyin(sopt, &optval,
sizeof optval, sizeof optval);
if (error)
break;
/* IN6P_LOWPORT and IN6P_HIGHPORT are mutually exclusive. */
switch (optval) {
case IPV6_PORTRANGE_DEFAULT:
in6p->in6p_flags &= ~(IN6P_LOWPORT);
in6p->in6p_flags &= ~(IN6P_HIGHPORT);
break;
case IPV6_PORTRANGE_HIGH:
in6p->in6p_flags &= ~(IN6P_LOWPORT);
in6p->in6p_flags |= IN6P_HIGHPORT;
break;
case IPV6_PORTRANGE_LOW:
in6p->in6p_flags &= ~(IN6P_HIGHPORT);
in6p->in6p_flags |= IN6P_LOWPORT;
break;
default:
error = EINVAL;
break;
}
break;
#ifdef IPSEC
case IPV6_IPSEC_POLICY:
{
caddr_t req;
struct mbuf *m;
if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
break;
if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
break;
req = mtod(m, caddr_t);
error = ipsec6_set_policy(in6p, optname, req,
m->m_len, (sopt->sopt_td != NULL) ?
sopt->sopt_td->td_ucred : NULL);
m_freem(m);
break;
}
#endif /* IPSEC */
default:
error = ENOPROTOOPT;
break;
}
break;
case SOPT_GET:
switch (optname) {
case IPV6_2292PKTOPTIONS:
#ifdef IPV6_PKTOPTIONS
case IPV6_PKTOPTIONS:
#endif
/*
* RFC3542 (effectively) deprecated the
* semantics of the 2292-style pktoptions.
* Since it was not reliable in nature (i.e.,
* applications had to expect the lack of some
* information after all), it would make sense
* to simplify this part by always returning
* empty data.
*/
sopt->sopt_valsize = 0;
break;
case IPV6_RECVHOPOPTS:
case IPV6_RECVDSTOPTS:
case IPV6_RECVRTHDRDSTOPTS:
case IPV6_UNICAST_HOPS:
case IPV6_RECVPKTINFO:
case IPV6_RECVHOPLIMIT:
case IPV6_RECVRTHDR:
case IPV6_RECVPATHMTU:
case IPV6_FAITH:
case IPV6_V6ONLY:
case IPV6_PORTRANGE:
case IPV6_RECVTCLASS:
case IPV6_AUTOFLOWLABEL:
/* int-valued options: compute optval, then copy it out below. */
switch (optname) {
case IPV6_RECVHOPOPTS:
optval = OPTBIT(IN6P_HOPOPTS);
break;
case IPV6_RECVDSTOPTS:
optval = OPTBIT(IN6P_DSTOPTS);
break;
case IPV6_RECVRTHDRDSTOPTS:
optval = OPTBIT(IN6P_RTHDRDSTOPTS);
break;
case IPV6_UNICAST_HOPS:
optval = in6p->in6p_hops;
break;
case IPV6_RECVPKTINFO:
optval = OPTBIT(IN6P_PKTINFO);
break;
case IPV6_RECVHOPLIMIT:
optval = OPTBIT(IN6P_HOPLIMIT);
break;
case IPV6_RECVRTHDR:
optval = OPTBIT(IN6P_RTHDR);
break;
case IPV6_RECVPATHMTU:
optval = OPTBIT(IN6P_MTU);
break;
case IPV6_FAITH:
optval = OPTBIT(IN6P_FAITH);
break;
case IPV6_V6ONLY:
optval = OPTBIT(IN6P_IPV6_V6ONLY);
break;
case IPV6_PORTRANGE:
{
int flags;
flags = in6p->in6p_flags;
if (flags & IN6P_HIGHPORT)
optval = IPV6_PORTRANGE_HIGH;
else if (flags & IN6P_LOWPORT)
optval = IPV6_PORTRANGE_LOW;
else
optval = 0;
break;
}
case IPV6_RECVTCLASS:
optval = OPTBIT(IN6P_TCLASS);
break;
case IPV6_AUTOFLOWLABEL:
optval = OPTBIT(IN6P_AUTOFLOWLABEL);
break;
}
if (error)
break;
error = sooptcopyout(sopt, &optval,
sizeof optval);
break;
case IPV6_PATHMTU:
{
u_long pmtu = 0;
struct ip6_mtuinfo mtuinfo;
/* Scratch route used only for this lookup; released below. */
struct route_in6 sro;
bzero(&sro, sizeof(sro));
if (!(so->so_state & SS_ISCONNECTED))
return (ENOTCONN);
/*
* XXX: we do not consider the case of source
* routing, or optional information to specify
* the outgoing interface.
*/
error = ip6_getpmtu(&sro, NULL, NULL,
&in6p->in6p_faddr, &pmtu, NULL);
if (sro.ro_rt)
RTFREE(sro.ro_rt);
if (error)
break;
if (pmtu > IPV6_MAXPACKET)
pmtu = IPV6_MAXPACKET;
bzero(&mtuinfo, sizeof(mtuinfo));
mtuinfo.ip6m_mtu = (u_int32_t)pmtu;
optdata = (void *)&mtuinfo;
optdatalen = sizeof(mtuinfo);
error = sooptcopyout(sopt, optdata,
optdatalen);
break;
}
case IPV6_2292PKTINFO:
case IPV6_2292HOPLIMIT:
case IPV6_2292HOPOPTS:
case IPV6_2292RTHDR:
case IPV6_2292DSTOPTS:
switch (optname) {
case IPV6_2292PKTINFO:
optval = OPTBIT(IN6P_PKTINFO);
break;
case IPV6_2292HOPLIMIT:
optval = OPTBIT(IN6P_HOPLIMIT);
break;
case IPV6_2292HOPOPTS:
optval = OPTBIT(IN6P_HOPOPTS);
break;
case IPV6_2292RTHDR:
optval = OPTBIT(IN6P_RTHDR);
break;
case IPV6_2292DSTOPTS:
optval = OPTBIT(IN6P_DSTOPTS|IN6P_RTHDRDSTOPTS);
break;
}
error = sooptcopyout(sopt, &optval,
sizeof optval);
break;
case IPV6_PKTINFO:
case IPV6_HOPOPTS:
case IPV6_RTHDR:
case IPV6_DSTOPTS:
case IPV6_RTHDRDSTOPTS:
case IPV6_NEXTHOP:
case IPV6_TCLASS:
case IPV6_DONTFRAG:
case IPV6_USE_MIN_MTU:
case IPV6_PREFER_TEMPADDR:
/* Sticky options are read back by ip6_getpcbopt(). */
error = ip6_getpcbopt(in6p->in6p_outputopts,
optname, sopt);
break;
case IPV6_MULTICAST_IF:
case IPV6_MULTICAST_HOPS:
case IPV6_MULTICAST_LOOP:
case IPV6_JOIN_GROUP:
case IPV6_LEAVE_GROUP:
{
/*
 * ip6_getmoptions() stores an mbuf in m; it is released here
 * whether or not the lookup succeeded.
 */
struct mbuf *m;
error = ip6_getmoptions(sopt->sopt_name,
in6p->in6p_moptions, &m);
if (error == 0)
error = sooptcopyout(sopt,
mtod(m, char *), m->m_len);
m_freem(m);
}
break;
#ifdef IPSEC
case IPV6_IPSEC_POLICY:
{
caddr_t req = NULL;
size_t len = 0;
struct mbuf *m = NULL;
struct mbuf **mp = &m;
/* Saved so they can be restored after soopt_mcopyin(). */
size_t ovalsize = sopt->sopt_valsize;
caddr_t oval = (caddr_t)sopt->sopt_val;
error = soopt_getm(sopt, &m); /* XXX */
if (error != 0)
break;
error = soopt_mcopyin(sopt, m); /* XXX */
if (error != 0)
break;
sopt->sopt_valsize = ovalsize;
sopt->sopt_val = oval;
if (m) {
req = mtod(m, caddr_t);
len = m->m_len;
}
error = ipsec6_get_policy(in6p, req, len, mp);
if (error == 0)
error = soopt_mcopyout(sopt, m); /* XXX */
/* NOTE(review): m is freed here only when error == 0. */
if (error == 0 && m)
m_freem(m);
break;
}
#endif /* IPSEC */
default:
error = ENOPROTOOPT;
break;
}
break;
}
} else { /* level != IPPROTO_IPV6 */
error = EINVAL;
}
return (error);
}
int
ip6_raw_ctloutput(struct socket *so, struct sockopt *sopt)
{
int error = 0, optval, optlen;
const int icmp6off = offsetof(struct icmp6_hdr, icmp6_cksum);
struct in6pcb *in6p = sotoin6pcb(so);
int level, op, optname;
level = sopt->sopt_level;
op = sopt->sopt_dir;
optname = sopt->sopt_name;
optlen = sopt->sopt_valsize;
if (level != IPPROTO_IPV6) {
return (EINVAL);
}
switch (optname) {
case IPV6_CHECKSUM:
/*
* For ICMPv6 sockets, no modification allowed for checksum
* offset, permit "no change" values to help existing apps.
*
* RFC3542 says: "An attempt to set IPV6_CHECKSUM
* for an ICMPv6 socket will fail."
* The current behavior does not meet RFC3542.
*/
switch (op) {
case SOPT_SET:
if (optlen != sizeof(int)) {
error = EINVAL;
break;
}
error = sooptcopyin(sopt, &optval, sizeof(optval),
sizeof(optval));
if (error)
break;
if ((optval % 2) != 0) {
/* the API assumes even offset values */
error = EINVAL;
} else if (so->so_proto->pr_protocol ==
IPPROTO_ICMPV6) {
if (optval != icmp6off)
error = EINVAL;
} else
in6p->in6p_cksum = optval;
break;
case SOPT_GET:
if (so->so_proto->pr_protocol == IPPROTO_ICMPV6)
optval = icmp6off;
else
optval = in6p->in6p_cksum;
error = sooptcopyout(sopt, &optval, sizeof(optval));
break;
default:
error = EINVAL;
break;
}
break;
default:
error = ENOPROTOOPT;
break;
}
return (error);
}
/*
 * Set up IP6 options in pcb for insertion in output packets or
 * specifying behavior of outgoing packets.
 *
 * Any options previously held in *pktopt are cleared first.  If "m" is
 * NULL or empty, the option structure is released and *pktopt is left
 * NULL.  Otherwise the options found in "m" are parsed; on success the
 * resulting structure is stored in *pktopt, on failure it is released,
 * *pktopt stays NULL and the error is returned.
 */
static int
ip6_pcbopts(struct ip6_pktopts **pktopt, struct mbuf *m,
    struct socket *so, struct sockopt *sopt)
{
	struct thread *caller = sopt->sopt_td;
	struct ip6_pktopts *newopt;
	struct ucred *cred;
	int rv;

	newopt = *pktopt;
	if (newopt == NULL) {
		newopt = malloc(sizeof(*newopt), M_IP6OPT, M_WAITOK);
	} else {
		/* turn off any old options. */
#ifdef DIAGNOSTIC
		if (newopt->ip6po_pktinfo || newopt->ip6po_nexthop ||
		    newopt->ip6po_hbh || newopt->ip6po_dest1 ||
		    newopt->ip6po_dest2 || newopt->ip6po_rhinfo.ip6po_rhi_rthdr)
			printf("ip6_pcbopts: all specified options are cleared.\n");
#endif
		ip6_clearpktopts(newopt, -1);
	}
	*pktopt = NULL;

	if (m == NULL || m->m_len == 0) {
		/*
		 * Nothing to install: the request only turns off the
		 * previous options, whether the structure was just
		 * allocated or handed in.
		 */
		free(newopt, M_IP6OPT);
		return (0);
	}

	/* set options specified by user. */
	cred = (caller != NULL) ? caller->td_ucred : NULL;
	rv = ip6_setpktopts(m, newopt, NULL, cred, so->so_proto->pr_protocol);
	if (rv != 0) {
		ip6_clearpktopts(newopt, -1);	/* XXX: discard all options */
		free(newopt, M_IP6OPT);
		return (rv);
	}

	*pktopt = newopt;
	return (0);
}
/*
 * Reset an ip6_pktopts structure to its initial state.  Zeroing alone is
 * not enough: several members have non-zero defaults, which are filled in
 * after the structure has been cleared.
 */
void
ip6_initpktopts(struct ip6_pktopts *opt)
{

	bzero(opt, sizeof(*opt));

	/* -1 selects the default hop limit and traffic class. */
	opt->ip6po_hlim = -1;
	opt->ip6po_tclass = -1;

	/* Policy members start out at their named default values. */
	opt->ip6po_minmtu = IP6PO_MINMTU_MCASTONLY;
	opt->ip6po_prefer_tempaddr = IP6PO_TEMPADDR_SYSTEM;
}
/*
 * Set a single sticky option on a pcb's option structure, allocating and
 * initializing that structure first if *pktopt is still NULL.  The
 * actual work is delegated to ip6_setpktopt() with sticky=1, cmsg=0.
 */
static int
ip6_pcbopt(int optname, u_char *buf, int len, struct ip6_pktopts **pktopt,
    struct ucred *cred, int uproto)
{

	if (*pktopt == NULL) {
		*pktopt = malloc(sizeof(struct ip6_pktopts), M_IP6OPT,
		    M_WAITOK);
		ip6_initpktopts(*pktopt);
	}

	return (ip6_setpktopt(optname, buf, len, *pktopt, cred, 1, 0,
	    uproto));
}
static int
ip6_getpcbopt(struct ip6_pktopts *pktopt, int optname, struct sockopt *sopt)
{
void *optdata = NULL;
int optdatalen = 0;
struct ip6_ext *ip6e;
int error = 0;
struct in6_pktinfo null_pktinfo;
int deftclass = 0, on;
int defminmtu = IP6PO_MINMTU_MCASTONLY;
int defpreftemp = IP6PO_TEMPADDR_SYSTEM;
switch (optname) {
case IPV6_PKTINFO:
if (pktopt && pktopt->ip6po_pktinfo)
optdata = (void *)pktopt->ip6po_pktinfo;
else {
/* XXX: we don't have to do this every time... */
bzero(&null_pktinfo, sizeof(null_pktinfo));
optdata = (void *)&null_pktinfo;
}
optdatalen = sizeof(struct in6_pktinfo);
break;
case IPV6_TCLASS:
if (pktopt && pktopt->ip6po_tclass >= 0)
optdata = (void *)&pktopt->ip6po_tclass;
else
optdata = (void *)&deftclass;
optdatalen = sizeof(int);
break;
case IPV6_HOPOPTS:
if (pktopt && pktopt->ip6po_hbh) {
optdata = (void *)pktopt->ip6po_hbh;
ip6e = (struct ip6_ext *)pktopt->ip6po_hbh;
optdatalen = (ip6e->ip6e_len + 1) << 3;
}
break;
case IPV6_RTHDR:
if (pktopt && pktopt->ip6po_rthdr) {
optdata = (void *)pktopt->ip6po_rthdr;
ip6e = (struct ip6_ext *)pktopt->ip6po_rthdr;
optdatalen = (ip6e->ip6e_len + 1) << 3;
}
break;
case IPV6_RTHDRDSTOPTS:
if (pktopt && pktopt->ip6po_dest1) {
optdata = (void *)pktopt->ip6po_dest1;
ip6e = (struct ip6_ext *)pktopt->ip6po_dest1;
optdatalen = (ip6e->ip6e_len + 1) << 3;
}
break;
case IPV6_DSTOPTS:
if (pktopt && pktopt->ip6po_dest2) {
optdata = (void *)pktopt->ip6po_dest2;
ip6e = (struct ip6_ext *)pktopt->ip6po_dest2;
optdatalen = (ip6e->ip6e_len + 1) << 3;
}
break;
case IPV6_NEXTHOP:
if (pktopt && pktopt->ip6po_nexthop) {
optdata = (void *)pktopt->ip6po_nexthop;
optdatalen = pktopt->ip6po_nexthop->sa_len;
}
break;
case IPV6_USE_MIN_MTU:
if (pktopt)
optdata = (void *)&pktopt->ip6po_minmtu;
else
optdata = (void *)&defminmtu;
optdatalen = sizeof(int);
break;
case IPV6_DONTFRAG:
if (pktopt && ((pktopt->ip6po_flags) & IP6PO_DONTFRAG))
on = 1;
else
on = 0;
optdata = (void *)&on;
optdatalen = sizeof(on);
break;
case IPV6_PREFER_TEMPADDR:
if (pktopt)
optdata = (void *)&pktopt->ip6po_prefer_tempaddr;
else
optdata = (void *)&defpreftemp;
optdatalen = sizeof(int);
break;
default: /* should not happen */
#ifdef DIAGNOSTIC
panic("ip6_getpcbopt: unexpected option\n");
#endif
return (ENOPROTOOPT);
}
error = sooptcopyout(sopt, optdata, optdatalen);
return (error);
}
/*
 * Release the storage held for one option in pktopt and reset it to its
 * "unset" state.  An optname of -1 clears every option handled here.
 * The structure itself is not freed.  A NULL pktopt is ignored.
 */
void
ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
{
	int everything;

	if (pktopt == NULL)
		return;

	everything = (optname == -1);

	if (everything || optname == IPV6_PKTINFO) {
		if (pktopt->ip6po_pktinfo != NULL)
			free(pktopt->ip6po_pktinfo, M_IP6OPT);
		pktopt->ip6po_pktinfo = NULL;
	}

	/* Scalar options just go back to -1. */
	if (everything || optname == IPV6_HOPLIMIT)
		pktopt->ip6po_hlim = -1;
	if (everything || optname == IPV6_TCLASS)
		pktopt->ip6po_tclass = -1;

	if (everything || optname == IPV6_NEXTHOP) {
		/* Drop the cached next-hop route together with the address. */
		if (pktopt->ip6po_nextroute.ro_rt != NULL) {
			RTFREE(pktopt->ip6po_nextroute.ro_rt);
			pktopt->ip6po_nextroute.ro_rt = NULL;
		}
		if (pktopt->ip6po_nexthop != NULL)
			free(pktopt->ip6po_nexthop, M_IP6OPT);
		pktopt->ip6po_nexthop = NULL;
	}

	if (everything || optname == IPV6_HOPOPTS) {
		if (pktopt->ip6po_hbh != NULL)
			free(pktopt->ip6po_hbh, M_IP6OPT);
		pktopt->ip6po_hbh = NULL;
	}

	if (everything || optname == IPV6_RTHDRDSTOPTS) {
		if (pktopt->ip6po_dest1 != NULL)
			free(pktopt->ip6po_dest1, M_IP6OPT);
		pktopt->ip6po_dest1 = NULL;
	}

	if (everything || optname == IPV6_RTHDR) {
		if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr != NULL)
			free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
		pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
		/* The route cached for the routing header goes as well. */
		if (pktopt->ip6po_route.ro_rt != NULL) {
			RTFREE(pktopt->ip6po_route.ro_rt);
			pktopt->ip6po_route.ro_rt = NULL;
		}
	}

	if (everything || optname == IPV6_DSTOPTS) {
		if (pktopt->ip6po_dest2 != NULL)
			free(pktopt->ip6po_dest2, M_IP6OPT);
		pktopt->ip6po_dest2 = NULL;
	}
}
/*
 * Duplicate the extension header src->type into dst->type.  The length is
 * taken from the header's own ip6e_len field.  Jumps to the enclosing
 * function's "bad" label if a non-sleeping allocation fails.
 */
#define PKTOPT_EXTHDRCPY(type) \
do {\
	if (src->type) {\
		int hlen = (((struct ip6_ext *)src->type)->ip6e_len + 1) << 3;\
		dst->type = malloc(hlen, M_IP6OPT, canwait);\
		if (dst->type == NULL && canwait == M_NOWAIT)\
			goto bad;\
		bcopy(src->type, dst->type, hlen);\
	}\
} while (/*CONSTCOND*/ 0)

/*
 * Deep-copy the packet options in "src" into "dst".
 *
 * The scalar members (hop limit, traffic class, flags, minimum-MTU policy
 * and temporary-address preference) are copied by value; pktinfo, next
 * hop and the extension headers are duplicated into freshly allocated
 * storage obtained with the "canwait" malloc flag.  Cached routes are not
 * copied.
 *
 * Returns 0 on success, EINVAL if either argument is NULL, or ENOBUFS if
 * an allocation fails; in the ENOBUFS case everything already copied into
 * "dst" is released again via ip6_clearpktopts().
 */
static int
copypktopts(struct ip6_pktopts *dst, struct ip6_pktopts *src, int canwait)
{
	if (dst == NULL || src == NULL) {
		printf("copypktopts: invalid argument\n");
		return (EINVAL);
	}

	dst->ip6po_hlim = src->ip6po_hlim;
	dst->ip6po_tclass = src->ip6po_tclass;
	dst->ip6po_flags = src->ip6po_flags;
	/*
	 * These two policies are sticky options like the ones above; leave
	 * them out and a copy silently reverts to the built-in defaults.
	 */
	dst->ip6po_minmtu = src->ip6po_minmtu;
	dst->ip6po_prefer_tempaddr = src->ip6po_prefer_tempaddr;

	if (src->ip6po_pktinfo) {
		dst->ip6po_pktinfo = malloc(sizeof(*dst->ip6po_pktinfo),
		    M_IP6OPT, canwait);
		if (dst->ip6po_pktinfo == NULL)
			goto bad;
		*dst->ip6po_pktinfo = *src->ip6po_pktinfo;
	}
	if (src->ip6po_nexthop) {
		dst->ip6po_nexthop = malloc(src->ip6po_nexthop->sa_len,
		    M_IP6OPT, canwait);
		if (dst->ip6po_nexthop == NULL)
			goto bad;
		bcopy(src->ip6po_nexthop, dst->ip6po_nexthop,
		    src->ip6po_nexthop->sa_len);
	}
	PKTOPT_EXTHDRCPY(ip6po_hbh);
	PKTOPT_EXTHDRCPY(ip6po_dest1);
	PKTOPT_EXTHDRCPY(ip6po_dest2);
	PKTOPT_EXTHDRCPY(ip6po_rthdr); /* not copy the cached route */
	return (0);

bad:
	ip6_clearpktopts(dst, -1);
	return (ENOBUFS);
}
#undef PKTOPT_EXTHDRCPY
/*
 * Allocate a new ip6_pktopts structure holding a copy of "src".
 * "canwait" is passed through as the malloc flag.  Returns the new
 * structure, or NULL if allocation or copying failed.
 */
struct ip6_pktopts *
ip6_copypktopts(struct ip6_pktopts *src, int canwait)
{
	struct ip6_pktopts *clone;

	clone = malloc(sizeof(*clone), M_IP6OPT, canwait);
	if (clone != NULL) {
		ip6_initpktopts(clone);
		if (copypktopts(clone, src, canwait) != 0) {
			/* Copy failed: give the container back. */
			free(clone, M_IP6OPT);
			clone = NULL;
		}
	}
	return (clone);
}
/*
 * Release an option structure together with everything it holds.
 * Passing NULL is allowed and does nothing.
 */
void
ip6_freepcbopts(struct ip6_pktopts *pktopt)
{

	if (pktopt != NULL) {
		ip6_clearpktopts(pktopt, -1);
		free(pktopt, M_IP6OPT);
	}
}
/*
 * Set the IP6 multicast options in response to user setsockopt().
 *
 * "optname" selects the option and "m" is an mbuf holding its value.
 * "*im6op" is the multicast option structure to update; when it is NULL
 * a new one is allocated and initialized to default values first.
 * Options other than the five handled below yield EOPNOTSUPP.
 *
 * On return, if every option holds its default value and no group
 * membership remains, the structure is freed and *im6op is set to NULL.
 *
 * Returns 0 on success or an errno value.
 */
static int
ip6_setmoptions(int optname, struct ip6_moptions **im6op, struct mbuf *m)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
int error = 0;
u_int loop, ifindex;
struct ipv6_mreq *mreq;
struct ifnet *ifp;
struct ip6_moptions *im6o = *im6op;
struct route_in6 ro;
struct in6_multi_mship *imm;
if (im6o == NULL) {
/*
* No multicast option buffer attached to the pcb;
* allocate one and initialize to default values.
*/
im6o = (struct ip6_moptions *)
malloc(sizeof(*im6o), M_IP6MOPTS, M_WAITOK);
if (im6o == NULL)
return (ENOBUFS);
*im6op = im6o;
im6o->im6o_multicast_ifp = NULL;
im6o->im6o_multicast_hlim = V_ip6_defmcasthlim;
im6o->im6o_multicast_loop = IPV6_DEFAULT_MULTICAST_LOOP;
LIST_INIT(&im6o->im6o_memberships);
}
switch (optname) {
case IPV6_MULTICAST_IF:
/*
* Select the interface for outgoing multicast packets.
*/
if (m == NULL || m->m_len != sizeof(u_int)) {
error = EINVAL;
break;
}
bcopy(mtod(m, u_int *), &ifindex, sizeof(ifindex));
/* ifindex is a u_int, so only the upper bound can actually fail. */
if (ifindex < 0 || V_if_index < ifindex) {
error = ENXIO; /* XXX EINVAL? */
break;
}
ifp = ifnet_byindex(ifindex);
if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
error = EADDRNOTAVAIL;
break;
}
im6o->im6o_multicast_ifp = ifp;
break;
case IPV6_MULTICAST_HOPS:
{
/*
* Set the IP6 hoplimit for outgoing multicast packets.
*/
int optval;
if (m == NULL || m->m_len != sizeof(int)) {
error = EINVAL;
break;
}
bcopy(mtod(m, u_int *), &optval, sizeof(optval));
/* -1 selects the system default; otherwise 0..255 is accepted. */
if (optval < -1 || optval >= 256)
error = EINVAL;
else if (optval == -1)
im6o->im6o_multicast_hlim = V_ip6_defmcasthlim;
else
im6o->im6o_multicast_hlim = optval;
break;
}
case IPV6_MULTICAST_LOOP:
/*
* Set the loopback flag for outgoing multicast packets.
* Must be zero or one.
*/
if (m == NULL || m->m_len != sizeof(u_int)) {
error = EINVAL;
break;
}
bcopy(mtod(m, u_int *), &loop, sizeof(loop));
if (loop > 1) {
error = EINVAL;
break;
}
im6o->im6o_multicast_loop = loop;
break;
case IPV6_JOIN_GROUP:
/*
* Add a multicast group membership.
* Group must be a valid IP6 multicast address.
*/
if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
error = EINVAL;
break;
}
mreq = mtod(m, struct ipv6_mreq *);
if (IN6_IS_ADDR_UNSPECIFIED(&mreq->ipv6mr_multiaddr)) {
/*
* We use the unspecified address to specify to accept
* all multicast addresses. Only super user is allowed
* to do this.
*/
/* XXX-BZ might need a better PRIV_NETINET_x for this */
error = priv_check(curthread, PRIV_NETINET_MROUTE);
if (error)
break;
} else if (!IN6_IS_ADDR_MULTICAST(&mreq->ipv6mr_multiaddr)) {
error = EINVAL;
break;
}
/*
* If no interface was explicitly specified, choose an
* appropriate one according to the given multicast address.
*/
if (mreq->ipv6mr_interface == 0) {
struct sockaddr_in6 *dst;
/*
* Look up the routing table for the
* address, and choose the outgoing interface.
* XXX: is it a good approach?
*/
ro.ro_rt = NULL;
dst = (struct sockaddr_in6 *)&ro.ro_dst;
bzero(dst, sizeof(*dst));
dst->sin6_family = AF_INET6;
dst->sin6_len = sizeof(*dst);
dst->sin6_addr = mreq->ipv6mr_multiaddr;
rtalloc((struct route *)&ro);
if (ro.ro_rt == NULL) {
error = EADDRNOTAVAIL;
break;
}
/* Only the interface is needed; the route is released at once. */
ifp = ro.ro_rt->rt_ifp;
RTFREE(ro.ro_rt);
} else {
/*
* If the interface is specified, validate it.
*/
if (mreq->ipv6mr_interface < 0 ||
V_if_index < mreq->ipv6mr_interface) {
error = ENXIO; /* XXX EINVAL? */
break;
}
ifp = ifnet_byindex(mreq->ipv6mr_interface);
if (!ifp) {
error = ENXIO; /* XXX EINVAL? */
break;
}
}
/*
* See if we found an interface, and confirm that it
* supports multicast
*/
if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
error = EADDRNOTAVAIL;
break;
}
if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
error = EADDRNOTAVAIL; /* XXX: should not happen */
break;
}
/*
* See if the membership already exists.
*/
for (imm = im6o->im6o_memberships.lh_first;
imm != NULL; imm = imm->i6mm_chain.le_next)
if (imm->i6mm_maddr->in6m_ifp == ifp &&
IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
&mreq->ipv6mr_multiaddr))
break;
if (imm != NULL) {
error = EADDRINUSE;
break;
}
/*
* Everything looks good; add a new record to the multicast
* address list for the given interface.
*/
imm = in6_joingroup(ifp, &mreq->ipv6mr_multiaddr, &error, 0);
/* On failure in6_joingroup() was handed &error to report the reason. */
if (imm == NULL)
break;
LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
break;
case IPV6_LEAVE_GROUP:
/*
* Drop a multicast group membership.
* Group must be a valid IP6 multicast address.
*/
if (m == NULL || m->m_len != sizeof(struct ipv6_mreq)) {
error = EINVAL;
break;
}
mreq = mtod(m, struct ipv6_mreq *);
/*
* If an interface address was specified, get a pointer
* to its ifnet structure.
*/
if (mreq->ipv6mr_interface < 0 ||
V_if_index < mreq->ipv6mr_interface) {
error = ENXIO; /* XXX EINVAL? */
break;
}
if (mreq->ipv6mr_interface == 0)
ifp = NULL;
else
ifp = ifnet_byindex(mreq->ipv6mr_interface);
/* Fill in the scope zone ID */
if (ifp) {
if (in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL)) {
/* XXX: should not happen */
error = EADDRNOTAVAIL;
break;
}
} else if (mreq->ipv6mr_interface != 0) {
/*
* This case happens when the (positive) index is in
* the valid range, but the corresponding interface has
* been detached dynamically (XXX).
*/
error = EADDRNOTAVAIL;
break;
} else { /* ipv6mr_interface == 0 */
struct sockaddr_in6 sa6_mc;
/*
* The API spec says as follows:
* If the interface index is specified as 0, the
* system may choose a multicast group membership to
* drop by matching the multicast address only.
* On the other hand, we cannot disambiguate the scope
* zone unless an interface is provided. Thus, we
* check if there's ambiguity with the default scope
* zone as the last resort.
*/
bzero(&sa6_mc, sizeof(sa6_mc));
sa6_mc.sin6_family = AF_INET6;
sa6_mc.sin6_len = sizeof(sa6_mc);
sa6_mc.sin6_addr = mreq->ipv6mr_multiaddr;
error = sa6_embedscope(&sa6_mc, V_ip6_use_defzone);
if (error != 0)
break;
mreq->ipv6mr_multiaddr = sa6_mc.sin6_addr;
}
/*
* Find the membership in the membership list.
* With no interface given (ifp == NULL) the first entry whose
* address matches is taken.
*/
for (imm = im6o->im6o_memberships.lh_first;
imm != NULL; imm = imm->i6mm_chain.le_next) {
if ((ifp == NULL || imm->i6mm_maddr->in6m_ifp == ifp) &&
IN6_ARE_ADDR_EQUAL(&imm->i6mm_maddr->in6m_addr,
&mreq->ipv6mr_multiaddr))
break;
}
if (imm == NULL) {
/* Unable to resolve interface */
error = EADDRNOTAVAIL;
break;
}
/*
* Give up the multicast address record to which the
* membership points.
*/
LIST_REMOVE(imm, i6mm_chain);
in6_delmulti(imm->i6mm_maddr);
free(imm, M_IP6MADDR);
break;
default:
error = EOPNOTSUPP;
break;
}
/*
* If all options have default values, no need to keep the option
* structure: free it and detach it from the caller's pointer.
*/
if (im6o->im6o_multicast_ifp == NULL &&
im6o->im6o_multicast_hlim == V_ip6_defmcasthlim &&
im6o->im6o_multicast_loop == IPV6_DEFAULT_MULTICAST_LOOP &&
im6o->im6o_memberships.lh_first == NULL) {
free(*im6op, M_IP6MOPTS);
*im6op = NULL;
}
return (error);
}
/*
* Return the IP6 multicast options in response to user getsockopt().
*/
static int
ip6_getmoptions(int optname, struct ip6_moptions *im6o, struct mbuf **mp)
{
INIT_VNET_INET6(curvnet);
u_int *hlim, *loop, *ifindex;
*mp = m_get(M_WAIT, MT_HEADER); /* XXX */
switch (optname) {
case IPV6_MULTICAST_IF:
ifindex = mtod(*mp, u_int *);
(*mp)->m_len = sizeof(u_int);
if (im6o == NULL || im6o->im6o_multicast_ifp == NULL)
*ifindex = 0;
else
*ifindex = im6o->im6o_multicast_ifp->if_index;
return (0);
case IPV6_MULTICAST_HOPS:
hlim = mtod(*mp, u_int *);
(*mp)->m_len = sizeof(u_int);
if (im6o == NULL)
*hlim = V_ip6_defmcasthlim;
else
*hlim = im6o->im6o_multicast_hlim;
return (0);
case IPV6_MULTICAST_LOOP:
loop = mtod(*mp, u_int *);
(*mp)->m_len = sizeof(u_int);
if (im6o == NULL)
*loop = V_ip6_defmcasthlim;
else
*loop = im6o->im6o_multicast_loop;
return (0);
default:
return (EOPNOTSUPP);
}
}
/*
 * Discard the IP6 multicast options.
 *
 * Every group membership still on the list is unlinked, its multicast
 * address record (if any) is given up, and the membership entry is
 * freed; finally the option structure itself is freed.  A NULL argument
 * is ignored.
 */
void
ip6_freemoptions(struct ip6_moptions *im6o)
{
	struct in6_multi_mship *mship;

	if (im6o == NULL)
		return;

	for (;;) {
		mship = im6o->im6o_memberships.lh_first;
		if (mship == NULL)
			break;
		LIST_REMOVE(mship, i6mm_chain);
		if (mship->i6mm_maddr != NULL)
			in6_delmulti(mship->i6mm_maddr);
		free(mship, M_IP6MADDR);
	}
	free(im6o, M_IP6MOPTS);
}
/*
* Set IPv6 outgoing packet options based on advanced API.
*/
int
ip6_setpktopts(struct mbuf *control, struct ip6_pktopts *opt,
struct ip6_pktopts *stickyopt, struct ucred *cred, int uproto)
{
struct cmsghdr *cm = 0;
if (control == NULL || opt == NULL)
return (EINVAL);
ip6_initpktopts(opt);
if (stickyopt) {
int error;
/*
* If stickyopt is provided, make a local copy of the options
* for this particular packet, then override them by ancillary
* objects.
* XXX: copypktopts() does not copy the cached route to a next
* hop (if any). This is not very good in terms of efficiency,
* but we can allow this since this option should be rarely
* used.
*/
if ((error = copypktopts(opt, stickyopt, M_NOWAIT)) != 0)
return (error);
}
/*
* XXX: Currently, we assume all the optional information is stored
* in a single mbuf.
*/
if (control->m_next)
return (EINVAL);
for (; control->m_len > 0; control->m_data += CMSG_ALIGN(cm->cmsg_len),
control->m_len -= CMSG_ALIGN(cm->cmsg_len)) {
int error;
if (control->m_len < CMSG_LEN(0))
return (EINVAL);
cm = mtod(control, struct cmsghdr *);
if (cm->cmsg_len == 0 || cm->cmsg_len > control->m_len)
return (EINVAL);
if (cm->cmsg_level != IPPROTO_IPV6)
continue;
error = ip6_setpktopt(cm->cmsg_type, CMSG_DATA(cm),
cm->cmsg_len - CMSG_LEN(0), opt, cred, 0, 1, uproto);
if (error)
return (error);
}
return (0);
}
/*
* Set a particular packet option, as a sticky option or an ancillary data
* item. "len" can be 0 only when it's a sticky option.
* We have 4 cases of combination of "sticky" and "cmsg":
* "sticky=0, cmsg=0": impossible
* "sticky=0, cmsg=1": RFC2292 or RFC3542 ancillary data
* "sticky=1, cmsg=0": RFC3542 socket option
* "sticky=1, cmsg=1": RFC2292 socket option
*
* Parameters:
* optname - option (or ancillary data) type to set.
* buf/len - option value and its length in bytes.
* opt     - packet options structure that is updated in place.
* cred    - when non-NULL, checked with priv_check_cred() for the
*           NEXTHOP, HOPOPTS and DSTOPTS families; NULL skips the check.
* sticky, cmsg - select the API context as listed above.
* uproto  - upper-layer protocol; only compared against IPPROTO_TCP here.
*
* Returns 0 on success.  Errors produced here are EINVAL, ENOPROTOOPT,
* ENXIO, ENOBUFS and EAFNOSUPPORT, plus whatever priv_check_cred() or
* sa6_embedscope() report.
*/
static int
ip6_setpktopt(int optname, u_char *buf, int len, struct ip6_pktopts *opt,
struct ucred *cred, int sticky, int cmsg, int uproto)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
int minmtupolicy, preftemp;
int error;
if (!sticky && !cmsg) {
#ifdef DIAGNOSTIC
printf("ip6_setpktopt: impossible case\n");
#endif
return (EINVAL);
}
/*
* IPV6_2292xxx is for backward compatibility to RFC2292, and should
* not be specified in the context of RFC3542. Conversely,
* RFC3542 types should not be specified in the context of RFC2292.
*/
if (!cmsg) {
switch (optname) {
case IPV6_2292PKTINFO:
case IPV6_2292HOPLIMIT:
case IPV6_2292NEXTHOP:
case IPV6_2292HOPOPTS:
case IPV6_2292DSTOPTS:
case IPV6_2292RTHDR:
case IPV6_2292PKTOPTIONS:
return (ENOPROTOOPT);
}
}
if (sticky && cmsg) {
switch (optname) {
case IPV6_PKTINFO:
case IPV6_HOPLIMIT:
case IPV6_NEXTHOP:
case IPV6_HOPOPTS:
case IPV6_DSTOPTS:
case IPV6_RTHDRDSTOPTS:
case IPV6_RTHDR:
case IPV6_USE_MIN_MTU:
case IPV6_DONTFRAG:
case IPV6_TCLASS:
case IPV6_PREFER_TEMPADDR: /* XXX: not an RFC3542 option */
return (ENOPROTOOPT);
}
}
/* Per-option validation and storage. */
switch (optname) {
case IPV6_2292PKTINFO:
case IPV6_PKTINFO:
{
struct ifnet *ifp = NULL;
struct in6_pktinfo *pktinfo;
if (len != sizeof(struct in6_pktinfo))
return (EINVAL);
pktinfo = (struct in6_pktinfo *)buf;
/*
* An application can clear any sticky IPV6_PKTINFO option by
* doing a "regular" setsockopt with ipi6_addr being
* in6addr_any and ipi6_ifindex being zero.
* [RFC 3542, Section 6]
*/
if (optname == IPV6_PKTINFO && opt->ip6po_pktinfo &&
pktinfo->ipi6_ifindex == 0 &&
IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
ip6_clearpktopts(opt, optname);
break;
}
/* A sticky source address is rejected on TCP sockets. */
if (uproto == IPPROTO_TCP && optname == IPV6_PKTINFO &&
sticky && !IN6_IS_ADDR_UNSPECIFIED(&pktinfo->ipi6_addr)) {
return (EINVAL);
}
/* validate the interface index if specified. */
if (pktinfo->ipi6_ifindex > V_if_index ||
pktinfo->ipi6_ifindex < 0) {
return (ENXIO);
}
if (pktinfo->ipi6_ifindex) {
ifp = ifnet_byindex(pktinfo->ipi6_ifindex);
if (ifp == NULL)
return (ENXIO);
}
/*
* We store the address anyway, and let in6_selectsrc()
* validate the specified address. This is because ipi6_addr
* may not have enough information about its scope zone, and
* we may need additional information (such as outgoing
* interface or the scope zone of a destination address) to
* disambiguate the scope.
* XXX: the delay of the validation may confuse the
* application when it is used as a sticky option.
*/
/* Reuse an existing pktinfo buffer; allocate only the first time. */
if (opt->ip6po_pktinfo == NULL) {
opt->ip6po_pktinfo = malloc(sizeof(*pktinfo),
M_IP6OPT, M_NOWAIT);
if (opt->ip6po_pktinfo == NULL)
return (ENOBUFS);
}
bcopy(pktinfo, opt->ip6po_pktinfo, sizeof(*pktinfo));
break;
}
case IPV6_2292HOPLIMIT:
case IPV6_HOPLIMIT:
{
int *hlimp;
/*
* RFC 3542 deprecated the usage of sticky IPV6_HOPLIMIT
* to simplify the ordering among hoplimit options.
*/
if (optname == IPV6_HOPLIMIT && sticky)
return (ENOPROTOOPT);
if (len != sizeof(int))
return (EINVAL);
hlimp = (int *)buf;
/* Accepted range is -1 through 255. */
if (*hlimp < -1 || *hlimp > 255)
return (EINVAL);
opt->ip6po_hlim = *hlimp;
break;
}
case IPV6_TCLASS:
{
int tclass;
if (len != sizeof(int))
return (EINVAL);
tclass = *(int *)buf;
/* Accepted range is -1 through 255. */
if (tclass < -1 || tclass > 255)
return (EINVAL);
opt->ip6po_tclass = tclass;
break;
}
case IPV6_2292NEXTHOP:
case IPV6_NEXTHOP:
if (cred != NULL) {
error = priv_check_cred(cred,
PRIV_NETINET_SETHDROPTS, 0);
if (error)
return (error);
}
if (len == 0) { /* just remove the option */
ip6_clearpktopts(opt, IPV6_NEXTHOP);
break;
}
/* check if cmsg_len is large enough for sa_len */
/* (*buf, the first byte of the sockaddr, is used as that length) */
if (len < sizeof(struct sockaddr) || len < *buf)
return (EINVAL);
switch (((struct sockaddr *)buf)->sa_family) {
case AF_INET6:
{
struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *)buf;
int error;
if (sa6->sin6_len != sizeof(struct sockaddr_in6))
return (EINVAL);
if (IN6_IS_ADDR_UNSPECIFIED(&sa6->sin6_addr) ||
IN6_IS_ADDR_MULTICAST(&sa6->sin6_addr)) {
return (EINVAL);
}
/* Operates on the caller's buffer, before it is copied below. */
if ((error = sa6_embedscope(sa6, V_ip6_use_defzone))
!= 0) {
return (error);
}
break;
}
case AF_LINK: /* should eventually be supported */
default:
return (EAFNOSUPPORT);
}
/* turn off the previous option, then set the new option. */
ip6_clearpktopts(opt, IPV6_NEXTHOP);
opt->ip6po_nexthop = malloc(*buf, M_IP6OPT, M_NOWAIT);
if (opt->ip6po_nexthop == NULL)
return (ENOBUFS);
bcopy(buf, opt->ip6po_nexthop, *buf);
break;
case IPV6_2292HOPOPTS:
case IPV6_HOPOPTS:
{
struct ip6_hbh *hbh;
int hbhlen;
/*
* XXX: We don't allow a non-privileged user to set ANY HbH
* options, since per-option restriction has too much
* overhead.
*/
if (cred != NULL) {
error = priv_check_cred(cred,
PRIV_NETINET_SETHDROPTS, 0);
if (error)
return (error);
}
if (len == 0) {
ip6_clearpktopts(opt, IPV6_HOPOPTS);
break; /* just remove the option */
}
/* message length validation */
if (len < sizeof(struct ip6_hbh))
return (EINVAL);
hbh = (struct ip6_hbh *)buf;
/* Header size in bytes: (length field + 1) * 8; must equal len. */
hbhlen = (hbh->ip6h_len + 1) << 3;
if (len != hbhlen)
return (EINVAL);
/* turn off the previous option, then set the new option. */
ip6_clearpktopts(opt, IPV6_HOPOPTS);
opt->ip6po_hbh = malloc(hbhlen, M_IP6OPT, M_NOWAIT);
if (opt->ip6po_hbh == NULL)
return (ENOBUFS);
bcopy(hbh, opt->ip6po_hbh, hbhlen);
break;
}
case IPV6_2292DSTOPTS:
case IPV6_DSTOPTS:
case IPV6_RTHDRDSTOPTS:
{
struct ip6_dest *dest, **newdest = NULL;
int destlen;
if (cred != NULL) { /* XXX: see the comment for IPV6_HOPOPTS */
error = priv_check_cred(cred,
PRIV_NETINET_SETHDROPTS, 0);
if (error)
return (error);
}
if (len == 0) {
ip6_clearpktopts(opt, optname);
break; /* just remove the option */
}
/* message length validation */
if (len < sizeof(struct ip6_dest))
return (EINVAL);
dest = (struct ip6_dest *)buf;
/* Header size in bytes: (length field + 1) * 8; must equal len. */
destlen = (dest->ip6d_len + 1) << 3;
if (len != destlen)
return (EINVAL);
/*
* Determine the position that the destination options header
* should be inserted; before or after the routing header.
*/
switch (optname) {
case IPV6_2292DSTOPTS:
/*
* The old advanced API is ambiguous on this point.
* Our approach is to determine the position based
* according to the existence of a routing header.
* Note, however, that this depends on the order of the
* extension headers in the ancillary data; the 1st
* part of the destination options header must appear
* before the routing header in the ancillary data,
* too.
* RFC3542 solved the ambiguity by introducing
* separate ancillary data or option types.
*/
if (opt->ip6po_rthdr == NULL)
newdest = &opt->ip6po_dest1;
else
newdest = &opt->ip6po_dest2;
break;
case IPV6_RTHDRDSTOPTS:
newdest = &opt->ip6po_dest1;
break;
case IPV6_DSTOPTS:
newdest = &opt->ip6po_dest2;
break;
}
/* turn off the previous option, then set the new option. */
ip6_clearpktopts(opt, optname);
*newdest = malloc(destlen, M_IP6OPT, M_NOWAIT);
if (*newdest == NULL)
return (ENOBUFS);
bcopy(dest, *newdest, destlen);
break;
}
case IPV6_2292RTHDR:
case IPV6_RTHDR:
{
struct ip6_rthdr *rth;
int rthlen;
/*
* NOTE(review): unlike the NEXTHOP, HOPOPTS and DSTOPTS cases, no
* priv_check_cred() is performed here -- confirm this is intended.
*/
if (len == 0) {
ip6_clearpktopts(opt, IPV6_RTHDR);
break; /* just remove the option */
}
/* message length validation */
if (len < sizeof(struct ip6_rthdr))
return (EINVAL);
rth = (struct ip6_rthdr *)buf;
/* Header size in bytes: (length field + 1) * 8; must equal len. */
rthlen = (rth->ip6r_len + 1) << 3;
if (len != rthlen)
return (EINVAL);
switch (rth->ip6r_type) {
case IPV6_RTHDR_TYPE_0:
if (rth->ip6r_len == 0) /* must contain one addr */
return (EINVAL);
if (rth->ip6r_len % 2) /* length must be even */
return (EINVAL);
/* segleft must equal half the length field. */
if (rth->ip6r_len / 2 != rth->ip6r_segleft)
return (EINVAL);
break;
default:
return (EINVAL); /* not supported */
}
/* turn off the previous option */
ip6_clearpktopts(opt, IPV6_RTHDR);
opt->ip6po_rthdr = malloc(rthlen, M_IP6OPT, M_NOWAIT);
if (opt->ip6po_rthdr == NULL)
return (ENOBUFS);
bcopy(rth, opt->ip6po_rthdr, rthlen);
break;
}
case IPV6_USE_MIN_MTU:
if (len != sizeof(int))
return (EINVAL);
minmtupolicy = *(int *)buf;
/* Only the three known policies are accepted. */
if (minmtupolicy != IP6PO_MINMTU_MCASTONLY &&
minmtupolicy != IP6PO_MINMTU_DISABLE &&
minmtupolicy != IP6PO_MINMTU_ALL) {
return (EINVAL);
}
opt->ip6po_minmtu = minmtupolicy;
break;
case IPV6_DONTFRAG:
if (len != sizeof(int))
return (EINVAL);
if (uproto == IPPROTO_TCP || *(int *)buf == 0) {
/*
* we ignore this option for TCP sockets.
* (RFC3542 leaves this case unspecified.)
*/
opt->ip6po_flags &= ~IP6PO_DONTFRAG;
} else
opt->ip6po_flags |= IP6PO_DONTFRAG;
break;
case IPV6_PREFER_TEMPADDR:
if (len != sizeof(int))
return (EINVAL);
preftemp = *(int *)buf;
/* Only the three known preferences are accepted. */
if (preftemp != IP6PO_TEMPADDR_SYSTEM &&
preftemp != IP6PO_TEMPADDR_NOTPREFER &&
preftemp != IP6PO_TEMPADDR_PREFER) {
return (EINVAL);
}
opt->ip6po_prefer_tempaddr = preftemp;
break;
default:
return (ENOPROTOOPT);
} /* end of switch */
return (0);
}
/*
 * Routine called from ip6_output() to loop back a copy of an IP6 multicast
 * packet to the input queue of a specified interface.  Note that this
 * calls the output routine of the loopback "driver", but with an interface
 * pointer that might NOT be &loif -- easier than replicating that code here.
 *
 * The original packet m is left untouched; only a copy is looped back.
 * Allocation failures are silent: the function simply returns.
 */
void
ip6_mloopback(struct ifnet *ifp, struct mbuf *m, struct sockaddr_in6 *dst)
{
	struct ip6_hdr *hdr;
	struct mbuf *loopm;

	loopm = m_copy(m, 0, M_COPYALL);
	if (loopm == NULL)
		return;

	/*
	 * The IPv6 header is rewritten below, so it must be contiguous and
	 * must not sit in external (cluster) storage.  Pull it up into the
	 * leading mbuf when either condition is not met.
	 */
	if (loopm->m_len < sizeof(struct ip6_hdr) ||
	    (loopm->m_flags & M_EXT) != 0) {
		loopm = m_pullup(loopm, sizeof(struct ip6_hdr));
		if (loopm == NULL)
			return;
	}

#ifdef DIAGNOSTIC
	if (loopm->m_len < sizeof(*hdr)) {
		m_freem(loopm);
		return;
	}
#endif

	hdr = mtod(loopm, struct ip6_hdr *);

	/*
	 * Strip embedded scope identifiers; in6_clearscope() only touches
	 * an address when that is actually necessary.
	 */
	in6_clearscope(&hdr->ip6_src);
	in6_clearscope(&hdr->ip6_dst);

	(void)if_simloop(ifp, loopm, dst->sin6_family, 0);
}
/*
* Chop IPv6 header off from the payload.
*/
static int
ip6_splithdr(struct mbuf *m, struct ip6_exthdrs *exthdrs)
{
struct mbuf *mh;
struct ip6_hdr *ip6;
ip6 = mtod(m, struct ip6_hdr *);
if (m->m_len > sizeof(*ip6)) {
MGETHDR(mh, M_DONTWAIT, MT_HEADER);
if (mh == 0) {
m_freem(m);
return ENOBUFS;
}
M_MOVE_PKTHDR(mh, m);
MH_ALIGN(mh, sizeof(*ip6));
m->m_len -= sizeof(*ip6);
m->m_data += sizeof(*ip6);
mh->m_next = m;
m = mh;
m->m_len = sizeof(*ip6);
bcopy((caddr_t)ip6, mtod(m, caddr_t), sizeof(*ip6));
}
exthdrs->ip6e_ip6 = m;
return 0;
}
/*
* Compute IPv6 extension header length.
*/
int
ip6_optlen(struct in6pcb *in6p)
{
int len;
if (!in6p->in6p_outputopts)
return 0;
len = 0;
#define elen(x) \
(((struct ip6_ext *)(x)) ? (((struct ip6_ext *)(x))->ip6e_len + 1) << 3 : 0)
len += elen(in6p->in6p_outputopts->ip6po_hbh);
if (in6p->in6p_outputopts->ip6po_rthdr)
/* dest1 is valid with rthdr only */
len += elen(in6p->in6p_outputopts->ip6po_dest1);
len += elen(in6p->in6p_outputopts->ip6po_rthdr);
len += elen(in6p->in6p_outputopts->ip6po_dest2);
return len;
#undef elen
}
Index: projects/arpv2_merge_1/sys/netinet6/nd6.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet6/nd6.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet6/nd6.c (revision 185839)
@@ -1,2015 +1,2102 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/protosw.h>
#include <sys/errno.h>
#include <sys/syslog.h>
+#include <sys/lock.h>
+#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_arc.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/iso88025.h>
#include <net/fddi.h>
#include <net/route.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <net/if_llatbl.h>
#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le))
#include <netinet/if_ether.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#include <netinet/icmp6.h>
#include <netinet6/vinet6.h>
#include <sys/limits.h>
#include <sys/vimage.h>
#include <security/mac/mac_framework.h>
/*
* Timer intervals, in seconds.  ND6_SLOWTIMER_INTERVAL (multiplied by hz)
* is the delay used when nd6_init() arms the nd6_slowtimo callout;
* ND6_RECALC_REACHTM_INTERVAL is the initial value nd6_init() stores in
* V_nd6_recalc_reachtm_interval.
*/
#define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */
#define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */
/* Cast helpers for generic sockaddr pointers. */
#define SIN6(s) ((struct sockaddr_in6 *)s)
#define SDL(s) ((struct sockaddr_dl *)s)
/*
* ND6 tunables and list heads.  nd6_init() assigns defaults through the
* V_-prefixed names -- presumably those resolve to these globals when
* VIMAGE_GLOBALS is defined; confirm against the vimage headers.
*/
#ifdef VIMAGE_GLOBALS
int nd6_prune;
int nd6_delay;
int nd6_umaxtries;
int nd6_mmaxtries;
int nd6_useloopback;
int nd6_gctimer;
/* preventing too many loops in ND option parsing */
int nd6_maxndopt;
int nd6_maxnudhint;
int nd6_maxqueuelen;
int nd6_debug;
+/* for debugging? */
+#if 0
+static int nd6_inuse, nd6_allocated;
+#endif
+
struct nd_drhead nd_defrouter;
struct nd_prhead nd_prefix;
int nd6_recalc_reachtm_interval;
#endif /* VIMAGE_GLOBALS */
/* AF_INET6 sockaddr whose address bytes nd6_init() sets to all 0xff. */
static struct sockaddr_in6 all1_sa;
/* Forward declarations of file-local helpers. */
static int nd6_is_new_addr_neighbor __P((struct sockaddr_in6 *,
struct ifnet *));
static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *);
static void nd6_slowtimo(void *);
static int regen_tmpaddr(struct in6_ifaddr *);
static struct llentry *nd6_free(struct llentry *, int);
static void nd6_llinfo_timer(void *);
static void clear_llinfo_pqueue(struct llentry *);
/* Callout handles and DAD tunables. */
#ifdef VIMAGE_GLOBALS
struct callout nd6_slowtimo_ch;
struct callout nd6_timer_ch;
extern struct callout in6_tmpaddrtimer_ch;
extern int dad_ignore_ns;
extern int dad_maxtry;
#endif
/*
* One-time initialization of the ND6 module: assigns the default values of
* the ND6/DAD tunables, initializes the prefix and default router lists,
* builds the all-ones sockaddr (all1_sa) and starts the slow timer.
* Repeated calls are detected through the static nd6_init_done flag; they
* log a notice and return without doing anything.
*/
void
nd6_init(void)
{
INIT_VNET_INET6(curvnet);
static int nd6_init_done = 0;
int i;
if (nd6_init_done) {
log(LOG_NOTICE, "nd6_init called more than once(ignored)\n");
return;
}
V_nd6_prune = 1; /* walk list every 1 seconds */
V_nd6_delay = 5; /* delay first probe time 5 second */
V_nd6_umaxtries = 3; /* maximum unicast query */
V_nd6_mmaxtries = 3; /* maximum multicast query */
V_nd6_useloopback = 1; /* use loopback interface for local traffic */
V_nd6_gctimer = (60 * 60 * 24); /* 1 day: garbage collection timer */
/* preventing too many loops in ND option parsing */
V_nd6_maxndopt = 10; /* max # of ND options allowed */
V_nd6_maxnudhint = 0; /* max # of subsequent upper layer hints */
V_nd6_maxqueuelen = 1; /* max pkts cached in unresolved ND entries */
/* Debug logging defaults to on only when built with ND6_DEBUG. */
#ifdef ND6_DEBUG
V_nd6_debug = 1;
#else
V_nd6_debug = 0;
#endif
V_nd6_recalc_reachtm_interval = ND6_RECALC_REACHTM_INTERVAL;
V_dad_ignore_ns = 0; /* ignore NS in DAD - specwise incorrect*/
V_dad_maxtry = 15; /* max # of *tries* to transmit DAD packet */
+ /*
+ * XXX just to get this to compile KMM
+ */
+#ifdef notyet
+ V_llinfo_nd6.ln_next = &V_llinfo_nd6;
+ V_llinfo_nd6.ln_prev = &V_llinfo_nd6;
+#endif
LIST_INIT(&V_nd_prefix);
/*
* NOTE(review): the four temporary-address settings below are written
* without the V_ prefix used for every other tunable in this function
* (nd6_timer() reads V_ip6_use_tempaddr) -- confirm this is intended.
*/
ip6_use_tempaddr = 0;
ip6_temp_preferred_lifetime = DEF_TEMP_PREFERRED_LIFETIME;
ip6_temp_valid_lifetime = DEF_TEMP_VALID_LIFETIME;
ip6_temp_regen_advance = TEMPADDR_REGEN_ADVANCE;
/* Build an AF_INET6 sockaddr whose address bytes are all 0xff. */
all1_sa.sin6_family = AF_INET6;
all1_sa.sin6_len = sizeof(struct sockaddr_in6);
for (i = 0; i < sizeof(all1_sa.sin6_addr); i++)
all1_sa.sin6_addr.s6_addr[i] = 0xff;
/* initialization of the default router list */
TAILQ_INIT(&V_nd_defrouter);
/* start timer */
callout_init(&V_nd6_slowtimo_ch, 0);
callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
nd6_slowtimo, NULL);
nd6_init_done = 1;
}
/*
 * Allocate and initialize the per-interface ND information for ifp.
 *
 * The structure is zeroed, filled with the protocol defaults (hop limit,
 * reachable time, retransmit timer, flags) and its link MTU is derived
 * from the interface.  The allocation uses M_WAITOK.  The caller owns the
 * returned structure and releases it with nd6_ifdetach().
 */
struct nd_ifinfo *
nd6_ifattach(struct ifnet *ifp)
{
	struct nd_ifinfo *ndi;

	ndi = (struct nd_ifinfo *)malloc(sizeof(*ndi), M_IP6NDP, M_WAITOK);
	bzero(ndi, sizeof(*ndi));

	/*
	 * Note that the default value of ip6_accept_rtadv is 0, which means
	 * we won't accept RAs by default even if we set ND6_IFF_ACCEPT_RTADV
	 * here.
	 */
	ndi->flags = (ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV);

	ndi->retrans = RETRANS_TIMER;
	ndi->basereachable = REACHABLE_TIME;
	ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
	ndi->chlim = IPV6_DEFHLIM;
	ndi->initialized = 1;

	/* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */
	nd6_setmtu0(ifp, ndi);

	return (ndi);
}
/*
* Release a per-interface ND information structure.  The memory is freed
* with the M_IP6NDP malloc type, the same type nd6_ifattach() allocates
* with; nd must not be used afterwards.
*/
void
nd6_ifdetach(struct nd_ifinfo *nd)
{
free(nd, M_IP6NDP);
}
/*
* Reset ND level link MTU. This function is called when the physical MTU
* changes, which means we might have to adjust the ND level MTU.
*
* Thin wrapper: looks up the interface's ND information with ND_IFINFO()
* and delegates the actual work to nd6_setmtu0().
*/
void
nd6_setmtu(struct ifnet *ifp)
{
nd6_setmtu0(ifp, ND_IFINFO(ifp));
}
/*
 * Recompute ndi->maxmtu from the interface MTU of ifp.
 *
 * For ARCnet, FDDI and ISO 88025 interfaces the value is capped at the
 * link-type specific maximum; every other interface type uses if_mtu
 * unchanged.  A notice is logged when the MTU drops below the IPv6
 * minimum, and in6_setmaxmtu() is invoked when the new value exceeds the
 * recorded system-wide maximum.
 *
 * XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu
 */
void
nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi)
{
	INIT_VNET_INET6(ifp->if_vnet);
	u_int32_t prev_maxmtu;

	prev_maxmtu = ndi->maxmtu;

	if (ifp->if_type == IFT_ARCNET)
		ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */
	else if (ifp->if_type == IFT_FDDI)
		ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */
	else if (ifp->if_type == IFT_ISO88025)
		ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu);
	else
		ndi->maxmtu = ifp->if_mtu;

	/*
	 * Shrinking the link MTU below the IPv6 minimum can cause trouble,
	 * so the operator is told about it explicitly.  Requiring the old
	 * value to have been large enough limits the message to genuine
	 * changes and keeps it quiet during initialization.
	 */
	if (prev_maxmtu >= IPV6_MMTU) {
		if (ndi->maxmtu < IPV6_MMTU) {
			log(LOG_NOTICE, "nd6_setmtu0: "
			    "new link MTU on %s (%lu) is too small for IPv6\n",
			    if_name(ifp), (unsigned long)ndi->maxmtu);
		}
	}

	if (ndi->maxmtu > V_in6_maxmtu)
		in6_setmaxmtu(); /* check all interfaces just in case */

#undef MIN
}
void
nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts)
{
bzero(ndopts, sizeof(*ndopts));
ndopts->nd_opts_search = (struct nd_opt_hdr *)opt;
ndopts->nd_opts_last
= (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len);
if (icmp6len == 0) {
ndopts->nd_opts_done = 1;
ndopts->nd_opts_search = NULL;
}
}
/*
* Take one ND option.
*/
struct nd_opt_hdr *
nd6_option(union nd_opts *ndopts)
{
struct nd_opt_hdr *nd_opt;
int olen;
if (ndopts == NULL)
panic("ndopts == NULL in nd6_option");
if (ndopts->nd_opts_last == NULL)
panic("uninitialized ndopts in nd6_option");
if (ndopts->nd_opts_search == NULL)
return NULL;
if (ndopts->nd_opts_done)
return NULL;
nd_opt = ndopts->nd_opts_search;
/* make sure nd_opt_len is inside the buffer */
if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) {
bzero(ndopts, sizeof(*ndopts));
return NULL;
}
olen = nd_opt->nd_opt_len << 3;
if (olen == 0) {
/*
* Message validation requires that all included
* options have a length that is greater than zero.
*/
bzero(ndopts, sizeof(*ndopts));
return NULL;
}
ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen);
if (ndopts->nd_opts_search > ndopts->nd_opts_last) {
/* option overruns the end of buffer, invalid */
bzero(ndopts, sizeof(*ndopts));
return NULL;
} else if (ndopts->nd_opts_search == ndopts->nd_opts_last) {
/* reached the end of options chain */
ndopts->nd_opts_done = 1;
ndopts->nd_opts_search = NULL;
}
return nd_opt;
}
/*
 * Parse multiple ND options.
 * This function is much easier to use, for ND routines that do not need
 * multiple options of the same type.
 *
 * Each recognized option is recorded in ndopts->nd_opt_array, indexed by
 * its type.  Returns 0 when parsing finished (or stopped after more than
 * V_nd6_maxndopt iterations) and -1 when a malformed option was found, in
 * which case ndopts is left zeroed.
 */
int
nd6_options(union nd_opts *ndopts)
{
	INIT_VNET_INET6(curvnet);
	struct nd_opt_hdr *nd_opt;
	int nparsed;

	if (ndopts == NULL)
		panic("ndopts == NULL in nd6_options");
	if (ndopts->nd_opts_last == NULL)
		panic("uninitialized ndopts in nd6_options");
	if (ndopts->nd_opts_search == NULL)
		return (0);

	for (nparsed = 1; ; nparsed++) {
		nd_opt = nd6_option(ndopts);
		if (nd_opt == NULL && ndopts->nd_opts_last == NULL) {
			/*
			 * nd6_option() wiped the state: the option was
			 * malformed (for instance it had a zero length).
			 */
			V_icmp6stat.icp6s_nd_badopt++;
			bzero(ndopts, sizeof(*ndopts));
			return (-1);
		}

		if (nd_opt != NULL) {
			switch (nd_opt->nd_opt_type) {
			case ND_OPT_SOURCE_LINKADDR:
			case ND_OPT_TARGET_LINKADDR:
			case ND_OPT_MTU:
			case ND_OPT_REDIRECTED_HEADER:
				/* Only the first occurrence is kept. */
				if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
					nd6log((LOG_INFO,
					    "duplicated ND6 option found (type=%d)\n",
					    nd_opt->nd_opt_type));
					/* XXX bark? */
				} else {
					ndopts->nd_opt_array[nd_opt->nd_opt_type]
					    = nd_opt;
				}
				break;
			case ND_OPT_PREFIX_INFORMATION:
				/*
				 * Several of these may appear: remember
				 * the first one and track the latest one
				 * seen as the end marker.
				 */
				if (ndopts->nd_opt_array[nd_opt->nd_opt_type]
				    == NULL) {
					ndopts->nd_opt_array[nd_opt->nd_opt_type]
					    = nd_opt;
				}
				ndopts->nd_opts_pi_end =
				    (struct nd_opt_prefix_info *)nd_opt;
				break;
			default:
				/*
				 * Unknown options must be silently ignored,
				 * to accommodate future extension to the
				 * protocol.
				 */
				nd6log((LOG_DEBUG,
				    "nd6_options: unsupported option %d - "
				    "option ignored\n", nd_opt->nd_opt_type));
			}
		}

		/* Bound the number of iterations spent on one message. */
		if (nparsed > V_nd6_maxndopt) {
			V_icmp6stat.icp6s_nd_toomanyopt++;
			nd6log((LOG_INFO, "too many loop in nd opt\n"));
			break;
		}

		if (ndopts->nd_opts_done)
			break;
	}

	return (0);
}
/*
* ND6 timer routine to handle ND6 entries
*
* Arms or cancels the per-entry callout of ln.  A negative tick cancels
* the timer and clears la_expire/ln_ntick.  Otherwise la_expire is set to
* time_second + tick / hz and the callout is scheduled to run
* nd6_llinfo_timer(); a tick larger than INT_MAX is split: the callout is
* armed for INT_MAX and the remainder is stored in ln_ntick.
*
* NOTE(review): LLE_ADDREF() is taken on every (re)arm while the cancel
* path drops nothing -- presumably the reference is released by the
* timer handler; verify the reference balance when a pending timer is
* re-armed or stopped.
*/
void
nd6_llinfo_settimer(struct llentry *ln, long tick)
{
+ LLE_WLOCK(ln);
if (tick < 0) {
ln->la_expire = 0;
ln->ln_ntick = 0;
callout_stop(&ln->ln_timer_ch);
} else {
ln->la_expire = time_second + tick / hz;
+ LLE_ADDREF(ln);
if (tick > INT_MAX) {
/* Too long for one callout: arm INT_MAX now, keep the rest. */
ln->ln_ntick = tick - INT_MAX;
callout_reset(&ln->ln_timer_ch, INT_MAX,
nd6_llinfo_timer, ln);
} else {
ln->ln_ntick = 0;
callout_reset(&ln->ln_timer_ch, tick,
nd6_llinfo_timer, ln);
}
}
+ LLE_WUNLOCK(ln);
}
/*
* Callout handler for a neighbor cache entry (arg is the struct llentry).
*
* If a remainder of a long timeout is still pending (ln_ntick > 0) the
* timer is simply re-armed.  Static entries and entries that have not yet
* expired are left alone; entries flagged LLE_DELETED are freed.
* Otherwise the entry is advanced according to ln_state: INCOMPLETE and
* PROBE entries are re-solicited until the retry limit is reached and
* then freed, REACHABLE entries become STALE, STALE entries are garbage
* collected and DELAY entries move to PROBE (or STALE when NUD is off).
* Panics when arg is NULL or the entry has no interface.
*/
static void
nd6_llinfo_timer(void *arg)
{
struct llentry *ln;
struct in6_addr *dst;
struct ifnet *ifp;
struct nd_ifinfo *ndi = NULL;
ln = (struct llentry *)arg;
if (ln == NULL) {
panic("%s: NULL entry!\n", __func__);
return;
}
if ((ifp = ((ln->lle_tbl != NULL) ? ln->lle_tbl->llt_ifp : NULL)) == NULL)
panic("ln ifp == NULL");
CURVNET_SET(ifp->if_vnet);
INIT_VNET_INET6(curvnet);
+ /*
+ * llentry is refcounted - we shouldn't need to protect it
+ * with IF_AFDATA
+ */
IF_AFDATA_LOCK(ifp);
if (ln->ln_ntick > 0) {
if (ln->ln_ntick > INT_MAX) {
ln->ln_ntick -= INT_MAX;
nd6_llinfo_settimer(ln, INT_MAX);
} else {
/*
* NOTE(review): ln_ntick is zeroed before it is passed on, so the
* timer is re-armed with 0 ticks rather than the remainder --
* confirm whether the remainder should be saved first.
*/
ln->ln_ntick = 0;
nd6_llinfo_settimer(ln, ln->ln_ntick);
}
IF_AFDATA_UNLOCK(ifp);
/*
* NOTE(review): in the "+" version every "goto done" lands after
* CURVNET_RESTORE(), so these early exits leave without restoring
* the vnet context set by CURVNET_SET() above.
*/
- return;
+ goto done;
}
ndi = ND_IFINFO(ifp);
dst = &L3_ADDR_SIN6(ln)->sin6_addr;
/* Nothing to do for static entries or ones that have not expired. */
if ((ln->la_flags & LLE_STATIC) || (ln->la_expire > time_second)) {
IF_AFDATA_UNLOCK(ifp);
- return;
+ goto done;
}
if (ln->la_flags & LLE_DELETED) {
(void)nd6_free(ln, 0);
IF_AFDATA_UNLOCK(ifp);
- return;
+ goto done;
}
/*
* NOTE(review): ndi is dereferenced without a NULL check in the
* INCOMPLETE and PROBE cases, while the DELAY case tests "ndi &&".
*/
switch (ln->ln_state) {
case ND6_LLINFO_INCOMPLETE:
if (ln->la_asked < V_nd6_mmaxtries) {
ln->la_asked++;
nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
nd6_ns_output(ifp, NULL, dst, ln, 0);
} else {
/* Retry limit reached: report unreachability and drop the entry. */
struct mbuf *m = ln->la_hold;
if (m) {
struct mbuf *m0;
/*
* assuming every packet in la_hold has the
* same IP header
*/
m0 = m->m_nextpkt;
m->m_nextpkt = NULL;
icmp6_error2(m, ICMP6_DST_UNREACH,
ICMP6_DST_UNREACH_ADDR, 0, ifp);
ln->la_hold = m0;
clear_llinfo_pqueue(ln);
}
(void)nd6_free(ln, 0);
ln = NULL;
}
break;
case ND6_LLINFO_REACHABLE:
if (!ND6_LLINFO_PERMANENT(ln)) {
ln->ln_state = ND6_LLINFO_STALE;
nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
}
break;
case ND6_LLINFO_STALE:
/* Garbage Collection(RFC 2461 5.3) */
if (!ND6_LLINFO_PERMANENT(ln)) {
(void)nd6_free(ln, 1);
ln = NULL;
}
break;
case ND6_LLINFO_DELAY:
if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) {
/* We need NUD */
ln->la_asked = 1;
ln->ln_state = ND6_LLINFO_PROBE;
nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
nd6_ns_output(ifp, dst, dst, ln, 0);
} else {
ln->ln_state = ND6_LLINFO_STALE; /* XXX */
nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
}
break;
case ND6_LLINFO_PROBE:
if (ln->la_asked < V_nd6_umaxtries) {
ln->la_asked++;
nd6_llinfo_settimer(ln, (long)ndi->retrans * hz / 1000);
nd6_ns_output(ifp, dst, dst, ln, 0);
} else {
(void)nd6_free(ln, 0);
ln = NULL;
}
break;
}
IF_AFDATA_UNLOCK(ifp);
CURVNET_RESTORE();
/*
* NOTE(review): ln has been set to NULL on the paths above that call
* nd6_free(), yet it is handed to LLE_FREE_LOCKED() unconditionally;
* the macro's behaviour for NULL is not visible here -- verify.  Also
* confirm that the entry is expected to be locked at this point.
*/
+done:
+ LLE_FREE_LOCKED(ln);
}
/*
 * ND6 timer routine to expire default route list and prefix list
 *
 * arg is used as the struct vnet * for CURVNET_SET_QUIET() and
 * INIT_VNET_INET6().  The routine re-arms itself every V_nd6_prune
 * seconds, handing the same arg to the next invocation, and then
 *   - removes default routers whose lifetime has expired,
 *   - purges invalid addresses and marks/unmarks deprecated ones,
 *     regenerating temporary addresses where applicable,
 *   - removes prefixes whose valid lifetime has expired.
 */
void
nd6_timer(void *arg)
{
	CURVNET_SET_QUIET((struct vnet *) arg);
	INIT_VNET_INET6((struct vnet *) arg);
	int s;
	struct nd_defrouter *dr;
	struct nd_prefix *pr;
	struct in6_ifaddr *ia6, *nia6;

	/*
	 * Re-arm with the argument we were called with, so that the next
	 * run operates on the same context instead of on NULL.
	 */
	callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz,
	    nd6_timer, arg);

	/* expire default router list */
	s = splnet();
	dr = TAILQ_FIRST(&V_nd_defrouter);
	while (dr) {
		if (dr->expire && dr->expire < time_second) {
			struct nd_defrouter *t;

			/* Fetch the successor before dr is deleted. */
			t = TAILQ_NEXT(dr, dr_entry);
			defrtrlist_del(dr);
			dr = t;
		} else {
			dr = TAILQ_NEXT(dr, dr_entry);
		}
	}

	/*
	 * expire interface addresses.
	 * in the past the loop was inside prefix expiry processing.
	 * However, from a stricter spec-conformance standpoint, we should
	 * rather separate address lifetimes and prefix lifetimes.
	 */
  addrloop:
	for (ia6 = V_in6_ifaddr; ia6; ia6 = nia6) {
		nia6 = ia6->ia_next;
		/* check address lifetime */
		if (IFA6_IS_INVALID(ia6)) {
			int regen = 0;

			/*
			 * If the expiring address is temporary, try
			 * regenerating a new one.  This would be useful when
			 * we suspended a laptop PC, then turned it on after a
			 * period that could invalidate all temporary
			 * addresses.  Although we may have to restart the
			 * loop (see below), it must be after purging the
			 * address.  Otherwise, we'd see an infinite loop of
			 * regeneration.
			 */
			if (V_ip6_use_tempaddr &&
			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
				if (regen_tmpaddr(ia6) == 0)
					regen = 1;
			}

			in6_purgeaddr(&ia6->ia_ifa);

			if (regen)
				goto addrloop; /* XXX: see below */
		} else if (IFA6_IS_DEPRECATED(ia6)) {
			int oldflags = ia6->ia6_flags;

			ia6->ia6_flags |= IN6_IFF_DEPRECATED;

			/*
			 * If a temporary address has just become deprecated,
			 * regenerate a new one if possible.
			 */
			if (V_ip6_use_tempaddr &&
			    (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
			    (oldflags & IN6_IFF_DEPRECATED) == 0) {

				if (regen_tmpaddr(ia6) == 0) {
					/*
					 * A new temporary address is
					 * generated.
					 * XXX: this means the address chain
					 * has changed while we are still in
					 * the loop.  Although the change
					 * would not cause disaster (because
					 * it's not a deletion, but an
					 * addition,) we'd rather restart the
					 * loop just for safety.  Or does this
					 * significantly reduce performance??
					 */
					goto addrloop;
				}
			}
		} else {
			/*
			 * A new RA might have made a deprecated address
			 * preferred.
			 */
			ia6->ia6_flags &= ~IN6_IFF_DEPRECATED;
		}
	}

	/* expire prefix list */
	pr = V_nd_prefix.lh_first;
	while (pr) {
		/*
		 * check prefix lifetime.
		 * since pltime is just for autoconf, pltime processing for
		 * prefix is not necessary.
		 */
		if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME &&
		    time_second - pr->ndpr_lastupdate > pr->ndpr_vltime) {
			struct nd_prefix *t;

			/* Fetch the successor before pr is removed. */
			t = pr->ndpr_next;

			/*
			 * address expiration and prefix expiration are
			 * separate.  NEVER perform in6_purgeaddr here.
			 */
			prelist_remove(pr);
			pr = t;
		} else
			pr = pr->ndpr_next;
	}
	splx(s);
	CURVNET_RESTORE();
}
/*
 * Try to create a replacement for a temporary address.
 *
 * ia6 - deprecated/invalidated temporary address
 *
 * The addresses of ia6's interface are scanned for autoconfigured
 * addresses that share ia6's prefix.  If a still-preferred temporary
 * address exists nothing is generated; otherwise a new temporary address
 * is derived from the last preferred public address found.
 *
 * Returns 0 when a new temporary address was created, -1 otherwise.
 */
static int
regen_tmpaddr(struct in6_ifaddr *ia6)
{
	struct in6_ifaddr *cand;
	struct in6_ifaddr *pub = NULL;
	struct ifaddr *ifa;
	struct ifnet *ifp;
	int e;

	ifp = ia6->ia_ifa.ifa_ifp;
	for (ifa = ifp->if_addrlist.tqh_first; ifa != NULL;
	    ifa = ifa->ifa_list.tqe_next) {
		if (ifa->ifa_addr->sa_family != AF_INET6)
			continue;

		cand = (struct in6_ifaddr *)ifa;

		/*
		 * Skip addresses that are not autoconfigured, and
		 * autoconfigured ones that belong to a different prefix.
		 */
		if ((cand->ia6_flags & IN6_IFF_AUTOCONF) == 0 ||
		    cand->ia6_ndpr == NULL ||
		    cand->ia6_ndpr != ia6->ia6_ndpr)
			continue;

		/*
		 * An autoconf address with our prefix.  If it is temporary
		 * and still preferred there is no point in creating
		 * another one.  This is rare, but can happen, for example,
		 * when a laptop PC is resumed after a long period.
		 */
		if ((cand->ia6_flags & IN6_IFF_TEMPORARY) != 0 &&
		    !IFA6_IS_DEPRECATED(cand)) {
			pub = NULL;
			break;
		}

		/*
		 * A public autoconf address with our prefix: remember it
		 * if it is preferred.  The scan must go on, because a
		 * still-preferred temporary address with the same prefix
		 * may show up later in the list.
		 */
		if (!IFA6_IS_DEPRECATED(cand))
			pub = cand;
	}

	if (pub == NULL)
		return (-1);

	e = in6_tmpifadd(pub, 0, 0);
	if (e != 0) {
		log(LOG_NOTICE, "regen_tmpaddr: failed to create a new"
		    " tmp addr,errno=%d\n", e);
		return (-1);
	}
	return (0);
}
/*
 * Nuke neighbor cache/prefix/default router management table, right before
 * ifp goes away.
 *
 * In order, this routine:
 *  1. deletes default router entries on ifp that are not installed,
 *  2. deletes default router entries on ifp that are installed,
 *  3. zeroes the reference count of every prefix bound to ifp and removes it,
 *  4. clears the default outgoing interface if it was ifp,
 *  5. re-runs default router selection when forwarding is off and router
 *     advertisements are accepted.
 * Neighbor cache entries are not purged here; see the note at the end of
 * the function.
 */
void
nd6_purge(struct ifnet *ifp)
{
INIT_VNET_INET6(ifp->if_vnet);
struct nd_defrouter *dr, *ndr;
struct nd_prefix *pr, *npr;
/*
 * Nuke default router list entries toward ifp.
 * We defer removal of default router list entries that are installed
 * in the routing table, in order to keep additional side effects as
 * small as possible.
 */
/* First pass: entries that are not installed. */
for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
ndr = TAILQ_NEXT(dr, dr_entry);
if (dr->installed)
continue;
if (dr->ifp == ifp)
defrtrlist_del(dr);
}
/* Second pass: the installed entries that were skipped above. */
for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = ndr) {
ndr = TAILQ_NEXT(dr, dr_entry);
if (!dr->installed)
continue;
if (dr->ifp == ifp)
defrtrlist_del(dr);
}
/* Nuke prefix list entries toward ifp */
for (pr = V_nd_prefix.lh_first; pr; pr = npr) {
/* Fetch the successor first: prelist_remove() may drop pr. */
npr = pr->ndpr_next;
if (pr->ndpr_ifp == ifp) {
/*
 * Because if_detach() does *not* release prefixes
 * while purging addresses the reference count will
 * still be above zero. We therefore reset it to
 * make sure that the prefix really gets purged.
 */
pr->ndpr_refcnt = 0;
/*
 * Previously, pr->ndpr_addr was removed as well,
 * but I strongly believe we don't have to do it.
 * nd6_purge() is only called from in6_ifdetach(),
 * which removes all the associated interface addresses
 * by itself.
 * (jinmei@kame.net 20010129)
 */
prelist_remove(pr);
}
}
/* cancel default outgoing interface setting */
if (V_nd6_defifindex == ifp->if_index)
nd6_setdefaultiface(0);
if (!V_ip6_forwarding && V_ip6_accept_rtadv) { /* XXX: too restrictive? */
- /* refresh default router list */
+ /* refresh default router list
+ *
+ *
+ */
defrouter_select();
+
}
/* XXXXX
 * We do not nuke the neighbor cache entries here any more
 * because the neighbor cache is kept in if_afdata[AF_INET6].
 * nd6_purge() is invoked by in6_ifdetach() which is called
 * from if_detach() where everything gets purged. So let
 * in6_domifdetach() do the actual L2 table purging work.
 */
-#if 0
- /*
- * Nuke neighbor cache entries for the ifp.
- * Note that rt->rt_ifp may not be the same as ifp,
- * due to KAME goto ours hack. See RTM_RESOLVE case in
- * nd6_rtrequest(), and ip6_input().
- */
- IF_AFDATA_LOCK(ifp);
- lltable_free(LLTABLE6(ifp));
- IF_AFDATA_UNLOCK(ifp);
-#endif
}
-
-
/* Qing
 * the caller acquires and releases the lock on the lltbls
+ * Returns the llentry locked
 */
/*
 * Look up, and optionally create, the neighbor cache entry for addr6 in
 * the IPv6 link-layer table of ifp.
 *
 * In the patched version the third-from-last argument is a flag word: the
 * ND6_CREATE and ND6_EXCLUSIVE bits are translated into LLE_CREATE and
 * LLE_EXCLUSIVE for lla_lookup().  The caller must hold the IF_AFDATA lock
 * of ifp (asserted below).  The lla_lookup() result is returned unchanged
 * and may be NULL.
 *
 * NOTE(review): the post-lookup initialisation still tests
 * (flags & LLE_CREATE) although "flags" now carries ND6_* bits; "llflags"
 * looks like the intended operand.  Confirm that ND6_CREATE and LLE_CREATE
 * share a value, or switch the test to llflags.
 */
struct llentry *
-nd6_lookup(struct in6_addr *addr6, int create, struct ifnet *ifp)
+nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp)
{
INIT_VNET_INET6(curvnet);
struct sockaddr_in6 sin6;
struct llentry *ln;
- int flags = 0;
-
+ int llflags = 0;
+
/* Build the sockaddr_in6 lookup key from the bare address. */
bzero(&sin6, sizeof(sin6));
sin6.sin6_len = sizeof(struct sockaddr_in6);
sin6.sin6_family = AF_INET6;
sin6.sin6_addr = *addr6;
- if (create)
- flags |= LLE_CREATE;
- ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)&sin6);
+ IF_AFDATA_LOCK_ASSERT(ifp);
+
+ if (flags & ND6_CREATE)
+ llflags |= LLE_CREATE;
+ if (flags & ND6_EXCLUSIVE)
+ llflags |= LLE_EXCLUSIVE;
+
+ ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6);
/* When creation was requested, reset the state and set up the timer. */
if ((ln != NULL) && (flags & LLE_CREATE)) {
ln->ln_state = ND6_LLINFO_NOSTATE;
callout_init(&ln->ln_timer_ch, 0);
}
+
return (ln);
}
/*
* Test whether a given IPv6 address is a neighbor or not, ignoring
* the actual neighbor cache. The neighbor cache is ignored in order
* to not reenter the routing code from within itself.
*/
static int
nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
{
INIT_VNET_INET6(ifp->if_vnet);
struct nd_prefix *pr;
struct ifaddr *dstaddr;
/*
* A link-local address is always a neighbor.
* XXX: a link does not necessarily specify a single interface.
*/
if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) {
struct sockaddr_in6 sin6_copy;
u_int32_t zone;
/*
* We need sin6_copy since sa6_recoverscope() may modify the
* content (XXX).
*/
sin6_copy = *addr;
if (sa6_recoverscope(&sin6_copy))
return (0); /* XXX: should be impossible */
if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone))
return (0);
if (sin6_copy.sin6_scope_id == zone)
return (1);
else
return (0);
}
/*
* If the address matches one of our addresses,
* it should be a neighbor.
* If the address matches one of our on-link prefixes, it should be a
* neighbor.
*/
for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
if (pr->ndpr_ifp != ifp)
continue;
if (!(pr->ndpr_stateflags & NDPRF_ONLINK))
continue;
if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr,
&addr->sin6_addr, &pr->ndpr_mask))
return (1);
}
/*
* If the address is assigned on the node of the other side of
* a p2p interface, the address should be a neighbor.
*/
dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr);
if ((dstaddr != NULL) && (dstaddr->ifa_ifp == ifp))
return (1);
/*
* If the default router list is empty, all addresses are regarded
* as on-link, and thus, as a neighbor.
* XXX: we restrict the condition to hosts, because routers usually do
* not have the "default router list".
*/
if (!V_ip6_forwarding && TAILQ_FIRST(&V_nd_defrouter) == NULL &&
V_nd6_defifindex == ifp->if_index) {
return (1);
}
return (0);
}
/*
 * Detect if a given IPv6 address identifies a neighbor on a given link.
 * XXX: should take care of the destination of a p2p link?
 *
 * Returns 1 when nd6_is_new_addr_neighbor() accepts the address or when a
 * neighbor cache entry for it exists on ifp, 0 otherwise.
 *
 * The patched version asserts that the caller does NOT hold the IF_AFDATA
 * lock of ifp; the lock is taken and released here around the lookup.
 */
int
nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp)
{
+ struct llentry *lle;
+ int rc = 0;
+ IF_AFDATA_UNLOCK_ASSERT(ifp);
if (nd6_is_new_addr_neighbor(addr, ifp))
return (1);
/*
 * Even if the address matches none of our addresses, it might be
 * in the neighbor cache.
 */
IF_AFDATA_LOCK(ifp);
- if (nd6_lookup(&addr->sin6_addr, 0, ifp) != NULL) {
- IF_AFDATA_UNLOCK(ifp);
- return (1);
+ if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) {
+ LLE_RUNLOCK(lle);
+ rc = 1;
}
IF_AFDATA_UNLOCK(ifp);
- return (0);
+ return (rc);
}
/*
 * Release a neighbor cache (nd6 llinfo) entry.
 *
 * ln - entry to release
 * gc - non-zero when the caller is merely garbage collecting
 *
 * Returns the entry that follows ln on its list.  If ln is a STALE entry
 * for a default router with a non-zero expiry and this is only garbage
 * collection, the entry is kept and its timer is re-armed instead.
 *
 * This routine can cause significant changes in the kernel.  DO NOT make it
 * global unless there is a strong reason and the change is known to be safe.
 */
static struct llentry *
nd6_free(struct llentry *ln, int gc)
{
	INIT_VNET_INET6(curvnet);
	struct nd_defrouter *router;
	struct llentry *successor;
	struct ifnet *ifp;

	/*
	 * pfctlinput(PRC_HOSTDEAD) used to be issued here; it was harmless
	 * but not really necessary.
	 */

	/* Stop the pending timer of this entry. */
	nd6_llinfo_settimer(ln, -1);

	if (!V_ip6_forwarding) {
		struct in6_addr *nbr = &L3_ADDR_SIN6(ln)->sin6_addr;
		int s = splnet();

		router = defrouter_lookup(nbr, ln->lle_tbl->llt_ifp);

		if (gc && router != NULL && router->expire &&
		    ln->ln_state == ND6_LLINFO_STALE) {
			/*
			 * Pure garbage collection of an active default
			 * router: keep the entry and restart the GC timer
			 * from the router's lifetime.  Dropping the entry
			 * would disturb default router selection, which is
			 * undesirable especially when router preference
			 * values are in use.
			 * XXX: the ln_state test is probably redundant but
			 * is kept on purpose.
			 */
			if (router->expire > time_second)
				nd6_llinfo_settimer(ln,
				    (router->expire - time_second) * hz);
			else
				nd6_llinfo_settimer(ln,
				    (long)V_nd6_gctimer * hz);
			splx(s);
			return (LIST_NEXT(ln, lle_next));
		}

		/*
		 * rt6_flush must run whether or not the neighbor is on the
		 * Default Router List; see the corresponding comment in
		 * nd6_na_input().
		 */
		if (router != NULL || ln->ln_router)
			rt6_flush(nbr, ln->lle_tbl->llt_ifp);

		if (router != NULL) {
			/*
			 * An unreachable router may change both default
			 * router selection and on-link detection of the
			 * prefixes it advertised.  Fake the state for the
			 * duration of those checks; the entry is deleted
			 * below anyway.
			 */
			ln->ln_state = ND6_LLINFO_INCOMPLETE;

			/*
			 * defrouter_select() does not affect on-link
			 * determination and MIP6 needs that check before
			 * router selection, so do it first.
			 */
			pfxlist_onlink_check();

			/* Then refresh the default router list. */
			defrouter_select();
		}
		splx(s);
	}

	/*
	 * Pick up the successor only now: pfxlist_onlink_check() above may
	 * have freed other entries, including the old successor (XXX).
	 */
	successor = LIST_NEXT(ln, lle_next);

	ifp = ln->lle_tbl->llt_ifp;
	IF_AFDATA_LOCK(ifp);
	llentry_free(ln);
	IF_AFDATA_UNLOCK(ifp);

	return (successor);
}
/*
 * Upper-layer reachability hint for Neighbor Unreachability Detection.
 *
 * XXX cost-effective methods?
 *
 * rt    - route used to reach dst6; its rt_ifp supplies the interface
 * dst6  - neighbor the upper layer has confirmed as reachable
 * force - when zero, hints are counted in ln_byhint and ignored once the
 *         count exceeds V_nd6_maxnudhint
 *
 * Returns without doing anything if either pointer is NULL, if no cache
 * entry is found, or if the entry's state is below REACHABLE.
 *
 * NOTE(review): nd6_lookup() is called with a NULL ifp, yet nd6_lookup()
 * uses its ifp argument (IF_AFDATA_LOCK_ASSERT(ifp), LLTABLE6(ifp)).
 * Passing the local "ifp" looks like what was intended -- confirm.
 */
void
nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force)
{
INIT_VNET_INET6(curvnet);
struct llentry *ln;
- struct ifnet *ifp = NULL;
+ struct ifnet *ifp;
- if (dst6 == NULL)
+ if ((dst6 == NULL) || (rt == NULL))
return;
- if (rt == NULL)
- return;
ifp = rt->rt_ifp;
IF_AFDATA_LOCK(ifp);
- if ((ln = nd6_lookup(dst6, 0, NULL)) == NULL) {
- IF_AFDATA_UNLOCK(ifp);
+ ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL);
+ IF_AFDATA_UNLOCK(ifp);
+ if (ln == NULL)
return;
- }
- if (ln->ln_state < ND6_LLINFO_REACHABLE) {
- IF_AFDATA_UNLOCK(ifp);
- return;
- }
+ if (ln->ln_state < ND6_LLINFO_REACHABLE)
+ goto done;
/*
 * if we get upper-layer reachability confirmation many times,
 * it is possible we have false information.
 */
if (!force) {
ln->ln_byhint++;
if (ln->ln_byhint > V_nd6_maxnudhint) {
- IF_AFDATA_UNLOCK(ifp);
- return;
+ goto done;
}
}
- ln->ln_state = ND6_LLINFO_REACHABLE;
+ ln->ln_state = ND6_LLINFO_REACHABLE;
/* Non-permanent entries get their timer restarted with the reachable time. */
if (!ND6_LLINFO_PERMANENT(ln)) {
nd6_llinfo_settimer(ln,
(long)ND_IFINFO(rt->rt_ifp)->reachable * hz);
}
- IF_AFDATA_UNLOCK(ifp);
+done:
+ LLE_WUNLOCK(ln);
}
/*
 * Handle the neighbor-discovery ioctl commands for ifp.
 *
 * cmd  - ioctl command; a command with no case below leaves error at 0
 * data - command argument, viewed through the struct pointer matching cmd
 * ifp  - interface the request refers to
 *
 * Returns 0 on success or an error value: EINVAL from the checks visible
 * here, or whatever in6_setscope() / nd6_setdefaultiface() return.
 */
int
nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp)
{
INIT_VNET_INET6(ifp->if_vnet);
struct in6_drlist *drl = (struct in6_drlist *)data;
struct in6_oprlist *oprl = (struct in6_oprlist *)data;
struct in6_ndireq *ndi = (struct in6_ndireq *)data;
struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data;
struct in6_ndifreq *ndif = (struct in6_ndifreq *)data;
struct nd_defrouter *dr;
struct nd_prefix *pr;
int i = 0, error = 0;
int s;
switch (cmd) {
case SIOCGDRLST_IN6:
/*
 * obsolete API, use sysctl under net.inet6.icmp6
 */
bzero(drl, sizeof(*drl));
s = splnet();
dr = TAILQ_FIRST(&V_nd_defrouter);
/* Copy at most DRLSTSIZ default routers into the reply. */
while (dr && i < DRLSTSIZ) {
drl->defrouter[i].rtaddr = dr->rtaddr;
in6_clearscope(&drl->defrouter[i].rtaddr);
drl->defrouter[i].flags = dr->flags;
drl->defrouter[i].rtlifetime = dr->rtlifetime;
drl->defrouter[i].expire = dr->expire;
drl->defrouter[i].if_index = dr->ifp->if_index;
i++;
dr = TAILQ_NEXT(dr, dr_entry);
}
splx(s);
break;
case SIOCGPRLST_IN6:
/*
 * obsolete API, use sysctl under net.inet6.icmp6
 *
 * XXX the structure in6_prlist was changed in backward-
 * incompatible manner. in6_oprlist is used for SIOCGPRLST_IN6,
 * in6_prlist is used for nd6_sysctl() - fill_prlist().
 */
/*
 * XXX meaning of fields, especially "raflags", is very
 * different between RA prefix list and RR/static prefix list.
 * how about separating ioctls into two?
 */
bzero(oprl, sizeof(*oprl));
s = splnet();
pr = V_nd_prefix.lh_first;
/* Copy at most PRLSTSIZ prefixes into the reply. */
while (pr && i < PRLSTSIZ) {
struct nd_pfxrouter *pfr;
int j;
oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr;
oprl->prefix[i].raflags = pr->ndpr_raf;
oprl->prefix[i].prefixlen = pr->ndpr_plen;
oprl->prefix[i].vltime = pr->ndpr_vltime;
oprl->prefix[i].pltime = pr->ndpr_pltime;
oprl->prefix[i].if_index = pr->ndpr_ifp->if_index;
if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
oprl->prefix[i].expire = 0;
else {
time_t maxexpire;
/* XXX: we assume time_t is signed. */
/* Largest positive time_t: all bits set except the top one. */
maxexpire = (-1) &
~((time_t)1 <<
((sizeof(maxexpire) * 8) - 1));
/* Clamp lastupdate + vltime to maxexpire. */
if (pr->ndpr_vltime <
maxexpire - pr->ndpr_lastupdate) {
oprl->prefix[i].expire =
pr->ndpr_lastupdate +
pr->ndpr_vltime;
} else
oprl->prefix[i].expire = maxexpire;
}
pfr = pr->ndpr_advrtrs.lh_first;
j = 0;
/*
 * Only the first DRLSTSIZ advertising routers are stored, but
 * j keeps counting so that advrtrs reports the total.
 */
while (pfr) {
if (j < DRLSTSIZ) {
#define RTRADDR oprl->prefix[i].advrtr[j]
RTRADDR = pfr->router->rtaddr;
in6_clearscope(&RTRADDR);
#undef RTRADDR
}
j++;
pfr = pfr->pfr_next;
}
oprl->prefix[i].advrtrs = j;
oprl->prefix[i].origin = PR_ORIG_RA;
i++;
pr = pr->ndpr_next;
}
splx(s);
break;
case OSIOCGIFINFO_IN6:
#define ND ndi->ndi
/* XXX: old ndp(8) assumes a positive value for linkmtu. */
bzero(&ND, sizeof(ND));
ND.linkmtu = IN6_LINKMTU(ifp);
ND.maxmtu = ND_IFINFO(ifp)->maxmtu;
ND.basereachable = ND_IFINFO(ifp)->basereachable;
ND.reachable = ND_IFINFO(ifp)->reachable;
ND.retrans = ND_IFINFO(ifp)->retrans;
ND.flags = ND_IFINFO(ifp)->flags;
ND.recalctm = ND_IFINFO(ifp)->recalctm;
ND.chlim = ND_IFINFO(ifp)->chlim;
break;
case SIOCGIFINFO_IN6:
ND = *ND_IFINFO(ifp);
break;
case SIOCSIFINFO_IN6:
/*
 * used to change host variables from userland.
 * intended for use on a router to reflect RA configurations.
 */
/* 0 means 'unspecified' */
if (ND.linkmtu != 0) {
if (ND.linkmtu < IPV6_MMTU ||
ND.linkmtu > IN6_LINKMTU(ifp)) {
error = EINVAL;
break;
}
ND_IFINFO(ifp)->linkmtu = ND.linkmtu;
}
if (ND.basereachable != 0) {
int obasereachable = ND_IFINFO(ifp)->basereachable;
ND_IFINFO(ifp)->basereachable = ND.basereachable;
/* Recompute the reachable time only if the base value changed. */
if (ND.basereachable != obasereachable)
ND_IFINFO(ifp)->reachable =
ND_COMPUTE_RTIME(ND.basereachable);
}
if (ND.retrans != 0)
ND_IFINFO(ifp)->retrans = ND.retrans;
if (ND.chlim != 0)
ND_IFINFO(ifp)->chlim = ND.chlim;
/* FALLTHROUGH */
case SIOCSIFINFO_FLAGS:
ND_IFINFO(ifp)->flags = ND.flags;
break;
#undef ND
case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */
/* sync kernel routing table with the default router list */
defrouter_reset();
defrouter_select();
break;
case SIOCSPFXFLUSH_IN6:
{
/* flush all the prefix advertised by routers */
struct nd_prefix *pr, *next;
s = splnet();
for (pr = V_nd_prefix.lh_first; pr; pr = next) {
struct in6_ifaddr *ia, *ia_next;
next = pr->ndpr_next;
if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
continue; /* XXX */
/* do we really have to remove addresses as well? */
for (ia = V_in6_ifaddr; ia; ia = ia_next) {
/* ia might be removed. keep the next ptr. */
ia_next = ia->ia_next;
if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0)
continue;
if (ia->ia6_ndpr == pr)
in6_purgeaddr(&ia->ia_ifa);
}
prelist_remove(pr);
}
splx(s);
break;
}
case SIOCSRTRFLUSH_IN6:
{
/* flush all the default routers */
struct nd_defrouter *dr, *next;
s = splnet();
defrouter_reset();
for (dr = TAILQ_FIRST(&V_nd_defrouter); dr; dr = next) {
next = TAILQ_NEXT(dr, dr_entry);
defrtrlist_del(dr);
}
defrouter_select();
splx(s);
break;
}
case SIOCGNBRINFO_IN6:
{
struct llentry *ln;
struct in6_addr nb_addr = nbi->addr; /* make local for safety */
if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0)
return (error);
/*
 * The patched code drops the IF_AFDATA lock right after the lookup
 * and releases the entry itself with LLE_RUNLOCK() once the fields
 * have been copied out.
 */
IF_AFDATA_LOCK(ifp);
- if ((ln = nd6_lookup(&nb_addr, 0, ifp)) == NULL) {
+ ln = nd6_lookup(&nb_addr, 0, ifp);
+ IF_AFDATA_UNLOCK(ifp);
+
+ if (ln == NULL) {
error = EINVAL;
- IF_AFDATA_UNLOCK(ifp);
break;
}
nbi->state = ln->ln_state;
nbi->asked = ln->la_asked;
nbi->isrouter = ln->ln_router;
nbi->expire = ln->la_expire;
- IF_AFDATA_UNLOCK(ifp);
-
+ LLE_RUNLOCK(ln);
break;
}
case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */
ndif->ifindex = V_nd6_defifindex;
break;
case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */
return (nd6_setdefaultiface(ndif->ifindex));
}
return (error);
}
/*
 * Create neighbor cache entry and cache link-layer address,
 * on reception of inbound ND6 packets. (RS/RA/NS/redirect)
 *
 * ifp       - receiving interface
 * from      - source address of the packet
 * lladdr    - link-layer address carried in the packet, or NULL
 * lladdrlen - length of lladdr (not referenced in this function body)
 * type - ICMP6 type
 * code - type dependent information
 *
 * Returns the cache entry, or NULL for the unspecified address, when no
 * entry could be obtained, or when the entry is static.
 *
 * XXXXX
 * The caller of this function already acquired the ndp
 * cache table lock because the cache entry is returned.
 *
 * NOTE(review): the XXXXX paragraph above no longer matches the patched
 * body, which asserts that the IF_AFDATA lock is NOT held on entry and
 * unlocks the entry before returning it.
 * NOTE(review): IF_AFDATA_UNLOCK_ASSERT(ifp) runs before the
 * "ifp == NULL" panic check, so the check cannot catch a NULL ifp first.
 * NOTE(review): at "done:" ln->la_flags is read after the entry has been
 * unlocked.
 * NOTE(review): when lladdr is NULL the entry is looked up without
 * ND6_EXCLUSIVE, yet ln_state / ln_router may still be written below.
 * NOTE(review): nd6_output() is called with rt0 == NULL while the entry is
 * still locked; the patched nd6_output() dereferences rt->rt_ifp.
 */
struct llentry *
nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr,
int lladdrlen, int type, int code)
{
INIT_VNET_INET6(curvnet);
struct llentry *ln = NULL;
int is_newentry;
int do_update;
int olladdr;
int llchange;
+ int flags = 0;
int newstate = 0;
+ IF_AFDATA_UNLOCK_ASSERT(ifp);
+
if (ifp == NULL)
panic("ifp == NULL in nd6_cache_lladdr");
if (from == NULL)
panic("from == NULL in nd6_cache_lladdr");
/* nothing must be updated for unspecified address */
if (IN6_IS_ADDR_UNSPECIFIED(from))
return NULL;
/*
 * Validation about ifp->if_addrlen and lladdrlen must be done in
 * the caller.
 *
 * XXX If the link does not have link-layer address, what should
 * we do? (ifp->if_addrlen == 0)
 * Spec says nothing in sections for RA, RS and NA. There's small
 * description on it in NS section (RFC 2461 7.2.3).
 */
- ln = nd6_lookup(from, 0, ifp);
+ flags |= lladdr ? ND6_EXCLUSIVE : 0;
+ IF_AFDATA_LOCK(ifp);
+ ln = nd6_lookup(from, flags, ifp);
+ if (ln)
+ IF_AFDATA_UNLOCK(ifp);
if (ln == NULL) {
- ln = nd6_lookup(from, 1, ifp);
+ ln = nd6_lookup(from, flags |ND6_CREATE, ifp);
+ IF_AFDATA_UNLOCK(ifp);
is_newentry = 1;
} else {
/* do nothing if static ndp is set */
if (ln->la_flags & LLE_STATIC)
- return NULL;
+ goto done;
is_newentry = 0;
}
- if (ln == NULL) {
- return NULL;
- }
+ if (ln == NULL)
+ return (NULL);
/* olladdr: did the entry already hold a valid link-layer address? */
olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0;
if (olladdr && lladdr) {
- if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen))
- llchange = 1;
- else
- llchange = 0;
+ llchange = bcmp(lladdr, &ln->ll_addr,
+ ifp->if_addrlen);
} else
llchange = 0;
/*
 * newentry olladdr lladdr llchange (*=record)
 * 0 n n -- (1)
 * 0 y n -- (2)
 * 0 n y -- (3) * STALE
 * 0 y y n (4) *
 * 0 y y y (5) * STALE
 * 1 -- n -- (6) NOSTATE(= PASSIVE)
 * 1 -- y -- (7) * STALE
 */
if (lladdr) { /* (3-5) and (7) */
/*
 * Record source link-layer address
 * XXX is it dependent to ifp->if_type?
 */
bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
ln->la_flags |= LLE_VALID;
}
if (!is_newentry) {
if ((!olladdr && lladdr != NULL) || /* (3) */
(olladdr && lladdr != NULL && llchange)) { /* (5) */
do_update = 1;
newstate = ND6_LLINFO_STALE;
} else /* (1-2,4) */
do_update = 0;
} else {
do_update = 1;
if (lladdr == NULL) /* (6) */
newstate = ND6_LLINFO_NOSTATE;
else /* (7) */
newstate = ND6_LLINFO_STALE;
}
if (do_update) {
/*
 * Update the state of the neighbor cache.
 */
ln->ln_state = newstate;
if (ln->ln_state == ND6_LLINFO_STALE) {
/*
 * XXX: since nd6_output() below will cause
 * state transition to DELAY and reset the timer,
 * we must set the timer now, although it is actually
 * meaningless.
 */
nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
if (ln->la_hold) {
struct mbuf *m_hold, *m_hold_next;
/*
 * reset the la_hold in advance, to explicitly
 * prevent a la_hold lookup in nd6_output()
 * (wouldn't happen, though...)
 */
for (m_hold = ln->la_hold, ln->la_hold = NULL;
m_hold; m_hold = m_hold_next) {
m_hold_next = m_hold->m_nextpkt;
m_hold->m_nextpkt = NULL;
/*
 * we assume ifp is not a p2p here, so
 * just set the 2nd argument as the
 * 1st one.
 */
nd6_output(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL);
}
}
} else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
/* probe right away */
nd6_llinfo_settimer((void *)ln, 0);
}
}
/*
 * ICMP6 type dependent behavior.
 *
 * NS: clear IsRouter if new entry
 * RS: clear IsRouter
 * RA: set IsRouter if there's lladdr
 * redir: clear IsRouter if new entry
 *
 * RA case, (1):
 * The spec says that we must set IsRouter in the following cases:
 * - If lladdr exist, set IsRouter. This means (1-5).
 * - If it is old entry (!newentry), set IsRouter. This means (7).
 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter.
 * A question arises for (1) case. (1) case has no lladdr in the
 * neighbor cache, this is similar to (6).
 * This case is rare but we figured that we MUST NOT set IsRouter.
 *
 * newentry olladdr lladdr llchange NS RS RA redir
 * D R
 * 0 n n -- (1) c ? s
 * 0 y n -- (2) c s s
 * 0 n y -- (3) c s s
 * 0 y y n (4) c s s
 * 0 y y y (5) c s s
 * 1 -- n -- (6) c c c s
 * 1 -- y -- (7) c c s c s
 *
 * (c=clear s=set)
 */
switch (type & 0xff) {
case ND_NEIGHBOR_SOLICIT:
/*
 * New entry must have is_router flag cleared.
 */
if (is_newentry) /* (6-7) */
ln->ln_router = 0;
break;
case ND_REDIRECT:
/*
 * If the icmp is a redirect to a better router, always set the
 * is_router flag. Otherwise, if the entry is newly created,
 * clear the flag. [RFC 2461, sec 8.3]
 */
if (code == ND_REDIRECT_ROUTER)
ln->ln_router = 1;
else if (is_newentry) /* (6-7) */
ln->ln_router = 0;
break;
case ND_ROUTER_SOLICIT:
/*
 * is_router flag must always be cleared.
 */
ln->ln_router = 0;
break;
case ND_ROUTER_ADVERT:
/*
 * Mark an entry with lladdr as a router.
 */
if ((!is_newentry && (olladdr || lladdr)) || /* (2-5) */
(is_newentry && lladdr)) { /* (7) */
ln->ln_router = 1;
}
break;
}
/*
 * When the link-layer address of a router changes, select the
 * best router again. In particular, when the neighbor entry is newly
 * created, it might affect the selection policy.
 * Question: can we restrict the first condition to the "is_newentry"
 * case?
 * XXX: when we hear an RA from a new router with the link-layer
 * address option, defrouter_select() is called twice, since
 * defrtrlist_update called the function as well. However, I believe
 * we can compromise the overhead, since it only happens the first
 * time.
 * XXX: although defrouter_select() should not have a bad effect
 * for those are not autoconfigured hosts, we explicitly avoid such
 * cases for safety.
 */
- if (do_update && ln->ln_router && !V_ip6_forwarding && V_ip6_accept_rtadv)
+ if (do_update && ln->ln_router && !V_ip6_forwarding && V_ip6_accept_rtadv) {
+#ifdef notyet
+ /*
+ * XXX implement the boiler plate
+ */
+ taskqueue_enqueue(ipv6_taskq, defrouter_select_task);
+#endif
+ /*
+ * guaranteed recursion
+ */
defrouter_select();
-
- return ln;
+ }
+
+done:
+ if (ln) {
+ if (flags & ND6_EXCLUSIVE)
+ LLE_WUNLOCK(ln);
+ else
+ LLE_RUNLOCK(ln);
+ if (ln->la_flags & LLE_STATIC)
+ ln = NULL;
+ }
+ return (ln);
}
/*
 * Slow periodic timer for neighbor discovery.
 *
 * arg - the vnet this timer instance belongs to; it is installed as the
 *       current vnet for the duration of the call
 *
 * The routine first re-arms itself to fire again after
 * ND6_SLOWTIMER_INTERVAL seconds, then walks the interface list.  For every
 * interface whose basereachable is non-zero it decrements recalctm and,
 * once that reaches zero or below, draws a new randomised reachable time.
 */
static void
nd6_slowtimo(void *arg)
{
	CURVNET_SET((struct vnet *) arg);
	INIT_VNET_NET((struct vnet *) arg);
	INIT_VNET_INET6((struct vnet *) arg);
	struct nd_ifinfo *nd6if;
	struct ifnet *ifp;

	/*
	 * Re-arm with the argument we were invoked with.  The handler
	 * derives its vnet from "arg", so handing the callout a NULL
	 * argument would lose that context on the next tick.
	 */
	callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz,
	    nd6_slowtimo, arg);

	IFNET_RLOCK();
	for (ifp = TAILQ_FIRST(&V_ifnet); ifp;
	    ifp = TAILQ_NEXT(ifp, if_list)) {
		nd6if = ND_IFINFO(ifp);
		if (nd6if->basereachable && /* already initialized */
		    (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) {
			/*
			 * Since reachable time rarely changes by router
			 * advertisements, we SHOULD ensure that a new random
			 * value gets recomputed at least once every few hours.
			 * (RFC 2461, 6.3.4)
			 */
			nd6if->recalctm = V_nd6_recalc_reachtm_interval;
			nd6if->reachable =
			    ND_COMPUTE_RTIME(nd6if->basereachable);
		}
	}
	IFNET_RUNLOCK();
	CURVNET_RESTORE();
}
+/*
+ * Note that I'm not enforcing any global serialization
+ * lle state or asked changes here as the logic is too
+ * complicated to avoid having to always acquire an exclusive
+ * lock
+ * KMM
+ *
+ */
/*
 * Send an IPv6 packet through ifp, doing neighbor cache lookup and address
 * resolution first where the interface needs it.
 *
 * ifp     - outgoing interface
 * origifp - interface handed to if_output when ifp is a loopback interface
 * m0      - packet to send
 * dst     - next-hop/destination address
 * rt0     - route, handed on to if_output
 *
 * Multicast destinations and interfaces for which nd6_need_cache() returns
 * 0 go straight to if_output.  Otherwise the packet is either sent at once
 * (entry state above INCOMPLETE) or queued on the entry's la_hold list
 * while a neighbor solicitation is sent.
 *
 * Returns 0 when the packet was queued, the if_output result when it was
 * sent, or EIO / ENETDOWN after freeing the packet on the "bad" path.
 *
 * NOTE(review): the patched code locks via rt->rt_ifp although rt (rt0)
 * may be NULL -- nd6_cache_lladdr() calls this function with a NULL rt0 --
 * while the lookups themselves use LLTABLE6(ifp).
 * NOTE(review): "flags" first holds LLE_* bits and is later reassigned
 * with ND6_* bits, but every unlock path tests (flags & LLE_EXCLUSIVE).
 * NOTE(review): on the queueing path ln is unlocked and then still read
 * and written (la_asked, timer, nd6_ns_output()).
 */
#define senderr(e) { error = (e); goto bad;}
int
nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m0,
struct sockaddr_in6 *dst, struct rtentry *rt0)
{
INIT_VNET_INET6(curvnet);
struct mbuf *m = m0;
struct rtentry *rt = rt0;
struct llentry *ln = NULL;
int error = 0;
+ int flags = 0;
if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr))
goto sendpkt;
if (nd6_need_cache(ifp) == 0)
goto sendpkt;
/*
 * next hop determination. This routine is derived from ether_output.
 */
/*
 * Address resolution or Neighbor Unreachability Detection
 * for the next hop.
 * At this point, the destination of the packet must be a unicast
 * or an anycast address(i.e. not a multicast).
 */
- ln = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)dst);
+ flags = m ? LLE_EXCLUSIVE : 0;
+ IF_AFDATA_LOCK(rt->rt_ifp);
+ ln = lla_lookup(LLTABLE6(ifp), flags, (struct sockaddr *)dst);
+ IF_AFDATA_UNLOCK(rt->rt_ifp);
if ((ln == NULL) && nd6_is_addr_neighbor(dst, ifp)) {
/*
 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(),
 * the condition below is not very efficient. But we believe
 * it is tolerable, because this should be a rare case.
 */
- ln = nd6_lookup(&dst->sin6_addr, 1, ifp);
+ flags = ND6_CREATE | (m ? ND6_EXCLUSIVE : 0);
+ IF_AFDATA_LOCK(rt->rt_ifp);
+ ln = nd6_lookup(&dst->sin6_addr, flags, ifp);
+ IF_AFDATA_UNLOCK(rt->rt_ifp);
}
if (ln == NULL) {
if ((ifp->if_flags & IFF_POINTOPOINT) == 0 &&
!(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) {
char ip6buf[INET6_ADDRSTRLEN];
log(LOG_DEBUG,
"nd6_output: can't allocate llinfo for %s "
"(ln=%p, rt=%p)\n",
ip6_sprintf(ip6buf, &dst->sin6_addr), ln, rt);
senderr(EIO); /* XXX: good error? */
}
-
goto sendpkt; /* send anyway */
}
/* We don't have to do link-layer address resolution on a p2p link. */
if ((ifp->if_flags & IFF_POINTOPOINT) != 0 &&
ln->ln_state < ND6_LLINFO_REACHABLE) {
ln->ln_state = ND6_LLINFO_STALE;
nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
}
/*
 * The first time we send a packet to a neighbor whose entry is
 * STALE, we have to change the state to DELAY and set a timer to
 * expire in DELAY_FIRST_PROBE_TIME seconds so that neighbor
 * unreachability detection is performed on expiration.
 * (RFC 2461 7.3.3)
 */
if (ln->ln_state == ND6_LLINFO_STALE) {
ln->la_asked = 0;
ln->ln_state = ND6_LLINFO_DELAY;
nd6_llinfo_settimer(ln, (long)V_nd6_delay * hz);
}
/*
 * If the neighbor cache entry has a state other than INCOMPLETE
 * (i.e. its link-layer address is already resolved), just
 * send the packet.
 */
if (ln->ln_state > ND6_LLINFO_INCOMPLETE)
goto sendpkt;
/*
 * There is a neighbor cache entry, but no ethernet address
 * response yet. Append this latest packet to the end of the
 * packet queue in the mbuf, unless the number of the packet
 * does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen,
 * the oldest packet in the queue will be removed.
 */
if (ln->ln_state == ND6_LLINFO_NOSTATE)
ln->ln_state = ND6_LLINFO_INCOMPLETE;
if (ln->la_hold) {
struct mbuf *m_hold;
int i;
i = 0;
/* Walk to the tail, counting the packets already queued, and append m. */
for (m_hold = ln->la_hold; m_hold; m_hold = m_hold->m_nextpkt) {
i++;
if (m_hold->m_nextpkt == NULL) {
m_hold->m_nextpkt = m;
break;
}
}
/* Drop packets from the head while the old count is at or over the limit. */
while (i >= V_nd6_maxqueuelen) {
m_hold = ln->la_hold;
ln->la_hold = ln->la_hold->m_nextpkt;
m_freem(m_hold);
i--;
}
} else {
ln->la_hold = m;
}
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(ln);
+ else
+ LLE_RUNLOCK(ln);
+
/*
 * If there has been no NS for the neighbor after entering the
 * INCOMPLETE state, send the first solicitation.
 */
if (!ND6_LLINFO_PERMANENT(ln) && ln->la_asked == 0) {
ln->la_asked++;
+
nd6_llinfo_settimer(ln,
(long)ND_IFINFO(ifp)->retrans * hz / 1000);
nd6_ns_output(ifp, NULL, &dst->sin6_addr, ln, 0);
}
return (0);
sendpkt:
/* discard the packet if IPv6 operation is disabled on the interface */
if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) {
error = ENETDOWN; /* better error? */
goto bad;
}
+ if (ln) {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(ln);
+ else
+ LLE_RUNLOCK(ln);
+ }
#ifdef MAC
mac_netinet6_nd6_send(ifp, m);
#endif
/* Loopback interfaces are handed origifp instead of ifp. */
if ((ifp->if_flags & IFF_LOOPBACK) != 0) {
return ((*ifp->if_output)(origifp, m, (struct sockaddr *)dst,
rt));
}
error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, rt);
return (error);
bad:
+ if (ln) {
+ if (flags & LLE_EXCLUSIVE)
+ LLE_WUNLOCK(ln);
+ else
+ LLE_RUNLOCK(ln);
+ }
if (m)
m_freem(m);
return (error);
}
#undef senderr
/*
 * Tell whether a neighbor cache is maintained for the given interface.
 *
 * Returns 1 for the interface types listed below and 0 for everything else.
 *
 * XXX: only a fixed set of link types is covered.  RFC2893 says that
 * unidirectional tunnels need no ND.
 */
int
nd6_need_cache(struct ifnet *ifp)
{
	int cached = 0;

	switch (ifp->if_type) {
	case IFT_ARCNET:
	case IFT_ETHER:
	case IFT_FDDI:
	case IFT_IEEE1394:
#ifdef IFT_L2VLAN
	case IFT_L2VLAN:
#endif
#ifdef IFT_IEEE80211
	case IFT_IEEE80211:
#endif
#ifdef IFT_CARP
	case IFT_CARP:
#endif
	case IFT_GIF:		/* XXX need more cases? */
	case IFT_PPP:
	case IFT_TUNNEL:
	case IFT_BRIDGE:
	case IFT_PROPVIRTUAL:
		cached = 1;
		break;
	default:
		break;
	}
	return (cached);
}
/*
- * the caller of this function needs to lock the interface table
+ * the callers of this function need to be re-worked to drop
+ * the lle lock, drop here for now
 */
/*
 * Fill in the link-layer destination address for an outgoing packet.
 *
 * ifp    - outgoing interface
 * rt0    - route (not referenced in this function body)
 * m      - packet; freed here on every failure path
 * dst    - IPv6 destination
 * desten - buffer that receives the link-layer address
 * lle    - set to NULL on entry; on unicast success set to the cache entry
 *
 * For multicast packets (M_MCAST) the address is derived from the interface
 * type.  For unicast it is copied from the neighbor cache entry for dst.
 *
 * Returns 0 on success, EAFNOSUPPORT for multicast on an unsupported
 * interface type, or 1 when no valid cache entry exists.
 *
 * The patched version asserts that the caller does NOT hold the IF_AFDATA
 * lock of ifp; it is taken here only around the lookup.
 */
int
nd6_storelladdr(struct ifnet *ifp, struct rtentry *rt0, struct mbuf *m,
struct sockaddr *dst, u_char *desten, struct llentry **lle)
{
struct llentry *ln;
*lle = NULL;
+ IF_AFDATA_UNLOCK_ASSERT(ifp);
if (m->m_flags & M_MCAST) {
int i;
switch (ifp->if_type) {
case IFT_ETHER:
case IFT_FDDI:
#ifdef IFT_L2VLAN
case IFT_L2VLAN:
#endif
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
case IFT_BRIDGE:
case IFT_ISO88025:
ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr,
desten);
return (0);
case IFT_IEEE1394:
/*
 * netbsd can use if_broadcastaddr, but we don't do so
 * to reduce # of ifdef.
 */
/* Fill the whole address with all-ones bytes. */
for (i = 0; i < ifp->if_addrlen; i++)
desten[i] = ~0;
return (0);
case IFT_ARCNET:
*desten = 0;
return (0);
default:
m_freem(m);
return (EAFNOSUPPORT);
}
}
/*
 * the entry should have been created in nd6_store_lladdr
 * NOTE(review): "nd6_store_lladdr" is the original wording; the entry is
 * presumably created by nd6_output() -- confirm.
 */
+ IF_AFDATA_LOCK(ifp);
ln = lla_lookup(LLTABLE6(ifp), 0, dst);
+ IF_AFDATA_UNLOCK(ifp);
if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) {
+ if (ln)
+ LLE_RUNLOCK(ln);
/* this could happen, if we could not allocate memory */
m_freem(m);
return (1);
}
bcopy(&ln->ll_addr, desten, ifp->if_addrlen);
*lle = ln;
+ LLE_RUNLOCK(ln);
+ /*
+ * A *small* use after free race exists here
+ */
return (0);
}
/*
 * Free every packet queued on ln->la_hold and leave the queue empty.
 */
static void
clear_llinfo_pqueue(struct llentry *ln)
{
	struct mbuf *pkt = ln->la_hold;

	while (pkt != NULL) {
		struct mbuf *rest = pkt->m_nextpkt;

		/* Detach the packet from the chain before freeing it. */
		pkt->m_nextpkt = NULL;
		m_freem(pkt);
		pkt = rest;
	}
	ln->la_hold = NULL;
}
/* Forward declarations of the sysctl handlers defined further down. */
static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS);
static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS);
#ifdef SYSCTL_DECL
SYSCTL_DECL(_net_inet6_icmp6);
#endif
/*
 * Read-only (CTLFLAG_RD) nodes under _net_inet6_icmp6 for the default
 * router list and the prefix list, served by the two handlers above.
 */
SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
CTLFLAG_RD, nd6_sysctl_drlist, "");
SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist,
CTLFLAG_RD, nd6_sysctl_prlist, "");
/* Read-write (CTLFLAG_RW) integer for nd6_maxqueuelen. */
SYSCTL_V_INT(V_NET, vnet_inet6, _net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN,
nd6_maxqueuelen, CTLFLAG_RW, nd6_maxqueuelen, 1, "");
static int
nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
{
INIT_VNET_INET6(curvnet);
int error;
char buf[1024] __aligned(4);
struct in6_defrouter *d, *de;
struct nd_defrouter *dr;
if (req->newptr)
return EPERM;
error = 0;
for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
dr = TAILQ_NEXT(dr, dr_entry)) {
d = (struct in6_defrouter *)buf;
de = (struct in6_defrouter *)(buf + sizeof(buf));
if (d + 1 <= de) {
bzero(d, sizeof(*d));
d->rtaddr.sin6_family = AF_INET6;
d->rtaddr.sin6_len = sizeof(d->rtaddr);
d->rtaddr.sin6_addr = dr->rtaddr;
error = sa6_recoverscope(&d->rtaddr);
if (error != 0)
return (error);
d->flags = dr->flags;
d->rtlifetime = dr->rtlifetime;
d->expire = dr->expire;
d->if_index = dr->ifp->if_index;
} else
panic("buffer too short");
error = SYSCTL_OUT(req, buf, sizeof(*d));
if (error)
break;
}
return (error);
}
/*
 * sysctl handler that exports the prefix list.
 *
 * For every prefix one struct in6_prefix, immediately followed by one
 * struct sockaddr_in6 per advertising router, is built in a local buffer
 * and copied out with SYSCTL_OUT().
 *
 * Only as many advertising routers as fit into the buffer are emitted, and
 * p->advrtrs reports exactly that number.  The count must match the records
 * that follow, and the number of bytes copied out must never exceed the
 * buffer: counting routers that were not stored would make SYSCTL_OUT()
 * read past the end of buf.
 *
 * Returns EPERM for write requests, the SYSCTL_OUT() error if the copy-out
 * fails, and 0 otherwise.
 */
static int
nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS)
{
	INIT_VNET_INET6(curvnet);
	int error;
	char buf[1024] __aligned(4);
	struct in6_prefix *p, *pe;
	struct nd_prefix *pr;
	char ip6buf[INET6_ADDRSTRLEN];

	/* The list is read-only. */
	if (req->newptr)
		return EPERM;
	error = 0;

	for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
		u_short advrtrs;
		size_t advance;
		struct sockaddr_in6 *sin6, *s6;
		struct nd_pfxrouter *pfr;

		p = (struct in6_prefix *)buf;
		pe = (struct in6_prefix *)(buf + sizeof(buf));

		if (p + 1 <= pe) {
			bzero(p, sizeof(*p));
			/* Router records start right after the header. */
			sin6 = (struct sockaddr_in6 *)(p + 1);

			p->prefix = pr->ndpr_prefix;
			if (sa6_recoverscope(&p->prefix)) {
				log(LOG_ERR,
				    "scope error in prefix list (%s)\n",
				    ip6_sprintf(ip6buf, &p->prefix.sin6_addr));
				/* XXX: press on... */
			}
			p->raflags = pr->ndpr_raf;
			p->prefixlen = pr->ndpr_plen;
			p->vltime = pr->ndpr_vltime;
			p->pltime = pr->ndpr_pltime;
			p->if_index = pr->ndpr_ifp->if_index;
			if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME)
				p->expire = 0;
			else {
				time_t maxexpire;

				/* XXX: we assume time_t is signed. */
				maxexpire = (-1) &
				    ~((time_t)1 <<
				    ((sizeof(maxexpire) * 8) - 1));
				/* Clamp lastupdate + vltime to maxexpire. */
				if (pr->ndpr_vltime <
				    maxexpire - pr->ndpr_lastupdate) {
					p->expire = pr->ndpr_lastupdate +
					    pr->ndpr_vltime;
				} else
					p->expire = maxexpire;
			}
			p->refcnt = pr->ndpr_refcnt;
			p->flags = pr->ndpr_stateflags;
			p->origin = PR_ORIG_RA;

			advrtrs = 0;
			for (pfr = pr->ndpr_advrtrs.lh_first; pfr;
			    pfr = pfr->pfr_next) {
				/*
				 * Stop as soon as the next record would not
				 * fit; advrtrs then equals the number of
				 * records actually stored in buf.
				 */
				if ((void *)&sin6[advrtrs + 1] > (void *)pe)
					break;
				s6 = &sin6[advrtrs];
				bzero(s6, sizeof(*s6));
				s6->sin6_family = AF_INET6;
				s6->sin6_len = sizeof(*sin6);
				s6->sin6_addr = pfr->router->rtaddr;
				if (sa6_recoverscope(s6)) {
					log(LOG_ERR,
					    "scope error in "
					    "prefix list (%s)\n",
					    ip6_sprintf(ip6buf,
					    &pfr->router->rtaddr));
				}
				advrtrs++;
			}
			p->advrtrs = advrtrs;
		} else
			panic("buffer too short");

		/* Header plus the stored router records; never beyond buf. */
		advance = sizeof(*p) + sizeof(*sin6) * advrtrs;
		error = SYSCTL_OUT(req, buf, advance);
		if (error)
			break;
	}
	return (error);
}
Index: projects/arpv2_merge_1/sys/netinet6/nd6.h
===================================================================
--- projects/arpv2_merge_1/sys/netinet6/nd6.h (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet6/nd6.h (revision 185839)
@@ -1,426 +1,429 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $KAME: nd6.h,v 1.76 2001/12/18 02:10:31 itojun Exp $
* $FreeBSD$
*/
#ifndef _NETINET6_ND6_H_
#define _NETINET6_ND6_H_
/* see net/route.h, or net/if_inarp.h */
#ifndef RTF_ANNOUNCE
#define RTF_ANNOUNCE RTF_PROTO2
#endif
#include <sys/queue.h>
#include <sys/callout.h>
struct llentry;
#define ND6_LLINFO_NOSTATE -2
/*
* We don't need the WAITDELETE state any more, but we keep the definition
* in a comment line instead of removing it. This is necessary to avoid
* unintentionally reusing the value for another purpose, which might
* affect backward compatibility with old applications.
* (20000711 jinmei@kame.net)
*/
/* #define ND6_LLINFO_WAITDELETE -1 */
#define ND6_LLINFO_INCOMPLETE 0
#define ND6_LLINFO_REACHABLE 1
#define ND6_LLINFO_STALE 2
#define ND6_LLINFO_DELAY 3
#define ND6_LLINFO_PROBE 4
/* True when the entry's ln_state is numerically above ND6_LLINFO_INCOMPLETE. */
#define ND6_IS_LLINFO_PROBREACH(n) ((n)->ln_state > ND6_LLINFO_INCOMPLETE)
/*
 * True when the entry has la_expire == 0 and its ln_state is numerically
 * above ND6_LLINFO_INCOMPLETE.
 */
#define ND6_LLINFO_PERMANENT(n) (((n)->la_expire == 0) && ((n)->ln_state > ND6_LLINFO_INCOMPLETE))
/*
 * Neighbor Discovery parameters and state for one interface.
 * This structure is embedded in struct in6_ndireq; in the kernel it is
 * presumably what ND_IFINFO(ifp) points to (confirm against in6_ifextra).
 */
struct nd_ifinfo {
u_int32_t linkmtu; /* LinkMTU */
u_int32_t maxmtu; /* Upper bound of LinkMTU */
u_int32_t basereachable; /* BaseReachableTime */
u_int32_t reachable; /* Reachable Time */
u_int32_t retrans; /* Retrans Timer */
u_int32_t flags; /* Flags (presumably ND6_IFF_* bits -- confirm) */
int recalctm; /* BaseReachableTime re-calculation timer */
u_int8_t chlim; /* CurHopLimit */
u_int8_t initialized; /* Flag to see the entry is initialized */
/* the following 3 members are for privacy extension for addrconf */
u_int8_t randomseed0[8]; /* upper 64 bits of MD5 digest */
u_int8_t randomseed1[8]; /* lower 64 bits (usually the EUI64 IFID) */
u_int8_t randomid[8]; /* current random ID */
};
#define ND6_IFF_PERFORMNUD 0x1
#define ND6_IFF_ACCEPT_RTADV 0x2
#define ND6_IFF_PREFER_SOURCE 0x4 /* XXX: not related to ND. */
#define ND6_IFF_IFDISABLED 0x8 /* IPv6 operation is disabled due to
* DAD failure. (XXX: not ND-specific)
*/
#define ND6_IFF_DONT_SET_IFROUTE 0x10
+#define ND6_CREATE LLE_CREATE
+#define ND6_EXCLUSIVE LLE_EXCLUSIVE
+
#ifdef _KERNEL
/*
 * Fetch the nd_ifinfo member of the interface's AF_INET6 per-family data,
 * i.e. ifp->if_afdata[AF_INET6] treated as a struct in6_ifextra.
 */
#define ND_IFINFO(ifp) \
(((struct in6_ifextra *)(ifp)->if_afdata[AF_INET6])->nd_ifinfo)
/*
 * Effective link MTU for ifp:
 *  - linkmtu, if it is non-zero and smaller than if_mtu; otherwise
 *  - maxmtu, if it is non-zero and smaller than if_mtu; otherwise
 *  - if_mtu itself.
 * The argument is evaluated several times, so it must be free of side
 * effects.
 */
#define IN6_LINKMTU(ifp) \
((ND_IFINFO(ifp)->linkmtu && ND_IFINFO(ifp)->linkmtu < (ifp)->if_mtu) \
? ND_IFINFO(ifp)->linkmtu \
: ((ND_IFINFO(ifp)->maxmtu && ND_IFINFO(ifp)->maxmtu < (ifp)->if_mtu) \
? ND_IFINFO(ifp)->maxmtu : (ifp)->if_mtu))
#endif
/*
 * Description of one neighbor: the interface name, the neighbor's IPv6
 * address and its neighbor-discovery state.
 */
struct in6_nbrinfo {
char ifname[IFNAMSIZ]; /* if name, e.g. "en0" */
struct in6_addr addr; /* IPv6 address of the neighbor */
long asked; /* number of queries already sent for this addr */
int isrouter; /* if it acts as a router */
int state; /* reachability state (presumably ND6_LLINFO_* -- confirm) */
int expire; /* lifetime for NDP state transition */
};
#define DRLSTSIZ 10
#define PRLSTSIZ 10
/*
 * Fixed-size table of DRLSTSIZ default-router records together with an
 * interface name.  Each record carries the router address as a bare
 * in6_addr; struct in6_defrouter is the sockaddr_in6 flavour of one record.
 */
struct in6_drlist {
char ifname[IFNAMSIZ];
struct {
struct in6_addr rtaddr;
u_char flags;
u_short rtlifetime;
u_long expire;
u_short if_index;
} defrouter[DRLSTSIZ];
};
/*
 * A single default-router record.  Same members as one in6_drlist entry,
 * except that rtaddr is a full sockaddr_in6 rather than an in6_addr.
 */
struct in6_defrouter {
struct sockaddr_in6 rtaddr;
u_char flags;
u_short rtlifetime;
u_long expire;
u_short if_index;
};
#ifdef _KERNEL
/*
 * Kernel-only prefix list layout.  It mirrors struct in6_prlist but stores
 * vltime, pltime and expire as u_long.
 * NOTE(review): the "o" prefix presumably marks this as the older,
 * compatibility layout -- confirm against the ioctl handlers.
 */
struct in6_oprlist {
char ifname[IFNAMSIZ];
struct {
struct in6_addr prefix;
struct prf_ra raflags;
u_char prefixlen;
u_char origin;
u_long vltime;
u_long pltime;
u_long expire;
u_short if_index;
u_short advrtrs; /* number of advertisement routers */
struct in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */
} prefix[PRLSTSIZ];
};
#endif
/*
 * Fixed-size table of PRLSTSIZ prefix records together with an interface
 * name.  Each record has room for at most DRLSTSIZ advertising-router
 * addresses; advrtrs holds the router count.
 */
struct in6_prlist {
char ifname[IFNAMSIZ];
struct {
struct in6_addr prefix;
struct prf_ra raflags;
u_char prefixlen;
u_char origin;
u_int32_t vltime;
u_int32_t pltime;
time_t expire;
u_short if_index;
u_short advrtrs; /* number of advertisement routers */
struct in6_addr advrtr[DRLSTSIZ]; /* XXX: explicit limit */
} prefix[PRLSTSIZ];
};
/*
 * Header of a variable-length prefix record.  As the commented-out last
 * member indicates, the header is meant to be followed by advrtrs
 * sockaddr_in6 entries, one per advertising router.
 */
struct in6_prefix {
struct sockaddr_in6 prefix;
struct prf_ra raflags;
u_char prefixlen;
u_char origin;
u_int32_t vltime;
u_int32_t pltime;
time_t expire;
u_int32_t flags;
int refcnt;
u_short if_index;
u_short advrtrs; /* number of advertisement routers */
/* struct sockaddr_in6 advrtr[] */
};
#ifdef _KERNEL
/*
 * Kernel-only request layout carrying an inline copy of the per-interface
 * ND parameters.  Compared with struct nd_ifinfo it has a receivedra member
 * and lacks the initialized and random* members.
 * NOTE(review): presumably the older ABI of struct in6_ndireq -- confirm.
 */
struct in6_ondireq {
char ifname[IFNAMSIZ];
struct {
u_int32_t linkmtu; /* LinkMTU */
u_int32_t maxmtu; /* Upper bound of LinkMTU */
u_int32_t basereachable; /* BaseReachableTime */
u_int32_t reachable; /* Reachable Time */
u_int32_t retrans; /* Retrans Timer */
u_int32_t flags; /* Flags */
int recalctm; /* BaseReachableTime re-calculation timer */
u_int8_t chlim; /* CurHopLimit */
u_int8_t receivedra;
} ndi;
};
#endif
/* Request pairing an interface name with its struct nd_ifinfo. */
struct in6_ndireq {
char ifname[IFNAMSIZ];
struct nd_ifinfo ndi;
};
/* Request pairing an interface name with an interface index. */
struct in6_ndifreq {
char ifname[IFNAMSIZ];
u_long ifindex;
};
/* Prefix status */
#define NDPRF_ONLINK 0x1
#define NDPRF_DETACHED 0x2
/* protocol constants */
#define MAX_RTR_SOLICITATION_DELAY 1 /* 1sec */
#define RTR_SOLICITATION_INTERVAL 4 /* 4sec */
#define MAX_RTR_SOLICITATIONS 3
#define ND6_INFINITE_LIFETIME 0xffffffff
#ifdef _KERNEL
/* node constants */
#define MAX_REACHABLE_TIME 3600000 /* msec */
#define REACHABLE_TIME 30000 /* msec */
#define RETRANS_TIMER 1000 /* msec */
#define MIN_RANDOM_FACTOR 512 /* 1024 * 0.5 */
#define MAX_RANDOM_FACTOR 1536 /* 1024 * 1.5 */
#define DEF_TEMP_VALID_LIFETIME 604800 /* 1 week */
#define DEF_TEMP_PREFERRED_LIFETIME 86400 /* 1 day */
#define TEMPADDR_REGEN_ADVANCE 5 /* sec */
#define MAX_TEMP_DESYNC_FACTOR 600 /* 10 min */
/*
 * Derive a randomized time from x.
 *
 * The result is MIN_RANDOM_FACTOR * (x >> 10), plus arc4random() masked
 * (bitwise AND) with (MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * (x >> 10),
 * all divided by 1000.  With the factors being 1024 * 0.5 and 1024 * 1.5,
 * the shift by 10 cancels the 1024 scaling.
 *
 * The argument is parenthesized so that expressions using operators of
 * lower precedence than ">>" (e.g. "a | b") are shifted as a whole.
 * It is still evaluated twice, so it must have no side effects.
 */
#define ND_COMPUTE_RTIME(x) \
(((MIN_RANDOM_FACTOR * ((x) >> 10)) + (arc4random() & \
((MAX_RANDOM_FACTOR - MIN_RANDOM_FACTOR) * ((x) >> 10)))) /1000)
/* Tail-queue head type for a list of struct nd_defrouter. */
TAILQ_HEAD(nd_drhead, nd_defrouter);
/* Kernel record for one default router, linked through dr_entry. */
struct nd_defrouter {
TAILQ_ENTRY(nd_defrouter) dr_entry;
struct in6_addr rtaddr;
u_char flags; /* flags on RA message */
u_short rtlifetime;
u_long expire;
struct ifnet *ifp;
int installed; /* is installed into kernel routing table */
};
/*
 * Description of a prefix: interface, prefix address and length, advertised
 * lifetimes and RA flags.  This is the argument type taken by
 * nd6_prelist_add() and nd6_prefix_lookup().
 */
struct nd_prefixctl {
struct ifnet *ndpr_ifp;
/* prefix */
struct sockaddr_in6 ndpr_prefix;
u_char ndpr_plen;
u_int32_t ndpr_vltime; /* advertised valid lifetime */
u_int32_t ndpr_pltime; /* advertised preferred lifetime */
struct prf_ra ndpr_flags;
};
/*
 * Kernel record for one prefix.  Records are chained through ndpr_entry,
 * and each one heads its own list (ndpr_advrtrs) of the routers that
 * advertise it.
 */
struct nd_prefix {
struct ifnet *ndpr_ifp;
LIST_ENTRY(nd_prefix) ndpr_entry;
struct sockaddr_in6 ndpr_prefix; /* prefix */
struct in6_addr ndpr_mask; /* netmask derived from the prefix */
u_int32_t ndpr_vltime; /* advertised valid lifetime */
u_int32_t ndpr_pltime; /* advertised preferred lifetime */
time_t ndpr_expire; /* expiration time of the prefix */
time_t ndpr_preferred; /* preferred time of the prefix */
time_t ndpr_lastupdate; /* reception time of last advertisement */
struct prf_ra ndpr_flags;
u_int32_t ndpr_stateflags; /* actual state flags (presumably NDPRF_* -- confirm) */
/* list of routers that advertise the prefix: */
LIST_HEAD(pr_rtrhead, nd_pfxrouter) ndpr_advrtrs;
u_char ndpr_plen;
int ndpr_refcnt; /* reference counter from addresses */
};
/* Shorthands for the list link and for the members of ndpr_flags. */
#define ndpr_next ndpr_entry.le_next
#define ndpr_raf ndpr_flags
#define ndpr_raf_onlink ndpr_flags.onlink
#define ndpr_raf_auto ndpr_flags.autonomous
#define ndpr_raf_router ndpr_flags.router
/*
 * Message format for use in obtaining information about prefixes
 * from inet6 sysctl function
 *
 * NOTE(review): the member name "prm_vltim" looks like a misspelling of
 * "prm_vltime" (compare prm_pltime); it is part of the structure's
 * interface, so it is left untouched here.
 */
struct inet6_ndpr_msghdr {
u_short inpm_msglen; /* to skip over non-understood messages */
u_char inpm_version; /* future binary compatibility */
u_char inpm_type; /* message type */
struct in6_addr inpm_prefix;
u_long prm_vltim;
u_long prm_pltime;
u_long prm_expire;
u_long prm_preferred;
struct in6_prflags prm_flags;
u_short prm_index; /* index for associated ifp */
u_char prm_plen; /* length of prefix in bits */
};
/* Shorthands for individual members nested inside prm_flags. */
#define prm_raf_onlink prm_flags.prf_ra.onlink
#define prm_raf_auto prm_flags.prf_ra.autonomous
#define prm_statef_onlink prm_flags.prf_state.onlink
#define prm_rrf_decrvalid prm_flags.prf_rr.decrvalid
#define prm_rrf_decrprefd prm_flags.prf_rr.decrprefd
/*
 * List node tying a prefix to one default router that advertises it;
 * these nodes make up the ndpr_advrtrs list of struct nd_prefix.
 */
struct nd_pfxrouter {
LIST_ENTRY(nd_pfxrouter) pfr_entry;
/* shorthand for the next node in the list */
#define pfr_next pfr_entry.le_next
struct nd_defrouter *router;
};
LIST_HEAD(nd_prhead, nd_prefix);
/* nd6.c */
extern int nd6_prune;
extern int nd6_delay;
extern int nd6_umaxtries;
extern int nd6_mmaxtries;
extern int nd6_useloopback;
extern int nd6_maxnudhint;
extern int nd6_gctimer;
extern struct nd_drhead nd_defrouter;
extern struct nd_prhead nd_prefix;
extern int nd6_debug;
extern int nd6_onlink_ns_rfc4861;
#define nd6log(x) do { if (V_nd6_debug) log x; } while (/*CONSTCOND*/ 0)
extern struct callout nd6_timer_ch;
/* nd6_rtr.c */
extern int nd6_defifindex;
extern int ip6_desync_factor; /* seconds */
extern u_int32_t ip6_temp_preferred_lifetime; /* seconds */
extern u_int32_t ip6_temp_valid_lifetime; /* seconds */
extern int ip6_temp_regen_advance; /* seconds */
/*
 * Parsed view of the options of an ND message.  The same storage can be
 * addressed as an array of option-header pointers (nd_opt_array) or
 * through the named members of nd_opt_each.  It is initialized by
 * nd6_option_init() and filled in by nd6_options().
 * NOTE(review): the array slots presumably are indexed by option type,
 * which the named members then alias -- confirm in nd6.c.
 */
union nd_opts {
struct nd_opt_hdr *nd_opt_array[8]; /* max = target address list */
struct {
struct nd_opt_hdr *zero;
struct nd_opt_hdr *src_lladdr;
struct nd_opt_hdr *tgt_lladdr;
struct nd_opt_prefix_info *pi_beg; /* multiple opts, start */
struct nd_opt_rd_hdr *rh;
struct nd_opt_mtu *mtu;
struct nd_opt_hdr *search; /* multiple opts */
struct nd_opt_hdr *last; /* multiple opts */
int done;
struct nd_opt_prefix_info *pi_end;/* multiple opts, end */
} nd_opt_each;
};
/* Shorthands for the named members of nd_opt_each. */
#define nd_opts_src_lladdr nd_opt_each.src_lladdr
#define nd_opts_tgt_lladdr nd_opt_each.tgt_lladdr
#define nd_opts_pi nd_opt_each.pi_beg
#define nd_opts_pi_end nd_opt_each.pi_end
#define nd_opts_rh nd_opt_each.rh
#define nd_opts_mtu nd_opt_each.mtu
#define nd_opts_search nd_opt_each.search
#define nd_opts_last nd_opt_each.last
#define nd_opts_done nd_opt_each.done
/* XXX: need nd6_var.h?? */
/* nd6.c */
void nd6_init __P((void));
struct nd_ifinfo *nd6_ifattach __P((struct ifnet *));
void nd6_ifdetach __P((struct nd_ifinfo *));
int nd6_is_addr_neighbor __P((struct sockaddr_in6 *, struct ifnet *));
void nd6_option_init __P((void *, int, union nd_opts *));
struct nd_opt_hdr *nd6_option __P((union nd_opts *));
int nd6_options __P((union nd_opts *));
struct llentry *nd6_lookup __P((struct in6_addr *, int, struct ifnet *));
void nd6_setmtu __P((struct ifnet *));
void nd6_llinfo_settimer __P((struct llentry *, long));
void nd6_timer __P((void *));
void nd6_purge __P((struct ifnet *));
void nd6_nud_hint __P((struct rtentry *, struct in6_addr *, int));
int nd6_resolve __P((struct ifnet *, struct rtentry *, struct mbuf *,
struct sockaddr *, u_char *));
int nd6_ioctl __P((u_long, caddr_t, struct ifnet *));
struct llentry *nd6_cache_lladdr __P((struct ifnet *, struct in6_addr *,
char *, int, int, int));
int nd6_output __P((struct ifnet *, struct ifnet *, struct mbuf *,
struct sockaddr_in6 *, struct rtentry *));
int nd6_need_cache __P((struct ifnet *));
int nd6_storelladdr __P((struct ifnet *, struct rtentry *, struct mbuf *,
struct sockaddr *, u_char *, struct llentry **));
/* nd6_nbr.c */
void nd6_na_input __P((struct mbuf *, int, int));
void nd6_na_output __P((struct ifnet *, const struct in6_addr *,
const struct in6_addr *, u_long, int, struct sockaddr *));
void nd6_ns_input __P((struct mbuf *, int, int));
void nd6_ns_output __P((struct ifnet *, const struct in6_addr *,
const struct in6_addr *, struct llentry *, int));
caddr_t nd6_ifptomac __P((struct ifnet *));
void nd6_dad_start __P((struct ifaddr *, int));
void nd6_dad_stop __P((struct ifaddr *));
void nd6_dad_duplicated __P((struct ifaddr *));
/* nd6_rtr.c */
void nd6_rs_input __P((struct mbuf *, int, int));
void nd6_ra_input __P((struct mbuf *, int, int));
void prelist_del __P((struct nd_prefix *));
void defrouter_addreq __P((struct nd_defrouter *));
void defrouter_reset __P((void));
void defrouter_select __P((void));
void defrtrlist_del __P((struct nd_defrouter *));
void prelist_remove __P((struct nd_prefix *));
int nd6_prelist_add __P((struct nd_prefixctl *, struct nd_defrouter *,
struct nd_prefix **));
int nd6_prefix_onlink __P((struct nd_prefix *));
int nd6_prefix_offlink __P((struct nd_prefix *));
void pfxlist_onlink_check __P((void));
struct nd_defrouter *defrouter_lookup __P((struct in6_addr *, struct ifnet *));
struct nd_prefix *nd6_prefix_lookup __P((struct nd_prefixctl *));
void rt6_flush __P((struct in6_addr *, struct ifnet *));
int nd6_setdefaultiface __P((int));
int in6_tmpifadd __P((const struct in6_ifaddr *, int, int));
#endif /* _KERNEL */
#endif /* _NETINET6_ND6_H_ */
Index: projects/arpv2_merge_1/sys/netinet6/nd6_nbr.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet6/nd6_nbr.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet6/nd6_nbr.c (revision 185839)
@@ -1,1516 +1,1511 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $KAME: nd6_nbr.c,v 1.86 2002/01/21 02:33:04 jinmei Exp $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_ipsec.h"
#include "opt_carp.h"
#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/errno.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/callout.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/if_var.h>
#include <net/route.h>
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <net/if_llatbl.h>
#define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le))
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>
#include <netinet/icmp6.h>
#include <netinet6/vinet6.h>
#ifdef DEV_CARP
#include <netinet/ip_carp.h>
#endif
#define SDL(s) ((struct sockaddr_dl *)s)
struct dadq;
static struct dadq *nd6_dad_find(struct ifaddr *);
static void nd6_dad_starttimer(struct dadq *, int);
static void nd6_dad_stoptimer(struct dadq *);
static void nd6_dad_timer(struct ifaddr *);
static void nd6_dad_ns_output(struct dadq *, struct ifaddr *);
static void nd6_dad_ns_input(struct ifaddr *);
static void nd6_dad_na_input(struct ifaddr *);
#ifdef VIMAGE_GLOBALS
int dad_ignore_ns;
int dad_maxtry;
#endif
/*
* Input a Neighbor Solicitation Message.
*
* Based on RFC 2461
* Based on RFC 2462 (duplicate address detection)
*/
void
nd6_ns_input(struct mbuf *m, int off, int icmp6len)
{
INIT_VNET_INET6(curvnet);
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct nd_neighbor_solicit *nd_ns;
struct in6_addr saddr6 = ip6->ip6_src;
struct in6_addr daddr6 = ip6->ip6_dst;
struct in6_addr taddr6;
struct in6_addr myaddr6;
char *lladdr = NULL;
struct ifaddr *ifa = NULL;
int lladdrlen = 0;
int anycast = 0, proxy = 0, tentative = 0;
- int tlladdr;
+ int tlladdr, error;
union nd_opts ndopts;
struct sockaddr_dl *proxydl = NULL;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, icmp6len,);
nd_ns = (struct nd_neighbor_solicit *)((caddr_t)ip6 + off);
#else
IP6_EXTHDR_GET(nd_ns, struct nd_neighbor_solicit *, m, off, icmp6len);
if (nd_ns == NULL) {
V_icmp6stat.icp6s_tooshort++;
return;
}
#endif
ip6 = mtod(m, struct ip6_hdr *); /* adjust pointer for safety */
taddr6 = nd_ns->nd_ns_target;
if (in6_setscope(&taddr6, ifp, NULL) != 0)
goto bad;
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_ns_input: invalid hlim (%d) from %s to %s on %s\n",
ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
/* dst has to be a solicited node multicast address. */
if (daddr6.s6_addr16[0] == IPV6_ADDR_INT16_MLL &&
/* don't check ifindex portion */
daddr6.s6_addr32[1] == 0 &&
daddr6.s6_addr32[2] == IPV6_ADDR_INT32_ONE &&
daddr6.s6_addr8[12] == 0xff) {
; /* good */
} else {
nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
"(wrong ip6 dst)\n"));
goto bad;
}
} else if (!V_nd6_onlink_ns_rfc4861) {
struct sockaddr_in6 src_sa6;
/*
* According to recent IETF discussions, it is not a good idea
* to accept a NS from an address which would not be deemed
* to be a neighbor otherwise. This point is expected to be
* clarified in future revisions of the specification.
*/
bzero(&src_sa6, sizeof(src_sa6));
src_sa6.sin6_family = AF_INET6;
src_sa6.sin6_len = sizeof(src_sa6);
src_sa6.sin6_addr = saddr6;
- if (!nd6_is_addr_neighbor(&src_sa6, ifp)) {
+ error = nd6_is_addr_neighbor(&src_sa6, ifp);
+ if (error) {
nd6log((LOG_INFO, "nd6_ns_input: "
"NS packet from non-neighbor\n"));
goto bad;
}
}
if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
nd6log((LOG_INFO, "nd6_ns_input: bad NS target (multicast)\n"));
goto bad;
}
icmp6len -= sizeof(*nd_ns);
nd6_option_init(nd_ns + 1, icmp6len, &ndopts);
if (nd6_options(&ndopts) < 0) {
nd6log((LOG_INFO,
"nd6_ns_input: invalid ND option, ignored\n"));
/* nd6_options have incremented stats */
goto freeit;
}
if (ndopts.nd_opts_src_lladdr) {
lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
}
if (IN6_IS_ADDR_UNSPECIFIED(&ip6->ip6_src) && lladdr) {
nd6log((LOG_INFO, "nd6_ns_input: bad DAD packet "
"(link-layer address option)\n"));
goto bad;
}
/*
* Attaching target link-layer address to the NA?
* (RFC 2461 7.2.4)
*
* NS IP dst is unicast/anycast MUST NOT add
* NS IP dst is solicited-node multicast MUST add
*
* In implementation, we add target link-layer address by default.
* We do not add one in MUST NOT cases.
*/
if (!IN6_IS_ADDR_MULTICAST(&daddr6))
tlladdr = 0;
else
tlladdr = 1;
/*
* Target address (taddr6) must be either:
* (1) Valid unicast/anycast address for my receiving interface,
* (2) Unicast address for which I'm offering proxy service, or
* (3) "tentative" address on which DAD is being performed.
*/
/* (1) and (3) check. */
#ifdef DEV_CARP
if (ifp->if_carp)
ifa = carp_iamatch6(ifp->if_carp, &taddr6);
if (ifa == NULL)
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
#else
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
#endif
/* (2) check. */
if (ifa == NULL) {
struct rtentry *rt;
struct sockaddr_in6 tsin6;
int need_proxy;
#ifdef RADIX_MPATH
struct route_in6 ro;
#endif
bzero(&tsin6, sizeof tsin6);
tsin6.sin6_len = sizeof(struct sockaddr_in6);
tsin6.sin6_family = AF_INET6;
tsin6.sin6_addr = taddr6;
#ifdef RADIX_MPATH
bzero(&ro, sizeof(ro));
ro.ro_dst = tsin6;
rtalloc_mpath((struct route *)&ro, RTF_ANNOUNCE);
rt = ro.ro_rt;
#else
rt = rtalloc1((struct sockaddr *)&tsin6, 0, 0);
#endif
need_proxy = (rt && (rt->rt_flags & RTF_ANNOUNCE) != 0 &&
rt->rt_gateway->sa_family == AF_LINK);
if (rt)
rtfree(rt);
if (need_proxy) {
/*
* proxy NDP for single entry
*/
ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
if (ifa) {
proxy = 1;
proxydl = SDL(rt->rt_gateway);
}
}
}
if (ifa == NULL) {
/*
* We've got an NS packet, and we don't have that adddress
* assigned for us. We MUST silently ignore it.
* See RFC2461 7.2.3.
*/
goto freeit;
}
myaddr6 = *IFA_IN6(ifa);
anycast = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST;
tentative = ((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE;
if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DUPLICATED)
goto freeit;
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO, "nd6_ns_input: lladdrlen mismatch for %s "
"(if %d, NS packet %d)\n",
ip6_sprintf(ip6bufs, &taddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
if (IN6_ARE_ADDR_EQUAL(&myaddr6, &saddr6)) {
nd6log((LOG_INFO, "nd6_ns_input: duplicate IP6 address %s\n",
ip6_sprintf(ip6bufs, &saddr6)));
goto freeit;
}
/*
* We have neighbor solicitation packet, with target address equals to
* one of my tentative address.
*
* src addr how to process?
* --- ---
* multicast of course, invalid (rejected in ip6_input)
* unicast somebody is doing address resolution -> ignore
* unspec dup address detection
*
* The processing is defined in RFC 2462.
*/
if (tentative) {
/*
* If source address is unspecified address, it is for
* duplicate address detection.
*
* If not, the packet is for addess resolution;
* silently ignore it.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
nd6_dad_ns_input(ifa);
goto freeit;
}
/*
* If the source address is unspecified address, entries must not
* be created or updated.
* It looks that sender is performing DAD. Output NA toward
* all-node multicast address, to tell the sender that I'm using
* the address.
* S bit ("solicited") must be zero.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&saddr6)) {
struct in6_addr in6_all;
in6_all = in6addr_linklocal_allnodes;
if (in6_setscope(&in6_all, ifp, NULL) != 0)
goto bad;
nd6_na_output(ifp, &in6_all, &taddr6,
((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
(V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0),
tlladdr, (struct sockaddr *)proxydl);
goto freeit;
}
- IF_AFDATA_LOCK(ifp);
nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen,
ND_NEIGHBOR_SOLICIT, 0);
- IF_AFDATA_UNLOCK(ifp);
nd6_na_output(ifp, &saddr6, &taddr6,
((anycast || proxy || !tlladdr) ? 0 : ND_NA_FLAG_OVERRIDE) |
(V_ip6_forwarding ? ND_NA_FLAG_ROUTER : 0) | ND_NA_FLAG_SOLICITED,
tlladdr, (struct sockaddr *)proxydl);
freeit:
m_freem(m);
return;
bad:
nd6log((LOG_ERR, "nd6_ns_input: src=%s\n",
ip6_sprintf(ip6bufs, &saddr6)));
nd6log((LOG_ERR, "nd6_ns_input: dst=%s\n",
ip6_sprintf(ip6bufs, &daddr6)));
nd6log((LOG_ERR, "nd6_ns_input: tgt=%s\n",
ip6_sprintf(ip6bufs, &taddr6)));
V_icmp6stat.icp6s_badns++;
m_freem(m);
}
/*
 * Output a Neighbor Solicitation Message. Caller specifies:
 *	- ICMP6 header source IP6 address
 *	- ND6 header target IP6 address
 *	- ND6 header source datalink address
 *
 * Based on RFC 2461
 * Based on RFC 2462 (duplicate address detection)
 *
 * ifp    - interface the solicitation is sent on
 * daddr6 - IPv6 destination; when NULL the destination is built from the
 *          low 32 bits of taddr6 as a solicited-node multicast address
 * taddr6 - address being solicited; nothing is sent if it is multicast
 * ln     - for source address determination: if it holds a queued packet
 *          (la_hold) whose source address belongs to ifp, that address
 *          is used; may be NULL
 * dad    - duplicate address detection: the source is the unspecified
 *          address and no source link-layer address option is added
 *
 * Nothing is returned; every failure silently drops the message.
 */
void
nd6_ns_output(struct ifnet *ifp, const struct in6_addr *daddr6,
    const struct in6_addr *taddr6, struct llentry *ln, int dad)
{
	INIT_VNET_INET6(ifp->if_vnet);
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct nd_neighbor_solicit *nd_ns;
	struct in6_addr *src, src_in;
	struct ip6_moptions im6o;
	int icmp6len;
	int maxlen;
	caddr_t mac;
	struct route_in6 ro;

	bzero(&ro, sizeof(ro));
	/*
	 * im6o is always handed to ip6_output() below, but only some of its
	 * members are assigned, and only for a multicast destination.
	 * Clear it so the callee never sees uninitialized stack contents.
	 */
	bzero(&im6o, sizeof(im6o));

	if (IN6_IS_ADDR_MULTICAST(taddr6))
		return;

	/* estimate the size of message */
	maxlen = sizeof(*ip6) + sizeof(*nd_ns);
	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
	if (max_linkhdr + maxlen >= MCLBYTES) {
#ifdef DIAGNOSTIC
		printf("nd6_ns_output: max_linkhdr + maxlen >= MCLBYTES "
		    "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
#endif
		return;
	}

	/* Get a packet header mbuf, with a cluster if the message needs one. */
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m && max_linkhdr + maxlen >= MHLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL)
		return;
	m->m_pkthdr.rcvif = NULL;

	if (daddr6 == NULL || IN6_IS_ADDR_MULTICAST(daddr6)) {
		m->m_flags |= M_MCAST;
		im6o.im6o_multicast_ifp = ifp;
		im6o.im6o_multicast_hlim = 255;
		im6o.im6o_multicast_loop = 0;
	}

	icmp6len = sizeof(*nd_ns);
	m->m_pkthdr.len = m->m_len = sizeof(*ip6) + icmp6len;
	m->m_data += max_linkhdr;	/* or MH_ALIGN() equivalent? */

	/* fill neighbor solicitation packet */
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	/* ip6->ip6_plen will be set later */
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_hlim = 255;
	if (daddr6)
		ip6->ip6_dst = *daddr6;
	else {
		/* Solicited-node multicast address derived from the target. */
		ip6->ip6_dst.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
		ip6->ip6_dst.s6_addr16[1] = 0;
		ip6->ip6_dst.s6_addr32[1] = 0;
		ip6->ip6_dst.s6_addr32[2] = IPV6_ADDR_INT32_ONE;
		ip6->ip6_dst.s6_addr32[3] = taddr6->s6_addr32[3];
		ip6->ip6_dst.s6_addr8[12] = 0xff;
		if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0)
			goto bad;
	}
	if (!dad) {
		/*
		 * RFC2461 7.2.2:
		 * "If the source address of the packet prompting the
		 * solicitation is the same as one of the addresses assigned
		 * to the outgoing interface, that address SHOULD be placed
		 * in the IP Source Address of the outgoing solicitation.
		 * Otherwise, any one of the addresses assigned to the
		 * interface should be used."
		 *
		 * We use the source address for the prompting packet
		 * (saddr6), if:
		 * - saddr6 is given from the caller (by giving "ln"), and
		 * - saddr6 belongs to the outgoing interface.
		 * Otherwise, we perform the source address selection as usual.
		 */
		struct ip6_hdr *hip6;		/* hold ip6 */
		struct in6_addr *hsrc = NULL;

		if (ln && ln->la_hold) {
			/*
			 * assuming every packet in la_hold has the same IP
			 * header
			 */
			hip6 = mtod(ln->la_hold, struct ip6_hdr *);
			/* XXX pullup? */
			/*
			 * The header is usable as soon as the first mbuf
			 * holds all of it, including the case where it holds
			 * exactly sizeof(*hip6) bytes.
			 */
			if (sizeof(*hip6) <= ln->la_hold->m_len)
				hsrc = &hip6->ip6_src;
			else
				hsrc = NULL;
		}
		if (hsrc && in6ifa_ifpwithaddr(ifp, hsrc))
			src = hsrc;
		else {
			int error;
			struct sockaddr_in6 dst_sa;

			bzero(&dst_sa, sizeof(dst_sa));
			dst_sa.sin6_family = AF_INET6;
			dst_sa.sin6_len = sizeof(dst_sa);
			dst_sa.sin6_addr = ip6->ip6_dst;

			src = in6_selectsrc(&dst_sa, NULL,
			    NULL, &ro, NULL, NULL, &error);
			if (src == NULL) {
				char ip6buf[INET6_ADDRSTRLEN];
				nd6log((LOG_DEBUG,
				    "nd6_ns_output: source can't be "
				    "determined: dst=%s, error=%d\n",
				    ip6_sprintf(ip6buf, &dst_sa.sin6_addr),
				    error));
				goto bad;
			}
		}
	} else {
		/*
		 * Source address for DAD packet must always be IPv6
		 * unspecified address. (0::0)
		 * Build it explicitly in a local so the intention is clear.
		 */
		bzero(&src_in, sizeof(src_in));
		src = &src_in;
	}
	ip6->ip6_src = *src;
	nd_ns = (struct nd_neighbor_solicit *)(ip6 + 1);
	nd_ns->nd_ns_type = ND_NEIGHBOR_SOLICIT;
	nd_ns->nd_ns_code = 0;
	nd_ns->nd_ns_reserved = 0;
	nd_ns->nd_ns_target = *taddr6;
	in6_clearscope(&nd_ns->nd_ns_target); /* XXX */

	/*
	 * Add source link-layer address option.
	 *
	 *				spec		implementation
	 *				---		---
	 * DAD packet			MUST NOT	do not add the option
	 * there's no link layer address:
	 *				impossible	do not add the option
	 * there's link layer address:
	 *	Multicast NS		MUST add one	add the option
	 *	Unicast NS		SHOULD add one	add the option
	 */
	if (!dad && (mac = nd6_ifptomac(ifp))) {
		int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_ns + 1);

		/* 8 byte alignments... */
		optlen = (optlen + 7) & ~7;

		m->m_pkthdr.len += optlen;
		m->m_len += optlen;
		icmp6len += optlen;
		bzero((caddr_t)nd_opt, optlen);
		nd_opt->nd_opt_type = ND_OPT_SOURCE_LINKADDR;
		nd_opt->nd_opt_len = optlen >> 3;
		bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
	}

	ip6->ip6_plen = htons((u_short)icmp6len);
	nd_ns->nd_ns_cksum = 0;
	nd_ns->nd_ns_cksum =
	    in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), icmp6len);

	ip6_output(m, NULL, &ro, dad ? IPV6_UNSPECSRC : 0, &im6o, NULL, NULL);
	icmp6_ifstat_inc(ifp, ifs6_out_msg);
	icmp6_ifstat_inc(ifp, ifs6_out_neighborsolicit);
	V_icmp6stat.icp6s_outhist[ND_NEIGHBOR_SOLICIT]++;

	if (ro.ro_rt) {		/* we don't cache this route. */
		RTFREE(ro.ro_rt);
	}
	return;

bad:
	/* Drop the route reference, if one was taken, and the unsent packet. */
	if (ro.ro_rt) {
		RTFREE(ro.ro_rt);
	}
	m_freem(m);
	return;
}
/*
* Neighbor advertisement input handling.
*
* Based on RFC 2461
* Based on RFC 2462 (duplicate address detection)
*
* the following items are not implemented yet:
* - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
* - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
*
* m - received packet; released with m_freem() on the freeit and bad paths
* off - byte offset from the IPv6 header to the neighbor advertisement
* icmp6len - length of the advertisement, including trailing ND options
*
* Packets that fail validation go through "bad", which also bumps
* icp6s_badna; packets that are merely ignored go through "freeit".
*/
void
nd6_na_input(struct mbuf *m, int off, int icmp6len)
{
INIT_VNET_INET6(curvnet);
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct nd_neighbor_advert *nd_na;
struct in6_addr daddr6 = ip6->ip6_dst;
struct in6_addr taddr6;
int flags;
int is_router;
int is_solicited;
int is_override;
char *lladdr = NULL;
int lladdrlen = 0;
struct ifaddr *ifa;
struct llentry *ln;
union nd_opts ndopts;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
/* An advertisement whose hop limit is not 255 is rejected. */
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_na_input: invalid hlim (%d) from %s to %s on %s\n",
ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, icmp6len,);
nd_na = (struct nd_neighbor_advert *)((caddr_t)ip6 + off);
#else
IP6_EXTHDR_GET(nd_na, struct nd_neighbor_advert *, m, off, icmp6len);
if (nd_na == NULL) {
V_icmp6stat.icp6s_tooshort++;
return;
}
#endif
/* Decode the R/S/O flag bits of the advertisement. */
flags = nd_na->nd_na_flags_reserved;
is_router = ((flags & ND_NA_FLAG_ROUTER) != 0);
is_solicited = ((flags & ND_NA_FLAG_SOLICITED) != 0);
is_override = ((flags & ND_NA_FLAG_OVERRIDE) != 0);
taddr6 = nd_na->nd_na_target;
if (in6_setscope(&taddr6, ifp, NULL))
goto bad; /* XXX: impossible */
/* The target must not be a multicast address. */
if (IN6_IS_ADDR_MULTICAST(&taddr6)) {
nd6log((LOG_ERR,
"nd6_na_input: invalid target address %s\n",
ip6_sprintf(ip6bufs, &taddr6)));
goto bad;
}
/* A solicited advertisement must not be sent to a multicast destination. */
if (IN6_IS_ADDR_MULTICAST(&daddr6))
if (is_solicited) {
nd6log((LOG_ERR,
"nd6_na_input: a solicited adv is multicasted\n"));
goto bad;
}
/* From here on icmp6len is the length of the option area only. */
icmp6len -= sizeof(*nd_na);
nd6_option_init(nd_na + 1, icmp6len, &ndopts);
if (nd6_options(&ndopts) < 0) {
nd6log((LOG_INFO,
"nd6_na_input: invalid ND option, ignored\n"));
/* nd6_options have incremented stats */
goto freeit;
}
/* lladdr points just past the option header; lladdrlen is the whole option. */
if (ndopts.nd_opts_tgt_lladdr) {
lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1);
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
}
ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp, &taddr6);
/*
* Target address matches one of my interface addresses.
*
* If my address is tentative, this means that there's somebody
* already using the same address as mine. This indicates DAD failure.
* This is defined in RFC 2462.
*
* Otherwise, process as defined in RFC 2461.
*/
if (ifa
&& (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_TENTATIVE)) {
nd6_dad_na_input(ifa);
goto freeit;
}
/* Just for safety, maybe unnecessary. */
if (ifa) {
log(LOG_ERR,
"nd6_na_input: duplicate IP6 address %s\n",
ip6_sprintf(ip6bufs, &taddr6));
goto freeit;
}
/*
* The option length must equal the interface address length plus the
* 2-byte option header, rounded up to a multiple of 8.
*/
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO, "nd6_na_input: lladdrlen mismatch for %s "
"(if %d, NA packet %d)\n", ip6_sprintf(ip6bufs, &taddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
/*
* If no neighbor cache entry is found, NA SHOULD silently be
* discarded.
*/
/*
* NOTE(review): the "+" line below drops the AFDATA lock right after the
* lookup and replaces the per-exit unlocks on the "-" lines; the rest of
* the function then reads and writes ln without that lock held -- confirm
* the entry is protected some other way.
*/
IF_AFDATA_LOCK(ifp);
ln = nd6_lookup(&taddr6, 0, ifp);
+ IF_AFDATA_UNLOCK(ifp);
if (ln == NULL) {
- IF_AFDATA_UNLOCK(ifp);
goto freeit;
}
if (ln->ln_state == ND6_LLINFO_INCOMPLETE) {
/*
* If the link-layer has address, and no lladdr option came,
* discard the packet.
*/
if (ifp->if_addrlen && lladdr == NULL) {
- IF_AFDATA_UNLOCK(ifp);
goto freeit;
}
/*
* Record link-layer address, and update the state.
*/
bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
ln->la_flags |= LLE_VALID;
if (is_solicited) {
ln->ln_state = ND6_LLINFO_REACHABLE;
ln->ln_byhint = 0;
if (!ND6_LLINFO_PERMANENT(ln)) {
nd6_llinfo_settimer(ln,
(long)ND_IFINFO(ln->lle_tbl->llt_ifp)->reachable * hz);
}
} else {
ln->ln_state = ND6_LLINFO_STALE;
nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
}
if ((ln->ln_router = is_router) != 0) {
/*
* This means a router's state has changed from
* non-reachable to probably reachable, and might
* affect the status of associated prefixes..
*/
pfxlist_onlink_check();
}
} else {
int llchange;
/*
* Check if the link-layer address has changed or not.
*/
if (lladdr == NULL)
llchange = 0;
else {
if (ln->la_flags & LLE_VALID) {
if (bcmp(lladdr, &ln->ll_addr, ifp->if_addrlen))
llchange = 1;
else
llchange = 0;
} else
llchange = 1;
}
/*
* This is VERY complex. Look at it with care.
*
* override solicit lladdr llchange action
* (L: record lladdr)
*
* 0 0 n -- (2c)
* 0 0 y n (2b) L
* 0 0 y y (1) REACHABLE->STALE
* 0 1 n -- (2c) *->REACHABLE
* 0 1 y n (2b) L *->REACHABLE
* 0 1 y y (1) REACHABLE->STALE
* 1 0 n -- (2a)
* 1 0 y n (2a) L
* 1 0 y y (2a) L *->STALE
* 1 1 n -- (2a) *->REACHABLE
* 1 1 y n (2a) L *->REACHABLE
* 1 1 y y (2a) L *->REACHABLE
*/
if (!is_override && (lladdr != NULL && llchange)) { /* (1) */
/*
* If state is REACHABLE, make it STALE.
* no other updates should be done.
*/
if (ln->ln_state == ND6_LLINFO_REACHABLE) {
ln->ln_state = ND6_LLINFO_STALE;
nd6_llinfo_settimer(ln, (long)V_nd6_gctimer * hz);
}
- IF_AFDATA_UNLOCK(ifp);
goto freeit;
} else if (is_override /* (2a) */
|| (!is_override && (lladdr != NULL && !llchange)) /* (2b) */
|| lladdr == NULL) { /* (2c) */
/*
* Update link-local address, if any.
*/
if (lladdr != NULL) {
bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen);
ln->la_flags |= LLE_VALID;
}
/*
* If solicited, make the state REACHABLE.
* If not solicited and the link-layer address was
* changed, make it STALE.
*/
if (is_solicited) {
ln->ln_state = ND6_LLINFO_REACHABLE;
ln->ln_byhint = 0;
if (!ND6_LLINFO_PERMANENT(ln)) {
nd6_llinfo_settimer(ln,
(long)ND_IFINFO(ifp)->reachable * hz);
}
} else {
if (lladdr != NULL && llchange) {
ln->ln_state = ND6_LLINFO_STALE;
nd6_llinfo_settimer(ln,
(long)V_nd6_gctimer * hz);
}
}
}
if (ln->ln_router && !is_router) {
/*
* The peer dropped the router flag.
* Remove the sender from the Default Router List and
* update the Destination Cache entries.
*/
struct nd_defrouter *dr;
struct in6_addr *in6;
/* int s;*/
in6 = &L3_ADDR_SIN6(ln)->sin6_addr;
/*
* Lock to protect the default router list.
* XXX: this might be unnecessary, since this function
* is only called under the network software interrupt
* context. However, we keep it just for safety.
*/
/* Qing - removing
s = splnet();
*/
dr = defrouter_lookup(in6, ln->lle_tbl->llt_ifp);
if (dr)
defrtrlist_del(dr);
else if (!V_ip6_forwarding) {
/*
* Even if the neighbor is not in the default
* router list, the neighbor may be used
* as a next hop for some destinations
* (e.g. redirect case). So we must
* call rt6_flush explicitly.
*/
rt6_flush(&ip6->ip6_src, ifp);
}
/* Qing - removing
splx(s);
*/
}
ln->ln_router = is_router;
}
/* Qing - do we care ?
rt->rt_flags &= ~RTF_REJECT;
*/
ln->la_asked = 0;
/* Send out every packet that was queued on this entry. */
if (ln->la_hold) {
struct mbuf *m_hold, *m_hold_next;
/*
* reset the la_hold in advance, to explicitly
* prevent a la_hold lookup in nd6_output()
* (wouldn't happen, though...)
*/
for (m_hold = ln->la_hold, ln->la_hold = NULL;
m_hold; m_hold = m_hold_next) {
m_hold_next = m_hold->m_nextpkt;
m_hold->m_nextpkt = NULL;
/*
* we assume ifp is not a loopback here, so just set
* the 2nd argument as the 1st one.
*/
nd6_output(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL);
}
}
- IF_AFDATA_UNLOCK(ifp);
-
/* Ignored or fully processed: release the packet without counting an error. */
freeit:
m_freem(m);
return;
/* Malformed advertisement: count it, then release the packet. */
bad:
V_icmp6stat.icp6s_badna++;
m_freem(m);
}
/*
 * Neighbor advertisement output handling.
 *
 * Based on RFC 2461
 *
 * the following items are not implemented yet:
 * - proxy advertisement delay rule (RFC2461 7.2.8, last paragraph, SHOULD)
 * - anycast advertisement delay rule (RFC2461 7.2.7, SHOULD)
 *
 * ifp      - interface the advertisement is sent on
 * daddr6_0 - destination address; the unspecified address means "reply to
 *            DAD" and is replaced by a link-local multicast address built
 *            from IPV6_ADDR_INT16_MLL / IPV6_ADDR_INT32_ONE
 * taddr6   - target address being advertised
 * flags    - ND_NA_FLAG_* bits; SOLICITED is cleared for DAD replies and
 *            OVERRIDE is cleared when no link-layer option is attached
 * tlladdr  - 1 if include target link-layer address
 * sdl0     - sockaddr_dl (= proxy NA) or NULL
 *
 * The packet is silently dropped when no mbuf or no source address can be
 * obtained.
 */
void
nd6_na_output(struct ifnet *ifp, const struct in6_addr *daddr6_0,
    const struct in6_addr *taddr6, u_long flags, int tlladdr,
    struct sockaddr *sdl0)
{
	INIT_VNET_INET6(ifp->if_vnet);
	struct mbuf *m;
	struct ip6_hdr *ip6;
	struct nd_neighbor_advert *nd_na;
	struct ip6_moptions im6o;
	struct in6_addr *src, daddr6;
	struct sockaddr_in6 dst_sa;
	int icmp6len, maxlen, error;
	caddr_t mac = NULL;
	struct route_in6 ro;

	bzero(&ro, sizeof(ro));
	/*
	 * im6o is always handed to ip6_output(); make sure it never carries
	 * stack garbage when the multicast fields below are not filled in.
	 */
	bzero(&im6o, sizeof(im6o));

	daddr6 = *daddr6_0;	/* make a local copy for modification */

	/* estimate the size of message */
	maxlen = sizeof(*ip6) + sizeof(*nd_na);
	maxlen += (sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7;
	if (max_linkhdr + maxlen >= MCLBYTES) {
#ifdef DIAGNOSTIC
		printf("nd6_na_output: max_linkhdr + maxlen >= MCLBYTES "
		    "(%d + %d > %d)\n", max_linkhdr, maxlen, MCLBYTES);
#endif
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m && max_linkhdr + maxlen >= MHLEN) {
		MCLGET(m, M_DONTWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			m = NULL;
		}
	}
	if (m == NULL)
		return;
	m->m_pkthdr.rcvif = NULL;

	/*
	 * Resolve the DAD-reply destination first, so that the multicast
	 * set-up below also applies to the address substituted here.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&daddr6)) {
		/* reply to DAD */
		daddr6.s6_addr16[0] = IPV6_ADDR_INT16_MLL;
		daddr6.s6_addr16[1] = 0;
		daddr6.s6_addr32[1] = 0;
		daddr6.s6_addr32[2] = 0;
		daddr6.s6_addr32[3] = IPV6_ADDR_INT32_ONE;
		if (in6_setscope(&daddr6, ifp, NULL))
			goto bad;

		flags &= ~ND_NA_FLAG_SOLICITED;
	}

	if (IN6_IS_ADDR_MULTICAST(&daddr6)) {
		m->m_flags |= M_MCAST;
		im6o.im6o_multicast_ifp = ifp;
		im6o.im6o_multicast_hlim = 255;
		im6o.im6o_multicast_loop = 0;
	}

	icmp6len = sizeof(*nd_na);
	m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + icmp6len;
	m->m_data += max_linkhdr;	/* or MH_ALIGN() equivalent? */

	/* fill neighbor advertisement packet */
	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_hlim = 255;
	ip6->ip6_dst = daddr6;

	bzero(&dst_sa, sizeof(struct sockaddr_in6));
	dst_sa.sin6_family = AF_INET6;
	dst_sa.sin6_len = sizeof(struct sockaddr_in6);
	dst_sa.sin6_addr = daddr6;

	/*
	 * Select a source whose scope is the same as that of the dest.
	 */
	bcopy(&dst_sa, &ro.ro_dst, sizeof(dst_sa));
	src = in6_selectsrc(&dst_sa, NULL, NULL, &ro, NULL, NULL, &error);
	if (src == NULL) {
		char ip6buf[INET6_ADDRSTRLEN];

		nd6log((LOG_DEBUG, "nd6_na_output: source can't be "
		    "determined: dst=%s, error=%d\n",
		    ip6_sprintf(ip6buf, &dst_sa.sin6_addr), error));
		goto bad;
	}
	ip6->ip6_src = *src;

	nd_na = (struct nd_neighbor_advert *)(ip6 + 1);
	nd_na->nd_na_type = ND_NEIGHBOR_ADVERT;
	nd_na->nd_na_code = 0;
	nd_na->nd_na_target = *taddr6;
	in6_clearscope(&nd_na->nd_na_target);	/* XXX */

	/*
	 * "tlladdr" indicates NS's condition for adding tlladdr or not.
	 * see nd6_ns_input() for details.
	 * Basically, if NS packet is sent to unicast/anycast addr,
	 * target lladdr option SHOULD NOT be included.
	 */
	if (tlladdr) {
		/*
		 * sdl0 != NULL indicates proxy NA.  If we do proxy, use
		 * lladdr in sdl0.  If we are not proxying (sending NA for
		 * my address) use lladdr configured for the interface.
		 */
		if (sdl0 == NULL) {
#ifdef DEV_CARP
			if (ifp->if_carp)
				mac = carp_macmatch6(ifp->if_carp, m, taddr6);
			if (mac == NULL)
				mac = nd6_ifptomac(ifp);
#else
			mac = nd6_ifptomac(ifp);
#endif
		} else if (sdl0->sa_family == AF_LINK) {
			struct sockaddr_dl *sdl;

			sdl = (struct sockaddr_dl *)sdl0;
			if (sdl->sdl_alen == ifp->if_addrlen)
				mac = LLADDR(sdl);
		}
	}
	if (tlladdr && mac) {
		int optlen = sizeof(struct nd_opt_hdr) + ifp->if_addrlen;
		struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)(nd_na + 1);

		/* roundup to 8 bytes alignment! */
		optlen = (optlen + 7) & ~7;

		m->m_pkthdr.len += optlen;
		m->m_len += optlen;
		icmp6len += optlen;
		bzero((caddr_t)nd_opt, optlen);
		nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR;
		nd_opt->nd_opt_len = optlen >> 3;
		bcopy(mac, (caddr_t)(nd_opt + 1), ifp->if_addrlen);
	} else
		flags &= ~ND_NA_FLAG_OVERRIDE;

	ip6->ip6_plen = htons((u_short)icmp6len);
	nd_na->nd_na_flags_reserved = flags;
	nd_na->nd_na_cksum = 0;
	nd_na->nd_na_cksum =
	    in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), icmp6len);

	ip6_output(m, NULL, &ro, 0, &im6o, NULL, NULL);
	icmp6_ifstat_inc(ifp, ifs6_out_msg);
	icmp6_ifstat_inc(ifp, ifs6_out_neighboradvert);
	V_icmp6stat.icp6s_outhist[ND_NEIGHBOR_ADVERT]++;

	if (ro.ro_rt) {		/* we don't cache this route. */
		RTFREE(ro.ro_rt);
	}
	return;

bad:
	if (ro.ro_rt) {
		RTFREE(ro.ro_rt);
	}
	m_freem(m);
	return;
}
/*
 * Return the link-layer address of ifp for the interface types listed
 * below, or NULL for every other interface type.
 */
caddr_t
nd6_ifptomac(struct ifnet *ifp)
{
	caddr_t lladdr = NULL;

	switch (ifp->if_type) {
	case IFT_ARCNET:
	case IFT_ETHER:
	case IFT_FDDI:
	case IFT_IEEE1394:
#ifdef IFT_L2VLAN
	case IFT_L2VLAN:
#endif
#ifdef IFT_IEEE80211
	case IFT_IEEE80211:
#endif
#ifdef IFT_CARP
	case IFT_CARP:
#endif
	case IFT_BRIDGE:
	case IFT_ISO88025:
		lladdr = IF_LLADDR(ifp);
		break;
	default:
		/* no usable link-layer address for this type */
		break;
	}
	return (lladdr);
}
/* Head type for the tail queue of in-progress DAD entries. */
TAILQ_HEAD(dadq_head, dadq);
/* Per-address state kept while Duplicate Address Detection is running. */
struct dadq {
TAILQ_ENTRY(dadq) dad_list; /* linkage on the DAD queue */
struct ifaddr *dad_ifa; /* address being checked; lookup key in nd6_dad_find() */
int dad_count; /* max NS to send */
int dad_ns_tcount; /* # of trials to send NS */
int dad_ns_ocount; /* NS sent so far */
int dad_ns_icount; /* bumped by nd6_dad_ns_input() */
int dad_na_icount; /* bumped by nd6_dad_na_input() */
struct callout dad_timer_ch; /* callout armed by nd6_dad_starttimer() */
};
#ifdef VIMAGE_GLOBALS
/* presumably reached through the V_dadq / V_dad_init names used below -- TODO confirm */
static struct dadq_head dadq;
int dad_init;
#endif
static struct dadq *
nd6_dad_find(struct ifaddr *ifa)
{
INIT_VNET_INET6(curvnet);
struct dadq *dp;
for (dp = V_dadq.tqh_first; dp; dp = dp->dad_list.tqe_next) {
if (dp->dad_ifa == ifa)
return dp;
}
return NULL;
}
/*
 * (Re)arm the DAD callout of dp so that nd6_dad_timer() is invoked with the
 * entry's address after the given number of ticks.
 */
static void
nd6_dad_starttimer(struct dadq *dp, int ticks)
{
	void (*handler)(void *) = (void (*)(void *))nd6_dad_timer;
	void *arg = (void *)dp->dad_ifa;

	callout_reset(&dp->dad_timer_ch, ticks, handler, arg);
}
/* Stop the DAD callout that belongs to dp. */
static void
nd6_dad_stoptimer(struct dadq *dp)
{
	struct callout *timer = &dp->dad_timer_ch;

	callout_stop(timer);
}
/*
 * Begin Duplicate Address Detection (DAD) on the given interface address.
 *
 * ifa   - address to probe; it must still carry IN6_IFF_TENTATIVE
 * delay - 0 to send the first NS immediately, otherwise the number of
 *         ticks to wait before the first transmission
 *
 * Nothing is started when the address is anycast, DAD is disabled, the
 * interface is down, or DAD is already running for this address.
 */
void
nd6_dad_start(struct ifaddr *ifa, int delay)
{
	INIT_VNET_INET6(curvnet);
	struct in6_ifaddr *ia6 = (struct in6_ifaddr *)ifa;
	struct dadq *entry;
	char addrbuf[INET6_ADDRSTRLEN];

	/* Set up the DAD queue head the first time through. */
	if (!V_dad_init) {
		TAILQ_INIT(&V_dadq);
		V_dad_init++;
	}

	/* Only tentative addresses are subject to DAD. */
	if ((ia6->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
		log(LOG_DEBUG,
		    "nd6_dad_start: called with non-tentative address "
		    "%s(%s)\n",
		    ip6_sprintf(addrbuf, &ia6->ia_addr.sin6_addr),
		    ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
		return;
	}

	/*
	 * If we don't need DAD, don't do it:
	 * - the interface address is anycast
	 * - DAD is disabled (ip6_dad_count == 0)
	 * In both cases the address simply stops being tentative.
	 */
	if ((ia6->ia6_flags & IN6_IFF_ANYCAST) != 0 || !V_ip6_dad_count) {
		ia6->ia6_flags &= ~IN6_IFF_TENTATIVE;
		return;
	}

	if (ifa->ifa_ifp == NULL)
		panic("nd6_dad_start: ifa->ifa_ifp == NULL");
	if ((ifa->ifa_ifp->if_flags & IFF_UP) == 0)
		return;
	if (nd6_dad_find(ifa) != NULL)
		return;		/* DAD already in progress */

	entry = malloc(sizeof(*entry), M_IP6NDP, M_NOWAIT);
	if (entry == NULL) {
		log(LOG_ERR, "nd6_dad_start: memory allocation failed for "
		    "%s(%s)\n",
		    ip6_sprintf(addrbuf, &ia6->ia_addr.sin6_addr),
		    ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
		return;
	}
	bzero(entry, sizeof(*entry));
	callout_init(&entry->dad_timer_ch, 0);
	TAILQ_INSERT_TAIL(&V_dadq, (struct dadq *)entry, dad_list);

	nd6log((LOG_DEBUG, "%s: starting DAD for %s\n", if_name(ifa->ifa_ifp),
	    ip6_sprintf(addrbuf, &ia6->ia_addr.sin6_addr)));

	/*
	 * Send NS packet for DAD, ip6_dad_count times.
	 * Note that we must delay the first transmission, if this is the
	 * first packet to be sent from the interface after interface
	 * (re)initialization.
	 */
	entry->dad_ifa = ifa;
	IFAREF(ifa);	/* just for safety */
	entry->dad_count = V_ip6_dad_count;
	entry->dad_na_icount = 0;
	entry->dad_ns_icount = 0;
	entry->dad_ns_tcount = 0;
	entry->dad_ns_ocount = 0;

	if (delay != 0) {
		/* The caller asked for the first probe to be postponed. */
		nd6_dad_starttimer(entry, delay);
		return;
	}
	nd6_dad_ns_output(entry, ifa);
	nd6_dad_starttimer(entry,
	    (long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
}
/*
 * Abort DAD for ifa unconditionally; used when an address is removed.
 * Does nothing if the DAD queue was never initialized or DAD has not been
 * started for this address.
 */
void
nd6_dad_stop(struct ifaddr *ifa)
{
	INIT_VNET_INET6(curvnet);
	struct dadq *entry;

	if (!V_dad_init)
		return;

	entry = nd6_dad_find(ifa);
	if (entry == NULL)
		return;		/* DAD wasn't started yet */

	/* Disarm the timer, unlink and release the entry, drop its ifa ref. */
	nd6_dad_stoptimer(entry);
	TAILQ_REMOVE(&V_dadq, (struct dadq *)entry, dad_list);
	free(entry, M_IP6NDP);
	IFAFREE(ifa);
}
/*
* DAD callout handler for one interface address.
*
* Either sends another DAD neighbor solicitation and re-arms the timer, or,
* once dad_count solicitations have gone out, decides the outcome: the
* address is handed to nd6_dad_duplicated() if any NS/NA was counted,
* otherwise IN6_IFF_TENTATIVE is cleared and the dadq entry is released.
*
* ifa - address under test; its dadq entry is looked up with nd6_dad_find()
*/
static void
nd6_dad_timer(struct ifaddr *ifa)
{
/*
* NOTE(review): dp is not declared yet at this point and struct dadq, as
* defined in this file, has no dad_vnet member; this presumably builds only
* where CURVNET_SET() discards its argument -- confirm.
*/
CURVNET_SET(dp->dad_vnet);
INIT_VNET_INET6(curvnet);
int s;
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct dadq *dp;
char ip6buf[INET6_ADDRSTRLEN];
s = splnet(); /* XXX */
/* Sanity check */
if (ia == NULL) {
log(LOG_ERR, "nd6_dad_timer: called with null parameter\n");
goto done;
}
dp = nd6_dad_find(ifa);
if (dp == NULL) {
log(LOG_ERR, "nd6_dad_timer: DAD structure not found\n");
goto done;
}
/* The timer should only fire for addresses that are still tentative. */
if (ia->ia6_flags & IN6_IFF_DUPLICATED) {
log(LOG_ERR, "nd6_dad_timer: called with duplicated address "
"%s(%s)\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
goto done;
}
if ((ia->ia6_flags & IN6_IFF_TENTATIVE) == 0) {
log(LOG_ERR, "nd6_dad_timer: called with non-tentative address "
"%s(%s)\n",
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
ifa->ifa_ifp ? if_name(ifa->ifa_ifp) : "???");
goto done;
}
/* timeouted with IFF_{RUNNING,UP} check */
/* Too many send attempts: give up and release the entry and its ifa ref. */
if (dp->dad_ns_tcount > V_dad_maxtry) {
nd6log((LOG_INFO, "%s: could not run DAD, driver problem?\n",
if_name(ifa->ifa_ifp)));
TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
free(dp, M_IP6NDP);
dp = NULL;
IFAFREE(ifa);
goto done;
}
/* Need more checks? */
if (dp->dad_ns_ocount < dp->dad_count) {
/*
* We have more NS to go. Send NS packet for DAD.
*/
nd6_dad_ns_output(dp, ifa);
nd6_dad_starttimer(dp,
(long)ND_IFINFO(ifa->ifa_ifp)->retrans * hz / 1000);
} else {
/*
* We have transmitted sufficient number of DAD packets.
* See what we've got.
*/
int duplicate;
duplicate = 0;
if (dp->dad_na_icount) {
/*
* the check is in nd6_dad_na_input(),
* but just in case
*/
duplicate++;
}
if (dp->dad_ns_icount) {
/* We've seen NS, means DAD has failed. */
duplicate++;
}
if (duplicate) {
/* (*dp) will be freed in nd6_dad_duplicated() */
dp = NULL;
nd6_dad_duplicated(ifa);
} else {
/*
* We are done with DAD. No NA came, no NS came.
* No duplicate address found.
*/
ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
nd6log((LOG_DEBUG,
"%s: DAD complete for %s - no duplicates found\n",
if_name(ifa->ifa_ifp),
ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr)));
TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
free(dp, M_IP6NDP);
dp = NULL;
IFAFREE(ifa);
}
}
/* Common exit: undo splnet() and the vnet switch made on entry. */
done:
splx(s);
CURVNET_RESTORE();
}
void
nd6_dad_duplicated(struct ifaddr *ifa)
{
INIT_VNET_INET6(curvnet);
struct in6_ifaddr *ia = (struct in6_ifaddr *)ifa;
struct ifnet *ifp;
struct dadq *dp;
char ip6buf[INET6_ADDRSTRLEN];
dp = nd6_dad_find(ifa);
if (dp == NULL) {
log(LOG_ERR, "nd6_dad_duplicated: DAD structure not found\n");
return;
}
log(LOG_ERR, "%s: DAD detected duplicate IPv6 address %s: "
"NS in/out=%d/%d, NA in=%d\n",
if_name(ifa->ifa_ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr),
dp->dad_ns_icount, dp->dad_ns_ocount, dp->dad_na_icount);
ia->ia6_flags &= ~IN6_IFF_TENTATIVE;
ia->ia6_flags |= IN6_IFF_DUPLICATED;
/* We are done with DAD, with duplicate address found. (failure) */
nd6_dad_stoptimer(dp);
ifp = ifa->ifa_ifp;
log(LOG_ERR, "%s: DAD complete for %s - duplicate found\n",
if_name(ifp), ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr));
log(LOG_ERR, "%s: manual intervention required\n",
if_name(ifp));
/*
* If the address is a link-local address formed from an interface
* identifier based on the hardware address which is supposed to be
* uniquely assigned (e.g., EUI-64 for an Ethernet interface), IP
* operation on the interface SHOULD be disabled.
* [rfc2462bis-03 Section 5.4.5]
*/
if (IN6_IS_ADDR_LINKLOCAL(&ia->ia_addr.sin6_addr)) {
struct in6_addr in6;
/*
* To avoid over-reaction, we only apply this logic when we are
* very sure that hardware addresses are supposed to be unique.
*/
switch (ifp->if_type) {
case IFT_ETHER:
case IFT_FDDI:
case IFT_ATM:
case IFT_IEEE1394:
#ifdef IFT_IEEE80211
case IFT_IEEE80211:
#endif
in6 = ia->ia_addr.sin6_addr;
if (in6_get_hw_ifid(ifp, &in6) == 0 &&
IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &in6)) {
ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED;
log(LOG_ERR, "%s: possible hardware address "
"duplication detected, disable IPv6\n",
if_name(ifp));
}
break;
}
}
TAILQ_REMOVE(&V_dadq, (struct dadq *)dp, dad_list);
free(dp, M_IP6NDP);
dp = NULL;
IFAFREE(ifa);
}
/*
 * Try to send one DAD neighbor solicitation for ifa.
 *
 * Every call counts as an attempt (dad_ns_tcount); the solicitation is only
 * sent, and dad_ns_ocount only incremented, when the interface is both up
 * and running.
 */
static void
nd6_dad_ns_output(struct dadq *dp, struct ifaddr *ifa)
{
	struct ifnet *ifp = ifa->ifa_ifp;
	struct in6_ifaddr *ia6 = (struct in6_ifaddr *)ifa;

	dp->dad_ns_tcount++;

	if ((ifp->if_flags & IFF_UP) == 0 ||
	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;

	dp->dad_ns_ocount++;
	nd6_ns_output(ifp, NULL, &ia6->ia_addr.sin6_addr, NULL, 1);
}
/*
 * Account for a DAD neighbor solicitation received for address ifa.
 *
 * With V_dad_ignore_ns set the packet is only logged.  Otherwise, if DAD
 * has not been started for the address or no NS of ours has gone out yet,
 * the address is declared a duplicate; if our own probing is already under
 * way, the solicitation is merely counted.
 */
static void
nd6_dad_ns_input(struct ifaddr *ifa)
{
	INIT_VNET_INET6(curvnet);
	const struct in6_addr *target;
	struct dadq *entry;

	if (ifa == NULL)
		panic("ifa == NULL in nd6_dad_ns_input");

	target = &((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr;

	/* Quickhack - completely ignore DAD NS packets */
	if (V_dad_ignore_ns) {
		char addrbuf[INET6_ADDRSTRLEN];

		nd6log((LOG_INFO,
		    "nd6_dad_ns_input: ignoring DAD NS packet for "
		    "address %s(%s)\n", ip6_sprintf(addrbuf, target),
		    if_name(ifa->ifa_ifp)));
		return;
	}

	entry = nd6_dad_find(ifa);

	/*
	 * if I'm yet to start DAD, someone else started using this address
	 * first.  I have a duplicate and you win.
	 */
	/* XXX more checks for loopback situation - see nd6_dad_timer too */
	if (entry == NULL || entry->dad_ns_ocount == 0) {
		/* the entry, if any, is freed in nd6_dad_duplicated() */
		nd6_dad_duplicated(ifa);
		return;
	}

	/*
	 * not sure if I got a duplicate.
	 * increment ns count and see what happens.
	 */
	entry->dad_ns_icount++;
}
/*
 * Account for a neighbor advertisement received for address ifa while DAD
 * is in progress: count it on the dadq entry (if there is one) and declare
 * the address a duplicate.
 */
static void
nd6_dad_na_input(struct ifaddr *ifa)
{
	struct dadq *entry;

	if (ifa == NULL)
		panic("ifa == NULL in nd6_dad_na_input");

	if ((entry = nd6_dad_find(ifa)) != NULL)
		entry->dad_na_icount++;

	/* remove the address. */
	nd6_dad_duplicated(ifa);
}
Index: projects/arpv2_merge_1/sys/netinet6/nd6_rtr.c
===================================================================
--- projects/arpv2_merge_1/sys/netinet6/nd6_rtr.c (revision 185838)
+++ projects/arpv2_merge_1/sys/netinet6/nd6_rtr.c (revision 185839)
@@ -1,2136 +1,2134 @@
/*-
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the project nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/errno.h>
#include <sys/rwlock.h>
#include <sys/syslog.h>
#include <sys/queue.h>
#include <sys/vimage.h>
#include <net/if.h>
#include <net/if_types.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/radix.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <net/if_llatbl.h>
#include <netinet6/in6_var.h>
#include <netinet6/in6_ifattach.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/nd6.h>
#include <netinet/icmp6.h>
#include <netinet6/scope6_var.h>
#include <netinet6/vinet6.h>
/* Cast helper: view s as a pointer to struct sockaddr_dl. */
#define SDL(s) ((struct sockaddr_dl *)s)
/*
* Prototypes for the file-local (static) helpers of this file.
* Some of them still use the old __P(()) prototype wrapper.
*/
static int rtpref(struct nd_defrouter *);
static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
static int prelist_update __P((struct nd_prefixctl *, struct nd_defrouter *,
struct mbuf *, int));
static struct in6_ifaddr *in6_ifadd(struct nd_prefixctl *, int);
static struct nd_pfxrouter *pfxrtr_lookup __P((struct nd_prefix *,
struct nd_defrouter *));
static void pfxrtr_add(struct nd_prefix *, struct nd_defrouter *);
static void pfxrtr_del(struct nd_pfxrouter *);
static struct nd_pfxrouter *find_pfxlist_reachable_router
(struct nd_prefix *);
static void defrouter_delreq(struct nd_defrouter *);
static void nd6_rtmsg(int, struct rtentry *);
static int in6_init_prefix_ltimes(struct nd_prefix *);
static void in6_init_address_ltimes __P((struct nd_prefix *,
struct in6_addrlifetime *));
static int rt6_deleteroute(struct radix_node *, void *);
/*
* Variables that exist as plain globals only when VIMAGE_GLOBALS is defined.
* NOTE(review): presumably they are otherwise provided per-vnet and reached
* through the V_ prefixed names used in this file -- confirm.
*/
#ifdef VIMAGE_GLOBALS
extern int nd6_recalc_reachtm_interval;
static struct ifnet *nd6_defifp;
int nd6_defifindex;
int ip6_use_tempaddr;
int ip6_desync_factor;
u_int32_t ip6_temp_preferred_lifetime;
u_int32_t ip6_temp_valid_lifetime;
int ip6_temp_regen_advance;
#endif
/*
* Router preference levels, ordered so that a larger value means a higher
* preference (presumably the values returned by rtpref() -- TODO confirm).
*/
/* RTPREF_MEDIUM has to be 0! */
#define RTPREF_HIGH 1
#define RTPREF_MEDIUM 0
#define RTPREF_LOW (-1)
#define RTPREF_RESERVED (-2)
#define RTPREF_INVALID (-3) /* internal */
/*
* Receive Router Solicitation Message - just for routers.
* Router solicitation/advertisement is mostly managed by userland program
* (rtadvd) so here we have no function like nd6_ra_output().
*
* Based on RFC 2461
*
* m - received packet; released with m_freem() on the freeit and bad paths
* off - byte offset from the IPv6 header to the router solicitation
* icmp6len - length of the solicitation, including trailing ND options
*
* The only state change made here is recording the sender's link-layer
* address through nd6_cache_lladdr(). Malformed packets are counted in
* icp6s_badrs.
*/
void
nd6_rs_input(struct mbuf *m, int off, int icmp6len)
{
INIT_VNET_INET6(curvnet);
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct nd_router_solicit *nd_rs;
struct in6_addr saddr6 = ip6->ip6_src;
char *lladdr = NULL;
int lladdrlen = 0;
union nd_opts ndopts;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
/* If I'm not a router, ignore it. */
/* "Router" here means: RA acceptance is off and forwarding is exactly 1. */
if (V_ip6_accept_rtadv != 0 || V_ip6_forwarding != 1)
goto freeit;
/* Sanity checks */
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_rs_input: invalid hlim (%d) from %s to %s on %s\n",
ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
/*
* Don't update the neighbor cache, if src = ::.
* This indicates that the src has no IP address assigned yet.
*/
if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
goto freeit;
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, icmp6len,);
nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
#else
IP6_EXTHDR_GET(nd_rs, struct nd_router_solicit *, m, off, icmp6len);
if (nd_rs == NULL) {
V_icmp6stat.icp6s_tooshort++;
return;
}
#endif
/* From here on icmp6len is the length of the option area only. */
icmp6len -= sizeof(*nd_rs);
nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
if (nd6_options(&ndopts) < 0) {
nd6log((LOG_INFO,
"nd6_rs_input: invalid ND option, ignored\n"));
/* nd6_options have incremented stats */
goto freeit;
}
/* lladdr points just past the option header; lladdrlen is the whole option. */
if (ndopts.nd_opts_src_lladdr) {
lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
}
/*
* The option length must equal the interface address length plus the
* 2-byte option header, rounded up to a multiple of 8.
*/
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO,
"nd6_rs_input: lladdrlen mismatch for %s "
"(if %d, RS packet %d)\n",
ip6_sprintf(ip6bufs, &saddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
/*
* NOTE(review): the "-" lines remove the AFDATA lock that used to bracket
* this call; presumably nd6_cache_lladdr() now locks internally -- confirm.
*/
- IF_AFDATA_LOCK(ifp);
nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
- IF_AFDATA_UNLOCK(ifp);
freeit:
m_freem(m);
return;
bad:
V_icmp6stat.icp6s_badrs++;
m_freem(m);
}
/*
* Receive Router Advertisement Message.
*
* Based on RFC 2461
* TODO: on-link bit on prefix information
* TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
*
* m - received packet; released with m_freem() on the freeit and bad paths
* off - byte offset from the IPv6 header to the router advertisement
* icmp6len - length of the advertisement, including trailing ND options
*
* In order, this updates the per-interface ND parameters and the default
* router list, processes every prefix information option, applies the MTU
* option, and finally records the sender's link-layer address. Malformed
* packets are counted in icp6s_badra.
*/
void
nd6_ra_input(struct mbuf *m, int off, int icmp6len)
{
INIT_VNET_INET6(curvnet);
struct ifnet *ifp = m->m_pkthdr.rcvif;
struct nd_ifinfo *ndi = ND_IFINFO(ifp);
struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
struct nd_router_advert *nd_ra;
struct in6_addr saddr6 = ip6->ip6_src;
int mcast = 0;
union nd_opts ndopts;
struct nd_defrouter *dr;
char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
/*
* We accept RAs only when
* the system-wide variable allows the acceptance, and
* per-interface variable allows RAs on the receiving interface.
*/
if (V_ip6_accept_rtadv == 0)
goto freeit;
if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
goto freeit;
if (ip6->ip6_hlim != 255) {
nd6log((LOG_ERR,
"nd6_ra_input: invalid hlim (%d) from %s to %s on %s\n",
ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
goto bad;
}
/* The advertisement must come from a link-local source address. */
if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
nd6log((LOG_ERR,
"nd6_ra_input: src %s is not link-local\n",
ip6_sprintf(ip6bufs, &saddr6)));
goto bad;
}
#ifndef PULLDOWN_TEST
IP6_EXTHDR_CHECK(m, off, icmp6len,);
nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
#else
IP6_EXTHDR_GET(nd_ra, struct nd_router_advert *, m, off, icmp6len);
if (nd_ra == NULL) {
V_icmp6stat.icp6s_tooshort++;
return;
}
#endif
/* From here on icmp6len is the length of the option area only. */
icmp6len -= sizeof(*nd_ra);
nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
if (nd6_options(&ndopts) < 0) {
nd6log((LOG_INFO,
"nd6_ra_input: invalid ND option, ignored\n"));
/* nd6_options have incremented stats */
goto freeit;
}
/*
* Build a template default-router entry from the RA header, apply the
* advertised reachable time, retransmit timer and hop limit to the
* per-interface ND info, then merge the template into the router list.
*/
{
struct nd_defrouter dr0;
u_int32_t advreachable = nd_ra->nd_ra_reachable;
/* remember if this is a multicasted advertisement */
if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
mcast = 1;
bzero(&dr0, sizeof(dr0));
dr0.rtaddr = saddr6;
dr0.flags = nd_ra->nd_ra_flags_reserved;
dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
dr0.expire = time_second + dr0.rtlifetime;
dr0.ifp = ifp;
/* unspecified or not? (RFC 2461 6.3.4) */
if (advreachable) {
advreachable = ntohl(advreachable);
if (advreachable <= MAX_REACHABLE_TIME &&
ndi->basereachable != advreachable) {
ndi->basereachable = advreachable;
ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */
}
}
if (nd_ra->nd_ra_retransmit)
ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
if (nd_ra->nd_ra_curhoplimit)
ndi->chlim = nd_ra->nd_ra_curhoplimit;
dr = defrtrlist_update(&dr0);
}
/*
* prefix
*/
if (ndopts.nd_opts_pi) {
struct nd_opt_hdr *pt;
struct nd_opt_prefix_info *pi = NULL;
struct nd_prefixctl pr;
/*
* Walk the options from the first to the last prefix information
* option, stepping by each option's own length and skipping any
* option of another type found in between.
*/
for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
pt = (struct nd_opt_hdr *)((caddr_t)pt +
(pt->nd_opt_len << 3))) {
if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
continue;
pi = (struct nd_opt_prefix_info *)pt;
if (pi->nd_opt_pi_len != 4) {
nd6log((LOG_INFO,
"nd6_ra_input: invalid option "
"len %d for prefix information option, "
"ignored\n", pi->nd_opt_pi_len));
continue;
}
if (128 < pi->nd_opt_pi_prefix_len) {
nd6log((LOG_INFO,
"nd6_ra_input: invalid prefix "
"len %d for prefix information option, "
"ignored\n", pi->nd_opt_pi_prefix_len));
continue;
}
if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
|| IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
nd6log((LOG_INFO,
"nd6_ra_input: invalid prefix "
"%s, ignored\n",
ip6_sprintf(ip6bufs,
&pi->nd_opt_pi_prefix)));
continue;
}
/* Translate the option into a prefix control block and apply it. */
bzero(&pr, sizeof(pr));
pr.ndpr_prefix.sin6_family = AF_INET6;
pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;
pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time);
(void)prelist_update(&pr, dr, m, mcast);
}
}
/*
* MTU
*/
if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
u_long mtu;
u_long maxmtu;
mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
/* lower bound */
if (mtu < IPV6_MMTU) {
nd6log((LOG_INFO, "nd6_ra_input: bogus mtu option "
"mtu=%lu sent from %s, ignoring\n",
mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
goto skip;
}
/* upper bound */
/* The smaller of the configured ndi->maxmtu (if nonzero) and if_mtu. */
maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu)
? ndi->maxmtu : ifp->if_mtu;
if (mtu <= maxmtu) {
int change = (ndi->linkmtu != mtu);
ndi->linkmtu = mtu;
if (change) /* in6_maxmtu may change */
in6_setmaxmtu();
} else {
nd6log((LOG_INFO, "nd6_ra_input: bogus mtu "
"mtu=%lu sent from %s; "
"exceeds maxmtu %lu, ignoring\n",
mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
}
}
skip:
/*
* Source link layer address
*/
{
char *lladdr = NULL;
int lladdrlen = 0;
if (ndopts.nd_opts_src_lladdr) {
lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
}
/*
* The option length must equal the interface address length plus the
* 2-byte option header, rounded up to a multiple of 8.
*/
if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
nd6log((LOG_INFO,
"nd6_ra_input: lladdrlen mismatch for %s "
"(if %d, RA packet %d)\n", ip6_sprintf(ip6bufs, &saddr6),
ifp->if_addrlen, lladdrlen - 2));
goto bad;
}
/*
* NOTE(review): the "-" lines remove the AFDATA lock that used to bracket
* this call; presumably nd6_cache_lladdr() now locks internally -- confirm.
*/
- IF_AFDATA_LOCK(ifp);
nd6_cache_lladdr(ifp, &saddr6, lladdr,
lladdrlen, ND_ROUTER_ADVERT, 0);
- IF_AFDATA_UNLOCK(ifp);
/*
* Installing a link-layer address might change the state of the
* router's neighbor cache, which might also affect our on-link
* detection of advertised prefixes.
*/
pfxlist_onlink_check();
}
freeit:
m_freem(m);
return;
bad:
V_icmp6stat.icp6s_badra++;
m_freem(m);
}
/*
* default router list processing subroutines
*/
/* tell the change to user processes watching the routing socket. */
static void
nd6_rtmsg(int cmd, struct rtentry *rt)
{
struct rt_addrinfo info;
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info.rti_info[RTAX_NETMASK] = rt_mask(rt);
if (rt->rt_ifp) {
info.rti_info[RTAX_IFP] =
TAILQ_FIRST(&rt->rt_ifp->if_addrlist)->ifa_addr;
info.rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
}
rt_missmsg(cmd, &info, rt->rt_flags, 0);
}
/*
 * Ask the routing table to add a default route through the given router
 * and report the new route on the routing socket.
 * new - default router entry; new->rtaddr becomes the gateway address and
 * new->installed is set to 1 when rtrequest() returns 0.
 * NOTE(review): the lines below that start with "- " or "+ " are diff
 * markers carried over from the patch this text was extracted from; they
 * are not C source.
 */
void
defrouter_addreq(struct nd_defrouter *new)
{
struct sockaddr_in6 def, mask, gate;
struct rtentry *newrt = NULL;
int s;
int error;
/* def and mask keep an all-zero address; only gate gets a real address. */
bzero(&def, sizeof(def));
bzero(&mask, sizeof(mask));
bzero(&gate, sizeof(gate));
def.sin6_len = mask.sin6_len = gate.sin6_len =
sizeof(struct sockaddr_in6);
def.sin6_family = gate.sin6_family = AF_INET6;
gate.sin6_addr = new->rtaddr;
s = splnet();
error = rtrequest(RTM_ADD, (struct sockaddr *)&def,
(struct sockaddr *)&gate, (struct sockaddr *)&mask,
RTF_GATEWAY, &newrt);
/* newrt is only non-NULL if rtrequest() handed a route entry back. */
if (newrt) {
- RT_LOCK(newrt);
nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
- RT_REMREF(newrt);
- RT_UNLOCK(newrt);
+ RTFREE(newrt);
}
/* Remember the installation only when the request succeeded. */
if (error == 0)
new->installed = 1;
splx(s);
return;
}
struct nd_defrouter *
defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
{
INIT_VNET_INET6(ifp->if_vnet);
struct nd_defrouter *dr;
for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
dr = TAILQ_NEXT(dr, dr_entry)) {
if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr))
return (dr);
}
return (NULL); /* search failed */
}
/*
* Remove the default route for a given router.
* This is just a subroutine function for defrouter_select(), and should
* not be called from anywhere else.
*/
static void
defrouter_delreq(struct nd_defrouter *dr)
{
struct sockaddr_in6 def, mask, gate;
struct rtentry *oldrt = NULL;
bzero(&def, sizeof(def));
bzero(&mask, sizeof(mask));
bzero(&gate, sizeof(gate));
def.sin6_len = mask.sin6_len = gate.sin6_len =
sizeof(struct sockaddr_in6);
def.sin6_family = gate.sin6_family = AF_INET6;
gate.sin6_addr = dr->rtaddr;
rtrequest(RTM_DELETE, (struct sockaddr *)&def,
(struct sockaddr *)&gate,
(struct sockaddr *)&mask, RTF_GATEWAY, &oldrt);
if (oldrt) {
nd6_rtmsg(RTM_DELETE, oldrt);
RTFREE(oldrt);
}
dr->installed = 0;
}
/*
* remove all default routes from default router list
*/
void
defrouter_reset(void)
{
INIT_VNET_INET6(curvnet);
struct nd_defrouter *dr;
for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
dr = TAILQ_NEXT(dr, dr_entry))
defrouter_delreq(dr);
/*
* XXX should we also nuke any default routers in the kernel, by
* going through them by rtalloc1()?
*/
}
void
defrtrlist_del(struct nd_defrouter *dr)
{
INIT_VNET_INET6(curvnet);
struct nd_defrouter *deldr = NULL;
struct nd_prefix *pr;
/*
* Flush all the routing table entries that use the router
* as a next hop.
*/
if (!V_ip6_forwarding && V_ip6_accept_rtadv) /* XXX: better condition? */
rt6_flush(&dr->rtaddr, dr->ifp);
if (dr->installed) {
deldr = dr;
defrouter_delreq(dr);
}
TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
/*
* Also delete all the pointers to the router in each prefix lists.
*/
for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
struct nd_pfxrouter *pfxrtr;
if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
pfxrtr_del(pfxrtr);
}
pfxlist_onlink_check();
/*
* If the router is the primary one, choose a new one.
* Note that defrouter_select() will remove the current gateway
* from the routing table.
*/
if (deldr)
defrouter_select();
free(dr, M_IP6NDP);
}
/*
* Default Router Selection according to Section 6.3.6 of RFC 2461 and
* draft-ietf-ipngwg-router-selection:
* 1) Routers that are reachable or probably reachable should be preferred.
* If we have more than one (probably) reachable router, prefer ones
* with the highest router preference.
* 2) When no routers on the list are known to be reachable or
* probably reachable, routers SHOULD be selected in a round-robin
* fashion, regardless of router preference values.
* 3) If the Default Router List is empty, assume that all
* destinations are on-link.
*
* We assume nd_defrouter is sorted by router preference value.
* Since the code below covers both with and without router preference cases,
* we do not need to classify the cases by ifdef.
*
* At this moment, we do not try to install more than one default router,
* even when the multipath routing is available, because we're not sure about
* the benefits for stub hosts comparing to the risk of making the code
* complicated and the possibility of introducing bugs.
*/
void
defrouter_select(void)
{
INIT_VNET_INET6(curvnet);
int s = splnet();
struct nd_defrouter *dr, *selected_dr = NULL, *installed_dr = NULL;
struct llentry *ln = NULL;
/*
* This function should be called only when acting as an autoconfigured
* host. Although the remaining part of this function is not effective
* if the node is not an autoconfigured host, we explicitly exclude
* such cases here for safety.
*/
if (V_ip6_forwarding || !V_ip6_accept_rtadv) {
nd6log((LOG_WARNING,
"defrouter_select: called unexpectedly (forwarding=%d, "
"accept_rtadv=%d)\n", V_ip6_forwarding, V_ip6_accept_rtadv));
splx(s);
return;
}
/*
* Let's handle easy case (3) first:
* If default router list is empty, there's nothing to be done.
*/
if (!TAILQ_FIRST(&V_nd_defrouter)) {
splx(s);
return;
}
/*
* Search for a (probably) reachable router from the list.
* We just pick up the first reachable one (if any), assuming that
* the ordering rule of the list described in defrtrlist_update().
*/
for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
dr = TAILQ_NEXT(dr, dr_entry)) {
IF_AFDATA_LOCK(dr->ifp);
if (selected_dr == NULL &&
(ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln)) {
selected_dr = dr;
}
IF_AFDATA_UNLOCK(dr->ifp);
if (dr->installed && installed_dr == NULL)
installed_dr = dr;
else if (dr->installed && installed_dr) {
/* this should not happen. warn for diagnosis. */
log(LOG_ERR, "defrouter_select: more than one router"
" is installed\n");
}
}
/*
* If none of the default routers was found to be reachable,
* round-robin the list regardless of preference.
* Otherwise, if we have an installed router, check if the selected
* (reachable) router should really be preferred to the installed one.
* We only prefer the new router when the old one is not reachable
* or when the new one has a really higher preference value.
*/
if (selected_dr == NULL) {
if (installed_dr == NULL || !TAILQ_NEXT(installed_dr, dr_entry))
selected_dr = TAILQ_FIRST(&V_nd_defrouter);
else
selected_dr = TAILQ_NEXT(installed_dr, dr_entry);
} else if (installed_dr) {
IF_AFDATA_LOCK(installed_dr->ifp);
if ((ln = nd6_lookup(&installed_dr->rtaddr, 0, installed_dr->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln) &&
rtpref(selected_dr) <= rtpref(installed_dr)) {
selected_dr = installed_dr;
}
IF_AFDATA_UNLOCK(installed_dr->ifp);
}
/*
* If the selected router is different than the installed one,
* remove the installed router and install the selected one.
* Note that the selected router is never NULL here.
*/
if (installed_dr != selected_dr) {
if (installed_dr)
defrouter_delreq(installed_dr);
defrouter_addreq(selected_dr);
}
splx(s);
return;
}
/*
 * Map the router-preference bits of a default router's RA flags to an
 * RTPREF_* value for default router selection. The field is treated as a
 * 2-bit signed integer; the reserved encoding is handled as medium.
 *
 * dr - router whose flags are examined
 *
 * Returns RTPREF_HIGH, RTPREF_MEDIUM or RTPREF_LOW, or RTPREF_INVALID
 * (after logging) for a value matching none of the known encodings.
 */
static int
rtpref(struct nd_defrouter *dr)
{
	const int prefbits = dr->flags & ND_RA_FLAG_RTPREF_MASK;

	if (prefbits == ND_RA_FLAG_RTPREF_HIGH)
		return (RTPREF_HIGH);
	if (prefbits == ND_RA_FLAG_RTPREF_MEDIUM ||
	    prefbits == ND_RA_FLAG_RTPREF_RSV)
		return (RTPREF_MEDIUM);
	if (prefbits == ND_RA_FLAG_RTPREF_LOW)
		return (RTPREF_LOW);

	/*
	 * This case should never happen. If it did, it would mean a
	 * serious bug of kernel internal. We thus always bark here.
	 * Or, can we even panic?
	 */
	log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->flags);
	return (RTPREF_INVALID);
}
static struct nd_defrouter *
defrtrlist_update(struct nd_defrouter *new)
{
INIT_VNET_INET6(curvnet);
struct nd_defrouter *dr, *n;
int s = splnet();
if ((dr = defrouter_lookup(&new->rtaddr, new->ifp)) != NULL) {
/* entry exists */
if (new->rtlifetime == 0) {
defrtrlist_del(dr);
dr = NULL;
} else {
int oldpref = rtpref(dr);
/* override */
dr->flags = new->flags; /* xxx flag check */
dr->rtlifetime = new->rtlifetime;
dr->expire = new->expire;
/*
* If the preference does not change, there's no need
* to sort the entries.
*/
if (rtpref(new) == oldpref) {
splx(s);
return (dr);
}
/*
* preferred router may be changed, so relocate
* this router.
* XXX: calling TAILQ_REMOVE directly is a bad manner.
* However, since defrtrlist_del() has many side
* effects, we intentionally do so here.
* defrouter_select() below will handle routing
* changes later.
*/
TAILQ_REMOVE(&V_nd_defrouter, dr, dr_entry);
n = dr;
goto insert;
}
splx(s);
return (dr);
}
/* entry does not exist */
if (new->rtlifetime == 0) {
splx(s);
return (NULL);
}
n = (struct nd_defrouter *)malloc(sizeof(*n), M_IP6NDP, M_NOWAIT);
if (n == NULL) {
splx(s);
return (NULL);
}
bzero(n, sizeof(*n));
*n = *new;
insert:
/*
* Insert the new router in the Default Router List;
* The Default Router List should be in the descending order
* of router-preferece. Routers with the same preference are
* sorted in the arriving time order.
*/
/* insert at the end of the group */
for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
dr = TAILQ_NEXT(dr, dr_entry)) {
if (rtpref(n) > rtpref(dr))
break;
}
if (dr)
TAILQ_INSERT_BEFORE(dr, n, dr_entry);
else
TAILQ_INSERT_TAIL(&V_nd_defrouter, n, dr_entry);
defrouter_select();
splx(s);
return (n);
}
static struct nd_pfxrouter *
pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
{
struct nd_pfxrouter *search;
for (search = pr->ndpr_advrtrs.lh_first; search; search = search->pfr_next) {
if (search->router == dr)
break;
}
return (search);
}
/*
 * Record that a router advertises a prefix, then re-run on-link detection.
 *
 * pr - prefix that gains an advertising router
 * dr - the advertising router
 *
 * Allocation uses M_NOWAIT; on failure the function returns without
 * recording anything and without running the on-link check.
 */
static void
pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
{
	struct nd_pfxrouter *ref;

	ref = malloc(sizeof(*ref), M_IP6NDP, M_NOWAIT);
	if (ref != NULL) {
		bzero(ref, sizeof(*ref));
		ref->router = dr;
		LIST_INSERT_HEAD(&pr->ndpr_advrtrs, ref, pfr_entry);

		pfxlist_onlink_check();
	}
}
/*
 * Unlink an advertising-router record from its prefix's list and free it.
 *
 * pfr - record to remove; must not be used after this call.
 */
static void
pfxrtr_del(struct nd_pfxrouter *pfr)
{

	/* Unlink before freeing so the list never points at freed memory. */
	LIST_REMOVE(pfr, pfr_entry);

	free(pfr, M_IP6NDP);
}
struct nd_prefix *
nd6_prefix_lookup(struct nd_prefixctl *key)
{
INIT_VNET_INET6(curvnet);
struct nd_prefix *search;
for (search = V_nd_prefix.lh_first;
search; search = search->ndpr_next) {
if (key->ndpr_ifp == search->ndpr_ifp &&
key->ndpr_plen == search->ndpr_plen &&
in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
&search->ndpr_prefix.sin6_addr, key->ndpr_plen)) {
break;
}
}
return (search);
}
int
nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
struct nd_prefix **newp)
{
INIT_VNET_INET6(curvnet);
struct nd_prefix *new = NULL;
int error = 0;
int i, s;
char ip6buf[INET6_ADDRSTRLEN];
new = (struct nd_prefix *)malloc(sizeof(*new), M_IP6NDP, M_NOWAIT);
if (new == NULL)
return(ENOMEM);
bzero(new, sizeof(*new));
new->ndpr_ifp = pr->ndpr_ifp;
new->ndpr_prefix = pr->ndpr_prefix;
new->ndpr_plen = pr->ndpr_plen;
new->ndpr_vltime = pr->ndpr_vltime;
new->ndpr_pltime = pr->ndpr_pltime;
new->ndpr_flags = pr->ndpr_flags;
if ((error = in6_init_prefix_ltimes(new)) != 0) {
free(new, M_IP6NDP);
return(error);
}
new->ndpr_lastupdate = time_second;
if (newp != NULL)
*newp = new;
/* initialization */
LIST_INIT(&new->ndpr_advrtrs);
in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
/* make prefix in the canonical form */
for (i = 0; i < 4; i++)
new->ndpr_prefix.sin6_addr.s6_addr32[i] &=
new->ndpr_mask.s6_addr32[i];
s = splnet();
/* link ndpr_entry to nd_prefix list */
LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
splx(s);
/* ND_OPT_PI_FLAG_ONLINK processing */
if (new->ndpr_raf_onlink) {
int e;
if ((e = nd6_prefix_onlink(new)) != 0) {
nd6log((LOG_ERR, "nd6_prelist_add: failed to make "
"the prefix %s/%d on-link on %s (errno=%d)\n",
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
/* proceed anyway. XXX: is it correct? */
}
}
if (dr)
pfxrtr_add(new, dr);
return 0;
}
void
prelist_remove(struct nd_prefix *pr)
{
INIT_VNET_INET6(curvnet);
struct nd_pfxrouter *pfr, *next;
int e, s;
char ip6buf[INET6_ADDRSTRLEN];
/* make sure to invalidate the prefix until it is really freed. */
pr->ndpr_vltime = 0;
pr->ndpr_pltime = 0;
/*
* Though these flags are now meaningless, we'd rather keep the value
* of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
* when executing "ndp -p".
*/
if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0 &&
(e = nd6_prefix_offlink(pr)) != 0) {
nd6log((LOG_ERR, "prelist_remove: failed to make %s/%d offlink "
"on %s, errno=%d\n",
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
/* what should we do? */
}
if (pr->ndpr_refcnt > 0)
return; /* notice here? */
s = splnet();
/* unlink ndpr_entry from nd_prefix list */
LIST_REMOVE(pr, ndpr_entry);
/* free list of routers that adversed the prefix */
for (pfr = pr->ndpr_advrtrs.lh_first; pfr; pfr = next) {
next = pfr->pfr_next;
free(pfr, M_IP6NDP);
}
splx(s);
free(pr, M_IP6NDP);
pfxlist_onlink_check();
}
/*
 * Process one advertised prefix: refresh (or create) its entry in the
 * prefix list, then run the address autoconfiguration steps of RFC 2462
 * section 5.5.3 for the receiving interface.
 * new - advertised prefix information; new->ndpr_ifp is the interface
 * dr - advertising router; may be NULL
 * m - packet the prefix came from; may be NULL. Only its M_AUTHIPHDR and
 * M_AUTHIPDGM flags are looked at, and only if both macros are defined.
 * mcast - passed through unchanged to in6_ifadd()
 * Returns 0, the error from nd6_prelist_add(), EINVAL when the preferred
 * lifetime exceeds the valid lifetime, or EADDRNOTAVAIL when in6_ifadd()
 * fails. The existing XXX notes below suggest callers may ignore it.
 */
static int
prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
struct mbuf *m, int mcast)
{
INIT_VNET_INET6(curvnet);
struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
struct ifaddr *ifa;
struct ifnet *ifp = new->ndpr_ifp;
struct nd_prefix *pr;
int s = splnet();
int error = 0;
int newprefix = 0;
int auth;
struct in6_addrlifetime lt6_tmp;
char ip6buf[INET6_ADDRSTRLEN];
auth = 0;
if (m) {
/*
* Authenticity for NA consists authentication for
* both IP header and IP datagrams, doesn't it ?
*/
#if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
auth = ((m->m_flags & M_AUTHIPHDR) &&
(m->m_flags & M_AUTHIPDGM));
#endif
}
if ((pr = nd6_prefix_lookup(new)) != NULL) {
/*
* nd6_prefix_lookup() ensures that pr and new have the same
* prefix on a same interface.
*/
/*
* Update prefix information. Note that the on-link (L) bit
* and the autonomous (A) bit should NOT be changed from 1
* to 0.
*/
if (new->ndpr_raf_onlink == 1)
pr->ndpr_raf_onlink = 1;
if (new->ndpr_raf_auto == 1)
pr->ndpr_raf_auto = 1;
/* Prefix lifetimes are refreshed only when the L bit is advertised. */
if (new->ndpr_raf_onlink) {
pr->ndpr_vltime = new->ndpr_vltime;
pr->ndpr_pltime = new->ndpr_pltime;
(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
pr->ndpr_lastupdate = time_second;
}
if (new->ndpr_raf_onlink &&
(pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
int e;
if ((e = nd6_prefix_onlink(pr)) != 0) {
nd6log((LOG_ERR,
"prelist_update: failed to make "
"the prefix %s/%d on-link on %s "
"(errno=%d)\n",
ip6_sprintf(ip6buf,
&pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
/* proceed anyway. XXX: is it correct? */
}
}
/* Remember the advertising router if it is not yet recorded. */
if (dr && pfxrtr_lookup(pr, dr) == NULL)
pfxrtr_add(pr, dr);
} else {
struct nd_prefix *newpr = NULL;
newprefix = 1;
/* An unknown prefix with a zero valid lifetime is not recorded. */
if (new->ndpr_vltime == 0)
goto end;
/* Neither L nor A bit set: nothing to do with this prefix. */
if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
goto end;
error = nd6_prelist_add(new, dr, &newpr);
if (error != 0 || newpr == NULL) {
nd6log((LOG_NOTICE, "prelist_update: "
"nd6_prelist_add failed for %s/%d on %s "
"errno=%d, returnpr=%p\n",
ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
new->ndpr_plen, if_name(new->ndpr_ifp),
error, newpr));
goto end; /* we should just give up in this case. */
}
/*
* XXX: from the ND point of view, we can ignore a prefix
* with the on-link bit being zero. However, we need a
* prefix structure for references from autoconfigured
* addresses. Thus, we explicitly make sure that the prefix
* itself expires now.
*/
if (newpr->ndpr_raf_onlink == 0) {
newpr->ndpr_vltime = 0;
newpr->ndpr_pltime = 0;
in6_init_prefix_ltimes(newpr);
}
pr = newpr;
}
/*
* Address autoconfiguration based on Section 5.5.3 of RFC 2462.
* Note that pr must be non NULL at this point.
*/
/* 5.5.3 (a). Ignore the prefix without the A bit set. */
if (!new->ndpr_raf_auto)
goto end;
/*
* 5.5.3 (b). the link-local prefix should have been ignored in
* nd6_ra_input.
*/
/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
if (new->ndpr_pltime > new->ndpr_vltime) {
error = EINVAL; /* XXX: won't be used */
goto end;
}
/*
* 5.5.3 (d). If the prefix advertised is not equal to the prefix of
* an address configured by stateless autoconfiguration already in the
* list of addresses associated with the interface, and the Valid
* Lifetime is not 0, form an address. We first check if we have
* a matching prefix.
* Note: we apply a clarification in rfc2462bis-02 here. We only
* consider autoconfigured addresses while RFC2462 simply said
* "address".
*/
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
struct in6_ifaddr *ifa6;
u_int32_t remaininglifetime;
if (ifa->ifa_addr->sa_family != AF_INET6)
continue;
ifa6 = (struct in6_ifaddr *)ifa;
/*
* We only consider autoconfigured addresses as per rfc2462bis.
*/
if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF))
continue;
/*
* Spec is not clear here, but I believe we should concentrate
* on unicast (i.e. not anycast) addresses.
* XXX: other ia6_flags? detached or duplicated?
*/
if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
continue;
/*
* Ignore the address if it is not associated with a prefix
* or is associated with a prefix that is different from this
* one. (pr is never NULL here)
*/
if (ifa6->ia6_ndpr != pr)
continue;
if (ia6_match == NULL) /* remember the first one */
ia6_match = ifa6;
/*
* An already autoconfigured address matched. Now that we
* are sure there is at least one matched address, we can
* proceed to 5.5.3. (e): update the lifetimes according to the
* "two hours" rule and the privacy extension.
* We apply some clarifications in rfc2462bis:
* - use remaininglifetime instead of storedlifetime as a
* variable name
* - remove the dead code in the "two-hour" rule
*/
#define TWOHOUR (120*60)
lt6_tmp = ifa6->ia6_lifetime;
/* Work out how much of the stored valid lifetime is still left. */
if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
remaininglifetime = ND6_INFINITE_LIFETIME;
else if (time_second - ifa6->ia6_updatetime >
lt6_tmp.ia6t_vltime) {
/*
* The case of "invalid" address. We should usually
* not see this case.
*/
remaininglifetime = 0;
} else
remaininglifetime = lt6_tmp.ia6t_vltime -
(time_second - ifa6->ia6_updatetime);
/* when not updating, keep the current stored lifetime. */
lt6_tmp.ia6t_vltime = remaininglifetime;
/*
* Two-hour rule: accept the advertised valid lifetime if it is
* longer than two hours or longer than what remains; otherwise
* only accept it from an authenticated packet, or cap the
* remaining lifetime at two hours.
*/
if (TWOHOUR < new->ndpr_vltime ||
remaininglifetime < new->ndpr_vltime) {
lt6_tmp.ia6t_vltime = new->ndpr_vltime;
} else if (remaininglifetime <= TWOHOUR) {
if (auth) {
lt6_tmp.ia6t_vltime = new->ndpr_vltime;
}
} else {
/*
* new->ndpr_vltime <= TWOHOUR &&
* TWOHOUR < remaininglifetime
*/
lt6_tmp.ia6t_vltime = TWOHOUR;
}
/* The 2 hour rule is not imposed for preferred lifetime. */
lt6_tmp.ia6t_pltime = new->ndpr_pltime;
in6_init_address_ltimes(pr, &lt6_tmp);
/*
* We need to treat lifetimes for temporary addresses
* differently, according to
* draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
* we only update the lifetimes when they are in the maximum
* intervals.
*/
if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
u_int32_t maxvltime, maxpltime;
/*
* The caps are the configured temporary lifetimes minus the
* address age and the desync factor, floored at zero.
*/
if (V_ip6_temp_valid_lifetime >
(u_int32_t)((time_second - ifa6->ia6_createtime) +
V_ip6_desync_factor)) {
maxvltime = V_ip6_temp_valid_lifetime -
(time_second - ifa6->ia6_createtime) -
V_ip6_desync_factor;
} else
maxvltime = 0;
if (V_ip6_temp_preferred_lifetime >
(u_int32_t)((time_second - ifa6->ia6_createtime) +
V_ip6_desync_factor)) {
maxpltime = V_ip6_temp_preferred_lifetime -
(time_second - ifa6->ia6_createtime) -
V_ip6_desync_factor;
} else
maxpltime = 0;
if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME ||
lt6_tmp.ia6t_vltime > maxvltime) {
lt6_tmp.ia6t_vltime = maxvltime;
}
if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME ||
lt6_tmp.ia6t_pltime > maxpltime) {
lt6_tmp.ia6t_pltime = maxpltime;
}
}
ifa6->ia6_lifetime = lt6_tmp;
ifa6->ia6_updatetime = time_second;
}
if (ia6_match == NULL && new->ndpr_vltime) {
int ifidlen;
/*
* 5.5.3 (d) (continued)
* No address matched and the valid lifetime is non-zero.
* Create a new address.
*/
/*
* Prefix Length check:
* If the sum of the prefix length and interface identifier
* length does not equal 128 bits, the Prefix Information
* option MUST be ignored. The length of the interface
* identifier is defined in a separate link-type specific
* document.
*/
ifidlen = in6_if2idlen(ifp);
if (ifidlen < 0) {
/* this should not happen, so we always log it. */
log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
if_name(ifp));
goto end;
}
if (ifidlen + pr->ndpr_plen != 128) {
nd6log((LOG_INFO,
"prelist_update: invalid prefixlen "
"%d for %s, ignored\n",
pr->ndpr_plen, if_name(ifp)));
goto end;
}
if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
/*
* note that we should use pr (not new) for reference.
*/
pr->ndpr_refcnt++;
ia6->ia6_ndpr = pr;
/*
* RFC 3041 3.3 (2).
* When a new public address is created as described
* in RFC2462, also create a new temporary address.
*
* RFC 3041 3.5.
* When an interface connects to a new link, a new
* randomized interface identifier should be generated
* immediately together with a new set of temporary
* addresses. Thus, we specify 1 as the 2nd arg of
* in6_tmpifadd().
*/
if (V_ip6_use_tempaddr) {
int e;
if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
nd6log((LOG_NOTICE, "prelist_update: "
"failed to create a temporary "
"address, errno=%d\n",
e));
}
}
/*
* A newly added address might affect the status
* of other addresses, so we check and update it.
* XXX: what if address duplication happens?
*/
pfxlist_onlink_check();
} else {
/* just set an error. do not bark here. */
error = EADDRNOTAVAIL; /* XXX: might be unused. */
}
}
/* Common exit: every path after splnet() leaves through here. */
end:
splx(s);
return error;
}
/*
* A supplement function used in the on-link detection below;
* detect if a given prefix has a (probably) reachable advertising router.
* XXX: lengthy function name...
*/
static struct nd_pfxrouter *
find_pfxlist_reachable_router(struct nd_prefix *pr)
{
struct nd_pfxrouter *pfxrtr;
struct llentry *ln;
for (pfxrtr = LIST_FIRST(&pr->ndpr_advrtrs); pfxrtr;
pfxrtr = LIST_NEXT(pfxrtr, pfr_entry)) {
IF_AFDATA_LOCK(pfxrtr->router->ifp);
if ((ln = nd6_lookup(&pfxrtr->router->rtaddr, 0,
pfxrtr->router->ifp)) &&
ND6_IS_LLINFO_PROBREACH(ln)) {
IF_AFDATA_UNLOCK(pfxrtr->router->ifp);
break; /* found */
}
IF_AFDATA_UNLOCK(pfxrtr->router->ifp);
}
return (pfxrtr);
}
/*
 * Check if each prefix in the prefix list has at least one available router
 * that advertised the prefix (a router is "available" if its neighbor cache
 * entry is reachable or probably reachable).
 * If the check fails, the prefix may be off-link, because, for example,
 * we have moved from the network but the lifetime of the prefix has not
 * expired yet. So we should not use the prefix if there is another prefix
 * that has an available router.
 * But, if there is no prefix that has an available router, we still regard
 * all the prefixes as on-link. This is because we can't tell if all the
 * routers are simply dead or if we really moved from the network and there
 * is no router around us.
 *
 * The function works in three passes:
 *  1. set or clear NDPRF_DETACHED on every non-link-local prefix that has
 *     the L bit;
 *  2. remove or reinstall interface routes so that NDPRF_ONLINK follows the
 *     detached state;
 *  3. set or clear IN6_IFF_DETACHED on autoconfigured addresses, restarting
 *     DAD on addresses that become attached again.
 */
void
pfxlist_onlink_check(void)
{
	INIT_VNET_INET6(curvnet);
	struct nd_prefix *pr;
	struct in6_ifaddr *ifa;
	struct nd_defrouter *dr;
	struct nd_pfxrouter *pfxrtr = NULL;

	/*
	 * Check if there is a prefix that has a reachable advertising
	 * router.
	 */
	for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
			break;
	}

	/*
	 * If we have no such prefix, check whether any default router
	 * advertises a prefix at all. pfxrtr stays NULL when none does
	 * (or when either list is empty).
	 */
	if (pr == NULL) {
		for (dr = TAILQ_FIRST(&V_nd_defrouter); dr;
		    dr = TAILQ_NEXT(dr, dr_entry)) {
			struct nd_prefix *pr0;

			for (pr0 = V_nd_prefix.lh_first; pr0;
			    pr0 = pr0->ndpr_next) {
				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
					break;
			}
			if (pfxrtr != NULL)
				break;
		}
	}

	if (pr != NULL || (TAILQ_FIRST(&V_nd_defrouter) && pfxrtr == NULL)) {
		/*
		 * There is at least one prefix that has a reachable router,
		 * or at least a router which probably does not advertise
		 * any prefixes. The latter would be the case when we move
		 * to a new link where we have a router that does not provide
		 * prefixes and we configure an address by hand.
		 * Detach prefixes which have no reachable advertising
		 * router, and attach other prefixes.
		 */
		for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
			/* XXX: a link-local prefix should never be detached */
			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
				continue;

			/*
			 * we aren't interested in prefixes without the L bit
			 * set.
			 */
			if (pr->ndpr_raf_onlink == 0)
				continue;

			/*
			 * One lookup decides the detached state: detached
			 * exactly when no advertising router is reachable.
			 */
			if (find_pfxlist_reachable_router(pr) != NULL)
				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
			else
				pr->ndpr_stateflags |= NDPRF_DETACHED;
		}
	} else {
		/* there is no prefix that has a reachable router */
		for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
				continue;

			if (pr->ndpr_raf_onlink == 0)
				continue;

			if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0)
				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
		}
	}

	/*
	 * Remove each interface route associated with a (just) detached
	 * prefix, and reinstall the interface route for a (just) attached
	 * prefix. Note that not every reinstallation attempt necessarily
	 * succeeds, for instance when the same prefix is shared among
	 * multiple interfaces. Such cases will be handled in
	 * nd6_prefix_onlink, so we don't have to care about them.
	 */
	for (pr = V_nd_prefix.lh_first; pr; pr = pr->ndpr_next) {
		int e;
		char ip6buf[INET6_ADDRSTRLEN];

		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr))
			continue;

		if (pr->ndpr_raf_onlink == 0)
			continue;

		if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
		    (pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
			if ((e = nd6_prefix_offlink(pr)) != 0) {
				nd6log((LOG_ERR,
				    "pfxlist_onlink_check: failed to "
				    "make %s/%d offlink, errno=%d\n",
				    ip6_sprintf(ip6buf,
				    &pr->ndpr_prefix.sin6_addr),
				    pr->ndpr_plen, e));
			}
		}
		if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0 &&
		    pr->ndpr_raf_onlink) {
			if ((e = nd6_prefix_onlink(pr)) != 0) {
				nd6log((LOG_ERR,
				    "pfxlist_onlink_check: failed to "
				    "make %s/%d onlink, errno=%d\n",
				    ip6_sprintf(ip6buf,
				    &pr->ndpr_prefix.sin6_addr),
				    pr->ndpr_plen, e));
			}
		}
	}

	/*
	 * Changes on the prefix status might affect address status as well.
	 * Make sure that all addresses derived from an attached prefix are
	 * attached, and that all addresses derived from a detached prefix are
	 * detached. Note, however, that a manually configured address should
	 * always be attached.
	 * The precise detection logic is same as the one for prefixes.
	 */
	for (ifa = V_in6_ifaddr; ifa; ifa = ifa->ia_next) {
		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
			continue;

		if (ifa->ia6_ndpr == NULL) {
			/*
			 * This can happen when we first configure the address
			 * (i.e. the address exists, but the prefix does not).
			 * XXX: complicated relationships...
			 */
			continue;
		}

		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
			break;
	}
	if (ifa) {
		/*
		 * At least one autoconfigured address has a reachable
		 * router: attach addresses that have one, detach the rest.
		 */
		for (ifa = V_in6_ifaddr; ifa; ifa = ifa->ia_next) {
			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
				continue;

			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
				continue;

			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
					/* Re-attached: run DAD again. */
					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
					nd6_dad_start((struct ifaddr *)ifa, 0);
				}
			} else {
				ifa->ia6_flags |= IN6_IFF_DETACHED;
			}
		}
	} else {
		/* No address has a reachable router: attach them all. */
		for (ifa = V_in6_ifaddr; ifa; ifa = ifa->ia_next) {
			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
				continue;

			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
				/* Do we need a delay in this case? */
				nd6_dad_start((struct ifaddr *)ifa, 0);
			}
		}
	}
}
/*
 * Install the interface route for a prefix and mark the prefix on-link.
 * pr - prefix to make on-link
 * Returns EEXIST if NDPRF_ONLINK is already set on pr. Returns 0 when the
 * route was added (NDPRF_ONLINK is then set), and also - without adding a
 * route or setting the flag - when another prefix with the same length and
 * bits is already on-link, or when the interface has no IPv6 address to
 * associate with the route. Otherwise returns the rtrequest() error.
 * NOTE(review): the lines below that start with "+ " are diff markers
 * carried over from the patch this text was extracted from; they are not
 * C source.
 */
int
nd6_prefix_onlink(struct nd_prefix *pr)
{
INIT_VNET_INET6(curvnet);
struct ifaddr *ifa;
struct ifnet *ifp = pr->ndpr_ifp;
struct sockaddr_in6 mask6;
struct nd_prefix *opr;
u_long rtflags;
int error = 0;
+ struct radix_node_head *rnh;
struct rtentry *rt = NULL;
char ip6buf[INET6_ADDRSTRLEN];
/* Template gateway: an AF_LINK sockaddr with only length and family set. */
struct sockaddr_dl null_sdl = {sizeof(null_sdl), AF_LINK};
/* sanity check */
if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
nd6log((LOG_ERR,
"nd6_prefix_onlink: %s/%d is already on-link\n",
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen));
return (EEXIST);
}
/*
* Add the interface route associated with the prefix. Before
* installing the route, check if there's the same prefix on another
* interface, and the prefix has already installed the interface route.
* Although such a configuration is expected to be rare, we explicitly
* allow it.
*/
for (opr = V_nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
if (opr == pr)
continue;
if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
continue;
/* Same prefix already on-link elsewhere: succeed without a route. */
if (opr->ndpr_plen == pr->ndpr_plen &&
in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
&opr->ndpr_prefix.sin6_addr, pr->ndpr_plen))
return (0);
}
/*
* We prefer link-local addresses as the associated interface address.
*/
/* search for a link-local addr */
ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
if (ifa == NULL) {
/* XXX: freebsd does not have ifa_ifwithaf */
/* Fall back to the first AF_INET6 address on the interface. */
TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) {
if (ifa->ifa_addr->sa_family == AF_INET6)
break;
}
/* should we care about ia6_flags? */
}
if (ifa == NULL) {
/*
* This can still happen, when, for example, we receive an RA
* containing a prefix with the L bit set and the A bit clear,
* after removing all IPv6 addresses on the receiving
* interface. This should, of course, be rare though.
*/
nd6log((LOG_NOTICE,
"nd6_prefix_onlink: failed to find any ifaddr"
" to add route for a prefix(%s/%d) on %s\n",
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(ifp)));
return (0);
}
/*
* in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
* ifa->ifa_rtrequest = nd6_rtrequest;
*/
bzero(&mask6, sizeof(mask6));
mask6.sin6_len = sizeof(mask6);
mask6.sin6_addr = pr->ndpr_mask;
/*
* RTF_CLONING is set here and then set again or cleared just below,
* so its final value depends only on nd6_need_cache(ifp).
*/
rtflags = ifa->ifa_flags | RTF_CLONING | RTF_UP;
if (nd6_need_cache(ifp)) {
/* explicitly set in case ifa_flags does not set the flag. */
rtflags |= RTF_CLONING;
} else {
/*
* explicitly clear the cloning bit in case ifa_flags sets it.
*/
rtflags &= ~RTF_CLONING;
}
error = rtrequest(RTM_ADD, (struct sockaddr *)&pr->ndpr_prefix,
ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt);
if (error == 0) {
if (rt != NULL) /* this should be non NULL, though */ {
+ rnh = V_rt_tables[rt->rt_fibnum][AF_INET6];
+ RADIX_NODE_HEAD_LOCK(rnh);
RT_LOCK(rt);
/*
* Replace the gateway with the link-level template; when
* rt_setgate() returns 0, fill in the interface type and index.
*/
if (!rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl)) {
((struct sockaddr_dl *)rt->rt_gateway)->sdl_type =
rt->rt_ifp->if_type;
((struct sockaddr_dl *)rt->rt_gateway)->sdl_index =
rt->rt_ifp->if_index;
}
+ RADIX_NODE_HEAD_UNLOCK(rnh);
/* The routing socket message is sent while rt is still locked. */
nd6_rtmsg(RTM_ADD, rt);
RT_UNLOCK(rt);
}
pr->ndpr_stateflags |= NDPRF_ONLINK;
} else {
char ip6bufg[INET6_ADDRSTRLEN], ip6bufm[INET6_ADDRSTRLEN];
nd6log((LOG_ERR, "nd6_prefix_onlink: failed to add route for a"
" prefix (%s/%d) on %s, gw=%s, mask=%s, flags=%lx "
"errno = %d\n",
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(ifp),
ip6_sprintf(ip6bufg, &((struct sockaddr_in6 *)ifa->ifa_addr)->sin6_addr),
ip6_sprintf(ip6bufm, &mask6.sin6_addr), rtflags, error));
}
/* NOTE(review): presumably drops the reference rtrequest() returned - confirm. */
if (rt != NULL) {
RT_LOCK(rt);
RT_REMREF(rt);
RT_UNLOCK(rt);
}
return (error);
}
int
nd6_prefix_offlink(struct nd_prefix *pr)
{
INIT_VNET_INET6(curvnet);
int error = 0;
struct ifnet *ifp = pr->ndpr_ifp;
struct nd_prefix *opr;
struct sockaddr_in6 sa6, mask6;
struct rtentry *rt = NULL;
char ip6buf[INET6_ADDRSTRLEN];
/* sanity check */
if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
nd6log((LOG_ERR,
"nd6_prefix_offlink: %s/%d is already off-link\n",
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen));
return (EEXIST);
}
bzero(&sa6, sizeof(sa6));
sa6.sin6_family = AF_INET6;
sa6.sin6_len = sizeof(sa6);
bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
sizeof(struct in6_addr));
bzero(&mask6, sizeof(mask6));
mask6.sin6_family = AF_INET6;
mask6.sin6_len = sizeof(sa6);
bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));
error = rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
(struct sockaddr *)&mask6, 0, &rt);
if (error == 0) {
pr->ndpr_stateflags &= ~NDPRF_ONLINK;
/* report the route deletion to the routing socket. */
if (rt != NULL)
nd6_rtmsg(RTM_DELETE, rt);
/*
* There might be the same prefix on another interface,
* the prefix which could not be on-link just because we have
* the interface route (see comments in nd6_prefix_onlink).
* If there's one, try to make the prefix on-link on the
* interface.
*/
for (opr = V_nd_prefix.lh_first; opr; opr = opr->ndpr_next) {
if (opr == pr)
continue;
if ((opr->ndpr_stateflags & NDPRF_ONLINK) != 0)
continue;
/*
* KAME specific: detached prefixes should not be
* on-link.
*/
if ((opr->ndpr_stateflags & NDPRF_DETACHED) != 0)
continue;
if (opr->ndpr_plen == pr->ndpr_plen &&
in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
&opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
int e;
if ((e = nd6_prefix_onlink(opr)) != 0) {
nd6log((LOG_ERR,
"nd6_prefix_offlink: failed to "
"recover a prefix %s/%d from %s "
"to %s (errno = %d)\n",
ip6_sprintf(ip6buf,
&opr->ndpr_prefix.sin6_addr),
opr->ndpr_plen, if_name(ifp),
if_name(opr->ndpr_ifp), e));
}
}
}
} else {
/* XXX: can we still set the NDPRF_ONLINK flag? */
nd6log((LOG_ERR,
"nd6_prefix_offlink: failed to delete route: "
"%s/%d on %s (errno = %d)\n",
ip6_sprintf(ip6buf, &sa6.sin6_addr), pr->ndpr_plen,
if_name(ifp), error));
}
if (rt != NULL) {
RTFREE(rt);
}
return (error);
}
static struct in6_ifaddr *
in6_ifadd(struct nd_prefixctl *pr, int mcast)
{
INIT_VNET_INET6(curvnet);
struct ifnet *ifp = pr->ndpr_ifp;
struct ifaddr *ifa;
struct in6_aliasreq ifra;
struct in6_ifaddr *ia, *ib;
int error, plen0;
struct in6_addr mask;
int prefixlen = pr->ndpr_plen;
int updateflags;
char ip6buf[INET6_ADDRSTRLEN];
in6_prefixlen2mask(&mask, prefixlen);
/*
* find a link-local address (will be interface ID).
* Is it really mandatory? Theoretically, a global or a site-local
* address can be configured without a link-local address, if we
* have a unique interface identifier...
*
* it is not mandatory to have a link-local address, we can generate
* interface identifier on the fly. we do this because:
* (1) it should be the easiest way to find interface identifier.
* (2) RFC2462 5.4 suggesting the use of the same interface identifier
* for multiple addresses on a single interface, and possible shortcut
* of DAD. we omitted DAD for this reason in the past.
* (3) a user can prevent autoconfiguration of global address
* by removing link-local address by hand (this is partly because we
* don't have other way to control the use of IPv6 on an interface.
* this has been our design choice - cf. NRL's "ifconfig auto").
* (4) it is easier to manage when an interface has addresses
* with the same interface identifier, than to have multiple addresses
* with different interface identifiers.
*/
ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
if (ifa)
ib = (struct in6_ifaddr *)ifa;
else
return NULL;
/* prefixlen + ifidlen must be equal to 128 */
plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
if (prefixlen != plen0) {
nd6log((LOG_INFO, "in6_ifadd: wrong prefixlen for %s "
"(prefix=%d ifid=%d)\n",
if_name(ifp), prefixlen, 128 - plen0));
return NULL;
}
/* make ifaddr */
bzero(&ifra, sizeof(ifra));
/*
* in6_update_ifa() does not use ifra_name, but we accurately set it
* for safety.
*/
strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
ifra.ifra_addr.sin6_family = AF_INET6;
ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6);
/* prefix */
ifra.ifra_addr.sin6_addr = pr->ndpr_prefix.sin6_addr;
ifra.ifra_addr.sin6_addr.s6_addr32[0] &= mask.s6_addr32[0];
ifra.ifra_addr.sin6_addr.s6_addr32[1] &= mask.s6_addr32[1];
ifra.ifra_addr.sin6_addr.s6_addr32[2] &= mask.s6_addr32[2];
ifra.ifra_addr.sin6_addr.s6_addr32[3] &= mask.s6_addr32[3];
/* interface ID */
ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
(ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
(ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
(ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
(ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
/* new prefix mask. */
ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6);
ifra.ifra_prefixmask.sin6_family = AF_INET6;
bcopy(&mask, &ifra.ifra_prefixmask.sin6_addr,
sizeof(ifra.ifra_prefixmask.sin6_addr));
/* lifetimes. */
ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
/* XXX: scope zone ID? */
ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
/*
* Make sure that we do not have this address already. This should
* usually not happen, but we can still see this case, e.g., if we
* have manually configured the exact address to be configured.
*/
if (in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr) != NULL) {
/* this should be rare enough to make an explicit log */
log(LOG_INFO, "in6_ifadd: %s is already configured\n",
ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
return (NULL);
}
/*
* Allocate ifaddr structure, link into chain, etc.
* If we are going to create a new address upon receiving a multicasted
* RA, we need to impose a random delay before starting DAD.
* [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
*/
updateflags = 0;
if (mcast)
updateflags |= IN6_IFAUPDATE_DADDELAY;
if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
nd6log((LOG_ERR,
"in6_ifadd: failed to make ifaddr %s on %s (errno=%d)\n",
ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
if_name(ifp), error));
return (NULL); /* ifaddr must not have been allocated. */
}
ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
return (ia); /* this is always non-NULL */
}
/*
* ia0 - corresponding public address
*/
int
in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
{
INIT_VNET_INET6(curvnet);
struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
struct in6_ifaddr *newia, *ia;
struct in6_aliasreq ifra;
int i, error;
int trylimit = 3; /* XXX: adhoc value */
int updateflags;
u_int32_t randid[2];
time_t vltime0, pltime0;
bzero(&ifra, sizeof(ifra));
strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name));
ifra.ifra_addr = ia0->ia_addr;
/* copy prefix mask */
ifra.ifra_prefixmask = ia0->ia_prefixmask;
/* clear the old IFID */
for (i = 0; i < 4; i++) {
ifra.ifra_addr.sin6_addr.s6_addr32[i] &=
ifra.ifra_prefixmask.sin6_addr.s6_addr32[i];
}
again:
if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
(const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
nd6log((LOG_NOTICE, "in6_tmpifadd: failed to find a good "
"random IFID\n"));
return (EINVAL);
}
ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
(randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
(randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
/*
* in6_get_tmpifid() quite likely provided a unique interface ID.
* However, we may still have a chance to see collision, because
* there may be a time lag between generation of the ID and generation
* of the address. So, we'll do one more sanity check.
*/
for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr,
&ifra.ifra_addr.sin6_addr)) {
if (trylimit-- == 0) {
/*
* Give up. Something strange should have
* happened.
*/
nd6log((LOG_NOTICE, "in6_tmpifadd: failed to "
"find a unique random IFID\n"));
return (EEXIST);
}
forcegen = 1;
goto again;
}
}
/*
* The Valid Lifetime is the lower of the Valid Lifetime of the
* public address or TEMP_VALID_LIFETIME.
* The Preferred Lifetime is the lower of the Preferred Lifetime
* of the public address or TEMP_PREFERRED_LIFETIME -
* DESYNC_FACTOR.
*/
if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
(ia0->ia6_lifetime.ia6t_vltime -
(time_second - ia0->ia6_updatetime));
if (vltime0 > V_ip6_temp_valid_lifetime)
vltime0 = V_ip6_temp_valid_lifetime;
} else
vltime0 = V_ip6_temp_valid_lifetime;
if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
(ia0->ia6_lifetime.ia6t_pltime -
(time_second - ia0->ia6_updatetime));
if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
pltime0 = V_ip6_temp_preferred_lifetime -
V_ip6_desync_factor;
}
} else
pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
ifra.ifra_lifetime.ia6t_vltime = vltime0;
ifra.ifra_lifetime.ia6t_pltime = pltime0;
/*
* A temporary address is created only if this calculated Preferred
* Lifetime is greater than REGEN_ADVANCE time units.
*/
if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
return (0);
/* XXX: scope zone ID? */
ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
/* allocate ifaddr structure, link into chain, etc. */
updateflags = 0;
if (delay)
updateflags |= IN6_IFAUPDATE_DADDELAY;
if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
return (error);
newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
if (newia == NULL) { /* XXX: can it happen? */
nd6log((LOG_ERR,
"in6_tmpifadd: ifa update succeeded, but we got "
"no ifaddr\n"));
return (EINVAL); /* XXX */
}
newia->ia6_ndpr = ia0->ia6_ndpr;
newia->ia6_ndpr->ndpr_refcnt++;
/*
* A newly added address might affect the status of other addresses.
* XXX: when the temporary address is generated with a new public
* address, the onlink check is redundant. However, it would be safe
* to do the check explicitly everywhere a new address is generated,
* and, in fact, we surely need the check when we create a new
* temporary address due to deprecation of an old temporary address.
*/
pfxlist_onlink_check();
return (0);
}
/*
 * Convert the relative preferred/valid lifetimes of a prefix into absolute
 * times based on time_second.  An infinite lifetime is stored as 0.
 * Always returns 0.
 */
static int
in6_init_prefix_ltimes(struct nd_prefix *ndpr)
{
	ndpr->ndpr_preferred =
	    (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME) ?
	    0 : time_second + ndpr->ndpr_pltime;

	ndpr->ndpr_expire =
	    (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME) ?
	    0 : time_second + ndpr->ndpr_vltime;

	return 0;
}
/*
 * Fill in the absolute expiry and preferred times of an address lifetime
 * from its relative lifetimes, based on time_second.  An infinite lifetime
 * is stored as 0.  The `new' argument is not used by this function.
 */
static void
in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
{
	/* ia6t_expire: end of the valid lifetime */
	if (lt6->ia6t_vltime != ND6_INFINITE_LIFETIME)
		lt6->ia6t_expire = time_second + lt6->ia6t_vltime;
	else
		lt6->ia6t_expire = 0;

	/* ia6t_preferred: end of the preferred lifetime */
	if (lt6->ia6t_pltime != ND6_INFINITE_LIFETIME)
		lt6->ia6t_preferred = time_second + lt6->ia6t_pltime;
	else
		lt6->ia6t_preferred = 0;
}
/*
 * Delete all the routing table entries that use the specified gateway.
 * Only link-local gateways are handled; any other address is a no-op.
 * The `ifp' argument is not used by this function.
 * XXX: this walks every entry of the routing table, so it shouldn't be
 * called when acting as a router.
 */
void
rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
{
	INIT_VNET_NET(curvnet);
	struct radix_node_head *head = V_rt_tables[0][AF_INET6];
	int spl = splnet();

	/* We'll care only link-local addresses */
	if (IN6_IS_ADDR_LINKLOCAL(gateway)) {
		/* Walk the table with the head locked, deleting matches. */
		RADIX_NODE_HEAD_LOCK(head);
		head->rnh_walktree(head, rt6_deleteroute, (void *)gateway);
		RADIX_NODE_HEAD_UNLOCK(head);
	}
	splx(spl);
}
/*
 * Tree-walk callback used by rt6_flush(): delete the route `rn' if it is a
 * non-static host route whose AF_INET6 gateway equals the address in `arg'.
 * Returns 0 when the entry is left alone, otherwise the result of the
 * RTM_DELETE request.
 */
static int
rt6_deleteroute(struct radix_node *rn, void *arg)
{
#define SIN6(s)	((struct sockaddr_in6 *)s)
	struct rtentry *rte = (struct rtentry *)rn;
	struct in6_addr *gw = (struct in6_addr *)arg;

	/* Only routes that go through the given IPv6 gateway are candidates. */
	if (rte->rt_gateway == NULL ||
	    rte->rt_gateway->sa_family != AF_INET6 ||
	    !IN6_ARE_ADDR_EQUAL(gw, &SIN6(rte->rt_gateway)->sin6_addr))
		return (0);

	/*
	 * Do not delete a static route.
	 * XXX: this seems to be a bit ad-hoc.  Should we consider the
	 * 'cloned' bit instead?
	 *
	 * Also delete only host routes, which in particular means the
	 * default route is never deleted.  Both conditions hold exactly
	 * when RTF_HOST is set and RTF_STATIC is clear.
	 */
	if ((rte->rt_flags & (RTF_STATIC | RTF_HOST)) != RTF_HOST)
		return (0);

	return (rtrequest(RTM_DELETE, rt_key(rte), rte->rt_gateway,
	    rt_mask(rte), rte->rt_flags, 0));
#undef SIN6
}
/*
 * Select the default interface by index.  An index of 0 clears the
 * default.  Returns EINVAL when the index is out of range or names no
 * interface, and 0 otherwise.
 */
int
nd6_setdefaultiface(int ifindex)
{
	INIT_VNET_NET(curvnet);
	INIT_VNET_INET6(curvnet);

	if (ifindex < 0 || ifindex > V_if_index)
		return (EINVAL);
	if (ifindex != 0 && ifnet_byindex(ifindex) == NULL)
		return (EINVAL);

	/* Nothing to do when the default interface is unchanged. */
	if (V_nd6_defifindex == ifindex)
		return (0);

	V_nd6_defifindex = ifindex;
	V_nd6_defifp = (V_nd6_defifindex > 0) ?
	    ifnet_byindex(V_nd6_defifindex) : NULL;

	/*
	 * Our current implementation assumes a one-to-one mapping between
	 * interfaces and links, so it would be natural to use the
	 * default interface as the default link.
	 */
	scope6_setdefault(V_nd6_defifp);

	return (0);
}

File Metadata

Mime Type
text/x-diff
Expires
Fri, Feb 21, 9:02 PM (8 h, 22 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
16744960
Default Alt Text
(712 KB)

Event Timeline