Index: head/sys/compat/linux/linux_misc.c =================================================================== --- head/sys/compat/linux/linux_misc.c (revision 180290) +++ head/sys/compat/linux/linux_misc.c (revision 180291) @@ -1,1865 +1,1867 @@ /*- * Copyright (c) 2002 Doug Rabson * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_mac.h" #include #include #include #if defined(__i386__) #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include #include #include #include #include #ifdef __i386__ #include #endif #define BSD_TO_LINUX_SIGNAL(sig) \ (((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig) static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, RLIMIT_MEMLOCK, RLIMIT_AS }; struct l_sysinfo { l_long uptime; /* Seconds since boot */ l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ #define LINUX_SYSINFO_LOADS_SCALE 65536 l_ulong totalram; /* Total usable main memory size */ l_ulong freeram; /* Available memory size */ l_ulong sharedram; /* Amount of shared memory */ l_ulong bufferram; /* Memory used by buffers */ l_ulong totalswap; /* Total swap space size */ l_ulong freeswap; /* swap space still available */ l_ushort procs; /* Number of current processes */ l_ushort pads; l_ulong totalbig; l_ulong freebig; l_uint mem_unit; char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ }; int linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) { struct l_sysinfo sysinfo; vm_object_t object; int i, j; struct timespec ts; getnanouptime(&ts); if (ts.tv_nsec != 0) ts.tv_sec++; sysinfo.uptime = ts.tv_sec; /* Use the information from the mib to get our load averages */ for (i = 0; i < 3; i++) sysinfo.loads[i] = averunnable.ldavg[i] * LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; sysinfo.totalram = physmem * PAGE_SIZE; sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE; sysinfo.sharedram = 0; mtx_lock(&vm_object_list_mtx); TAILQ_FOREACH(object, &vm_object_list, object_list) if (object->shadow_count > 1) sysinfo.sharedram += object->resident_page_count; mtx_unlock(&vm_object_list_mtx); sysinfo.sharedram *= PAGE_SIZE; sysinfo.bufferram = 0; swap_pager_status(&i, &j); sysinfo.totalswap = i * PAGE_SIZE; sysinfo.freeswap = (i - j) * PAGE_SIZE; sysinfo.procs = nprocs; /* The following are only present in newer Linux kernels. */ sysinfo.totalbig = 0; sysinfo.freebig = 0; sysinfo.mem_unit = 1; return copyout(&sysinfo, args->info, sizeof(sysinfo)); } int linux_alarm(struct thread *td, struct linux_alarm_args *args) { struct itimerval it, old_it; int error; #ifdef DEBUG if (ldebug(alarm)) printf(ARGS(alarm, "%u"), args->secs); #endif if (args->secs > 100000000) return (EINVAL); it.it_value.tv_sec = (long)args->secs; it.it_value.tv_usec = 0; it.it_interval.tv_sec = 0; it.it_interval.tv_usec = 0; error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); if (error) return (error); if (timevalisset(&old_it.it_value)) { if (old_it.it_value.tv_usec != 0) old_it.it_value.tv_sec++; td->td_retval[0] = old_it.it_value.tv_sec; } return (0); } int linux_brk(struct thread *td, struct linux_brk_args *args) { struct vmspace *vm = td->td_proc->p_vmspace; vm_offset_t new, old; struct obreak_args /* { char * nsize; } */ tmp; #ifdef DEBUG if (ldebug(brk)) printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); #endif old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); new = (vm_offset_t)args->dsend; tmp.nsize = (char *)new; if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp)) td->td_retval[0] = (long)new; else td->td_retval[0] = (long)old; return 0; } #if defined(__i386__) /* XXX: what about amd64/linux32? */ int linux_uselib(struct thread *td, struct linux_uselib_args *args) { struct nameidata ni; struct vnode *vp; struct exec *a_out; struct vattr attr; vm_offset_t vmaddr; unsigned long file_offset; vm_offset_t buffer; unsigned long bss_size; char *library; int error; int locked, vfslocked; LCONVPATHEXIST(td, args->library, &library); #ifdef DEBUG if (ldebug(uselib)) printf(ARGS(uselib, "%s"), library); #endif a_out = NULL; vfslocked = 0; locked = 0; vp = NULL; NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1, UIO_SYSSPACE, library, td); error = namei(&ni); LFREEPATH(library); if (error) goto cleanup; vp = ni.ni_vp; vfslocked = NDHASGIANT(&ni); NDFREE(&ni, NDF_ONLY_PNBUF); /* * From here on down, we have a locked vnode that must be unlocked. * XXX: The code below largely duplicates exec_check_permissions(). */ locked = 1; /* Writable? */ if (vp->v_writecount) { error = ETXTBSY; goto cleanup; } /* Executable? */ error = VOP_GETATTR(vp, &attr, td->td_ucred, td); if (error) goto cleanup; if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { /* EACCESS is what exec(2) returns. */ error = ENOEXEC; goto cleanup; } /* Sensible size? */ if (attr.va_size == 0) { error = ENOEXEC; goto cleanup; } /* Can we access it? */ error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); if (error) goto cleanup; /* * XXX: This should use vn_open() so that it is properly authorized, * and to reduce code redundancy all over the place here. * XXX: Not really, it duplicates far more of exec_check_permissions() * than vn_open(). */ #ifdef MAC error = mac_vnode_check_open(td->td_ucred, vp, FREAD); if (error) goto cleanup; #endif error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); if (error) goto cleanup; /* Pull in executable header into kernel_map */ error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); if (error) goto cleanup; /* Is it a Linux binary ? */ if (((a_out->a_magic >> 16) & 0xff) != 0x64) { error = ENOEXEC; goto cleanup; } /* * While we are here, we should REALLY do some more checks */ /* Set file/virtual offset based on a.out variant. */ switch ((int)(a_out->a_magic & 0xffff)) { case 0413: /* ZMAGIC */ file_offset = 1024; break; case 0314: /* QMAGIC */ file_offset = 0; break; default: error = ENOEXEC; goto cleanup; } bss_size = round_page(a_out->a_bss); /* Check various fields in header for validity/bounds. */ if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { error = ENOEXEC; goto cleanup; } /* text + data can't exceed file size */ if (a_out->a_data + a_out->a_text > attr.va_size) { error = EFAULT; goto cleanup; } /* * text/data/bss must not exceed limits * XXX - this is not complete. it should check current usage PLUS * the resources needed by this library. */ PROC_LOCK(td->td_proc); if (a_out->a_text > maxtsiz || a_out->a_data + bss_size > lim_cur(td->td_proc, RLIMIT_DATA)) { PROC_UNLOCK(td->td_proc); error = ENOMEM; goto cleanup; } PROC_UNLOCK(td->td_proc); /* * Prevent more writers. * XXX: Note that if any of the VM operations fail below we don't * clear this flag. */ vp->v_vflag |= VV_TEXT; /* * Lock no longer needed */ locked = 0; VOP_UNLOCK(vp, 0); VFS_UNLOCK_GIANT(vfslocked); /* * Check if file_offset page aligned. Currently we cannot handle * misalinged file offsets, and so we read in the entire image * (what a waste). */ if (file_offset & PAGE_MASK) { #ifdef DEBUG printf("uselib: Non page aligned binary %lu\n", file_offset); #endif /* Map text+data read/write/execute */ /* a_entry is the load address and is page aligned */ vmaddr = trunc_page(a_out->a_entry); /* get anon user mapping, read+write+execute */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; /* map file into kernel_map */ error = vm_mmap(kernel_map, &buffer, round_page(a_out->a_text + a_out->a_data + file_offset), VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, trunc_page(file_offset)); if (error) goto cleanup; /* copy from kernel VM space to user space */ error = copyout(PTRIN(buffer + file_offset), (void *)vmaddr, a_out->a_text + a_out->a_data); /* release temporary kernel space */ vm_map_remove(kernel_map, buffer, buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); if (error) goto cleanup; } else { #ifdef DEBUG printf("uselib: Page aligned binary %lu\n", file_offset); #endif /* * for QMAGIC, a_entry is 20 bytes beyond the load address * to skip the executable header */ vmaddr = trunc_page(a_out->a_entry); /* * Map it all into the process's space as a single * copy-on-write "data" segment. */ error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); if (error) goto cleanup; } #ifdef DEBUG printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], ((long *)vmaddr)[1]); #endif if (bss_size != 0) { /* Calculate BSS start address */ vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; /* allocate some 'anon' space */ error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) goto cleanup; } cleanup: /* Unlock vnode if needed */ if (locked) { VOP_UNLOCK(vp, 0); VFS_UNLOCK_GIANT(vfslocked); } /* Release the kernel mapping. */ if (a_out) vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); return error; } #endif /* __i386__ */ int linux_select(struct thread *td, struct linux_select_args *args) { l_timeval ltv; struct timeval tv0, tv1, utv, *tvp; int error; #ifdef DEBUG if (ldebug(select)) printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, (void *)args->readfds, (void *)args->writefds, (void *)args->exceptfds, (void *)args->timeout); #endif /* * Store current time for computation of the amount of * time left. */ if (args->timeout) { if ((error = copyin(args->timeout, <v, sizeof(ltv)))) goto select_out; utv.tv_sec = ltv.tv_sec; utv.tv_usec = ltv.tv_usec; #ifdef DEBUG if (ldebug(select)) printf(LMSG("incoming timeout (%jd/%ld)"), (intmax_t)utv.tv_sec, utv.tv_usec); #endif if (itimerfix(&utv)) { /* * The timeval was invalid. Convert it to something * valid that will act as it does under Linux. */ utv.tv_sec += utv.tv_usec / 1000000; utv.tv_usec %= 1000000; if (utv.tv_usec < 0) { utv.tv_sec -= 1; utv.tv_usec += 1000000; } if (utv.tv_sec < 0) timevalclear(&utv); } microtime(&tv0); tvp = &utv; } else tvp = NULL; error = kern_select(td, args->nfds, args->readfds, args->writefds, args->exceptfds, tvp); #ifdef DEBUG if (ldebug(select)) printf(LMSG("real select returns %d"), error); #endif if (error) { /* * See fs/select.c in the Linux kernel. Without this, * Maelstrom doesn't work. */ if (error == ERESTART) error = EINTR; goto select_out; } if (args->timeout) { if (td->td_retval[0]) { /* * Compute how much time was left of the timeout, * by subtracting the current time and the time * before we started the call, and subtracting * that result from the user-supplied value. */ microtime(&tv1); timevalsub(&tv1, &tv0); timevalsub(&utv, &tv1); if (utv.tv_sec < 0) timevalclear(&utv); } else timevalclear(&utv); #ifdef DEBUG if (ldebug(select)) printf(LMSG("outgoing timeout (%jd/%ld)"), (intmax_t)utv.tv_sec, utv.tv_usec); #endif ltv.tv_sec = utv.tv_sec; ltv.tv_usec = utv.tv_usec; if ((error = copyout(<v, args->timeout, sizeof(ltv)))) goto select_out; } select_out: #ifdef DEBUG if (ldebug(select)) printf(LMSG("select_out -> %d"), error); #endif return error; } int linux_mremap(struct thread *td, struct linux_mremap_args *args) { struct munmap_args /* { void *addr; size_t len; } */ bsd_args; int error = 0; #ifdef DEBUG if (ldebug(mremap)) printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), (void *)(uintptr_t)args->addr, (unsigned long)args->old_len, (unsigned long)args->new_len, (unsigned long)args->flags); #endif if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { td->td_retval[0] = 0; return (EINVAL); } /* * Check for the page alignment. * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. */ if (args->addr & PAGE_MASK) { td->td_retval[0] = 0; return (EINVAL); } args->new_len = round_page(args->new_len); args->old_len = round_page(args->old_len); if (args->new_len > args->old_len) { td->td_retval[0] = 0; return ENOMEM; } if (args->new_len < args->old_len) { bsd_args.addr = (caddr_t)((uintptr_t)args->addr + args->new_len); bsd_args.len = args->old_len - args->new_len; error = munmap(td, &bsd_args); } td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; return error; } #define LINUX_MS_ASYNC 0x0001 #define LINUX_MS_INVALIDATE 0x0002 #define LINUX_MS_SYNC 0x0004 int linux_msync(struct thread *td, struct linux_msync_args *args) { struct msync_args bsd_args; bsd_args.addr = (caddr_t)(uintptr_t)args->addr; bsd_args.len = (uintptr_t)args->len; bsd_args.flags = args->fl & ~LINUX_MS_SYNC; return msync(td, &bsd_args); } int linux_time(struct thread *td, struct linux_time_args *args) { struct timeval tv; l_time_t tm; int error; #ifdef DEBUG if (ldebug(time)) printf(ARGS(time, "*")); #endif microtime(&tv); tm = tv.tv_sec; if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) return error; td->td_retval[0] = tm; return 0; } struct l_times_argv { l_long tms_utime; l_long tms_stime; l_long tms_cutime; l_long tms_cstime; }; #define CLK_TCK 100 /* Linux uses 100 */ #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) int linux_times(struct thread *td, struct linux_times_args *args) { struct timeval tv, utime, stime, cutime, cstime; struct l_times_argv tms; struct proc *p; int error; #ifdef DEBUG if (ldebug(times)) printf(ARGS(times, "*")); #endif if (args->buf != NULL) { p = td->td_proc; PROC_LOCK(p); PROC_SLOCK(p); calcru(p, &utime, &stime); PROC_SUNLOCK(p); calccru(p, &cutime, &cstime); PROC_UNLOCK(p); tms.tms_utime = CONVTCK(utime); tms.tms_stime = CONVTCK(stime); tms.tms_cutime = CONVTCK(cutime); tms.tms_cstime = CONVTCK(cstime); if ((error = copyout(&tms, args->buf, sizeof(tms)))) return error; } microuptime(&tv); td->td_retval[0] = (int)CONVTCK(tv); return 0; } int linux_newuname(struct thread *td, struct linux_newuname_args *args) { struct l_new_utsname utsname; char osname[LINUX_MAX_UTSNAME]; char osrelease[LINUX_MAX_UTSNAME]; char *p; #ifdef DEBUG if (ldebug(newuname)) printf(ARGS(newuname, "*")); #endif linux_get_osname(td, osname); linux_get_osrelease(td, osrelease); bzero(&utsname, sizeof(utsname)); strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); for (p = utsname.version; *p != '\0'; ++p) if (*p == '\n') { *p = '\0'; break; } #ifdef __i386__ { const char *class; switch (cpu_class) { case CPUCLASS_686: class = "i686"; break; case CPUCLASS_586: class = "i586"; break; case CPUCLASS_486: class = "i486"; break; default: class = "i386"; } strlcpy(utsname.machine, class, LINUX_MAX_UTSNAME); } #elif defined(__amd64__) /* XXX: Linux can change 'personality'. */ #ifdef COMPAT_LINUX32 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); #else strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); #endif /* COMPAT_LINUX32 */ #else /* something other than i386 or amd64 - assume we and Linux agree */ strlcpy(utsname.machine, machine, LINUX_MAX_UTSNAME); #endif /* __i386__ */ + mtx_lock(&hostname_mtx); strlcpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME); + mtx_unlock(&hostname_mtx); return (copyout(&utsname, args->buf, sizeof(utsname))); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) struct l_utimbuf { l_time_t l_actime; l_time_t l_modtime; }; int linux_utime(struct thread *td, struct linux_utime_args *args) { struct timeval tv[2], *tvp; struct l_utimbuf lut; char *fname; int error; LCONVPATHEXIST(td, args->fname, &fname); #ifdef DEBUG if (ldebug(utime)) printf(ARGS(utime, "%s, *"), fname); #endif if (args->times) { if ((error = copyin(args->times, &lut, sizeof lut))) { LFREEPATH(fname); return error; } tv[0].tv_sec = lut.l_actime; tv[0].tv_usec = 0; tv[1].tv_sec = lut.l_modtime; tv[1].tv_usec = 0; tvp = tv; } else tvp = NULL; error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } int linux_utimes(struct thread *td, struct linux_utimes_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error; LCONVPATHEXIST(td, args->fname, &fname); #ifdef DEBUG if (ldebug(utimes)) printf(ARGS(utimes, "%s, *"), fname); #endif if (args->tptr != NULL) { if ((error = copyin(args->tptr, ltv, sizeof ltv))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } int linux_futimesat(struct thread *td, struct linux_futimesat_args *args) { l_timeval ltv[2]; struct timeval tv[2], *tvp = NULL; char *fname; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); #ifdef DEBUG if (ldebug(futimesat)) printf(ARGS(futimesat, "%s, *"), fname); #endif if (args->utimes != NULL) { if ((error = copyin(args->utimes, ltv, sizeof ltv))) { LFREEPATH(fname); return (error); } tv[0].tv_sec = ltv[0].tv_sec; tv[0].tv_usec = ltv[0].tv_usec; tv[1].tv_sec = ltv[1].tv_sec; tv[1].tv_usec = ltv[1].tv_usec; tvp = tv; } error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); LFREEPATH(fname); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ #define __WCLONE 0x80000000 int linux_waitpid(struct thread *td, struct linux_waitpid_args *args) { int error, options, tmpstat; #ifdef DEBUG if (ldebug(waitpid)) printf(ARGS(waitpid, "%d, %p, %d"), args->pid, (void *)args->status, args->options); #endif /* * this is necessary because the test in kern_wait doesn't work * because we mess with the options here */ if (args->options & ~(WUNTRACED | WNOHANG | WCONTINUED | __WCLONE)) return (EINVAL); options = (args->options & (WNOHANG | WUNTRACED)); /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ if (args->options & __WCLONE) options |= WLINUXCLONE; error = kern_wait(td, args->pid, &tmpstat, options, NULL); if (error) return error; if (args->status) { tmpstat &= 0xffff; if (WIFSIGNALED(tmpstat)) tmpstat = (tmpstat & 0xffffff80) | BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); else if (WIFSTOPPED(tmpstat)) tmpstat = (tmpstat & 0xffff00ff) | (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); return copyout(&tmpstat, args->status, sizeof(int)); } return 0; } int linux_wait4(struct thread *td, struct linux_wait4_args *args) { int error, options, tmpstat; struct rusage ru, *rup; struct proc *p; #ifdef DEBUG if (ldebug(wait4)) printf(ARGS(wait4, "%d, %p, %d, %p"), args->pid, (void *)args->status, args->options, (void *)args->rusage); #endif options = (args->options & (WNOHANG | WUNTRACED)); /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ if (args->options & __WCLONE) options |= WLINUXCLONE; if (args->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, args->pid, &tmpstat, options, rup); if (error) return error; p = td->td_proc; PROC_LOCK(p); sigqueue_delete(&p->p_sigqueue, SIGCHLD); PROC_UNLOCK(p); if (args->status) { tmpstat &= 0xffff; if (WIFSIGNALED(tmpstat)) tmpstat = (tmpstat & 0xffffff80) | BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); else if (WIFSTOPPED(tmpstat)) tmpstat = (tmpstat & 0xffff00ff) | (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); error = copyout(&tmpstat, args->status, sizeof(int)); } if (args->rusage != NULL && error == 0) error = copyout(&ru, args->rusage, sizeof(ru)); return (error); } int linux_mknod(struct thread *td, struct linux_mknod_args *args) { char *path; int error; LCONVPATHCREAT(td, args->path, &path); #ifdef DEBUG if (ldebug(mknod)) printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev); #endif switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknod(td, path, UIO_SYSSPACE, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_open(td, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } LFREEPATH(path); return (error); } int linux_mknodat(struct thread *td, struct linux_mknodat_args *args) { char *path; int error, dfd; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHCREAT_AT(td, args->filename, &path, dfd); #ifdef DEBUG if (ldebug(mknodat)) printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); #endif switch (args->mode & S_IFMT) { case S_IFIFO: case S_IFSOCK: error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); break; case S_IFCHR: case S_IFBLK: error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, args->dev); break; case S_IFDIR: error = EPERM; break; case 0: args->mode |= S_IFREG; /* FALLTHROUGH */ case S_IFREG: error = kern_openat(td, dfd, path, UIO_SYSSPACE, O_WRONLY | O_CREAT | O_TRUNC, args->mode); if (error == 0) kern_close(td, td->td_retval[0]); break; default: error = EINVAL; break; } LFREEPATH(path); return (error); } /* * UGH! This is just about the dumbest idea I've ever heard!! */ int linux_personality(struct thread *td, struct linux_personality_args *args) { #ifdef DEBUG if (ldebug(personality)) printf(ARGS(personality, "%lu"), (unsigned long)args->per); #endif if (args->per != 0) return EINVAL; /* Yes Jim, it's still a Linux... */ td->td_retval[0] = 0; return 0; } struct l_itimerval { l_timeval it_interval; l_timeval it_value; }; #define B2L_ITIMERVAL(bip, lip) \ (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; int linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv, oitv; #ifdef DEBUG if (ldebug(setitimer)) printf(ARGS(setitimer, "%p, %p"), (void *)uap->itv, (void *)uap->oitv); #endif if (uap->itv == NULL) { uap->itv = uap->oitv; return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); } error = copyin(uap->itv, &ls, sizeof(ls)); if (error != 0) return (error); B2L_ITIMERVAL(&aitv, &ls); #ifdef DEBUG if (ldebug(setitimer)) { printf("setitimer: value: sec: %jd, usec: %ld\n", (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); printf("setitimer: interval: sec: %jd, usec: %ld\n", (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); } #endif error = kern_setitimer(td, uap->which, &aitv, &oitv); if (error != 0 || uap->oitv == NULL) return (error); B2L_ITIMERVAL(&ls, &oitv); return (copyout(&ls, uap->oitv, sizeof(ls))); } int linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) { int error; struct l_itimerval ls; struct itimerval aitv; #ifdef DEBUG if (ldebug(getitimer)) printf(ARGS(getitimer, "%p"), (void *)uap->itv); #endif error = kern_getitimer(td, uap->which, &aitv); if (error != 0) return (error); B2L_ITIMERVAL(&ls, &aitv); return (copyout(&ls, uap->itv, sizeof(ls))); } int linux_nice(struct thread *td, struct linux_nice_args *args) { struct setpriority_args bsd_args; bsd_args.which = PRIO_PROCESS; bsd_args.who = 0; /* current process */ bsd_args.prio = args->inc; return setpriority(td, &bsd_args); } int linux_setgroups(struct thread *td, struct linux_setgroups_args *args) { struct ucred *newcred, *oldcred; l_gid_t linux_gidset[NGROUPS]; gid_t *bsd_gidset; int ngrp, error; struct proc *p; ngrp = args->gidsetsize; if (ngrp < 0 || ngrp >= NGROUPS) return (EINVAL); error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); if (error) return (error); newcred = crget(); p = td->td_proc; PROC_LOCK(p); oldcred = p->p_ucred; /* * cr_groups[0] holds egid. Setting the whole set from * the supplied set will cause egid to be changed too. * Keep cr_groups[0] unchanged to prevent that. */ if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { PROC_UNLOCK(p); crfree(newcred); return (error); } crcopy(newcred, oldcred); if (ngrp > 0) { newcred->cr_ngroups = ngrp + 1; bsd_gidset = newcred->cr_groups; ngrp--; while (ngrp >= 0) { bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; ngrp--; } } else newcred->cr_ngroups = 1; setsugid(p); p->p_ucred = newcred; PROC_UNLOCK(p); crfree(oldcred); return (0); } int linux_getgroups(struct thread *td, struct linux_getgroups_args *args) { struct ucred *cred; l_gid_t linux_gidset[NGROUPS]; gid_t *bsd_gidset; int bsd_gidsetsz, ngrp, error; cred = td->td_ucred; bsd_gidset = cred->cr_groups; bsd_gidsetsz = cred->cr_ngroups - 1; /* * cr_groups[0] holds egid. Returning the whole set * here will cause a duplicate. Exclude cr_groups[0] * to prevent that. */ if ((ngrp = args->gidsetsize) == 0) { td->td_retval[0] = bsd_gidsetsz; return (0); } if (ngrp < bsd_gidsetsz) return (EINVAL); ngrp = 0; while (ngrp < bsd_gidsetsz) { linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; ngrp++; } if ((error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)))) return (error); td->td_retval[0] = ngrp; return (0); } int linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) { struct rlimit bsd_rlim; struct l_rlimit rlim; u_int which; int error; #ifdef DEBUG if (ldebug(setrlimit)) printf(ARGS(setrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); error = copyin(args->rlim, &rlim, sizeof(rlim)); if (error) return (error); bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; return (kern_setrlimit(td, which, &bsd_rlim)); } int linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) { struct l_rlimit rlim; struct proc *p = td->td_proc; struct rlimit bsd_rlim; u_int which; #ifdef DEBUG if (ldebug(old_getrlimit)) printf(ARGS(old_getrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); PROC_LOCK(p); lim_rlimit(p, which, &bsd_rlim); PROC_UNLOCK(p); #ifdef COMPAT_LINUX32 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; if (rlim.rlim_cur == UINT_MAX) rlim.rlim_cur = INT_MAX; rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; if (rlim.rlim_max == UINT_MAX) rlim.rlim_max = INT_MAX; #else rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; if (rlim.rlim_cur == ULONG_MAX) rlim.rlim_cur = LONG_MAX; rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; if (rlim.rlim_max == ULONG_MAX) rlim.rlim_max = LONG_MAX; #endif return (copyout(&rlim, args->rlim, sizeof(rlim))); } int linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) { struct l_rlimit rlim; struct proc *p = td->td_proc; struct rlimit bsd_rlim; u_int which; #ifdef DEBUG if (ldebug(getrlimit)) printf(ARGS(getrlimit, "%d, %p"), args->resource, (void *)args->rlim); #endif if (args->resource >= LINUX_RLIM_NLIMITS) return (EINVAL); which = linux_to_bsd_resource[args->resource]; if (which == -1) return (EINVAL); PROC_LOCK(p); lim_rlimit(p, which, &bsd_rlim); PROC_UNLOCK(p); rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; return (copyout(&rlim, args->rlim, sizeof(rlim))); } int linux_sched_setscheduler(struct thread *td, struct linux_sched_setscheduler_args *args) { struct sched_setscheduler_args bsd; #ifdef DEBUG if (ldebug(sched_setscheduler)) printf(ARGS(sched_setscheduler, "%d, %d, %p"), args->pid, args->policy, (const void *)args->param); #endif switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return EINVAL; } bsd.pid = args->pid; bsd.param = (struct sched_param *)args->param; return sched_setscheduler(td, &bsd); } int linux_sched_getscheduler(struct thread *td, struct linux_sched_getscheduler_args *args) { struct sched_getscheduler_args bsd; int error; #ifdef DEBUG if (ldebug(sched_getscheduler)) printf(ARGS(sched_getscheduler, "%d"), args->pid); #endif bsd.pid = args->pid; error = sched_getscheduler(td, &bsd); switch (td->td_retval[0]) { case SCHED_OTHER: td->td_retval[0] = LINUX_SCHED_OTHER; break; case SCHED_FIFO: td->td_retval[0] = LINUX_SCHED_FIFO; break; case SCHED_RR: td->td_retval[0] = LINUX_SCHED_RR; break; } return error; } int linux_sched_get_priority_max(struct thread *td, struct linux_sched_get_priority_max_args *args) { struct sched_get_priority_max_args bsd; #ifdef DEBUG if (ldebug(sched_get_priority_max)) printf(ARGS(sched_get_priority_max, "%d"), args->policy); #endif switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return EINVAL; } return sched_get_priority_max(td, &bsd); } int linux_sched_get_priority_min(struct thread *td, struct linux_sched_get_priority_min_args *args) { struct sched_get_priority_min_args bsd; #ifdef DEBUG if (ldebug(sched_get_priority_min)) printf(ARGS(sched_get_priority_min, "%d"), args->policy); #endif switch (args->policy) { case LINUX_SCHED_OTHER: bsd.policy = SCHED_OTHER; break; case LINUX_SCHED_FIFO: bsd.policy = SCHED_FIFO; break; case LINUX_SCHED_RR: bsd.policy = SCHED_RR; break; default: return EINVAL; } return sched_get_priority_min(td, &bsd); } #define REBOOT_CAD_ON 0x89abcdef #define REBOOT_CAD_OFF 0 #define REBOOT_HALT 0xcdef0123 #define REBOOT_RESTART 0x01234567 #define REBOOT_RESTART2 0xA1B2C3D4 #define REBOOT_POWEROFF 0x4321FEDC #define REBOOT_MAGIC1 0xfee1dead #define REBOOT_MAGIC2 0x28121969 #define REBOOT_MAGIC2A 0x05121996 #define REBOOT_MAGIC2B 0x16041998 int linux_reboot(struct thread *td, struct linux_reboot_args *args) { struct reboot_args bsd_args; #ifdef DEBUG if (ldebug(reboot)) printf(ARGS(reboot, "0x%x"), args->cmd); #endif if (args->magic1 != REBOOT_MAGIC1) return EINVAL; switch (args->magic2) { case REBOOT_MAGIC2: case REBOOT_MAGIC2A: case REBOOT_MAGIC2B: break; default: return EINVAL; } switch (args->cmd) { case REBOOT_CAD_ON: case REBOOT_CAD_OFF: return (priv_check(td, PRIV_REBOOT)); case REBOOT_HALT: bsd_args.opt = RB_HALT; break; case REBOOT_RESTART: case REBOOT_RESTART2: bsd_args.opt = 0; break; case REBOOT_POWEROFF: bsd_args.opt = RB_POWEROFF; break; default: return EINVAL; } return reboot(td, &bsd_args); } /* * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that * are assumed to be preserved. The following lightweight syscalls fixes * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c * * linux_getpid() - MP SAFE * linux_getgid() - MP SAFE * linux_getuid() - MP SAFE */ int linux_getpid(struct thread *td, struct linux_getpid_args *args) { struct linux_emuldata *em; #ifdef DEBUG if (ldebug(getpid)) printf(ARGS(getpid, "")); #endif if (linux_use26(td)) { em = em_find(td->td_proc, EMUL_DONTLOCK); KASSERT(em != NULL, ("getpid: emuldata not found.\n")); td->td_retval[0] = em->shared->group_pid; } else { td->td_retval[0] = td->td_proc->p_pid; } return (0); } int linux_gettid(struct thread *td, struct linux_gettid_args *args) { #ifdef DEBUG if (ldebug(gettid)) printf(ARGS(gettid, "")); #endif td->td_retval[0] = td->td_proc->p_pid; return (0); } int linux_getppid(struct thread *td, struct linux_getppid_args *args) { struct linux_emuldata *em; struct proc *p, *pp; #ifdef DEBUG if (ldebug(getppid)) printf(ARGS(getppid, "")); #endif if (!linux_use26(td)) { PROC_LOCK(td->td_proc); td->td_retval[0] = td->td_proc->p_pptr->p_pid; PROC_UNLOCK(td->td_proc); return (0); } em = em_find(td->td_proc, EMUL_DONTLOCK); KASSERT(em != NULL, ("getppid: process emuldata not found.\n")); /* find the group leader */ p = pfind(em->shared->group_pid); if (p == NULL) { #ifdef DEBUG printf(LMSG("parent process not found.\n")); #endif return (0); } pp = p->p_pptr; /* switch to parent */ PROC_LOCK(pp); PROC_UNLOCK(p); /* if its also linux process */ if (pp->p_sysent == &elf_linux_sysvec) { em = em_find(pp, EMUL_DONTLOCK); KASSERT(em != NULL, ("getppid: parent emuldata not found.\n")); td->td_retval[0] = em->shared->group_pid; } else td->td_retval[0] = pp->p_pid; PROC_UNLOCK(pp); return (0); } int linux_getgid(struct thread *td, struct linux_getgid_args *args) { #ifdef DEBUG if (ldebug(getgid)) printf(ARGS(getgid, "")); #endif td->td_retval[0] = td->td_ucred->cr_rgid; return (0); } int linux_getuid(struct thread *td, struct linux_getuid_args *args) { #ifdef DEBUG if (ldebug(getuid)) printf(ARGS(getuid, "")); #endif td->td_retval[0] = td->td_ucred->cr_ruid; return (0); } int linux_getsid(struct thread *td, struct linux_getsid_args *args) { struct getsid_args bsd; #ifdef DEBUG if (ldebug(getsid)) printf(ARGS(getsid, "%i"), args->pid); #endif bsd.pid = args->pid; return getsid(td, &bsd); } int linux_nosys(struct thread *td, struct nosys_args *ignore) { return (ENOSYS); } int linux_getpriority(struct thread *td, struct linux_getpriority_args *args) { struct getpriority_args bsd_args; int error; #ifdef DEBUG if (ldebug(getpriority)) printf(ARGS(getpriority, "%i, %i"), args->which, args->who); #endif bsd_args.which = args->which; bsd_args.who = args->who; error = getpriority(td, &bsd_args); td->td_retval[0] = 20 - td->td_retval[0]; return error; } int linux_sethostname(struct thread *td, struct linux_sethostname_args *args) { int name[2]; #ifdef DEBUG if (ldebug(sethostname)) printf(ARGS(sethostname, "*, %i"), args->len); #endif name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, args->len, 0, 0)); } int linux_exit_group(struct thread *td, struct linux_exit_group_args *args) { struct linux_emuldata *em, *td_em, *tmp_em; struct proc *sp; #ifdef DEBUG if (ldebug(exit_group)) printf(ARGS(exit_group, "%i"), args->error_code); #endif if (linux_use26(td)) { td_em = em_find(td->td_proc, EMUL_DONTLOCK); KASSERT(td_em != NULL, ("exit_group: emuldata not found.\n")); EMUL_SHARED_RLOCK(&emul_shared_lock); LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) { if (em->pid == td_em->pid) continue; sp = pfind(em->pid); psignal(sp, SIGKILL); PROC_UNLOCK(sp); #ifdef DEBUG printf(LMSG("linux_sys_exit_group: kill PID %d\n"), em->pid); #endif } EMUL_SHARED_RUNLOCK(&emul_shared_lock); } /* * XXX: we should send a signal to the parent if * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) * as it doesnt occur often. */ exit1(td, W_EXITCODE(args->error_code, 0)); return (0); } int linux_prctl(struct thread *td, struct linux_prctl_args *args) { int error = 0, max_size; struct proc *p = td->td_proc; char comm[LINUX_MAX_COMM_LEN]; struct linux_emuldata *em; int pdeath_signal; #ifdef DEBUG if (ldebug(prctl)) printf(ARGS(prctl, "%d, %d, %d, %d, %d"), args->option, args->arg2, args->arg3, args->arg4, args->arg5); #endif switch (args->option) { case LINUX_PR_SET_PDEATHSIG: if (!LINUX_SIG_VALID(args->arg2)) return (EINVAL); em = em_find(p, EMUL_DOLOCK); KASSERT(em != NULL, ("prctl: emuldata not found.\n")); em->pdeath_signal = args->arg2; EMUL_UNLOCK(&emul_lock); break; case LINUX_PR_GET_PDEATHSIG: em = em_find(p, EMUL_DOLOCK); KASSERT(em != NULL, ("prctl: emuldata not found.\n")); pdeath_signal = em->pdeath_signal; EMUL_UNLOCK(&emul_lock); error = copyout(&pdeath_signal, (void *)(register_t)args->arg2, sizeof(pdeath_signal)); break; case LINUX_PR_SET_NAME: /* * To be on the safe side we need to make sure to not * overflow the size a linux program expects. We already * do this here in the copyin, so that we don't need to * check on copyout. */ max_size = MIN(sizeof(comm), sizeof(p->p_comm)); error = copyinstr((void *)(register_t)args->arg2, comm, max_size, NULL); /* Linux silently truncates the name if it is too long. */ if (error == ENAMETOOLONG) { /* * XXX: copyinstr() isn't documented to populate the * array completely, so do a copyin() to be on the * safe side. This should be changed in case * copyinstr() is changed to guarantee this. */ error = copyin((void *)(register_t)args->arg2, comm, max_size - 1); comm[max_size - 1] = '\0'; } if (error) return (error); PROC_LOCK(p); strlcpy(p->p_comm, comm, sizeof(p->p_comm)); PROC_UNLOCK(p); break; case LINUX_PR_GET_NAME: PROC_LOCK(p); strlcpy(comm, p->p_comm, sizeof(comm)); PROC_UNLOCK(p); error = copyout(comm, (void *)(register_t)args->arg2, strlen(comm) + 1); break; default: error = EINVAL; break; } return (error); } /* * Get affinity of a process. */ int linux_sched_getaffinity(struct thread *td, struct linux_sched_getaffinity_args *args) { int error; struct cpuset_getaffinity_args cga; #ifdef DEBUG if (ldebug(sched_getaffinity)) printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, args->len); #endif cga.level = CPU_LEVEL_WHICH; cga.which = CPU_WHICH_PID; cga.id = args->pid; cga.cpusetsize = sizeof(cpumask_t); cga.mask = (cpuset_t *) args->user_mask_ptr; if ((error = cpuset_getaffinity(td, &cga)) == 0) td->td_retval[0] = sizeof(cpumask_t); return (error); } /* * Set affinity of a process. */ int linux_sched_setaffinity(struct thread *td, struct linux_sched_setaffinity_args *args) { struct cpuset_setaffinity_args csa; #ifdef DEBUG if (ldebug(sched_setaffinity)) printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, args->len); #endif csa.level = CPU_LEVEL_WHICH; csa.which = CPU_WHICH_PID; csa.id = args->pid; csa.cpusetsize = args->len; csa.mask = (cpuset_t *) args->user_mask_ptr; return (cpuset_setaffinity(td, &csa)); } Index: head/sys/compat/svr4/svr4_stat.c =================================================================== --- head/sys/compat/svr4/svr4_stat.c (revision 180290) +++ head/sys/compat/svr4/svr4_stat.c (revision 180291) @@ -1,683 +1,684 @@ /*- * Copyright (c) 1998 Mark Newton * Copyright (c) 1994 Christos Zoulas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(NOTYET) #include "svr4_fuser.h" #endif #ifdef sparc /* * Solaris-2.4 on the sparc has the old stat call using the new * stat data structure... */ # define SVR4_NO_OSTAT #endif struct svr4_ustat_args { svr4_dev_t dev; struct svr4_ustat * name; }; static void bsd_to_svr4_xstat(struct stat *, struct svr4_xstat *); static void bsd_to_svr4_stat64(struct stat *, struct svr4_stat64 *); int svr4_ustat(struct thread *, struct svr4_ustat_args *); static int svr4_to_bsd_pathconf(int); /* * SVR4 uses named pipes as named sockets, so we tell programs * that sockets are named pipes with mode 0 */ #define BSD_TO_SVR4_MODE(mode) (S_ISSOCK(mode) ? S_IFIFO : (mode)) #ifndef SVR4_NO_OSTAT static void bsd_to_svr4_stat(struct stat *, struct svr4_stat *); static void bsd_to_svr4_stat(st, st4) struct stat *st; struct svr4_stat *st4; { memset(st4, 0, sizeof(*st4)); st4->st_dev = bsd_to_svr4_odev_t(st->st_dev); st4->st_ino = st->st_ino; st4->st_mode = BSD_TO_SVR4_MODE(st->st_mode); st4->st_nlink = st->st_nlink; st4->st_uid = st->st_uid; st4->st_gid = st->st_gid; st4->st_rdev = bsd_to_svr4_odev_t(st->st_rdev); st4->st_size = st->st_size; st4->st_atim = st->st_atimespec.tv_sec; st4->st_mtim = st->st_mtimespec.tv_sec; st4->st_ctim = st->st_ctimespec.tv_sec; } #endif static void bsd_to_svr4_xstat(st, st4) struct stat *st; struct svr4_xstat *st4; { memset(st4, 0, sizeof(*st4)); st4->st_dev = bsd_to_svr4_dev_t(st->st_dev); st4->st_ino = st->st_ino; st4->st_mode = BSD_TO_SVR4_MODE(st->st_mode); st4->st_nlink = st->st_nlink; st4->st_uid = st->st_uid; st4->st_gid = st->st_gid; st4->st_rdev = bsd_to_svr4_dev_t(st->st_rdev); st4->st_size = st->st_size; st4->st_atim = st->st_atimespec; st4->st_mtim = st->st_mtimespec; st4->st_ctim = st->st_ctimespec; st4->st_blksize = st->st_blksize; st4->st_blocks = st->st_blocks; strcpy(st4->st_fstype, "unknown"); } static void bsd_to_svr4_stat64(st, st4) struct stat *st; struct svr4_stat64 *st4; { memset(st4, 0, sizeof(*st4)); st4->st_dev = bsd_to_svr4_dev_t(st->st_dev); st4->st_ino = st->st_ino; st4->st_mode = BSD_TO_SVR4_MODE(st->st_mode); st4->st_nlink = st->st_nlink; st4->st_uid = st->st_uid; st4->st_gid = st->st_gid; st4->st_rdev = bsd_to_svr4_dev_t(st->st_rdev); st4->st_size = st->st_size; st4->st_atim = st->st_atimespec; st4->st_mtim = st->st_mtimespec; st4->st_ctim = st->st_ctimespec; st4->st_blksize = st->st_blksize; st4->st_blocks = st->st_blocks; strcpy(st4->st_fstype, "unknown"); } int svr4_sys_stat(td, uap) struct thread *td; struct svr4_sys_stat_args *uap; { struct svr4_stat svr4_st; struct stat st; char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_stat(td, path, UIO_SYSSPACE, &st); free(path, M_TEMP); if (error) return (error); bsd_to_svr4_stat(&st, &svr4_st); if (S_ISSOCK(st.st_mode)) (void) svr4_add_socket(td, uap->path, &st); return (copyout(&svr4_st, uap->ub, sizeof svr4_st)); } int svr4_sys_lstat(td, uap) register struct thread *td; struct svr4_sys_lstat_args *uap; { struct svr4_stat svr4_st; struct stat st; char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_lstat(td, path, UIO_SYSSPACE, &st); free(path, M_TEMP); if (error) return (error); bsd_to_svr4_stat(&st, &svr4_st); if (S_ISSOCK(st.st_mode)) (void) svr4_add_socket(td, uap->path, &st); return (copyout(&svr4_st, uap->ub, sizeof svr4_st)); } int svr4_sys_fstat(td, uap) register struct thread *td; struct svr4_sys_fstat_args *uap; { struct svr4_stat svr4_st; struct stat st; int error; error = kern_fstat(td, uap->fd, &st); if (error) return (error); bsd_to_svr4_stat(&st, &svr4_st); return (copyout(&svr4_st, uap->sb, sizeof svr4_st)); } int svr4_sys_xstat(td, uap) register struct thread *td; struct svr4_sys_xstat_args *uap; { struct svr4_xstat svr4_st; struct stat st; char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_stat(td, path, UIO_SYSSPACE, &st); free(path, M_TEMP); if (error) return (error); bsd_to_svr4_xstat(&st, &svr4_st); #if defined(SOCKET_NOTYET) if (S_ISSOCK(st.st_mode)) (void) svr4_add_socket(td, uap->path, &st); #endif return (copyout(&svr4_st, uap->ub, sizeof svr4_st)); } int svr4_sys_lxstat(td, uap) register struct thread *td; struct svr4_sys_lxstat_args *uap; { struct svr4_xstat svr4_st; struct stat st; char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_lstat(td, path, UIO_SYSSPACE, &st); free(path, M_TEMP); if (error) return (error); bsd_to_svr4_xstat(&st, &svr4_st); #if defined(SOCKET_NOTYET) if (S_ISSOCK(st.st_mode)) (void) svr4_add_socket(td, uap->path, &st); #endif return (copyout(&svr4_st, uap->ub, sizeof svr4_st)); } int svr4_sys_fxstat(td, uap) register struct thread *td; struct svr4_sys_fxstat_args *uap; { struct svr4_xstat svr4_st; struct stat st; int error; error = kern_fstat(td, uap->fd, &st); if (error) return (error); bsd_to_svr4_xstat(&st, &svr4_st); return (copyout(&svr4_st, uap->sb, sizeof svr4_st)); } int svr4_sys_stat64(td, uap) register struct thread *td; struct svr4_sys_stat64_args *uap; { struct svr4_stat64 svr4_st; struct stat st; char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_stat(td, path, UIO_SYSSPACE, &st); free(path, M_TEMP); if (error) return (error); bsd_to_svr4_stat64(&st, &svr4_st); if (S_ISSOCK(st.st_mode)) (void) svr4_add_socket(td, uap->path, &st); return (copyout(&svr4_st, uap->sb, sizeof svr4_st)); } int svr4_sys_lstat64(td, uap) register struct thread *td; struct svr4_sys_lstat64_args *uap; { struct svr4_stat64 svr4_st; struct stat st; char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_lstat(td, path, UIO_SYSSPACE, &st); free(path, M_TEMP); if (error) return (error); bsd_to_svr4_stat64(&st, &svr4_st); if (S_ISSOCK(st.st_mode)) (void) svr4_add_socket(td, uap->path, &st); return (copyout(&svr4_st, uap->sb, sizeof svr4_st)); } int svr4_sys_fstat64(td, uap) register struct thread *td; struct svr4_sys_fstat64_args *uap; { struct svr4_stat64 svr4_st; struct stat st; int error; error = kern_fstat(td, uap->fd, &st); if (error) return (error); bsd_to_svr4_stat64(&st, &svr4_st); return (copyout(&svr4_st, uap->sb, sizeof svr4_st)); } int svr4_ustat(td, uap) register struct thread *td; struct svr4_ustat_args *uap; { struct svr4_ustat us; int error; memset(&us, 0, sizeof us); /* * XXX: should set f_tfree and f_tinode at least * How do we translate dev -> fstat? (and then to svr4_ustat) */ if ((error = copyout(&us, uap->name, sizeof us)) != 0) return (error); return 0; } /*extern char ostype[], hostname[], osrelease[], version[], machine[];*/ int svr4_sys_uname(td, uap) register struct thread *td; struct svr4_sys_uname_args *uap; { struct svr4_utsname sut; memset(&sut, 0, sizeof(sut)); strlcpy(sut.sysname, ostype, sizeof(sut.sysname)); getcredhostname(td->td_ucred, sut.nodename, sizeof(sut.nodename)); strlcpy(sut.release, osrelease, sizeof(sut.release)); strlcpy(sut.version, version, sizeof(sut.version)); strlcpy(sut.machine, machine, sizeof(sut.machine)); return copyout((caddr_t) &sut, (caddr_t) uap->name, sizeof(struct svr4_utsname)); } int svr4_sys_systeminfo(td, uap) struct thread *td; struct svr4_sys_systeminfo_args *uap; { char *str = NULL; int error = 0; register_t *retval = td->td_retval; size_t len = 0; char buf[1]; /* XXX NetBSD uses 256, but that seems like awfully excessive kstack usage for an empty string... */ u_int rlen = uap->len; switch (uap->what) { case SVR4_SI_SYSNAME: str = ostype; break; case SVR4_SI_HOSTNAME: str = hostname; break; case SVR4_SI_RELEASE: str = osrelease; break; case SVR4_SI_VERSION: str = version; break; case SVR4_SI_MACHINE: str = machine; break; case SVR4_SI_ARCHITECTURE: str = machine; break; case SVR4_SI_HW_SERIAL: str = "0"; break; case SVR4_SI_HW_PROVIDER: str = ostype; break; case SVR4_SI_SRPC_DOMAIN: + /* XXXRW: locking? */ str = domainname; break; case SVR4_SI_PLATFORM: #ifdef __i386__ str = "i86pc"; #else str = "unknown"; #endif break; case SVR4_SI_KERB_REALM: str = "unsupported"; break; #if defined(WHY_DOES_AN_EMULATOR_WANT_TO_SET_HOSTNAMES) case SVR4_SI_SET_HOSTNAME: name = KERN_HOSTNAME; return kern_sysctl(&name, 1, 0, 0, uap->buf, rlen, td); case SVR4_SI_SET_SRPC_DOMAIN: name = KERN_NISDOMAINNAME; return kern_sysctl(&name, 1, 0, 0, uap->buf, rlen, td); #else case SVR4_SI_SET_HOSTNAME: case SVR4_SI_SET_SRPC_DOMAIN: /* FALLTHROUGH */ #endif case SVR4_SI_SET_KERB_REALM: return 0; default: DPRINTF(("Bad systeminfo command %d\n", uap->what)); return ENOSYS; } if (str) { len = strlen(str) + 1; if (len > rlen) len = rlen; if (uap->buf) { error = copyout(str, uap->buf, len); if (error) return error; /* make sure we are NULL terminated */ buf[0] = '\0'; error = copyout(buf, &(uap->buf[len - 1]), 1); } else error = 0; } /* XXX NetBSD has hostname setting stuff here. Why would an emulator want to do that? */ *retval = len; return error; } int svr4_sys_utssys(td, uap) register struct thread *td; struct svr4_sys_utssys_args *uap; { switch (uap->sel) { case 0: /* uname(2) */ { struct svr4_sys_uname_args ua; ua.name = uap->a1; return svr4_sys_uname(td, &ua); } case 2: /* ustat(2) */ { struct svr4_ustat_args ua; ua.dev = (svr4_dev_t) uap->a2; ua.name = uap->a1; return svr4_ustat(td, &ua); } case 3: /* fusers(2) */ return ENOSYS; default: return ENOSYS; } return ENOSYS; } int svr4_sys_utime(td, uap) register struct thread *td; struct svr4_sys_utime_args *uap; { struct svr4_utimbuf ub; struct timeval tbuf[2], *tp; char *path; int error; if (uap->ubuf != NULL) { error = copyin(uap->ubuf, &ub, sizeof(ub)); if (error) return (error); tbuf[0].tv_sec = ub.actime; tbuf[0].tv_usec = 0; tbuf[1].tv_sec = ub.modtime; tbuf[1].tv_usec = 0; tp = tbuf; } else tp = NULL; CHECKALTEXIST(td, uap->path, &path); error = kern_utimes(td, path, UIO_SYSSPACE, tp, UIO_SYSSPACE); free(path, M_TEMP); return (error); } int svr4_sys_utimes(td, uap) register struct thread *td; struct svr4_sys_utimes_args *uap; { char *path; int error; CHECKALTEXIST(td, uap->path, &path); error = kern_utimes(td, path, UIO_SYSSPACE, uap->tptr, UIO_USERSPACE); free(path, M_TEMP); return (error); } static int svr4_to_bsd_pathconf(name) int name; { switch (name) { case SVR4_PC_LINK_MAX: return _PC_LINK_MAX; case SVR4_PC_MAX_CANON: return _PC_MAX_CANON; case SVR4_PC_MAX_INPUT: return _PC_MAX_INPUT; case SVR4_PC_NAME_MAX: return _PC_NAME_MAX; case SVR4_PC_PATH_MAX: return _PC_PATH_MAX; case SVR4_PC_PIPE_BUF: return _PC_PIPE_BUF; case SVR4_PC_NO_TRUNC: return _PC_NO_TRUNC; case SVR4_PC_VDISABLE: return _PC_VDISABLE; case SVR4_PC_CHOWN_RESTRICTED: return _PC_CHOWN_RESTRICTED; case SVR4_PC_SYNC_IO: #if defined(_PC_SYNC_IO) return _PC_SYNC_IO; #else return 0; #endif case SVR4_PC_ASYNC_IO: case SVR4_PC_PRIO_IO: /* Not supported */ return 0; default: /* Invalid */ return -1; } } int svr4_sys_pathconf(td, uap) register struct thread *td; struct svr4_sys_pathconf_args *uap; { char *path; int error, name; name = svr4_to_bsd_pathconf(uap->name); switch (name) { case -1: td->td_retval[0] = -1; return (EINVAL); case 0: td->td_retval[0] = 0; return (0); default: CHECKALTEXIST(td, uap->path, &path); error = kern_pathconf(td, path, UIO_SYSSPACE, name); free(path, M_TEMP); return (error); } } int svr4_sys_fpathconf(td, uap) register struct thread *td; struct svr4_sys_fpathconf_args *uap; { register_t *retval = td->td_retval; uap->name = svr4_to_bsd_pathconf(uap->name); switch (uap->name) { case -1: *retval = -1; return EINVAL; case 0: *retval = 0; return 0; default: return fpathconf(td, (struct fpathconf_args *)uap); } } Index: head/sys/dev/syscons/daemon/daemon_saver.c =================================================================== --- head/sys/dev/syscons/daemon/daemon_saver.c (revision 180290) +++ head/sys/dev/syscons/daemon/daemon_saver.c (revision 180291) @@ -1,382 +1,384 @@ /*- * Copyright (c) 1997 Sandro Sigala, Brescia, Italy. * Copyright (c) 1997 Chris Shenton * Copyright (c) 1995 S ren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #define DAEMON_MAX_WIDTH 32 #define DAEMON_MAX_HEIGHT 19 static u_char *message; static int messagelen; static int blanked; static int attr_mask; #define ATTR(attr) (((attr) & attr_mask) << 8) /* Who is the author of this ASCII pic? */ static u_char *daemon_pic[] = { " , ,", " /( )`", " \\ \\___ / |", " /- _ `-/ '", " (/\\/ \\ \\ /\\", " / / | ` \\", " O O ) / |", " `-^--'`< '", " (_.) _ ) /", " `.___/` /", " `-----' /", "<----. __ / __ \\", "<----|====O)))==) \\) /====", "<----' `--' `.__,' \\", " | |", " \\ / /\\", " ______( (_ / \\______/", " ,' ,-----' |", " `--{__________)", NULL }; static u_char *daemon_attr[] = { " R R", " RR RR", " R RRRR R R", " RR W RRR R", " RWWW W R RR", " W W W R R", " B B W R R", " WWWWWWRR R", " RRRR R R R", " RRRRRRR R", " RRRRRRR R", "YYYYYY RR R RR R", "YYYYYYYYYYRRRRYYR RR RYYYY", "YYYYYY RRRR RRRRRR R", " R R", " R R RR", " CCCCCCR RR R RRRRRRRR", " CC CCCCCCC C", " CCCCCCCCCCCCCCC", NULL }; /* * Reverse a graphics character, or return unaltered if no mirror; * should do alphanumerics too, but I'm too lazy. */ static u_char xflip_symbol(u_char symbol) { static const u_char lchars[] = "`'(){}[]\\/<>"; static const u_char rchars[] = "'`)(}{][/\\><"; int pos; for (pos = 0; lchars[pos] != '\0'; pos++) if (lchars[pos] == symbol) return rchars[pos]; return symbol; } static void clear_daemon(sc_softc_t *sc, int xpos, int ypos, int dxdir, int xoff, int yoff, int xlen, int ylen) { int y; if (xlen <= 0) return; for (y = yoff; y < ylen; y++) { sc_vtb_erase(&sc->cur_scp->scr, (ypos + y)*sc->cur_scp->xsize + xpos + xoff, xlen - xoff, sc->scr_map[0x20], ATTR(FG_LIGHTGREY | BG_BLACK)); } } static void draw_daemon(sc_softc_t *sc, int xpos, int ypos, int dxdir, int xoff, int yoff, int xlen, int ylen) { int x, y; int px; int attr; for (y = yoff; y < ylen; y++) { if (dxdir < 0) px = xoff; else px = DAEMON_MAX_WIDTH - xlen; if (px >= strlen(daemon_pic[y])) continue; for (x = xoff; (x < xlen) && (daemon_pic[y][px] != '\0'); x++, px++) { switch (daemon_attr[y][px]) { case 'R': attr = FG_LIGHTRED | BG_BLACK; break; case 'Y': attr = FG_YELLOW | BG_BLACK; break; case 'B': attr = FG_LIGHTBLUE | BG_BLACK; break; case 'W': attr = FG_LIGHTGREY | BG_BLACK; break; case 'C': attr = FG_CYAN | BG_BLACK; break; default: attr = FG_WHITE | BG_BLACK; break; } if (dxdir < 0) { /* Moving left */ sc_vtb_putc(&sc->cur_scp->scr, (ypos + y)*sc->cur_scp->xsize + xpos + x, sc->scr_map[daemon_pic[y][px]], ATTR(attr)); } else { /* Moving right */ sc_vtb_putc(&sc->cur_scp->scr, (ypos + y)*sc->cur_scp->xsize + xpos + DAEMON_MAX_WIDTH - px - 1, sc->scr_map[xflip_symbol(daemon_pic[y][px])], ATTR(attr)); } } } } static void clear_string(sc_softc_t *sc, int xpos, int ypos, int xoff, char *s, int len) { if (len <= 0) return; sc_vtb_erase(&sc->cur_scp->scr, ypos*sc->cur_scp->xsize + xpos + xoff, len - xoff, sc->scr_map[0x20], ATTR(FG_LIGHTGREY | BG_BLACK)); } static void draw_string(sc_softc_t *sc, int xpos, int ypos, int xoff, u_char *s, int len) { int x; for (x = xoff; x < len; x++) sc_vtb_putc(&sc->cur_scp->scr, ypos*sc->cur_scp->xsize + xpos + x, sc->scr_map[s[x]], ATTR(FG_LIGHTGREEN | BG_BLACK)); } static int daemon_saver(video_adapter_t *adp, int blank) { static int txpos = 10, typos = 10; static int txdir = -1, tydir = -1; static int dxpos = 0, dypos = 0; static int dxdir = 1, dydir = 1; static int moved_daemon = 0; static int xoff, yoff, toff; static int xlen, ylen, tlen; sc_softc_t *sc; scr_stat *scp; int min, max; sc = sc_find_softc(adp, NULL); if (sc == NULL) return EAGAIN; scp = sc->cur_scp; if (blank) { if (adp->va_info.vi_flags & V_INFO_GRAPHICS) return EAGAIN; if (blanked == 0) { /* clear the screen and set the border color */ sc_vtb_clear(&scp->scr, sc->scr_map[0x20], ATTR(FG_LIGHTGREY | BG_BLACK)); vidd_set_hw_cursor(adp, -1, -1); sc_set_border(scp, 0); xlen = ylen = tlen = 0; } if (blanked++ < 2) return 0; blanked = 1; clear_daemon(sc, dxpos, dypos, dxdir, xoff, yoff, xlen, ylen); clear_string(sc, txpos, typos, toff, message, tlen); if (++moved_daemon) { /* * The daemon picture may be off the screen, if * screen size is chagened while the screen * saver is inactive. Make sure the origin of * the picture is between min and max. */ if (scp->xsize <= DAEMON_MAX_WIDTH) { /* * If the screen width is too narrow, we * allow part of the picture go off * the screen so that the daemon won't * flip too often. */ min = scp->xsize - DAEMON_MAX_WIDTH - 10; max = 10; } else { min = 0; max = scp->xsize - DAEMON_MAX_WIDTH; } if (dxpos <= min) { dxpos = min; dxdir = 1; } else if (dxpos >= max) { dxpos = max; dxdir = -1; } if (scp->ysize <= DAEMON_MAX_HEIGHT) { min = scp->ysize - DAEMON_MAX_HEIGHT - 10; max = 10; } else { min = 0; max = scp->ysize - DAEMON_MAX_HEIGHT; } if (dypos <= min) { dypos = min; dydir = 1; } else if (dypos >= max) { dypos = max; dydir = -1; } moved_daemon = -1; dxpos += dxdir; dypos += dydir; /* clip the picture */ xoff = 0; xlen = DAEMON_MAX_WIDTH; if (dxpos + xlen <= 0) xlen = 0; else if (dxpos < 0) xoff = -dxpos; if (dxpos >= scp->xsize) xlen = 0; else if (dxpos + xlen > scp->xsize) xlen = scp->xsize - dxpos; yoff = 0; ylen = DAEMON_MAX_HEIGHT; if (dypos + ylen <= 0) ylen = 0; else if (dypos < 0) yoff = -dypos; if (dypos >= scp->ysize) ylen = 0; else if (dypos + ylen > scp->ysize) ylen = scp->ysize - dypos; } if (scp->xsize <= messagelen) { min = scp->xsize - messagelen - 10; max = 10; } else { min = 0; max = scp->xsize - messagelen; } if (txpos <= min) { txpos = min; txdir = 1; } else if (txpos >= max) { txpos = max; txdir = -1; } if (typos <= 0) { typos = 0; tydir = 1; } else if (typos >= scp->ysize - 1) { typos = scp->ysize - 1; tydir = -1; } txpos += txdir; typos += tydir; toff = 0; tlen = messagelen; if (txpos + tlen <= 0) tlen = 0; else if (txpos < 0) toff = -txpos; if (txpos >= scp->xsize) tlen = 0; else if (txpos + tlen > scp->xsize) tlen = scp->xsize - txpos; draw_daemon(sc, dxpos, dypos, dxdir, xoff, yoff, xlen, ylen); draw_string(sc, txpos, typos, toff, message, tlen); } else blanked = 0; return 0; } static int daemon_init(video_adapter_t *adp) { + + /* XXXRW: Locking -- these can change! */ messagelen = strlen(hostname) + 3 + strlen(ostype) + 1 + strlen(osrelease); message = malloc(messagelen + 1, M_DEVBUF, M_WAITOK); sprintf(message, "%s - %s %s", hostname, ostype, osrelease); blanked = 0; switch (adp->va_mode) { case M_PC98_80x25: case M_PC98_80x30: attr_mask = ~FG_UNDERLINE; break; default: attr_mask = ~0; break; } return 0; } static int daemon_term(video_adapter_t *adp) { free(message, M_DEVBUF); return 0; } static scrn_saver_t daemon_module = { "daemon_saver", daemon_init, daemon_term, daemon_saver, NULL, }; SAVER_MODULE(daemon_saver, daemon_module); Index: head/sys/fs/cd9660/cd9660_rrip.c =================================================================== --- head/sys/fs/cd9660/cd9660_rrip.c (revision 180290) +++ head/sys/fs/cd9660/cd9660_rrip.c (revision 180291) @@ -1,715 +1,717 @@ /*- * Copyright (c) 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)cd9660_rrip.c 8.6 (Berkeley) 12/5/94 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include typedef int rrt_func_t(void *, ISO_RRIP_ANALYZE *ana); typedef struct { char type[2]; rrt_func_t *func; void (*func2)(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana); int result; } RRIP_TABLE; static int cd9660_rrip_altname(ISO_RRIP_ALTNAME *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_attr(ISO_RRIP_ATTR *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_cont(ISO_RRIP_CONT *p, ISO_RRIP_ANALYZE *ana); static void cd9660_rrip_defattr(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana); static void cd9660_rrip_defname(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana); static void cd9660_rrip_deftstamp(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_device(ISO_RRIP_DEVICE *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_extref(ISO_RRIP_EXTREF *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_idflag(ISO_RRIP_IDFLAG *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_loop(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana, RRIP_TABLE *table); static int cd9660_rrip_pclink(ISO_RRIP_CLINK *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_reldir(ISO_RRIP_RELDIR *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_slink(ISO_RRIP_SLINK *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_stop(ISO_SUSP_HEADER *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_tstamp(ISO_RRIP_TSTAMP *p, ISO_RRIP_ANALYZE *ana); /* * POSIX file attribute */ static int cd9660_rrip_attr(p,ana) ISO_RRIP_ATTR *p; ISO_RRIP_ANALYZE *ana; { ana->inop->inode.iso_mode = isonum_733(p->mode); ana->inop->inode.iso_uid = isonum_733(p->uid); ana->inop->inode.iso_gid = isonum_733(p->gid); ana->inop->inode.iso_links = isonum_733(p->links); ana->fields &= ~ISO_SUSP_ATTR; return ISO_SUSP_ATTR; } static void cd9660_rrip_defattr(isodir,ana) struct iso_directory_record *isodir; ISO_RRIP_ANALYZE *ana; { /* But this is a required field! */ printf("RRIP without PX field?\n"); cd9660_defattr(isodir,ana->inop,NULL,ISO_FTYPE_RRIP); } /* * Symbolic Links */ static int cd9660_rrip_slink(p,ana) ISO_RRIP_SLINK *p; ISO_RRIP_ANALYZE *ana; { ISO_RRIP_SLINK_COMPONENT *pcomp; ISO_RRIP_SLINK_COMPONENT *pcompe; int len, wlen, cont; char *outbuf, *inbuf; pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component; pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length)); len = *ana->outlen; outbuf = ana->outbuf; cont = ana->cont; /* * Gathering a Symbolic name from each component with path */ for (; pcomp < pcompe; pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ + isonum_711(pcomp->clen))) { if (!cont) { if (len < ana->maxlen) { len++; *outbuf++ = '/'; } } cont = 0; inbuf = ".."; wlen = 0; switch (*pcomp->cflag) { case ISO_SUSP_CFLAG_CURRENT: /* Inserting Current */ wlen = 1; break; case ISO_SUSP_CFLAG_PARENT: /* Inserting Parent */ wlen = 2; break; case ISO_SUSP_CFLAG_ROOT: /* Inserting slash for ROOT */ /* Double slash, nothing really to do here. */ break; case ISO_SUSP_CFLAG_VOLROOT: /* Inserting a mount point i.e. "/cdrom" */ /* same as above */ outbuf -= len; len = 0; inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname; wlen = strlen(inbuf); break; case ISO_SUSP_CFLAG_HOST: + /* XXXRW: locking. */ /* Inserting hostname i.e. "kurt.tools.de" */ inbuf = hostname; wlen = strlen(hostname); break; case ISO_SUSP_CFLAG_CONTINUE: cont = 1; /* FALLTHROUGH */ case 0: /* Inserting component */ wlen = isonum_711(pcomp->clen); inbuf = pcomp->name; break; default: printf("RRIP with incorrect flags?"); wlen = ana->maxlen + 1; break; } if (len + wlen > ana->maxlen) { /* indicate error to caller */ ana->cont = 1; ana->fields = 0; ana->outbuf -= *ana->outlen; *ana->outlen = 0; return 0; } bcopy(inbuf,outbuf,wlen); outbuf += wlen; len += wlen; } ana->outbuf = outbuf; *ana->outlen = len; ana->cont = cont; if (!isonum_711(p->flags)) { ana->fields &= ~ISO_SUSP_SLINK; return ISO_SUSP_SLINK; } return 0; } /* * Alternate name */ static int cd9660_rrip_altname(p,ana) ISO_RRIP_ALTNAME *p; ISO_RRIP_ANALYZE *ana; { char *inbuf; int wlen; int cont; inbuf = ".."; wlen = 0; cont = 0; switch (*p->flags) { case ISO_SUSP_CFLAG_CURRENT: /* Inserting Current */ wlen = 1; break; case ISO_SUSP_CFLAG_PARENT: /* Inserting Parent */ wlen = 2; break; case ISO_SUSP_CFLAG_HOST: + /* XXXRW: locking. */ /* Inserting hostname i.e. "kurt.tools.de" */ inbuf = hostname; wlen = strlen(hostname); break; case ISO_SUSP_CFLAG_CONTINUE: cont = 1; /* FALLTHROUGH */ case 0: /* Inserting component */ wlen = isonum_711(p->h.length) - 5; inbuf = (char *)p + 5; break; default: printf("RRIP with incorrect NM flags?\n"); wlen = ana->maxlen + 1; break; } if ((*ana->outlen += wlen) > ana->maxlen) { /* treat as no name field */ ana->fields &= ~ISO_SUSP_ALTNAME; ana->outbuf -= *ana->outlen - wlen; *ana->outlen = 0; return 0; } bcopy(inbuf,ana->outbuf,wlen); ana->outbuf += wlen; if (!cont) { ana->fields &= ~ISO_SUSP_ALTNAME; return ISO_SUSP_ALTNAME; } return 0; } static void cd9660_rrip_defname(isodir,ana) struct iso_directory_record *isodir; ISO_RRIP_ANALYZE *ana; { isofntrans(isodir->name,isonum_711(isodir->name_len), ana->outbuf,ana->outlen, 1,isonum_711(isodir->flags)&4, ana->imp->joliet_level, ana->imp->im_flags, ana->imp->im_d2l); switch (*ana->outbuf) { default: break; case 1: *ana->outlen = 2; /* FALLTHROUGH */ case 0: /* outlen is 1 already */ strcpy(ana->outbuf,".."); break; } } /* * Parent or Child Link */ static int cd9660_rrip_pclink(p,ana) ISO_RRIP_CLINK *p; ISO_RRIP_ANALYZE *ana; { *ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift; ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK); return *p->h.type == 'C' ? ISO_SUSP_CLINK : ISO_SUSP_PLINK; } /* * Relocated directory */ static int cd9660_rrip_reldir(p,ana) ISO_RRIP_RELDIR *p; ISO_RRIP_ANALYZE *ana; { /* special hack to make caller aware of RE field */ *ana->outlen = 0; ana->fields = 0; return ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK; } static int cd9660_rrip_tstamp(p,ana) ISO_RRIP_TSTAMP *p; ISO_RRIP_ANALYZE *ana; { u_char *ptime; ptime = p->time; /* Check a format of time stamp (7bytes/17bytes) */ if (!(*p->flags&ISO_SUSP_TSTAMP_FORM17)) { if (*p->flags&ISO_SUSP_TSTAMP_CREAT) ptime += 7; if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) { cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_mtime, ISO_FTYPE_RRIP); ptime += 7; } else bzero(&ana->inop->inode.iso_mtime,sizeof(struct timespec)); if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) { cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_atime, ISO_FTYPE_RRIP); ptime += 7; } else ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime; if (*p->flags&ISO_SUSP_TSTAMP_ATTR) cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_ctime, ISO_FTYPE_RRIP); else ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime; } else { if (*p->flags&ISO_SUSP_TSTAMP_CREAT) ptime += 17; if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) { cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_mtime); ptime += 17; } else bzero(&ana->inop->inode.iso_mtime,sizeof(struct timespec)); if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) { cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_atime); ptime += 17; } else ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime; if (*p->flags&ISO_SUSP_TSTAMP_ATTR) cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_ctime); else ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime; } ana->fields &= ~ISO_SUSP_TSTAMP; return ISO_SUSP_TSTAMP; } static void cd9660_rrip_deftstamp(isodir,ana) struct iso_directory_record *isodir; ISO_RRIP_ANALYZE *ana; { cd9660_deftstamp(isodir,ana->inop,NULL,ISO_FTYPE_RRIP); } /* * POSIX device modes */ static int cd9660_rrip_device(p,ana) ISO_RRIP_DEVICE *p; ISO_RRIP_ANALYZE *ana; { u_int high, low; high = isonum_733(p->dev_t_high); low = isonum_733(p->dev_t_low); if (high == 0) ana->inop->inode.iso_rdev = makedev(umajor(low), uminor(low)); else ana->inop->inode.iso_rdev = makedev(high, uminor(low)); ana->fields &= ~ISO_SUSP_DEVICE; return ISO_SUSP_DEVICE; } /* * Flag indicating */ static int cd9660_rrip_idflag(p,ana) ISO_RRIP_IDFLAG *p; ISO_RRIP_ANALYZE *ana; { ana->fields &= isonum_711(p->flags)|~0xff; /* don't touch high bits */ /* special handling of RE field */ if (ana->fields&ISO_SUSP_RELDIR) return cd9660_rrip_reldir(/* XXX */ (ISO_RRIP_RELDIR *)p,ana); return ISO_SUSP_IDFLAG; } /* * Continuation pointer */ static int cd9660_rrip_cont(p,ana) ISO_RRIP_CONT *p; ISO_RRIP_ANALYZE *ana; { ana->iso_ce_blk = isonum_733(p->location); ana->iso_ce_off = isonum_733(p->offset); ana->iso_ce_len = isonum_733(p->length); return ISO_SUSP_CONT; } /* * System Use end */ static int cd9660_rrip_stop(p,ana) ISO_SUSP_HEADER *p; ISO_RRIP_ANALYZE *ana; { return ISO_SUSP_STOP; } /* * Extension reference */ static int cd9660_rrip_extref(p,ana) ISO_RRIP_EXTREF *p; ISO_RRIP_ANALYZE *ana; { if (isonum_711(p->len_id) != 10 || bcmp((char *)p + 8,"RRIP_1991A",10) || isonum_711(p->version) != 1) return 0; ana->fields &= ~ISO_SUSP_EXTREF; return ISO_SUSP_EXTREF; } static int cd9660_rrip_loop(isodir,ana,table) struct iso_directory_record *isodir; ISO_RRIP_ANALYZE *ana; RRIP_TABLE *table; { RRIP_TABLE *ptable; ISO_SUSP_HEADER *phead; ISO_SUSP_HEADER *pend; struct buf *bp = NULL; char *pwhead; u_short c; int result; /* * Note: If name length is odd, * it will be padding 1 byte after the name */ pwhead = isodir->name + isonum_711(isodir->name_len); if (!(isonum_711(isodir->name_len)&1)) pwhead++; isochar(isodir->name, pwhead, ana->imp->joliet_level, &c, NULL, ana->imp->im_flags, ana->imp->im_d2l); /* If it's not the '.' entry of the root dir obey SP field */ if (c != 0 || isonum_733(isodir->extent) != ana->imp->root_extent) pwhead += ana->imp->rr_skip; else pwhead += ana->imp->rr_skip0; phead = (ISO_SUSP_HEADER *)pwhead; pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length)); result = 0; while (1) { ana->iso_ce_len = 0; /* * Note: "pend" should be more than one SUSP header */ while (pend >= phead + 1) { if (isonum_711(phead->version) == 1) { for (ptable = table; ptable->func; ptable++) { if (*phead->type == *ptable->type && phead->type[1] == ptable->type[1]) { result |= ptable->func(phead,ana); break; } } if (!ana->fields) break; } if (result&ISO_SUSP_STOP) { result &= ~ISO_SUSP_STOP; break; } /* plausibility check */ if (isonum_711(phead->length) < sizeof(*phead)) break; /* * move to next SUSP * Hopefully this works with newer versions, too */ phead = (ISO_SUSP_HEADER *)((char *)phead + isonum_711(phead->length)); } if (ana->fields && ana->iso_ce_len) { if (ana->iso_ce_blk >= ana->imp->volume_space_size || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size || bread(ana->imp->im_devvp, ana->iso_ce_blk << (ana->imp->im_bshift - DEV_BSHIFT), ana->imp->logical_block_size, NOCRED, &bp)) /* what to do now? */ break; phead = (ISO_SUSP_HEADER *)(bp->b_data + ana->iso_ce_off); pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len); } else break; } if (bp) brelse(bp); /* * If we don't find the Basic SUSP stuffs, just set default value * (attribute/time stamp) */ for (ptable = table; ptable->func2; ptable++) if (!(ptable->result&result)) ptable->func2(isodir,ana); return result; } /* * Get Attributes. */ /* * XXX the casts are bogus but will do for now. */ #define BC (rrt_func_t *) static RRIP_TABLE rrip_table_analyze[] = { { "PX", BC cd9660_rrip_attr, cd9660_rrip_defattr, ISO_SUSP_ATTR }, { "TF", BC cd9660_rrip_tstamp, cd9660_rrip_deftstamp, ISO_SUSP_TSTAMP }, { "PN", BC cd9660_rrip_device, 0, ISO_SUSP_DEVICE }, { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; int cd9660_rrip_analyze(isodir,inop,imp) struct iso_directory_record *isodir; struct iso_node *inop; struct iso_mnt *imp; { ISO_RRIP_ANALYZE analyze; analyze.inop = inop; analyze.imp = imp; analyze.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE; return cd9660_rrip_loop(isodir,&analyze,rrip_table_analyze); } /* * Get Alternate Name. */ static RRIP_TABLE rrip_table_getname[] = { { "NM", BC cd9660_rrip_altname, cd9660_rrip_defname, ISO_SUSP_ALTNAME }, { "CL", BC cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, { "PL", BC cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, { "RE", BC cd9660_rrip_reldir, 0, ISO_SUSP_RELDIR }, { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; int cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp) struct iso_directory_record *isodir; char *outbuf; u_short *outlen; ino_t *inump; struct iso_mnt *imp; { ISO_RRIP_ANALYZE analyze; RRIP_TABLE *tab; u_short c; analyze.outbuf = outbuf; analyze.outlen = outlen; analyze.maxlen = NAME_MAX; analyze.inump = inump; analyze.imp = imp; analyze.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK; *outlen = 0; isochar(isodir->name, isodir->name + isonum_711(isodir->name_len), imp->joliet_level, &c, NULL, imp->im_flags, imp->im_d2l); tab = rrip_table_getname; if (c == 0 || c == 1) { cd9660_rrip_defname(isodir,&analyze); analyze.fields &= ~ISO_SUSP_ALTNAME; tab++; } return cd9660_rrip_loop(isodir,&analyze,tab); } /* * Get Symbolic Link. */ static RRIP_TABLE rrip_table_getsymname[] = { { "SL", BC cd9660_rrip_slink, 0, ISO_SUSP_SLINK }, { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; int cd9660_rrip_getsymname(isodir,outbuf,outlen,imp) struct iso_directory_record *isodir; char *outbuf; u_short *outlen; struct iso_mnt *imp; { ISO_RRIP_ANALYZE analyze; analyze.outbuf = outbuf; analyze.outlen = outlen; *outlen = 0; analyze.maxlen = MAXPATHLEN; analyze.cont = 1; /* don't start with a slash */ analyze.imp = imp; analyze.fields = ISO_SUSP_SLINK; return (cd9660_rrip_loop(isodir,&analyze,rrip_table_getsymname)&ISO_SUSP_SLINK); } static RRIP_TABLE rrip_table_extref[] = { { "ER", BC cd9660_rrip_extref, 0, ISO_SUSP_EXTREF }, { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; /* * Check for Rock Ridge Extension and return offset of its fields. * Note: We insist on the ER field. */ int cd9660_rrip_offset(isodir,imp) struct iso_directory_record *isodir; struct iso_mnt *imp; { ISO_RRIP_OFFSET *p; ISO_RRIP_ANALYZE analyze; imp->rr_skip0 = 0; p = (ISO_RRIP_OFFSET *)(isodir->name + 1); if (bcmp(p,"SP\7\1\276\357",6)) { /* Maybe, it's a CDROM XA disc? */ imp->rr_skip0 = 15; p = (ISO_RRIP_OFFSET *)((char *)p + 15); if (bcmp(p,"SP\7\1\276\357",6)) return -1; } analyze.imp = imp; analyze.fields = ISO_SUSP_EXTREF; if (!(cd9660_rrip_loop(isodir,&analyze,rrip_table_extref)&ISO_SUSP_EXTREF)) return -1; return isonum_711(p->skip); } Index: head/sys/geom/vinum/geom_vinum_drive.c =================================================================== --- head/sys/geom/vinum/geom_vinum_drive.c (revision 180290) +++ head/sys/geom/vinum/geom_vinum_drive.c (revision 180291) @@ -1,668 +1,670 @@ /*- * Copyright (c) 2004, 2005 Lukas Ertl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void gv_drive_dead(void *, int); static void gv_drive_worker(void *); void gv_config_new_drive(struct gv_drive *d) { struct gv_hdr *vhdr; struct gv_freelist *fl; KASSERT(d != NULL, ("config_new_drive: NULL d")); vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO); vhdr->magic = GV_MAGIC; vhdr->config_length = GV_CFG_LEN; + mtx_lock(&hostname_mtx); bcopy(hostname, vhdr->label.sysname, GV_HOSTNAME_LEN); + mtx_unlock(&hostname_mtx); strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME); microtime(&vhdr->label.date_of_birth); d->hdr = vhdr; LIST_INIT(&d->subdisks); LIST_INIT(&d->freelist); fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO); fl->offset = GV_DATA_START; fl->size = d->avail; LIST_INSERT_HEAD(&d->freelist, fl, freelist); d->freelist_entries = 1; d->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); bioq_init(d->bqueue); mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); kproc_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name); d->flags |= GV_DRIVE_THREAD_ACTIVE; } void gv_save_config_all(struct gv_softc *sc) { struct gv_drive *d; g_topology_assert(); LIST_FOREACH(d, &sc->drives, drive) { if (d->geom == NULL) continue; gv_save_config(NULL, d, sc); } } /* Save the vinum configuration back to disk. */ void gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc) { struct g_geom *gp; struct g_consumer *cp2; struct gv_hdr *vhdr, *hdr; struct sbuf *sb; int error; g_topology_assert(); KASSERT(d != NULL, ("gv_save_config: null d")); KASSERT(sc != NULL, ("gv_save_config: null sc")); /* * We can't save the config on a drive that isn't up, but drives that * were just created aren't officially up yet, so we check a special * flag. */ if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN)) return; if (cp == NULL) { gp = d->geom; KASSERT(gp != NULL, ("gv_save_config: null gp")); cp2 = LIST_FIRST(&gp->consumer); KASSERT(cp2 != NULL, ("gv_save_config: null cp2")); } else cp2 = cp; vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO); vhdr->magic = GV_MAGIC; vhdr->config_length = GV_CFG_LEN; hdr = d->hdr; if (hdr == NULL) { printf("GEOM_VINUM: drive %s has NULL hdr\n", d->name); g_free(vhdr); return; } microtime(&hdr->label.last_update); bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label)); sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN); gv_format_config(sc, sb, 1, NULL); sbuf_finish(sb); error = g_access(cp2, 0, 1, 0); if (error) { printf("GEOM_VINUM: g_access failed on drive %s, errno %d\n", d->name, error); sbuf_delete(sb); g_free(vhdr); return; } g_topology_unlock(); do { error = g_write_data(cp2, GV_HDR_OFFSET, vhdr, GV_HDR_LEN); if (error) { printf("GEOM_VINUM: writing vhdr failed on drive %s, " "errno %d", d->name, error); break; } error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb), GV_CFG_LEN); if (error) { printf("GEOM_VINUM: writing first config copy failed " "on drive %s, errno %d", d->name, error); break; } error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN, sbuf_data(sb), GV_CFG_LEN); if (error) printf("GEOM_VINUM: writing second config copy failed " "on drive %s, errno %d", d->name, error); } while (0); g_topology_lock(); g_access(cp2, 0, -1, 0); sbuf_delete(sb); g_free(vhdr); if (d->geom != NULL) gv_drive_modify(d); } /* This resembles g_slice_access(). */ static int gv_drive_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp2; struct gv_drive *d; struct gv_sd *s, *s2; int error; gp = pp->geom; cp = LIST_FIRST(&gp->consumer); if (cp == NULL) return (0); d = gp->softc; if (d == NULL) return (0); s = pp->private; KASSERT(s != NULL, ("gv_drive_access: NULL s")); LIST_FOREACH(s2, &d->subdisks, from_drive) { if (s == s2) continue; if (s->drive_offset + s->size <= s2->drive_offset) continue; if (s2->drive_offset + s2->size <= s->drive_offset) continue; /* Overlap. */ pp2 = s2->provider; KASSERT(s2 != NULL, ("gv_drive_access: NULL s2")); if ((pp->acw + dw) > 0 && pp2->ace > 0) return (EPERM); if ((pp->ace + de) > 0 && pp2->acw > 0) return (EPERM); } error = g_access(cp, dr, dw, de); return (error); } static void gv_drive_done(struct bio *bp) { struct gv_drive *d; /* Put the BIO on the worker queue again. */ d = bp->bio_from->geom->softc; bp->bio_cflags |= GV_BIO_DONE; mtx_lock(&d->bqueue_mtx); bioq_insert_tail(d->bqueue, bp); wakeup(d); mtx_unlock(&d->bqueue_mtx); } static void gv_drive_start(struct bio *bp) { struct gv_drive *d; struct gv_sd *s; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; case BIO_GETATTR: default: g_io_deliver(bp, EOPNOTSUPP); return; } s = bp->bio_to->private; if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) { g_io_deliver(bp, ENXIO); return; } d = bp->bio_to->geom->softc; /* * Put the BIO on the worker queue, where the worker thread will pick * it up. */ mtx_lock(&d->bqueue_mtx); bioq_disksort(d->bqueue, bp); wakeup(d); mtx_unlock(&d->bqueue_mtx); } static void gv_drive_worker(void *arg) { struct bio *bp, *cbp; struct g_geom *gp; struct g_provider *pp; struct gv_drive *d; struct gv_sd *s; int error; d = arg; mtx_lock(&d->bqueue_mtx); for (;;) { /* We were signaled to exit. */ if (d->flags & GV_DRIVE_THREAD_DIE) break; /* Take the first BIO from out queue. */ bp = bioq_takefirst(d->bqueue); if (bp == NULL) { msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10); continue; } mtx_unlock(&d->bqueue_mtx); pp = bp->bio_to; gp = pp->geom; /* Completed request. */ if (bp->bio_cflags & GV_BIO_DONE) { error = bp->bio_error; /* Deliver the original request. */ g_std_done(bp); /* The request had an error, we need to clean up. */ if (error != 0) { g_topology_lock(); gv_set_drive_state(d, GV_DRIVE_DOWN, GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); g_topology_unlock(); g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL); } /* New request, needs to be sent downwards. */ } else { s = pp->private; if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) { g_io_deliver(bp, ENXIO); mtx_lock(&d->bqueue_mtx); continue; } if (bp->bio_offset > s->size) { g_io_deliver(bp, EINVAL); mtx_lock(&d->bqueue_mtx); continue; } cbp = g_clone_bio(bp); if (cbp == NULL) { g_io_deliver(bp, ENOMEM); mtx_lock(&d->bqueue_mtx); continue; } if (cbp->bio_offset + cbp->bio_length > s->size) cbp->bio_length = s->size - cbp->bio_offset; cbp->bio_done = gv_drive_done; cbp->bio_offset += s->drive_offset; g_io_request(cbp, LIST_FIRST(&gp->consumer)); } mtx_lock(&d->bqueue_mtx); } while ((bp = bioq_takefirst(d->bqueue)) != NULL) { mtx_unlock(&d->bqueue_mtx); if (bp->bio_cflags & GV_BIO_DONE) g_std_done(bp); else g_io_deliver(bp, ENXIO); mtx_lock(&d->bqueue_mtx); } mtx_unlock(&d->bqueue_mtx); d->flags |= GV_DRIVE_THREAD_DEAD; kproc_exit(ENXIO); } static void gv_drive_orphan(struct g_consumer *cp) { struct g_geom *gp; struct gv_drive *d; g_topology_assert(); gp = cp->geom; g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name); d = gp->softc; if (d != NULL) { gv_set_drive_state(d, GV_DRIVE_DOWN, GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG); g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL); } else g_wither_geom(gp, ENXIO); } static struct g_geom * gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_geom *gp, *gp2; struct g_consumer *cp; struct gv_drive *d; struct gv_sd *s; struct gv_softc *sc; struct gv_freelist *fl; struct gv_hdr *vhdr; int error; char *buf, errstr[ERRBUFSIZ]; vhdr = NULL; d = NULL; g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name); g_topology_assert(); /* Find the VINUM class and its associated geom. */ gp2 = find_vinum_geom(); if (gp2 == NULL) return (NULL); sc = gp2->softc; gp = g_new_geomf(mp, "%s.vinumdrive", pp->name); gp->start = gv_drive_start; gp->orphan = gv_drive_orphan; gp->access = gv_drive_access; gp->start = gv_drive_start; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_access(cp, 1, 0, 0); if (error) { g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } g_topology_unlock(); /* Now check if the provided slice is a valid vinum drive. */ do { vhdr = g_read_data(cp, GV_HDR_OFFSET, pp->sectorsize, NULL); if (vhdr == NULL) break; if (vhdr->magic != GV_MAGIC) { g_free(vhdr); break; } /* A valid vinum drive, let's parse the on-disk information. */ buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL); if (buf == NULL) { g_free(vhdr); break; } g_topology_lock(); gv_parse_config(sc, buf, 1); g_free(buf); /* * Let's see if this drive is already known in the * configuration. */ d = gv_find_drive(sc, vhdr->label.name); /* We already know about this drive. */ if (d != NULL) { /* Check if this drive already has a geom. */ if (d->geom != NULL) { g_topology_unlock(); g_free(vhdr); break; } bcopy(vhdr, d->hdr, sizeof(*vhdr)); g_free(vhdr); /* This is a new drive. */ } else { d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO); /* Initialize all needed variables. */ d->size = pp->mediasize - GV_DATA_START; d->avail = d->size; d->hdr = vhdr; strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME); LIST_INIT(&d->subdisks); LIST_INIT(&d->freelist); /* We also need a freelist entry. */ fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO); fl->offset = GV_DATA_START; fl->size = d->avail; LIST_INSERT_HEAD(&d->freelist, fl, freelist); d->freelist_entries = 1; /* Save it into the main configuration. */ LIST_INSERT_HEAD(&sc->drives, d, drive); } /* * Create bio queue, queue mutex and a worker thread, if * necessary. */ if (d->bqueue == NULL) { d->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO); bioq_init(d->bqueue); } if (mtx_initialized(&d->bqueue_mtx) == 0) mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF); if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) { kproc_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name); d->flags |= GV_DRIVE_THREAD_ACTIVE; } g_access(cp, -1, 0, 0); gp->softc = d; d->geom = gp; d->vinumconf = sc; strncpy(d->device, pp->name, GV_MAXDRIVENAME); /* * Find out which subdisks belong to this drive and crosslink * them. */ LIST_FOREACH(s, &sc->subdisks, sd) { if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME)) /* XXX: errors ignored */ gv_sd_to_drive(sc, d, s, errstr, sizeof(errstr)); } /* This drive is now up for sure. */ gv_set_drive_state(d, GV_DRIVE_UP, 0); /* * If there are subdisks on this drive, we need to create * providers for them. */ if (d->sdcount) gv_drive_modify(d); return (gp); } while (0); g_topology_lock(); g_access(cp, -1, 0, 0); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } /* * Modify the providers for the given drive 'd'. It is assumed that the * subdisk list of 'd' is already correctly set up. */ void gv_drive_modify(struct gv_drive *d) { struct g_geom *gp; struct g_consumer *cp; struct g_provider *pp, *pp2; struct gv_sd *s; KASSERT(d != NULL, ("gv_drive_modify: null d")); gp = d->geom; KASSERT(gp != NULL, ("gv_drive_modify: null gp")); cp = LIST_FIRST(&gp->consumer); KASSERT(cp != NULL, ("gv_drive_modify: null cp")); pp = cp->provider; KASSERT(pp != NULL, ("gv_drive_modify: null pp")); g_topology_assert(); LIST_FOREACH(s, &d->subdisks, from_drive) { /* This subdisk already has a provider. */ if (s->provider != NULL) continue; pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name); pp2->mediasize = s->size; pp2->sectorsize = pp->sectorsize; g_error_provider(pp2, 0); s->provider = pp2; pp2->private = s; } } static void gv_drive_dead(void *arg, int flag) { struct g_geom *gp; struct g_consumer *cp; struct gv_drive *d; struct gv_sd *s; g_topology_assert(); KASSERT(arg != NULL, ("gv_drive_dead: NULL arg")); if (flag == EV_CANCEL) return; d = arg; if (d->state != GV_DRIVE_DOWN) return; g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name); gp = d->geom; if (gp == NULL) return; LIST_FOREACH(cp, &gp->consumer, consumer) { if (cp->nstart != cp->nend) { printf("GEOM_VINUM: dead drive '%s' has still " "active requests, can't detach consumer\n", d->name); g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL); return; } if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0) g_access(cp, -cp->acr, -cp->acw, -cp->ace); } printf("GEOM_VINUM: lost drive '%s'\n", d->name); d->geom = NULL; LIST_FOREACH(s, &d->subdisks, from_drive) { s->provider = NULL; s->consumer = NULL; } gv_kill_drive_thread(d); gp->softc = NULL; g_wither_geom(gp, ENXIO); } static int gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct gv_drive *d; g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name); g_topology_assert(); d = gp->softc; gv_kill_drive_thread(d); g_wither_geom(gp, ENXIO); return (0); } #define VINUMDRIVE_CLASS_NAME "VINUMDRIVE" static struct g_class g_vinum_drive_class = { .name = VINUMDRIVE_CLASS_NAME, .version = G_VERSION, .taste = gv_drive_taste, .destroy_geom = gv_drive_destroy_geom }; DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive); Index: head/sys/i386/ibcs2/ibcs2_socksys.c =================================================================== --- head/sys/i386/ibcs2/ibcs2_socksys.c (revision 180290) +++ head/sys/i386/ibcs2/ibcs2_socksys.c (revision 180291) @@ -1,208 +1,214 @@ /*- * Copyright (c) 1994, 1995 Scott Bartram * Copyright (c) 1994 Arne H Juul * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include +#include +#include #include #include #include /* Local structures */ struct getipdomainname_args { char *ipdomainname; int len; }; struct setipdomainname_args { char *ipdomainname; int len; }; /* Local prototypes */ static int ibcs2_getipdomainname(struct thread *, struct getipdomainname_args *); static int ibcs2_setipdomainname(struct thread *, struct setipdomainname_args *); /* * iBCS2 socksys calls. */ int ibcs2_socksys(td, uap) register struct thread *td; register struct ibcs2_socksys_args *uap; { int error; int realargs[7]; /* 1 for command, 6 for recvfrom */ void *passargs; /* * SOCKET should only be legal on /dev/socksys. * GETIPDOMAINNAME should only be legal on /dev/socksys ? * The others are (and should be) only legal on sockets. */ if ((error = copyin(uap->argsp, (caddr_t)realargs, sizeof(realargs))) != 0) return error; DPRINTF(("ibcs2_socksys: %08x %08x %08x %08x %08x %08x %08x\n", realargs[0], realargs[1], realargs[2], realargs[3], realargs[4], realargs[5], realargs[6])); passargs = (void *)(realargs + 1); switch (realargs[0]) { case SOCKSYS_ACCEPT: return accept(td, passargs); case SOCKSYS_BIND: return bind(td, passargs); case SOCKSYS_CONNECT: return connect(td, passargs); case SOCKSYS_GETPEERNAME: return getpeername(td, passargs); case SOCKSYS_GETSOCKNAME: return getsockname(td, passargs); case SOCKSYS_GETSOCKOPT: return getsockopt(td, passargs); case SOCKSYS_LISTEN: return listen(td, passargs); case SOCKSYS_RECV: realargs[5] = realargs[6] = 0; /* FALLTHROUGH */ case SOCKSYS_RECVFROM: return recvfrom(td, passargs); case SOCKSYS_SEND: realargs[5] = realargs[6] = 0; /* FALLTHROUGH */ case SOCKSYS_SENDTO: return sendto(td, passargs); case SOCKSYS_SETSOCKOPT: return setsockopt(td, passargs); case SOCKSYS_SHUTDOWN: return shutdown(td, passargs); case SOCKSYS_SOCKET: return socket(td, passargs); case SOCKSYS_SELECT: return select(td, passargs); case SOCKSYS_GETIPDOMAIN: return ibcs2_getipdomainname(td, passargs); case SOCKSYS_SETIPDOMAIN: return ibcs2_setipdomainname(td, passargs); case SOCKSYS_ADJTIME: return adjtime(td, passargs); case SOCKSYS_SETREUID: return setreuid(td, passargs); case SOCKSYS_SETREGID: return setregid(td, passargs); case SOCKSYS_GETTIME: return gettimeofday(td, passargs); case SOCKSYS_SETTIME: return settimeofday(td, passargs); case SOCKSYS_GETITIMER: return getitimer(td, passargs); case SOCKSYS_SETITIMER: return setitimer(td, passargs); default: printf("socksys unknown %08x %08x %08x %08x %08x %08x %08x\n", realargs[0], realargs[1], realargs[2], realargs[3], realargs[4], realargs[5], realargs[6]); return EINVAL; } /* NOTREACHED */ } /* ARGSUSED */ static int ibcs2_getipdomainname(td, uap) struct thread *td; struct getipdomainname_args *uap; { char hname[MAXHOSTNAMELEN], *dptr; int len; /* Get the domain name */ getcredhostname(td->td_ucred, hname, sizeof(hname)); dptr = index(hname, '.'); if ( dptr ) dptr++; else /* Make it effectively an empty string */ dptr = hname + strlen(hname); len = strlen(dptr) + 1; if ((u_int)uap->len > len + 1) uap->len = len + 1; return (copyout((caddr_t)dptr, (caddr_t)uap->ipdomainname, uap->len)); } /* ARGSUSED */ static int ibcs2_setipdomainname(td, uap) struct thread *td; struct setipdomainname_args *uap; { char hname[MAXHOSTNAMELEN], *ptr; int error, sctl[2], hlen; /* W/out a hostname a domain-name is nonsense */ - if ( strlen(hostname) == 0 ) + mtx_lock(&hostname_mtx); + if ( strlen(hostname) == 0 ) { + mtx_unlock(&hostname_mtx); return EINVAL; + } /* Get the host's unqualified name (strip off the domain) */ snprintf(hname, sizeof(hname), "%s", hostname); + mtx_unlock(&hostname_mtx); ptr = index(hname, '.'); if ( ptr != NULL ) { ptr++; *ptr = '\0'; } else { if (strlcat(hname, ".", sizeof(hname)) >= sizeof(hname)) return (EINVAL); } /* Set ptr to the end of the string so we can append to it */ hlen = strlen(hname); ptr = hname + hlen; if ((u_int)uap->len > (sizeof (hname) - hlen - 1)) return EINVAL; /* Append the ipdomain to the end */ error = copyinstr((caddr_t)uap->ipdomainname, ptr, uap->len, NULL); if (error) return (error); /* 'sethostname' with the new information */ sctl[0] = CTL_KERN; sctl[1] = KERN_HOSTNAME; hlen = strlen(hname) + 1; return (kernel_sysctl(td, sctl, 2, 0, 0, hname, hlen, 0, 0)); } Index: head/sys/kern/kern_jail.c =================================================================== --- head/sys/kern/kern_jail.c (revision 180290) +++ head/sys/kern/kern_jail.c (revision 180291) @@ -1,992 +1,995 @@ /*- * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- */ #include __FBSDID("$FreeBSD$"); #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_PRISON, "prison", "Prison structures"); SYSCTL_NODE(_security, OID_AUTO, jail, CTLFLAG_RW, 0, "Jail rules"); int jail_set_hostname_allowed = 1; SYSCTL_INT(_security_jail, OID_AUTO, set_hostname_allowed, CTLFLAG_RW, &jail_set_hostname_allowed, 0, "Processes in jail can set their hostnames"); int jail_socket_unixiproute_only = 1; SYSCTL_INT(_security_jail, OID_AUTO, socket_unixiproute_only, CTLFLAG_RW, &jail_socket_unixiproute_only, 0, "Processes in jail are limited to creating UNIX/IPv4/route sockets only"); int jail_sysvipc_allowed = 0; SYSCTL_INT(_security_jail, OID_AUTO, sysvipc_allowed, CTLFLAG_RW, &jail_sysvipc_allowed, 0, "Processes in jail can use System V IPC primitives"); static int jail_enforce_statfs = 2; SYSCTL_INT(_security_jail, OID_AUTO, enforce_statfs, CTLFLAG_RW, &jail_enforce_statfs, 0, "Processes in jail cannot see all mounted file systems"); int jail_allow_raw_sockets = 0; SYSCTL_INT(_security_jail, OID_AUTO, allow_raw_sockets, CTLFLAG_RW, &jail_allow_raw_sockets, 0, "Prison root can create raw sockets"); int jail_chflags_allowed = 0; SYSCTL_INT(_security_jail, OID_AUTO, chflags_allowed, CTLFLAG_RW, &jail_chflags_allowed, 0, "Processes in jail can alter system file flags"); int jail_mount_allowed = 0; SYSCTL_INT(_security_jail, OID_AUTO, mount_allowed, CTLFLAG_RW, &jail_mount_allowed, 0, "Processes in jail can mount/unmount jail-friendly file systems"); /* allprison, lastprid, and prisoncount are protected by allprison_lock. */ struct prisonlist allprison; struct sx allprison_lock; int lastprid = 0; int prisoncount = 0; /* * List of jail services. Protected by allprison_lock. */ TAILQ_HEAD(prison_services_head, prison_service); static struct prison_services_head prison_services = TAILQ_HEAD_INITIALIZER(prison_services); static int prison_service_slots = 0; struct prison_service { prison_create_t ps_create; prison_destroy_t ps_destroy; int ps_slotno; TAILQ_ENTRY(prison_service) ps_next; char ps_name[0]; }; static void init_prison(void *); static void prison_complete(void *context, int pending); static int sysctl_jail_list(SYSCTL_HANDLER_ARGS); static void init_prison(void *data __unused) { sx_init(&allprison_lock, "allprison"); LIST_INIT(&allprison); } SYSINIT(prison, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_prison, NULL); /* * struct jail_args { * struct jail *jail; * }; */ int jail(struct thread *td, struct jail_args *uap) { struct nameidata nd; struct prison *pr, *tpr; struct prison_service *psrv; struct jail j; struct jail_attach_args jaa; int vfslocked, error, tryprid; error = copyin(uap->jail, &j, sizeof(j)); if (error) return (error); if (j.version != 0) return (EINVAL); MALLOC(pr, struct prison *, sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO); mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF); pr->pr_ref = 1; error = copyinstr(j.path, &pr->pr_path, sizeof(pr->pr_path), 0); if (error) goto e_killmtx; NDINIT(&nd, LOOKUP, MPSAFE | FOLLOW | LOCKLEAF, UIO_SYSSPACE, pr->pr_path, td); error = namei(&nd); if (error) goto e_killmtx; vfslocked = NDHASGIANT(&nd); pr->pr_root = nd.ni_vp; VOP_UNLOCK(nd.ni_vp, 0); NDFREE(&nd, NDF_ONLY_PNBUF); VFS_UNLOCK_GIANT(vfslocked); error = copyinstr(j.hostname, &pr->pr_host, sizeof(pr->pr_host), 0); if (error) goto e_dropvnref; pr->pr_ip = j.ip_number; pr->pr_linux = NULL; pr->pr_securelevel = securelevel; if (prison_service_slots == 0) pr->pr_slots = NULL; else { pr->pr_slots = malloc(sizeof(*pr->pr_slots) * prison_service_slots, M_PRISON, M_ZERO | M_WAITOK); } /* Determine next pr_id and add prison to allprison list. */ sx_xlock(&allprison_lock); tryprid = lastprid + 1; if (tryprid == JAIL_MAX) tryprid = 1; next: LIST_FOREACH(tpr, &allprison, pr_list) { if (tpr->pr_id == tryprid) { tryprid++; if (tryprid == JAIL_MAX) { sx_xunlock(&allprison_lock); error = EAGAIN; goto e_dropvnref; } goto next; } } pr->pr_id = jaa.jid = lastprid = tryprid; LIST_INSERT_HEAD(&allprison, pr, pr_list); prisoncount++; sx_downgrade(&allprison_lock); TAILQ_FOREACH(psrv, &prison_services, ps_next) { psrv->ps_create(psrv, pr); } sx_sunlock(&allprison_lock); error = jail_attach(td, &jaa); if (error) goto e_dropprref; mtx_lock(&pr->pr_mtx); pr->pr_ref--; mtx_unlock(&pr->pr_mtx); td->td_retval[0] = jaa.jid; return (0); e_dropprref: sx_xlock(&allprison_lock); LIST_REMOVE(pr, pr_list); prisoncount--; sx_downgrade(&allprison_lock); TAILQ_FOREACH(psrv, &prison_services, ps_next) { psrv->ps_destroy(psrv, pr); } sx_sunlock(&allprison_lock); e_dropvnref: vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); vrele(pr->pr_root); VFS_UNLOCK_GIANT(vfslocked); e_killmtx: mtx_destroy(&pr->pr_mtx); FREE(pr, M_PRISON); return (error); } /* * struct jail_attach_args { * int jid; * }; */ int jail_attach(struct thread *td, struct jail_attach_args *uap) { struct proc *p; struct ucred *newcred, *oldcred; struct prison *pr; int vfslocked, error; /* * XXX: Note that there is a slight race here if two threads * in the same privileged process attempt to attach to two * different jails at the same time. It is important for * user processes not to do this, or they might end up with * a process root from one prison, but attached to the jail * of another. */ error = priv_check(td, PRIV_JAIL_ATTACH); if (error) return (error); p = td->td_proc; sx_slock(&allprison_lock); pr = prison_find(uap->jid); if (pr == NULL) { sx_sunlock(&allprison_lock); return (EINVAL); } pr->pr_ref++; mtx_unlock(&pr->pr_mtx); sx_sunlock(&allprison_lock); vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); vn_lock(pr->pr_root, LK_EXCLUSIVE | LK_RETRY); if ((error = change_dir(pr->pr_root, td)) != 0) goto e_unlock; #ifdef MAC if ((error = mac_vnode_check_chroot(td->td_ucred, pr->pr_root))) goto e_unlock; #endif VOP_UNLOCK(pr->pr_root, 0); change_root(pr->pr_root, td); VFS_UNLOCK_GIANT(vfslocked); newcred = crget(); PROC_LOCK(p); oldcred = p->p_ucred; setsugid(p); crcopy(newcred, oldcred); newcred->cr_prison = pr; p->p_ucred = newcred; PROC_UNLOCK(p); crfree(oldcred); return (0); e_unlock: VOP_UNLOCK(pr->pr_root, 0); VFS_UNLOCK_GIANT(vfslocked); mtx_lock(&pr->pr_mtx); pr->pr_ref--; mtx_unlock(&pr->pr_mtx); return (error); } /* * Returns a locked prison instance, or NULL on failure. */ struct prison * prison_find(int prid) { struct prison *pr; sx_assert(&allprison_lock, SX_LOCKED); LIST_FOREACH(pr, &allprison, pr_list) { if (pr->pr_id == prid) { mtx_lock(&pr->pr_mtx); if (pr->pr_ref == 0) { mtx_unlock(&pr->pr_mtx); break; } return (pr); } } return (NULL); } void prison_free(struct prison *pr) { mtx_lock(&pr->pr_mtx); pr->pr_ref--; if (pr->pr_ref == 0) { mtx_unlock(&pr->pr_mtx); TASK_INIT(&pr->pr_task, 0, prison_complete, pr); taskqueue_enqueue(taskqueue_thread, &pr->pr_task); return; } mtx_unlock(&pr->pr_mtx); } static void prison_complete(void *context, int pending) { struct prison_service *psrv; struct prison *pr; int vfslocked; pr = (struct prison *)context; sx_xlock(&allprison_lock); LIST_REMOVE(pr, pr_list); prisoncount--; sx_downgrade(&allprison_lock); TAILQ_FOREACH(psrv, &prison_services, ps_next) { psrv->ps_destroy(psrv, pr); } sx_sunlock(&allprison_lock); vfslocked = VFS_LOCK_GIANT(pr->pr_root->v_mount); vrele(pr->pr_root); VFS_UNLOCK_GIANT(vfslocked); mtx_destroy(&pr->pr_mtx); if (pr->pr_linux != NULL) FREE(pr->pr_linux, M_PRISON); FREE(pr, M_PRISON); } void prison_hold(struct prison *pr) { mtx_lock(&pr->pr_mtx); KASSERT(pr->pr_ref > 0, ("Trying to hold dead prison (id=%d).", pr->pr_id)); pr->pr_ref++; mtx_unlock(&pr->pr_mtx); } u_int32_t prison_getip(struct ucred *cred) { return (cred->cr_prison->pr_ip); } int prison_ip(struct ucred *cred, int flag, u_int32_t *ip) { u_int32_t tmp; if (!jailed(cred)) return (0); if (flag) tmp = *ip; else tmp = ntohl(*ip); if (tmp == INADDR_ANY) { if (flag) *ip = cred->cr_prison->pr_ip; else *ip = htonl(cred->cr_prison->pr_ip); return (0); } if (tmp == INADDR_LOOPBACK) { if (flag) *ip = cred->cr_prison->pr_ip; else *ip = htonl(cred->cr_prison->pr_ip); return (0); } if (cred->cr_prison->pr_ip != tmp) return (1); return (0); } void prison_remote_ip(struct ucred *cred, int flag, u_int32_t *ip) { u_int32_t tmp; if (!jailed(cred)) return; if (flag) tmp = *ip; else tmp = ntohl(*ip); if (tmp == INADDR_LOOPBACK) { if (flag) *ip = cred->cr_prison->pr_ip; else *ip = htonl(cred->cr_prison->pr_ip); return; } return; } int prison_if(struct ucred *cred, struct sockaddr *sa) { struct sockaddr_in *sai; int ok; sai = (struct sockaddr_in *)sa; if ((sai->sin_family != AF_INET) && jail_socket_unixiproute_only) ok = 1; else if (sai->sin_family != AF_INET) ok = 0; else if (cred->cr_prison->pr_ip != ntohl(sai->sin_addr.s_addr)) ok = 1; else ok = 0; return (ok); } /* * Return 0 if jails permit p1 to frob p2, otherwise ESRCH. */ int prison_check(struct ucred *cred1, struct ucred *cred2) { if (jailed(cred1)) { if (!jailed(cred2)) return (ESRCH); if (cred2->cr_prison != cred1->cr_prison) return (ESRCH); } return (0); } /* * Return 1 if the passed credential is in a jail, otherwise 0. */ int jailed(struct ucred *cred) { return (cred->cr_prison != NULL); } /* * Return the correct hostname for the passed credential. */ void getcredhostname(struct ucred *cred, char *buf, size_t size) { if (jailed(cred)) { mtx_lock(&cred->cr_prison->pr_mtx); strlcpy(buf, cred->cr_prison->pr_host, size); mtx_unlock(&cred->cr_prison->pr_mtx); - } else + } else { + mtx_lock(&hostname_mtx); strlcpy(buf, hostname, size); + mtx_unlock(&hostname_mtx); + } } /* * Determine whether the subject represented by cred can "see" * status of a mount point. * Returns: 0 for permitted, ENOENT otherwise. * XXX: This function should be called cr_canseemount() and should be * placed in kern_prot.c. */ int prison_canseemount(struct ucred *cred, struct mount *mp) { struct prison *pr; struct statfs *sp; size_t len; if (!jailed(cred) || jail_enforce_statfs == 0) return (0); pr = cred->cr_prison; if (pr->pr_root->v_mount == mp) return (0); if (jail_enforce_statfs == 2) return (ENOENT); /* * If jail's chroot directory is set to "/" we should be able to see * all mount-points from inside a jail. * This is ugly check, but this is the only situation when jail's * directory ends with '/'. */ if (strcmp(pr->pr_path, "/") == 0) return (0); len = strlen(pr->pr_path); sp = &mp->mnt_stat; if (strncmp(pr->pr_path, sp->f_mntonname, len) != 0) return (ENOENT); /* * Be sure that we don't have situation where jail's root directory * is "/some/path" and mount point is "/some/pathpath". */ if (sp->f_mntonname[len] != '\0' && sp->f_mntonname[len] != '/') return (ENOENT); return (0); } void prison_enforce_statfs(struct ucred *cred, struct mount *mp, struct statfs *sp) { char jpath[MAXPATHLEN]; struct prison *pr; size_t len; if (!jailed(cred) || jail_enforce_statfs == 0) return; pr = cred->cr_prison; if (prison_canseemount(cred, mp) != 0) { bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); strlcpy(sp->f_mntonname, "[restricted]", sizeof(sp->f_mntonname)); return; } if (pr->pr_root->v_mount == mp) { /* * Clear current buffer data, so we are sure nothing from * the valid path left there. */ bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); *sp->f_mntonname = '/'; return; } /* * If jail's chroot directory is set to "/" we should be able to see * all mount-points from inside a jail. */ if (strcmp(pr->pr_path, "/") == 0) return; len = strlen(pr->pr_path); strlcpy(jpath, sp->f_mntonname + len, sizeof(jpath)); /* * Clear current buffer data, so we are sure nothing from * the valid path left there. */ bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); if (*jpath == '\0') { /* Should never happen. */ *sp->f_mntonname = '/'; } else { strlcpy(sp->f_mntonname, jpath, sizeof(sp->f_mntonname)); } } /* * Check with permission for a specific privilege is granted within jail. We * have a specific list of accepted privileges; the rest are denied. */ int prison_priv_check(struct ucred *cred, int priv) { if (!jailed(cred)) return (0); switch (priv) { /* * Allow ktrace privileges for root in jail. */ case PRIV_KTRACE: #if 0 /* * Allow jailed processes to configure audit identity and * submit audit records (login, etc). In the future we may * want to further refine the relationship between audit and * jail. */ case PRIV_AUDIT_GETAUDIT: case PRIV_AUDIT_SETAUDIT: case PRIV_AUDIT_SUBMIT: #endif /* * Allow jailed processes to manipulate process UNIX * credentials in any way they see fit. */ case PRIV_CRED_SETUID: case PRIV_CRED_SETEUID: case PRIV_CRED_SETGID: case PRIV_CRED_SETEGID: case PRIV_CRED_SETGROUPS: case PRIV_CRED_SETREUID: case PRIV_CRED_SETREGID: case PRIV_CRED_SETRESUID: case PRIV_CRED_SETRESGID: /* * Jail implements visibility constraints already, so allow * jailed root to override uid/gid-based constraints. */ case PRIV_SEEOTHERGIDS: case PRIV_SEEOTHERUIDS: /* * Jail implements inter-process debugging limits already, so * allow jailed root various debugging privileges. */ case PRIV_DEBUG_DIFFCRED: case PRIV_DEBUG_SUGID: case PRIV_DEBUG_UNPRIV: /* * Allow jail to set various resource limits and login * properties, and for now, exceed process resource limits. */ case PRIV_PROC_LIMIT: case PRIV_PROC_SETLOGIN: case PRIV_PROC_SETRLIMIT: /* * System V and POSIX IPC privileges are granted in jail. */ case PRIV_IPC_READ: case PRIV_IPC_WRITE: case PRIV_IPC_ADMIN: case PRIV_IPC_MSGSIZE: case PRIV_MQ_ADMIN: /* * Jail implements its own inter-process limits, so allow * root processes in jail to change scheduling on other * processes in the same jail. Likewise for signalling. */ case PRIV_SCHED_DIFFCRED: case PRIV_SIGNAL_DIFFCRED: case PRIV_SIGNAL_SUGID: /* * Allow jailed processes to write to sysctls marked as jail * writable. */ case PRIV_SYSCTL_WRITEJAIL: /* * Allow root in jail to manage a variety of quota * properties. These should likely be conditional on a * configuration option. */ case PRIV_VFS_GETQUOTA: case PRIV_VFS_SETQUOTA: /* * Since Jail relies on chroot() to implement file system * protections, grant many VFS privileges to root in jail. * Be careful to exclude mount-related and NFS-related * privileges. */ case PRIV_VFS_READ: case PRIV_VFS_WRITE: case PRIV_VFS_ADMIN: case PRIV_VFS_EXEC: case PRIV_VFS_LOOKUP: case PRIV_VFS_BLOCKRESERVE: /* XXXRW: Slightly surprising. */ case PRIV_VFS_CHFLAGS_DEV: case PRIV_VFS_CHOWN: case PRIV_VFS_CHROOT: case PRIV_VFS_RETAINSUGID: case PRIV_VFS_FCHROOT: case PRIV_VFS_LINK: case PRIV_VFS_SETGID: case PRIV_VFS_STAT: case PRIV_VFS_STICKYFILE: return (0); /* * Depending on the global setting, allow privilege of * setting system flags. */ case PRIV_VFS_SYSFLAGS: if (jail_chflags_allowed) return (0); else return (EPERM); /* * Depending on the global setting, allow privilege of * mounting/unmounting file systems. */ case PRIV_VFS_MOUNT: case PRIV_VFS_UNMOUNT: case PRIV_VFS_MOUNT_NONUSER: case PRIV_VFS_MOUNT_OWNER: if (jail_mount_allowed) return (0); else return (EPERM); /* * Allow jailed root to bind reserved ports and reuse in-use * ports. */ case PRIV_NETINET_RESERVEDPORT: case PRIV_NETINET_REUSEPORT: return (0); /* * Allow jailed root to set certian IPv4/6 (option) headers. */ case PRIV_NETINET_SETHDROPTS: return (0); /* * Conditionally allow creating raw sockets in jail. */ case PRIV_NETINET_RAW: if (jail_allow_raw_sockets) return (0); else return (EPERM); /* * Since jail implements its own visibility limits on netstat * sysctls, allow getcred. This allows identd to work in * jail. */ case PRIV_NETINET_GETCRED: return (0); default: /* * In all remaining cases, deny the privilege request. This * includes almost all network privileges, many system * configuration privileges. */ return (EPERM); } } /* * Register jail service. Provides 'create' and 'destroy' methods. * 'create' method will be called for every existing jail and all * jails in the future as they beeing created. * 'destroy' method will be called for every jail going away and * for all existing jails at the time of service deregistration. */ struct prison_service * prison_service_register(const char *name, prison_create_t create, prison_destroy_t destroy) { struct prison_service *psrv, *psrv2; struct prison *pr; int reallocate = 1, slotno = 0; void **slots, **oldslots; psrv = malloc(sizeof(*psrv) + strlen(name) + 1, M_PRISON, M_WAITOK | M_ZERO); psrv->ps_create = create; psrv->ps_destroy = destroy; strcpy(psrv->ps_name, name); /* * Grab the allprison_lock here, so we won't miss any jail * creation/destruction. */ sx_xlock(&allprison_lock); #ifdef INVARIANTS /* * Verify if service is not already registered. */ TAILQ_FOREACH(psrv2, &prison_services, ps_next) { KASSERT(strcmp(psrv2->ps_name, name) != 0, ("jail service %s already registered", name)); } #endif /* * Find free slot. When there is no existing free slot available, * allocate one at the end. */ TAILQ_FOREACH(psrv2, &prison_services, ps_next) { if (psrv2->ps_slotno != slotno) { KASSERT(slotno < psrv2->ps_slotno, ("Invalid slotno (slotno=%d >= ps_slotno=%d", slotno, psrv2->ps_slotno)); /* We found free slot. */ reallocate = 0; break; } slotno++; } psrv->ps_slotno = slotno; /* * Keep the list sorted by slot number. */ if (psrv2 != NULL) { KASSERT(reallocate == 0, ("psrv2 != NULL && reallocate != 0")); TAILQ_INSERT_BEFORE(psrv2, psrv, ps_next); } else { KASSERT(reallocate == 1, ("psrv2 == NULL && reallocate == 0")); TAILQ_INSERT_TAIL(&prison_services, psrv, ps_next); } prison_service_slots++; sx_downgrade(&allprison_lock); /* * Allocate memory for new slot if we didn't found empty one. * Do not use realloc(9), because pr_slots is protected with a mutex, * so we can't sleep. */ LIST_FOREACH(pr, &allprison, pr_list) { if (reallocate) { /* First allocate memory with M_WAITOK. */ slots = malloc(sizeof(*slots) * prison_service_slots, M_PRISON, M_WAITOK); /* Now grab the mutex and replace pr_slots. */ mtx_lock(&pr->pr_mtx); oldslots = pr->pr_slots; if (psrv->ps_slotno > 0) { bcopy(oldslots, slots, sizeof(*slots) * (prison_service_slots - 1)); } slots[psrv->ps_slotno] = NULL; pr->pr_slots = slots; mtx_unlock(&pr->pr_mtx); if (oldslots != NULL) free(oldslots, M_PRISON); } /* * Call 'create' method for each existing jail. */ psrv->ps_create(psrv, pr); } sx_sunlock(&allprison_lock); return (psrv); } void prison_service_deregister(struct prison_service *psrv) { struct prison *pr; void **slots, **oldslots; int last = 0; sx_xlock(&allprison_lock); if (TAILQ_LAST(&prison_services, prison_services_head) == psrv) last = 1; TAILQ_REMOVE(&prison_services, psrv, ps_next); prison_service_slots--; sx_downgrade(&allprison_lock); LIST_FOREACH(pr, &allprison, pr_list) { /* * Call 'destroy' method for every currently existing jail. */ psrv->ps_destroy(psrv, pr); /* * If this is the last slot, free the memory allocated for it. */ if (last) { if (prison_service_slots == 0) slots = NULL; else { slots = malloc(sizeof(*slots) * prison_service_slots, M_PRISON, M_WAITOK); } mtx_lock(&pr->pr_mtx); oldslots = pr->pr_slots; /* * We require setting slot to NULL after freeing it, * this way we can check for memory leaks here. */ KASSERT(oldslots[psrv->ps_slotno] == NULL, ("Slot %d (service %s, jailid=%d) still contains data?", psrv->ps_slotno, psrv->ps_name, pr->pr_id)); if (psrv->ps_slotno > 0) { bcopy(oldslots, slots, sizeof(*slots) * prison_service_slots); } pr->pr_slots = slots; mtx_unlock(&pr->pr_mtx); KASSERT(oldslots != NULL, ("oldslots == NULL")); free(oldslots, M_PRISON); } } sx_sunlock(&allprison_lock); free(psrv, M_PRISON); } /* * Function sets data for the given jail in slot assigned for the given * jail service. */ void prison_service_data_set(struct prison_service *psrv, struct prison *pr, void *data) { mtx_assert(&pr->pr_mtx, MA_OWNED); pr->pr_slots[psrv->ps_slotno] = data; } /* * Function clears slots assigned for the given jail service in the given * prison structure and returns current slot data. */ void * prison_service_data_del(struct prison_service *psrv, struct prison *pr) { void *data; mtx_assert(&pr->pr_mtx, MA_OWNED); data = pr->pr_slots[psrv->ps_slotno]; pr->pr_slots[psrv->ps_slotno] = NULL; return (data); } /* * Function returns current data from the slot assigned to the given jail * service for the given jail. */ void * prison_service_data_get(struct prison_service *psrv, struct prison *pr) { mtx_assert(&pr->pr_mtx, MA_OWNED); return (pr->pr_slots[psrv->ps_slotno]); } static int sysctl_jail_list(SYSCTL_HANDLER_ARGS) { struct xprison *xp, *sxp; struct prison *pr; int count, error; if (jailed(req->td->td_ucred)) return (0); sx_slock(&allprison_lock); if ((count = prisoncount) == 0) { sx_sunlock(&allprison_lock); return (0); } sxp = xp = malloc(sizeof(*xp) * count, M_TEMP, M_WAITOK | M_ZERO); LIST_FOREACH(pr, &allprison, pr_list) { xp->pr_version = XPRISON_VERSION; xp->pr_id = pr->pr_id; xp->pr_ip = pr->pr_ip; strlcpy(xp->pr_path, pr->pr_path, sizeof(xp->pr_path)); mtx_lock(&pr->pr_mtx); strlcpy(xp->pr_host, pr->pr_host, sizeof(xp->pr_host)); mtx_unlock(&pr->pr_mtx); xp++; } sx_sunlock(&allprison_lock); error = SYSCTL_OUT(req, sxp, sizeof(*sxp) * count); free(sxp, M_TEMP); return (error); } SYSCTL_OID(_security_jail, OID_AUTO, list, CTLTYPE_STRUCT | CTLFLAG_RD, NULL, 0, sysctl_jail_list, "S", "List of active jails"); static int sysctl_jail_jailed(SYSCTL_HANDLER_ARGS) { int error, injail; injail = jailed(req->td->td_ucred); error = SYSCTL_OUT(req, &injail, sizeof(injail)); return (error); } SYSCTL_PROC(_security_jail, OID_AUTO, jailed, CTLTYPE_INT | CTLFLAG_RD, NULL, 0, sysctl_jail_jailed, "I", "Process in jail?"); Index: head/sys/kern/kern_mib.c =================================================================== --- head/sys/kern/kern_mib.c (revision 180290) +++ head/sys/kern/kern_mib.c (revision 180291) @@ -1,428 +1,464 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Mike Karels at Berkeley Software Design, Inc. * * Quite extensively rewritten by Poul-Henning Kamp of the FreeBSD * project, to make these variables more userfriendly. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_sysctl.c 8.4 (Berkeley) 4/14/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_posix.h" #include "opt_config.h" #include #include #include #include #include #include #include #include #include #include #include SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0, "Sysctl internal magic"); SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW, 0, "High kernel, proc, limits &c"); SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW, 0, "Virtual memory"); SYSCTL_NODE(, CTL_VFS, vfs, CTLFLAG_RW, 0, "File system"); SYSCTL_NODE(, CTL_NET, net, CTLFLAG_RW, 0, "Network, (see socket.h)"); SYSCTL_NODE(, CTL_DEBUG, debug, CTLFLAG_RW, 0, "Debugging"); SYSCTL_NODE(_debug, OID_AUTO, sizeof, CTLFLAG_RW, 0, "Sizeof various things"); SYSCTL_NODE(, CTL_HW, hw, CTLFLAG_RW, 0, "hardware"); SYSCTL_NODE(, CTL_MACHDEP, machdep, CTLFLAG_RW, 0, "machine dependent"); SYSCTL_NODE(, CTL_USER, user, CTLFLAG_RW, 0, "user-level"); SYSCTL_NODE(, CTL_P1003_1B, p1003_1b, CTLFLAG_RW, 0, "p1003_1b, (see p1003_1b.h)"); SYSCTL_NODE(, OID_AUTO, compat, CTLFLAG_RW, 0, "Compatibility code"); SYSCTL_NODE(, OID_AUTO, security, CTLFLAG_RW, 0, "Security"); #ifdef REGRESSION SYSCTL_NODE(, OID_AUTO, regression, CTLFLAG_RW, 0, "Regression test MIB"); #endif SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD, kern_ident, 0, "Kernel identifier"); SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD, osrelease, 0, "Operating system release"); SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD, 0, BSD, "Operating system revision"); SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD, version, 0, "Kernel version"); SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD, ostype, 0, "Operating system type"); /* * NOTICE: The *userland* release date is available in * /usr/include/osreldate.h */ SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD, &osreldate, 0, "Kernel release date"); SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RDTUN, &maxproc, 0, "Maximum number of processes"); SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW, &maxprocperuid, 0, "Maximum processes allowed per userid"); SYSCTL_INT(_kern, OID_AUTO, maxusers, CTLFLAG_RDTUN, &maxusers, 0, "Hint for kernel tuning"); SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD, 0, ARG_MAX, "Maximum bytes of argument to execve(2)"); SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD, 0, _POSIX_VERSION, "Version of POSIX attempting to comply to"); SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RD, 0, NGROUPS_MAX, "Maximum number of groups a user can belong to"); SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD, 0, 1, "Whether job control is available"); #ifdef _POSIX_SAVED_IDS SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD, 0, 1, "Whether saved set-group/user ID is available"); #else SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD, 0, 0, "Whether saved set-group/user ID is available"); #endif char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */ SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW, kernelname, sizeof kernelname, "Name of kernel file booted"); SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD, &mp_ncpus, 0, "Number of active CPUs"); SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD, 0, BYTE_ORDER, "System byte order"); SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD, 0, PAGE_SIZE, "System memory page size"); static int sysctl_kern_arnd(SYSCTL_HANDLER_ARGS) { char buf[256]; size_t len; len = req->oldlen; if (len > sizeof(buf)) len = sizeof(buf); arc4rand(buf, len, 0); return (SYSCTL_OUT(req, buf, len)); } SYSCTL_PROC(_kern, KERN_ARND, arandom, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, sysctl_kern_arnd, "", "arc4rand"); static int sysctl_hw_physmem(SYSCTL_HANDLER_ARGS) { u_long val; val = ctob(physmem); return (sysctl_handle_long(oidp, &val, 0, req)); } SYSCTL_PROC(_hw, HW_PHYSMEM, physmem, CTLTYPE_ULONG | CTLFLAG_RD, 0, 0, sysctl_hw_physmem, "LU", ""); static int sysctl_hw_realmem(SYSCTL_HANDLER_ARGS) { u_long val; val = ctob(realmem); return (sysctl_handle_long(oidp, &val, 0, req)); } SYSCTL_PROC(_hw, HW_REALMEM, realmem, CTLTYPE_ULONG | CTLFLAG_RD, 0, 0, sysctl_hw_realmem, "LU", ""); static int sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) { u_long val; val = ctob(physmem - cnt.v_wire_count); return (sysctl_handle_long(oidp, &val, 0, req)); } SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_ULONG | CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "LU", ""); SYSCTL_ULONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, ""); static char machine_arch[] = MACHINE_ARCH; SYSCTL_STRING(_hw, HW_MACHINE_ARCH, machine_arch, CTLFLAG_RD, machine_arch, 0, "System architecture"); char hostname[MAXHOSTNAMELEN]; +/* + * This mutex is used to protect the hostname and domainname variables, and + * perhaps in the future should also protect hostid, hostuid, and others. + */ +struct mtx hostname_mtx; +MTX_SYSINIT(hostname_mtx, &hostname_mtx, "hostname", MTX_DEF); + static int sysctl_hostname(SYSCTL_HANDLER_ARGS) { struct prison *pr; char tmphostname[MAXHOSTNAMELEN]; int error; pr = req->td->td_ucred->cr_prison; if (pr != NULL) { if (!jail_set_hostname_allowed && req->newptr) return (EPERM); /* * Process is in jail, so make a local copy of jail * hostname to get/set so we don't have to hold the jail * mutex during the sysctl copyin/copyout activities. */ mtx_lock(&pr->pr_mtx); bcopy(pr->pr_host, tmphostname, MAXHOSTNAMELEN); mtx_unlock(&pr->pr_mtx); error = sysctl_handle_string(oidp, tmphostname, sizeof pr->pr_host, req); if (req->newptr != NULL && error == 0) { /* * Copy the locally set hostname to the jail, if * appropriate. */ mtx_lock(&pr->pr_mtx); bcopy(tmphostname, pr->pr_host, MAXHOSTNAMELEN); mtx_unlock(&pr->pr_mtx); } - } else - error = sysctl_handle_string(oidp, - hostname, sizeof hostname, req); + } else { + mtx_lock(&hostname_mtx); + bcopy(hostname, tmphostname, MAXHOSTNAMELEN); + mtx_unlock(&hostname_mtx); + error = sysctl_handle_string(oidp, tmphostname, + sizeof tmphostname, req); + if (req->newptr != NULL && error == 0) { + mtx_lock(&hostname_mtx); + bcopy(tmphostname, hostname, MAXHOSTNAMELEN); + mtx_unlock(&hostname_mtx); + } + } return (error); } SYSCTL_PROC(_kern, KERN_HOSTNAME, hostname, CTLTYPE_STRING|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, sysctl_hostname, "A", "Hostname"); static int regression_securelevel_nonmonotonic = 0; #ifdef REGRESSION SYSCTL_INT(_regression, OID_AUTO, securelevel_nonmonotonic, CTLFLAG_RW, ®ression_securelevel_nonmonotonic, 0, "securelevel may be lowered"); #endif int securelevel = -1; static struct mtx securelevel_mtx; MTX_SYSINIT(securelevel_lock, &securelevel_mtx, "securelevel mutex lock", MTX_DEF); static int sysctl_kern_securelvl(SYSCTL_HANDLER_ARGS) { struct prison *pr; int error, level; pr = req->td->td_ucred->cr_prison; /* * If the process is in jail, return the maximum of the global and * local levels; otherwise, return the global level. Perform a * lockless read since the securelevel is an integer. */ if (pr != NULL) level = imax(securelevel, pr->pr_securelevel); else level = securelevel; error = sysctl_handle_int(oidp, &level, 0, req); if (error || !req->newptr) return (error); /* * Permit update only if the new securelevel exceeds the * global level, and local level if any. */ if (pr != NULL) { mtx_lock(&pr->pr_mtx); if (!regression_securelevel_nonmonotonic && (level < imax(securelevel, pr->pr_securelevel))) { mtx_unlock(&pr->pr_mtx); return (EPERM); } pr->pr_securelevel = level; mtx_unlock(&pr->pr_mtx); } else { mtx_lock(&securelevel_mtx); if (!regression_securelevel_nonmonotonic && (level < securelevel)) { mtx_unlock(&securelevel_mtx); return (EPERM); } securelevel = level; mtx_unlock(&securelevel_mtx); } return (error); } SYSCTL_PROC(_kern, KERN_SECURELVL, securelevel, CTLTYPE_INT|CTLFLAG_RW|CTLFLAG_PRISON, 0, 0, sysctl_kern_securelvl, "I", "Current secure level"); #ifdef INCLUDE_CONFIG_FILE /* Actual kernel configuration options. */ extern char kernconfstring[]; static int sysctl_kern_config(SYSCTL_HANDLER_ARGS) { return (sysctl_handle_string(oidp, kernconfstring, strlen(kernconfstring), req)); } SYSCTL_PROC(_kern, OID_AUTO, conftxt, CTLTYPE_STRING|CTLFLAG_RW, 0, 0, sysctl_kern_config, "", "Kernel configuration file"); #endif -char domainname[MAXHOSTNAMELEN]; -SYSCTL_STRING(_kern, KERN_NISDOMAINNAME, domainname, CTLFLAG_RW, - &domainname, sizeof(domainname), "Name of the current YP/NIS domain"); +char domainname[MAXHOSTNAMELEN]; /* Protected by hostname_mtx. */ + +static int +sysctl_domainname(SYSCTL_HANDLER_ARGS) +{ + char tmpdomainname[MAXHOSTNAMELEN]; + int error; + + mtx_lock(&hostname_mtx); + bcopy(domainname, tmpdomainname, MAXHOSTNAMELEN); + mtx_unlock(&hostname_mtx); + error = sysctl_handle_string(oidp, tmpdomainname, + sizeof tmpdomainname, req); + if (req->newptr != NULL && error == 0) { + mtx_lock(&hostname_mtx); + bcopy(tmpdomainname, domainname, MAXHOSTNAMELEN); + mtx_unlock(&hostname_mtx); + } + return (error); +} + +SYSCTL_PROC(_kern, KERN_NISDOMAINNAME, domainname, CTLTYPE_STRING|CTLFLAG_RW, + 0, 0, sysctl_domainname, "A", "NAme of the current YP/NIS domain"); u_long hostid; SYSCTL_ULONG(_kern, KERN_HOSTID, hostid, CTLFLAG_RW, &hostid, 0, "Host ID"); char hostuuid[64] = "00000000-0000-0000-0000-000000000000"; SYSCTL_STRING(_kern, KERN_HOSTUUID, hostuuid, CTLFLAG_RW, hostuuid, sizeof(hostuuid), "Host UUID"); SYSCTL_NODE(_kern, OID_AUTO, features, CTLFLAG_RD, 0, "Kernel Features"); #ifdef COMPAT_FREEBSD4 FEATURE(compat_freebsd4, "Compatible with FreeBSD 4"); #endif #ifdef COMPAT_FREEBSD5 FEATURE(compat_freebsd5, "Compatible with FreeBSD 5"); #endif #ifdef COMPAT_FREEBSD6 FEATURE(compat_freebsd6, "Compatible with FreeBSD 6"); #endif #ifdef COMPAT_FREEBSD7 FEATURE(compat_freebsd7, "Compatible with FreeBSD 7"); #endif /* * This is really cheating. These actually live in the libc, something * which I'm not quite sure is a good idea anyway, but in order for * getnext and friends to actually work, we define dummies here. */ SYSCTL_STRING(_user, USER_CS_PATH, cs_path, CTLFLAG_RD, "", 0, "PATH that finds all the standard utilities"); SYSCTL_INT(_user, USER_BC_BASE_MAX, bc_base_max, CTLFLAG_RD, 0, 0, "Max ibase/obase values in bc(1)"); SYSCTL_INT(_user, USER_BC_DIM_MAX, bc_dim_max, CTLFLAG_RD, 0, 0, "Max array size in bc(1)"); SYSCTL_INT(_user, USER_BC_SCALE_MAX, bc_scale_max, CTLFLAG_RD, 0, 0, "Max scale value in bc(1)"); SYSCTL_INT(_user, USER_BC_STRING_MAX, bc_string_max, CTLFLAG_RD, 0, 0, "Max string length in bc(1)"); SYSCTL_INT(_user, USER_COLL_WEIGHTS_MAX, coll_weights_max, CTLFLAG_RD, 0, 0, "Maximum number of weights assigned to an LC_COLLATE locale entry"); SYSCTL_INT(_user, USER_EXPR_NEST_MAX, expr_nest_max, CTLFLAG_RD, 0, 0, ""); SYSCTL_INT(_user, USER_LINE_MAX, line_max, CTLFLAG_RD, 0, 0, "Max length (bytes) of a text-processing utility's input line"); SYSCTL_INT(_user, USER_RE_DUP_MAX, re_dup_max, CTLFLAG_RD, 0, 0, "Maximum number of repeats of a regexp permitted"); SYSCTL_INT(_user, USER_POSIX2_VERSION, posix2_version, CTLFLAG_RD, 0, 0, "The version of POSIX 1003.2 with which the system attempts to comply"); SYSCTL_INT(_user, USER_POSIX2_C_BIND, posix2_c_bind, CTLFLAG_RD, 0, 0, "Whether C development supports the C bindings option"); SYSCTL_INT(_user, USER_POSIX2_C_DEV, posix2_c_dev, CTLFLAG_RD, 0, 0, "Whether system supports the C development utilities option"); SYSCTL_INT(_user, USER_POSIX2_CHAR_TERM, posix2_char_term, CTLFLAG_RD, 0, 0, ""); SYSCTL_INT(_user, USER_POSIX2_FORT_DEV, posix2_fort_dev, CTLFLAG_RD, 0, 0, "Whether system supports FORTRAN development utilities"); SYSCTL_INT(_user, USER_POSIX2_FORT_RUN, posix2_fort_run, CTLFLAG_RD, 0, 0, "Whether system supports FORTRAN runtime utilities"); SYSCTL_INT(_user, USER_POSIX2_LOCALEDEF, posix2_localedef, CTLFLAG_RD, 0, 0, "Whether system supports creation of locales"); SYSCTL_INT(_user, USER_POSIX2_SW_DEV, posix2_sw_dev, CTLFLAG_RD, 0, 0, "Whether system supports software development utilities"); SYSCTL_INT(_user, USER_POSIX2_UPE, posix2_upe, CTLFLAG_RD, 0, 0, "Whether system supports the user portability utilities"); SYSCTL_INT(_user, USER_STREAM_MAX, stream_max, CTLFLAG_RD, 0, 0, "Min Maximum number of streams a process may have open at one time"); SYSCTL_INT(_user, USER_TZNAME_MAX, tzname_max, CTLFLAG_RD, 0, 0, "Min Maximum number of types supported for timezone names"); #include SYSCTL_INT(_debug_sizeof, OID_AUTO, vnode, CTLFLAG_RD, 0, sizeof(struct vnode), "sizeof(struct vnode)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, proc, CTLFLAG_RD, 0, sizeof(struct proc), "sizeof(struct proc)"); #include #include SYSCTL_INT(_debug_sizeof, OID_AUTO, bio, CTLFLAG_RD, 0, sizeof(struct bio), "sizeof(struct bio)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, buf, CTLFLAG_RD, 0, sizeof(struct buf), "sizeof(struct buf)"); #include SYSCTL_INT(_debug_sizeof, OID_AUTO, kinfo_proc, CTLFLAG_RD, 0, sizeof(struct kinfo_proc), "sizeof(struct kinfo_proc)"); /* XXX compatibility, remove for 6.0 */ #include #include SYSCTL_INT(_kern, OID_AUTO, fallback_elf_brand, CTLFLAG_RW, &__elfN(fallback_brand), sizeof(__elfN(fallback_brand)), "compatibility for kern.fallback_elf_brand"); Index: head/sys/kern/kern_xxx.c =================================================================== --- head/sys/kern/kern_xxx.c (revision 180290) +++ head/sys/kern/kern_xxx.c (revision 180291) @@ -1,288 +1,290 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_xxx.c 8.2 (Berkeley) 11/14/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #if defined(COMPAT_43) #ifndef _SYS_SYSPROTO_H_ struct gethostname_args { char *hostname; u_int len; }; #endif /* ARGSUSED */ int ogethostname(td, uap) struct thread *td; struct gethostname_args *uap; { int name[2]; int error; size_t len = uap->len; name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; mtx_lock(&Giant); error = userland_sysctl(td, name, 2, uap->hostname, &len, 1, 0, 0, 0, 0); mtx_unlock(&Giant); return(error); } #ifndef _SYS_SYSPROTO_H_ struct sethostname_args { char *hostname; u_int len; }; #endif /* ARGSUSED */ int osethostname(td, uap) struct thread *td; register struct sethostname_args *uap; { int name[2]; int error; name[0] = CTL_KERN; name[1] = KERN_HOSTNAME; mtx_lock(&Giant); error = userland_sysctl(td, name, 2, 0, 0, 0, uap->hostname, uap->len, 0, 0); mtx_unlock(&Giant); return (error); } #ifndef _SYS_SYSPROTO_H_ struct ogethostid_args { int dummy; }; #endif /* ARGSUSED */ int ogethostid(td, uap) struct thread *td; struct ogethostid_args *uap; { *(long *)(td->td_retval) = hostid; return (0); } #endif /* COMPAT_43 */ #ifdef COMPAT_43 #ifndef _SYS_SYSPROTO_H_ struct osethostid_args { long hostid; }; #endif /* ARGSUSED */ int osethostid(td, uap) struct thread *td; struct osethostid_args *uap; { int error; error = priv_check(td, PRIV_SETHOSTID); if (error) return (error); mtx_lock(&Giant); hostid = uap->hostid; mtx_unlock(&Giant); return (0); } int oquota(td, uap) struct thread *td; struct oquota_args *uap; { return (ENOSYS); } #endif /* COMPAT_43 */ /* * This is the FreeBSD-1.1 compatable uname(2) interface. These days it is * done in libc as a wrapper around a bunch of sysctl's. This must maintain * the old 1.1 binary ABI. */ #if SYS_NMLN != 32 #error "FreeBSD-1.1 uname syscall has been broken" #endif #ifndef _SYS_SYSPROTO_H_ struct uname_args { struct utsname *name; }; #endif /* ARGSUSED */ int uname(td, uap) struct thread *td; struct uname_args *uap; { int name[2], error; size_t len; char *s, *us; name[0] = CTL_KERN; name[1] = KERN_OSTYPE; len = sizeof (uap->name->sysname); mtx_lock(&Giant); error = userland_sysctl(td, name, 2, uap->name->sysname, &len, 1, 0, 0, 0, 0); if (error) goto done2; subyte( uap->name->sysname + sizeof(uap->name->sysname) - 1, 0); name[1] = KERN_HOSTNAME; len = sizeof uap->name->nodename; error = userland_sysctl(td, name, 2, uap->name->nodename, &len, 1, 0, 0, 0, 0); if (error) goto done2; subyte( uap->name->nodename + sizeof(uap->name->nodename) - 1, 0); name[1] = KERN_OSRELEASE; len = sizeof uap->name->release; error = userland_sysctl(td, name, 2, uap->name->release, &len, 1, 0, 0, 0, 0); if (error) goto done2; subyte( uap->name->release + sizeof(uap->name->release) - 1, 0); /* name = KERN_VERSION; len = sizeof uap->name->version; error = userland_sysctl(td, name, 2, uap->name->version, &len, 1, 0, 0, 0, 0); if (error) goto done2; subyte( uap->name->version + sizeof(uap->name->version) - 1, 0); */ /* * this stupid hackery to make the version field look like FreeBSD 1.1 */ for(s = version; *s && *s != '#'; s++); for(us = uap->name->version; *s && *s != ':'; s++) { error = subyte( us++, *s); if (error) goto done2; } error = subyte( us++, 0); if (error) goto done2; name[0] = CTL_HW; name[1] = HW_MACHINE; len = sizeof uap->name->machine; error = userland_sysctl(td, name, 2, uap->name->machine, &len, 1, 0, 0, 0, 0); if (error) goto done2; subyte( uap->name->machine + sizeof(uap->name->machine) - 1, 0); done2: mtx_unlock(&Giant); return (error); } #ifndef _SYS_SYSPROTO_H_ struct getdomainname_args { char *domainname; int len; }; #endif /* ARGSUSED */ int getdomainname(td, uap) struct thread *td; struct getdomainname_args *uap; { + char tmpdomainname[MAXHOSTNAMELEN]; int domainnamelen; - int error; - mtx_lock(&Giant); - domainnamelen = strlen(domainname) + 1; + mtx_lock(&hostname_mtx); + bcopy(domainname, tmpdomainname, sizeof(tmpdomainname)); + mtx_unlock(&hostname_mtx); + + domainnamelen = strlen(tmpdomainname) + 1; if ((u_int)uap->len > domainnamelen) uap->len = domainnamelen; - error = copyout(domainname, uap->domainname, uap->len); - mtx_unlock(&Giant); - return (error); + return (copyout(tmpdomainname, uap->domainname, uap->len)); } #ifndef _SYS_SYSPROTO_H_ struct setdomainname_args { char *domainname; int len; }; #endif /* ARGSUSED */ int setdomainname(td, uap) struct thread *td; struct setdomainname_args *uap; { + char tmpdomainname[MAXHOSTNAMELEN]; int error, domainnamelen; error = priv_check(td, PRIV_SETDOMAINNAME); if (error) return (error); - mtx_lock(&Giant); - if ((u_int)uap->len > sizeof (domainname) - 1) { - error = EINVAL; - goto done2; - } + if ((u_int)uap->len > sizeof(tmpdomainname) - 1) + return (EINVAL); domainnamelen = uap->len; - error = copyin(uap->domainname, domainname, uap->len); - domainname[domainnamelen] = 0; -done2: - mtx_unlock(&Giant); + error = copyin(uap->domainname, tmpdomainname, uap->len); + if (error == 0) { + tmpdomainname[domainnamelen] = 0; + mtx_lock(&hostname_mtx); + bcopy(tmpdomainname, domainname, sizeof(domainname)); + mtx_unlock(&hostname_mtx); + } return (error); } Index: head/sys/netinet6/icmp6.c =================================================================== --- head/sys/netinet6/icmp6.c (revision 180290) +++ head/sys/netinet6/icmp6.c (revision 180291) @@ -1,2786 +1,2792 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: icmp6.c,v 1.211 2001/04/04 05:56:20 itojun Exp $ */ /*- * Copyright (c) 1982, 1986, 1988, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ip_icmp.c 8.2 (Berkeley) 1/4/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef IPSEC #include #include #endif extern struct domain inet6domain; struct icmp6stat icmp6stat; extern struct inpcbinfo ripcbinfo; extern struct inpcbhead ripcb; extern int icmp6errppslim; static int icmp6errpps_count = 0; static struct timeval icmp6errppslim_last; extern int icmp6_nodeinfo; static void icmp6_errcount(struct icmp6errstat *, int, int); static int icmp6_rip6_input(struct mbuf **, int); static int icmp6_ratelimit(const struct in6_addr *, const int, const int); static const char *icmp6_redirect_diag __P((struct in6_addr *, struct in6_addr *, struct in6_addr *)); static struct mbuf *ni6_input(struct mbuf *, int); static struct mbuf *ni6_nametodns(const char *, int, int); static int ni6_dnsmatch(const char *, int, const char *, int); static int ni6_addrs __P((struct icmp6_nodeinfo *, struct mbuf *, struct ifnet **, struct in6_addr *)); static int ni6_store_addrs __P((struct icmp6_nodeinfo *, struct icmp6_nodeinfo *, struct ifnet *, int)); static int icmp6_notify_error(struct mbuf **, int, int, int); void icmp6_init(void) { mld6_init(); } static void icmp6_errcount(struct icmp6errstat *stat, int type, int code) { switch (type) { case ICMP6_DST_UNREACH: switch (code) { case ICMP6_DST_UNREACH_NOROUTE: stat->icp6errs_dst_unreach_noroute++; return; case ICMP6_DST_UNREACH_ADMIN: stat->icp6errs_dst_unreach_admin++; return; case ICMP6_DST_UNREACH_BEYONDSCOPE: stat->icp6errs_dst_unreach_beyondscope++; return; case ICMP6_DST_UNREACH_ADDR: stat->icp6errs_dst_unreach_addr++; return; case ICMP6_DST_UNREACH_NOPORT: stat->icp6errs_dst_unreach_noport++; return; } break; case ICMP6_PACKET_TOO_BIG: stat->icp6errs_packet_too_big++; return; case ICMP6_TIME_EXCEEDED: switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: stat->icp6errs_time_exceed_transit++; return; case ICMP6_TIME_EXCEED_REASSEMBLY: stat->icp6errs_time_exceed_reassembly++; return; } break; case ICMP6_PARAM_PROB: switch (code) { case ICMP6_PARAMPROB_HEADER: stat->icp6errs_paramprob_header++; return; case ICMP6_PARAMPROB_NEXTHEADER: stat->icp6errs_paramprob_nextheader++; return; case ICMP6_PARAMPROB_OPTION: stat->icp6errs_paramprob_option++; return; } break; case ND_REDIRECT: stat->icp6errs_redirect++; return; } stat->icp6errs_unknown++; } /* * A wrapper function for icmp6_error() necessary when the erroneous packet * may not contain enough scope zone information. */ void icmp6_error2(struct mbuf *m, int type, int code, int param, struct ifnet *ifp) { struct ip6_hdr *ip6; if (ifp == NULL) return; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif ip6 = mtod(m, struct ip6_hdr *); if (in6_setscope(&ip6->ip6_src, ifp, NULL) != 0) return; if (in6_setscope(&ip6->ip6_dst, ifp, NULL) != 0) return; icmp6_error(m, type, code, param); } /* * Generate an error packet of type error in response to bad IP6 packet. */ void icmp6_error(struct mbuf *m, int type, int code, int param) { struct ip6_hdr *oip6, *nip6; struct icmp6_hdr *icmp6; u_int preplen; int off; int nxt; icmp6stat.icp6s_error++; /* count per-type-code statistics */ icmp6_errcount(&icmp6stat.icp6s_outerrhist, type, code); #ifdef M_DECRYPTED /*not openbsd*/ if (m->m_flags & M_DECRYPTED) { icmp6stat.icp6s_canterror++; goto freeit; } #endif #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, sizeof(struct ip6_hdr), ); #else if (m->m_len < sizeof(struct ip6_hdr)) { m = m_pullup(m, sizeof(struct ip6_hdr)); if (m == NULL) return; } #endif oip6 = mtod(m, struct ip6_hdr *); /* * If the destination address of the erroneous packet is a multicast * address, or the packet was sent using link-layer multicast, * we should basically suppress sending an error (RFC 2463, Section * 2.4). * We have two exceptions (the item e.2 in that section): * - the Pakcet Too Big message can be sent for path MTU discovery. * - the Parameter Problem Message that can be allowed an icmp6 error * in the option type field. This check has been done in * ip6_unknown_opt(), so we can just check the type and code. */ if ((m->m_flags & (M_BCAST|M_MCAST) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_dst)) && (type != ICMP6_PACKET_TOO_BIG && (type != ICMP6_PARAM_PROB || code != ICMP6_PARAMPROB_OPTION))) goto freeit; /* * RFC 2463, 2.4 (e.5): source address check. * XXX: the case of anycast source? */ if (IN6_IS_ADDR_UNSPECIFIED(&oip6->ip6_src) || IN6_IS_ADDR_MULTICAST(&oip6->ip6_src)) goto freeit; /* * If we are about to send ICMPv6 against ICMPv6 error/redirect, * don't do it. */ nxt = -1; off = ip6_lasthdr(m, 0, IPPROTO_IPV6, &nxt); if (off >= 0 && nxt == IPPROTO_ICMPV6) { struct icmp6_hdr *icp; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, off + sizeof(struct icmp6_hdr), ); icp = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icp, struct icmp6_hdr *, m, off, sizeof(*icp)); if (icp == NULL) { icmp6stat.icp6s_tooshort++; return; } #endif if (icp->icmp6_type < ICMP6_ECHO_REQUEST || icp->icmp6_type == ND_REDIRECT) { /* * ICMPv6 error * Special case: for redirect (which is * informational) we must not send icmp6 error. */ icmp6stat.icp6s_canterror++; goto freeit; } else { /* ICMPv6 informational - send the error */ } } else { /* non-ICMPv6 - send the error */ } oip6 = mtod(m, struct ip6_hdr *); /* adjust pointer */ /* Finally, do rate limitation check. */ if (icmp6_ratelimit(&oip6->ip6_src, type, code)) { icmp6stat.icp6s_toofreq++; goto freeit; } /* * OK, ICMP6 can be generated. */ if (m->m_pkthdr.len >= ICMPV6_PLD_MAXLEN) m_adj(m, ICMPV6_PLD_MAXLEN - m->m_pkthdr.len); preplen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); M_PREPEND(m, preplen, M_DONTWAIT); if (m && m->m_len < preplen) m = m_pullup(m, preplen); if (m == NULL) { nd6log((LOG_DEBUG, "ENOBUFS in icmp6_error %d\n", __LINE__)); return; } nip6 = mtod(m, struct ip6_hdr *); nip6->ip6_src = oip6->ip6_src; nip6->ip6_dst = oip6->ip6_dst; in6_clearscope(&oip6->ip6_src); in6_clearscope(&oip6->ip6_dst); icmp6 = (struct icmp6_hdr *)(nip6 + 1); icmp6->icmp6_type = type; icmp6->icmp6_code = code; icmp6->icmp6_pptr = htonl((u_int32_t)param); /* * icmp6_reflect() is designed to be in the input path. * icmp6_error() can be called from both input and output path, * and if we are in output path rcvif could contain bogus value. * clear m->m_pkthdr.rcvif for safety, we should have enough scope * information in ip header (nip6). */ m->m_pkthdr.rcvif = NULL; icmp6stat.icp6s_outhist[type]++; icmp6_reflect(m, sizeof(struct ip6_hdr)); /* header order: IPv6 - ICMPv6 */ return; freeit: /* * If we can't tell whether or not we can generate ICMP6, free it. */ m_freem(m); } /* * Process a received ICMP6 message. */ int icmp6_input(struct mbuf **mp, int *offp, int proto) { struct mbuf *m = *mp, *n; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; int icmp6len = m->m_pkthdr.len - *offp; int code, sum, noff; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ #endif /* * Locate icmp6 structure in mbuf, and check * that not corrupted and of at least minimum length */ ip6 = mtod(m, struct ip6_hdr *); if (icmp6len < sizeof(struct icmp6_hdr)) { icmp6stat.icp6s_tooshort++; goto freeit; } /* * calculate the checksum */ #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return IPPROTO_DONE; } #endif code = icmp6->icmp6_code; if ((sum = in6_cksum(m, IPPROTO_ICMPV6, off, icmp6len)) != 0) { nd6log((LOG_ERR, "ICMP6 checksum error(%d|%x) %s\n", icmp6->icmp6_type, sum, ip6_sprintf(ip6bufs, &ip6->ip6_src))); icmp6stat.icp6s_checksum++; goto freeit; } if (faithprefix_p != NULL && (*faithprefix_p)(&ip6->ip6_dst)) { /* * Deliver very specific ICMP6 type only. * This is important to deliver TOOBIG. Otherwise PMTUD * will not work. */ switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: case ICMP6_PACKET_TOO_BIG: case ICMP6_TIME_EXCEEDED: break; default: goto freeit; } } icmp6stat.icp6s_inhist[icmp6->icmp6_type]++; icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? */ break; case ICMP6_DST_UNREACH_ADDR: code = PRC_HOSTDEAD; break; case ICMP6_DST_UNREACH_BEYONDSCOPE: /* I mean "source address was incorrect." */ code = PRC_PARAMPROB; break; case ICMP6_DST_UNREACH_NOPORT: code = PRC_UNREACH_PORT; break; default: goto badcode; } goto deliver; break; case ICMP6_PACKET_TOO_BIG: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig); /* validation is made in icmp6_mtudisc_update */ code = PRC_MSGSIZE; /* * Updating the path MTU will be done after examining * intermediate extension headers. */ goto deliver; break; case ICMP6_TIME_EXCEEDED: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: code = PRC_TIMXCEED_INTRANS; break; case ICMP6_TIME_EXCEED_REASSEMBLY: code = PRC_TIMXCEED_REASS; break; default: goto badcode; } goto deliver; break; case ICMP6_PARAM_PROB: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; break; case ICMP6_PARAMPROB_HEADER: case ICMP6_PARAMPROB_OPTION: code = PRC_PARAMPROB; break; default: goto badcode; } goto deliver; break; case ICMP6_ECHO_REQUEST: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo); if (code != 0) goto badcode; if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { /* Give up remote */ break; } if ((n->m_flags & M_EXT) != 0 || n->m_len < off + sizeof(struct icmp6_hdr)) { struct mbuf *n0 = n; const int maxlen = sizeof(*nip6) + sizeof(*nicmp6); int n0len; MGETHDR(n, M_DONTWAIT, n0->m_type); n0len = n0->m_pkthdr.len; /* save for use below */ if (n) M_MOVE_PKTHDR(n, n0); if (n && maxlen >= MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (n == NULL) { /* Give up remote */ m_freem(n0); break; } /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); noff = sizeof(struct ip6_hdr); /* new mbuf contains only ipv6+icmpv6 headers */ n->m_len = noff + sizeof(struct icmp6_hdr); /* * Adjust mbuf. ip6_plen will be adjusted in * ip6_output(). */ m_adj(n0, off + sizeof(struct icmp6_hdr)); /* recalculate complete packet size */ n->m_pkthdr.len = n0len + (noff - off); n->m_next = n0; } else { nip6 = mtod(n, struct ip6_hdr *); IP6_EXTHDR_GET(nicmp6, struct icmp6_hdr *, n, off, sizeof(*nicmp6)); noff = off; } nicmp6->icmp6_type = ICMP6_ECHO_REPLY; nicmp6->icmp6_code = 0; if (n) { icmp6stat.icp6s_reflect++; icmp6stat.icp6s_outhist[ICMP6_ECHO_REPLY]++; icmp6_reflect(n, noff); } break; case ICMP6_ECHO_REPLY: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply); if (code != 0) goto badcode; break; case MLD_LISTENER_QUERY: case MLD_LISTENER_REPORT: if (icmp6len < sizeof(struct mld_hdr)) goto badlen; if (icmp6->icmp6_type == MLD_LISTENER_QUERY) /* XXX: ugly... */ icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldquery); else icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mldreport); if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ mld6_input(m, off); m = NULL; goto freeit; } mld6_input(n, off); /* m stays. */ break; case MLD_LISTENER_DONE: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_mlddone); if (icmp6len < sizeof(struct mld_hdr)) /* necessary? */ goto badlen; break; /* nothing to be done in kernel */ case MLD_MTRACE_RESP: case MLD_MTRACE: /* XXX: these two are experimental. not officially defined. */ /* XXX: per-interface statistics? */ break; /* just pass it to applications */ case ICMP6_WRUREQUEST: /* ICMP6_FQDN_QUERY */ { enum { WRU, FQDN } mode; if (!icmp6_nodeinfo) break; if (icmp6len == sizeof(struct icmp6_hdr) + 4) mode = WRU; else if (icmp6len >= sizeof(struct icmp6_nodeinfo)) mode = FQDN; else goto badlen; #define hostnamelen strlen(hostname) if (mode == FQDN) { #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_nodeinfo), IPPROTO_DONE); #endif n = m_copy(m, 0, M_COPYALL); if (n) n = ni6_input(n, off); /* XXX meaningless if n == NULL */ noff = sizeof(struct ip6_hdr); } else { u_char *p; int maxlen, maxhlen; /* * XXX: this combination of flags is pointless, * but should we keep this for compatibility? */ if ((icmp6_nodeinfo & 5) != 5) break; if (code != 0) goto badcode; maxlen = sizeof(*nip6) + sizeof(*nicmp6) + 4; if (maxlen >= MCLBYTES) { /* Give up remote */ break; } MGETHDR(n, M_DONTWAIT, m->m_type); if (n && maxlen > MHLEN) { MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { m_free(n); n = NULL; } } if (n && !m_dup_pkthdr(n, m, M_DONTWAIT)) { /* * Previous code did a blind M_COPY_PKTHDR * and said "just for rcvif". If true, then * we could tolerate the dup failing (due to * the deep copy of the tag chain). For now * be conservative and just fail. */ m_free(n); n = NULL; } if (n == NULL) { /* Give up remote */ break; } n->m_pkthdr.rcvif = NULL; n->m_len = 0; maxhlen = M_TRAILINGSPACE(n) - maxlen; + mtx_lock(&hostname_mtx); if (maxhlen > hostnamelen) maxhlen = hostnamelen; /* * Copy IPv6 and ICMPv6 only. */ nip6 = mtod(n, struct ip6_hdr *); bcopy(ip6, nip6, sizeof(struct ip6_hdr)); nicmp6 = (struct icmp6_hdr *)(nip6 + 1); bcopy(icmp6, nicmp6, sizeof(struct icmp6_hdr)); p = (u_char *)(nicmp6 + 1); bzero(p, 4); bcopy(hostname, p + 4, maxhlen); /* meaningless TTL */ + mtx_unlock(&hostname_mtx); noff = sizeof(struct ip6_hdr); n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + 4 + maxhlen; nicmp6->icmp6_type = ICMP6_WRUREPLY; nicmp6->icmp6_code = 0; } #undef hostnamelen if (n) { icmp6stat.icp6s_reflect++; icmp6stat.icp6s_outhist[ICMP6_WRUREPLY]++; icmp6_reflect(n, noff); } break; } case ICMP6_WRUREPLY: if (code != 0) goto badcode; break; case ND_ROUTER_SOLICIT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_rs_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_rs_input(n, off, icmp6len); /* m stays. */ break; case ND_ROUTER_ADVERT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_ra_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_ra_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_SOLICIT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_ns_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_ns_input(n, off, icmp6len); /* m stays. */ break; case ND_NEIGHBOR_ADVERT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ nd6_na_input(m, off, icmp6len); m = NULL; goto freeit; } nd6_na_input(n, off, icmp6len); /* m stays. */ break; case ND_REDIRECT: icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) goto badlen; if ((n = m_copym(m, 0, M_COPYALL, M_DONTWAIT)) == NULL) { /* give up local */ icmp6_redirect_input(m, off); m = NULL; goto freeit; } icmp6_redirect_input(n, off); /* m stays. */ break; case ICMP6_ROUTER_RENUMBERING: if (code != ICMP6_ROUTER_RENUMBERING_COMMAND && code != ICMP6_ROUTER_RENUMBERING_RESULT) goto badcode; if (icmp6len < sizeof(struct icmp6_router_renum)) goto badlen; break; default: nd6log((LOG_DEBUG, "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; /* deliver */ } else { /* ICMPv6 informational: MUST not deliver */ break; } deliver: if (icmp6_notify_error(&m, off, icmp6len, code)) { /* In this case, m should've been freed. */ return (IPPROTO_DONE); } break; badcode: icmp6stat.icp6s_badcode++; break; badlen: icmp6stat.icp6s_badlen++; break; } /* deliver the packet to appropriate sockets */ icmp6_rip6_input(&m, *offp); return IPPROTO_DONE; freeit: m_freem(m); return IPPROTO_DONE; } static int icmp6_notify_error(struct mbuf **mp, int off, int icmp6len, int code) { struct mbuf *m = *mp; struct icmp6_hdr *icmp6; struct ip6_hdr *eip6; u_int32_t notifymtu; struct sockaddr_in6 icmp6src, icmp6dst; if (icmp6len < sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr)) { icmp6stat.icp6s_tooshort++; goto freeit; } #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr), -1); icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return (-1); } #endif eip6 = (struct ip6_hdr *)(icmp6 + 1); /* Detect the upper level protocol */ { void (*ctlfunc)(int, struct sockaddr *, void *); u_int8_t nxt = eip6->ip6_nxt; int eoff = off + sizeof(struct icmp6_hdr) + sizeof(struct ip6_hdr); struct ip6ctlparam ip6cp; struct in6_addr *finaldst = NULL; int icmp6type = icmp6->icmp6_type; struct ip6_frag *fh; struct ip6_rthdr *rth; struct ip6_rthdr0 *rth0; int rthlen; while (1) { /* XXX: should avoid infinite loop explicitly? */ struct ip6_ext *eh; switch (nxt) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: case IPPROTO_AH: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_ext), -1); eh = (struct ip6_ext *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(eh, struct ip6_ext *, m, eoff, sizeof(*eh)); if (eh == NULL) { icmp6stat.icp6s_tooshort++; return (-1); } #endif if (nxt == IPPROTO_AH) eoff += (eh->ip6e_len + 2) << 2; else eoff += (eh->ip6e_len + 1) << 3; nxt = eh->ip6e_nxt; break; case IPPROTO_ROUTING: /* * When the erroneous packet contains a * routing header, we should examine the * header to determine the final destination. * Otherwise, we can't properly update * information that depends on the final * destination (e.g. path MTU). */ #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(*rth), -1); rth = (struct ip6_rthdr *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth, struct ip6_rthdr *, m, eoff, sizeof(*rth)); if (rth == NULL) { icmp6stat.icp6s_tooshort++; return (-1); } #endif rthlen = (rth->ip6r_len + 1) << 3; /* * XXX: currently there is no * officially defined type other * than type-0. * Note that if the segment left field * is 0, all intermediate hops must * have been passed. */ if (rth->ip6r_segleft && rth->ip6r_type == IPV6_RTHDR_TYPE_0) { int hops; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + rthlen, -1); rth0 = (struct ip6_rthdr0 *) (mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(rth0, struct ip6_rthdr0 *, m, eoff, rthlen); if (rth0 == NULL) { icmp6stat.icp6s_tooshort++; return (-1); } #endif /* just ignore a bogus header */ if ((rth0->ip6r0_len % 2) == 0 && (hops = rth0->ip6r0_len/2)) finaldst = (struct in6_addr *)(rth0 + 1) + (hops - 1); } eoff += rthlen; nxt = rth->ip6r_nxt; break; case IPPROTO_FRAGMENT: #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, 0, eoff + sizeof(struct ip6_frag), -1); fh = (struct ip6_frag *)(mtod(m, caddr_t) + eoff); #else IP6_EXTHDR_GET(fh, struct ip6_frag *, m, eoff, sizeof(*fh)); if (fh == NULL) { icmp6stat.icp6s_tooshort++; return (-1); } #endif /* * Data after a fragment header is meaningless * unless it is the first fragment, but * we'll go to the notify label for path MTU * discovery. */ if (fh->ip6f_offlg & IP6F_OFF_MASK) goto notify; eoff += sizeof(struct ip6_frag); nxt = fh->ip6f_nxt; break; default: /* * This case includes ESP and the No Next * Header. In such cases going to the notify * label does not have any meaning * (i.e. ctlfunc will be NULL), but we go * anyway since we might have to update * path MTU information. */ goto notify; } } notify: #ifndef PULLDOWN_TEST icmp6 = (struct icmp6_hdr *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6) + sizeof(struct ip6_hdr)); if (icmp6 == NULL) { icmp6stat.icp6s_tooshort++; return (-1); } #endif /* * retrieve parameters from the inner IPv6 header, and convert * them into sockaddr structures. * XXX: there is no guarantee that the source or destination * addresses of the inner packet are in the same scope as * the addresses of the icmp packet. But there is no other * way to determine the zone. */ eip6 = (struct ip6_hdr *)(icmp6 + 1); bzero(&icmp6dst, sizeof(icmp6dst)); icmp6dst.sin6_len = sizeof(struct sockaddr_in6); icmp6dst.sin6_family = AF_INET6; if (finaldst == NULL) icmp6dst.sin6_addr = eip6->ip6_dst; else icmp6dst.sin6_addr = *finaldst; if (in6_setscope(&icmp6dst.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; bzero(&icmp6src, sizeof(icmp6src)); icmp6src.sin6_len = sizeof(struct sockaddr_in6); icmp6src.sin6_family = AF_INET6; icmp6src.sin6_addr = eip6->ip6_src; if (in6_setscope(&icmp6src.sin6_addr, m->m_pkthdr.rcvif, NULL)) goto freeit; icmp6src.sin6_flowinfo = (eip6->ip6_flow & IPV6_FLOWLABEL_MASK); if (finaldst == NULL) finaldst = &eip6->ip6_dst; ip6cp.ip6c_m = m; ip6cp.ip6c_icmp6 = icmp6; ip6cp.ip6c_ip6 = (struct ip6_hdr *)(icmp6 + 1); ip6cp.ip6c_off = eoff; ip6cp.ip6c_finaldst = finaldst; ip6cp.ip6c_src = &icmp6src; ip6cp.ip6c_nxt = nxt; if (icmp6type == ICMP6_PACKET_TOO_BIG) { notifymtu = ntohl(icmp6->icmp6_mtu); ip6cp.ip6c_cmdarg = (void *)¬ifymtu; icmp6_mtudisc_update(&ip6cp, 1); /*XXX*/ } ctlfunc = (void (*)(int, struct sockaddr *, void *)) (inet6sw[ip6_protox[nxt]].pr_ctlinput); if (ctlfunc) { (void) (*ctlfunc)(code, (struct sockaddr *)&icmp6dst, &ip6cp); } } *mp = m; return (0); freeit: m_freem(m); return (-1); } void icmp6_mtudisc_update(struct ip6ctlparam *ip6cp, int validated) { struct in6_addr *dst = ip6cp->ip6c_finaldst; struct icmp6_hdr *icmp6 = ip6cp->ip6c_icmp6; struct mbuf *m = ip6cp->ip6c_m; /* will be necessary for scope issue */ u_int mtu = ntohl(icmp6->icmp6_mtu); struct in_conninfo inc; #if 0 /* * RFC2460 section 5, last paragraph. * even though minimum link MTU for IPv6 is IPV6_MMTU, * we may see ICMPv6 too big with mtu < IPV6_MMTU * due to packet translator in the middle. * see ip6_output() and ip6_getpmtu() "alwaysfrag" case for * special handling. */ if (mtu < IPV6_MMTU) return; #endif /* * we reject ICMPv6 too big with abnormally small value. * XXX what is the good definition of "abnormally small"? */ if (mtu < sizeof(struct ip6_hdr) + sizeof(struct ip6_frag) + 8) return; if (!validated) return; bzero(&inc, sizeof(inc)); inc.inc_flags = 1; /* IPv6 */ inc.inc6_faddr = *dst; if (in6_setscope(&inc.inc6_faddr, m->m_pkthdr.rcvif, NULL)) return; if (mtu < tcp_maxmtu6(&inc, NULL)) { tcp_hc_updatemtu(&inc, mtu); icmp6stat.icp6s_pmtuchg++; } } /* * Process a Node Information Query packet, based on * draft-ietf-ipngwg-icmp-name-lookups-07. * * Spec incompatibilities: * - IPv6 Subject address handling * - IPv4 Subject address handling support missing * - Proxy reply (answer even if it's not for me) * - joins NI group address at in6_ifattach() time only, does not cope * with hostname changes by sethostname(3) */ #define hostnamelen strlen(hostname) static struct mbuf * ni6_input(struct mbuf *m, int off) { struct icmp6_nodeinfo *ni6, *nni6; struct mbuf *n = NULL; u_int16_t qtype; int subjlen; int replylen = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); struct ni_reply_fqdn *fqdn; int addrs; /* for NI_QTYPE_NODEADDR */ struct ifnet *ifp = NULL; /* for NI_QTYPE_NODEADDR */ struct in6_addr in6_subj; /* subject address */ struct ip6_hdr *ip6; int oldfqdn = 0; /* if 1, return pascal string (03 draft) */ char *subj = NULL; struct in6_ifaddr *ia6 = NULL; ip6 = mtod(m, struct ip6_hdr *); #ifndef PULLDOWN_TEST ni6 = (struct icmp6_nodeinfo *)(mtod(m, caddr_t) + off); #else IP6_EXTHDR_GET(ni6, struct icmp6_nodeinfo *, m, off, sizeof(*ni6)); if (ni6 == NULL) { /* m is already reclaimed */ return (NULL); } #endif /* * Validate IPv6 source address. * The default configuration MUST be to refuse answering queries from * global-scope addresses according to RFC4602. * Notes: * - it's not very clear what "refuse" means; this implementation * simply drops it. * - it's not very easy to identify global-scope (unicast) addresses * since there are many prefixes for them. It should be safer * and in practice sufficient to check "all" but loopback and * link-local (note that site-local unicast was deprecated and * ULA is defined as global scope-wise) */ if ((icmp6_nodeinfo & ICMP6_NODEINFO_GLOBALOK) == 0 && !IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && !IN6_IS_ADDR_LINKLOCAL(&ip6->ip6_src)) goto bad; /* * Validate IPv6 destination address. * * The Responder must discard the Query without further processing * unless it is one of the Responder's unicast or anycast addresses, or * a link-local scope multicast address which the Responder has joined. * [RFC4602, Section 5.] */ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { if (!IN6_IS_ADDR_MC_LINKLOCAL(&ip6->ip6_dst)) goto bad; /* else it's a link-local multicast, fine */ } else { /* unicast or anycast */ if ((ia6 = ip6_getdstifaddr(m)) == NULL) goto bad; /* XXX impossible */ if ((ia6->ia6_flags & IN6_IFF_TEMPORARY) && !(icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK)) { nd6log((LOG_DEBUG, "ni6_input: ignore node info to " "a temporary address in %s:%d", __FILE__, __LINE__)); goto bad; } } /* validate query Subject field. */ qtype = ntohs(ni6->ni_qtype); subjlen = m->m_pkthdr.len - off - sizeof(struct icmp6_nodeinfo); switch (qtype) { case NI_QTYPE_NOOP: case NI_QTYPE_SUPTYPES: /* 07 draft */ if (ni6->ni_code == ICMP6_NI_SUBJ_FQDN && subjlen == 0) break; /* FALLTHROUGH */ case NI_QTYPE_FQDN: case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: #if ICMP6_NI_SUBJ_IPV6 != 0 case 0: #endif /* * backward compatibility - try to accept 03 draft * format, where no Subject is present. */ if (qtype == NI_QTYPE_FQDN && ni6->ni_code == 0 && subjlen == 0) { oldfqdn++; break; } #if ICMP6_NI_SUBJ_IPV6 != 0 if (ni6->ni_code != ICMP6_NI_SUBJ_IPV6) goto bad; #endif if (subjlen != sizeof(struct in6_addr)) goto bad; /* * Validate Subject address. * * Not sure what exactly "address belongs to the node" * means in the spec, is it just unicast, or what? * * At this moment we consider Subject address as * "belong to the node" if the Subject address equals * to the IPv6 destination address; validation for * IPv6 destination address should have done enough * check for us. * * We do not do proxy at this moment. */ /* m_pulldown instead of copy? */ m_copydata(m, off + sizeof(struct icmp6_nodeinfo), subjlen, (caddr_t)&in6_subj); if (in6_setscope(&in6_subj, m->m_pkthdr.rcvif, NULL)) goto bad; subj = (char *)&in6_subj; if (IN6_ARE_ADDR_EQUAL(&ip6->ip6_dst, &in6_subj)) break; /* * XXX if we are to allow other cases, we should really * be careful about scope here. * basically, we should disallow queries toward IPv6 * destination X with subject Y, * if scope(X) > scope(Y). * if we allow scope(X) > scope(Y), it will result in * information leakage across scope boundary. */ goto bad; case ICMP6_NI_SUBJ_FQDN: /* * Validate Subject name with gethostname(3). * * The behavior may need some debate, since: * - we are not sure if the node has FQDN as * hostname (returned by gethostname(3)). * - the code does wildcard match for truncated names. * however, we are not sure if we want to perform * wildcard match, if gethostname(3) side has * truncated hostname. */ + mtx_lock(&hostname_mtx); n = ni6_nametodns(hostname, hostnamelen, 0); + mtx_unlock(&hostname_mtx); if (!n || n->m_next || n->m_len == 0) goto bad; IP6_EXTHDR_GET(subj, char *, m, off + sizeof(struct icmp6_nodeinfo), subjlen); if (subj == NULL) goto bad; if (!ni6_dnsmatch(subj, subjlen, mtod(n, const char *), n->m_len)) { goto bad; } m_freem(n); n = NULL; break; case ICMP6_NI_SUBJ_IPV4: /* XXX: to be implemented? */ default: goto bad; } break; } /* refuse based on configuration. XXX ICMP6_NI_REFUSED? */ switch (qtype) { case NI_QTYPE_FQDN: if ((icmp6_nodeinfo & ICMP6_NODEINFO_FQDNOK) == 0) goto bad; break; case NI_QTYPE_NODEADDR: case NI_QTYPE_IPV4ADDR: if ((icmp6_nodeinfo & ICMP6_NODEINFO_NODEADDROK) == 0) goto bad; break; } /* guess reply length */ switch (qtype) { case NI_QTYPE_NOOP: break; /* no reply data */ case NI_QTYPE_SUPTYPES: replylen += sizeof(u_int32_t); break; case NI_QTYPE_FQDN: /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); break; case NI_QTYPE_NODEADDR: addrs = ni6_addrs(ni6, m, &ifp, (struct in6_addr *)subj); if ((replylen += addrs * (sizeof(struct in6_addr) + sizeof(u_int32_t))) > MCLBYTES) replylen = MCLBYTES; /* XXX: will truncate pkt later */ break; case NI_QTYPE_IPV4ADDR: /* unsupported - should respond with unknown Qtype? */ break; default: /* * XXX: We must return a reply with the ICMP6 code * `unknown Qtype' in this case. However we regard the case * as an FQDN query for backward compatibility. * Older versions set a random value to this field, * so it rarely varies in the defined qtypes. * But the mechanism is not reliable... * maybe we should obsolete older versions. */ qtype = NI_QTYPE_FQDN; /* XXX will append an mbuf */ replylen += offsetof(struct ni_reply_fqdn, ni_fqdn_namelen); oldfqdn++; break; } /* allocate an mbuf to reply. */ MGETHDR(n, M_DONTWAIT, m->m_type); if (n == NULL) { m_freem(m); return (NULL); } M_MOVE_PKTHDR(n, m); /* just for recvif */ if (replylen > MHLEN) { if (replylen > MCLBYTES) { /* * XXX: should we try to allocate more? But MCLBYTES * is probably much larger than IPV6_MMTU... */ goto bad; } MCLGET(n, M_DONTWAIT); if ((n->m_flags & M_EXT) == 0) { goto bad; } } n->m_pkthdr.len = n->m_len = replylen; /* copy mbuf header and IPv6 + Node Information base headers */ bcopy(mtod(m, caddr_t), mtod(n, caddr_t), sizeof(struct ip6_hdr)); nni6 = (struct icmp6_nodeinfo *)(mtod(n, struct ip6_hdr *) + 1); bcopy((caddr_t)ni6, (caddr_t)nni6, sizeof(struct icmp6_nodeinfo)); /* qtype dependent procedure */ switch (qtype) { case NI_QTYPE_NOOP: nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = 0; break; case NI_QTYPE_SUPTYPES: { u_int32_t v; nni6->ni_code = ICMP6_NI_SUCCESS; nni6->ni_flags = htons(0x0000); /* raw bitmap */ /* supports NOOP, SUPTYPES, FQDN, and NODEADDR */ v = (u_int32_t)htonl(0x0000000f); bcopy(&v, nni6 + 1, sizeof(u_int32_t)); break; } case NI_QTYPE_FQDN: nni6->ni_code = ICMP6_NI_SUCCESS; fqdn = (struct ni_reply_fqdn *)(mtod(n, caddr_t) + sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo)); nni6->ni_flags = 0; /* XXX: meaningless TTL */ fqdn->ni_fqdn_ttl = 0; /* ditto. */ /* * XXX do we really have FQDN in variable "hostname"? */ + mtx_lock(&hostname_mtx); n->m_next = ni6_nametodns(hostname, hostnamelen, oldfqdn); + mtx_unlock(&hostname_mtx); if (n->m_next == NULL) goto bad; /* XXX we assume that n->m_next is not a chain */ if (n->m_next->m_next != NULL) goto bad; n->m_pkthdr.len += n->m_next->m_len; break; case NI_QTYPE_NODEADDR: { int lenlim, copied; nni6->ni_code = ICMP6_NI_SUCCESS; n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo); lenlim = M_TRAILINGSPACE(n); copied = ni6_store_addrs(ni6, nni6, ifp, lenlim); /* XXX: reset mbuf length */ n->m_pkthdr.len = n->m_len = sizeof(struct ip6_hdr) + sizeof(struct icmp6_nodeinfo) + copied; break; } default: break; /* XXX impossible! */ } nni6->ni_type = ICMP6_NI_REPLY; m_freem(m); return (n); bad: m_freem(m); if (n) m_freem(n); return (NULL); } #undef hostnamelen /* * make a mbuf with DNS-encoded string. no compression support. * * XXX names with less than 2 dots (like "foo" or "foo.section") will be * treated as truncated name (two \0 at the end). this is a wild guess. * * old - return pascal string if non-zero */ static struct mbuf * ni6_nametodns(const char *name, int namelen, int old) { struct mbuf *m; char *cp, *ep; const char *p, *q; int i, len, nterm; if (old) len = namelen + 1; else len = MCLBYTES; /* because MAXHOSTNAMELEN is usually 256, we use cluster mbuf */ MGET(m, M_DONTWAIT, MT_DATA); if (m && len > MLEN) { MCLGET(m, M_DONTWAIT); if ((m->m_flags & M_EXT) == 0) goto fail; } if (!m) goto fail; m->m_next = NULL; if (old) { m->m_len = len; *mtod(m, char *) = namelen; bcopy(name, mtod(m, char *) + 1, namelen); return m; } else { m->m_len = 0; cp = mtod(m, char *); ep = mtod(m, char *) + M_TRAILINGSPACE(m); /* if not certain about my name, return empty buffer */ if (namelen == 0) return m; /* * guess if it looks like shortened hostname, or FQDN. * shortened hostname needs two trailing "\0". */ i = 0; for (p = name; p < name + namelen; p++) { if (*p && *p == '.') i++; } if (i < 2) nterm = 2; else nterm = 1; p = name; while (cp < ep && p < name + namelen) { i = 0; for (q = p; q < name + namelen && *q && *q != '.'; q++) i++; /* result does not fit into mbuf */ if (cp + i + 1 >= ep) goto fail; /* * DNS label length restriction, RFC1035 page 8. * "i == 0" case is included here to avoid returning * 0-length label on "foo..bar". */ if (i <= 0 || i >= 64) goto fail; *cp++ = i; bcopy(p, cp, i); cp += i; p = q; if (p < name + namelen && *p == '.') p++; } /* termination */ if (cp + nterm >= ep) goto fail; while (nterm-- > 0) *cp++ = '\0'; m->m_len = cp - mtod(m, char *); return m; } panic("should not reach here"); /* NOTREACHED */ fail: if (m) m_freem(m); return NULL; } /* * check if two DNS-encoded string matches. takes care of truncated * form (with \0\0 at the end). no compression support. * XXX upper/lowercase match (see RFC2065) */ static int ni6_dnsmatch(const char *a, int alen, const char *b, int blen) { const char *a0, *b0; int l; /* simplest case - need validation? */ if (alen == blen && bcmp(a, b, alen) == 0) return 1; a0 = a; b0 = b; /* termination is mandatory */ if (alen < 2 || blen < 2) return 0; if (a0[alen - 1] != '\0' || b0[blen - 1] != '\0') return 0; alen--; blen--; while (a - a0 < alen && b - b0 < blen) { if (a - a0 + 1 > alen || b - b0 + 1 > blen) return 0; if ((signed char)a[0] < 0 || (signed char)b[0] < 0) return 0; /* we don't support compression yet */ if (a[0] >= 64 || b[0] >= 64) return 0; /* truncated case */ if (a[0] == 0 && a - a0 == alen - 1) return 1; if (b[0] == 0 && b - b0 == blen - 1) return 1; if (a[0] == 0 || b[0] == 0) return 0; if (a[0] != b[0]) return 0; l = a[0]; if (a - a0 + 1 + l > alen || b - b0 + 1 + l > blen) return 0; if (bcmp(a + 1, b + 1, l) != 0) return 0; a += 1 + l; b += 1 + l; } if (a - a0 == alen && b - b0 == blen) return 1; else return 0; } /* * calculate the number of addresses to be returned in the node info reply. */ static int ni6_addrs(struct icmp6_nodeinfo *ni6, struct mbuf *m, struct ifnet **ifpp, struct in6_addr *subj) { struct ifnet *ifp; struct in6_ifaddr *ifa6; struct ifaddr *ifa; int addrs = 0, addrsofif, iffound = 0; int niflags = ni6->ni_flags; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0) { switch (ni6->ni_code) { case ICMP6_NI_SUBJ_IPV6: if (subj == NULL) /* must be impossible... */ return (0); break; default: /* * XXX: we only support IPv6 subject address for * this Qtype. */ return (0); } } IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { addrsofif = 0; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((niflags & NI_NODEADDR_FLAG_ALL) == 0 && IN6_ARE_ADDR_EQUAL(subj, &ifa6->ia_addr.sin6_addr)) iffound = 1; /* * IPv4-mapped addresses can only be returned by a * Node Information proxy, since they represent * addresses of IPv4-only nodes, which perforce do * not implement this protocol. * [icmp-name-lookups-07, Section 5.4] * So we don't support NI_NODEADDR_FLAG_COMPAT in * this function at this moment. */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; /* we need only unicast addresses */ if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } addrsofif++; /* count the address */ } if (iffound) { *ifpp = ifp; IFNET_RUNLOCK(); return (addrsofif); } addrs += addrsofif; } IFNET_RUNLOCK(); return (addrs); } static int ni6_store_addrs(struct icmp6_nodeinfo *ni6, struct icmp6_nodeinfo *nni6, struct ifnet *ifp0, int resid) { struct ifnet *ifp = ifp0 ? ifp0 : TAILQ_FIRST(&ifnet); struct in6_ifaddr *ifa6; struct ifaddr *ifa; struct ifnet *ifp_dep = NULL; int copied = 0, allow_deprecated = 0; u_char *cp = (u_char *)(nni6 + 1); int niflags = ni6->ni_flags; u_int32_t ltime; if (ifp0 == NULL && !(niflags & NI_NODEADDR_FLAG_ALL)) return (0); /* needless to copy */ IFNET_RLOCK(); again: for (; ifp; ifp = TAILQ_NEXT(ifp, if_list)) { for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifa6 = (struct in6_ifaddr *)ifa; if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) != 0 && allow_deprecated == 0) { /* * prefererred address should be put before * deprecated addresses. */ /* record the interface for later search */ if (ifp_dep == NULL) ifp_dep = ifp; continue; } else if ((ifa6->ia6_flags & IN6_IFF_DEPRECATED) == 0 && allow_deprecated != 0) continue; /* we now collect deprecated addrs */ /* What do we have to do about ::1? */ switch (in6_addrscope(&ifa6->ia_addr.sin6_addr)) { case IPV6_ADDR_SCOPE_LINKLOCAL: if ((niflags & NI_NODEADDR_FLAG_LINKLOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_SITELOCAL: if ((niflags & NI_NODEADDR_FLAG_SITELOCAL) == 0) continue; break; case IPV6_ADDR_SCOPE_GLOBAL: if ((niflags & NI_NODEADDR_FLAG_GLOBAL) == 0) continue; break; default: continue; } /* * check if anycast is okay. * XXX: just experimental. not in the spec. */ if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0 && (niflags & NI_NODEADDR_FLAG_ANYCAST) == 0) continue; if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && (icmp6_nodeinfo & ICMP6_NODEINFO_TMPADDROK) == 0) { continue; } /* now we can copy the address */ if (resid < sizeof(struct in6_addr) + sizeof(u_int32_t)) { /* * We give up much more copy. * Set the truncate flag and return. */ nni6->ni_flags |= NI_NODEADDR_FLAG_TRUNCATE; IFNET_RUNLOCK(); return (copied); } /* * Set the TTL of the address. * The TTL value should be one of the following * according to the specification: * * 1. The remaining lifetime of a DHCP lease on the * address, or * 2. The remaining Valid Lifetime of a prefix from * which the address was derived through Stateless * Autoconfiguration. * * Note that we currently do not support stateful * address configuration by DHCPv6, so the former * case can't happen. */ if (ifa6->ia6_lifetime.ia6t_expire == 0) ltime = ND6_INFINITE_LIFETIME; else { if (ifa6->ia6_lifetime.ia6t_expire > time_second) ltime = htonl(ifa6->ia6_lifetime.ia6t_expire - time_second); else ltime = 0; } bcopy(<ime, cp, sizeof(u_int32_t)); cp += sizeof(u_int32_t); /* copy the address itself */ bcopy(&ifa6->ia_addr.sin6_addr, cp, sizeof(struct in6_addr)); in6_clearscope((struct in6_addr *)cp); /* XXX */ cp += sizeof(struct in6_addr); resid -= (sizeof(struct in6_addr) + sizeof(u_int32_t)); copied += (sizeof(struct in6_addr) + sizeof(u_int32_t)); } if (ifp0) /* we need search only on the specified IF */ break; } if (allow_deprecated == 0 && ifp_dep != NULL) { ifp = ifp_dep; allow_deprecated = 1; goto again; } IFNET_RUNLOCK(); return (copied); } /* * XXX almost dup'ed code with rip6_input. */ static int icmp6_rip6_input(struct mbuf **mp, int off) { struct mbuf *m = *mp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct in6pcb *in6p; struct in6pcb *last = NULL; struct sockaddr_in6 fromsa; struct icmp6_hdr *icmp6; struct mbuf *opts = NULL; #ifndef PULLDOWN_TEST /* this is assumed to be safe. */ icmp6 = (struct icmp6_hdr *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, off, sizeof(*icmp6)); if (icmp6 == NULL) { /* m is already reclaimed */ return (IPPROTO_DONE); } #endif /* * XXX: the address may have embedded scope zone ID, which should be * hidden from applications. */ bzero(&fromsa, sizeof(fromsa)); fromsa.sin6_family = AF_INET6; fromsa.sin6_len = sizeof(struct sockaddr_in6); fromsa.sin6_addr = ip6->ip6_src; if (sa6_recoverscope(&fromsa)) { m_freem(m); return (IPPROTO_DONE); } INP_INFO_RLOCK(&ripcbinfo); LIST_FOREACH(in6p, &ripcb, inp_list) { INP_RLOCK(in6p); if ((in6p->inp_vflag & INP_IPV6) == 0) { docontinue: INP_RUNLOCK(in6p); continue; } if (in6p->in6p_ip6_nxt != IPPROTO_ICMPV6) goto docontinue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_laddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_laddr, &ip6->ip6_dst)) goto docontinue; if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) goto docontinue; if (in6p->in6p_icmp6filt && ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->in6p_icmp6filt)) goto docontinue; if (last) { struct mbuf *n = NULL; /* * Recent network drivers tend to allocate a single * mbuf cluster, rather than to make a couple of * mbufs without clusters. Also, since the IPv6 code * path tries to avoid m_pullup(), it is highly * probable that we still have an mbuf cluster here * even though the necessary length can be stored in an * mbuf's internal buffer. * Meanwhile, the default size of the receive socket * buffer for raw sockets is not so large. This means * the possibility of packet loss is relatively higher * than before. To avoid this scenario, we copy the * received data to a separate mbuf that does not use * a cluster, if possible. * XXX: it is better to copy the data after stripping * intermediate headers. */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { MGET(n, M_DONTWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; } else { m_free(n); n = NULL; } } } if (n != NULL || (n = m_copy(m, 0, (int)M_COPYALL)) != NULL) { if (last->in6p_flags & IN6P_CONTROLOPTS) ip6_savecontrol(last, n, &opts); /* strip intermediate headers */ m_adj(n, off); SOCKBUF_LOCK(&last->in6p_socket->so_rcv); if (sbappendaddr_locked( &last->in6p_socket->so_rcv, (struct sockaddr *)&fromsa, n, opts) == 0) { /* should notify about lost packet */ m_freem(n); if (opts) { m_freem(opts); } SOCKBUF_UNLOCK( &last->in6p_socket->so_rcv); } else sorwakeup_locked(last->in6p_socket); opts = NULL; } INP_RUNLOCK(last); } last = in6p; } if (last) { if (last->in6p_flags & IN6P_CONTROLOPTS) ip6_savecontrol(last, m, &opts); /* strip intermediate headers */ m_adj(m, off); /* avoid using mbuf clusters if possible (see above) */ if ((m->m_flags & M_EXT) && m->m_next == NULL && m->m_len <= MHLEN) { struct mbuf *n; MGET(n, M_DONTWAIT, m->m_type); if (n != NULL) { if (m_dup_pkthdr(n, m, M_NOWAIT)) { bcopy(m->m_data, n->m_data, m->m_len); n->m_len = m->m_len; m_freem(m); m = n; } else { m_freem(n); n = NULL; } } } SOCKBUF_LOCK(&last->in6p_socket->so_rcv); if (sbappendaddr_locked(&last->in6p_socket->so_rcv, (struct sockaddr *)&fromsa, m, opts) == 0) { m_freem(m); if (opts) m_freem(opts); SOCKBUF_UNLOCK(&last->in6p_socket->so_rcv); } else sorwakeup_locked(last->in6p_socket); INP_RUNLOCK(last); } else { m_freem(m); ip6stat.ip6s_delivered--; } INP_INFO_RUNLOCK(&ripcbinfo); return IPPROTO_DONE; } /* * Reflect the ip6 packet back to the source. * OFF points to the icmp6 header, counted from the top of the mbuf. */ void icmp6_reflect(struct mbuf *m, size_t off) { struct ip6_hdr *ip6; struct icmp6_hdr *icmp6; struct in6_ifaddr *ia; int plen; int type, code; struct ifnet *outif = NULL; struct in6_addr origdst, *src = NULL; /* too short to reflect */ if (off < sizeof(struct ip6_hdr)) { nd6log((LOG_DEBUG, "sanity fail: off=%lx, sizeof(ip6)=%lx in %s:%d\n", (u_long)off, (u_long)sizeof(struct ip6_hdr), __FILE__, __LINE__)); goto bad; } /* * If there are extra headers between IPv6 and ICMPv6, strip * off that header first. */ #ifdef DIAGNOSTIC if (sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) > MHLEN) panic("assumption failed in icmp6_reflect"); #endif if (off > sizeof(struct ip6_hdr)) { size_t l; struct ip6_hdr nip6; l = off - sizeof(struct ip6_hdr); m_copydata(m, 0, sizeof(nip6), (caddr_t)&nip6); m_adj(m, l); l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } bcopy((caddr_t)&nip6, mtod(m, caddr_t), sizeof(nip6)); } else /* off == sizeof(struct ip6_hdr) */ { size_t l; l = sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr); if (m->m_len < l) { if ((m = m_pullup(m, l)) == NULL) return; } } plen = m->m_pkthdr.len - sizeof(struct ip6_hdr); ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_nxt = IPPROTO_ICMPV6; icmp6 = (struct icmp6_hdr *)(ip6 + 1); type = icmp6->icmp6_type; /* keep type for statistics */ code = icmp6->icmp6_code; /* ditto. */ origdst = ip6->ip6_dst; /* * ip6_input() drops a packet if its src is multicast. * So, the src is never multicast. */ ip6->ip6_dst = ip6->ip6_src; /* * If the incoming packet was addressed directly to us (i.e. unicast), * use dst as the src for the reply. * The IN6_IFF_NOTREADY case should be VERY rare, but is possible * (for example) when we encounter an error while forwarding procedure * destined to a duplicated address of ours. * Note that ip6_getdstifaddr() may fail if we are in an error handling * procedure of an outgoing packet of our own, in which case we need * to search in the ifaddr list. */ if (!IN6_IS_ADDR_MULTICAST(&origdst)) { if ((ia = ip6_getdstifaddr(m))) { if (!(ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) src = &ia->ia_addr.sin6_addr; } else { struct sockaddr_in6 d; bzero(&d, sizeof(d)); d.sin6_family = AF_INET6; d.sin6_len = sizeof(d); d.sin6_addr = origdst; ia = (struct in6_ifaddr *) ifa_ifwithaddr((struct sockaddr *)&d); if (ia && !(ia->ia6_flags & (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY))) { src = &ia->ia_addr.sin6_addr; } } } if (src == NULL) { int e; struct sockaddr_in6 sin6; struct route_in6 ro; /* * This case matches to multicasts, our anycast, or unicasts * that we do not own. Select a source address based on the * source address of the erroneous packet. */ bzero(&sin6, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(sin6); sin6.sin6_addr = ip6->ip6_dst; /* zone ID should be embedded */ bzero(&ro, sizeof(ro)); src = in6_selectsrc(&sin6, NULL, NULL, &ro, NULL, &outif, &e); if (ro.ro_rt) RTFREE(ro.ro_rt); /* XXX: we could use this */ if (src == NULL) { char ip6buf[INET6_ADDRSTRLEN]; nd6log((LOG_DEBUG, "icmp6_reflect: source can't be determined: " "dst=%s, error=%d\n", ip6_sprintf(ip6buf, &sin6.sin6_addr), e)); goto bad; } } ip6->ip6_src = *src; ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; ip6->ip6_nxt = IPPROTO_ICMPV6; if (outif) ip6->ip6_hlim = ND_IFINFO(outif)->chlim; else if (m->m_pkthdr.rcvif) { /* XXX: This may not be the outgoing interface */ ip6->ip6_hlim = ND_IFINFO(m->m_pkthdr.rcvif)->chlim; } else ip6->ip6_hlim = ip6_defhlim; icmp6->icmp6_cksum = 0; icmp6->icmp6_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(struct ip6_hdr), plen); /* * XXX option handling */ m->m_flags &= ~(M_BCAST|M_MCAST); ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) icmp6_ifoutstat_inc(outif, type, code); return; bad: m_freem(m); return; } void icmp6_fasttimo(void) { return; } static const char * icmp6_redirect_diag(struct in6_addr *src6, struct in6_addr *dst6, struct in6_addr *tgt6) { static char buf[1024]; char ip6bufs[INET6_ADDRSTRLEN]; char ip6bufd[INET6_ADDRSTRLEN]; char ip6buft[INET6_ADDRSTRLEN]; snprintf(buf, sizeof(buf), "(src=%s dst=%s tgt=%s)", ip6_sprintf(ip6bufs, src6), ip6_sprintf(ip6bufd, dst6), ip6_sprintf(ip6buft, tgt6)); return buf; } void icmp6_redirect_input(struct mbuf *m, int off) { struct ifnet *ifp; struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); struct nd_redirect *nd_rd; int icmp6len = ntohs(ip6->ip6_plen); char *lladdr = NULL; int lladdrlen = 0; u_char *redirhdr = NULL; int redirhdrlen = 0; struct rtentry *rt = NULL; int is_router; int is_onlink; struct in6_addr src6 = ip6->ip6_src; struct in6_addr redtgt6; struct in6_addr reddst6; union nd_opts ndopts; char ip6buf[INET6_ADDRSTRLEN]; if (!m) return; ifp = m->m_pkthdr.rcvif; if (!ifp) return; /* XXX if we are router, we don't update route by icmp6 redirect */ if (ip6_forwarding) goto freeit; if (!icmp6_rediraccept) goto freeit; #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, icmp6len,); nd_rd = (struct nd_redirect *)((caddr_t)ip6 + off); #else IP6_EXTHDR_GET(nd_rd, struct nd_redirect *, m, off, icmp6len); if (nd_rd == NULL) { icmp6stat.icp6s_tooshort++; return; } #endif redtgt6 = nd_rd->nd_rd_target; reddst6 = nd_rd->nd_rd_dst; if (in6_setscope(&redtgt6, m->m_pkthdr.rcvif, NULL) || in6_setscope(&reddst6, m->m_pkthdr.rcvif, NULL)) { goto freeit; } /* validation */ if (!IN6_IS_ADDR_LINKLOCAL(&src6)) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "must be from linklocal\n", ip6_sprintf(ip6buf, &src6))); goto bad; } if (ip6->ip6_hlim != 255) { nd6log((LOG_ERR, "ICMP6 redirect sent from %s rejected; " "hlim=%d (must be 255)\n", ip6_sprintf(ip6buf, &src6), ip6->ip6_hlim)); goto bad; } { /* ip6->ip6_src must be equal to gw for icmp6->icmp6_reddst */ struct sockaddr_in6 sin6; struct in6_addr *gw6; bzero(&sin6, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sin6.sin6_addr, sizeof(reddst6)); rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); if (rt) { if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6) { nd6log((LOG_ERR, "ICMP6 redirect rejected; no route " "with inet6 gateway found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); RTFREE_LOCKED(rt); goto bad; } gw6 = &(((struct sockaddr_in6 *)rt->rt_gateway)->sin6_addr); if (bcmp(&src6, gw6, sizeof(struct in6_addr)) != 0) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "not equal to gw-for-src=%s (must be same): " "%s\n", ip6_sprintf(ip6buf, gw6), icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); RTFREE_LOCKED(rt); goto bad; } } else { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "no route found for redirect dst: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } RTFREE_LOCKED(rt); rt = NULL; } if (IN6_IS_ADDR_MULTICAST(&reddst6)) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "redirect dst must be unicast: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } is_router = is_onlink = 0; if (IN6_IS_ADDR_LINKLOCAL(&redtgt6)) is_router = 1; /* router case */ if (bcmp(&redtgt6, &reddst6, sizeof(redtgt6)) == 0) is_onlink = 1; /* on-link destination case */ if (!is_router && !is_onlink) { nd6log((LOG_ERR, "ICMP6 redirect rejected; " "neither router case nor onlink case: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } /* validation passed */ icmp6len -= sizeof(*nd_rd); nd6_option_init(nd_rd + 1, icmp6len, &ndopts); if (nd6_options(&ndopts) < 0) { nd6log((LOG_INFO, "icmp6_redirect_input: " "invalid ND option, rejected: %s\n", icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); /* nd6_options have incremented stats */ goto freeit; } if (ndopts.nd_opts_tgt_lladdr) { lladdr = (char *)(ndopts.nd_opts_tgt_lladdr + 1); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; } if (ndopts.nd_opts_rh) { redirhdrlen = ndopts.nd_opts_rh->nd_opt_rh_len; redirhdr = (u_char *)(ndopts.nd_opts_rh + 1); /* xxx */ } if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) { nd6log((LOG_INFO, "icmp6_redirect_input: lladdrlen mismatch for %s " "(if %d, icmp6 packet %d): %s\n", ip6_sprintf(ip6buf, &redtgt6), ifp->if_addrlen, lladdrlen - 2, icmp6_redirect_diag(&src6, &reddst6, &redtgt6))); goto bad; } /* RFC 2461 8.3 */ nd6_cache_lladdr(ifp, &redtgt6, lladdr, lladdrlen, ND_REDIRECT, is_onlink ? ND_REDIRECT_ONLINK : ND_REDIRECT_ROUTER); if (!is_onlink) { /* better router case. perform rtredirect. */ /* perform rtredirect */ struct sockaddr_in6 sdst; struct sockaddr_in6 sgw; struct sockaddr_in6 ssrc; bzero(&sdst, sizeof(sdst)); bzero(&sgw, sizeof(sgw)); bzero(&ssrc, sizeof(ssrc)); sdst.sin6_family = sgw.sin6_family = ssrc.sin6_family = AF_INET6; sdst.sin6_len = sgw.sin6_len = ssrc.sin6_len = sizeof(struct sockaddr_in6); bcopy(&redtgt6, &sgw.sin6_addr, sizeof(struct in6_addr)); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); bcopy(&src6, &ssrc.sin6_addr, sizeof(struct in6_addr)); rtredirect((struct sockaddr *)&sdst, (struct sockaddr *)&sgw, (struct sockaddr *)NULL, RTF_GATEWAY | RTF_HOST, (struct sockaddr *)&ssrc); } /* finally update cached route in each socket via pfctlinput */ { struct sockaddr_in6 sdst; bzero(&sdst, sizeof(sdst)); sdst.sin6_family = AF_INET6; sdst.sin6_len = sizeof(struct sockaddr_in6); bcopy(&reddst6, &sdst.sin6_addr, sizeof(struct in6_addr)); pfctlinput(PRC_REDIRECT_HOST, (struct sockaddr *)&sdst); #ifdef IPSEC key_sa_routechange((struct sockaddr *)&sdst); #endif /* IPSEC */ } freeit: m_freem(m); return; bad: icmp6stat.icp6s_badredirect++; m_freem(m); } void icmp6_redirect_output(struct mbuf *m0, struct rtentry *rt) { struct ifnet *ifp; /* my outgoing interface */ struct in6_addr *ifp_ll6; struct in6_addr *router_ll6; struct ip6_hdr *sip6; /* m0 as struct ip6_hdr */ struct mbuf *m = NULL; /* newly allocated one */ struct ip6_hdr *ip6; /* m as struct ip6_hdr */ struct nd_redirect *nd_rd; size_t maxlen; u_char *p; struct ifnet *outif = NULL; struct sockaddr_in6 src_sa; icmp6_errcount(&icmp6stat.icp6s_outerrhist, ND_REDIRECT, 0); /* if we are not router, we don't send icmp6 redirect */ if (!ip6_forwarding) goto fail; /* sanity check */ if (!m0 || !rt || !(rt->rt_flags & RTF_UP) || !(ifp = rt->rt_ifp)) goto fail; /* * Address check: * the source address must identify a neighbor, and * the destination address must not be a multicast address * [RFC 2461, sec 8.2] */ sip6 = mtod(m0, struct ip6_hdr *); bzero(&src_sa, sizeof(src_sa)); src_sa.sin6_family = AF_INET6; src_sa.sin6_len = sizeof(src_sa); src_sa.sin6_addr = sip6->ip6_src; if (nd6_is_addr_neighbor(&src_sa, ifp) == 0) goto fail; if (IN6_IS_ADDR_MULTICAST(&sip6->ip6_dst)) goto fail; /* what should we do here? */ /* rate limit */ if (icmp6_ratelimit(&sip6->ip6_src, ND_REDIRECT, 0)) goto fail; /* * Since we are going to append up to 1280 bytes (= IPV6_MMTU), * we almost always ask for an mbuf cluster for simplicity. * (MHLEN < IPV6_MMTU is almost always true) */ #if IPV6_MMTU >= MCLBYTES # error assumption failed about IPV6_MMTU and MCLBYTES #endif MGETHDR(m, M_DONTWAIT, MT_HEADER); if (m && IPV6_MMTU >= MHLEN) MCLGET(m, M_DONTWAIT); if (!m) goto fail; m->m_pkthdr.rcvif = NULL; m->m_len = 0; maxlen = M_TRAILINGSPACE(m); maxlen = min(IPV6_MMTU, maxlen); /* just for safety */ if (maxlen < sizeof(struct ip6_hdr) + sizeof(struct icmp6_hdr) + ((sizeof(struct nd_opt_hdr) + ifp->if_addrlen + 7) & ~7)) { goto fail; } { /* get ip6 linklocal address for ifp(my outgoing interface). */ struct in6_ifaddr *ia; if ((ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY| IN6_IFF_ANYCAST)) == NULL) goto fail; ifp_ll6 = &ia->ia_addr.sin6_addr; } /* get ip6 linklocal address for the router. */ if (rt->rt_gateway && (rt->rt_flags & RTF_GATEWAY)) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)rt->rt_gateway; router_ll6 = &sin6->sin6_addr; if (!IN6_IS_ADDR_LINKLOCAL(router_ll6)) router_ll6 = (struct in6_addr *)NULL; } else router_ll6 = (struct in6_addr *)NULL; /* ip6 */ ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_flow = 0; ip6->ip6_vfc &= ~IPV6_VERSION_MASK; ip6->ip6_vfc |= IPV6_VERSION; /* ip6->ip6_plen will be set later */ ip6->ip6_nxt = IPPROTO_ICMPV6; ip6->ip6_hlim = 255; /* ip6->ip6_src must be linklocal addr for my outgoing if. */ bcopy(ifp_ll6, &ip6->ip6_src, sizeof(struct in6_addr)); bcopy(&sip6->ip6_src, &ip6->ip6_dst, sizeof(struct in6_addr)); /* ND Redirect */ nd_rd = (struct nd_redirect *)(ip6 + 1); nd_rd->nd_rd_type = ND_REDIRECT; nd_rd->nd_rd_code = 0; nd_rd->nd_rd_reserved = 0; if (rt->rt_flags & RTF_GATEWAY) { /* * nd_rd->nd_rd_target must be a link-local address in * better router cases. */ if (!router_ll6) goto fail; bcopy(router_ll6, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } else { /* make sure redtgt == reddst */ bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_target, sizeof(nd_rd->nd_rd_target)); bcopy(&sip6->ip6_dst, &nd_rd->nd_rd_dst, sizeof(nd_rd->nd_rd_dst)); } p = (u_char *)(nd_rd + 1); if (!router_ll6) goto nolladdropt; { /* target lladdr option */ struct rtentry *rt_router = NULL; int len; struct sockaddr_dl *sdl; struct nd_opt_hdr *nd_opt; char *lladdr; rt_router = nd6_lookup(router_ll6, 0, ifp); if (!rt_router) goto nolladdropt; len = sizeof(*nd_opt) + ifp->if_addrlen; len = (len + 7) & ~7; /* round by 8 */ /* safety check */ if (len + (p - (u_char *)ip6) > maxlen) goto nolladdropt; if (!(rt_router->rt_flags & RTF_GATEWAY) && (rt_router->rt_flags & RTF_LLINFO) && (rt_router->rt_gateway->sa_family == AF_LINK) && (sdl = (struct sockaddr_dl *)rt_router->rt_gateway) && sdl->sdl_alen) { nd_opt = (struct nd_opt_hdr *)p; nd_opt->nd_opt_type = ND_OPT_TARGET_LINKADDR; nd_opt->nd_opt_len = len >> 3; lladdr = (char *)(nd_opt + 1); bcopy(LLADDR(sdl), lladdr, ifp->if_addrlen); p += len; } } nolladdropt:; m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* just to be safe */ #ifdef M_DECRYPTED /*not openbsd*/ if (m0->m_flags & M_DECRYPTED) goto noredhdropt; #endif if (p - (u_char *)ip6 > maxlen) goto noredhdropt; { /* redirected header option */ int len; struct nd_opt_rd_hdr *nd_opt_rh; /* * compute the maximum size for icmp6 redirect header option. * XXX room for auth header? */ len = maxlen - (p - (u_char *)ip6); len &= ~7; /* This is just for simplicity. */ if (m0->m_pkthdr.len != m0->m_len) { if (m0->m_next) { m_freem(m0->m_next); m0->m_next = NULL; } m0->m_pkthdr.len = m0->m_len; } /* * Redirected header option spec (RFC2461 4.6.3) talks nothing * about padding/truncate rule for the original IP packet. * From the discussion on IPv6imp in Feb 1999, * the consensus was: * - "attach as much as possible" is the goal * - pad if not aligned (original size can be guessed by * original ip6 header) * Following code adds the padding if it is simple enough, * and truncates if not. */ if (m0->m_next || m0->m_pkthdr.len != m0->m_len) panic("assumption failed in %s:%d", __FILE__, __LINE__); if (len - sizeof(*nd_opt_rh) < m0->m_pkthdr.len) { /* not enough room, truncate */ m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } else { /* enough room, pad or truncate */ size_t extra; extra = m0->m_pkthdr.len % 8; if (extra) { /* pad if easy enough, truncate if not */ if (8 - extra <= M_TRAILINGSPACE(m0)) { /* pad */ m0->m_len += (8 - extra); m0->m_pkthdr.len += (8 - extra); } else { /* truncate */ m0->m_pkthdr.len -= extra; m0->m_len -= extra; } } len = m0->m_pkthdr.len + sizeof(*nd_opt_rh); m0->m_pkthdr.len = m0->m_len = len - sizeof(*nd_opt_rh); } nd_opt_rh = (struct nd_opt_rd_hdr *)p; bzero(nd_opt_rh, sizeof(*nd_opt_rh)); nd_opt_rh->nd_opt_rh_type = ND_OPT_REDIRECTED_HEADER; nd_opt_rh->nd_opt_rh_len = len >> 3; p += sizeof(*nd_opt_rh); m->m_pkthdr.len = m->m_len = p - (u_char *)ip6; /* connect m0 to m */ m_tag_delete_chain(m0, NULL); m0->m_flags &= ~M_PKTHDR; m->m_next = m0; m->m_pkthdr.len = m->m_len + m0->m_len; m0 = NULL; } noredhdropt:; if (m0) { m_freem(m0); m0 = NULL; } /* XXX: clear embedded link IDs in the inner header */ in6_clearscope(&sip6->ip6_src); in6_clearscope(&sip6->ip6_dst); in6_clearscope(&nd_rd->nd_rd_target); in6_clearscope(&nd_rd->nd_rd_dst); ip6->ip6_plen = htons(m->m_pkthdr.len - sizeof(struct ip6_hdr)); nd_rd->nd_rd_cksum = 0; nd_rd->nd_rd_cksum = in6_cksum(m, IPPROTO_ICMPV6, sizeof(*ip6), ntohs(ip6->ip6_plen)); /* send the packet to outside... */ ip6_output(m, NULL, NULL, 0, NULL, &outif, NULL); if (outif) { icmp6_ifstat_inc(outif, ifs6_out_msg); icmp6_ifstat_inc(outif, ifs6_out_redirect); } icmp6stat.icp6s_outhist[ND_REDIRECT]++; return; fail: if (m) m_freem(m); if (m0) m_freem(m0); } /* * ICMPv6 socket option processing. */ int icmp6_ctloutput(struct socket *so, struct sockopt *sopt) { int error = 0; int optlen; struct inpcb *inp = sotoinpcb(so); int level, op, optname; if (sopt) { level = sopt->sopt_level; op = sopt->sopt_dir; optname = sopt->sopt_name; optlen = sopt->sopt_valsize; } else level = op = optname = optlen = 0; if (level != IPPROTO_ICMPV6) { return EINVAL; } switch (op) { case PRCO_SETOPT: switch (optname) { case ICMP6_FILTER: { struct icmp6_filter *p; if (optlen != sizeof(*p)) { error = EMSGSIZE; break; } if (inp->in6p_icmp6filt == NULL) { error = EINVAL; break; } error = sooptcopyin(sopt, inp->in6p_icmp6filt, optlen, optlen); break; } default: error = ENOPROTOOPT; break; } break; case PRCO_GETOPT: switch (optname) { case ICMP6_FILTER: { if (inp->in6p_icmp6filt == NULL) { error = EINVAL; break; } error = sooptcopyout(sopt, inp->in6p_icmp6filt, sizeof(struct icmp6_filter)); break; } default: error = ENOPROTOOPT; break; } break; } return (error); } /* * Perform rate limit check. * Returns 0 if it is okay to send the icmp6 packet. * Returns 1 if the router SHOULD NOT send this icmp6 packet due to rate * limitation. * * XXX per-destination/type check necessary? * * dst - not used at this moment * type - not used at this moment * code - not used at this moment */ static int icmp6_ratelimit(const struct in6_addr *dst, const int type, const int code) { int ret; ret = 0; /* okay to send */ /* PPS limit */ if (!ppsratecheck(&icmp6errppslim_last, &icmp6errpps_count, icmp6errppslim)) { /* The packet is subject to rate limit */ ret++; } return ret; } Index: head/sys/netinet6/in6.c =================================================================== --- head/sys/netinet6/in6.c (revision 180290) +++ head/sys/netinet6/in6.c (revision 180291) @@ -1,2315 +1,2318 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6.c,v 1.259 2002/01/21 11:37:50 keiichi Exp $ */ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in.c 8.2 (Berkeley) 11/15/93 */ #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_IP6MADDR, "in6_multi", "internet multicast address"); /* * Definitions of some costant IP6 addresses. */ const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT; const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT; const struct in6_addr in6addr_nodelocal_allnodes = IN6ADDR_NODELOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT; const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT; const struct in6_addr in6mask0 = IN6MASK0; const struct in6_addr in6mask32 = IN6MASK32; const struct in6_addr in6mask64 = IN6MASK64; const struct in6_addr in6mask96 = IN6MASK96; const struct in6_addr in6mask128 = IN6MASK128; const struct sockaddr_in6 sa6_any = { sizeof(sa6_any), AF_INET6, 0, 0, IN6ADDR_ANY_INIT, 0 }; static int in6_lifaddr_ioctl __P((struct socket *, u_long, caddr_t, struct ifnet *, struct thread *)); static int in6_ifinit __P((struct ifnet *, struct in6_ifaddr *, struct sockaddr_in6 *, int)); static void in6_unlink_ifa(struct in6_ifaddr *, struct ifnet *); struct in6_multihead in6_multihead; /* XXX BSS initialization */ int (*faithprefix_p)(struct in6_addr *); /* * Subroutine for in6_ifaddloop() and in6_ifremloop(). * This routine does actual work. */ static void in6_ifloop_request(int cmd, struct ifaddr *ifa) { struct sockaddr_in6 all1_sa; struct rtentry *nrt = NULL; int e; char ip6buf[INET6_ADDRSTRLEN]; bzero(&all1_sa, sizeof(all1_sa)); all1_sa.sin6_family = AF_INET6; all1_sa.sin6_len = sizeof(struct sockaddr_in6); all1_sa.sin6_addr = in6mask128; /* * We specify the address itself as the gateway, and set the * RTF_LLINFO flag, so that the corresponding host route would have * the flag, and thus applications that assume traditional behavior * would be happy. Note that we assume the caller of the function * (probably implicitly) set nd6_rtrequest() to ifa->ifa_rtrequest, * which changes the outgoing interface to the loopback interface. */ e = rtrequest(cmd, ifa->ifa_addr, ifa->ifa_addr, (struct sockaddr *)&all1_sa, RTF_UP|RTF_HOST|RTF_LLINFO, &nrt); if (e != 0) { /* XXX need more descriptive message */ log(LOG_ERR, "in6_ifloop_request: " "%s operation failed for %s (errno=%d)\n", cmd == RTM_ADD ? "ADD" : "DELETE", ip6_sprintf(ip6buf, &((struct in6_ifaddr *)ifa)->ia_addr.sin6_addr), e); } /* * Report the addition/removal of the address to the routing socket. * XXX: since we called rtinit for a p2p interface with a destination, * we end up reporting twice in such a case. Should we rather * omit the second report? */ if (nrt) { RT_LOCK(nrt); /* * Make sure rt_ifa be equal to IFA, the second argument of * the function. We need this because when we refer to * rt_ifa->ia6_flags in ip6_input, we assume that the rt_ifa * points to the address instead of the loopback address. */ if (cmd == RTM_ADD && ifa != nrt->rt_ifa) { IFAFREE(nrt->rt_ifa); IFAREF(ifa); nrt->rt_ifa = ifa; } rt_newaddrmsg(cmd, ifa, e, nrt); if (cmd == RTM_DELETE) RTFREE_LOCKED(nrt); else { /* the cmd must be RTM_ADD here */ RT_REMREF(nrt); RT_UNLOCK(nrt); } } } /* * Add ownaddr as loopback rtentry. We previously add the route only if * necessary (ex. on a p2p link). However, since we now manage addresses * separately from prefixes, we should always add the route. We can't * rely on the cloning mechanism from the corresponding interface route * any more. */ void in6_ifaddloop(struct ifaddr *ifa) { struct rtentry *rt; int need_loop; /* If there is no loopback entry, allocate one. */ rt = rtalloc1(ifa->ifa_addr, 0, 0); need_loop = (rt == NULL || (rt->rt_flags & RTF_HOST) == 0 || (rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0); if (rt) RTFREE_LOCKED(rt); if (need_loop) in6_ifloop_request(RTM_ADD, ifa); } /* * Remove loopback rtentry of ownaddr generated by in6_ifaddloop(), * if it exists. */ void in6_ifremloop(struct ifaddr *ifa) { struct in6_ifaddr *ia; struct rtentry *rt; int ia_count = 0; /* * Some of BSD variants do not remove cloned routes * from an interface direct route, when removing the direct route * (see comments in net/net_osdep.h). Even for variants that do remove * cloned routes, they could fail to remove the cloned routes when * we handle multple addresses that share a common prefix. * So, we should remove the route corresponding to the deleted address. */ /* * Delete the entry only if exact one ifa exists. More than one ifa * can exist if we assign a same single address to multiple * (probably p2p) interfaces. * XXX: we should avoid such a configuration in IPv6... */ for (ia = in6_ifaddr; ia; ia = ia->ia_next) { if (IN6_ARE_ADDR_EQUAL(IFA_IN6(ifa), &ia->ia_addr.sin6_addr)) { ia_count++; if (ia_count > 1) break; } } if (ia_count == 1) { /* * Before deleting, check if a corresponding loopbacked host * route surely exists. With this check, we can avoid to * delete an interface direct route whose destination is same * as the address being removed. This can happen when removing * a subnet-router anycast address on an interface attahced * to a shared medium. */ rt = rtalloc1(ifa->ifa_addr, 0, 0); if (rt != NULL) { if ((rt->rt_flags & RTF_HOST) != 0 && (rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { RTFREE_LOCKED(rt); in6_ifloop_request(RTM_DELETE, ifa); } else RT_UNLOCK(rt); } } } int in6_mask2len(struct in6_addr *mask, u_char *lim0) { int x = 0, y; u_char *lim = lim0, *p; /* ignore the scope_id part */ if (lim0 == NULL || lim0 - (u_char *)mask > sizeof(*mask)) lim = (u_char *)mask + sizeof(*mask); for (p = (u_char *)mask; p < lim; x++, p++) { if (*p != 0xff) break; } y = 0; if (p < lim) { for (y = 0; y < 8; y++) { if ((*p & (0x80 >> y)) == 0) break; } } /* * when the limit pointer is given, do a stricter check on the * remaining bits. */ if (p < lim) { if (y != 0 && (*p & (0x00ff >> y)) != 0) return (-1); for (p = p + 1; p < lim; p++) if (*p != 0) return (-1); } return x * 8 + y; } #define ifa2ia6(ifa) ((struct in6_ifaddr *)(ifa)) #define ia62ifa(ia6) (&((ia6)->ia_ifa)) int in6_control(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { struct in6_ifreq *ifr = (struct in6_ifreq *)data; struct in6_ifaddr *ia = NULL; struct in6_aliasreq *ifra = (struct in6_aliasreq *)data; struct sockaddr_in6 *sa6; int error; switch (cmd) { case SIOCGETSGCNT_IN6: case SIOCGETMIFCNT_IN6: return (mrt6_ioctl ? mrt6_ioctl(cmd, data) : EOPNOTSUPP); } switch(cmd) { case SIOCAADDRCTL_POLICY: case SIOCDADDRCTL_POLICY: if (td != NULL) { error = priv_check(td, PRIV_NETINET_ADDRCTRL6); if (error) return (error); } return (in6_src_ioctl(cmd, data)); } if (ifp == NULL) return (EOPNOTSUPP); switch (cmd) { case SIOCSNDFLUSH_IN6: case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: case SIOCSDEFIFACE_IN6: case SIOCSIFINFO_FLAGS: if (td != NULL) { error = priv_check(td, PRIV_NETINET_ND6); if (error) return (error); } /* FALLTHROUGH */ case OSIOCGIFINFO_IN6: case SIOCGIFINFO_IN6: case SIOCSIFINFO_IN6: case SIOCGDRLST_IN6: case SIOCGPRLST_IN6: case SIOCGNBRINFO_IN6: case SIOCGDEFIFACE_IN6: return (nd6_ioctl(cmd, data, ifp)); } switch (cmd) { case SIOCSIFPREFIX_IN6: case SIOCDIFPREFIX_IN6: case SIOCAIFPREFIX_IN6: case SIOCCIFPREFIX_IN6: case SIOCSGIFPREFIX_IN6: case SIOCGIFPREFIX_IN6: log(LOG_NOTICE, "prefix ioctls are now invalidated. " "please use ifconfig.\n"); return (EOPNOTSUPP); } switch (cmd) { case SIOCSSCOPE6: if (td != NULL) { error = priv_check(td, PRIV_NETINET_SCOPE6); if (error) return (error); } return (scope6_set(ifp, (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id)); case SIOCGSCOPE6: return (scope6_get(ifp, (struct scope6_id *)ifr->ifr_ifru.ifru_scope_id)); case SIOCGSCOPE6DEF: return (scope6_get_default((struct scope6_id *) ifr->ifr_ifru.ifru_scope_id)); } switch (cmd) { case SIOCALIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_ADDIFADDR); if (error) return (error); } return in6_lifaddr_ioctl(so, cmd, data, ifp, td); case SIOCDLIFADDR: if (td != NULL) { error = priv_check(td, PRIV_NET_DELIFADDR); if (error) return (error); } /* FALLTHROUGH */ case SIOCGLIFADDR: return in6_lifaddr_ioctl(so, cmd, data, ifp, td); } /* * Find address for this interface, if it exists. * * In netinet code, we have checked ifra_addr in SIOCSIF*ADDR operation * only, and used the first interface address as the target of other * operations (without checking ifra_addr). This was because netinet * code/API assumed at most 1 interface address per interface. * Since IPv6 allows a node to assign multiple addresses * on a single interface, we almost always look and check the * presence of ifra_addr, and reject invalid ones here. * It also decreases duplicated code among SIOC*_IN6 operations. */ switch (cmd) { case SIOCAIFADDR_IN6: case SIOCSIFPHYADDR_IN6: sa6 = &ifra->ifra_addr; break; case SIOCSIFADDR_IN6: case SIOCGIFADDR_IN6: case SIOCSIFDSTADDR_IN6: case SIOCSIFNETMASK_IN6: case SIOCGIFDSTADDR_IN6: case SIOCGIFNETMASK_IN6: case SIOCDIFADDR_IN6: case SIOCGIFPSRCADDR_IN6: case SIOCGIFPDSTADDR_IN6: case SIOCGIFAFLAG_IN6: case SIOCSNDFLUSH_IN6: case SIOCSPFXFLUSH_IN6: case SIOCSRTRFLUSH_IN6: case SIOCGIFALIFETIME_IN6: case SIOCSIFALIFETIME_IN6: case SIOCGIFSTAT_IN6: case SIOCGIFSTAT_ICMP6: sa6 = &ifr->ifr_addr; break; default: sa6 = NULL; break; } if (sa6 && sa6->sin6_family == AF_INET6) { int error = 0; if (sa6->sin6_scope_id != 0) error = sa6_embedscope(sa6, 0); else error = in6_setscope(&sa6->sin6_addr, ifp, NULL); if (error != 0) return (error); ia = in6ifa_ifpwithaddr(ifp, &sa6->sin6_addr); } else ia = NULL; switch (cmd) { case SIOCSIFADDR_IN6: case SIOCSIFDSTADDR_IN6: case SIOCSIFNETMASK_IN6: /* * Since IPv6 allows a node to assign multiple addresses * on a single interface, SIOCSIFxxx ioctls are deprecated. */ /* we decided to obsolete this command (20000704) */ return (EINVAL); case SIOCDIFADDR_IN6: /* * for IPv4, we look for existing in_ifaddr here to allow * "ifconfig if0 delete" to remove the first IPv4 address on * the interface. For IPv6, as the spec allows multiple * interface address from the day one, we consider "remove the * first one" semantics to be not preferable. */ if (ia == NULL) return (EADDRNOTAVAIL); /* FALLTHROUGH */ case SIOCAIFADDR_IN6: /* * We always require users to specify a valid IPv6 address for * the corresponding operation. */ if (ifra->ifra_addr.sin6_family != AF_INET6 || ifra->ifra_addr.sin6_len != sizeof(struct sockaddr_in6)) return (EAFNOSUPPORT); if (td != NULL) { error = priv_check(td, (cmd == SIOCDIFADDR_IN6) ? PRIV_NET_DELIFADDR : PRIV_NET_ADDIFADDR); if (error) return (error); } break; case SIOCGIFADDR_IN6: /* This interface is basically deprecated. use SIOCGIFCONF. */ /* FALLTHROUGH */ case SIOCGIFAFLAG_IN6: case SIOCGIFNETMASK_IN6: case SIOCGIFDSTADDR_IN6: case SIOCGIFALIFETIME_IN6: /* must think again about its semantics */ if (ia == NULL) return (EADDRNOTAVAIL); break; case SIOCSIFALIFETIME_IN6: { struct in6_addrlifetime *lt; if (td != NULL) { error = priv_check(td, PRIV_NETINET_ALIFETIME6); if (error) return (error); } if (ia == NULL) return (EADDRNOTAVAIL); /* sanity for overflow - beware unsigned */ lt = &ifr->ifr_ifru.ifru_lifetime; if (lt->ia6t_vltime != ND6_INFINITE_LIFETIME && lt->ia6t_vltime + time_second < time_second) { return EINVAL; } if (lt->ia6t_pltime != ND6_INFINITE_LIFETIME && lt->ia6t_pltime + time_second < time_second) { return EINVAL; } break; } } switch (cmd) { case SIOCGIFADDR_IN6: ifr->ifr_addr = ia->ia_addr; if ((error = sa6_recoverscope(&ifr->ifr_addr)) != 0) return (error); break; case SIOCGIFDSTADDR_IN6: if ((ifp->if_flags & IFF_POINTOPOINT) == 0) return (EINVAL); /* * XXX: should we check if ifa_dstaddr is NULL and return * an error? */ ifr->ifr_dstaddr = ia->ia_dstaddr; if ((error = sa6_recoverscope(&ifr->ifr_dstaddr)) != 0) return (error); break; case SIOCGIFNETMASK_IN6: ifr->ifr_addr = ia->ia_prefixmask; break; case SIOCGIFAFLAG_IN6: ifr->ifr_ifru.ifru_flags6 = ia->ia6_flags; break; case SIOCGIFSTAT_IN6: if (ifp == NULL) return EINVAL; bzero(&ifr->ifr_ifru.ifru_stat, sizeof(ifr->ifr_ifru.ifru_stat)); ifr->ifr_ifru.ifru_stat = *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->in6_ifstat; break; case SIOCGIFSTAT_ICMP6: if (ifp == NULL) return EINVAL; bzero(&ifr->ifr_ifru.ifru_icmp6stat, sizeof(ifr->ifr_ifru.ifru_icmp6stat)); ifr->ifr_ifru.ifru_icmp6stat = *((struct in6_ifextra *)ifp->if_afdata[AF_INET6])->icmp6_ifstat; break; case SIOCGIFALIFETIME_IN6: ifr->ifr_ifru.ifru_lifetime = ia->ia6_lifetime; if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { time_t maxexpire; struct in6_addrlifetime *retlt = &ifr->ifr_ifru.ifru_lifetime; /* * XXX: adjust expiration time assuming time_t is * signed. */ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (ia->ia6_lifetime.ia6t_vltime < maxexpire - ia->ia6_updatetime) { retlt->ia6t_expire = ia->ia6_updatetime + ia->ia6_lifetime.ia6t_vltime; } else retlt->ia6t_expire = maxexpire; } if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { time_t maxexpire; struct in6_addrlifetime *retlt = &ifr->ifr_ifru.ifru_lifetime; /* * XXX: adjust expiration time assuming time_t is * signed. */ maxexpire = (-1) & ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); if (ia->ia6_lifetime.ia6t_pltime < maxexpire - ia->ia6_updatetime) { retlt->ia6t_preferred = ia->ia6_updatetime + ia->ia6_lifetime.ia6t_pltime; } else retlt->ia6t_preferred = maxexpire; } break; case SIOCSIFALIFETIME_IN6: ia->ia6_lifetime = ifr->ifr_ifru.ifru_lifetime; /* for sanity */ if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = time_second + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = time_second + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; break; case SIOCAIFADDR_IN6: { int i, error = 0; struct nd_prefixctl pr0; struct nd_prefix *pr; /* * first, make or update the interface address structure, * and link it to the list. */ if ((error = in6_update_ifa(ifp, ifra, ia, 0)) != 0) return (error); if ((ia = in6ifa_ifpwithaddr(ifp, &ifra->ifra_addr.sin6_addr)) == NULL) { /* * this can happen when the user specify the 0 valid * lifetime. */ break; } /* * then, make the prefix on-link on the interface. * XXX: we'd rather create the prefix before the address, but * we need at least one address to install the corresponding * interface route, so we configure the address first. */ /* * convert mask to prefix length (prefixmask has already * been validated in in6_update_ifa(). */ bzero(&pr0, sizeof(pr0)); pr0.ndpr_ifp = ifp; pr0.ndpr_plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, NULL); if (pr0.ndpr_plen == 128) { break; /* we don't need to install a host route. */ } pr0.ndpr_prefix = ifra->ifra_addr; /* apply the mask for safety. */ for (i = 0; i < 4; i++) { pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= ifra->ifra_prefixmask.sin6_addr.s6_addr32[i]; } /* * XXX: since we don't have an API to set prefix (not address) * lifetimes, we just use the same lifetimes as addresses. * The (temporarily) installed lifetimes can be overridden by * later advertised RAs (when accept_rtadv is non 0), which is * an intended behavior. */ pr0.ndpr_raf_onlink = 1; /* should be configurable? */ pr0.ndpr_raf_auto = ((ifra->ifra_flags & IN6_IFF_AUTOCONF) != 0); pr0.ndpr_vltime = ifra->ifra_lifetime.ia6t_vltime; pr0.ndpr_pltime = ifra->ifra_lifetime.ia6t_pltime; /* add the prefix if not yet. */ if ((pr = nd6_prefix_lookup(&pr0)) == NULL) { /* * nd6_prelist_add will install the corresponding * interface route. */ if ((error = nd6_prelist_add(&pr0, NULL, &pr)) != 0) return (error); if (pr == NULL) { log(LOG_ERR, "nd6_prelist_add succeeded but " "no prefix\n"); return (EINVAL); /* XXX panic here? */ } } /* relate the address to the prefix */ if (ia->ia6_ndpr == NULL) { ia->ia6_ndpr = pr; pr->ndpr_refcnt++; /* * If this is the first autoconf address from the * prefix, create a temporary address as well * (when required). */ if ((ia->ia6_flags & IN6_IFF_AUTOCONF) && ip6_use_tempaddr && pr->ndpr_refcnt == 1) { int e; if ((e = in6_tmpifadd(ia, 1, 0)) != 0) { log(LOG_NOTICE, "in6_control: failed " "to create a temporary address, " "errno=%d\n", e); } } } /* * this might affect the status of autoconfigured addresses, * that is, this address might make other addresses detached. */ pfxlist_onlink_check(); if (error == 0 && ia) EVENTHANDLER_INVOKE(ifaddr_event, ifp); break; } case SIOCDIFADDR_IN6: { struct nd_prefix *pr; /* * If the address being deleted is the only one that owns * the corresponding prefix, expire the prefix as well. * XXX: theoretically, we don't have to worry about such * relationship, since we separate the address management * and the prefix management. We do this, however, to provide * as much backward compatibility as possible in terms of * the ioctl operation. * Note that in6_purgeaddr() will decrement ndpr_refcnt. */ pr = ia->ia6_ndpr; in6_purgeaddr(&ia->ia_ifa); if (pr && pr->ndpr_refcnt == 0) prelist_remove(pr); EVENTHANDLER_INVOKE(ifaddr_event, ifp); break; } default: if (ifp == NULL || ifp->if_ioctl == 0) return (EOPNOTSUPP); return ((*ifp->if_ioctl)(ifp, cmd, data)); } return (0); } /* * Update parameters of an IPv6 interface address. * If necessary, a new entry is created and linked into address chains. * This function is separated from in6_control(). * XXX: should this be performed under splnet()? */ int in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra, struct in6_ifaddr *ia, int flags) { int error = 0, hostIsNew = 0, plen = -1; struct in6_ifaddr *oia; struct sockaddr_in6 dst6; struct in6_addrlifetime *lt; struct in6_multi_mship *imm; struct in6_multi *in6m_sol; struct rtentry *rt; int delay; char ip6buf[INET6_ADDRSTRLEN]; /* Validate parameters */ if (ifp == NULL || ifra == NULL) /* this maybe redundant */ return (EINVAL); /* * The destination address for a p2p link must have a family * of AF_UNSPEC or AF_INET6. */ if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && ifra->ifra_dstaddr.sin6_family != AF_INET6 && ifra->ifra_dstaddr.sin6_family != AF_UNSPEC) return (EAFNOSUPPORT); /* * validate ifra_prefixmask. don't check sin6_family, netmask * does not carry fields other than sin6_len. */ if (ifra->ifra_prefixmask.sin6_len > sizeof(struct sockaddr_in6)) return (EINVAL); /* * Because the IPv6 address architecture is classless, we require * users to specify a (non 0) prefix length (mask) for a new address. * We also require the prefix (when specified) mask is valid, and thus * reject a non-consecutive mask. */ if (ia == NULL && ifra->ifra_prefixmask.sin6_len == 0) return (EINVAL); if (ifra->ifra_prefixmask.sin6_len != 0) { plen = in6_mask2len(&ifra->ifra_prefixmask.sin6_addr, (u_char *)&ifra->ifra_prefixmask + ifra->ifra_prefixmask.sin6_len); if (plen <= 0) return (EINVAL); } else { /* * In this case, ia must not be NULL. We just use its prefix * length. */ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); } /* * If the destination address on a p2p interface is specified, * and the address is a scoped one, validate/set the scope * zone identifier. */ dst6 = ifra->ifra_dstaddr; if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) != 0 && (dst6.sin6_family == AF_INET6)) { struct in6_addr in6_tmp; u_int32_t zoneid; in6_tmp = dst6.sin6_addr; if (in6_setscope(&in6_tmp, ifp, &zoneid)) return (EINVAL); /* XXX: should be impossible */ if (dst6.sin6_scope_id != 0) { if (dst6.sin6_scope_id != zoneid) return (EINVAL); } else /* user omit to specify the ID. */ dst6.sin6_scope_id = zoneid; /* convert into the internal form */ if (sa6_embedscope(&dst6, 0)) return (EINVAL); /* XXX: should be impossible */ } /* * The destination address can be specified only for a p2p or a * loopback interface. If specified, the corresponding prefix length * must be 128. */ if (ifra->ifra_dstaddr.sin6_family == AF_INET6) { if ((ifp->if_flags & (IFF_POINTOPOINT|IFF_LOOPBACK)) == 0) { /* XXX: noisy message */ nd6log((LOG_INFO, "in6_update_ifa: a destination can " "be specified for a p2p or a loopback IF only\n")); return (EINVAL); } if (plen != 128) { nd6log((LOG_INFO, "in6_update_ifa: prefixlen should " "be 128 when dstaddr is specified\n")); return (EINVAL); } } /* lifetime consistency check */ lt = &ifra->ifra_lifetime; if (lt->ia6t_pltime > lt->ia6t_vltime) return (EINVAL); if (lt->ia6t_vltime == 0) { /* * the following log might be noisy, but this is a typical * configuration mistake or a tool's bug. */ nd6log((LOG_INFO, "in6_update_ifa: valid lifetime is 0 for %s\n", ip6_sprintf(ip6buf, &ifra->ifra_addr.sin6_addr))); if (ia == NULL) return (0); /* there's nothing to do */ } /* * If this is a new address, allocate a new ifaddr and link it * into chains. */ if (ia == NULL) { hostIsNew = 1; /* * When in6_update_ifa() is called in a process of a received * RA, it is called under an interrupt context. So, we should * call malloc with M_NOWAIT. */ ia = (struct in6_ifaddr *) malloc(sizeof(*ia), M_IFADDR, M_NOWAIT); if (ia == NULL) return (ENOBUFS); bzero((caddr_t)ia, sizeof(*ia)); LIST_INIT(&ia->ia6_memberships); /* Initialize the address and masks, and put time stamp */ IFA_LOCK_INIT(&ia->ia_ifa); ia->ia_ifa.ifa_addr = (struct sockaddr *)&ia->ia_addr; ia->ia_addr.sin6_family = AF_INET6; ia->ia_addr.sin6_len = sizeof(ia->ia_addr); ia->ia6_createtime = time_second; if ((ifp->if_flags & (IFF_POINTOPOINT | IFF_LOOPBACK)) != 0) { /* * XXX: some functions expect that ifa_dstaddr is not * NULL for p2p interfaces. */ ia->ia_ifa.ifa_dstaddr = (struct sockaddr *)&ia->ia_dstaddr; } else { ia->ia_ifa.ifa_dstaddr = NULL; } ia->ia_ifa.ifa_netmask = (struct sockaddr *)&ia->ia_prefixmask; ia->ia_ifp = ifp; if ((oia = in6_ifaddr) != NULL) { for ( ; oia->ia_next; oia = oia->ia_next) continue; oia->ia_next = ia; } else in6_ifaddr = ia; ia->ia_ifa.ifa_refcnt = 1; TAILQ_INSERT_TAIL(&ifp->if_addrlist, &ia->ia_ifa, ifa_list); } /* update timestamp */ ia->ia6_updatetime = time_second; /* set prefix mask */ if (ifra->ifra_prefixmask.sin6_len) { /* * We prohibit changing the prefix length of an existing * address, because * + such an operation should be rare in IPv6, and * + the operation would confuse prefix management. */ if (ia->ia_prefixmask.sin6_len && in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL) != plen) { nd6log((LOG_INFO, "in6_update_ifa: the prefix length of an" " existing (%s) address should not be changed\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); error = EINVAL; goto unlink; } ia->ia_prefixmask = ifra->ifra_prefixmask; } /* * If a new destination address is specified, scrub the old one and * install the new destination. Note that the interface must be * p2p or loopback (see the check above.) */ if (dst6.sin6_family == AF_INET6 && !IN6_ARE_ADDR_EQUAL(&dst6.sin6_addr, &ia->ia_dstaddr.sin6_addr)) { int e; if ((ia->ia_flags & IFA_ROUTE) != 0 && (e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) { nd6log((LOG_ERR, "in6_update_ifa: failed to remove " "a route to the old destination: %s\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr))); /* proceed anyway... */ } else ia->ia_flags &= ~IFA_ROUTE; ia->ia_dstaddr = dst6; } /* * Set lifetimes. We do not refer to ia6t_expire and ia6t_preferred * to see if the address is deprecated or invalidated, but initialize * these members for applications. */ ia->ia6_lifetime = ifra->ifra_lifetime; if (ia->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_expire = time_second + ia->ia6_lifetime.ia6t_vltime; } else ia->ia6_lifetime.ia6t_expire = 0; if (ia->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) { ia->ia6_lifetime.ia6t_preferred = time_second + ia->ia6_lifetime.ia6t_pltime; } else ia->ia6_lifetime.ia6t_preferred = 0; /* reset the interface and routing table appropriately. */ if ((error = in6_ifinit(ifp, ia, &ifra->ifra_addr, hostIsNew)) != 0) goto unlink; /* * configure address flags. */ ia->ia6_flags = ifra->ifra_flags; /* * backward compatibility - if IN6_IFF_DEPRECATED is set from the * userland, make it deprecated. */ if ((ifra->ifra_flags & IN6_IFF_DEPRECATED) != 0) { ia->ia6_lifetime.ia6t_pltime = 0; ia->ia6_lifetime.ia6t_preferred = time_second; } /* * Make the address tentative before joining multicast addresses, * so that corresponding MLD responses would not have a tentative * source address. */ ia->ia6_flags &= ~IN6_IFF_DUPLICATED; /* safety */ if (hostIsNew && in6if_do_dad(ifp)) ia->ia6_flags |= IN6_IFF_TENTATIVE; /* * We are done if we have simply modified an existing address. */ if (!hostIsNew) return (error); /* * Beyond this point, we should call in6_purgeaddr upon an error, * not just go to unlink. */ /* Join necessary multicast groups */ in6m_sol = NULL; if ((ifp->if_flags & IFF_MULTICAST) != 0) { struct sockaddr_in6 mltaddr, mltmask; struct in6_addr llsol; /* join solicited multicast addr for new host id */ bzero(&llsol, sizeof(struct in6_addr)); llsol.s6_addr32[0] = IPV6_ADDR_INT32_MLL; llsol.s6_addr32[1] = 0; llsol.s6_addr32[2] = htonl(1); llsol.s6_addr32[3] = ifra->ifra_addr.sin6_addr.s6_addr32[3]; llsol.s6_addr8[12] = 0xff; if ((error = in6_setscope(&llsol, ifp, NULL)) != 0) { /* XXX: should not happen */ log(LOG_ERR, "in6_update_ifa: " "in6_setscope failed\n"); goto cleanup; } delay = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { /* * We need a random delay for DAD on the address * being configured. It also means delaying * transmission of the corresponding MLD report to * avoid report collision. * [draft-ietf-ipv6-rfc2462bis-02.txt] */ delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); } imm = in6_joingroup(ifp, &llsol, &error, delay); if (imm == NULL) { nd6log((LOG_WARNING, "in6_update_ifa: addmulti failed for " "%s on %s (errno=%d)\n", ip6_sprintf(ip6buf, &llsol), if_name(ifp), error)); in6_purgeaddr((struct ifaddr *)ia); return (error); } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); in6m_sol = imm->i6mm_maddr; bzero(&mltmask, sizeof(mltmask)); mltmask.sin6_len = sizeof(struct sockaddr_in6); mltmask.sin6_family = AF_INET6; mltmask.sin6_addr = in6mask32; #define MLTMASK_LEN 4 /* mltmask's masklen (=32bit=4octet) */ /* * join link-local all-nodes address */ bzero(&mltaddr, sizeof(mltaddr)); mltaddr.sin6_len = sizeof(struct sockaddr_in6); mltaddr.sin6_family = AF_INET6; mltaddr.sin6_addr = in6addr_linklocal_allnodes; if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) goto cleanup; /* XXX: should not fail */ /* * XXX: do we really need this automatic routes? * We should probably reconsider this stuff. Most applications * actually do not need the routes, since they usually specify * the outgoing interface. */ rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); if (rt) { /* XXX: only works in !SCOPEDROUTING case. */ if (memcmp(&mltaddr.sin6_addr, &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, MLTMASK_LEN)) { RTFREE_LOCKED(rt); rt = NULL; } } if (!rt) { /* XXX: we need RTF_CLONING to fake nd6_rtrequest */ error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING, (struct rtentry **)0); if (error) goto cleanup; } else { RTFREE_LOCKED(rt); } imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); if (!imm) { nd6log((LOG_WARNING, "in6_update_ifa: addmulti failed for " "%s on %s (errno=%d)\n", ip6_sprintf(ip6buf, &mltaddr.sin6_addr), if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); /* * join node information group address */ #define hostnamelen strlen(hostname) delay = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { /* * The spec doesn't say anything about delay for this * group, but the same logic should apply. */ delay = arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz); } + mtx_lock(&hostname_mtx); if (in6_nigroup(ifp, hostname, hostnamelen, &mltaddr.sin6_addr) == 0) { + mtx_unlock(&hostname_mtx); imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, delay); /* XXX jinmei */ if (!imm) { nd6log((LOG_WARNING, "in6_update_ifa: " "addmulti failed for %s on %s " "(errno=%d)\n", ip6_sprintf(ip6buf, &mltaddr.sin6_addr), if_name(ifp), error)); /* XXX not very fatal, go on... */ } else { LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); } - } + } else + mtx_unlock(&hostname_mtx); #undef hostnamelen /* * join interface-local all-nodes address. * (ff01::1%ifN, and ff01::%ifN/32) */ mltaddr.sin6_addr = in6addr_nodelocal_allnodes; if ((error = in6_setscope(&mltaddr.sin6_addr, ifp, NULL)) != 0) goto cleanup; /* XXX: should not fail */ /* XXX: again, do we really need the route? */ rt = rtalloc1((struct sockaddr *)&mltaddr, 0, 0UL); if (rt) { if (memcmp(&mltaddr.sin6_addr, &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr, MLTMASK_LEN)) { RTFREE_LOCKED(rt); rt = NULL; } } if (!rt) { error = rtrequest(RTM_ADD, (struct sockaddr *)&mltaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&mltmask, RTF_UP | RTF_CLONING, (struct rtentry **)0); if (error) goto cleanup; } else RTFREE_LOCKED(rt); imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error, 0); if (!imm) { nd6log((LOG_WARNING, "in6_update_ifa: " "addmulti failed for %s on %s " "(errno=%d)\n", ip6_sprintf(ip6buf, &mltaddr.sin6_addr), if_name(ifp), error)); goto cleanup; } LIST_INSERT_HEAD(&ia->ia6_memberships, imm, i6mm_chain); #undef MLTMASK_LEN } /* * Perform DAD, if needed. * XXX It may be of use, if we can administratively * disable DAD. */ if (hostIsNew && in6if_do_dad(ifp) && ((ifra->ifra_flags & IN6_IFF_NODAD) == 0) && (ia->ia6_flags & IN6_IFF_TENTATIVE)) { int mindelay, maxdelay; delay = 0; if ((flags & IN6_IFAUPDATE_DADDELAY)) { /* * We need to impose a delay before sending an NS * for DAD. Check if we also needed a delay for the * corresponding MLD message. If we did, the delay * should be larger than the MLD delay (this could be * relaxed a bit, but this simple logic is at least * safe). */ mindelay = 0; if (in6m_sol != NULL && in6m_sol->in6m_state == MLD_REPORTPENDING) { mindelay = in6m_sol->in6m_timer; } maxdelay = MAX_RTR_SOLICITATION_DELAY * hz; if (maxdelay - mindelay == 0) delay = 0; else { delay = (arc4random() % (maxdelay - mindelay)) + mindelay; } } nd6_dad_start((struct ifaddr *)ia, delay); } return (error); unlink: /* * XXX: if a change of an existing address failed, keep the entry * anyway. */ if (hostIsNew) in6_unlink_ifa(ia, ifp); return (error); cleanup: in6_purgeaddr(&ia->ia_ifa); return error; } void in6_purgeaddr(struct ifaddr *ifa) { struct ifnet *ifp = ifa->ifa_ifp; struct in6_ifaddr *ia = (struct in6_ifaddr *) ifa; char ip6buf[INET6_ADDRSTRLEN]; struct in6_multi_mship *imm; /* stop DAD processing */ nd6_dad_stop(ifa); /* * delete route to the destination of the address being purged. * The interface must be p2p or loopback in this case. */ if ((ia->ia_flags & IFA_ROUTE) != 0 && ia->ia_dstaddr.sin6_len != 0) { int e; if ((e = rtinit(&(ia->ia_ifa), (int)RTM_DELETE, RTF_HOST)) != 0) { log(LOG_ERR, "in6_purgeaddr: failed to remove " "a route to the p2p destination: %s on %s, " "errno=%d\n", ip6_sprintf(ip6buf, &ia->ia_addr.sin6_addr), if_name(ifp), e); /* proceed anyway... */ } else ia->ia_flags &= ~IFA_ROUTE; } /* Remove ownaddr's loopback rtentry, if it exists. */ in6_ifremloop(&(ia->ia_ifa)); /* * leave from multicast groups we have joined for the interface */ while ((imm = ia->ia6_memberships.lh_first) != NULL) { LIST_REMOVE(imm, i6mm_chain); in6_leavegroup(imm); } in6_unlink_ifa(ia, ifp); } static void in6_unlink_ifa(struct in6_ifaddr *ia, struct ifnet *ifp) { struct in6_ifaddr *oia; int s = splnet(); TAILQ_REMOVE(&ifp->if_addrlist, &ia->ia_ifa, ifa_list); oia = ia; if (oia == (ia = in6_ifaddr)) in6_ifaddr = ia->ia_next; else { while (ia->ia_next && (ia->ia_next != oia)) ia = ia->ia_next; if (ia->ia_next) ia->ia_next = oia->ia_next; else { /* search failed */ printf("Couldn't unlink in6_ifaddr from in6_ifaddr\n"); } } /* * Release the reference to the base prefix. There should be a * positive reference. */ if (oia->ia6_ndpr == NULL) { nd6log((LOG_NOTICE, "in6_unlink_ifa: autoconf'ed address " "%p has no prefix\n", oia)); } else { oia->ia6_ndpr->ndpr_refcnt--; oia->ia6_ndpr = NULL; } /* * Also, if the address being removed is autoconf'ed, call * pfxlist_onlink_check() since the release might affect the status of * other (detached) addresses. */ if ((oia->ia6_flags & IN6_IFF_AUTOCONF)) { pfxlist_onlink_check(); } /* * release another refcnt for the link from in6_ifaddr. * Note that we should decrement the refcnt at least once for all *BSD. */ IFAFREE(&oia->ia_ifa); splx(s); } void in6_purgeif(struct ifnet *ifp) { struct ifaddr *ifa, *nifa; for (ifa = TAILQ_FIRST(&ifp->if_addrlist); ifa != NULL; ifa = nifa) { nifa = TAILQ_NEXT(ifa, ifa_list); if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6_purgeaddr(ifa); } in6_ifdetach(ifp); } /* * SIOC[GAD]LIFADDR. * SIOCGLIFADDR: get first address. (?) * SIOCGLIFADDR with IFLR_PREFIX: * get first address that matches the specified prefix. * SIOCALIFADDR: add the specified address. * SIOCALIFADDR with IFLR_PREFIX: * add the specified prefix, filling hostid part from * the first link-local address. prefixlen must be <= 64. * SIOCDLIFADDR: delete the specified address. * SIOCDLIFADDR with IFLR_PREFIX: * delete the first address that matches the specified prefix. * return values: * EINVAL on invalid parameters * EADDRNOTAVAIL on prefix match failed/specified address not found * other values may be returned from in6_ioctl() * * NOTE: SIOCALIFADDR(with IFLR_PREFIX set) allows prefixlen less than 64. * this is to accomodate address naming scheme other than RFC2374, * in the future. * RFC2373 defines interface id to be 64bit, but it allows non-RFC2374 * address encoding scheme. (see figure on page 8) */ static int in6_lifaddr_ioctl(struct socket *so, u_long cmd, caddr_t data, struct ifnet *ifp, struct thread *td) { struct if_laddrreq *iflr = (struct if_laddrreq *)data; struct ifaddr *ifa; struct sockaddr *sa; /* sanity checks */ if (!data || !ifp) { panic("invalid argument to in6_lifaddr_ioctl"); /* NOTREACHED */ } switch (cmd) { case SIOCGLIFADDR: /* address must be specified on GET with IFLR_PREFIX */ if ((iflr->flags & IFLR_PREFIX) == 0) break; /* FALLTHROUGH */ case SIOCALIFADDR: case SIOCDLIFADDR: /* address must be specified on ADD and DELETE */ sa = (struct sockaddr *)&iflr->addr; if (sa->sa_family != AF_INET6) return EINVAL; if (sa->sa_len != sizeof(struct sockaddr_in6)) return EINVAL; /* XXX need improvement */ sa = (struct sockaddr *)&iflr->dstaddr; if (sa->sa_family && sa->sa_family != AF_INET6) return EINVAL; if (sa->sa_len && sa->sa_len != sizeof(struct sockaddr_in6)) return EINVAL; break; default: /* shouldn't happen */ #if 0 panic("invalid cmd to in6_lifaddr_ioctl"); /* NOTREACHED */ #else return EOPNOTSUPP; #endif } if (sizeof(struct in6_addr) * 8 < iflr->prefixlen) return EINVAL; switch (cmd) { case SIOCALIFADDR: { struct in6_aliasreq ifra; struct in6_addr *hostid = NULL; int prefixlen; if ((iflr->flags & IFLR_PREFIX) != 0) { struct sockaddr_in6 *sin6; /* * hostid is to fill in the hostid part of the * address. hostid points to the first link-local * address attached to the interface. */ ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); if (!ifa) return EADDRNOTAVAIL; hostid = IFA_IN6(ifa); /* prefixlen must be <= 64. */ if (64 < iflr->prefixlen) return EINVAL; prefixlen = iflr->prefixlen; /* hostid part must be zero. */ sin6 = (struct sockaddr_in6 *)&iflr->addr; if (sin6->sin6_addr.s6_addr32[2] != 0 || sin6->sin6_addr.s6_addr32[3] != 0) { return EINVAL; } } else prefixlen = iflr->prefixlen; /* copy args to in6_aliasreq, perform ioctl(SIOCAIFADDR_IN6). */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&iflr->addr, &ifra.ifra_addr, ((struct sockaddr *)&iflr->addr)->sa_len); if (hostid) { /* fill in hostid part */ ifra.ifra_addr.sin6_addr.s6_addr32[2] = hostid->s6_addr32[2]; ifra.ifra_addr.sin6_addr.s6_addr32[3] = hostid->s6_addr32[3]; } if (((struct sockaddr *)&iflr->dstaddr)->sa_family) { /* XXX */ bcopy(&iflr->dstaddr, &ifra.ifra_dstaddr, ((struct sockaddr *)&iflr->dstaddr)->sa_len); if (hostid) { ifra.ifra_dstaddr.sin6_addr.s6_addr32[2] = hostid->s6_addr32[2]; ifra.ifra_dstaddr.sin6_addr.s6_addr32[3] = hostid->s6_addr32[3]; } } ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); in6_prefixlen2mask(&ifra.ifra_prefixmask.sin6_addr, prefixlen); ifra.ifra_flags = iflr->flags & ~IFLR_PREFIX; return in6_control(so, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, td); } case SIOCGLIFADDR: case SIOCDLIFADDR: { struct in6_ifaddr *ia; struct in6_addr mask, candidate, match; struct sockaddr_in6 *sin6; int cmp; bzero(&mask, sizeof(mask)); if (iflr->flags & IFLR_PREFIX) { /* lookup a prefix rather than address. */ in6_prefixlen2mask(&mask, iflr->prefixlen); sin6 = (struct sockaddr_in6 *)&iflr->addr; bcopy(&sin6->sin6_addr, &match, sizeof(match)); match.s6_addr32[0] &= mask.s6_addr32[0]; match.s6_addr32[1] &= mask.s6_addr32[1]; match.s6_addr32[2] &= mask.s6_addr32[2]; match.s6_addr32[3] &= mask.s6_addr32[3]; /* if you set extra bits, that's wrong */ if (bcmp(&match, &sin6->sin6_addr, sizeof(match))) return EINVAL; cmp = 1; } else { if (cmd == SIOCGLIFADDR) { /* on getting an address, take the 1st match */ cmp = 0; /* XXX */ } else { /* on deleting an address, do exact match */ in6_prefixlen2mask(&mask, 128); sin6 = (struct sockaddr_in6 *)&iflr->addr; bcopy(&sin6->sin6_addr, &match, sizeof(match)); cmp = 1; } } TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (!cmp) break; /* * XXX: this is adhoc, but is necessary to allow * a user to specify fe80::/64 (not /10) for a * link-local address. */ bcopy(IFA_IN6(ifa), &candidate, sizeof(candidate)); in6_clearscope(&candidate); candidate.s6_addr32[0] &= mask.s6_addr32[0]; candidate.s6_addr32[1] &= mask.s6_addr32[1]; candidate.s6_addr32[2] &= mask.s6_addr32[2]; candidate.s6_addr32[3] &= mask.s6_addr32[3]; if (IN6_ARE_ADDR_EQUAL(&candidate, &match)) break; } if (!ifa) return EADDRNOTAVAIL; ia = ifa2ia6(ifa); if (cmd == SIOCGLIFADDR) { int error; /* fill in the if_laddrreq structure */ bcopy(&ia->ia_addr, &iflr->addr, ia->ia_addr.sin6_len); error = sa6_recoverscope( (struct sockaddr_in6 *)&iflr->addr); if (error != 0) return (error); if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &iflr->dstaddr, ia->ia_dstaddr.sin6_len); error = sa6_recoverscope( (struct sockaddr_in6 *)&iflr->dstaddr); if (error != 0) return (error); } else bzero(&iflr->dstaddr, sizeof(iflr->dstaddr)); iflr->prefixlen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); iflr->flags = ia->ia6_flags; /* XXX */ return 0; } else { struct in6_aliasreq ifra; /* fill in6_aliasreq and do ioctl(SIOCDIFADDR_IN6) */ bzero(&ifra, sizeof(ifra)); bcopy(iflr->iflr_name, ifra.ifra_name, sizeof(ifra.ifra_name)); bcopy(&ia->ia_addr, &ifra.ifra_addr, ia->ia_addr.sin6_len); if ((ifp->if_flags & IFF_POINTOPOINT) != 0) { bcopy(&ia->ia_dstaddr, &ifra.ifra_dstaddr, ia->ia_dstaddr.sin6_len); } else { bzero(&ifra.ifra_dstaddr, sizeof(ifra.ifra_dstaddr)); } bcopy(&ia->ia_prefixmask, &ifra.ifra_dstaddr, ia->ia_prefixmask.sin6_len); ifra.ifra_flags = ia->ia6_flags; return in6_control(so, SIOCDIFADDR_IN6, (caddr_t)&ifra, ifp, td); } } } return EOPNOTSUPP; /* just for safety */ } /* * Initialize an interface's intetnet6 address * and routing table entry. */ static int in6_ifinit(struct ifnet *ifp, struct in6_ifaddr *ia, struct sockaddr_in6 *sin6, int newhost) { int error = 0, plen, ifacount = 0; int s = splimp(); struct ifaddr *ifa; /* * Give the interface a chance to initialize * if this is its first address, * and to validate the address if necessary. */ TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ifacount++; } ia->ia_addr = *sin6; if (ifacount <= 1 && ifp->if_ioctl) { IFF_LOCKGIANT(ifp); error = (*ifp->if_ioctl)(ifp, SIOCSIFADDR, (caddr_t)ia); IFF_UNLOCKGIANT(ifp); if (error) { splx(s); return (error); } } splx(s); ia->ia_ifa.ifa_metric = ifp->if_metric; /* we could do in(6)_socktrim here, but just omit it at this moment. */ if (newhost) { /* * set the rtrequest function to create llinfo. It also * adjust outgoing interface of the route for the local * address when called via in6_ifaddloop() below. */ ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; } /* * Special case: * If a new destination address is specified for a point-to-point * interface, install a route to the destination as an interface * direct route. In addition, if the link is expected to have neighbor * cache entries, specify RTF_LLINFO so that a cache entry for the * destination address will be created. * created * XXX: the logic below rejects assigning multiple addresses on a p2p * interface that share the same destination. */ plen = in6_mask2len(&ia->ia_prefixmask.sin6_addr, NULL); /* XXX */ if (!(ia->ia_flags & IFA_ROUTE) && plen == 128 && ia->ia_dstaddr.sin6_family == AF_INET6) { int rtflags = RTF_UP | RTF_HOST; struct rtentry *rt = NULL, **rtp = NULL; if (nd6_need_cache(ifp) != 0) { rtflags |= RTF_LLINFO; rtp = &rt; } error = rtrequest(RTM_ADD, (struct sockaddr *)&ia->ia_dstaddr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&ia->ia_prefixmask, ia->ia_flags | rtflags, rtp); if (error != 0) return (error); if (rt != NULL) { struct llinfo_nd6 *ln; RT_LOCK(rt); ln = (struct llinfo_nd6 *)rt->rt_llinfo; if (ln != NULL) { /* * Set the state to STALE because we don't * have to perform address resolution on this * link. */ ln->ln_state = ND6_LLINFO_STALE; } RT_REMREF(rt); RT_UNLOCK(rt); } ia->ia_flags |= IFA_ROUTE; } if (plen < 128) { /* * The RTF_CLONING flag is necessary for in6_is_ifloop_auto(). */ ia->ia_ifa.ifa_flags |= RTF_CLONING; } /* Add ownaddr as loopback rtentry, if necessary (ex. on p2p link). */ if (newhost) in6_ifaddloop(&(ia->ia_ifa)); return (error); } struct in6_multi_mship * in6_joingroup(struct ifnet *ifp, struct in6_addr *addr, int *errorp, int delay) { struct in6_multi_mship *imm; imm = malloc(sizeof(*imm), M_IP6MADDR, M_NOWAIT); if (!imm) { *errorp = ENOBUFS; return NULL; } imm->i6mm_maddr = in6_addmulti(addr, ifp, errorp, delay); if (!imm->i6mm_maddr) { /* *errorp is alrady set */ free(imm, M_IP6MADDR); return NULL; } return imm; } int in6_leavegroup(struct in6_multi_mship *imm) { if (imm->i6mm_maddr) in6_delmulti(imm->i6mm_maddr); free(imm, M_IP6MADDR); return 0; } /* * Find an IPv6 interface link-local address specific to an interface. */ struct in6_ifaddr * in6ifa_ifpforlinklocal(struct ifnet *ifp, int ignoreflags) { struct ifaddr *ifa; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (IN6_IS_ADDR_LINKLOCAL(IFA_IN6(ifa))) { if ((((struct in6_ifaddr *)ifa)->ia6_flags & ignoreflags) != 0) continue; break; } } return ((struct in6_ifaddr *)ifa); } /* * find the internet address corresponding to a given interface and address. */ struct in6_ifaddr * in6ifa_ifpwithaddr(struct ifnet *ifp, struct in6_addr *addr) { struct ifaddr *ifa; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (IN6_ARE_ADDR_EQUAL(addr, IFA_IN6(ifa))) break; } return ((struct in6_ifaddr *)ifa); } /* * Convert IP6 address to printable (loggable) representation. Caller * has to make sure that ip6buf is at least INET6_ADDRSTRLEN long. */ static char digits[] = "0123456789abcdef"; char * ip6_sprintf(char *ip6buf, const struct in6_addr *addr) { int i; char *cp; const u_int16_t *a = (const u_int16_t *)addr; const u_int8_t *d; int dcolon = 0, zero = 0; cp = ip6buf; for (i = 0; i < 8; i++) { if (dcolon == 1) { if (*a == 0) { if (i == 7) *cp++ = ':'; a++; continue; } else dcolon = 2; } if (*a == 0) { if (dcolon == 0 && *(a + 1) == 0) { if (i == 0) *cp++ = ':'; *cp++ = ':'; dcolon = 1; } else { *cp++ = '0'; *cp++ = ':'; } a++; continue; } d = (const u_char *)a; /* Try to eliminate leading zeros in printout like in :0001. */ zero = 1; *cp = digits[*d >> 4]; if (*cp != '0') { zero = 0; cp++; } *cp = digits[*d++ & 0xf]; if (zero == 0 || (*cp != '0')) { zero = 0; cp++; } *cp = digits[*d >> 4]; if (zero == 0 || (*cp != '0')) { zero = 0; cp++; } *cp++ = digits[*d & 0xf]; *cp++ = ':'; a++; } *--cp = '\0'; return (ip6buf); } int in6_localaddr(struct in6_addr *in6) { struct in6_ifaddr *ia; if (IN6_IS_ADDR_LOOPBACK(in6) || IN6_IS_ADDR_LINKLOCAL(in6)) return 1; for (ia = in6_ifaddr; ia; ia = ia->ia_next) { if (IN6_ARE_MASKED_ADDR_EQUAL(in6, &ia->ia_addr.sin6_addr, &ia->ia_prefixmask.sin6_addr)) { return 1; } } return (0); } int in6_is_addr_deprecated(struct sockaddr_in6 *sa6) { struct in6_ifaddr *ia; for (ia = in6_ifaddr; ia; ia = ia->ia_next) { if (IN6_ARE_ADDR_EQUAL(&ia->ia_addr.sin6_addr, &sa6->sin6_addr) && (ia->ia6_flags & IN6_IFF_DEPRECATED) != 0) return (1); /* true */ /* XXX: do we still have to go thru the rest of the list? */ } return (0); /* false */ } /* * return length of part which dst and src are equal * hard coding... */ int in6_matchlen(struct in6_addr *src, struct in6_addr *dst) { int match = 0; u_char *s = (u_char *)src, *d = (u_char *)dst; u_char *lim = s + 16, r; while (s < lim) if ((r = (*d++ ^ *s++)) != 0) { while (r < 128) { match++; r <<= 1; } break; } else match += 8; return match; } /* XXX: to be scope conscious */ int in6_are_prefix_equal(struct in6_addr *p1, struct in6_addr *p2, int len) { int bytelen, bitlen; /* sanity check */ if (0 > len || len > 128) { log(LOG_ERR, "in6_are_prefix_equal: invalid prefix length(%d)\n", len); return (0); } bytelen = len / 8; bitlen = len % 8; if (bcmp(&p1->s6_addr, &p2->s6_addr, bytelen)) return (0); if (bitlen != 0 && p1->s6_addr[bytelen] >> (8 - bitlen) != p2->s6_addr[bytelen] >> (8 - bitlen)) return (0); return (1); } void in6_prefixlen2mask(struct in6_addr *maskp, int len) { u_char maskarray[8] = {0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff}; int bytelen, bitlen, i; /* sanity check */ if (0 > len || len > 128) { log(LOG_ERR, "in6_prefixlen2mask: invalid prefix length(%d)\n", len); return; } bzero(maskp, sizeof(*maskp)); bytelen = len / 8; bitlen = len % 8; for (i = 0; i < bytelen; i++) maskp->s6_addr[i] = 0xff; if (bitlen) maskp->s6_addr[bytelen] = maskarray[bitlen - 1]; } /* * return the best address out of the same scope. if no address was * found, return the first valid address from designated IF. */ struct in6_ifaddr * in6_ifawithifp(struct ifnet *ifp, struct in6_addr *dst) { int dst_scope = in6_addrscope(dst), blen = -1, tlen; struct ifaddr *ifa; struct in6_ifaddr *besta = 0; struct in6_ifaddr *dep[2]; /* last-resort: deprecated */ dep[0] = dep[1] = NULL; /* * We first look for addresses in the same scope. * If there is one, return it. * If two or more, return one which matches the dst longest. * If none, return one of global addresses assigned other ifs. */ TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) continue; /* XXX: is there any case to allow anycast? */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) continue; /* don't use this interface */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { if (ip6_use_deprecated) dep[0] = (struct in6_ifaddr *)ifa; continue; } if (dst_scope == in6_addrscope(IFA_IN6(ifa))) { /* * call in6_matchlen() as few as possible */ if (besta) { if (blen == -1) blen = in6_matchlen(&besta->ia_addr.sin6_addr, dst); tlen = in6_matchlen(IFA_IN6(ifa), dst); if (tlen > blen) { blen = tlen; besta = (struct in6_ifaddr *)ifa; } } else besta = (struct in6_ifaddr *)ifa; } } if (besta) return (besta); TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_ANYCAST) continue; /* XXX: is there any case to allow anycast? */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_NOTREADY) continue; /* don't use this interface */ if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DETACHED) continue; if (((struct in6_ifaddr *)ifa)->ia6_flags & IN6_IFF_DEPRECATED) { if (ip6_use_deprecated) dep[1] = (struct in6_ifaddr *)ifa; continue; } return (struct in6_ifaddr *)ifa; } /* use the last-resort values, that are, deprecated addresses */ if (dep[0]) return dep[0]; if (dep[1]) return dep[1]; return NULL; } /* * perform DAD when interface becomes IFF_UP. */ void in6_if_up(struct ifnet *ifp) { struct ifaddr *ifa; struct in6_ifaddr *ia; TAILQ_FOREACH(ifa, &ifp->if_addrlist, ifa_list) { if (ifa->ifa_addr->sa_family != AF_INET6) continue; ia = (struct in6_ifaddr *)ifa; if (ia->ia6_flags & IN6_IFF_TENTATIVE) { /* * The TENTATIVE flag was likely set by hand * beforehand, implicitly indicating the need for DAD. * We may be able to skip the random delay in this * case, but we impose delays just in case. */ nd6_dad_start(ifa, arc4random() % (MAX_RTR_SOLICITATION_DELAY * hz)); } } /* * special cases, like 6to4, are handled in in6_ifattach */ in6_ifattach(ifp, NULL); } int in6if_do_dad(struct ifnet *ifp) { if ((ifp->if_flags & IFF_LOOPBACK) != 0) return (0); switch (ifp->if_type) { #ifdef IFT_DUMMY case IFT_DUMMY: #endif case IFT_FAITH: /* * These interfaces do not have the IFF_LOOPBACK flag, * but loop packets back. We do not have to do DAD on such * interfaces. We should even omit it, because loop-backed * NS would confuse the DAD procedure. */ return (0); default: /* * Our DAD routine requires the interface up and running. * However, some interfaces can be up before the RUNNING * status. Additionaly, users may try to assign addresses * before the interface becomes up (or running). * We simply skip DAD in such a case as a work around. * XXX: we should rather mark "tentative" on such addresses, * and do DAD after the interface becomes ready. */ if (!((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))) return (0); return (1); } } /* * Calculate max IPv6 MTU through all the interfaces and store it * to in6_maxmtu. */ void in6_setmaxmtu(void) { unsigned long maxmtu = 0; struct ifnet *ifp; IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { /* this function can be called during ifnet initialization */ if (!ifp->if_afdata[AF_INET6]) continue; if ((ifp->if_flags & IFF_LOOPBACK) == 0 && IN6_LINKMTU(ifp) > maxmtu) maxmtu = IN6_LINKMTU(ifp); } IFNET_RUNLOCK(); if (maxmtu) /* update only when maxmtu is positive */ in6_maxmtu = maxmtu; } /* * Provide the length of interface identifiers to be used for the link attached * to the given interface. The length should be defined in "IPv6 over * xxx-link" document. Note that address architecture might also define * the length for a particular set of address prefixes, regardless of the * link type. As clarified in rfc2462bis, those two definitions should be * consistent, and those really are as of August 2004. */ int in6_if2idlen(struct ifnet *ifp) { switch (ifp->if_type) { case IFT_ETHER: /* RFC2464 */ #ifdef IFT_PROPVIRTUAL case IFT_PROPVIRTUAL: /* XXX: no RFC. treat it as ether */ #endif #ifdef IFT_L2VLAN case IFT_L2VLAN: /* ditto */ #endif #ifdef IFT_IEEE80211 case IFT_IEEE80211: /* ditto */ #endif #ifdef IFT_MIP case IFT_MIP: /* ditto */ #endif return (64); case IFT_FDDI: /* RFC2467 */ return (64); case IFT_ISO88025: /* RFC2470 (IPv6 over Token Ring) */ return (64); case IFT_PPP: /* RFC2472 */ return (64); case IFT_ARCNET: /* RFC2497 */ return (64); case IFT_FRELAY: /* RFC2590 */ return (64); case IFT_IEEE1394: /* RFC3146 */ return (64); case IFT_GIF: return (64); /* draft-ietf-v6ops-mech-v2-07 */ case IFT_LOOP: return (64); /* XXX: is this really correct? */ default: /* * Unknown link type: * It might be controversial to use the today's common constant * of 64 for these cases unconditionally. For full compliance, * we should return an error in this case. On the other hand, * if we simply miss the standard for the link type or a new * standard is defined for a new link type, the IFID length * is very likely to be the common constant. As a compromise, * we always use the constant, but make an explicit notice * indicating the "unknown" case. */ printf("in6_if2idlen: unknown link type (%d)\n", ifp->if_type); return (64); } } void * in6_domifattach(struct ifnet *ifp) { struct in6_ifextra *ext; ext = (struct in6_ifextra *)malloc(sizeof(*ext), M_IFADDR, M_WAITOK); bzero(ext, sizeof(*ext)); ext->in6_ifstat = (struct in6_ifstat *)malloc(sizeof(struct in6_ifstat), M_IFADDR, M_WAITOK); bzero(ext->in6_ifstat, sizeof(*ext->in6_ifstat)); ext->icmp6_ifstat = (struct icmp6_ifstat *)malloc(sizeof(struct icmp6_ifstat), M_IFADDR, M_WAITOK); bzero(ext->icmp6_ifstat, sizeof(*ext->icmp6_ifstat)); ext->nd_ifinfo = nd6_ifattach(ifp); ext->scope6_id = scope6_ifattach(ifp); return ext; } void in6_domifdetach(struct ifnet *ifp, void *aux) { struct in6_ifextra *ext = (struct in6_ifextra *)aux; scope6_ifdetach(ext->scope6_id); nd6_ifdetach(ext->nd_ifinfo); free(ext->in6_ifstat, M_IFADDR); free(ext->icmp6_ifstat, M_IFADDR); free(ext, M_IFADDR); } /* * Convert sockaddr_in6 to sockaddr_in. Original sockaddr_in6 must be * v4 mapped addr or v4 compat addr */ void in6_sin6_2_sin(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) { bzero(sin, sizeof(*sin)); sin->sin_len = sizeof(struct sockaddr_in); sin->sin_family = AF_INET; sin->sin_port = sin6->sin6_port; sin->sin_addr.s_addr = sin6->sin6_addr.s6_addr32[3]; } /* Convert sockaddr_in to sockaddr_in6 in v4 mapped addr format. */ void in6_sin_2_v4mapsin6(struct sockaddr_in *sin, struct sockaddr_in6 *sin6) { bzero(sin6, sizeof(*sin6)); sin6->sin6_len = sizeof(struct sockaddr_in6); sin6->sin6_family = AF_INET6; sin6->sin6_port = sin->sin_port; sin6->sin6_addr.s6_addr32[0] = 0; sin6->sin6_addr.s6_addr32[1] = 0; sin6->sin6_addr.s6_addr32[2] = IPV6_ADDR_INT32_SMP; sin6->sin6_addr.s6_addr32[3] = sin->sin_addr.s_addr; } /* Convert sockaddr_in6 into sockaddr_in. */ void in6_sin6_2_sin_in_sock(struct sockaddr *nam) { struct sockaddr_in *sin_p; struct sockaddr_in6 sin6; /* * Save original sockaddr_in6 addr and convert it * to sockaddr_in. */ sin6 = *(struct sockaddr_in6 *)nam; sin_p = (struct sockaddr_in *)nam; in6_sin6_2_sin(sin_p, &sin6); } /* Convert sockaddr_in into sockaddr_in6 in v4 mapped addr format. */ void in6_sin_2_v4mapsin6_in_sock(struct sockaddr **nam) { struct sockaddr_in *sin_p; struct sockaddr_in6 *sin6_p; MALLOC(sin6_p, struct sockaddr_in6 *, sizeof *sin6_p, M_SONAME, M_WAITOK); sin_p = (struct sockaddr_in *)*nam; in6_sin_2_v4mapsin6(sin_p, sin6_p); FREE(*nam, M_SONAME); *nam = (struct sockaddr *)sin6_p; } Index: head/sys/netinet6/in6_ifattach.c =================================================================== --- head/sys/netinet6/in6_ifattach.c (revision 180290) +++ head/sys/netinet6/in6_ifattach.c (revision 180291) @@ -1,905 +1,908 @@ /*- * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the project nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $KAME: in6_ifattach.c,v 1.118 2001/05/24 07:44:00 itojun Exp $ */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include unsigned long in6_maxmtu = 0; #ifdef IP6_AUTO_LINKLOCAL int ip6_auto_linklocal = IP6_AUTO_LINKLOCAL; #else int ip6_auto_linklocal = 1; /* enable by default */ #endif struct callout in6_tmpaddrtimer_ch; extern struct inpcbinfo udbinfo; extern struct inpcbinfo ripcbinfo; static int get_rand_ifid(struct ifnet *, struct in6_addr *); static int generate_tmp_ifid(u_int8_t *, const u_int8_t *, u_int8_t *); static int get_ifid(struct ifnet *, struct ifnet *, struct in6_addr *); static int in6_ifattach_linklocal(struct ifnet *, struct ifnet *); static int in6_ifattach_loopback(struct ifnet *); static void in6_purgemaddrs(struct ifnet *); #define EUI64_GBIT 0x01 #define EUI64_UBIT 0x02 #define EUI64_TO_IFID(in6) do {(in6)->s6_addr[8] ^= EUI64_UBIT; } while (0) #define EUI64_GROUP(in6) ((in6)->s6_addr[8] & EUI64_GBIT) #define EUI64_INDIVIDUAL(in6) (!EUI64_GROUP(in6)) #define EUI64_LOCAL(in6) ((in6)->s6_addr[8] & EUI64_UBIT) #define EUI64_UNIVERSAL(in6) (!EUI64_LOCAL(in6)) #define IFID_LOCAL(in6) (!EUI64_LOCAL(in6)) #define IFID_UNIVERSAL(in6) (!EUI64_UNIVERSAL(in6)) /* * Generate a last-resort interface identifier, when the machine has no * IEEE802/EUI64 address sources. * The goal here is to get an interface identifier that is * (1) random enough and (2) does not change across reboot. * We currently use MD5(hostname) for it. * * in6 - upper 64bits are preserved */ static int get_rand_ifid(struct ifnet *ifp, struct in6_addr *in6) { MD5_CTX ctxt; u_int8_t digest[16]; - int hostnamelen = strlen(hostname); + int hostnamelen; + mtx_lock(&hostname_mtx); + hostnamelen = strlen(hostname); #if 0 /* we need at least several letters as seed for ifid */ if (hostnamelen < 3) return -1; #endif /* generate 8 bytes of pseudo-random value. */ bzero(&ctxt, sizeof(ctxt)); MD5Init(&ctxt); MD5Update(&ctxt, hostname, hostnamelen); + mtx_unlock(&hostname_mtx); MD5Final(digest, &ctxt); /* assumes sizeof(digest) > sizeof(ifid) */ bcopy(digest, &in6->s6_addr[8], 8); /* make sure to set "u" bit to local, and "g" bit to individual. */ in6->s6_addr[8] &= ~EUI64_GBIT; /* g bit to "individual" */ in6->s6_addr[8] |= EUI64_UBIT; /* u bit to "local" */ /* convert EUI64 into IPv6 interface identifier */ EUI64_TO_IFID(in6); return 0; } static int generate_tmp_ifid(u_int8_t *seed0, const u_int8_t *seed1, u_int8_t *ret) { MD5_CTX ctxt; u_int8_t seed[16], digest[16], nullbuf[8]; u_int32_t val32; /* If there's no history, start with a random seed. */ bzero(nullbuf, sizeof(nullbuf)); if (bcmp(nullbuf, seed0, sizeof(nullbuf)) == 0) { int i; for (i = 0; i < 2; i++) { val32 = arc4random(); bcopy(&val32, seed + sizeof(val32) * i, sizeof(val32)); } } else bcopy(seed0, seed, 8); /* copy the right-most 64-bits of the given address */ /* XXX assumption on the size of IFID */ bcopy(seed1, &seed[8], 8); if (0) { /* for debugging purposes only */ int i; printf("generate_tmp_ifid: new randomized ID from: "); for (i = 0; i < 16; i++) printf("%02x", seed[i]); printf(" "); } /* generate 16 bytes of pseudo-random value. */ bzero(&ctxt, sizeof(ctxt)); MD5Init(&ctxt); MD5Update(&ctxt, seed, sizeof(seed)); MD5Final(digest, &ctxt); /* * RFC 3041 3.2.1. (3) * Take the left-most 64-bits of the MD5 digest and set bit 6 (the * left-most bit is numbered 0) to zero. */ bcopy(digest, ret, 8); ret[0] &= ~EUI64_UBIT; /* * XXX: we'd like to ensure that the generated value is not zero * for simplicity. If the caclculated digest happens to be zero, * use a random non-zero value as the last resort. */ if (bcmp(nullbuf, ret, sizeof(nullbuf)) == 0) { nd6log((LOG_INFO, "generate_tmp_ifid: computed MD5 value is zero.\n")); val32 = arc4random(); val32 = 1 + (val32 % (0xffffffff - 1)); } /* * RFC 3041 3.2.1. (4) * Take the rightmost 64-bits of the MD5 digest and save them in * stable storage as the history value to be used in the next * iteration of the algorithm. */ bcopy(&digest[8], seed0, 8); if (0) { /* for debugging purposes only */ int i; printf("to: "); for (i = 0; i < 16; i++) printf("%02x", digest[i]); printf("\n"); } return 0; } /* * Get interface identifier for the specified interface. * XXX assumes single sockaddr_dl (AF_LINK address) per an interface * * in6 - upper 64bits are preserved */ int in6_get_hw_ifid(struct ifnet *ifp, struct in6_addr *in6) { struct ifaddr *ifa; struct sockaddr_dl *sdl; u_int8_t *addr; size_t addrlen; static u_int8_t allzero[8] = { 0, 0, 0, 0, 0, 0, 0, 0 }; static u_int8_t allone[8] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = ifa->ifa_list.tqe_next) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) continue; if (sdl->sdl_alen == 0) continue; goto found; } return -1; found: addr = LLADDR(sdl); addrlen = sdl->sdl_alen; /* get EUI64 */ switch (ifp->if_type) { case IFT_ETHER: case IFT_FDDI: case IFT_ISO88025: case IFT_ATM: case IFT_IEEE1394: #ifdef IFT_IEEE80211 case IFT_IEEE80211: #endif /* IEEE802/EUI64 cases - what others? */ /* IEEE1394 uses 16byte length address starting with EUI64 */ if (addrlen > 8) addrlen = 8; /* look at IEEE802/EUI64 only */ if (addrlen != 8 && addrlen != 6) return -1; /* * check for invalid MAC address - on bsdi, we see it a lot * since wildboar configures all-zero MAC on pccard before * card insertion. */ if (bcmp(addr, allzero, addrlen) == 0) return -1; if (bcmp(addr, allone, addrlen) == 0) return -1; /* make EUI64 address */ if (addrlen == 8) bcopy(addr, &in6->s6_addr[8], 8); else if (addrlen == 6) { in6->s6_addr[8] = addr[0]; in6->s6_addr[9] = addr[1]; in6->s6_addr[10] = addr[2]; in6->s6_addr[11] = 0xff; in6->s6_addr[12] = 0xfe; in6->s6_addr[13] = addr[3]; in6->s6_addr[14] = addr[4]; in6->s6_addr[15] = addr[5]; } break; case IFT_ARCNET: if (addrlen != 1) return -1; if (!addr[0]) return -1; bzero(&in6->s6_addr[8], 8); in6->s6_addr[15] = addr[0]; /* * due to insufficient bitwidth, we mark it local. */ in6->s6_addr[8] &= ~EUI64_GBIT; /* g bit to "individual" */ in6->s6_addr[8] |= EUI64_UBIT; /* u bit to "local" */ break; case IFT_GIF: #ifdef IFT_STF case IFT_STF: #endif /* * RFC2893 says: "SHOULD use IPv4 address as ifid source". * however, IPv4 address is not very suitable as unique * identifier source (can be renumbered). * we don't do this. */ return -1; default: return -1; } /* sanity check: g bit must not indicate "group" */ if (EUI64_GROUP(in6)) return -1; /* convert EUI64 into IPv6 interface identifier */ EUI64_TO_IFID(in6); /* * sanity check: ifid must not be all zero, avoid conflict with * subnet router anycast */ if ((in6->s6_addr[8] & ~(EUI64_GBIT | EUI64_UBIT)) == 0x00 && bcmp(&in6->s6_addr[9], allzero, 7) == 0) { return -1; } return 0; } /* * Get interface identifier for the specified interface. If it is not * available on ifp0, borrow interface identifier from other information * sources. * * altifp - secondary EUI64 source */ static int get_ifid(struct ifnet *ifp0, struct ifnet *altifp, struct in6_addr *in6) { struct ifnet *ifp; /* first, try to get it from the interface itself */ if (in6_get_hw_ifid(ifp0, in6) == 0) { nd6log((LOG_DEBUG, "%s: got interface identifier from itself\n", if_name(ifp0))); goto success; } /* try secondary EUI64 source. this basically is for ATM PVC */ if (altifp && in6_get_hw_ifid(altifp, in6) == 0) { nd6log((LOG_DEBUG, "%s: got interface identifier from %s\n", if_name(ifp0), if_name(altifp))); goto success; } /* next, try to get it from some other hardware interface */ IFNET_RLOCK(); for (ifp = ifnet.tqh_first; ifp; ifp = ifp->if_list.tqe_next) { if (ifp == ifp0) continue; if (in6_get_hw_ifid(ifp, in6) != 0) continue; /* * to borrow ifid from other interface, ifid needs to be * globally unique */ if (IFID_UNIVERSAL(in6)) { nd6log((LOG_DEBUG, "%s: borrow interface identifier from %s\n", if_name(ifp0), if_name(ifp))); IFNET_RUNLOCK(); goto success; } } IFNET_RUNLOCK(); /* last resort: get from random number source */ if (get_rand_ifid(ifp, in6) == 0) { nd6log((LOG_DEBUG, "%s: interface identifier generated by random number\n", if_name(ifp0))); goto success; } printf("%s: failed to get interface identifier\n", if_name(ifp0)); return -1; success: nd6log((LOG_INFO, "%s: ifid: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x\n", if_name(ifp0), in6->s6_addr[8], in6->s6_addr[9], in6->s6_addr[10], in6->s6_addr[11], in6->s6_addr[12], in6->s6_addr[13], in6->s6_addr[14], in6->s6_addr[15])); return 0; } /* * altifp - secondary EUI64 source */ static int in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp) { struct in6_ifaddr *ia; struct in6_aliasreq ifra; struct nd_prefixctl pr0; int i, error; /* * configure link-local address. */ bzero(&ifra, sizeof(ifra)); /* * in6_update_ifa() does not use ifra_name, but we accurately set it * for safety. */ strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); ifra.ifra_addr.sin6_family = AF_INET6; ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_addr.sin6_addr.s6_addr32[0] = htonl(0xfe800000); ifra.ifra_addr.sin6_addr.s6_addr32[1] = 0; if ((ifp->if_flags & IFF_LOOPBACK) != 0) { ifra.ifra_addr.sin6_addr.s6_addr32[2] = 0; ifra.ifra_addr.sin6_addr.s6_addr32[3] = htonl(1); } else { if (get_ifid(ifp, altifp, &ifra.ifra_addr.sin6_addr) != 0) { nd6log((LOG_ERR, "%s: no ifid available\n", if_name(ifp))); return (-1); } } if (in6_setscope(&ifra.ifra_addr.sin6_addr, ifp, NULL)) return (-1); ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_prefixmask.sin6_family = AF_INET6; ifra.ifra_prefixmask.sin6_addr = in6mask64; /* link-local addresses should NEVER expire. */ ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; /* * Now call in6_update_ifa() to do a bunch of procedures to configure * a link-local address. We can set the 3rd argument to NULL, because * we know there's no other link-local address on the interface * and therefore we are adding one (instead of updating one). */ if ((error = in6_update_ifa(ifp, &ifra, NULL, IN6_IFAUPDATE_DADDELAY)) != 0) { /* * XXX: When the interface does not support IPv6, this call * would fail in the SIOCSIFADDR ioctl. I believe the * notification is rather confusing in this case, so just * suppress it. (jinmei@kame.net 20010130) */ if (error != EAFNOSUPPORT) nd6log((LOG_NOTICE, "in6_ifattach_linklocal: failed to " "configure a link-local address on %s " "(errno=%d)\n", if_name(ifp), error)); return (-1); } ia = in6ifa_ifpforlinklocal(ifp, 0); /* ia must not be NULL */ #ifdef DIAGNOSTIC if (!ia) { panic("ia == NULL in in6_ifattach_linklocal"); /* NOTREACHED */ } #endif /* * Make the link-local prefix (fe80::%link/64) as on-link. * Since we'd like to manage prefixes separately from addresses, * we make an ND6 prefix structure for the link-local prefix, * and add it to the prefix list as a never-expire prefix. * XXX: this change might affect some existing code base... */ bzero(&pr0, sizeof(pr0)); pr0.ndpr_ifp = ifp; /* this should be 64 at this moment. */ pr0.ndpr_plen = in6_mask2len(&ifra.ifra_prefixmask.sin6_addr, NULL); pr0.ndpr_prefix = ifra.ifra_addr; /* apply the mask for safety. (nd6_prelist_add will apply it again) */ for (i = 0; i < 4; i++) { pr0.ndpr_prefix.sin6_addr.s6_addr32[i] &= in6mask64.s6_addr32[i]; } /* * Initialize parameters. The link-local prefix must always be * on-link, and its lifetimes never expire. */ pr0.ndpr_raf_onlink = 1; pr0.ndpr_raf_auto = 1; /* probably meaningless */ pr0.ndpr_vltime = ND6_INFINITE_LIFETIME; pr0.ndpr_pltime = ND6_INFINITE_LIFETIME; /* * Since there is no other link-local addresses, nd6_prefix_lookup() * probably returns NULL. However, we cannot always expect the result. * For example, if we first remove the (only) existing link-local * address, and then reconfigure another one, the prefix is still * valid with referring to the old link-local address. */ if (nd6_prefix_lookup(&pr0) == NULL) { if ((error = nd6_prelist_add(&pr0, NULL, NULL)) != 0) return (error); } return 0; } /* * ifp - must be IFT_LOOP */ static int in6_ifattach_loopback(struct ifnet *ifp) { struct in6_aliasreq ifra; int error; bzero(&ifra, sizeof(ifra)); /* * in6_update_ifa() does not use ifra_name, but we accurately set it * for safety. */ strncpy(ifra.ifra_name, if_name(ifp), sizeof(ifra.ifra_name)); ifra.ifra_prefixmask.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_prefixmask.sin6_family = AF_INET6; ifra.ifra_prefixmask.sin6_addr = in6mask128; /* * Always initialize ia_dstaddr (= broadcast address) to loopback * address. Follows IPv4 practice - see in_ifinit(). */ ifra.ifra_dstaddr.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_dstaddr.sin6_family = AF_INET6; ifra.ifra_dstaddr.sin6_addr = in6addr_loopback; ifra.ifra_addr.sin6_len = sizeof(struct sockaddr_in6); ifra.ifra_addr.sin6_family = AF_INET6; ifra.ifra_addr.sin6_addr = in6addr_loopback; /* the loopback address should NEVER expire. */ ifra.ifra_lifetime.ia6t_vltime = ND6_INFINITE_LIFETIME; ifra.ifra_lifetime.ia6t_pltime = ND6_INFINITE_LIFETIME; /* we don't need to perform DAD on loopback interfaces. */ ifra.ifra_flags |= IN6_IFF_NODAD; /* skip registration to the prefix list. XXX should be temporary. */ ifra.ifra_flags |= IN6_IFF_NOPFX; /* * We are sure that this is a newly assigned address, so we can set * NULL to the 3rd arg. */ if ((error = in6_update_ifa(ifp, &ifra, NULL, 0)) != 0) { nd6log((LOG_ERR, "in6_ifattach_loopback: failed to configure " "the loopback address on %s (errno=%d)\n", if_name(ifp), error)); return (-1); } return 0; } /* * compute NI group address, based on the current hostname setting. * see draft-ietf-ipngwg-icmp-name-lookup-* (04 and later). * * when ifp == NULL, the caller is responsible for filling scopeid. */ int in6_nigroup(struct ifnet *ifp, const char *name, int namelen, struct in6_addr *in6) { const char *p; u_char *q; MD5_CTX ctxt; u_int8_t digest[16]; char l; char n[64]; /* a single label must not exceed 63 chars */ if (!namelen || !name) return -1; p = name; while (p && *p && *p != '.' && p - name < namelen) p++; if (p - name > sizeof(n) - 1) return -1; /* label too long */ l = p - name; strncpy(n, name, l); n[(int)l] = '\0'; for (q = n; *q; q++) { if ('A' <= *q && *q <= 'Z') *q = *q - 'A' + 'a'; } /* generate 8 bytes of pseudo-random value. */ bzero(&ctxt, sizeof(ctxt)); MD5Init(&ctxt); MD5Update(&ctxt, &l, sizeof(l)); MD5Update(&ctxt, n, l); MD5Final(digest, &ctxt); bzero(in6, sizeof(*in6)); in6->s6_addr16[0] = IPV6_ADDR_INT16_MLL; in6->s6_addr8[11] = 2; bcopy(digest, &in6->s6_addr32[3], sizeof(in6->s6_addr32[3])); if (in6_setscope(in6, ifp, NULL)) return (-1); /* XXX: should not fail */ return 0; } /* * XXX multiple loopback interface needs more care. for instance, * nodelocal address needs to be configured onto only one of them. * XXX multiple link-local address case * * altifp - secondary EUI64 source */ void in6_ifattach(struct ifnet *ifp, struct ifnet *altifp) { struct in6_ifaddr *ia; struct in6_addr in6; /* some of the interfaces are inherently not IPv6 capable */ switch (ifp->if_type) { case IFT_PFLOG: case IFT_PFSYNC: case IFT_CARP: return; } /* * quirks based on interface type */ switch (ifp->if_type) { #ifdef IFT_STF case IFT_STF: /* * 6to4 interface is a very special kind of beast. * no multicast, no linklocal. RFC2529 specifies how to make * linklocals for 6to4 interface, but there's no use and * it is rather harmful to have one. */ goto statinit; #endif default: break; } /* * usually, we require multicast capability to the interface */ if ((ifp->if_flags & IFF_MULTICAST) == 0) { nd6log((LOG_INFO, "in6_ifattach: " "%s is not multicast capable, IPv6 not enabled\n", if_name(ifp))); return; } /* * assign loopback address for loopback interface. * XXX multiple loopback interface case. */ if ((ifp->if_flags & IFF_LOOPBACK) != 0) { in6 = in6addr_loopback; if (in6ifa_ifpwithaddr(ifp, &in6) == NULL) { if (in6_ifattach_loopback(ifp) != 0) return; } } /* * assign a link-local address, if there's none. */ if (ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) { ia = in6ifa_ifpforlinklocal(ifp, 0); if (ia == NULL) { if (in6_ifattach_linklocal(ifp, altifp) == 0) { /* linklocal address assigned */ } else { /* failed to assign linklocal address. bark? */ } } } #ifdef IFT_STF /* XXX */ statinit: #endif /* update dynamically. */ if (in6_maxmtu < ifp->if_mtu) in6_maxmtu = ifp->if_mtu; } /* * NOTE: in6_ifdetach() does not support loopback if at this moment. * We don't need this function in bsdi, because interfaces are never removed * from the ifnet list in bsdi. */ void in6_ifdetach(struct ifnet *ifp) { struct in6_ifaddr *ia, *oia; struct ifaddr *ifa, *next; struct rtentry *rt; short rtflags; struct sockaddr_in6 sin6; struct in6_multi_mship *imm; /* remove neighbor management table */ nd6_purge(ifp); /* nuke any of IPv6 addresses we have */ for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next) { next = ifa->ifa_list.tqe_next; if (ifa->ifa_addr->sa_family != AF_INET6) continue; in6_purgeaddr(ifa); } /* undo everything done by in6_ifattach(), just in case */ for (ifa = ifp->if_addrlist.tqh_first; ifa; ifa = next) { next = ifa->ifa_list.tqe_next; if (ifa->ifa_addr->sa_family != AF_INET6 || !IN6_IS_ADDR_LINKLOCAL(&satosin6(&ifa->ifa_addr)->sin6_addr)) { continue; } ia = (struct in6_ifaddr *)ifa; /* * leave from multicast groups we have joined for the interface */ while ((imm = ia->ia6_memberships.lh_first) != NULL) { LIST_REMOVE(imm, i6mm_chain); in6_leavegroup(imm); } /* remove from the routing table */ if ((ia->ia_flags & IFA_ROUTE) && (rt = rtalloc1((struct sockaddr *)&ia->ia_addr, 0, 0UL))) { rtflags = rt->rt_flags; rtfree(rt); rtrequest(RTM_DELETE, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&ia->ia_addr, (struct sockaddr *)&ia->ia_prefixmask, rtflags, (struct rtentry **)0); } /* remove from the linked list */ TAILQ_REMOVE(&ifp->if_addrlist, (struct ifaddr *)ia, ifa_list); IFAFREE(&ia->ia_ifa); /* also remove from the IPv6 address chain(itojun&jinmei) */ oia = ia; if (oia == (ia = in6_ifaddr)) in6_ifaddr = ia->ia_next; else { while (ia->ia_next && (ia->ia_next != oia)) ia = ia->ia_next; if (ia->ia_next) ia->ia_next = oia->ia_next; else { nd6log((LOG_ERR, "%s: didn't unlink in6ifaddr from list\n", if_name(ifp))); } } IFAFREE(&oia->ia_ifa); } in6_pcbpurgeif0(&udbinfo, ifp); in6_pcbpurgeif0(&ripcbinfo, ifp); /* leave from all multicast groups joined */ in6_purgemaddrs(ifp); /* * remove neighbor management table. we call it twice just to make * sure we nuke everything. maybe we need just one call. * XXX: since the first call did not release addresses, some prefixes * might remain. We should call nd6_purge() again to release the * prefixes after removing all addresses above. * (Or can we just delay calling nd6_purge until at this point?) */ nd6_purge(ifp); /* remove route to link-local allnodes multicast (ff02::1) */ bzero(&sin6, sizeof(sin6)); sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_family = AF_INET6; sin6.sin6_addr = in6addr_linklocal_allnodes; if (in6_setscope(&sin6.sin6_addr, ifp, NULL)) /* XXX: should not fail */ return; /* XXX grab lock first to avoid LOR */ if (rt_tables[0][AF_INET6] != NULL) { RADIX_NODE_HEAD_LOCK(rt_tables[0][AF_INET6]); rt = rtalloc1((struct sockaddr *)&sin6, 0, 0UL); if (rt) { if (rt->rt_ifp == ifp) rtexpunge(rt); RTFREE_LOCKED(rt); } RADIX_NODE_HEAD_UNLOCK(rt_tables[0][AF_INET6]); } } int in6_get_tmpifid(struct ifnet *ifp, u_int8_t *retbuf, const u_int8_t *baseid, int generate) { u_int8_t nullbuf[8]; struct nd_ifinfo *ndi = ND_IFINFO(ifp); bzero(nullbuf, sizeof(nullbuf)); if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) == 0) { /* we've never created a random ID. Create a new one. */ generate = 1; } if (generate) { bcopy(baseid, ndi->randomseed1, sizeof(ndi->randomseed1)); /* generate_tmp_ifid will update seedn and buf */ (void)generate_tmp_ifid(ndi->randomseed0, ndi->randomseed1, ndi->randomid); } bcopy(ndi->randomid, retbuf, 8); return (0); } void in6_tmpaddrtimer(void *ignored_arg) { struct nd_ifinfo *ndi; u_int8_t nullbuf[8]; struct ifnet *ifp; int s = splnet(); callout_reset(&in6_tmpaddrtimer_ch, (ip6_temp_preferred_lifetime - ip6_desync_factor - ip6_temp_regen_advance) * hz, in6_tmpaddrtimer, NULL); bzero(nullbuf, sizeof(nullbuf)); for (ifp = TAILQ_FIRST(&ifnet); ifp; ifp = TAILQ_NEXT(ifp, if_list)) { ndi = ND_IFINFO(ifp); if (bcmp(ndi->randomid, nullbuf, sizeof(nullbuf)) != 0) { /* * We've been generating a random ID on this interface. * Create a new one. */ (void)generate_tmp_ifid(ndi->randomseed0, ndi->randomseed1, ndi->randomid); } } splx(s); } static void in6_purgemaddrs(struct ifnet *ifp) { struct in6_multi *in6m; struct in6_multi *oin6m; #ifdef DIAGNOSTIC printf("%s: purging ifp %p\n", __func__, ifp); #endif IFF_LOCKGIANT(ifp); LIST_FOREACH_SAFE(in6m, &in6_multihead, in6m_entry, oin6m) { if (in6m->in6m_ifp == ifp) in6_delmulti(in6m); } IFF_UNLOCKGIANT(ifp); } Index: head/sys/nfsclient/bootp_subr.c =================================================================== --- head/sys/nfsclient/bootp_subr.c (revision 180290) +++ head/sys/nfsclient/bootp_subr.c (revision 180291) @@ -1,1867 +1,1869 @@ /*- * Copyright (c) 1995 Gordon Ross, Adam Glass * Copyright (c) 1992 Regents of the University of California. * All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * based on: * nfs/krpc_subr.c * $NetBSD: krpc_subr.c,v 1.10 1995/08/08 20:43:43 gwr Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define BOOTP_MIN_LEN 300 /* Minimum size of bootp udp packet */ #ifndef BOOTP_SETTLE_DELAY #define BOOTP_SETTLE_DELAY 3 #endif /* * What is the longest we will wait before re-sending a request? * Note this is also the frequency of "RPC timeout" messages. * The re-send loop count sup linearly to this maximum, so the * first complaint will happen after (1+2+3+4+5)=15 seconds. */ #define MAX_RESEND_DELAY 5 /* seconds */ /* Definitions from RFC951 */ struct bootp_packet { u_int8_t op; u_int8_t htype; u_int8_t hlen; u_int8_t hops; u_int32_t xid; u_int16_t secs; u_int16_t flags; struct in_addr ciaddr; struct in_addr yiaddr; struct in_addr siaddr; struct in_addr giaddr; unsigned char chaddr[16]; char sname[64]; char file[128]; unsigned char vend[1222]; }; struct bootpc_ifcontext { struct bootpc_ifcontext *next; struct bootp_packet call; struct bootp_packet reply; int replylen; int overload; struct socket *so; struct ifreq ireq; struct ifnet *ifp; struct sockaddr_dl *sdl; struct sockaddr_in myaddr; struct sockaddr_in netmask; struct sockaddr_in gw; struct sockaddr_in broadcast; /* Different for each interface */ int gotgw; int gotnetmask; int gotrootpath; int outstanding; int sentmsg; u_int32_t xid; enum { IF_BOOTP_UNRESOLVED, IF_BOOTP_RESOLVED, IF_BOOTP_FAILED, IF_DHCP_UNRESOLVED, IF_DHCP_OFFERED, IF_DHCP_RESOLVED, IF_DHCP_FAILED, } state; int dhcpquerytype; /* dhcp type sent */ struct in_addr dhcpserver; int gotdhcpserver; }; #define TAG_MAXLEN 1024 struct bootpc_tagcontext { char buf[TAG_MAXLEN + 1]; int overload; int badopt; int badtag; int foundopt; int taglen; }; struct bootpc_globalcontext { struct bootpc_ifcontext *interfaces; struct bootpc_ifcontext *lastinterface; u_int32_t xid; int gotrootpath; int gotgw; int ifnum; int secs; int starttime; struct bootp_packet reply; int replylen; struct bootpc_ifcontext *setrootfs; struct bootpc_ifcontext *sethostname; struct bootpc_tagcontext tmptag; struct bootpc_tagcontext tag; }; #define IPPORT_BOOTPC 68 #define IPPORT_BOOTPS 67 #define BOOTP_REQUEST 1 #define BOOTP_REPLY 2 /* Common tags */ #define TAG_PAD 0 /* Pad option, implicit length 1 */ #define TAG_SUBNETMASK 1 /* RFC 950 subnet mask */ #define TAG_ROUTERS 3 /* Routers (in order of preference) */ #define TAG_HOSTNAME 12 /* Client host name */ #define TAG_ROOT 17 /* Root path */ /* DHCP specific tags */ #define TAG_OVERLOAD 52 /* Option Overload */ #define TAG_MAXMSGSIZE 57 /* Maximum DHCP Message Size */ #define TAG_END 255 /* End Option (i.e. no more options) */ /* Overload values */ #define OVERLOAD_FILE 1 #define OVERLOAD_SNAME 2 /* Site specific tags: */ #define TAG_ROOTOPTS 130 #define TAG_COOKIE 134 /* ascii info for userland, via sysctl */ #define TAG_DHCP_MSGTYPE 53 #define TAG_DHCP_REQ_ADDR 50 #define TAG_DHCP_SERVERID 54 #define TAG_DHCP_LEASETIME 51 #define TAG_VENDOR_INDENTIFIER 60 #define DHCP_NOMSG 0 #define DHCP_DISCOVER 1 #define DHCP_OFFER 2 #define DHCP_REQUEST 3 #define DHCP_ACK 5 /* NFS read/write block size */ #ifndef BOOTP_BLOCKSIZE #define BOOTP_BLOCKSIZE 8192 #endif static char bootp_cookie[128]; SYSCTL_STRING(_kern, OID_AUTO, bootp_cookie, CTLFLAG_RD, bootp_cookie, 0, "Cookie (T134) supplied by bootp server"); /* mountd RPC */ static int md_mount(struct sockaddr_in *mdsin, char *path, u_char *fhp, int *fhsizep, struct nfs_args *args, struct thread *td); static int setfs(struct sockaddr_in *addr, char *path, char *p, const struct in_addr *siaddr); static int getdec(char **ptr); static int getip(char **ptr, struct in_addr *ip); static void mountopts(struct nfs_args *args, char *p); static int xdr_opaque_decode(struct mbuf **ptr, u_char *buf, int len); static int xdr_int_decode(struct mbuf **ptr, int *iptr); static void print_in_addr(struct in_addr addr); static void print_sin_addr(struct sockaddr_in *addr); static void clear_sinaddr(struct sockaddr_in *sin); static void allocifctx(struct bootpc_globalcontext *gctx); static void bootpc_compose_query(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td); static unsigned char *bootpc_tag(struct bootpc_tagcontext *tctx, struct bootp_packet *bp, int len, int tag); static void bootpc_tag_helper(struct bootpc_tagcontext *tctx, unsigned char *start, int len, int tag); #ifdef BOOTP_DEBUG void bootpboot_p_sa(struct sockaddr *sa, struct sockaddr *ma); void bootpboot_p_ma(struct sockaddr *ma); void bootpboot_p_rtentry(struct rtentry *rt); void bootpboot_p_tree(struct radix_node *rn); void bootpboot_p_rtlist(void); void bootpboot_p_if(struct ifnet *ifp, struct ifaddr *ifa); void bootpboot_p_iflist(void); #endif static int bootpc_call(struct bootpc_globalcontext *gctx, struct thread *td); static int bootpc_fakeup_interface(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td); static int bootpc_adjust_interface(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td); static void bootpc_decode_reply(struct nfsv3_diskless *nd, struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx); static int bootpc_received(struct bootpc_globalcontext *gctx, struct bootpc_ifcontext *ifctx); static __inline int bootpc_ifctx_isresolved(struct bootpc_ifcontext *ifctx); static __inline int bootpc_ifctx_isunresolved(struct bootpc_ifcontext *ifctx); static __inline int bootpc_ifctx_isfailed(struct bootpc_ifcontext *ifctx); /* * In order to have multiple active interfaces with address 0.0.0.0 * and be able to send data to a selected interface, we perform * some tricks: * * - The 'broadcast' address is different for each interface. * * - We temporarily add routing pointing 255.255.255.255 to the * selected interface broadcast address, thus the packet sent * goes to that interface. */ #ifdef BOOTP_DEBUG void bootpboot_p_sa(struct sockaddr *sa, struct sockaddr *ma) { if (sa == NULL) { printf("(sockaddr *) "); return; } switch (sa->sa_family) { case AF_INET: { struct sockaddr_in *sin; sin = (struct sockaddr_in *) sa; printf("inet "); print_sin_addr(sin); if (ma != NULL) { sin = (struct sockaddr_in *) ma; printf(" mask "); print_sin_addr(sin); } } break; case AF_LINK: { struct sockaddr_dl *sli; int i; sli = (struct sockaddr_dl *) sa; printf("link %.*s ", sli->sdl_nlen, sli->sdl_data); for (i = 0; i < sli->sdl_alen; i++) { if (i > 0) printf(":"); printf("%x", ((unsigned char *) LLADDR(sli))[i]); } } break; default: printf("af%d", sa->sa_family); } } void bootpboot_p_ma(struct sockaddr *ma) { if (ma == NULL) { printf(""); return; } printf("%x", *(int *)ma); } void bootpboot_p_rtentry(struct rtentry *rt) { bootpboot_p_sa(rt_key(rt), rt_mask(rt)); printf(" "); bootpboot_p_ma(rt->rt_genmask); printf(" "); bootpboot_p_sa(rt->rt_gateway, NULL); printf(" "); printf("flags %x", (unsigned short) rt->rt_flags); printf(" %d", (int) rt->rt_rmx.rmx_expire); printf(" %s\n", rt->rt_ifp->if_xname); } void bootpboot_p_tree(struct radix_node *rn) { while (rn != NULL) { if (rn->rn_bit < 0) { if ((rn->rn_flags & RNF_ROOT) != 0) { } else { bootpboot_p_rtentry((struct rtentry *) rn); } rn = rn->rn_dupedkey; } else { bootpboot_p_tree(rn->rn_left); bootpboot_p_tree(rn->rn_right); return; } } } void bootpboot_p_rtlist(void) { printf("Routing table:\n"); RADIX_NODE_LOCK(rt_tables[AF_INET]); /* could sleep XXX */ bootpboot_p_tree(rt_tables[AF_INET]->rnh_treetop); RADIX_NODE_UNLOCK(rt_tables[AF_INET]); } void bootpboot_p_if(struct ifnet *ifp, struct ifaddr *ifa) { printf("%s flags %x, addr ", ifp->if_xname, ifp->if_flags); print_sin_addr((struct sockaddr_in *) ifa->ifa_addr); printf(", broadcast "); print_sin_addr((struct sockaddr_in *) ifa->ifa_dstaddr); printf(", netmask "); print_sin_addr((struct sockaddr_in *) ifa->ifa_netmask); printf("\n"); } void bootpboot_p_iflist(void) { struct ifnet *ifp; struct ifaddr *ifa; printf("Interface list:\n"); IFNET_RLOCK(); /* could sleep, but okay for debugging XXX */ for (ifp = TAILQ_FIRST(&ifnet); ifp != NULL; ifp = TAILQ_NEXT(ifp, if_link)) { for (ifa = TAILQ_FIRST(&ifp->if_addrhead); ifa != NULL; ifa = TAILQ_NEXT(ifa, ifa_link)) if (ifa->ifa_addr->sa_family == AF_INET) bootpboot_p_if(ifp, ifa); } IFNET_RUNLOCK(); } #endif /* defined(BOOTP_DEBUG) */ static void clear_sinaddr(struct sockaddr_in *sin) { bzero(sin, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; /* XXX: htonl(INAADDR_ANY) ? */ sin->sin_port = 0; } static void allocifctx(struct bootpc_globalcontext *gctx) { struct bootpc_ifcontext *ifctx; ifctx = (struct bootpc_ifcontext *) malloc(sizeof(*ifctx), M_TEMP, M_WAITOK | M_ZERO); if (ifctx == NULL) panic("Failed to allocate bootp interface context structure"); ifctx->xid = gctx->xid; #ifdef BOOTP_NO_DHCP ifctx->state = IF_BOOTP_UNRESOLVED; #else ifctx->state = IF_DHCP_UNRESOLVED; #endif gctx->xid += 0x100; if (gctx->interfaces != NULL) gctx->lastinterface->next = ifctx; else gctx->interfaces = ifctx; gctx->lastinterface = ifctx; } static __inline int bootpc_ifctx_isresolved(struct bootpc_ifcontext *ifctx) { if (ifctx->state == IF_BOOTP_RESOLVED || ifctx->state == IF_DHCP_RESOLVED) return 1; return 0; } static __inline int bootpc_ifctx_isunresolved(struct bootpc_ifcontext *ifctx) { if (ifctx->state == IF_BOOTP_UNRESOLVED || ifctx->state == IF_DHCP_UNRESOLVED) return 1; return 0; } static __inline int bootpc_ifctx_isfailed(struct bootpc_ifcontext *ifctx) { if (ifctx->state == IF_BOOTP_FAILED || ifctx->state == IF_DHCP_FAILED) return 1; return 0; } static int bootpc_received(struct bootpc_globalcontext *gctx, struct bootpc_ifcontext *ifctx) { unsigned char dhcpreplytype; char *p; /* * Need timeout for fallback to less * desirable alternative. */ /* This call used for the side effect (badopt flag) */ (void) bootpc_tag(&gctx->tmptag, &gctx->reply, gctx->replylen, TAG_END); /* If packet is invalid, ignore it */ if (gctx->tmptag.badopt != 0) return 0; p = bootpc_tag(&gctx->tmptag, &gctx->reply, gctx->replylen, TAG_DHCP_MSGTYPE); if (p != NULL) dhcpreplytype = *p; else dhcpreplytype = DHCP_NOMSG; switch (ifctx->dhcpquerytype) { case DHCP_DISCOVER: if (dhcpreplytype != DHCP_OFFER /* Normal DHCP offer */ #ifndef BOOTP_FORCE_DHCP && dhcpreplytype != DHCP_NOMSG /* Fallback to BOOTP */ #endif ) return 0; break; case DHCP_REQUEST: if (dhcpreplytype != DHCP_ACK) return 0; case DHCP_NOMSG: break; } /* Ignore packet unless it gives us a root tag we didn't have */ if ((ifctx->state == IF_BOOTP_RESOLVED || (ifctx->dhcpquerytype == DHCP_DISCOVER && (ifctx->state == IF_DHCP_OFFERED || ifctx->state == IF_DHCP_RESOLVED))) && (bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_ROOT) != NULL || bootpc_tag(&gctx->tmptag, &gctx->reply, gctx->replylen, TAG_ROOT) == NULL)) return 0; bcopy(&gctx->reply, &ifctx->reply, gctx->replylen); ifctx->replylen = gctx->replylen; /* XXX: Only reset if 'perfect' response */ if (ifctx->state == IF_BOOTP_UNRESOLVED) ifctx->state = IF_BOOTP_RESOLVED; else if (ifctx->state == IF_DHCP_UNRESOLVED && ifctx->dhcpquerytype == DHCP_DISCOVER) { if (dhcpreplytype == DHCP_OFFER) ifctx->state = IF_DHCP_OFFERED; else ifctx->state = IF_BOOTP_RESOLVED; /* Fallback */ } else if (ifctx->state == IF_DHCP_OFFERED && ifctx->dhcpquerytype == DHCP_REQUEST) ifctx->state = IF_DHCP_RESOLVED; if (ifctx->dhcpquerytype == DHCP_DISCOVER && ifctx->state != IF_BOOTP_RESOLVED) { p = bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_DHCP_SERVERID); if (p != NULL && gctx->tmptag.taglen == 4) { memcpy(&ifctx->dhcpserver, p, 4); ifctx->gotdhcpserver = 1; } else ifctx->gotdhcpserver = 0; return 1; } ifctx->gotrootpath = (bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_ROOT) != NULL); ifctx->gotgw = (bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_ROUTERS) != NULL); ifctx->gotnetmask = (bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_SUBNETMASK) != NULL); return 1; } static int bootpc_call(struct bootpc_globalcontext *gctx, struct thread *td) { struct socket *so; struct sockaddr_in *sin, dst; struct uio auio; struct sockopt sopt; struct iovec aio; int error, on, rcvflg, timo, len; time_t atimo; time_t rtimo; struct timeval tv; struct bootpc_ifcontext *ifctx; int outstanding; int gotrootpath; int retry; const char *s; /* * Create socket and set its recieve timeout. */ error = socreate(AF_INET, &so, SOCK_DGRAM, 0, td->td_ucred, td); if (error != 0) goto out0; tv.tv_sec = 1; tv.tv_usec = 0; bzero(&sopt, sizeof(sopt)); sopt.sopt_dir = SOPT_SET; sopt.sopt_level = SOL_SOCKET; sopt.sopt_name = SO_RCVTIMEO; sopt.sopt_val = &tv; sopt.sopt_valsize = sizeof tv; error = sosetopt(so, &sopt); if (error != 0) goto out; /* * Enable broadcast. */ on = 1; sopt.sopt_name = SO_BROADCAST; sopt.sopt_val = &on; sopt.sopt_valsize = sizeof on; error = sosetopt(so, &sopt); if (error != 0) goto out; /* * Disable routing. */ on = 1; sopt.sopt_name = SO_DONTROUTE; sopt.sopt_val = &on; sopt.sopt_valsize = sizeof on; error = sosetopt(so, &sopt); if (error != 0) goto out; /* * Bind the local endpoint to a bootp client port. */ sin = &dst; clear_sinaddr(sin); sin->sin_port = htons(IPPORT_BOOTPC); error = sobind(so, (struct sockaddr *)sin, td); if (error != 0) { printf("bind failed\n"); goto out; } /* * Setup socket address for the server. */ sin = &dst; clear_sinaddr(sin); sin->sin_addr.s_addr = INADDR_BROADCAST; sin->sin_port = htons(IPPORT_BOOTPS); /* * Send it, repeatedly, until a reply is received, * but delay each re-send by an increasing amount. * If the delay hits the maximum, start complaining. */ timo = 0; rtimo = 0; for (;;) { outstanding = 0; gotrootpath = 0; for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (bootpc_ifctx_isresolved(ifctx) != 0 && bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_ROOT) != NULL) gotrootpath = 1; } for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { ifctx->outstanding = 0; if (bootpc_ifctx_isresolved(ifctx) != 0 && gotrootpath != 0) { continue; } if (bootpc_ifctx_isfailed(ifctx) != 0) continue; outstanding++; ifctx->outstanding = 1; /* Proceed to next step in DHCP negotiation */ if ((ifctx->state == IF_DHCP_OFFERED && ifctx->dhcpquerytype != DHCP_REQUEST) || (ifctx->state == IF_DHCP_UNRESOLVED && ifctx->dhcpquerytype != DHCP_DISCOVER) || (ifctx->state == IF_BOOTP_UNRESOLVED && ifctx->dhcpquerytype != DHCP_NOMSG)) { ifctx->sentmsg = 0; bootpc_compose_query(ifctx, gctx, td); } /* Send BOOTP request (or re-send). */ if (ifctx->sentmsg == 0) { switch(ifctx->dhcpquerytype) { case DHCP_DISCOVER: s = "DHCP Discover"; break; case DHCP_REQUEST: s = "DHCP Request"; break; case DHCP_NOMSG: default: s = "BOOTP Query"; break; } printf("Sending %s packet from " "interface %s (%*D)\n", s, ifctx->ireq.ifr_name, ifctx->sdl->sdl_alen, (unsigned char *) LLADDR(ifctx->sdl), ":"); ifctx->sentmsg = 1; } aio.iov_base = (caddr_t) &ifctx->call; aio.iov_len = sizeof(ifctx->call); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_offset = 0; auio.uio_resid = sizeof(ifctx->call); auio.uio_td = td; /* Set netmask to 0.0.0.0 */ sin = (struct sockaddr_in *) &ifctx->ireq.ifr_addr; clear_sinaddr(sin); error = ifioctl(ifctx->so, SIOCSIFNETMASK, (caddr_t) &ifctx->ireq, td); if (error != 0) panic("bootpc_call:" "set if netmask, error=%d", error); error = sosend(so, (struct sockaddr *) &dst, &auio, NULL, NULL, 0, td); if (error != 0) { printf("bootpc_call: sosend: %d state %08x\n", error, (int) so->so_state); } /* XXX: Is this needed ? */ pause("bootpw", hz/10); /* Set netmask to 255.0.0.0 */ sin = (struct sockaddr_in *) &ifctx->ireq.ifr_addr; clear_sinaddr(sin); sin->sin_addr.s_addr = htonl(0xff000000u); error = ifioctl(ifctx->so, SIOCSIFNETMASK, (caddr_t) &ifctx->ireq, td); if (error != 0) panic("bootpc_call:" "set if netmask, error=%d", error); } if (outstanding == 0 && (rtimo == 0 || time_second >= rtimo)) { error = 0; goto gotreply; } /* Determine new timeout. */ if (timo < MAX_RESEND_DELAY) timo++; else { printf("DHCP/BOOTP timeout for server "); print_sin_addr(&dst); printf("\n"); } /* * Wait for up to timo seconds for a reply. * The socket receive timeout was set to 1 second. */ atimo = timo + time_second; while (time_second < atimo) { aio.iov_base = (caddr_t) &gctx->reply; aio.iov_len = sizeof(gctx->reply); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_offset = 0; auio.uio_resid = sizeof(gctx->reply); auio.uio_td = td; rcvflg = 0; error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg); gctx->secs = time_second - gctx->starttime; for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (bootpc_ifctx_isresolved(ifctx) != 0 || bootpc_ifctx_isfailed(ifctx) != 0) continue; ifctx->call.secs = htons(gctx->secs); } if (error == EWOULDBLOCK) continue; if (error != 0) goto out; len = sizeof(gctx->reply) - auio.uio_resid; /* Do we have the required number of bytes ? */ if (len < BOOTP_MIN_LEN) continue; gctx->replylen = len; /* Is it a reply? */ if (gctx->reply.op != BOOTP_REPLY) continue; /* Is this an answer to our query */ for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (gctx->reply.xid != ifctx->call.xid) continue; /* Same HW address size ? */ if (gctx->reply.hlen != ifctx->call.hlen) continue; /* Correct HW address ? */ if (bcmp(gctx->reply.chaddr, ifctx->call.chaddr, ifctx->call.hlen) != 0) continue; break; } if (ifctx != NULL) { s = bootpc_tag(&gctx->tmptag, &gctx->reply, gctx->replylen, TAG_DHCP_MSGTYPE); if (s != NULL) { switch (*s) { case DHCP_OFFER: s = "DHCP Offer"; break; case DHCP_ACK: s = "DHCP Ack"; break; default: s = "DHCP (unexpected)"; break; } } else s = "BOOTP Reply"; printf("Received %s packet" " on %s from ", s, ifctx->ireq.ifr_name); print_in_addr(gctx->reply.siaddr); if (gctx->reply.giaddr.s_addr != htonl(INADDR_ANY)) { printf(" via "); print_in_addr(gctx->reply.giaddr); } if (bootpc_received(gctx, ifctx) != 0) { printf(" (accepted)"); if (ifctx->outstanding) { ifctx->outstanding = 0; outstanding--; } /* Network settle delay */ if (outstanding == 0) atimo = time_second + BOOTP_SETTLE_DELAY; } else printf(" (ignored)"); if (ifctx->gotrootpath) { gotrootpath = 1; rtimo = time_second + BOOTP_SETTLE_DELAY; printf(" (got root path)"); } else printf(" (no root path)"); printf("\n"); } } /* while secs */ #ifdef BOOTP_TIMEOUT if (gctx->secs > BOOTP_TIMEOUT && BOOTP_TIMEOUT > 0) break; #endif /* Force a retry if halfway in DHCP negotiation */ retry = 0; for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (ifctx->state == IF_DHCP_OFFERED) { if (ifctx->dhcpquerytype == DHCP_DISCOVER) retry = 1; else ifctx->state = IF_DHCP_UNRESOLVED; } } if (retry != 0) continue; if (gotrootpath != 0) { gctx->gotrootpath = gotrootpath; if (rtimo != 0 && time_second >= rtimo) break; } } /* forever send/receive */ /* * XXX: These are errors of varying seriousness being silently * ignored */ for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (bootpc_ifctx_isresolved(ifctx) == 0) { printf("%s timeout for interface %s\n", ifctx->dhcpquerytype != DHCP_NOMSG ? "DHCP" : "BOOTP", ifctx->ireq.ifr_name); } } if (gctx->gotrootpath != 0) { #if 0 printf("Got a root path, ignoring remaining timeout\n"); #endif error = 0; goto out; } #ifndef BOOTP_NFSROOT for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { if (bootpc_ifctx_isresolved(ifctx) != 0) { error = 0; goto out; } } #endif error = ETIMEDOUT; goto out; gotreply: out: soclose(so); out0: return error; } static int bootpc_fakeup_interface(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td) { struct sockaddr_in *sin; int error; struct ifreq *ireq; struct socket *so; struct ifaddr *ifa; struct sockaddr_dl *sdl; error = socreate(AF_INET, &ifctx->so, SOCK_DGRAM, 0, td->td_ucred, td); if (error != 0) panic("nfs_boot: socreate, error=%d", error); ireq = &ifctx->ireq; so = ifctx->so; /* * Bring up the interface. * * Get the old interface flags and or IFF_UP into them; if * IFF_UP set blindly, interface selection can be clobbered. */ error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)ireq, td); if (error != 0) panic("bootpc_fakeup_interface: GIFFLAGS, error=%d", error); ireq->ifr_flags |= IFF_UP; error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)ireq, td); if (error != 0) panic("bootpc_fakeup_interface: SIFFLAGS, error=%d", error); /* * Do enough of ifconfig(8) so that the chosen interface * can talk to the servers. (just set the address) */ /* addr is 0.0.0.0 */ sin = (struct sockaddr_in *) &ireq->ifr_addr; clear_sinaddr(sin); error = ifioctl(so, SIOCSIFADDR, (caddr_t) ireq, td); if (error != 0 && (error != EEXIST || ifctx == gctx->interfaces)) panic("bootpc_fakeup_interface: " "set if addr, error=%d", error); /* netmask is 255.0.0.0 */ sin = (struct sockaddr_in *) &ireq->ifr_addr; clear_sinaddr(sin); sin->sin_addr.s_addr = htonl(0xff000000u); error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, td); if (error != 0) panic("bootpc_fakeup_interface: set if netmask, error=%d", error); /* Broadcast is 255.255.255.255 */ sin = (struct sockaddr_in *)&ireq->ifr_addr; clear_sinaddr(sin); clear_sinaddr(&ifctx->broadcast); sin->sin_addr.s_addr = htonl(INADDR_BROADCAST); ifctx->broadcast.sin_addr.s_addr = sin->sin_addr.s_addr; error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, td); if (error != 0) panic("bootpc_fakeup_interface: " "set if broadcast addr, error=%d", error); /* Get HW address */ sdl = NULL; TAILQ_FOREACH(ifa, &ifctx->ifp->if_addrhead, ifa_link) if (ifa->ifa_addr->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl->sdl_type == IFT_ETHER) break; } if (sdl == NULL) panic("bootpc: Unable to find HW address for %s", ifctx->ireq.ifr_name); ifctx->sdl = sdl; return error; } static int bootpc_adjust_interface(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td) { int error; struct sockaddr_in defdst; struct sockaddr_in defmask; struct sockaddr_in *sin; struct ifreq *ireq; struct socket *so; struct sockaddr_in *myaddr; struct sockaddr_in *netmask; struct sockaddr_in *gw; ireq = &ifctx->ireq; so = ifctx->so; myaddr = &ifctx->myaddr; netmask = &ifctx->netmask; gw = &ifctx->gw; if (bootpc_ifctx_isresolved(ifctx) == 0) { /* Shutdown interfaces where BOOTP failed */ printf("Shutdown interface %s\n", ifctx->ireq.ifr_name); error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)ireq, td); if (error != 0) panic("bootpc_adjust_interface: " "SIOCGIFFLAGS, error=%d", error); ireq->ifr_flags &= ~IFF_UP; error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)ireq, td); if (error != 0) panic("bootpc_adjust_interface: " "SIOCSIFFLAGS, error=%d", error); sin = (struct sockaddr_in *) &ireq->ifr_addr; clear_sinaddr(sin); error = ifioctl(so, SIOCDIFADDR, (caddr_t) ireq, td); if (error != 0 && (error != EEXIST || ifctx == gctx->interfaces)) panic("bootpc_adjust_interface: " "SIOCDIFADDR, error=%d", error); return 0; } printf("Adjusted interface %s\n", ifctx->ireq.ifr_name); /* * Do enough of ifconfig(8) so that the chosen interface * can talk to the servers. (just set the address) */ bcopy(netmask, &ireq->ifr_addr, sizeof(*netmask)); error = ifioctl(so, SIOCSIFNETMASK, (caddr_t) ireq, td); if (error != 0) panic("bootpc_adjust_interface: " "set if netmask, error=%d", error); /* Broadcast is with host part of IP address all 1's */ sin = (struct sockaddr_in *) &ireq->ifr_addr; clear_sinaddr(sin); sin->sin_addr.s_addr = myaddr->sin_addr.s_addr | ~ netmask->sin_addr.s_addr; error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t) ireq, td); if (error != 0) panic("bootpc_adjust_interface: " "set if broadcast addr, error=%d", error); bcopy(myaddr, &ireq->ifr_addr, sizeof(*myaddr)); error = ifioctl(so, SIOCSIFADDR, (caddr_t) ireq, td); if (error != 0 && (error != EEXIST || ifctx == gctx->interfaces)) panic("bootpc_adjust_interface: " "set if addr, error=%d", error); /* Add new default route */ if (ifctx->gotgw != 0 || gctx->gotgw == 0) { clear_sinaddr(&defdst); clear_sinaddr(&defmask); /* XXX MRT just table 0 */ error = rtrequest_fib(RTM_ADD, (struct sockaddr *) &defdst, (struct sockaddr *) gw, (struct sockaddr *) &defmask, (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL, 0); if (error != 0) { printf("bootpc_adjust_interface: " "add net route, error=%d\n", error); return error; } } return 0; } static int setfs(struct sockaddr_in *addr, char *path, char *p, const struct in_addr *siaddr) { if (getip(&p, &addr->sin_addr) == 0) { if (siaddr != NULL && *p == '/') bcopy(siaddr, &addr->sin_addr, sizeof(struct in_addr)); else return 0; } else { if (*p != ':') return 0; p++; } addr->sin_len = sizeof(struct sockaddr_in); addr->sin_family = AF_INET; strlcpy(path, p, MNAMELEN); return 1; } static int getip(char **ptr, struct in_addr *addr) { char *p; unsigned int ip; int val; p = *ptr; ip = 0; if (((val = getdec(&p)) < 0) || (val > 255)) return 0; ip = val << 24; if (*p != '.') return 0; p++; if (((val = getdec(&p)) < 0) || (val > 255)) return 0; ip |= (val << 16); if (*p != '.') return 0; p++; if (((val = getdec(&p)) < 0) || (val > 255)) return 0; ip |= (val << 8); if (*p != '.') return 0; p++; if (((val = getdec(&p)) < 0) || (val > 255)) return 0; ip |= val; addr->s_addr = htonl(ip); *ptr = p; return 1; } static int getdec(char **ptr) { char *p; int ret; p = *ptr; ret = 0; if ((*p < '0') || (*p > '9')) return -1; while ((*p >= '0') && (*p <= '9')) { ret = ret * 10 + (*p - '0'); p++; } *ptr = p; return ret; } static void mountopts(struct nfs_args *args, char *p) { args->version = NFS_ARGSVERSION; args->rsize = BOOTP_BLOCKSIZE; args->wsize = BOOTP_BLOCKSIZE; args->flags = NFSMNT_RSIZE | NFSMNT_WSIZE | NFSMNT_RESVPORT; args->sotype = SOCK_DGRAM; if (p != NULL) nfs_parse_options(p, args); } static int xdr_opaque_decode(struct mbuf **mptr, u_char *buf, int len) { struct mbuf *m; int alignedlen; m = *mptr; alignedlen = ( len + 3 ) & ~3; if (m->m_len < alignedlen) { m = m_pullup(m, alignedlen); if (m == NULL) { *mptr = NULL; return EBADRPC; } } bcopy(mtod(m, u_char *), buf, len); m_adj(m, alignedlen); *mptr = m; return 0; } static int xdr_int_decode(struct mbuf **mptr, int *iptr) { u_int32_t i; if (xdr_opaque_decode(mptr, (u_char *) &i, sizeof(u_int32_t)) != 0) return EBADRPC; *iptr = fxdr_unsigned(u_int32_t, i); return 0; } static void print_sin_addr(struct sockaddr_in *sin) { print_in_addr(sin->sin_addr); } static void print_in_addr(struct in_addr addr) { unsigned int ip; ip = ntohl(addr.s_addr); printf("%d.%d.%d.%d", ip >> 24, (ip >> 16) & 255, (ip >> 8) & 255, ip & 255); } static void bootpc_compose_query(struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx, struct thread *td) { unsigned char *vendp; unsigned char vendor_client[64]; uint32_t leasetime; uint8_t vendor_client_len; ifctx->gotrootpath = 0; bzero((caddr_t) &ifctx->call, sizeof(ifctx->call)); /* bootpc part */ ifctx->call.op = BOOTP_REQUEST; /* BOOTREQUEST */ ifctx->call.htype = 1; /* 10mb ethernet */ ifctx->call.hlen = ifctx->sdl->sdl_alen;/* Hardware address length */ ifctx->call.hops = 0; if (bootpc_ifctx_isunresolved(ifctx) != 0) ifctx->xid++; ifctx->call.xid = txdr_unsigned(ifctx->xid); bcopy(LLADDR(ifctx->sdl), &ifctx->call.chaddr, ifctx->sdl->sdl_alen); vendp = ifctx->call.vend; *vendp++ = 99; /* RFC1048 cookie */ *vendp++ = 130; *vendp++ = 83; *vendp++ = 99; *vendp++ = TAG_MAXMSGSIZE; *vendp++ = 2; *vendp++ = (sizeof(struct bootp_packet) >> 8) & 255; *vendp++ = sizeof(struct bootp_packet) & 255; snprintf(vendor_client, sizeof(vendor_client), "%s:%s:%s", ostype, MACHINE, osrelease); vendor_client_len = strlen(vendor_client); *vendp++ = TAG_VENDOR_INDENTIFIER; *vendp++ = vendor_client_len; memcpy(vendp, vendor_client, vendor_client_len); vendp += vendor_client_len;; ifctx->dhcpquerytype = DHCP_NOMSG; switch (ifctx->state) { case IF_DHCP_UNRESOLVED: *vendp++ = TAG_DHCP_MSGTYPE; *vendp++ = 1; *vendp++ = DHCP_DISCOVER; ifctx->dhcpquerytype = DHCP_DISCOVER; ifctx->gotdhcpserver = 0; break; case IF_DHCP_OFFERED: *vendp++ = TAG_DHCP_MSGTYPE; *vendp++ = 1; *vendp++ = DHCP_REQUEST; ifctx->dhcpquerytype = DHCP_REQUEST; *vendp++ = TAG_DHCP_REQ_ADDR; *vendp++ = 4; memcpy(vendp, &ifctx->reply.yiaddr, 4); vendp += 4; if (ifctx->gotdhcpserver != 0) { *vendp++ = TAG_DHCP_SERVERID; *vendp++ = 4; memcpy(vendp, &ifctx->dhcpserver, 4); vendp += 4; } *vendp++ = TAG_DHCP_LEASETIME; *vendp++ = 4; leasetime = htonl(300); memcpy(vendp, &leasetime, 4); vendp += 4; break; default: break; } *vendp = TAG_END; ifctx->call.secs = 0; ifctx->call.flags = htons(0x8000); /* We need a broadcast answer */ } static int bootpc_hascookie(struct bootp_packet *bp) { return (bp->vend[0] == 99 && bp->vend[1] == 130 && bp->vend[2] == 83 && bp->vend[3] == 99); } static void bootpc_tag_helper(struct bootpc_tagcontext *tctx, unsigned char *start, int len, int tag) { unsigned char *j; unsigned char *ej; unsigned char code; if (tctx->badtag != 0 || tctx->badopt != 0) return; j = start; ej = j + len; while (j < ej) { code = *j++; if (code == TAG_PAD) continue; if (code == TAG_END) return; if (j >= ej || j + *j + 1 > ej) { tctx->badopt = 1; return; } len = *j++; if (code == tag) { if (tctx->taglen + len > TAG_MAXLEN) { tctx->badtag = 1; return; } tctx->foundopt = 1; if (len > 0) memcpy(tctx->buf + tctx->taglen, j, len); tctx->taglen += len; } if (code == TAG_OVERLOAD) tctx->overload = *j; j += len; } } static unsigned char * bootpc_tag(struct bootpc_tagcontext *tctx, struct bootp_packet *bp, int len, int tag) { tctx->overload = 0; tctx->badopt = 0; tctx->badtag = 0; tctx->foundopt = 0; tctx->taglen = 0; if (bootpc_hascookie(bp) == 0) return NULL; bootpc_tag_helper(tctx, &bp->vend[4], (unsigned char *) bp + len - &bp->vend[4], tag); if ((tctx->overload & OVERLOAD_FILE) != 0) bootpc_tag_helper(tctx, (unsigned char *) bp->file, sizeof(bp->file), tag); if ((tctx->overload & OVERLOAD_SNAME) != 0) bootpc_tag_helper(tctx, (unsigned char *) bp->sname, sizeof(bp->sname), tag); if (tctx->badopt != 0 || tctx->badtag != 0 || tctx->foundopt == 0) return NULL; tctx->buf[tctx->taglen] = '\0'; return tctx->buf; } static void bootpc_decode_reply(struct nfsv3_diskless *nd, struct bootpc_ifcontext *ifctx, struct bootpc_globalcontext *gctx) { char *p; unsigned int ip; ifctx->gotgw = 0; ifctx->gotnetmask = 0; clear_sinaddr(&ifctx->myaddr); clear_sinaddr(&ifctx->netmask); clear_sinaddr(&ifctx->gw); ifctx->myaddr.sin_addr = ifctx->reply.yiaddr; ip = ntohl(ifctx->myaddr.sin_addr.s_addr); printf("%s at ", ifctx->ireq.ifr_name); print_sin_addr(&ifctx->myaddr); printf(" server "); print_in_addr(ifctx->reply.siaddr); ifctx->gw.sin_addr = ifctx->reply.giaddr; if (ifctx->reply.giaddr.s_addr != htonl(INADDR_ANY)) { printf(" via gateway "); print_in_addr(ifctx->reply.giaddr); } /* This call used for the side effect (overload flag) */ (void) bootpc_tag(&gctx->tmptag, &ifctx->reply, ifctx->replylen, TAG_END); if ((gctx->tmptag.overload & OVERLOAD_SNAME) == 0) if (ifctx->reply.sname[0] != '\0') printf(" server name %s", ifctx->reply.sname); if ((gctx->tmptag.overload & OVERLOAD_FILE) == 0) if (ifctx->reply.file[0] != '\0') printf(" boot file %s", ifctx->reply.file); printf("\n"); p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_SUBNETMASK); if (p != NULL) { if (gctx->tag.taglen != 4) panic("bootpc: subnet mask len is %d", gctx->tag.taglen); bcopy(p, &ifctx->netmask.sin_addr, 4); ifctx->gotnetmask = 1; printf("subnet mask "); print_sin_addr(&ifctx->netmask); printf(" "); } p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_ROUTERS); if (p != NULL) { /* Routers */ if (gctx->tag.taglen % 4) panic("bootpc: Router Len is %d", gctx->tag.taglen); if (gctx->tag.taglen > 0) { bcopy(p, &ifctx->gw.sin_addr, 4); printf("router "); print_sin_addr(&ifctx->gw); printf(" "); ifctx->gotgw = 1; gctx->gotgw = 1; } } p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_ROOT); if (p != NULL) { if (gctx->setrootfs != NULL) { printf("rootfs %s (ignored) ", p); } else if (setfs(&nd->root_saddr, nd->root_hostnam, p, &ifctx->reply.siaddr)) { if (*p == '/') { printf("root_server "); print_sin_addr(&nd->root_saddr); printf(" "); } printf("rootfs %s ", p); gctx->gotrootpath = 1; ifctx->gotrootpath = 1; gctx->setrootfs = ifctx; p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_ROOTOPTS); if (p != NULL) { mountopts(&nd->root_args, p); printf("rootopts %s ", p); } } else panic("Failed to set rootfs to %s", p); } p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_HOSTNAME); if (p != NULL) { if (gctx->tag.taglen >= MAXHOSTNAMELEN) panic("bootpc: hostname >= %d bytes", MAXHOSTNAMELEN); if (gctx->sethostname != NULL) { printf("hostname %s (ignored) ", p); } else { strcpy(nd->my_hostnam, p); + mtx_lock(&hostname_mtx); strcpy(hostname, p); printf("hostname %s ", hostname); + mtx_unlock(&hostname_mtx); gctx->sethostname = ifctx; } } p = bootpc_tag(&gctx->tag, &ifctx->reply, ifctx->replylen, TAG_COOKIE); if (p != NULL) { /* store in a sysctl variable */ int i, l = sizeof(bootp_cookie) - 1; for (i = 0; i < l && p[i] != '\0'; i++) bootp_cookie[i] = p[i]; p[i] = '\0'; } printf("\n"); if (ifctx->gotnetmask == 0) { if (IN_CLASSA(ntohl(ifctx->myaddr.sin_addr.s_addr))) ifctx->netmask.sin_addr.s_addr = htonl(IN_CLASSA_NET); else if (IN_CLASSB(ntohl(ifctx->myaddr.sin_addr.s_addr))) ifctx->netmask.sin_addr.s_addr = htonl(IN_CLASSB_NET); else ifctx->netmask.sin_addr.s_addr = htonl(IN_CLASSC_NET); } if (ifctx->gotgw == 0) { /* Use proxyarp */ ifctx->gw.sin_addr.s_addr = ifctx->myaddr.sin_addr.s_addr; } } void bootpc_init(void) { struct bootpc_ifcontext *ifctx, *nctx; /* Interface BOOTP contexts */ struct bootpc_globalcontext *gctx; /* Global BOOTP context */ struct ifnet *ifp; int error; #ifndef BOOTP_WIRED_TO int ifcnt; #endif struct nfsv3_diskless *nd; struct thread *td; nd = &nfsv3_diskless; td = curthread; /* * If already filled in, don't touch it here */ if (nfs_diskless_valid != 0) return; gctx = malloc(sizeof(*gctx), M_TEMP, M_WAITOK | M_ZERO); if (gctx == NULL) panic("Failed to allocate bootp global context structure"); gctx->xid = ~0xFFFF; gctx->starttime = time_second; /* * Find a network interface. */ #ifdef BOOTP_WIRED_TO printf("bootpc_init: wired to interface '%s'\n", __XSTRING(BOOTP_WIRED_TO)); allocifctx(gctx); #else /* * Preallocate interface context storage, if another interface * attaches and wins the race, it won't be eligible for bootp. */ IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&ifnet), ifcnt = 0; ifp != NULL; ifp = TAILQ_NEXT(ifp, if_link)) { if ((ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT | IFF_BROADCAST)) != IFF_BROADCAST) continue; ifcnt++; } IFNET_RUNLOCK(); if (ifcnt == 0) panic("bootpc_init: no eligible interfaces"); for (; ifcnt > 0; ifcnt--) allocifctx(gctx); #endif IFNET_RLOCK(); for (ifp = TAILQ_FIRST(&ifnet), ifctx = gctx->interfaces; ifp != NULL && ifctx != NULL; ifp = TAILQ_NEXT(ifp, if_link)) { strlcpy(ifctx->ireq.ifr_name, ifp->if_xname, sizeof(ifctx->ireq.ifr_name)); #ifdef BOOTP_WIRED_TO if (strcmp(ifctx->ireq.ifr_name, __XSTRING(BOOTP_WIRED_TO)) != 0) continue; #else if ((ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT | IFF_BROADCAST)) != IFF_BROADCAST) continue; #endif ifctx->ifp = ifp; ifctx = ifctx->next; } IFNET_RUNLOCK(); if (gctx->interfaces == NULL || gctx->interfaces->ifp == NULL) { #ifdef BOOTP_WIRED_TO panic("bootpc_init: Could not find interface specified " "by BOOTP_WIRED_TO: " __XSTRING(BOOTP_WIRED_TO)); #else panic("bootpc_init: no suitable interface"); #endif } for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) bootpc_fakeup_interface(ifctx, gctx, td); for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) bootpc_compose_query(ifctx, gctx, td); error = bootpc_call(gctx, td); if (error != 0) { #ifdef BOOTP_NFSROOT panic("BOOTP call failed"); #else printf("BOOTP call failed\n"); #endif } rootdevnames[0] = "nfs:"; mountopts(&nd->root_args, NULL); for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) if (bootpc_ifctx_isresolved(ifctx) != 0) bootpc_decode_reply(nd, ifctx, gctx); #ifdef BOOTP_NFSROOT if (gctx->gotrootpath == 0) panic("bootpc: No root path offered"); #endif for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) { bootpc_adjust_interface(ifctx, gctx, td); soclose(ifctx->so); } for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) if (ifctx->gotrootpath != 0) break; if (ifctx == NULL) { for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = ifctx->next) if (bootpc_ifctx_isresolved(ifctx) != 0) break; } if (ifctx == NULL) goto out; if (gctx->gotrootpath != 0) { setenv("boot.netif.name", ifctx->ifp->if_xname); error = md_mount(&nd->root_saddr, nd->root_hostnam, nd->root_fh, &nd->root_fhsize, &nd->root_args, td); if (error != 0) panic("nfs_boot: mountd root, error=%d", error); nfs_diskless_valid = 3; } strcpy(nd->myif.ifra_name, ifctx->ireq.ifr_name); bcopy(&ifctx->myaddr, &nd->myif.ifra_addr, sizeof(ifctx->myaddr)); bcopy(&ifctx->myaddr, &nd->myif.ifra_broadaddr, sizeof(ifctx->myaddr)); ((struct sockaddr_in *) &nd->myif.ifra_broadaddr)->sin_addr.s_addr = ifctx->myaddr.sin_addr.s_addr | ~ ifctx->netmask.sin_addr.s_addr; bcopy(&ifctx->netmask, &nd->myif.ifra_mask, sizeof(ifctx->netmask)); out: for (ifctx = gctx->interfaces; ifctx != NULL; ifctx = nctx) { nctx = ifctx->next; free(ifctx, M_TEMP); } free(gctx, M_TEMP); } /* * RPC: mountd/mount * Given a server pathname, get an NFS file handle. * Also, sets sin->sin_port to the NFS service port. */ static int md_mount(struct sockaddr_in *mdsin, char *path, u_char *fhp, int *fhsizep, struct nfs_args *args, struct thread *td) { struct mbuf *m; int error; int authunixok; int authcount; int authver; /* XXX honor v2/v3 flags in args->flags? */ #ifdef BOOTP_NFSV3 /* First try NFS v3 */ /* Get port number for MOUNTD. */ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER3, &mdsin->sin_port, td); if (error == 0) { m = xdr_string_encode(path, strlen(path)); /* Do RPC to mountd. */ error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER3, RPCMNT_MOUNT, &m, NULL, td); } if (error == 0) { args->flags |= NFSMNT_NFSV3; } else { #endif /* Fallback to NFS v2 */ /* Get port number for MOUNTD. */ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER1, &mdsin->sin_port, td); if (error != 0) return error; m = xdr_string_encode(path, strlen(path)); /* Do RPC to mountd. */ error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER1, RPCMNT_MOUNT, &m, NULL, td); if (error != 0) return error; /* message already freed */ #ifdef BOOTP_NFSV3 } #endif if (xdr_int_decode(&m, &error) != 0 || error != 0) goto bad; if ((args->flags & NFSMNT_NFSV3) != 0) { if (xdr_int_decode(&m, fhsizep) != 0 || *fhsizep > NFSX_V3FHMAX || *fhsizep <= 0) goto bad; } else *fhsizep = NFSX_V2FH; if (xdr_opaque_decode(&m, fhp, *fhsizep) != 0) goto bad; if (args->flags & NFSMNT_NFSV3) { if (xdr_int_decode(&m, &authcount) != 0) goto bad; authunixok = 0; if (authcount < 0 || authcount > 100) goto bad; while (authcount > 0) { if (xdr_int_decode(&m, &authver) != 0) goto bad; if (authver == RPCAUTH_UNIX) authunixok = 1; authcount--; } if (authunixok == 0) goto bad; } /* Set port number for NFS use. */ error = krpc_portmap(mdsin, NFS_PROG, (args->flags & NFSMNT_NFSV3) ? NFS_VER3 : NFS_VER2, &mdsin->sin_port, td); goto out; bad: error = EBADRPC; out: m_freem(m); return error; } SYSINIT(bootp_rootconf, SI_SUB_ROOT_CONF, SI_ORDER_FIRST, bootpc_init, NULL); Index: head/sys/nfsclient/nfs_vfsops.c =================================================================== --- head/sys/nfsclient/nfs_vfsops.c (revision 180290) +++ head/sys/nfsclient/nfs_vfsops.c (revision 180291) @@ -1,1171 +1,1173 @@ /*- * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vfsops.c 8.12 (Berkeley) 5/20/95 */ #include __FBSDID("$FreeBSD$"); #include "opt_bootp.h" #include "opt_nfsroot.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DEFINE(M_NFSREQ, "nfsclient_req", "NFS request header"); MALLOC_DEFINE(M_NFSBIGFH, "nfsclient_bigfh", "NFS version 3 file handle"); MALLOC_DEFINE(M_NFSDIROFF, "nfsclient_diroff", "NFS directory offset data"); MALLOC_DEFINE(M_NFSHASH, "nfsclient_hash", "NFS hash tables"); MALLOC_DEFINE(M_NFSDIRECTIO, "nfsclient_directio", "NFS Direct IO async write state"); uma_zone_t nfsmount_zone; struct nfsstats nfsstats; SYSCTL_NODE(_vfs, OID_AUTO, nfs, CTLFLAG_RW, 0, "NFS filesystem"); SYSCTL_STRUCT(_vfs_nfs, NFS_NFSSTATS, nfsstats, CTLFLAG_RW, &nfsstats, nfsstats, "S,nfsstats"); static int nfs_ip_paranoia = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_ip_paranoia, CTLFLAG_RW, &nfs_ip_paranoia, 0, ""); #ifdef NFS_DEBUG int nfs_debug; SYSCTL_INT(_vfs_nfs, OID_AUTO, debug, CTLFLAG_RW, &nfs_debug, 0, ""); #endif static int nfs_tprintf_initial_delay = NFS_TPRINTF_INITIAL_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_INITIAL_DELAY, downdelayinitial, CTLFLAG_RW, &nfs_tprintf_initial_delay, 0, ""); /* how long between console messages "nfs server foo not responding" */ static int nfs_tprintf_delay = NFS_TPRINTF_DELAY; SYSCTL_INT(_vfs_nfs, NFS_TPRINTF_DELAY, downdelayinterval, CTLFLAG_RW, &nfs_tprintf_delay, 0, ""); static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp); static int mountnfs(struct nfs_args *, struct mount *, struct sockaddr *, char *, struct vnode **, struct ucred *cred); static vfs_mount_t nfs_mount; static vfs_cmount_t nfs_cmount; static vfs_unmount_t nfs_unmount; static vfs_root_t nfs_root; static vfs_statfs_t nfs_statfs; static vfs_sync_t nfs_sync; static vfs_sysctl_t nfs_sysctl; /* * nfs vfs operations. */ static struct vfsops nfs_vfsops = { .vfs_init = nfs_init, .vfs_mount = nfs_mount, .vfs_cmount = nfs_cmount, .vfs_root = nfs_root, .vfs_statfs = nfs_statfs, .vfs_sync = nfs_sync, .vfs_uninit = nfs_uninit, .vfs_unmount = nfs_unmount, .vfs_sysctl = nfs_sysctl, }; VFS_SET(nfs_vfsops, nfs, VFCF_NETWORK); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfs, 1); static struct nfs_rpcops nfs_rpcops = { nfs_readrpc, nfs_writerpc, nfs_writebp, nfs_readlinkrpc, nfs_invaldir, nfs_commit, }; /* * This structure must be filled in by a primary bootstrap or bootstrap * server for a diskless/dataless machine. It is initialized below just * to ensure that it is allocated to initialized data (.data not .bss). */ struct nfs_diskless nfs_diskless = { { { 0 } } }; struct nfsv3_diskless nfsv3_diskless = { { { 0 } } }; int nfs_diskless_valid = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, diskless_valid, CTLFLAG_RD, &nfs_diskless_valid, 0, ""); SYSCTL_STRING(_vfs_nfs, OID_AUTO, diskless_rootpath, CTLFLAG_RD, nfsv3_diskless.root_hostnam, 0, ""); SYSCTL_OPAQUE(_vfs_nfs, OID_AUTO, diskless_rootaddr, CTLFLAG_RD, &nfsv3_diskless.root_saddr, sizeof nfsv3_diskless.root_saddr, "%Ssockaddr_in", ""); void nfsargs_ntoh(struct nfs_args *); static int nfs_mountdiskless(char *, struct sockaddr_in *, struct nfs_args *, struct thread *, struct vnode **, struct mount *); static void nfs_convert_diskless(void); static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs); int nfs_iosize(struct nfsmount *nmp) { int iosize; /* * Calculate the size used for io buffers. Use the larger * of the two sizes to minimise nfs requests but make sure * that it is at least one VM page to avoid wasting buffer * space. */ iosize = imax(nmp->nm_rsize, nmp->nm_wsize); iosize = imax(iosize, PAGE_SIZE); return (iosize); } static void nfs_convert_oargs(struct nfs_args *args, struct onfs_args *oargs) { args->version = NFS_ARGSVERSION; args->addr = oargs->addr; args->addrlen = oargs->addrlen; args->sotype = oargs->sotype; args->proto = oargs->proto; args->fh = oargs->fh; args->fhsize = oargs->fhsize; args->flags = oargs->flags; args->wsize = oargs->wsize; args->rsize = oargs->rsize; args->readdirsize = oargs->readdirsize; args->timeo = oargs->timeo; args->retrans = oargs->retrans; args->maxgrouplist = oargs->maxgrouplist; args->readahead = oargs->readahead; args->deadthresh = oargs->deadthresh; args->hostname = oargs->hostname; } static void nfs_convert_diskless(void) { bcopy(&nfs_diskless.myif, &nfsv3_diskless.myif, sizeof(struct ifaliasreq)); bcopy(&nfs_diskless.mygateway, &nfsv3_diskless.mygateway, sizeof(struct sockaddr_in)); nfs_convert_oargs(&nfsv3_diskless.root_args,&nfs_diskless.root_args); if (nfsv3_diskless.root_args.flags & NFSMNT_NFSV3) { nfsv3_diskless.root_fhsize = NFSX_V3FH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V3FH); } else { nfsv3_diskless.root_fhsize = NFSX_V2FH; bcopy(nfs_diskless.root_fh, nfsv3_diskless.root_fh, NFSX_V2FH); } bcopy(&nfs_diskless.root_saddr,&nfsv3_diskless.root_saddr, sizeof(struct sockaddr_in)); bcopy(nfs_diskless.root_hostnam, nfsv3_diskless.root_hostnam, MNAMELEN); nfsv3_diskless.root_time = nfs_diskless.root_time; bcopy(nfs_diskless.my_hostnam, nfsv3_diskless.my_hostnam, MAXHOSTNAMELEN); nfs_diskless_valid = 3; } /* * nfs statfs call */ static int nfs_statfs(struct mount *mp, struct statfs *sbp, struct thread *td) { struct vnode *vp; struct nfs_statfs *sfp; caddr_t bpos, dpos; struct nfsmount *nmp = VFSTONFS(mp); int error = 0, v3 = (nmp->nm_flag & NFSMNT_NFSV3), retattr; struct mbuf *mreq, *mrep, *md, *mb; struct nfsnode *np; u_quad_t tquad; #ifndef nolint sfp = NULL; #endif error = vfs_busy(mp, LK_NOWAIT, NULL, td); if (error) return (error); error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) { vfs_unbusy(mp, td); return (error); } vp = NFSTOV(np); mtx_lock(&nmp->nm_mtx); if (v3 && (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) { mtx_unlock(&nmp->nm_mtx); (void)nfs_fsinfo(nmp, vp, td->td_ucred, td); } else mtx_unlock(&nmp->nm_mtx); nfsstats.rpccnt[NFSPROC_FSSTAT]++; mreq = nfsm_reqhead(vp, NFSPROC_FSSTAT, NFSX_FH(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_FSSTAT, td, td->td_ucred); if (v3) nfsm_postop_attr(vp, retattr); if (error) { if (mrep != NULL) m_freem(mrep); goto nfsmout; } sfp = nfsm_dissect(struct nfs_statfs *, NFSX_STATFS(v3)); mtx_lock(&nmp->nm_mtx); sbp->f_iosize = nfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); if (v3) { sbp->f_bsize = NFS_FABLKSIZE; tquad = fxdr_hyper(&sfp->sf_tbytes); sbp->f_blocks = tquad / NFS_FABLKSIZE; tquad = fxdr_hyper(&sfp->sf_fbytes); sbp->f_bfree = tquad / NFS_FABLKSIZE; tquad = fxdr_hyper(&sfp->sf_abytes); sbp->f_bavail = tquad / NFS_FABLKSIZE; sbp->f_files = (fxdr_unsigned(int32_t, sfp->sf_tfiles.nfsuquad[1]) & 0x7fffffff); sbp->f_ffree = (fxdr_unsigned(int32_t, sfp->sf_ffiles.nfsuquad[1]) & 0x7fffffff); } else { sbp->f_bsize = fxdr_unsigned(int32_t, sfp->sf_bsize); sbp->f_blocks = fxdr_unsigned(int32_t, sfp->sf_blocks); sbp->f_bfree = fxdr_unsigned(int32_t, sfp->sf_bfree); sbp->f_bavail = fxdr_unsigned(int32_t, sfp->sf_bavail); sbp->f_files = 0; sbp->f_ffree = 0; } m_freem(mrep); nfsmout: vput(vp); vfs_unbusy(mp, td); return (error); } /* * nfs version 3 fsinfo rpc call */ int nfs_fsinfo(struct nfsmount *nmp, struct vnode *vp, struct ucred *cred, struct thread *td) { struct nfsv3_fsinfo *fsp; u_int32_t pref, max; caddr_t bpos, dpos; int error = 0, retattr; struct mbuf *mreq, *mrep, *md, *mb; u_int64_t maxfsize; nfsstats.rpccnt[NFSPROC_FSINFO]++; mreq = nfsm_reqhead(vp, NFSPROC_FSINFO, NFSX_FH(1)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, 1); nfsm_request(vp, NFSPROC_FSINFO, td, cred); nfsm_postop_attr(vp, retattr); if (!error) { fsp = nfsm_dissect(struct nfsv3_fsinfo *, NFSX_V3FSINFO); pref = fxdr_unsigned(u_int32_t, fsp->fs_wtpref); mtx_lock(&nmp->nm_mtx); if (pref < nmp->nm_wsize && pref >= NFS_FABLKSIZE) nmp->nm_wsize = (pref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); max = fxdr_unsigned(u_int32_t, fsp->fs_wtmax); if (max < nmp->nm_wsize && max > 0) { nmp->nm_wsize = max & ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize == 0) nmp->nm_wsize = max; } pref = fxdr_unsigned(u_int32_t, fsp->fs_rtpref); if (pref < nmp->nm_rsize && pref >= NFS_FABLKSIZE) nmp->nm_rsize = (pref + NFS_FABLKSIZE - 1) & ~(NFS_FABLKSIZE - 1); max = fxdr_unsigned(u_int32_t, fsp->fs_rtmax); if (max < nmp->nm_rsize && max > 0) { nmp->nm_rsize = max & ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize == 0) nmp->nm_rsize = max; } pref = fxdr_unsigned(u_int32_t, fsp->fs_dtpref); if (pref < nmp->nm_readdirsize && pref >= NFS_DIRBLKSIZ) nmp->nm_readdirsize = (pref + NFS_DIRBLKSIZ - 1) & ~(NFS_DIRBLKSIZ - 1); if (max < nmp->nm_readdirsize && max > 0) { nmp->nm_readdirsize = max & ~(NFS_DIRBLKSIZ - 1); if (nmp->nm_readdirsize == 0) nmp->nm_readdirsize = max; } maxfsize = fxdr_hyper(&fsp->fs_maxfilesize); if (maxfsize > 0 && maxfsize < nmp->nm_maxfilesize) nmp->nm_maxfilesize = maxfsize; nmp->nm_mountp->mnt_stat.f_iosize = nfs_iosize(nmp); nmp->nm_state |= NFSSTA_GOTFSINFO; mtx_unlock(&nmp->nm_mtx); } m_freem(mrep); nfsmout: return (error); } /* * Mount a remote root fs via. nfs. This depends on the info in the * nfs_diskless structure that has been filled in properly by some primary * bootstrap. * It goes something like this: * - do enough of "ifconfig" by calling ifioctl() so that the system * can talk to the server * - If nfs_diskless.mygateway is filled in, use that address as * a default gateway. * - build the rootfs mount point and call mountnfs() to do the rest. * * It is assumed to be safe to read, modify, and write the nfsv3_diskless * structure, as well as other global NFS client variables here, as * nfs_mountroot() will be called once in the boot before any other NFS * client activity occurs. */ int nfs_mountroot(struct mount *mp, struct thread *td) { struct nfsv3_diskless *nd = &nfsv3_diskless; struct socket *so; struct vnode *vp; struct ifreq ir; int error, i; u_long l; char buf[128]; char *cp; #if defined(BOOTP_NFSROOT) && defined(BOOTP) bootpc_init(); /* use bootp to get nfs_diskless filled in */ #elif defined(NFS_ROOT) nfs_setup_diskless(); #endif if (nfs_diskless_valid == 0) return (-1); if (nfs_diskless_valid == 1) nfs_convert_diskless(); /* * XXX splnet, so networks will receive... */ splnet(); /* * Do enough of ifconfig(8) so that the critical net interface can * talk to the server. */ error = socreate(nd->myif.ifra_addr.sa_family, &so, nd->root_args.sotype, 0, td->td_ucred, td); if (error) panic("nfs_mountroot: socreate(%04x): %d", nd->myif.ifra_addr.sa_family, error); #if 0 /* XXX Bad idea */ /* * We might not have been told the right interface, so we pass * over the first ten interfaces of the same kind, until we get * one of them configured. */ for (i = strlen(nd->myif.ifra_name) - 1; nd->myif.ifra_name[i] >= '0' && nd->myif.ifra_name[i] <= '9'; nd->myif.ifra_name[i] ++) { error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if(!error) break; } #endif error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, td); if (error) panic("nfs_mountroot: SIOCAIFADDR: %d", error); if ((cp = getenv("boot.netif.mtu")) != NULL) { ir.ifr_mtu = strtol(cp, NULL, 10); bcopy(nd->myif.ifra_name, ir.ifr_name, IFNAMSIZ); freeenv(cp); error = ifioctl(so, SIOCSIFMTU, (caddr_t)&ir, td); if (error) printf("nfs_mountroot: SIOCSIFMTU: %d", error); } soclose(so); /* * If the gateway field is filled in, set it as the default route. * Note that pxeboot will set a default route of 0 if the route * is not set by the DHCP server. Check also for a value of 0 * to avoid panicking inappropriately in that situation. */ if (nd->mygateway.sin_len != 0 && nd->mygateway.sin_addr.s_addr != 0) { struct sockaddr_in mask, sin; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ error = rtrequest(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, NULL); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } /* * Create the rootfs mount point. */ nd->root_args.fh = nd->root_fh; nd->root_args.fhsize = nd->root_fhsize; l = ntohl(nd->root_saddr.sin_addr.s_addr); snprintf(buf, sizeof(buf), "%ld.%ld.%ld.%ld:%s", (l >> 24) & 0xff, (l >> 16) & 0xff, (l >> 8) & 0xff, (l >> 0) & 0xff, nd->root_hostnam); printf("NFS ROOT: %s\n", buf); nd->root_args.hostname = buf; if ((error = nfs_mountdiskless(buf, &nd->root_saddr, &nd->root_args, td, &vp, mp)) != 0) { return (error); } /* * This is not really an nfs issue, but it is much easier to * set hostname here and then let the "/etc/rc.xxx" files * mount the right /var based upon its preset value. */ + mtx_lock(&hostname_mtx); bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN); hostname[MAXHOSTNAMELEN - 1] = '\0'; for (i = 0; i < MAXHOSTNAMELEN; i++) if (hostname[i] == '\0') break; + mtx_unlock(&hostname_mtx); inittodr(ntohl(nd->root_time)); return (0); } /* * Internal version of mount system call for diskless setup. */ static int nfs_mountdiskless(char *path, struct sockaddr_in *sin, struct nfs_args *args, struct thread *td, struct vnode **vpp, struct mount *mp) { struct sockaddr *nam; int error; nam = sodupsockaddr((struct sockaddr *)sin, M_WAITOK); if ((error = mountnfs(args, mp, nam, path, vpp, td->td_ucred)) != 0) { printf("nfs_mountroot: mount %s on /: %d\n", path, error); return (error); } return (0); } static void nfs_decode_args(struct mount *mp, struct nfsmount *nmp, struct nfs_args *argp) { int s; int adjsock; int maxio; char *p; s = splnet(); /* * Set read-only flag if requested; otherwise, clear it if this is * an update. If this is not an update, then either the read-only * flag is already clear, or this is a root mount and it was set * intentionally at some previous point. */ if (vfs_getopt(mp->mnt_optnew, "ro", NULL, NULL) == 0) { MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); } else if (mp->mnt_flag & MNT_UPDATE) { MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); } /* * Silently clear NFSMNT_NOCONN if it's a TCP mount, it makes * no sense in that context. Also, set up appropriate retransmit * and soft timeout behavior. */ if (argp->sotype == SOCK_STREAM) { nmp->nm_flag &= ~NFSMNT_NOCONN; nmp->nm_flag |= NFSMNT_DUMBTIMR; nmp->nm_timeo = NFS_MAXTIMEO; nmp->nm_retry = NFS_RETRANS_TCP; } /* Also clear RDIRPLUS if not NFSv3, it crashes some servers */ if ((argp->flags & NFSMNT_NFSV3) == 0) nmp->nm_flag &= ~NFSMNT_RDIRPLUS; /* Re-bind if rsrvd port requested and wasn't on one */ adjsock = !(nmp->nm_flag & NFSMNT_RESVPORT) && (argp->flags & NFSMNT_RESVPORT); /* Also re-bind if we're switching to/from a connected UDP socket */ adjsock |= ((nmp->nm_flag & NFSMNT_NOCONN) != (argp->flags & NFSMNT_NOCONN)); /* Update flags atomically. Don't change the lock bits. */ nmp->nm_flag = argp->flags | nmp->nm_flag; splx(s); if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) { nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10; if (nmp->nm_timeo < NFS_MINTIMEO) nmp->nm_timeo = NFS_MINTIMEO; else if (nmp->nm_timeo > NFS_MAXTIMEO) nmp->nm_timeo = NFS_MAXTIMEO; } if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) { nmp->nm_retry = argp->retrans; if (nmp->nm_retry > NFS_MAXREXMIT) nmp->nm_retry = NFS_MAXREXMIT; } if (argp->flags & NFSMNT_NFSV3) { if (argp->sotype == SOCK_DGRAM) maxio = NFS_MAXDGRAMDATA; else maxio = NFS_MAXDATA; } else maxio = NFS_V2MAXDATA; if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) { nmp->nm_wsize = argp->wsize; /* Round down to multiple of blocksize */ nmp->nm_wsize &= ~(NFS_FABLKSIZE - 1); if (nmp->nm_wsize <= 0) nmp->nm_wsize = NFS_FABLKSIZE; } if (nmp->nm_wsize > maxio) nmp->nm_wsize = maxio; if (nmp->nm_wsize > MAXBSIZE) nmp->nm_wsize = MAXBSIZE; if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) { nmp->nm_rsize = argp->rsize; /* Round down to multiple of blocksize */ nmp->nm_rsize &= ~(NFS_FABLKSIZE - 1); if (nmp->nm_rsize <= 0) nmp->nm_rsize = NFS_FABLKSIZE; } if (nmp->nm_rsize > maxio) nmp->nm_rsize = maxio; if (nmp->nm_rsize > MAXBSIZE) nmp->nm_rsize = MAXBSIZE; if ((argp->flags & NFSMNT_READDIRSIZE) && argp->readdirsize > 0) { nmp->nm_readdirsize = argp->readdirsize; } if (nmp->nm_readdirsize > maxio) nmp->nm_readdirsize = maxio; if (nmp->nm_readdirsize > nmp->nm_rsize) nmp->nm_readdirsize = nmp->nm_rsize; if ((argp->flags & NFSMNT_ACREGMIN) && argp->acregmin >= 0) nmp->nm_acregmin = argp->acregmin; else nmp->nm_acregmin = NFS_MINATTRTIMO; if ((argp->flags & NFSMNT_ACREGMAX) && argp->acregmax >= 0) nmp->nm_acregmax = argp->acregmax; else nmp->nm_acregmax = NFS_MAXATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMIN) && argp->acdirmin >= 0) nmp->nm_acdirmin = argp->acdirmin; else nmp->nm_acdirmin = NFS_MINDIRATTRTIMO; if ((argp->flags & NFSMNT_ACDIRMAX) && argp->acdirmax >= 0) nmp->nm_acdirmax = argp->acdirmax; else nmp->nm_acdirmax = NFS_MAXDIRATTRTIMO; if (nmp->nm_acdirmin > nmp->nm_acdirmax) nmp->nm_acdirmin = nmp->nm_acdirmax; if (nmp->nm_acregmin > nmp->nm_acregmax) nmp->nm_acregmin = nmp->nm_acregmax; if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0) { if (argp->maxgrouplist <= NFS_MAXGRPS) nmp->nm_numgrps = argp->maxgrouplist; else nmp->nm_numgrps = NFS_MAXGRPS; } if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0) { if (argp->readahead <= NFS_MAXRAHEAD) nmp->nm_readahead = argp->readahead; else nmp->nm_readahead = NFS_MAXRAHEAD; } if ((argp->flags & NFSMNT_WCOMMITSIZE) && argp->wcommitsize >= 0) { if (argp->wcommitsize < nmp->nm_wsize) nmp->nm_wcommitsize = nmp->nm_wsize; else nmp->nm_wcommitsize = argp->wcommitsize; } if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 0) { if (argp->deadthresh <= NFS_MAXDEADTHRESH) nmp->nm_deadthresh = argp->deadthresh; else nmp->nm_deadthresh = NFS_MAXDEADTHRESH; } adjsock |= ((nmp->nm_sotype != argp->sotype) || (nmp->nm_soproto != argp->proto)); nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; if (nmp->nm_so && adjsock) { nfs_safedisconnect(nmp); if (nmp->nm_sotype == SOCK_DGRAM) while (nfs_connect(nmp, NULL)) { printf("nfs_args: retrying connect\n"); (void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0); } } strlcpy(nmp->nm_hostname, argp->hostname, sizeof(nmp->nm_hostname)); p = strchr(nmp->nm_hostname, ':'); if (p) *p = '\0'; } static const char *nfs_opts[] = { "from", "nfs_args", "noatime", "noexec", "suiddir", "nosuid", "nosymfollow", "union", "noclusterr", "noclusterw", "multilabel", "acls", "force", "update", "async", "dumbtimer", "noconn", "nolockd", "intr", "rdirplus", "resvport", "readdirsize", "soft", "hard", "mntudp", "tcp", "wsize", "rsize", "retrans", "acregmin", "acregmax", "acdirmin", "acdirmax", NULL }; /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the sockargs() call because sockargs() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_mount(struct mount *mp, struct thread *td) { struct nfs_args args = { .version = NFS_ARGSVERSION, .addr = NULL, .addrlen = sizeof (struct sockaddr_in), .sotype = SOCK_STREAM, .proto = 0, .fh = NULL, .fhsize = 0, .flags = NFSMNT_RESVPORT, .wsize = NFS_WSIZE, .rsize = NFS_RSIZE, .readdirsize = NFS_READDIRSIZE, .timeo = 10, .retrans = NFS_RETRANS, .maxgrouplist = NFS_MAXGRPS, .readahead = NFS_DEFRAHEAD, .wcommitsize = 0, /* was: NQ_DEFLEASE */ .deadthresh = NFS_MAXDEADTHRESH, /* was: NQ_DEADTHRESH */ .hostname = NULL, /* args version 4 */ .acregmin = NFS_MINATTRTIMO, .acregmax = NFS_MAXATTRTIMO, .acdirmin = NFS_MINDIRATTRTIMO, .acdirmax = NFS_MAXDIRATTRTIMO, }; int error; struct sockaddr *nam; struct vnode *vp; char hst[MNAMELEN]; size_t len; u_char nfh[NFSX_V3FHMAX]; if (vfs_filteropt(mp->mnt_optnew, nfs_opts)) { error = EINVAL; goto out; } if ((mp->mnt_flag & (MNT_ROOTFS | MNT_UPDATE)) == MNT_ROOTFS) { error = nfs_mountroot(mp, td); goto out; } error = vfs_copyopt(mp->mnt_optnew, "nfs_args", &args, sizeof args); if (error) goto out; if (args.version != NFS_ARGSVERSION) { error = EPROGMISMATCH; goto out; } if (mp->mnt_flag & MNT_UPDATE) { struct nfsmount *nmp = VFSTONFS(mp); if (nmp == NULL) { error = EIO; goto out; } /* * When doing an update, we can't change from or to * v3, switch lockd strategies or change cookie translation */ args.flags = (args.flags & ~(NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)) | (nmp->nm_flag & (NFSMNT_NFSV3 | NFSMNT_NOLOCKD /*|NFSMNT_XLATECOOKIE*/)); nfs_decode_args(mp, nmp, &args); goto out; } /* * Make the nfs_ip_paranoia sysctl serve as the default connection * or no-connection mode for those protocols that support * no-connection mode (the flag will be cleared later for protocols * that do not support no-connection mode). This will allow a client * to receive replies from a different IP then the request was * sent to. Note: default value for nfs_ip_paranoia is 1 (paranoid), * not 0. */ if (nfs_ip_paranoia == 0) args.flags |= NFSMNT_NOCONN; if (args.fhsize < 0 || args.fhsize > NFSX_V3FHMAX) { error = EINVAL; goto out; } error = copyin((caddr_t)args.fh, (caddr_t)nfh, args.fhsize); if (error) goto out; error = copyinstr(args.hostname, hst, MNAMELEN-1, &len); if (error) goto out; bzero(&hst[len], MNAMELEN - len); /* sockargs() call must be after above copyin() calls */ error = getsockaddr(&nam, (caddr_t)args.addr, args.addrlen); if (error) goto out; args.fh = nfh; error = mountnfs(&args, mp, nam, hst, &vp, td->td_ucred); out: if (!error) { MNT_ILOCK(mp); mp->mnt_kern_flag |= (MNTK_MPSAFE|MNTK_LOOKUP_SHARED); MNT_IUNLOCK(mp); } return (error); } /* * VFS Operations. * * mount system call * It seems a bit dumb to copyinstr() the host and path here and then * bcopy() them in mountnfs(), but I wanted to detect errors before * doing the sockargs() call because sockargs() allocates an mbuf and * an error after that means that I have to release the mbuf. */ /* ARGSUSED */ static int nfs_cmount(struct mntarg *ma, void *data, int flags, struct thread *td) { int error; struct nfs_args args; error = copyin(data, &args, sizeof (struct nfs_args)); if (error) return error; ma = mount_arg(ma, "nfs_args", &args, sizeof args); error = kernel_mount(ma, flags); return (error); } /* * Common code for mount and mountroot */ static int mountnfs(struct nfs_args *argp, struct mount *mp, struct sockaddr *nam, char *hst, struct vnode **vpp, struct ucred *cred) { struct nfsmount *nmp; struct nfsnode *np; int error; struct vattr attrs; if (mp->mnt_flag & MNT_UPDATE) { nmp = VFSTONFS(mp); printf("%s: MNT_UPDATE is no longer handled here\n", __func__); FREE(nam, M_SONAME); return (0); } else { nmp = uma_zalloc(nfsmount_zone, M_WAITOK); bzero((caddr_t)nmp, sizeof (struct nfsmount)); TAILQ_INIT(&nmp->nm_bufq); mp->mnt_data = nmp; } vfs_getnewfsid(mp); nmp->nm_mountp = mp; mtx_init(&nmp->nm_mtx, "NFSmount lock", NULL, MTX_DEF); /* * V2 can only handle 32 bit filesizes. A 4GB-1 limit may be too * high, depending on whether we end up with negative offsets in * the client or server somewhere. 2GB-1 may be safer. * * For V3, nfs_fsinfo will adjust this as necessary. Assume maximum * that we can handle until we find out otherwise. * XXX Our "safe" limit on the client is what we can store in our * buffer cache using signed(!) block numbers. */ if ((argp->flags & NFSMNT_NFSV3) == 0) nmp->nm_maxfilesize = 0xffffffffLL; else nmp->nm_maxfilesize = (u_int64_t)0x80000000 * DEV_BSIZE - 1; nmp->nm_timeo = NFS_TIMEO; nmp->nm_retry = NFS_RETRANS; if ((argp->flags & NFSMNT_NFSV3) && argp->sotype == SOCK_STREAM) { nmp->nm_wsize = nmp->nm_rsize = NFS_MAXDATA; } else { nmp->nm_wsize = NFS_WSIZE; nmp->nm_rsize = NFS_RSIZE; } nmp->nm_wcommitsize = hibufspace / (desiredvnodes / 1000); nmp->nm_readdirsize = NFS_READDIRSIZE; nmp->nm_numgrps = NFS_MAXGRPS; nmp->nm_readahead = NFS_DEFRAHEAD; nmp->nm_deadthresh = NFS_MAXDEADTHRESH; nmp->nm_tprintf_delay = nfs_tprintf_delay; if (nmp->nm_tprintf_delay < 0) nmp->nm_tprintf_delay = 0; nmp->nm_tprintf_initial_delay = nfs_tprintf_initial_delay; if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; nmp->nm_fhsize = argp->fhsize; bcopy((caddr_t)argp->fh, (caddr_t)nmp->nm_fh, argp->fhsize); bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN); nmp->nm_nam = nam; /* Set up the sockets and per-host congestion */ nmp->nm_sotype = argp->sotype; nmp->nm_soproto = argp->proto; nmp->nm_rpcops = &nfs_rpcops; nfs_decode_args(mp, nmp, argp); /* * For Connection based sockets (TCP,...) defer the connect until * the first request, in case the server is not responding. */ if (nmp->nm_sotype == SOCK_DGRAM && (error = nfs_connect(nmp, NULL))) goto bad; /* * This is silly, but it has to be set so that vinifod() works. * We do not want to do an nfs_statfs() here since we can get * stuck on a dead server and we are holding a lock on the mount * point. */ mtx_lock(&nmp->nm_mtx); mp->mnt_stat.f_iosize = nfs_iosize(nmp); mtx_unlock(&nmp->nm_mtx); /* * A reference count is needed on the nfsnode representing the * remote root. If this object is not persistent, then backward * traversals of the mount point (i.e. "..") will not work if * the nfsnode gets flushed out of the cache. Ufs does not have * this problem, because one can identify root inodes by their * number == ROOTINO (2). */ error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, LK_EXCLUSIVE); if (error) goto bad; *vpp = NFSTOV(np); /* * Get file attributes and transfer parameters for the * mountpoint. This has the side effect of filling in * (*vpp)->v_type with the correct value. */ if (argp->flags & NFSMNT_NFSV3) nfs_fsinfo(nmp, *vpp, curthread->td_ucred, curthread); else VOP_GETATTR(*vpp, &attrs, curthread->td_ucred, curthread); /* * Lose the lock but keep the ref. */ VOP_UNLOCK(*vpp, 0); return (0); bad: nfs_disconnect(nmp); mtx_destroy(&nmp->nm_mtx); uma_zfree(nfsmount_zone, nmp); FREE(nam, M_SONAME); return (error); } /* * unmount system call */ static int nfs_unmount(struct mount *mp, int mntflags, struct thread *td) { struct nfsmount *nmp; int error, flags = 0; if (mntflags & MNT_FORCE) flags |= FORCECLOSE; nmp = VFSTONFS(mp); /* * Goes something like this.. * - Call vflush() to clear out vnodes for this filesystem * - Close the socket * - Free up the data structures */ /* In the forced case, cancel any outstanding requests. */ if (flags & FORCECLOSE) { error = nfs_nmcancelreqs(nmp); if (error) goto out; } /* We hold 1 extra ref on the root vnode; see comment in mountnfs(). */ error = vflush(mp, 1, flags, td); if (error) goto out; /* * We are now committed to the unmount. */ nfs_disconnect(nmp); FREE(nmp->nm_nam, M_SONAME); mtx_destroy(&nmp->nm_mtx); uma_zfree(nfsmount_zone, nmp); out: return (error); } /* * Return root of a filesystem */ static int nfs_root(struct mount *mp, int flags, struct vnode **vpp, struct thread *td) { struct vnode *vp; struct nfsmount *nmp; struct nfsnode *np; int error; nmp = VFSTONFS(mp); error = nfs_nget(mp, (nfsfh_t *)nmp->nm_fh, nmp->nm_fhsize, &np, flags); if (error) return error; vp = NFSTOV(np); /* * Get transfer parameters and attributes for root vnode once. */ mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_GOTFSINFO) == 0 && (nmp->nm_flag & NFSMNT_NFSV3)) { mtx_unlock(&nmp->nm_mtx); nfs_fsinfo(nmp, vp, curthread->td_ucred, curthread); } else mtx_unlock(&nmp->nm_mtx); if (vp->v_type == VNON) vp->v_type = VDIR; vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } /* * Flush out the buffer cache */ /* ARGSUSED */ static int nfs_sync(struct mount *mp, int waitfor, struct thread *td) { struct vnode *vp, *mvp; int error, allerror = 0; /* * Force stale buffer cache information to be flushed. */ MNT_ILOCK(mp); loop: MNT_VNODE_FOREACH(vp, mp, mvp) { VI_LOCK(vp); MNT_IUNLOCK(mp); /* XXX Racy bv_cnt check. */ if (VOP_ISLOCKED(vp) || vp->v_bufobj.bo_dirty.bv_cnt == 0 || waitfor == MNT_LAZY) { VI_UNLOCK(vp); MNT_ILOCK(mp); continue; } if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { MNT_ILOCK(mp); MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); goto loop; } error = VOP_FSYNC(vp, waitfor, td); if (error) allerror = error; VOP_UNLOCK(vp, 0); vrele(vp); MNT_ILOCK(mp); } MNT_IUNLOCK(mp); return (allerror); } static int nfs_sysctl(struct mount *mp, fsctlop_t op, struct sysctl_req *req) { struct nfsmount *nmp = VFSTONFS(mp); struct vfsquery vq; int error; bzero(&vq, sizeof(vq)); switch (op) { #if 0 case VFS_CTL_NOLOCKS: val = (nmp->nm_flag & NFSMNT_NOLOCKS) ? 1 : 0; if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &val, sizeof(val)); if (error) return (error); } if (req->newptr != NULL) { error = SYSCTL_IN(req, &val, sizeof(val)); if (error) return (error); if (val) nmp->nm_flag |= NFSMNT_NOLOCKS; else nmp->nm_flag &= ~NFSMNT_NOLOCKS; } break; #endif case VFS_CTL_QUERY: mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_TIMEO) vq.vq_flags |= VQ_NOTRESP; mtx_unlock(&nmp->nm_mtx); #if 0 if (!(nmp->nm_flag & NFSMNT_NOLOCKS) && (nmp->nm_state & NFSSTA_LOCKTIMEO)) vq.vq_flags |= VQ_NOTRESPLOCK; #endif error = SYSCTL_OUT(req, &vq, sizeof(vq)); break; case VFS_CTL_TIMEO: if (req->oldptr != NULL) { error = SYSCTL_OUT(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); } if (req->newptr != NULL) { error = vfs_suser(mp, req->td); if (error) return (error); error = SYSCTL_IN(req, &nmp->nm_tprintf_initial_delay, sizeof(nmp->nm_tprintf_initial_delay)); if (error) return (error); if (nmp->nm_tprintf_initial_delay < 0) nmp->nm_tprintf_initial_delay = 0; } break; default: return (ENOTSUP); } return (0); } Index: head/sys/nlm/nlm_advlock.c =================================================================== --- head/sys/nlm/nlm_advlock.c (revision 180290) +++ head/sys/nlm/nlm_advlock.c (revision 180291) @@ -1,1235 +1,1239 @@ /*- * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ * Authors: Doug Rabson * Developed with Red Inc: Alfred Perlstein * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * We need to keep track of the svid values used for F_FLOCK locks. */ struct nlm_file_svid { int ns_refs; /* thread count + 1 if active */ int ns_svid; /* on-the-wire SVID for this file */ struct ucred *ns_ucred; /* creds to use for lock recovery */ void *ns_id; /* local struct file pointer */ bool_t ns_active; /* TRUE if we own a lock */ LIST_ENTRY(nlm_file_svid) ns_link; }; LIST_HEAD(nlm_file_svid_list, nlm_file_svid); #define NLM_SVID_HASH_SIZE 256 struct nlm_file_svid_list nlm_file_svids[NLM_SVID_HASH_SIZE]; struct mtx nlm_svid_lock; static struct unrhdr *nlm_svid_allocator; static volatile u_int nlm_xid = 1; static int nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim); static int nlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size); static int nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size); static int nlm_map_status(nlm4_stats stat); static struct nlm_file_svid *nlm_find_svid(void *id); static void nlm_free_svid(struct nlm_file_svid *nf); static int nlm_init_lock(struct flock *fl, int flags, int svid, rpcvers_t vers, size_t fhlen, void *fh, off_t size, struct nlm4_lock *lock, char oh_space[32]); static void nlm_client_init(void *dummy) { int i; mtx_init(&nlm_svid_lock, "NLM svid lock", NULL, MTX_DEF); nlm_svid_allocator = new_unrhdr(PID_MAX + 2, INT_MAX, &nlm_svid_lock); for (i = 0; i < NLM_SVID_HASH_SIZE; i++) LIST_INIT(&nlm_file_svids[i]); } SYSINIT(nlm_client_init, SI_SUB_LOCK, SI_ORDER_FIRST, nlm_client_init, NULL); static int nlm_msg(struct thread *td, const char *server, const char *msg, int error) { struct proc *p; p = td ? td->td_proc : NULL; if (error) { tprintf(p, LOG_INFO, "nfs server %s: %s, error %d\n", server, msg, error); } else { tprintf(p, LOG_INFO, "nfs server %s: %s\n", server, msg); } return (0); } struct nlm_feedback_arg { bool_t nf_printed; struct nfsmount *nf_nmp; }; static void nlm_down(struct nlm_feedback_arg *nf, struct thread *td, const char *msg, int error) { struct nfsmount *nmp = nf->nf_nmp; if (nmp == NULL) return; mtx_lock(&nmp->nm_mtx); if (!(nmp->nm_state & NFSSTA_LOCKTIMEO)) { nmp->nm_state |= NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 0); } else { mtx_unlock(&nmp->nm_mtx); } nf->nf_printed = TRUE; nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, error); } static void nlm_up(struct nlm_feedback_arg *nf, struct thread *td, const char *msg) { struct nfsmount *nmp = nf->nf_nmp; if (!nf->nf_printed) return; nlm_msg(td, nmp->nm_mountp->mnt_stat.f_mntfromname, msg, 0); mtx_lock(&nmp->nm_mtx); if (nmp->nm_state & NFSSTA_LOCKTIMEO) { nmp->nm_state &= ~NFSSTA_LOCKTIMEO; mtx_unlock(&nmp->nm_mtx); vfs_event_signal(&nmp->nm_mountp->mnt_stat.f_fsid, VQ_NOTRESPLOCK, 1); } else { mtx_unlock(&nmp->nm_mtx); } } static void nlm_feedback(int type, int proc, void *arg) { struct thread *td = curthread; struct nlm_feedback_arg *nf = (struct nlm_feedback_arg *) arg; switch (type) { case FEEDBACK_REXMIT2: case FEEDBACK_RECONNECT: nlm_down(nf, td, "lockd not responding", 0); break; case FEEDBACK_OK: nlm_up(nf, td, "lockd is alive again"); break; } } /* * nlm_advlock -- * NFS advisory byte-level locks. */ static int nlm_advlock_internal(struct vnode *vp, void *id, int op, struct flock *fl, int flags, bool_t reclaim, bool_t unlock_vp) { struct thread *td = curthread; struct nfsmount *nmp; struct nfsnode *np; off_t size; size_t fhlen; union nfsfh fh; struct sockaddr *sa; struct sockaddr_storage ss; char servername[MNAMELEN]; struct timeval timo; int retries; rpcvers_t vers; struct nlm_host *host; struct rpc_callextra ext; struct nlm_feedback_arg nf; AUTH *auth; struct ucred *cred; struct nlm_file_svid *ns; int svid; int error; ASSERT_VOP_LOCKED(vp, "nlm_advlock_1"); /* * Push any pending writes to the server and flush our cache * so that if we are contending with another machine for a * file, we get whatever they wrote and vice-versa. */ if (op == F_SETLK || op == F_UNLCK) nfs_vinvalbuf(vp, V_SAVE, td, 1); np = VTONFS(vp); nmp = VFSTONFS(vp->v_mount); size = np->n_size; sa = nmp->nm_nam; memcpy(&ss, sa, sa->sa_len); sa = (struct sockaddr *) &ss; + mtx_lock(&hostname_mtx); strcpy(servername, nmp->nm_hostname); + mtx_unlock(&hostname_mtx); fhlen = np->n_fhsize; memcpy(&fh.fh_bytes, np->n_fhp, fhlen); timo.tv_sec = nmp->nm_timeo / NFS_HZ; timo.tv_usec = (nmp->nm_timeo % NFS_HZ) * (1000000 / NFS_HZ); if (NFS_ISV3(vp)) vers = NLM_VERS4; else vers = NLM_VERS; if (nmp->nm_flag & NFSMNT_SOFT) retries = nmp->nm_retry; else retries = INT_MAX; if (unlock_vp) VOP_UNLOCK(vp, 0); /* * We need to switch to mount-point creds so that we can send * packets from a privileged port. */ cred = td->td_ucred; td->td_ucred = vp->v_mount->mnt_cred; host = nlm_find_host_by_name(servername, sa, vers); auth = authunix_create(cred); memset(&ext, 0, sizeof(ext)); nf.nf_printed = FALSE; nf.nf_nmp = nmp; ext.rc_auth = auth; ext.rc_feedback = nlm_feedback; ext.rc_feedback_arg = &nf; ns = NULL; if (flags & F_FLOCK) { ns = nlm_find_svid(id); KASSERT(fl->l_start == 0 && fl->l_len == 0, ("F_FLOCK lock requests must be whole-file locks")); if (!ns->ns_ucred) { /* * Remember the creds used for locking in case * we need to recover the lock later. */ ns->ns_ucred = crdup(cred); } svid = ns->ns_svid; } else if (flags & F_REMOTE) { /* * If we are recovering after a server restart or * trashing locks on a force unmount, use the same * svid as last time. */ svid = fl->l_pid; } else { svid = ((struct proc *) id)->p_pid; } switch(op) { case F_SETLK: if ((flags & (F_FLOCK|F_WAIT)) == (F_FLOCK|F_WAIT) && fl->l_type == F_WRLCK) { /* * The semantics for flock(2) require that any * shared lock on the file must be released * before an exclusive lock is granted. The * local locking code interprets this by * unlocking the file before sleeping on a * blocked exclusive lock request. We * approximate this by first attempting * non-blocking and if that fails, we unlock * the file and block. */ error = nlm_setlock(host, &ext, vers, &timo, retries, vp, F_SETLK, fl, flags & ~F_WAIT, svid, fhlen, &fh.fh_bytes, size, reclaim); if (error == EAGAIN) { fl->l_type = F_UNLCK; error = nlm_clearlock(host, &ext, vers, &timo, retries, vp, F_UNLCK, fl, flags, svid, fhlen, &fh.fh_bytes, size); fl->l_type = F_WRLCK; if (!error) { mtx_lock(&nlm_svid_lock); if (ns->ns_active) { ns->ns_refs--; ns->ns_active = FALSE; } mtx_unlock(&nlm_svid_lock); flags |= F_WAIT; error = nlm_setlock(host, &ext, vers, &timo, retries, vp, F_SETLK, fl, flags, svid, fhlen, &fh.fh_bytes, size, reclaim); } } } else { error = nlm_setlock(host, &ext, vers, &timo, retries, vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size, reclaim); } if (!error && ns) { mtx_lock(&nlm_svid_lock); if (!ns->ns_active) { /* * Add one to the reference count to * hold onto the SVID for the lifetime * of the lock. Note that since * F_FLOCK only supports whole-file * locks, there can only be one active * lock for this SVID. */ ns->ns_refs++; ns->ns_active = TRUE; } mtx_unlock(&nlm_svid_lock); } break; case F_UNLCK: error = nlm_clearlock(host, &ext, vers, &timo, retries, vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size); if (!error && ns) { mtx_lock(&nlm_svid_lock); if (ns->ns_active) { ns->ns_refs--; ns->ns_active = FALSE; } mtx_unlock(&nlm_svid_lock); } break; case F_GETLK: error = nlm_getlock(host, &ext, vers, &timo, retries, vp, op, fl, flags, svid, fhlen, &fh.fh_bytes, size); break; default: error = EINVAL; break; } if (ns) nlm_free_svid(ns); td->td_ucred = cred; AUTH_DESTROY(auth); nlm_host_release(host); return (error); } int nlm_advlock(struct vop_advlock_args *ap) { return (nlm_advlock_internal(ap->a_vp, ap->a_id, ap->a_op, ap->a_fl, ap->a_flags, FALSE, TRUE)); } /* * Set the creds of td to the creds of the given lock's owner. The new * creds reference count will be incremented via crhold. The caller is * responsible for calling crfree and restoring td's original creds. */ static void nlm_set_creds_for_lock(struct thread *td, struct flock *fl) { int i; struct nlm_file_svid *ns; struct proc *p; struct ucred *cred; cred = NULL; if (fl->l_pid > PID_MAX) { /* * If this was originally a F_FLOCK-style lock, we * recorded the creds used when it was originally * locked in the nlm_file_svid structure. */ mtx_lock(&nlm_svid_lock); for (i = 0; i < NLM_SVID_HASH_SIZE; i++) { for (ns = LIST_FIRST(&nlm_file_svids[i]); ns; ns = LIST_NEXT(ns, ns_link)) { if (ns->ns_svid == fl->l_pid) { cred = crhold(ns->ns_ucred); break; } } } mtx_unlock(&nlm_svid_lock); } else { /* * This lock is owned by a process. Get a reference to * the process creds. */ p = pfind(fl->l_pid); if (p) { cred = crhold(p->p_ucred); PROC_UNLOCK(p); } } /* * If we can't find a cred, fall back on the recovery * thread's cred. */ if (!cred) { cred = crhold(td->td_ucred); } td->td_ucred = cred; } static int nlm_reclaim_free_lock(struct vnode *vp, struct flock *fl, void *arg) { struct flock newfl; struct thread *td = curthread; struct ucred *oldcred; int error; newfl = *fl; newfl.l_type = F_UNLCK; oldcred = td->td_ucred; nlm_set_creds_for_lock(td, &newfl); error = nlm_advlock_internal(vp, NULL, F_UNLCK, &newfl, F_REMOTE, FALSE, FALSE); crfree(td->td_ucred); td->td_ucred = oldcred; return (error); } int nlm_reclaim(struct vop_reclaim_args *ap) { nlm_cancel_wait(ap->a_vp); lf_iteratelocks_vnode(ap->a_vp, nlm_reclaim_free_lock, NULL); return (0); } struct nlm_recovery_context { struct nlm_host *nr_host; /* host we are recovering */ int nr_state; /* remote NSM state for recovery */ }; static int nlm_client_recover_lock(struct vnode *vp, struct flock *fl, void *arg) { struct nlm_recovery_context *nr = (struct nlm_recovery_context *) arg; struct thread *td = curthread; struct ucred *oldcred; int state, error; /* * If the remote NSM state changes during recovery, the host * must have rebooted a second time. In that case, we must * restart the recovery. */ state = nlm_host_get_state(nr->nr_host); if (nr->nr_state != state) return (ERESTART); error = vn_lock(vp, LK_SHARED); if (error) return (error); oldcred = td->td_ucred; nlm_set_creds_for_lock(td, fl); error = nlm_advlock_internal(vp, NULL, F_SETLK, fl, F_REMOTE, TRUE, TRUE); crfree(td->td_ucred); td->td_ucred = oldcred; return (error); } void nlm_client_recovery(struct nlm_host *host) { struct nlm_recovery_context nr; int sysid, error; sysid = NLM_SYSID_CLIENT | nlm_host_get_sysid(host); do { nr.nr_host = host; nr.nr_state = nlm_host_get_state(host); error = lf_iteratelocks_sysid(sysid, nlm_client_recover_lock, &nr); } while (error == ERESTART); } static void nlm_convert_to_nlm_lock(struct nlm_lock *dst, struct nlm4_lock *src) { dst->caller_name = src->caller_name; dst->fh = src->fh; dst->oh = src->oh; dst->svid = src->svid; dst->l_offset = src->l_offset; dst->l_len = src->l_len; } static void nlm_convert_to_nlm4_holder(struct nlm4_holder *dst, struct nlm_holder *src) { dst->exclusive = src->exclusive; dst->svid = src->svid; dst->oh = src->oh; dst->l_offset = src->l_offset; dst->l_len = src->l_len; } static void nlm_convert_to_nlm4_res(struct nlm4_res *dst, struct nlm_res *src) { dst->cookie = src->cookie; dst->stat.stat = (enum nlm4_stats) src->stat.stat; } static enum clnt_stat nlm_test_rpc(rpcvers_t vers, nlm4_testargs *args, nlm4_testres *res, CLIENT *client, struct rpc_callextra *ext, struct timeval timo) { if (vers == NLM_VERS4) { return nlm4_test_4(args, res, client, ext, timo); } else { nlm_testargs args1; nlm_testres res1; enum clnt_stat stat; args1.cookie = args->cookie; args1.exclusive = args->exclusive; nlm_convert_to_nlm_lock(&args1.alock, &args->alock); memset(&res1, 0, sizeof(res1)); stat = nlm_test_1(&args1, &res1, client, ext, timo); if (stat == RPC_SUCCESS) { res->cookie = res1.cookie; res->stat.stat = (enum nlm4_stats) res1.stat.stat; if (res1.stat.stat == nlm_denied) nlm_convert_to_nlm4_holder( &res->stat.nlm4_testrply_u.holder, &res1.stat.nlm_testrply_u.holder); } return (stat); } } static enum clnt_stat nlm_lock_rpc(rpcvers_t vers, nlm4_lockargs *args, nlm4_res *res, CLIENT *client, struct rpc_callextra *ext, struct timeval timo) { if (vers == NLM_VERS4) { return nlm4_lock_4(args, res, client, ext, timo); } else { nlm_lockargs args1; nlm_res res1; enum clnt_stat stat; args1.cookie = args->cookie; args1.block = args->block; args1.exclusive = args->exclusive; nlm_convert_to_nlm_lock(&args1.alock, &args->alock); args1.reclaim = args->reclaim; args1.state = args->state; memset(&res1, 0, sizeof(res1)); stat = nlm_lock_1(&args1, &res1, client, ext, timo); if (stat == RPC_SUCCESS) { nlm_convert_to_nlm4_res(res, &res1); } return (stat); } } static enum clnt_stat nlm_cancel_rpc(rpcvers_t vers, nlm4_cancargs *args, nlm4_res *res, CLIENT *client, struct rpc_callextra *ext, struct timeval timo) { if (vers == NLM_VERS4) { return nlm4_cancel_4(args, res, client, ext, timo); } else { nlm_cancargs args1; nlm_res res1; enum clnt_stat stat; args1.cookie = args->cookie; args1.block = args->block; args1.exclusive = args->exclusive; nlm_convert_to_nlm_lock(&args1.alock, &args->alock); memset(&res1, 0, sizeof(res1)); stat = nlm_cancel_1(&args1, &res1, client, ext, timo); if (stat == RPC_SUCCESS) { nlm_convert_to_nlm4_res(res, &res1); } return (stat); } } static enum clnt_stat nlm_unlock_rpc(rpcvers_t vers, nlm4_unlockargs *args, nlm4_res *res, CLIENT *client, struct rpc_callextra *ext, struct timeval timo) { if (vers == NLM_VERS4) { return nlm4_unlock_4(args, res, client, ext, timo); } else { nlm_unlockargs args1; nlm_res res1; enum clnt_stat stat; args1.cookie = args->cookie; nlm_convert_to_nlm_lock(&args1.alock, &args->alock); memset(&res1, 0, sizeof(res1)); stat = nlm_unlock_1(&args1, &res1, client, ext, timo); if (stat == RPC_SUCCESS) { nlm_convert_to_nlm4_res(res, &res1); } return (stat); } } /* * Called after a lock request (set or clear) succeeded. We record the * details in the local lock manager. Note that since the remote * server has granted the lock, we can be sure that it doesn't * conflict with any other locks we have in the local lock manager. * * Since it is possible that host may also make NLM client requests to * our NLM server, we use a different sysid value to record our own * client locks. * * Note that since it is possible for us to receive replies from the * server in a different order than the locks were granted (e.g. if * many local threads are contending for the same lock), we must use a * blocking operation when registering with the local lock manager. * We expect that any actual wait will be rare and short hence we * ignore signals for this. */ static void nlm_record_lock(struct vnode *vp, int op, struct flock *fl, int svid, int sysid, off_t size) { struct vop_advlockasync_args a; struct flock newfl; int error; a.a_vp = vp; a.a_id = NULL; a.a_op = op; a.a_fl = &newfl; a.a_flags = F_REMOTE|F_WAIT|F_NOINTR; a.a_task = NULL; a.a_cookiep = NULL; newfl.l_start = fl->l_start; newfl.l_len = fl->l_len; newfl.l_type = fl->l_type; newfl.l_whence = fl->l_whence; newfl.l_pid = svid; newfl.l_sysid = NLM_SYSID_CLIENT | sysid; error = lf_advlockasync(&a, &vp->v_lockf, size); KASSERT(error == 0, ("Failed to register NFS lock locally - error=%d", error)); } static int nlm_setlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size, bool_t reclaim) { struct nlm4_lockargs args; char oh_space[32]; struct nlm4_res res; u_int xid; CLIENT *client; enum clnt_stat stat; int retry, block, exclusive; void *wait_handle = NULL; int error; memset(&args, 0, sizeof(args)); memset(&res, 0, sizeof(res)); block = (flags & F_WAIT) ? TRUE : FALSE; exclusive = (fl->l_type == F_WRLCK); error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, &args.alock, oh_space); if (error) return (error); args.block = block; args.exclusive = exclusive; args.reclaim = reclaim; args.state = nlm_nsm_state; retry = 5*hz; for (;;) { client = nlm_host_get_rpc(host); if (!client) return (ENOLCK); /* XXX retry? */ if (block) wait_handle = nlm_register_wait_lock(&args.alock, vp); xid = atomic_fetchadd_int(&nlm_xid, 1); args.cookie.n_len = sizeof(xid); args.cookie.n_bytes = (char*) &xid; stat = nlm_lock_rpc(vers, &args, &res, client, ext, *timo); CLNT_RELEASE(client); if (stat != RPC_SUCCESS) { if (block) nlm_deregister_wait_lock(wait_handle); if (retries) { retries--; continue; } return (EINVAL); } /* * Free res.cookie. */ xdr_free((xdrproc_t) xdr_nlm4_res, &res); if (block && res.stat.stat != nlm4_blocked) nlm_deregister_wait_lock(wait_handle); if (res.stat.stat == nlm4_denied_grace_period) { /* * The server has recently rebooted and is * giving old clients a change to reclaim * their locks. Wait for a few seconds and try * again. */ error = tsleep(&args, PCATCH, "nlmgrace", retry); if (error && error != EWOULDBLOCK) return (error); retry = 2*retry; if (retry > 30*hz) retry = 30*hz; continue; } if (block && res.stat.stat == nlm4_blocked) { /* * The server should call us back with a * granted message when the lock succeeds. In * order to deal with broken servers, lost * granted messages and server reboots, we * will also re-try every few seconds. */ error = nlm_wait_lock(wait_handle, retry); if (error == EWOULDBLOCK) { retry = 2*retry; if (retry > 30*hz) retry = 30*hz; continue; } if (error) { /* * We need to call the server to * cancel our lock request. */ nlm4_cancargs cancel; memset(&cancel, 0, sizeof(cancel)); xid = atomic_fetchadd_int(&nlm_xid, 1); cancel.cookie.n_len = sizeof(xid); cancel.cookie.n_bytes = (char*) &xid; cancel.block = block; cancel.exclusive = exclusive; cancel.alock = args.alock; do { client = nlm_host_get_rpc(host); if (!client) /* XXX retry? */ return (ENOLCK); stat = nlm_cancel_rpc(vers, &cancel, &res, client, ext, *timo); CLNT_RELEASE(client); if (stat != RPC_SUCCESS) { /* * We need to cope * with temporary * network partitions * as well as server * reboots. This means * we have to keep * trying to cancel * until the server * wakes up again. */ pause("nlmcancel", 10*hz); } } while (stat != RPC_SUCCESS); /* * Free res.cookie. */ xdr_free((xdrproc_t) xdr_nlm4_res, &res); switch (res.stat.stat) { case nlm_denied: /* * There was nothing * to cancel. We are * going to go ahead * and assume we got * the lock. */ error = 0; break; case nlm4_denied_grace_period: /* * The server has * recently rebooted - * treat this as a * successful * cancellation. */ break; case nlm4_granted: /* * We managed to * cancel. */ break; default: /* * Broken server * implementation - * can't really do * anything here. */ break; } } } else { error = nlm_map_status(res.stat.stat); } if (!error && !reclaim) { nlm_record_lock(vp, op, fl, args.alock.svid, nlm_host_get_sysid(host), size); nlm_host_monitor(host, 0); } return (error); } } static int nlm_clearlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size) { struct nlm4_unlockargs args; char oh_space[32]; struct nlm4_res res; u_int xid; CLIENT *client; enum clnt_stat stat; int error; memset(&args, 0, sizeof(args)); memset(&res, 0, sizeof(res)); error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, &args.alock, oh_space); if (error) return (error); for (;;) { client = nlm_host_get_rpc(host); if (!client) return (ENOLCK); /* XXX retry? */ xid = atomic_fetchadd_int(&nlm_xid, 1); args.cookie.n_len = sizeof(xid); args.cookie.n_bytes = (char*) &xid; stat = nlm_unlock_rpc(vers, &args, &res, client, ext, *timo); CLNT_RELEASE(client); if (stat != RPC_SUCCESS) { if (retries) { retries--; continue; } return (EINVAL); } /* * Free res.cookie. */ xdr_free((xdrproc_t) xdr_nlm4_res, &res); if (res.stat.stat == nlm4_denied_grace_period) { /* * The server has recently rebooted and is * giving old clients a change to reclaim * their locks. Wait for a few seconds and try * again. */ error = tsleep(&args, PCATCH, "nlmgrace", 5*hz); if (error && error != EWOULDBLOCK) return (error); continue; } /* * If we are being called via nlm_reclaim (which will * use the F_REMOTE flag), don't record the lock * operation in the local lock manager since the vnode * is going away. */ if (!(flags & F_REMOTE)) nlm_record_lock(vp, op, fl, args.alock.svid, nlm_host_get_sysid(host), size); return (0); } } static int nlm_getlock(struct nlm_host *host, struct rpc_callextra *ext, rpcvers_t vers, struct timeval *timo, int retries, struct vnode *vp, int op, struct flock *fl, int flags, int svid, size_t fhlen, void *fh, off_t size) { struct nlm4_testargs args; char oh_space[32]; struct nlm4_testres res; u_int xid; CLIENT *client; enum clnt_stat stat; int exclusive; int error; KASSERT(!(flags & F_FLOCK), ("unexpected F_FLOCK for F_GETLK")); memset(&args, 0, sizeof(args)); memset(&res, 0, sizeof(res)); exclusive = (fl->l_type == F_WRLCK); error = nlm_init_lock(fl, flags, svid, vers, fhlen, fh, size, &args.alock, oh_space); if (error) return (error); args.exclusive = exclusive; for (;;) { client = nlm_host_get_rpc(host); if (!client) return (ENOLCK); /* XXX retry? */ xid = atomic_fetchadd_int(&nlm_xid, 1); args.cookie.n_len = sizeof(xid); args.cookie.n_bytes = (char*) &xid; stat = nlm_test_rpc(vers, &args, &res, client, ext, *timo); CLNT_RELEASE(client); if (stat != RPC_SUCCESS) { if (retries) { retries--; continue; } return (EINVAL); } if (res.stat.stat == nlm4_denied_grace_period) { /* * The server has recently rebooted and is * giving old clients a change to reclaim * their locks. Wait for a few seconds and try * again. */ xdr_free((xdrproc_t) xdr_nlm4_testres, &res); error = tsleep(&args, PCATCH, "nlmgrace", 5*hz); if (error && error != EWOULDBLOCK) return (error); continue; } if (res.stat.stat == nlm4_denied) { struct nlm4_holder *h = &res.stat.nlm4_testrply_u.holder; fl->l_start = h->l_offset; fl->l_len = h->l_len; fl->l_pid = h->svid; if (h->exclusive) fl->l_type = F_WRLCK; else fl->l_type = F_RDLCK; fl->l_whence = SEEK_SET; fl->l_sysid = 0; } else { fl->l_type = F_UNLCK; } xdr_free((xdrproc_t) xdr_nlm4_testres, &res); return (0); } } static int nlm_map_status(nlm4_stats stat) { switch (stat) { case nlm4_granted: return (0); case nlm4_denied: return (EAGAIN); case nlm4_denied_nolocks: return (ENOLCK); case nlm4_deadlck: return (EDEADLK); case nlm4_rofs: return (EROFS); case nlm4_stale_fh: return (ESTALE); case nlm4_fbig: return (EFBIG); case nlm4_failed: return (EACCES); default: return (EINVAL); } } static struct nlm_file_svid * nlm_find_svid(void *id) { struct nlm_file_svid *ns, *newns; int h; h = (((uintptr_t) id) >> 7) % NLM_SVID_HASH_SIZE; mtx_lock(&nlm_svid_lock); LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) { if (ns->ns_id == id) { ns->ns_refs++; break; } } mtx_unlock(&nlm_svid_lock); if (!ns) { int svid = alloc_unr(nlm_svid_allocator); newns = malloc(sizeof(struct nlm_file_svid), M_NLM, M_WAITOK); newns->ns_refs = 1; newns->ns_id = id; newns->ns_svid = svid; newns->ns_ucred = NULL; newns->ns_active = FALSE; /* * We need to check for a race with some other * thread allocating a svid for this file. */ mtx_lock(&nlm_svid_lock); LIST_FOREACH(ns, &nlm_file_svids[h], ns_link) { if (ns->ns_id == id) { ns->ns_refs++; break; } } if (ns) { mtx_unlock(&nlm_svid_lock); free_unr(nlm_svid_allocator, newns->ns_svid); free(newns, M_NLM); } else { LIST_INSERT_HEAD(&nlm_file_svids[h], newns, ns_link); ns = newns; mtx_unlock(&nlm_svid_lock); } } return (ns); } static void nlm_free_svid(struct nlm_file_svid *ns) { mtx_lock(&nlm_svid_lock); ns->ns_refs--; if (!ns->ns_refs) { KASSERT(!ns->ns_active, ("Freeing active SVID")); LIST_REMOVE(ns, ns_link); mtx_unlock(&nlm_svid_lock); free_unr(nlm_svid_allocator, ns->ns_svid); if (ns->ns_ucred) crfree(ns->ns_ucred); free(ns, M_NLM); } else { mtx_unlock(&nlm_svid_lock); } } static int nlm_init_lock(struct flock *fl, int flags, int svid, rpcvers_t vers, size_t fhlen, void *fh, off_t size, struct nlm4_lock *lock, char oh_space[32]) { size_t oh_len; off_t start, len; if (fl->l_whence == SEEK_END) { if (size > OFF_MAX || (fl->l_start > 0 && size > OFF_MAX - fl->l_start)) return (EOVERFLOW); start = size + fl->l_start; } else if (fl->l_whence == SEEK_SET || fl->l_whence == SEEK_CUR) { start = fl->l_start; } else { return (EINVAL); } if (start < 0) return (EINVAL); if (fl->l_len < 0) { len = -fl->l_len; start -= len; if (start < 0) return (EINVAL); } else { len = fl->l_len; } if (vers == NLM_VERS) { /* * Enforce range limits on V1 locks */ if (start > 0xffffffffLL || len > 0xffffffffLL) return (EOVERFLOW); } + mtx_lock(&hostname_mtx); snprintf(oh_space, 32, "%d@%s", svid, hostname); + mtx_unlock(&hostname_mtx); oh_len = strlen(oh_space); memset(lock, 0, sizeof(*lock)); lock->caller_name = hostname; lock->fh.n_len = fhlen; lock->fh.n_bytes = fh; lock->oh.n_len = oh_len; lock->oh.n_bytes = oh_space; lock->svid = svid; lock->l_offset = start; lock->l_len = len; return (0); } Index: head/sys/rpc/authunix_prot.c =================================================================== --- head/sys/rpc/authunix_prot.c (revision 180290) +++ head/sys/rpc/authunix_prot.c (revision 180291) @@ -1,127 +1,131 @@ /* $NetBSD: authunix_prot.c,v 1.12 2000/01/22 22:19:17 mycroft Exp $ */ /* * Sun RPC is a product of Sun Microsystems, Inc. and is provided for * unrestricted use provided that this legend is included on all tape * media and as a part of the software program in whole or part. Users * may copy or modify Sun RPC without charge, but are not authorized * to license or distribute it to anyone else except as part of a product or * program developed by the user. * * SUN RPC IS PROVIDED AS IS WITH NO WARRANTIES OF ANY KIND INCLUDING THE * WARRANTIES OF DESIGN, MERCHANTIBILITY AND FITNESS FOR A PARTICULAR * PURPOSE, OR ARISING FROM A COURSE OF DEALING, USAGE OR TRADE PRACTICE. * * Sun RPC is provided with no support and without any obligation on the * part of Sun Microsystems, Inc. to assist in its use, correction, * modification or enhancement. * * SUN MICROSYSTEMS, INC. SHALL HAVE NO LIABILITY WITH RESPECT TO THE * INFRINGEMENT OF COPYRIGHTS, TRADE SECRETS OR ANY PATENTS BY SUN RPC * OR ANY PART THEREOF. * * In no event will Sun Microsystems, Inc. be liable for any lost revenue * or profits or other special, indirect and consequential damages, even if * Sun has been advised of the possibility of such damages. * * Sun Microsystems, Inc. * 2550 Garcia Avenue * Mountain View, California 94043 */ #if defined(LIBC_SCCS) && !defined(lint) static char *sccsid2 = "@(#)authunix_prot.c 1.15 87/08/11 Copyr 1984 Sun Micro"; static char *sccsid = "@(#)authunix_prot.c 2.1 88/07/29 4.0 RPCSRC"; #endif #include __FBSDID("$FreeBSD$"); /* * authunix_prot.c * XDR for UNIX style authentication parameters for RPC * * Copyright (C) 1984, Sun Microsystems, Inc. */ #include #include +#include +#include #include #include #include #include #include #include /* gids compose part of a credential; there may not be more than 16 of them */ #define NGRPS 16 /* * XDR for unix authentication parameters. */ bool_t xdr_authunix_parms(XDR *xdrs, uint32_t *time, struct xucred *cred) { uint32_t namelen; uint32_t ngroups, i; uint32_t junk; + mtx_lock(&hostname_mtx); if (xdrs->x_op == XDR_ENCODE) { /* * Restrict name length to 255 according to RFC 1057. */ namelen = strlen(hostname); if (namelen > 255) namelen = 255; } else { namelen = 0; } junk = 0; if (!xdr_uint32_t(xdrs, time) || !xdr_uint32_t(xdrs, &namelen)) return (FALSE); /* * Ignore the hostname on decode. */ if (xdrs->x_op == XDR_ENCODE) { if (!xdr_opaque(xdrs, hostname, namelen)) return (FALSE); } else { xdr_setpos(xdrs, xdr_getpos(xdrs) + RNDUP(namelen)); } + mtx_unlock(&hostname_mtx); if (!xdr_uint32_t(xdrs, &cred->cr_uid)) return (FALSE); if (!xdr_uint32_t(xdrs, &cred->cr_groups[0])) return (FALSE); if (xdrs->x_op == XDR_ENCODE) { ngroups = cred->cr_ngroups - 1; if (ngroups > NGRPS) ngroups = NGRPS; } if (!xdr_uint32_t(xdrs, &ngroups)) return (FALSE); for (i = 0; i < ngroups; i++) { if (i + 1 < NGROUPS) { if (!xdr_uint32_t(xdrs, &cred->cr_groups[i + 1])) return (FALSE); } else { if (!xdr_uint32_t(xdrs, &junk)) return (FALSE); } } if (xdrs->x_op == XDR_DECODE) { if (ngroups + 1 > NGROUPS) cred->cr_ngroups = NGROUPS; else cred->cr_ngroups = ngroups + 1; } return (TRUE); } Index: head/sys/sys/kernel.h =================================================================== --- head/sys/sys/kernel.h (revision 180290) +++ head/sys/sys/kernel.h (revision 180291) @@ -1,359 +1,360 @@ /*- * Copyright (c) 1995 Terrence R. Lambert * All rights reserved. * * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kernel.h 8.3 (Berkeley) 1/21/94 * $FreeBSD$ */ #ifndef _SYS_KERNEL_H_ #define _SYS_KERNEL_H_ #include #ifdef _KERNEL /* for intrhook below */ #include /* Global variables for the kernel. */ /* 1.1 */ +extern struct mtx hostname_mtx; extern unsigned long hostid; extern char hostuuid[64]; extern char hostname[MAXHOSTNAMELEN]; extern char domainname[MAXHOSTNAMELEN]; extern char kernelname[MAXPATHLEN]; extern int tick; /* usec per tick (1000000 / hz) */ extern int hz; /* system clock's frequency */ extern int psratio; /* ratio: prof / stat */ extern int stathz; /* statistics clock's frequency */ extern int profhz; /* profiling clock's frequency */ extern int profprocs; /* number of process's profiling */ extern int ticks; extern int lbolt; /* once a second sleep address */ #endif /* _KERNEL */ /* * Enumerated types for known system startup interfaces. * * Startup occurs in ascending numeric order; the list entries are * sorted prior to attempting startup to guarantee order. Items * of the same level are arbitrated for order based on the 'order' * element. * * These numbers are arbitrary and are chosen ONLY for ordering; the * enumeration values are explicit rather than implicit to provide * for binary compatibility with inserted elements. * * The SI_SUB_RUN_SCHEDULER value must have the highest lexical value. * * The SI_SUB_SWAP values represent a value used by * the BSD 4.4Lite but not by FreeBSD; it is maintained in dependent * order to support porting. * * The SI_SUB_PROTO_BEGIN and SI_SUB_PROTO_END bracket a range of * initializations to take place at splimp(). This is a historical * wart that should be removed -- probably running everything at * splimp() until the first init that doesn't want it is the correct * fix. They are currently present to ensure historical behavior. */ enum sysinit_sub_id { SI_SUB_DUMMY = 0x0000000, /* not executed; for linker*/ SI_SUB_DONE = 0x0000001, /* processed*/ SI_SUB_TUNABLES = 0x0700000, /* establish tunable values */ SI_SUB_COPYRIGHT = 0x0800001, /* first use of console*/ SI_SUB_SETTINGS = 0x0880000, /* check and recheck settings */ SI_SUB_MTX_POOL_STATIC = 0x0900000, /* static mutex pool */ SI_SUB_LOCKMGR = 0x0980000, /* lockmgr locks */ SI_SUB_VM = 0x1000000, /* virtual memory system init*/ SI_SUB_KMEM = 0x1800000, /* kernel memory*/ SI_SUB_KVM_RSRC = 0x1A00000, /* kvm operational limits*/ SI_SUB_WITNESS = 0x1A80000, /* witness initialization */ SI_SUB_MTX_POOL_DYNAMIC = 0x1AC0000, /* dynamic mutex pool */ SI_SUB_LOCK = 0x1B00000, /* various locks */ SI_SUB_EVENTHANDLER = 0x1C00000, /* eventhandler init */ SI_SUB_KLD = 0x2000000, /* KLD and module setup */ SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/ SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */ SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */ SI_SUB_MAC_POLICY = 0x21C0000, /* TrustedBSD MAC policies */ SI_SUB_MAC_LATE = 0x21D0000, /* TrustedBSD MAC subsystem */ SI_SUB_INTRINSIC = 0x2200000, /* proc 0*/ SI_SUB_VM_CONF = 0x2300000, /* config VM, set limits*/ SI_SUB_DDB_SERVICES = 0x2380000, /* capture, scripting, etc. */ SI_SUB_RUN_QUEUE = 0x2400000, /* set up run queue*/ SI_SUB_KTRACE = 0x2480000, /* ktrace */ SI_SUB_OPENSOLARIS = 0x2490000, /* OpenSolaris compatibility */ SI_SUB_CYCLIC = 0x24A0000, /* Cyclic timers */ SI_SUB_AUDIT = 0x24C0000, /* audit */ SI_SUB_CREATE_INIT = 0x2500000, /* create init process*/ SI_SUB_SCHED_IDLE = 0x2600000, /* required idle procs */ SI_SUB_MBUF = 0x2700000, /* mbuf subsystem */ SI_SUB_INTR = 0x2800000, /* interrupt threads */ SI_SUB_SOFTINTR = 0x2800001, /* start soft interrupt thread */ SI_SUB_ACL = 0x2900000, /* start for filesystem ACLs */ SI_SUB_DEVFS = 0x2F00000, /* devfs ready for devices */ SI_SUB_INIT_IF = 0x3000000, /* prep for net interfaces */ SI_SUB_NETGRAPH = 0x3010000, /* Let Netgraph initialize */ SI_SUB_DTRACE = 0x3020000, /* DTrace subsystem */ SI_SUB_DTRACE_PROVIDER = 0x3048000, /* DTrace providers */ SI_SUB_DTRACE_ANON = 0x308C000, /* DTrace anon enabling */ SI_SUB_DRIVERS = 0x3100000, /* Let Drivers initialize */ SI_SUB_CONFIGURE = 0x3800000, /* Configure devices */ SI_SUB_VFS = 0x4000000, /* virtual filesystem*/ SI_SUB_CLOCKS = 0x4800000, /* real time and stat clocks*/ SI_SUB_CLIST = 0x5800000, /* clists*/ SI_SUB_SYSV_SHM = 0x6400000, /* System V shared memory*/ SI_SUB_SYSV_SEM = 0x6800000, /* System V semaphores*/ SI_SUB_SYSV_MSG = 0x6C00000, /* System V message queues*/ SI_SUB_P1003_1B = 0x6E00000, /* P1003.1B realtime */ SI_SUB_PSEUDO = 0x7000000, /* pseudo devices*/ SI_SUB_EXEC = 0x7400000, /* execve() handlers */ SI_SUB_PROTO_BEGIN = 0x8000000, /* XXX: set splimp (kludge)*/ SI_SUB_PROTO_IF = 0x8400000, /* interfaces*/ SI_SUB_PROTO_DOMAIN = 0x8800000, /* domains (address families?)*/ SI_SUB_PROTO_IFATTACHDOMAIN = 0x8800001, /* domain dependent data init*/ SI_SUB_PROTO_END = 0x8ffffff, /* XXX: set splx (kludge)*/ SI_SUB_KPROF = 0x9000000, /* kernel profiling*/ SI_SUB_KICK_SCHEDULER = 0xa000000, /* start the timeout events*/ SI_SUB_INT_CONFIG_HOOKS = 0xa800000, /* Interrupts enabled config */ SI_SUB_ROOT_CONF = 0xb000000, /* Find root devices */ SI_SUB_DUMP_CONF = 0xb200000, /* Find dump devices */ SI_SUB_RAID = 0xb380000, /* Configure GEOM classes */ SI_SUB_SWAP = 0xc000000, /* swap */ SI_SUB_INTRINSIC_POST = 0xd000000, /* proc 0 cleanup*/ SI_SUB_SYSCALLS = 0xd800000, /* register system calls */ SI_SUB_KTHREAD_INIT = 0xe000000, /* init process*/ SI_SUB_KTHREAD_PAGE = 0xe400000, /* pageout daemon*/ SI_SUB_KTHREAD_VM = 0xe800000, /* vm daemon*/ SI_SUB_KTHREAD_BUF = 0xea00000, /* buffer daemon*/ SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/ SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/ SI_SUB_SMP = 0xf000000, /* start the APs*/ SI_SUB_RUN_SCHEDULER = 0xfffffff /* scheduler*/ }; /* * Some enumerated orders; "ANY" sorts last. */ enum sysinit_elem_order { SI_ORDER_FIRST = 0x0000000, /* first*/ SI_ORDER_SECOND = 0x0000001, /* second*/ SI_ORDER_THIRD = 0x0000002, /* third*/ SI_ORDER_MIDDLE = 0x1000000, /* somewhere in the middle */ SI_ORDER_ANY = 0xfffffff /* last*/ }; /* * A system initialization call instance * * At the moment there is one instance of sysinit. We probably do not * want two which is why this code is if'd out, but we definitely want * to discern SYSINIT's which take non-constant data pointers and * SYSINIT's which take constant data pointers, * * The C_* macros take functions expecting const void * arguments * while the non-C_* macros take functions expecting just void * arguments. * * With -Wcast-qual on, the compiler issues warnings: * - if we pass non-const data or functions taking non-const data * to a C_* macro. * * - if we pass const data to the normal macros * * However, no warning is issued if we pass a function taking const data * through a normal non-const macro. This is ok because the function is * saying it won't modify the data so we don't care whether the data is * modifiable or not. */ typedef void (*sysinit_nfunc_t)(void *); typedef void (*sysinit_cfunc_t)(const void *); struct sysinit { enum sysinit_sub_id subsystem; /* subsystem identifier*/ enum sysinit_elem_order order; /* init order within subsystem*/ sysinit_cfunc_t func; /* function */ const void *udata; /* multiplexer/argument */ }; /* * Default: no special processing * * The C_ version of SYSINIT is for data pointers to const * data ( and functions taking data pointers to const data ). * At the moment it is no different from SYSINIT and thus * still results in warnings. * * The casts are necessary to have the compiler produce the * correct warnings when -Wcast-qual is used. * */ #define C_SYSINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit uniquifier ## _sys_init = { \ subsystem, \ order, \ func, \ (ident) \ }; \ DATA_SET(sysinit_set,uniquifier ## _sys_init) #define SYSINIT(uniquifier, subsystem, order, func, ident) \ C_SYSINIT(uniquifier, subsystem, order, \ (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) /* * Called on module unload: no special processing */ #define C_SYSUNINIT(uniquifier, subsystem, order, func, ident) \ static struct sysinit uniquifier ## _sys_uninit = { \ subsystem, \ order, \ func, \ (ident) \ }; \ DATA_SET(sysuninit_set,uniquifier ## _sys_uninit) #define SYSUNINIT(uniquifier, subsystem, order, func, ident) \ C_SYSUNINIT(uniquifier, subsystem, order, \ (sysinit_cfunc_t)(sysinit_nfunc_t)func, (void *)(ident)) void sysinit_add(struct sysinit **set, struct sysinit **set_end); /* * Infrastructure for tunable 'constants'. Value may be specified at compile * time or kernel load time. Rules relating tunables together can be placed * in a SYSINIT function at SI_SUB_TUNABLES with SI_ORDER_LAST. * * WARNING: developers should never use the reserved suffixes specified in * loader.conf(5) for any tunables or conflicts will result. */ /* * int * please avoid using for new tunables! */ extern void tunable_int_init(void *); struct tunable_int { const char *path; int *var; }; #define TUNABLE_INT(path, var) \ static struct tunable_int __CONCAT(__tunable_int_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_int_init, \ &__CONCAT(__tunable_int_, __LINE__)) #define TUNABLE_INT_FETCH(path, var) getenv_int((path), (var)) /* * long */ extern void tunable_long_init(void *); struct tunable_long { const char *path; long *var; }; #define TUNABLE_LONG(path, var) \ static struct tunable_long __CONCAT(__tunable_long_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_long_init,\ &__CONCAT(__tunable_long_, __LINE__)) #define TUNABLE_LONG_FETCH(path, var) getenv_long((path), (var)) /* * unsigned long */ extern void tunable_ulong_init(void *); struct tunable_ulong { const char *path; unsigned long *var; }; #define TUNABLE_ULONG(path, var) \ static struct tunable_ulong __CONCAT(__tunable_ulong_, __LINE__) = { \ (path), \ (var), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_ulong_init, \ &__CONCAT(__tunable_ulong_, __LINE__)) #define TUNABLE_ULONG_FETCH(path, var) getenv_ulong((path), (var)) extern void tunable_str_init(void *); struct tunable_str { const char *path; char *var; int size; }; #define TUNABLE_STR(path, var, size) \ static struct tunable_str __CONCAT(__tunable_str_, __LINE__) = { \ (path), \ (var), \ (size), \ }; \ SYSINIT(__CONCAT(__Tunable_init_, __LINE__), \ SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_str_init, \ &__CONCAT(__tunable_str_, __LINE__)) #define TUNABLE_STR_FETCH(path, var, size) \ getenv_string((path), (var), (size)) struct intr_config_hook { TAILQ_ENTRY(intr_config_hook) ich_links; void (*ich_func)(void *arg); void *ich_arg; }; int config_intrhook_establish(struct intr_config_hook *hook); void config_intrhook_disestablish(struct intr_config_hook *hook); #endif /* !_SYS_KERNEL_H_*/