diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c index 5e1bb32e54a1..f4475bb199b8 100644 --- a/sys/amd64/amd64/trap.c +++ b/sys/amd64/amd64/trap.c @@ -1,1104 +1,1105 @@ /*- * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.122 1998/02/06 12:13:10 eivind Exp $ + * $Id: trap.c,v 1.123 1998/03/23 19:52:37 jlemon Exp $ */ /* * 386 Trap and System call handling */ #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_ktrace.h" #include "opt_trap.h" #include "opt_vm86.h" #include #include #include #include #include #include #include #include #include +#include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef VM86 #include #endif #include "isa.h" #include "npx.h" extern struct i386tss common_tss; int (*pmath_emulate) __P((struct trapframe *)); extern void trap __P((struct trapframe frame)); extern int trapwrite __P((unsigned addr)); extern void syscall __P((struct trapframe frame)); static int trap_pfault __P((struct trapframe *, int)); static void trap_fatal __P((struct trapframe *)); void dblfault_handler __P((void)); extern inthand_t IDTVEC(syscall); #define MAX_TRAP_MSG 28 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "system forced exception", /* 7 T_ASTFLT */ "", /* 8 unused */ "general protection fault", /* 9 
T_PROTFLT */ "trace trap", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ }; static void userret __P((struct proc *p, struct trapframe *frame, u_quad_t oticks)); #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern struct gate_descriptor *t_idt; extern int has_f00f_bug; #endif static inline void userret(p, frame, oticks) struct proc *p; struct trapframe *frame; u_quad_t oticks; { int sig, s; while ((sig = CURSIG(p)) != 0) postsig(sig); #if 0 if (!want_resched && (p->p_priority <= p->p_usrpri) && (p->p_rtprio.type == RTP_PRIO_NORMAL)) { int newpriority; p->p_estcpu += 1; newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; newpriority = min(newpriority, MAXPRI); p->p_usrpri = newpriority; } #endif p->p_priority = p->p_usrpri; if (want_resched) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another * (since the running process is not on a queue.) * If that happened after we setrunqueue ourselves but before we * mi_switch()'ed, we might not be on the queue indicated by * our priority. */ s = splhigh(); setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; mi_switch(); splx(s); while ((sig = CURSIG(p)) != 0) postsig(sig); } /* * Charge system time if profiling. 
*/ if (p->p_flag & P_PROFIL) addupc_task(p, frame->tf_eip, (u_int)(p->p_sticks - oticks) * psratio); curpriority = p->p_priority; } /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. */ void trap(frame) struct trapframe frame; { struct proc *p = curproc; u_quad_t sticks = 0; int i = 0, ucode = 0, type, code; #ifdef DEBUG u_long eva; #endif #if defined(I586_CPU) && !defined(NO_F00F_HACK) restart: #endif type = frame.tf_trapno; code = frame.tf_err; #ifdef VM86 if (in_vm86call) { if (frame.tf_eflags & PSL_VM && (type == T_PROTFLT || type == T_STKFLT)) { i = vm86_emulate((struct vm86frame *)&frame); if (i != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)&frame); return; } switch (type) { /* * these traps want either a process context, or * assume a normal userspace trap. */ case T_PROTFLT: case T_SEGNPFLT: trap_fatal(&frame); return; case T_TRCTRAP: type = T_BPTFLT; /* kernel breakpoint */ /* FALL THROUGH */ } goto kernel_trap; /* normal kernel trap handling */ } #endif if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) { /* user trap */ sticks = p->p_sticks; p->p_md.md_regs = &frame; switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ ucode = type; i = SIGILL; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ frame.tf_eflags &= ~PSL_T; i = SIGTRAP; break; case T_ARITHTRAP: /* arithmetic trap */ ucode = code; i = SIGFPE; break; case T_ASTFLT: /* Allow process switch */ astoff(); cnt.v_soft++; if (p->p_flag & P_OWEUPC) { p->p_flag &= ~P_OWEUPC; addupc_task(p, p->p_stats->p_prof.pr_addr, p->p_stats->p_prof.pr_ticks); } goto out; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. 
*/ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ #ifdef VM86 if (frame.tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)&frame); if (i == 0) goto out; break; } #endif /* VM86 */ /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ case T_TSSFLT: /* invalid TSS fault */ case T_DOUBLEFLT: /* double fault */ default: ucode = code + BUS_SEGM_FAULT ; i = SIGBUS; break; case T_PAGEFLT: /* page fault */ i = trap_pfault(&frame, TRUE); if (i == -1) return; #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) goto restart; #endif if (i == 0) goto out; ucode = T_PAGEFLT; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV_TRAP; i = SIGFPE; break; #if NISA > 0 case T_NMI: #ifdef POWERFAIL_NMI goto handle_powerfail; #else /* !POWERFAIL_NMI */ #ifdef DDB /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); if (kdb_trap (type, 0, &frame)) return; #endif /* DDB */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) return; panic("NMI indicates hardware failure"); #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF_TRAP; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_SUBRNG_TRAP; i = SIGFPE; break; case T_DNA: #if NNPX > 0 /* if a transparent fault (due to context switch "late") */ if (npxdna()) return; #endif if (!pmath_emulate) { i = SIGFPE; ucode = FPE_FPU_NP_TRAP; break; } i = (*pmath_emulate)(&frame); if (i == 0) { if (!(frame.tf_eflags & PSL_T)) return; frame.tf_eflags &= ~PSL_T; i = SIGTRAP; } /* else ucode = emulator_only_knows() XXX */ break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = T_FPOPFLT; i = SIGILL; break; } } else { #ifdef VM86 kernel_trap: #endif /* kernel trap */ switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(&frame, FALSE); return; case T_DNA: #if NNPX > 0 /* * The kernel is apparently using npx for 
copying. * XXX this should be fatal unless the kernel has * registered such use. */ if (npxdna()) return; #endif break; case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ #define MAYBE_DORETI_FAULT(where, whereto) \ do { \ if (frame.tf_eip == (int)where) { \ frame.tf_eip = (int)whereto; \ return; \ } \ } while (0) if (intr_nesting_level == 0) { /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame.tf_eip == (int)cpu_switch_load_fs) { curpcb->pcb_fs = 0; psignal(p, SIGBUS); return; } if (frame.tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; psignal(p, SIGBUS); return; } MAYBE_DORETI_FAULT(doreti_iret, doreti_iret_fault); MAYBE_DORETI_FAULT(doreti_popl_ds, doreti_popl_ds_fault); MAYBE_DORETI_FAULT(doreti_popl_es, doreti_popl_es_fault); if (curpcb && curpcb->pcb_onfault) { frame.tf_eip = (int)curpcb->pcb_onfault; return; } } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. 
*/ if (frame.tf_eflags & PSL_NT) { frame.tf_eflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* trace trap */ if (frame.tf_eip == (int)IDTVEC(syscall)) { /* * We've just entered system mode via the * syscall lcall. Continue single stepping * silently until the syscall handler has * saved the flags. */ return; } if (frame.tf_eip == (int)IDTVEC(syscall) + 1) { /* * The syscall handler has now saved the * flags. Stop single stepping it. */ frame.tf_eflags &= ~PSL_T; return; } /* * Fall through. */ case T_BPTFLT: /* * If DDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef DDB if (kdb_trap (type, 0, &frame)) return; #endif break; #if NISA > 0 case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif handle_powerfail: { static unsigned lastalert = 0; if(time.tv_sec - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(TIMER_FREQ/880, hz); lastalert = time.tv_sec; } return; } #else /* !POWERFAIL_NMI */ #ifdef DDB /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); if (kdb_trap (type, 0, &frame)) return; #endif /* DDB */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) return; /* FALL THROUGH */ #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ } trap_fatal(&frame); return; } trapsignal(p, i, ucode); #ifdef DEBUG eva = rcr2(); if (type <= MAX_TRAP_MSG) { uprintf("fatal process exception: %s", trap_msg[type]); if ((type == T_PAGEFLT) || (type == T_PROTFLT)) uprintf(", fault VA = 0x%x", eva); uprintf("\n"); } #endif out: userret(p, &frame, sticks); } #ifdef notyet /* * This version doesn't allow a page fault to user space while * in the kernel. The rest of the kernel needs to be made "safe" * before this can be used. I think the only things remaining * to be made safe are the iBCS2 code and the process tracing/ * debugging code. 
*/ static int trap_pfault(frame, usermode) struct trapframe *frame; int usermode; { vm_offset_t va; struct vmspace *vm = NULL; vm_map_t map = 0; int rv = 0; vm_prot_t ftype; int eva; struct proc *p = curproc; if (frame->tf_err & PGEX_W) ftype = VM_PROT_READ | VM_PROT_WRITE; else ftype = VM_PROT_READ; eva = rcr2(); va = trunc_page((vm_offset_t)eva); if (va < VM_MIN_KERNEL_ADDRESS) { vm_offset_t v; vm_page_t mpte; if (p == NULL || (!usermode && va < VM_MAXUSER_ADDRESS && (intr_nesting_level != 0 || curpcb == NULL || curpcb->pcb_onfault == NULL))) { trap_fatal(frame); return (-1); } /* * This is a fault on non-kernel virtual memory. * vm is initialized above to NULL. If curproc is NULL * or curproc->p_vmspace is NULL the fault is fatal. */ vm = p->p_vmspace; if (vm == NULL) goto nogo; map = &vm->vm_map; /* * Keep swapout from messing with us during this * critical time. */ ++p->p_lock; /* * Grow the stack if necessary */ if ((caddr_t)va > vm->vm_maxsaddr && (caddr_t)va < (caddr_t)USRSTACK) { if (!grow(p, va)) { rv = KERN_FAILURE; --p->p_lock; goto nogo; } } /* Fault in the user page: */ rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0); --p->p_lock; } else { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) goto nogo; /* * Since we know that kernel virtual address addresses * always have pte pages mapped, we just have to fault * the page. */ rv = vm_fault(kernel_map, va, ftype, FALSE); } if (rv == KERN_SUCCESS) return (0); nogo: if (!usermode) { if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame); return (-1); } /* kludge to pass faulting virtual address to sendsig */ frame->tf_err = eva; return((rv == KERN_PROTECTION_FAILURE) ? 
SIGBUS : SIGSEGV); } #endif int trap_pfault(frame, usermode) struct trapframe *frame; int usermode; { vm_offset_t va; struct vmspace *vm = NULL; vm_map_t map = 0; int rv = 0; vm_prot_t ftype; int eva; struct proc *p = curproc; eva = rcr2(); va = trunc_page((vm_offset_t)eva); if (va >= KERNBASE) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&t_idt[6]) && has_f00f_bug) { frame->tf_trapno = T_PRIVINFLT; return -2; } #endif if (usermode) goto nogo; map = kernel_map; } else { /* * This is a fault on non-kernel virtual memory. * vm is initialized above to NULL. If curproc is NULL * or curproc->p_vmspace is NULL the fault is fatal. */ if (p != NULL) vm = p->p_vmspace; if (vm == NULL) goto nogo; map = &vm->vm_map; } if (frame->tf_err & PGEX_W) ftype = VM_PROT_READ | VM_PROT_WRITE; else ftype = VM_PROT_READ; if (map != kernel_map) { /* * Keep swapout from messing with us during this * critical time. */ ++p->p_lock; /* * Grow the stack if necessary */ if ((caddr_t)va > vm->vm_maxsaddr && (caddr_t)va < (caddr_t)USRSTACK) { if (!grow(p, va)) { rv = KERN_FAILURE; --p->p_lock; goto nogo; } } /* Fault in the user page: */ rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0); --p->p_lock; } else { /* * Don't have to worry about process locking or stacks in the kernel. */ rv = vm_fault(map, va, ftype, FALSE); } if (rv == KERN_SUCCESS) return (0); nogo: if (!usermode) { if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame); return (-1); } /* kludge to pass faulting virtual address to sendsig */ frame->tf_err = eva; return((rv == KERN_PROTECTION_FAILURE) ? 
SIGBUS : SIGSEGV); } static void trap_fatal(frame) struct trapframe *frame; { int code, type, eva, ss, esp; struct soft_segment_descriptor softseg; code = frame->tf_err; type = frame->tf_trapno; eva = rcr2(); sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); if (type <= MAX_TRAP_MSG) printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg[type], frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* three seperate prints in case of a trap on an unmapped page */ printf("mp_lock = %08x; ", mp_lock); printf("cpuid = %d; ", cpuid); printf("lapic.id = %08x\n", lapic.id); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = "); if (curproc) { printf("%lu (%s)\n", (u_long)curproc->p_pid, curproc->p_comm ? 
curproc->p_comm : "");
	} else {
		printf("Idle\n");
	}
	/* Report which interrupt classes are masked at the current cpl. */
	printf("interrupt mask = ");
	if ((cpl & net_imask) == net_imask)
		printf("net ");
	if ((cpl & tty_imask) == tty_imask)
		printf("tty ");
	if ((cpl & bio_imask) == bio_imask)
		printf("bio ");
	if ((cpl & cam_imask) == cam_imask)
		printf("cam ");
	if (cpl == 0)
		printf("none");
#ifdef SMP
/**
 * XXX FIXME:
 * we probably SHOULD have stopped the other CPUs before now!
 * another CPU COULD have been touching cpl at this moment...
 */
	printf(" <- SMP: XXX");
#endif
	printf("\n");

	/* Give an attached kernel debugger a chance to take over. */
#ifdef KDB
	if (kdb_trap(&psl))
		return;
#endif
#ifdef DDB
	if (kdb_trap (type, 0, frame))
		return;
#endif
	printf("trap number = %d\n", type);
	if (type <= MAX_TRAP_MSG)
		panic(trap_msg[type]);
	else
		panic("unknown/reserved trap");
}

/*
 * Double fault handler.  Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack.  This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs.  This means that the stack that was active at
 * the time of the double fault is not available afterwards, unless
 * the machine was idle when the double fault occurred.  The downside
 * of this is that a stack trace ("trace" in ddb) won't work.
 */
void
dblfault_handler()
{
	/*
	 * Registers are taken from the TSS used for the double-fault
	 * task gate, not from a trapframe, since no frame could be built.
	 */
	printf("\nFatal double fault:\n");
	printf("eip = 0x%x\n", common_tss.tss_eip);
	printf("esp = 0x%x\n", common_tss.tss_esp);
	printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
	/* three separate prints in case of a trap on an unmapped page */
	printf("mp_lock = %08x; ", mp_lock);
	printf("cpuid = %d; ", cpuid);
	printf("lapic.id = %08x\n", lapic.id);
#endif
	panic("double fault");
}

/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
*/ int trapwrite(addr) unsigned addr; { struct proc *p; vm_offset_t va; struct vmspace *vm; int rv; va = trunc_page((vm_offset_t)addr); /* * XXX - MAX is END. Changed > to >= for temp. fix. */ if (va >= VM_MAXUSER_ADDRESS) return (1); p = curproc; vm = p->p_vmspace; ++p->p_lock; if ((caddr_t)va >= vm->vm_maxsaddr && (caddr_t)va < (caddr_t)USRSTACK) { if (!grow(p, va)) { --p->p_lock; return (1); } } /* * fault the data page */ rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY); --p->p_lock; if (rv != KERN_SUCCESS) return 1; return (0); } /* * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ void syscall(frame) struct trapframe frame; { caddr_t params; int i; struct sysent *callp; struct proc *p = curproc; u_quad_t sticks; int error; int args[8]; u_int code; #ifdef DIAGNOSTIC if (ISPL(frame.tf_cs) != SEL_UPL) panic("syscall"); #endif sticks = p->p_sticks; p->p_md.md_regs = &frame; params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; if (p->p_sysent->sv_prepsyscall) { (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ code = fuword(params); params += sizeof(int); } else if (code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. 
*/ code = fuword(params); params += sizeof(quad_t); } } if (p->p_sysent->sv_mask) code &= p->p_sysent->sv_mask; if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; if (params && (i = callp->sy_narg * sizeof(int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, args); #endif goto bad; } #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, args); #endif p->p_retval[0] = 0; p->p_retval[1] = frame.tf_edx; STOPEVENT(p, S_SCE, callp->sy_narg); error = (*callp->sy_call)(p, args); switch (error) { case 0: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. */ p = curproc; frame.tf_eax = p->p_retval[0]; frame.tf_edx = p->p_retval[1]; frame.tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, * int 0x80 is 2 bytes. We saved this in tf_err. */ frame.tf_eip -= frame.tf_err; break; case EJUSTRETURN: break; default: bad: if (p->p_sysent->sv_errsize) if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; frame.tf_eax = error; frame.tf_eflags |= PSL_C; break; } if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) { /* Traced syscall. */ frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); } userret(p, &frame, sticks); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, code, error, p->p_retval[0]); #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); } /* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. 
*/ void fork_return(p, frame) struct proc *p; struct trapframe frame; { frame.tf_eax = 0; /* Child returns zero */ frame.tf_eflags &= ~PSL_C; /* success */ frame.tf_edx = 1; userret(p, &frame, 0); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, SYS_fork, 0, 0); #endif } diff --git a/sys/compat/linux/linux_socket.c b/sys/compat/linux/linux_socket.c index aac40a077dd9..b50cf5860e31 100644 --- a/sys/compat/linux/linux_socket.c +++ b/sys/compat/linux/linux_socket.c @@ -1,816 +1,817 @@ /*- * Copyright (c) 1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* - * $Id: linux_socket.c,v 1.11 1997/12/16 17:40:11 eivind Exp $ + * $Id: linux_socket.c,v 1.12 1998/02/07 02:13:27 msmith Exp $ */ /* XXX we use functions that might not exist. */ #include "opt_compat.h" #ifndef COMPAT_43 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" #endif #include #include #include #include #include #include +#include #include #include #include #include #include #include static int linux_to_bsd_domain(int domain) { switch (domain) { case LINUX_AF_UNSPEC: return AF_UNSPEC; case LINUX_AF_UNIX: return AF_LOCAL; case LINUX_AF_INET: return AF_INET; case LINUX_AF_AX25: return AF_CCITT; case LINUX_AF_IPX: return AF_IPX; case LINUX_AF_APPLETALK: return AF_APPLETALK; default: return -1; } } static int linux_to_bsd_sockopt_level(int level) { switch (level) { case LINUX_SOL_SOCKET: return SOL_SOCKET; default: return level; } } static int linux_to_bsd_ip_sockopt(int opt) { switch (opt) { case LINUX_IP_TOS: return IP_TOS; case LINUX_IP_TTL: return IP_TTL; case LINUX_IP_OPTIONS: return IP_OPTIONS; case LINUX_IP_MULTICAST_IF: return IP_MULTICAST_IF; case LINUX_IP_MULTICAST_TTL: return IP_MULTICAST_TTL; case LINUX_IP_MULTICAST_LOOP: return IP_MULTICAST_LOOP; case LINUX_IP_ADD_MEMBERSHIP: return IP_ADD_MEMBERSHIP; case LINUX_IP_DROP_MEMBERSHIP: return IP_DROP_MEMBERSHIP; case LINUX_IP_HDRINCL: return IP_HDRINCL; default: return -1; } } static int linux_to_bsd_so_sockopt(int opt) { switch (opt) { case LINUX_SO_DEBUG: return SO_DEBUG; case LINUX_SO_REUSEADDR: return SO_REUSEADDR; case LINUX_SO_TYPE: return SO_TYPE; case LINUX_SO_ERROR: return SO_ERROR; case LINUX_SO_DONTROUTE: return SO_DONTROUTE; case LINUX_SO_BROADCAST: return SO_BROADCAST; case LINUX_SO_SNDBUF: return SO_SNDBUF; case LINUX_SO_RCVBUF: return SO_RCVBUF; case LINUX_SO_KEEPALIVE: return SO_KEEPALIVE; case LINUX_SO_OOBINLINE: return SO_OOBINLINE; case LINUX_SO_LINGER: return SO_LINGER; case LINUX_SO_PRIORITY: case LINUX_SO_NO_CHECK: default: return -1; } } /* Return 0 
if IP_HDRINCL is set on the given socket, not 0 otherwise */
static int
linux_check_hdrincl(struct proc *p, int s)
{
	struct getsockopt_args /* {
		int s;
		int level;
		int name;
		caddr_t val;
		int *avalsize;
	} */ bsd_args;
	int error;
	caddr_t sg, val, valsize;
	/*
	 * NOTE(review): "sizeof val" is the size of a caddr_t, where
	 * sizeof(int) appears intended; identical (4) on i386 — confirm.
	 */
	int size_val = sizeof val;
	int optval;

	/* Scratch space in the stack gap for getsockopt()'s out-parameters. */
	sg = stackgap_init();
	val = stackgap_alloc(&sg, sizeof(int));
	valsize = stackgap_alloc(&sg, sizeof(int));

	/* getsockopt() expects the buffer size to be copyin-able. */
	if ((error=copyout(&size_val, valsize, sizeof(size_val))))
		return error;

	bsd_args.s = s;
	bsd_args.level = IPPROTO_IP;
	bsd_args.name = IP_HDRINCL;
	bsd_args.val = val;
	bsd_args.avalsize = (int *)valsize;
	if ((error=getsockopt(p, &bsd_args)))
		return error;
	if ((error=copyin(val, &optval, sizeof(optval))))
		return error;
	/* 0 when IP_HDRINCL is enabled, non-zero when it is not. */
	return optval == 0;
}

/*
 * Updated sendto() when IP_HDRINCL is set:
 * tweak endian-dependent fields in the IP packet.
 */
static int
linux_sendto_hdrincl(struct proc *p, struct sendto_args *bsd_args)
{
/*
 * linux_ip_copysize defines how many bytes we should copy
 * from the beginning of the IP packet before we customize it for BSD.
 * It should include all the fields we modify (ip_len and ip_off)
 * and be as small as possible to minimize copying overhead.
 */
#define linux_ip_copysize	8
	caddr_t sg;
	struct ip *packet;
	struct msghdr *msg;
	struct iovec *iov;
	int error;
	struct sendmsg_args /* {
		int s;
		caddr_t msg;
		int flags;
	} */ sendmsg_args;

	/* Check the packet isn't too small before we mess with it */
	if (bsd_args->len < linux_ip_copysize)
		return EINVAL;

	/*
	 * Tweaking the user buffer in place would be bad manners.
	 * We create a corrected IP header with just the needed length,
	 * then use an iovec to glue it to the rest of the user packet
	 * when calling sendmsg().
*/ sg = stackgap_init(); packet = (struct ip *)stackgap_alloc(&sg, linux_ip_copysize); msg = (struct msghdr *)stackgap_alloc(&sg, sizeof(*msg)); iov = (struct iovec *)stackgap_alloc(&sg, sizeof(*iov)*2); /* Make a copy of the beginning of the packet to be sent */ if ((error = copyin(bsd_args->buf, (caddr_t)packet, linux_ip_copysize))) return error; /* Convert fields from Linux to BSD raw IP socket format */ packet->ip_len = bsd_args->len; packet->ip_off = ntohs(packet->ip_off); /* Prepare the msghdr and iovec structures describing the new packet */ msg->msg_name = bsd_args->to; msg->msg_namelen = bsd_args->tolen; msg->msg_iov = iov; msg->msg_iovlen = 2; msg->msg_control = NULL; msg->msg_controllen = 0; msg->msg_flags = 0; iov[0].iov_base = (char *)packet; iov[0].iov_len = linux_ip_copysize; iov[1].iov_base = (char *)(bsd_args->buf) + linux_ip_copysize; iov[1].iov_len = bsd_args->len - linux_ip_copysize; sendmsg_args.s = bsd_args->s; sendmsg_args.msg = (caddr_t)msg; sendmsg_args.flags = bsd_args->flags; return sendmsg(p, &sendmsg_args); } struct linux_socket_args { int domain; int type; int protocol; }; static int linux_socket(struct proc *p, struct linux_socket_args *args) { struct linux_socket_args linux_args; struct socket_args /* { int domain; int type; int protocol; } */ bsd_args; int error; int retval_socket; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.protocol = linux_args.protocol; bsd_args.type = linux_args.type; bsd_args.domain = linux_to_bsd_domain(linux_args.domain); if (bsd_args.domain == -1) return EINVAL; retval_socket = socket(p, &bsd_args); if (bsd_args.type == SOCK_RAW && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0) && bsd_args.domain == AF_INET && retval_socket >= 0) { /* It's a raw IP socket: set the IP_HDRINCL option. 
*/ struct setsockopt_args /* { int s; int level; int name; caddr_t val; int valsize; } */ bsd_setsockopt_args; caddr_t sg; int *hdrincl; sg = stackgap_init(); hdrincl = (int *)stackgap_alloc(&sg, sizeof(*hdrincl)); *hdrincl = 1; bsd_setsockopt_args.s = p->p_retval[0]; bsd_setsockopt_args.level = IPPROTO_IP; bsd_setsockopt_args.name = IP_HDRINCL; bsd_setsockopt_args.val = (caddr_t)hdrincl; bsd_setsockopt_args.valsize = sizeof(*hdrincl); /* We ignore any error returned by setsockopt() */ setsockopt(p, &bsd_setsockopt_args); /* Copy back the return value from socket() */ p->p_retval[0] = bsd_setsockopt_args.s; } return retval_socket; } struct linux_bind_args { int s; struct sockaddr *name; int namelen; }; static int linux_bind(struct proc *p, struct linux_bind_args *args) { struct linux_bind_args linux_args; struct bind_args /* { int s; caddr_t name; int namelen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.name = (caddr_t)linux_args.name; bsd_args.namelen = linux_args.namelen; return bind(p, &bsd_args); } struct linux_connect_args { int s; struct sockaddr * name; int namelen; }; static int linux_connect(struct proc *p, struct linux_connect_args *args) { struct linux_connect_args linux_args; struct connect_args /* { int s; caddr_t name; int namelen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.name = (caddr_t)linux_args.name; bsd_args.namelen = linux_args.namelen; error = connect(p, &bsd_args); if (error == EISCONN) { /* * Linux doesn't return EISCONN the first time it occurs, * when on a non-blocking socket. Instead it returns the * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD. 
*/ struct fcntl_args /* { int fd; int cmd; int arg; } */ bsd_fcntl_args; struct getsockopt_args /* { int s; int level; int name; caddr_t val; int *avalsize; } */ bsd_getsockopt_args; void *status, *statusl; int stat, statl = sizeof stat; caddr_t sg; /* Check for non-blocking */ bsd_fcntl_args.fd = linux_args.s; bsd_fcntl_args.cmd = F_GETFL; bsd_fcntl_args.arg = 0; error = fcntl(p, &bsd_fcntl_args); if (error == 0 && (p->p_retval[0] & O_NONBLOCK)) { sg = stackgap_init(); status = stackgap_alloc(&sg, sizeof stat); statusl = stackgap_alloc(&sg, sizeof statusl); if ((error = copyout(&statl, statusl, sizeof statl))) return error; bsd_getsockopt_args.s = linux_args.s; bsd_getsockopt_args.level = SOL_SOCKET; bsd_getsockopt_args.name = SO_ERROR; bsd_getsockopt_args.val = status; bsd_getsockopt_args.avalsize = statusl; error = getsockopt(p, &bsd_getsockopt_args); if (error) return error; if ((error = copyin(status, &stat, sizeof stat))) return error; p->p_retval[0] = stat; return 0; } } return error; } struct linux_listen_args { int s; int backlog; }; static int linux_listen(struct proc *p, struct linux_listen_args *args) { struct linux_listen_args linux_args; struct listen_args /* { int s; int backlog; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.backlog = linux_args.backlog; return listen(p, &bsd_args); } struct linux_accept_args { int s; struct sockaddr *addr; int *namelen; }; static int linux_accept(struct proc *p, struct linux_accept_args *args) { struct linux_accept_args linux_args; struct accept_args /* { int s; caddr_t name; int *anamelen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.name = (caddr_t)linux_args.addr; bsd_args.anamelen = linux_args.namelen; return oaccept(p, &bsd_args); } struct linux_getsockname_args { int s; struct sockaddr *addr; int 
*namelen; }; static int linux_getsockname(struct proc *p, struct linux_getsockname_args *args) { struct linux_getsockname_args linux_args; struct getsockname_args /* { int fdes; caddr_t asa; int *alen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.fdes = linux_args.s; bsd_args.asa = (caddr_t) linux_args.addr; bsd_args.alen = linux_args.namelen; return ogetsockname(p, &bsd_args); } struct linux_getpeername_args { int s; struct sockaddr *addr; int *namelen; }; static int linux_getpeername(struct proc *p, struct linux_getpeername_args *args) { struct linux_getpeername_args linux_args; struct ogetpeername_args /* { int fdes; caddr_t asa; int *alen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.fdes = linux_args.s; bsd_args.asa = (caddr_t) linux_args.addr; bsd_args.alen = linux_args.namelen; return ogetpeername(p, &bsd_args); } struct linux_socketpair_args { int domain; int type; int protocol; int *rsv; }; static int linux_socketpair(struct proc *p, struct linux_socketpair_args *args) { struct linux_socketpair_args linux_args; struct socketpair_args /* { int domain; int type; int protocol; int *rsv; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.domain = linux_to_bsd_domain(linux_args.domain); if (bsd_args.domain == -1) return EINVAL; bsd_args.type = linux_args.type; bsd_args.protocol = linux_args.protocol; bsd_args.rsv = linux_args.rsv; return socketpair(p, &bsd_args); } struct linux_send_args { int s; void *msg; int len; int flags; }; static int linux_send(struct proc *p, struct linux_send_args *args) { struct linux_send_args linux_args; struct osend_args /* { int s; caddr_t buf; int len; int flags; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = 
linux_args.s; bsd_args.buf = linux_args.msg; bsd_args.len = linux_args.len; bsd_args.flags = linux_args.flags; return osend(p, &bsd_args); } struct linux_recv_args { int s; void *msg; int len; int flags; }; static int linux_recv(struct proc *p, struct linux_recv_args *args) { struct linux_recv_args linux_args; struct orecv_args /* { int s; caddr_t buf; int len; int flags; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.buf = linux_args.msg; bsd_args.len = linux_args.len; bsd_args.flags = linux_args.flags; return orecv(p, &bsd_args); } struct linux_sendto_args { int s; void *msg; int len; int flags; caddr_t to; int tolen; }; static int linux_sendto(struct proc *p, struct linux_sendto_args *args) { struct linux_sendto_args linux_args; struct sendto_args /* { int s; caddr_t buf; size_t len; int flags; caddr_t to; int tolen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.buf = linux_args.msg; bsd_args.len = linux_args.len; bsd_args.flags = linux_args.flags; bsd_args.to = linux_args.to; bsd_args.tolen = linux_args.tolen; if (linux_check_hdrincl(p, linux_args.s) == 0) /* IP_HDRINCL set, tweak the packet before sending */ return linux_sendto_hdrincl(p, &bsd_args); return sendto(p, &bsd_args); } struct linux_recvfrom_args { int s; void *buf; int len; int flags; caddr_t from; int *fromlen; }; static int linux_recvfrom(struct proc *p, struct linux_recvfrom_args *args) { struct linux_recvfrom_args linux_args; struct recvfrom_args /* { int s; caddr_t buf; size_t len; int flags; caddr_t from; int *fromlenaddr; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.buf = linux_args.buf; bsd_args.len = linux_args.len; bsd_args.flags = linux_args.flags; bsd_args.from = 
linux_args.from; bsd_args.fromlenaddr = linux_args.fromlen; return orecvfrom(p, &bsd_args); } struct linux_shutdown_args { int s; int how; }; static int linux_shutdown(struct proc *p, struct linux_shutdown_args *args) { struct linux_shutdown_args linux_args; struct shutdown_args /* { int s; int how; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.how = linux_args.how; return shutdown(p, &bsd_args); } struct linux_setsockopt_args { int s; int level; int optname; void *optval; int optlen; }; static int linux_setsockopt(struct proc *p, struct linux_setsockopt_args *args) { struct linux_setsockopt_args linux_args; struct setsockopt_args /* { int s; int level; int name; caddr_t val; int valsize; } */ bsd_args; int error, name; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.level = linux_to_bsd_sockopt_level(linux_args.level); switch (bsd_args.level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(linux_args.optname); break; case IPPROTO_IP: name = linux_to_bsd_ip_sockopt(linux_args.optname); break; case IPPROTO_TCP: /* Linux TCP option values match BSD's */ name = linux_args.optname; break; default: return EINVAL; } if (name == -1) return EINVAL; bsd_args.name = name; bsd_args.val = linux_args.optval; bsd_args.valsize = linux_args.optlen; return setsockopt(p, &bsd_args); } struct linux_getsockopt_args { int s; int level; int optname; void *optval; int *optlen; }; static int linux_getsockopt(struct proc *p, struct linux_getsockopt_args *args) { struct linux_getsockopt_args linux_args; struct getsockopt_args /* { int s; int level; int name; caddr_t val; int *avalsize; } */ bsd_args; int error, name; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.level = linux_to_bsd_sockopt_level(linux_args.level); switch 
(bsd_args.level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(linux_args.optname); break; case IPPROTO_IP: name = linux_to_bsd_ip_sockopt(linux_args.optname); break; case IPPROTO_TCP: /* Linux TCP option values match BSD's */ name = linux_args.optname; break; default: return EINVAL; } if (name == -1) return EINVAL; bsd_args.name = name; bsd_args.val = linux_args.optval; bsd_args.avalsize = linux_args.optlen; return getsockopt(p, &bsd_args); } int linux_socketcall(struct proc *p, struct linux_socketcall_args *args) { switch (args->what) { case LINUX_SOCKET: return linux_socket(p, args->args); case LINUX_BIND: return linux_bind(p, args->args); case LINUX_CONNECT: return linux_connect(p, args->args); case LINUX_LISTEN: return linux_listen(p, args->args); case LINUX_ACCEPT: return linux_accept(p, args->args); case LINUX_GETSOCKNAME: return linux_getsockname(p, args->args); case LINUX_GETPEERNAME: return linux_getpeername(p, args->args); case LINUX_SOCKETPAIR: return linux_socketpair(p, args->args); case LINUX_SEND: return linux_send(p, args->args); case LINUX_RECV: return linux_recv(p, args->args); case LINUX_SENDTO: return linux_sendto(p, args->args); case LINUX_RECVFROM: return linux_recvfrom(p, args->args); case LINUX_SHUTDOWN: return linux_shutdown(p, args->args); case LINUX_SETSOCKOPT: return linux_setsockopt(p, args->args); case LINUX_GETSOCKOPT: return linux_getsockopt(p, args->args); default: uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what); return ENOSYS; } } diff --git a/sys/dev/joy/joy.c b/sys/dev/joy/joy.c index 84682a929247..072be4edb5c5 100644 --- a/sys/dev/joy/joy.c +++ b/sys/dev/joy/joy.c @@ -1,299 +1,300 @@ /*- * Copyright (c) 1995 Jean-Marc Zucconi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include "joy.h" #if NJOY > 0 #include "opt_devfs.h" #include #include #include #include #ifdef DEVFS #include #endif /*DEVFS*/ +#include #include #include #include #include #include /* The game port can manage 4 buttons and 4 variable resistors (usually 2 * joysticks, each with 2 buttons and 2 pots.) via the port at address 0x201. * Getting the state of the buttons is done by reading the game port: * buttons 1-4 correspond to bits 4-7 and resistors 1-4 (X1, Y1, X2, Y2) * to bits 0-3. * if button 1 (resp 2, 3, 4) is pressed, the bit 4 (resp 5, 6, 7) is set to 0 * to get the value of a resistor, write the value 0xff at port and * wait until the corresponding bit returns to 0. */ /* the formulae below only work if u is ``not too large''. 
See also * the discussion in microtime.s */ #define usec2ticks(u) (((u) * 19549)>>14) #define ticks2usec(u) (((u) * 3433)>>12) #define joypart(d) minor(d)&1 #define UNIT(d) minor(d)>>1&3 #ifndef JOY_TIMEOUT #define JOY_TIMEOUT 2000 /* 2 milliseconds */ #endif static struct { int port; int x_off[2], y_off[2]; int timeout[2]; #ifdef DEVFS void *devfs_token; #endif } joy[NJOY]; static int joyprobe (struct isa_device *); static int joyattach (struct isa_device *); struct isa_driver joydriver = {joyprobe, joyattach, "joy"}; #define CDEV_MAJOR 51 static d_open_t joyopen; static d_close_t joyclose; static d_read_t joyread; static d_ioctl_t joyioctl; static struct cdevsw joy_cdevsw = { joyopen, joyclose, joyread, nowrite, /*51*/ joyioctl, nostop, nullreset, nodevtotty,/*joystick */ seltrue, nommap, NULL, "joy", NULL, -1 }; static int get_tick __P((void)); static int joyprobe (struct isa_device *dev) { #ifdef WANT_JOYSTICK_CONNECTED outb (dev->id_iobase, 0xff); DELAY (10000); /* 10 ms delay */ return (inb (dev->id_iobase) & 0x0f) != 0x0f; #else return 1; #endif } static int joyattach (struct isa_device *dev) { int unit = dev->id_unit; joy[unit].port = dev->id_iobase; joy[unit].timeout[0] = joy[unit].timeout[1] = 0; printf("joy%d: joystick\n", unit); #ifdef DEVFS joy[dev->id_unit].devfs_token = devfs_add_devswf(&joy_cdevsw, 0, DV_CHR, 0, 0, 0600, "joy%d", unit); #endif return 1; } static int joyopen (dev_t dev, int flags, int fmt, struct proc *p) { int unit = UNIT (dev); int i = joypart (dev); if (joy[unit].timeout[i]) return EBUSY; joy[unit].x_off[i] = joy[unit].y_off[i] = 0; joy[unit].timeout[i] = JOY_TIMEOUT; return 0; } static int joyclose (dev_t dev, int flags, int fmt, struct proc *p) { int unit = UNIT (dev); int i = joypart (dev); joy[unit].timeout[i] = 0; return 0; } static int joyread (dev_t dev, struct uio *uio, int flag) { int unit = UNIT(dev); int port = joy[unit].port; int i, t0, t1; int state = 0, x = 0, y = 0; struct joystick c; disable_intr (); outb (port, 
0xff); t0 = get_tick (); t1 = t0; i = usec2ticks(joy[unit].timeout[joypart(dev)]); while (t0-t1 < i) { state = inb (port); if (joypart(dev) == 1) state >>= 2; t1 = get_tick (); if (t1 > t0) t1 -= timer0_max_count; if (!x && !(state & 0x01)) x = t1; if (!y && !(state & 0x02)) y = t1; if (x && y) break; } enable_intr (); c.x = x ? joy[unit].x_off[joypart(dev)] + ticks2usec(t0-x) : 0x80000000; c.y = y ? joy[unit].y_off[joypart(dev)] + ticks2usec(t0-y) : 0x80000000; state >>= 4; c.b1 = ~state & 1; c.b2 = ~(state >> 1) & 1; return uiomove ((caddr_t)&c, sizeof(struct joystick), uio); } static int joyioctl (dev_t dev, int cmd, caddr_t data, int flag, struct proc *p) { int unit = UNIT (dev); int i = joypart (dev); int x; switch (cmd) { case JOY_SETTIMEOUT: x = *(int *) data; if (x < 1 || x > 10000) /* 10ms maximum! */ return EINVAL; joy[unit].timeout[i] = x; break; case JOY_GETTIMEOUT: *(int *) data = joy[unit].timeout[i]; break; case JOY_SET_X_OFFSET: joy[unit].x_off[i] = *(int *) data; break; case JOY_SET_Y_OFFSET: joy[unit].y_off[i] = *(int *) data; break; case JOY_GET_X_OFFSET: *(int *) data = joy[unit].x_off[i]; break; case JOY_GET_Y_OFFSET: *(int *) data = joy[unit].y_off[i]; break; default: return ENXIO; } return 0; } static int get_tick () { int low, high; outb (TIMER_MODE, TIMER_SEL0); low = inb (TIMER_CNTR0); high = inb (TIMER_CNTR0); return (high << 8) | low; } static joy_devsw_installed = 0; static void joy_drvinit(void *unused) { dev_t dev; if( ! 
joy_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&joy_cdevsw,NULL); joy_devsw_installed = 1; } } SYSINIT(joydev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,joy_drvinit,NULL) #ifdef JOY_MODULE #include #include #include MOD_DEV (joy, LM_DT_CHAR, CDEV_MAJOR, &joy_cdevsw); static struct isa_device dev = {0, &joydriver, IO_GAME, 0, -1, (caddr_t) 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0}; static int joy_load (struct lkm_table *lkmtp, int cmd) { if (joyprobe (&dev)) { joyattach (&dev); /* joy_drvinit (0);*/ uprintf ("Joystick driver loaded\n"); return 0; } else { uprintf ("Joystick driver: probe failed\n"); return 1; } } static int joy_unload (struct lkm_table *lkmtp, int cmd) { uprintf ("Joystick driver unloaded\n"); return 0; } static int joy_stat (struct lkm_table *lkmtp, int cmd) { return 0; } int joy_mod (struct lkm_table *lkmtp, int cmd, int ver) { MOD_DISPATCH(joy, lkmtp, cmd, ver, joy_load, joy_unload, joy_stat); } #endif /* JOY_MODULE */ #endif /* NJOY > 0 */ diff --git a/sys/gnu/ext2fs/ext2_balloc.c b/sys/gnu/ext2fs/ext2_balloc.c index 3d871239956c..b2317a205e69 100644 --- a/sys/gnu/ext2fs/ext2_balloc.c +++ b/sys/gnu/ext2fs/ext2_balloc.c @@ -1,312 +1,313 @@ /* * modified for Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93 */ #include #include #include #include +#include #include #include #include #include #include #include #include #include /* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. 
*/ int ext2_balloc(ip, bn, size, cred, bpp, flags) register struct inode *ip; register daddr_t bn; int size; struct ucred *cred; struct buf **bpp; int flags; { register struct ext2_sb_info *fs; register daddr_t nb; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[NIADDR + 2]; daddr_t newb, lbn, *bap, pref; int osize, nsize, num, i, error; /* ext2_debug("ext2_balloc called (%d, %d, %d)\n", ip->i_number, (int)bn, (int)size); */ *bpp = NULL; if (bn < 0) return (EFBIG); fs = ip->i_e2fs; lbn = bn; /* * check if this is a sequential block allocation. * If so, increment next_alloc fields to allow ext2_blkpref * to make a good guess */ if (lbn == ip->i_next_alloc_block + 1) { ip->i_next_alloc_block++; ip->i_next_alloc_goal++; } /* * The first NDADDR blocks are direct blocks */ if (bn < NDADDR) { nb = ip->i_db[bn]; /* no new block is to be allocated, and no need to expand the file */ if (nb != 0 && ip->i_size >= (bn + 1) * fs->s_blocksize) { error = bread(vp, bn, fs->s_blocksize, NOCRED, &bp); if (error) { brelse(bp); return (error); } *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. */ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, bn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } } else { /* Godmar thinks: this shouldn't happen w/o fragments */ printf("nsize %d(%d) > osize %d(%d) nb %d\n", (int)nsize, (int)size, (int)osize, (int)ip->i_size, (int)nb); panic( "ext2_balloc: Something is terribly wrong"); /* * please note there haven't been any changes from here on - * FFS seems to work. 
*/ } } else { if (ip->i_size < (bn + 1) * fs->s_blocksize) nsize = fragroundup(fs, size); else nsize = fs->s_blocksize; error = ext2_alloc(ip, bn, ext2_blkpref(ip, bn, (int)bn, &ip->i_db[0], 0), nsize, cred, &newb); if (error) return (error); bp = getblk(vp, bn, nsize, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) vfs_bio_clrbuf(bp); } ip->i_db[bn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if (error = ufs_getlbns(vp, bn, indirs, &num)) return(error); #if DIAGNOSTIC if (num < 1) panic ("ext2_balloc: ufs_bmaparray returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; if (nb == 0) { #if 0 pref = ext2_blkpref(ip, lbn, 0, (daddr_t *)0, 0); #else /* see the comment by ext2_blkpref. What we do here is to pretend that it'd be good for a block holding indirect pointers to be allocated near its predecessor in terms of indirection, or the last direct block. We shamelessly exploit the fact that i_ib immediately follows i_db. Godmar thinks it make sense to allocate i_ib[0] immediately after i_db[11], but it's not utterly clear whether this also applies to i_ib[1] and i_ib[0] */ pref = ext2_blkpref(ip, lbn, indirs[0].in_off + EXT2_NDIR_BLOCKS, &ip->i_db[0], 0); #endif if (error = ext2_alloc(ip, lbn, pref, (int)fs->s_blocksize, cred, &newb)) return (error); nb = newb; bp = getblk(vp, indirs[1].in_lbn, fs->s_blocksize, 0, 0); bp->b_blkno = fsbtodb(fs, newb); vfs_bio_clrbuf(bp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if (error = bwrite(bp)) { ext2_blkfree(ip, nb, fs->s_blocksize); return (error); } ip->i_ib[indirs[0].in_off] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. 
*/ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->s_blocksize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bap = (daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { brelse(bp); continue; } if (pref == 0) #if 1 /* see the comment above and by ext2_blkpref * I think this implements Linux policy, but * does it really make sense to allocate to * block containing pointers together ? * Also, will it ever succeed ? */ pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap, bp->b_lblkno); #else pref = ext2_blkpref(ip, lbn, 0, (daddr_t *)0, 0); #endif if (error = ext2_alloc(ip, lbn, pref, (int)fs->s_blocksize, cred, &newb)) { brelse(bp); return (error); } nb = newb; nbp = getblk(vp, indirs[i].in_lbn, fs->s_blocksize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if (error = bwrite(nbp)) { ext2_blkfree(ip, nb, fs->s_blocksize); brelse(bp); return (error); } bap[indirs[i - 1].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } /* * Get the data block, allocating if necessary. */ if (nb == 0) { pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0], bp->b_lblkno); if (error = ext2_alloc(ip, lbn, pref, (int)fs->s_blocksize, cred, &newb)) { brelse(bp); return (error); } nb = newb; nbp = getblk(vp, lbn, fs->s_blocksize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) vfs_bio_clrbuf(nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. 
*/ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } *bpp = nbp; return (0); } brelse(bp); if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->s_blocksize, NOCRED, &nbp); if (error) { brelse(nbp); return (error); } } else { nbp = getblk(vp, lbn, fs->s_blocksize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); } *bpp = nbp; return (0); } diff --git a/sys/gnu/ext2fs/ext2_subr.c b/sys/gnu/ext2fs/ext2_subr.c index 1f1ce9682d99..3040d2427b7c 100644 --- a/sys/gnu/ext2fs/ext2_subr.c +++ b/sys/gnu/ext2fs/ext2_subr.c @@ -1,122 +1,123 @@ /* * modified for Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ext2_subr.c 8.2 (Berkeley) 9/21/93 */ #include #include #include #include #include +#include #include #include #include #include #include static void ext2_checkoverlap __P((struct buf *, struct inode *)); /* * Return buffer with the contents of block "offset" from the beginning of * directory "ip". If "res" is non-zero, fill it in with a pointer to the * remaining space in the directory. */ int ext2_blkatoff(vp, offset, res, bpp) struct vnode *vp; off_t offset; char **res; struct buf **bpp; { struct inode *ip; register struct ext2_sb_info *fs; struct buf *bp; daddr_t lbn; int bsize, error; ip = VTOI(vp); fs = ip->i_e2fs; lbn = lblkno(fs, offset); bsize = blksize(fs, ip, lbn); *bpp = NULL; if (error = bread(vp, lbn, bsize, NOCRED, &bp)) { brelse(bp); return (error); } if (res) *res = (char *)bp->b_data + blkoff(fs, offset); *bpp = bp; return (0); } #ifdef DDB static void ext2_checkoverlap(bp, ip) struct buf *bp; struct inode *ip; { register struct buf *ebp, *ep; register daddr_t start, last; struct vnode *vp; ebp = &buf[nbuf]; start = bp->b_blkno; last = start + btodb(bp->b_bcount) - 1; for (ep = buf; ep < ebp; ep++) { if (ep == bp || (ep->b_flags & B_INVAL) || ep->b_vp == NULLVP) continue; if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL, NULL)) continue; if (vp != ip->i_devvp) continue; /* look for overlap */ if (ep->b_bcount == 0 || ep->b_blkno > last || ep->b_blkno + btodb(ep->b_bcount) <= start) continue; vprint("Disk overlap", vp); 
(void)printf("\tstart %d, end %d overlap start %d, end %d\n", start, last, ep->b_blkno, ep->b_blkno + btodb(ep->b_bcount) - 1); panic("Disk buffer overlap"); } } #endif /* DDB */ diff --git a/sys/gnu/fs/ext2fs/ext2_balloc.c b/sys/gnu/fs/ext2fs/ext2_balloc.c index 3d871239956c..b2317a205e69 100644 --- a/sys/gnu/fs/ext2fs/ext2_balloc.c +++ b/sys/gnu/fs/ext2fs/ext2_balloc.c @@ -1,312 +1,313 @@ /* * modified for Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93 */ #include #include #include #include +#include #include #include #include #include #include #include #include #include /* * Balloc defines the structure of file system storage * by allocating the physical blocks on a device given * the inode and the logical block number in a file. */ int ext2_balloc(ip, bn, size, cred, bpp, flags) register struct inode *ip; register daddr_t bn; int size; struct ucred *cred; struct buf **bpp; int flags; { register struct ext2_sb_info *fs; register daddr_t nb; struct buf *bp, *nbp; struct vnode *vp = ITOV(ip); struct indir indirs[NIADDR + 2]; daddr_t newb, lbn, *bap, pref; int osize, nsize, num, i, error; /* ext2_debug("ext2_balloc called (%d, %d, %d)\n", ip->i_number, (int)bn, (int)size); */ *bpp = NULL; if (bn < 0) return (EFBIG); fs = ip->i_e2fs; lbn = bn; /* * check if this is a sequential block allocation. * If so, increment next_alloc fields to allow ext2_blkpref * to make a good guess */ if (lbn == ip->i_next_alloc_block + 1) { ip->i_next_alloc_block++; ip->i_next_alloc_goal++; } /* * The first NDADDR blocks are direct blocks */ if (bn < NDADDR) { nb = ip->i_db[bn]; /* no new block is to be allocated, and no need to expand the file */ if (nb != 0 && ip->i_size >= (bn + 1) * fs->s_blocksize) { error = bread(vp, bn, fs->s_blocksize, NOCRED, &bp); if (error) { brelse(bp); return (error); } *bpp = bp; return (0); } if (nb != 0) { /* * Consider need to reallocate a fragment. 
*/ osize = fragroundup(fs, blkoff(fs, ip->i_size)); nsize = fragroundup(fs, size); if (nsize <= osize) { error = bread(vp, bn, osize, NOCRED, &bp); if (error) { brelse(bp); return (error); } } else { /* Godmar thinks: this shouldn't happen w/o fragments */ printf("nsize %d(%d) > osize %d(%d) nb %d\n", (int)nsize, (int)size, (int)osize, (int)ip->i_size, (int)nb); panic( "ext2_balloc: Something is terribly wrong"); /* * please note there haven't been any changes from here on - * FFS seems to work. */ } } else { if (ip->i_size < (bn + 1) * fs->s_blocksize) nsize = fragroundup(fs, size); else nsize = fs->s_blocksize; error = ext2_alloc(ip, bn, ext2_blkpref(ip, bn, (int)bn, &ip->i_db[0], 0), nsize, cred, &newb); if (error) return (error); bp = getblk(vp, bn, nsize, 0, 0); bp->b_blkno = fsbtodb(fs, newb); if (flags & B_CLRBUF) vfs_bio_clrbuf(bp); } ip->i_db[bn] = dbtofsb(fs, bp->b_blkno); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bpp = bp; return (0); } /* * Determine the number of levels of indirection. */ pref = 0; if (error = ufs_getlbns(vp, bn, indirs, &num)) return(error); #if DIAGNOSTIC if (num < 1) panic ("ext2_balloc: ufs_bmaparray returned indirect block"); #endif /* * Fetch the first indirect block allocating if necessary. */ --num; nb = ip->i_ib[indirs[0].in_off]; if (nb == 0) { #if 0 pref = ext2_blkpref(ip, lbn, 0, (daddr_t *)0, 0); #else /* see the comment by ext2_blkpref. What we do here is to pretend that it'd be good for a block holding indirect pointers to be allocated near its predecessor in terms of indirection, or the last direct block. We shamelessly exploit the fact that i_ib immediately follows i_db. 
Godmar thinks it make sense to allocate i_ib[0] immediately after i_db[11], but it's not utterly clear whether this also applies to i_ib[1] and i_ib[0] */ pref = ext2_blkpref(ip, lbn, indirs[0].in_off + EXT2_NDIR_BLOCKS, &ip->i_db[0], 0); #endif if (error = ext2_alloc(ip, lbn, pref, (int)fs->s_blocksize, cred, &newb)) return (error); nb = newb; bp = getblk(vp, indirs[1].in_lbn, fs->s_blocksize, 0, 0); bp->b_blkno = fsbtodb(fs, newb); vfs_bio_clrbuf(bp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if (error = bwrite(bp)) { ext2_blkfree(ip, nb, fs->s_blocksize); return (error); } ip->i_ib[indirs[0].in_off] = newb; ip->i_flag |= IN_CHANGE | IN_UPDATE; } /* * Fetch through the indirect blocks, allocating as necessary. */ for (i = 1;;) { error = bread(vp, indirs[i].in_lbn, (int)fs->s_blocksize, NOCRED, &bp); if (error) { brelse(bp); return (error); } bap = (daddr_t *)bp->b_data; nb = bap[indirs[i].in_off]; if (i == num) break; i += 1; if (nb != 0) { brelse(bp); continue; } if (pref == 0) #if 1 /* see the comment above and by ext2_blkpref * I think this implements Linux policy, but * does it really make sense to allocate to * block containing pointers together ? * Also, will it ever succeed ? */ pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap, bp->b_lblkno); #else pref = ext2_blkpref(ip, lbn, 0, (daddr_t *)0, 0); #endif if (error = ext2_alloc(ip, lbn, pref, (int)fs->s_blocksize, cred, &newb)) { brelse(bp); return (error); } nb = newb; nbp = getblk(vp, indirs[i].in_lbn, fs->s_blocksize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); vfs_bio_clrbuf(nbp); /* * Write synchronously so that indirect blocks * never point at garbage. */ if (error = bwrite(nbp)) { ext2_blkfree(ip, nb, fs->s_blocksize); brelse(bp); return (error); } bap[indirs[i - 1].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } } /* * Get the data block, allocating if necessary. 
*/ if (nb == 0) { pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0], bp->b_lblkno); if (error = ext2_alloc(ip, lbn, pref, (int)fs->s_blocksize, cred, &newb)) { brelse(bp); return (error); } nb = newb; nbp = getblk(vp, lbn, fs->s_blocksize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); if (flags & B_CLRBUF) vfs_bio_clrbuf(nbp); bap[indirs[i].in_off] = nb; /* * If required, write synchronously, otherwise use * delayed write. */ if (flags & B_SYNC) { bwrite(bp); } else { bdwrite(bp); } *bpp = nbp; return (0); } brelse(bp); if (flags & B_CLRBUF) { error = bread(vp, lbn, (int)fs->s_blocksize, NOCRED, &nbp); if (error) { brelse(nbp); return (error); } } else { nbp = getblk(vp, lbn, fs->s_blocksize, 0, 0); nbp->b_blkno = fsbtodb(fs, nb); } *bpp = nbp; return (0); } diff --git a/sys/gnu/fs/ext2fs/ext2_subr.c b/sys/gnu/fs/ext2fs/ext2_subr.c index 1f1ce9682d99..3040d2427b7c 100644 --- a/sys/gnu/fs/ext2fs/ext2_subr.c +++ b/sys/gnu/fs/ext2fs/ext2_subr.c @@ -1,122 +1,123 @@ /* * modified for Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science */ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ext2_subr.c 8.2 (Berkeley) 9/21/93 */ #include #include #include #include #include +#include #include #include #include #include #include static void ext2_checkoverlap __P((struct buf *, struct inode *)); /* * Return buffer with the contents of block "offset" from the beginning of * directory "ip". If "res" is non-zero, fill it in with a pointer to the * remaining space in the directory. 
*/ int ext2_blkatoff(vp, offset, res, bpp) struct vnode *vp; off_t offset; char **res; struct buf **bpp; { struct inode *ip; register struct ext2_sb_info *fs; struct buf *bp; daddr_t lbn; int bsize, error; ip = VTOI(vp); fs = ip->i_e2fs; lbn = lblkno(fs, offset); bsize = blksize(fs, ip, lbn); *bpp = NULL; if (error = bread(vp, lbn, bsize, NOCRED, &bp)) { brelse(bp); return (error); } if (res) *res = (char *)bp->b_data + blkoff(fs, offset); *bpp = bp; return (0); } #ifdef DDB static void ext2_checkoverlap(bp, ip) struct buf *bp; struct inode *ip; { register struct buf *ebp, *ep; register daddr_t start, last; struct vnode *vp; ebp = &buf[nbuf]; start = bp->b_blkno; last = start + btodb(bp->b_bcount) - 1; for (ep = buf; ep < ebp; ep++) { if (ep == bp || (ep->b_flags & B_INVAL) || ep->b_vp == NULLVP) continue; if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL, NULL)) continue; if (vp != ip->i_devvp) continue; /* look for overlap */ if (ep->b_bcount == 0 || ep->b_blkno > last || ep->b_blkno + btodb(ep->b_bcount) <= start) continue; vprint("Disk overlap", vp); (void)printf("\tstart %d, end %d overlap start %d, end %d\n", start, last, ep->b_blkno, ep->b_blkno + btodb(ep->b_bcount) - 1); panic("Disk buffer overlap"); } } #endif /* DDB */ diff --git a/sys/i386/i386/cons.c b/sys/i386/i386/cons.c index 6cccc4fb6a7b..a90283af2009 100644 --- a/sys/i386/i386/cons.c +++ b/sys/i386/i386/cons.c @@ -1,445 +1,446 @@ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. 
* 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)cons.c 7.2 (Berkeley) 5/9/91 - * $Id: cons.c,v 1.55 1997/09/14 03:19:01 peter Exp $ + * $Id: cons.c,v 1.56 1998/01/24 02:54:12 eivind Exp $ */ #include "opt_devfs.h" #include #ifdef DEVFS #include #endif /*DEVFS*/ #include #include #include #include #include #include #include +#include #include #include /* XXX this should be config(8)ed. 
*/ #include "sc.h" #include "vt.h" #include "sio.h" static struct consdev constab[] = { #if NSC > 0 { sccnprobe, sccninit, sccngetc, sccncheckc, sccnputc }, #endif #if NVT > 0 { pccnprobe, pccninit, pccngetc, pccncheckc, pccnputc }, #endif #if NSIO > 0 { siocnprobe, siocninit, siocngetc, siocncheckc, siocnputc }, #endif { 0 }, }; static d_open_t cnopen; static d_close_t cnclose; static d_read_t cnread; static d_write_t cnwrite; static d_ioctl_t cnioctl; static d_poll_t cnpoll; #define CDEV_MAJOR 0 static struct cdevsw cn_cdevsw = { cnopen, cnclose, cnread, cnwrite, /*0*/ cnioctl, nullstop, nullreset, nodevtotty,/* console */ cnpoll, nommap, NULL, "console", NULL, -1 }; static dev_t cn_dev_t; /* seems to be never really used */ SYSCTL_OPAQUE(_machdep, CPU_CONSDEV, consdev, CTLTYPE_OPAQUE|CTLFLAG_RD, &cn_dev_t, sizeof cn_dev_t, "T,dev_t", ""); static int cn_mute; int cons_unavail = 0; /* XXX: * physical console not available for * input (i.e., it is in graphics mode) */ static u_char cn_is_open; /* nonzero if logical console is open */ static int openmode, openflag; /* how /dev/console was openned */ static u_char cn_phys_is_open; /* nonzero if physical device is open */ static d_close_t *cn_phys_close; /* physical device close function */ static d_open_t *cn_phys_open; /* physical device open function */ static struct consdev *cn_tab; /* physical console device info */ static struct tty *cn_tp; /* physical console tty struct */ #ifdef DEVFS static void *cn_devfs_token; /* represents the devfs entry */ #endif /* DEVFS */ void cninit() { struct consdev *best_cp, *cp; /* * Find the first console with the highest priority. */ best_cp = NULL; for (cp = constab; cp->cn_probe; cp++) { (*cp->cn_probe)(cp); if (cp->cn_pri > CN_DEAD && (best_cp == NULL || cp->cn_pri > best_cp->cn_pri)) best_cp = cp; } /* * Check if we should mute the console (for security reasons perhaps) * It can be changes dynamically using sysctl kern.consmute * once we are up and going. 
* */ cn_mute = ((boothowto & (RB_MUTE |RB_SINGLE |RB_VERBOSE |RB_ASKNAME |RB_CONFIG)) == RB_MUTE); /* * If no console, give up. */ if (best_cp == NULL) { cn_tab = best_cp; return; } /* * Initialize console, then attach to it. This ordering allows * debugging using the previous console, if any. * XXX if there was a previous console, then its driver should * be informed when we forget about it. */ (*best_cp->cn_init)(best_cp); cn_tab = best_cp; } void cninit_finish() { struct cdevsw *cdp; if ((cn_tab == NULL) || cn_mute) return; /* * Hook the open and close functions. */ cdp = cdevsw[major(cn_tab->cn_dev)]; cn_phys_close = cdp->d_close; cdp->d_close = cnclose; cn_phys_open = cdp->d_open; cdp->d_open = cnopen; cn_tp = (*cdp->d_devtotty)(cn_tab->cn_dev); cn_dev_t = cn_tp->t_dev; } static void cnuninit(void) { struct cdevsw *cdp; if (cn_tab == NULL) return; /* * Unhook the open and close functions. */ cdp = cdevsw[major(cn_tab->cn_dev)]; cdp->d_close = cn_phys_close; cn_phys_close = NULL; cdp->d_open = cn_phys_open; cn_phys_open = NULL; cn_tp = NULL; cn_dev_t = 0; } /* * User has changed the state of the console muting. * This may require us to open or close the device in question. */ static int sysctl_kern_consmute SYSCTL_HANDLER_ARGS { int error; int ocn_mute; ocn_mute = cn_mute; error = sysctl_handle_int(oidp, &cn_mute, 0, req); if((error == 0) && (cn_tab != NULL) && (req->newptr != NULL)) { if(ocn_mute && !cn_mute) { /* * going from muted to unmuted.. open the physical dev * if the console has been openned */ cninit_finish(); if(cn_is_open) /* XXX curproc is not what we want really */ error = cnopen(cn_dev_t, openflag, openmode, curproc); /* if it failed, back it out */ if ( error != 0) cnuninit(); } else if (!ocn_mute && cn_mute) { /* * going from unmuted to muted.. 
close the physical dev * if it's only open via /dev/console */ if(cn_is_open) error = cnclose(cn_dev_t, openflag, openmode, curproc); if ( error == 0) cnuninit(); } if (error != 0) { /* * back out the change if there was an error */ cn_mute = ocn_mute; } } return (error); } SYSCTL_PROC(_kern, OID_AUTO, consmute, CTLTYPE_INT|CTLFLAG_RW, 0, sizeof cn_mute, sysctl_kern_consmute, "I", ""); static int cnopen(dev, flag, mode, p) dev_t dev; int flag, mode; struct proc *p; { dev_t cndev, physdev; int retval = 0; if (cn_tab == NULL) return (0); cndev = cn_tab->cn_dev; physdev = (major(dev) == major(cndev) ? dev : cndev); /* * If mute is active, then non console opens don't get here * so we don't need to check for that. They * bypass this and go straight to the device. */ if(!cn_mute) retval = (*cn_phys_open)(physdev, flag, mode, p); if (retval == 0) { /* * check if we openned it via /dev/console or * via the physical entry (e.g. /dev/sio0). */ if (dev == cndev) cn_phys_is_open = 1; else if (physdev == cndev) { openmode = mode; openflag = flag; cn_is_open = 1; } } return (retval); } static int cnclose(dev, flag, mode, p) dev_t dev; int flag, mode; struct proc *p; { dev_t cndev; if (cn_tab == NULL) return (0); cndev = cn_tab->cn_dev; /* * act appropriatly depending on whether it's /dev/console * or the pysical device (e.g. /dev/sio) that's being closed. * in either case, don't actually close the device unless * both are closed. 
*/ if (dev == cndev) { /* the physical device is about to be closed */ cn_phys_is_open = 0; if (cn_is_open) { if (cn_tp) { /* perform a ttyhalfclose() */ /* reset session and proc group */ cn_tp->t_pgrp = NULL; cn_tp->t_session = NULL; } return (0); } } else if (major(dev) != major(cndev)) { /* the logical console is about to be closed */ cn_is_open = 0; if (cn_phys_is_open) return (0); dev = cndev; } if(cn_phys_close) return ((*cn_phys_close)(dev, flag, mode, p)); return (0); } static int cnread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { if ((cn_tab == NULL) || cn_mute) return (0); dev = cn_tab->cn_dev; return ((*cdevsw[major(dev)]->d_read)(dev, uio, flag)); } static int cnwrite(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { if ((cn_tab == NULL) || cn_mute) { uio->uio_resid = 0; /* dump the data */ return (0); } if (constty) dev = constty->t_dev; else dev = cn_tab->cn_dev; return ((*cdevsw[major(dev)]->d_write)(dev, uio, flag)); } static int cnioctl(dev, cmd, data, flag, p) dev_t dev; int cmd; caddr_t data; int flag; struct proc *p; { int error; if ((cn_tab == NULL) || cn_mute) return (0); /* * Superuser can always use this to wrest control of console * output from the "virtual" console. 
*/ if (cmd == TIOCCONS && constty) { error = suser(p->p_ucred, (u_short *) NULL); if (error) return (error); constty = NULL; return (0); } dev = cn_tab->cn_dev; return ((*cdevsw[major(dev)]->d_ioctl)(dev, cmd, data, flag, p)); } static int cnpoll(dev, events, p) dev_t dev; int events; struct proc *p; { if ((cn_tab == NULL) || cn_mute) return (1); dev = cn_tab->cn_dev; return ((*cdevsw[major(dev)]->d_poll)(dev, events, p)); } int cngetc() { int c; if ((cn_tab == NULL) || cn_mute) return (-1); c = (*cn_tab->cn_getc)(cn_tab->cn_dev); if (c == '\r') c = '\n'; /* console input is always ICRNL */ return (c); } int cncheckc() { if ((cn_tab == NULL) || cn_mute) return (-1); return ((*cn_tab->cn_checkc)(cn_tab->cn_dev)); } void cnputc(c) register int c; { if ((cn_tab == NULL) || cn_mute) return; if (c) { if (c == '\n') (*cn_tab->cn_putc)(cn_tab->cn_dev, '\r'); (*cn_tab->cn_putc)(cn_tab->cn_dev, c); } } static cn_devsw_installed = 0; static void cn_drvinit(void *unused) { dev_t dev; if( ! cn_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&cn_cdevsw,NULL); cn_devsw_installed = 1; #ifdef DEVFS cn_devfs_token = devfs_add_devswf(&cn_cdevsw, 0, DV_CHR, UID_ROOT, GID_WHEEL, 0600, "console"); #endif } } SYSINIT(cndev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,cn_drvinit,NULL) diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c index 5e1bb32e54a1..f4475bb199b8 100644 --- a/sys/i386/i386/trap.c +++ b/sys/i386/i386/trap.c @@ -1,1104 +1,1105 @@ /*- * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.122 1998/02/06 12:13:10 eivind Exp $ + * $Id: trap.c,v 1.123 1998/03/23 19:52:37 jlemon Exp $ */ /* * 386 Trap and System call handling */ #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_ktrace.h" #include "opt_trap.h" #include "opt_vm86.h" #include #include #include #include #include #include #include #include #include +#include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef VM86 #include #endif #include "isa.h" #include "npx.h" extern struct i386tss common_tss; int (*pmath_emulate) __P((struct trapframe *)); extern void trap __P((struct trapframe frame)); extern int trapwrite __P((unsigned addr)); extern void syscall __P((struct trapframe frame)); static int trap_pfault __P((struct trapframe *, int)); static void trap_fatal __P((struct trapframe *)); void dblfault_handler __P((void)); extern inthand_t IDTVEC(syscall); #define MAX_TRAP_MSG 28 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "system forced exception", /* 7 T_ASTFLT */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "trace trap", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS 
fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ }; static void userret __P((struct proc *p, struct trapframe *frame, u_quad_t oticks)); #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern struct gate_descriptor *t_idt; extern int has_f00f_bug; #endif static inline void userret(p, frame, oticks) struct proc *p; struct trapframe *frame; u_quad_t oticks; { int sig, s; while ((sig = CURSIG(p)) != 0) postsig(sig); #if 0 if (!want_resched && (p->p_priority <= p->p_usrpri) && (p->p_rtprio.type == RTP_PRIO_NORMAL)) { int newpriority; p->p_estcpu += 1; newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; newpriority = min(newpriority, MAXPRI); p->p_usrpri = newpriority; } #endif p->p_priority = p->p_usrpri; if (want_resched) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another * (since the running process is not on a queue.) * If that happened after we setrunqueue ourselves but before we * mi_switch()'ed, we might not be on the queue indicated by * our priority. */ s = splhigh(); setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; mi_switch(); splx(s); while ((sig = CURSIG(p)) != 0) postsig(sig); } /* * Charge system time if profiling. */ if (p->p_flag & P_PROFIL) addupc_task(p, frame->tf_eip, (u_int)(p->p_sticks - oticks) * psratio); curpriority = p->p_priority; } /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. 
*/ void trap(frame) struct trapframe frame; { struct proc *p = curproc; u_quad_t sticks = 0; int i = 0, ucode = 0, type, code; #ifdef DEBUG u_long eva; #endif #if defined(I586_CPU) && !defined(NO_F00F_HACK) restart: #endif type = frame.tf_trapno; code = frame.tf_err; #ifdef VM86 if (in_vm86call) { if (frame.tf_eflags & PSL_VM && (type == T_PROTFLT || type == T_STKFLT)) { i = vm86_emulate((struct vm86frame *)&frame); if (i != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)&frame); return; } switch (type) { /* * these traps want either a process context, or * assume a normal userspace trap. */ case T_PROTFLT: case T_SEGNPFLT: trap_fatal(&frame); return; case T_TRCTRAP: type = T_BPTFLT; /* kernel breakpoint */ /* FALL THROUGH */ } goto kernel_trap; /* normal kernel trap handling */ } #endif if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) { /* user trap */ sticks = p->p_sticks; p->p_md.md_regs = &frame; switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ ucode = type; i = SIGILL; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ frame.tf_eflags &= ~PSL_T; i = SIGTRAP; break; case T_ARITHTRAP: /* arithmetic trap */ ucode = code; i = SIGFPE; break; case T_ASTFLT: /* Allow process switch */ astoff(); cnt.v_soft++; if (p->p_flag & P_OWEUPC) { p->p_flag &= ~P_OWEUPC; addupc_task(p, p->p_stats->p_prof.pr_addr, p->p_stats->p_prof.pr_ticks); } goto out; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. 
*/ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ #ifdef VM86 if (frame.tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)&frame); if (i == 0) goto out; break; } #endif /* VM86 */ /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ case T_TSSFLT: /* invalid TSS fault */ case T_DOUBLEFLT: /* double fault */ default: ucode = code + BUS_SEGM_FAULT ; i = SIGBUS; break; case T_PAGEFLT: /* page fault */ i = trap_pfault(&frame, TRUE); if (i == -1) return; #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) goto restart; #endif if (i == 0) goto out; ucode = T_PAGEFLT; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV_TRAP; i = SIGFPE; break; #if NISA > 0 case T_NMI: #ifdef POWERFAIL_NMI goto handle_powerfail; #else /* !POWERFAIL_NMI */ #ifdef DDB /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); if (kdb_trap (type, 0, &frame)) return; #endif /* DDB */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) return; panic("NMI indicates hardware failure"); #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF_TRAP; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_SUBRNG_TRAP; i = SIGFPE; break; case T_DNA: #if NNPX > 0 /* if a transparent fault (due to context switch "late") */ if (npxdna()) return; #endif if (!pmath_emulate) { i = SIGFPE; ucode = FPE_FPU_NP_TRAP; break; } i = (*pmath_emulate)(&frame); if (i == 0) { if (!(frame.tf_eflags & PSL_T)) return; frame.tf_eflags &= ~PSL_T; i = SIGTRAP; } /* else ucode = emulator_only_knows() XXX */ break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = T_FPOPFLT; i = SIGILL; break; } } else { #ifdef VM86 kernel_trap: #endif /* kernel trap */ switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(&frame, FALSE); return; case T_DNA: #if NNPX > 0 /* * The kernel is apparently using npx for 
copying. * XXX this should be fatal unless the kernel has * registered such use. */ if (npxdna()) return; #endif break; case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ #define MAYBE_DORETI_FAULT(where, whereto) \ do { \ if (frame.tf_eip == (int)where) { \ frame.tf_eip = (int)whereto; \ return; \ } \ } while (0) if (intr_nesting_level == 0) { /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame.tf_eip == (int)cpu_switch_load_fs) { curpcb->pcb_fs = 0; psignal(p, SIGBUS); return; } if (frame.tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; psignal(p, SIGBUS); return; } MAYBE_DORETI_FAULT(doreti_iret, doreti_iret_fault); MAYBE_DORETI_FAULT(doreti_popl_ds, doreti_popl_ds_fault); MAYBE_DORETI_FAULT(doreti_popl_es, doreti_popl_es_fault); if (curpcb && curpcb->pcb_onfault) { frame.tf_eip = (int)curpcb->pcb_onfault; return; } } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. 
*/ if (frame.tf_eflags & PSL_NT) { frame.tf_eflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* trace trap */ if (frame.tf_eip == (int)IDTVEC(syscall)) { /* * We've just entered system mode via the * syscall lcall. Continue single stepping * silently until the syscall handler has * saved the flags. */ return; } if (frame.tf_eip == (int)IDTVEC(syscall) + 1) { /* * The syscall handler has now saved the * flags. Stop single stepping it. */ frame.tf_eflags &= ~PSL_T; return; } /* * Fall through. */ case T_BPTFLT: /* * If DDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef DDB if (kdb_trap (type, 0, &frame)) return; #endif break; #if NISA > 0 case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif handle_powerfail: { static unsigned lastalert = 0; if(time.tv_sec - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(TIMER_FREQ/880, hz); lastalert = time.tv_sec; } return; } #else /* !POWERFAIL_NMI */ #ifdef DDB /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); if (kdb_trap (type, 0, &frame)) return; #endif /* DDB */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) return; /* FALL THROUGH */ #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ } trap_fatal(&frame); return; } trapsignal(p, i, ucode); #ifdef DEBUG eva = rcr2(); if (type <= MAX_TRAP_MSG) { uprintf("fatal process exception: %s", trap_msg[type]); if ((type == T_PAGEFLT) || (type == T_PROTFLT)) uprintf(", fault VA = 0x%x", eva); uprintf("\n"); } #endif out: userret(p, &frame, sticks); } #ifdef notyet /* * This version doesn't allow a page fault to user space while * in the kernel. The rest of the kernel needs to be made "safe" * before this can be used. I think the only things remaining * to be made safe are the iBCS2 code and the process tracing/ * debugging code. 
*/ static int trap_pfault(frame, usermode) struct trapframe *frame; int usermode; { vm_offset_t va; struct vmspace *vm = NULL; vm_map_t map = 0; int rv = 0; vm_prot_t ftype; int eva; struct proc *p = curproc; if (frame->tf_err & PGEX_W) ftype = VM_PROT_READ | VM_PROT_WRITE; else ftype = VM_PROT_READ; eva = rcr2(); va = trunc_page((vm_offset_t)eva); if (va < VM_MIN_KERNEL_ADDRESS) { vm_offset_t v; vm_page_t mpte; if (p == NULL || (!usermode && va < VM_MAXUSER_ADDRESS && (intr_nesting_level != 0 || curpcb == NULL || curpcb->pcb_onfault == NULL))) { trap_fatal(frame); return (-1); } /* * This is a fault on non-kernel virtual memory. * vm is initialized above to NULL. If curproc is NULL * or curproc->p_vmspace is NULL the fault is fatal. */ vm = p->p_vmspace; if (vm == NULL) goto nogo; map = &vm->vm_map; /* * Keep swapout from messing with us during this * critical time. */ ++p->p_lock; /* * Grow the stack if necessary */ if ((caddr_t)va > vm->vm_maxsaddr && (caddr_t)va < (caddr_t)USRSTACK) { if (!grow(p, va)) { rv = KERN_FAILURE; --p->p_lock; goto nogo; } } /* Fault in the user page: */ rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0); --p->p_lock; } else { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) goto nogo; /* * Since we know that kernel virtual address addresses * always have pte pages mapped, we just have to fault * the page. */ rv = vm_fault(kernel_map, va, ftype, FALSE); } if (rv == KERN_SUCCESS) return (0); nogo: if (!usermode) { if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame); return (-1); } /* kludge to pass faulting virtual address to sendsig */ frame->tf_err = eva; return((rv == KERN_PROTECTION_FAILURE) ? 
SIGBUS : SIGSEGV); } #endif int trap_pfault(frame, usermode) struct trapframe *frame; int usermode; { vm_offset_t va; struct vmspace *vm = NULL; vm_map_t map = 0; int rv = 0; vm_prot_t ftype; int eva; struct proc *p = curproc; eva = rcr2(); va = trunc_page((vm_offset_t)eva); if (va >= KERNBASE) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&t_idt[6]) && has_f00f_bug) { frame->tf_trapno = T_PRIVINFLT; return -2; } #endif if (usermode) goto nogo; map = kernel_map; } else { /* * This is a fault on non-kernel virtual memory. * vm is initialized above to NULL. If curproc is NULL * or curproc->p_vmspace is NULL the fault is fatal. */ if (p != NULL) vm = p->p_vmspace; if (vm == NULL) goto nogo; map = &vm->vm_map; } if (frame->tf_err & PGEX_W) ftype = VM_PROT_READ | VM_PROT_WRITE; else ftype = VM_PROT_READ; if (map != kernel_map) { /* * Keep swapout from messing with us during this * critical time. */ ++p->p_lock; /* * Grow the stack if necessary */ if ((caddr_t)va > vm->vm_maxsaddr && (caddr_t)va < (caddr_t)USRSTACK) { if (!grow(p, va)) { rv = KERN_FAILURE; --p->p_lock; goto nogo; } } /* Fault in the user page: */ rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0); --p->p_lock; } else { /* * Don't have to worry about process locking or stacks in the kernel. */ rv = vm_fault(map, va, ftype, FALSE); } if (rv == KERN_SUCCESS) return (0); nogo: if (!usermode) { if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame); return (-1); } /* kludge to pass faulting virtual address to sendsig */ frame->tf_err = eva; return((rv == KERN_PROTECTION_FAILURE) ? 
SIGBUS : SIGSEGV); } static void trap_fatal(frame) struct trapframe *frame; { int code, type, eva, ss, esp; struct soft_segment_descriptor softseg; code = frame->tf_err; type = frame->tf_trapno; eva = rcr2(); sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); if (type <= MAX_TRAP_MSG) printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg[type], frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* three seperate prints in case of a trap on an unmapped page */ printf("mp_lock = %08x; ", mp_lock); printf("cpuid = %d; ", cpuid); printf("lapic.id = %08x\n", lapic.id); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = "); if (curproc) { printf("%lu (%s)\n", (u_long)curproc->p_pid, curproc->p_comm ? 
curproc->p_comm : "");
	} else {
		printf("Idle\n");
	}
	/* Report which interrupt classes are masked (spl level) at trap time. */
	printf("interrupt mask = ");
	if ((cpl & net_imask) == net_imask)
		printf("net ");
	if ((cpl & tty_imask) == tty_imask)
		printf("tty ");
	if ((cpl & bio_imask) == bio_imask)
		printf("bio ");
	if ((cpl & cam_imask) == cam_imask)
		printf("cam ");
	if (cpl == 0)
		printf("none");
#ifdef SMP
/**
 * XXX FIXME:
 *	we probably SHOULD have stopped the other CPUs before now!
 *	another CPU COULD have been touching cpl at this moment...
 */
	printf(" <- SMP: XXX");
#endif
	printf("\n");

#ifdef KDB
	/* Give the kernel debugger a chance to take over before panicking. */
	if (kdb_trap(&psl))
		return;
#endif
#ifdef DDB
	/* Likewise for DDB; a debugger return resumes without panicking. */
	if (kdb_trap (type, 0, frame))
		return;
#endif
	printf("trap number = %d\n", type);
	if (type <= MAX_TRAP_MSG)
		/*
		 * NOTE(review): trap_msg[] entries are fixed strings with no
		 * conversion specifiers, so using one directly as the panic
		 * format string is safe here.
		 */
		panic(trap_msg[type]);
	else
		panic("unknown/reserved trap");
}

/*
 * Double fault handler. Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack. This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs. This means that the stack that was active at
 * the time of the double fault is not available unless
 * the machine was idle when the double fault occurred. The downside
 * of this is that "trace <ebp>" in ddb won't work.
 */
void
dblfault_handler()
{
	/*
	 * Registers are taken from the TSS snapshot rather than a trapframe:
	 * the double fault arrives via a task gate, so the faulting context
	 * was saved in common_tss by the hardware task switch.
	 */
	printf("\nFatal double fault:\n");
	printf("eip = 0x%x\n", common_tss.tss_eip);
	printf("esp = 0x%x\n", common_tss.tss_esp);
	printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
	/* three separate prints in case of a trap on an unmapped page */
	printf("mp_lock = %08x; ", mp_lock);
	printf("cpuid = %d; ", cpuid);
	printf("lapic.id = %08x\n", lapic.id);
#endif
	panic("double fault");
}

/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
*/ int trapwrite(addr) unsigned addr; { struct proc *p; vm_offset_t va; struct vmspace *vm; int rv; va = trunc_page((vm_offset_t)addr); /* * XXX - MAX is END. Changed > to >= for temp. fix. */ if (va >= VM_MAXUSER_ADDRESS) return (1); p = curproc; vm = p->p_vmspace; ++p->p_lock; if ((caddr_t)va >= vm->vm_maxsaddr && (caddr_t)va < (caddr_t)USRSTACK) { if (!grow(p, va)) { --p->p_lock; return (1); } } /* * fault the data page */ rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY); --p->p_lock; if (rv != KERN_SUCCESS) return 1; return (0); } /* * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ void syscall(frame) struct trapframe frame; { caddr_t params; int i; struct sysent *callp; struct proc *p = curproc; u_quad_t sticks; int error; int args[8]; u_int code; #ifdef DIAGNOSTIC if (ISPL(frame.tf_cs) != SEL_UPL) panic("syscall"); #endif sticks = p->p_sticks; p->p_md.md_regs = &frame; params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; if (p->p_sysent->sv_prepsyscall) { (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ code = fuword(params); params += sizeof(int); } else if (code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. 
*/ code = fuword(params); params += sizeof(quad_t); } } if (p->p_sysent->sv_mask) code &= p->p_sysent->sv_mask; if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; if (params && (i = callp->sy_narg * sizeof(int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, args); #endif goto bad; } #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, args); #endif p->p_retval[0] = 0; p->p_retval[1] = frame.tf_edx; STOPEVENT(p, S_SCE, callp->sy_narg); error = (*callp->sy_call)(p, args); switch (error) { case 0: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. */ p = curproc; frame.tf_eax = p->p_retval[0]; frame.tf_edx = p->p_retval[1]; frame.tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, * int 0x80 is 2 bytes. We saved this in tf_err. */ frame.tf_eip -= frame.tf_err; break; case EJUSTRETURN: break; default: bad: if (p->p_sysent->sv_errsize) if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; frame.tf_eax = error; frame.tf_eflags |= PSL_C; break; } if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) { /* Traced syscall. */ frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); } userret(p, &frame, sticks); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, code, error, p->p_retval[0]); #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); } /* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. 
*/ void fork_return(p, frame) struct proc *p; struct trapframe frame; { frame.tf_eax = 0; /* Child returns zero */ frame.tf_eflags &= ~PSL_C; /* success */ frame.tf_edx = 1; userret(p, &frame, 0); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, SYS_fork, 0, 0); #endif } diff --git a/sys/i386/isa/asc.c b/sys/i386/isa/asc.c index 7965ddf934b7..cd40df43c528 100644 --- a/sys/i386/isa/asc.c +++ b/sys/i386/isa/asc.c @@ -1,944 +1,945 @@ /* asc.c - device driver for hand scanners * * Current version supports: * * - AmiScan (Mustek) Color and BW hand scanners (GI1904 chipset) * * Copyright (c) 1995 Gunther Schadow. All rights reserved. * Copyright (c) 1995,1996,1997 Luigi Rizzo. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Gunther Schadow * and Luigi Rizzo. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* - * $Id: asc.c,v 1.27 1997/09/14 03:19:05 peter Exp $ + * $Id: asc.c,v 1.28 1998/01/24 02:54:15 eivind Exp $ */ #include "asc.h" #if NASC > 0 #ifdef FREEBSD_1_X #include "param.h" #include "systm.h" #include "proc.h" #include "buf.h" #include "malloc.h" #include "kernel.h" #include "ioctl.h" #include "i386/isa/isa_device.h" #include "i386/isa/ascreg.h" #include "machine/asc_ioctl.h" #else #include #include #include #include #include #include #include #include #include "opt_devfs.h" #ifdef DEVFS #include #endif /*DEVFS*/ +#include #include #include #include #include #endif /* FREEBSD_1_X */ /*** *** CONSTANTS & DEFINES *** ***/ #define PROBE_FAIL 0 #define PROBE_SUCCESS IO_ASCSIZE #define ATTACH_FAIL 0 #define ATTACH_SUCCESS 1 #define SUCCESS 0 #define FAIL -1 #define INVALID FAIL #define DMA1_READY 0x08 #define ASCDEBUG #ifdef ASCDEBUG # define lprintf if(scu->flags & FLAG_DEBUG) printf #else # define lprintf (void) #endif #define TIMEOUT (hz*15) /* timeout while reading a buffer - default value */ #define ASCPRI PRIBIO /* priority while reading a buffer */ /*** *** LAYOUT OF THE MINOR NUMBER ***/ #define UNIT_MASK 0xc0 /* unit asc0 .. 
asc3 */ #define UNIT(x) (x >> 6) #define DBUG_MASK 0x20 #define FRMT_MASK 0x18 /* output format */ #define FRMT_RAW 0x00 /* output bits as read from scanner */ #define FRMT_GRAY 0x1 /* output gray mode for color scanner */ #define FRMT_PBM 0x08 /* output pbm format */ #define FRMT_PGM 0x18 /*** *** THE GEMOMETRY TABLE ***/ #define GREY_LINE 826 /* 825, or 826 , or 550 ??? */ static const struct asc_geom { int dpi; /* dots per inch */ int dpl; /* dots per line */ int bpl; /* bytes per line */ int g_res; /* get resolution value (ASC_STAT) */ } geomtab[] = { { 800, 3312, 414, ASC_RES_800}, { 700, 2896, 362, ASC_RES_700}, { 600, 2480, 310, ASC_RES_600}, { 500, 1656, 258, ASC_RES_500}, { 400, 1656, 207, ASC_RES_400}, { 300, 1240, 155, ASC_RES_300}, { 200, 832, 104, ASC_RES_200}, { 100, 416, 52, ASC_RES_100}, { 200, 3*GREY_LINE, 3*GREY_LINE, 0 /* returned by color scanner */}, { 200, GREY_LINE, GREY_LINE, 0 /* color scanner, grey mode */}, { INVALID, 416, 52, INVALID } /* terminator */ }; /*** *** THE TABLE OF UNITS ***/ struct _sbuf { size_t size; size_t rptr; size_t wptr; /* only changed in ascintr */ size_t count; char *base; }; struct asc_unit { long thedev; /* XXX */ int base; /* base address */ int dma_num; /* dma number */ char dma_byte; /* mask of byte for setting DMA value */ char int_byte; /* mask of byte for setting int value */ char cfg_byte; /* mirror of byte written to config reg (ASC_CFG). 
*/ char cmd_byte; /* mirror of byte written to cmd port (ASC_CMD)*/ char portf_byte; int flags; #define ATTACHED 0x01 #define OPEN 0x02 #define READING 0x04 #define DMA_ACTIVE 0x08 #define SLEEPING 0x10 #define SEL_COLL 0x20 #define PBM_MODE 0x40 #define FLAG_DEBUG 0x80 int geometry; /* resolution as geomtab index */ int linesize; /* length of one scan line (from geom.table) */ int blen; /* length of buffer in lines */ int btime; /* timeout of buffer in seconds/hz */ struct _sbuf sbuf; long icnt; /* interrupt count XXX for debugging */ #ifdef FREEBSD_1_X pid_t selp; /* select pointer... */ #else struct selinfo selp; #endif int height; /* height, for pnm modes */ size_t bcount; /* bytes to read, for pnm modes */ #ifdef DEVFS void *devfs_asc; /* storage for devfs tokens (handles) */ void *devfs_ascp; void *devfs_ascd; void *devfs_ascpd; #endif }; static struct asc_unit unittab[NASC]; /*** I could not find a reasonable buffer size limit other than by *** experiments. MAXPHYS is obviously too much, while DEV_BSIZE and *** PAGE_SIZE are really too small. There must be something wrong *** with isa_dmastart/isa_dmarangecheck HELP!!! *** *** Note, must be DEFAULT_BLEN * samples_per_line <= MAX_BUFSIZE ***/ #define MAX_BUFSIZE 0xb000 /* XXX was 0x3000 */ #define DEFAULT_BLEN 16 /*** *** THE PER-DRIVER RECORD FOR ISA.C ***/ static int ascprobe (struct isa_device *isdp); static int ascattach(struct isa_device *isdp); struct isa_driver ascdriver = { ascprobe, ascattach, "asc" }; #ifndef FREEBSD_1_X static d_open_t ascopen; static d_close_t ascclose; static d_read_t ascread; static d_ioctl_t ascioctl; static d_poll_t ascpoll; #define CDEV_MAJOR 71 static struct cdevsw asc_cdevsw = { ascopen, ascclose, ascread, nowrite, /*71*/ ascioctl, nostop, nullreset, nodevtotty, /* asc */ ascpoll, nommap, NULL, "asc", NULL, -1 }; #define STATIC static #else #define STATIC #endif /* ! 
FREEBSD_1_X */ /*** *** LOCALLY USED SUBROUTINES *** ***/ /*** *** get_resolution *** read resolution from the scanner ***/ static void get_resolution(struct asc_unit *scu) { int res, i, delay; res=0; scu->cmd_byte = ASC_STANDBY; outb(ASC_CMD, scu->cmd_byte); tsleep((caddr_t)scu, ASCPRI | PCATCH, "ascres", hz/10); for(delay= 100; (res=inb(ASC_STAT)) & ASC_RDY_FLAG; delay--) { i = tsleep((caddr_t)scu, ASCPRI | PCATCH, "ascres0", 1); if ( ( i == 0 ) || ( i == EWOULDBLOCK ) ) i = SUCCESS; else break; } if (delay==0) { lprintf("asc.get_resolution: timeout completing command\n"); return /* -1 */; } /* ... actual read resolution... */ res &= ASC_RES_MASK; for (i=0; geomtab[i].dpi != INVALID; i++) { if (geomtab[i].g_res == res) break; } if (geomtab[i].dpi==INVALID) { scu->geometry= i; /* INVALID; */ lprintf("asc.get_resolution: wrong resolution\n"); } else { lprintf("asc.get_resolution: %d dpi\n",geomtab[i].dpi); scu->geometry = i; } scu->portf_byte=0; /* default */ if (geomtab[scu->geometry].g_res==0 && !(scu->thedev&FRMT_GRAY)) { /* color scanner seems to require this */ scu->portf_byte=2; /* scu->geometry++; */ } scu->linesize = geomtab[scu->geometry].bpl; scu->height = geomtab[scu->geometry].dpl; /* default... */ } /*** *** buffer_allocate *** allocate/reallocate a buffer *** Now just checks that the preallocated buffer is large enough. ***/ static int buffer_allocate(struct asc_unit *scu) { size_t size, size1; size = scu->blen * scu->linesize; lprintf("asc.buffer_allocate: need 0x%x bytes\n", size); if ( size > MAX_BUFSIZE ) { size1=size; size= ( (MAX_BUFSIZE+scu->linesize-1) / scu->linesize)*scu->linesize; lprintf("asc.buffer_allocate: 0x%x bytes are too much, try 0x%x\n", size1, size); return ENOMEM; } scu->sbuf.size = size; scu->sbuf.rptr = 0; scu->sbuf.wptr = 0; scu->sbuf.count = 0; /* available data for reading */ lprintf("asc.buffer_allocate: ok\n"); return SUCCESS; } /*** dma_restart *** invoked locally to start dma. 
Must run in a critical section ***/ static void dma_restart(struct asc_unit *scu) { unsigned char al=scu->cmd_byte; if (geomtab[scu->geometry].g_res==0) {/* color */ isa_dmastart(B_READ, scu->sbuf.base+scu->sbuf.wptr, scu->linesize + 90 /* XXX */ , scu->dma_num); /* * looks like we have to set and then clear this * bit to enable the scanner to send interrupts */ outb( ASC_CMD, al |= 4 ); /* seems to disable interrupts */ #if 0 outb( ASC_CMD, al |= 8 ); /* ??? seems useless */ #endif outb( ASC_CMD, al &= 0xfb ); scu->cmd_byte = al; } else { /* normal */ isa_dmastart(B_READ, scu->sbuf.base+scu->sbuf.wptr, scu->linesize, scu->dma_num); /*** this is done in sub_20, after dmastart ? ***/ #if 0 outb( ASC_CMD, al |= 4 ); outb( ASC_CMD, al |= 8 ); /* ??? seems useless */ outb( ASC_CMD, al &= 0xfb ); scu->cmd_byte = al; #else outb( ASC_CMD, ASC_OPERATE); #endif } scu->flags |= DMA_ACTIVE; } /*** *** the main functions ***/ /*** asc_reset *** resets the scanner and the config bytes... ***/ static void asc_reset(struct asc_unit *scu) { scu->cfg_byte = 0 ; /* clear... */ scu->cmd_byte = 0 ; /* clear... */ outb(ASC_CFG,scu->cfg_byte); /* for safety, do this here */ outb(ASC_CMD,scu->cmd_byte); /* probably not needed */ tsleep((caddr_t)scu, ASCPRI | PCATCH, "ascres", hz/10); /* sleep .1 sec */ scu->blen = DEFAULT_BLEN; scu->btime = TIMEOUT; scu->height = 0 ; /* don't know better... */ } /************************************************************************** *** *** ascprobe *** read status port and check for proper configuration: *** - if address group matches (status byte has reasonable value) *** cannot check interrupt/dma, only clear the config byte. 
***/ static int ascprobe (struct isa_device *isdp) { int unit = isdp->id_unit; struct asc_unit *scu = unittab + unit; int stb; scu->base = isdp->id_iobase; /*** needed by the following macros ***/ scu->flags = FLAG_DEBUG; if ( isdp->id_iobase < 0 ) { lprintf("asc%d.probe: no iobase given\n", unit); return PROBE_FAIL; } if ((stb=inb(ASC_PROBE)) != ASC_PROBE_VALUE) { lprintf("asc%d.probe: failed, got 0x%02x instead of 0x%02x\n", unit, stb, ASC_PROBE_VALUE); return PROBE_FAIL; } /* * NOTE NOTE NOTE * the new AmiScan Color board uses int 10,11,12 instead of 3,5,10 * respectively. This means that the driver must act accordingly. * Unfortunately there is no easy way of telling which board one has, * other than trying to get an interrupt and noticing that it is * missing. use "option ASC_NEW_BOARD" if you have a new board. * */ #if ASC_NEW_BOARD #define ASC_IRQ_A 10 #define ASC_IRQ_B 11 #define ASC_IRQ_C 12 #else #define ASC_IRQ_A 3 #define ASC_IRQ_B 5 #define ASC_IRQ_C 10 #endif switch(ffs(isdp->id_irq) - 1) { case ASC_IRQ_A : scu->int_byte = ASC_CNF_IRQ3; break; case ASC_IRQ_B : scu->int_byte = ASC_CNF_IRQ5; break; case ASC_IRQ_C : scu->int_byte = ASC_CNF_IRQ10; break; #if 0 case -1: scu->int_byte = 0; lprintf("asc%d.probe: warning - going interruptless\n", unit); break; #endif default: lprintf("asc%d.probe: unsupported INT %d (only 3, 5, 10)\n", unit, ffs(isdp->id_irq) - 1 ); return PROBE_FAIL; } scu->dma_num = isdp->id_drq; switch(scu->dma_num) { case 1: scu->dma_byte = ASC_CNF_DMA1; break; case 3: scu->dma_byte = ASC_CNF_DMA3; break; default: lprintf("asc%d.probe: unsupported DMA %d (only 1 or 3)\n", unit, scu->dma_num); return PROBE_FAIL; } asc_reset(scu); /* lprintf("asc%d.probe: ok\n", unit); */ scu->flags &= ~FLAG_DEBUG; scu->icnt = 0; return PROBE_SUCCESS; } /************************************************************************** *** *** ascattach *** finish initialization of unit structure, get geometry value (?) 
***/ static int ascattach(struct isa_device *isdp) { int unit = isdp->id_unit; struct asc_unit *scu = unittab + unit; scu->flags |= FLAG_DEBUG; printf("asc%d: [GI1904/Trust Ami-Scan Grey/Color]\n", unit); /* * Initialize buffer structure. * XXX this must be done early to give a good chance of getting a * contiguous buffer. This wastes memory. */ #ifdef FREEBSD_1_X /* * The old contigmalloc() didn't have a `low/minpa' arg, and took masks * instead of multipliers for the alignments. */ scu->sbuf.base = contigmalloc((unsigned long)MAX_BUFSIZE, M_DEVBUF, M_NOWAIT, 0xfffffful, 0ul, 0xfffful); #else scu->sbuf.base = contigmalloc((unsigned long)MAX_BUFSIZE, M_DEVBUF, M_NOWAIT, 0ul, 0xfffffful, 1ul, 0x10000ul); #endif if ( scu->sbuf.base == NULL ) { lprintf("asc%d.attach: buffer allocation failed\n", unit); return ATTACH_FAIL; /* XXX attach must not fail */ } scu->sbuf.size = INVALID; scu->sbuf.rptr = INVALID; scu->flags |= ATTACHED; /* lprintf("asc%d.attach: ok\n", unit); */ scu->flags &= ~FLAG_DEBUG; #ifdef FREEBSD_1_X scu->selp = (pid_t)0; #else scu->selp.si_flags=0; scu->selp.si_pid=(pid_t)0; #endif #ifdef DEVFS #define ASC_UID 0 #define ASC_GID 13 scu->devfs_asc = devfs_add_devswf(&asc_cdevsw, unit<<6, DV_CHR, ASC_UID, ASC_GID, 0666, "asc%d", unit); scu->devfs_ascp = devfs_add_devswf(&asc_cdevsw, ((unit<<6) + FRMT_PBM), DV_CHR, ASC_UID, ASC_GID, 0666, "asc%dp", unit); scu->devfs_ascd = devfs_add_devswf(&asc_cdevsw, ((unit<<6) + DBUG_MASK), DV_CHR, ASC_UID, ASC_GID, 0666, "asc%dd", unit); scu->devfs_ascpd = devfs_add_devswf(&asc_cdevsw, ((unit<<6) + DBUG_MASK+FRMT_PBM), DV_CHR, ASC_UID, ASC_GID, 0666, "asc%dpd", unit); #endif /*DEVFS*/ return ATTACH_SUCCESS; } /************************************************************************** *** *** ascintr *** the interrupt routine, at the end of DMA... ***/ void ascintr(int unit) { struct asc_unit *scu = unittab + unit; int chan_bit = 0x01 << scu->dma_num; scu->icnt++; /* ignore stray interrupts... 
*/ if ( scu->flags & (OPEN |READING) != (OPEN | READING) ) { /* must be after closing... */ scu->flags &= ~(OPEN | READING | DMA_ACTIVE | SLEEPING | SEL_COLL); return; } if ( (scu->flags & DMA_ACTIVE) && (inb(DMA1_READY) & chan_bit) != 0) { outb( ASC_CMD, ASC_STANDBY); scu->flags &= ~DMA_ACTIVE; /* bounce buffers... */ isa_dmadone(B_READ, scu->sbuf.base+scu->sbuf.wptr, scu->linesize, scu->dma_num); scu->sbuf.wptr += scu->linesize; if (scu->sbuf.wptr >= scu->sbuf.size) scu->sbuf.wptr=0; scu->sbuf.count += scu->linesize; if (scu->flags & SLEEPING) { scu->flags &= ~SLEEPING; wakeup((caddr_t)scu); } if (scu->sbuf.size - scu->sbuf.count >= scu->linesize) { dma_restart(scu); } #ifdef FREEBSD_1_X if (scu->selp) { selwakeup(&scu->selp, scu->flags & SEL_COLL ); scu->selp=(pid_t)0; scu->flags &= ~SEL_COLL; } #else if (scu->selp.si_pid) { selwakeup(&scu->selp); scu->selp.si_pid=(pid_t)0; scu->selp.si_flags = 0; } #endif } } /************************************************************************** *** *** ascopen *** set open flag, set modes according to minor number *** FOR RELEASE: *** don't switch scanner on, wait until first read or ioctls go before ***/ STATIC int ascopen(dev_t dev, int flags, int fmt, struct proc *p) { struct asc_unit *scu; int unit; unit = UNIT(minor(dev)) & UNIT_MASK; if ( unit >= NASC ) { #ifdef ASCDEBUG /* XXX lprintf isn't valid here since there is no scu. 
*/ printf("asc%d.open: unconfigured unit number (max %d)\n", unit, NASC); #endif return ENXIO; } scu = unittab + unit; if ( !( scu->flags & ATTACHED ) ) { lprintf("asc%d.open: unit was not attached successfully 0x%04x\n", unit, scu->flags); return ENXIO; } if ( minor(dev) & DBUG_MASK ) scu->flags |= FLAG_DEBUG; else scu->flags &= ~FLAG_DEBUG; switch(minor(dev) & FRMT_MASK) { case FRMT_PBM: scu->flags |= PBM_MODE; lprintf("asc%d.open: pbm mode\n", unit); break; case FRMT_RAW: lprintf("asc%d.open: raw mode\n", unit); scu->flags &= ~PBM_MODE; break; default: lprintf("asc%d.open: gray maps are not yet supported", unit); return ENXIO; } lprintf("asc%d.open: minor %d icnt %ld\n", unit, minor(dev), scu->icnt); if ( scu->flags & OPEN ) { lprintf("asc%d.open: already open", unit); return EBUSY; } if (isa_dma_acquire(scu->dma_num)) return(EBUSY); scu->flags = ATTACHED | OPEN; asc_reset(scu); get_resolution(scu); return SUCCESS; } static int asc_startread(struct asc_unit *scu) { /*** from here on, things can be delayed to the first read/ioctl ***/ /*** this was done in sub_12... ***/ scu->cfg_byte= scu->cmd_byte=0; /* init scanner */ outb(ASC_CMD, scu->cmd_byte); /*** this was done in sub_16, set scan len... ***/ outb(ASC_BOH, scu->portf_byte ); if (geomtab[scu->geometry].g_res==0) { /* color */ scu->cmd_byte = 0x00 ; } else { scu->cmd_byte = 0x90 ; } outb(ASC_CMD, scu->cmd_byte); outb(ASC_LEN_L, scu->linesize & 0xff /* len_low */); outb(ASC_LEN_H, (scu->linesize >>8) & 0xff /* len_high */); /*** this was done in sub_21, config DMA ... 
***/ scu->cfg_byte |= scu->dma_byte; outb(ASC_CFG, scu->cfg_byte); /*** sub_22: enable int on the scanner ***/ scu->cfg_byte |= scu->int_byte; outb(ASC_CFG, scu->cfg_byte); /*** sub_28: light on etc...***/ scu->cmd_byte = ASC_STANDBY; outb(ASC_CMD, scu->cmd_byte); tsleep((caddr_t)scu, ASCPRI | PCATCH, "ascstrd", hz/10); /* sleep .1 sec */ return SUCCESS; } /************************************************************************** *** *** ascclose *** turn off scanner, release the buffer *** should probably terminate dma ops, release int and dma. lr 12mar95 ***/ STATIC int ascclose(dev_t dev, int flags, int fmt, struct proc *p) { int unit = UNIT(minor(dev)); struct asc_unit *scu = unittab + unit; lprintf("asc%d.close: minor %d\n", unit, minor(dev)); if ( unit >= NASC || !( scu->flags & ATTACHED ) ) { lprintf("asc%d.close: unit was not attached successfully 0x%04x\n", unit, scu->flags); return ENXIO; } /* all this is in sub_29... */ /* cli(); */ outb(ASC_CFG, 0 ); /* don't save in CFG byte!!! */ scu->cmd_byte &= ~ASC_LIGHT_ON; outb(ASC_CMD, scu->cmd_byte);/* light off */ tsleep((caddr_t)scu, ASCPRI | PCATCH, "ascclo", hz/2); /* sleep 1/2 sec */ scu->cfg_byte &= ~ scu->dma_byte ; /* disable scanner dma */ scu->cfg_byte &= ~ scu->int_byte ; /* disable scanner int */ outb(ASC_CFG, scu->cfg_byte); /* --- disable dma controller ? 
--- */ isa_dma_release(scu->dma_num); /* --- disable interrupts on the controller (sub_24) --- */ scu->sbuf.size = INVALID; scu->sbuf.rptr = INVALID; scu->flags &= ~(FLAG_DEBUG | OPEN | READING); return SUCCESS; } static void pbm_init(struct asc_unit *scu) { int width = geomtab[scu->geometry].dpl; int l= sprintf(scu->sbuf.base,"P4 %d %d\n", width, scu->height); char *p; scu->bcount = scu->height * width / 8 + l; /* move header to end of sbuf */ scu->sbuf.rptr=scu->sbuf.size-l; bcopy(scu->sbuf.base, scu->sbuf.base+scu->sbuf.rptr,l); scu->sbuf.count = l; if (geomtab[scu->geometry].g_res!=0) { /* BW scanner */ for(p = scu->sbuf.base + scu->sbuf.rptr; l; p++, l--) *p = ~*p; } } /************************************************************************** *** *** ascread ***/ STATIC int ascread(dev_t dev, struct uio *uio, int ioflag) { int unit = UNIT(minor(dev)); struct asc_unit *scu = unittab + unit; size_t nbytes; int sps, res; unsigned char *p; lprintf("asc%d.read: minor %d icnt %d\n", unit, minor(dev), scu->icnt); if ( unit >= NASC || !( scu->flags & ATTACHED ) ) { lprintf("asc%d.read: unit was not attached successfully 0x%04x\n", unit, scu->flags); return ENXIO; } if ( !(scu->flags & READING) ) { /*** first read... ***/ /* allocate a buffer for reading data and init things */ if ( (res = buffer_allocate(scu)) == SUCCESS ) scu->flags |= READING; else return res; asc_startread(scu); if ( scu->flags & PBM_MODE ) { /* initialize for pbm mode */ pbm_init(scu); } } lprintf("asc%d.read(before): " "sz 0x%x, rptr 0x%x, wptr 0x%x, cnt 0x%x bcnt 0x%x flags 0x%x icnt %d\n", unit, scu->sbuf.size, scu->sbuf.rptr, scu->sbuf.wptr, scu->sbuf.count, scu->bcount,scu->flags, scu->icnt); sps=spltty(); if ( scu->sbuf.count == 0 ) { /* no data avail., must wait */ if (!(scu->flags & DMA_ACTIVE)) dma_restart(scu); scu->flags |= SLEEPING; res = tsleep((caddr_t)scu, ASCPRI | PCATCH, "ascread", 0); scu->flags &= ~SLEEPING; if ( res == 0 ) res = SUCCESS; } splx(sps); /* lower priority... 
*/ if (scu->flags & FLAG_DEBUG) tsleep((caddr_t)scu, ASCPRI | PCATCH, "ascdly",hz); lprintf("asc%d.read(after): " "sz 0x%x, rptr 0x%x, wptr 0x%x, cnt 0x%x bcnt 0x%x flags 0x%x icnt %d\n", unit, scu->sbuf.size, scu->sbuf.rptr, scu->sbuf.wptr, scu->sbuf.count, scu->bcount,scu->flags,scu->icnt); /* first, not more than available... */ nbytes = min( uio->uio_resid, scu->sbuf.count ); /* second, contiguous data... */ nbytes = min( nbytes, (scu->sbuf.size - scu->sbuf.rptr) ); /* third, one line (will remove this later, XXX) */ nbytes = min( nbytes, scu->linesize ); if ( (scu->flags & PBM_MODE) ) nbytes = min( nbytes, scu->bcount ); lprintf("asc%d.read: transferring 0x%x bytes\n", unit, nbytes); if (geomtab[scu->geometry].g_res!=0) { /* BW scanner */ lprintf("asc%d.read: invert buffer\n",unit); for(p = scu->sbuf.base + scu->sbuf.rptr, res=nbytes; res; p++, res--) *p = ~*p; } res = uiomove(scu->sbuf.base + scu->sbuf.rptr, nbytes, uio); if ( res != SUCCESS ) { lprintf("asc%d.read: uiomove failed %d", unit, res); return res; } sps=spltty(); scu->sbuf.rptr += nbytes; if (scu->sbuf.rptr >= scu->sbuf.size) scu->sbuf.rptr=0; scu->sbuf.count -= nbytes; /* having moved some data, can read mode */ if (!(scu->flags & DMA_ACTIVE)) dma_restart(scu); splx(sps); /* lower priority... 
*/ if ( scu->flags & PBM_MODE ) scu->bcount -= nbytes; lprintf("asc%d.read: size 0x%x, pointer 0x%x, bcount 0x%x, ok\n", unit, scu->sbuf.size, scu->sbuf.rptr, scu->bcount); return SUCCESS; } /************************************************************************** *** *** ascioctl ***/ STATIC int ascioctl(dev_t dev, int cmd, caddr_t data, int flags, struct proc *p) { int unit = UNIT(minor(dev)); struct asc_unit *scu = unittab + unit; lprintf("asc%d.ioctl: minor %d\n", unit, minor(dev)); if ( unit >= NASC || !( scu->flags & ATTACHED ) ) { lprintf("asc%d.ioctl: unit was not attached successfully 0x%04x\n", unit, scu->flags); return ENXIO; } switch(cmd) { case ASC_GRES: asc_reset(scu); get_resolution(scu); *(int *)data=geomtab[scu->geometry].dpi; lprintf("asc%d.ioctl:ASC_GRES %ddpi\n", unit, *(int *)data); return SUCCESS; case ASC_GWIDTH: *(int *)data=geomtab[scu->geometry].dpl; lprintf("asc%d.ioctl:ASC_GWIDTH %d\n", unit, *(int *)data); return SUCCESS; case ASC_GHEIGHT: *(int *)data=scu->height; lprintf("asc%d.ioctl:ASC_GHEIGHT %d\n", unit, *(int *)data); return SUCCESS; case ASC_SHEIGHT: lprintf("asc%d.ioctl:ASC_SHEIGHT %d\n", unit, *(int *)data); if ( scu->flags & READING ) { lprintf("asc%d:ioctl on already reading unit\n", unit); return EBUSY; } scu->height=*(int *)data; return SUCCESS; #if 0 case ASC_GBLEN: *(int *)data=scu->blen; lprintf("asc%d.ioctl:ASC_GBLEN %d\n", unit, *(int *)data); return SUCCESS; case ASC_SBLEN: lprintf("asc%d.ioctl:ASC_SBLEN %d\n", unit, *(int *)data); if (*(int *)data * geomtab[scu->geometry].dpl / 8 > MAX_BUFSIZE) { lprintf("asc%d:ioctl buffer size too high\n", unit); return ENOMEM; } scu->blen=*(int *)data; return SUCCESS; case ASC_GBTIME: *(int *)data = scu->btime / hz; lprintf("asc%d.ioctl:ASC_GBTIME %d\n", unit, *(int *)data); return SUCCESS; case ASC_SBTIME: scu->btime = *(int *)data * hz; lprintf("asc%d.ioctl:ASC_SBTIME %d\n", unit, *(int *)data); return SUCCESS; #endif default: return ENOTTY; } return SUCCESS; } STATIC int 
ascpoll(dev_t dev, int events, struct proc *p) { int unit = UNIT(minor(dev)); struct asc_unit *scu = unittab + unit; int sps; struct proc *p1; int revents = 0; sps=spltty(); if (events & (POLLIN | POLLRDNORM)) if (scu->sbuf.count >0) revents |= events & (POLLIN | POLLRDNORM); else { if (!(scu->flags & DMA_ACTIVE)) dma_restart(scu); if (scu->selp.si_pid && (p1=pfind(scu->selp.si_pid)) && p1->p_wchan == (caddr_t)&selwait) scu->selp.si_flags = SI_COLL; else scu->selp.si_pid = p->p_pid; } splx(sps); return 0; } static asc_devsw_installed = 0; static void asc_drvinit(void *unused) { dev_t dev; if( ! asc_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&asc_cdevsw,NULL); asc_devsw_installed = 1; } } SYSINIT(ascdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,asc_drvinit,NULL) #endif /* NASC > 0 */ diff --git a/sys/i386/isa/gsc.c b/sys/i386/isa/gsc.c index c9aa02dadbe8..966249f7a97c 100644 --- a/sys/i386/isa/gsc.c +++ b/sys/i386/isa/gsc.c @@ -1,848 +1,849 @@ /* gsc.c - device driver for handy scanners * * Current version supports: * * - Genius GS-4500 * * Copyright (c) 1995 Gunther Schadow. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Gunther Schadow. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "gsc.h" #if NGSC > 0 #include "opt_devfs.h" #include #include #include #include #include #include #ifdef DEVFS #include #endif /*DEVFS*/ +#include #include #include #include #include /*********************************************************************** * * CONSTANTS & DEFINES * ***********************************************************************/ #define PROBE_FAIL 0 #define PROBE_SUCCESS IO_GSCSIZE #define ATTACH_FAIL 0 #define ATTACH_SUCCESS 1 #define SUCCESS 0 #define FAIL -1 #define INVALID FAIL #define DMA1_READY 0x08 #ifdef GSCDEBUG #define lprintf if(scu->flags & FLAG_DEBUG) printf #else #define lprintf (void) #endif #define MIN(a, b) (((a) < (b)) ? (a) : (b)) #define TIMEOUT (hz*15) /* timeout while reading a buffer - default value */ #define LONG (hz/60) /* timesteps while reading a buffer */ #define GSCPRI PRIBIO /* priority while reading a buffer */ /*********************************************************************** * * LAYOUT OF THE MINOR NUMBER * ***********************************************************************/ #define UNIT_MASK 0xc0 /* unit gsc0 .. 
gsc3 */ #define UNIT(x) (x >> 6) #define DBUG_MASK 0x20 #define FRMT_MASK 0x18 /* output format */ #define FRMT_RAW 0x00 /* output bits as read from scanner */ #define FRMT_GRAY 0x10 /* output graymap (not implemented yet) */ #define FRMT_PBM 0x08 /* output pbm format */ #define FRMT_PGM 0x18 /*********************************************************************** * * THE GEMOMETRY TABLE * ***********************************************************************/ #define GEOMTAB_SIZE 7 static const struct gsc_geom { int dpi; /* dots per inch */ int dpl; /* dots per line */ int g_res; /* get resolution value (status flag) */ int s_res; /* set resolution value (control register) */ } geomtab[GEOMTAB_SIZE] = { { 100, 424, GSC_RES_100, GSC_CNT_424}, { 200, 840, GSC_RES_200, GSC_CNT_840}, { 300, 1264, GSC_RES_300, GSC_CNT_1264}, { 400, 1648, GSC_RES_400, GSC_CNT_1648}, { -1, 1696, -1, GSC_CNT_1696}, { -2, 2644, -2, GSC_CNT_2544}, { -3, 3648, -3, GSC_CNT_3648}, }; #define NEW_GEOM { INVALID, INVALID, INVALID, INVALID } /*********************************************************************** * * THE TABLE OF UNITS * ***********************************************************************/ struct _sbuf { size_t size; size_t poi; char *base; }; struct gsc_unit { int channel; /* DMA channel */ int data; /* - video port */ int stat; /* - status port */ int ctrl; /* - control port */ int clrp; /* - clear port */ int flags; #define ATTACHED 0x01 #define OPEN 0x02 #define READING 0x04 #define EOF 0x08 #define FLAG_DEBUG 0x10 #define PBM_MODE 0x20 int geometry; /* resolution as geomtab index */ int blen; /* length of buffer in lines */ int btime; /* timeout of buffer in seconds/hz */ struct _sbuf sbuf; char ctrl_byte; /* the byte actually written to ctrl port */ int height; /* height, for pnm modes */ size_t bcount; /* bytes to read, for pnm modes */ struct _sbuf hbuf; /* buffer for pnm header data */ #ifdef DEVFS void *devfs_gsc; /* storage for devfs tokens (handles) */ void 
*devfs_gscp; void *devfs_gscd; void *devfs_gscpd; #endif }; static struct gsc_unit unittab[NGSC]; /* I could not find a reasonable buffer size limit other than by * experiments. MAXPHYS is obviously too much, while DEV_BSIZE and * PAGE_SIZE are really too small. There must be something wrong * with isa_dmastart/isa_dmarangecheck HELP!!! */ #define MAX_BUFSIZE 0x3000 #define DEFAULT_BLEN 59 /*********************************************************************** * * THE PER-DRIVER RECORD FOR ISA.C * ***********************************************************************/ static int gscprobe (struct isa_device *isdp); static int gscattach(struct isa_device *isdp); struct isa_driver gscdriver = { gscprobe, gscattach, "gsc" }; static d_open_t gscopen; static d_close_t gscclose; static d_read_t gscread; static d_ioctl_t gscioctl; #define CDEV_MAJOR 47 static struct cdevsw gsc_cdevsw = { gscopen, gscclose, gscread, nowrite, /*47*/ gscioctl, nostop, nullreset, nodevtotty,/* gsc */ seltrue, nommap, NULL, "gsc", NULL, -1 }; /*********************************************************************** * * LOCALLY USED SUBROUTINES * ***********************************************************************/ /*********************************************************************** * * lookup_geometry -- lookup a record in the geometry table by pattern * * The caller supplies a geometry record pattern, where INVALID * matches anything. Returns the index in the table or INVALID if * lookup fails. */ static int lookup_geometry(struct gsc_geom geom, const struct gsc_unit *scu) { struct gsc_geom tab; int i; for(i=0; istat); if ( ( geom.g_res = inb(scu->stat) ) == FAIL ) return INVALID; geom.g_res &= GSC_RES_MASK; return lookup_geometry(geom, scu); } /*********************************************************************** * * buffer_allocate -- allocate/reallocate a buffer * Now just checks that the preallocated buffer is large enough. 
*/ static int buffer_allocate(struct gsc_unit *scu) { size_t size; size = scu->blen * geomtab[scu->geometry].dpl / 8; lprintf("gsc.buffer_allocate: need 0x%x bytes\n", size); if ( size > MAX_BUFSIZE ) { lprintf("gsc.buffer_allocate: 0x%x bytes are too much\n", size); return ENOMEM; } scu->sbuf.size = size; scu->sbuf.poi = size; lprintf("gsc.buffer_allocate: ok\n"); return SUCCESS; } /*********************************************************************** * * buffer_read -- scan a buffer */ static int buffer_read(struct gsc_unit *scu) { int stb; int res = SUCCESS; int chan_bit; char *p; int sps; int delay; lprintf("gsc.buffer_read: begin\n"); if (scu->ctrl_byte == INVALID) { lprintf("gsc.buffer_read: invalid ctrl_byte\n"); return EIO; } sps=splbio(); outb( scu->ctrl, scu->ctrl_byte | GSC_POWER_ON ); outb( scu->clrp, 0 ); stb = inb( scu->stat ); isa_dmastart(B_READ, scu->sbuf.base, scu->sbuf.size, scu->channel); chan_bit = 0x01 << scu->channel; for(delay=0; !(inb(DMA1_READY) & 0x01 << scu->channel); delay += LONG) { if(delay >= scu->btime) { splx(sps); lprintf("gsc.buffer_read: timeout\n"); res = EWOULDBLOCK; break; } res = tsleep((caddr_t)scu, GSCPRI | PCATCH, "gscread", LONG); if ( ( res == 0 ) || ( res == EWOULDBLOCK ) ) res = SUCCESS; else break; } splx(sps); isa_dmadone(B_READ, scu->sbuf.base, scu->sbuf.size, scu->channel); outb( scu->clrp, 0 ); if(res != SUCCESS) { lprintf("gsc.buffer_read: aborted with %d\n", res); return res; } lprintf("gsc.buffer_read: invert buffer\n"); for(p = scu->sbuf.base + scu->sbuf.size - 1; p >= scu->sbuf.base; p--) *p = ~*p; scu->sbuf.poi = 0; lprintf("gsc.buffer_read: ok\n"); return SUCCESS; } /*********************************************************************** * * the main functions * ***********************************************************************/ /*********************************************************************** * * gscprobe * * read status port and check for proper configuration: * - if address group matches 
(status byte has reasonable value) * - if DMA channel matches (status byte has correct value) */ static int gscprobe (struct isa_device *isdp) { int unit = isdp->id_unit; struct gsc_unit *scu = unittab + unit; int stb; struct gsc_geom geom = NEW_GEOM; scu->flags = FLAG_DEBUG; lprintf("gsc%d.probe " "on iobase 0x%03x, irq %d, drq %d, addr %d, size %d\n", unit, isdp->id_iobase, isdp->id_irq, isdp->id_drq, isdp->id_maddr, isdp->id_msize); if ( isdp->id_iobase < 0 ) { lprintf("gsc%d.probe: no iobase given\n", unit); return PROBE_FAIL; } stb = inb( GSC_STAT(isdp->id_iobase) ); if (stb == FAIL) { lprintf("gsc%d.probe: get status byte failed\n", unit); return PROBE_FAIL; } scu->data = GSC_DATA(isdp->id_iobase); scu->stat = GSC_STAT(isdp->id_iobase); scu->ctrl = GSC_CTRL(isdp->id_iobase); scu->clrp = GSC_CLRP(isdp->id_iobase); outb(scu->clrp,stb); stb = inb(scu->stat); switch(stb & GSC_CNF_MASK) { case GSC_CNF_DMA1: lprintf("gsc%d.probe: DMA 1\n", unit); scu->channel = 1; break; case GSC_CNF_DMA3: lprintf("gsc%d.probe: DMA 3\n", unit); scu->channel = 3; break; case GSC_CNF_IRQ3: lprintf("gsc%d.probe: IRQ 3\n", unit); goto probe_noirq; case GSC_CNF_IRQ5: lprintf("gsc%d.probe: IRQ 5\n", unit); probe_noirq: lprintf("gsc%d.probe: sorry, can't use IRQ yet\n", unit); return PROBE_FAIL; default: lprintf("gsc%d.probe: invalid status byte\n", unit, stb); return PROBE_FAIL; } if (isdp->id_drq < 0) isdp->id_drq = scu->channel; if (scu->channel != isdp->id_drq) { lprintf("gsc%d.probe: drq mismatch: config: %d; hardware: %d\n", unit, isdp->id_drq, scu->channel); return PROBE_FAIL; } geom.g_res = stb & GSC_RES_MASK; scu->geometry = lookup_geometry(geom, scu); if (scu->geometry == INVALID) { lprintf("gsc%d.probe: geometry lookup failed\n", unit); return PROBE_FAIL; } else { scu->ctrl_byte = geomtab[scu->geometry].s_res; outb(scu->ctrl, scu->ctrl_byte | GSC_POWER_ON); lprintf("gsc%d.probe: status 0x%02x, %ddpi\n", unit, stb, geomtab[scu->geometry].dpi); outb(scu->ctrl, scu->ctrl_byte & 
~GSC_POWER_ON); } lprintf("gsc%d.probe: ok\n", unit); scu->flags &= ~FLAG_DEBUG; return PROBE_SUCCESS; } /*********************************************************************** * * gscattach * * finish initialization of unit structure * get geometry value */ static int gscattach(struct isa_device *isdp) { int unit = isdp->id_unit; struct gsc_unit *scu = unittab + unit; scu->flags |= FLAG_DEBUG; lprintf("gsc%d.attach: " "iobase 0x%03x, irq %d, drq %d, addr %d, size %d\n", unit, isdp->id_iobase, isdp->id_irq, isdp->id_drq, isdp->id_maddr, isdp->id_msize); printf("gsc%d: GeniScan GS-4500 at %ddpi\n", unit, geomtab[scu->geometry].dpi); /* * Initialize buffer structure. * XXX this must be done early to give a good chance of getting a * contiguous buffer. This wastes memory. */ scu->sbuf.base = contigmalloc((unsigned long)MAX_BUFSIZE, M_DEVBUF, M_NOWAIT, 0ul, 0xfffffful, 1ul, 0x10000ul); if ( scu->sbuf.base == NULL ) { lprintf("gsc%d.attach: buffer allocation failed\n", unit); return ATTACH_FAIL; /* XXX attach must not fail */ } scu->sbuf.size = INVALID; scu->sbuf.poi = INVALID; scu->blen = DEFAULT_BLEN; scu->btime = TIMEOUT; scu->flags |= ATTACHED; lprintf("gsc%d.attach: ok\n", unit); scu->flags &= ~FLAG_DEBUG; #ifdef DEVFS #define GSC_UID 0 #define GSC_GID 13 scu->devfs_gsc = devfs_add_devswf(&gsc_cdevsw, unit<<6, DV_CHR, GSC_UID, GSC_GID, 0666, "gsc%d", unit); scu->devfs_gscp = devfs_add_devswf(&gsc_cdevsw, ((unit<<6) + FRMT_PBM), DV_CHR, GSC_UID, GSC_GID, 0666, "gsc%dp", unit); scu->devfs_gscd = devfs_add_devswf(&gsc_cdevsw, ((unit<<6) + DBUG_MASK), DV_CHR, GSC_UID, GSC_GID, 0666, "gsc%dd", unit); scu->devfs_gscpd = devfs_add_devswf(&gsc_cdevsw, ((unit<<6) + DBUG_MASK+FRMT_PBM), DV_CHR, GSC_UID, GSC_GID, 0666, "gsc%dpd", unit); #endif /*DEVFS*/ return ATTACH_SUCCESS; } /*********************************************************************** * * gscopen * * set open flag * set modes according to minor number * don't switch scanner on, wait until first read ioctls go 
before */ static int gscopen (dev_t dev, int flags, int fmt, struct proc *p) { struct gsc_unit *scu; int unit; unit = UNIT(minor(dev)) & UNIT_MASK; if ( unit >= NGSC ) { #ifdef GSCDEBUG /* XXX lprintf isn't valid here since there is no scu. */ printf("gsc%d.open: unconfigured unit number (max %d)\n", unit, NGSC); #endif return ENXIO; } scu = unittab + unit; if ( !( scu->flags & ATTACHED ) ) { lprintf("gsc%d.open: unit was not attached successfully 0x04x\n", unit, scu->flags); return ENXIO; } if ( minor(dev) & DBUG_MASK ) scu->flags |= FLAG_DEBUG; else scu->flags &= ~FLAG_DEBUG; switch(minor(dev) & FRMT_MASK) { case FRMT_PBM: scu->flags |= PBM_MODE; lprintf("gsc%d.open: pbm mode\n", unit); break; case FRMT_RAW: lprintf("gsc%d.open: raw mode\n", unit); scu->flags &= ~PBM_MODE; break; default: lprintf("gsc%d.open: gray maps are not yet supported", unit); return ENXIO; } lprintf("gsc%d.open: minor %d\n", unit, minor(dev)); if ( scu->flags & OPEN ) { lprintf("gsc%d.open: already open", unit); return EBUSY; } if (isa_dma_acquire(scu->channel)) return(EBUSY); scu->flags |= OPEN; return SUCCESS; } /*********************************************************************** * * gscclose * * turn off scanner * release the buffer */ static int gscclose (dev_t dev, int flags, int fmt, struct proc *p) { int unit = UNIT(minor(dev)); struct gsc_unit *scu = unittab + unit; lprintf("gsc%d.close: minor %d\n", unit, minor(dev)); if ( unit >= NGSC || !( scu->flags & ATTACHED ) ) { lprintf("gsc%d.read: unit was not attached successfully 0x04x\n", unit, scu->flags); return ENXIO; } outb(scu->ctrl, scu->ctrl_byte & ~GSC_POWER_ON); scu->sbuf.size = INVALID; scu->sbuf.poi = INVALID; isa_dma_release(scu->channel); scu->flags &= ~(FLAG_DEBUG | OPEN | READING); return SUCCESS; } /*********************************************************************** * * gscread */ static int gscread (dev_t dev, struct uio *uio, int ioflag) { int unit = UNIT(minor(dev)); struct gsc_unit *scu = unittab + unit; 
size_t nbytes; int res; lprintf("gsc%d.read: minor %d\n", unit, minor(dev)); if ( unit >= NGSC || !( scu->flags & ATTACHED ) ) { lprintf("gsc%d.read: unit was not attached successfully 0x04x\n", unit, scu->flags); return ENXIO; } if ( !(scu->flags & READING) ) { res = buffer_allocate(scu); if ( res == SUCCESS ) scu->flags |= READING; else return res; scu->ctrl_byte = geomtab[scu->geometry].s_res; /* initialize for pbm mode */ if ( scu->flags & PBM_MODE ) { char *p; int width = geomtab[scu->geometry].dpl; sprintf(scu->sbuf.base,"P4 %d %d\n", width, scu->height); scu->bcount = scu->height * width / 8; lprintf("gsc%d.read: initializing pbm mode: `%s', bcount: 0x%x\n", unit, scu->sbuf.base, scu->bcount); /* move header to end of sbuf */ for(p=scu->sbuf.base; *p; p++); while(--p >= scu->sbuf.base) { *(char *)(scu->sbuf.base + --scu->sbuf.poi) = *p; scu->bcount++; } } } lprintf("gsc%d.read(before buffer_read): " "size 0x%x, pointer 0x%x, bcount 0x%x, ok\n", unit, scu->sbuf.size, scu->sbuf.poi, scu->bcount); if ( scu->sbuf.poi == scu->sbuf.size ) if ( (res = buffer_read(scu)) != SUCCESS ) return res; lprintf("gsc%d.read(after buffer_read): " "size 0x%x, pointer 0x%x, bcount 0x%x, ok\n", unit, scu->sbuf.size, scu->sbuf.poi, scu->bcount); nbytes = MIN( uio->uio_resid, scu->sbuf.size - scu->sbuf.poi ); if ( (scu->flags & PBM_MODE) ) nbytes = MIN( nbytes, scu->bcount ); lprintf("gsc%d.read: transferring 0x%x bytes", nbytes); res = uiomove(scu->sbuf.base + scu->sbuf.poi, nbytes, uio); if ( res != SUCCESS ) { lprintf("gsc%d.read: uiomove failed %d", unit, res); return res; } scu->sbuf.poi += nbytes; if ( scu->flags & PBM_MODE ) scu->bcount -= nbytes; lprintf("gsc%d.read: size 0x%x, pointer 0x%x, bcount 0x%x, ok\n", unit, scu->sbuf.size, scu->sbuf.poi, scu->bcount); return SUCCESS; } /*********************************************************************** * * gscioctl * */ static int gscioctl (dev_t dev, int cmd, caddr_t data, int flag, struct proc *p) { int unit = 
UNIT(minor(dev)); struct gsc_unit *scu = unittab + unit; lprintf("gsc%d.ioctl: minor %d\n", unit, minor(dev)); if ( unit >= NGSC || !( scu->flags & ATTACHED ) ) { lprintf("gsc%d.ioctl: unit was not attached successfully 0x04x\n", unit, scu->flags); return ENXIO; } switch(cmd) { case GSC_SRESSW: lprintf("gsc%d.ioctl:GSC_SRESSW\n", unit); if ( scu->flags & READING ) { lprintf("gsc%d:ioctl on already reading unit\n", unit); return EBUSY; } scu->geometry = get_geometry(scu); return SUCCESS; case GSC_GRES: *(int *)data=geomtab[scu->geometry].dpi; lprintf("gsc%d.ioctl:GSC_GRES %ddpi\n", unit, *(int *)data); return SUCCESS; case GSC_GWIDTH: *(int *)data=geomtab[scu->geometry].dpl; lprintf("gsc%d.ioctl:GSC_GWIDTH %d\n", unit, *(int *)data); return SUCCESS; case GSC_SRES: case GSC_SWIDTH: lprintf("gsc%d.ioctl:GSC_SRES or GSC_SWIDTH %d\n", unit, *(int *)data); { int g; struct gsc_geom geom = NEW_GEOM; if ( cmd == GSC_SRES ) geom.dpi = *(int *)data; else geom.dpl = *(int *)data; if ( ( g = lookup_geometry(geom, scu) ) == INVALID ) return EINVAL; scu->geometry = g; return SUCCESS; } case GSC_GHEIGHT: *(int *)data=scu->height; lprintf("gsc%d.ioctl:GSC_GHEIGHT %d\n", unit, *(int *)data); return SUCCESS; case GSC_SHEIGHT: lprintf("gsc%d.ioctl:GSC_SHEIGHT %d\n", unit, *(int *)data); if ( scu->flags & READING ) { lprintf("gsc%d:ioctl on already reading unit\n", unit); return EBUSY; } scu->height=*(int *)data; return SUCCESS; case GSC_GBLEN: *(int *)data=scu->blen; lprintf("gsc%d.ioctl:GSC_GBLEN %d\n", unit, *(int *)data); return SUCCESS; case GSC_SBLEN: lprintf("gsc%d.ioctl:GSC_SBLEN %d\n", unit, *(int *)data); if (*(int *)data * geomtab[scu->geometry].dpl / 8 > MAX_BUFSIZE) { lprintf("gsc%d:ioctl buffer size too high\n", unit); return ENOMEM; } scu->blen=*(int *)data; return SUCCESS; case GSC_GBTIME: *(int *)data = scu->btime / hz; lprintf("gsc%d.ioctl:GSC_GBTIME %d\n", unit, *(int *)data); return SUCCESS; case GSC_SBTIME: scu->btime = *(int *)data * hz; 
lprintf("gsc%d.ioctl:GSC_SBTIME %d\n", unit, *(int *)data); return SUCCESS; default: return ENOTTY; } } static gsc_devsw_installed = 0; static void gsc_drvinit(void *unused) { dev_t dev; if( ! gsc_devsw_installed ) { dev = makedev(CDEV_MAJOR, 0); cdevsw_add(&dev,&gsc_cdevsw, NULL); gsc_devsw_installed = 1; } } SYSINIT(gscdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,gsc_drvinit,NULL) #endif /* NGSC > 0 */ diff --git a/sys/i386/isa/joy.c b/sys/i386/isa/joy.c index 84682a929247..072be4edb5c5 100644 --- a/sys/i386/isa/joy.c +++ b/sys/i386/isa/joy.c @@ -1,299 +1,300 @@ /*- * Copyright (c) 1995 Jean-Marc Zucconi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include "joy.h" #if NJOY > 0 #include "opt_devfs.h" #include #include #include #include #ifdef DEVFS #include #endif /*DEVFS*/ +#include #include #include #include #include #include /* The game port can manage 4 buttons and 4 variable resistors (usually 2 * joysticks, each with 2 buttons and 2 pots.) via the port at address 0x201. * Getting the state of the buttons is done by reading the game port: * buttons 1-4 correspond to bits 4-7 and resistors 1-4 (X1, Y1, X2, Y2) * to bits 0-3. * if button 1 (resp 2, 3, 4) is pressed, the bit 4 (resp 5, 6, 7) is set to 0 * to get the value of a resistor, write the value 0xff at port and * wait until the corresponding bit returns to 0. */ /* the formulae below only work if u is ``not too large''. 
See also * the discussion in microtime.s */ #define usec2ticks(u) (((u) * 19549)>>14) #define ticks2usec(u) (((u) * 3433)>>12) #define joypart(d) minor(d)&1 #define UNIT(d) minor(d)>>1&3 #ifndef JOY_TIMEOUT #define JOY_TIMEOUT 2000 /* 2 milliseconds */ #endif static struct { int port; int x_off[2], y_off[2]; int timeout[2]; #ifdef DEVFS void *devfs_token; #endif } joy[NJOY]; static int joyprobe (struct isa_device *); static int joyattach (struct isa_device *); struct isa_driver joydriver = {joyprobe, joyattach, "joy"}; #define CDEV_MAJOR 51 static d_open_t joyopen; static d_close_t joyclose; static d_read_t joyread; static d_ioctl_t joyioctl; static struct cdevsw joy_cdevsw = { joyopen, joyclose, joyread, nowrite, /*51*/ joyioctl, nostop, nullreset, nodevtotty,/*joystick */ seltrue, nommap, NULL, "joy", NULL, -1 }; static int get_tick __P((void)); static int joyprobe (struct isa_device *dev) { #ifdef WANT_JOYSTICK_CONNECTED outb (dev->id_iobase, 0xff); DELAY (10000); /* 10 ms delay */ return (inb (dev->id_iobase) & 0x0f) != 0x0f; #else return 1; #endif } static int joyattach (struct isa_device *dev) { int unit = dev->id_unit; joy[unit].port = dev->id_iobase; joy[unit].timeout[0] = joy[unit].timeout[1] = 0; printf("joy%d: joystick\n", unit); #ifdef DEVFS joy[dev->id_unit].devfs_token = devfs_add_devswf(&joy_cdevsw, 0, DV_CHR, 0, 0, 0600, "joy%d", unit); #endif return 1; } static int joyopen (dev_t dev, int flags, int fmt, struct proc *p) { int unit = UNIT (dev); int i = joypart (dev); if (joy[unit].timeout[i]) return EBUSY; joy[unit].x_off[i] = joy[unit].y_off[i] = 0; joy[unit].timeout[i] = JOY_TIMEOUT; return 0; } static int joyclose (dev_t dev, int flags, int fmt, struct proc *p) { int unit = UNIT (dev); int i = joypart (dev); joy[unit].timeout[i] = 0; return 0; } static int joyread (dev_t dev, struct uio *uio, int flag) { int unit = UNIT(dev); int port = joy[unit].port; int i, t0, t1; int state = 0, x = 0, y = 0; struct joystick c; disable_intr (); outb (port, 
0xff); t0 = get_tick (); t1 = t0; i = usec2ticks(joy[unit].timeout[joypart(dev)]); while (t0-t1 < i) { state = inb (port); if (joypart(dev) == 1) state >>= 2; t1 = get_tick (); if (t1 > t0) t1 -= timer0_max_count; if (!x && !(state & 0x01)) x = t1; if (!y && !(state & 0x02)) y = t1; if (x && y) break; } enable_intr (); c.x = x ? joy[unit].x_off[joypart(dev)] + ticks2usec(t0-x) : 0x80000000; c.y = y ? joy[unit].y_off[joypart(dev)] + ticks2usec(t0-y) : 0x80000000; state >>= 4; c.b1 = ~state & 1; c.b2 = ~(state >> 1) & 1; return uiomove ((caddr_t)&c, sizeof(struct joystick), uio); } static int joyioctl (dev_t dev, int cmd, caddr_t data, int flag, struct proc *p) { int unit = UNIT (dev); int i = joypart (dev); int x; switch (cmd) { case JOY_SETTIMEOUT: x = *(int *) data; if (x < 1 || x > 10000) /* 10ms maximum! */ return EINVAL; joy[unit].timeout[i] = x; break; case JOY_GETTIMEOUT: *(int *) data = joy[unit].timeout[i]; break; case JOY_SET_X_OFFSET: joy[unit].x_off[i] = *(int *) data; break; case JOY_SET_Y_OFFSET: joy[unit].y_off[i] = *(int *) data; break; case JOY_GET_X_OFFSET: *(int *) data = joy[unit].x_off[i]; break; case JOY_GET_Y_OFFSET: *(int *) data = joy[unit].y_off[i]; break; default: return ENXIO; } return 0; } static int get_tick () { int low, high; outb (TIMER_MODE, TIMER_SEL0); low = inb (TIMER_CNTR0); high = inb (TIMER_CNTR0); return (high << 8) | low; } static joy_devsw_installed = 0; static void joy_drvinit(void *unused) { dev_t dev; if( ! 
joy_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&joy_cdevsw,NULL); joy_devsw_installed = 1; } } SYSINIT(joydev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,joy_drvinit,NULL) #ifdef JOY_MODULE #include #include #include MOD_DEV (joy, LM_DT_CHAR, CDEV_MAJOR, &joy_cdevsw); static struct isa_device dev = {0, &joydriver, IO_GAME, 0, -1, (caddr_t) 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0}; static int joy_load (struct lkm_table *lkmtp, int cmd) { if (joyprobe (&dev)) { joyattach (&dev); /* joy_drvinit (0);*/ uprintf ("Joystick driver loaded\n"); return 0; } else { uprintf ("Joystick driver: probe failed\n"); return 1; } } static int joy_unload (struct lkm_table *lkmtp, int cmd) { uprintf ("Joystick driver unloaded\n"); return 0; } static int joy_stat (struct lkm_table *lkmtp, int cmd) { return 0; } int joy_mod (struct lkm_table *lkmtp, int cmd, int ver) { MOD_DISPATCH(joy, lkmtp, cmd, ver, joy_load, joy_unload, joy_stat); } #endif /* JOY_MODULE */ #endif /* NJOY > 0 */ diff --git a/sys/i386/isa/pcvt/pcvt_sup.c b/sys/i386/isa/pcvt/pcvt_sup.c index d34c01bf5721..24144e14b458 100644 --- a/sys/i386/isa/pcvt/pcvt_sup.c +++ b/sys/i386/isa/pcvt/pcvt_sup.c @@ -1,2214 +1,2216 @@ /* * Copyright (c) 1992, 1995 Hellmuth Michaelis and Joerg Wunsch. * * Copyright (c) 1992, 1993 Brian Dunford-Shore and Scott Turner. * * Copyright (C) 1992, 1993 Soeren Schmidt. * * All rights reserved. * * For the sake of compatibility, portions of this code regarding the * X server interface are taken from Soeren Schmidt's syscons driver. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Hellmuth Michaelis, * Brian Dunford-Shore, Joerg Wunsch, Scott Turner and Soeren Schmidt. * 4. The name authors may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * * @(#)pcvt_sup.c, 3.20, Last Edit-Date: [Thu Apr 6 10:49:44 1995] * */ /*---------------------------------------------------------------------------* * * pcvt_sup.c VT220 Driver Support Routines * --------------------------------------------- * -hm ------------ Release 3.00 -------------- * -hm integrating NetBSD-current patches * -hm removed paranoid delay()/DELAY() from vga_test() * -hm removing vgapage() protection if PCVT_KBD_FIFO * -hm some new CONF_ - values * -hm Joerg's patches for FreeBSD ttymalloc * -hm applying Joerg's patches for FreeBSD 2.0 * -hm applying Lon Willet's patches for NetBSD * -hm NetBSD PR #400: patch to short-circuit TIOCSWINSZ * -hm getting PCVT_BURST reported correctly for FreeBSD 2.0 * -hm applying patch from Joerg fixing Crtat bug * -hm moving ega/vga coldinit support code to mda2egaorvga() * -hm patch from Thomas Eberhardt fixing force 24 lines fkey update * *---------------------------------------------------------------------------*/ #include "vt.h" #if NVT > 0 #include /* global include */ +#include + static void vid_cursor ( struct cursorshape *data ); static void vgasetfontattr ( struct vgafontattr *data ); static void vgagetfontattr ( struct vgafontattr *data ); static void vgaloadchar ( struct vgaloadchar *data ); static void vid_getscreen ( struct screeninfo *data, Dev_t dev ); static void vid_setscreen ( struct screeninfo *data, Dev_t dev ); static void setchargen ( void ); static void setchargen3 ( void ); static void resetchargen ( void ); static void vgareadpel ( struct vgapel *data, Dev_t dev ); static void vgawritepel ( struct vgapel *data, Dev_t dev ); static void vgapcvtid ( struct pcvtid *data ); static void vgapcvtinfo ( struct pcvtinfo *data ); #ifdef XSERVER static unsigned char * compute_charset_base ( unsigned fontset ); #endif /* XSERVER */ static struct callout_handle async_update_ch = CALLOUT_HANDLE_INITIALIZER(&async_update_ch); #if PCVT_SCREENSAVER static void scrnsv_timedout ( void *arg ); static 
struct callout_handle scrnsv_timeout_ch = CALLOUT_HANDLE_INITIALIZER(&scrnsv_timeout_ch); static u_short *savedscreen = (u_short *)0; /* ptr to screen contents */ static size_t scrnsv_size = (size_t)-1; /* size of saved image */ #ifndef XSERVER static unsigned scrnsv_timeout = 0; /* initially off */ static void pcvt_set_scrnsv_tmo ( int timeout );/* else declared global */ #endif /* XSERVER */ #if PCVT_PRETTYSCRNS static u_short *scrnsv_current = (u_short *)0; /* attention char ptr */ static struct callout_handle scrnsv_blink_ch = CALLOUT_HANDLE_INITIALIZER(&scrnsv_blink_ch); static void scrnsv_blink ( void * ); static u_short getrand ( void ); #endif /* PCVT_PRETTYSCRNS */ #endif /* PCVT_SCREENSAVER */ /*---------------------------------------------------------------------------* * execute vga ioctls *---------------------------------------------------------------------------*/ int vgaioctl(Dev_t dev, int cmd, caddr_t data, int flag) { if(minor(dev) >= PCVT_NSCREENS) return -1; /* * Some of the commands are not applicable if the vt in question, or the * current vt is in graphics mode (i.e., the X server acts on it); they * will cause an EAGAIN (resource temporarily unavailable) to be returned. 
*/ #ifdef XSERVER #if PCVT_USL_VT_COMPAT #define is_dev_grafx vs[minor(dev)].vt_status & VT_GRAFX #define is_current_grafx vsp->vt_status & VT_GRAFX #else /* old X interface */ #define is_dev_grafx pcvt_xmode #define is_current_grafx pcvt_xmode #endif /* PCVT_USL_VT_COMPAT */ #else /* !XSERVER */ #define is_dev_grafx 0 /* not applicable */ #define is_current_grafx 0 #endif /* XSERVER */ switch(cmd) { case VGACURSOR: if(is_current_grafx) return EAGAIN; vid_cursor((struct cursorshape *)data); break; case VGALOADCHAR: if((adaptor_type != VGA_ADAPTOR) && (adaptor_type != EGA_ADAPTOR)) return -1; if(is_current_grafx) return EAGAIN; vgaloadchar((struct vgaloadchar *)data); break; case VGASETFONTATTR: if((adaptor_type != VGA_ADAPTOR) && (adaptor_type != EGA_ADAPTOR)) return -1; #if PCVT_SCREENSAVER pcvt_scrnsv_reset(); #endif /* PCVT_SCREENSAVER */ vgasetfontattr((struct vgafontattr *)data); break; case VGAGETFONTATTR: if((adaptor_type != VGA_ADAPTOR) && (adaptor_type != EGA_ADAPTOR)) return -1; vgagetfontattr((struct vgafontattr *)data); break; case VGASETSCREEN: #if defined XSERVER && !PCVT_USL_VT_COMPAT /* avoid screen switch if using old X mode */ if(is_dev_grafx) return EAGAIN; #endif /* XSERVER && !PCVT_USL_VT_COMPAT */ #if PCVT_SCREENSAVER pcvt_scrnsv_reset(); #endif /* PCVT_SCREENSAVER */ vid_setscreen((struct screeninfo *)data, dev); break; case VGAGETSCREEN: vid_getscreen((struct screeninfo *)data, dev); break; case VGAREADPEL: if(adaptor_type != VGA_ADAPTOR) return -1; if(is_dev_grafx) return EAGAIN; vgareadpel((struct vgapel *)data, dev); break; case VGAWRITEPEL: if(adaptor_type != VGA_ADAPTOR) return -1; if(is_dev_grafx) return EAGAIN; vgawritepel((struct vgapel *)data, dev); break; #if PCVT_SCREENSAVER case VGASCREENSAVER: if(is_current_grafx) return EAGAIN; pcvt_set_scrnsv_tmo(*(int *)data); pcvt_scrnsv_reset(); break; #endif /* PCVT_SCREENSAVER */ case VGAPCVTID: vgapcvtid((struct pcvtid *)data); break; case VGAPCVTINFO: vgapcvtinfo((struct pcvtinfo 
*)data);
		break;

	case VGASETCOLMS:
		if(is_dev_grafx)
			return EAGAIN;
		/* only 80 and 132 columns are meaningful requests */
		if(*(int *)data == 80)
			(void)vt_col(&vs[minor(dev)], SCR_COL80);
		else if(*(int *)data == 132)
		{
			if(vt_col(&vs[minor(dev)], SCR_COL132) == 0)
				return EINVAL;	/* not a VGA */
		}
		else
			return EINVAL;
		break;

	case TIOCSWINSZ:
		/* do nothing here */
		break;

	default:
		return -1;
	}
	return 0;

#undef is_dev_grafx
#undef is_current_grafx
}

/*---------------------------------------------------------------------------*
 *	video ioctl - return driver id
 *---------------------------------------------------------------------------*/
static void
vgapcvtid(struct pcvtid *data)
{
	/* report driver name and revision so userland can identify pcvt */
	strcpy(data->name, PCVTIDNAME);
	data->rmajor = PCVTIDMAJOR;
	data->rminor = PCVTIDMINOR;
}

/*---------------------------------------------------------------------------*
 *	video ioctl - return driver compile time options data
 *---------------------------------------------------------------------------*/
static void
vgapcvtinfo(struct pcvtinfo *data)
{
	/* which OS flavour this driver was built for */
#if PCVT_NETBSD
	data->opsys	= CONF_NETBSD;
	data->opsysrel	= PCVT_NETBSD;
#elif PCVT_FREEBSD
	data->opsys	= CONF_FREEBSD;
	data->opsysrel	= PCVT_FREEBSD;
#else
	data->opsys	= CONF_UNKNOWNOPSYS;
	data->opsysrel	= 0;
#endif

	data->nscreens	= PCVT_NSCREENS;
	data->scanset	= PCVT_SCANSET;
	data->updatefast= PCVT_UPDATEFAST;
	data->updateslow= PCVT_UPDATESLOW;
	data->sysbeepf	= PCVT_SYSBEEPF;

#if PCVT_NETBSD || PCVT_FREEBSD >= 200
	data->pcburst	= PCVT_PCBURST;
#else
	data->pcburst	= 1;
#endif

#if PCVT_KBD_FIFO
	data->kbd_fifo_sz = PCVT_KBD_FIFO_SZ;
#else
	data->kbd_fifo_sz = 0;
#endif

	/* build a bitmask of the compile-time options this kernel has */
	data->compile_opts = (0

#if PCVT_VT220KEYB
	| CONF_VT220KEYB
#endif

#if PCVT_SCREENSAVER
	| CONF_SCREENSAVER
#endif

#if PCVT_PRETTYSCRNS
	| CONF_PRETTYSCRNS
#endif

#if PCVT_CTRL_ALT_DEL
	| CONF_CTRL_ALT_DEL
#endif

#if PCVT_USEKBDSEC
	| CONF_USEKBDSEC
#endif

#if PCVT_24LINESDEF
	| CONF_24LINESDEF
#endif

#if PCVT_EMU_MOUSE
	| CONF_EMU_MOUSE
#endif

#if PCVT_SHOWKEYS
	| CONF_SHOWKEYS
#endif

#if PCVT_KEYBDID
	| CONF_KEYBDID
#endif

#if PCVT_SIGWINCH
	|
CONF_SIGWINCH #endif #if PCVT_NULLCHARS | CONF_NULLCHARS #endif #if PCVT_BACKUP_FONTS | CONF_BACKUP_FONTS #endif #if PCVT_SW0CNOUTP /* was FORCE8BIT */ | CONF_SW0CNOUTP #endif #if PCVT_SETCOLOR | CONF_SETCOLOR #endif #if PCVT_132GENERIC | CONF_132GENERIC #endif #if PCVT_PALFLICKER | CONF_PALFLICKER #endif #if PCVT_WAITRETRACE | CONF_WAITRETRACE #endif #ifdef XSERVER | CONF_XSERVER #endif #if PCVT_USL_VT_COMPAT | CONF_USL_VT_COMPAT #endif #if PCVT_PORTIO_DELAY | CONF_PORTIO_DELAY #endif #if PCVT_INHIBIT_NUMLOCK | CONF_INHIBIT_NUMLOCK #endif #if PCVT_META_ESC | CONF_META_ESC #endif #if PCVT_KBD_FIFO | CONF_KBD_FIFO #endif #if PCVT_NOFASTSCROLL | CONF_NOFASTSCROLL #endif #if PCVT_SLOW_INTERRUPT | CONF_SLOW_INTERRUPT #endif #if PCVT_NO_LED_UPDATE | CONF_NO_LED_UPDATE #endif ); } /*---------------------------------------------------------------------------* * video ioctl - set cursor appearence *---------------------------------------------------------------------------*/ static void vid_cursor(struct cursorshape *data) { int screen; int start; int end; int line_height; int character_set; /* for which virtual screen, -1 for current */ screen = data->screen_no; if(screen == -1) /* current ? */ screen = current_video_screen; else if(screen > totalscreens - 1) screen = totalscreens - 1; else if(screen < 0) screen = 0; if(adaptor_type == VGA_ADAPTOR || adaptor_type == EGA_ADAPTOR) { character_set = vs[screen].vga_charset; character_set = (character_set < 0) ? 0 : ((character_set < totalfonts) ? character_set : totalfonts-1); line_height = vgacs[character_set].char_scanlines & 0x1F; } else if(adaptor_type == MDA_ADAPTOR) { line_height = 14; } else { line_height = 8; /* CGA */ } start = (data->start < 0) ? 0 : ((data->start > line_height) ? line_height : data->start); if((vga_family == VGA_F_TRI) && (start == 0)) start = 1; end = (data->end < 0) ? 0 : ((data->end > line_height) ? 
line_height : data->end;

	/* remember the cursor shape per virtual screen so it survives
	   screen switches */
	vs[screen].cursor_start = start;
	vs[screen].cursor_end = end;

	/* only program the CRTC if this screen is the one displayed */
	if(screen == current_video_screen)
	{
		outb(addr_6845,CRTC_CURSTART);	/* cursor start reg */
		outb(addr_6845+1, start);
		outb(addr_6845,CRTC_CUREND);	/* cursor end reg */
		outb(addr_6845+1, end);
	}
}

/*---------------------------------------------------------------------------*
 *	ega/vga ioctl - set font attributes
 *---------------------------------------------------------------------------*/
static void
vgasetfontattr(struct vgafontattr *data)
{
	register int i;
	int vga_character_set;
	int lines_per_character;
	int totscanlines;
	int size;

	/* clamp requested character set index into [0, totalfonts-1] */
	vga_character_set = data->character_set;
	vga_character_set = (vga_character_set < 0) ? 0 :
		((vga_character_set < totalfonts) ?
		 vga_character_set : totalfonts-1);

	vgacs[vga_character_set].loaded = data->font_loaded;

	/* Limit Characters to 32 scanlines doubled */
	vgacs[vga_character_set].char_scanlines =
		(data->character_scanlines & 0x1F)
		| 0x40;	/* always set bit 9 of line cmp reg */

	if(adaptor_type == EGA_ADAPTOR)
		/* ...and screen height to scan 350 lines */
		vgacs[vga_character_set].scr_scanlines =
			(data->screen_scanlines > 0x5d) ?
			0x5d : data->screen_scanlines;
	else
		/* ...and screen height to scan 480 lines */
		vgacs[vga_character_set].scr_scanlines =
			(data->screen_scanlines > 0xdF) ?
0xdF : data->screen_scanlines; lines_per_character = (int)(0x1F & vgacs[vga_character_set].char_scanlines)+1; totscanlines = 0x101 + (int)vgacs[vga_character_set].scr_scanlines; size = data->screen_size; if(adaptor_type == EGA_ADAPTOR) { switch(size) { case SIZ_25ROWS: /* This case is always OK */ break; case SIZ_35ROWS: if(totscanlines/lines_per_character >= 35) size = SIZ_35ROWS; else size = SIZ_25ROWS; break; case SIZ_43ROWS: default: if(totscanlines/lines_per_character >= 43) size = SIZ_43ROWS; else if(totscanlines/lines_per_character >= 35) size = SIZ_35ROWS; else size = SIZ_25ROWS; break; } } else { switch(size) { case SIZ_25ROWS: /* This case is always OK */ break; case SIZ_28ROWS: if(totscanlines/lines_per_character >= 28) size = SIZ_28ROWS; else size = SIZ_25ROWS; break; case SIZ_40ROWS: if(totscanlines/lines_per_character >= 40) size = SIZ_40ROWS; else if(totscanlines/lines_per_character >= 28) size = SIZ_28ROWS; else size = SIZ_25ROWS; break; case SIZ_50ROWS: default: if(totscanlines/lines_per_character >= 50) size = SIZ_50ROWS; else if(totscanlines/lines_per_character >= 40) size = SIZ_40ROWS; else if(totscanlines/lines_per_character >= 28) size = SIZ_28ROWS; else size = SIZ_25ROWS; break; } } vgacs[vga_character_set].screen_size = size; for (i = 0;i < PCVT_NSCREENS;i++) { if(vga_character_set == vs[i].vga_charset) set_charset(&(vs[i]),vga_character_set); } #if !PCVT_USL_VT_COMPAT vgapage(current_video_screen); #else switch_screen(current_video_screen, 0, 0); #endif /* !PCVT_USL_VT_COMPAT */ } /*---------------------------------------------------------------------------* * ega/vga ioctl - get font attributes *---------------------------------------------------------------------------*/ static void vgagetfontattr(struct vgafontattr *data) { int vga_character_set; vga_character_set = data->character_set; vga_character_set = (vga_character_set < 0) ? 0 : ((vga_character_set < (int)totalfonts) ? 
vga_character_set : (int)(totalfonts-1));

	/* hand the (clamped) index back along with the font's attributes */
	data->character_set = (int)vga_character_set;
	data->font_loaded = (int)vgacs[vga_character_set].loaded;
	data->character_scanlines =
		(int)vgacs[vga_character_set].char_scanlines
		& 0x1f;	/* do not display the overflow bits */
	data->screen_scanlines = (int)vgacs[vga_character_set].scr_scanlines;
	data->screen_size = (int)vgacs[vga_character_set].screen_size;
}

/*---------------------------------------------------------------------------*
 *	ega/vga ioctl - load a character shape into character set
 *---------------------------------------------------------------------------*/
static void
vgaloadchar(struct vgaloadchar *data)
{
	int vga_character_set;
	int character;
	int lines_per_character;

	/* clamp charset index, character code and scanline count to safe
	   ranges before the character generator RAM is written */
	vga_character_set = data->character_set;
	vga_character_set = (vga_character_set < 0) ? 0 :
		((vga_character_set < (int)totalfonts) ?
		 vga_character_set : (int)(totalfonts-1));

	character = (data->character < 0) ? 0 :
		((data->character > 255) ? 255 : data->character);

	lines_per_character = (int)data->character_scanlines;
	lines_per_character = (lines_per_character < 0) ? 0 :
		((lines_per_character > 32) ?
32 : lines_per_character); loadchar(vga_character_set,character,lines_per_character, data->char_table); } /*---------------------------------------------------------------------------* * video ioctl - get screen information *---------------------------------------------------------------------------*/ static void vid_getscreen(struct screeninfo *data, Dev_t dev) { int device = minor(dev); data->adaptor_type = adaptor_type; /* video adapter installed */ data->monitor_type = color; /* monitor type installed */ data->totalfonts = totalfonts; /* no of downloadble fonts */ data->totalscreens = totalscreens; /* no of virtual screens */ data->screen_no = device; /* this screen number */ data->current_screen = current_video_screen; /* displayed screen no */ /* screen size */ data->screen_size = vgacs[(vs[device].vga_charset)].screen_size; /* pure VT mode or HP/VT mode */ data->pure_vt_mode = vs[device].vt_pure_mode; data->vga_family = vga_family; /* manufacturer, family */ data->vga_type = vga_type; /* detected chipset type */ data->vga_132 = can_do_132col; /* 132 column support */ data->force_24lines = vs[device].force24; /* force 24 lines */ } /*---------------------------------------------------------------------------* * video ioctl - set screen information *---------------------------------------------------------------------------*/ static void vid_setscreen(struct screeninfo *data, Dev_t dev) { int screen; if(data->current_screen == -1) { screen = minor(dev); } else { if(data->current_screen >= PCVT_NSCREENS) return; /* XXXXXX */ screen = data->current_screen; } vgapage(screen); #if defined XSERVER && PCVT_USL_VT_COMPAT { int x = spltty(), waitfor = screen + 1; /* if the vt is yet to be released by a process, wait here */ if(vs[screen].vt_status & VT_WAIT_REL) (void)usl_vt_ioctl(dev, VT_WAITACTIVE, (caddr_t)&waitfor, 0, 0); splx(x); } /* make sure the switch really happened */ if(screen != current_video_screen) return; /* XXX should say "EAGAIN" here */ #endif /* 
defined XSERVER && PCVT_USL_VT_COMPAT */ if((data->screen_size != -1) || (data->force_24lines != -1)) { if(data->screen_size == -1) data->screen_size = vgacs[(vs[screen].vga_charset)].screen_size; if(data->force_24lines != -1) { vs[screen].force24 = data->force_24lines; if(vs[screen].force24) { swritefkl(2,(u_char *)"FORCE24 ENABLE *", &vs[screen]); } else { swritefkl(2,(u_char *)"FORCE24 ENABLE ", &vs[screen]); } } if((data->screen_size == SIZ_25ROWS) || (data->screen_size == SIZ_28ROWS) || (data->screen_size == SIZ_35ROWS) || (data->screen_size == SIZ_40ROWS) || (data->screen_size == SIZ_43ROWS) || (data->screen_size == SIZ_50ROWS)) { if(data->screen_no == -1) set_screen_size(vsp, data->screen_size); else set_screen_size(&vs[minor(dev)], data->screen_size); } } if(data->pure_vt_mode != -1) { if((data->pure_vt_mode == M_HPVT) || (data->pure_vt_mode == M_PUREVT)) { if(data->screen_no == -1) set_emulation_mode(vsp, data->pure_vt_mode); else set_emulation_mode(&vs[minor(dev)], data->pure_vt_mode); } } } /*---------------------------------------------------------------------------* * set screen size/resolution for a virtual screen *---------------------------------------------------------------------------*/ void set_screen_size(struct video_state *svsp, int size) { int i; for(i = 0; i < totalfonts; i++) { if(vgacs[i].screen_size == size) { set_charset(svsp, i); clr_parms(svsp); /* escape parameter init */ svsp->state = STATE_INIT; /* initial state */ svsp->scrr_beg = 0; /* start of scrolling region */ svsp->sc_flag = 0; /* invalidate saved cursor * position */ svsp->transparent = 0; /* disable control code * processing */ /* Update tty to reflect screen size */ if (svsp->vs_tty) { svsp->vs_tty->t_winsize.ws_col = svsp->maxcol; svsp->vs_tty->t_winsize.ws_xpixel = (svsp->maxcol == 80)? 
720: 1056;
				svsp->vs_tty->t_winsize.ws_ypixel = 400;
				svsp->vs_tty->t_winsize.ws_row =
					svsp->screen_rows;
			}

			/* screen_rows already calculated in set_charset() */
			if(svsp->vt_pure_mode == M_HPVT && svsp->labels_on)
			{
				/* redraw function key labels for new size */
				if(svsp->which_fkl == SYS_FKL)
					sw_sfkl(svsp);
				else if(svsp->which_fkl == USR_FKL)
					sw_ufkl(svsp);
			}

			svsp->scrr_len = svsp->screen_rows;
			svsp->scrr_end = svsp->scrr_len - 1;

#if PCVT_SIGWINCH
			/* notify the foreground process group of the resize */
			if (svsp->vs_tty && svsp->vs_tty->t_pgrp)
				pgsignal(svsp->vs_tty->t_pgrp, SIGWINCH, 1);
#endif /* PCVT_SIGWINCH */

			break;
		}
	}
}

/*---------------------------------------------------------------------------*
 *	VGA ioctl - read DAC palette entry
 *---------------------------------------------------------------------------*/
static void
vgareadpel(struct vgapel *data, Dev_t dev)
{
	register unsigned vpage = minor(dev);
	register unsigned idx = data->idx;

	if(idx >= NVGAPEL)
		return;		/* no such entry */

	/* do not read VGA palette directly, use saved values */
	data->r = vs[vpage].palette[idx].r;
	data->g = vs[vpage].palette[idx].g;
	data->b = vs[vpage].palette[idx].b;
}

/*---------------------------------------------------------------------------*
 *	VGA ioctl - write DAC palette entry
 *---------------------------------------------------------------------------*/
static void
vgawritepel(struct vgapel *data, Dev_t dev)
{
	register unsigned vpage = minor(dev);
	register unsigned idx = data->idx;

	if(idx >= NVGAPEL)
		return;		/* no such entry */

	/* first, update saved values for this video screen */
	vs[vpage].palette[idx].r = data->r;
	vs[vpage].palette[idx].g = data->g;
	vs[vpage].palette[idx].b = data->b;

	/* if this happens on active screen, update VGA DAC, too */
	if(vpage == current_video_screen)
		vgapaletteio(idx, &vs[vpage].palette[idx], 1);
}

/*---------------------------------------------------------------------------*
 *	VGA physical IO - read/write one palette entry
 *---------------------------------------------------------------------------*/
void
vgapaletteio(unsigned idx, struct rgb
*val, int writeit) { #if PCVT_PALFLICKER vga_screen_off(); #endif /* PCVT_PALFLICKER */ if(writeit) { outb(VGA_DAC + 2, idx); #if PCVT_WAITRETRACE wait_retrace(); #endif /* PCVT_WAITRETRACE */ outb(VGA_DAC + 3, val->r & VGA_PMSK); #if PCVT_WAITRETRACE wait_retrace(); #endif /* PCVT_WAITRETRACE */ outb(VGA_DAC + 3, val->g & VGA_PMSK); #if PCVT_WAITRETRACE wait_retrace(); #endif /* PCVT_WAITRETRACE */ outb(VGA_DAC + 3, val->b & VGA_PMSK); } else /* read it */ { outb(VGA_DAC + 1, idx); #if PCVT_WAITRETRACE wait_retrace(); #endif /* PCVT_WAITRETRACE */ val->r = inb(VGA_DAC + 3) & VGA_PMSK; #if PCVT_WAITRETRACE wait_retrace(); #endif /* PCVT_WAITRETRACE */ val->g = inb(VGA_DAC + 3) & VGA_PMSK; #if PCVT_WAITRETRACE wait_retrace(); #endif /* PCVT_WAITRETRACE */ val->b = inb(VGA_DAC + 3) & VGA_PMSK; } #if PCVT_PALFLICKER vga_screen_on(); #endif /* PCVT_PALFLICKER */ } /*---------------------------------------------------------------------------* * * update asynchronous: cursor, cursor pos displ, sys load, keyb scan * * arg is: * UPDATE_START = do update; requeue * UPDATE_STOP = suspend updates * UPDATE_KERN = do update for kernel printfs * *---------------------------------------------------------------------------*/ void async_update(void *arg) { static int lastpos = 0; static int counter = PCVT_UPDATESLOW; #ifdef XSERVER /* need a method to suspend the updates */ if(arg == UPDATE_STOP) { untimeout(async_update, UPDATE_START, async_update_ch); return; } #endif /* XSERVER */ /* first check if update is possible */ if(chargen_access /* does no-one load characters? */ #ifdef XSERVER /* is vt0 not in graphics mode? */ #if !PCVT_USL_VT_COMPAT || pcvt_xmode /* XXX necessary ????? 
*/ #endif /* PCVT_USL_VT_COMPAT */ #endif /* XSERVER */ ) { goto async_update_exit; /* do not update anything */ } #if PCVT_SCREENSAVER if(reset_screen_saver && (counter == PCVT_UPDATESLOW)) { pcvt_scrnsv_reset(); /* yes, do it */ reset_screen_saver = 0; /* re-init */ } else if(scrnsv_active) /* is the screen not blanked? */ { goto async_update_exit; /* do not update anything */ } #endif /* PCVT_SCREENSAVER */ /*-------------------------------------------------------------------*/ /* this takes place on EVERY virtual screen (if not in X mode etc...)*/ /*-------------------------------------------------------------------*/ if ( cursor_pos_valid && (lastpos != (vsp->Crtat + vsp->cur_offset - Crtat))) { lastpos = vsp->Crtat + vsp->cur_offset - Crtat; outb(addr_6845, CRTC_CURSORH); /* high register */ outb(addr_6845+1, ((lastpos) >> 8)); outb(addr_6845, CRTC_CURSORL); /* low register */ outb(addr_6845+1, (lastpos)); } if (arg == UPDATE_KERN) /* Magic arg: for kernel printfs */ return; if(--counter) /* below is possible update */ goto async_update_exit; /* just now and then ..... */ counter = PCVT_UPDATESLOW; /* caution, see screensaver above !! */ /*-------------------------------------------------------------------*/ /* this takes place ONLY on screen 0 if in HP mode, labels on, !X */ /*-------------------------------------------------------------------*/ /* additional processing for HP necessary ? 
*/ if((vs[0].vt_pure_mode == M_HPVT) && (vs[0].labels_on)) { static volatile u_char buffer[] = "System Load: 1min: 0.00 5min: 0.00 15min: 0.00"; register int tmp, i; #if PCVT_SHOWKEYS extern u_char rawkeybuf[80]; if(keyboard_show) { for(i = 0; i < 80; i++) { *((vs[0].Crtat+((vs[0].screen_rows+2) * vs[0].maxcol))+i) = user_attr | rawkeybuf[i]; } } else { #endif /* PCVT_SHOWKEYS */ /* display load averages in last line (taken from tty.c) */ i = 18; #ifdef NEW_AVERUNNABLE tmp = (averunnable.ldavg[0] * 100 + FSCALE / 2) >> FSHIFT; #else tmp = (averunnable[0] * 100 + FSCALE / 2) >> FSHIFT; #endif buffer[i++] = ((((tmp/100)/10) == 0) ? ' ' : ((tmp/100)/10) + '0'); buffer[i++] = ((tmp/100)%10) + '0'; buffer[i++] = '.'; buffer[i++] = ((tmp%100)/10) + '0'; buffer[i++] = ((tmp%100)%10) + '0'; i += 6; #ifdef NEW_AVERUNNABLE tmp = (averunnable.ldavg[1] * 100 + FSCALE / 2) >> FSHIFT; #else tmp = (averunnable[1] * 100 + FSCALE / 2) >> FSHIFT; #endif buffer[i++] = ((((tmp/100)/10) == 0) ? ' ' : ((tmp/100)/10) + '0'); buffer[i++] = ((tmp/100)%10) + '0'; buffer[i++] = '.'; buffer[i++] = ((tmp%100)/10) + '0'; buffer[i++] = ((tmp%100)%10) + '0'; i += 7; #ifdef NEW_AVERUNNABLE tmp = (averunnable.ldavg[2] * 100 + FSCALE / 2) >> FSHIFT; #else tmp = (averunnable[2] * 100 + FSCALE / 2) >> FSHIFT; #endif buffer[i++] = ((((tmp/100)/10) == 0) ? 
' ' : ((tmp/100)/10) + '0'); buffer[i++] = ((tmp/100)%10) + '0'; buffer[i++] = '.'; buffer[i++] = ((tmp%100)/10) + '0'; buffer[i++] = ((tmp%100)%10) + '0'; buffer[i] = '\0'; for(i = 0; buffer[i]; i++) { *((vs[0].Crtat + ((vs[0].screen_rows + 2) * vs[0].maxcol) ) + i ) = user_attr | buffer[i]; } #if PCVT_SHOWKEYS for(; i < 77; i++) { *((vs[0].Crtat + ((vs[0].screen_rows + 2) * vs[0].maxcol) ) + i ) = user_attr | ' '; } } #endif /* PCVT_SHOWKEYS */ } /*-------------------------------------------------------------------*/ /* this takes place on EVERY screen which is in HP mode, labels on,!X*/ /*-------------------------------------------------------------------*/ if((vsp->vt_pure_mode == M_HPVT) && (vsp->labels_on)) { register int col = vsp->col+1; register u_short *p = vsp->Crtat + (vsp->screen_rows * vsp->maxcol); /* update column display between labels */ if(vsp->maxcol == SCR_COL132) { p += (SCR_COL132 - SCR_COL80)/2; if(col >= 100) { *(p + LABEL_COLU) = user_attr | '1'; col -= 100; } else { *(p + LABEL_COLU) = user_attr | '0'; } } *(p + LABEL_COLH) = user_attr | ((col/10) + '0'); *(p + LABEL_COLL) = user_attr | ((col%10) + '0'); /* update row display between labels */ *(p + LABEL_ROWH) = (user_attr | (((vsp->row+1)/10) + '0')); *(p + LABEL_ROWL) = (user_attr | (((vsp->row+1)%10) + '0')); } async_update_exit: if(arg == UPDATE_START) { async_update_ch = timeout(async_update, UPDATE_START, PCVT_UPDATEFAST); } } /*---------------------------------------------------------------------------* * set character set for virtual screen *---------------------------------------------------------------------------*/ void set_charset(struct video_state *svsp, int curvgacs) { static int sizetab[] = { 25, 28, 35, 40, 43, 50 }; int oldsize, oldrows, newsize, newrows; if((curvgacs < 0) || (curvgacs > (NVGAFONTS-1))) return; svsp->vga_charset = curvgacs; select_vga_charset(curvgacs); oldsize = svsp->screen_rowsize; oldrows = svsp->screen_rows; newsize = 
sizetab[(vgacs[curvgacs].screen_size)]; newrows = newsize; if (svsp->vt_pure_mode == M_HPVT) newrows -= 3; if (newrows == 25 && svsp->force24) newrows = 24; if (newrows < oldrows) { int nscroll = svsp->row + 1 - newrows; if (svsp->row >= oldrows) /* Sanity check */ nscroll = oldrows - newrows; if (nscroll > 0) { /* Scroll up */ bcopy (svsp->Crtat + nscroll * svsp->maxcol, svsp->Crtat, newrows * svsp->maxcol * CHR); svsp->row -= nscroll; svsp->cur_offset -= nscroll * svsp->maxcol; } if (newrows < newsize) fillw(user_attr | ' ', svsp->Crtat + newrows * svsp->maxcol, (newsize - newrows) * svsp->maxcol); } else if (oldrows < newsize) fillw(user_attr | ' ', svsp->Crtat + oldrows * svsp->maxcol, (newsize - oldrows) * svsp->maxcol); svsp->screen_rowsize = newsize; svsp->screen_rows = newrows; /* Clip scrolling region */ if(svsp->scrr_end > svsp->screen_rows - 1) svsp->scrr_end = svsp->screen_rows - 1; svsp->scrr_len = svsp->scrr_end - svsp->scrr_beg + 1; /* Clip cursor pos */ if(svsp->cur_offset > (svsp->scrr_len * svsp->maxcol)) svsp->cur_offset = (svsp->scrr_len * svsp->maxcol) + svsp->col; } /*---------------------------------------------------------------------------* * select a vga character set *---------------------------------------------------------------------------*/ void select_vga_charset(int vga_charset) { int first, second; int fflag = 0; int sflag = 0; u_char cmap = 0; static u_char cmaptaba[] = {0x00, 0x01, 0x02, 0x03, 0x10, 0x11, 0x12, 0x13}; static u_char cmaptabb[] = {0x00, 0x04, 0x08, 0x0c, 0x20, 0x24, 0x28, 0x2c}; if((adaptor_type != EGA_ADAPTOR) && (adaptor_type != VGA_ADAPTOR)) return; if((vga_charset < 0) || (vga_charset >= totalfonts)) return; if(!vgacs[vga_charset].loaded) return; /*-------------------------------------------------------------- find the the first and second charset of a given resolution. the first is used for lower 256 and the second (if any) is used for the upper 256 entries of a complete 512 entry ega/ vga charset. 
--------------------------------------------------------------*/ for(first = 0; first < totalfonts; first++) { if(!vgacs[first].loaded) continue; if(vgacs[first].screen_size != vgacs[vga_charset].screen_size) continue; if(vgacs[first].char_scanlines != vgacs[vga_charset].char_scanlines) continue; if(vgacs[first].scr_scanlines != vgacs[vga_charset].scr_scanlines) continue; fflag = 1; break; } if(fflag != 1) return; for(second = first+1; second < totalfonts; second++) { if(!vgacs[second].loaded) continue; if(vgacs[second].screen_size != vgacs[vga_charset].screen_size) continue; if(vgacs[second].char_scanlines != vgacs[vga_charset].char_scanlines) continue; if(vgacs[second].scr_scanlines != vgacs[vga_charset].scr_scanlines) continue; sflag = 1; break; } cmap = cmaptaba[first]; if(sflag) { cmap |= cmaptabb[second]; vgacs[first].secondloaded = second; } else { vgacs[first].secondloaded = 0; /*cs 0 can never become a 2nd!*/ } if(vsp->wd132col) { cmap = (vga_charset & 0x07); cmap |= 0x10; } outb(TS_INDEX, TS_FONTSEL); /* character map select register */ outb(TS_DATA, cmap); /* new char map */ outb(addr_6845, CRTC_MAXROW); /* max scan line reg */ outb(addr_6845+1, vgacs[first].char_scanlines); /* scanlines/char */ outb(addr_6845, CRTC_VDE); /* vert display enable end */ outb(addr_6845+1, vgacs[first].scr_scanlines); /* low byte of scr scanlines */ if((color == 0) && (adaptor_type == VGA_ADAPTOR)) { outb(addr_6845, CRTC_ULOC); /* underline location reg */ outb(addr_6845+1, (vgacs[first].char_scanlines & 0x1F)); } } /*---------------------------------------------------------------------------* * switch vga-card to load a character set *---------------------------------------------------------------------------*/ static void setchargen(void) { chargen_access = 1; /* flag we are accessing the chargen ram */ /* program sequencer to access character generator */ outb(TS_INDEX, TS_SYNCRESET); outb(TS_DATA, 0x01); /* synchronous reset */ outb(TS_INDEX, TS_WRPLMASK); outb(TS_DATA, 
0x04);		/* write to map 2 */

	outb(TS_INDEX, TS_MEMMODE);
	outb(TS_DATA, 0x07);		/* sequential addressing */

	outb(TS_INDEX, TS_SYNCRESET);
	outb(TS_DATA, 0x03);		/* clear synchronous reset */

	/* program graphics controller to access character generator */

	outb(GDC_INDEX, GDC_RDPLANESEL);
	outb(GDC_DATA, 0x02);		/* select map 2 for cpu reads */

	outb(GDC_INDEX, GDC_MODE);
	outb(GDC_DATA, 0x00);		/* disable odd-even addressing */

	outb(GDC_INDEX, GDC_MISC);
	outb(GDC_DATA, 0x00);		/* map starts at 0xA000 */
}

/*---------------------------------------------------------------------------*
 *	switch vga-card to load a character set to plane 3
 *---------------------------------------------------------------------------*/
static void
setchargen3(void)
{
	/* same register dance as setchargen(), but targets map/plane 3 */
	chargen_access = 1;	/* flag we are accessing the chargen ram */

	/* program sequencer to access character generator */

	outb(TS_INDEX, TS_SYNCRESET);
	outb(TS_DATA, 0x01);		/* synchronous reset */

	outb(TS_INDEX, TS_WRPLMASK);
	outb(TS_DATA, 0x08);		/* write to map 3 */

	outb(TS_INDEX, TS_MEMMODE);
	outb(TS_DATA, 0x07);		/* sequential addressing */

	outb(TS_INDEX, TS_SYNCRESET);
	outb(TS_DATA, 0x03);		/* clear synchronous reset */

	/* program graphics controller to access character generator */

	outb(GDC_INDEX, GDC_RDPLANESEL);
	outb(GDC_DATA, 0x03);		/* select map 3 for cpu reads */

	outb(GDC_INDEX, GDC_MODE);
	outb(GDC_DATA, 0x00);		/* disable odd-even addressing */

	outb(GDC_INDEX, GDC_MISC);
	outb(GDC_DATA, 0x00);		/* map starts at 0xA000 */
}

/*---------------------------------------------------------------------------*
 *	switch back vga-card to normal operation
 *---------------------------------------------------------------------------*/
static void
resetchargen(void)
{
	/* program sequencer to access video ram */

	outb(TS_INDEX, TS_SYNCRESET);
	outb(TS_DATA, 0x01);		/* synchronous reset */

	outb(TS_INDEX, TS_WRPLMASK);
	outb(TS_DATA, 0x03);		/* write to map 0 & 1 */

	outb(TS_INDEX, TS_MEMMODE);
	outb(TS_DATA, 0x03);		/* odd-even addressing */

	outb(TS_INDEX,
TS_SYNCRESET);
	outb(TS_DATA, 0x03);		/* clear synchronous reset */

	/* program graphics controller to access character generator */

	outb(GDC_INDEX, GDC_RDPLANESEL);
	outb(GDC_DATA, 0x00);		/* select map 0 for cpu reads */

	outb(GDC_INDEX, GDC_MODE);
	outb(GDC_DATA, 0x10);		/* enable odd-even addressing */

	/* restore the text-mode framebuffer base (color vs mono differ) */
	outb(GDC_INDEX, GDC_MISC);
	if(color)
		outb(GDC_DATA, 0x0e);	/* map starts at 0xb800 */
	else
		outb(GDC_DATA, 0x0a);	/* map starts at 0xb000 */

	chargen_access = 0;	/* flag we are NOT accessing the chargen ram */
}

#if PCVT_WAITRETRACE

/*---------------------------------------------------------------------------*
 *	wait for being in a retrace time window
 *	NOTE: this is __VERY__ bad programming practice in this environment !!
 *---------------------------------------------------------------------------*/
static void
wait_retrace(void)
{
	/* busy-wait on input status register bit 0 (display enable) */
	if(color)
	{
		while(!(inb(GN_INPSTAT1C) & 0x01))
			;
	}
	else
	{
		while(!(inb(GN_INPSTAT1M) & 0x01))
			;
	}
}

#endif /* PCVT_WAITRETRACE */

/*---------------------------------------------------------------------------*
 *	switch screen off (VGA only)
 *---------------------------------------------------------------------------*/
void
vga_screen_off(void)
{
	unsigned char old;

	outb(TS_INDEX, TS_SYNCRESET);
	outb(TS_DATA, 0x01);		/* synchronous reset */

	/* read-modify-write the clocking mode register, set blank bit */
	outb(TS_INDEX, TS_MODE);	/* clocking mode reg */
	old = inb(TS_DATA);		/* get current value */

	outb(TS_INDEX, TS_MODE);	/* clocking mode reg */
	outb(TS_DATA, (old | 0x20));	/* screen off bit on */

	outb(TS_INDEX, TS_SYNCRESET);
	outb(TS_DATA, 0x03);		/* clear synchronous reset */
}

/*---------------------------------------------------------------------------*
 *	switch screen back on (VGA only)
 *---------------------------------------------------------------------------*/
void
vga_screen_on(void)
{
	unsigned char old;

	outb(TS_INDEX, TS_SYNCRESET);
	outb(TS_DATA, 0x01);		/* synchronous reset */

	outb(TS_INDEX, TS_MODE);	/* clocking mode reg */
	old = inb(TS_DATA);		/* get current value */

	outb(TS_INDEX, TS_MODE);	/* clocking
mode reg */ outb(TS_DATA, (old & ~0x20)); /* screen off bit off */ outb(TS_INDEX, TS_SYNCRESET); outb(TS_DATA, 0x03); /* clear synchronous reset */ } /*---------------------------------------------------------------------------* * compute character set base address (in kernel map) *---------------------------------------------------------------------------*/ static unsigned char * compute_charset_base(unsigned fontset) { unsigned char *d = (unsigned char *)Crtat; static int charset_offset[8] = { 0x0000, 0x4000, 0x8000, 0xC000, 0x2000, 0x6000, 0xA000, 0xE000 }; static int charsetw_offset[8] = { 0x0000, 0x2000, 0x4000, 0x6000, 0x8000, 0xA000, 0xC000, 0xE000 }; switch(adaptor_type) { case EGA_ADAPTOR: fontset = (fontset > 3) ? 3 : fontset; break; case VGA_ADAPTOR: fontset = (fontset > 7) ? 7 : fontset; break; default: return 0; } if(color) d -= (0xB8000 - 0xA0000); /* Point to 0xA0000 */ else d -= (0xB0000 - 0xA0000); /* Point to 0xA0000 */ if(vsp->wd132col) d += charsetw_offset[fontset]; /* Load into Character set n */ else d += charset_offset[fontset]; /* Load into Character set n */ return d; } /*---------------------------------------------------------------------------* * load a char into ega/vga character generator ram *---------------------------------------------------------------------------*/ void loadchar(int fontset, int character, int char_scanlines, u_char *char_table) { unsigned char *d; #if PCVT_BACKUP_FONTS unsigned char *bak; #endif /* PCVT_BACKUP_FONTS */ int j, k; if((d = compute_charset_base(fontset)) == 0) return; d += (character * 32); /* 32 bytes per character */ if(vsp->wd132col && (fontset == 1||fontset == 3||fontset == 5||fontset == 7)) setchargen3(); /* access chargen ram */ else setchargen(); /* access chargen ram */ for(j = k = 0; j < char_scanlines; j++) /* x bit high characters */ { *d = char_table[k]; d++; k++; } for(; j < 32; j++) /* Up to 32 bytes per character image*/ { *d = 0x00; d++; } resetchargen(); /* access video ram */ #if 
PCVT_BACKUP_FONTS if(saved_charsets[fontset] == 0) saved_charsets[fontset] = (u_char *)malloc(32 * 256, M_DEVBUF, M_WAITOK); if((bak = saved_charsets[fontset])) { /* make a backup copy of this char */ bak += (character * 32); bzero(bak, 32); bcopy(char_table, bak, char_scanlines); } #ifdef DIAGNOSTIC else panic("pcvt loadchar: no backup buffer"); #endif /* DIAGNOSTIC */ #endif /* PCVT_BACKUP_FONTS */ } /*---------------------------------------------------------------------------* * save/restore character set n to addr b *---------------------------------------------------------------------------*/ #if !PCVT_BACKUP_FONTS void vga_move_charset(unsigned n, unsigned char *b, int save_it) { unsigned char *d = compute_charset_base(n); #ifdef DIAGNOSTIC if(d == 0) panic("vga_move_charset: wrong adaptor"); #endif if(vsp->wd132col && (n == 1||n == 3||n == 5||n == 7)) { setchargen3(); d -= 0x2000; } else { setchargen(); } /* PLEASE, leave the following alone using bcopyb, as several */ /* chipsets have problems if their memory is accessed with 32 */ /* or 16 bits wide, don't change this to using bcopy for speed! 
*/ if(save_it) bcopyb(d, b, 256 /* chars */ * 32 /* bytes per char */); else bcopyb(b, d, 256 /* chars */ * 32 /* bytes per char */); resetchargen(); } #else /* PCVT_BACKUP_FONTS */ /* since there are always backed up copies, we do not save anything here */ /* parameter "b" is totally ignored */ void vga_move_charset(unsigned n, unsigned char *b, int save_it) { unsigned char *d = compute_charset_base(n); if(save_it) return; if(saved_charsets[n] == 0) #ifdef DIAGNOSTIC panic("pcvt: restoring unbuffered charset"); #else return; #endif #ifdef DIAGNOSTIC if(d == 0) panic("vga_move_charset: wrong adaptor"); #endif if(vsp->wd132col && (n == 1||n == 3||n == 5||n == 7)) { setchargen3(); d -= 0x2000; } else { setchargen(); } /* PLEASE, leave the following alone using bcopyb, as several */ /* chipsets have problems if their memory is accessed with 32 */ /* or 16 bits wide, don't change this to using bcopy for speed! */ bcopyb(saved_charsets[n], d, 256 /* chars */ * 32 /* bytes per char */); resetchargen(); } #endif /* PCVT_BACKUP_FONTS */ #if !PCVT_USL_VT_COMPAT /*---------------------------------------------------------------------------* * switch to virtual screen n (0 ... 
PCVT_NSCREENS-1) *---------------------------------------------------------------------------*/ void vgapage(int n) { #if !PCVT_KBD_FIFO int x; #endif /* !PCVT_KBD_FIFO */ int cols = vsp->maxcol; /* get current col val */ if(n < 0 || n >= totalscreens) return; #if !PCVT_KBD_FIFO x = spltty(); /* protect us */ #endif /* !PCVT_KBD_FIFO */ /* video board memory -> kernel memory */ bcopy(vsp->Crtat, vsp->Memory, vsp->screen_rows * vsp->maxcol * CHR); vsp->Crtat = vsp->Memory; /* operate in memory now */ /* update global screen pointers/variables */ current_video_screen = n; /* current screen no */ #if !PCVT_NETBSD && !(PCVT_FREEBSD > 110 && PCVT_FREEBSD < 200) pcconsp = &pccons[n]; /* current tty */ #elif PCVT_FREEBSD > 110 && PCVT_FREEBSD < 200 pcconsp = pccons[n]; /* current tty */ #else pcconsp = pc_tty[n]; /* current tty */ #endif vsp = &vs[n]; /* current video state ptr */ /* kernel memory -> video board memory */ bcopy(vsp->Crtat, Crtat, vsp->screen_rows * vsp->maxcol * CHR); vsp->Crtat = Crtat; /* operate on screen now */ outb(addr_6845, CRTC_STARTADRH); outb(addr_6845+1, 0); outb(addr_6845, CRTC_STARTADRL); outb(addr_6845+1, 0); #if !PCVT_KBD_FIFO splx(x); #endif /* !PCVT_KBD_FIFO */ select_vga_charset(vsp->vga_charset); if(vsp->maxcol != cols) vga_col(vsp, vsp->maxcol); /* select 80/132 columns */ outb(addr_6845, CRTC_CURSORH); /* select high register */ outb(addr_6845+1, vsp->cur_offset >> 8); outb(addr_6845, CRTC_CURSORL); /* select low register */ outb(addr_6845+1, vsp->cur_offset); if(vsp->cursor_on) { outb(addr_6845, CRTC_CURSTART); /* select high register */ outb(addr_6845+1, vsp->cursor_start); outb(addr_6845, CRTC_CUREND); /* select low register */ outb(addr_6845+1, vsp->cursor_end); } else { sw_cursor(0); } if(adaptor_type == VGA_ADAPTOR) { unsigned i; /* switch VGA DAC palette entries */ for(i = 0; i < NVGAPEL; i++) vgapaletteio(i, &vsp->palette[i], 1); } update_led(); /* update led's */ update_hp(vsp); /* update fkey labels, if present */ } #endif 
/* !PCVT_USL_VT_COMPAT */

/*---------------------------------------------------------------------------*
 *	test if it is a vga
 *
 *	Probe heuristic: the CRTC "cursor start" register is read/write on
 *	a VGA but effectively write-only (or not readable back) on an EGA.
 *	We toggle the cursor-enable bit both ways and read it back; if
 *	either read-back mismatches, the adaptor is assumed to be an EGA.
 *	The original register value is restored on every exit path.
 *
 *	Returns: 1 if the card behaves like a VGA, 0 otherwise (EGA).
 *	NOTE(review): relies on addr_6845 already pointing at the active
 *	CRTC index port (mono vs. color) — set up elsewhere in the driver.
 *---------------------------------------------------------------------------*/
int
vga_test(void)
{
	u_char old, new, check;

	outb(addr_6845,CRTC_CURSTART);	/* cursor start reg */
	old = inb(addr_6845+1);		/* get current value */

	new = old | CURSOR_ON_BIT;	/* set cursor on by setting bit 5 on */

	outb(addr_6845,CRTC_CURSTART);	/* cursor start reg */
	outb(addr_6845+1,new);		/* cursor should be on now */

	outb(addr_6845,CRTC_CURSTART);	/* cursor start reg */
	check = inb(addr_6845+1);	/* get current value */

	/* bit did not stick -> register is not readable/writable as on VGA */
	if(check != new)
	{
		outb(addr_6845,CRTC_CURSTART);	/* cursor start reg */
		outb(addr_6845+1,old);		/* failsafe */
		return(0);			/* must be ega */
	}

	new = old & ~CURSOR_ON_BIT;	/* turn cursor off by clearing bit 5 */

	outb(addr_6845,CRTC_CURSTART);	/* cursor start reg */
	outb(addr_6845+1,new);		/* cursor should be off now */

	outb(addr_6845,CRTC_CURSTART);	/* cursor start reg */
	check = inb(addr_6845+1);	/* get current value */

	/* same test in the other direction */
	if(check != new)
	{
		outb(addr_6845,CRTC_CURSTART);	/* cursor start reg */
		outb(addr_6845+1,old);		/* failsafe */
		return(0);			/* must be ega */
	}

	outb(addr_6845,CRTC_CURSTART);	/* cursor start reg */
	outb(addr_6845+1,old);		/* failsafe */

	return(1);			/* vga */
}

/*---------------------------------------------------------------------------*
 *	convert upper/lower sixel font array to vga font array
 *
 *	Builds one 8x16 VGA glyph (16 bytes, one byte per scanline, MSB =
 *	leftmost pixel) from a DEC sixel description: 8 columns, each column
 *	being 6 vertical pixels in ->upper plus 4 in ->lower (bit 0 = top).
 *	Transposes column-major sixel bits into row-major scanline bytes;
 *	rows 0-5 come from `upper', rows 6-9 from `lower', rows 10-15 stay 0.
 *
 *	sixelp  - source sixel columns (struct sixels, declared elsewhere)
 *	vgachar - destination, must hold at least 16 bytes; fully cleared
 *	          here before the bits are or'ed in
 *---------------------------------------------------------------------------*/
void
sixel_vga(struct sixels *sixelp, u_char *vgachar)
{
	register int i, j;
	register int shift;
	register u_char mask;

	/* clear the whole 16-scanline glyph first */
	for(j = 0; j < 16; j++)
		vgachar[j] = 0;

	/* upper sixel: bit j of column i becomes pixel i of scanline j */
	mask = 0x01;
	for(j = 0; j < 6; j++)
	{
		for(i = 0, shift = 7; i < 8; i++, shift--)
			vgachar[j] |= ((((sixelp->upper[i]) & mask) >> j) << shift);
		mask <<= 1;
	}
	/* lower sixel: same transpose, landing on scanlines 6..9 */
	mask = 0x01;
	for(j = 0; j < 4; j++)
	{
		for(i = 0, shift = 7; i < 8; i++, shift--)
			vgachar[j+6] |= ((((sixelp->lower[i]) & mask) >>j) << shift);
		mask <<= 1;
	}
}
/*---------------------------------------------------------------------------*
 *	Expand 8x10 EGA/VGA characters to 8x16 EGA/VGA characters
 *
 *	Top and bottom scanline pairs (0,1 and 8,9) are copied once; the
 *	six middle scanlines (2..7) are each emitted twice, yielding 16
 *	output scanlines from 10 input scanlines.
 *---------------------------------------------------------------------------*/
void
vga10_vga16(u_char *invga, u_char *outvga)
{
	register int src;

	/* edges: copied unchanged */
	outvga[0]  = invga[0];
	outvga[1]  = invga[1];
	outvga[14] = invga[8];
	outvga[15] = invga[9];

	/* middle: source scanline `src' fills output rows 2*src-2, 2*src-1 */
	for(src = 2; src < 8; src++)
	{
		outvga[2*src - 2] = invga[src];
		outvga[2*src - 1] = invga[src];
	}
}

/*---------------------------------------------------------------------------*
 *	Expand 8x10 EGA/VGA characters to 8x14 EGA/VGA characters
 *
 *	The two top and the two bottom scanlines are doubled; everything in
 *	between (scanlines 2..7) is copied unchanged to rows 4..9.
 *---------------------------------------------------------------------------*/
void
vga10_vga14(u_char *invga, u_char *outvga)
{
	register int k;

	/* doubled edge scanlines */
	outvga[0]  = outvga[1]  = invga[0];
	outvga[2]  = outvga[3]  = invga[1];
	outvga[10] = outvga[11] = invga[8];
	outvga[12] = outvga[13] = invga[9];

	/* straight copy of the interior */
	for(k = 4; k < 10; k++)
		outvga[k] = invga[k - 2];
}

/*---------------------------------------------------------------------------*
 *	Copy 8x10 EGA/VGA characters (identity transform, 10 scanlines)
 *---------------------------------------------------------------------------*/
void
vga10_vga10(u_char *invga, u_char *outvga)
{
	register int k = 10;

	while(k--)
		outvga[k] = invga[k];
}

/*---------------------------------------------------------------------------*
 *	Contract 8x10 EGA/VGA characters to 8x8 EGA/VGA characters
 *
 *	Scanlines 3 and 7 of the source are dropped; the remaining eight
 *	are copied in order.
 *---------------------------------------------------------------------------*/
void
vga10_vga8(u_char *invga, u_char *outvga)
{
	static const int keep[8] = { 0, 1, 2, 4, 5, 6, 8, 9 };
	register int k;

	for(k = 0; k < 8; k++)
		outvga[k] = invga[keep[k]];
}
/*---------------------------------------------------------------------------*
 *	force a vga card to behave like an ega for debugging
 *
 *	Downgrades the driver's idea of the adaptor from VGA to EGA and
 *	reprograms the Miscellaneous Output register for 350-scanline
 *	(EGA-style) timing.  Compiled in only when FORCE_EGA is set.
 *---------------------------------------------------------------------------*/
#if FORCE_EGA
void
force_ega(void)
{
	unsigned char vgareg;

	if(adaptor_type == VGA_ADAPTOR)
	{
		adaptor_type = EGA_ADAPTOR;
		totalfonts = 4;		/* EGA has only 4 font slots */

		vgareg = inb(GN_MISCOUTR);	/* Miscellaneous Output Register */
		vgareg |= 128;			/* Set 350 scanline mode */
		vgareg &= ~64;
		outb(GN_MISCOUTW,vgareg);
	}
}
#endif /* FORCE_EGA */

/*---------------------------------------------------------------------------*
 *	disconnect attribute bit 3 from generating intensity
 *	(and use it for a second character set !)
 *
 *	Reads the input status register (which resets the ATC index/data
 *	flip-flop) and then disables color plane 3 in the Attribute
 *	Controller, freeing attribute bit 3 for character-set selection.
 *---------------------------------------------------------------------------*/
void
set_2ndcharset(void)
{
	if(color)	/* prepare to access index register! */
		inb(GN_INPSTAT1C);
	else
		inb(GN_INPSTAT1M);

	/* select color plane enable reg, caution: set ATC access bit ! */
	outb(ATC_INDEX, (ATC_COLPLEN | ATC_ACCESS));
	outb(ATC_DATAW, 0x07);		/* disable plane 3 */
}

#if PCVT_SCREENSAVER
#if PCVT_PRETTYSCRNS
/*---------------------------------------------------------------------------*
 *	produce some kinda random number, had a look into the system library...
 *
 *	Minimal linear-congruential PRNG (multiplier 1103515245, as in the
 *	classic libc rand()), additionally perturbed by the current
 *	time-of-day seconds.  Quality is irrelevant here; it only picks
 *	screen positions/colors for the screensaver.
 *
 *	Returns: low 16 bits of the previous seed.
 *---------------------------------------------------------------------------*/
static u_short
getrand(void)
{
#if !PCVT_FREEBSD
	extern struct timeval time;	/* time-of-day register */
#endif
	static unsigned long seed = 1;
	register u_short res = (u_short)seed;

	seed = seed * 1103515245L + time.tv_sec;
	return res;
}

/*---------------------------------------------------------------------------*
 *	produce "nice" screensaving ....
 *---------------------------------------------------------------------------*/
/*
 * Timeout callback: erase the previous '*', pick a pseudo-random new
 * position on the blanked screen, draw a '*' there and (on VGA) give it
 * a pseudo-random color by rewriting palette entry 7.  Reschedules
 * itself once per second while the saver is active.
 */
static void
scrnsv_blink(void * arg)
{
	static struct rgb blink_rgb[8] =
	{
		{63, 63, 63},	/* white */
		{0, 63, 42},	/* pale green */
		{63, 63, 0},	/* yellow */
		{63, 21, 63},	/* violet */
		{42, 63, 0},	/* yellow-green */
		{63, 42, 0},	/* amber */
		{63, 42, 42},	/* rose */
		{21, 42, 42}	/* cyan */
	};
	register u_short r = getrand();
	unsigned pos = (r % (scrnsv_size / 2));	/* cell index, 2 bytes/cell */

	*scrnsv_current = /* (0 << 8) + */ ' ';	/* wipe the old star */
	scrnsv_current = vsp->Crtat + pos;
	*scrnsv_current = (7 /* LIGHTGRAY */ << 8) + '*';
	if(adaptor_type == VGA_ADAPTOR)
		vgapaletteio(7 /* LIGHTGRAY */, &blink_rgb[(r >> 4) & 7], 1);
	scrnsv_blink_ch = timeout(scrnsv_blink, NULL, hz);
}
#endif /* PCVT_PRETTYSCRNS */

/*---------------------------------------------------------------------------*
 *	set timeout time
 *
 *	Set the screensaver idle timeout (seconds); 0 disables the saver
 *	and releases the saved-screen buffer.  Any pending timeout is
 *	cancelled and the saver state is reset.
 *---------------------------------------------------------------------------*/
#ifndef XSERVER
static void
pcvt_set_scrnsv_tmo(int timeout)
#else
void
pcvt_set_scrnsv_tmo(int timeout)
#endif /* XSERVER */
{
	int x = splhigh();

	if(scrnsv_timeout)
		untimeout(scrnsv_timedout, NULL, scrnsv_timeout_ch);

	scrnsv_timeout = timeout;

	pcvt_scrnsv_reset();	/* sanity */
	splx(x);
	if(timeout == 0 && savedscreen)
	{
		/* release buffer when screen saver turned off */
		free(savedscreen, M_TEMP);
		savedscreen = (u_short *)0;
	}
}

/*---------------------------------------------------------------------------*
 *	we were timed out
 *
 *	Two-phase blanking, driven by scrnsv_active:
 *	  phase 1 (first call)  - (re)allocate the save buffer if needed,
 *	    snapshot the current screen, black out palette entry 0 on VGA,
 *	    and reschedule itself 100ms later;
 *	  phase 2 (second call) - actually fill the screen with blanks,
 *	    start the blinking star (if configured) and hide the cursor.
 *	The split works around video-RAM access timing problems (see below).
 *---------------------------------------------------------------------------*/
static void
scrnsv_timedout(void *arg)
{
	/* this function is called by timeout() */
	/* raise priority to avoid conflicts with kbd intr */
	int x = spltty();

	/*
	 * due to some undefined problems with video adaptor RAM
	 * access timing, the following has been splitted into
	 * two pieces called subsequently with a time difference
	 * of 100 millisec
	 */
	if(++scrnsv_active == 1)
	{
		register size_t s;

		/*
		 * first, allocate a buffer
		 * do only if none allocated yet or another size required
		 * this reduces malloc() overhead by avoiding successive
		 * calls to malloc() and free() if they would have requested
		 * the same buffer
		 *
		 * XXX This is inherited from old days where no buffering
		 * happened at all. Meanwhile we should use the standard
		 * screen buffer instead. Any volunteers? :-) [At least,
		 * this code proved to work...]
		 */
		s = sizeof(u_short) * vsp->screen_rowsize * vsp->maxcol;
		if(savedscreen == (u_short *)0 || s != scrnsv_size)
		{
			/* really need to allocate */
			if(savedscreen)
				free(savedscreen, M_TEMP);
			scrnsv_size = s;
			if((savedscreen = (u_short *)malloc(s, M_TEMP, M_NOWAIT))
			   == (u_short *)0)
			{
				/*
				 * didn't get the buffer memory,
				 * turn off screen saver
				 */
				scrnsv_timeout = scrnsv_active = 0;
				splx(x);
				return;
			}
		}
		/* save current screen */
		bcopy(vsp->Crtat, savedscreen, scrnsv_size);

		/* on VGA's, make sure palette is set to blank screen */
		if(adaptor_type == VGA_ADAPTOR)
		{
			struct rgb black = {0, 0, 0};
			vgapaletteio(0 /* BLACK */, &black, 1);
		}
		/* prepare for next time... */
		scrnsv_timeout_ch = timeout(scrnsv_timedout, NULL, hz / 10);
	}
	else
	{
		/* second call, now blank the screen */
		/* fill screen with blanks */
		fillw(/* (BLACK<<8) + */ ' ', vsp->Crtat, scrnsv_size / 2);

#if PCVT_PRETTYSCRNS
		scrnsv_current = vsp->Crtat;
		scrnsv_blink_ch = timeout(scrnsv_blink, NULL, hz);
#endif /* PCVT_PRETTYSCRNS */

		sw_cursor(0);	/* cursor off on mda/cga */
	}
	splx(x);
}

/*---------------------------------------------------------------------------*
 *	interface to screensaver "subsystem"
 *
 *	Called on every user/console activity: if the saver is active,
 *	restore the saved screen contents, palette and cursor; in any case
 *	re-arm the idle timeout.  Rescheduling is rate-limited to once per
 *	kernel-clock second to keep heavy output cheap.
 *---------------------------------------------------------------------------*/
void
pcvt_scrnsv_reset(void)
{
	/*
	 * to save lotta time with superfluous timeout()/untimeout() calls
	 * when having massive output operations, we remember the last
	 * second of kernel timer we've rescheduled scrnsv_timedout()
	 */
	static long last_schedule = 0L;
	register int x = splhigh();
	int reschedule = 0;

	if((scrnsv_active == 1 || scrnsv_timeout) &&
	   last_schedule != time.tv_sec)
	{
		last_schedule = time.tv_sec;
		reschedule = 1;
		untimeout(scrnsv_timedout, NULL, scrnsv_timeout_ch);
	}
	if(scrnsv_active)
	{
#if PCVT_PRETTYSCRNS
		/* active > 1 means the blink timeout is pending too */
		if(scrnsv_active > 1)
			untimeout(scrnsv_blink, NULL, scrnsv_blink_ch);
#endif /* PCVT_PRETTYSCRNS */

		bcopy(savedscreen, vsp->Crtat, scrnsv_size);

		if(adaptor_type == VGA_ADAPTOR)
		{
			/* back up VGA palette info */
			vgapaletteio(0 /* BLACK */, &vsp->palette[0], 1);
#if PCVT_PRETTYSCRNS
			vgapaletteio(7 /* LIGHTGRAY */, &vsp->palette[7], 1);
#endif /* PCVT_PRETTYSCRNS */
		}
		scrnsv_active = 0;

		if(vsp->cursor_on)
			sw_cursor(1);	/* cursor on */
	}
	if(reschedule)
	{
		/* mark next timeout */
		scrnsv_timeout_ch =
			timeout(scrnsv_timedout, NULL, scrnsv_timeout * hz);
	}
	splx(x);
}
#endif /* PCVT_SCREENSAVER */

/*---------------------------------------------------------------------------*
 *	switch cursor on/off
 *---------------------------------------------------------------------------*/
void
sw_cursor(int onoff)
{
	if(adaptor_type == EGA_ADAPTOR)
	{
		int start, end;

		if(onoff)
		{
			start =
vsp->cursor_start; end = vsp->cursor_end; } else { int cs = vs[current_video_screen].vga_charset; cs = (cs < 0) ? 0 : ((cs < totalfonts) ? cs : totalfonts-1); start = (vgacs[cs].char_scanlines & 0x1F) + 1; end = 0; } outb(addr_6845,CRTC_CURSTART); /* cursor start reg */ outb(addr_6845+1, start); outb(addr_6845,CRTC_CUREND); /* cursor end reg */ outb(addr_6845+1, end); } else /* mda, cga, vga */ { outb(addr_6845,CRTC_CURSTART); /* cursor start reg */ if(onoff) outb(addr_6845+1, vsp->cursor_start); else outb(addr_6845+1, CURSOR_ON_BIT); } } /*---------------------------------------------------------------------------* * cold init support, if a mono monitor is attached to a * vga or ega, it comes up with a mda emulation. switch * board to generic ega/vga mode in this case. *---------------------------------------------------------------------------*/ void mda2egaorvga(void) { /* * program sequencer to access * video ram */ /* synchronous reset */ outb(TS_INDEX, TS_SYNCRESET); outb(TS_DATA, 0x01); /* write to map 0 & 1 */ outb(TS_INDEX, TS_WRPLMASK); outb(TS_DATA, 0x03); /* odd-even addressing */ outb(TS_INDEX, TS_MEMMODE); outb(TS_DATA, 0x03); /* clear synchronous reset */ outb(TS_INDEX, TS_SYNCRESET); outb(TS_DATA, 0x03); /* * program graphics controller * to access character * generator */ /* select map 0 for cpu reads */ outb(GDC_INDEX, GDC_RDPLANESEL); outb(GDC_DATA, 0x00); /* enable odd-even addressing */ outb(GDC_INDEX, GDC_MODE); outb(GDC_DATA, 0x10); /* map starts at 0xb000 */ outb(GDC_INDEX, GDC_MISC); outb(GDC_DATA, 0x0a); } #endif /* NVT > 0 */ /* ------------------------- E O F ------------------------------------------*/ diff --git a/sys/i386/isa/psm.c b/sys/i386/isa/psm.c index c7c00faa96d8..cc7b94deb495 100644 --- a/sys/i386/isa/psm.c +++ b/sys/i386/isa/psm.c @@ -1,2212 +1,2214 @@ /*- * Copyright (c) 1992, 1993 Erik Forsberg. * Copyright (c) 1996, 1997 Kazutaka YOKOTA. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * THIS SOFTWARE IS PROVIDED BY ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL I BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: psm.c,v 1.49 1998/01/24 02:54:23 eivind Exp $ + * $Id: psm.c,v 1.50 1998/01/24 12:12:32 yokota Exp $ */ /* * Ported to 386bsd Oct 17, 1992 * Sandi Donno, Computer Science, University of Cape Town, South Africa * Please send bug reports to sandi@cs.uct.ac.za * * Thanks are also due to Rick Macklem, rick@snowhite.cis.uoguelph.ca - * although I was only partially successful in getting the alpha release * of his "driver for the Logitech and ATI Inport Bus mice for use with * 386bsd and the X386 port" to work with my Microsoft mouse, I nevertheless * found his code to be an invaluable reference when porting this driver * to 386bsd. 
* * Further modifications for latest 386BSD+patchkit and port to NetBSD, * Andrew Herbert - 8 June 1993 * * Cloned from the Microsoft Bus Mouse driver, also by Erik Forsberg, by * Andrew Herbert - 12 June 1993 * * Modified for PS/2 mouse by Charles Hannum * - 13 June 1993 * * Modified for PS/2 AUX mouse by Shoji Yuen * - 24 October 1993 * * Hardware access routines and probe logic rewritten by * Kazutaka Yokota * - 3, 14, 22 October 1996. * - 12 November 1996. IOCTLs and rearranging `psmread', `psmioctl'... * - 14, 30 November 1996. Uses `kbdio.c'. * - 13 December 1996. Uses queuing version of `kbdio.c'. * - January/February 1997. Tweaked probe logic for * HiNote UltraII/Latitude/Armada laptops. * - 30 July 1997. Added APM support. * - 5 March 1997. Defined driver configuration flags (PSM_CONFIG_XXX). * Improved sync check logic. * Vender specific support routines. */ #include "psm.h" #include "apm.h" #include "opt_devfs.h" #include "opt_psm.h" #if NPSM > 0 #include #include #include #include #include #include #include #ifdef DEVFS #include #endif #include +#include #include #include +#include #include #include #include /* * Driver specific options: the following options may be set by * `options' statements in the kernel configuration file. 
*/ /* debugging */ #ifndef PSM_DEBUG #define PSM_DEBUG 0 /* logging: 0: none, 1: brief, 2: verbose */ #endif /* features */ /* #define PSM_HOOKAPM hook the APM resume event */ /* #define PSM_RESETAFTERSUSPEND reset the device at the resume event */ #if NAPM <= 0 #undef PSM_HOOKAPM #endif /* NAPM */ #ifndef PSM_HOOKAPM #undef PSM_RESETAFTERSUSPEND #endif /* PSM_HOOKAPM */ /* end of driver specific options */ /* input queue */ #define PSM_BUFSIZE 960 #define PSM_SMALLBUFSIZE 240 /* operation levels */ #define PSM_LEVEL_BASE 0 #define PSM_LEVEL_STANDARD 1 #define PSM_LEVEL_NATIVE 2 #define PSM_LEVEL_MIN PSM_LEVEL_BASE #define PSM_LEVEL_MAX PSM_LEVEL_NATIVE /* some macros */ #define PSM_UNIT(dev) (minor(dev) >> 1) #define PSM_NBLOCKIO(dev) (minor(dev) & 1) #define PSM_MKMINOR(unit,block) (((unit) << 1) | ((block) ? 0:1)) #ifndef max #define max(x,y) ((x) > (y) ? (x) : (y)) #endif #ifndef min #define min(x,y) ((x) < (y) ? (x) : (y)) #endif /* ring buffer */ typedef struct ringbuf { int count; /* # of valid elements in the buffer */ int head; /* head pointer */ int tail; /* tail poiner */ unsigned char buf[PSM_BUFSIZE]; } ringbuf_t; /* driver control block */ static struct psm_softc { /* Driver status information */ struct selinfo rsel; /* Process selecting for Input */ unsigned char state; /* Mouse driver state */ int config; /* driver configuration flags */ int flags; /* other flags */ KBDC kbdc; /* handle to access the keyboard controller */ int addr; /* I/O port address */ mousehw_t hw; /* hardware information */ mousemode_t mode; /* operation mode */ mousemode_t dflt_mode; /* default operation mode */ mousestatus_t status; /* accumulated mouse movement */ ringbuf_t queue; /* mouse status queue */ unsigned char ipacket[16]; /* interim input buffer */ int inputbytes; /* # of bytes in the input buffer */ int button; /* the latest button state */ #ifdef DEVFS void *devfs_token; void *n_devfs_token; #endif #ifdef PSM_HOOKAPM struct apmhook resumehook; #endif } 
*psm_softc[NPSM]; /* driver state flags (state) */ #define PSM_VALID 0x80 #define PSM_OPEN 1 /* Device is open */ #define PSM_ASLP 2 /* Waiting for mouse data */ /* driver configuration flags (config) */ #define PSM_CONFIG_RESOLUTION 0x000f /* resolution */ #define PSM_CONFIG_ACCEL 0x00f0 /* acceleration factor */ #define PSM_CONFIG_NOCHECKSYNC 0x0100 /* disable sync. test */ #define PSM_CONFIG_FLAGS (PSM_CONFIG_RESOLUTION \ | PSM_CONFIG_ACCEL \ | PSM_CONFIG_NOCHECKSYNC) /* other flags (flags) */ /* * Pass mouse data packet to the user land program `as is', even if * the mouse has vender-specific enhanced features and uses non-standard * packet format. Otherwise manipulate the mouse data packet so that * it can be recognized by the programs which can only understand * the standard packet format. */ #define PSM_FLAGS_NATIVEMODE 0x0200 /* for backward compatibility */ #define OLD_MOUSE_GETHWINFO _IOR('M', 1, old_mousehw_t) #define OLD_MOUSE_GETMODE _IOR('M', 2, old_mousemode_t) #define OLD_MOUSE_SETMODE _IOW('M', 3, old_mousemode_t) typedef struct old_mousehw { int buttons; int iftype; int type; int hwid; } old_mousehw_t; typedef struct old_mousemode { int protocol; int rate; int resolution; int accelfactor; } old_mousemode_t; /* packet formatting function */ typedef int packetfunc_t __P((struct psm_softc *, unsigned char *, int *, int, mousestatus_t *)); /* function prototypes */ static int psmprobe __P((struct isa_device *)); static int psmattach __P((struct isa_device *)); static void psm_drvinit __P((void *)); #ifdef PSM_HOOKAPM static int psmresume __P((void *)); #endif static d_open_t psmopen; static d_close_t psmclose; static d_read_t psmread; static d_ioctl_t psmioctl; static d_poll_t psmpoll; static int enable_aux_dev __P((KBDC)); static int disable_aux_dev __P((KBDC)); static int get_mouse_status __P((KBDC, int *, int, int)); static int get_aux_id __P((KBDC)); static int set_mouse_sampling_rate __P((KBDC, int)); static int set_mouse_scaling __P((KBDC, 
int)); static int set_mouse_resolution __P((KBDC, int)); static int set_mouse_mode __P((KBDC)); static int get_mouse_buttons __P((KBDC)); static int is_a_mouse __P((int)); static void recover_from_error __P((KBDC)); static int restore_controller __P((KBDC, int)); static int reinitialize __P((int, mousemode_t *)); static int doopen __P((int, int)); static char *model_name(int); /* vender specific features */ typedef int probefunc_t __P((struct psm_softc *)); static int mouse_id_proc1 __P((KBDC, int, int, int *)); static probefunc_t enable_groller; static probefunc_t enable_gmouse; static probefunc_t enable_aglide; static probefunc_t enable_kmouse; static probefunc_t enable_msintelli; static probefunc_t enable_mmanplus; static int tame_mouse __P((struct psm_softc *, mousestatus_t *, unsigned char *)); static struct { int model; unsigned char syncmask; int packetsize; probefunc_t *probefunc; } vendertype[] = { { MOUSE_MODEL_NET, /* Genius NetMouse */ 0xc8, MOUSE_INTELLI_PACKETSIZE, enable_gmouse, }, { MOUSE_MODEL_NETSCROLL, /* Genius NetScroll */ 0xc8, 6, enable_groller, }, { MOUSE_MODEL_GLIDEPOINT, /* ALPS GlidePoint */ 0xc0, MOUSE_PS2_PACKETSIZE, enable_aglide, }, { MOUSE_MODEL_MOUSEMANPLUS, /* Logitech MouseMan+ */ 0x08, MOUSE_PS2_PACKETSIZE, enable_mmanplus, }, { MOUSE_MODEL_THINK, /* Kensignton ThinkingMouse */ 0x80, MOUSE_PS2_PACKETSIZE, enable_kmouse, }, { MOUSE_MODEL_INTELLI, /* Microsoft IntelliMouse */ 0xc8, MOUSE_INTELLI_PACKETSIZE, enable_msintelli, }, { MOUSE_MODEL_GENERIC, 0xc0, MOUSE_PS2_PACKETSIZE, NULL, }, }; /* device driver declarateion */ struct isa_driver psmdriver = { psmprobe, psmattach, "psm", FALSE }; #define CDEV_MAJOR 21 static struct cdevsw psm_cdevsw = { psmopen, psmclose, psmread, nowrite, /* 21 */ psmioctl, nostop, nullreset, nodevtotty, psmpoll, nommap, NULL, "psm", NULL, -1 }; /* debug message level */ static int verbose = PSM_DEBUG; /* device I/O routines */ static int enable_aux_dev(KBDC kbdc) { int res; res = send_aux_command(kbdc, 
PSMC_ENABLE_DEV); if (verbose >= 2) log(LOG_DEBUG, "psm: ENABLE_DEV return code:%04x\n", res); return (res == PSM_ACK); } static int disable_aux_dev(KBDC kbdc) { int res; res = send_aux_command(kbdc, PSMC_DISABLE_DEV); if (verbose >= 2) log(LOG_DEBUG, "psm: DISABLE_DEV return code:%04x\n", res); return (res == PSM_ACK); } static int get_mouse_status(KBDC kbdc, int *status, int flag, int len) { int cmd; int res; int i; switch (flag) { case 0: default: cmd = PSMC_SEND_DEV_STATUS; break; case 1: cmd = PSMC_SEND_DEV_DATA; break; } empty_aux_buffer(kbdc, 5); res = send_aux_command(kbdc, cmd); if (verbose >= 2) log(LOG_DEBUG, "psm: SEND_AUX_DEV_%s return code:%04x\n", (flag == 1) ? "DATA" : "STATUS", res); if (res != PSM_ACK) return 0; for (i = 0; i < len; ++i) { status[i] = read_aux_data(kbdc); if (status[i] < 0) break; } if (verbose) { log(LOG_DEBUG, "psm: %s %02x %02x %02x\n", (flag == 1) ? "data" : "status", status[0], status[1], status[2]); } return i; } static int get_aux_id(KBDC kbdc) { int res; int id; empty_aux_buffer(kbdc, 5); res = send_aux_command(kbdc, PSMC_SEND_DEV_ID); if (verbose >= 2) log(LOG_DEBUG, "psm: SEND_DEV_ID return code:%04x\n", res); if (res != PSM_ACK) return (-1); /* 10ms delay */ DELAY(10000); id = read_aux_data(kbdc); if (verbose >= 2) log(LOG_DEBUG, "psm: device ID: %04x\n", id); return id; } static int set_mouse_sampling_rate(KBDC kbdc, int rate) { int res; res = send_aux_command_and_data(kbdc, PSMC_SET_SAMPLING_RATE, rate); if (verbose >= 2) log(LOG_DEBUG, "psm: SET_SAMPLING_RATE (%d) %04x\n", rate, res); return ((res == PSM_ACK) ? rate : -1); } static int set_mouse_scaling(KBDC kbdc, int scale) { int res; switch (scale) { case 1: default: scale = PSMC_SET_SCALING11; break; case 2: scale = PSMC_SET_SCALING21; break; } res = send_aux_command(kbdc, scale); if (verbose >= 2) log(LOG_DEBUG, "psm: SET_SCALING%s return code:%04x\n", (scale == PSMC_SET_SCALING21) ? 
	    "21" : "11", res);
    return (res == PSM_ACK);
}

/*
 * Set the device resolution.
 * `val' must be 0 through PSMD_MAX_RESOLUTION.
 * Returns `val' if the device ACKed the command, -1 otherwise.
 */
static int
set_mouse_resolution(KBDC kbdc, int val)
{
    int res;

    res = send_aux_command_and_data(kbdc, PSMC_SET_RESOLUTION, val);
    if (verbose >= 2)
        log(LOG_DEBUG, "psm: SET_RESOLUTION (%d) %04x\n", val, res);
    return ((res == PSM_ACK) ? val : -1);
}

/*
 * Put the device in (stream) mode.
 * NOTE: once `set_mouse_mode()' is called, the mouse device must be
 * re-enabled by calling `enable_aux_dev()'.
 * Returns TRUE on ACK.
 */
static int
set_mouse_mode(KBDC kbdc)
{
    int res;

    res = send_aux_command(kbdc, PSMC_SET_STREAM_MODE);
    if (verbose >= 2)
        log(LOG_DEBUG, "psm: SET_STREAM_MODE return code:%04x\n", res);
    return (res == PSM_ACK);
}

/*
 * Query the number of buttons via the Logitech-specific status sequence;
 * falls back to 2 buttons if the sequence is not supported or yields 0.
 */
static int
get_mouse_buttons(KBDC kbdc)
{
    int c = 2;		/* assume two buttons by default */
    int status[3];

    /*
     * NOTE: a special sequence to obtain Logitech Mouse specific
     * information: set resolution to 25 ppi, set scaling to 1:1, set
     * scaling to 1:1, set scaling to 1:1. Then the second byte of the
     * mouse status bytes is the number of available buttons.
     * Some manufactures also support this sequence.
     */
    if (set_mouse_resolution(kbdc, PSMD_RES_LOW) != PSMD_RES_LOW)
        return c;
    if (set_mouse_scaling(kbdc, 1) && set_mouse_scaling(kbdc, 1)
        && set_mouse_scaling(kbdc, 1)
        && (get_mouse_status(kbdc, status, 0, 3) >= 3)) {
        if (status[1] != 0)
            return status[1];
    }
    return c;
}

/* misc subroutines */

/*
 * Decide whether the device ID belongs to a pointing device.
 * Someday, I will get the complete list of valid pointing devices and
 * their IDs... XXX
 * Currently always returns TRUE (the table-driven check is compiled out).
 */
static int
is_a_mouse(int id)
{
#if 0
    static int valid_ids[] = {
        PSM_MOUSE_ID,		/* mouse */
        PSM_BALLPOINT_ID,	/* ballpoint device */
        PSM_INTELLI_ID,		/* Intellimouse */
        -1			/* end of table */
    };
    int i;

    for (i = 0; valid_ids[i] >= 0; ++i)
        if (valid_ids[i] == id)
            return TRUE;
    return FALSE;
#else
    return TRUE;
#endif
}

/*
 * Map a MOUSE_MODEL_* code to a human-readable name; "Unknown" for
 * codes not in the table.
 */
static char *
model_name(int model)
{
    static struct {
	int model_code;
	char *model_name;
    } models[] = {
        { MOUSE_MODEL_NETSCROLL,	"NetScroll Mouse" },
        { MOUSE_MODEL_NET,		"NetMouse" },
        { MOUSE_MODEL_GLIDEPOINT,	"GlidePoint" },
        { MOUSE_MODEL_THINK,		"ThinkingMouse" },
        { MOUSE_MODEL_INTELLI,		"IntelliMouse" },
        { MOUSE_MODEL_MOUSEMANPLUS,	"MouseMan+" },
        { MOUSE_MODEL_GENERIC,		"Generic PS/2 mouse" },
        { MOUSE_MODEL_UNKNOWN,		NULL },
    };
    int i;

    for (i = 0; models[i].model_code != MOUSE_MODEL_UNKNOWN; ++i) {
        if (models[i].model_code == model)
            return models[i].model_name;
    }
    return "Unknown";
}

/*
 * Attempt to bring the keyboard controller/keyboard back to life after a
 * failed mouse operation, by flushing both buffers and running the
 * controller and keyboard-port self tests.
 */
static void
recover_from_error(KBDC kbdc)
{
    /* discard anything left in the output buffer */
    empty_both_buffers(kbdc, 10);

#if 0
    /*
     * NOTE: KBDC_RESET_KBD may not restore the communication between the
     * keyboard and the controller.
     */
    reset_kbd(kbdc);
#else
    /*
     * NOTE: somehow diagnostic and keyboard port test commands bring the
     * keyboard back.
     */
    if (!test_controller(kbdc))
        log(LOG_ERR, "psm: keyboard controller failed.\n");
    /* if there isn't a keyboard in the system, the following error is OK */
    if (test_kbd_port(kbdc) != 0) {
        if (verbose)
            log(LOG_ERR, "psm: keyboard port failed.\n");
    }
#endif
}

/*
 * Flush both buffers and write `command_byte' back to the keyboard
 * controller.  Returns TRUE on success, FALSE (after logging) on failure.
 */
static int
restore_controller(KBDC kbdc, int command_byte)
{
    empty_both_buffers(kbdc, 10);

    if (!set_controller_command_byte(kbdc, 0xff, command_byte)) {
        log(LOG_ERR, "psm: failed to restore the keyboard controller "
                     "command byte.\n");
        return FALSE;
    } else {
        return TRUE;
    }
}

/*
 * Re-initialize the aux port and device. The aux port must be enabled
 * and its interrupt must be disabled before calling this routine.
 * The aux device will be disabled before returning.
 * The keyboard controller must be locked via `kbdc_lock()' before
 * calling this routine.
 */
static int
reinitialize(int unit, mousemode_t *mode)
{
    struct psm_softc *sc = psm_softc[unit];
    KBDC kbdc = psm_softc[unit]->kbdc;
    int stat[3];
    int i;

    switch((i = test_aux_port(kbdc))) {
    case 1:	/* ignore this error */
        if (verbose)
            log(LOG_DEBUG, "psm%d: strange result for test aux port (%d).\n",
                unit, i);
        /* fall though */
    case 0:	/* no error */
        break;
    case -1:	/* time out */
    default:	/* error */
        recover_from_error(kbdc);
        log(LOG_ERR, "psm%d: the aux port is not functioning (%d).\n",
            unit, i);
        return FALSE;
    }

    /*
     * NOTE: some controllers appears to hang the `keyboard' when
     * the aux port doesn't exist and `PSMC_RESET_DEV' is issued.
     */
    if (!reset_aux_dev(kbdc)) {
        recover_from_error(kbdc);
        log(LOG_ERR, "psm%d: failed to reset the aux device.\n", unit);
        return FALSE;
    }

    /*
     * both the aux port and the aux device is functioning, see
     * if the device can be enabled.
     */
    if (!enable_aux_dev(kbdc) || !disable_aux_dev(kbdc)) {
        log(LOG_ERR, "psm%d: failed to enable the aux device.\n", unit);
        return FALSE;
    }
    empty_both_buffers(kbdc, 10);	/* remove stray data if any */

    /* FIXME: hardware ID, mouse buttons? */

    /* other parameters: re-run the vendor-specific probe functions */
    for (i = 0; vendertype[i].probefunc != NULL; ++i) {
        if ((*vendertype[i].probefunc)(sc)) {
            if (verbose >= 2)
                log(LOG_ERR, "psm%d: found %s\n",
                    unit, model_name(vendertype[i].model));
            break;
        }
    }
    sc->hw.model = vendertype[i].model;
    sc->mode.packetsize = vendertype[i].packetsize;

    /* set mouse parameters */
    if (mode != (mousemode_t *)NULL) {
        if (mode->rate > 0)
            mode->rate = set_mouse_sampling_rate(kbdc, mode->rate);
        if (mode->resolution >= 0)
            mode->resolution = set_mouse_resolution(kbdc, mode->resolution);
        set_mouse_scaling(kbdc, 1);
        set_mouse_mode(kbdc);
    }

    /* request a data packet and extract sync. bits */
    if (get_mouse_status(kbdc, stat, 1, 3) < 3) {
        log(LOG_DEBUG, "psm%d: failed to get data (reinitialize).\n", unit);
        sc->mode.syncmask[0] = 0;
    } else {
        sc->mode.syncmask[1] = stat[0] & sc->mode.syncmask[0];	/* syncbits */
        /* the NetScroll Mouse will send three more bytes... Ignore them */
        empty_aux_buffer(kbdc, 5);
    }

    /* just check the status of the mouse */
    if (get_mouse_status(kbdc, stat, 0, 3) < 3)
        log(LOG_DEBUG, "psm%d: failed to get status (reinitialize).\n", unit);

    return TRUE;
}

/*
 * Enable the mouse device and turn on the aux port interrupt.
 * `command_byte' is the saved controller command byte, used to
 * restore the controller if anything goes wrong.
 */
static int
doopen(int unit, int command_byte)
{
    struct psm_softc *sc = psm_softc[unit];
    int stat[3];

    /* enable the mouse device */
    if (!enable_aux_dev(sc->kbdc)) {
	/* MOUSE ERROR: failed to enable the mouse because:
	 * 1) the mouse is faulty,
	 * 2) the mouse has been removed(!?)
	 * In the latter case, the keyboard may have hung, and need
	 * recovery procedure...
	 */
	recover_from_error(sc->kbdc);
#if 0
	/* FIXME: we could reset the mouse here and try to enable
	 * it again. But it will take long time and it's not a good
	 * idea to disable the keyboard that long...
	 */
	if (!reinitialize(unit, &sc->mode) || !enable_aux_dev(sc->kbdc)) {
	    recover_from_error(sc->kbdc);
#else
	{
#endif
            restore_controller(sc->kbdc, command_byte);
	    /* mark this device is no longer available */
	    sc->state &= ~PSM_VALID;
	    log(LOG_ERR, "psm%d: failed to enable the device (doopen).\n",
		unit);
	    return (EIO);
	}
    }

    if (get_mouse_status(sc->kbdc, stat, 0, 3) < 3)
        log(LOG_DEBUG, "psm%d: failed to get status (doopen).\n", unit);

    /* enable the aux port and interrupt */
    if (!set_controller_command_byte(sc->kbdc,
	    kbdc_get_device_mask(sc->kbdc),
	    (command_byte & KBD_KBD_CONTROL_BITS)
		| KBD_ENABLE_AUX_PORT | KBD_ENABLE_AUX_INT)) {
	/* CONTROLLER ERROR */
	disable_aux_dev(sc->kbdc);
        restore_controller(sc->kbdc, command_byte);
	log(LOG_ERR, "psm%d: failed to enable the aux interrupt (doopen).\n",
	    unit);
	return (EIO);
    }

    return (0);
}

/* psm driver entry points */

/* probe exit helper: undo verbose bump, unlock the kbdc, free the softc */
#define endprobe(v)	{ if (bootverbose) \
				--verbose;   \
			  kbdc_set_device_mask(sc->kbdc, mask); \
			  kbdc_lock(sc->kbdc, FALSE); \
			  free(sc, M_DEVBUF); \
			  return (v); \
			}

static int
psmprobe(struct isa_device *dvp)
{
    int unit = dvp->id_unit;
    struct psm_softc *sc;
    int stat[3];
    int command_byte;
    int mask;
    int i;

    /* validate unit number */
    if (unit >= NPSM)
        return (0);

    psm_softc[unit] = NULL;

    sc = malloc(sizeof *sc, M_DEVBUF, M_NOWAIT);
    if (sc == NULL)
        return (0);
    bzero(sc, sizeof *sc);

#if 0
    kbdc_debug(TRUE);
#endif
    sc->addr = dvp->id_iobase;
    sc->kbdc = kbdc_open(sc->addr);
    sc->config = dvp->id_flags & PSM_CONFIG_FLAGS;
    sc->flags = 0;
    if (bootverbose)
        ++verbose;

    if (!kbdc_lock(sc->kbdc, TRUE)) {
        printf("psm%d: unable to lock the controller.\n", unit);
        if (bootverbose)
            --verbose;
	free(sc, M_DEVBUF);
        return (0);
    }

    /*
     * NOTE: two bits in the command byte controls the operation of the
     * aux port (mouse port): the aux port disable bit (bit 5) and the aux
     * port interrupt (IRQ 12) enable bit (bit 2).
     */

    /* discard anything left after the keyboard initialization */
    empty_both_buffers(sc->kbdc, 10);

    /* save the current command byte; it will be used later */
    mask = kbdc_get_device_mask(sc->kbdc) & ~KBD_AUX_CONTROL_BITS;
    command_byte = get_controller_command_byte(sc->kbdc);
    if (verbose)
        printf("psm%d: current command byte:%04x\n", unit, command_byte);
    if (command_byte == -1) {
        /* CONTROLLER ERROR */
        printf("psm%d: unable to get the current command byte value.\n",
            unit);
        endprobe(0);
    }

    /*
     * disable the keyboard port while probing the aux port, which must be
     * enabled during this routine
     */
    if (!set_controller_command_byte(sc->kbdc,
	    KBD_KBD_CONTROL_BITS | KBD_AUX_CONTROL_BITS,
	    KBD_DISABLE_KBD_PORT | KBD_DISABLE_KBD_INT
		| KBD_ENABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) {
        /*
	 * this is CONTROLLER ERROR; I don't know how to recover
	 * from this error...
	 */
        restore_controller(sc->kbdc, command_byte);
        printf("psm%d: unable to set the command byte.\n", unit);
        endprobe(0);
    }

    /*
     * NOTE: `test_aux_port()' is designed to return with zero if the aux
     * port exists and is functioning. However, some controllers appears
     * to respond with zero even when the aux port doesn't exist. (It may
     * be that this is only the case when the controller DOES have the aux
     * port but the port is not wired on the motherboard.) The keyboard
     * controllers without the port, such as the original AT, are
     * supposed to return with an error code or simply time out. In any
     * case, we have to continue probing the port even when the controller
     * passes this test.
     *
     * XXX: some controllers erroneously return the error code 1 when
     * it has the perfectly functional aux port. We have to ignore this
     * error code. Even if the controller HAS error with the aux port,
     * it will be detected later...
     */
    switch ((i = test_aux_port(sc->kbdc))) {
    case 1:	/* ignore this error */
        if (verbose)
	    printf("psm%d: strange result for test aux port (%d).\n",
	        unit, i);
	/* fall though */
    case 0:	/* no error */
        break;
    case -1:	/* time out */
    default:	/* error */
        recover_from_error(sc->kbdc);
        restore_controller(sc->kbdc, command_byte);
        if (verbose)
            printf("psm%d: the aux port is not functioning (%d).\n",
                unit, i);
        endprobe(0);
    }

    /*
     * NOTE: some controllers appears to hang the `keyboard' when the aux
     * port doesn't exist and `PSMC_RESET_DEV' is issued.
     */
    if (!reset_aux_dev(sc->kbdc)) {
        recover_from_error(sc->kbdc);
        restore_controller(sc->kbdc, command_byte);
        if (verbose)
            printf("psm%d: failed to reset the aux device.\n", unit);
        endprobe(0);
    }

    /*
     * both the aux port and the aux device is functioning, see if the
     * device can be enabled. NOTE: when enabled, the device will start
     * sending data; we shall immediately disable the device once we know
     * the device can be enabled.
     */
    if (!enable_aux_dev(sc->kbdc) || !disable_aux_dev(sc->kbdc)) {
        /* MOUSE ERROR */
        restore_controller(sc->kbdc, command_byte);
        if (verbose)
            printf("psm%d: failed to enable the aux device.\n", unit);
        endprobe(0);
    }

    /* save the default values after reset */
    if (get_mouse_status(sc->kbdc, stat, 0, 3) >= 3) {
	sc->dflt_mode.rate = sc->mode.rate = stat[2];
	sc->dflt_mode.resolution = sc->mode.resolution = stat[1];
    } else {
	sc->dflt_mode.rate = sc->mode.rate = -1;
	sc->dflt_mode.resolution = sc->mode.resolution = -1;
    }

    /* hardware information */
    sc->hw.iftype = MOUSE_IF_PS2;

    /* verify the device is a mouse */
    sc->hw.hwid = get_aux_id(sc->kbdc);
    if (!is_a_mouse(sc->hw.hwid)) {
        restore_controller(sc->kbdc, command_byte);
        if (verbose)
            printf("psm%d: unknown device type (%d).\n", unit, sc->hw.hwid);
        endprobe(0);
    }
    switch (sc->hw.hwid) {
    case PSM_BALLPOINT_ID:
        sc->hw.type = MOUSE_TRACKBALL;
        break;
    case PSM_MOUSE_ID:
    case PSM_INTELLI_ID:
        sc->hw.type = MOUSE_MOUSE;
        break;
    default:
        sc->hw.type = MOUSE_UNKNOWN;
        break;
    }

    /* # of buttons */
    sc->hw.buttons = get_mouse_buttons(sc->kbdc);

    /* other parameters: run the vendor-specific probe functions */
    for (i = 0; vendertype[i].probefunc != NULL; ++i) {
	if ((*vendertype[i].probefunc)(sc)) {
	    if (verbose >= 2)
		printf("psm%d: found %s\n",
		    unit, model_name(vendertype[i].model));
	    break;
	}
    }

    sc->hw.model = vendertype[i].model;

    sc->dflt_mode.level = PSM_LEVEL_BASE;
    sc->dflt_mode.packetsize = MOUSE_PS2_PACKETSIZE;
    sc->dflt_mode.accelfactor = (sc->config & PSM_CONFIG_ACCEL) >> 4;
    if (sc->config & PSM_CONFIG_NOCHECKSYNC)
        sc->dflt_mode.syncmask[0] = 0;
    else
        sc->dflt_mode.syncmask[0] = vendertype[i].syncmask;
    sc->dflt_mode.syncmask[1] = 0;	/* syncbits */
    sc->mode = sc->dflt_mode;
    sc->mode.packetsize = vendertype[i].packetsize;

    /* set mouse parameters */
    i = send_aux_command(sc->kbdc, PSMC_SET_DEFAULTS);
    if (verbose >= 2)
        printf("psm%d: SET_DEFAULTS return code:%04x\n", unit, i);
    if (sc->config & PSM_CONFIG_RESOLUTION) {
        sc->mode.resolution
	    = set_mouse_resolution(sc->kbdc,
	        (sc->config & PSM_CONFIG_RESOLUTION) - 1);
    }

    /* request a data packet and extract sync. bits */
    if (get_mouse_status(sc->kbdc, stat, 1, 3) < 3) {
        printf("psm%d: failed to get data.\n", unit);
        sc->mode.syncmask[0] = 0;
    } else {
        sc->mode.syncmask[1] = stat[0] & sc->mode.syncmask[0];	/* syncbits */
	/* the NetScroll Mouse will send three more bytes... Ignore them */
	empty_aux_buffer(sc->kbdc, 5);
    }

    /* just check the status of the mouse */
    /*
     * NOTE: XXX there are some arcane controller/mouse combinations out
     * there, which hung the controller unless there is data transmission
     * after ACK from the mouse.
     */
    if (get_mouse_status(sc->kbdc, stat, 0, 3) < 3) {
        printf("psm%d: failed to get status.\n", unit);
    } else {
	/*
	 * When in its native mode, some mice operate with different
	 * default parameters than in the PS/2 compatible mode.
	 */
        sc->dflt_mode.rate = sc->mode.rate = stat[2];
        sc->dflt_mode.resolution = sc->mode.resolution = stat[1];
    }

    /* disable the aux port for now...
     */
    if (!set_controller_command_byte(sc->kbdc,
	    KBD_KBD_CONTROL_BITS | KBD_AUX_CONTROL_BITS,
	    (command_byte & KBD_KBD_CONTROL_BITS)
		| KBD_DISABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) {
        /*
	 * this is CONTROLLER ERROR; I don't know the proper way to
	 * recover from this error...
	 */
        restore_controller(sc->kbdc, command_byte);
        printf("psm%d: unable to set the command byte.\n", unit);
        endprobe(0);
    }

    /* done */
    psm_softc[unit] = sc;
    kbdc_set_device_mask(sc->kbdc, mask | KBD_AUX_CONTROL_BITS);
    kbdc_lock(sc->kbdc, FALSE);

    return (IO_PSMSIZE);
}

/*
 * Attach routine: register devfs nodes and the optional APM resume
 * hook, then report what was found during probe.
 */
static int
psmattach(struct isa_device *dvp)
{
    int unit = dvp->id_unit;
    struct psm_softc *sc = psm_softc[unit];

    if (sc == NULL)    /* shouldn't happen */
	return (0);

    /* Setup initial state */
    sc->state = PSM_VALID;

    /* Done */
#ifdef DEVFS
    sc->devfs_token =
        devfs_add_devswf(&psm_cdevsw, PSM_MKMINOR(unit, TRUE),
            DV_CHR, 0, 0, 0666, "psm%d", unit);
    sc->n_devfs_token =
        devfs_add_devswf(&psm_cdevsw, PSM_MKMINOR(unit, FALSE),
            DV_CHR, 0, 0, 0666, "npsm%d", unit);
#endif /* DEVFS */

#ifdef PSM_HOOKAPM
    sc->resumehook.ah_name = "PS/2 mouse";
    sc->resumehook.ah_fun = psmresume;
    sc->resumehook.ah_arg = (void *)unit;
    sc->resumehook.ah_order = APM_MID_ORDER;
    apm_hook_establish(APM_HOOK_RESUME , &sc->resumehook);

    if (verbose)
        printf("psm%d: APM hooks installed.\n", unit);
#endif /* PSM_HOOKAPM */

    if (!verbose) {
        printf("psm%d: model %s, device ID %d\n",
	    unit, model_name(sc->hw.model), sc->hw.hwid);
    } else {
        printf("psm%d: model %s, device ID %d, %d buttons\n",
	    unit, model_name(sc->hw.model), sc->hw.hwid, sc->hw.buttons);
	printf("psm%d: config:%08x, flags:%08x, packet size:%d\n",
	    unit, sc->config, sc->flags, sc->mode.packetsize);
	printf("psm%d: syncmask:%02x, syncbits:%02x\n",
	    unit, sc->mode.syncmask[0], sc->mode.syncmask[1]);
    }

    if (bootverbose)
        --verbose;

    return (1);
}

/*
 * Open routine: claim the device, reset per-open state, and enable
 * the aux port/device with the keyboard temporarily disabled.
 */
static int
psmopen(dev_t dev, int flag, int fmt, struct proc *p)
{
    int unit = PSM_UNIT(dev);
    struct psm_softc *sc;
    int command_byte;
    int err;
    int s;

    /* Validate unit number */
    if (unit >= NPSM)
        return (ENXIO);

    /* Get device data */
    sc = psm_softc[unit];
    if ((sc == NULL) || (sc->state & PSM_VALID) == 0)
	/* the device is no longer valid/functioning */
        return (ENXIO);

    /* Disallow multiple opens */
    if (sc->state & PSM_OPEN)
        return (EBUSY);

    /* Initialize state */
    sc->rsel.si_flags = 0;
    sc->rsel.si_pid = 0;
    sc->mode.level = sc->dflt_mode.level;
    sc->mode.protocol = sc->dflt_mode.protocol;

    /* flush the event queue */
    sc->queue.count = 0;
    sc->queue.head = 0;
    sc->queue.tail = 0;
    sc->status.flags = 0;
    sc->status.button = 0;
    sc->status.obutton = 0;
    sc->status.dx = 0;
    sc->status.dy = 0;
    sc->status.dz = 0;
    sc->button = 0;

    /* empty input buffer */
    bzero(sc->ipacket, sizeof(sc->ipacket));
    sc->inputbytes = 0;

    /* don't let timeout routines in the keyboard driver to poll the kbdc */
    if (!kbdc_lock(sc->kbdc, TRUE))
	return (EIO);

    /* save the current controller command byte */
    s = spltty();
    command_byte = get_controller_command_byte(sc->kbdc);

    /* enable the aux port and temporarily disable the keyboard */
    if ((command_byte == -1)
        || !set_controller_command_byte(sc->kbdc,
	    kbdc_get_device_mask(sc->kbdc),
	    KBD_DISABLE_KBD_PORT | KBD_DISABLE_KBD_INT
	        | KBD_ENABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) {
        /* CONTROLLER ERROR; do you know how to get out of this? */
        kbdc_lock(sc->kbdc, FALSE);
	splx(s);
	log(LOG_ERR, "psm%d: unable to set the command byte (psmopen).\n",
	    unit);
	return (EIO);
    }

    /*
     * Now that the keyboard controller is told not to generate
     * the keyboard and mouse interrupts, call `splx()' to allow
     * the other tty interrupts. The clock interrupt may also occur,
     * but timeout routines will be blocked by the poll flag set
     * via `kbdc_lock()'
     */
    splx(s);

    /* enable the mouse device */
    err = doopen(unit, command_byte);

    /* done */
    if (err == 0)
        sc->state |= PSM_OPEN;
    kbdc_lock(sc->kbdc, FALSE);

    return (err);
}

/*
 * Close routine: disable the aux device, port and interrupt, and
 * restore the keyboard bits of the controller command byte.
 */
static int
psmclose(dev_t dev, int flag, int fmt, struct proc *p)
{
    struct psm_softc *sc = psm_softc[PSM_UNIT(dev)];
    int stat[3];
    int command_byte;
    int s;

    /* don't let timeout routines in the keyboard driver to poll the kbdc */
    if (!kbdc_lock(sc->kbdc, TRUE))
	return (EIO);

    /* save the current controller command byte */
    s = spltty();
    command_byte = get_controller_command_byte(sc->kbdc);
    if (command_byte == -1) {
        kbdc_lock(sc->kbdc, FALSE);
	splx(s);
	return (EIO);
    }

    /* disable the aux interrupt and temporarily disable the keyboard */
    if (!set_controller_command_byte(sc->kbdc,
	    kbdc_get_device_mask(sc->kbdc),
	    KBD_DISABLE_KBD_PORT | KBD_DISABLE_KBD_INT
	        | KBD_ENABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) {
	log(LOG_ERR, "psm%d: failed to disable the aux int (psmclose).\n",
	    PSM_UNIT(dev));
	/* CONTROLLER ERROR;
	 * NOTE: we shall force our way through. Because the only
	 * ill effect we shall see is that we may not be able
	 * to read ACK from the mouse, and it doesn't matter much
	 * so long as the mouse will accept the DISABLE command.
	 */
    }
    splx(s);

    /* remove anything left in the output buffer */
    empty_aux_buffer(sc->kbdc, 10);

    /* disable the aux device, port and interrupt */
    if (sc->state & PSM_VALID) {
        if (!disable_aux_dev(sc->kbdc)) {
	    /* MOUSE ERROR;
	     * NOTE: we don't return error and continue, pretending
	     * we have successfully disabled the device. It's OK because
	     * the interrupt routine will discard any data from the mouse
	     * hereafter.
	     */
	    log(LOG_ERR, "psm%d: failed to disable the device (psmclose).\n",
		PSM_UNIT(dev));
	}

        if (get_mouse_status(sc->kbdc, stat, 0, 3) < 3)
            log(LOG_DEBUG, "psm%d: failed to get status (psmclose).\n",
		PSM_UNIT(dev));
    }

    if (!set_controller_command_byte(sc->kbdc,
	    kbdc_get_device_mask(sc->kbdc),
	    (command_byte & KBD_KBD_CONTROL_BITS)
	        | KBD_DISABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) {
	/* CONTROLLER ERROR;
	 * we shall ignore this error; see the above comment.
	 */
	log(LOG_ERR, "psm%d: failed to disable the aux port (psmclose).\n",
	    PSM_UNIT(dev));
    }

    /* remove anything left in the output buffer */
    empty_aux_buffer(sc->kbdc, 10);

    /* close is almost always successful */
    sc->state &= ~PSM_OPEN;
    kbdc_lock(sc->kbdc, FALSE);

    return (0);
}

/*
 * Convert the accumulated mouse status into an output packet in the
 * format appropriate for the current operation level, and store it
 * in `buf'. Returns the packet size in bytes.
 */
static int
tame_mouse(struct psm_softc *sc, mousestatus_t *status, unsigned char *buf)
{
    /* MOUSE_BUTTON?DOWN bit combinations -> PS/2 packet button bits */
    static unsigned char butmapps2[8] = {
        0,
        MOUSE_PS2_BUTTON1DOWN,
        MOUSE_PS2_BUTTON2DOWN,
        MOUSE_PS2_BUTTON1DOWN | MOUSE_PS2_BUTTON2DOWN,
        MOUSE_PS2_BUTTON3DOWN,
        MOUSE_PS2_BUTTON1DOWN | MOUSE_PS2_BUTTON3DOWN,
        MOUSE_PS2_BUTTON2DOWN | MOUSE_PS2_BUTTON3DOWN,
        MOUSE_PS2_BUTTON1DOWN | MOUSE_PS2_BUTTON2DOWN | MOUSE_PS2_BUTTON3DOWN,
    };
    /* MOUSE_BUTTON?DOWN bit combinations -> MouseSystems packet bits
     * (active low: a set bit means the button is up) */
    static unsigned char butmapmsc[8] = {
        MOUSE_MSC_BUTTON1UP | MOUSE_MSC_BUTTON2UP | MOUSE_MSC_BUTTON3UP,
        MOUSE_MSC_BUTTON2UP | MOUSE_MSC_BUTTON3UP,
        MOUSE_MSC_BUTTON1UP | MOUSE_MSC_BUTTON3UP,
        MOUSE_MSC_BUTTON3UP,
        MOUSE_MSC_BUTTON1UP | MOUSE_MSC_BUTTON2UP,
        MOUSE_MSC_BUTTON2UP,
        MOUSE_MSC_BUTTON1UP,
        0,
    };
    int mapped;
    int i;

    if (sc->mode.level == PSM_LEVEL_BASE) {
        /* fold button 4 onto button 1 for the 3-byte PS/2 packet */
        mapped = status->button & ~MOUSE_BUTTON4DOWN;
        if (status->button & MOUSE_BUTTON4DOWN)
	    mapped |= MOUSE_BUTTON1DOWN;
        status->button = mapped;
	buf[0] = MOUSE_PS2_SYNC | butmapps2[mapped & MOUSE_STDBUTTONS];
	i = max(min(status->dx, 255), -256);
	if (i < 0)
	    buf[0] |= MOUSE_PS2_XNEG;
        buf[1] = i;
	i = max(min(status->dy, 255), -256);
	if (i < 0)
	    buf[0] |= MOUSE_PS2_YNEG;
        buf[2] = i;
	return MOUSE_PS2_PACKETSIZE;
    } else if (sc->mode.level == PSM_LEVEL_STANDARD) {
	buf[0] = MOUSE_MSC_SYNC |
butmapmsc[status->button & MOUSE_STDBUTTONS]; i = max(min(status->dx, 255), -256); buf[1] = i >> 1; buf[3] = i - buf[1]; i = max(min(status->dy, 255), -256); buf[2] = i >> 1; buf[4] = i - buf[2]; i = max(min(status->dz, 127), -128); buf[5] = (i >> 1) & 0x7f; buf[6] = (i - (i >> 1)) & 0x7f; buf[7] = (~status->button >> 3) & 0x7f; return MOUSE_SYS_PACKETSIZE; } return sc->inputbytes;; } static int psmread(dev_t dev, struct uio *uio, int flag) { register struct psm_softc *sc = psm_softc[PSM_UNIT(dev)]; unsigned char buf[PSM_SMALLBUFSIZE]; int error = 0; int s; int l; if ((sc->state & PSM_VALID) == 0) return EIO; /* block until mouse activity occured */ s = spltty(); while (sc->queue.count <= 0) { if (PSM_NBLOCKIO(dev)) { splx(s); return EWOULDBLOCK; } sc->state |= PSM_ASLP; error = tsleep((caddr_t) sc, PZERO | PCATCH, "psmrea", 0); sc->state &= ~PSM_ASLP; if (error) { splx(s); return error; } else if ((sc->state & PSM_VALID) == 0) { /* the device disappeared! */ splx(s); return EIO; } } splx(s); /* copy data to the user land */ while ((sc->queue.count > 0) && (uio->uio_resid > 0)) { s = spltty(); l = min(sc->queue.count, uio->uio_resid); if (l > sizeof(buf)) l = sizeof(buf); if (l > sizeof(sc->queue.buf) - sc->queue.head) { bcopy(&sc->queue.buf[sc->queue.head], &buf[0], sizeof(sc->queue.buf) - sc->queue.head); bcopy(&sc->queue.buf[0], &buf[sizeof(sc->queue.buf) - sc->queue.head], l - (sizeof(sc->queue.buf) - sc->queue.head)); } else { bcopy(&sc->queue.buf[sc->queue.head], &buf[0], l); } sc->queue.count -= l; sc->queue.head = (sc->queue.head + l) % sizeof(sc->queue.buf); splx(s); error = uiomove(buf, l, uio); if (error) break; } return error; } static int block_mouse_data(struct psm_softc *sc, int *c) { int s; if (!kbdc_lock(sc->kbdc, TRUE)) return EIO; s = spltty(); *c = get_controller_command_byte(sc->kbdc); if ((*c == -1) || !set_controller_command_byte(sc->kbdc, kbdc_get_device_mask(sc->kbdc), KBD_DISABLE_KBD_PORT | KBD_DISABLE_KBD_INT | KBD_ENABLE_AUX_PORT | 
                KBD_DISABLE_AUX_INT)) {
        /* this is CONTROLLER ERROR */
	splx(s);
        kbdc_lock(sc->kbdc, FALSE);
	return EIO;
    }

    /*
     * The device may be in the middle of status data transmission.
     * The transmission will be interrupted, thus, incomplete status
     * data must be discarded. Although the aux interrupt is disabled
     * at the keyboard controller level, at most one aux interrupt
     * may have already been pending and a data byte is in the
     * output buffer; throw it away. Note that the second argument
     * to `empty_aux_buffer()' is zero, so that the call will just
     * flush the internal queue.
     * `psmintr()' will be invoked after `splx()' if an interrupt is
     * pending; it will see no data and returns immediately.
     */
    empty_aux_buffer(sc->kbdc, 0);	/* flush the queue */
    read_aux_data_no_wait(sc->kbdc);	/* throw away data if any */
    sc->inputbytes = 0;
    splx(s);

    return 0;
}

/*
 * Undo `block_mouse_data()': flush stray bytes and write the saved
 * command byte `c' back to re-enable the ports and interrupts.
 */
static int
unblock_mouse_data(struct psm_softc *sc, int c)
{
    int error = 0;

    /*
     * We may have seen a part of status data during `set_mouse_XXX()'.
     * they have been queued; flush it.
     */
    empty_aux_buffer(sc->kbdc, 0);

    /* restore ports and interrupt */
    if (!set_controller_command_byte(sc->kbdc,
            kbdc_get_device_mask(sc->kbdc),
	    c & (KBD_KBD_CONTROL_BITS | KBD_AUX_CONTROL_BITS))) {
        /* CONTROLLER ERROR; this is serious, we may have
         * been left with the inaccessible keyboard and
         * the disabled mouse interrupt.
         */
        error = EIO;
    }

    kbdc_lock(sc->kbdc, FALSE);
    return error;
}

/*
 * Ioctl routine: get/set hardware info, modes, operation level and
 * various vendor-specific parameters.
 */
static int
psmioctl(dev_t dev, int cmd, caddr_t addr, int flag, struct proc *p)
{
    struct psm_softc *sc = psm_softc[PSM_UNIT(dev)];
    mousemode_t mode;
    mousestatus_t status;
#if (defined(MOUSE_GETVARS))
    mousevar_t *var;
#endif
    mousedata_t *data;
    int stat[3];
    int command_byte;
    int error = 0;
    int s;

    /* Perform IOCTL command */
    switch (cmd) {

    case OLD_MOUSE_GETHWINFO:
	s = spltty();
        ((old_mousehw_t *)addr)->buttons = sc->hw.buttons;
        ((old_mousehw_t *)addr)->iftype = sc->hw.iftype;
        ((old_mousehw_t *)addr)->type = sc->hw.type;
        ((old_mousehw_t *)addr)->hwid = sc->hw.hwid;
	splx(s);
        break;

    case MOUSE_GETHWINFO:
	s = spltty();
        *(mousehw_t *)addr = sc->hw;
	if (sc->mode.level == PSM_LEVEL_BASE)
	    ((mousehw_t *)addr)->model = MOUSE_MODEL_GENERIC;
	splx(s);
        break;

    case OLD_MOUSE_GETMODE:
	s = spltty();
	switch (sc->mode.level) {
	case PSM_LEVEL_BASE:
	    ((old_mousemode_t *)addr)->protocol = MOUSE_PROTO_PS2;
	    break;
	case PSM_LEVEL_STANDARD:
	    ((old_mousemode_t *)addr)->protocol = MOUSE_PROTO_SYSMOUSE;
	    break;
	case PSM_LEVEL_NATIVE:
	    ((old_mousemode_t *)addr)->protocol = MOUSE_PROTO_PS2;
	    break;
	}
        ((old_mousemode_t *)addr)->rate = sc->mode.rate;
        ((old_mousemode_t *)addr)->resolution = sc->mode.resolution;
        ((old_mousemode_t *)addr)->accelfactor = sc->mode.accelfactor;
	splx(s);
        break;

    case MOUSE_GETMODE:
	s = spltty();
        *(mousemode_t *)addr = sc->mode;
        ((mousemode_t *)addr)->resolution =
	    MOUSE_RES_LOW - sc->mode.resolution;
	switch (sc->mode.level) {
	case PSM_LEVEL_BASE:
	    ((mousemode_t *)addr)->protocol = MOUSE_PROTO_PS2;
	    ((mousemode_t *)addr)->packetsize = MOUSE_PS2_PACKETSIZE;
	    break;
	case PSM_LEVEL_STANDARD:
	    ((mousemode_t *)addr)->protocol = MOUSE_PROTO_SYSMOUSE;
	    ((mousemode_t *)addr)->packetsize = MOUSE_SYS_PACKETSIZE;
	    ((mousemode_t *)addr)->syncmask[0] = MOUSE_SYS_SYNCMASK;
	    ((mousemode_t *)addr)->syncmask[1] = MOUSE_SYS_SYNC;
	    break;
	case PSM_LEVEL_NATIVE:
	    /* FIXME: this isn't quite correct...
	       XXX */
	    ((mousemode_t *)addr)->protocol = MOUSE_PROTO_PS2;
	    break;
	}
	splx(s);
        break;

    case OLD_MOUSE_SETMODE:
    case MOUSE_SETMODE:
	if (cmd == OLD_MOUSE_SETMODE) {
	    mode.rate = ((old_mousemode_t *)addr)->rate;
	    /*
	     * resolution  old I/F   new I/F
	     * default        0         0
	     * low            1        -2
	     * medium low     2        -3
	     * medium high    3        -4
	     * high           4        -5
	     */
	    if (((old_mousemode_t *)addr)->resolution > 0)
	        mode.resolution = -((old_mousemode_t *)addr)->resolution - 1;
	    mode.accelfactor = ((old_mousemode_t *)addr)->accelfactor;
	    mode.level = -1;
	} else {
	    mode = *(mousemode_t *)addr;
	}

	/* adjust and validate parameters. */
	if (mode.rate > UCHAR_MAX)
	    return EINVAL;
        if (mode.rate == 0)
            mode.rate = sc->dflt_mode.rate;
	else if (mode.rate == -1)
	    /* don't change the current setting */
	    ;
	else if (mode.rate < 0)
	    return EINVAL;
	if (mode.resolution >= UCHAR_MAX)
	    return EINVAL;
	if (mode.resolution >= 200)
	    mode.resolution = MOUSE_RES_HIGH;
	else if (mode.resolution >= 100)
	    mode.resolution = MOUSE_RES_MEDIUMHIGH;
	else if (mode.resolution >= 50)
	    mode.resolution = MOUSE_RES_MEDIUMLOW;
	else if (mode.resolution > 0)
	    mode.resolution = MOUSE_RES_LOW;
        if (mode.resolution == MOUSE_RES_DEFAULT)
            mode.resolution = sc->dflt_mode.resolution;
        else if (mode.resolution == -1)
	    /* don't change the current setting */
	    ;
        else if (mode.resolution < 0) /* MOUSE_RES_LOW/MEDIUM/HIGH */
            mode.resolution = MOUSE_RES_LOW - mode.resolution;
	if (mode.level == -1)
	    /* don't change the current setting */
	    mode.level = sc->mode.level;
	else if ((mode.level < PSM_LEVEL_MIN) || (mode.level > PSM_LEVEL_MAX))
	    return EINVAL;
        if (mode.accelfactor == -1)
	    /* don't change the current setting */
	    mode.accelfactor = sc->mode.accelfactor;
        else if (mode.accelfactor < 0)
	    return EINVAL;

	/* don't allow anybody to poll the keyboard controller */
	error = block_mouse_data(sc, &command_byte);
	if (error)
            return error;

        /* set mouse parameters */
	if (mode.rate > 0)
	    mode.rate = set_mouse_sampling_rate(sc->kbdc, mode.rate);
	if (mode.resolution >= 0)
	    mode.resolution = set_mouse_resolution(sc->kbdc, mode.resolution);
	set_mouse_scaling(sc->kbdc, 1);
	get_mouse_status(sc->kbdc, stat, 0, 3);

        s = spltty();
	sc->mode.rate = mode.rate;
	sc->mode.resolution = mode.resolution;
	sc->mode.accelfactor = mode.accelfactor;
	sc->mode.level = mode.level;
        splx(s);

	unblock_mouse_data(sc, command_byte);
        break;

    case MOUSE_GETLEVEL:
	*(int *)addr = sc->mode.level;
        break;

    case MOUSE_SETLEVEL:
	if ((*(int *)addr < PSM_LEVEL_MIN) || (*(int *)addr > PSM_LEVEL_MAX))
	    return EINVAL;
	sc->mode.level = *(int *)addr;
        break;

    case MOUSE_GETSTATUS:
        s = spltty();
	status = sc->status;
	sc->status.flags = 0;
	sc->status.obutton = sc->status.button;
	sc->status.button = 0;
	sc->status.dx = 0;
	sc->status.dy = 0;
	sc->status.dz = 0;
        splx(s);
        *(mousestatus_t *)addr = status;
        break;

#if (defined(MOUSE_GETVARS))
    case MOUSE_GETVARS:
	var = (mousevar_t *)addr;
	bzero(var, sizeof(*var));
	s = spltty();
	var->var[0] = MOUSE_VARS_PS2_SIG;
	var->var[1] = sc->config;
	var->var[2] = sc->flags;
	splx(s);
        break;

    case MOUSE_SETVARS:
	return ENODEV;
#endif /* MOUSE_GETVARS */

    case MOUSE_READSTATE:
    case MOUSE_READDATA:
	data = (mousedata_t *)addr;
	if (data->len > sizeof(data->buf)/sizeof(data->buf[0]))
	    return EINVAL;

	error = block_mouse_data(sc, &command_byte);
	if (error)
            return error;
        if ((data->len = get_mouse_status(sc->kbdc, data->buf,
		(cmd == MOUSE_READDATA) ? 1 : 0, data->len)) <= 0)
            error = EIO;
	unblock_mouse_data(sc, command_byte);
	break;

#if (defined(MOUSE_SETRESOLUTION))
    case MOUSE_SETRESOLUTION:
	mode.resolution = *(int *)addr;
	if (mode.resolution >= UCHAR_MAX)
	    return EINVAL;
	else if (mode.resolution >= 200)
	    mode.resolution = MOUSE_RES_HIGH;
	else if (mode.resolution >= 100)
	    mode.resolution = MOUSE_RES_MEDIUMHIGH;
	else if (mode.resolution >= 50)
	    mode.resolution = MOUSE_RES_MEDIUMLOW;
	else if (mode.resolution > 0)
	    mode.resolution = MOUSE_RES_LOW;
        if (mode.resolution == MOUSE_RES_DEFAULT)
            mode.resolution = sc->dflt_mode.resolution;
        else if (mode.resolution == -1)
	    mode.resolution = sc->mode.resolution;
        else if (mode.resolution < 0) /* MOUSE_RES_LOW/MEDIUM/HIGH */
            mode.resolution = MOUSE_RES_LOW - mode.resolution;

	error = block_mouse_data(sc, &command_byte);
	if (error)
            return error;
        sc->mode.resolution = set_mouse_resolution(sc->kbdc, mode.resolution);
	if (sc->mode.resolution != mode.resolution)
	    error = EIO;
	unblock_mouse_data(sc, command_byte);
        break;
#endif /* MOUSE_SETRESOLUTION */

#if (defined(MOUSE_SETRATE))
    case MOUSE_SETRATE:
	mode.rate = *(int *)addr;
	if (mode.rate > UCHAR_MAX)
	    return EINVAL;
        if (mode.rate == 0)
            mode.rate = sc->dflt_mode.rate;
	else if (mode.rate < 0)
	    mode.rate = sc->mode.rate;

	error = block_mouse_data(sc, &command_byte);
	if (error)
            return error;
        sc->mode.rate = set_mouse_sampling_rate(sc->kbdc, mode.rate);
	if (sc->mode.rate != mode.rate)
	    error = EIO;
	unblock_mouse_data(sc, command_byte);
        break;
#endif /* MOUSE_SETRATE */

#if (defined(MOUSE_SETSCALING))
    case MOUSE_SETSCALING:
	if ((*(int *)addr <= 0) || (*(int *)addr > 2))
	    return EINVAL;

	error = block_mouse_data(sc, &command_byte);
	if (error)
            return error;
        if (!set_mouse_scaling(sc->kbdc, *(int *)addr))
	    error = EIO;
	unblock_mouse_data(sc, command_byte);
        break;
#endif /* MOUSE_SETSCALING */

#if (defined(MOUSE_GETHWID))
    case MOUSE_GETHWID:
	error = block_mouse_data(sc, &command_byte);
	if (error)
            return error;
        sc->hw.hwid = get_aux_id(sc->kbdc);
	*(int
	    *)addr = sc->hw.hwid;
	unblock_mouse_data(sc, command_byte);
        break;
#endif /* MOUSE_GETHWID */

    default:
	return ENOTTY;
    }

    return error;
}

/*
 * Interrupt handler: assemble bytes from the aux port into packets,
 * decode model-specific extensions, apply acceleration and queue the
 * result for readers.
 */
void
psmintr(int unit)
{
    /*
     * the table to turn PS/2 mouse button bits (MOUSE_PS2_BUTTON?DOWN)
     * into `mousestatus' button bits (MOUSE_BUTTON?DOWN).
     */
    static int butmap[8] = {
        0,
	MOUSE_BUTTON1DOWN,
	MOUSE_BUTTON3DOWN,
	MOUSE_BUTTON1DOWN | MOUSE_BUTTON3DOWN,
	MOUSE_BUTTON2DOWN,
	MOUSE_BUTTON1DOWN | MOUSE_BUTTON2DOWN,
	MOUSE_BUTTON2DOWN | MOUSE_BUTTON3DOWN,
        MOUSE_BUTTON1DOWN | MOUSE_BUTTON2DOWN | MOUSE_BUTTON3DOWN
    };
    register struct psm_softc *sc = psm_softc[unit];
    mousestatus_t ms;
    int x, y, z;
    int c;
    int l;

    /* read until there is nothing to read */
    while((c = read_aux_data_no_wait(sc->kbdc)) != -1) {

        /* discard the byte if the device is not open */
        if ((sc->state & PSM_OPEN) == 0)
            continue;

	/*
	 * Check sync bits. We check for overflow bits and the bit 3
	 * for most mice. True, the code doesn't work if overflow
	 * condition occurs. But we expect it rarely happens...
	 */
	if ((sc->inputbytes == 0)
	        && ((c & sc->mode.syncmask[0]) != sc->mode.syncmask[1])) {
	    log(LOG_DEBUG, "psmintr: out of sync (%04x != %04x).\n",
		c & sc->mode.syncmask[0], sc->mode.syncmask[1]);
	    continue;
	}

	sc->ipacket[sc->inputbytes++] = c;
	if (sc->inputbytes < sc->mode.packetsize)
	    continue;

#if 0
        log(LOG_DEBUG, "psmintr: %02x %02x %02x %02x %02x %02x\n",
	    sc->ipacket[0], sc->ipacket[1], sc->ipacket[2],
	    sc->ipacket[3], sc->ipacket[4], sc->ipacket[5]);
#endif

	c = sc->ipacket[0];

	/*
	 * A kludge for Kensington device!
	 * The MSB of the horizontal count appears to be stored in
	 * a strange place. This kludge doesn't affect other mice
	 * because the bit is the overflow bit which is, in most cases,
	 * expected to be zero when we reach here. XXX
	 */
	sc->ipacket[1] |= (c & MOUSE_PS2_XOVERFLOW) ? 0x80 : 0;

	/* ignore the overflow bits... */
	x = (c & MOUSE_PS2_XNEG) ?  sc->ipacket[1] - 256 : sc->ipacket[1];
	y = (c & MOUSE_PS2_YNEG) ?  sc->ipacket[2] - 256 : sc->ipacket[2];
	z = 0;
	ms.obutton = sc->button;		  /* previous button state */
	ms.button = butmap[c & MOUSE_PS2_BUTTONS];

	switch (sc->hw.model) {

	case MOUSE_MODEL_INTELLI:
	case MOUSE_MODEL_NET:
	    /* wheel data is in the fourth byte */
	    z = (char)sc->ipacket[3];
	    break;

	case MOUSE_MODEL_MOUSEMANPLUS:
	    if ((c & ~MOUSE_PS2_BUTTONS) == 0xc8) {
		/* the extended data packet encodes button and wheel events */
		x = y = 0;
		z = (sc->ipacket[1] & MOUSE_PS2PLUS_ZNEG)
		    ? (sc->ipacket[2] & 0x0f) - 16 : (sc->ipacket[2] & 0x0f);
		ms.button |= (sc->ipacket[2] & MOUSE_PS2PLUS_BUTTON4DOWN)
		    ? MOUSE_BUTTON4DOWN : 0;
	    } else {
		/* preserve button states */
		ms.button |= ms.obutton & MOUSE_EXTBUTTONS;
	    }
	    break;

	case MOUSE_MODEL_GLIDEPOINT:
	    /* `tapping' action */
	    ms.button |= ((c & MOUSE_PS2_TAP)) ? 0 : MOUSE_BUTTON4DOWN;
	    break;

	case MOUSE_MODEL_NETSCROLL:
	    /* three additional bytes encode button and wheel events */
	    ms.button |= (sc->ipacket[3] & MOUSE_PS2_BUTTON3DOWN)
		? MOUSE_BUTTON4DOWN : 0;
	    z = (sc->ipacket[3] & MOUSE_PS2_XNEG)
	        ? sc->ipacket[4] - 256 : sc->ipacket[4];
	    break;

	case MOUSE_MODEL_THINK:
	    /* the fourth button state in the first byte */
	    ms.button |= (c & MOUSE_PS2_TAP) ? MOUSE_BUTTON4DOWN : 0;
	    break;

	case MOUSE_MODEL_GENERIC:
	default:
	    break;
	}

        /* scale values */
        if (sc->mode.accelfactor >= 1) {
            if (x != 0) {
                x = x * x / sc->mode.accelfactor;
                if (x == 0)
                    x = 1;
                if (c & MOUSE_PS2_XNEG)
                    x = -x;
            }
            if (y != 0) {
                y = y * y / sc->mode.accelfactor;
                if (y == 0)
                    y = 1;
                if (c & MOUSE_PS2_YNEG)
                    y = -y;
            }
        }

        ms.dx = x;
        ms.dy = y;
        ms.dz = z;
        ms.flags = ((x || y || z) ?
	    MOUSE_POSCHANGED : 0) | (ms.obutton ^ ms.button);

	if (sc->mode.level < PSM_LEVEL_NATIVE)
	    sc->inputbytes = tame_mouse(sc, &ms, sc->ipacket);

        sc->status.flags |= ms.flags;
        sc->status.dx += ms.dx;
        sc->status.dy += ms.dy;
        sc->status.dz += ms.dz;
        sc->status.button = ms.button;
        sc->button = ms.button;

        /* queue data */
        if (sc->queue.count + sc->inputbytes < sizeof(sc->queue.buf)) {
	    l = min(sc->inputbytes, sizeof(sc->queue.buf) - sc->queue.tail);
	    bcopy(&sc->ipacket[0], &sc->queue.buf[sc->queue.tail], l);
	    if (sc->inputbytes > l)
	        bcopy(&sc->ipacket[l], &sc->queue.buf[0],
		    sc->inputbytes - l);
            sc->queue.tail =
		(sc->queue.tail + sc->inputbytes) % sizeof(sc->queue.buf);
            sc->queue.count += sc->inputbytes;
	}
        sc->inputbytes = 0;

        if (sc->state & PSM_ASLP) {
            sc->state &= ~PSM_ASLP;
            wakeup((caddr_t) sc);
	}
        selwakeup(&sc->rsel);
    }
}

/*
 * Poll routine: report the device readable when queued mouse data
 * is available; otherwise record the caller for select/poll wakeup.
 */
static int
psmpoll(dev_t dev, int events, struct proc *p)
{
    struct psm_softc *sc = psm_softc[PSM_UNIT(dev)];
    int s;
    int revents = 0;

    /* Return true if a mouse event available */
    s = spltty();
    if (events & (POLLIN | POLLRDNORM))
	if (sc->queue.count > 0)
	    revents |= events & (POLLIN | POLLRDNORM);
	else
	    selrecord(p, &sc->rsel);

    splx(s);
    return (revents);
}

/* vender/model specific routines */

/*
 * Common probe helper: set the given resolution, issue the scaling
 * command three times, then read back three status bytes.
 */
static int mouse_id_proc1(KBDC kbdc, int res, int scale, int *status)
{
    if (set_mouse_resolution(kbdc, res) != res)
        return FALSE;
    if (set_mouse_scaling(kbdc, scale)
	&& set_mouse_scaling(kbdc, scale)
	&& set_mouse_scaling(kbdc, scale)
	&& (get_mouse_status(kbdc, status, 0, 3) >= 3))
        return TRUE;
    return FALSE;
}

#if notyet
/* Logitech MouseMan Cordless II */
static int
enable_lcordless(struct psm_softc *sc)
{
    int status[3];
    int ch;

    if (!mouse_id_proc1(sc->kbdc, PSMD_RES_HIGH, 2, status))
        return FALSE;
    if (status[1] == PSMD_RES_HIGH)
	return FALSE;
    ch = (status[0] & 0x07) - 1;	/* channel # */
    if ((ch <= 0) || (ch > 4))
	return FALSE;
    /*
     * status[1]: always one?
     * status[2]: battery status?
     * (0-100)
     */
    return TRUE;
}
#endif /* notyet */

/* Genius NetScroll Mouse */
static int
enable_groller(struct psm_softc *sc)
{
    int status[3];

    /*
     * The special sequence to enable the fourth button and the
     * roller. Immediately after this sequence check status bytes.
     * if the mouse is NetScroll, the second and the third bytes are
     * '3' and 'D'.
     */

    /*
     * If the mouse is an ordinary PS/2 mouse, the status bytes should
     * look like the following.
     *
     * byte 1 bit 7 always 0
     *        bit 6 stream mode (0)
     *        bit 5 disabled (0)
     *        bit 4 1:1 scaling (0)
     *        bit 3 always 0
     *        bit 0-2 button status
     * byte 2 resolution (PSMD_RES_HIGH)
     * byte 3 report rate (?)
     */

    if (!mouse_id_proc1(sc->kbdc, PSMD_RES_HIGH, 1, status))
        return FALSE;
    if ((status[1] != '3') || (status[2] != 'D'))
        return FALSE;
    /* FIXME!! */
    sc->hw.buttons = get_mouse_buttons(sc->kbdc);
    sc->hw.buttons = 4;
    return TRUE;
}

/* Genius NetMouse/NetMouse Pro */
static int
enable_gmouse(struct psm_softc *sc)
{
    int status[3];

    /*
     * The special sequence to enable the middle, "rubber" button.
     * Immediately after this sequence check status bytes.
     * if the mouse is NetMouse, NetMouse Pro, or ASCII MIE Mouse,
     * the second and the third bytes are '3' and 'U'.
     * NOTE: NetMouse reports that it has three buttons although it has
     * two buttons and a rubber button. NetMouse Pro and MIE Mouse
     * say they have three buttons too and they do have a button on the
     * side...
     */
    if (!mouse_id_proc1(sc->kbdc, PSMD_RES_HIGH, 1, status))
        return FALSE;
    if ((status[1] != '3') || (status[2] != 'U'))
        return FALSE;
    return TRUE;
}

/* ALPS GlidePoint */
static int
enable_aglide(struct psm_softc *sc)
{
    int status[3];

    /*
     * The special sequence to obtain ALPS GlidePoint specific
     * information. Immediately after this sequence, status bytes will
     * contain something interesting.
     * NOTE: ALPS produces several models of GlidePoint. Some of those
     * do not respond to this sequence, thus, cannot be detected this way.
*/
    if (!mouse_id_proc1(sc->kbdc, PSMD_RES_LOW, 2, status))
        return FALSE;
    if ((status[0] & 0x10) || (status[1] == PSMD_RES_LOW))
        return FALSE;
    return TRUE;
}

/*
 * Kensington ThinkingMouse/Trackball: probe for, and enable, the third
 * and fourth buttons.  Returns TRUE iff the device behaved like a
 * ThinkingMouse; on success the device is left disabled.
 */
static int
enable_kmouse(struct psm_softc *sc)
{
    static unsigned char rate[] = { 20, 60, 40, 20, 20, 60, 40, 20, 20 };
    KBDC kbdc = sc->kbdc;
    int status[3];
    int id1;
    int id2;
    int i;

    id1 = get_aux_id(kbdc);
    if (set_mouse_sampling_rate(kbdc, 10) != 10)
        return FALSE;
    /*
     * The device is now in the native mode? It returns a different
     * ID value...
     */
    id2 = get_aux_id(kbdc);
    if ((id1 == id2) || (id2 != 2))
        return FALSE;

    if (set_mouse_resolution(kbdc, PSMD_RES_LOW) != PSMD_RES_LOW)
        return FALSE;
#if PSM_DEBUG >= 2
    /* at this point, resolution is LOW, sampling rate is 10/sec */
    if (get_mouse_status(kbdc, status, 0, 3) < 3)
        return FALSE;
#endif

    /*
     * The special sequence to enable the third and fourth buttons.
     * Otherwise they behave like the first and second buttons.
     */
    for (i = 0; i < sizeof(rate)/sizeof(rate[0]); ++i) {
        if (set_mouse_sampling_rate(kbdc, rate[i]) != rate[i])
            return FALSE;
    }

    /*
     * At this point, the device is using default resolution and
     * sampling rate for the native mode.
     */
    if (get_mouse_status(kbdc, status, 0, 3) < 3)
        return FALSE;
    if ((status[1] == PSMD_RES_LOW) || (status[2] == rate[i - 1]))
        return FALSE;

    /* the device appears to be enabled by this sequence; disable it for now */
    disable_aux_dev(kbdc);
    empty_aux_buffer(kbdc, 5);

    return TRUE;
}

/*
 * Logitech MouseMan+/FirstMouse+: probe and switch the device into its
 * native mode, enabling the fourth button and the roller.
 */
static int
enable_mmanplus(struct psm_softc *sc)
{
    static char res[] = {
        -1, PSMD_RES_LOW, PSMD_RES_HIGH, PSMD_RES_MEDIUM_HIGH,
        PSMD_RES_MEDIUM_LOW, -1, PSMD_RES_HIGH, PSMD_RES_MEDIUM_LOW,
        PSMD_RES_MEDIUM_HIGH, PSMD_RES_HIGH,
    };
    KBDC kbdc = sc->kbdc;
    int data[3];
    int i;

    /* the special sequence to enable the fourth button and the roller.
*/
    for (i = 0; i < sizeof(res)/sizeof(res[0]); ++i) {
        if (res[i] < 0) {
            /* a negative table entry stands for "set scaling 1:1" */
            if (!set_mouse_scaling(kbdc, 1))
                return FALSE;
        } else {
            if (set_mouse_resolution(kbdc, res[i]) != res[i])
                return FALSE;
        }
    }

    if (get_mouse_status(kbdc, data, 1, 3) < 3)
        return FALSE;
    /*
     * MouseMan+ and FirstMouse+ return following data.
     *
     * byte 1 0xc8
     * byte 2 ?? (MouseMan+:0xc2, FirstMouse+:0xc6)
     * byte 3 model ID? MouseMan+:0x50, FirstMouse+:0x51
     */
    if ((data[0] & ~MOUSE_PS2_BUTTONS) != 0xc8)
        return FALSE;

    /*
     * MouseMan+ (or FirstMouse+) is now in its native mode, in which
     * the wheel and the fourth button events are encoded in the
     * special data packet. The mouse may be put in the IntelliMouse mode
     * if it is initialized by the IntelliMouse's method.
     */
    return TRUE;
}

/*
 * MS IntelliMouse: probe for, and enable, the third button and the
 * roller.
 */
static int
enable_msintelli(struct psm_softc *sc)
{
    /*
     * Logitech MouseMan+ and FirstMouse+ will also respond to this
     * probe routine and act like IntelliMouse.
     */
    static unsigned char rate[] = { 200, 100, 80, };
    KBDC kbdc = sc->kbdc;
    int id;
    int i;

    /* the special sequence to enable the third button and the roller.
*/
    for (i = 0; i < sizeof(rate)/sizeof(rate[0]); ++i) {
        if (set_mouse_sampling_rate(kbdc, rate[i]) != rate[i])
            return FALSE;
    }

    /* the device will give the genuine ID only after the above sequence */
    id = get_aux_id(kbdc);
    if (id != PSM_INTELLI_ID)
        return FALSE;

    sc->hw.hwid = id;
    sc->hw.buttons = 3;

    return TRUE;
}

/* TRUE once the cdevsw entry for this driver has been registered */
static int psm_devsw_installed = FALSE;

/*
 * psm_drvinit - SYSINIT hook; registers the character device switch
 * entry exactly once.
 */
static void
psm_drvinit(void *unused)
{
    dev_t dev;

    if (!psm_devsw_installed) {
        dev = makedev(CDEV_MAJOR, 0);
        cdevsw_add(&dev, &psm_cdevsw, NULL);
        psm_devsw_installed = TRUE;
    }
}

#ifdef PSM_HOOKAPM
/*
 * psmresume - APM resume hook.  Locks the keyboard controller, saves
 * and reprograms the controller command byte, flushes stale data,
 * optionally re-probes the device (PSM_RESETAFTERSUSPEND), and
 * re-enables it if it was open.  Returns 0 or an errno value.
 */
static int
psmresume(void *dummy)
{
    struct psm_softc *sc = psm_softc[(int)dummy];
    int unit = (int)dummy;
    int err = 0;
    int s;
    int c;

    if (verbose >= 2)
        log(LOG_NOTICE, "psm%d: APM resume hook called.\n", unit);

    /* don't let anybody mess with the aux device */
    if (!kbdc_lock(sc->kbdc, TRUE))
        return (EIO);
    s = spltty();

    /* save the current controller command byte */
    empty_both_buffers(sc->kbdc, 10);
    c = get_controller_command_byte(sc->kbdc);
    if (verbose >= 2)
        log(LOG_DEBUG, "psm%d: current command byte: %04x (psmresume).\n",
            unit, c);

    /* enable the aux port but disable the aux interrupt and the keyboard */
    if ((c == -1) || !set_controller_command_byte(sc->kbdc,
            kbdc_get_device_mask(sc->kbdc),
            KBD_DISABLE_KBD_PORT | KBD_DISABLE_KBD_INT
                | KBD_ENABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) {
        /* CONTROLLER ERROR */
        splx(s);
        kbdc_lock(sc->kbdc, FALSE);
        log(LOG_ERR,
            "psm%d: unable to set the command byte (psmresume).\n", unit);
        return (EIO);
    }

    /* flush any data */
    if (sc->state & PSM_VALID) {
        disable_aux_dev(sc->kbdc); /* this may fail; but never mind... */
        empty_aux_buffer(sc->kbdc, 10);
    }
    sc->inputbytes = 0;

#ifdef PSM_RESETAFTERSUSPEND
    /* try to detect the aux device; are you still there? */
    if (reinitialize(unit, &sc->mode)) {
        /* yes */
        sc->state |= PSM_VALID;
    } else {
        /* the device has gone! */
        restore_controller(sc->kbdc, c);
        sc->state &= ~PSM_VALID;
        log(LOG_ERR, "psm%d: the aux device has gone!
(psmresume).\n", unit); err = ENXIO; } #endif /* PSM_RESETAFTERSUSPEND */ splx(s); /* restore the driver state */ if ((sc->state & PSM_OPEN) && (err == 0)) { /* enable the aux device and the port again */ err = doopen(unit, c); if (err != 0) log(LOG_ERR, "psm%d: failed to enable the device (psmresume).\n", unit); } else { /* restore the keyboard port and disable the aux port */ if (!set_controller_command_byte(sc->kbdc, kbdc_get_device_mask(sc->kbdc), (c & KBD_KBD_CONTROL_BITS) | KBD_DISABLE_AUX_PORT | KBD_DISABLE_AUX_INT)) { /* CONTROLLER ERROR */ log(LOG_ERR, "psm%d: failed to disable the aux port (psmresume).\n", unit); err = EIO; } } /* done */ kbdc_lock(sc->kbdc, FALSE); if ((sc->state & PSM_ASLP) && !(sc->state & PSM_VALID)) { /* * Release the blocked process; it must be notified that the device * cannot be accessed anymore. */ sc->state &= ~PSM_ASLP; wakeup((caddr_t)sc); } if (verbose >= 2) log(LOG_DEBUG, "psm%d: APM resume hook exiting.\n", unit); return (err); } #endif /* PSM_HOOKAPM */ SYSINIT(psmdev, SI_SUB_DRIVERS, SI_ORDER_MIDDLE + CDEV_MAJOR, psm_drvinit, NULL) #endif /* NPSM > 0 */ diff --git a/sys/i386/linux/linux_socket.c b/sys/i386/linux/linux_socket.c index aac40a077dd9..b50cf5860e31 100644 --- a/sys/i386/linux/linux_socket.c +++ b/sys/i386/linux/linux_socket.c @@ -1,816 +1,817 @@ /*- * Copyright (c) 1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * - * $Id: linux_socket.c,v 1.11 1997/12/16 17:40:11 eivind Exp $ + * $Id: linux_socket.c,v 1.12 1998/02/07 02:13:27 msmith Exp $ */ /* XXX we use functions that might not exist. */ #include "opt_compat.h" #ifndef COMPAT_43 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 
#endif #include #include #include #include #include #include +#include #include #include #include #include #include #include static int linux_to_bsd_domain(int domain) { switch (domain) { case LINUX_AF_UNSPEC: return AF_UNSPEC; case LINUX_AF_UNIX: return AF_LOCAL; case LINUX_AF_INET: return AF_INET; case LINUX_AF_AX25: return AF_CCITT; case LINUX_AF_IPX: return AF_IPX; case LINUX_AF_APPLETALK: return AF_APPLETALK; default: return -1; } } static int linux_to_bsd_sockopt_level(int level) { switch (level) { case LINUX_SOL_SOCKET: return SOL_SOCKET; default: return level; } } static int linux_to_bsd_ip_sockopt(int opt) { switch (opt) { case LINUX_IP_TOS: return IP_TOS; case LINUX_IP_TTL: return IP_TTL; case LINUX_IP_OPTIONS: return IP_OPTIONS; case LINUX_IP_MULTICAST_IF: return IP_MULTICAST_IF; case LINUX_IP_MULTICAST_TTL: return IP_MULTICAST_TTL; case LINUX_IP_MULTICAST_LOOP: return IP_MULTICAST_LOOP; case LINUX_IP_ADD_MEMBERSHIP: return IP_ADD_MEMBERSHIP; case LINUX_IP_DROP_MEMBERSHIP: return IP_DROP_MEMBERSHIP; case LINUX_IP_HDRINCL: return IP_HDRINCL; default: return -1; } } static int linux_to_bsd_so_sockopt(int opt) { switch (opt) { case LINUX_SO_DEBUG: return SO_DEBUG; case LINUX_SO_REUSEADDR: return SO_REUSEADDR; case LINUX_SO_TYPE: return SO_TYPE; case LINUX_SO_ERROR: return SO_ERROR; case LINUX_SO_DONTROUTE: return SO_DONTROUTE; case LINUX_SO_BROADCAST: return SO_BROADCAST; case LINUX_SO_SNDBUF: return SO_SNDBUF; case LINUX_SO_RCVBUF: return SO_RCVBUF; case LINUX_SO_KEEPALIVE: return SO_KEEPALIVE; case LINUX_SO_OOBINLINE: return SO_OOBINLINE; case LINUX_SO_LINGER: return SO_LINGER; case LINUX_SO_PRIORITY: case LINUX_SO_NO_CHECK: default: return -1; } } /* Return 0 if IP_HDRINCL is set of the given socket, not 0 otherwise */ static int linux_check_hdrincl(struct proc *p, int s) { struct getsockopt_args /* { int s; int level; int name; caddr_t val; int *avalsize; } */ bsd_args; int error; caddr_t sg, val, valsize; int size_val = sizeof val; int optval; sg = 
stackgap_init(); val = stackgap_alloc(&sg, sizeof(int)); valsize = stackgap_alloc(&sg, sizeof(int)); if ((error=copyout(&size_val, valsize, sizeof(size_val)))) return error; bsd_args.s = s; bsd_args.level = IPPROTO_IP; bsd_args.name = IP_HDRINCL; bsd_args.val = val; bsd_args.avalsize = (int *)valsize; if ((error=getsockopt(p, &bsd_args))) return error; if ((error=copyin(val, &optval, sizeof(optval)))) return error; return optval == 0; } /* * Updated sendto() when IP_HDRINCL is set: * tweak endian-dependent fields in the IP packet. */ static int linux_sendto_hdrincl(struct proc *p, struct sendto_args *bsd_args) { /* * linux_ip_copysize defines how many bytes we should copy * from the beginning of the IP packet before we customize it for BSD. * It should include all the fields we modify (ip_len and ip_off) * and be as small as possible to minimize copying overhead. */ #define linux_ip_copysize 8 caddr_t sg; struct ip *packet; struct msghdr *msg; struct iovec *iov; int error; struct sendmsg_args /* { int s; caddr_t msg; int flags; } */ sendmsg_args; /* Check the packet isn't too small before we mess with it */ if (bsd_args->len < linux_ip_copysize) return EINVAL; /* * Tweaking the user buffer in place would be bad manners. * We create a corrected IP header with just the needed length, * then use an iovec to glue it to the rest of the user packet * when calling sendmsg(). 
*/ sg = stackgap_init(); packet = (struct ip *)stackgap_alloc(&sg, linux_ip_copysize); msg = (struct msghdr *)stackgap_alloc(&sg, sizeof(*msg)); iov = (struct iovec *)stackgap_alloc(&sg, sizeof(*iov)*2); /* Make a copy of the beginning of the packet to be sent */ if ((error = copyin(bsd_args->buf, (caddr_t)packet, linux_ip_copysize))) return error; /* Convert fields from Linux to BSD raw IP socket format */ packet->ip_len = bsd_args->len; packet->ip_off = ntohs(packet->ip_off); /* Prepare the msghdr and iovec structures describing the new packet */ msg->msg_name = bsd_args->to; msg->msg_namelen = bsd_args->tolen; msg->msg_iov = iov; msg->msg_iovlen = 2; msg->msg_control = NULL; msg->msg_controllen = 0; msg->msg_flags = 0; iov[0].iov_base = (char *)packet; iov[0].iov_len = linux_ip_copysize; iov[1].iov_base = (char *)(bsd_args->buf) + linux_ip_copysize; iov[1].iov_len = bsd_args->len - linux_ip_copysize; sendmsg_args.s = bsd_args->s; sendmsg_args.msg = (caddr_t)msg; sendmsg_args.flags = bsd_args->flags; return sendmsg(p, &sendmsg_args); } struct linux_socket_args { int domain; int type; int protocol; }; static int linux_socket(struct proc *p, struct linux_socket_args *args) { struct linux_socket_args linux_args; struct socket_args /* { int domain; int type; int protocol; } */ bsd_args; int error; int retval_socket; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.protocol = linux_args.protocol; bsd_args.type = linux_args.type; bsd_args.domain = linux_to_bsd_domain(linux_args.domain); if (bsd_args.domain == -1) return EINVAL; retval_socket = socket(p, &bsd_args); if (bsd_args.type == SOCK_RAW && (bsd_args.protocol == IPPROTO_RAW || bsd_args.protocol == 0) && bsd_args.domain == AF_INET && retval_socket >= 0) { /* It's a raw IP socket: set the IP_HDRINCL option. 
*/ struct setsockopt_args /* { int s; int level; int name; caddr_t val; int valsize; } */ bsd_setsockopt_args; caddr_t sg; int *hdrincl; sg = stackgap_init(); hdrincl = (int *)stackgap_alloc(&sg, sizeof(*hdrincl)); *hdrincl = 1; bsd_setsockopt_args.s = p->p_retval[0]; bsd_setsockopt_args.level = IPPROTO_IP; bsd_setsockopt_args.name = IP_HDRINCL; bsd_setsockopt_args.val = (caddr_t)hdrincl; bsd_setsockopt_args.valsize = sizeof(*hdrincl); /* We ignore any error returned by setsockopt() */ setsockopt(p, &bsd_setsockopt_args); /* Copy back the return value from socket() */ p->p_retval[0] = bsd_setsockopt_args.s; } return retval_socket; } struct linux_bind_args { int s; struct sockaddr *name; int namelen; }; static int linux_bind(struct proc *p, struct linux_bind_args *args) { struct linux_bind_args linux_args; struct bind_args /* { int s; caddr_t name; int namelen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.name = (caddr_t)linux_args.name; bsd_args.namelen = linux_args.namelen; return bind(p, &bsd_args); } struct linux_connect_args { int s; struct sockaddr * name; int namelen; }; static int linux_connect(struct proc *p, struct linux_connect_args *args) { struct linux_connect_args linux_args; struct connect_args /* { int s; caddr_t name; int namelen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.name = (caddr_t)linux_args.name; bsd_args.namelen = linux_args.namelen; error = connect(p, &bsd_args); if (error == EISCONN) { /* * Linux doesn't return EISCONN the first time it occurs, * when on a non-blocking socket. Instead it returns the * error getsockopt(SOL_SOCKET, SO_ERROR) would return on BSD. 
*/ struct fcntl_args /* { int fd; int cmd; int arg; } */ bsd_fcntl_args; struct getsockopt_args /* { int s; int level; int name; caddr_t val; int *avalsize; } */ bsd_getsockopt_args; void *status, *statusl; int stat, statl = sizeof stat; caddr_t sg; /* Check for non-blocking */ bsd_fcntl_args.fd = linux_args.s; bsd_fcntl_args.cmd = F_GETFL; bsd_fcntl_args.arg = 0; error = fcntl(p, &bsd_fcntl_args); if (error == 0 && (p->p_retval[0] & O_NONBLOCK)) { sg = stackgap_init(); status = stackgap_alloc(&sg, sizeof stat); statusl = stackgap_alloc(&sg, sizeof statusl); if ((error = copyout(&statl, statusl, sizeof statl))) return error; bsd_getsockopt_args.s = linux_args.s; bsd_getsockopt_args.level = SOL_SOCKET; bsd_getsockopt_args.name = SO_ERROR; bsd_getsockopt_args.val = status; bsd_getsockopt_args.avalsize = statusl; error = getsockopt(p, &bsd_getsockopt_args); if (error) return error; if ((error = copyin(status, &stat, sizeof stat))) return error; p->p_retval[0] = stat; return 0; } } return error; } struct linux_listen_args { int s; int backlog; }; static int linux_listen(struct proc *p, struct linux_listen_args *args) { struct linux_listen_args linux_args; struct listen_args /* { int s; int backlog; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.backlog = linux_args.backlog; return listen(p, &bsd_args); } struct linux_accept_args { int s; struct sockaddr *addr; int *namelen; }; static int linux_accept(struct proc *p, struct linux_accept_args *args) { struct linux_accept_args linux_args; struct accept_args /* { int s; caddr_t name; int *anamelen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.name = (caddr_t)linux_args.addr; bsd_args.anamelen = linux_args.namelen; return oaccept(p, &bsd_args); } struct linux_getsockname_args { int s; struct sockaddr *addr; int 
*namelen; }; static int linux_getsockname(struct proc *p, struct linux_getsockname_args *args) { struct linux_getsockname_args linux_args; struct getsockname_args /* { int fdes; caddr_t asa; int *alen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.fdes = linux_args.s; bsd_args.asa = (caddr_t) linux_args.addr; bsd_args.alen = linux_args.namelen; return ogetsockname(p, &bsd_args); } struct linux_getpeername_args { int s; struct sockaddr *addr; int *namelen; }; static int linux_getpeername(struct proc *p, struct linux_getpeername_args *args) { struct linux_getpeername_args linux_args; struct ogetpeername_args /* { int fdes; caddr_t asa; int *alen; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.fdes = linux_args.s; bsd_args.asa = (caddr_t) linux_args.addr; bsd_args.alen = linux_args.namelen; return ogetpeername(p, &bsd_args); } struct linux_socketpair_args { int domain; int type; int protocol; int *rsv; }; static int linux_socketpair(struct proc *p, struct linux_socketpair_args *args) { struct linux_socketpair_args linux_args; struct socketpair_args /* { int domain; int type; int protocol; int *rsv; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.domain = linux_to_bsd_domain(linux_args.domain); if (bsd_args.domain == -1) return EINVAL; bsd_args.type = linux_args.type; bsd_args.protocol = linux_args.protocol; bsd_args.rsv = linux_args.rsv; return socketpair(p, &bsd_args); } struct linux_send_args { int s; void *msg; int len; int flags; }; static int linux_send(struct proc *p, struct linux_send_args *args) { struct linux_send_args linux_args; struct osend_args /* { int s; caddr_t buf; int len; int flags; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = 
linux_args.s;
    bsd_args.buf = linux_args.msg;
    bsd_args.len = linux_args.len;
    bsd_args.flags = linux_args.flags;
    return osend(p, &bsd_args);
}

struct linux_recv_args {
    int s;
    void *msg;
    int len;
    int flags;
};

/*
 * linux_recv - recv(2) via the Linux socketcall interface, mapped onto
 * the old-style orecv() entry point.
 */
static int
linux_recv(struct proc *p, struct linux_recv_args *args)
{
    struct linux_recv_args linux_args;
    struct orecv_args /* {
        int s;
        caddr_t buf;
        int len;
        int flags;
    } */ bsd_args;
    int error;

    if ((error=copyin((caddr_t)args, (caddr_t)&linux_args,
            sizeof(linux_args))))
        return error;
    bsd_args.s = linux_args.s;
    bsd_args.buf = linux_args.msg;
    bsd_args.len = linux_args.len;
    bsd_args.flags = linux_args.flags;
    return orecv(p, &bsd_args);
}

struct linux_sendto_args {
    int s;
    void *msg;
    int len;
    int flags;
    caddr_t to;
    int tolen;
};

/*
 * linux_sendto - sendto(2) via the Linux socketcall interface.  When
 * the socket has IP_HDRINCL set, the packet is rewritten first by
 * linux_sendto_hdrincl() to account for header-format differences.
 */
static int
linux_sendto(struct proc *p, struct linux_sendto_args *args)
{
    struct linux_sendto_args linux_args;
    struct sendto_args /* {
        int s;
        caddr_t buf;
        size_t len;
        int flags;
        caddr_t to;
        int tolen;
    } */ bsd_args;
    int error;

    if ((error=copyin((caddr_t)args, (caddr_t)&linux_args,
            sizeof(linux_args))))
        return error;
    bsd_args.s = linux_args.s;
    bsd_args.buf = linux_args.msg;
    bsd_args.len = linux_args.len;
    bsd_args.flags = linux_args.flags;
    bsd_args.to = linux_args.to;
    bsd_args.tolen = linux_args.tolen;
    if (linux_check_hdrincl(p, linux_args.s) == 0)
        /* IP_HDRINCL set, tweak the packet before sending */
        return linux_sendto_hdrincl(p, &bsd_args);
    return sendto(p, &bsd_args);
}

struct linux_recvfrom_args {
    int s;
    void *buf;
    int len;
    int flags;
    caddr_t from;
    int *fromlen;
};

/*
 * linux_recvfrom - recvfrom(2) via the Linux socketcall interface,
 * mapped onto the old-style orecvfrom() entry point.
 */
static int
linux_recvfrom(struct proc *p, struct linux_recvfrom_args *args)
{
    struct linux_recvfrom_args linux_args;
    struct recvfrom_args /* {
        int s;
        caddr_t buf;
        size_t len;
        int flags;
        caddr_t from;
        int *fromlenaddr;
    } */ bsd_args;
    int error;

    if ((error=copyin((caddr_t)args, (caddr_t)&linux_args,
            sizeof(linux_args))))
        return error;
    bsd_args.s = linux_args.s;
    bsd_args.buf = linux_args.buf;
    bsd_args.len = linux_args.len;
    bsd_args.flags = linux_args.flags;
    bsd_args.from =
linux_args.from; bsd_args.fromlenaddr = linux_args.fromlen; return orecvfrom(p, &bsd_args); } struct linux_shutdown_args { int s; int how; }; static int linux_shutdown(struct proc *p, struct linux_shutdown_args *args) { struct linux_shutdown_args linux_args; struct shutdown_args /* { int s; int how; } */ bsd_args; int error; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.how = linux_args.how; return shutdown(p, &bsd_args); } struct linux_setsockopt_args { int s; int level; int optname; void *optval; int optlen; }; static int linux_setsockopt(struct proc *p, struct linux_setsockopt_args *args) { struct linux_setsockopt_args linux_args; struct setsockopt_args /* { int s; int level; int name; caddr_t val; int valsize; } */ bsd_args; int error, name; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.level = linux_to_bsd_sockopt_level(linux_args.level); switch (bsd_args.level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(linux_args.optname); break; case IPPROTO_IP: name = linux_to_bsd_ip_sockopt(linux_args.optname); break; case IPPROTO_TCP: /* Linux TCP option values match BSD's */ name = linux_args.optname; break; default: return EINVAL; } if (name == -1) return EINVAL; bsd_args.name = name; bsd_args.val = linux_args.optval; bsd_args.valsize = linux_args.optlen; return setsockopt(p, &bsd_args); } struct linux_getsockopt_args { int s; int level; int optname; void *optval; int *optlen; }; static int linux_getsockopt(struct proc *p, struct linux_getsockopt_args *args) { struct linux_getsockopt_args linux_args; struct getsockopt_args /* { int s; int level; int name; caddr_t val; int *avalsize; } */ bsd_args; int error, name; if ((error=copyin((caddr_t)args, (caddr_t)&linux_args, sizeof(linux_args)))) return error; bsd_args.s = linux_args.s; bsd_args.level = linux_to_bsd_sockopt_level(linux_args.level); switch 
(bsd_args.level) { case SOL_SOCKET: name = linux_to_bsd_so_sockopt(linux_args.optname); break; case IPPROTO_IP: name = linux_to_bsd_ip_sockopt(linux_args.optname); break; case IPPROTO_TCP: /* Linux TCP option values match BSD's */ name = linux_args.optname; break; default: return EINVAL; } if (name == -1) return EINVAL; bsd_args.name = name; bsd_args.val = linux_args.optval; bsd_args.avalsize = linux_args.optlen; return getsockopt(p, &bsd_args); } int linux_socketcall(struct proc *p, struct linux_socketcall_args *args) { switch (args->what) { case LINUX_SOCKET: return linux_socket(p, args->args); case LINUX_BIND: return linux_bind(p, args->args); case LINUX_CONNECT: return linux_connect(p, args->args); case LINUX_LISTEN: return linux_listen(p, args->args); case LINUX_ACCEPT: return linux_accept(p, args->args); case LINUX_GETSOCKNAME: return linux_getsockname(p, args->args); case LINUX_GETPEERNAME: return linux_getpeername(p, args->args); case LINUX_SOCKETPAIR: return linux_socketpair(p, args->args); case LINUX_SEND: return linux_send(p, args->args); case LINUX_RECV: return linux_recv(p, args->args); case LINUX_SENDTO: return linux_sendto(p, args->args); case LINUX_RECVFROM: return linux_recvfrom(p, args->args); case LINUX_SHUTDOWN: return linux_shutdown(p, args->args); case LINUX_SETSOCKOPT: return linux_setsockopt(p, args->args); case LINUX_GETSOCKOPT: return linux_getsockopt(p, args->args); default: uprintf("LINUX: 'socket' typ=%d not implemented\n", args->what); return ENOSYS; } } diff --git a/sys/isa/joy.c b/sys/isa/joy.c index 84682a929247..072be4edb5c5 100644 --- a/sys/isa/joy.c +++ b/sys/isa/joy.c @@ -1,299 +1,300 @@ /*- * Copyright (c) 1995 Jean-Marc Zucconi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software withough specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ #include "joy.h" #if NJOY > 0 #include "opt_devfs.h" #include #include #include #include #ifdef DEVFS #include #endif /*DEVFS*/ +#include #include #include #include #include #include /* The game port can manage 4 buttons and 4 variable resistors (usually 2 * joysticks, each with 2 buttons and 2 pots.) via the port at address 0x201. * Getting the state of the buttons is done by reading the game port: * buttons 1-4 correspond to bits 4-7 and resistors 1-4 (X1, Y1, X2, Y2) * to bits 0-3. * if button 1 (resp 2, 3, 4) is pressed, the bit 4 (resp 5, 6, 7) is set to 0 * to get the value of a resistor, write the value 0xff at port and * wait until the corresponding bit returns to 0. */ /* the formulae below only work if u is ``not too large''. 
See also * the discussion in microtime.s */ #define usec2ticks(u) (((u) * 19549)>>14) #define ticks2usec(u) (((u) * 3433)>>12) #define joypart(d) minor(d)&1 #define UNIT(d) minor(d)>>1&3 #ifndef JOY_TIMEOUT #define JOY_TIMEOUT 2000 /* 2 milliseconds */ #endif static struct { int port; int x_off[2], y_off[2]; int timeout[2]; #ifdef DEVFS void *devfs_token; #endif } joy[NJOY]; static int joyprobe (struct isa_device *); static int joyattach (struct isa_device *); struct isa_driver joydriver = {joyprobe, joyattach, "joy"}; #define CDEV_MAJOR 51 static d_open_t joyopen; static d_close_t joyclose; static d_read_t joyread; static d_ioctl_t joyioctl; static struct cdevsw joy_cdevsw = { joyopen, joyclose, joyread, nowrite, /*51*/ joyioctl, nostop, nullreset, nodevtotty,/*joystick */ seltrue, nommap, NULL, "joy", NULL, -1 }; static int get_tick __P((void)); static int joyprobe (struct isa_device *dev) { #ifdef WANT_JOYSTICK_CONNECTED outb (dev->id_iobase, 0xff); DELAY (10000); /* 10 ms delay */ return (inb (dev->id_iobase) & 0x0f) != 0x0f; #else return 1; #endif } static int joyattach (struct isa_device *dev) { int unit = dev->id_unit; joy[unit].port = dev->id_iobase; joy[unit].timeout[0] = joy[unit].timeout[1] = 0; printf("joy%d: joystick\n", unit); #ifdef DEVFS joy[dev->id_unit].devfs_token = devfs_add_devswf(&joy_cdevsw, 0, DV_CHR, 0, 0, 0600, "joy%d", unit); #endif return 1; } static int joyopen (dev_t dev, int flags, int fmt, struct proc *p) { int unit = UNIT (dev); int i = joypart (dev); if (joy[unit].timeout[i]) return EBUSY; joy[unit].x_off[i] = joy[unit].y_off[i] = 0; joy[unit].timeout[i] = JOY_TIMEOUT; return 0; } static int joyclose (dev_t dev, int flags, int fmt, struct proc *p) { int unit = UNIT (dev); int i = joypart (dev); joy[unit].timeout[i] = 0; return 0; } static int joyread (dev_t dev, struct uio *uio, int flag) { int unit = UNIT(dev); int port = joy[unit].port; int i, t0, t1; int state = 0, x = 0, y = 0; struct joystick c; disable_intr (); outb (port, 
0xff); t0 = get_tick (); t1 = t0; i = usec2ticks(joy[unit].timeout[joypart(dev)]); while (t0-t1 < i) { state = inb (port); if (joypart(dev) == 1) state >>= 2; t1 = get_tick (); if (t1 > t0) t1 -= timer0_max_count; if (!x && !(state & 0x01)) x = t1; if (!y && !(state & 0x02)) y = t1; if (x && y) break; } enable_intr (); c.x = x ? joy[unit].x_off[joypart(dev)] + ticks2usec(t0-x) : 0x80000000; c.y = y ? joy[unit].y_off[joypart(dev)] + ticks2usec(t0-y) : 0x80000000; state >>= 4; c.b1 = ~state & 1; c.b2 = ~(state >> 1) & 1; return uiomove ((caddr_t)&c, sizeof(struct joystick), uio); } static int joyioctl (dev_t dev, int cmd, caddr_t data, int flag, struct proc *p) { int unit = UNIT (dev); int i = joypart (dev); int x; switch (cmd) { case JOY_SETTIMEOUT: x = *(int *) data; if (x < 1 || x > 10000) /* 10ms maximum! */ return EINVAL; joy[unit].timeout[i] = x; break; case JOY_GETTIMEOUT: *(int *) data = joy[unit].timeout[i]; break; case JOY_SET_X_OFFSET: joy[unit].x_off[i] = *(int *) data; break; case JOY_SET_Y_OFFSET: joy[unit].y_off[i] = *(int *) data; break; case JOY_GET_X_OFFSET: *(int *) data = joy[unit].x_off[i]; break; case JOY_GET_Y_OFFSET: *(int *) data = joy[unit].y_off[i]; break; default: return ENXIO; } return 0; } static int get_tick () { int low, high; outb (TIMER_MODE, TIMER_SEL0); low = inb (TIMER_CNTR0); high = inb (TIMER_CNTR0); return (high << 8) | low; } static joy_devsw_installed = 0; static void joy_drvinit(void *unused) { dev_t dev; if( ! 
joy_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&joy_cdevsw,NULL); joy_devsw_installed = 1; } } SYSINIT(joydev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,joy_drvinit,NULL) #ifdef JOY_MODULE #include #include #include MOD_DEV (joy, LM_DT_CHAR, CDEV_MAJOR, &joy_cdevsw); static struct isa_device dev = {0, &joydriver, IO_GAME, 0, -1, (caddr_t) 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0}; static int joy_load (struct lkm_table *lkmtp, int cmd) { if (joyprobe (&dev)) { joyattach (&dev); /* joy_drvinit (0);*/ uprintf ("Joystick driver loaded\n"); return 0; } else { uprintf ("Joystick driver: probe failed\n"); return 1; } } static int joy_unload (struct lkm_table *lkmtp, int cmd) { uprintf ("Joystick driver unloaded\n"); return 0; } static int joy_stat (struct lkm_table *lkmtp, int cmd) { return 0; } int joy_mod (struct lkm_table *lkmtp, int cmd, int ver) { MOD_DISPATCH(joy, lkmtp, cmd, ver, joy_load, joy_unload, joy_stat); } #endif /* JOY_MODULE */ #endif /* NJOY > 0 */ diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c index 0d3f31c24586..63978434e086 100644 --- a/sys/kern/kern_physio.c +++ b/sys/kern/kern_physio.c @@ -1,234 +1,236 @@ /* * Copyright (c) 1994 John S. Dyson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice immediately at the beginning of the file, without modification, * this list of conditions, and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * 4. Modifications may be freely made to this file if the above conditions * are met. 
* - * $Id: kern_physio.c,v 1.23 1998/01/24 02:01:18 dyson Exp $ + * $Id: kern_physio.c,v 1.24 1998/03/19 22:48:05 dyson Exp $ */ #include #include #include #include #include +#include + #include #include static void physwakeup __P((struct buf *bp)); static struct buf * phygetvpbuf(dev_t dev, int resid); int physio(strategy, bp, dev, rw, minp, uio) d_strategy_t *strategy; struct buf *bp; dev_t dev; int rw; u_int (*minp) __P((struct buf *bp)); struct uio *uio; { int i; int bufflags = rw?B_READ:0; int error; int spl; caddr_t sa; int bp_alloc = (bp == 0); struct buf *bpa; /* * keep the process from being swapped */ curproc->p_flag |= P_PHYSIO; /* create and build a buffer header for a transfer */ bpa = (struct buf *)phygetvpbuf(dev, uio->uio_resid); if (!bp_alloc) { spl = splbio(); while (bp->b_flags & B_BUSY) { bp->b_flags |= B_WANTED; tsleep((caddr_t)bp, PRIBIO, "physbw", 0); } bp->b_flags |= B_BUSY; splx(spl); } else { bp = bpa; } /* * get a copy of the kva from the physical buffer */ sa = bpa->b_data; bp->b_proc = curproc; error = bp->b_error = 0; for(i=0;iuio_iovcnt;i++) { while( uio->uio_iov[i].iov_len) { bp->b_dev = dev; bp->b_bcount = uio->uio_iov[i].iov_len; bp->b_flags = B_BUSY | B_PHYS | B_CALL | bufflags; bp->b_iodone = physwakeup; bp->b_data = uio->uio_iov[i].iov_base; bp->b_bcount = minp( bp); if( minp != minphys) bp->b_bcount = minphys( bp); bp->b_bufsize = bp->b_bcount; /* * pass in the kva from the physical buffer * for the temporary kernel mapping. 
*/ bp->b_saveaddr = sa; bp->b_blkno = btodb(uio->uio_offset); if (uio->uio_segflg == UIO_USERSPACE) { if (rw && !useracc(bp->b_data, bp->b_bufsize, B_WRITE)) { error = EFAULT; goto doerror; } if (!rw && !useracc(bp->b_data, bp->b_bufsize, B_READ)) { error = EFAULT; goto doerror; } /* bring buffer into kernel space */ vmapbuf(bp); } /* perform transfer */ (*strategy)(bp); spl = splbio(); while ((bp->b_flags & B_DONE) == 0) tsleep((caddr_t)bp, PRIBIO, "physstr", 0); splx(spl); /* release mapping into kernel space */ if (uio->uio_segflg == UIO_USERSPACE) vunmapbuf(bp); /* * update the uio data */ { int iolen = bp->b_bcount - bp->b_resid; if (iolen == 0 && !(bp->b_flags & B_ERROR)) goto doerror; /* EOF */ uio->uio_iov[i].iov_len -= iolen; uio->uio_iov[i].iov_base += iolen; uio->uio_resid -= iolen; uio->uio_offset += iolen; } /* * check for an error */ if( bp->b_flags & B_ERROR) { error = bp->b_error; goto doerror; } } } doerror: relpbuf(bpa); if (!bp_alloc) { bp->b_flags &= ~(B_BUSY|B_PHYS); if( bp->b_flags & B_WANTED) { bp->b_flags &= ~B_WANTED; wakeup((caddr_t)bp); } } /* * allow the process to be swapped */ curproc->p_flag &= ~P_PHYSIO; return (error); } u_int minphys(bp) struct buf *bp; { u_int maxphys = DFLTPHYS; struct bdevsw *bdsw; int offset; bdsw = cdevsw[major(bp->b_dev)]->d_bdev; if (bdsw && bdsw->d_maxio) { maxphys = bdsw->d_maxio; } if (bp->b_kvasize < maxphys) maxphys = bp->b_kvasize; if(((vm_offset_t) bp->b_data) & PAGE_MASK) { maxphys -= PAGE_SIZE; } if( bp->b_bcount > maxphys) { bp->b_bcount = maxphys; } return bp->b_bcount; } struct buf * phygetvpbuf(dev_t dev, int resid) { struct bdevsw *bdsw; int maxio; bdsw = cdevsw[major(dev)]->d_bdev; if (bdsw == NULL) return getpbuf(); maxio = bdsw->d_maxio; if (resid > maxio) resid = maxio; return getpbuf(); } int rawread(dev, uio, ioflag) dev_t dev; struct uio *uio; int ioflag; { return (physio(cdevsw[major(dev)]->d_strategy, (struct buf *)NULL, dev, 1, minphys, uio)); } int rawwrite(dev, uio, ioflag) dev_t 
dev; struct uio *uio; int ioflag; { return (physio(cdevsw[major(dev)]->d_strategy, (struct buf *)NULL, dev, 0, minphys, uio)); } static void physwakeup(bp) struct buf *bp; { wakeup((caddr_t) bp); bp->b_flags &= ~B_CALL; } diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index b8f8c4799cef..46089c7c9869 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -1,755 +1,756 @@ /*- * Copyright (c) 1982, 1986, 1990, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 - * $Id: kern_synch.c,v 1.49 1998/03/08 09:56:59 julian Exp $ + * $Id: kern_synch.c,v 1.50 1998/03/11 20:50:42 dufault Exp $ */ #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE +#include #include #endif #include #include /* for UCHAR_MAX = typeof(p_priority)_MAX */ static void rqinit __P((void *)); SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, rqinit, NULL) u_char curpriority; /* usrpri of curproc */ int lbolt; /* once a second sleep address */ static void endtsleep __P((void *)); static void roundrobin __P((void *arg)); static void schedcpu __P((void *arg)); static void updatepri __P((struct proc *p)); #define MAXIMUM_SCHEDULE_QUANTUM (1000000) /* arbitrary limit */ #ifndef DEFAULT_SCHEDULE_QUANTUM #define DEFAULT_SCHEDULE_QUANTUM 10 #endif static int quantum = DEFAULT_SCHEDULE_QUANTUM; /* default value */ static int sysctl_kern_quantum SYSCTL_HANDLER_ARGS { int error; int new_val = quantum; new_val = quantum; error = sysctl_handle_int(oidp, &new_val, 0, req); if (error == 0) { if ((new_val > 0) && (new_val < MAXIMUM_SCHEDULE_QUANTUM)) { quantum = new_val; } else { error = EINVAL; } } return (error); } SYSCTL_PROC(_kern, OID_AUTO, quantum, CTLTYPE_INT|CTLFLAG_RW, 0, sizeof quantum, sysctl_kern_quantum, "I", ""); /* maybe_resched: Decide if you need to reschedule or not * taking the priorities and schedulers into account. 
*/ static void maybe_resched(struct proc *chk) { struct proc *p = curproc; /* XXX */ /* If the current scheduler is the idle scheduler or * the priority of the new one is higher then reschedule. */ if (p == 0 || RTP_PRIO_BASE(p->p_rtprio.type) == RTP_PRIO_IDLE || (chk->p_priority < curpriority && RTP_PRIO_BASE(p->p_rtprio.type) == RTP_PRIO_BASE(chk->p_rtprio.type)) ) need_resched(); } #define ROUNDROBIN_INTERVAL (hz / quantum) int roundrobin_interval(void) { return ROUNDROBIN_INTERVAL; } /* * Force switch among equal priority processes every 100ms. */ /* ARGSUSED */ static void roundrobin(arg) void *arg; { struct proc *p = curproc; /* XXX */ if (p == 0 || RTP_PRIO_NEED_RR(p->p_rtprio.type)) need_resched(); timeout(roundrobin, NULL, ROUNDROBIN_INTERVAL); } /* * Constants for digital decay and forget: * 90% of (p_estcpu) usage in 5 * loadav time * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) * Note that, as ps(1) mentions, this can let percentages * total over 100% (I've seen 137.9% for 3 processes). * * Note that statclock() updates p_estcpu and p_cpticks asynchronously. * * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. * That is, the system wants to compute a value of decay such * that the following for loop: * for (i = 0; i < (5 * loadavg); i++) * p_estcpu *= decay; * will compute * p_estcpu *= 0.1; * for all values of loadavg: * * Mathematically this loop can be expressed by saying: * decay ** (5 * loadavg) ~= .1 * * The system computes decay as: * decay = (2 * loadavg) / (2 * loadavg + 1) * * We wish to prove that the system's computation of decay * will always fulfill the equation: * decay ** (5 * loadavg) ~= .1 * * If we compute b as: * b = 2 * loadavg * then * decay = b / (b + 1) * * We now need to prove two things: * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) * * Facts: * For x close to zero, exp(x) =~ 1 + x, since * exp(x) = 0! + x**1/1! 
+ x**2/2! + ... . * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. * For x close to zero, ln(1+x) =~ x, since * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). * ln(.1) =~ -2.30 * * Proof of (1): * Solve (factor)**(power) =~ .1 given power (5*loadav): * solving for factor, * ln(factor) =~ (-2.30/5*loadav), or * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED * * Proof of (2): * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): * solving for power, * power*ln(b/(b+1)) =~ -2.30, or * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED * * Actual power values for the implemented algorithm are as follows: * loadav: 1 2 3 4 * power: 5.68 10.32 14.94 19.55 */ /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ #define loadfactor(loadav) (2 * (loadav)) #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ /* * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). * * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). * * If you don't want to bother with the faster/more-accurate formula, you * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate * (more general) method of calculating the %age of CPU used by a process. */ #define CCPU_SHIFT 11 /* * Recompute process priorities, every hz ticks. 
*/ /* ARGSUSED */ static void schedcpu(arg) void *arg; { register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); register struct proc *p; register int s; register unsigned int newcpu; for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { /* * Increment time in/out of memory and sleep time * (if sleeping). We ignore overflow; with 16-bit int's * (remember them?) overflow takes 45 days. */ p->p_swtime++; if (p->p_stat == SSLEEP || p->p_stat == SSTOP) p->p_slptime++; p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; /* * If the process has slept the entire second, * stop recalculating its priority until it wakes up. */ if (p->p_slptime > 1) continue; s = splhigh(); /* prevent state changes and protect run queue */ /* * p_pctcpu is only for ps. */ #if (FSHIFT >= CCPU_SHIFT) p->p_pctcpu += (hz == 100)? ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 100 * (((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT)) / hz; #else p->p_pctcpu += ((FSCALE - ccpu) * (p->p_cpticks * FSCALE / hz)) >> FSHIFT; #endif p->p_cpticks = 0; newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu) + p->p_nice; p->p_estcpu = min(newcpu, UCHAR_MAX); resetpriority(p); if (p->p_priority >= PUSER) { #define PPQ (128 / NQS) /* priorities per queue */ if ((p != curproc) && #ifdef SMP (u_char)p->p_oncpu == 0xff && /* idle */ #endif p->p_stat == SRUN && (p->p_flag & P_INMEM) && (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { remrq(p); p->p_priority = p->p_usrpri; setrunqueue(p); } else p->p_priority = p->p_usrpri; } splx(s); } vmmeter(); wakeup((caddr_t)&lbolt); timeout(schedcpu, (void *)0, hz); } /* * Recalculate the priority of a process after it has slept for a while. * For all load averages >= 1 and max p_estcpu of 255, sleeping for at * least six times the loadfactor will decay p_estcpu to zero. 
*/ static void updatepri(p) register struct proc *p; { register unsigned int newcpu = p->p_estcpu; register fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); if (p->p_slptime > 5 * loadfac) p->p_estcpu = 0; else { p->p_slptime--; /* the first time was done in schedcpu */ while (newcpu && --p->p_slptime) newcpu = (int) decay_cpu(loadfac, newcpu); p->p_estcpu = min(newcpu, UCHAR_MAX); } resetpriority(p); } /* * We're only looking at 7 bits of the address; everything is * aligned to 4, lots of things are aligned to greater powers * of 2. Shift right by 8, i.e. drop the bottom 256 worth. */ #define TABLESIZE 128 static TAILQ_HEAD(slpquehead, proc) slpque[TABLESIZE]; #define LOOKUP(x) (((long)(x) >> 8) & (TABLESIZE - 1)) /* * During autoconfiguration or after a panic, a sleep will simply * lower the priority briefly to allow interrupts, then return. * The priority to be used (safepri) is machine-dependent, thus this * value is initialized and maintained in the machine-dependent layers. * This priority will typically be 0, or the lowest priority * that is safe for use on the interrupt stack; it can be made * higher to block network software interrupts after panics. */ int safepri; void sleepinit() { int i; for (i = 0; i < TABLESIZE; i++) TAILQ_INIT(&slpque[i]); } /* * General sleep call. Suspends the current process until a wakeup is * performed on the specified identifier. The process will then be made * runnable with the specified priority. Sleeps at most timo/hz seconds * (0 means no timeout). If pri includes PCATCH flag, signals are checked * before and after sleeping, else signals are not checked. Returns 0 if * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a * signal needs to be delivered, ERESTART is returned if the current system * call should be restarted if possible, and EINTR is returned if the system * call should be interrupted by the signal (return EINTR). 
*/ int tsleep(ident, priority, wmesg, timo) void *ident; int priority, timo; const char *wmesg; { struct proc *p = curproc; int s, sig, catch = priority & PCATCH; struct callout_handle thandle; #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 1, 0); #endif s = splhigh(); if (cold || panicstr) { /* * After a panic, or during autoconfiguration, * just give interrupts a chance, then just return; * don't run any other procs or panic below, * in case this is the idle process and already asleep. */ splx(safepri); splx(s); return (0); } #ifdef DIAGNOSTIC if(p == NULL) panic("tsleep1"); if (ident == NULL || p->p_stat != SRUN) panic("tsleep"); /* XXX This is not exhaustive, just the most common case */ if ((p->p_procq.tqe_prev != NULL) && (*p->p_procq.tqe_prev == p)) panic("sleeping process already on another queue"); #endif p->p_wchan = ident; p->p_wmesg = wmesg; p->p_slptime = 0; p->p_priority = priority & PRIMASK; TAILQ_INSERT_TAIL(&slpque[LOOKUP(ident)], p, p_procq); if (timo) thandle = timeout(endtsleep, (void *)p, timo); /* * We put ourselves on the sleep queue and start our timeout * before calling CURSIG, as we could stop there, and a wakeup * or a SIGCONT (or both) could occur while we were stopped. * A SIGCONT would cause us to be marked as SSLEEP * without resuming us, thus we must be ready for sleep * when CURSIG is called. If the wakeup happens while we're * stopped, p->p_wchan will be 0 upon return from CURSIG. 
*/ if (catch) { p->p_flag |= P_SINTR; if ((sig = CURSIG(p))) { if (p->p_wchan) unsleep(p); p->p_stat = SRUN; goto resume; } if (p->p_wchan == 0) { catch = 0; goto resume; } } else sig = 0; p->p_stat = SSLEEP; p->p_stats->p_ru.ru_nvcsw++; mi_switch(); resume: curpriority = p->p_usrpri; splx(s); p->p_flag &= ~P_SINTR; if (p->p_flag & P_TIMEOUT) { p->p_flag &= ~P_TIMEOUT; if (sig == 0) { #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif return (EWOULDBLOCK); } } else if (timo) untimeout(endtsleep, (void *)p, thandle); if (catch && (sig != 0 || (sig = CURSIG(p)))) { #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif if (p->p_sigacts->ps_sigintr & sigmask(sig)) return (EINTR); return (ERESTART); } #ifdef KTRACE if (KTRPOINT(p, KTR_CSW)) ktrcsw(p->p_tracep, 0, 0); #endif return (0); } /* * Implement timeout for tsleep. * If process hasn't been awakened (wchan non-zero), * set timeout flag and undo the sleep. If proc * is stopped, just unsleep so it will remain stopped. */ static void endtsleep(arg) void *arg; { register struct proc *p; int s; p = (struct proc *)arg; s = splhigh(); if (p->p_wchan) { if (p->p_stat == SSLEEP) setrunnable(p); else unsleep(p); p->p_flag |= P_TIMEOUT; } splx(s); } /* * Remove a process from its wait queue */ void unsleep(p) register struct proc *p; { int s; s = splhigh(); if (p->p_wchan) { TAILQ_REMOVE(&slpque[LOOKUP(p->p_wchan)], p, p_procq); p->p_wchan = 0; } splx(s); } /* * Make all processes sleeping on the specified identifier runnable. 
*/ void wakeup(ident) register void *ident; { register struct slpquehead *qp; register struct proc *p; int s; s = splhigh(); qp = &slpque[LOOKUP(ident)]; restart: for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) { #ifdef DIAGNOSTIC if (p->p_stat != SSLEEP && p->p_stat != SSTOP) panic("wakeup"); #endif if (p->p_wchan == ident) { TAILQ_REMOVE(qp, p, p_procq); p->p_wchan = 0; if (p->p_stat == SSLEEP) { /* OPTIMIZED EXPANSION OF setrunnable(p); */ if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; p->p_stat = SRUN; if (p->p_flag & P_INMEM) { setrunqueue(p); maybe_resched(p); } else { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } /* END INLINE EXPANSION */ goto restart; } } } splx(s); } /* * Make a process sleeping on the specified identifier runnable. * May wake more than one process if a target prcoess is currently * swapped out. */ void wakeup_one(ident) register void *ident; { register struct slpquehead *qp; register struct proc *p; int s; s = splhigh(); qp = &slpque[LOOKUP(ident)]; for (p = qp->tqh_first; p != NULL; p = p->p_procq.tqe_next) { #ifdef DIAGNOSTIC if (p->p_stat != SSLEEP && p->p_stat != SSTOP) panic("wakeup_one"); #endif if (p->p_wchan == ident) { TAILQ_REMOVE(qp, p, p_procq); p->p_wchan = 0; if (p->p_stat == SSLEEP) { /* OPTIMIZED EXPANSION OF setrunnable(p); */ if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; p->p_stat = SRUN; if (p->p_flag & P_INMEM) { setrunqueue(p); maybe_resched(p); break; } else { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } /* END INLINE EXPANSION */ } } } splx(s); } /* * The machine independent parts of mi_switch(). * Must be called at splstatclock() or higher. */ void mi_switch() { register struct proc *p = curproc; /* XXX */ register struct rlimit *rlim; register long s, u; int x; struct timeval tv; /* * XXX this spl is almost unnecessary. It is partly to allow for * sloppy callers that don't do it (issignal() via CURSIG() is the * main offender). 
It is partly to work around a bug in the i386 * cpu_switch() (the ipl is not preserved). We ran for years * without it. I think there was only a interrupt latency problem. * The main caller, tsleep(), does an splx() a couple of instructions * after calling here. The buggy caller, issignal(), usually calls * here at spl0() and sometimes returns at splhigh(). The process * then runs for a little too long at splhigh(). The ipl gets fixed * when the process returns to user mode (or earlier). * * It would probably be better to always call here at spl0(). Callers * are prepared to give up control to another process, so they must * be prepared to be interrupted. The clock stuff here may not * actually need splstatclock(). */ x = splstatclock(); #ifdef SIMPLELOCK_DEBUG if (p->p_simple_locks) printf("sleep: holding simple lock\n"); #endif /* * Compute the amount of time during which the current * process was running, and add that to its total so far. */ microtime(&tv); u = p->p_rtime.tv_usec + (tv.tv_usec - runtime.tv_usec); s = p->p_rtime.tv_sec + (tv.tv_sec - runtime.tv_sec); if (u < 0) { u += 1000000; s--; } else if (u >= 1000000) { u -= 1000000; s++; } #ifdef SMP if (s < 0) s = u = 0; #endif p->p_rtime.tv_usec = u; p->p_rtime.tv_sec = s; /* * Check if the process exceeds its cpu resource allocation. * If over max, kill it. */ if (p->p_stat != SZOMB) { rlim = &p->p_rlimit[RLIMIT_CPU]; if (s >= rlim->rlim_cur) { if (s >= rlim->rlim_max) killproc(p, "exceeded maximum CPU limit"); else { psignal(p, SIGXCPU); if (rlim->rlim_cur < rlim->rlim_max) rlim->rlim_cur += 5; } } } /* * Pick a new current process and record its start time. */ cnt.v_swtch++; cpu_switch(p); microtime(&runtime); splx(x); } /* * Initialize the (doubly-linked) run queues * to be empty. 
*/ /* ARGSUSED*/ static void rqinit(dummy) void *dummy; { register int i; for (i = 0; i < NQS; i++) { qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i]; rtqs[i].ph_link = rtqs[i].ph_rlink = (struct proc *)&rtqs[i]; idqs[i].ph_link = idqs[i].ph_rlink = (struct proc *)&idqs[i]; } } /* * Change process state to be runnable, * placing it on the run queue if it is in memory, * and awakening the swapper if it isn't in memory. */ void setrunnable(p) register struct proc *p; { register int s; s = splhigh(); switch (p->p_stat) { case 0: case SRUN: case SZOMB: default: panic("setrunnable"); case SSTOP: case SSLEEP: unsleep(p); /* e.g. when sending signals */ break; case SIDL: break; } p->p_stat = SRUN; if (p->p_flag & P_INMEM) setrunqueue(p); splx(s); if (p->p_slptime > 1) updatepri(p); p->p_slptime = 0; if ((p->p_flag & P_INMEM) == 0) { p->p_flag |= P_SWAPINREQ; wakeup((caddr_t)&proc0); } else maybe_resched(p); } /* * Compute the priority of a process when running in user mode. * Arrange to reschedule if the resulting priority is better * than that of the current process. */ void resetpriority(p) register struct proc *p; { register unsigned int newpriority; if (p->p_rtprio.type == RTP_PRIO_NORMAL) { newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; newpriority = min(newpriority, MAXPRI); p->p_usrpri = newpriority; } maybe_resched(p); } /* ARGSUSED */ static void sched_setup __P((void *dummy)); static void sched_setup(dummy) void *dummy; { /* Kick off timeout driven events by calling first time. */ roundrobin(NULL); schedcpu(NULL); } SYSINIT(sched_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, sched_setup, NULL) diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 5e1bb32e54a1..f4475bb199b8 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -1,1104 +1,1105 @@ /*- * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. 
* * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 - * $Id: trap.c,v 1.122 1998/02/06 12:13:10 eivind Exp $ + * $Id: trap.c,v 1.123 1998/03/23 19:52:37 jlemon Exp $ */ /* * 386 Trap and System call handling */ #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_ktrace.h" #include "opt_trap.h" #include "opt_vm86.h" #include #include #include #include #include #include #include #include #include +#include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #ifdef POWERFAIL_NMI #include #include #endif #ifdef VM86 #include #endif #include "isa.h" #include "npx.h" extern struct i386tss common_tss; int (*pmath_emulate) __P((struct trapframe *)); extern void trap __P((struct trapframe frame)); extern int trapwrite __P((unsigned addr)); extern void syscall __P((struct trapframe frame)); static int trap_pfault __P((struct trapframe *, int)); static void trap_fatal __P((struct trapframe *)); void dblfault_handler __P((void)); extern inthand_t IDTVEC(syscall); #define MAX_TRAP_MSG 28 static char *trap_msg[] = { "", /* 0 unused */ "privileged instruction fault", /* 1 T_PRIVINFLT */ "", /* 2 unused */ "breakpoint instruction fault", /* 3 T_BPTFLT */ "", /* 4 unused */ "", /* 5 unused */ "arithmetic trap", /* 6 T_ARITHTRAP */ "system forced exception", /* 7 T_ASTFLT */ "", /* 8 unused */ "general protection fault", /* 9 T_PROTFLT */ "trace trap", /* 10 T_TRCTRAP */ "", /* 11 unused */ "page fault", /* 12 T_PAGEFLT */ "", /* 13 unused */ "alignment fault", /* 14 T_ALIGNFLT */ "", /* 15 unused */ "", /* 16 unused */ "", /* 17 unused */ "integer divide fault", /* 18 T_DIVIDE */ "non-maskable interrupt trap", /* 19 T_NMI */ "overflow trap", /* 20 T_OFLOW */ "FPU bounds check fault", /* 21 T_BOUND */ "FPU device not available", /* 22 T_DNA */ "double fault", /* 23 T_DOUBLEFLT */ "FPU operand fetch fault", /* 24 T_FPOPFLT */ "invalid TSS 
fault", /* 25 T_TSSFLT */ "segment not present fault", /* 26 T_SEGNPFLT */ "stack fault", /* 27 T_STKFLT */ "machine check trap", /* 28 T_MCHK */ }; static void userret __P((struct proc *p, struct trapframe *frame, u_quad_t oticks)); #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern struct gate_descriptor *t_idt; extern int has_f00f_bug; #endif static inline void userret(p, frame, oticks) struct proc *p; struct trapframe *frame; u_quad_t oticks; { int sig, s; while ((sig = CURSIG(p)) != 0) postsig(sig); #if 0 if (!want_resched && (p->p_priority <= p->p_usrpri) && (p->p_rtprio.type == RTP_PRIO_NORMAL)) { int newpriority; p->p_estcpu += 1; newpriority = PUSER + p->p_estcpu / 4 + 2 * p->p_nice; newpriority = min(newpriority, MAXPRI); p->p_usrpri = newpriority; } #endif p->p_priority = p->p_usrpri; if (want_resched) { /* * Since we are curproc, clock will normally just change * our priority without moving us from one queue to another * (since the running process is not on a queue.) * If that happened after we setrunqueue ourselves but before we * mi_switch()'ed, we might not be on the queue indicated by * our priority. */ s = splhigh(); setrunqueue(p); p->p_stats->p_ru.ru_nivcsw++; mi_switch(); splx(s); while ((sig = CURSIG(p)) != 0) postsig(sig); } /* * Charge system time if profiling. */ if (p->p_flag & P_PROFIL) addupc_task(p, frame->tf_eip, (u_int)(p->p_sticks - oticks) * psratio); curpriority = p->p_priority; } /* * Exception, fault, and trap interface to the FreeBSD kernel. * This common code is called from assembly language IDT gate entry * routines that prepare a suitable stack frame, and restore this * frame after the exception has been processed. 
*/ void trap(frame) struct trapframe frame; { struct proc *p = curproc; u_quad_t sticks = 0; int i = 0, ucode = 0, type, code; #ifdef DEBUG u_long eva; #endif #if defined(I586_CPU) && !defined(NO_F00F_HACK) restart: #endif type = frame.tf_trapno; code = frame.tf_err; #ifdef VM86 if (in_vm86call) { if (frame.tf_eflags & PSL_VM && (type == T_PROTFLT || type == T_STKFLT)) { i = vm86_emulate((struct vm86frame *)&frame); if (i != 0) /* * returns to original process */ vm86_trap((struct vm86frame *)&frame); return; } switch (type) { /* * these traps want either a process context, or * assume a normal userspace trap. */ case T_PROTFLT: case T_SEGNPFLT: trap_fatal(&frame); return; case T_TRCTRAP: type = T_BPTFLT; /* kernel breakpoint */ /* FALL THROUGH */ } goto kernel_trap; /* normal kernel trap handling */ } #endif if ((ISPL(frame.tf_cs) == SEL_UPL) || (frame.tf_eflags & PSL_VM)) { /* user trap */ sticks = p->p_sticks; p->p_md.md_regs = &frame; switch (type) { case T_PRIVINFLT: /* privileged instruction fault */ ucode = type; i = SIGILL; break; case T_BPTFLT: /* bpt instruction fault */ case T_TRCTRAP: /* trace trap */ frame.tf_eflags &= ~PSL_T; i = SIGTRAP; break; case T_ARITHTRAP: /* arithmetic trap */ ucode = code; i = SIGFPE; break; case T_ASTFLT: /* Allow process switch */ astoff(); cnt.v_soft++; if (p->p_flag & P_OWEUPC) { p->p_flag &= ~P_OWEUPC; addupc_task(p, p->p_stats->p_prof.pr_addr, p->p_stats->p_prof.pr_ticks); } goto out; /* * The following two traps can happen in * vm86 mode, and, if so, we want to handle * them specially. 
*/ case T_PROTFLT: /* general protection fault */ case T_STKFLT: /* stack fault */ #ifdef VM86 if (frame.tf_eflags & PSL_VM) { i = vm86_emulate((struct vm86frame *)&frame); if (i == 0) goto out; break; } #endif /* VM86 */ /* FALL THROUGH */ case T_SEGNPFLT: /* segment not present fault */ case T_TSSFLT: /* invalid TSS fault */ case T_DOUBLEFLT: /* double fault */ default: ucode = code + BUS_SEGM_FAULT ; i = SIGBUS; break; case T_PAGEFLT: /* page fault */ i = trap_pfault(&frame, TRUE); if (i == -1) return; #if defined(I586_CPU) && !defined(NO_F00F_HACK) if (i == -2) goto restart; #endif if (i == 0) goto out; ucode = T_PAGEFLT; break; case T_DIVIDE: /* integer divide fault */ ucode = FPE_INTDIV_TRAP; i = SIGFPE; break; #if NISA > 0 case T_NMI: #ifdef POWERFAIL_NMI goto handle_powerfail; #else /* !POWERFAIL_NMI */ #ifdef DDB /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); if (kdb_trap (type, 0, &frame)) return; #endif /* DDB */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) return; panic("NMI indicates hardware failure"); #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ case T_OFLOW: /* integer overflow fault */ ucode = FPE_INTOVF_TRAP; i = SIGFPE; break; case T_BOUND: /* bounds check fault */ ucode = FPE_SUBRNG_TRAP; i = SIGFPE; break; case T_DNA: #if NNPX > 0 /* if a transparent fault (due to context switch "late") */ if (npxdna()) return; #endif if (!pmath_emulate) { i = SIGFPE; ucode = FPE_FPU_NP_TRAP; break; } i = (*pmath_emulate)(&frame); if (i == 0) { if (!(frame.tf_eflags & PSL_T)) return; frame.tf_eflags &= ~PSL_T; i = SIGTRAP; } /* else ucode = emulator_only_knows() XXX */ break; case T_FPOPFLT: /* FPU operand fetch fault */ ucode = T_FPOPFLT; i = SIGILL; break; } } else { #ifdef VM86 kernel_trap: #endif /* kernel trap */ switch (type) { case T_PAGEFLT: /* page fault */ (void) trap_pfault(&frame, FALSE); return; case T_DNA: #if NNPX > 0 /* * The kernel is apparently using npx for 
copying. * XXX this should be fatal unless the kernel has * registered such use. */ if (npxdna()) return; #endif break; case T_PROTFLT: /* general protection fault */ case T_SEGNPFLT: /* segment not present fault */ /* * Invalid segment selectors and out of bounds * %eip's and %esp's can be set up in user mode. * This causes a fault in kernel mode when the * kernel tries to return to user mode. We want * to get this fault so that we can fix the * problem here and not have to check all the * selectors and pointers when the user changes * them. */ #define MAYBE_DORETI_FAULT(where, whereto) \ do { \ if (frame.tf_eip == (int)where) { \ frame.tf_eip = (int)whereto; \ return; \ } \ } while (0) if (intr_nesting_level == 0) { /* * Invalid %fs's and %gs's can be created using * procfs or PT_SETREGS or by invalidating the * underlying LDT entry. This causes a fault * in kernel mode when the kernel attempts to * switch contexts. Lose the bad context * (XXX) so that we can continue, and generate * a signal. */ if (frame.tf_eip == (int)cpu_switch_load_fs) { curpcb->pcb_fs = 0; psignal(p, SIGBUS); return; } if (frame.tf_eip == (int)cpu_switch_load_gs) { curpcb->pcb_gs = 0; psignal(p, SIGBUS); return; } MAYBE_DORETI_FAULT(doreti_iret, doreti_iret_fault); MAYBE_DORETI_FAULT(doreti_popl_ds, doreti_popl_ds_fault); MAYBE_DORETI_FAULT(doreti_popl_es, doreti_popl_es_fault); if (curpcb && curpcb->pcb_onfault) { frame.tf_eip = (int)curpcb->pcb_onfault; return; } } break; case T_TSSFLT: /* * PSL_NT can be set in user mode and isn't cleared * automatically when the kernel is entered. This * causes a TSS fault when the kernel attempts to * `iret' because the TSS link is uninitialized. We * want to get this fault so that we can fix the * problem here and not every time the kernel is * entered. 
*/ if (frame.tf_eflags & PSL_NT) { frame.tf_eflags &= ~PSL_NT; return; } break; case T_TRCTRAP: /* trace trap */ if (frame.tf_eip == (int)IDTVEC(syscall)) { /* * We've just entered system mode via the * syscall lcall. Continue single stepping * silently until the syscall handler has * saved the flags. */ return; } if (frame.tf_eip == (int)IDTVEC(syscall) + 1) { /* * The syscall handler has now saved the * flags. Stop single stepping it. */ frame.tf_eflags &= ~PSL_T; return; } /* * Fall through. */ case T_BPTFLT: /* * If DDB is enabled, let it handle the debugger trap. * Otherwise, debugger traps "can't happen". */ #ifdef DDB if (kdb_trap (type, 0, &frame)) return; #endif break; #if NISA > 0 case T_NMI: #ifdef POWERFAIL_NMI #ifndef TIMER_FREQ # define TIMER_FREQ 1193182 #endif handle_powerfail: { static unsigned lastalert = 0; if(time.tv_sec - lastalert > 10) { log(LOG_WARNING, "NMI: power fail\n"); sysbeep(TIMER_FREQ/880, hz); lastalert = time.tv_sec; } return; } #else /* !POWERFAIL_NMI */ #ifdef DDB /* NMI can be hooked up to a pushbutton for debugging */ printf ("NMI ... going to debugger\n"); if (kdb_trap (type, 0, &frame)) return; #endif /* DDB */ /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(code) == 0) return; /* FALL THROUGH */ #endif /* POWERFAIL_NMI */ #endif /* NISA > 0 */ } trap_fatal(&frame); return; } trapsignal(p, i, ucode); #ifdef DEBUG eva = rcr2(); if (type <= MAX_TRAP_MSG) { uprintf("fatal process exception: %s", trap_msg[type]); if ((type == T_PAGEFLT) || (type == T_PROTFLT)) uprintf(", fault VA = 0x%x", eva); uprintf("\n"); } #endif out: userret(p, &frame, sticks); } #ifdef notyet /* * This version doesn't allow a page fault to user space while * in the kernel. The rest of the kernel needs to be made "safe" * before this can be used. I think the only things remaining * to be made safe are the iBCS2 code and the process tracing/ * debugging code. 
*/ static int trap_pfault(frame, usermode) struct trapframe *frame; int usermode; { vm_offset_t va; struct vmspace *vm = NULL; vm_map_t map = 0; int rv = 0; vm_prot_t ftype; int eva; struct proc *p = curproc; if (frame->tf_err & PGEX_W) ftype = VM_PROT_READ | VM_PROT_WRITE; else ftype = VM_PROT_READ; eva = rcr2(); va = trunc_page((vm_offset_t)eva); if (va < VM_MIN_KERNEL_ADDRESS) { vm_offset_t v; vm_page_t mpte; if (p == NULL || (!usermode && va < VM_MAXUSER_ADDRESS && (intr_nesting_level != 0 || curpcb == NULL || curpcb->pcb_onfault == NULL))) { trap_fatal(frame); return (-1); } /* * This is a fault on non-kernel virtual memory. * vm is initialized above to NULL. If curproc is NULL * or curproc->p_vmspace is NULL the fault is fatal. */ vm = p->p_vmspace; if (vm == NULL) goto nogo; map = &vm->vm_map; /* * Keep swapout from messing with us during this * critical time. */ ++p->p_lock; /* * Grow the stack if necessary */ if ((caddr_t)va > vm->vm_maxsaddr && (caddr_t)va < (caddr_t)USRSTACK) { if (!grow(p, va)) { rv = KERN_FAILURE; --p->p_lock; goto nogo; } } /* Fault in the user page: */ rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0); --p->p_lock; } else { /* * Don't allow user-mode faults in kernel address space. */ if (usermode) goto nogo; /* * Since we know that kernel virtual address addresses * always have pte pages mapped, we just have to fault * the page. */ rv = vm_fault(kernel_map, va, ftype, FALSE); } if (rv == KERN_SUCCESS) return (0); nogo: if (!usermode) { if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame); return (-1); } /* kludge to pass faulting virtual address to sendsig */ frame->tf_err = eva; return((rv == KERN_PROTECTION_FAILURE) ? 
SIGBUS : SIGSEGV); } #endif int trap_pfault(frame, usermode) struct trapframe *frame; int usermode; { vm_offset_t va; struct vmspace *vm = NULL; vm_map_t map = 0; int rv = 0; vm_prot_t ftype; int eva; struct proc *p = curproc; eva = rcr2(); va = trunc_page((vm_offset_t)eva); if (va >= KERNBASE) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid * instruction entry in the IDT, then the Intel Pentium * F00F bug workaround was triggered, and we need to * treat it is as an illegal instruction, and not a page * fault. */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) if ((eva == (unsigned int)&t_idt[6]) && has_f00f_bug) { frame->tf_trapno = T_PRIVINFLT; return -2; } #endif if (usermode) goto nogo; map = kernel_map; } else { /* * This is a fault on non-kernel virtual memory. * vm is initialized above to NULL. If curproc is NULL * or curproc->p_vmspace is NULL the fault is fatal. */ if (p != NULL) vm = p->p_vmspace; if (vm == NULL) goto nogo; map = &vm->vm_map; } if (frame->tf_err & PGEX_W) ftype = VM_PROT_READ | VM_PROT_WRITE; else ftype = VM_PROT_READ; if (map != kernel_map) { /* * Keep swapout from messing with us during this * critical time. */ ++p->p_lock; /* * Grow the stack if necessary */ if ((caddr_t)va > vm->vm_maxsaddr && (caddr_t)va < (caddr_t)USRSTACK) { if (!grow(p, va)) { rv = KERN_FAILURE; --p->p_lock; goto nogo; } } /* Fault in the user page: */ rv = vm_fault(map, va, ftype, (ftype & VM_PROT_WRITE) ? VM_FAULT_DIRTY : 0); --p->p_lock; } else { /* * Don't have to worry about process locking or stacks in the kernel. */ rv = vm_fault(map, va, ftype, FALSE); } if (rv == KERN_SUCCESS) return (0); nogo: if (!usermode) { if (intr_nesting_level == 0 && curpcb && curpcb->pcb_onfault) { frame->tf_eip = (int)curpcb->pcb_onfault; return (0); } trap_fatal(frame); return (-1); } /* kludge to pass faulting virtual address to sendsig */ frame->tf_err = eva; return((rv == KERN_PROTECTION_FAILURE) ? 
SIGBUS : SIGSEGV); } static void trap_fatal(frame) struct trapframe *frame; { int code, type, eva, ss, esp; struct soft_segment_descriptor softseg; code = frame->tf_err; type = frame->tf_trapno; eva = rcr2(); sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg); if (type <= MAX_TRAP_MSG) printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg[type], frame->tf_eflags & PSL_VM ? "vm86" : ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel"); #ifdef SMP /* three seperate prints in case of a trap on an unmapped page */ printf("mp_lock = %08x; ", mp_lock); printf("cpuid = %d; ", cpuid); printf("lapic.id = %08x\n", lapic.id); #endif if (type == T_PAGEFLT) { printf("fault virtual address = 0x%x\n", eva); printf("fault code = %s %s, %s\n", code & PGEX_U ? "user" : "supervisor", code & PGEX_W ? "write" : "read", code & PGEX_P ? "protection violation" : "page not present"); } printf("instruction pointer = 0x%x:0x%x\n", frame->tf_cs & 0xffff, frame->tf_eip); if ((ISPL(frame->tf_cs) == SEL_UPL) || (frame->tf_eflags & PSL_VM)) { ss = frame->tf_ss & 0xffff; esp = frame->tf_esp; } else { ss = GSEL(GDATA_SEL, SEL_KPL); esp = (int)&frame->tf_esp; } printf("stack pointer = 0x%x:0x%x\n", ss, esp); printf("frame pointer = 0x%x:0x%x\n", ss, frame->tf_ebp); printf("code segment = base 0x%x, limit 0x%x, type 0x%x\n", softseg.ssd_base, softseg.ssd_limit, softseg.ssd_type); printf(" = DPL %d, pres %d, def32 %d, gran %d\n", softseg.ssd_dpl, softseg.ssd_p, softseg.ssd_def32, softseg.ssd_gran); printf("processor eflags = "); if (frame->tf_eflags & PSL_T) printf("trace trap, "); if (frame->tf_eflags & PSL_I) printf("interrupt enabled, "); if (frame->tf_eflags & PSL_NT) printf("nested task, "); if (frame->tf_eflags & PSL_RF) printf("resume, "); if (frame->tf_eflags & PSL_VM) printf("vm86, "); printf("IOPL = %d\n", (frame->tf_eflags & PSL_IOPL) >> 12); printf("current process = "); if (curproc) { printf("%lu (%s)\n", (u_long)curproc->p_pid, curproc->p_comm ? 
		    curproc->p_comm ?
		    curproc->p_comm : "");
	} else {
		printf("Idle\n");
	}
	/* Report which interrupt classes were masked at the time of the trap. */
	printf("interrupt mask = ");
	if ((cpl & net_imask) == net_imask)
		printf("net ");
	if ((cpl & tty_imask) == tty_imask)
		printf("tty ");
	if ((cpl & bio_imask) == bio_imask)
		printf("bio ");
	if ((cpl & cam_imask) == cam_imask)
		printf("cam ");
	if (cpl == 0)
		printf("none");
#ifdef SMP
/**
 * XXX FIXME:
 *	we probably SHOULD have stopped the other CPUs before now!
 *	another CPU COULD have been touching cpl at this moment...
 */
	printf(" <- SMP: XXX");
#endif
	printf("\n");

#ifdef KDB
	if (kdb_trap(&psl))
		return;
#endif
#ifdef DDB
	if (kdb_trap (type, 0, frame))
		return;
#endif
	/* No debugger took the trap: this is fatal. */
	printf("trap number = %d\n", type);
	if (type <= MAX_TRAP_MSG)
		panic(trap_msg[type]);
	else
		panic("unknown/reserved trap");
}

/*
 * Double fault handler. Called when a fault occurs while writing
 * a frame for a trap/exception onto the stack. This usually occurs
 * when the stack overflows (such is the case with infinite recursion,
 * for example).
 *
 * XXX Note that the current PTD gets replaced by IdlePTD when the
 * task switch occurs. This means that the stack that was active at
 * the time of the double fault is not available afterwards, unless
 * the machine was idle when the double fault occurred. The downside
 * of this is that "trace" in ddb won't work.
 */
void
dblfault_handler()
{
	/*
	 * Registers are taken from the dedicated double-fault TSS
	 * (common_tss) rather than a trapframe: a double fault means
	 * the normal trapframe could not be pushed.
	 */
	printf("\nFatal double fault:\n");
	printf("eip = 0x%x\n", common_tss.tss_eip);
	printf("esp = 0x%x\n", common_tss.tss_esp);
	printf("ebp = 0x%x\n", common_tss.tss_ebp);
#ifdef SMP
	/* three seperate prints in case of a trap on an unmapped page */
	printf("mp_lock = %08x; ", mp_lock);
	printf("cpuid = %d; ", cpuid);
	printf("lapic.id = %08x\n", lapic.id);
#endif
	panic("double fault");
}

/*
 * Compensate for 386 brain damage (missing URKR).
 * This is a little simpler than the pagefault handler in trap() because
 * it the page tables have already been faulted in and high addresses
 * are thrown out early for other reasons.
*/ int trapwrite(addr) unsigned addr; { struct proc *p; vm_offset_t va; struct vmspace *vm; int rv; va = trunc_page((vm_offset_t)addr); /* * XXX - MAX is END. Changed > to >= for temp. fix. */ if (va >= VM_MAXUSER_ADDRESS) return (1); p = curproc; vm = p->p_vmspace; ++p->p_lock; if ((caddr_t)va >= vm->vm_maxsaddr && (caddr_t)va < (caddr_t)USRSTACK) { if (!grow(p, va)) { --p->p_lock; return (1); } } /* * fault the data page */ rv = vm_fault(&vm->vm_map, va, VM_PROT_READ|VM_PROT_WRITE, VM_FAULT_DIRTY); --p->p_lock; if (rv != KERN_SUCCESS) return 1; return (0); } /* * System call request from POSIX system call gate interface to kernel. * Like trap(), argument is call by reference. */ void syscall(frame) struct trapframe frame; { caddr_t params; int i; struct sysent *callp; struct proc *p = curproc; u_quad_t sticks; int error; int args[8]; u_int code; #ifdef DIAGNOSTIC if (ISPL(frame.tf_cs) != SEL_UPL) panic("syscall"); #endif sticks = p->p_sticks; p->p_md.md_regs = &frame; params = (caddr_t)frame.tf_esp + sizeof(int); code = frame.tf_eax; if (p->p_sysent->sv_prepsyscall) { (*p->p_sysent->sv_prepsyscall)(&frame, args, &code, ¶ms); } else { /* * Need to check if this is a 32 bit or 64 bit syscall. */ if (code == SYS_syscall) { /* * Code is first argument, followed by actual args. */ code = fuword(params); params += sizeof(int); } else if (code == SYS___syscall) { /* * Like syscall, but code is a quad, so as to maintain * quad alignment for the rest of the arguments. 
*/ code = fuword(params); params += sizeof(quad_t); } } if (p->p_sysent->sv_mask) code &= p->p_sysent->sv_mask; if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; if (params && (i = callp->sy_narg * sizeof(int)) && (error = copyin(params, (caddr_t)args, (u_int)i))) { #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, args); #endif goto bad; } #ifdef KTRACE if (KTRPOINT(p, KTR_SYSCALL)) ktrsyscall(p->p_tracep, code, callp->sy_narg, args); #endif p->p_retval[0] = 0; p->p_retval[1] = frame.tf_edx; STOPEVENT(p, S_SCE, callp->sy_narg); error = (*callp->sy_call)(p, args); switch (error) { case 0: /* * Reinitialize proc pointer `p' as it may be different * if this is a child returning from fork syscall. */ p = curproc; frame.tf_eax = p->p_retval[0]; frame.tf_edx = p->p_retval[1]; frame.tf_eflags &= ~PSL_C; break; case ERESTART: /* * Reconstruct pc, assuming lcall $X,y is 7 bytes, * int 0x80 is 2 bytes. We saved this in tf_err. */ frame.tf_eip -= frame.tf_err; break; case EJUSTRETURN: break; default: bad: if (p->p_sysent->sv_errsize) if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; frame.tf_eax = error; frame.tf_eflags |= PSL_C; break; } if ((frame.tf_eflags & PSL_T) && !(frame.tf_eflags & PSL_VM)) { /* Traced syscall. */ frame.tf_eflags &= ~PSL_T; trapsignal(p, SIGTRAP, 0); } userret(p, &frame, sticks); #ifdef KTRACE if (KTRPOINT(p, KTR_SYSRET)) ktrsysret(p->p_tracep, code, error, p->p_retval[0]); #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); } /* * Simplified back end of syscall(), used when returning from fork() * directly into user mode. 
 */
void
fork_return(p, frame)
	struct proc *p;
	struct trapframe frame;
{
	/* Fake a successful fork() return in the child's registers. */
	frame.tf_eax = 0;		/* Child returns zero */
	frame.tf_eflags &= ~PSL_C;	/* success */
	frame.tf_edx = 1;		/* second return value: "is child" */

	/* No accumulated system time yet, hence sticks == 0. */
	userret(p, &frame, 0);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSRET))
		ktrsysret(p->p_tracep, SYS_fork, 0, 0);
#endif
}
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index c237952ae81c..c6b9320a0839 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -1,1099 +1,1100 @@
/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 *
- * $Id: sys_pipe.c,v 1.39 1998/02/09 06:09:25 eivind Exp $
+ * $Id: sys_pipe.c,v 1.40 1998/03/26 20:51:47 phk Exp $
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode. The small write mode acts like conventional pipes with
 * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.
If the buffer is between PIPE_MINDIRECT * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and * the receiving process can copy it directly from the pages in the sending * process. * * If the sending process receives a signal, it is possible that it will * go away, and certainly its address space can change, because control * is returned back to the user-mode side. In that case, the pipe code * arranges to copy the buffer supplied by the user process, to a pageable * kernel buffer, and the receiving process will grab the data from the * pageable kernel buffer. Since signals don't happen all that often, * the copy operation is normally eliminated. * * The constant PIPE_MINDIRECT is chosen to make sure that buffering will * happen for small transfers so that the system will not spend all of * its time context switching. PIPE_SIZE is constrained by the * amount of kernel virtual memory. */ #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include /* * Use this define if you want to disable *fancy* VM things. Expect an * approx 30% decrease in transfer rate. This could be useful for * NetBSD or OpenBSD. */ /* #define PIPE_NODIRECT */ /* * interfaces to the outside world */ static int pipe_read __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int pipe_write __P((struct file *fp, struct uio *uio, struct ucred *cred)); static int pipe_close __P((struct file *fp, struct proc *p)); static int pipe_poll __P((struct file *fp, int events, struct ucred *cred, struct proc *p)); static int pipe_ioctl __P((struct file *fp, int cmd, caddr_t data, struct proc *p)); static struct fileops pipeops = { pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_close }; /* * Default pipe buffer size(s), this can be kind-of large now because pipe * space is pageable. 
The pipe code will try to maintain locality of * reference for performance reasons, so small amounts of outstanding I/O * will not wipe the cache. */ #define MINPIPESIZE (PIPE_SIZE/3) #define MAXPIPESIZE (2*PIPE_SIZE/3) /* * Maximum amount of kva for pipes -- this is kind-of a soft limit, but * is there so that on large systems, we don't exhaust it. */ #define MAXPIPEKVA (8*1024*1024) /* * Limit for direct transfers, we cannot, of course limit * the amount of kva for pipes in general though. */ #define LIMITPIPEKVA (16*1024*1024) /* * Limit the number of "big" pipes */ #define LIMITBIGPIPES 32 static int nbigpipe; static int amountpipekva; static void pipeclose __P((struct pipe *cpipe)); static void pipeinit __P((struct pipe *cpipe)); static __inline int pipelock __P((struct pipe *cpipe, int catch)); static __inline void pipeunlock __P((struct pipe *cpipe)); static __inline void pipeselwakeup __P((struct pipe *cpipe)); #ifndef PIPE_NODIRECT static int pipe_build_write_buffer __P((struct pipe *wpipe, struct uio *uio)); static void pipe_destroy_write_buffer __P((struct pipe *wpipe)); static int pipe_direct_write __P((struct pipe *wpipe, struct uio *uio)); static void pipe_clone_write_buffer __P((struct pipe *wpipe)); #endif static void pipespace __P((struct pipe *cpipe)); static vm_zone_t pipe_zone; /* * The pipe system call for the DTYPE_PIPE type of pipes */ /* ARGSUSED */ int pipe(p, uap) struct proc *p; struct pipe_args /* { int dummy; } */ *uap; { register struct filedesc *fdp = p->p_fd; struct file *rf, *wf; struct pipe *rpipe, *wpipe; int fd, error; if (pipe_zone == NULL) pipe_zone = zinit("PIPE", sizeof (struct pipe), 0, 0, 4); rpipe = zalloc( pipe_zone); pipeinit(rpipe); rpipe->pipe_state |= PIPE_DIRECTOK; wpipe = zalloc( pipe_zone); pipeinit(wpipe); wpipe->pipe_state |= PIPE_DIRECTOK; error = falloc(p, &rf, &fd); if (error) goto free2; p->p_retval[0] = fd; rf->f_flag = FREAD | FWRITE; rf->f_type = DTYPE_PIPE; rf->f_ops = &pipeops; rf->f_data = 
(caddr_t)rpipe; error = falloc(p, &wf, &fd); if (error) goto free3; wf->f_flag = FREAD | FWRITE; wf->f_type = DTYPE_PIPE; wf->f_ops = &pipeops; wf->f_data = (caddr_t)wpipe; p->p_retval[1] = fd; rpipe->pipe_peer = wpipe; wpipe->pipe_peer = rpipe; return (0); free3: ffree(rf); fdp->fd_ofiles[p->p_retval[0]] = 0; free2: (void)pipeclose(wpipe); (void)pipeclose(rpipe); return (error); } /* * Allocate kva for pipe circular buffer, the space is pageable */ static void pipespace(cpipe) struct pipe *cpipe; { int npages, error; npages = round_page(cpipe->pipe_buffer.size)/PAGE_SIZE; /* * Create an object, I don't like the idea of paging to/from * kernel_object. * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. */ cpipe->pipe_buffer.object = vm_object_allocate(OBJT_DEFAULT, npages); cpipe->pipe_buffer.buffer = (caddr_t) vm_map_min(kernel_map); /* * Insert the object into the kernel map, and allocate kva for it. * The map entry is, by default, pageable. * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 
*/ error = vm_map_find(kernel_map, cpipe->pipe_buffer.object, 0, (vm_offset_t *) &cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size, 1, VM_PROT_ALL, VM_PROT_ALL, 0); if (error != KERN_SUCCESS) panic("pipeinit: cannot allocate pipe -- out of kvm -- code = %d", error); amountpipekva += cpipe->pipe_buffer.size; } /* * initialize and allocate VM and memory for pipe */ static void pipeinit(cpipe) struct pipe *cpipe; { cpipe->pipe_buffer.in = 0; cpipe->pipe_buffer.out = 0; cpipe->pipe_buffer.cnt = 0; cpipe->pipe_buffer.size = PIPE_SIZE; /* Buffer kva gets dynamically allocated */ cpipe->pipe_buffer.buffer = NULL; /* cpipe->pipe_buffer.object = invalid */ cpipe->pipe_state = 0; cpipe->pipe_peer = NULL; cpipe->pipe_busy = 0; getnanotime(&cpipe->pipe_ctime); cpipe->pipe_atime = cpipe->pipe_ctime; cpipe->pipe_mtime = cpipe->pipe_ctime; bzero(&cpipe->pipe_sel, sizeof cpipe->pipe_sel); cpipe->pipe_pgid = NO_PID; #ifndef PIPE_NODIRECT /* * pipe data structure initializations to support direct pipe I/O */ cpipe->pipe_map.cnt = 0; cpipe->pipe_map.kva = 0; cpipe->pipe_map.pos = 0; cpipe->pipe_map.npages = 0; /* cpipe->pipe_map.ms[] = invalid */ #endif } /* * lock a pipe for I/O, blocking other access */ static __inline int pipelock(cpipe, catch) struct pipe *cpipe; int catch; { int error; while (cpipe->pipe_state & PIPE_LOCK) { cpipe->pipe_state |= PIPE_LWANT; if (error = tsleep( cpipe, catch?(PRIBIO|PCATCH):PRIBIO, "pipelk", 0)) { return error; } } cpipe->pipe_state |= PIPE_LOCK; return 0; } /* * unlock a pipe I/O lock */ static __inline void pipeunlock(cpipe) struct pipe *cpipe; { cpipe->pipe_state &= ~PIPE_LOCK; if (cpipe->pipe_state & PIPE_LWANT) { cpipe->pipe_state &= ~PIPE_LWANT; wakeup(cpipe); } } static __inline void pipeselwakeup(cpipe) struct pipe *cpipe; { struct proc *p; if (cpipe->pipe_state & PIPE_SEL) { cpipe->pipe_state &= ~PIPE_SEL; selwakeup(&cpipe->pipe_sel); } if (cpipe->pipe_state & PIPE_ASYNC) { if (cpipe->pipe_pgid < 0) gsignal(-cpipe->pipe_pgid, SIGIO); 
else if ((p = pfind(cpipe->pipe_pgid)) != NULL) psignal(p, SIGIO); } } /* ARGSUSED */ static int pipe_read(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { struct pipe *rpipe = (struct pipe *) fp->f_data; int error = 0; int nread = 0; u_int size; ++rpipe->pipe_busy; while (uio->uio_resid) { /* * normal pipe buffer receive */ if (rpipe->pipe_buffer.cnt > 0) { size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; if (size > rpipe->pipe_buffer.cnt) size = rpipe->pipe_buffer.cnt; if (size > (u_int) uio->uio_resid) size = (u_int) uio->uio_resid; if ((error = pipelock(rpipe,1)) == 0) { error = uiomove( &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], size, uio); pipeunlock(rpipe); } if (error) { break; } rpipe->pipe_buffer.out += size; if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) rpipe->pipe_buffer.out = 0; rpipe->pipe_buffer.cnt -= size; nread += size; #ifndef PIPE_NODIRECT /* * Direct copy, bypassing a kernel buffer. */ } else if ((size = rpipe->pipe_map.cnt) && (rpipe->pipe_state & PIPE_DIRECTW)) { caddr_t va; if (size > (u_int) uio->uio_resid) size = (u_int) uio->uio_resid; if ((error = pipelock(rpipe,1)) == 0) { va = (caddr_t) rpipe->pipe_map.kva + rpipe->pipe_map.pos; error = uiomove(va, size, uio); pipeunlock(rpipe); } if (error) break; nread += size; rpipe->pipe_map.pos += size; rpipe->pipe_map.cnt -= size; if (rpipe->pipe_map.cnt == 0) { rpipe->pipe_state &= ~PIPE_DIRECTW; wakeup(rpipe); } #endif } else { /* * detect EOF condition */ if (rpipe->pipe_state & PIPE_EOF) { /* XXX error = ? */ break; } /* * If the "write-side" has been blocked, wake it up now. */ if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } if (nread > 0) break; if (fp->f_flag & FNONBLOCK) { error = EAGAIN; break; } /* * If there is no more to read in the pipe, reset * its pointers to the beginning. This improves * cache hit stats. 
*/ if ((error = pipelock(rpipe,1)) == 0) { if (rpipe->pipe_buffer.cnt == 0) { rpipe->pipe_buffer.in = 0; rpipe->pipe_buffer.out = 0; } pipeunlock(rpipe); } else { break; } if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } rpipe->pipe_state |= PIPE_WANTR; if (error = tsleep(rpipe, PRIBIO|PCATCH, "piperd", 0)) { break; } } } if (error == 0) getnanotime(&rpipe->pipe_atime); --rpipe->pipe_busy; if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); wakeup(rpipe); } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { /* * If there is no more to read in the pipe, reset * its pointers to the beginning. This improves * cache hit stats. */ if (rpipe->pipe_buffer.cnt == 0) { if ((error == 0) && (error = pipelock(rpipe,1)) == 0) { rpipe->pipe_buffer.in = 0; rpipe->pipe_buffer.out = 0; pipeunlock(rpipe); } } /* * If the "write-side" has been blocked, wake it up now. */ if (rpipe->pipe_state & PIPE_WANTW) { rpipe->pipe_state &= ~PIPE_WANTW; wakeup(rpipe); } } if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) pipeselwakeup(rpipe); return error; } #ifndef PIPE_NODIRECT /* * Map the sending processes' buffer into kernel space and wire it. * This is similar to a physical write operation. 
*/ static int pipe_build_write_buffer(wpipe, uio) struct pipe *wpipe; struct uio *uio; { u_int size; int i; vm_offset_t addr, endaddr, paddr; size = (u_int) uio->uio_iov->iov_len; if (size > wpipe->pipe_buffer.size) size = wpipe->pipe_buffer.size; endaddr = round_page(uio->uio_iov->iov_base + size); for(i = 0, addr = trunc_page(uio->uio_iov->iov_base); addr < endaddr; addr += PAGE_SIZE, i+=1) { vm_page_t m; vm_fault_quick( (caddr_t) addr, VM_PROT_READ); paddr = pmap_kextract(addr); if (!paddr) { int j; for(j=0;jpipe_map.ms[j]); return EFAULT; } m = PHYS_TO_VM_PAGE(paddr); vm_page_wire(m); wpipe->pipe_map.ms[i] = m; } /* * set up the control block */ wpipe->pipe_map.npages = i; wpipe->pipe_map.pos = ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; wpipe->pipe_map.cnt = size; /* * and map the buffer */ if (wpipe->pipe_map.kva == 0) { /* * We need to allocate space for an extra page because the * address range might (will) span pages at times. */ wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map, wpipe->pipe_buffer.size + PAGE_SIZE); amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE; } pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, wpipe->pipe_map.npages); /* * and update the uio data */ uio->uio_iov->iov_len -= size; uio->uio_iov->iov_base += size; if (uio->uio_iov->iov_len == 0) uio->uio_iov++; uio->uio_resid -= size; uio->uio_offset += size; return 0; } /* * unmap and unwire the process buffer */ static void pipe_destroy_write_buffer(wpipe) struct pipe *wpipe; { int i; if (wpipe->pipe_map.kva) { pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); if (amountpipekva > MAXPIPEKVA) { vm_offset_t kva = wpipe->pipe_map.kva; wpipe->pipe_map.kva = 0; kmem_free(kernel_map, kva, wpipe->pipe_buffer.size + PAGE_SIZE); amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; } } for (i=0;ipipe_map.npages;i++) vm_page_unwire(wpipe->pipe_map.ms[i]); } /* * In the case of a signal, the writing process might go away. 
This * code copies the data into the circular buffer so that the source * pages can be freed without loss of data. */ static void pipe_clone_write_buffer(wpipe) struct pipe *wpipe; { int size; int pos; size = wpipe->pipe_map.cnt; pos = wpipe->pipe_map.pos; bcopy((caddr_t) wpipe->pipe_map.kva+pos, (caddr_t) wpipe->pipe_buffer.buffer, size); wpipe->pipe_buffer.in = size; wpipe->pipe_buffer.out = 0; wpipe->pipe_buffer.cnt = size; wpipe->pipe_state &= ~PIPE_DIRECTW; pipe_destroy_write_buffer(wpipe); } /* * This implements the pipe buffer write mechanism. Note that only * a direct write OR a normal pipe write can be pending at any given time. * If there are any characters in the pipe buffer, the direct write will * be deferred until the receiving process grabs all of the bytes from * the pipe buffer. Then the direct mapping write is set-up. */ static int pipe_direct_write(wpipe, uio) struct pipe *wpipe; struct uio *uio; { int error; retry: while (wpipe->pipe_state & PIPE_DIRECTW) { if ( wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } wpipe->pipe_state |= PIPE_WANTW; error = tsleep(wpipe, PRIBIO|PCATCH, "pipdww", 0); if (error) goto error1; if (wpipe->pipe_state & PIPE_EOF) { error = EPIPE; goto error1; } } wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ if (wpipe->pipe_buffer.cnt > 0) { if ( wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } wpipe->pipe_state |= PIPE_WANTW; error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwc", 0); if (error) goto error1; if (wpipe->pipe_state & PIPE_EOF) { error = EPIPE; goto error1; } goto retry; } wpipe->pipe_state |= PIPE_DIRECTW; error = pipe_build_write_buffer(wpipe, uio); if (error) { wpipe->pipe_state &= ~PIPE_DIRECTW; goto error1; } error = 0; while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { if (wpipe->pipe_state & PIPE_EOF) { pipelock(wpipe, 0); pipe_destroy_write_buffer(wpipe); pipeunlock(wpipe); pipeselwakeup(wpipe); error = EPIPE; goto error1; } if 
(wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } pipeselwakeup(wpipe); error = tsleep(wpipe, PRIBIO|PCATCH, "pipdwt", 0); } pipelock(wpipe,0); if (wpipe->pipe_state & PIPE_DIRECTW) { /* * this bit of trickery substitutes a kernel buffer for * the process that might be going away. */ pipe_clone_write_buffer(wpipe); } else { pipe_destroy_write_buffer(wpipe); } pipeunlock(wpipe); return error; error1: wakeup(wpipe); return error; } #endif static int pipe_write(fp, uio, cred) struct file *fp; struct uio *uio; struct ucred *cred; { int error = 0; int orig_resid; struct pipe *wpipe, *rpipe; rpipe = (struct pipe *) fp->f_data; wpipe = rpipe->pipe_peer; /* * detect loss of pipe read side, issue SIGPIPE if lost. */ if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { return EPIPE; } /* * If it is advantageous to resize the pipe buffer, do * so. */ if ((uio->uio_resid > PIPE_SIZE) && (nbigpipe < LIMITBIGPIPES) && (wpipe->pipe_state & PIPE_DIRECTW) == 0 && (wpipe->pipe_buffer.size <= PIPE_SIZE) && (wpipe->pipe_buffer.cnt == 0)) { if (wpipe->pipe_buffer.buffer) { amountpipekva -= wpipe->pipe_buffer.size; kmem_free(kernel_map, (vm_offset_t)wpipe->pipe_buffer.buffer, wpipe->pipe_buffer.size); } #ifndef PIPE_NODIRECT if (wpipe->pipe_map.kva) { amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE; kmem_free(kernel_map, wpipe->pipe_map.kva, wpipe->pipe_buffer.size + PAGE_SIZE); } #endif wpipe->pipe_buffer.in = 0; wpipe->pipe_buffer.out = 0; wpipe->pipe_buffer.cnt = 0; wpipe->pipe_buffer.size = BIG_PIPE_SIZE; wpipe->pipe_buffer.buffer = NULL; ++nbigpipe; #ifndef PIPE_NODIRECT wpipe->pipe_map.cnt = 0; wpipe->pipe_map.kva = 0; wpipe->pipe_map.pos = 0; wpipe->pipe_map.npages = 0; #endif } if( wpipe->pipe_buffer.buffer == NULL) { if ((error = pipelock(wpipe,1)) == 0) { pipespace(wpipe); pipeunlock(wpipe); } else { return error; } } ++wpipe->pipe_busy; orig_resid = uio->uio_resid; while (uio->uio_resid) { int space; #ifndef PIPE_NODIRECT /* * If 
the transfer is large, we can gain performance if * we do process-to-process copies directly. * If the write is non-blocking, we don't use the * direct write mechanism. */ if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && (fp->f_flag & FNONBLOCK) == 0 && (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { error = pipe_direct_write( wpipe, uio); if (error) { break; } continue; } #endif /* * Pipe buffered writes cannot be coincidental with * direct writes. We wait until the currently executing * direct write is completed before we start filling the * pipe buffer. */ retrywrite: while (wpipe->pipe_state & PIPE_DIRECTW) { if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } error = tsleep(wpipe, PRIBIO|PCATCH, "pipbww", 0); if (error) break; } space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; /* Writes of size <= PIPE_BUF must be atomic. */ /* XXX perhaps they need to be contiguous to be atomic? */ if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) space = 0; if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) { /* * This set the maximum transfer as a segment of * the buffer. */ int size = wpipe->pipe_buffer.size - wpipe->pipe_buffer.in; /* * space is the size left in the buffer */ if (size > space) size = space; /* * now limit it to the size of the uio transfer */ if (size > uio->uio_resid) size = uio->uio_resid; if ((error = pipelock(wpipe,1)) == 0) { /* * It is possible for a direct write to * slip in on us... handle it here... */ if (wpipe->pipe_state & PIPE_DIRECTW) { pipeunlock(wpipe); goto retrywrite; } error = uiomove( &wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], size, uio); pipeunlock(wpipe); } if (error) break; wpipe->pipe_buffer.in += size; if (wpipe->pipe_buffer.in >= wpipe->pipe_buffer.size) wpipe->pipe_buffer.in = 0; wpipe->pipe_buffer.cnt += size; } else { /* * If the "read-side" has been blocked, wake it up now. 
*/ if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } /* * don't block on non-blocking I/O */ if (fp->f_flag & FNONBLOCK) { error = EAGAIN; break; } /* * We have no more space and have something to offer, * wake up select/poll. */ pipeselwakeup(wpipe); wpipe->pipe_state |= PIPE_WANTW; if (error = tsleep(wpipe, (PRIBIO+1)|PCATCH, "pipewr", 0)) { break; } /* * If read side wants to go away, we just issue a signal * to ourselves. */ if (wpipe->pipe_state & PIPE_EOF) { error = EPIPE; break; } } } --wpipe->pipe_busy; if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { wpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTR); wakeup(wpipe); } else if (wpipe->pipe_buffer.cnt > 0) { /* * If we have put any characters in the buffer, we wake up * the reader. */ if (wpipe->pipe_state & PIPE_WANTR) { wpipe->pipe_state &= ~PIPE_WANTR; wakeup(wpipe); } } /* * Don't return EPIPE if I/O was successful */ if ((wpipe->pipe_buffer.cnt == 0) && (uio->uio_resid == 0) && (error == EPIPE)) error = 0; if (error == 0) getnanotime(&wpipe->pipe_mtime); /* * We have something to offer, * wake up select/poll. */ if (wpipe->pipe_buffer.cnt) pipeselwakeup(wpipe); return error; } /* * we implement a very minimal set of ioctls for compatibility with sockets. 
*/ int pipe_ioctl(fp, cmd, data, p) struct file *fp; int cmd; register caddr_t data; struct proc *p; { register struct pipe *mpipe = (struct pipe *)fp->f_data; switch (cmd) { case FIONBIO: return (0); case FIOASYNC: if (*(int *)data) { mpipe->pipe_state |= PIPE_ASYNC; } else { mpipe->pipe_state &= ~PIPE_ASYNC; } return (0); case FIONREAD: if (mpipe->pipe_state & PIPE_DIRECTW) *(int *)data = mpipe->pipe_map.cnt; else *(int *)data = mpipe->pipe_buffer.cnt; return (0); case TIOCSPGRP: mpipe->pipe_pgid = *(int *)data; return (0); case TIOCGPGRP: *(int *)data = mpipe->pipe_pgid; return (0); } return (ENOTTY); } int pipe_poll(fp, events, cred, p) struct file *fp; int events; struct ucred *cred; struct proc *p; { register struct pipe *rpipe = (struct pipe *)fp->f_data; struct pipe *wpipe; int revents = 0; wpipe = rpipe->pipe_peer; if (events & (POLLIN | POLLRDNORM)) if ((rpipe->pipe_state & PIPE_DIRECTW) || (rpipe->pipe_buffer.cnt > 0) || (rpipe->pipe_state & PIPE_EOF)) revents |= events & (POLLIN | POLLRDNORM); if (events & (POLLOUT | POLLWRNORM)) if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || ((wpipe->pipe_state & PIPE_DIRECTW) == 0) && (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF) revents |= events & (POLLOUT | POLLWRNORM); if ((rpipe->pipe_state & PIPE_EOF) || (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) revents |= POLLHUP; if (revents == 0) { if (events & (POLLIN | POLLRDNORM)) { selrecord(p, &rpipe->pipe_sel); rpipe->pipe_state |= PIPE_SEL; } if (events & (POLLOUT | POLLWRNORM)) { selrecord(p, &wpipe->pipe_sel); wpipe->pipe_state |= PIPE_SEL; } } return (revents); } int pipe_stat(pipe, ub) register struct pipe *pipe; register struct stat *ub; { bzero((caddr_t)ub, sizeof (*ub)); ub->st_mode = S_IFIFO; ub->st_blksize = pipe->pipe_buffer.size; ub->st_size = pipe->pipe_buffer.cnt; ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; ub->st_atimespec = pipe->pipe_atime; ub->st_mtimespec = pipe->pipe_mtime; ub->st_ctimespec 
= pipe->pipe_ctime; /* * Left as 0: st_dev, st_ino, st_nlink, st_uid, st_gid, st_rdev, * st_flags, st_gen. * XXX (st_dev, st_ino) should be unique. */ return 0; } /* ARGSUSED */ static int pipe_close(fp, p) struct file *fp; struct proc *p; { struct pipe *cpipe = (struct pipe *)fp->f_data; pipeclose(cpipe); fp->f_data = NULL; return 0; } /* * shutdown the pipe */ static void pipeclose(cpipe) struct pipe *cpipe; { struct pipe *ppipe; if (cpipe) { pipeselwakeup(cpipe); /* * If the other side is blocked, wake it up saying that * we want to close it down. */ while (cpipe->pipe_busy) { wakeup(cpipe); cpipe->pipe_state |= PIPE_WANT|PIPE_EOF; tsleep(cpipe, PRIBIO, "pipecl", 0); } /* * Disconnect from peer */ if (ppipe = cpipe->pipe_peer) { pipeselwakeup(ppipe); ppipe->pipe_state |= PIPE_EOF; wakeup(ppipe); ppipe->pipe_peer = NULL; } /* * free resources */ if (cpipe->pipe_buffer.buffer) { if (cpipe->pipe_buffer.size > PIPE_SIZE) --nbigpipe; amountpipekva -= cpipe->pipe_buffer.size; kmem_free(kernel_map, (vm_offset_t)cpipe->pipe_buffer.buffer, cpipe->pipe_buffer.size); } #ifndef PIPE_NODIRECT if (cpipe->pipe_map.kva) { amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE; kmem_free(kernel_map, cpipe->pipe_map.kva, cpipe->pipe_buffer.size + PAGE_SIZE); } #endif zfree(pipe_zone, cpipe); } } diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 5f9c5e38ca46..5814c3a51d2b 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -1,174 +1,175 @@ /* * Copyright (c) 1982, 1986, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
 *
 *	@(#)sys_socket.c	8.1 (Berkeley) 6/10/93
- * $Id: sys_socket.c,v 1.15 1997/08/16 19:15:02 wollman Exp $
+ * $Id: sys_socket.c,v 1.16 1997/09/14 02:52:15 peter Exp $
 */

/*
 * NOTE(review): the header names in the #include lines below were lost
 * when this chunk was extracted (angle-bracket contents stripped);
 * they are preserved as-is rather than guessed.
 */
#include
#include
#include
#include
#include
#include
#include	/* XXX */
#include
#include
+#include
#include
#include

static	int soo_read __P((struct file *fp, struct uio *uio,
		struct ucred *cred));
static	int soo_write __P((struct file *fp, struct uio *uio,
		struct ucred *cred));
static	int soo_close __P((struct file *fp, struct proc *p));

/* file-descriptor operations vector for sockets */
struct	fileops socketops =
    { soo_read, soo_write, soo_ioctl, soo_poll, soo_close };

/*
 * Read from a socket file descriptor: delegate to the protocol's
 * pru_soreceive entry with no address, control, or flags.
 */
/* ARGSUSED */
static int
soo_read(fp, uio, cred)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
{
	struct socket *so = (struct socket *)fp->f_data;
	return so->so_proto->pr_usrreqs->pru_soreceive(so, 0, uio, 0, 0, 0);
}

/*
 * Write to a socket file descriptor: delegate to the protocol's
 * pru_sosend entry with no address, control, or flags.
 */
/* ARGSUSED */
static int
soo_write(fp, uio, cred)
	struct file *fp;
	struct uio *uio;
	struct ucred *cred;
{
	struct socket *so = (struct socket *)fp->f_data;
	return so->so_proto->pr_usrreqs->pru_sosend(so, 0, uio, 0, 0, 0,
	    uio->uio_procp);
}

/*
 * Handle the generic socket ioctls locally; route interface ('i') and
 * routing ('r') ioctls to their subsystems; pass everything else down
 * to the protocol's pru_control entry.
 */
int
soo_ioctl(fp, cmd, data, p)
	struct file *fp;
	int cmd;
	register caddr_t data;
	struct proc *p;
{
	register struct socket *so = (struct socket *)fp->f_data;

	switch (cmd) {

	case FIONBIO:
		if (*(int *)data)
			so->so_state |= SS_NBIO;
		else
			so->so_state &= ~SS_NBIO;
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			so->so_state |= SS_ASYNC;
			so->so_rcv.sb_flags |= SB_ASYNC;
			so->so_snd.sb_flags |= SB_ASYNC;
		} else {
			so->so_state &= ~SS_ASYNC;
			so->so_rcv.sb_flags &= ~SB_ASYNC;
			so->so_snd.sb_flags &= ~SB_ASYNC;
		}
		return (0);

	case FIONREAD:
		*(int *)data = so->so_rcv.sb_cc;
		return (0);

	case SIOCSPGRP:
		so->so_pgid = *(int *)data;
		return (0);

	case SIOCGPGRP:
		*(int *)data = so->so_pgid;
		return (0);

	case SIOCATMARK:
		*(int *)data = (so->so_state&SS_RCVATMARK) != 0;
		return (0);
	}
	/*
	 * Interface/routing/protocol specific ioctls:
	 * interface and routing ioctls should have a
	 * different entry since a socket's unnecessary
	 */
	if (IOCGROUP(cmd) == 'i')
		return (ifioctl(so, cmd, data, p));
	if (IOCGROUP(cmd) == 'r')
		return (rtioctl(cmd, data, p));
	return ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, 0, p));
}

/*
 * Poll a socket file descriptor via the protocol's pru_sopoll entry.
 */
int
soo_poll(fp, events, cred, p)
	struct file *fp;
	int events;
	struct ucred *cred;
	struct proc *p;
{
	struct socket *so = (struct socket *)fp->f_data;
	return so->so_proto->pr_usrreqs->pru_sopoll(so, events, cred, p);
}

/*
 * Stat a socket: mark it S_IFSOCK and let the protocol fill in the rest.
 */
int
soo_stat(so, ub)
	register struct socket *so;
	register struct stat *ub;
{

	bzero((caddr_t)ub, sizeof (*ub));
	ub->st_mode = S_IFSOCK;
	return ((*so->so_proto->pr_usrreqs->pru_sense)(so, ub));
}

/*
 * Close a socket file descriptor; tolerate a descriptor whose socket
 * is already gone (f_data == 0).
 */
/* ARGSUSED */
static int
soo_close(fp, p)
	struct file *fp;
	struct proc *p;
{
	int error = 0;

	if (fp->f_data)
		error = soclose((struct socket *)fp->f_data);
	fp->f_data = 0;
	return (error);
}
diff --git a/sys/kern/tty_cons.c b/sys/kern/tty_cons.c
index 6cccc4fb6a7b..a90283af2009 100644
--- a/sys/kern/tty_cons.c
+++ b/sys/kern/tty_cons.c
@@ -1,445 +1,446 @@
/*
 * Copyright (c) 1988 University of Utah.
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4.
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)cons.c 7.2 (Berkeley) 5/9/91 - * $Id: cons.c,v 1.55 1997/09/14 03:19:01 peter Exp $ + * $Id: cons.c,v 1.56 1998/01/24 02:54:12 eivind Exp $ */ #include "opt_devfs.h" #include #ifdef DEVFS #include #endif /*DEVFS*/ #include #include #include #include #include #include #include +#include #include #include /* XXX this should be config(8)ed. 
*/ #include "sc.h" #include "vt.h" #include "sio.h" static struct consdev constab[] = { #if NSC > 0 { sccnprobe, sccninit, sccngetc, sccncheckc, sccnputc }, #endif #if NVT > 0 { pccnprobe, pccninit, pccngetc, pccncheckc, pccnputc }, #endif #if NSIO > 0 { siocnprobe, siocninit, siocngetc, siocncheckc, siocnputc }, #endif { 0 }, }; static d_open_t cnopen; static d_close_t cnclose; static d_read_t cnread; static d_write_t cnwrite; static d_ioctl_t cnioctl; static d_poll_t cnpoll; #define CDEV_MAJOR 0 static struct cdevsw cn_cdevsw = { cnopen, cnclose, cnread, cnwrite, /*0*/ cnioctl, nullstop, nullreset, nodevtotty,/* console */ cnpoll, nommap, NULL, "console", NULL, -1 }; static dev_t cn_dev_t; /* seems to be never really used */ SYSCTL_OPAQUE(_machdep, CPU_CONSDEV, consdev, CTLTYPE_OPAQUE|CTLFLAG_RD, &cn_dev_t, sizeof cn_dev_t, "T,dev_t", ""); static int cn_mute; int cons_unavail = 0; /* XXX: * physical console not available for * input (i.e., it is in graphics mode) */ static u_char cn_is_open; /* nonzero if logical console is open */ static int openmode, openflag; /* how /dev/console was openned */ static u_char cn_phys_is_open; /* nonzero if physical device is open */ static d_close_t *cn_phys_close; /* physical device close function */ static d_open_t *cn_phys_open; /* physical device open function */ static struct consdev *cn_tab; /* physical console device info */ static struct tty *cn_tp; /* physical console tty struct */ #ifdef DEVFS static void *cn_devfs_token; /* represents the devfs entry */ #endif /* DEVFS */ void cninit() { struct consdev *best_cp, *cp; /* * Find the first console with the highest priority. */ best_cp = NULL; for (cp = constab; cp->cn_probe; cp++) { (*cp->cn_probe)(cp); if (cp->cn_pri > CN_DEAD && (best_cp == NULL || cp->cn_pri > best_cp->cn_pri)) best_cp = cp; } /* * Check if we should mute the console (for security reasons perhaps) * It can be changes dynamically using sysctl kern.consmute * once we are up and going. 
* */ cn_mute = ((boothowto & (RB_MUTE |RB_SINGLE |RB_VERBOSE |RB_ASKNAME |RB_CONFIG)) == RB_MUTE); /* * If no console, give up. */ if (best_cp == NULL) { cn_tab = best_cp; return; } /* * Initialize console, then attach to it. This ordering allows * debugging using the previous console, if any. * XXX if there was a previous console, then its driver should * be informed when we forget about it. */ (*best_cp->cn_init)(best_cp); cn_tab = best_cp; } void cninit_finish() { struct cdevsw *cdp; if ((cn_tab == NULL) || cn_mute) return; /* * Hook the open and close functions. */ cdp = cdevsw[major(cn_tab->cn_dev)]; cn_phys_close = cdp->d_close; cdp->d_close = cnclose; cn_phys_open = cdp->d_open; cdp->d_open = cnopen; cn_tp = (*cdp->d_devtotty)(cn_tab->cn_dev); cn_dev_t = cn_tp->t_dev; } static void cnuninit(void) { struct cdevsw *cdp; if (cn_tab == NULL) return; /* * Unhook the open and close functions. */ cdp = cdevsw[major(cn_tab->cn_dev)]; cdp->d_close = cn_phys_close; cn_phys_close = NULL; cdp->d_open = cn_phys_open; cn_phys_open = NULL; cn_tp = NULL; cn_dev_t = 0; } /* * User has changed the state of the console muting. * This may require us to open or close the device in question. */ static int sysctl_kern_consmute SYSCTL_HANDLER_ARGS { int error; int ocn_mute; ocn_mute = cn_mute; error = sysctl_handle_int(oidp, &cn_mute, 0, req); if((error == 0) && (cn_tab != NULL) && (req->newptr != NULL)) { if(ocn_mute && !cn_mute) { /* * going from muted to unmuted.. open the physical dev * if the console has been openned */ cninit_finish(); if(cn_is_open) /* XXX curproc is not what we want really */ error = cnopen(cn_dev_t, openflag, openmode, curproc); /* if it failed, back it out */ if ( error != 0) cnuninit(); } else if (!ocn_mute && cn_mute) { /* * going from unmuted to muted.. 
close the physical dev * if it's only open via /dev/console */ if(cn_is_open) error = cnclose(cn_dev_t, openflag, openmode, curproc); if ( error == 0) cnuninit(); } if (error != 0) { /* * back out the change if there was an error */ cn_mute = ocn_mute; } } return (error); } SYSCTL_PROC(_kern, OID_AUTO, consmute, CTLTYPE_INT|CTLFLAG_RW, 0, sizeof cn_mute, sysctl_kern_consmute, "I", ""); static int cnopen(dev, flag, mode, p) dev_t dev; int flag, mode; struct proc *p; { dev_t cndev, physdev; int retval = 0; if (cn_tab == NULL) return (0); cndev = cn_tab->cn_dev; physdev = (major(dev) == major(cndev) ? dev : cndev); /* * If mute is active, then non console opens don't get here * so we don't need to check for that. They * bypass this and go straight to the device. */ if(!cn_mute) retval = (*cn_phys_open)(physdev, flag, mode, p); if (retval == 0) { /* * check if we openned it via /dev/console or * via the physical entry (e.g. /dev/sio0). */ if (dev == cndev) cn_phys_is_open = 1; else if (physdev == cndev) { openmode = mode; openflag = flag; cn_is_open = 1; } } return (retval); } static int cnclose(dev, flag, mode, p) dev_t dev; int flag, mode; struct proc *p; { dev_t cndev; if (cn_tab == NULL) return (0); cndev = cn_tab->cn_dev; /* * act appropriatly depending on whether it's /dev/console * or the pysical device (e.g. /dev/sio) that's being closed. * in either case, don't actually close the device unless * both are closed. 
*/ if (dev == cndev) { /* the physical device is about to be closed */ cn_phys_is_open = 0; if (cn_is_open) { if (cn_tp) { /* perform a ttyhalfclose() */ /* reset session and proc group */ cn_tp->t_pgrp = NULL; cn_tp->t_session = NULL; } return (0); } } else if (major(dev) != major(cndev)) { /* the logical console is about to be closed */ cn_is_open = 0; if (cn_phys_is_open) return (0); dev = cndev; } if(cn_phys_close) return ((*cn_phys_close)(dev, flag, mode, p)); return (0); } static int cnread(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { if ((cn_tab == NULL) || cn_mute) return (0); dev = cn_tab->cn_dev; return ((*cdevsw[major(dev)]->d_read)(dev, uio, flag)); } static int cnwrite(dev, uio, flag) dev_t dev; struct uio *uio; int flag; { if ((cn_tab == NULL) || cn_mute) { uio->uio_resid = 0; /* dump the data */ return (0); } if (constty) dev = constty->t_dev; else dev = cn_tab->cn_dev; return ((*cdevsw[major(dev)]->d_write)(dev, uio, flag)); } static int cnioctl(dev, cmd, data, flag, p) dev_t dev; int cmd; caddr_t data; int flag; struct proc *p; { int error; if ((cn_tab == NULL) || cn_mute) return (0); /* * Superuser can always use this to wrest control of console * output from the "virtual" console. 
*/ if (cmd == TIOCCONS && constty) { error = suser(p->p_ucred, (u_short *) NULL); if (error) return (error); constty = NULL; return (0); } dev = cn_tab->cn_dev; return ((*cdevsw[major(dev)]->d_ioctl)(dev, cmd, data, flag, p)); } static int cnpoll(dev, events, p) dev_t dev; int events; struct proc *p; { if ((cn_tab == NULL) || cn_mute) return (1); dev = cn_tab->cn_dev; return ((*cdevsw[major(dev)]->d_poll)(dev, events, p)); } int cngetc() { int c; if ((cn_tab == NULL) || cn_mute) return (-1); c = (*cn_tab->cn_getc)(cn_tab->cn_dev); if (c == '\r') c = '\n'; /* console input is always ICRNL */ return (c); } int cncheckc() { if ((cn_tab == NULL) || cn_mute) return (-1); return ((*cn_tab->cn_checkc)(cn_tab->cn_dev)); } void cnputc(c) register int c; { if ((cn_tab == NULL) || cn_mute) return; if (c) { if (c == '\n') (*cn_tab->cn_putc)(cn_tab->cn_dev, '\r'); (*cn_tab->cn_putc)(cn_tab->cn_dev, c); } } static cn_devsw_installed = 0; static void cn_drvinit(void *unused) { dev_t dev; if( ! cn_devsw_installed ) { dev = makedev(CDEV_MAJOR,0); cdevsw_add(&dev,&cn_cdevsw,NULL); cn_devsw_installed = 1; #ifdef DEVFS cn_devfs_token = devfs_add_devswf(&cn_cdevsw, 0, DV_CHR, UID_ROOT, GID_WHEEL, 0600, "console"); #endif } } SYSINIT(cndev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,cn_drvinit,NULL) diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 4debb5f8e8d2..58e309ab78ad 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,1136 +1,1137 @@ /* * Copyright (c) 1982, 1986, 1988, 1990, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_socket.c 8.3 (Berkeley) 4/15/94 - * $Id: uipc_socket.c,v 1.37 1998/02/19 19:38:20 fenner Exp $ + * $Id: uipc_socket.c,v 1.38 1998/03/01 19:39:17 guido Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include MALLOC_DEFINE(M_SOCKET, "socket", "socket structure"); MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); static int somaxconn = SOMAXCONN; SYSCTL_INT(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLFLAG_RW, &somaxconn, 0, ""); /* * Socket operation routines. * These routines are called by the routines in * sys_socket.c or from a system process, and * implement the semantics of socket operations by * switching out to the protocol specific routines. */ /*ARGSUSED*/ int socreate(dom, aso, type, proto, p) int dom; struct socket **aso; register int type; int proto; struct proc *p; { register struct protosw *prp; register struct socket *so; register int error; if (proto) prp = pffindproto(dom, proto, type); else prp = pffindtype(dom, type); if (prp == 0 || prp->pr_usrreqs->pru_attach == 0) return (EPROTONOSUPPORT); if (prp->pr_type != type) return (EPROTOTYPE); MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_WAIT); bzero((caddr_t)so, sizeof(*so)); TAILQ_INIT(&so->so_incomp); TAILQ_INIT(&so->so_comp); so->so_type = type; so->so_uid = p->p_ucred->cr_uid;; so->so_proto = prp; error = (*prp->pr_usrreqs->pru_attach)(so, proto, p); if (error) { so->so_state |= SS_NOFDREF; sofree(so); return (error); } *aso = so; return (0); } int sobind(so, nam, p) struct socket *so; struct sockaddr *nam; struct proc *p; { int s = splnet(); int error; error = (*so->so_proto->pr_usrreqs->pru_bind)(so, nam, p); splx(s); return (error); } int solisten(so, backlog, p) register struct socket *so; int backlog; struct proc *p; { int s = splnet(), error; error = (*so->so_proto->pr_usrreqs->pru_listen)(so, p); if (error) { 
splx(s); return (error); } if (so->so_comp.tqh_first == NULL) so->so_options |= SO_ACCEPTCONN; if (backlog < 0 || backlog > somaxconn) backlog = somaxconn; so->so_qlimit = backlog; splx(s); return (0); } void sofree(so) register struct socket *so; { struct socket *head = so->so_head; if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) return; if (head != NULL) { if (so->so_state & SS_INCOMP) { TAILQ_REMOVE(&head->so_incomp, so, so_list); head->so_incqlen--; } else if (so->so_state & SS_COMP) { TAILQ_REMOVE(&head->so_comp, so, so_list); } else { panic("sofree: not queued"); } head->so_qlen--; so->so_state &= ~(SS_INCOMP|SS_COMP); so->so_head = NULL; } sbrelease(&so->so_snd); sorflush(so); FREE(so, M_SOCKET); } /* * Close a socket on last file table reference removal. * Initiate disconnect if connected. * Free socket when disconnect complete. */ int soclose(so) register struct socket *so; { int s = splnet(); /* conservative */ int error = 0; if (so->so_options & SO_ACCEPTCONN) { struct socket *sp, *sonext; for (sp = so->so_incomp.tqh_first; sp != NULL; sp = sonext) { sonext = sp->so_list.tqe_next; (void) soabort(sp); } for (sp = so->so_comp.tqh_first; sp != NULL; sp = sonext) { sonext = sp->so_list.tqe_next; (void) soabort(sp); } } if (so->so_pcb == 0) goto discard; if (so->so_state & SS_ISCONNECTED) { if ((so->so_state & SS_ISDISCONNECTING) == 0) { error = sodisconnect(so); if (error) goto drop; } if (so->so_options & SO_LINGER) { if ((so->so_state & SS_ISDISCONNECTING) && (so->so_state & SS_NBIO)) goto drop; while (so->so_state & SS_ISCONNECTED) { error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "soclos", so->so_linger); if (error) break; } } } drop: if (so->so_pcb) { int error2 = (*so->so_proto->pr_usrreqs->pru_detach)(so); if (error == 0) error = error2; } discard: if (so->so_state & SS_NOFDREF) panic("soclose: NOFDREF"); so->so_state |= SS_NOFDREF; sofree(so); splx(s); return (error); } /* * Must be called at splnet... 
*/ int soabort(so) struct socket *so; { return (*so->so_proto->pr_usrreqs->pru_abort)(so); } int soaccept(so, nam) register struct socket *so; struct sockaddr **nam; { int s = splnet(); int error; if ((so->so_state & SS_NOFDREF) == 0) panic("soaccept: !NOFDREF"); so->so_state &= ~SS_NOFDREF; error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); splx(s); return (error); } int soconnect(so, nam, p) register struct socket *so; struct sockaddr *nam; struct proc *p; { int s; int error; if (so->so_options & SO_ACCEPTCONN) return (EOPNOTSUPP); s = splnet(); /* * If protocol is connection-based, can only connect once. * Otherwise, if connected, try to disconnect first. * This allows user to disconnect by connecting to, e.g., * a null address. */ if (so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING) && ((so->so_proto->pr_flags & PR_CONNREQUIRED) || (error = sodisconnect(so)))) error = EISCONN; else error = (*so->so_proto->pr_usrreqs->pru_connect)(so, nam, p); splx(s); return (error); } int soconnect2(so1, so2) register struct socket *so1; struct socket *so2; { int s = splnet(); int error; error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); splx(s); return (error); } int sodisconnect(so) register struct socket *so; { int s = splnet(); int error; if ((so->so_state & SS_ISCONNECTED) == 0) { error = ENOTCONN; goto bad; } if (so->so_state & SS_ISDISCONNECTING) { error = EALREADY; goto bad; } error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); bad: splx(s); return (error); } #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK) /* * Send on a socket. * If send must go all at once and message is larger than * send buffering, then hard error. * Lock against other senders. * If must go all at once and not enough room now, then * inform user that this would block and do nothing. * Otherwise, if nonblocking, send as much as possible. 
* The data to be sent is described by "uio" if nonzero, * otherwise by the mbuf chain "top" (which must be null * if uio is not). Data provided in mbuf chain must be small * enough to send all at once. * * Returns nonzero on error, timeout or signal; callers * must check for short counts if EINTR/ERESTART are returned. * Data and control buffers are freed on return. */ int sosend(so, addr, uio, top, control, flags, p) register struct socket *so; struct sockaddr *addr; struct uio *uio; struct mbuf *top; struct mbuf *control; int flags; struct proc *p; { struct mbuf **mp; register struct mbuf *m; register long space, len, resid; int clen = 0, error, s, dontroute, mlen; int atomic = sosendallatonce(so) || top; if (uio) resid = uio->uio_resid; else resid = top->m_pkthdr.len; /* * In theory resid should be unsigned. * However, space must be signed, as it might be less than 0 * if we over-committed, and we must use a signed comparison * of space and resid. On the other hand, a negative resid * causes us to loop sending 0-length segments to the protocol. * * Also check to make sure that MSG_EOR isn't used on SOCK_STREAM * type sockets since that's an error. */ if (resid < 0 || so->so_type == SOCK_STREAM && (flags & MSG_EOR)) { error = EINVAL; goto out; } dontroute = (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 && (so->so_proto->pr_flags & PR_ATOMIC); if (p) p->p_stats->p_ru.ru_msgsnd++; if (control) clen = control->m_len; #define snderr(errno) { error = errno; splx(s); goto release; } restart: error = sblock(&so->so_snd, SBLOCKWAIT(flags)); if (error) goto out; do { s = splnet(); if (so->so_state & SS_CANTSENDMORE) snderr(EPIPE); if (so->so_error) { error = so->so_error; so->so_error = 0; splx(s); goto release; } if ((so->so_state & SS_ISCONNECTED) == 0) { /* * `sendto' and `sendmsg' is allowed on a connection- * based socket if it supports implied connect. * Return ENOTCONN if not connected and no address is * supplied. 
*/ if ((so->so_proto->pr_flags & PR_CONNREQUIRED) && (so->so_proto->pr_flags & PR_IMPLOPCL) == 0) { if ((so->so_state & SS_ISCONFIRMING) == 0 && !(resid == 0 && clen != 0)) snderr(ENOTCONN); } else if (addr == 0) snderr(so->so_proto->pr_flags & PR_CONNREQUIRED ? ENOTCONN : EDESTADDRREQ); } space = sbspace(&so->so_snd); if (flags & MSG_OOB) space += 1024; if ((atomic && resid > so->so_snd.sb_hiwat) || clen > so->so_snd.sb_hiwat) snderr(EMSGSIZE); if (space < resid + clen && uio && (atomic || space < so->so_snd.sb_lowat || space < clen)) { if (so->so_state & SS_NBIO) snderr(EWOULDBLOCK); sbunlock(&so->so_snd); error = sbwait(&so->so_snd); splx(s); if (error) goto out; goto restart; } splx(s); mp = ⊤ space -= clen; do { if (uio == NULL) { /* * Data is prepackaged in "top". */ resid = 0; if (flags & MSG_EOR) top->m_flags |= M_EOR; } else do { if (top == 0) { MGETHDR(m, M_WAIT, MT_DATA); mlen = MHLEN; m->m_pkthdr.len = 0; m->m_pkthdr.rcvif = (struct ifnet *)0; } else { MGET(m, M_WAIT, MT_DATA); mlen = MLEN; } if (resid >= MINCLSIZE) { MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) goto nopages; mlen = MCLBYTES; len = min(min(mlen, resid), space); } else { nopages: len = min(min(mlen, resid), space); /* * For datagram protocols, leave room * for protocol headers in first mbuf. */ if (atomic && top == 0 && len < mlen) MH_ALIGN(m, len); } space -= len; error = uiomove(mtod(m, caddr_t), (int)len, uio); resid = uio->uio_resid; m->m_len = len; *mp = m; top->m_pkthdr.len += len; if (error) goto release; mp = &m->m_next; if (resid <= 0) { if (flags & MSG_EOR) top->m_flags |= M_EOR; break; } } while (space > 0 && atomic); if (dontroute) so->so_options |= SO_DONTROUTE; s = splnet(); /* XXX */ error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* * If the user set MSG_EOF, the protocol * understands this flag and nothing left to * send then use PRU_SEND_EOF instead of PRU_SEND. 
*/ ((flags & MSG_EOF) && (so->so_proto->pr_flags & PR_IMPLOPCL) && (resid <= 0)) ? PRUS_EOF : 0, top, addr, control, p); splx(s); if (dontroute) so->so_options &= ~SO_DONTROUTE; clen = 0; control = 0; top = 0; mp = ⊤ if (error) goto release; } while (resid && space > 0); } while (resid); release: sbunlock(&so->so_snd); out: if (top) m_freem(top); if (control) m_freem(control); return (error); } /* * Implement receive operations on a socket. * We depend on the way that records are added to the sockbuf * by sbappend*. In particular, each record (mbufs linked through m_next) * must begin with an address if the protocol so specifies, * followed by an optional mbuf or mbufs containing ancillary data, * and then zero or more mbufs of data. * In order to avoid blocking network interrupts for the entire time here, * we splx() while doing the actual copy to user space. * Although the sockbuf is locked, new data may still be appended, * and thus we must maintain consistency of the sockbuf during that time. * * The caller may receive the data as a single mbuf chain by supplying * an mbuf **mp0 for use in returning the chain. The uio is then used * only for the count in uio_resid. 
 */
int
soreceive(so, psa, uio, mp0, controlp, flagsp)
	register struct socket *so;
	struct sockaddr **psa;
	struct uio *uio;
	struct mbuf **mp0;
	struct mbuf **controlp;
	int *flagsp;
{
	register struct mbuf *m, **mp;
	register int flags, len, error, s, offset;
	struct protosw *pr = so->so_proto;
	struct mbuf *nextrecord;
	int moff, type = 0;
	/* Remember the starting resid so we can detect a zero-progress pass. */
	int orig_resid = uio->uio_resid;

	mp = mp0;
	if (psa)
		*psa = 0;
	if (controlp)
		*controlp = 0;
	if (flagsp)
		flags = *flagsp &~ MSG_EOR;
	else
		flags = 0;
	/* Out-of-band data is fetched directly from the protocol. */
	if (flags & MSG_OOB) {
		m = m_get(M_WAIT, MT_DATA);
		error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
		if (error)
			goto bad;
		do {
			error = uiomove(mtod(m, caddr_t),
			    (int) min(uio->uio_resid, m->m_len), uio);
			m = m_free(m);
		} while (uio->uio_resid && error == 0 && m);
bad:
		if (m)
			m_freem(m);
		return (error);
	}
	if (mp)
		*mp = (struct mbuf *)0;
	if (so->so_state & SS_ISCONFIRMING && uio->uio_resid)
		(*pr->pr_usrreqs->pru_rcvd)(so, 0);

restart:
	error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
	if (error)
		return (error);
	s = splnet();

	m = so->so_rcv.sb_mb;
	/*
	 * If we have less data than requested, block awaiting more
	 * (subject to any timeout) if:
	 *   1. the current count is less than the low water mark, or
	 *   2. MSG_WAITALL is set, and it is possible to do the entire
	 *	receive operation at once if we block (resid <= hiwat).
	 *   3. MSG_DONTWAIT is not set
	 * If MSG_WAITALL is set but resid is larger than the receive buffer,
	 * we have to do the receive in sections, and thus risk returning
	 * a short count if a timeout or signal occurs after we start.
	 */
	if (m == 0 || (((flags & MSG_DONTWAIT) == 0 &&
	    so->so_rcv.sb_cc < uio->uio_resid) &&
	    (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
	    ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
	    m->m_nextpkt == 0 && (pr->pr_flags & PR_ATOMIC) == 0)) {
#ifdef DIAGNOSTIC
		if (m == 0 && so->so_rcv.sb_cc)
			panic("receive 1");
#endif
		if (so->so_error) {
			if (m)
				goto dontblock;
			error = so->so_error;
			if ((flags & MSG_PEEK) == 0)
				so->so_error = 0;
			goto release;
		}
		if (so->so_state & SS_CANTRCVMORE) {
			if (m)
				goto dontblock;
			else
				goto release;
		}
		/* OOB data or a record end forces delivery of what we have. */
		for (; m; m = m->m_next)
			if (m->m_type == MT_OOBDATA || (m->m_flags & M_EOR)) {
				m = so->so_rcv.sb_mb;
				goto dontblock;
			}
		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) == 0 &&
		    (so->so_proto->pr_flags & PR_CONNREQUIRED)) {
			error = ENOTCONN;
			goto release;
		}
		if (uio->uio_resid == 0)
			goto release;
		if ((so->so_state & SS_NBIO) || (flags & MSG_DONTWAIT)) {
			error = EWOULDBLOCK;
			goto release;
		}
		sbunlock(&so->so_rcv);
		error = sbwait(&so->so_rcv);
		splx(s);
		if (error)
			return (error);
		goto restart;
	}
dontblock:
	if (uio->uio_procp)
		uio->uio_procp->p_stats->p_ru.ru_msgrcv++;
	nextrecord = m->m_nextpkt;
	/* First mbuf of a record carries the peer address, if any. */
	if (pr->pr_flags & PR_ADDR) {
#ifdef DIAGNOSTIC
		if (m->m_type != MT_SONAME)
			panic("receive 1a");
#endif
		orig_resid = 0;
		if (psa)
			*psa = dup_sockaddr(mtod(m, struct sockaddr *),
			    mp0 == 0);
		if (flags & MSG_PEEK) {
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			MFREE(m, so->so_rcv.sb_mb);
			m = so->so_rcv.sb_mb;
		}
	}
	/* Then any control (ancillary) mbufs, e.g. SCM_RIGHTS. */
	while (m && m->m_type == MT_CONTROL && error == 0) {
		if (flags & MSG_PEEK) {
			if (controlp)
				*controlp = m_copy(m, 0, m->m_len);
			m = m->m_next;
		} else {
			sbfree(&so->so_rcv, m);
			if (controlp) {
				if (pr->pr_domain->dom_externalize &&
				    mtod(m, struct cmsghdr *)->cmsg_type ==
				    SCM_RIGHTS)
					error = (*pr->pr_domain->dom_externalize)(m);
				*controlp = m;
				so->so_rcv.sb_mb = m->m_next;
				m->m_next = 0;
				m = so->so_rcv.sb_mb;
			} else {
				MFREE(m, so->so_rcv.sb_mb);
				m = so->so_rcv.sb_mb;
			}
		}
		if (controlp) {
			orig_resid = 0;
			controlp = &(*controlp)->m_next;
		}
	}
	if (m) {
		if ((flags & MSG_PEEK) == 0)
			m->m_nextpkt = nextrecord;
		type = m->m_type;
		if (type == MT_OOBDATA)
			flags |= MSG_OOB;
	}
	moff = 0;
	offset = 0;
	/* Main copy loop: walk data mbufs until resid satisfied or error. */
	while (m && uio->uio_resid > 0 && error == 0) {
		if (m->m_type == MT_OOBDATA) {
			if (type != MT_OOBDATA)
				break;
		} else if (type == MT_OOBDATA)
			break;
#ifdef DIAGNOSTIC
		else if (m->m_type != MT_DATA && m->m_type != MT_HEADER)
			panic("receive 3");
#endif
		so->so_state &= ~SS_RCVATMARK;
		len = uio->uio_resid;
		if (so->so_oobmark && len > so->so_oobmark - offset)
			len = so->so_oobmark - offset;
		if (len > m->m_len - moff)
			len = m->m_len - moff;
		/*
		 * If mp is set, just pass back the mbufs.
		 * Otherwise copy them out via the uio, then free.
		 * Sockbuf must be consistent here (points to current mbuf,
		 * it points to next record) when we drop priority;
		 * we must note any additions to the sockbuf when we
		 * block interrupts again.
		 */
		if (mp == 0) {
			splx(s);
			error = uiomove(mtod(m, caddr_t) + moff, (int)len, uio);
			s = splnet();
			if (error)
				goto release;
		} else
			uio->uio_resid -= len;
		if (len == m->m_len - moff) {
			/* Consumed the whole mbuf. */
			if (m->m_flags & M_EOR)
				flags |= MSG_EOR;
			if (flags & MSG_PEEK) {
				m = m->m_next;
				moff = 0;
			} else {
				nextrecord = m->m_nextpkt;
				sbfree(&so->so_rcv, m);
				if (mp) {
					*mp = m;
					mp = &m->m_next;
					so->so_rcv.sb_mb = m = m->m_next;
					*mp = (struct mbuf *)0;
				} else {
					MFREE(m, so->so_rcv.sb_mb);
					m = so->so_rcv.sb_mb;
				}
				if (m)
					m->m_nextpkt = nextrecord;
			}
		} else {
			/* Partial mbuf: trim it in place (or copy for mp). */
			if (flags & MSG_PEEK)
				moff += len;
			else {
				if (mp)
					*mp = m_copym(m, 0, len, M_WAIT);
				m->m_data += len;
				m->m_len -= len;
				so->so_rcv.sb_cc -= len;
			}
		}
		if (so->so_oobmark) {
			if ((flags & MSG_PEEK) == 0) {
				so->so_oobmark -= len;
				if (so->so_oobmark == 0) {
					so->so_state |= SS_RCVATMARK;
					break;
				}
			} else {
				offset += len;
				if (offset == so->so_oobmark)
					break;
			}
		}
		if (flags & MSG_EOR)
			break;
		/*
		 * If the MSG_WAITALL flag is set (for non-atomic socket),
		 * we must not quit until "uio->uio_resid == 0" or an error
		 * termination.  If a signal/timeout occurs, return
		 * with a short count but without error.
		 * Keep sockbuf locked against other readers.
		 */
		while (flags & MSG_WAITALL && m == 0 && uio->uio_resid > 0 &&
		    !sosendallatonce(so) && !nextrecord) {
			if (so->so_error || so->so_state & SS_CANTRCVMORE)
				break;
			error = sbwait(&so->so_rcv);
			if (error) {
				sbunlock(&so->so_rcv);
				splx(s);
				/* Short count, no error, per comment above. */
				return (0);
			}
			m = so->so_rcv.sb_mb;
			if (m)
				nextrecord = m->m_nextpkt;
		}
	}

	if (m && pr->pr_flags & PR_ATOMIC) {
		/* Unread remainder of an atomic record is discarded. */
		flags |= MSG_TRUNC;
		if ((flags & MSG_PEEK) == 0)
			(void) sbdroprecord(&so->so_rcv);
	}
	if ((flags & MSG_PEEK) == 0) {
		if (m == 0)
			so->so_rcv.sb_mb = nextrecord;
		if (pr->pr_flags & PR_WANTRCVD && so->so_pcb)
			(*pr->pr_usrreqs->pru_rcvd)(so, flags);
	}
	/* No progress and stream still open: go around again. */
	if (orig_resid == uio->uio_resid && orig_resid &&
	    (flags & MSG_EOR) == 0 && (so->so_state & SS_CANTRCVMORE) == 0) {
		sbunlock(&so->so_rcv);
		splx(s);
		goto restart;
	}

	if (flagsp)
		*flagsp |= flags;
release:
	sbunlock(&so->so_rcv);
	splx(s);
	return (error);
}

/*
 * Shut down part of a full-duplex connection.
 * how is the shutdown(2) argument (0/1/2); "how++" maps it onto the
 * FREAD/FWRITE bit pattern.
 */
int
soshutdown(so, how)
	register struct socket *so;
	register int how;
{
	register struct protosw *pr = so->so_proto;

	how++;
	if (how & FREAD)
		sorflush(so);
	if (how & FWRITE)
		return ((*pr->pr_usrreqs->pru_shutdown)(so));
	return (0);
}

/*
 * Flush the receive buffer, disposing of any in-transit rights.
 * The sockbuf is snapshotted and zeroed under splimp so appends
 * cannot race the teardown.
 */
void
sorflush(so)
	register struct socket *so;
{
	register struct sockbuf *sb = &so->so_rcv;
	register struct protosw *pr = so->so_proto;
	register int s;
	struct sockbuf asb;

	sb->sb_flags |= SB_NOINTR;
	(void) sblock(sb, M_WAITOK);
	s = splimp();
	socantrcvmore(so);
	sbunlock(sb);
	asb = *sb;
	bzero((caddr_t)sb, sizeof (*sb));
	splx(s);
	if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose)
		(*pr->pr_domain->dom_dispose)(asb.sb_mb);
	sbrelease(&asb);
}

/*
 * Kernel side of setsockopt(2); option value arrives in mbuf m0,
 * which is consumed here (or handed to the protocol).
 */
int
sosetopt(so, level, optname, m0, p)
	register struct socket *so;
	int level, optname;
	struct mbuf *m0;
	struct proc *p;
{
	int error = 0;
	register struct mbuf *m = m0;

	if (level != SOL_SOCKET) {
		/* Non-socket-level options go straight to the protocol. */
		if (so->so_proto && so->so_proto->pr_ctloutput)
			return ((*so->so_proto->pr_ctloutput)
				(PRCO_SETOPT, so, level, optname, &m0, p));
		error = ENOPROTOOPT;
	} else {
		switch (optname) {

		case SO_LINGER:
			if (m == NULL || m->m_len != sizeof (struct linger)) {
				error = EINVAL;
				goto bad;
			}
			so->so_linger = mtod(m, struct linger *)->l_linger;
			/* fall thru... to set/clear the SO_LINGER bit */

		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_DONTROUTE:
		case SO_USELOOPBACK:
		case SO_BROADCAST:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}
			if (*mtod(m, int *))
				so->so_options |= optname;
			else
				so->so_options &= ~optname;
			break;

		case SO_SNDBUF:
		case SO_RCVBUF:
		case SO_SNDLOWAT:
		case SO_RCVLOWAT:
		    {
			int optval;

			if (m == NULL || m->m_len < sizeof (int)) {
				error = EINVAL;
				goto bad;
			}

			/*
			 * Values < 1 make no sense for any of these
			 * options, so disallow them.
			 */
			optval = *mtod(m, int *);
			if (optval < 1) {
				error = EINVAL;
				goto bad;
			}

			switch (optname) {

			case SO_SNDBUF:
			case SO_RCVBUF:
				if (sbreserve(optname == SO_SNDBUF ?
				    &so->so_snd : &so->so_rcv,
				    (u_long) optval) == 0) {
					error = ENOBUFS;
					goto bad;
				}
				break;

			/*
			 * Make sure the low-water is never greater than
			 * the high-water.
			 */
			case SO_SNDLOWAT:
				so->so_snd.sb_lowat =
				    (optval > so->so_snd.sb_hiwat) ?
				    so->so_snd.sb_hiwat : optval;
				break;
			case SO_RCVLOWAT:
				so->so_rcv.sb_lowat =
				    (optval > so->so_rcv.sb_hiwat) ?
				    so->so_rcv.sb_hiwat : optval;
				break;
			}
			break;
		    }

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			struct timeval *tv;
			short val;

			if (m == NULL || m->m_len < sizeof (*tv)) {
				error = EINVAL;
				goto bad;
			}
			tv = mtod(m, struct timeval *);
			/* sb_timeo is a short tick count; reject overflow. */
			if (tv->tv_sec > SHRT_MAX / hz - hz) {
				error = EDOM;
				goto bad;
			}
			val = tv->tv_sec * hz + tv->tv_usec / tick;

			switch (optname) {
			case SO_SNDTIMEO:
				so->so_snd.sb_timeo = val;
				break;
			case SO_RCVTIMEO:
				so->so_rcv.sb_timeo = val;
				break;
			}
			break;
		    }

		default:
			error = ENOPROTOOPT;
			break;
		}
		/* Give the protocol a chance to see socket-level options. */
		if (error == 0 && so->so_proto && so->so_proto->pr_ctloutput) {
			(void) ((*so->so_proto->pr_ctloutput)
				(PRCO_SETOPT, so, level, optname, &m0, p));
			m = NULL;	/* freed by protocol */
		}
	}
bad:
	if (m)
		(void) m_free(m);
	return (error);
}

/*
 * Kernel side of getsockopt(2); result is returned to the caller in a
 * freshly allocated mbuf via *mp.
 */
int
sogetopt(so, level, optname, mp, p)
	register struct socket *so;
	int level, optname;
	struct mbuf **mp;
	struct proc *p;
{
	register struct mbuf *m;

	if (level != SOL_SOCKET) {
		if (so->so_proto && so->so_proto->pr_ctloutput) {
			return ((*so->so_proto->pr_ctloutput)
				(PRCO_GETOPT, so, level, optname, mp, p));
		} else
			return (ENOPROTOOPT);
	} else {
		m = m_get(M_WAIT, MT_SOOPTS);
		m->m_len = sizeof (int);

		switch (optname) {

		case SO_LINGER:
			m->m_len = sizeof (struct linger);
			mtod(m, struct linger *)->l_onoff =
				so->so_options & SO_LINGER;
			mtod(m, struct linger *)->l_linger = so->so_linger;
			break;

		case SO_USELOOPBACK:
		case SO_DONTROUTE:
		case SO_DEBUG:
		case SO_KEEPALIVE:
		case SO_REUSEADDR:
		case SO_REUSEPORT:
		case SO_BROADCAST:
		case SO_OOBINLINE:
		case SO_TIMESTAMP:
			*mtod(m, int *) = so->so_options & optname;
			break;

		case SO_TYPE:
			*mtod(m, int *) = so->so_type;
			break;

		case SO_ERROR:
			*mtod(m, int *) = so->so_error;
			so->so_error = 0;
			break;

		case SO_SNDBUF:
			*mtod(m, int *) = so->so_snd.sb_hiwat;
			break;

		case SO_RCVBUF:
			*mtod(m, int *) = so->so_rcv.sb_hiwat;
			break;

		case SO_SNDLOWAT:
			*mtod(m, int *) = so->so_snd.sb_lowat;
			break;

		case SO_RCVLOWAT:
			*mtod(m, int *) = so->so_rcv.sb_lowat;
			break;

		case SO_SNDTIMEO:
		case SO_RCVTIMEO:
		    {
			/* Convert the stored tick count back to a timeval. */
			int val = (optname == SO_SNDTIMEO ?
			    so->so_snd.sb_timeo : so->so_rcv.sb_timeo);

			m->m_len = sizeof(struct timeval);
			mtod(m, struct timeval *)->tv_sec = val / hz;
			mtod(m, struct timeval *)->tv_usec =
			    (val % hz) * tick;
			break;
		    }

		default:
			(void)m_free(m);
			return (ENOPROTOOPT);
		}
		*mp = m;
		return (0);
	}
}

/*
 * Deliver SIGURG to the socket's owning process or process group
 * and wake up any selectors on the receive buffer.
 */
void
sohasoutofband(so)
	register struct socket *so;
{
	struct proc *p;

	if (so->so_pgid < 0)
		gsignal(-so->so_pgid, SIGURG);
	else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0)
		psignal(p, SIGURG);
	selwakeup(&so->so_rcv.sb_sel);
}

/*
 * poll(2) backend for sockets: report readiness, or record the
 * selecting process on the relevant sockbuf(s).
 */
int
sopoll(struct socket *so, int events, struct ucred *cred, struct proc *p)
{
	int revents = 0;
	int s = splnet();

	if (events & (POLLIN | POLLRDNORM))
		if (soreadable(so))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (sowriteable(so))
			revents |= events & (POLLOUT | POLLWRNORM);

	if (events & (POLLPRI | POLLRDBAND))
		if (so->so_oobmark || (so->so_state & SS_RCVATMARK))
			revents |= events & (POLLPRI | POLLRDBAND);

	if (revents == 0) {
		if (events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
			selrecord(p, &so->so_rcv.sb_sel);
			so->so_rcv.sb_flags |= SB_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(p, &so->so_snd.sb_sel);
			so->so_snd.sb_flags |= SB_SEL;
		}
	}

	splx(s);
	return (revents);
}
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index c1a5826315c2..76470e9c9f5e 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -1,1276 +1,1277 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1990, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94 - * $Id: uipc_syscalls.c,v 1.35 1997/12/16 17:40:30 eivind Exp $ + * $Id: uipc_syscalls.c,v 1.36 1998/02/09 06:09:27 eivind Exp $ */ #include "opt_compat.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #ifdef KTRACE #include #endif static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags)); static int recvit __P((struct proc *p, int s, struct msghdr *mp, caddr_t namelenp)); static int accept1 __P((struct proc *p, struct accept_args *uap, int compat)); static int getsockname1 __P((struct proc *p, struct getsockname_args *uap, int compat)); static int getpeername1 __P((struct proc *p, struct getpeername_args *uap, int compat)); /* * System call interface to the socket abstraction. */ #if defined(COMPAT_43) || defined(COMPAT_SUNOS) #define COMPAT_OLDSOCK #endif extern struct fileops socketops; int socket(p, uap) struct proc *p; register struct socket_args /* { int domain; int type; int protocol; } */ *uap; { struct filedesc *fdp = p->p_fd; struct socket *so; struct file *fp; int fd, error; error = falloc(p, &fp, &fd); if (error) return (error); fp->f_flag = FREAD|FWRITE; fp->f_type = DTYPE_SOCKET; fp->f_ops = &socketops; error = socreate(uap->domain, &so, uap->type, uap->protocol, p); if (error) { fdp->fd_ofiles[fd] = 0; ffree(fp); } else { fp->f_data = (caddr_t)so; p->p_retval[0] = fd; } return (error); } /* ARGSUSED */ int bind(p, uap) struct proc *p; register struct bind_args /* { int s; caddr_t name; int namelen; } */ *uap; { struct file *fp; struct sockaddr *sa; int error; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); error = getsockaddr(&sa, uap->name, uap->namelen); if (error) return (error); error = sobind((struct socket *)fp->f_data, sa, p); FREE(sa, M_SONAME); return (error); } /* ARGSUSED */ int listen(p, uap) struct proc *p; register struct listen_args /* { int s; int 
backlog; } */ *uap; { struct file *fp; int error; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); return (solisten((struct socket *)fp->f_data, uap->backlog, p)); } static int accept1(p, uap, compat) struct proc *p; register struct accept_args /* { int s; caddr_t name; int *anamelen; } */ *uap; int compat; { struct file *fp; struct sockaddr *sa; int namelen, error, s; struct socket *head, *so; short fflag; /* type must match fp->f_flag */ if (uap->name) { error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen, sizeof (namelen)); if(error) return (error); } error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); s = splnet(); head = (struct socket *)fp->f_data; if ((head->so_options & SO_ACCEPTCONN) == 0) { splx(s); return (EINVAL); } if ((head->so_state & SS_NBIO) && head->so_comp.tqh_first == NULL) { splx(s); return (EWOULDBLOCK); } while (head->so_comp.tqh_first == NULL && head->so_error == 0) { if (head->so_state & SS_CANTRCVMORE) { head->so_error = ECONNABORTED; break; } error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH, "accept", 0); if (error) { splx(s); return (error); } } if (head->so_error) { error = head->so_error; head->so_error = 0; splx(s); return (error); } /* * At this point we know that there is at least one connection * ready to be accepted. Remove it from the queue prior to * allocating the file descriptor for it since falloc() may * block allowing another process to accept the connection * instead. */ so = head->so_comp.tqh_first; TAILQ_REMOVE(&head->so_comp, so, so_list); head->so_qlen--; fflag = fp->f_flag; error = falloc(p, &fp, p->p_retval); if (error) { /* * Probably ran out of file descriptors. Put the * unaccepted connection back onto the queue and * do another wakeup so some other process might * have a chance at it. 
*/ TAILQ_INSERT_HEAD(&head->so_comp, so, so_list); head->so_qlen++; wakeup_one(&head->so_timeo); splx(s); return (error); } so->so_state &= ~SS_COMP; so->so_head = NULL; fp->f_type = DTYPE_SOCKET; fp->f_flag = fflag; fp->f_ops = &socketops; fp->f_data = (caddr_t)so; sa = 0; (void) soaccept(so, &sa); if (sa == 0) { namelen = 0; if (uap->name) goto gotnoname; return 0; } if (uap->name) { /* check sa_len before it is destroyed */ if (namelen > sa->sa_len) namelen = sa->sa_len; #ifdef COMPAT_OLDSOCK if (compat) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif error = copyout(sa, (caddr_t)uap->name, (u_int)namelen); if (!error) gotnoname: error = copyout((caddr_t)&namelen, (caddr_t)uap->anamelen, sizeof (*uap->anamelen)); } FREE(sa, M_SONAME); splx(s); return (error); } int accept(p, uap) struct proc *p; struct accept_args *uap; { return (accept1(p, uap, 0)); } #ifdef COMPAT_OLDSOCK int oaccept(p, uap) struct proc *p; struct accept_args *uap; { return (accept1(p, uap, 1)); } #endif /* COMPAT_OLDSOCK */ /* ARGSUSED */ int connect(p, uap) struct proc *p; register struct connect_args /* { int s; caddr_t name; int namelen; } */ *uap; { struct file *fp; register struct socket *so; struct sockaddr *sa; int error, s; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); so = (struct socket *)fp->f_data; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) return (EALREADY); error = getsockaddr(&sa, uap->name, uap->namelen); if (error) return (error); error = soconnect(so, sa, p); if (error) goto bad; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { FREE(sa, M_SONAME); return (EINPROGRESS); } s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH, "connec", 0); if (error) break; } if (error == 0) { error = so->so_error; so->so_error = 0; } splx(s); bad: so->so_state &= ~SS_ISCONNECTING; FREE(sa, M_SONAME); if (error == ERESTART) error = EINTR; 
return (error); } int socketpair(p, uap) struct proc *p; register struct socketpair_args /* { int domain; int type; int protocol; int *rsv; } */ *uap; { register struct filedesc *fdp = p->p_fd; struct file *fp1, *fp2; struct socket *so1, *so2; int fd, error, sv[2]; error = socreate(uap->domain, &so1, uap->type, uap->protocol, p); if (error) return (error); error = socreate(uap->domain, &so2, uap->type, uap->protocol, p); if (error) goto free1; error = falloc(p, &fp1, &fd); if (error) goto free2; sv[0] = fd; fp1->f_flag = FREAD|FWRITE; fp1->f_type = DTYPE_SOCKET; fp1->f_ops = &socketops; fp1->f_data = (caddr_t)so1; error = falloc(p, &fp2, &fd); if (error) goto free3; fp2->f_flag = FREAD|FWRITE; fp2->f_type = DTYPE_SOCKET; fp2->f_ops = &socketops; fp2->f_data = (caddr_t)so2; sv[1] = fd; error = soconnect2(so1, so2); if (error) goto free4; if (uap->type == SOCK_DGRAM) { /* * Datagram socket connection is asymmetric. */ error = soconnect2(so2, so1); if (error) goto free4; } error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); return (error); free4: ffree(fp2); fdp->fd_ofiles[sv[1]] = 0; free3: ffree(fp1); fdp->fd_ofiles[sv[0]] = 0; free2: (void)soclose(so2); free1: (void)soclose(so1); return (error); } static int sendit(p, s, mp, flags) register struct proc *p; int s; register struct msghdr *mp; int flags; { struct file *fp; struct uio auio; register struct iovec *iov; register int i; struct mbuf *control; struct sockaddr *to; int len, error; struct socket *so; #ifdef KTRACE struct iovec *ktriov = NULL; #endif error = getsock(p->p_fd, s, &fp); if (error) return (error); auio.uio_iov = mp->msg_iov; auio.uio_iovcnt = mp->msg_iovlen; auio.uio_segflg = UIO_USERSPACE; auio.uio_rw = UIO_WRITE; auio.uio_procp = p; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) return (EINVAL); } if (mp->msg_name) { error = getsockaddr(&to, mp->msg_name, 
mp->msg_namelen); if (error) return (error); } else to = 0; if (mp->msg_control) { if (mp->msg_controllen < sizeof(struct cmsghdr) #ifdef COMPAT_OLDSOCK && mp->msg_flags != MSG_COMPAT #endif ) { error = EINVAL; goto bad; } error = sockargs(&control, mp->msg_control, mp->msg_controllen, MT_CONTROL); if (error) goto bad; #ifdef COMPAT_OLDSOCK if (mp->msg_flags == MSG_COMPAT) { register struct cmsghdr *cm; M_PREPEND(control, sizeof(*cm), M_WAIT); if (control == 0) { error = ENOBUFS; goto bad; } else { cm = mtod(control, struct cmsghdr *); cm->cmsg_len = control->m_len; cm->cmsg_level = SOL_SOCKET; cm->cmsg_type = SCM_RIGHTS; } } #endif } else control = 0; #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO)) { int iovlen = auio.uio_iovcnt * sizeof (struct iovec); MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); } #endif len = auio.uio_resid; so = (struct socket *)fp->f_data; error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control, flags, p); if (error) { if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; if (error == EPIPE) psignal(p, SIGPIPE); } if (error == 0) p->p_retval[0] = len - auio.uio_resid; #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p->p_tracep, s, UIO_WRITE, ktriov, p->p_retval[0], error); FREE(ktriov, M_TEMP); } #endif bad: if (to) FREE(to, M_SONAME); return (error); } int sendto(p, uap) struct proc *p; register struct sendto_args /* { int s; caddr_t buf; size_t len; int flags; caddr_t to; int tolen; } */ *uap; { struct msghdr msg; struct iovec aiov; msg.msg_name = uap->to; msg.msg_namelen = uap->tolen; msg.msg_iov = &aiov; msg.msg_iovlen = 1; msg.msg_control = 0; #ifdef COMPAT_OLDSOCK msg.msg_flags = 0; #endif aiov.iov_base = uap->buf; aiov.iov_len = uap->len; return (sendit(p, uap->s, &msg, uap->flags)); } #ifdef COMPAT_OLDSOCK int osend(p, uap) struct proc *p; register struct osend_args /* { int s; caddr_t buf; int 
len; int flags; } */ *uap; { struct msghdr msg; struct iovec aiov; msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = 0; return (sendit(p, uap->s, &msg, uap->flags)); } int osendmsg(p, uap) struct proc *p; register struct osendmsg_args /* { int s; caddr_t msg; int flags; } */ *uap; { struct msghdr msg; struct iovec aiov[UIO_SMALLIOV], *iov; int error; error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); if (error) return (error); if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) return (EMSGSIZE); MALLOC(iov, struct iovec *, sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, M_WAITOK); } else iov = aiov; error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); if (error) goto done; msg.msg_flags = MSG_COMPAT; msg.msg_iov = iov; error = sendit(p, uap->s, &msg, uap->flags); done: if (iov != aiov) FREE(iov, M_IOV); return (error); } #endif int sendmsg(p, uap) struct proc *p; register struct sendmsg_args /* { int s; caddr_t msg; int flags; } */ *uap; { struct msghdr msg; struct iovec aiov[UIO_SMALLIOV], *iov; int error; error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg)); if (error) return (error); if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) return (EMSGSIZE); MALLOC(iov, struct iovec *, sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, M_WAITOK); } else iov = aiov; if (msg.msg_iovlen && (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, (unsigned)(msg.msg_iovlen * sizeof (struct iovec))))) goto done; msg.msg_iov = iov; #ifdef COMPAT_OLDSOCK msg.msg_flags = 0; #endif error = sendit(p, uap->s, &msg, uap->flags); done: if (iov != aiov) FREE(iov, M_IOV); return (error); } static int recvit(p, s, mp, namelenp) register struct proc *p; int s; register struct msghdr *mp; caddr_t namelenp; { struct file *fp; struct 
uio auio; register struct iovec *iov; register int i; int len, error; struct mbuf *m, *control = 0; caddr_t ctlbuf; struct socket *so; struct sockaddr *fromsa = 0; #ifdef KTRACE struct iovec *ktriov = NULL; #endif error = getsock(p->p_fd, s, &fp); if (error) return (error); auio.uio_iov = mp->msg_iov; auio.uio_iovcnt = mp->msg_iovlen; auio.uio_segflg = UIO_USERSPACE; auio.uio_rw = UIO_READ; auio.uio_procp = p; auio.uio_offset = 0; /* XXX */ auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) return (EINVAL); } #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO)) { int iovlen = auio.uio_iovcnt * sizeof (struct iovec); MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); } #endif len = auio.uio_resid; so = (struct socket *)fp->f_data; error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio, (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0, &mp->msg_flags); if (error) { if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } #ifdef KTRACE if (ktriov != NULL) { if (error == 0) ktrgenio(p->p_tracep, s, UIO_READ, ktriov, len - auio.uio_resid, error); FREE(ktriov, M_TEMP); } #endif if (error) goto out; p->p_retval[0] = len - auio.uio_resid; if (mp->msg_name) { len = mp->msg_namelen; if (len <= 0 || fromsa == 0) len = 0; else { #ifndef MIN #define MIN(a,b) ((a)>(b)?(b):(a)) #endif /* save sa_len before it is destroyed by MSG_COMPAT */ len = MIN(len, fromsa->sa_len); #ifdef COMPAT_OLDSOCK if (mp->msg_flags & MSG_COMPAT) ((struct osockaddr *)fromsa)->sa_family = fromsa->sa_family; #endif error = copyout(fromsa, (caddr_t)mp->msg_name, (unsigned)len); if (error) goto out; } mp->msg_namelen = len; if (namelenp && (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) { #ifdef COMPAT_OLDSOCK if (mp->msg_flags & MSG_COMPAT) error = 0; /* old recvfrom didn't check */ else #endif 
goto out; } } if (mp->msg_control) { #ifdef COMPAT_OLDSOCK /* * We assume that old recvmsg calls won't receive access * rights and other control info, esp. as control info * is always optional and those options didn't exist in 4.3. * If we receive rights, trim the cmsghdr; anything else * is tossed. */ if (control && mp->msg_flags & MSG_COMPAT) { if (mtod(control, struct cmsghdr *)->cmsg_level != SOL_SOCKET || mtod(control, struct cmsghdr *)->cmsg_type != SCM_RIGHTS) { mp->msg_controllen = 0; goto out; } control->m_len -= sizeof (struct cmsghdr); control->m_data += sizeof (struct cmsghdr); } #endif len = mp->msg_controllen; m = control; mp->msg_controllen = 0; ctlbuf = (caddr_t) mp->msg_control; while (m && len > 0) { unsigned int tocopy; if (len >= m->m_len) tocopy = m->m_len; else { mp->msg_flags |= MSG_CTRUNC; tocopy = len; } if (error = copyout((caddr_t)mtod(m, caddr_t), ctlbuf, tocopy)) goto out; ctlbuf += tocopy; len -= tocopy; m = m->m_next; } mp->msg_controllen = ctlbuf - mp->msg_control; } out: if (fromsa) FREE(fromsa, M_SONAME); if (control) m_freem(control); return (error); } int recvfrom(p, uap) struct proc *p; register struct recvfrom_args /* { int s; caddr_t buf; size_t len; int flags; caddr_t from; int *fromlenaddr; } */ *uap; { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin((caddr_t)uap->fromlenaddr, (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen)); if (error) return (error); } else msg.msg_namelen = 0; msg.msg_name = uap->from; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = uap->flags; return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr)); } #ifdef COMPAT_OLDSOCK int orecvfrom(p, uap) struct proc *p; struct recvfrom_args *uap; { uap->flags |= MSG_COMPAT; return (recvfrom(p, uap)); } #endif #ifdef COMPAT_OLDSOCK int orecv(p, uap) struct proc *p; register struct orecv_args /* { int s; caddr_t buf; int len; int flags; } 
*/ *uap; { struct msghdr msg; struct iovec aiov; msg.msg_name = 0; msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = uap->buf; aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = uap->flags; return (recvit(p, uap->s, &msg, (caddr_t)0)); } /* * Old recvmsg. This code takes advantage of the fact that the old msghdr * overlays the new one, missing only the flags, and with the (old) access * rights where the control fields are now. */ int orecvmsg(p, uap) struct proc *p; register struct orecvmsg_args /* { int s; struct omsghdr *msg; int flags; } */ *uap; { struct msghdr msg; struct iovec aiov[UIO_SMALLIOV], *iov; int error; error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (struct omsghdr)); if (error) return (error); if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) return (EMSGSIZE); MALLOC(iov, struct iovec *, sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, M_WAITOK); } else iov = aiov; msg.msg_flags = uap->flags | MSG_COMPAT; error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov, (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); if (error) goto done; msg.msg_iov = iov; error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen); if (msg.msg_controllen && error == 0) error = copyout((caddr_t)&msg.msg_controllen, (caddr_t)&uap->msg->msg_accrightslen, sizeof (int)); done: if (iov != aiov) FREE(iov, M_IOV); return (error); } #endif int recvmsg(p, uap) struct proc *p; register struct recvmsg_args /* { int s; struct msghdr *msg; int flags; } */ *uap; { struct msghdr msg; struct iovec aiov[UIO_SMALLIOV], *uiov, *iov; register int error; error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg)); if (error) return (error); if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) { if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) return (EMSGSIZE); MALLOC(iov, struct iovec *, sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV, M_WAITOK); } else iov = aiov; #ifdef COMPAT_OLDSOCK msg.msg_flags = 
uap->flags &~ MSG_COMPAT; #else msg.msg_flags = uap->flags; #endif uiov = msg.msg_iov; msg.msg_iov = iov; error = copyin((caddr_t)uiov, (caddr_t)iov, (unsigned)(msg.msg_iovlen * sizeof (struct iovec))); if (error) goto done; error = recvit(p, uap->s, &msg, (caddr_t)0); if (!error) { msg.msg_iov = uiov; error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg)); } done: if (iov != aiov) FREE(iov, M_IOV); return (error); } /* ARGSUSED */ int shutdown(p, uap) struct proc *p; register struct shutdown_args /* { int s; int how; } */ *uap; { struct file *fp; int error; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); return (soshutdown((struct socket *)fp->f_data, uap->how)); } /* ARGSUSED */ int setsockopt(p, uap) struct proc *p; register struct setsockopt_args /* { int s; int level; int name; caddr_t val; int valsize; } */ *uap; { struct file *fp; struct mbuf *m = NULL; int error; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); if (uap->valsize > MLEN) return (EINVAL); if (uap->val) { m = m_get(M_WAIT, MT_SOOPTS); if (m == NULL) return (ENOBUFS); error = copyin(uap->val, mtod(m, caddr_t), (u_int)uap->valsize); if (error) { (void) m_free(m); return (error); } m->m_len = uap->valsize; } return (sosetopt((struct socket *)fp->f_data, uap->level, uap->name, m, p)); } /* ARGSUSED */ int getsockopt(p, uap) struct proc *p; register struct getsockopt_args /* { int s; int level; int name; caddr_t val; int *avalsize; } */ *uap; { struct file *fp; struct mbuf *m = NULL, *m0; int op, i, valsize, error; error = getsock(p->p_fd, uap->s, &fp); if (error) return (error); if (uap->val) { error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, sizeof (valsize)); if (error) return (error); } else valsize = 0; if ((error = sogetopt((struct socket *)fp->f_data, uap->level, uap->name, &m, p)) == 0 && uap->val && valsize && m != NULL) { op = 0; while (m && !error && op < valsize) { i = min(m->m_len, (valsize - op)); error = copyout(mtod(m, caddr_t), 
uap->val, (u_int)i); op += i; uap->val += i; m0 = m; MFREE(m0,m); } valsize = op; if (error == 0) error = copyout((caddr_t)&valsize, (caddr_t)uap->avalsize, sizeof (valsize)); } if (m != NULL) (void) m_free(m); return (error); } /* * Get socket name. */ /* ARGSUSED */ static int getsockname1(p, uap, compat) struct proc *p; register struct getsockname_args /* { int fdes; caddr_t asa; int *alen; } */ *uap; int compat; { struct file *fp; register struct socket *so; struct sockaddr *sa; int len, error; error = getsock(p->p_fd, uap->fdes, &fp); if (error) return (error); error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); if (error) return (error); so = (struct socket *)fp->f_data; sa = 0; error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); if (error) goto bad; if (sa == 0) { len = 0; goto gotnothing; } len = MIN(len, sa->sa_len); #ifdef COMPAT_OLDSOCK if (compat) ((struct osockaddr *)sa)->sa_family = sa->sa_family; #endif error = copyout(sa, (caddr_t)uap->asa, (u_int)len); if (error == 0) gotnothing: error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); bad: if (sa) FREE(sa, M_SONAME); return (error); } int getsockname(p, uap) struct proc *p; struct getsockname_args *uap; { return (getsockname1(p, uap, 0)); } #ifdef COMPAT_OLDSOCK int ogetsockname(p, uap) struct proc *p; struct getsockname_args *uap; { return (getsockname1(p, uap, 1)); } #endif /* COMPAT_OLDSOCK */ /* * Get name of peer for connected socket. 
 */
/* ARGSUSED */
static int
getpeername1(p, uap, compat)
	struct proc *p;
	register struct getpeername_args /* {
		int	fdes;
		caddr_t	asa;
		int	*alen;
	} */ *uap;
	int compat;
{
	struct file *fp;
	register struct socket *so;
	struct sockaddr *sa;
	int len, error;

	error = getsock(p->p_fd, uap->fdes, &fp);
	if (error)
		return (error);
	so = (struct socket *)fp->f_data;
	/* a peer address only exists on a connected/confirming socket */
	if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0)
		return (ENOTCONN);
	/* fetch the size of the caller's address buffer */
	error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
	if (error)
		return (error);
	sa = 0;
	/* the protocol allocates and fills *sa (freed below) */
	error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
	if (error)
		goto bad;
	if (sa == 0) {
		/* no address available: report zero length only */
		len = 0;
		goto gotnothing;
	}
	/* never copy out more than the caller asked for */
	len = MIN(len, sa->sa_len);
#ifdef COMPAT_OLDSOCK
	if (compat)
		/* old sockaddr layout: 16-bit family field, no sa_len */
		((struct osockaddr *)sa)->sa_family = sa->sa_family;
#endif
	error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
	if (error)
		goto bad;
gotnothing:
	error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
bad:
	if (sa)
		FREE(sa, M_SONAME);
	return (error);
}

/*
 * getpeername() system call (new, sa_len-style sockaddr).
 */
int
getpeername(p, uap)
	struct proc *p;
	struct getpeername_args *uap;
{

	return (getpeername1(p, uap, 0));
}

#ifdef COMPAT_OLDSOCK
/*
 * 4.3BSD-compatible getpeername() (osockaddr result).
 */
int
ogetpeername(p, uap)
	struct proc *p;
	struct ogetpeername_args *uap;
{

	/* XXX uap should have type `getpeername_args *' to begin with. */
	return (getpeername1(p, (struct getpeername_args *)uap, 1));
}
#endif /* COMPAT_OLDSOCK */

/*
 * Copy a user-supplied argument block (typically a sockaddr) into a
 * freshly allocated mbuf of the given type; *mp is set on success.
 */
int
sockargs(mp, buf, buflen, type)
	struct mbuf **mp;
	caddr_t buf;
	int buflen, type;
{
	register struct sockaddr *sa;
	register struct mbuf *m;
	int error;

	if ((u_int)buflen > MLEN) {
#ifdef COMPAT_OLDSOCK
		if (type == MT_SONAME && (u_int)buflen <= 112)
			buflen = MLEN;		/* unix domain compat.
hack */ else #endif return (EINVAL); } m = m_get(M_WAIT, type); if (m == NULL) return (ENOBUFS); m->m_len = buflen; error = copyin(buf, mtod(m, caddr_t), (u_int)buflen); if (error) (void) m_free(m); else { *mp = m; if (type == MT_SONAME) { sa = mtod(m, struct sockaddr *); #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX) sa->sa_family = sa->sa_len; #endif sa->sa_len = buflen; } } return (error); } int getsockaddr(namp, uaddr, len) struct sockaddr **namp; caddr_t uaddr; size_t len; { struct sockaddr *sa; int error; if (len > SOCK_MAXADDRLEN) return ENAMETOOLONG; MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK); error = copyin(uaddr, sa, len); if (error) { FREE(sa, M_SONAME); } else { #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN if (sa->sa_family == 0 && sa->sa_len < AF_MAX) sa->sa_family = sa->sa_len; #endif sa->sa_len = len; *namp = sa; } return error; } int getsock(fdp, fdes, fpp) struct filedesc *fdp; int fdes; struct file **fpp; { register struct file *fp; if ((unsigned)fdes >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fdes]) == NULL) return (EBADF); if (fp->f_type != DTYPE_SOCKET) return (ENOTSOCK); *fpp = fp; return (0); } diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 3f740147eb11..33e524ebce0f 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -1,2052 +1,2053 @@ /* * Copyright (c) 1997 John S. Dyson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. John S. Dyson's name may not be used to endorse or promote products * derived from this software without specific prior written permission. * * DISCLAIMER: This code isn't warranted to do anything useful. 
Anything * bad that happens because of using this software isn't the responsibility * of the author. This software is distributed AS-IS. * - * $Id: vfs_aio.c,v 1.23 1998/02/09 06:09:28 eivind Exp $ + * $Id: vfs_aio.c,v 1.24 1998/02/25 06:30:15 bde Exp $ */ /* * This file contains support for the POSIX.4 AIO/LIO facility. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include static int jobrefid; #define JOBST_NULL 0x0 #define JOBST_JOBQPROC 0x1 #define JOBST_JOBQGLOBAL 0x2 #define JOBST_JOBRUNNING 0x3 #define JOBST_JOBFINISHED 0x4 #define JOBST_JOBQBUF 0x5 #define JOBST_JOBBFINISHED 0x6 #ifndef MAX_AIO_PER_PROC #define MAX_AIO_PER_PROC 32 #endif #ifndef MAX_AIO_QUEUE_PER_PROC #define MAX_AIO_QUEUE_PER_PROC 256 /* Bigger than AIO_LISTIO_MAX */ #endif #ifndef MAX_AIO_PROCS #define MAX_AIO_PROCS 32 #endif #ifndef MAX_AIO_QUEUE #define MAX_AIO_QUEUE 1024 /* Bigger than AIO_LISTIO_MAX */ #endif #ifndef TARGET_AIO_PROCS #define TARGET_AIO_PROCS 0 #endif #ifndef MAX_BUF_AIO #define MAX_BUF_AIO 16 #endif #ifndef AIOD_TIMEOUT_DEFAULT #define AIOD_TIMEOUT_DEFAULT (10 * hz) #endif #ifndef AIOD_LIFETIME_DEFAULT #define AIOD_LIFETIME_DEFAULT (30 * hz) #endif static int max_aio_procs = MAX_AIO_PROCS; static int num_aio_procs = 0; static int target_aio_procs = TARGET_AIO_PROCS; static int max_queue_count = MAX_AIO_QUEUE; static int num_queue_count = 0; static int num_buf_aio = 0; static int num_aio_resv_start = 0; static int aiod_timeout; static int aiod_lifetime; static int max_aio_per_proc = MAX_AIO_PER_PROC, max_aio_queue_per_proc=MAX_AIO_QUEUE_PER_PROC; static int max_buf_aio = MAX_BUF_AIO; SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "AIO mgmt"); SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, CTLFLAG_RW, &max_aio_per_proc, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, 
max_aio_queue_per_proc, CTLFLAG_RW, &max_aio_queue_per_proc, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, CTLFLAG_RW, &max_aio_procs, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, CTLFLAG_RD, &num_aio_procs, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, CTLFLAG_RD, &num_queue_count, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, CTLFLAG_RW, &max_buf_aio, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, CTLFLAG_RD, &num_buf_aio, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, CTLFLAG_RW, &aiod_lifetime, 0, ""); SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_timeout, CTLFLAG_RW, &aiod_timeout, 0, ""); /* * Job queue item */ #define AIOCBLIST_CANCELLED 0x1 #define AIOCBLIST_RUNDOWN 0x4 #define AIOCBLIST_ASYNCFREE 0x8 #define AIOCBLIST_DONE 0x10 struct aiocblist { TAILQ_ENTRY (aiocblist) list; /* List of jobs */ TAILQ_ENTRY (aiocblist) plist; /* List of jobs for proc */ int jobflags; int jobstate; int inputcharge, outputcharge; struct buf *bp; /* buffer pointer */ struct proc *userproc; /* User process */ struct aioproclist *jobaioproc; /* AIO process descriptor */ struct aio_liojob *lio; /* optional lio job */ struct aiocb *uuaiocb; /* pointer in userspace of aiocb */ struct aiocb uaiocb; /* Kernel I/O control block */ }; /* * AIO process info */ #define AIOP_FREE 0x1 /* proc on free queue */ #define AIOP_SCHED 0x2 /* proc explicitly scheduled */ struct aioproclist { int aioprocflags; /* AIO proc flags */ TAILQ_ENTRY(aioproclist) list; /* List of processes */ struct proc *aioproc; /* The AIO thread */ TAILQ_HEAD (,aiocblist) jobtorun; /* suggested job to run */ }; /* * data-structure for lio signal management */ struct aio_liojob { int lioj_flags; int lioj_buffer_count; int lioj_buffer_finished_count; int lioj_queue_count; int lioj_queue_finished_count; struct sigevent 
lioj_signal; /* signal on all I/O done */ TAILQ_ENTRY (aio_liojob) lioj_list; struct kaioinfo *lioj_ki; }; #define LIOJ_SIGNAL 0x1 /* signal on all done (lio) */ #define LIOJ_SIGNAL_POSTED 0x2 /* signal has been posted */ /* * per process aio data structure */ struct kaioinfo { int kaio_flags; /* per process kaio flags */ int kaio_maxactive_count; /* maximum number of AIOs */ int kaio_active_count; /* number of currently used AIOs */ int kaio_qallowed_count; /* maxiumu size of AIO queue */ int kaio_queue_count; /* size of AIO queue */ int kaio_ballowed_count; /* maximum number of buffers */ int kaio_queue_finished_count; /* number of daemon jobs finished */ int kaio_buffer_count; /* number of physio buffers */ int kaio_buffer_finished_count; /* count of I/O done */ struct proc *kaio_p; /* process that uses this kaio block */ TAILQ_HEAD (,aio_liojob) kaio_liojoblist; /* list of lio jobs */ TAILQ_HEAD (,aiocblist) kaio_jobqueue; /* job queue for process */ TAILQ_HEAD (,aiocblist) kaio_jobdone; /* done queue for process */ TAILQ_HEAD (,aiocblist) kaio_bufqueue; /* buffer job queue for process */ TAILQ_HEAD (,aiocblist) kaio_bufdone; /* buffer done queue for process */ }; #define KAIO_RUNDOWN 0x1 /* process is being run down */ #define KAIO_WAKEUP 0x2 /* wakeup process when there is a significant event */ static TAILQ_HEAD (,aioproclist) aio_freeproc, aio_activeproc; static TAILQ_HEAD(,aiocblist) aio_jobs; /* Async job list */ static TAILQ_HEAD(,aiocblist) aio_bufjobs; /* Phys I/O job list */ static TAILQ_HEAD(,aiocblist) aio_freejobs; /* Pool of free jobs */ static void aio_init_aioinfo(struct proc *p) ; static void aio_onceonly(void *) ; static int aio_free_entry(struct aiocblist *aiocbe); static void aio_process(struct aiocblist *aiocbe); static int aio_newproc(void) ; static int aio_aqueue(struct proc *p, struct aiocb *job, int type) ; static void aio_physwakeup(struct buf *bp); static int aio_fphysio(struct proc *p, struct aiocblist *aiocbe, int type); static int 
aio_qphysio(struct proc *p, struct aiocblist *iocb);
static void aio_daemon(void *uproc);

/* Hook the one-time AIO initialization into system startup. */
SYSINIT(aio, SI_SUB_VFS, SI_ORDER_ANY, aio_onceonly, NULL);

/* vm_zones backing the AIO data structures (created in aio_onceonly()) */
static vm_zone_t kaio_zone=0, aiop_zone=0,
	aiocb_zone=0, aiol_zone=0, aiolio_zone=0;

/*
 * Single AIOD vmspace shared amongst all of them
 */
static struct vmspace *aiovmspace = NULL;

/*
 * Startup initialization
 */
void
aio_onceonly(void *na)
{
	TAILQ_INIT(&aio_freeproc);
	TAILQ_INIT(&aio_activeproc);
	TAILQ_INIT(&aio_jobs);
	TAILQ_INIT(&aio_bufjobs);
	TAILQ_INIT(&aio_freejobs);
	/* one zone per AIO structure type */
	kaio_zone = zinit("AIO", sizeof (struct kaioinfo), 0, 0, 1);
	aiop_zone = zinit("AIOP", sizeof (struct aioproclist), 0, 0, 1);
	aiocb_zone = zinit("AIOCB", sizeof (struct aiocblist), 0, 0, 1);
	aiol_zone = zinit("AIOL", AIO_LISTIO_MAX * sizeof (int), 0, 0, 1);
	aiolio_zone = zinit("AIOLIO",
	    AIO_LISTIO_MAX * sizeof (struct aio_liojob), 0, 0, 1);
	aiod_timeout = AIOD_TIMEOUT_DEFAULT;
	aiod_lifetime = AIOD_LIFETIME_DEFAULT;
	/* job ids start at 1; 0/-1 act as "no job" markers in userland */
	jobrefid = 1;
}

/*
 * Init the per-process aioinfo structure.
 * The aioinfo limits are set per-process for user limit (resource) management.
 */
void
aio_init_aioinfo(struct proc *p)
{
	struct kaioinfo *ki;

	/* allocated lazily, on the process's first AIO request */
	if (p->p_aioinfo == NULL) {
		ki = zalloc(kaio_zone);
		p->p_aioinfo = ki;
		ki->kaio_flags = 0;
		ki->kaio_maxactive_count = max_aio_per_proc;
		ki->kaio_active_count = 0;
		ki->kaio_qallowed_count = max_aio_queue_per_proc;
		ki->kaio_queue_count = 0;
		ki->kaio_ballowed_count = max_buf_aio;
		ki->kaio_buffer_count = 0;
		ki->kaio_buffer_finished_count = 0;
		/*
		 * NOTE(review): kaio_queue_finished_count is not set here;
		 * presumably zalloc() memory is relied upon -- verify it is
		 * zeroed or initialize it explicitly.
		 */
		ki->kaio_p = p;
		TAILQ_INIT(&ki->kaio_jobdone);
		TAILQ_INIT(&ki->kaio_jobqueue);
		TAILQ_INIT(&ki->kaio_bufdone);
		TAILQ_INIT(&ki->kaio_bufqueue);
		TAILQ_INIT(&ki->kaio_liojoblist);
	}
}

/*
 * Free a job entry.  Wait for completion if it is currently
 * active, but don't delay forever.  If we delay, we return
 * a flag that says that we have to restart the queue scan.
*/ int aio_free_entry(struct aiocblist *aiocbe) { struct kaioinfo *ki; struct aioproclist *aiop; struct aio_liojob *lj; struct proc *p; int error; int s; if (aiocbe->jobstate == JOBST_NULL) panic("aio_free_entry: freeing already free job"); p = aiocbe->userproc; ki = p->p_aioinfo; lj = aiocbe->lio; if (ki == NULL) panic("aio_free_entry: missing p->p_aioinfo"); if (aiocbe->jobstate == JOBST_JOBRUNNING) { if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) return 0; aiocbe->jobflags |= AIOCBLIST_RUNDOWN; tsleep(aiocbe, PRIBIO|PCATCH, "jobwai", 0); } aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; if (aiocbe->bp == NULL) { if (ki->kaio_queue_count <= 0) panic("aio_free_entry: process queue size <= 0"); if (num_queue_count <= 0) panic("aio_free_entry: system wide queue size <= 0"); if(lj) { lj->lioj_queue_count--; if (aiocbe->jobflags & AIOCBLIST_DONE) lj->lioj_queue_finished_count--; } ki->kaio_queue_count--; if (aiocbe->jobflags & AIOCBLIST_DONE) ki->kaio_queue_finished_count--; num_queue_count--; } else { if(lj) { lj->lioj_buffer_count--; if (aiocbe->jobflags & AIOCBLIST_DONE) lj->lioj_buffer_finished_count--; } if (aiocbe->jobflags & AIOCBLIST_DONE) ki->kaio_buffer_finished_count--; ki->kaio_buffer_count--; num_buf_aio--; } if ((ki->kaio_flags & KAIO_WAKEUP) || (ki->kaio_flags & KAIO_RUNDOWN) && ((ki->kaio_buffer_count == 0) && (ki->kaio_queue_count == 0))) { ki->kaio_flags &= ~KAIO_WAKEUP; wakeup(p); } if ( aiocbe->jobstate == JOBST_JOBQBUF) { if ((error = aio_fphysio(p, aiocbe, 1)) != 0) return error; if (aiocbe->jobstate != JOBST_JOBBFINISHED) panic("aio_free_entry: invalid physio finish-up state"); s = splbio(); TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist); splx(s); } else if ( aiocbe->jobstate == JOBST_JOBQPROC) { aiop = aiocbe->jobaioproc; TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); } else if ( aiocbe->jobstate == JOBST_JOBQGLOBAL) { TAILQ_REMOVE(&aio_jobs, aiocbe, list); } else if ( aiocbe->jobstate == JOBST_JOBFINISHED) { TAILQ_REMOVE(&ki->kaio_jobdone, aiocbe, 
plist); } else if ( aiocbe->jobstate == JOBST_JOBBFINISHED) { s = splbio(); TAILQ_REMOVE(&ki->kaio_bufdone, aiocbe, plist); splx(s); if (aiocbe->bp) { vunmapbuf(aiocbe->bp); relpbuf(aiocbe->bp); aiocbe->bp = NULL; } } if (lj && (lj->lioj_buffer_count == 0) && (lj->lioj_queue_count == 0)) { TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); zfree(aiolio_zone, lj); } TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); aiocbe->jobstate = JOBST_NULL; return 0; } /* * Rundown the jobs for a given process. */ void aio_proc_rundown(struct proc *p) { int s; struct kaioinfo *ki; struct aio_liojob *lj, *ljn; struct aiocblist *aiocbe, *aiocbn; ki = p->p_aioinfo; if (ki == NULL) return; ki->kaio_flags |= LIOJ_SIGNAL_POSTED; while ((ki->kaio_active_count > 0) || (ki->kaio_buffer_count > ki->kaio_buffer_finished_count)) { ki->kaio_flags |= KAIO_RUNDOWN; if (tsleep(p, PRIBIO, "kaiowt", aiod_timeout)) break; } restart1: for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobdone); aiocbe; aiocbe = aiocbn) { aiocbn = TAILQ_NEXT(aiocbe, plist); if (aio_free_entry(aiocbe)) goto restart1; } restart2: for ( aiocbe = TAILQ_FIRST(&ki->kaio_jobqueue); aiocbe; aiocbe = aiocbn) { aiocbn = TAILQ_NEXT(aiocbe, plist); if (aio_free_entry(aiocbe)) goto restart2; } /* * Note the use of lots of splbio here, trying to avoid * splbio for long chains of I/O. Probably unnecessary. 
*/ restart3: s = splbio(); while (TAILQ_FIRST(&ki->kaio_bufqueue)) { ki->kaio_flags |= KAIO_WAKEUP; tsleep (p, PRIBIO, "aioprn", 0); splx(s); goto restart3; } splx(s); restart4: s = splbio(); for ( aiocbe = TAILQ_FIRST(&ki->kaio_bufdone); aiocbe; aiocbe = aiocbn) { aiocbn = TAILQ_NEXT(aiocbe, plist); if (aio_free_entry(aiocbe)) { splx(s); goto restart4; } } splx(s); for ( lj = TAILQ_FIRST(&ki->kaio_liojoblist); lj; lj = ljn) { ljn = TAILQ_NEXT(lj, lioj_list); if ((lj->lioj_buffer_count == 0) && (lj->lioj_queue_count == 0)) { TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); zfree(aiolio_zone, lj); } else { #if defined(DIAGNOSTIC) printf("LIO job not cleaned up: B:%d, BF:%d, Q:%d, QF:%d\n", lj->lioj_buffer_count, lj->lioj_buffer_finished_count, lj->lioj_queue_count, lj->lioj_queue_finished_count); #endif } } zfree(kaio_zone, ki); p->p_aioinfo = NULL; } /* * Select a job to run (called by an AIO daemon) */ static struct aiocblist * aio_selectjob(struct aioproclist *aiop) { struct aiocblist *aiocbe; aiocbe = TAILQ_FIRST(&aiop->jobtorun); if (aiocbe) { TAILQ_REMOVE(&aiop->jobtorun, aiocbe, list); return aiocbe; } for (aiocbe = TAILQ_FIRST(&aio_jobs); aiocbe; aiocbe = TAILQ_NEXT(aiocbe, list)) { struct kaioinfo *ki; struct proc *userp; userp = aiocbe->userproc; ki = userp->p_aioinfo; if (ki->kaio_active_count < ki->kaio_maxactive_count) { TAILQ_REMOVE(&aio_jobs, aiocbe, list); return aiocbe; } } return NULL; } /* * The AIO processing activity. This is the code that does the * I/O request for the non-physio version of the operations. The * normal vn operations are used, and this code should work in * all instances for every type of file, including pipes, sockets, * fifos, and regular files. 
*/ void aio_process(struct aiocblist *aiocbe) { struct filedesc *fdp; struct proc *userp, *mycp; struct aiocb *cb; struct file *fp; struct uio auio; struct iovec aiov; unsigned int fd; int cnt; static nperline=0; int error; off_t offset; int oublock_st, oublock_end; int inblock_st, inblock_end; userp = aiocbe->userproc; cb = &aiocbe->uaiocb; mycp = curproc; fdp = mycp->p_fd; fd = cb->aio_fildes; fp = fdp->fd_ofiles[fd]; aiov.iov_base = (void *) cb->aio_buf; aiov.iov_len = cb->aio_nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = offset = cb->aio_offset; auio.uio_resid = cb->aio_nbytes; cnt = cb->aio_nbytes; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = mycp; inblock_st = mycp->p_stats->p_ru.ru_inblock; oublock_st = mycp->p_stats->p_ru.ru_oublock; if (cb->aio_lio_opcode == LIO_READ) { auio.uio_rw = UIO_READ; error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred); } else { auio.uio_rw = UIO_WRITE; error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred); } inblock_end = mycp->p_stats->p_ru.ru_inblock; oublock_end = mycp->p_stats->p_ru.ru_oublock; aiocbe->inputcharge = inblock_end - inblock_st; aiocbe->outputcharge = oublock_end - oublock_st; if (error) { if (auio.uio_resid != cnt) { if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) error = 0; if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) psignal(userp, SIGPIPE); } } cnt -= auio.uio_resid; cb->_aiocb_private.error = error; cb->_aiocb_private.status = cnt; return; } /* * The AIO daemon, most of the actual work is done in aio_process, * but the setup (and address space mgmt) is done in this routine. */ static void aio_daemon(void *uproc) { int s; struct aioproclist *aiop; struct vmspace *myvm, *aiovm; struct proc *mycp; /* * Local copies of curproc (cp) and vmspace (myvm) */ mycp = curproc; myvm = mycp->p_vmspace; /* * We manage to create only one VM space for all AIOD processes. * The VM space for the first AIOD created becomes the shared VM * space for all of them. 
We add an additional reference count, * even for the first AIOD, so the address space does not go away, * and we continue to use that original VM space even if the first * AIOD exits. */ if ((aiovm = aiovmspace) == NULL) { aiovmspace = myvm; myvm->vm_refcnt++; /* * Remove userland cruft from address space. */ if (myvm->vm_shm) shmexit(mycp); pmap_remove_pages(&myvm->vm_pmap, 0, USRSTACK); vm_map_remove(&myvm->vm_map, 0, USRSTACK); myvm->vm_tsize = 0; myvm->vm_dsize = 0; myvm->vm_ssize = 0; } else { aiovm->vm_refcnt++; mycp->p_vmspace = aiovm; pmap_activate(mycp); vmspace_free(myvm); myvm = aiovm; } if (mycp->p_textvp) { vrele(mycp->p_textvp); mycp->p_textvp = NULL; } /* * Allocate and ready the aio control info. There is one * aiop structure per daemon. */ aiop = zalloc(aiop_zone); aiop->aioproc = mycp; aiop->aioprocflags |= AIOP_FREE; TAILQ_INIT(&aiop->jobtorun); /* * Place thread (lightweight process) onto the AIO free thread list */ if (TAILQ_EMPTY(&aio_freeproc)) wakeup(&aio_freeproc); TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); /* * Make up a name for the daemon */ strcpy(mycp->p_comm, "aiod"); /* * Get rid of our current filedescriptors. AIOD's don't need any * filedescriptors, except as temporarily inherited from the client. * Credentials are also cloned, and made equivalent to "root." */ fdfree(mycp); mycp->p_fd = NULL; mycp->p_ucred = crcopy(mycp->p_ucred); mycp->p_ucred->cr_uid = 0; mycp->p_ucred->cr_ngroups = 1; mycp->p_ucred->cr_groups[0] = 1; /* * The daemon resides in it's own pgrp. */ enterpgrp(mycp, mycp->p_pid, 1); /* * Mark special process type */ mycp->p_flag |= P_SYSTEM|P_KTHREADP; /* * Wakeup parent process. (Parent sleeps to keep from blasting away * creating to many daemons.) */ wakeup(mycp); while(1) { struct proc *curcp; struct aiocblist *aiocbe; /* * curcp is the current daemon process context. * userp is the current user process context. 
*/ curcp = mycp; /* * Take daemon off of free queue */ if (aiop->aioprocflags & AIOP_FREE) { TAILQ_REMOVE(&aio_freeproc, aiop, list); TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list); aiop->aioprocflags &= ~AIOP_FREE; } aiop->aioprocflags &= ~AIOP_SCHED; /* * Check for jobs */ while ( aiocbe = aio_selectjob(aiop)) { struct proc *userp; struct aiocb *cb; struct kaioinfo *ki; struct aio_liojob *lj; cb = &aiocbe->uaiocb; userp = aiocbe->userproc; aiocbe->jobstate = JOBST_JOBRUNNING; /* * Connect to process address space for user program */ if (userp != curcp) { struct vmspace *tmpvm; /* * Save the current address space that we are connected to. */ tmpvm = mycp->p_vmspace; /* * Point to the new user address space, and refer to it. */ mycp->p_vmspace = userp->p_vmspace; mycp->p_vmspace->vm_refcnt++; /* * Activate the new mapping. */ pmap_activate(mycp); /* * If the old address space wasn't the daemons own address * space, then we need to remove the daemon's reference from * the other process that it was acting on behalf of. */ if (tmpvm != myvm) { vmspace_free(tmpvm); } /* * Disassociate from previous clients file descriptors, and * associate to the new clients descriptors. Note that * the daemon doesn't need to worry about it's orginal * descriptors, because they were originally freed. 
*/ if (mycp->p_fd) fdfree(mycp); mycp->p_fd = fdshare(userp); curcp = userp; } ki = userp->p_aioinfo; lj = aiocbe->lio; /* * Account for currently active jobs */ ki->kaio_active_count++; /* * Do the I/O function */ aiocbe->jobaioproc = aiop; aio_process(aiocbe); /* * decrement the active job count */ ki->kaio_active_count--; /* * increment the completion count for wakeup/signal comparisons */ aiocbe->jobflags |= AIOCBLIST_DONE; ki->kaio_queue_finished_count++; if (lj) { lj->lioj_queue_finished_count++; } if ((ki->kaio_flags & KAIO_WAKEUP) || (ki->kaio_flags & KAIO_RUNDOWN) && (ki->kaio_active_count == 0)) { ki->kaio_flags &= ~KAIO_WAKEUP; wakeup(userp); } s = splbio(); if (lj && (lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == LIOJ_SIGNAL) { if ((lj->lioj_queue_finished_count == lj->lioj_queue_count) && (lj->lioj_buffer_finished_count == lj->lioj_buffer_count)) { psignal(userp, lj->lioj_signal.sigev_signo); lj->lioj_flags |= LIOJ_SIGNAL_POSTED; } } splx(s); aiocbe->jobstate = JOBST_JOBFINISHED; /* * If the I/O request should be automatically rundown, do the * needed cleanup. Otherwise, place the queue entry for * the just finished I/O request into the done queue for the * associated client. */ if (aiocbe->jobflags & AIOCBLIST_ASYNCFREE) { aiocbe->jobflags &= ~AIOCBLIST_ASYNCFREE; TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); } else { TAILQ_REMOVE(&ki->kaio_jobqueue, aiocbe, plist); TAILQ_INSERT_TAIL(&ki->kaio_jobdone, aiocbe, plist); } if (aiocbe->jobflags & AIOCBLIST_RUNDOWN) { wakeup(aiocbe); aiocbe->jobflags &= ~AIOCBLIST_RUNDOWN; } if (cb->aio_sigevent.sigev_notify == SIGEV_SIGNAL) { psignal(userp, cb->aio_sigevent.sigev_signo); } } /* * Disconnect from user address space */ if (curcp != mycp) { struct vmspace *tmpvm; /* * Get the user address space to disconnect from. */ tmpvm = mycp->p_vmspace; /* * Get original address space for daemon. */ mycp->p_vmspace = myvm; /* * Activate the daemon's address space. 
*/ pmap_activate(mycp); #if defined(DIAGNOSTIC) if (tmpvm == myvm) printf("AIOD: vmspace problem -- %d\n", mycp->p_pid); #endif /* * remove our vmspace reference. */ vmspace_free(tmpvm); /* * disassociate from the user process's file descriptors. */ if (mycp->p_fd) fdfree(mycp); mycp->p_fd = NULL; curcp = mycp; } /* * If we are the first to be put onto the free queue, wakeup * anyone waiting for a daemon. */ TAILQ_REMOVE(&aio_activeproc, aiop, list); if (TAILQ_EMPTY(&aio_freeproc)) wakeup(&aio_freeproc); TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); aiop->aioprocflags |= AIOP_FREE; /* * If daemon is inactive for a long time, allow it to exit, thereby * freeing resources. */ if (((aiop->aioprocflags & AIOP_SCHED) == 0) && tsleep(mycp, PRIBIO, "aiordy", aiod_lifetime)) { if ((TAILQ_FIRST(&aio_jobs) == NULL) && (TAILQ_FIRST(&aiop->jobtorun) == NULL)) { if ((aiop->aioprocflags & AIOP_FREE) && (num_aio_procs > target_aio_procs)) { TAILQ_REMOVE(&aio_freeproc, aiop, list); zfree(aiop_zone, aiop); num_aio_procs--; #if defined(DIAGNOSTIC) if (mycp->p_vmspace->vm_refcnt <= 1) printf("AIOD: bad vm refcnt for exiting daemon: %d\n", mycp->p_vmspace->vm_refcnt); #endif exit1(mycp, 0); } } } } } /* * Create a new AIO daemon. This is mostly a kernel-thread fork routine. * The AIO daemon modifies it's environment itself. */ static int aio_newproc() { int error; struct rfork_args rfa; struct proc *p, *np; rfa.flags = RFPROC | RFCFDG; p = curproc; if (error = rfork(p, &rfa)) return error; np = pfind(p->p_retval[0]); cpu_set_fork_handler(np, aio_daemon, p); /* * Wait until daemon is started, but continue on just in case (to * handle error conditions. */ error = tsleep(np, PZERO, "aiosta", aiod_timeout); num_aio_procs++; return error; } /* * Try the high-performance physio method for eligible VCHR devices. This * routine doesn't require the use of any additional threads, and have * overhead. 
*/ int aio_qphysio(p, aiocbe) struct proc *p; struct aiocblist *aiocbe; { int error; caddr_t sa; struct aiocb *cb; struct file *fp; struct buf *bp; int bflags; struct vnode *vp; struct kaioinfo *ki; struct filedesc *fdp; struct aio_liojob *lj; int fd; int majordev; int s; int cnt; dev_t dev; int rw; d_strategy_t *fstrategy; struct cdevsw *cdev; struct bdevsw *bdev; cb = &aiocbe->uaiocb; fdp = p->p_fd; fd = cb->aio_fildes; fp = fdp->fd_ofiles[fd]; if (fp->f_type != DTYPE_VNODE) { return -1; } vp = (struct vnode *)fp->f_data; if (vp->v_type != VCHR || ((cb->aio_nbytes & (DEV_BSIZE - 1)) != 0)) { return -1; } if ((cb->aio_nbytes > MAXPHYS) && (num_buf_aio >= max_buf_aio)) { return -1; } if ((vp->v_specinfo == NULL) || (vp->v_flag & VISTTY)) { return -1; } majordev = major(vp->v_rdev); if (majordev == NODEV) { return -1; } cdev = cdevsw[major(vp->v_rdev)]; if (cdev == NULL) { return -1; } bdev = cdev->d_bdev; if (bdev == NULL) { return -1; } ki = p->p_aioinfo; if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) { return -1; } cnt = cb->aio_nbytes; if (cnt > MAXPHYS) { return -1; } dev = makedev(bdev->d_maj, minor(vp->v_rdev)); /* * Physical I/O is charged directly to the process, so we don't have * to fake it. 
*/ aiocbe->inputcharge = 0; aiocbe->outputcharge = 0; ki->kaio_buffer_count++; lj = aiocbe->lio; if (lj) { lj->lioj_buffer_count++; } /* create and build a buffer header for a transfer */ bp = (struct buf *)getpbuf(); /* * get a copy of the kva from the physical buffer */ bp->b_proc = p; bp->b_dev = dev; error = bp->b_error = 0; if (cb->aio_lio_opcode == LIO_WRITE) { rw = 0; bflags = B_WRITE; } else { rw = 1; bflags = B_READ; } bp->b_bcount = cb->aio_nbytes; bp->b_bufsize = cb->aio_nbytes; bp->b_flags = B_BUSY | B_PHYS | B_CALL | bflags; bp->b_iodone = aio_physwakeup; bp->b_saveaddr = bp->b_data; bp->b_data = (void *) cb->aio_buf; bp->b_blkno = btodb(cb->aio_offset); if (rw && !useracc(bp->b_data, bp->b_bufsize, B_WRITE)) { error = EFAULT; goto doerror; } if (!rw && !useracc(bp->b_data, bp->b_bufsize, B_READ)) { error = EFAULT; goto doerror; } /* bring buffer into kernel space */ vmapbuf(bp); s = splbio(); aiocbe->bp = bp; bp->b_spc = (void *)aiocbe; TAILQ_INSERT_TAIL(&aio_bufjobs, aiocbe, list); TAILQ_INSERT_TAIL(&ki->kaio_bufqueue, aiocbe, plist); aiocbe->jobstate = JOBST_JOBQBUF; cb->_aiocb_private.status = cb->aio_nbytes; num_buf_aio++; fstrategy = bdev->d_strategy; bp->b_error = 0; splx(s); /* perform transfer */ (*fstrategy)(bp); s = splbio(); /* * If we had an error invoking the request, or an error in processing * the request before we have returned, we process it as an error * in transfer. Note that such an I/O error is not indicated immediately, * but is returned using the aio_error mechanism. In this case, aio_suspend * will return immediately. 
*/ if (bp->b_error || (bp->b_flags & B_ERROR)) { struct aiocb *job = aiocbe->uuaiocb; aiocbe->uaiocb._aiocb_private.status = 0; suword(&job->_aiocb_private.status, 0); aiocbe->uaiocb._aiocb_private.error = bp->b_error; suword(&job->_aiocb_private.error, bp->b_error); ki->kaio_buffer_finished_count++; if (aiocbe->jobstate != JOBST_JOBBFINISHED) { aiocbe->jobstate = JOBST_JOBBFINISHED; aiocbe->jobflags |= AIOCBLIST_DONE; TAILQ_REMOVE(&aio_bufjobs, aiocbe, list); TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist); TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist); } } splx(s); return 0; doerror: ki->kaio_buffer_count--; if (lj) { lj->lioj_buffer_count--; } aiocbe->bp = NULL; relpbuf(bp); return error; } /* * This waits/tests physio completion. */ int aio_fphysio(p, iocb, flgwait) struct proc *p; struct aiocblist *iocb; int flgwait; { int s; struct buf *bp; int error; bp = iocb->bp; s = splbio(); if (flgwait == 0) { if ((bp->b_flags & B_DONE) == 0) { splx(s); return EINPROGRESS; } } while ((bp->b_flags & B_DONE) == 0) { if (tsleep((caddr_t)bp, PRIBIO, "physstr", aiod_timeout)) { if ((bp->b_flags & B_DONE) == 0) { splx(s); return EINPROGRESS; } else { break; } } } /* release mapping into kernel space */ vunmapbuf(bp); iocb->bp = 0; error = 0; /* * check for an error */ if (bp->b_flags & B_ERROR) { error = bp->b_error; } relpbuf(bp); return (error); } /* * Queue a new AIO request. Choosing either the threaded or direct physio * VCHR technique is done in this code. 
 */
/*
 * Core enqueue path shared by aio_read/aio_write/lio_listio.
 *
 * p    - requesting process
 * job  - userland aiocb address (used for suword/fuword status reporting)
 * lj   - owning lio_listio job, or NULL for a standalone request
 * type - LIO_NOP for standalone requests (opcode taken from the aiocb),
 *        otherwise the opcode to force (LIO_READ/LIO_WRITE)
 *
 * Returns 0 on successful queueing; on failure the aiocblist is returned
 * to the free list and, for standalone requests (type == 0), the error is
 * also reflected into the user's _aiocb_private.error field.
 */
static int
_aio_aqueue(struct proc *p, struct aiocb *job, struct aio_liojob *lj, int type)
{
	struct filedesc *fdp;
	struct file *fp;
	unsigned int fd;

	int error;
	int opcode;
	struct aiocblist *aiocbe;
	struct aioproclist *aiop;
	struct kaioinfo *ki;

	/* Recycle a free control block if available, else allocate one. */
	if (aiocbe = TAILQ_FIRST(&aio_freejobs)) {
		TAILQ_REMOVE(&aio_freejobs, aiocbe, list);
	} else {
		aiocbe = zalloc (aiocb_zone);
	}

	aiocbe->inputcharge = 0;
	aiocbe->outputcharge = 0;

	/* Initialize the userland-visible private fields before copyin. */
	suword(&job->_aiocb_private.status, -1);
	suword(&job->_aiocb_private.error, 0);
	suword(&job->_aiocb_private.kernelinfo, -1);

	error = copyin((caddr_t)job,
	    (caddr_t) &aiocbe->uaiocb, sizeof aiocbe->uaiocb);
	if (error) {
		suword(&job->_aiocb_private.error, error);

		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		return error;
	}

	/*
	 * Save userspace address of the job info
	 */
	aiocbe->uuaiocb = job;

	/*
	 * Get the opcode
	 */
	if (type != LIO_NOP) {
		aiocbe->uaiocb.aio_lio_opcode = type;
	}
	opcode = aiocbe->uaiocb.aio_lio_opcode;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = aiocbe->uaiocb.aio_fildes;
	if (fd >= fdp->fd_nfiles) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.error, EBADF);
		}
		return EBADF;
	}

	/* The descriptor must be open and, for writes, writable. */
	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) ||
	    ((opcode == LIO_WRITE) && ((fp->f_flag & FWRITE) == 0))) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.error, EBADF);
		}
		return EBADF;
	}

	if (aiocbe->uaiocb.aio_offset == -1LL) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.error, EINVAL);
		}
		return EINVAL;
	}

	/* Hand the kernel job reference id back to userland. */
	error = suword(&job->_aiocb_private.kernelinfo, jobrefid);
	if (error) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.error, EINVAL);
		}
		return error;
	}

	aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)jobrefid;
	jobrefid++;
	/* Wrap the reference counter; 0 and -1 are reserved sentinels. */
	if (jobrefid > INT_MAX)
		jobrefid = 1;

	if (opcode == LIO_NOP) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.error, 0);
			suword(&job->_aiocb_private.status, 0);
			suword(&job->_aiocb_private.kernelinfo, 0);
		}
		return 0;
	}

	if ((opcode != LIO_READ) && (opcode != LIO_WRITE)) {
		TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list);
		if (type == 0) {
			suword(&job->_aiocb_private.status, 0);
			suword(&job->_aiocb_private.error, EINVAL);
		}
		return EINVAL;
	}

	/* The request is now committed: mark it in progress. */
	suword(&job->_aiocb_private.error, EINPROGRESS);
	aiocbe->uaiocb._aiocb_private.error = EINPROGRESS;
	aiocbe->userproc = p;
	aiocbe->jobflags = 0;
	aiocbe->lio = lj;
	ki = p->p_aioinfo;

	/*
	 * Try the direct physio path first; 0 means it was queued there,
	 * a positive value is a hard error, and a negative value means
	 * "not eligible" so we fall through to the daemon queue.
	 */
	if ((error = aio_qphysio(p, aiocbe)) == 0) {
		return 0;
	} else if (error > 0) {
		suword(&job->_aiocb_private.status, 0);
		aiocbe->uaiocb._aiocb_private.error = error;
		suword(&job->_aiocb_private.error, error);
		return error;
	}

	/*
	 * No buffer for daemon I/O
	 */
	aiocbe->bp = NULL;

	ki->kaio_queue_count++;
	if (lj) {
		lj->lioj_queue_count++;
	}
	TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist);
	TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list);
	aiocbe->jobstate = JOBST_JOBQGLOBAL;

	num_queue_count++;

	error = 0;

	/*
	 * If we don't have a free AIO process, and we are below our
	 * quota, then start one.  Otherwise, depend on the subsequent
	 * I/O completions to pick-up this job.  If we don't sucessfully
	 * create the new process (thread) due to resource issues, we
	 * return an error for now (EAGAIN), which is likely not the
	 * correct thing to do.
	 */
retryproc:
	if (aiop = TAILQ_FIRST(&aio_freeproc)) {
		/* An idle daemon exists: activate it and wake it up. */
		TAILQ_REMOVE(&aio_freeproc, aiop, list);
		TAILQ_INSERT_TAIL(&aio_activeproc, aiop, list);
		aiop->aioprocflags &= ~AIOP_FREE;
		wakeup(aiop->aioproc);
	} else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) &&
	    ((ki->kaio_active_count + num_aio_resv_start) <
	    ki->kaio_maxactive_count)) {
		/* Reserve a slot while the new daemon is being created. */
		num_aio_resv_start++;
		if ((error = aio_newproc()) == 0) {
			num_aio_resv_start--;
			p->p_retval[0] = 0;
			goto retryproc;
		}
		num_aio_resv_start--;
	}
	return error;
}

/*
 * This routine queues an AIO request, checking for quotas.
*/ static int aio_aqueue(struct proc *p, struct aiocb *job, int type) { struct kaioinfo *ki; if (p->p_aioinfo == NULL) { aio_init_aioinfo(p); } if (num_queue_count >= max_queue_count) return EAGAIN; ki = p->p_aioinfo; if (ki->kaio_queue_count >= ki->kaio_qallowed_count) return EAGAIN; return _aio_aqueue(p, job, NULL, type); } /* * Support the aio_return system call, as a side-effect, kernel * resources are released. */ int aio_return(struct proc *p, struct aio_return_args *uap) { int s; int jobref, status; struct aiocblist *cb, *ncb; struct aiocb *ujob; struct kaioinfo *ki; struct proc *userp; ki = p->p_aioinfo; if (ki == NULL) { return EINVAL; } ujob = uap->aiocbp; jobref = fuword(&ujob->_aiocb_private.kernelinfo); if (jobref == -1 || jobref == 0) return EINVAL; for (cb = TAILQ_FIRST(&ki->kaio_jobdone); cb; cb = TAILQ_NEXT(cb, plist)) { if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { if (ujob == cb->uuaiocb) { p->p_retval[0] = cb->uaiocb._aiocb_private.status; } else { p->p_retval[0] = EFAULT; } if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { curproc->p_stats->p_ru.ru_oublock += cb->outputcharge; cb->outputcharge = 0; } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { curproc->p_stats->p_ru.ru_inblock += cb->inputcharge; cb->inputcharge = 0; } aio_free_entry(cb); return 0; } } s = splbio(); for (cb = TAILQ_FIRST(&ki->kaio_bufdone); cb; cb = ncb) { ncb = TAILQ_NEXT(cb, plist); if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) { splx(s); if (ujob == cb->uuaiocb) { p->p_retval[0] = cb->uaiocb._aiocb_private.status; } else { p->p_retval[0] = EFAULT; } aio_free_entry(cb); return 0; } } splx(s); return (EINVAL); } /* * Allow a process to wakeup when any of the I/O requests are * completed. 
 */
/*
 * aio_suspend(2): block until at least one of the listed requests has
 * completed, the optional timeout expires (EAGAIN), or a signal arrives
 * (EINTR).  Job references are snapshotted from userland up front, then
 * both completion queues are polled around a tsleep.
 */
int
aio_suspend(struct proc *p, struct aio_suspend_args *uap)
{
	struct timeval atv;
	struct timespec ts;
	struct aiocb *const *cbptr, *cbp;
	struct kaioinfo *ki;
	struct aiocblist *cb;
	int i;
	int njoblist;
	int error, s, timo;
	int *ijoblist;
	struct aiocb **ujoblist;

	/*
	 * NOTE(review): ">=" rejects exactly AIO_LISTIO_MAX entries while
	 * lio_listio below allows nent == AIO_LISTIO_MAX ("> " test) --
	 * confirm which bound is intended.
	 */
	if (uap->nent >= AIO_LISTIO_MAX)
		return EINVAL;

	timo = 0;
	if (uap->timeout) {
		/*
		 * Get timespec struct
		 */
		if (error = copyin((caddr_t) uap->timeout,
		    (caddr_t) &ts, sizeof ts)) {
			return error;
		}

		if (ts.tv_nsec < 0 || ts.tv_nsec >= 1000000000)
			return (EINVAL);

		/* Convert the relative timespec into an absolute tick count. */
		TIMESPEC_TO_TIMEVAL(&atv, &ts)
		if (itimerfix(&atv))
			return (EINVAL);
		s = splclock();
		timevaladd(&atv, &time);
		timo = hzto(&atv);
		splx(s);
	}

	ki = p->p_aioinfo;
	if (ki == NULL)
		return EAGAIN;

	/* Snapshot the user's aiocb pointers and their kernel job ids. */
	njoblist = 0;
	ijoblist = zalloc(aiol_zone);
	ujoblist = zalloc(aiol_zone);
	cbptr = uap->aiocbp;

	for(i = 0; i < uap->nent; i++) {
		cbp = (struct aiocb *) fuword((caddr_t) &cbptr[i]);
		if (cbp == 0)
			continue;
		ujoblist[njoblist] = cbp;
		ijoblist[njoblist] = fuword(&cbp->_aiocb_private.kernelinfo);
		njoblist++;
	}
	if (njoblist == 0) {
		zfree(aiol_zone, ijoblist);
		zfree(aiol_zone, ujoblist);
		return 0;
	}

	error = 0;
	while (1) {
		/* Any daemon-completed job in our list?  Return at once. */
		for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
		    cb; cb = TAILQ_NEXT(cb, plist)) {
			for(i = 0; i < njoblist; i++) {
				if (((int) cb->uaiocb._aiocb_private.kernelinfo) ==
				    ijoblist[i]) {
					if (ujoblist[i] != cb->uuaiocb)
						error = EINVAL;
					zfree(aiol_zone, ijoblist);
					zfree(aiol_zone, ujoblist);
					return error;
				}
			}
		}

		/* Same check for physio completions, under splbio. */
		s = splbio();
		for (cb = TAILQ_FIRST(&ki->kaio_bufdone);
		    cb; cb = TAILQ_NEXT(cb, plist)) {
			for(i = 0; i < njoblist; i++) {
				if (((int) cb->uaiocb._aiocb_private.kernelinfo) ==
				    ijoblist[i]) {
					splx(s);
					if (ujoblist[i] != cb->uuaiocb)
						error = EINVAL;
					zfree(aiol_zone, ijoblist);
					zfree(aiol_zone, ujoblist);
					return error;
				}
			}
		}

		/*
		 * Nothing done yet: ask completion paths to wake us and
		 * sleep.  KAIO_WAKEUP is set while still at splbio so a
		 * physio completion cannot race past us.
		 */
		ki->kaio_flags |= KAIO_WAKEUP;
		error = tsleep(p, PRIBIO|PCATCH, "aiospn", timo);
		splx(s);

		if (error == EINTR) {
			zfree(aiol_zone, ijoblist);
			zfree(aiol_zone, ujoblist);
			return EINTR;
		} else if (error == EWOULDBLOCK) {
			zfree(aiol_zone, ijoblist);
			zfree(aiol_zone, ujoblist);
			return EAGAIN;
		}
	}

/* NOTREACHED */
	return EINVAL;
}

/*
 * aio_cancel at the kernel level is a NOOP right now.  It
 * might be possible to support it partially in user mode, or
 * in kernel mode later on.
 */
int
aio_cancel(struct proc *p, struct aio_cancel_args *uap)
{
	return ENOSYS;
}

/*
 * aio_error is implemented in the kernel level for compatibility
 * purposes only.  For a user mode async implementation, it would be
 * best to do it in a userland subroutine.
 */
int
aio_error(struct proc *p, struct aio_error_args *uap)
{
	int s;
	struct aiocblist *cb;
	struct kaioinfo *ki;
	int jobref;
	int error, status;

	ki = p->p_aioinfo;
	if (ki == NULL)
		return EINVAL;

	jobref = fuword(&uap->aiocbp->_aiocb_private.kernelinfo);
	if ((jobref == -1) || (jobref == 0))
		return EINVAL;

	/* Completed by a daemon: report the saved error code. */
	for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
	    cb;
	    cb = TAILQ_NEXT(cb, plist)) {
		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			p->p_retval[0] = cb->uaiocb._aiocb_private.error;
			return 0;
		}
	}

	/* Still queued for a daemon. */
	for (cb = TAILQ_FIRST(&ki->kaio_jobqueue);
	    cb;
	    cb = TAILQ_NEXT(cb, plist)) {
		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			p->p_retval[0] = EINPROGRESS;
			return 0;
		}
	}

	/* Physio queues are walked under splbio. */
	s = splbio();
	for (cb = TAILQ_FIRST(&ki->kaio_bufdone);
	    cb;
	    cb = TAILQ_NEXT(cb, plist)) {
		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			p->p_retval[0] = cb->uaiocb._aiocb_private.error;
			splx(s);
			return 0;
		}
	}

	for (cb = TAILQ_FIRST(&ki->kaio_bufqueue);
	    cb;
	    cb = TAILQ_NEXT(cb, plist)) {
		if (((int) cb->uaiocb._aiocb_private.kernelinfo) == jobref) {
			p->p_retval[0] = EINPROGRESS;
			splx(s);
			return 0;
		}
	}
	splx(s);

	/*
	 * Hack for lio
	 */
/*
	status = fuword(&uap->aiocbp->_aiocb_private.status);
	if (status == -1) {
		return fuword(&uap->aiocbp->_aiocb_private.error);
	}
*/
	return EINVAL;
}

/*
 * aio_read(2): either queue the request asynchronously or, when the
 * private AIO_PMODE_SYNC mode bit is set, perform the read inline.
 */
int
aio_read(struct proc *p, struct aio_read_args *uap)
{
	struct filedesc *fdp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	struct aiocb iocb;
	int error, pmodes;

	pmodes =
fuword(&uap->aiocbp->_aiocb_private.privatemodes);
	/* Default (non-SYNC) mode: hand the request to the async queue. */
	if ((pmodes & AIO_PMODE_SYNC) == 0) {
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ);
	}

	/*
	 * Get control block
	 */
	if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb))
		return error;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = iocb.aio_fildes;
	if (fd >= fdp->fd_nfiles)
		return EBADF;
	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) || ((fp->f_flag & FREAD) == 0))
		return EBADF;
	if (iocb.aio_offset == -1LL)
		return EINVAL;

	auio.uio_resid = iocb.aio_nbytes;
	if (auio.uio_resid < 0)
		return (EINVAL);

	/*
	 * Process sync simply -- queue async request.
	 */
	/* NOTE(review): this re-tests the SYNC bit from the copied-in
	 * block after the fuword test above -- presumably redundant;
	 * confirm before removing. */
	if ((iocb._aiocb_private.privatemodes & AIO_PMODE_SYNC) == 0) {
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_READ);
	}

	/* Build a single-segment uio describing the user buffer. */
	aiov.iov_base = (void *) iocb.aio_buf;
	aiov.iov_len = iocb.aio_nbytes;

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = iocb.aio_offset;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	cnt = iocb.aio_nbytes;
	error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred);
	/* A partial transfer interrupted by a restartable condition
	 * still counts as success. */
	if (error &&
	    (auio.uio_resid != cnt) &&
	    (error == ERESTART || error == EINTR || error == EWOULDBLOCK))
		error = 0;
	cnt -= auio.uio_resid;
	p->p_retval[0] = cnt;
	return error;
}

/*
 * aio_write(2): async-queue the request, or perform it inline when the
 * private AIO_PMODE_SYNC mode bit is set.
 */
int
aio_write(struct proc *p, struct aio_write_args *uap)
{
	struct filedesc *fdp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	unsigned int fd;
	int cnt;
	struct aiocb iocb;
	int error;
	int pmodes;

	/*
	 * Process sync simply -- queue async request.
	 */
	pmodes = fuword(&uap->aiocbp->_aiocb_private.privatemodes);
	if ((pmodes & AIO_PMODE_SYNC) == 0) {
		return aio_aqueue(p, (struct aiocb *) uap->aiocbp, LIO_WRITE);
	}

	if (error = copyin((caddr_t) uap->aiocbp, (caddr_t) &iocb, sizeof iocb))
		return error;

	/*
	 * Get the fd info for process
	 */
	fdp = p->p_fd;

	/*
	 * Range check file descriptor
	 */
	fd = iocb.aio_fildes;
	if (fd >= fdp->fd_nfiles)
		return EBADF;
	fp = fdp->fd_ofiles[fd];
	if ((fp == NULL) || ((fp->f_flag & FWRITE) == 0))
		return EBADF;
	if (iocb.aio_offset == -1LL)
		return EINVAL;

	/* Single-segment uio over the user buffer. */
	aiov.iov_base = (void *) iocb.aio_buf;
	aiov.iov_len = iocb.aio_nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = iocb.aio_offset;

	auio.uio_resid = iocb.aio_nbytes;
	if (auio.uio_resid < 0)
		return (EINVAL);

	auio.uio_rw = UIO_WRITE;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_procp = p;

	cnt = iocb.aio_nbytes;
	error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred);
	if (error) {
		/* Partial restartable writes succeed; EPIPE also signals. */
		if (auio.uio_resid != cnt) {
			if (error == ERESTART || error == EINTR ||
			    error == EWOULDBLOCK)
				error = 0;
			if (error == EPIPE)
				psignal(p, SIGPIPE);
		}
	}
	cnt -= auio.uio_resid;
	p->p_retval[0] = cnt;
	return error;
}

/*
 * lio_listio(2): queue a batch of AIO requests under one lio job and,
 * in LIO_WAIT mode, block until every queued request has completed.
 */
int
lio_listio(struct proc *p, struct lio_listio_args *uap)
{
	int nent, nentqueued;
	struct aiocb *iocb, * const *cbptr;
	struct aiocblist *cb;
	struct kaioinfo *ki;
	struct aio_liojob *lj;
	int error, runningcode;
	int nerror;
	int i;
	int s;

	if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) {
		return EINVAL;
	}

	nent = uap->nent;
	if (nent > AIO_LISTIO_MAX) {
		return EINVAL;
	}

	if (p->p_aioinfo == NULL) {
		aio_init_aioinfo(p);
	}

	/* Enforce global and per-process queue-depth limits up front. */
	if ((nent + num_queue_count) > max_queue_count) {
		return EAGAIN;
	}

	ki = p->p_aioinfo;
	if ((nent + ki->kaio_queue_count) > ki->kaio_qallowed_count) {
		return EAGAIN;
	}

	lj = zalloc(aiolio_zone);
	if (!lj) {
		return EAGAIN;
	}

	lj->lioj_flags = 0;
	lj->lioj_buffer_count = 0;
	lj->lioj_buffer_finished_count = 0;
	lj->lioj_queue_count = 0;
	lj->lioj_queue_finished_count = 0;
	lj->lioj_ki = ki;
	TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj,
lioj_list);

	/*
	 * Setup signal
	 */
	if (uap->sig && (uap->mode == LIO_NOWAIT)) {
		/* NOTE(review): error returns from here on leave lj linked
		 * on kaio_liojoblist without freeing it -- presumably a
		 * leak; confirm against aio_free_entry/process teardown. */
		error = copyin(uap->sig, &lj->lioj_signal,
		    sizeof lj->lioj_signal);
		if (error)
			return error;
		lj->lioj_flags |= LIOJ_SIGNAL;
		lj->lioj_flags &= ~LIOJ_SIGNAL_POSTED;
	} else {
		lj->lioj_flags &= ~LIOJ_SIGNAL;
	}

/*
 * get pointers to the list of I/O requests
 */

	nerror = 0;
	nentqueued = 0;
	cbptr = uap->acb_list;
	for(i = 0; i < uap->nent; i++) {
		iocb = (struct aiocb *) fuword((caddr_t) &cbptr[i]);
		/* NULL and -1 entries are skipped, matching the wait loop. */
		if (((int) iocb != -1) && ((int) iocb != NULL)) {
			error = _aio_aqueue(p, iocb, lj, 0);
			if (error == 0) {
				nentqueued++;
			} else {
				nerror++;
			}
		}
	}

	/*
	 * If we haven't queued any, then just return error
	 */
	/* NOTE(review): returns 0 even when nerror != 0 -- confirm
	 * whether EIO/EAGAIN is the intended result here. */
	if (nentqueued == 0) {
		return 0;
	}

	/*
	 * Calculate the appropriate error return
	 */
	runningcode = 0;
	if (nerror)
		runningcode = EIO;

	if (uap->mode == LIO_WAIT) {
		while (1) {
			int found;
			found = 0;
			/* Re-scan the user's list, counting completions. */
			for(i = 0; i < uap->nent; i++) {
				int jobref, command;

				/*
				 * Fetch address of the control buf pointer in user space
				 */
				iocb = (struct aiocb *)
				    fuword((caddr_t) &cbptr[i]);
				if (((int) iocb == -1) || ((int) iocb == 0))
					continue;

				/*
				 * Fetch the associated command from user space
				 */
				command = fuword(&iocb->aio_lio_opcode);
				if (command == LIO_NOP) {
					found++;
					continue;
				}

				jobref =
				    fuword(&iocb->_aiocb_private.kernelinfo);

				for (cb = TAILQ_FIRST(&ki->kaio_jobdone);
				    cb;
				    cb = TAILQ_NEXT(cb, plist)) {
					if (((int) cb->uaiocb._aiocb_private.kernelinfo) ==
					    jobref) {
						/* Settle deferred block
						 * accounting as the job is
						 * observed complete. */
						if (cb->uaiocb.aio_lio_opcode
						    == LIO_WRITE) {
							curproc->p_stats->p_ru.ru_oublock +=
							    cb->outputcharge;
							cb->outputcharge = 0;
						} else if (cb->uaiocb.aio_lio_opcode
						    == LIO_READ) {
							curproc->p_stats->p_ru.ru_inblock +=
							    cb->inputcharge;
							cb->inputcharge = 0;
						}
						found++;
						break;
					}
				}

				s = splbio();
				for (cb = TAILQ_FIRST(&ki->kaio_bufdone);
				    cb;
				    cb = TAILQ_NEXT(cb, plist)) {
					if (((int) cb->uaiocb._aiocb_private.kernelinfo) ==
					    jobref) {
						found++;
						break;
					}
				}
				splx(s);
			}

			/*
			 * If all I/Os have been disposed of, then we can return
			 */
			if (found == nentqueued) {
				return runningcode;
			}

			ki->kaio_flags |= KAIO_WAKEUP;
			error = tsleep(p, PRIBIO|PCATCH, "aiospn", 0);

			if (error == EINTR) {
				return EINTR;
			} else if (error == EWOULDBLOCK) {
				return EAGAIN;
			}
		}
	}

	return runningcode;
}

/*
 * This is a wierd hack so that we can post a signal.  It is safe
 * to do so from a timeout routine, but *not* from an interrupt routine.
 */
static void
process_signal(void *ljarg)
{
	struct aio_liojob *lj = ljarg;
	/* Deliver the lio completion signal once all jobs are finished. */
	if (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL) {
		if (lj->lioj_queue_count == lj->lioj_queue_finished_count) {
			psignal(lj->lioj_ki->kaio_p,
			    lj->lioj_signal.sigev_signo);
			lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
		}
	}
}

/*
 * Interrupt handler for physio, performs the necessary process wakeups,
 * and signals.
 */
static void
aio_physwakeup(bp)
	struct buf *bp;
{
	struct aiocblist *aiocbe;
	struct proc *p;
	struct kaioinfo *ki;
	struct aio_liojob *lj;
	int s;
	s = splbio();

	/* Wake any aio_fphysio sleeper and mark the buffer complete. */
	wakeup((caddr_t) bp);
	bp->b_flags &= ~B_CALL;
	bp->b_flags |= B_DONE;

	aiocbe = (struct aiocblist *)bp->b_spc;
	if (aiocbe) {
		p = bp->b_proc;

		/* Record completion status (bytes done and error code). */
		aiocbe->jobstate = JOBST_JOBBFINISHED;
		aiocbe->uaiocb._aiocb_private.status -= bp->b_resid;
		aiocbe->uaiocb._aiocb_private.error = 0;
		aiocbe->jobflags |= AIOCBLIST_DONE;

		if (bp->b_flags & B_ERROR) {
			aiocbe->uaiocb._aiocb_private.error = bp->b_error;
		}

		lj = aiocbe->lio;
		if (lj) {
			lj->lioj_buffer_finished_count++;
			/*
			 * wakeup/signal if all of the interrupt jobs are done
			 */
			if (lj->lioj_buffer_finished_count ==
			    lj->lioj_buffer_count) {
				/*
				 * post a signal if it is called for
				 */
				if ((lj->lioj_flags &
				    (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) ==
				    LIOJ_SIGNAL) {
					lj->lioj_flags |= LIOJ_SIGNAL_POSTED;
					/* Signal from timeout context, not
					 * interrupt context -- see
					 * process_signal above. */
					timeout(process_signal, lj, 0);
				}
			}
		}

		ki = p->p_aioinfo;
		if (ki) {
			ki->kaio_buffer_finished_count++;
			TAILQ_REMOVE(&aio_bufjobs, aiocbe, list);
			TAILQ_REMOVE(&ki->kaio_bufqueue, aiocbe, plist);
			TAILQ_INSERT_TAIL(&ki->kaio_bufdone, aiocbe, plist);
			/*
			 * and do the wakeup
			 */
			if (ki->kaio_flags & (KAIO_RUNDOWN|KAIO_WAKEUP)) {
				ki->kaio_flags &= ~KAIO_WAKEUP;
				wakeup(p);
			}
		}
	}
	splx(s);
}
diff --git
a/sys/miscfs/kernfs/kernfs_vnops.c b/sys/miscfs/kernfs/kernfs_vnops.c index 41d543019bac..9d90c31db605 100644 --- a/sys/miscfs/kernfs/kernfs_vnops.c +++ b/sys/miscfs/kernfs/kernfs_vnops.c @@ -1,665 +1,667 @@ /* * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 - * $Id: kernfs_vnops.c,v 1.28 1997/10/27 13:33:40 bde Exp $ + * $Id: kernfs_vnops.c,v 1.29 1998/03/26 20:52:21 phk Exp $ */ /* * Kernel parameter filesystem (/kern) */ #include #include #include #include #include #include #include #include #include #include #include +#include + #include #define KSTRING 256 /* Largest I/O available via this filesystem */ #define UIO_MX 32 #define READ_MODE (S_IRUSR|S_IRGRP|S_IROTH) #define WRITE_MODE (S_IWUSR|S_IRUSR|S_IRGRP|S_IROTH) #define DIR_MODE (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) static struct kern_target { u_char kt_type; u_char kt_namlen; char *kt_name; void *kt_data; #define KTT_NULL 1 #define KTT_TIME 5 #define KTT_INT 17 #define KTT_STRING 31 #define KTT_HOSTNAME 47 #define KTT_BOOTFILE 49 #define KTT_AVENRUN 53 #define KTT_DEVICE 71 u_char kt_tag; u_char kt_vtype; mode_t kt_mode; } kern_targets[] = { /* NOTE: The name must be less than UIO_MX-16 chars in length */ #define N(s) sizeof(s)-1, s /* name data tag type ro/rw */ { DT_DIR, N("."), 0, KTT_NULL, VDIR, DIR_MODE }, { DT_DIR, N(".."), 0, KTT_NULL, VDIR, DIR_MODE }, { DT_REG, N("boottime"), &boottime.tv_sec, KTT_INT, VREG, READ_MODE }, { DT_REG, N("copyright"), copyright, KTT_STRING, VREG, READ_MODE }, { DT_REG, N("hostname"), 0, KTT_HOSTNAME, VREG, WRITE_MODE }, { DT_REG, N("bootfile"), 0, KTT_BOOTFILE, VREG, READ_MODE }, { DT_REG, N("hz"), &hz, KTT_INT, VREG, READ_MODE }, { DT_REG, N("loadavg"), 
0, KTT_AVENRUN, VREG, READ_MODE },
     { DT_REG, N("pagesize"),
	&cnt.v_page_size, KTT_INT, VREG, READ_MODE },
     { DT_REG, N("physmem"), &physmem, KTT_INT, VREG, READ_MODE },
#if 0
     { DT_DIR, N("root"), 0, KTT_NULL, VDIR, DIR_MODE },
     { DT_BLK, N("rootdev"), &rootdev, KTT_DEVICE, VBLK, READ_MODE },
     { DT_CHR, N("rrootdev"), &rrootdev, KTT_DEVICE, VCHR, READ_MODE },
#endif
     { DT_REG, N("time"), 0, KTT_TIME, VREG, READ_MODE },
     { DT_REG, N("version"), version, KTT_STRING, VREG, READ_MODE },
#undef N
};
static int nkern_targets = sizeof(kern_targets) / sizeof(kern_targets[0]);

static int	kernfs_access __P((struct vop_access_args *ap));
static int	kernfs_badop __P((void));
static int	kernfs_enotsupp __P((void));
static int	kernfs_getattr __P((struct vop_getattr_args *ap));
static int	kernfs_inactive __P((struct vop_inactive_args *ap));
static int	kernfs_lookup __P((struct vop_lookup_args *ap));
static int	kernfs_pathconf __P((struct vop_pathconf_args *ap));
static int	kernfs_print __P((struct vop_print_args *ap));
static int	kernfs_read __P((struct vop_read_args *ap));
static int	kernfs_readdir __P((struct vop_readdir_args *ap));
static int	kernfs_reclaim __P((struct vop_reclaim_args *ap));
static int	kernfs_setattr __P((struct vop_setattr_args *ap));
static int	kernfs_write __P((struct vop_write_args *ap));
static int	kernfs_xread __P((struct kern_target *kt, char *buf, int len,
    int *lenp));
static int	kernfs_xwrite __P((struct kern_target *kt, char *buf, int len));

/*
 * Render a kern_target's current value as text into buf (at most len
 * bytes) according to its tag; *lenp receives the formatted length.
 * Returns EINVAL when buf is too small, EIO for unknown tags.
 */
static int
kernfs_xread(kt, buf, len, lenp)
	struct kern_target *kt;
	char *buf;
	int len;
	int *lenp;
{

	switch (kt->kt_tag) {
	case KTT_TIME: {
		struct timeval tv;
		microtime(&tv);
		sprintf(buf, "%ld %ld\n", tv.tv_sec, tv.tv_usec);
		break;
	}

	case KTT_INT: {
		int *ip = kt->kt_data;
		sprintf(buf, "%d\n", *ip);
		break;
	}

	case KTT_STRING: {
		char *cp = kt->kt_data;
		int xlen = strlen(cp) + 1;

		if (xlen >= len)
			return (EINVAL);

		bcopy(cp, buf, xlen);
		break;
	}

	case KTT_HOSTNAME: {
		char *cp = hostname;
		int xlen = strlen(hostname);

		/* Need room for the appended newline and terminator. */
		if (xlen >= (len-2))
			return (EINVAL);

		bcopy(cp, buf, xlen);
		buf[xlen] = '\n';
		buf[xlen+1] = '\0';
		break;
	}

	case KTT_BOOTFILE: {
		char *cp = kernelname;
		int xlen = strlen(cp) + 1;

		if (xlen >= (len-2))
			return (EINVAL);

		bcopy(cp, buf, xlen);
		buf[xlen] = '\n';
		buf[xlen+1] = '\0';
		break;
	}

	case KTT_AVENRUN:
		sprintf(buf, "%ld %ld %ld %ld\n",
		    averunnable.ldavg[0], averunnable.ldavg[1],
		    averunnable.ldavg[2], averunnable.fscale);
		break;

	default:
		return (EIO);
	}

	*lenp = strlen(buf);
	return (0);
}

/*
 * Apply a write to a kern_target; only the hostname is writable.
 * A trailing newline is stripped before the copy.
 */
static int
kernfs_xwrite(kt, buf, len)
	struct kern_target *kt;
	char *buf;
	int len;
{

	switch (kt->kt_tag) {
	case KTT_HOSTNAME:
		/* XXX BOGUS !!! no check for the length */
		if (buf[len-1] == '\n')
			--len;
		bcopy(buf, hostname, len);
		hostname[len] = '\0';
		return (0);

	default:
		return (EIO);
	}
}

/*
 * vp is the current namei directory
 * ndp is the name to locate in that directory...
 */
static int
kernfs_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;
	struct vnode **vpp = ap->a_vpp;
	struct vnode *dvp = ap->a_dvp;
	char *pname = cnp->cn_nameptr;
	struct proc *p = cnp->cn_proc;
	struct kern_target *kt;
	struct vnode *fvp;
	int nameiop = cnp->cn_nameiop;
	int error, i;

#ifdef KERNFS_DIAGNOSTIC
	printf("kernfs_lookup(%x)\n", ap);
	printf("kernfs_lookup(dp = %x, vpp = %x, cnp = %x)\n",
	    dvp, vpp, ap->a_cnp);
	printf("kernfs_lookup(%s)\n", pname);
#endif

	*vpp = NULLVP;

	/* kernfs is read-only: no delete or rename of entries. */
	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
		return (EROFS);

	VOP_UNLOCK(dvp, 0, p);
	/* "." resolves to the directory itself. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		*vpp = dvp;
		VREF(dvp);
		vn_lock(dvp, LK_SHARED | LK_RETRY, p);
		return (0);
	}

#if 0
	if (cnp->cn_namelen == 4 && bcmp(pname, "root", 4) == 0) {
		*vpp = rootdir;
		VREF(rootdir);
		vn_lock(rootdir, LK_SHARED | LK_RETRY, p)
		return (0);
	}
#endif

	/* Linear scan of the static target table by exact name. */
	for (kt = kern_targets, i = 0; i < nkern_targets; kt++, i++) {
		if (cnp->cn_namelen == kt->kt_namlen &&
		    bcmp(kt->kt_name, pname, cnp->cn_namelen) == 0)
			goto found;
	}

#ifdef KERNFS_DIAGNOSTIC
	printf("kernfs_lookup: i = %d, failed", i);
#endif

	vn_lock(dvp, LK_SHARED | LK_RETRY, p);
	return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);

found:
	if (kt->kt_tag == KTT_DEVICE) {
		dev_t *dp = kt->kt_data;
	loop:
		/* Resolve the device entry to its existing device vnode. */
		if (*dp == NODEV || !vfinddev(*dp, kt->kt_vtype, &fvp)) {
			vn_lock(dvp, LK_SHARED | LK_RETRY, p);
			return (ENOENT);
		}
		*vpp = fvp;
		/* vget may race with a vnode being reclaimed; retry. */
		if (vget(fvp, LK_EXCLUSIVE, p))
			goto loop;
		return (0);
	}

#ifdef KERNFS_DIAGNOSTIC
	printf("kernfs_lookup: allocate new vnode\n");
#endif
	if (error = getnewvnode(VT_KERNFS, dvp->v_mount, kernfs_vnodeop_p,
	    &fvp)) {
		vn_lock(dvp, LK_SHARED | LK_RETRY, p);
		return (error);
	}
	MALLOC(fvp->v_data, void *, sizeof(struct kernfs_node), M_TEMP,
	    M_WAITOK);
	VTOKERN(fvp)->kf_kt = kt;
	fvp->v_type = kt->kt_vtype;
	vn_lock(fvp, LK_SHARED | LK_RETRY, p);
	*vpp = fvp;

#ifdef KERNFS_DIAGNOSTIC
	printf("kernfs_lookup: newvp = %x\n", fvp);
#endif
	return (0);
}

/*
 * Permission check against the target's static mode bits: writes to
 * unwritable files fail outright, root passes everything else, group 0
 * (wheel) members use the group bits, everyone else the "other" bits.
 */
static int
kernfs_access(ap)
	struct vop_access_args /* {
		struct vnode *a_vp;
		int a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct ucred *cred = ap->a_cred;
	mode_t amode = ap->a_mode;
	mode_t fmode =
	    (vp->v_flag & VROOT) ? DIR_MODE : VTOKERN(vp)->kf_kt->kt_mode;
	mode_t mask = 0;
	register gid_t *gp;
	int i;

	/* Some files are simply not modifiable. */
	if ((amode & VWRITE) && (fmode & (S_IWUSR|S_IWGRP|S_IWOTH)) == 0)
		return (EPERM);

	/* Root can do anything else. */
	if (cred->cr_uid == 0)
		return (0);

	/* Check for group 0 (wheel) permissions. */
	for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++)
		if (*gp == 0) {
			if (amode & VEXEC)
				mask |= S_IXGRP;
			if (amode & VREAD)
				mask |= S_IRGRP;
			if (amode & VWRITE)
				mask |= S_IWGRP;
			return ((fmode & mask) == mask ? 0 : EACCES);
		}

	/* Otherwise, check everyone else. */
	if (amode & VEXEC)
		mask |= S_IXOTH;
	if (amode & VREAD)
		mask |= S_IROTH;
	if (amode & VWRITE)
		mask |= S_IWOTH;
	return ((fmode & mask) == mask ?
0 : EACCES); } static int kernfs_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct vattr *vap = ap->a_vap; struct timeval tv; int error = 0; char strbuf[KSTRING]; bzero((caddr_t) vap, sizeof(*vap)); vattr_null(vap); vap->va_uid = 0; vap->va_gid = 0; vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; vap->va_size = 0; vap->va_blocksize = DEV_BSIZE; nanotime(&vap->va_atime); vap->va_mtime = vap->va_atime; vap->va_ctime = vap->va_ctime; vap->va_gen = 0; vap->va_flags = 0; vap->va_rdev = 0; vap->va_bytes = 0; if (vp->v_flag & VROOT) { #ifdef KERNFS_DIAGNOSTIC printf("kernfs_getattr: stat rootdir\n"); #endif vap->va_type = VDIR; vap->va_mode = DIR_MODE; vap->va_nlink = 2; vap->va_fileid = 2; vap->va_size = DEV_BSIZE; } else { struct kern_target *kt = VTOKERN(vp)->kf_kt; int nbytes; #ifdef KERNFS_DIAGNOSTIC printf("kernfs_getattr: stat target %s\n", kt->kt_name); #endif vap->va_type = kt->kt_vtype; vap->va_mode = kt->kt_mode; vap->va_nlink = 1; vap->va_fileid = 1 + (kt - kern_targets) / sizeof(*kt); error = kernfs_xread(kt, strbuf, sizeof(strbuf), &nbytes); vap->va_size = nbytes; } #ifdef KERNFS_DIAGNOSTIC printf("kernfs_getattr: return error %d\n", error); #endif return (error); } static int kernfs_setattr(ap) struct vop_setattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { /* * Silently ignore attribute changes. * This allows for open with truncate to have no * effect until some data is written. I want to * do it this way because all writes are atomic. 
 */
	return (0);
}

/*
 * Read a kernfs file: format the target's current value into a local
 * buffer via kernfs_xread, then uiomove the tail starting at the
 * caller's offset.
 */
static int
kernfs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct kern_target *kt;
	char strbuf[KSTRING];
	int off = uio->uio_offset;
	int error, len;
	char *cp;	/* NOTE(review): unused local */

	if (vp->v_type == VDIR)
		return (EOPNOTSUPP);

	kt = VTOKERN(vp)->kf_kt;

#ifdef KERNFS_DIAGNOSTIC
	printf("kern_read %s\n", kt->kt_name);
#endif

	len = 0;
	if (error = kernfs_xread(kt, strbuf, sizeof(strbuf), &len))
		return (error);
	/* Reads at or past EOF return nothing. */
	if (len <= off)
		return (0);
	return (uiomove(&strbuf[off], len - off, uio));
}

/*
 * Write a kernfs file: the whole value must arrive in a single write
 * at offset 0; it is NUL-terminated locally and passed to
 * kernfs_xwrite.
 */
static int
kernfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct kern_target *kt;
	int error, xlen;
	char strbuf[KSTRING];

	if (vp->v_type == VDIR)
		return (EOPNOTSUPP);

	kt = VTOKERN(vp)->kf_kt;

	if (uio->uio_offset != 0)
		return (EINVAL);

	xlen = min(uio->uio_resid, KSTRING-1);
	if (error = uiomove(strbuf, xlen, uio))
		return (error);

	/* The value must fit entirely in one KSTRING-sized write. */
	if (uio->uio_resid != 0)
		return (EIO);

	strbuf[xlen] = '\0';
	xlen = strlen(strbuf);
	return (kernfs_xwrite(kt, strbuf, xlen));
}

/*
 * Emit fixed-size (UIO_MX) directory entries for each kern_target,
 * skipping device entries whose device does not currently exist.
 */
static int
kernfs_readdir(ap)
	struct vop_readdir_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
		int *a_eofflag;
		u_long *a_cookies;
		int a_ncookies;
	} */ *ap;
{
	int error, i;
	struct uio *uio = ap->a_uio;
	struct kern_target *kt;
	struct dirent d;

	if (ap->a_vp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * We don't allow exporting kernfs mounts, and currently local
	 * requests do not need cookies.
	 */
	if (ap->a_ncookies != NULL)
		panic("kernfs_readdir: not hungry");

	/* Offsets are multiples of the fixed entry size. */
	i = uio->uio_offset / UIO_MX;
	error = 0;
	for (kt = &kern_targets[i];
	     uio->uio_resid >= UIO_MX && i < nkern_targets; kt++, i++) {
		struct dirent *dp = &d;
#ifdef KERNFS_DIAGNOSTIC
		printf("kernfs_readdir: i = %d\n", i);
#endif

		if (kt->kt_tag == KTT_DEVICE) {
			dev_t *dp = kt->kt_data;
			struct vnode *fvp;

			/* Hide device entries with no backing device. */
			if (*dp == NODEV ||
			    !vfinddev(*dp, kt->kt_vtype, &fvp))
				continue;
		}

		bzero((caddr_t)dp, UIO_MX);
		dp->d_namlen = kt->kt_namlen;
		bcopy(kt->kt_name, dp->d_name, kt->kt_namlen+1);

#ifdef KERNFS_DIAGNOSTIC
		printf("kernfs_readdir: name = %s, len = %d\n",
			dp->d_name, dp->d_namlen);
#endif
		/*
		 * Fill in the remaining fields
		 */
		dp->d_reclen = UIO_MX;
		dp->d_fileno = i + 3;
		dp->d_type = kt->kt_type;
		/*
		 * And ship to userland
		 */
		if (error = uiomove((caddr_t)dp, UIO_MX, uio))
			break;
	}
	uio->uio_offset = i * UIO_MX;
	return (error);
}

static int
kernfs_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

#ifdef KERNFS_DIAGNOSTIC
	printf("kernfs_inactive(%x)\n", vp);
#endif
	/*
	 * Clear out the v_type field to avoid
	 * nasty things happening in vgone().
	 */
	VOP_UNLOCK(vp, 0, ap->a_p);
	vp->v_type = VNON;
	return (0);
}

/*
 * Release the per-vnode kernfs_node allocated in kernfs_lookup.
 */
static int
kernfs_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;

#ifdef KERNFS_DIAGNOSTIC
	printf("kernfs_reclaim(%x)\n", vp);
#endif
	if (vp->v_data) {
		FREE(vp->v_data, M_TEMP);
		vp->v_data = 0;
	}
	return (0);
}

/*
 * Print out the contents of a kernfs vnode.
*/ /* ARGSUSED */ static int kernfs_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_KERNFS, kernfs vnode\n"); return (0); } /* * Kernfs "should never get here" operation */ static int kernfs_badop() { return (EIO); } vop_t **kernfs_vnodeop_p; static struct vnodeopv_entry_desc kernfs_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) kernfs_access }, { &vop_bmap_desc, (vop_t *) kernfs_badop }, { &vop_getattr_desc, (vop_t *) kernfs_getattr }, { &vop_inactive_desc, (vop_t *) kernfs_inactive }, { &vop_lookup_desc, (vop_t *) kernfs_lookup }, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_print_desc, (vop_t *) kernfs_print }, { &vop_read_desc, (vop_t *) kernfs_read }, { &vop_readdir_desc, (vop_t *) kernfs_readdir }, { &vop_reclaim_desc, (vop_t *) kernfs_reclaim }, { &vop_setattr_desc, (vop_t *) kernfs_setattr }, { &vop_write_desc, (vop_t *) kernfs_write }, { NULL, NULL } }; static struct vnodeopv_desc kernfs_vnodeop_opv_desc = { &kernfs_vnodeop_p, kernfs_vnodeop_entries }; VNODEOP_SET(kernfs_vnodeop_opv_desc); diff --git a/sys/net/ppp_tty.c b/sys/net/ppp_tty.c index f12a85ce0877..9d774e478481 100644 --- a/sys/net/ppp_tty.c +++ b/sys/net/ppp_tty.c @@ -1,1133 +1,1133 @@ /* * ppp_tty.c - Point-to-Point Protocol (PPP) driver for asynchronous * tty devices. * * Copyright (c) 1989 Carnegie Mellon University. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that the above copyright notice and this paragraph are * duplicated in all such forms and that any documentation, * advertising materials, and other materials related to such * distribution and use acknowledge that the software was developed * by Carnegie Mellon University. The name of the * University may not be used to endorse or promote products derived * from this software without specific prior written permission. 
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Drew D. Perkins * Carnegie Mellon University * 4910 Forbes Ave. * Pittsburgh, PA 15213 * (412) 268-8576 * ddp@andrew.cmu.edu * * Based on: * @(#)if_sl.c 7.6.1.2 (Berkeley) 2/15/89 * * Copyright (c) 1987 Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms are permitted * provided that the above copyright notice and this paragraph are * duplicated in all such forms and that any documentation, * advertising materials, and other materials related to such * distribution and use acknowledge that the software was developed * by the University of California, Berkeley. The name of the * University may not be used to endorse or promote products derived * from this software without specific prior written permission. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. * * Serial Line interface * * Rick Adams * Center for Seismic Studies * 1300 N 17th Street, Suite 1450 * Arlington, Virginia 22209 * (703)276-7900 * rick@seismo.ARPA * seismo!rick * * Pounded on heavily by Chris Torek (chris@mimsy.umd.edu, umcp-cs!chris). * Converted to 4.3BSD Beta by Chris Torek. * Other changes made at Berkeley, based in part on code by Kirk Smith. * * Converted to 4.3BSD+ 386BSD by Brad Parker (brad@cayman.com) * Added VJ tcp header compression; more unified ioctls * * Extensively modified by Paul Mackerras (paulus@cs.anu.edu.au). * Cleaned up a lot of the mbuf-related code to fix bugs that * caused system crashes and packet corruption. Changed pppstart * so that it doesn't just give up with a "collision" if the whole * packet doesn't fit in the output ring buffer. 
* * Added priority queueing for interactive IP packets, following * the model of if_sl.c, plus hooks for bpf. * Paul Mackerras (paulus@cs.anu.edu.au). */ -/* $Id: ppp_tty.c,v 1.29 1997/12/06 13:24:37 bde Exp $ */ +/* $Id: ppp_tty.c,v 1.30 1998/02/13 12:46:15 phk Exp $ */ #include "ppp.h" #if NPPP > 0 #include "opt_ppp.h" /* XXX for ppp_defs.h */ #define VJC /* XXX for ppp_defs.h */ #include #include #include #include #include #include #include #include #include - +#include /* * XXX stop from including . doesn't * exist if we are an LKM. */ #undef KERNEL # include #define KERNEL #ifdef i386 #include #endif #ifdef PPP_FILTER #include #endif #include #include static int pppopen __P((dev_t dev, struct tty *tp)); static int pppclose __P((struct tty *tp, int flag)); static int pppread __P((struct tty *tp, struct uio *uio, int flag)); static int pppwrite __P((struct tty *tp, struct uio *uio, int flag)); static int ppptioctl __P((struct tty *tp, int cmd, caddr_t data, int flag, struct proc *)); static int pppinput __P((int c, struct tty *tp)); static int pppstart __P((struct tty *tp)); static u_short pppfcs __P((u_short fcs, u_char *cp, int len)); static void pppasyncstart __P((struct ppp_softc *)); static void pppasyncctlp __P((struct ppp_softc *)); static void pppasyncrelinq __P((struct ppp_softc *)); static void pppasyncsetmtu __P((struct ppp_softc *)); static void ppp_timeout __P((void *)); static void pppgetm __P((struct ppp_softc *sc)); static void ppplogchar __P((struct ppp_softc *, int)); /* XXX called from if_ppp.c - layering violation */ void pppasyncattach __P((void *)); /* * Some useful mbuf macros not in mbuf.h. */ #define M_IS_CLUSTER(m) ((m)->m_flags & M_EXT) #define M_DATASTART(m) \ (M_IS_CLUSTER(m) ? (m)->m_ext.ext_buf : \ (m)->m_flags & M_PKTHDR ? (m)->m_pktdat : (m)->m_dat) #define M_DATASIZE(m) \ (M_IS_CLUSTER(m) ? (m)->m_ext.ext_size : \ (m)->m_flags & M_PKTHDR ? MHLEN: MLEN) /* * Does c need to be escaped? 
 */
/*
 * Does character c need to be byte-stuffed (escaped) on transmit?
 * sc_asyncmap is a 256-bit map stored as 8 x 32-bit words, indexed
 * word = c >> 5, bit = c & 0x1F.
 */
#define ESCAPE_P(c)	(sc->sc_asyncmap[(c) >> 5] & (1 << ((c) & 0x1F)))

/*
 * Procedures for using an async tty interface for PPP.
 */

/* This is a FreeBSD-2.X kernel. */
#define CCOUNT(q)	((q)->c_cc)	/* characters currently on clist q */
#define PPP_LOWAT	100	/* Process more output when < LOWAT on queue */
#define	PPP_HIWAT	400	/* Don't start a new packet if HIWAT on que */

/*
 * Define the PPP line discipline: open/close/read/write/ioctl/input/
 * start entry points; modem-control is the generic ttymodem().
 */

static struct linesw pppdisc = {
	pppopen, pppclose, pppread, pppwrite, ppptioctl,
	pppinput, pppstart, ttymodem, PPP_FLAG
};

/*
 * One-time attach hook: on i386, widen the interrupt masks so tty and
 * (soft)net code cannot preempt each other mid-clist, then register the
 * PPP line discipline in the global linesw[] table.
 * "dummy" is unused (attach-hook calling convention).
 */
void
pppasyncattach(dummy)
    void *dummy;
{
#ifdef i386
    int s;

    s = splhigh();

    /*
     * Make sure that the soft net "engine" cannot run while spltty code is
     * active.  The if_ppp.c code can walk down into b_to_q etc, and it is
     * bad if the tty system was in the middle of another b_to_q...
     */
    tty_imask |= softnet_imask;	/* spltty() block spl[soft]net() */
    net_imask |= softtty_imask;	/* splimp() block splsofttty() */
    net_imask |= tty_imask;	/* splimp() block spltty() */
    update_intr_masks();

    splx(s);

    if ( bootverbose )
	printf("new masks: bio %x, tty %x, net %x\n",
	    bio_imask, tty_imask, net_imask);
#endif

    /* register line discipline */
    linesw[PPPDISC] = pppdisc;
}

/*
 * Line specific open routine for async tty devices.
 * Attach the given tty to the first available ppp unit.
 * Called from device open routine or ttioctl() at >= splsofttty().
 * Returns 0 on success, the suser() error if not superuser, or ENXIO
 * if no ppp unit can be allocated.
 */
/* ARGSUSED */
static int
pppopen(dev, tp)
    dev_t dev;
    register struct tty *tp;
{
    struct proc *p = curproc;		/* XXX */
    register struct ppp_softc *sc;
    int error, s;

    /* Only the superuser may attach a tty to a ppp unit. */
    if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
	return (error);

    s = spltty();

    /* If this tty is already attached to a unit, the open is a no-op. */
    if (tp->t_line == PPPDISC) {
	sc = (struct ppp_softc *) tp->t_sc;
	if (sc != NULL && sc->sc_devp == (void *) tp) {
	    splx(s);
	    return (0);
	}
    }

    if ((sc = pppalloc(p->p_pid)) == NULL) {
	splx(s);
	return ENXIO;
    }

    if (sc->sc_relinq)
	(*sc->sc_relinq)(sc);	/* get previous owner to relinquish the unit */

    /*
     * Reset per-unit receive state and install the async-specific
     * callbacks.  Default transmit asyncmap: escape all control
     * characters 0x00-0x1f (word 0 = 0xffffffff) plus 0x7d/0x7e
     * (bits 29/30 of word 3, which covers chars 0x60-0x7f).
     */
    sc->sc_ilen = 0;
    sc->sc_m = NULL;
    bzero(sc->sc_asyncmap, sizeof(sc->sc_asyncmap));
    sc->sc_asyncmap[0] = 0xffffffff;
    sc->sc_asyncmap[3] = 0x60000000;
    sc->sc_rasyncmap = 0;
    sc->sc_devp = (void *) tp;
    sc->sc_start = pppasyncstart;
    sc->sc_ctlp = pppasyncctlp;
    sc->sc_relinq = pppasyncrelinq;
    sc->sc_setmtu = pppasyncsetmtu;
    sc->sc_outm = NULL;
    pppgetm(sc);
    sc->sc_if.if_flags |= IFF_RUNNING;
    microtime(&sc->sc_if.if_lastchange);
    sc->sc_if.if_baudrate = tp->t_ospeed;

    tp->t_sc = (caddr_t) sc;
    ttyflush(tp, FREAD | FWRITE);

    /*
     * Pre-allocate cblocks to the "just right" amount.  The 1 byte t_canq
     * allocation helps avoid the need for select and/or FIONREAD.
     * We also pass 1 byte tokens through t_canq...
     */
    clist_alloc_cblocks(&tp->t_canq, 1, 1);
    clist_alloc_cblocks(&tp->t_outq, sc->sc_if.if_mtu + PPP_HIWAT,
			sc->sc_if.if_mtu + PPP_HIWAT);
    clist_alloc_cblocks(&tp->t_rawq, 0, 0);

    splx(s);
    return (0);
}

/*
 * Line specific close routine, called from device close routine
 * and from ttioctl at >= splsofttty().
 * Detach the tty from the ppp unit.
 * Mimics part of ttyclose().
 */
static int
pppclose(tp, flag)
    struct tty *tp;
    int flag;
{
    register struct ppp_softc *sc;
    int s;

    s = spltty();
    ttyflush(tp, FREAD | FWRITE);
    /* Release the clist storage pppopen() reserved for this tty. */
    clist_free_cblocks(&tp->t_canq);
    clist_free_cblocks(&tp->t_outq);
    tp->t_line = 0;
    sc = (struct ppp_softc *) tp->t_sc;
    if (sc != NULL) {
	tp->t_sc = NULL;
	if (tp == (struct tty *) sc->sc_devp) {
	    /* Drop any half-done packets, then free the ppp unit. */
	    pppasyncrelinq(sc);
	    pppdealloc(sc);
	}
    }
    splx(s);
    return 0;
}

/*
 * Relinquish the interface unit to another device.
 * Frees any partially-sent output chain and partially-received input
 * chain, and cancels the pending output timeout, leaving the unit
 * clean for its next owner.
 */
static void
pppasyncrelinq(sc)
    struct ppp_softc *sc;
{
    int s;

    s = spltty();
    if (sc->sc_outm) {
	m_freem(sc->sc_outm);
	sc->sc_outm = NULL;
    }
    if (sc->sc_m) {
	m_freem(sc->sc_m);
	sc->sc_m = NULL;
    }
    if (sc->sc_flags & SC_TIMEOUT) {
	untimeout(ppp_timeout, (void *) sc, sc->sc_ch);
	sc->sc_flags &= ~SC_TIMEOUT;
    }
    splx(s);
}

/*
 * This gets called from the upper layer to notify a mtu change.
 * Re-reserve the tty output clist to fit a frame for the new MTU.
 */
static void
pppasyncsetmtu(sc)
    register struct ppp_softc *sc;
{
    register struct tty *tp = (struct tty *) sc->sc_devp;
    int s;

    s = spltty();
    if (tp != NULL)
	clist_alloc_cblocks(&tp->t_outq, sc->sc_if.if_mtu + PPP_HIWAT,
			     sc->sc_if.if_mtu + PPP_HIWAT);
    splx(s);
}

/*
 * Line specific (tty) read routine.
 * called at zero spl from the device driver in the response to user-level
 * reads on the tty file descriptor (ie: pppd).
 * Delivers at most one queued input packet per call; returns 0 with no
 * data transferred on hangup/EOF or if the tty is no longer ours.
 */
static int
pppread(tp, uio, flag)
    register struct tty *tp;
    struct uio *uio;
    int flag;
{
    register struct ppp_softc *sc = (struct ppp_softc *)tp->t_sc;
    struct mbuf *m, *m0;
    register int s;
    int error = 0;

    if (sc == NULL)
	return 0;
    /*
     * Loop waiting for input, checking that nothing disastrous
     * happens in the meantime.
     */
    s = spltty();
    for (;;) {
	if (tp != (struct tty *) sc->sc_devp || tp->t_line != PPPDISC) {
	    splx(s);
	    return 0;
	}
	if (sc->sc_inq.ifq_head != NULL)
	    break;
	if ((tp->t_state & TS_CONNECTED) == 0) {
	    splx(s);
	    return 0;		/* end of file */
	}
	if (tp->t_state & TS_ASYNC || flag & IO_NDELAY) {
	    splx(s);
	    return (EWOULDBLOCK);
	}
	error = ttysleep(tp, TSA_HUP_OR_INPUT(tp), TTIPRI | PCATCH, "pppin", 0);
	if (error) {
	    splx(s);
	    return error;
	}
    }

    /* Pull place-holder byte out of canonical queue */
    getc(&tp->t_canq);

    /* Get the packet from the input queue */
    IF_DEQUEUE(&sc->sc_inq, m0);
    splx(s);

    /*
     * Copy out as much as the caller asked for; any remainder of the
     * packet is discarded when the whole chain is freed below.
     */
    for (m = m0; m && uio->uio_resid; m = m->m_next)
	if ((error = uiomove(mtod(m, u_char *), m->m_len, uio)) != 0)
	    break;
    m_freem(m0);
    return (error);
}

/*
 * Line specific (tty) write routine.
 * called at zero spl from the device driver in the response to user-level
 * writes on the tty file descriptor (ie: pppd).
 * The caller must supply one complete PPP frame including the
 * PPP_HDRLEN header bytes; over/undersized writes are rejected.
 */
static int
pppwrite(tp, uio, flag)
    register struct tty *tp;
    struct uio *uio;
    int flag;
{
    register struct ppp_softc *sc = (struct ppp_softc *)tp->t_sc;
    struct mbuf *m, *m0, **mp;
    struct sockaddr dst;
    int len, error, s;

    if ((tp->t_state & TS_CONNECTED) == 0)
	return 0;		/* wrote 0 bytes */
    if (tp->t_line != PPPDISC)
	return (EINVAL);
    if (sc == NULL || tp != (struct tty *) sc->sc_devp)
	return EIO;
    if (uio->uio_resid > sc->sc_if.if_mtu + PPP_HDRLEN ||
	uio->uio_resid < PPP_HDRLEN)
	return (EMSGSIZE);

    s = spltty();
    /* Gather the user data into an mbuf chain, using clusters for bulk. */
    for (mp = &m0; uio->uio_resid; mp = &m->m_next) {
	MGET(m, M_WAIT, MT_DATA);
	if ((*mp = m) == NULL) {
	    m_freem(m0);
	    splx(s);
	    return (ENOBUFS);
	}
	m->m_len = 0;
	if (uio->uio_resid >= MCLBYTES / 2)
	    MCLGET(m, M_DONTWAIT);
	len = M_TRAILINGSPACE(m);
	if (len > uio->uio_resid)
	    len = uio->uio_resid;
	if ((error = uiomove(mtod(m, u_char *), len, uio)) != 0) {
	    m_freem(m0);
	    splx(s);
	    return (error);
	}
	m->m_len = len;
    }
    /*
     * Pass the PPP header to pppoutput() in a fake AF_UNSPEC sockaddr,
     * then strip it from the chain so the data holds just the payload.
     */
    dst.sa_family = AF_UNSPEC;
    bcopy(mtod(m0, u_char *), dst.sa_data, PPP_HDRLEN);
    m0->m_data += PPP_HDRLEN;
    m0->m_len -= PPP_HDRLEN;

    /* call the upper layer to "transmit" it...
     */
    error = pppoutput(&sc->sc_if, m0, &dst, (struct rtentry *)0);
    splx(s);
    return (error);
}

/*
 * Line specific (tty) ioctl routine.
 * This discipline requires that tty device drivers call
 * the line specific l_ioctl routine from their ioctl routines.
 * Handles the async-character-map ioctls here; everything else is
 * delegated to the generic pppioctl().
 */
/* ARGSUSED */
static int
ppptioctl(tp, cmd, data, flag, p)
    struct tty *tp;
    int cmd;
    caddr_t data;
    int flag;
    struct proc *p;
{
    struct ppp_softc *sc = (struct ppp_softc *) tp->t_sc;
    int error, s;

    if (sc == NULL || tp != (struct tty *) sc->sc_devp)
	return (ENOIOCTL);

    error = 0;
    switch (cmd) {
    case PPPIOCSASYNCMAP:
	/* Set transmit asyncmap word 0 (control chars); root only. */
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
	    break;
	sc->sc_asyncmap[0] = *(u_int *)data;
	break;

    case PPPIOCGASYNCMAP:
	*(u_int *)data = sc->sc_asyncmap[0];
	break;

    case PPPIOCSRASYNCMAP:
	/* Set receive asyncmap; root only. */
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
	    break;
	sc->sc_rasyncmap = *(u_int *)data;
	break;

    case PPPIOCGRASYNCMAP:
	*(u_int *)data = sc->sc_rasyncmap;
	break;

    case PPPIOCSXASYNCMAP:
	/* Set the full 256-bit map, then force the fixed bits. */
	if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
	    break;
	s = spltty();
	bcopy(data, sc->sc_asyncmap, sizeof(sc->sc_asyncmap));
	sc->sc_asyncmap[1] = 0;		    /* mustn't escape 0x20 - 0x3f */
	sc->sc_asyncmap[2] &= ~0x40000000;  /* mustn't escape 0x5e */
	sc->sc_asyncmap[3] |= 0x60000000;   /* must escape 0x7d, 0x7e */
	splx(s);
	break;

    case PPPIOCGXASYNCMAP:
	bcopy(sc->sc_asyncmap, data, sizeof(sc->sc_asyncmap));
	break;

    default:
	/* Everything else goes to the generic PPP ioctl handler. */
	error = pppioctl(sc, cmd, data, flag, p);
	if (error == 0 && cmd == PPPIOCSMRU)
	    pppgetm(sc);
    }
    return error;
}

/*
 * FCS lookup table as calculated by genfcstab.
*/ static u_short fcstab[256] = { 0x0000, 0x1189, 0x2312, 0x329b, 0x4624, 0x57ad, 0x6536, 0x74bf, 0x8c48, 0x9dc1, 0xaf5a, 0xbed3, 0xca6c, 0xdbe5, 0xe97e, 0xf8f7, 0x1081, 0x0108, 0x3393, 0x221a, 0x56a5, 0x472c, 0x75b7, 0x643e, 0x9cc9, 0x8d40, 0xbfdb, 0xae52, 0xdaed, 0xcb64, 0xf9ff, 0xe876, 0x2102, 0x308b, 0x0210, 0x1399, 0x6726, 0x76af, 0x4434, 0x55bd, 0xad4a, 0xbcc3, 0x8e58, 0x9fd1, 0xeb6e, 0xfae7, 0xc87c, 0xd9f5, 0x3183, 0x200a, 0x1291, 0x0318, 0x77a7, 0x662e, 0x54b5, 0x453c, 0xbdcb, 0xac42, 0x9ed9, 0x8f50, 0xfbef, 0xea66, 0xd8fd, 0xc974, 0x4204, 0x538d, 0x6116, 0x709f, 0x0420, 0x15a9, 0x2732, 0x36bb, 0xce4c, 0xdfc5, 0xed5e, 0xfcd7, 0x8868, 0x99e1, 0xab7a, 0xbaf3, 0x5285, 0x430c, 0x7197, 0x601e, 0x14a1, 0x0528, 0x37b3, 0x263a, 0xdecd, 0xcf44, 0xfddf, 0xec56, 0x98e9, 0x8960, 0xbbfb, 0xaa72, 0x6306, 0x728f, 0x4014, 0x519d, 0x2522, 0x34ab, 0x0630, 0x17b9, 0xef4e, 0xfec7, 0xcc5c, 0xddd5, 0xa96a, 0xb8e3, 0x8a78, 0x9bf1, 0x7387, 0x620e, 0x5095, 0x411c, 0x35a3, 0x242a, 0x16b1, 0x0738, 0xffcf, 0xee46, 0xdcdd, 0xcd54, 0xb9eb, 0xa862, 0x9af9, 0x8b70, 0x8408, 0x9581, 0xa71a, 0xb693, 0xc22c, 0xd3a5, 0xe13e, 0xf0b7, 0x0840, 0x19c9, 0x2b52, 0x3adb, 0x4e64, 0x5fed, 0x6d76, 0x7cff, 0x9489, 0x8500, 0xb79b, 0xa612, 0xd2ad, 0xc324, 0xf1bf, 0xe036, 0x18c1, 0x0948, 0x3bd3, 0x2a5a, 0x5ee5, 0x4f6c, 0x7df7, 0x6c7e, 0xa50a, 0xb483, 0x8618, 0x9791, 0xe32e, 0xf2a7, 0xc03c, 0xd1b5, 0x2942, 0x38cb, 0x0a50, 0x1bd9, 0x6f66, 0x7eef, 0x4c74, 0x5dfd, 0xb58b, 0xa402, 0x9699, 0x8710, 0xf3af, 0xe226, 0xd0bd, 0xc134, 0x39c3, 0x284a, 0x1ad1, 0x0b58, 0x7fe7, 0x6e6e, 0x5cf5, 0x4d7c, 0xc60c, 0xd785, 0xe51e, 0xf497, 0x8028, 0x91a1, 0xa33a, 0xb2b3, 0x4a44, 0x5bcd, 0x6956, 0x78df, 0x0c60, 0x1de9, 0x2f72, 0x3efb, 0xd68d, 0xc704, 0xf59f, 0xe416, 0x90a9, 0x8120, 0xb3bb, 0xa232, 0x5ac5, 0x4b4c, 0x79d7, 0x685e, 0x1ce1, 0x0d68, 0x3ff3, 0x2e7a, 0xe70e, 0xf687, 0xc41c, 0xd595, 0xa12a, 0xb0a3, 0x8238, 0x93b1, 0x6b46, 0x7acf, 0x4854, 0x59dd, 0x2d62, 0x3ceb, 0x0e70, 0x1ff9, 0xf78f, 0xe606, 0xd49d, 0xc514, 0xb1ab, 
0xa022, 0x92b9, 0x8330, 0x7bc7, 0x6a4e, 0x58d5, 0x495c, 0x3de3, 0x2c6a, 0x1ef1, 0x0f78 }; /* * Calculate a new FCS given the current FCS and the new data. */ static u_short pppfcs(u_short fcs, u_char *cp, int len) { while (len--) fcs = PPP_FCS(fcs, *cp++); return (fcs); } /* * This gets called at splsoftnet from if_ppp.c at various times * when there is data ready to be sent. */ static void pppasyncstart(sc) register struct ppp_softc *sc; { register struct tty *tp = (struct tty *) sc->sc_devp; register struct mbuf *m; register int len; register u_char *start, *stop, *cp; int n, ndone, done, idle; struct mbuf *m2; int s; idle = 0; /* XXX assumes atomic access to *tp although we're not at spltty(). */ while (CCOUNT(&tp->t_outq) < PPP_HIWAT) { /* * See if we have an existing packet partly sent. * If not, get a new packet and start sending it. */ m = sc->sc_outm; if (m == NULL) { /* * Get another packet to be sent. */ m = ppp_dequeue(sc); if (m == NULL) { idle = 1; break; } /* * The extra PPP_FLAG will start up a new packet, and thus * will flush any accumulated garbage. We do this whenever * the line may have been idle for some time. */ /* XXX as above. */ if (CCOUNT(&tp->t_outq) == 0) { ++sc->sc_stats.ppp_obytes; (void) putc(PPP_FLAG, &tp->t_outq); } /* Calculate the FCS for the first mbuf's worth. */ sc->sc_outfcs = pppfcs(PPP_INITFCS, mtod(m, u_char *), m->m_len); gettime(&sc->sc_if.if_lastchange); } for (;;) { start = mtod(m, u_char *); len = m->m_len; stop = start + len; while (len > 0) { /* * Find out how many bytes in the string we can * handle without doing something special. */ for (cp = start; cp < stop; cp++) if (ESCAPE_P(*cp)) break; n = cp - start; if (n) { /* NetBSD (0.9 or later), 4.3-Reno or similar. */ ndone = n - b_to_q(start, n, &tp->t_outq); len -= ndone; start += ndone; sc->sc_stats.ppp_obytes += ndone; if (ndone < n) break; /* packet doesn't fit */ } /* * If there are characters left in the mbuf, * the first one must be special. 
* Put it out in a different form. */ if (len) { s = spltty(); if (putc(PPP_ESCAPE, &tp->t_outq)) break; if (putc(*start ^ PPP_TRANS, &tp->t_outq)) { (void) unputc(&tp->t_outq); splx(s); break; } splx(s); sc->sc_stats.ppp_obytes += 2; start++; len--; } } /* * If we didn't empty this mbuf, remember where we're up to. * If we emptied the last mbuf, try to add the FCS and closing * flag, and if we can't, leave sc_outm pointing to m, but with * m->m_len == 0, to remind us to output the FCS and flag later. */ done = len == 0; if (done && m->m_next == NULL) { u_char *p, *q; int c; u_char endseq[8]; /* * We may have to escape the bytes in the FCS. */ p = endseq; c = ~sc->sc_outfcs & 0xFF; if (ESCAPE_P(c)) { *p++ = PPP_ESCAPE; *p++ = c ^ PPP_TRANS; } else *p++ = c; c = (~sc->sc_outfcs >> 8) & 0xFF; if (ESCAPE_P(c)) { *p++ = PPP_ESCAPE; *p++ = c ^ PPP_TRANS; } else *p++ = c; *p++ = PPP_FLAG; /* * Try to output the FCS and flag. If the bytes * don't all fit, back out. */ s = spltty(); for (q = endseq; q < p; ++q) if (putc(*q, &tp->t_outq)) { done = 0; for (; q > endseq; --q) unputc(&tp->t_outq); break; } splx(s); if (done) sc->sc_stats.ppp_obytes += q - endseq; } if (!done) { /* remember where we got to */ m->m_data = start; m->m_len = len; break; } /* Finished with this mbuf; free it and move on. */ MFREE(m, m2); m = m2; if (m == NULL) { /* Finished a packet */ break; } sc->sc_outfcs = pppfcs(sc->sc_outfcs, mtod(m, u_char *), m->m_len); } /* * If m == NULL, we have finished a packet. * If m != NULL, we've either done as much work this time * as we need to, or else we've filled up the output queue. */ sc->sc_outm = m; if (m) break; } /* Call pppstart to start output again if necessary. */ s = spltty(); pppstart(tp); /* * This timeout is needed for operation on a pseudo-tty, * because the pty code doesn't call pppstart after it has * drained the t_outq. 
*/ if (!idle && (sc->sc_flags & SC_TIMEOUT) == 0) { sc->sc_ch = timeout(ppp_timeout, (void *) sc, 1); sc->sc_flags |= SC_TIMEOUT; } splx(s); } /* * This gets called when a received packet is placed on * the inq, at splsoftnet. The pppd daemon is to be woken up to do a read(). */ static void pppasyncctlp(sc) struct ppp_softc *sc; { struct tty *tp; int s; /* Put a placeholder byte in canq for ttselect()/ttnread(). */ s = spltty(); tp = (struct tty *) sc->sc_devp; putc(0, &tp->t_canq); ttwakeup(tp); splx(s); } /* * Start output on async tty interface. If the transmit queue * has drained sufficiently, arrange for pppasyncstart to be * called later at splsoftnet. * Called at spltty or higher. */ int pppstart(tp) register struct tty *tp; { register struct ppp_softc *sc = (struct ppp_softc *) tp->t_sc; /* * Call output process whether or not there is any output. * We are being called in lieu of ttstart and must do what it would. */ if (tp->t_oproc != NULL) (*tp->t_oproc)(tp); /* * If the transmit queue has drained and the tty has not hung up * or been disconnected from the ppp unit, then tell if_ppp.c that * we need more output. */ if (CCOUNT(&tp->t_outq) < PPP_LOWAT && !((tp->t_state & TS_CONNECTED) == 0) && sc != NULL && tp == (struct tty *) sc->sc_devp) { ppp_restart(sc); } return 0; } /* * Timeout routine - try to start some more output. */ static void ppp_timeout(x) void *x; { struct ppp_softc *sc = (struct ppp_softc *) x; struct tty *tp = (struct tty *) sc->sc_devp; int s; s = spltty(); sc->sc_flags &= ~SC_TIMEOUT; pppstart(tp); splx(s); } /* * Allocate enough mbuf to handle current MRU. */ static void pppgetm(sc) register struct ppp_softc *sc; { struct mbuf *m, **mp; int len; mp = &sc->sc_m; for (len = sc->sc_mru + PPP_HDRLEN + PPP_FCSLEN; len > 0; ){ if ((m = *mp) == NULL) { MGETHDR(m, M_DONTWAIT, MT_DATA); if (m == NULL) break; *mp = m; MCLGET(m, M_DONTWAIT); } len -= M_DATASIZE(m); mp = &m->m_next; } } /* * tty interface receiver interrupt. 
*/ static unsigned paritytab[8] = { 0x96696996, 0x69969669, 0x69969669, 0x96696996, 0x69969669, 0x96696996, 0x96696996, 0x69969669 }; /* * Called when character is available from device driver. * Only guaranteed to be at splsofttty() or spltty() * This is safe to be called while the upper half's netisr is preempted. */ static int pppinput(c, tp) int c; register struct tty *tp; { register struct ppp_softc *sc; struct mbuf *m; int ilen, s; sc = (struct ppp_softc *) tp->t_sc; if (sc == NULL || tp != (struct tty *) sc->sc_devp) return 0; ++tk_nin; ++sc->sc_stats.ppp_ibytes; if ((tp->t_state & TS_CONNECTED) == 0) { if (sc->sc_flags & SC_DEBUG) printf("ppp%d: no carrier\n", sc->sc_if.if_unit); goto flush; } if (c & TTY_ERRORMASK) { /* framing error or overrun on this char - abort packet */ if (sc->sc_flags & SC_DEBUG) printf("ppp%d: line error %x\n", sc->sc_if.if_unit, c & TTY_ERRORMASK); goto flush; } c &= TTY_CHARMASK; /* * Handle software flow control of output. */ if (tp->t_iflag & IXON) { if (c == tp->t_cc[VSTOP] && tp->t_cc[VSTOP] != _POSIX_VDISABLE) { if ((tp->t_state & TS_TTSTOP) == 0) { tp->t_state |= TS_TTSTOP; (*cdevsw[major(tp->t_dev)]->d_stop)(tp, 0); } return 0; } if (c == tp->t_cc[VSTART] && tp->t_cc[VSTART] != _POSIX_VDISABLE) { tp->t_state &= ~TS_TTSTOP; if (tp->t_oproc != NULL) (*tp->t_oproc)(tp); return 0; } } s = spltty(); if (c & 0x80) sc->sc_flags |= SC_RCV_B7_1; else sc->sc_flags |= SC_RCV_B7_0; if (paritytab[c >> 5] & (1 << (c & 0x1F))) sc->sc_flags |= SC_RCV_ODDP; else sc->sc_flags |= SC_RCV_EVNP; splx(s); if (sc->sc_flags & SC_LOG_RAWIN) ppplogchar(sc, c); if (c == PPP_FLAG) { ilen = sc->sc_ilen; sc->sc_ilen = 0; if (sc->sc_rawin_count > 0) ppplogchar(sc, -1); /* * If SC_ESCAPED is set, then we've seen the packet * abort sequence "}~". 
*/ if (sc->sc_flags & (SC_FLUSH | SC_ESCAPED) || (ilen > 0 && sc->sc_fcs != PPP_GOODFCS)) { s = spltty(); sc->sc_flags |= SC_PKTLOST; /* note the dropped packet */ if ((sc->sc_flags & (SC_FLUSH | SC_ESCAPED)) == 0){ if (sc->sc_flags & SC_DEBUG) printf("ppp%d: bad fcs %x, pkt len %d\n", sc->sc_if.if_unit, sc->sc_fcs, ilen); sc->sc_if.if_ierrors++; sc->sc_stats.ppp_ierrors++; } else sc->sc_flags &= ~(SC_FLUSH | SC_ESCAPED); splx(s); return 0; } if (ilen < PPP_HDRLEN + PPP_FCSLEN) { if (ilen) { if (sc->sc_flags & SC_DEBUG) printf("ppp%d: too short (%d)\n", sc->sc_if.if_unit, ilen); s = spltty(); sc->sc_if.if_ierrors++; sc->sc_stats.ppp_ierrors++; sc->sc_flags |= SC_PKTLOST; splx(s); } return 0; } /* * Remove FCS trailer. Somewhat painful... */ ilen -= 2; if (--sc->sc_mc->m_len == 0) { for (m = sc->sc_m; m->m_next != sc->sc_mc; m = m->m_next) ; sc->sc_mc = m; } sc->sc_mc->m_len--; /* excise this mbuf chain */ m = sc->sc_m; sc->sc_m = sc->sc_mc->m_next; sc->sc_mc->m_next = NULL; ppppktin(sc, m, sc->sc_flags & SC_PKTLOST); if (sc->sc_flags & SC_PKTLOST) { s = spltty(); sc->sc_flags &= ~SC_PKTLOST; splx(s); } pppgetm(sc); return 0; } if (sc->sc_flags & SC_FLUSH) { if (sc->sc_flags & SC_LOG_FLUSH) ppplogchar(sc, c); return 0; } if (c < 0x20 && (sc->sc_rasyncmap & (1 << c))) return 0; s = spltty(); if (sc->sc_flags & SC_ESCAPED) { sc->sc_flags &= ~SC_ESCAPED; c ^= PPP_TRANS; } else if (c == PPP_ESCAPE) { sc->sc_flags |= SC_ESCAPED; splx(s); return 0; } splx(s); /* * Initialize buffer on first octet received. * First octet could be address or protocol (when compressing * address/control). * Second octet is control. * Third octet is first or second (when compressing protocol) * octet of protocol. * Fourth octet is second octet of protocol. 
*/ if (sc->sc_ilen == 0) { /* reset the first input mbuf */ if (sc->sc_m == NULL) { pppgetm(sc); if (sc->sc_m == NULL) { if (sc->sc_flags & SC_DEBUG) printf("ppp%d: no input mbufs!\n", sc->sc_if.if_unit); goto flush; } } m = sc->sc_m; m->m_len = 0; m->m_data = M_DATASTART(sc->sc_m); sc->sc_mc = m; sc->sc_mp = mtod(m, char *); sc->sc_fcs = PPP_INITFCS; if (c != PPP_ALLSTATIONS) { if (sc->sc_flags & SC_REJ_COMP_AC) { if (sc->sc_flags & SC_DEBUG) printf("ppp%d: garbage received: 0x%x (need 0xFF)\n", sc->sc_if.if_unit, c); goto flush; } *sc->sc_mp++ = PPP_ALLSTATIONS; *sc->sc_mp++ = PPP_UI; sc->sc_ilen += 2; m->m_len += 2; } } if (sc->sc_ilen == 1 && c != PPP_UI) { if (sc->sc_flags & SC_DEBUG) printf("ppp%d: missing UI (0x3), got 0x%x\n", sc->sc_if.if_unit, c); goto flush; } if (sc->sc_ilen == 2 && (c & 1) == 1) { /* a compressed protocol */ *sc->sc_mp++ = 0; sc->sc_ilen++; sc->sc_mc->m_len++; } if (sc->sc_ilen == 3 && (c & 1) == 0) { if (sc->sc_flags & SC_DEBUG) printf("ppp%d: bad protocol %x\n", sc->sc_if.if_unit, (sc->sc_mp[-1] << 8) + c); goto flush; } /* packet beyond configured mru? */ if (++sc->sc_ilen > sc->sc_mru + PPP_HDRLEN + PPP_FCSLEN) { if (sc->sc_flags & SC_DEBUG) printf("ppp%d: packet too big\n", sc->sc_if.if_unit); goto flush; } /* is this mbuf full? 
*/ m = sc->sc_mc; if (M_TRAILINGSPACE(m) <= 0) { if (m->m_next == NULL) { pppgetm(sc); if (m->m_next == NULL) { if (sc->sc_flags & SC_DEBUG) printf("ppp%d: too few input mbufs!\n", sc->sc_if.if_unit); goto flush; } } sc->sc_mc = m = m->m_next; m->m_len = 0; m->m_data = M_DATASTART(m); sc->sc_mp = mtod(m, char *); } ++m->m_len; *sc->sc_mp++ = c; sc->sc_fcs = PPP_FCS(sc->sc_fcs, c); return 0; flush: if (!(sc->sc_flags & SC_FLUSH)) { s = spltty(); sc->sc_if.if_ierrors++; sc->sc_stats.ppp_ierrors++; sc->sc_flags |= SC_FLUSH; splx(s); if (sc->sc_flags & SC_LOG_FLUSH) ppplogchar(sc, c); } return 0; } #define MAX_DUMP_BYTES 128 static void ppplogchar(sc, c) struct ppp_softc *sc; int c; { if (c >= 0) sc->sc_rawin[sc->sc_rawin_count++] = c; if (sc->sc_rawin_count >= sizeof(sc->sc_rawin) || (c < 0 && sc->sc_rawin_count > 0)) { printf("ppp%d input: %*D", sc->sc_if.if_unit, sc->sc_rawin_count, sc->sc_rawin, " "); sc->sc_rawin_count = 0; } } #endif /* NPPP > 0 */ diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 849e26f052e2..6ea3795586b7 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1,888 +1,890 @@ /* * Copyright (c) 1982, 1986, 1991, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. 
Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 - * $Id: in_pcb.c,v 1.40 1998/03/24 18:06:08 wollman Exp $ + * $Id: in_pcb.c,v 1.41 1998/03/28 10:18:21 bde Exp $ */ #include #include #include #include #include #include #include #include #include #include +#include + #include #include #include #include #include #include #include struct in_addr zeroin_addr; static void in_pcbremlists __P((struct inpcb *)); static void in_rtchange __P((struct inpcb *, int)); /* * These configure the range of local port addresses assigned to * "unspecified" outgoing connections/packets/whatever. 
*/ static int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ static int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ static int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ static int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ static int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 40000 */ static int ipport_hilastauto = IPPORT_HILASTAUTO; /* 44999 */ #define RANGECHK(var, min, max) \ if ((var) < (min)) { (var) = (min); } \ else if ((var) > (max)) { (var) = (max); } static int sysctl_net_ipport_check SYSCTL_HANDLER_ARGS { int error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); if (!error) { RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); } return error; } #undef RANGECHK SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); /* * in_pcb.c: manage the Protocol Control Blocks. 
* * NOTE: It is assumed that most of these functions will be called at * splnet(). XXX - There are, unfortunately, a few exceptions to this * rule that should be fixed. */ /* * Allocate a PCB and associate it with the socket. */ int in_pcballoc(so, pcbinfo, p) struct socket *so; struct inpcbinfo *pcbinfo; struct proc *p; { register struct inpcb *inp; inp = zalloci(pcbinfo->ipi_zone); if (inp == NULL) return (ENOBUFS); bzero((caddr_t)inp, sizeof(*inp)); inp->inp_gencnt = ++pcbinfo->ipi_gencnt; inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); pcbinfo->ipi_count++; so->so_pcb = (caddr_t)inp; return (0); } int in_pcbbind(inp, nam, p) register struct inpcb *inp; struct sockaddr *nam; struct proc *p; { register struct socket *so = inp->inp_socket; unsigned short *lastport; struct sockaddr_in *sin; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; u_short lport = 0; int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); int error; if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ return (EADDRNOTAVAIL); if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) return (EINVAL); if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) wild = 1; if (nam) { sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); #ifdef notdef /* * We should check the family, but old programs * incorrectly fail to initialize it. */ if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); #endif lport = sin->sin_port; if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { /* * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; * allow complete duplication of binding if * SO_REUSEPORT is set, or if SO_REUSEADDR is set * and a multicast address is bound on both * new and duplicated sockets. */ if (so->so_options & SO_REUSEADDR) reuseport = SO_REUSEADDR|SO_REUSEPORT; } else if (sin->sin_addr.s_addr != INADDR_ANY) { sin->sin_port = 0; /* yech... 
*/ if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) return (EADDRNOTAVAIL); } if (lport) { struct inpcb *t; /* GROSS */ if (ntohs(lport) < IPPORT_RESERVED && p && suser(p->p_ucred, &p->p_acflag)) return (EACCES); if (so->so_uid) { t = in_pcblookup_local(inp->inp_pcbinfo, sin->sin_addr, lport, INPLOOKUP_WILDCARD); if (t && (so->so_uid != t->inp_socket->so_uid)) return (EADDRINUSE); } t = in_pcblookup_local(pcbinfo, sin->sin_addr, lport, wild); if (t && (reuseport & t->inp_socket->so_options) == 0) return (EADDRINUSE); } inp->inp_laddr = sin->sin_addr; } if (lport == 0) { ushort first, last; int count; inp->inp_flags |= INP_ANONPORT; if (inp->inp_flags & INP_HIGHPORT) { first = ipport_hifirstauto; /* sysctl */ last = ipport_hilastauto; lastport = &pcbinfo->lasthi; } else if (inp->inp_flags & INP_LOWPORT) { if (p && (error = suser(p->p_ucred, &p->p_acflag))) return error; first = ipport_lowfirstauto; /* 1023 */ last = ipport_lowlastauto; /* 600 */ lastport = &pcbinfo->lastlow; } else { first = ipport_firstauto; /* sysctl */ last = ipport_lastauto; lastport = &pcbinfo->lastport; } /* * Simple check to ensure all ports are not used up causing * a deadlock here. * * We split the two cases (up and down) so that the direction * is not being tested on each round of the loop. */ if (first > last) { /* * counting down */ count = first - last; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. */ inp->inp_laddr.s_addr = INADDR_ANY; return (EAGAIN); } --*lastport; if (*lastport > first || *lastport < last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, lport, wild)); } else { /* * counting up */ count = last - first; do { if (count-- < 0) { /* completely used? */ /* * Undo any address bind that may have * occurred above. 
*/ inp->inp_laddr.s_addr = INADDR_ANY; return (EAGAIN); } ++*lastport; if (*lastport < first || *lastport > last) *lastport = first; lport = htons(*lastport); } while (in_pcblookup_local(pcbinfo, inp->inp_laddr, lport, wild)); } } inp->inp_lport = lport; if (in_pcbinshash(inp) != 0) { inp->inp_laddr.s_addr = INADDR_ANY; inp->inp_lport = 0; return (EAGAIN); } return (0); } /* * Transform old in_pcbconnect() into an inner subroutine for new * in_pcbconnect(): Do some validity-checking on the remote * address (in mbuf 'nam') and then determine local host address * (i.e., which interface) to use to access that remote host. * * This preserves definition of in_pcbconnect(), while supporting a * slightly different version for T/TCP. (This is more than * a bit of a kludge, but cleaning up the internal interfaces would * have forced minor changes in every protocol). */ int in_pcbladdr(inp, nam, plocal_sin) register struct inpcb *inp; struct sockaddr *nam; struct sockaddr_in **plocal_sin; { struct in_ifaddr *ia; register struct sockaddr_in *sin = (struct sockaddr_in *)nam; if (nam->sa_len != sizeof (*sin)) return (EINVAL); if (sin->sin_family != AF_INET) return (EAFNOSUPPORT); if (sin->sin_port == 0) return (EADDRNOTAVAIL); if (!TAILQ_EMPTY(&in_ifaddrhead)) { /* * If the destination address is INADDR_ANY, * use the primary local address. * If the supplied address is INADDR_BROADCAST, * and the primary interface supports broadcast, * choose the broadcast address for that interface. 
*/ #define satosin(sa) ((struct sockaddr_in *)(sa)) #define sintosa(sin) ((struct sockaddr *)(sin)) #define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) if (sin->sin_addr.s_addr == INADDR_ANY) sin->sin_addr = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr; else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && (in_ifaddrhead.tqh_first->ia_ifp->if_flags & IFF_BROADCAST)) sin->sin_addr = satosin(&in_ifaddrhead.tqh_first->ia_broadaddr)->sin_addr; } if (inp->inp_laddr.s_addr == INADDR_ANY) { register struct route *ro; ia = (struct in_ifaddr *)0; /* * If route is known or can be allocated now, * our src addr is taken from the i/f, else punt. */ ro = &inp->inp_route; if (ro->ro_rt && (satosin(&ro->ro_dst)->sin_addr.s_addr != sin->sin_addr.s_addr || inp->inp_socket->so_options & SO_DONTROUTE)) { RTFREE(ro->ro_rt); ro->ro_rt = (struct rtentry *)0; } if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ (ro->ro_rt == (struct rtentry *)0 || ro->ro_rt->rt_ifp == (struct ifnet *)0)) { /* No route yet, so try to acquire one */ ro->ro_dst.sa_family = AF_INET; ro->ro_dst.sa_len = sizeof(struct sockaddr_in); ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = sin->sin_addr; rtalloc(ro); } /* * If we found a route, use the address * corresponding to the outgoing interface * unless it is the loopback (in case a route * to our address on another net goes to loopback). */ if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) ia = ifatoia(ro->ro_rt->rt_ifa); if (ia == 0) { u_short fport = sin->sin_port; sin->sin_port = 0; ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); if (ia == 0) ia = ifatoia(ifa_ifwithnet(sintosa(sin))); sin->sin_port = fport; if (ia == 0) ia = in_ifaddrhead.tqh_first; if (ia == 0) return (EADDRNOTAVAIL); } /* * If the destination address is multicast and an outgoing * interface has been set as a multicast option, use the * address of that interface as our source address. 
*/ if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && inp->inp_moptions != NULL) { struct ip_moptions *imo; struct ifnet *ifp; imo = inp->inp_moptions; if (imo->imo_multicast_ifp != NULL) { ifp = imo->imo_multicast_ifp; for (ia = in_ifaddrhead.tqh_first; ia; ia = ia->ia_link.tqe_next) if (ia->ia_ifp == ifp) break; if (ia == 0) return (EADDRNOTAVAIL); } } /* * Don't do pcblookup call here; return interface in plocal_sin * and exit to caller, that will do the lookup. */ *plocal_sin = &ia->ia_addr; } return(0); } /* * Outer subroutine: * Connect from a socket to a specified address. * Both address and port must be specified in argument sin. * If don't have a local address for this socket yet, * then pick one. */ int in_pcbconnect(inp, nam, p) register struct inpcb *inp; struct sockaddr *nam; struct proc *p; { struct sockaddr_in *ifaddr; register struct sockaddr_in *sin = (struct sockaddr_in *)nam; int error; /* * Call inner routine, to assign local interface address. */ if (error = in_pcbladdr(inp, nam, &ifaddr)) return(error); if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, inp->inp_laddr.s_addr ? 
inp->inp_laddr : ifaddr->sin_addr, inp->inp_lport, 0) != NULL) { return (EADDRINUSE); } if (inp->inp_laddr.s_addr == INADDR_ANY) { if (inp->inp_lport == 0) (void)in_pcbbind(inp, (struct sockaddr *)0, p); inp->inp_laddr = ifaddr->sin_addr; } inp->inp_faddr = sin->sin_addr; inp->inp_fport = sin->sin_port; in_pcbrehash(inp); return (0); } void in_pcbdisconnect(inp) struct inpcb *inp; { inp->inp_faddr.s_addr = INADDR_ANY; inp->inp_fport = 0; in_pcbrehash(inp); if (inp->inp_socket->so_state & SS_NOFDREF) in_pcbdetach(inp); } void in_pcbdetach(inp) struct inpcb *inp; { struct socket *so = inp->inp_socket; struct inpcbinfo *ipi = inp->inp_pcbinfo; inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); so->so_pcb = 0; sofree(so); if (inp->inp_options) (void)m_free(inp->inp_options); if (inp->inp_route.ro_rt) rtfree(inp->inp_route.ro_rt); ip_freemoptions(inp->inp_moptions); zfreei(ipi->ipi_zone, inp); } /* * The calling convention of in_setsockaddr() and in_setpeeraddr() was * modified to match the pru_sockaddr() and pru_peeraddr() entry points * in struct pr_usrreqs, so that protocols can just reference then directly * without the need for a wrapper function. The socket must have a valid * (i.e., non-nil) PCB, but it should be impossible to get an invalid one * except through a kernel programming error, so it is acceptable to panic * (or in this case trap) if the PCB is invalid. (Actually, we don't trap * because there actually /is/ a programming error somewhere... XXX) */ int in_setsockaddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; register struct inpcb *inp; register struct sockaddr_in *sin; /* * Do the malloc first in case it blocks. 
*/ MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK); bzero(sin, sizeof *sin); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); free(sin, M_SONAME); return EINVAL; } sin->sin_port = inp->inp_lport; sin->sin_addr = inp->inp_laddr; splx(s); *nam = (struct sockaddr *)sin; return 0; } int in_setpeeraddr(so, nam) struct socket *so; struct sockaddr **nam; { int s; struct inpcb *inp; register struct sockaddr_in *sin; /* * Do the malloc first in case it blocks. */ MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, M_WAITOK); bzero((caddr_t)sin, sizeof (*sin)); sin->sin_family = AF_INET; sin->sin_len = sizeof(*sin); s = splnet(); inp = sotoinpcb(so); if (!inp) { splx(s); free(sin, M_SONAME); return EINVAL; } sin->sin_port = inp->inp_fport; sin->sin_addr = inp->inp_faddr; splx(s); *nam = (struct sockaddr *)sin; return 0; } /* * Pass some notification to all connections of a protocol * associated with address dst. The local address and/or port numbers * may be specified to limit the search. The "usual action" will be * taken, depending on the ctlinput cmd. The caller must filter any * cmds that are uninteresting (e.g., no error in the map). * Call the protocol specific routine (if any) to report * any errors for each matching socket. */ void in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify) struct inpcbhead *head; struct sockaddr *dst; u_int fport_arg, lport_arg; struct in_addr laddr; int cmd; void (*notify) __P((struct inpcb *, int)); { register struct inpcb *inp, *oinp; struct in_addr faddr; u_short fport = fport_arg, lport = lport_arg; int errno, s; if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET) return; faddr = ((struct sockaddr_in *)dst)->sin_addr; if (faddr.s_addr == INADDR_ANY) return; /* * Redirects go to all references to the destination, * and use in_rtchange to invalidate the route cache. 
* Dead host indications: notify all references to the destination. * Otherwise, if we have knowledge of the local port and address, * deliver only to that socket. */ if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { fport = 0; lport = 0; laddr.s_addr = 0; if (cmd != PRC_HOSTDEAD) notify = in_rtchange; } errno = inetctlerrmap[cmd]; s = splnet(); for (inp = head->lh_first; inp != NULL;) { if (inp->inp_faddr.s_addr != faddr.s_addr || inp->inp_socket == 0 || (lport && inp->inp_lport != lport) || (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) || (fport && inp->inp_fport != fport)) { inp = inp->inp_list.le_next; continue; } oinp = inp; inp = inp->inp_list.le_next; if (notify) (*notify)(oinp, errno); } splx(s); } /* * Check for alternatives when higher level complains * about service problems. For now, invalidate cached * routing information. If the route was created dynamically * (by a redirect), time to try a default gateway again. */ void in_losing(inp) struct inpcb *inp; { register struct rtentry *rt; struct rt_addrinfo info; if ((rt = inp->inp_route.ro_rt)) { inp->inp_route.ro_rt = 0; bzero((caddr_t)&info, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&inp->inp_route.ro_dst; info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; info.rti_info[RTAX_NETMASK] = rt_mask(rt); rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); if (rt->rt_flags & RTF_DYNAMIC) (void) rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway, rt_mask(rt), rt->rt_flags, (struct rtentry **)0); else /* * A new route can be allocated * the next time output is attempted. */ rtfree(rt); } } /* * After a routing change, flush old routing * and allocate a (hopefully) better one. */ static void in_rtchange(inp, errno) register struct inpcb *inp; int errno; { if (inp->inp_route.ro_rt) { rtfree(inp->inp_route.ro_rt); inp->inp_route.ro_rt = 0; /* * A new route can be allocated the next time * output is attempted. */ } } /* * Lookup a PCB based on the local address and port. 
*/ struct inpcb * in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) struct inpcbinfo *pcbinfo; struct in_addr laddr; u_int lport_arg; int wild_okay; { register struct inpcb *inp, *match = NULL; int matchwild = 3, wildcard; u_short lport = lport_arg; if (!wild_okay) { struct inpcbhead *head; /* * Look for an unconnected (wildcard foreign addr) PCB that * matches the local address and port we're looking for. */ head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_lport == lport) { /* * Found. */ return (inp); } } /* * Not found. */ return (NULL); } else { struct inpcbporthead *porthash; struct inpcbport *phd; struct inpcb *match = NULL; /* * Best fit PCB lookup. * * First see if this local port is in use by looking on the * port hash list. */ porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, pcbinfo->porthashmask)]; for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { if (phd->phd_port == lport) break; } if (phd != NULL) { /* * Port is in use by one or more PCBs. Look for best * fit. */ for (inp = phd->phd_pcblist.lh_first; inp != NULL; inp = inp->inp_portlist.le_next) { wildcard = 0; if (inp->inp_faddr.s_addr != INADDR_ANY) wildcard++; if (inp->inp_laddr.s_addr != INADDR_ANY) { if (laddr.s_addr == INADDR_ANY) wildcard++; else if (inp->inp_laddr.s_addr != laddr.s_addr) continue; } else { if (laddr.s_addr != INADDR_ANY) wildcard++; } if (wildcard < matchwild) { match = inp; matchwild = wildcard; if (matchwild == 0) { break; } } } } return (match); } } /* * Lookup PCB in hash list. 
*/ struct inpcb * in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard) struct inpcbinfo *pcbinfo; struct in_addr faddr, laddr; u_int fport_arg, lport_arg; int wildcard; { struct inpcbhead *head; register struct inpcb *inp; u_short fport = fport_arg, lport = lport_arg; /* * First look for an exact match. */ head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { if (inp->inp_faddr.s_addr == faddr.s_addr && inp->inp_laddr.s_addr == laddr.s_addr && inp->inp_fport == fport && inp->inp_lport == lport) { /* * Found. */ return (inp); } } if (wildcard) { struct inpcb *local_wild = NULL; head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { if (inp->inp_faddr.s_addr == INADDR_ANY && inp->inp_lport == lport) { if (inp->inp_laddr.s_addr == laddr.s_addr) return (inp); else if (inp->inp_laddr.s_addr == INADDR_ANY) local_wild = inp; } } return (local_wild); } /* * Not found. */ return (NULL); } /* * Insert PCB onto various hash lists. */ int in_pcbinshash(inp) struct inpcb *inp; { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct inpcbport *phd; pcbhash = &pcbinfo->hashbase[INP_PCBHASH(inp->inp_faddr.s_addr, inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, pcbinfo->porthashmask)]; /* * Go through port list and look for a head for this lport. */ for (phd = pcbporthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { if (phd->phd_port == inp->inp_lport) break; } /* * If none exists, malloc one and tack it on. 
*/ if (phd == NULL) { MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); if (phd == NULL) { return (ENOBUFS); /* XXX */ } phd->phd_port = inp->inp_lport; LIST_INIT(&phd->phd_pcblist); LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); } inp->inp_phd = phd; LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); return (0); } /* * Move PCB to the proper hash bucket when { faddr, fport } have been * changed. NOTE: This does not handle the case of the lport changing (the * hashed port list would have to be updated as well), so the lport must * not change after in_pcbinshash() has been called. */ void in_pcbrehash(inp) struct inpcb *inp; { struct inpcbhead *head; head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(inp->inp_faddr.s_addr, inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); } /* * Remove PCB from various lists. */ static void in_pcbremlists(inp) struct inpcb *inp; { if (inp->inp_lport) { struct inpcbport *phd = inp->inp_phd; LIST_REMOVE(inp, inp_hash); LIST_REMOVE(inp, inp_portlist); if (phd->phd_pcblist.lh_first == NULL) { LIST_REMOVE(phd, phd_hash); free(phd, M_PCB); } } LIST_REMOVE(inp, inp_list); inp->inp_pcbinfo->ipi_count--; } diff --git a/sys/nfs/bootp_subr.c b/sys/nfs/bootp_subr.c index 7dbcd9e011cc..1cc0ec821299 100644 --- a/sys/nfs/bootp_subr.c +++ b/sys/nfs/bootp_subr.c @@ -1,1266 +1,1267 @@ -/* $Id: bootp_subr.c,v 1.10 1998/03/14 03:25:14 tegge Exp $ */ +/* $Id: bootp_subr.c,v 1.11 1998/03/14 04:13:56 tegge Exp $ */ /* * Copyright (c) 1995 Gordon Ross, Adam Glass * Copyright (c) 1992 Regents of the University of California. * All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * based on: * nfs/krpc_subr.c * $NetBSD: krpc_subr.c,v 1.10 1995/08/08 20:43:43 gwr Exp $ */ #include "opt_bootp.h" #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #define BOOTP_MIN_LEN 300 /* Minimum size of bootp udp packet */ /* * What is the longest we will wait before re-sending a request? * Note this is also the frequency of "RPC timeout" messages. * The re-send loop count sup linearly to this maximum, so the * first complaint will happen after (1+2+3+4+5)=15 seconds. */ #define MAX_RESEND_DELAY 5 /* seconds */ /* Definitions from RFC951 */ struct bootp_packet { u_int8_t op; u_int8_t htype; u_int8_t hlen; u_int8_t hops; u_int32_t xid; u_int16_t secs; u_int16_t flags; struct in_addr ciaddr; struct in_addr yiaddr; struct in_addr siaddr; struct in_addr giaddr; unsigned char chaddr[16]; char sname[64]; char file[128]; unsigned char vend[256]; }; #define IPPORT_BOOTPC 68 #define IPPORT_BOOTPS 67 extern int nfs_diskless_valid; extern struct nfsv3_diskless nfsv3_diskless; /* mountd RPC */ static int md_mount __P((struct sockaddr_in *mdsin, char *path, u_char *fhp, int *fhsizep, struct nfs_args *args,struct proc *procp)); static int md_lookup_swap __P((struct sockaddr_in *mdsin,char *path, u_char *fhp, int *fhsizep, struct nfs_args *args, struct proc *procp)); static int setfs __P((struct sockaddr_in *addr, char *path, char *p)); static int getdec __P((char **ptr)); static char *substr __P((char *a,char *b)); static void mountopts __P((struct nfs_args *args, char *p)); static int xdr_opaque_decode __P((struct mbuf **ptr,u_char *buf, int len)); static int xdr_int_decode __P((struct mbuf **ptr,int *iptr)); static void printip __P((char *prefix,struct in_addr addr)); #ifdef BOOTP_DEBUG void bootpboot_p_sa(struct sockaddr *sa,struct sockaddr *ma); void bootpboot_p_ma(struct sockaddr *ma); void bootpboot_p_rtentry(struct rtentry 
*rt); void bootpboot_p_tree(struct radix_node *rn); void bootpboot_p_rtlist(void); void bootpboot_p_iflist(void); #endif static int bootpc_call(struct bootp_packet *call, struct bootp_packet *reply, struct proc *procp); static int bootpc_fakeup_interface(struct ifreq *ireq, struct socket *so, struct proc *procp); static int bootpc_adjust_interface(struct ifreq *ireq,struct socket *so, struct sockaddr_in *myaddr, struct sockaddr_in *netmask, struct sockaddr_in *gw, struct proc *procp); void bootpc_init(void); #ifdef BOOTP_DEBUG void bootpboot_p_sa(sa,ma) struct sockaddr *sa; struct sockaddr *ma; { if (!sa) { printf("(sockaddr *) "); return; } switch (sa->sa_family) { case AF_INET: { struct sockaddr_in *sin = (struct sockaddr_in *) sa; printf("inet %x",ntohl(sin->sin_addr.s_addr)); if (ma) { struct sockaddr_in *sin = (struct sockaddr_in *) ma; printf(" mask %x",ntohl(sin->sin_addr.s_addr)); } } break; case AF_LINK: { struct sockaddr_dl *sli = (struct sockaddr_dl *) sa; int i; printf("link %.*s ",sli->sdl_nlen,sli->sdl_data); for (i=0;isdl_alen;i++) { if (i>0) printf(":"); printf("%x",(unsigned char) sli->sdl_data[i+sli->sdl_nlen]); } } break; default: printf("af%d",sa->sa_family); } } void bootpboot_p_ma(ma) struct sockaddr *ma; { if (!ma) { printf(""); return; } printf("%x",*(int*)ma); } void bootpboot_p_rtentry(rt) struct rtentry *rt; { bootpboot_p_sa(rt_key(rt),rt_mask(rt)); printf(" "); bootpboot_p_ma(rt->rt_genmask); printf(" "); bootpboot_p_sa(rt->rt_gateway,NULL); printf(" "); printf("flags %x",(unsigned short) rt->rt_flags); printf(" %d",rt->rt_rmx.rmx_expire); printf(" %s%d\n",rt->rt_ifp->if_name,rt->rt_ifp->if_unit); } void bootpboot_p_tree(rn) struct radix_node *rn; { while (rn) { if (rn->rn_b < 0) { if (rn->rn_flags & RNF_ROOT) { } else { bootpboot_p_rtentry((struct rtentry *) rn); } rn = rn->rn_dupedkey; } else { bootpboot_p_tree(rn->rn_l); bootpboot_p_tree(rn->rn_r); return; } } } void bootpboot_p_rtlist(void) { printf("Routing table:\n"); 
bootpboot_p_tree(rt_tables[AF_INET]->rnh_treetop); } void bootpboot_p_iflist(void) { struct ifnet *ifp; struct ifaddr *ifa; printf("Interface list:\n"); for (ifp = TAILQ_FIRST(&ifnet); ifp != 0; ifp = TAILQ_NEXT(ifp,if_link)) { for (ifa = TAILQ_FIRST(&ifp->if_addrhead) ;ifa; ifa=TAILQ_NEXT(ifa,ifa_link)) if (ifa->ifa_addr->sa_family == AF_INET ) { printf("%s%d flags %x, addr %x, bcast %x, net %x\n", ifp->if_name,ifp->if_unit, (unsigned short) ifp->if_flags, ntohl(((struct sockaddr_in *) ifa->ifa_addr)->sin_addr.s_addr), ntohl(((struct sockaddr_in *) ifa->ifa_dstaddr)->sin_addr.s_addr), ntohl(((struct sockaddr_in *) ifa->ifa_netmask)->sin_addr.s_addr) ); } } } #endif static int bootpc_call(call,reply,procp) struct bootp_packet *call; struct bootp_packet *reply; /* output */ struct proc *procp; { struct socket *so; struct sockaddr_in *sin, sa; struct mbuf *m; struct uio auio; struct iovec aio; int error, rcvflg, timo, secs, len; u_int tport; /* * Create socket and set its recieve timeout. */ if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0,procp))) goto out; m = m_get(M_WAIT, MT_SOOPTS); if (m == NULL) { error = ENOBUFS; goto out; } else { struct timeval *tv; tv = mtod(m, struct timeval *); m->m_len = sizeof(*tv); tv->tv_sec = 1; tv->tv_usec = 0; if ((error = sosetopt(so, SOL_SOCKET, SO_RCVTIMEO, m, procp))) goto out; } /* * Enable broadcast. */ { int *on; m = m_get(M_WAIT, MT_SOOPTS); if (m == NULL) { error = ENOBUFS; goto out; } on = mtod(m, int *); m->m_len = sizeof(*on); *on = 1; if ((error = sosetopt(so, SOL_SOCKET, SO_BROADCAST, m, procp))) goto out; } /* * Bind the local endpoint to a bootp client port. */ sin = &sa; bzero(sin, sizeof *sin); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; sin->sin_port = htons(IPPORT_BOOTPC); error = sobind(so, (struct sockaddr *)sin, procp); if (error) { printf("bind failed\n"); goto out; } /* * Setup socket address for the server. 
*/ sin = &sa; bzero(sin, sizeof *sin); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_BROADCAST; sin->sin_port = htons(IPPORT_BOOTPS); /* * Send it, repeatedly, until a reply is received, * but delay each re-send by an increasing amount. * If the delay hits the maximum, start complaining. */ timo = 0; for (;;) { /* Send BOOTP request (or re-send). */ aio.iov_base = (caddr_t) call; aio.iov_len = sizeof(*call); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_WRITE; auio.uio_offset = 0; auio.uio_resid = sizeof(*call); auio.uio_procp = procp; error = sosend(so, (struct sockaddr *)sin, &auio, NULL, NULL, 0, procp); if (error) { printf("bootpc_call: sosend: %d\n", error); goto out; } /* Determine new timeout. */ if (timo < MAX_RESEND_DELAY) timo++; else printf("BOOTP timeout for server 0x%x\n", ntohl(sin->sin_addr.s_addr)); /* * Wait for up to timo seconds for a reply. * The socket receive timeout was set to 1 second. */ secs = timo; while (secs > 0) { aio.iov_base = (caddr_t) reply; aio.iov_len = sizeof(*reply); auio.uio_iov = &aio; auio.uio_iovcnt = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_offset = 0; auio.uio_resid = sizeof(*reply); auio.uio_procp = procp; rcvflg = 0; error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg); if (error == EWOULDBLOCK) { secs--; call->secs=htons(ntohs(call->secs)+1); continue; } if (error) goto out; len = sizeof(*reply) - auio.uio_resid; /* Do we have the required number of bytes ? */ if (len < BOOTP_MIN_LEN) continue; /* Is it the right reply? 
*/ if (reply->op != 2) continue; if (reply->xid != call->xid) continue; if (reply->hlen != call->hlen) continue; if (bcmp(reply->chaddr,call->chaddr,call->hlen)) continue; goto gotreply; /* break two levels */ } /* while secs */ } /* forever send/receive */ error = ETIMEDOUT; goto out; gotreply: out: soclose(so); return error; } static int bootpc_fakeup_interface(struct ifreq *ireq,struct socket *so, struct proc *procp) { struct sockaddr_in *sin; int error; struct sockaddr_in dst; struct sockaddr_in gw; struct sockaddr_in mask; /* * Bring up the interface. * * Get the old interface flags and or IFF_UP into them; if * IFF_UP set blindly, interface selection can be clobbered. */ error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)ireq, procp); if (error) panic("bootpc_fakeup_interface: GIFFLAGS, error=%d", error); ireq->ifr_flags |= IFF_UP; error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)ireq, procp); if (error) panic("bootpc_fakeup_interface: SIFFLAGS, error=%d", error); /* * Do enough of ifconfig(8) so that the chosen interface * can talk to the servers. 
(just set the address) */ /* addr is 0.0.0.0 */ sin = (struct sockaddr_in *)&ireq->ifr_addr; bzero((caddr_t)sin, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; error = ifioctl(so, SIOCSIFADDR, (caddr_t)ireq, procp); if (error) panic("bootpc_fakeup_interface: set if addr, error=%d", error); /* netmask is 0.0.0.0 */ sin = (struct sockaddr_in *)&ireq->ifr_addr; bzero((caddr_t)sin, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, procp); if (error) panic("bootpc_fakeup_interface: set if net addr, error=%d", error); /* Broadcast is 255.255.255.255 */ sin = (struct sockaddr_in *)&ireq->ifr_addr; bzero((caddr_t)sin, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_BROADCAST; error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, procp); if (error) panic("bootpc_fakeup_interface: set if broadcast addr, error=%d", error); /* Add default route to 0.0.0.0 so we can send data */ bzero((caddr_t) &dst, sizeof(dst)); dst.sin_len=sizeof(dst); dst.sin_family=AF_INET; dst.sin_addr.s_addr = htonl(0); bzero((caddr_t) &gw, sizeof(gw)); gw.sin_len=sizeof(gw); gw.sin_family=AF_INET; gw.sin_addr.s_addr = htonl(0x0); bzero((caddr_t) &mask, sizeof(mask)); mask.sin_len=sizeof(mask); mask.sin_family=AF_INET; mask.sin_addr.s_addr = htonl(0); error = rtrequest(RTM_ADD, (struct sockaddr *) &dst, (struct sockaddr *) &gw, (struct sockaddr *) &mask, RTF_UP | RTF_STATIC , NULL); if (error) printf("bootpc_fakeup_interface: add default route, error=%d\n", error); return error; } static int bootpc_adjust_interface(struct ifreq *ireq,struct socket *so, struct sockaddr_in *myaddr, struct sockaddr_in *netmask, struct sockaddr_in *gw, struct proc *procp) { int error; struct sockaddr_in oldgw; struct sockaddr_in olddst; struct sockaddr_in oldmask; struct sockaddr_in *sin; /* Remove old default 
route to 0.0.0.0 */ bzero((caddr_t) &olddst, sizeof(olddst)); olddst.sin_len=sizeof(olddst); olddst.sin_family=AF_INET; olddst.sin_addr.s_addr = INADDR_ANY; bzero((caddr_t) &oldgw, sizeof(oldgw)); oldgw.sin_len=sizeof(oldgw); oldgw.sin_family=AF_INET; oldgw.sin_addr.s_addr = INADDR_ANY; bzero((caddr_t) &oldmask, sizeof(oldmask)); oldmask.sin_len=sizeof(oldmask); oldmask.sin_family=AF_INET; oldmask.sin_addr.s_addr = INADDR_ANY; error = rtrequest(RTM_DELETE, (struct sockaddr *) &olddst, (struct sockaddr *) &oldgw, (struct sockaddr *) &oldmask, (RTF_UP | RTF_STATIC), NULL); if (error) { printf("nfs_boot: del default route, error=%d\n", error); return error; } /* * Do enough of ifconfig(8) so that the chosen interface * can talk to the servers. (just set the address) */ bcopy(netmask,&ireq->ifr_addr,sizeof(*netmask)); error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, procp); if (error) panic("nfs_boot: set if netmask, error=%d", error); /* Broadcast is with host part of IP address all 1's */ sin = (struct sockaddr_in *)&ireq->ifr_addr; bzero((caddr_t)sin, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = myaddr->sin_addr.s_addr | ~ netmask->sin_addr.s_addr; error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, procp); if (error) panic("bootpc_call: set if broadcast addr, error=%d", error); bcopy(myaddr,&ireq->ifr_addr,sizeof(*myaddr)); error = ifioctl(so, SIOCSIFADDR, (caddr_t)ireq, procp); if (error) panic("nfs_boot: set if addr, error=%d", error); /* Add new default route */ error = rtrequest(RTM_ADD, (struct sockaddr *) &olddst, (struct sockaddr *) gw, (struct sockaddr *) &oldmask, (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL); if (error) { printf("nfs_boot: add net route, error=%d\n", error); return error; } return 0; } static int setfs(addr, path, p) struct sockaddr_in *addr; char *path; char *p; { unsigned ip = 0; int val; if (((val = getdec(&p)) < 0) || (val > 255)) return(0); ip = val << 24; if (*p != '.') return(0); p++; 
if (((val = getdec(&p)) < 0) || (val > 255)) return(0); ip |= (val << 16); if (*p != '.') return(0); p++; if (((val = getdec(&p)) < 0) || (val > 255)) return(0); ip |= (val << 8); if (*p != '.') return(0); p++; if (((val = getdec(&p)) < 0) || (val > 255)) return(0); ip |= val; if (*p != ':') return(0); p++; addr->sin_addr.s_addr = htonl(ip); addr->sin_len = sizeof(struct sockaddr_in); addr->sin_family = AF_INET; strncpy(path,p,MNAMELEN-1); return(1); } static int getdec(ptr) char **ptr; { char *p = *ptr; int ret=0; if ((*p < '0') || (*p > '9')) return(-1); while ((*p >= '0') && (*p <= '9')) { ret = ret*10 + (*p - '0'); p++; } *ptr = p; return(ret); } static char *substr(a,b) char *a,*b; { char *loc1; char *loc2; while (*a != '\0') { loc1 = a; loc2 = b; while (*loc1 == *loc2++) { if (*loc1 == '\0') return (0); loc1++; if (*loc2 == '\0') return (loc1); } a++; } return (0); } static void mountopts(args,p) struct nfs_args *args; char *p; { char *tmp; args->flags = NFSMNT_RSIZE | NFSMNT_WSIZE | NFSMNT_RESVPORT; args->sotype = SOCK_DGRAM; if ((tmp = (char *)substr(p,"rsize="))) args->rsize=getdec(&tmp); if ((tmp = (char *)substr(p,"wsize="))) args->wsize=getdec(&tmp); if ((tmp = (char *)substr(p,"intr"))) args->flags |= NFSMNT_INT; if ((tmp = (char *)substr(p,"soft"))) args->flags |= NFSMNT_SOFT; if ((tmp = (char *)substr(p,"noconn"))) args->flags |= NFSMNT_NOCONN; if ((tmp = (char *)substr(p, "tcp"))) args->sotype = SOCK_STREAM; } static int xdr_opaque_decode(mptr,buf,len) struct mbuf **mptr; u_char *buf; int len; { struct mbuf *m; int alignedlen; m = *mptr; alignedlen = ( len + 3 ) & ~3; if (m->m_len < alignedlen) { m = m_pullup(m,alignedlen); if (m == NULL) { *mptr = NULL; return EBADRPC; } } bcopy(mtod(m,u_char *),buf,len); m_adj(m,alignedlen); *mptr = m; return 0; } static int xdr_int_decode(mptr,iptr) struct mbuf **mptr; int *iptr; { u_int32_t i; if (xdr_opaque_decode(mptr,(u_char *) &i,sizeof(u_int32_t))) return EBADRPC; *iptr = fxdr_unsigned(u_int32_t,i); return 
0; } static void printip(char *prefix,struct in_addr addr) { unsigned int ip; ip = ntohl(addr.s_addr); printf("%s is %d.%d.%d.%d\n",prefix, ip >> 24, (ip >> 16) & 255 ,(ip >> 8) & 255 ,ip & 255 ); } void bootpc_init(void) { struct bootp_packet call; struct bootp_packet reply; static u_int32_t xid = ~0xFF; struct ifreq ireq; struct ifnet *ifp; struct socket *so; int error; int code,ncode,len; int i,j; char *p; unsigned int ip; struct sockaddr_in myaddr; struct sockaddr_in netmask; struct sockaddr_in gw; int gotgw=0; int gotnetmask=0; int gotrootpath=0; int gotswappath=0; char lookup_path[24]; #define EALEN 6 unsigned char ea[EALEN]; struct ifaddr *ifa; struct sockaddr_dl *sdl = NULL; char *delim; struct nfsv3_diskless *nd = &nfsv3_diskless; struct proc *procp = curproc; /* * If already filled in, don't touch it here */ if (nfs_diskless_valid) return; /* * Wait until arp entries can be handled. */ while (time.tv_sec == 0) tsleep(&time, PZERO+8, "arpkludge", 10); /* * Find a network interface. 
*/ #ifdef BOOTP_WIRED_TO printf("bootpc_init: wired to interface '%s'\n", __XSTRING(BOOTP_WIRED_TO)); #endif bzero(&ireq, sizeof(ireq)); for (ifp = TAILQ_FIRST(&ifnet); ifp != 0; ifp = TAILQ_NEXT(ifp,if_link)) { sprintf(ireq.ifr_name, "%s%d", ifp->if_name, ifp->if_unit); #ifdef BOOTP_WIRED_TO if (strcmp(ireq.ifr_name, __XSTRING(BOOTP_WIRED_TO)) == 0) break; #else if ((ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) break; #endif } if (ifp == NULL) panic("bootpc_init: no suitable interface"); strcpy(nd->myif.ifra_name,ireq.ifr_name); printf("bootpc_init: using network interface '%s'\n", ireq.ifr_name); if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0,procp)) != 0) panic("nfs_boot: socreate, error=%d", error); bootpc_fakeup_interface(&ireq,so,procp); printf("Bootpc testing starting\n"); /* Get HW address */ for (ifa = TAILQ_FIRST(&ifp->if_addrhead) ;ifa; ifa=TAILQ_NEXT(ifa,ifa_link)) if (ifa->ifa_addr->sa_family == AF_LINK && (sdl = ((struct sockaddr_dl *) ifa->ifa_addr)) && sdl->sdl_type == IFT_ETHER) break; if (!sdl) panic("bootpc: Unable to find HW address"); if (sdl->sdl_alen != EALEN ) panic("bootpc: HW address len is %d, expected value is %d", sdl->sdl_alen,EALEN); printf("bootpc hw address is "); delim=""; for (j=0;jsdl_alen;j++) { printf("%s%x",delim,((unsigned char *)LLADDR(sdl))[j]); delim=":"; } printf("\n"); #if 0 bootpboot_p_iflist(); bootpboot_p_rtlist(); #endif bzero((caddr_t) &call, sizeof(call)); /* bootpc part */ call.op = 1; /* BOOTREQUEST */ call.htype= 1; /* 10mb ethernet */ call.hlen=sdl->sdl_alen; /* Hardware address length */ call.hops=0; xid++; call.xid = txdr_unsigned(xid); bcopy(LLADDR(sdl),&call.chaddr,sdl->sdl_alen); call.vend[0]=99; call.vend[1]=130; call.vend[2]=83; call.vend[3]=99; call.vend[4]=255; call.secs = 0; call.flags = htons(0x8000); /* We need an broadcast answer */ error = bootpc_call(&call,&reply,procp); if (error) { #ifdef BOOTP_NFSROOT panic("BOOTP call failed"); #endif return; } bzero(&myaddr,sizeof(myaddr)); 
bzero(&netmask,sizeof(netmask)); bzero(&gw,sizeof(gw)); myaddr.sin_len = sizeof(myaddr); myaddr.sin_family = AF_INET; netmask.sin_len = sizeof(netmask); netmask.sin_family = AF_INET; gw.sin_len = sizeof(gw); gw.sin_family= AF_INET; nd->root_args.version = NFS_ARGSVERSION; nd->root_args.rsize = 8192; nd->root_args.wsize = 8192; nd->root_args.sotype = SOCK_DGRAM; nd->root_args.flags = (NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_RESVPORT); nd->swap_saddr.sin_len = sizeof(gw); nd->swap_saddr.sin_family = AF_INET; nd->swap_args.version = NFS_ARGSVERSION; nd->swap_args.rsize = 8192; nd->swap_args.wsize = 8192; nd->swap_args.sotype = SOCK_DGRAM; nd->swap_args.flags = (NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_RESVPORT); myaddr.sin_addr = reply.yiaddr; ip = ntohl(myaddr.sin_addr.s_addr); sprintf(lookup_path,"swap.%d.%d.%d.%d", ip >> 24, (ip >> 16) & 255 ,(ip >> 8) & 255 ,ip & 255 ); printip("My ip address",myaddr.sin_addr); printip("Server ip address",reply.siaddr); gw.sin_addr = reply.giaddr; printip("Gateway ip address",reply.giaddr); if (reply.sname[0]) printf("Server name is %s\n",reply.sname); if (reply.file[0]) printf("boot file is %s\n",reply.file); if (reply.vend[0]==99 && reply.vend[1]==130 && reply.vend[2]==83 && reply.vend[3]==99) { j=4; ncode = reply.vend[j]; while (j=sizeof(reply.vend)) { printf("Truncated field"); break; } ncode = reply.vend[j+len]; reply.vend[j+len]='\0'; p = &reply.vend[j]; switch (code) { case 1: if (len!=4) panic("bootpc: subnet mask len is %d",len); bcopy(&reply.vend[j],&netmask.sin_addr,4); gotnetmask=1; printip("Subnet mask",netmask.sin_addr); break; case 6: /* Domain Name servers. Unused */ case 16: /* Swap server IP address. 
unused */ case 2: /* Time offset */ break; case 3: /* Routers */ if (len % 4) panic("bootpc: Router Len is %d",len); if (len > 0) { bcopy(&reply.vend[j],&gw.sin_addr,4); printip("Router",gw.sin_addr); gotgw=1; } break; case 17: if (setfs(&nd->root_saddr, nd->root_hostnam, p)) { printf("rootfs is %s\n",p); gotrootpath=1; } else panic("Failed to set rootfs to %s",p); break; case 12: if (len>=MAXHOSTNAMELEN) panic("bootpc: hostname >=%d bytes",MAXHOSTNAMELEN); strncpy(nd->my_hostnam,&reply.vend[j],len); nd->my_hostnam[len]=0; strncpy(hostname,&reply.vend[j],len); hostname[len]=0; printf("Hostname is %s\n",hostname); break; case 128: if (setfs(&nd->swap_saddr, nd->swap_hostnam, p)) { gotswappath=1; printf("swapfs is %s\n",p); } else panic("Failed to set swapfs to %s",p); break; case 129: { int swaplen; if (len!=4) panic("bootpc: Expected 4 bytes for swaplen, not %d bytes",len); bcopy(&reply.vend[j],&swaplen,4); nd->swap_nblks = ntohl(swaplen); printf("bootpc: Swap size is %d KB\n",nd->swap_nblks); } break; case 130: /* root mount options */ mountopts(&nd->root_args,p); break; case 131: /* swap mount options */ mountopts(&nd->swap_args,p); break; default: printf("Ignoring field type %d\n",code); } j+=len; } } if (!gotswappath) nd->swap_nblks = 0; #ifdef BOOTP_NFSROOT if (!gotrootpath) panic("bootpc: No root path offered"); #endif if (!gotnetmask) { if (IN_CLASSA(ntohl(myaddr.sin_addr.s_addr))) netmask.sin_addr.s_addr = htonl(IN_CLASSA_NET); else if (IN_CLASSB(ntohl(myaddr.sin_addr.s_addr))) netmask.sin_addr.s_addr = htonl(IN_CLASSB_NET); else netmask.sin_addr.s_addr = htonl(IN_CLASSC_NET); } if (!gotgw) { /* Use proxyarp */ gw.sin_addr.s_addr = myaddr.sin_addr.s_addr; } #if 0 bootpboot_p_iflist(); bootpboot_p_rtlist(); #endif error = bootpc_adjust_interface(&ireq,so, &myaddr,&netmask,&gw,procp); soclose(so); #if 0 bootpboot_p_iflist(); bootpboot_p_rtlist(); #endif if (gotrootpath) { error = md_mount(&nd->root_saddr, nd->root_hostnam, nd->root_fh, &nd->root_fhsize, 
&nd->root_args,procp); if (error) panic("nfs_boot: mountd root, error=%d", error); if (gotswappath) { error = md_mount(&nd->swap_saddr, nd->swap_hostnam, nd->swap_fh, &nd->swap_fhsize,&nd->swap_args,procp); if (error) panic("nfs_boot: mountd swap, error=%d", error); error = md_lookup_swap(&nd->swap_saddr,lookup_path,nd->swap_fh, &nd->swap_fhsize, &nd->swap_args,procp); if (error) panic("nfs_boot: lookup swap, error=%d", error); } nfs_diskless_valid = 3; } bcopy(&myaddr,&nd->myif.ifra_addr,sizeof(myaddr)); bcopy(&myaddr,&nd->myif.ifra_broadaddr,sizeof(myaddr)); ((struct sockaddr_in *) &nd->myif.ifra_broadaddr)->sin_addr.s_addr = myaddr.sin_addr.s_addr | ~ netmask.sin_addr.s_addr; bcopy(&netmask,&nd->myif.ifra_mask,sizeof(netmask)); #if 0 bootpboot_p_iflist(); bootpboot_p_rtlist(); #endif return; } /* * RPC: mountd/mount * Given a server pathname, get an NFS file handle. * Also, sets sin->sin_port to the NFS service port. */ static int md_mount(mdsin, path, fhp, fhsizep, args, procp) struct sockaddr_in *mdsin; /* mountd server address */ char *path; u_char *fhp; int *fhsizep; struct nfs_args *args; struct proc *procp; { struct mbuf *m; int error; int authunixok; int authcount; int authver; #ifdef BOOTP_NFSV3 /* First try NFS v3 */ /* Get port number for MOUNTD. */ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER3, &mdsin->sin_port, procp); if (!error) { m = xdr_string_encode(path, strlen(path)); /* Do RPC to mountd. */ error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER3, RPCMNT_MOUNT, &m, NULL, curproc); } if (!error) { args->flags |= NFSMNT_NFSV3; } else { #endif /* Fallback to NFS v2 */ /* Get port number for MOUNTD. */ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER1, &mdsin->sin_port, procp); if (error) return error; m = xdr_string_encode(path, strlen(path)); /* Do RPC to mountd. 
*/ error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER1, RPCMNT_MOUNT, &m, NULL, curproc); if (error) return error; /* message already freed */ #ifdef BOOTP_NFSV3 } #endif if (xdr_int_decode(&m,&error) || error) goto bad; if (args->flags & NFSMNT_NFSV3) { if (xdr_int_decode(&m,fhsizep) || *fhsizep > NFSX_V3FHMAX || *fhsizep <= 0 ) goto bad; } else *fhsizep = NFSX_V2FH; if (xdr_opaque_decode(&m,fhp,*fhsizep)) goto bad; if (args->flags & NFSMNT_NFSV3) { if (xdr_int_decode(&m,&authcount)) goto bad; authunixok = 0; if (authcount<0 || authcount>100) goto bad; while (authcount>0) { if (xdr_int_decode(&m,&authver)) goto bad; if (authver == RPCAUTH_UNIX) authunixok = 1; authcount--; } if (!authunixok) goto bad; } /* Set port number for NFS use. */ error = krpc_portmap(mdsin, NFS_PROG, (args->flags & NFSMNT_NFSV3)?NFS_VER3:NFS_VER2, &mdsin->sin_port, procp); goto out; bad: error = EBADRPC; out: m_freem(m); return error; } static int md_lookup_swap(mdsin, path, fhp, fhsizep, args, procp) struct sockaddr_in *mdsin; /* mountd server address */ char *path; u_char *fhp; int *fhsizep; struct nfs_args *args; struct proc *procp; { struct mbuf *m; int error; int size = -1; int attribs_present; int status; union { u_int32_t v2[17]; u_int32_t v3[21]; } fattribs; m = m_get(M_WAIT,MT_DATA); if (!m) return ENOBUFS; if (args->flags & NFSMNT_NFSV3) { *mtod(m,u_int32_t *) = txdr_unsigned(*fhsizep); bcopy(fhp,mtod(m,u_char *)+sizeof(u_int32_t),*fhsizep); m->m_len = *fhsizep + sizeof(u_int32_t); } else { bcopy(fhp,mtod(m,u_char *),NFSX_V2FH); m->m_len = NFSX_V2FH; } m->m_next = xdr_string_encode(path, strlen(path)); if (!m->m_next) { error = ENOBUFS; goto out; } /* Do RPC to nfsd. 
*/ if (args->flags & NFSMNT_NFSV3) error = krpc_call(mdsin, NFS_PROG, NFS_VER3, NFSPROC_LOOKUP, &m, NULL, procp); else error = krpc_call(mdsin, NFS_PROG, NFS_VER2, NFSV2PROC_LOOKUP, &m, NULL, procp); if (error) return error; /* message already freed */ if (xdr_int_decode(&m,&status)) goto bad; if (status) { error = ENOENT; goto out; } if (args->flags & NFSMNT_NFSV3) { if (xdr_int_decode(&m,fhsizep) || *fhsizep > NFSX_V3FHMAX || *fhsizep <= 0 ) goto bad; } else *fhsizep = NFSX_V2FH; if (xdr_opaque_decode(&m, fhp, *fhsizep)) goto bad; if (args->flags & NFSMNT_NFSV3) { if (xdr_int_decode(&m,&attribs_present)) goto bad; if (attribs_present) { if (xdr_opaque_decode(&m,(u_char *) &fattribs.v3, sizeof(u_int32_t)*21)) goto bad; size = fxdr_unsigned(u_int32_t, fattribs.v3[6]); } } else { if (xdr_opaque_decode(&m,(u_char *) &fattribs.v2, sizeof(u_int32_t)*17)) goto bad; size = fxdr_unsigned(u_int32_t, fattribs.v2[5]); } if (!nfsv3_diskless.swap_nblks && size!= -1) { nfsv3_diskless.swap_nblks = size/1024; printf("md_lookup_swap: Swap size is %d KB\n", nfsv3_diskless.swap_nblks); } goto out; bad: error = EBADRPC; out: m_freem(m); return error; } diff --git a/sys/nfs/krpc_subr.c b/sys/nfs/krpc_subr.c index be5384342e53..f1dfa962abab 100644 --- a/sys/nfs/krpc_subr.c +++ b/sys/nfs/krpc_subr.c @@ -1,486 +1,487 @@ /* $NetBSD: krpc_subr.c,v 1.12.4.1 1996/06/07 00:52:26 cgd Exp $ */ -/* $Id: krpc_subr.c,v 1.7 1997/10/28 15:59:03 bde Exp $ */ +/* $Id: krpc_subr.c,v 1.8 1998/03/14 03:25:16 tegge Exp $ */ /* * Copyright (c) 1995 Gordon Ross, Adam Glass * Copyright (c) 1992 Regents of the University of California. * All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * partially based on: * libnetboot/rpc.c * @(#) Header: rpc.c,v 1.12 93/09/28 08:31:56 leres Exp (LBL) */ #include #include #include #include #include #include +#include #include #include #include #include #include /* * Kernel support for Sun RPC * * Used currently for bootstrapping in nfs diskless configurations. 
*/ /* * Generic RPC headers */ struct auth_info { u_int32_t authtype; /* auth type */ u_int32_t authlen; /* auth length */ }; struct auth_unix { int32_t ua_time; int32_t ua_hostname; /* null */ int32_t ua_uid; int32_t ua_gid; int32_t ua_gidlist; /* null */ }; struct rpc_call { u_int32_t rp_xid; /* request transaction id */ int32_t rp_direction; /* call direction (0) */ u_int32_t rp_rpcvers; /* rpc version (2) */ u_int32_t rp_prog; /* program */ u_int32_t rp_vers; /* version */ u_int32_t rp_proc; /* procedure */ struct auth_info rpc_auth; struct auth_unix rpc_unix; struct auth_info rpc_verf; }; struct rpc_reply { u_int32_t rp_xid; /* request transaction id */ int32_t rp_direction; /* call direction (1) */ int32_t rp_astatus; /* accept status (0: accepted) */ union { u_int32_t rpu_errno; struct { struct auth_info rok_auth; u_int32_t rok_status; } rpu_rok; } rp_u; }; #define rp_errno rp_u.rpu_errno #define rp_auth rp_u.rpu_rok.rok_auth #define rp_status rp_u.rpu_rok.rok_status #define MIN_REPLY_HDR 16 /* xid, dir, astat, errno */ /* * What is the longest we will wait before re-sending a request? * Note this is also the frequency of "RPC timeout" messages. * The re-send loop count sup linearly to this maximum, so the * first complaint will happen after (1+2+3+4+5)=15 seconds. */ #define MAX_RESEND_DELAY 5 /* seconds */ /* * Call portmap to lookup a port number for a particular rpc program * Returns non-zero error on failure. */ int krpc_portmap(sin, prog, vers, portp, procp) struct sockaddr_in *sin; /* server address */ u_int prog, vers; /* host order */ u_int16_t *portp; /* network order */ struct proc *procp; { struct sdata { u_int32_t prog; /* call program */ u_int32_t vers; /* call version */ u_int32_t proto; /* call protocol */ u_int32_t port; /* call port (unused) */ } *sdata; struct rdata { u_int16_t pad; u_int16_t port; } *rdata; struct mbuf *m; int error; /* The portmapper port is fixed. 
*/ if (prog == PMAPPROG) { *portp = htons(PMAPPORT); return 0; } m = m_get(M_WAIT, MT_DATA); if (m == NULL) return ENOBUFS; sdata = mtod(m, struct sdata *); m->m_len = sizeof(*sdata); /* Do the RPC to get it. */ sdata->prog = txdr_unsigned(prog); sdata->vers = txdr_unsigned(vers); sdata->proto = txdr_unsigned(IPPROTO_UDP); sdata->port = 0; sin->sin_port = htons(PMAPPORT); error = krpc_call(sin, PMAPPROG, PMAPVERS, PMAPPROC_GETPORT, &m, NULL, procp); if (error) return error; if (m->m_len < sizeof(*rdata)) { m = m_pullup(m, sizeof(*rdata)); if (m == NULL) return ENOBUFS; } rdata = mtod(m, struct rdata *); *portp = rdata->port; m_freem(m); return 0; } /* * Do a remote procedure call (RPC) and wait for its reply. * If from_p is non-null, then we are doing broadcast, and * the address from whence the response came is saved there. */ int krpc_call(sa, prog, vers, func, data, from_p, procp) struct sockaddr_in *sa; u_int prog, vers, func; struct mbuf **data; /* input/output */ struct sockaddr **from_p; /* output */ struct proc *procp; { struct socket *so; struct sockaddr_in *sin, ssin; struct sockaddr *from; struct mbuf *m, *nam, *mhead; struct rpc_call *call; struct rpc_reply *reply; struct uio auio; int error, rcvflg, timo, secs, len; static u_int32_t xid = ~0xFF; u_int16_t tport; /* * Validate address family. * Sorry, this is INET specific... */ if (sa->sin_family != AF_INET) return (EAFNOSUPPORT); /* Free at end if not null. */ nam = mhead = NULL; from = NULL; /* * Create socket and set its recieve timeout. */ if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0, procp))) goto out; m = m_get(M_WAIT, MT_SOOPTS); if (m == NULL) { error = ENOBUFS; goto out; } else { struct timeval *tv; tv = mtod(m, struct timeval *); m->m_len = sizeof(*tv); tv->tv_sec = 1; tv->tv_usec = 0; if ((error = sosetopt(so, SOL_SOCKET, SO_RCVTIMEO, m, procp))) goto out; } /* * Enable broadcast if necessary. 
*/ if (from_p) { int32_t *on; m = m_get(M_WAIT, MT_SOOPTS); if (m == NULL) { error = ENOBUFS; goto out; } on = mtod(m, int32_t *); m->m_len = sizeof(*on); *on = 1; if ((error = sosetopt(so, SOL_SOCKET, SO_BROADCAST, m, procp))) goto out; } /* * Bind the local endpoint to a reserved port, * because some NFS servers refuse requests from * non-reserved (non-privileged) ports. */ sin = &ssin; bzero(sin, sizeof *sin); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; tport = IPPORT_RESERVED; do { tport--; sin->sin_port = htons(tport); error = sobind(so, (struct sockaddr *)sin, procp); } while (error == EADDRINUSE && tport > IPPORT_RESERVED / 2); if (error) { printf("bind failed\n"); goto out; } /* * Setup socket address for the server. */ /* * Prepend RPC message header. */ mhead = m_gethdr(M_WAIT, MT_DATA); mhead->m_next = *data; call = mtod(mhead, struct rpc_call *); mhead->m_len = sizeof(*call); bzero((caddr_t)call, sizeof(*call)); /* rpc_call part */ xid++; call->rp_xid = txdr_unsigned(xid); /* call->rp_direction = 0; */ call->rp_rpcvers = txdr_unsigned(2); call->rp_prog = txdr_unsigned(prog); call->rp_vers = txdr_unsigned(vers); call->rp_proc = txdr_unsigned(func); /* rpc_auth part (auth_unix as root) */ call->rpc_auth.authtype = txdr_unsigned(RPCAUTH_UNIX); call->rpc_auth.authlen = txdr_unsigned(sizeof(struct auth_unix)); /* rpc_verf part (auth_null) */ call->rpc_verf.authtype = 0; call->rpc_verf.authlen = 0; /* * Setup packet header */ len = 0; m = mhead; while (m) { len += m->m_len; m = m->m_next; } mhead->m_pkthdr.len = len; mhead->m_pkthdr.rcvif = NULL; /* * Send it, repeatedly, until a reply is received, * but delay each re-send by an increasing amount. * If the delay hits the maximum, start complaining. */ timo = 0; for (;;) { /* Send RPC request (or re-send). 
*/ m = m_copym(mhead, 0, M_COPYALL, M_WAIT); if (m == NULL) { error = ENOBUFS; goto out; } error = sosend(so, (struct sockaddr *)sa, NULL, m, NULL, 0, 0); if (error) { printf("krpc_call: sosend: %d\n", error); goto out; } m = NULL; /* Determine new timeout. */ if (timo < MAX_RESEND_DELAY) timo++; else printf("RPC timeout for server 0x%x\n", ntohl(sa->sin_addr.s_addr)); /* * Wait for up to timo seconds for a reply. * The socket receive timeout was set to 1 second. */ secs = timo; while (secs > 0) { if (from) { FREE(from, M_SONAME); from = NULL; } if (m) { m_freem(m); m = NULL; } bzero(&auio,sizeof(auio)); auio.uio_resid = len = 1<<16; rcvflg = 0; error = soreceive(so, &from, &auio, &m, NULL, &rcvflg); if (error == EWOULDBLOCK) { secs--; continue; } if (error) goto out; len -= auio.uio_resid; /* Does the reply contain at least a header? */ if (len < MIN_REPLY_HDR) continue; if (m->m_len < MIN_REPLY_HDR) continue; reply = mtod(m, struct rpc_reply *); /* Is it the right reply? */ if (reply->rp_direction != txdr_unsigned(RPC_REPLY)) continue; if (reply->rp_xid != txdr_unsigned(xid)) continue; /* Was RPC accepted? (authorization OK) */ if (reply->rp_astatus != 0) { error = fxdr_unsigned(u_int32_t, reply->rp_errno); printf("rpc denied, error=%d\n", error); continue; } /* Did the call succeed? */ if (reply->rp_status != 0) { error = fxdr_unsigned(u_int32_t, reply->rp_status); if (error == RPC_PROGMISMATCH) { error = EBADRPC; goto out; } printf("rpc denied, status=%d\n", error); continue; } goto gotreply; /* break two levels */ } /* while secs */ } /* forever send/receive */ error = ETIMEDOUT; goto out; gotreply: /* * Get RPC reply header into first mbuf, * get its length, then strip it off. */ len = sizeof(*reply); if (m->m_len < len) { m = m_pullup(m, len); if (m == NULL) { error = ENOBUFS; goto out; } } reply = mtod(m, struct rpc_reply *); if (reply->rp_auth.authtype != 0) { len += fxdr_unsigned(u_int32_t, reply->rp_auth.authlen); len = (len + 3) & ~3; /* XXX? 
*/ } m_adj(m, len); /* result */ *data = m; if (from_p) { *from_p = from; from = NULL; } out: if (mhead) m_freem(mhead); if (from) free(from, M_SONAME); soclose(so); return error; } /* * eXternal Data Representation routines. * (but with non-standard args...) */ /* * String representation for RPC. */ struct xdr_string { u_int32_t len; /* length without null or padding */ char data[4]; /* data (longer, of course) */ /* data is padded to a long-word boundary */ }; struct mbuf * xdr_string_encode(str, len) char *str; int len; { struct mbuf *m; struct xdr_string *xs; int dlen; /* padded string length */ int mlen; /* message length */ dlen = (len + 3) & ~3; mlen = dlen + 4; if (mlen > MCLBYTES) /* If too big, we just can't do it. */ return (NULL); m = m_get(M_WAIT, MT_DATA); if (mlen > MLEN) { MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) { (void) m_free(m); /* There can be only one. */ return (NULL); } } xs = mtod(m, struct xdr_string *); m->m_len = mlen; xs->len = txdr_unsigned(len); bcopy(str, xs->data, len); return (m); } diff --git a/sys/nfsclient/bootp_subr.c b/sys/nfsclient/bootp_subr.c index 7dbcd9e011cc..1cc0ec821299 100644 --- a/sys/nfsclient/bootp_subr.c +++ b/sys/nfsclient/bootp_subr.c @@ -1,1266 +1,1267 @@ -/* $Id: bootp_subr.c,v 1.10 1998/03/14 03:25:14 tegge Exp $ */ +/* $Id: bootp_subr.c,v 1.11 1998/03/14 04:13:56 tegge Exp $ */ /* * Copyright (c) 1995 Gordon Ross, Adam Glass * Copyright (c) 1992 Regents of the University of California. * All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * based on: * nfs/krpc_subr.c * $NetBSD: krpc_subr.c,v 1.10 1995/08/08 20:43:43 gwr Exp $ */ #include "opt_bootp.h" #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #include #include #include #include #include #include #define BOOTP_MIN_LEN 300 /* Minimum size of bootp udp packet */ /* * What is the longest we will wait before re-sending a request? * Note this is also the frequency of "RPC timeout" messages. 
* The re-send loop count sup linearly to this maximum, so the * first complaint will happen after (1+2+3+4+5)=15 seconds. */ #define MAX_RESEND_DELAY 5 /* seconds */ /* Definitions from RFC951 */ struct bootp_packet { u_int8_t op; u_int8_t htype; u_int8_t hlen; u_int8_t hops; u_int32_t xid; u_int16_t secs; u_int16_t flags; struct in_addr ciaddr; struct in_addr yiaddr; struct in_addr siaddr; struct in_addr giaddr; unsigned char chaddr[16]; char sname[64]; char file[128]; unsigned char vend[256]; }; #define IPPORT_BOOTPC 68 #define IPPORT_BOOTPS 67 extern int nfs_diskless_valid; extern struct nfsv3_diskless nfsv3_diskless; /* mountd RPC */ static int md_mount __P((struct sockaddr_in *mdsin, char *path, u_char *fhp, int *fhsizep, struct nfs_args *args,struct proc *procp)); static int md_lookup_swap __P((struct sockaddr_in *mdsin,char *path, u_char *fhp, int *fhsizep, struct nfs_args *args, struct proc *procp)); static int setfs __P((struct sockaddr_in *addr, char *path, char *p)); static int getdec __P((char **ptr)); static char *substr __P((char *a,char *b)); static void mountopts __P((struct nfs_args *args, char *p)); static int xdr_opaque_decode __P((struct mbuf **ptr,u_char *buf, int len)); static int xdr_int_decode __P((struct mbuf **ptr,int *iptr)); static void printip __P((char *prefix,struct in_addr addr)); #ifdef BOOTP_DEBUG void bootpboot_p_sa(struct sockaddr *sa,struct sockaddr *ma); void bootpboot_p_ma(struct sockaddr *ma); void bootpboot_p_rtentry(struct rtentry *rt); void bootpboot_p_tree(struct radix_node *rn); void bootpboot_p_rtlist(void); void bootpboot_p_iflist(void); #endif static int bootpc_call(struct bootp_packet *call, struct bootp_packet *reply, struct proc *procp); static int bootpc_fakeup_interface(struct ifreq *ireq, struct socket *so, struct proc *procp); static int bootpc_adjust_interface(struct ifreq *ireq,struct socket *so, struct sockaddr_in *myaddr, struct sockaddr_in *netmask, struct sockaddr_in *gw, struct proc *procp); void 
bootpc_init(void); #ifdef BOOTP_DEBUG void bootpboot_p_sa(sa,ma) struct sockaddr *sa; struct sockaddr *ma; { if (!sa) { printf("(sockaddr *) "); return; } switch (sa->sa_family) { case AF_INET: { struct sockaddr_in *sin = (struct sockaddr_in *) sa; printf("inet %x",ntohl(sin->sin_addr.s_addr)); if (ma) { struct sockaddr_in *sin = (struct sockaddr_in *) ma; printf(" mask %x",ntohl(sin->sin_addr.s_addr)); } } break; case AF_LINK: { struct sockaddr_dl *sli = (struct sockaddr_dl *) sa; int i; printf("link %.*s ",sli->sdl_nlen,sli->sdl_data); for (i=0;isdl_alen;i++) { if (i>0) printf(":"); printf("%x",(unsigned char) sli->sdl_data[i+sli->sdl_nlen]); } } break; default: printf("af%d",sa->sa_family); } } void bootpboot_p_ma(ma) struct sockaddr *ma; { if (!ma) { printf(""); return; } printf("%x",*(int*)ma); } void bootpboot_p_rtentry(rt) struct rtentry *rt; { bootpboot_p_sa(rt_key(rt),rt_mask(rt)); printf(" "); bootpboot_p_ma(rt->rt_genmask); printf(" "); bootpboot_p_sa(rt->rt_gateway,NULL); printf(" "); printf("flags %x",(unsigned short) rt->rt_flags); printf(" %d",rt->rt_rmx.rmx_expire); printf(" %s%d\n",rt->rt_ifp->if_name,rt->rt_ifp->if_unit); } void bootpboot_p_tree(rn) struct radix_node *rn; { while (rn) { if (rn->rn_b < 0) { if (rn->rn_flags & RNF_ROOT) { } else { bootpboot_p_rtentry((struct rtentry *) rn); } rn = rn->rn_dupedkey; } else { bootpboot_p_tree(rn->rn_l); bootpboot_p_tree(rn->rn_r); return; } } } void bootpboot_p_rtlist(void) { printf("Routing table:\n"); bootpboot_p_tree(rt_tables[AF_INET]->rnh_treetop); } void bootpboot_p_iflist(void) { struct ifnet *ifp; struct ifaddr *ifa; printf("Interface list:\n"); for (ifp = TAILQ_FIRST(&ifnet); ifp != 0; ifp = TAILQ_NEXT(ifp,if_link)) { for (ifa = TAILQ_FIRST(&ifp->if_addrhead) ;ifa; ifa=TAILQ_NEXT(ifa,ifa_link)) if (ifa->ifa_addr->sa_family == AF_INET ) { printf("%s%d flags %x, addr %x, bcast %x, net %x\n", ifp->if_name,ifp->if_unit, (unsigned short) ifp->if_flags, ntohl(((struct sockaddr_in *) 
		       ifa->ifa_addr)->sin_addr.s_addr),
		       ntohl(((struct sockaddr_in *) ifa->ifa_dstaddr)->sin_addr.s_addr),
		       ntohl(((struct sockaddr_in *) ifa->ifa_netmask)->sin_addr.s_addr)
		       );
	    }
	}
}
#endif

/*
 * Broadcast a BOOTP request (from *call) and wait for a matching reply,
 * which is copied into *reply.  The request is re-sent with a linearly
 * increasing delay (capped at MAX_RESEND_DELAY seconds) until a reply
 * whose xid/hlen/chaddr match the request arrives.
 * Returns 0 on success or an errno on socket failure.
 * NOTE(review): the retry loop itself never exits with ETIMEDOUT — the
 * final "error = ETIMEDOUT" after the for(;;) is unreachable as written.
 */
static int
bootpc_call(call,reply,procp)
	struct bootp_packet *call;
	struct bootp_packet *reply;	/* output */
	struct proc *procp;
{
	struct socket *so;
	struct sockaddr_in *sin, sa;
	struct mbuf *m;
	struct uio auio;
	struct iovec aio;
	int error, rcvflg, timo, secs, len;
	u_int tport;

	/*
	 * Create socket and set its receive timeout.
	 */
	if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0,procp)))
		goto out;

	m = m_get(M_WAIT, MT_SOOPTS);
	if (m == NULL) {
		error = ENOBUFS;
		goto out;
	} else {
		struct timeval *tv;
		tv = mtod(m, struct timeval *);
		m->m_len = sizeof(*tv);
		/* 1-second receive timeout; the wait loop counts seconds. */
		tv->tv_sec = 1;
		tv->tv_usec = 0;
		if ((error = sosetopt(so, SOL_SOCKET, SO_RCVTIMEO, m, procp)))
			goto out;
	}

	/*
	 * Enable broadcast (the request goes to INADDR_BROADCAST).
	 */
	{
		int *on;
		m = m_get(M_WAIT, MT_SOOPTS);
		if (m == NULL) {
			error = ENOBUFS;
			goto out;
		}
		on = mtod(m, int *);
		m->m_len = sizeof(*on);
		*on = 1;
		if ((error = sosetopt(so, SOL_SOCKET, SO_BROADCAST, m, procp)))
			goto out;
	}

	/*
	 * Bind the local endpoint to a bootp client port.
	 */
	sin = &sa;
	bzero(sin, sizeof *sin);
	sin->sin_len = sizeof(*sin);
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = INADDR_ANY;
	sin->sin_port = htons(IPPORT_BOOTPC);
	error = sobind(so, (struct sockaddr *)sin, procp);
	if (error) {
		printf("bind failed\n");
		goto out;
	}

	/*
	 * Setup socket address for the server.
	 * (sa is reused; the bind above already consumed the client addr.)
	 */
	sin = &sa;
	bzero(sin, sizeof *sin);
	sin->sin_len = sizeof(*sin);
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = INADDR_BROADCAST;
	sin->sin_port = htons(IPPORT_BOOTPS);

	/*
	 * Send it, repeatedly, until a reply is received,
	 * but delay each re-send by an increasing amount.
	 * If the delay hits the maximum, start complaining.
	 */
	timo = 0;
	for (;;) {
		/* Send BOOTP request (or re-send). */
		aio.iov_base = (caddr_t) call;
		aio.iov_len = sizeof(*call);
		auio.uio_iov = &aio;
		auio.uio_iovcnt = 1;
		auio.uio_segflg = UIO_SYSSPACE;
		auio.uio_rw = UIO_WRITE;
		auio.uio_offset = 0;
		auio.uio_resid = sizeof(*call);
		auio.uio_procp = procp;
		error = sosend(so, (struct sockaddr *)sin, &auio, NULL,
			       NULL, 0, procp);
		if (error) {
			printf("bootpc_call: sosend: %d\n", error);
			goto out;
		}

		/* Determine new timeout. */
		if (timo < MAX_RESEND_DELAY)
			timo++;
		else
			printf("BOOTP timeout for server 0x%x\n",
			       ntohl(sin->sin_addr.s_addr));

		/*
		 * Wait for up to timo seconds for a reply.
		 * The socket receive timeout was set to 1 second.
		 */
		secs = timo;
		while (secs > 0) {
			aio.iov_base = (caddr_t) reply;
			aio.iov_len = sizeof(*reply);
			auio.uio_iov = &aio;
			auio.uio_iovcnt = 1;
			auio.uio_segflg = UIO_SYSSPACE;
			auio.uio_rw = UIO_READ;
			auio.uio_offset = 0;
			auio.uio_resid = sizeof(*reply);
			auio.uio_procp = procp;
			rcvflg = 0;
			error = soreceive(so, NULL, &auio, NULL, NULL, &rcvflg);
			if (error == EWOULDBLOCK) {
				/*
				 * One-second receive timeout expired:
				 * count it and advertise the elapsed time
				 * in the (network-order) secs field.
				 */
				secs--;
				call->secs=htons(ntohs(call->secs)+1);
				continue;
			}
			if (error)
				goto out;
			len = sizeof(*reply) - auio.uio_resid;

			/* Do we have the required number of bytes ? */
			if (len < BOOTP_MIN_LEN)
				continue;

			/* Is it the right reply? (op 2 == BOOTREPLY) */
			if (reply->op != 2)
				continue;
			if (reply->xid != call->xid)
				continue;
			if (reply->hlen != call->hlen)
				continue;
			if (bcmp(reply->chaddr,call->chaddr,call->hlen))
				continue;
			goto gotreply;	/* break two levels */
		} /* while secs */
	} /* forever send/receive */

	error = ETIMEDOUT;
	goto out;

gotreply:
out:
	soclose(so);
	return error;
}

static int
bootpc_fakeup_interface(struct ifreq *ireq,struct socket *so,
			struct proc *procp)
{
	struct sockaddr_in *sin;
	int error;

	struct sockaddr_in dst;
	struct sockaddr_in gw;
	struct sockaddr_in mask;

	/*
	 * Bring up the interface.
	 *
	 * Get the old interface flags and or IFF_UP into them; if
	 * IFF_UP set blindly, interface selection can be clobbered.
*/ error = ifioctl(so, SIOCGIFFLAGS, (caddr_t)ireq, procp); if (error) panic("bootpc_fakeup_interface: GIFFLAGS, error=%d", error); ireq->ifr_flags |= IFF_UP; error = ifioctl(so, SIOCSIFFLAGS, (caddr_t)ireq, procp); if (error) panic("bootpc_fakeup_interface: SIFFLAGS, error=%d", error); /* * Do enough of ifconfig(8) so that the chosen interface * can talk to the servers. (just set the address) */ /* addr is 0.0.0.0 */ sin = (struct sockaddr_in *)&ireq->ifr_addr; bzero((caddr_t)sin, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; error = ifioctl(so, SIOCSIFADDR, (caddr_t)ireq, procp); if (error) panic("bootpc_fakeup_interface: set if addr, error=%d", error); /* netmask is 0.0.0.0 */ sin = (struct sockaddr_in *)&ireq->ifr_addr; bzero((caddr_t)sin, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, procp); if (error) panic("bootpc_fakeup_interface: set if net addr, error=%d", error); /* Broadcast is 255.255.255.255 */ sin = (struct sockaddr_in *)&ireq->ifr_addr; bzero((caddr_t)sin, sizeof(*sin)); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_BROADCAST; error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, procp); if (error) panic("bootpc_fakeup_interface: set if broadcast addr, error=%d", error); /* Add default route to 0.0.0.0 so we can send data */ bzero((caddr_t) &dst, sizeof(dst)); dst.sin_len=sizeof(dst); dst.sin_family=AF_INET; dst.sin_addr.s_addr = htonl(0); bzero((caddr_t) &gw, sizeof(gw)); gw.sin_len=sizeof(gw); gw.sin_family=AF_INET; gw.sin_addr.s_addr = htonl(0x0); bzero((caddr_t) &mask, sizeof(mask)); mask.sin_len=sizeof(mask); mask.sin_family=AF_INET; mask.sin_addr.s_addr = htonl(0); error = rtrequest(RTM_ADD, (struct sockaddr *) &dst, (struct sockaddr *) &gw, (struct sockaddr *) &mask, RTF_UP | RTF_STATIC , NULL); if (error) printf("bootpc_fakeup_interface: 
add default route, error=%d\n", error); return error; } static int bootpc_adjust_interface(struct ifreq *ireq,struct socket *so, struct sockaddr_in *myaddr, struct sockaddr_in *netmask, struct sockaddr_in *gw, struct proc *procp) { int error; struct sockaddr_in oldgw; struct sockaddr_in olddst; struct sockaddr_in oldmask; struct sockaddr_in *sin; /* Remove old default route to 0.0.0.0 */ bzero((caddr_t) &olddst, sizeof(olddst)); olddst.sin_len=sizeof(olddst); olddst.sin_family=AF_INET; olddst.sin_addr.s_addr = INADDR_ANY; bzero((caddr_t) &oldgw, sizeof(oldgw)); oldgw.sin_len=sizeof(oldgw); oldgw.sin_family=AF_INET; oldgw.sin_addr.s_addr = INADDR_ANY; bzero((caddr_t) &oldmask, sizeof(oldmask)); oldmask.sin_len=sizeof(oldmask); oldmask.sin_family=AF_INET; oldmask.sin_addr.s_addr = INADDR_ANY; error = rtrequest(RTM_DELETE, (struct sockaddr *) &olddst, (struct sockaddr *) &oldgw, (struct sockaddr *) &oldmask, (RTF_UP | RTF_STATIC), NULL); if (error) { printf("nfs_boot: del default route, error=%d\n", error); return error; } /* * Do enough of ifconfig(8) so that the chosen interface * can talk to the servers. 
(just set the address) */

	/* Install the BOOTP-supplied netmask on the interface. */
	bcopy(netmask, &ireq->ifr_addr, sizeof(*netmask));
	error = ifioctl(so, SIOCSIFNETMASK, (caddr_t)ireq, procp);
	if (error)
		panic("nfs_boot: set if netmask, error=%d", error);

	/* Broadcast is with host part of IP address all 1's */
	sin = (struct sockaddr_in *)&ireq->ifr_addr;
	bzero((caddr_t)sin, sizeof(*sin));
	sin->sin_len = sizeof(*sin);
	sin->sin_family = AF_INET;
	sin->sin_addr.s_addr = myaddr->sin_addr.s_addr | ~netmask->sin_addr.s_addr;
	error = ifioctl(so, SIOCSIFBRDADDR, (caddr_t)ireq, procp);
	if (error)
		panic("bootpc_call: set if broadcast addr, error=%d", error);

	/* Finally, assign the interface address itself. */
	bcopy(myaddr, &ireq->ifr_addr, sizeof(*myaddr));
	error = ifioctl(so, SIOCSIFADDR, (caddr_t)ireq, procp);
	if (error)
		panic("nfs_boot: set if addr, error=%d", error);

	/* Add new default route */
	error = rtrequest(RTM_ADD, (struct sockaddr *) &olddst,
	    (struct sockaddr *) gw,
	    (struct sockaddr *) &oldmask,
	    (RTF_UP | RTF_GATEWAY | RTF_STATIC), NULL);
	if (error) {
		printf("nfs_boot: add net route, error=%d\n", error);
		return error;
	}
	return 0;
}

/*
 * Parse a string of the form "a.b.c.d:/path" into 'addr' (network byte
 * order) and copy the path component into 'path'.
 * Returns 1 on success, 0 on a malformed string.
 * NOTE(review): strncpy() copies at most MNAMELEN-1 bytes but does not
 * guarantee NUL termination of 'path' -- presumably the destination is
 * pre-zeroed; confirm at the call sites.
 */
static int setfs(addr, path, p)
	struct sockaddr_in *addr;
	char *path;
	char *p;
{
	unsigned ip = 0;
	int val;

	/* Four dot-separated decimal octets, each in 0-255. */
	if (((val = getdec(&p)) < 0) || (val > 255))
		return(0);
	ip = val << 24;
	if (*p != '.')
		return(0);
	p++;

	if (((val = getdec(&p)) < 0) || (val > 255))
		return(0);
	ip |= (val << 16);
	if (*p != '.')
		return(0);
	p++;

	if (((val = getdec(&p)) < 0) || (val > 255))
		return(0);
	ip |= (val << 8);
	if (*p != '.')
		return(0);
	p++;

	if (((val = getdec(&p)) < 0) || (val > 255))
		return(0);
	ip |= val;

	/* The address must be followed by ':' and the remote path. */
	if (*p != ':')
		return(0);
	p++;

	addr->sin_addr.s_addr = htonl(ip);
	addr->sin_len = sizeof(struct sockaddr_in);
	addr->sin_family = AF_INET;
	strncpy(path, p, MNAMELEN-1);
	return(1);
}

/*
 * Parse an unsigned decimal number at *ptr, advancing *ptr past the
 * digits consumed.  Returns the value, or -1 if no digit is present.
 */
static int getdec(ptr)
	char **ptr;
{
	char *p = *ptr;
	int ret = 0;

	if ((*p < '0') || (*p > '9'))
		return(-1);
	while ((*p >= '0') && (*p <= '9')) {
		ret = ret*10 + (*p - '0');
		p++;
	}
	*ptr = p;
	return(ret);
}

/*
 * Naive substring search: look for 'b' inside 'a'.
 * NOTE(review): on a match this returns a pointer positioned past the
 * matched text (mountopts relies on that to parse the value following
 * "rsize="/"wsize="), and returns 0 when the end of 'a' is reached.
 */
static char *substr(a, b)
	char *a, *b;
{
	char *loc1;
	char *loc2;

	while (*a != '\0') {
		loc1 =
a;
		loc2 = b;
		/* Walk both strings while they continue to match. */
		while (*loc1 == *loc2++) {
			if (*loc1 == '\0')
				return (0);
			loc1++;
			if (*loc2 == '\0')
				return (loc1);
		}
		a++;
	}
	return (0);
}

/*
 * Decode BOOTP-supplied mount options from string 'p' into NFS mount
 * arguments.  Recognized options: rsize=, wsize=, intr, soft, noconn
 * and tcp.  Anything else is ignored.
 */
static void mountopts(args, p)
	struct nfs_args *args;
	char *p;
{
	char *tmp;

	/* Start from the driver defaults, then apply overrides. */
	args->flags = NFSMNT_RSIZE | NFSMNT_WSIZE | NFSMNT_RESVPORT;
	args->sotype = SOCK_DGRAM;
	if ((tmp = (char *)substr(p, "rsize=")))
		args->rsize = getdec(&tmp);
	if ((tmp = (char *)substr(p, "wsize=")))
		args->wsize = getdec(&tmp);
	if ((tmp = (char *)substr(p, "intr")))
		args->flags |= NFSMNT_INT;
	if ((tmp = (char *)substr(p, "soft")))
		args->flags |= NFSMNT_SOFT;
	if ((tmp = (char *)substr(p, "noconn")))
		args->flags |= NFSMNT_NOCONN;
	if ((tmp = (char *)substr(p, "tcp")))
		args->sotype = SOCK_STREAM;
}

/*
 * Copy 'len' bytes of XDR opaque data from the front of *mptr into
 * 'buf' and trim the mbuf chain past the 4-byte-aligned length.
 * Returns 0 on success, or EBADRPC if the data cannot be pulled up
 * into one mbuf (in which case *mptr is set to NULL).
 */
static int xdr_opaque_decode(mptr, buf, len)
	struct mbuf **mptr;
	u_char *buf;
	int len;
{
	struct mbuf *m;
	int alignedlen;

	m = *mptr;
	/* XDR pads opaque data to a multiple of 4 bytes. */
	alignedlen = ( len + 3 ) & ~3;
	if (m->m_len < alignedlen) {
		m = m_pullup(m, alignedlen);
		if (m == NULL) {
			*mptr = NULL;
			return EBADRPC;
		}
	}
	bcopy(mtod(m, u_char *), buf, len);
	m_adj(m, alignedlen);
	*mptr = m;
	return 0;
}

/*
 * Decode a 32-bit XDR integer from the front of *mptr into *iptr.
 * Returns 0 on success, EBADRPC on failure.
 */
static int xdr_int_decode(mptr, iptr)
	struct mbuf **mptr;
	int *iptr;
{
	u_int32_t i;

	if (xdr_opaque_decode(mptr, (u_char *) &i, sizeof(u_int32_t)))
		return EBADRPC;
	*iptr = fxdr_unsigned(u_int32_t, i);
	return 0;
}

/* Print an IPv4 address in dotted-quad form, prefixed by 'prefix'. */
static void printip(char *prefix, struct in_addr addr)
{
	unsigned int ip;

	ip = ntohl(addr.s_addr);
	printf("%s is %d.%d.%d.%d\n", prefix,
	    ip >> 24, (ip >> 16) & 255, (ip >> 8) & 255, ip & 255);
}

/*
 * Top-level BOOTP client.  Finds a usable interface, broadcasts a
 * BOOTP request, and fills in the nfsv3_diskless structure from the
 * reply (address, netmask, gateway, root/swap paths, mount options).
 */
void bootpc_init(void)
{
	struct bootp_packet call;
	struct bootp_packet reply;
	static u_int32_t xid = ~0xFF;	/* RPC transaction id seed */

	struct ifreq ireq;
	struct ifnet *ifp;
	struct socket *so;
	int error;
	int code, ncode, len;
	int i, j;
	char *p;
	unsigned int ip;
	struct sockaddr_in myaddr;
	struct sockaddr_in netmask;
	struct sockaddr_in gw;
	int gotgw = 0;
	int gotnetmask = 0;
	int gotrootpath = 0;
	int gotswappath = 0;
	char lookup_path[24];
#define EALEN 6
	unsigned char ea[EALEN];	/* interface hardware (Ethernet) address */
	struct ifaddr *ifa;
	struct sockaddr_dl *sdl = NULL;
	char *delim;
	struct nfsv3_diskless *nd = &nfsv3_diskless;
struct proc *procp = curproc; /* * If already filled in, don't touch it here */ if (nfs_diskless_valid) return; /* * Wait until arp entries can be handled. */ while (time.tv_sec == 0) tsleep(&time, PZERO+8, "arpkludge", 10); /* * Find a network interface. */ #ifdef BOOTP_WIRED_TO printf("bootpc_init: wired to interface '%s'\n", __XSTRING(BOOTP_WIRED_TO)); #endif bzero(&ireq, sizeof(ireq)); for (ifp = TAILQ_FIRST(&ifnet); ifp != 0; ifp = TAILQ_NEXT(ifp,if_link)) { sprintf(ireq.ifr_name, "%s%d", ifp->if_name, ifp->if_unit); #ifdef BOOTP_WIRED_TO if (strcmp(ireq.ifr_name, __XSTRING(BOOTP_WIRED_TO)) == 0) break; #else if ((ifp->if_flags & (IFF_LOOPBACK|IFF_POINTOPOINT)) == 0) break; #endif } if (ifp == NULL) panic("bootpc_init: no suitable interface"); strcpy(nd->myif.ifra_name,ireq.ifr_name); printf("bootpc_init: using network interface '%s'\n", ireq.ifr_name); if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0,procp)) != 0) panic("nfs_boot: socreate, error=%d", error); bootpc_fakeup_interface(&ireq,so,procp); printf("Bootpc testing starting\n"); /* Get HW address */ for (ifa = TAILQ_FIRST(&ifp->if_addrhead) ;ifa; ifa=TAILQ_NEXT(ifa,ifa_link)) if (ifa->ifa_addr->sa_family == AF_LINK && (sdl = ((struct sockaddr_dl *) ifa->ifa_addr)) && sdl->sdl_type == IFT_ETHER) break; if (!sdl) panic("bootpc: Unable to find HW address"); if (sdl->sdl_alen != EALEN ) panic("bootpc: HW address len is %d, expected value is %d", sdl->sdl_alen,EALEN); printf("bootpc hw address is "); delim=""; for (j=0;jsdl_alen;j++) { printf("%s%x",delim,((unsigned char *)LLADDR(sdl))[j]); delim=":"; } printf("\n"); #if 0 bootpboot_p_iflist(); bootpboot_p_rtlist(); #endif bzero((caddr_t) &call, sizeof(call)); /* bootpc part */ call.op = 1; /* BOOTREQUEST */ call.htype= 1; /* 10mb ethernet */ call.hlen=sdl->sdl_alen; /* Hardware address length */ call.hops=0; xid++; call.xid = txdr_unsigned(xid); bcopy(LLADDR(sdl),&call.chaddr,sdl->sdl_alen); call.vend[0]=99; call.vend[1]=130; call.vend[2]=83; 
call.vend[3]=99; call.vend[4]=255; call.secs = 0; call.flags = htons(0x8000); /* We need an broadcast answer */ error = bootpc_call(&call,&reply,procp); if (error) { #ifdef BOOTP_NFSROOT panic("BOOTP call failed"); #endif return; } bzero(&myaddr,sizeof(myaddr)); bzero(&netmask,sizeof(netmask)); bzero(&gw,sizeof(gw)); myaddr.sin_len = sizeof(myaddr); myaddr.sin_family = AF_INET; netmask.sin_len = sizeof(netmask); netmask.sin_family = AF_INET; gw.sin_len = sizeof(gw); gw.sin_family= AF_INET; nd->root_args.version = NFS_ARGSVERSION; nd->root_args.rsize = 8192; nd->root_args.wsize = 8192; nd->root_args.sotype = SOCK_DGRAM; nd->root_args.flags = (NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_RESVPORT); nd->swap_saddr.sin_len = sizeof(gw); nd->swap_saddr.sin_family = AF_INET; nd->swap_args.version = NFS_ARGSVERSION; nd->swap_args.rsize = 8192; nd->swap_args.wsize = 8192; nd->swap_args.sotype = SOCK_DGRAM; nd->swap_args.flags = (NFSMNT_WSIZE | NFSMNT_RSIZE | NFSMNT_RESVPORT); myaddr.sin_addr = reply.yiaddr; ip = ntohl(myaddr.sin_addr.s_addr); sprintf(lookup_path,"swap.%d.%d.%d.%d", ip >> 24, (ip >> 16) & 255 ,(ip >> 8) & 255 ,ip & 255 ); printip("My ip address",myaddr.sin_addr); printip("Server ip address",reply.siaddr); gw.sin_addr = reply.giaddr; printip("Gateway ip address",reply.giaddr); if (reply.sname[0]) printf("Server name is %s\n",reply.sname); if (reply.file[0]) printf("boot file is %s\n",reply.file); if (reply.vend[0]==99 && reply.vend[1]==130 && reply.vend[2]==83 && reply.vend[3]==99) { j=4; ncode = reply.vend[j]; while (j=sizeof(reply.vend)) { printf("Truncated field"); break; } ncode = reply.vend[j+len]; reply.vend[j+len]='\0'; p = &reply.vend[j]; switch (code) { case 1: if (len!=4) panic("bootpc: subnet mask len is %d",len); bcopy(&reply.vend[j],&netmask.sin_addr,4); gotnetmask=1; printip("Subnet mask",netmask.sin_addr); break; case 6: /* Domain Name servers. Unused */ case 16: /* Swap server IP address. 
unused */ case 2: /* Time offset */ break; case 3: /* Routers */ if (len % 4) panic("bootpc: Router Len is %d",len); if (len > 0) { bcopy(&reply.vend[j],&gw.sin_addr,4); printip("Router",gw.sin_addr); gotgw=1; } break; case 17: if (setfs(&nd->root_saddr, nd->root_hostnam, p)) { printf("rootfs is %s\n",p); gotrootpath=1; } else panic("Failed to set rootfs to %s",p); break; case 12: if (len>=MAXHOSTNAMELEN) panic("bootpc: hostname >=%d bytes",MAXHOSTNAMELEN); strncpy(nd->my_hostnam,&reply.vend[j],len); nd->my_hostnam[len]=0; strncpy(hostname,&reply.vend[j],len); hostname[len]=0; printf("Hostname is %s\n",hostname); break; case 128: if (setfs(&nd->swap_saddr, nd->swap_hostnam, p)) { gotswappath=1; printf("swapfs is %s\n",p); } else panic("Failed to set swapfs to %s",p); break; case 129: { int swaplen; if (len!=4) panic("bootpc: Expected 4 bytes for swaplen, not %d bytes",len); bcopy(&reply.vend[j],&swaplen,4); nd->swap_nblks = ntohl(swaplen); printf("bootpc: Swap size is %d KB\n",nd->swap_nblks); } break; case 130: /* root mount options */ mountopts(&nd->root_args,p); break; case 131: /* swap mount options */ mountopts(&nd->swap_args,p); break; default: printf("Ignoring field type %d\n",code); } j+=len; } } if (!gotswappath) nd->swap_nblks = 0; #ifdef BOOTP_NFSROOT if (!gotrootpath) panic("bootpc: No root path offered"); #endif if (!gotnetmask) { if (IN_CLASSA(ntohl(myaddr.sin_addr.s_addr))) netmask.sin_addr.s_addr = htonl(IN_CLASSA_NET); else if (IN_CLASSB(ntohl(myaddr.sin_addr.s_addr))) netmask.sin_addr.s_addr = htonl(IN_CLASSB_NET); else netmask.sin_addr.s_addr = htonl(IN_CLASSC_NET); } if (!gotgw) { /* Use proxyarp */ gw.sin_addr.s_addr = myaddr.sin_addr.s_addr; } #if 0 bootpboot_p_iflist(); bootpboot_p_rtlist(); #endif error = bootpc_adjust_interface(&ireq,so, &myaddr,&netmask,&gw,procp); soclose(so); #if 0 bootpboot_p_iflist(); bootpboot_p_rtlist(); #endif if (gotrootpath) { error = md_mount(&nd->root_saddr, nd->root_hostnam, nd->root_fh, &nd->root_fhsize, 
&nd->root_args,procp); if (error) panic("nfs_boot: mountd root, error=%d", error); if (gotswappath) { error = md_mount(&nd->swap_saddr, nd->swap_hostnam, nd->swap_fh, &nd->swap_fhsize,&nd->swap_args,procp); if (error) panic("nfs_boot: mountd swap, error=%d", error); error = md_lookup_swap(&nd->swap_saddr,lookup_path,nd->swap_fh, &nd->swap_fhsize, &nd->swap_args,procp); if (error) panic("nfs_boot: lookup swap, error=%d", error); } nfs_diskless_valid = 3; } bcopy(&myaddr,&nd->myif.ifra_addr,sizeof(myaddr)); bcopy(&myaddr,&nd->myif.ifra_broadaddr,sizeof(myaddr)); ((struct sockaddr_in *) &nd->myif.ifra_broadaddr)->sin_addr.s_addr = myaddr.sin_addr.s_addr | ~ netmask.sin_addr.s_addr; bcopy(&netmask,&nd->myif.ifra_mask,sizeof(netmask)); #if 0 bootpboot_p_iflist(); bootpboot_p_rtlist(); #endif return; } /* * RPC: mountd/mount * Given a server pathname, get an NFS file handle. * Also, sets sin->sin_port to the NFS service port. */ static int md_mount(mdsin, path, fhp, fhsizep, args, procp) struct sockaddr_in *mdsin; /* mountd server address */ char *path; u_char *fhp; int *fhsizep; struct nfs_args *args; struct proc *procp; { struct mbuf *m; int error; int authunixok; int authcount; int authver; #ifdef BOOTP_NFSV3 /* First try NFS v3 */ /* Get port number for MOUNTD. */ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER3, &mdsin->sin_port, procp); if (!error) { m = xdr_string_encode(path, strlen(path)); /* Do RPC to mountd. */ error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER3, RPCMNT_MOUNT, &m, NULL, curproc); } if (!error) { args->flags |= NFSMNT_NFSV3; } else { #endif /* Fallback to NFS v2 */ /* Get port number for MOUNTD. */ error = krpc_portmap(mdsin, RPCPROG_MNT, RPCMNT_VER1, &mdsin->sin_port, procp); if (error) return error; m = xdr_string_encode(path, strlen(path)); /* Do RPC to mountd. 
*/ error = krpc_call(mdsin, RPCPROG_MNT, RPCMNT_VER1, RPCMNT_MOUNT, &m, NULL, curproc); if (error) return error; /* message already freed */ #ifdef BOOTP_NFSV3 } #endif if (xdr_int_decode(&m,&error) || error) goto bad; if (args->flags & NFSMNT_NFSV3) { if (xdr_int_decode(&m,fhsizep) || *fhsizep > NFSX_V3FHMAX || *fhsizep <= 0 ) goto bad; } else *fhsizep = NFSX_V2FH; if (xdr_opaque_decode(&m,fhp,*fhsizep)) goto bad; if (args->flags & NFSMNT_NFSV3) { if (xdr_int_decode(&m,&authcount)) goto bad; authunixok = 0; if (authcount<0 || authcount>100) goto bad; while (authcount>0) { if (xdr_int_decode(&m,&authver)) goto bad; if (authver == RPCAUTH_UNIX) authunixok = 1; authcount--; } if (!authunixok) goto bad; } /* Set port number for NFS use. */ error = krpc_portmap(mdsin, NFS_PROG, (args->flags & NFSMNT_NFSV3)?NFS_VER3:NFS_VER2, &mdsin->sin_port, procp); goto out; bad: error = EBADRPC; out: m_freem(m); return error; } static int md_lookup_swap(mdsin, path, fhp, fhsizep, args, procp) struct sockaddr_in *mdsin; /* mountd server address */ char *path; u_char *fhp; int *fhsizep; struct nfs_args *args; struct proc *procp; { struct mbuf *m; int error; int size = -1; int attribs_present; int status; union { u_int32_t v2[17]; u_int32_t v3[21]; } fattribs; m = m_get(M_WAIT,MT_DATA); if (!m) return ENOBUFS; if (args->flags & NFSMNT_NFSV3) { *mtod(m,u_int32_t *) = txdr_unsigned(*fhsizep); bcopy(fhp,mtod(m,u_char *)+sizeof(u_int32_t),*fhsizep); m->m_len = *fhsizep + sizeof(u_int32_t); } else { bcopy(fhp,mtod(m,u_char *),NFSX_V2FH); m->m_len = NFSX_V2FH; } m->m_next = xdr_string_encode(path, strlen(path)); if (!m->m_next) { error = ENOBUFS; goto out; } /* Do RPC to nfsd. 
*/ if (args->flags & NFSMNT_NFSV3) error = krpc_call(mdsin, NFS_PROG, NFS_VER3, NFSPROC_LOOKUP, &m, NULL, procp); else error = krpc_call(mdsin, NFS_PROG, NFS_VER2, NFSV2PROC_LOOKUP, &m, NULL, procp); if (error) return error; /* message already freed */ if (xdr_int_decode(&m,&status)) goto bad; if (status) { error = ENOENT; goto out; } if (args->flags & NFSMNT_NFSV3) { if (xdr_int_decode(&m,fhsizep) || *fhsizep > NFSX_V3FHMAX || *fhsizep <= 0 ) goto bad; } else *fhsizep = NFSX_V2FH; if (xdr_opaque_decode(&m, fhp, *fhsizep)) goto bad; if (args->flags & NFSMNT_NFSV3) { if (xdr_int_decode(&m,&attribs_present)) goto bad; if (attribs_present) { if (xdr_opaque_decode(&m,(u_char *) &fattribs.v3, sizeof(u_int32_t)*21)) goto bad; size = fxdr_unsigned(u_int32_t, fattribs.v3[6]); } } else { if (xdr_opaque_decode(&m,(u_char *) &fattribs.v2, sizeof(u_int32_t)*17)) goto bad; size = fxdr_unsigned(u_int32_t, fattribs.v2[5]); } if (!nfsv3_diskless.swap_nblks && size!= -1) { nfsv3_diskless.swap_nblks = size/1024; printf("md_lookup_swap: Swap size is %d KB\n", nfsv3_diskless.swap_nblks); } goto out; bad: error = EBADRPC; out: m_freem(m); return error; } diff --git a/sys/nfsclient/krpc_subr.c b/sys/nfsclient/krpc_subr.c index be5384342e53..f1dfa962abab 100644 --- a/sys/nfsclient/krpc_subr.c +++ b/sys/nfsclient/krpc_subr.c @@ -1,486 +1,487 @@ /* $NetBSD: krpc_subr.c,v 1.12.4.1 1996/06/07 00:52:26 cgd Exp $ */ -/* $Id: krpc_subr.c,v 1.7 1997/10/28 15:59:03 bde Exp $ */ +/* $Id: krpc_subr.c,v 1.8 1998/03/14 03:25:16 tegge Exp $ */ /* * Copyright (c) 1995 Gordon Ross, Adam Glass * Copyright (c) 1992 Regents of the University of California. * All rights reserved. * * This software was developed by the Computer Systems Engineering group * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and * contributed to Berkeley. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Lawrence Berkeley Laboratory and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * partially based on: * libnetboot/rpc.c * @(#) Header: rpc.c,v 1.12 93/09/28 08:31:56 leres Exp (LBL) */ #include #include #include #include #include #include +#include #include #include #include #include #include /* * Kernel support for Sun RPC * * Used currently for bootstrapping in nfs diskless configurations. 
*/ /* * Generic RPC headers */ struct auth_info { u_int32_t authtype; /* auth type */ u_int32_t authlen; /* auth length */ }; struct auth_unix { int32_t ua_time; int32_t ua_hostname; /* null */ int32_t ua_uid; int32_t ua_gid; int32_t ua_gidlist; /* null */ }; struct rpc_call { u_int32_t rp_xid; /* request transaction id */ int32_t rp_direction; /* call direction (0) */ u_int32_t rp_rpcvers; /* rpc version (2) */ u_int32_t rp_prog; /* program */ u_int32_t rp_vers; /* version */ u_int32_t rp_proc; /* procedure */ struct auth_info rpc_auth; struct auth_unix rpc_unix; struct auth_info rpc_verf; }; struct rpc_reply { u_int32_t rp_xid; /* request transaction id */ int32_t rp_direction; /* call direction (1) */ int32_t rp_astatus; /* accept status (0: accepted) */ union { u_int32_t rpu_errno; struct { struct auth_info rok_auth; u_int32_t rok_status; } rpu_rok; } rp_u; }; #define rp_errno rp_u.rpu_errno #define rp_auth rp_u.rpu_rok.rok_auth #define rp_status rp_u.rpu_rok.rok_status #define MIN_REPLY_HDR 16 /* xid, dir, astat, errno */ /* * What is the longest we will wait before re-sending a request? * Note this is also the frequency of "RPC timeout" messages. * The re-send loop count sup linearly to this maximum, so the * first complaint will happen after (1+2+3+4+5)=15 seconds. */ #define MAX_RESEND_DELAY 5 /* seconds */ /* * Call portmap to lookup a port number for a particular rpc program * Returns non-zero error on failure. */ int krpc_portmap(sin, prog, vers, portp, procp) struct sockaddr_in *sin; /* server address */ u_int prog, vers; /* host order */ u_int16_t *portp; /* network order */ struct proc *procp; { struct sdata { u_int32_t prog; /* call program */ u_int32_t vers; /* call version */ u_int32_t proto; /* call protocol */ u_int32_t port; /* call port (unused) */ } *sdata; struct rdata { u_int16_t pad; u_int16_t port; } *rdata; struct mbuf *m; int error; /* The portmapper port is fixed. 
*/ if (prog == PMAPPROG) { *portp = htons(PMAPPORT); return 0; } m = m_get(M_WAIT, MT_DATA); if (m == NULL) return ENOBUFS; sdata = mtod(m, struct sdata *); m->m_len = sizeof(*sdata); /* Do the RPC to get it. */ sdata->prog = txdr_unsigned(prog); sdata->vers = txdr_unsigned(vers); sdata->proto = txdr_unsigned(IPPROTO_UDP); sdata->port = 0; sin->sin_port = htons(PMAPPORT); error = krpc_call(sin, PMAPPROG, PMAPVERS, PMAPPROC_GETPORT, &m, NULL, procp); if (error) return error; if (m->m_len < sizeof(*rdata)) { m = m_pullup(m, sizeof(*rdata)); if (m == NULL) return ENOBUFS; } rdata = mtod(m, struct rdata *); *portp = rdata->port; m_freem(m); return 0; } /* * Do a remote procedure call (RPC) and wait for its reply. * If from_p is non-null, then we are doing broadcast, and * the address from whence the response came is saved there. */ int krpc_call(sa, prog, vers, func, data, from_p, procp) struct sockaddr_in *sa; u_int prog, vers, func; struct mbuf **data; /* input/output */ struct sockaddr **from_p; /* output */ struct proc *procp; { struct socket *so; struct sockaddr_in *sin, ssin; struct sockaddr *from; struct mbuf *m, *nam, *mhead; struct rpc_call *call; struct rpc_reply *reply; struct uio auio; int error, rcvflg, timo, secs, len; static u_int32_t xid = ~0xFF; u_int16_t tport; /* * Validate address family. * Sorry, this is INET specific... */ if (sa->sin_family != AF_INET) return (EAFNOSUPPORT); /* Free at end if not null. */ nam = mhead = NULL; from = NULL; /* * Create socket and set its recieve timeout. */ if ((error = socreate(AF_INET, &so, SOCK_DGRAM, 0, procp))) goto out; m = m_get(M_WAIT, MT_SOOPTS); if (m == NULL) { error = ENOBUFS; goto out; } else { struct timeval *tv; tv = mtod(m, struct timeval *); m->m_len = sizeof(*tv); tv->tv_sec = 1; tv->tv_usec = 0; if ((error = sosetopt(so, SOL_SOCKET, SO_RCVTIMEO, m, procp))) goto out; } /* * Enable broadcast if necessary. 
*/ if (from_p) { int32_t *on; m = m_get(M_WAIT, MT_SOOPTS); if (m == NULL) { error = ENOBUFS; goto out; } on = mtod(m, int32_t *); m->m_len = sizeof(*on); *on = 1; if ((error = sosetopt(so, SOL_SOCKET, SO_BROADCAST, m, procp))) goto out; } /* * Bind the local endpoint to a reserved port, * because some NFS servers refuse requests from * non-reserved (non-privileged) ports. */ sin = &ssin; bzero(sin, sizeof *sin); sin->sin_len = sizeof(*sin); sin->sin_family = AF_INET; sin->sin_addr.s_addr = INADDR_ANY; tport = IPPORT_RESERVED; do { tport--; sin->sin_port = htons(tport); error = sobind(so, (struct sockaddr *)sin, procp); } while (error == EADDRINUSE && tport > IPPORT_RESERVED / 2); if (error) { printf("bind failed\n"); goto out; } /* * Setup socket address for the server. */ /* * Prepend RPC message header. */ mhead = m_gethdr(M_WAIT, MT_DATA); mhead->m_next = *data; call = mtod(mhead, struct rpc_call *); mhead->m_len = sizeof(*call); bzero((caddr_t)call, sizeof(*call)); /* rpc_call part */ xid++; call->rp_xid = txdr_unsigned(xid); /* call->rp_direction = 0; */ call->rp_rpcvers = txdr_unsigned(2); call->rp_prog = txdr_unsigned(prog); call->rp_vers = txdr_unsigned(vers); call->rp_proc = txdr_unsigned(func); /* rpc_auth part (auth_unix as root) */ call->rpc_auth.authtype = txdr_unsigned(RPCAUTH_UNIX); call->rpc_auth.authlen = txdr_unsigned(sizeof(struct auth_unix)); /* rpc_verf part (auth_null) */ call->rpc_verf.authtype = 0; call->rpc_verf.authlen = 0; /* * Setup packet header */ len = 0; m = mhead; while (m) { len += m->m_len; m = m->m_next; } mhead->m_pkthdr.len = len; mhead->m_pkthdr.rcvif = NULL; /* * Send it, repeatedly, until a reply is received, * but delay each re-send by an increasing amount. * If the delay hits the maximum, start complaining. */ timo = 0; for (;;) { /* Send RPC request (or re-send). 
*/ m = m_copym(mhead, 0, M_COPYALL, M_WAIT); if (m == NULL) { error = ENOBUFS; goto out; } error = sosend(so, (struct sockaddr *)sa, NULL, m, NULL, 0, 0); if (error) { printf("krpc_call: sosend: %d\n", error); goto out; } m = NULL; /* Determine new timeout. */ if (timo < MAX_RESEND_DELAY) timo++; else printf("RPC timeout for server 0x%x\n", ntohl(sa->sin_addr.s_addr)); /* * Wait for up to timo seconds for a reply. * The socket receive timeout was set to 1 second. */ secs = timo; while (secs > 0) { if (from) { FREE(from, M_SONAME); from = NULL; } if (m) { m_freem(m); m = NULL; } bzero(&auio,sizeof(auio)); auio.uio_resid = len = 1<<16; rcvflg = 0; error = soreceive(so, &from, &auio, &m, NULL, &rcvflg); if (error == EWOULDBLOCK) { secs--; continue; } if (error) goto out; len -= auio.uio_resid; /* Does the reply contain at least a header? */ if (len < MIN_REPLY_HDR) continue; if (m->m_len < MIN_REPLY_HDR) continue; reply = mtod(m, struct rpc_reply *); /* Is it the right reply? */ if (reply->rp_direction != txdr_unsigned(RPC_REPLY)) continue; if (reply->rp_xid != txdr_unsigned(xid)) continue; /* Was RPC accepted? (authorization OK) */ if (reply->rp_astatus != 0) { error = fxdr_unsigned(u_int32_t, reply->rp_errno); printf("rpc denied, error=%d\n", error); continue; } /* Did the call succeed? */ if (reply->rp_status != 0) { error = fxdr_unsigned(u_int32_t, reply->rp_status); if (error == RPC_PROGMISMATCH) { error = EBADRPC; goto out; } printf("rpc denied, status=%d\n", error); continue; } goto gotreply; /* break two levels */ } /* while secs */ } /* forever send/receive */ error = ETIMEDOUT; goto out; gotreply: /* * Get RPC reply header into first mbuf, * get its length, then strip it off. */ len = sizeof(*reply); if (m->m_len < len) { m = m_pullup(m, len); if (m == NULL) { error = ENOBUFS; goto out; } } reply = mtod(m, struct rpc_reply *); if (reply->rp_auth.authtype != 0) { len += fxdr_unsigned(u_int32_t, reply->rp_auth.authlen); len = (len + 3) & ~3; /* XXX? 
*/ } m_adj(m, len); /* result */ *data = m; if (from_p) { *from_p = from; from = NULL; } out: if (mhead) m_freem(mhead); if (from) free(from, M_SONAME); soclose(so); return error; } /* * eXternal Data Representation routines. * (but with non-standard args...) */ /* * String representation for RPC. */ struct xdr_string { u_int32_t len; /* length without null or padding */ char data[4]; /* data (longer, of course) */ /* data is padded to a long-word boundary */ }; struct mbuf * xdr_string_encode(str, len) char *str; int len; { struct mbuf *m; struct xdr_string *xs; int dlen; /* padded string length */ int mlen; /* message length */ dlen = (len + 3) & ~3; mlen = dlen + 4; if (mlen > MCLBYTES) /* If too big, we just can't do it. */ return (NULL); m = m_get(M_WAIT, MT_DATA); if (mlen > MLEN) { MCLGET(m, M_WAIT); if ((m->m_flags & M_EXT) == 0) { (void) m_free(m); /* There can be only one. */ return (NULL); } } xs = mtod(m, struct xdr_string *); m->m_len = mlen; xs->len = txdr_unsigned(len); bcopy(str, xs->data, len); return (m); } diff --git a/sys/pci/meteor.c b/sys/pci/meteor.c index ac24de7a68be..f836d14b8b35 100644 --- a/sys/pci/meteor.c +++ b/sys/pci/meteor.c @@ -1,2093 +1,2095 @@ /* * Copyright (c) 1995 Mark Tinguely and Jim Lowe * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Tinguely and Jim Lowe * 4. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* Change History: 8/21/95 Release 8/23/95 On advice from Stefan Esser, added volatile to PCI memory pointers to remove PCI caching . 8/29/95 Fixes suggested by Bruce Evans. meteor_mmap should return -1 on error rather than 0. unit # > NMETEOR should be unit # >= NMETEOR. 10/24/95 Turn 50 Hz processing for SECAM and 60 Hz processing off for AUTOMODE. 11/11/95 Change UV from always begin signed to ioctl selected to either signed or unsigned. 12/07/95 Changed 7196 startup codes for 50 Hz as recommended by Luigi Rizzo (luigi@iet.unipi.it) 12/08/95 Clear SECAM bit in PAL/NTSC and set input field count bits for 50 Hz mode (PAL/SECAM) before I was setting the output count bits. by Luigi Rizzo (luigi@iet.unipi.it) 12/18/95 Correct odd DMA field (never exceed, but good for safety Changed 7196 startup codes for 50 Hz as recommended by Luigi Rizzo (luigi@iet.unipi.it) 12/19/95 Changed field toggle mode to enable (offset 0x3c) recommended by luigi@iet.unipi.it Added in prototyping, include file, staticizing, and DEVFS changes from FreeBSD team. Changed the default allocated pages from 151 (NTSC) to 217 (PAL). 
Cleaned up some old comments in iic_write(). Added a Field (even or odd) only capture mode to eliminate the high frequency problems with compression algorithms. Recommended by luigi@iet.unipi.it. Changed geometry ioctl so if it couldn't allocate a large enough contiguous space, it wouldn't free the stuff it already had. Added new mode called YUV_422 which delivers the data in planer Y followed by U followed by V. This differs from the standard YUV_PACKED mode in that the chrominance (UV) data is in the correct (different) order. This is for programs like vic and mpeg_encode so they don't have to reorder the chrominance data. Added field count to stats. Increment frame count stat if capturing continuous on even frame grabs. Added my email address to these comments (james@cs.uwm.edu) suggested by (luigi@iet.unipt.it :-). Changed the user mode signal mechanism to allow the user program to be interrupted at the end of a frame in any one of the modes. Added SSIGNAL ioctl. Added a SFPS/GFPS ioctl so one may set the frames per second that the card captures. This code needs to be completed. Changed the interrupt routine so synchronous capture will work on fields or frames and the starting frame can be either even or odd. Added HALT_N_FRAMES and CONT_N_FRAMES so one could stop and continue synchronous capture mode. Change the tsleep/wakeup function to wait on mtr rather than &read_intr_wait. 1/22/96 Add option (METEOR_FreeBSD_210) for FreeBSD 2.1 to compile. Changed intr so it only printed errors every 50 times. Added unit number to error messages. Added get_meteor_mem and enabled range checking. 1/30/96 Added prelim test stuff for direct video dma transfers from Amancio Hasty (hasty@rah.star-gate.com). Until we get some stuff sorted out, this will be ifdef'ed with METEOR_DIRECT_VIDEO. This is very dangerous to use at present since we don't check the address that is passed by the user!!!!! 2/26/96 Added special SVIDEO input device type. 
2/27/96 Added meteor_reg.h file and associate types Converted meteor.c over to using meteor.h file. Prompted by Lars Jonas Olsson . 2/28/96 Added meteor RGB code from Lars Jonas Olsson . I make some mods to this code, so I hope it still works as I don't have an rgb card to test with. 2/29/96 tested the meteor RGB and supplied me with diffs. Thanks, we now have a working RGB version of the driver. Still need to clean up this code. 3/1/96 Fixed a nasty little bug that was clearing the VTR mode bit when the 7196 status was requested. 3/15/96 Fixed bug introduced in previous version that stopped the only fields mode from working. Added METEOR{GS}TS ioctl, still needs work. 3/25/96 Added YUV_9 and YUV_12 modes. Cleaned up some of the code and converted variables to use the new register types. 4/8/96 Fixed a bug with the range enable. Pointed out by Jim Bray. 5/13/96 Fix the FPS ioctl so it actually sets the frames per second. Code supplied by ian@robots.ox.ac.uk. The new code implements a new define: METEOR_SYSTEM_DEFAULT which should be defined as METEOR_PAL, METEOR_SECAM, or METEOR_NTSC in your system configuration file. If METEOR_SYSTEM_DEFAULT isn't defined, and there is not a signal when set_fps is called, then the call has no effect. Changed the spelling of PLANER to PLANAR as pointed out by Paco Hope and define PLANER to be PLANAR for backward compatibility. 5/28/95 METEOR_INPUT_DEV_RCA -> METEOR_INPUT_DEV0, not METEOR_GEO_DEV0. Pointed out by Ian Reid, . METEOR_DEV_MASK should be 0x0000f000 and not 0x2000f000, otherwise METEOR_RGB gets masked out. Pointed out by Ian Reid. Changed the fps code to give even distribution for low frame rates. Code supplied by Ian Reid. Fix some problems with the RGB version. Patch supplied by . Added METEOR_FIELD_MODE to include files for a future version of this driver. 
*/ #include "meteor.h" #if NMETEOR > 0 #include "opt_devfs.h" #include #include #include #include #include #include #ifdef DEVFS #include #endif /* DEVFS */ +#include + #if defined(METEOR_FreeBSD_210) #include /* bootverbose */ #endif #include #include #include #include #include "pci.h" #if NPCI > 0 #include #include #endif #include #include static void meteor_intr __P((void *arg)); /* * Allocate enough memory for: * 768x576 RGB 16 or YUV (16 storage bits/pixel) = 884736 = 216 pages * * You may override this using the options "METEOR_ALLOC_PAGES=value" in your * kernel configuration file. */ #ifndef METEOR_ALLOC_PAGES #define METEOR_ALLOC_PAGES 217 #endif #define METEOR_ALLOC (METEOR_ALLOC_PAGES * PAGE_SIZE) static meteor_reg_t meteor[NMETEOR]; #define METEOR_NUM(mtr) ((mtr - &meteor[0])/sizeof(meteor_reg_t)) #define METPRI (PZERO+8)|PCATCH static char* met_probe (pcici_t tag, pcidi_t type); static void met_attach(pcici_t tag, int unit); static u_long met_count; static struct pci_device met_device = { "meteor", met_probe, met_attach, &met_count }; DATA_SET (pcidevice_set, met_device); #if defined(METEOR_FreeBSD_210) /* XXX */ d_open_t meteor_open; d_close_t meteor_close; d_read_t meteor_read; d_write_t meteor_write; d_ioctl_t meteor_ioctl; d_mmap_t meteor_mmap; #else static d_open_t meteor_open; static d_close_t meteor_close; static d_read_t meteor_read; static d_write_t meteor_write; static d_ioctl_t meteor_ioctl; static d_mmap_t meteor_mmap; #define CDEV_MAJOR 67 static struct cdevsw meteor_cdevsw = { meteor_open, meteor_close, meteor_read, meteor_write, /*67*/ meteor_ioctl, nostop, nullreset, nodevtotty,/* Meteor */ seltrue, meteor_mmap, NULL, "meteor", NULL, -1 }; #endif static mreg_t saa7116_pci_default[sizeof(struct saa7116_regs)/sizeof(mreg_t)]={ /* PCI Memory registers */ /* BITS Type Description */ /* 0x00 */ 0x00000000, /* 31:1 e*RW DMA 1 (Even) 0 RO 0x0 */ /* 0x04 */ 0x00000000, /* 31:2 e*RW DMA 2 (Even) 1:0 RO 0x0 */ /* 0x08 */ 0x00000000, /* 31:2 e*RW 
DMA 3 (Even) 1:0 RO 0x0 */ /* 0x0c */ 0x00000000, /* 31:1 o*RW DMA 1 (Odd) 0 RO 0x0 */ /* 0x10 */ 0x00000000, /* 31:2 o*RW DMA 2 (Odd) 1:0 RO 0x0 */ /* 0x14 */ 0x00000000, /* 31:2 o*RW DMA 3 (Odd) 1:0 RO 0x0 */ /* 0x18 */ 0x00000500, /* 15:2 e*RW Stride 1 (Even) 1:0 RO 0x0 */ /* 0x1c */ 0x00000000, /* 15:2 e*RW Stride 2 (Even) 1:0 RO 0x0 */ /* 0x20 */ 0x00000000, /* 15:2 e*RW Stride 3 (Even) 1:0 RO 0x0 */ /* 0x24 */ 0x00000500, /* 15:2 o*RW Stride 1 (Odd) 1:0 RO 0x0 */ /* 0x28 */ 0x00000000, /* 15:2 o*RW Stride 2 (Odd) 1:0 RO 0x0 */ /* 0x2c */ 0x00000000, /* 15:2 o*RW Stride 3 (Odd) 1:0 RO 0x0 */ /* 0x30 */ 0xeeeeee01, /* 31:8 *RW Route (Even) 7:0 *RW Mode (Even) */ /* 0x34 */ 0xeeeeee01, /* 31:8 *RW Route (Odd) 7:0 *RW Mode (Odd) */ /* 0x38 */ 0x00200020, /* 22:16 *RW FIFO Trigger Planer Mode, 6:0 *RW FIFO Trigger Packed Mode */ /* 0x3c */ 0x00000107, /* 9:8 *RW Reserved (0x0) 2 *RW Field Toggle 1 *RW Reserved (0x1) 0 *RW Reserved (0x1) */ /* 0x40 */ 0x000000c0, /* 15 *RW Range Enable 14 *RW Corrupt Disable 11 *RR Address Error (Odd) 10 *RR Address Error (Even) 9 *RR Field Corrupt (Odd) 8 *RR Field Corrupt (Even) 7 *RW Fifo Enable 6 *RW VRSTN# 5 *RR Field Done (Odd) 4 *RR Field Done (Even) 3 *RS Single Field Capture (Odd) 2 *RS Single Field Capture (Even) 1 *RW Capture (ODD) Continous 0 *RW Capture (Even) Continous */ /* 0x44 */ 0x00000000, /* 7:0 *RW Retry Wait Counter */ /* 0x48 */ 0x00000307, /* 10 *RW Interrupt mask, start of field 9 *RW Interrupt mask, end odd field 8 *RW Interrupt mask, end even field 2 *RR Interrupt status, start of field 1 *RR Interrupt status, end of odd 0 *RR Interrupt status, end of even */ /* 0x4c */ 0x00000001, /* 31:0 *RW Field Mask (Even) continous */ /* 0x50 */ 0x00000001, /* 31:0 *RW Field Mask (Odd) continous */ /* 0x54 */ 0x00000000, /* 20:16 *RW Mask Length (Odd) 4:0 *RW Mask Length (Even) */ /* 0x58 */ 0x0005007c, /* 22:16 *RW FIFO almost empty 6:0 *RW FIFO almost full */ /* 0x5c */ 0x461e1e0f, /* 31:24 *RW I2C Phase 4 23:16 
*RW I2C Phase 3 15:8 *RW I2C Phase 2 7:0 *RW I2C Phase 1 */ /* 0x60 */ 0x00000300, /* 31:24 *RO I2C Read Data 23:16 **RW I2C Auto Address 11 RO I2C SCL Input 10 RO I2C SDA Input 9 RR I2C Direct Abort 8 RR I2C Auto Abort 3 RW I2C SCL Output 2 RW I2C SDA Output 1 RW I2C Bypass 0 RW I2C Auto Enable */ /* 0x64 */ 0x00000000, /* 24 RS I2C New Cycle 23:16 **RW I2C Direct Address 15:8 **RW I2C Direct Sub-address 7:0 **RW I2C Direct Write Address */ /* 0x68 */ 0x00000000, /* 31:24 **RW I2C Auto Sub-address 1 (Even) 23:16 **RW I2C Auto Data 1 (Even) 15:8 **RW I2C Auto Sub-address 0 (Even) 7:0 **RW I2C Auto Data 0 (Even) */ /* 0x6c */ 0x00000000, /* 31:24 **RW I2C Auto Sub-address 3 (Even) 23:16 **RW I2C Auto Data 3 (Even) 15:8 **RW I2C Auto Sub-address 2 (Even) 7:0 **RW I2C Auto Data 2 (Even) */ /* 0x70 */ 0x00000000, /* 31:24 **RW I2C Auto Sub-address 5 (Even) 23:16 **RW I2C Auto Data 5 (Even) 15:8 **RW I2C Auto Sub-address 4 (Even) 7:0 **RW I2C Auto Data 4 (Even) */ /* 0x74 */ 0x00000000, /* 31:24 **RW I2C Auto Sub-address 7 (Even) 23:16 **RW I2C Auto Data 7 (Even) 15:8 **RW I2C Auto Sub-address 6 (Even) 7:0 **RW I2C Auto Data 6 (Even) */ /* 0x78 */ 0x00000000, /* 31:24 **RW I2C Auto Sub-address 1 (Odd) 23:16 **RW I2C Auto Data 1 (Odd) 15:8 **RW I2C Auto Sub-address 0 (Odd) 7:0 **RW I2C Auto Data 0 (Odd) */ /* 0x7c */ 0x00000000, /* 31:24 **RW I2C Auto Sub-address 3 (Odd) 23:16 **RW I2C Auto Data 3 (Odd) 15:8 **RW I2C Auto Sub-address 2 (Odd) 7:0 **RW I2C Auto Data 2 (Odd) */ /* 0x80 */ 0x00000000, /* 31:24 **RW I2C Auto Sub-address 5 (Odd) 23:16 **RW I2C Auto Data 5 (Odd) 15:8 **RW I2C Auto Sub-address 4 (Odd) 7:0 **RW I2C Auto Data 4 (Odd) */ /* 0x84 */ 0x00000000, /* 31:24 **RW I2C Auto Sub-address 7 (Odd) 23:16 **RW I2C Auto Data 7 (Odd) 15:8 **RW I2C Auto Sub-address 6 (Odd) 7:0 **RW I2C Auto Data 6 (Odd) */ /* 0x88 */ 0x00000000, /* 23:16 **RW I2C Register Enable (Odd) 7:0 **RW I2C Register Enable (Even) */ /* 0x8c */ 0x00000000, /* 23:2 e*RW DMA End (Even) 1:0 RO 
0x0 */ /* 0x90 */ 0x00000000 /* 23:2 e*RW DMA End (Odd) 1:0 RO 0x0 */ }; static u_char saa7196_i2c_default[NUM_SAA7196_I2C_REGS] = { /* SAA7196 I2C bus control */ /* BITS Function */ /* 00 */ 0x50, /* 7:0 Increment Delay */ /* 01 */ 0x30, /* 7:0 Horizontal Sync Begin for 50hz */ /* 02 */ 0x00, /* 7:0 Horizontal Sync Stop for 50hz */ /* 03 */ 0xe8, /* 7:0 Horizontal Sync Clamp Start for 50hz */ /* 04 */ 0xb6, /* 7:0 Horizontal Sync Clamp Stop for 50hz */ /* 05 */ 0xf4, /* 7:0 Horizontal Sync Start after PH1 for 50hz */ /* 06 */ 0x46, /* 7 Input mode =0 CVBS, =1 S-Video 6 Pre filter 5:4 Aperture Bandpass characteristics 3:2 Coring range for high freq 1:0 Aperture bandpass filter weights */ /* 07 */ 0x00, /* 7:0 Hue */ /* 08 */ 0x7f, /* 7:3 Colour-killer threshold QAM (PAL, NTSC) */ /* 09 */ 0x7f, /* 7:3 Colour-killer threshold SECAM */ /* 0a */ 0x7f, /* 7:0 PAL switch sensitivity */ /* 0b */ 0x7f, /* 7:0 SECAM switch sensitivity */ /* 0c */ 0x40, /* 7 Colour-on bit 6:5 AGC filter */ /* 0d */ 0x84, /* 7 VTR/TV mode bit = 1->VTR mode 3 Realtime output mode select bit 2 HREF position select 1 Status byte select 0 SECAM mode bit */ /* 0e */ 0x38, /* 7 Horizontal clock PLL 5 Select interal/external clock source 4 Output enable of Horizontal/Vertical sync 3 Data output YUV enable 2 S-VHS bit 1 GPSW2 0 GPSW1 */ /* 0f */ 0x50, /* 7 Automatic Field detection 6 Field Select 0 = 50hz, 1=60hz 5 SECAM cross-colour reduction 4 Enable sync and clamping pulse 3:1 Luminance delay compensation */ /* 10 */ 0x00, /* 2 Select HREF Position 1:0 Vertical noise reduction */ /* 11 */ 0x2c, /* 7:0 Chrominance gain conrtol for QAM */ /* 12 */ 0x40, /* 7:0 Chrominance saturation control for VRAM port */ /* 13 */ 0x40, /* 7:0 Luminance contract control for VRAM port */ /* 14 */ 0x34, /* 7:0 Horizontal sync begin for 60hz */ #ifdef notdef /* 15 */ 0x0c, /* 7:0 Horizontal sync stop for 60hz */ /* 16 */ 0xfb, /* 7:0 Horizontal clamp begin for 60hz */ /* 17 */ 0xd4, /* 7:0 Horizontal clamp stop for 
60hz */ /* 18 */ 0xec, /* 7:0 Horizontal sync start after PH1 for 60hz */ #else 0x0a, 0xf4, 0xce, 0xf4, #endif /* 19 */ 0x80, /* 7:0 Luminance brightness control for VRAM port */ /* 1a */ 0x00, /* 1b */ 0x00, /* 1c */ 0x00, /* 1d */ 0x00, /* 1e */ 0x00, /* 1f */ 0x00, /* 20 */ 0x90, /* 7 ROM table bypass switch 6:5 Set output field mode 4 VRAM port outputs enable 3:2 First pixel position in VRO data 1:0 FIFO output register select */ /* 21 */ 0x80, /* 7:0 [7:0] Pixel number per line on output */ /* 22 */ 0x80, /* 7:0 [7:0] Pixel number per line on input */ /* 23 */ 0x03, /* 7:0 [7:0] Horizontal start position of scaling win*/ /* 24 */ 0x8a, /* 7:5 Horizontal decimation filter 4 [8] Horizontal start position of scaling win 3:2 [9:8] Pixel number per line on input 1:0 [9:8] Pixel number per line on output */ /* 25 */ 0xf0, /* 7:0 [7:0] Line number per output field */ /* 26 */ 0xf0, /* 7:0 [7:0] Line number per input field */ /* 27 */ 0x0f, /* 7:0 [7:0] Vertical start of scaling window */ /* 28 */ 0x80, /* 7 Adaptive filter switch 6:5 Vertical luminance data processing 4 [8] Vertical start of scaling window 3:2 [9:8] Line number per input field 1:0 [9:8] Line number per output field */ /* 29 */ 0x16, /* 7:0 [7:0] Vertical bypass start */ /* 2a */ 0x00, /* 7:0 [7:0] Vertical bypass count */ /* 2b */ 0x00, /* 4 [8] Vertical bypass start 2 [8] Vertical bypass count 0 Polarity, internally detected odd even flag */ /* 2c */ 0x80, /* 7:0 Set lower limit V for colour-keying */ /* 2d */ 0x7f, /* 7:0 Set upper limit V for colour-keying */ /* 2e */ 0x80, /* 7:0 Set lower limit U for colour-keying */ /* 2f */ 0x7f, /* 7:0 Set upper limit U for colour-keying */ /* 30 */ 0xbf /* 7 VRAM bus output format 6 Adaptive geometrical filter 5 Luminance limiting value 4 Monochrome and two's complement output data sel 3 Line quailifier flag 2 Pixel qualifier flag 1 Transparent data transfer 0 Extended formats enable bit */ }; static u_char bt254_default[NUM_BT254_REGS] = { 0x00, /* 24 bpp 
*/ 0xa0, 0xa0, 0xa0, 0x50, 0x50, 0x50, } ; /* * i2c_write: * Returns 0 Succesful completion. * Returns 1 If transfer aborted or timeout occured. * */ static i2c_print_err = 1; static int i2c_write(meteor_reg_t * mtr, u_char slave, u_char rw, u_char reg, u_char data) { register unsigned long wait_counter = 0x0001ffff; register mreg_t * iic_write_loc = &mtr->base->i2c_write; register int err = 0; /* Write the data the the i2c write register */ *iic_write_loc = SAA7116_IIC_NEW_CYCLE | (((u_long)slave|(u_long)rw) << 16) | ((u_long)reg << 8) | (u_long)data; /* Wait until the i2c cycle is compeleted */ while((*iic_write_loc & SAA7116_IIC_NEW_CYCLE)) { if(!wait_counter) break; wait_counter--; } /* 1ffff should be enough delay time for the i2c cycle to complete */ if(!wait_counter) { if(i2c_print_err) printf("meteor%d: %d i2c %s transfer timeout 0x%x", METEOR_NUM(mtr), slave, rw ? "read" : "write", *iic_write_loc); err=1; } /* Check for error on direct write, clear if any */ if(mtr->base->i2c_read & SAA7116_IIC_DIRECT_TRANSFER_ABORTED){ mtr->base->i2c_read |= SAA7116_IIC_DIRECT_TRANSFER_ABORTED; if(i2c_print_err) printf("meteor%d: 0x%x i2c %s tranfer aborted", METEOR_NUM(mtr), slave, rw ? 
"read" : "write" ); err= 1; } if(err) { if(i2c_print_err) printf(" - reg=0x%x, value=0x%x.\n", reg, data); } return err; } #undef i2c_print static char * met_probe (pcici_t tag, pcidi_t type) { switch (type) { case SAA7116_PHILIPS_ID: /* meteor */ return("Philips SAA 7116"); }; return ((char *)0); } /* interrupt handling routine complete meteor_read() if using interrupts */ static void meteor_intr(void *arg) { meteor_reg_t *mtr = (meteor_reg_t *) arg; mreg_t *cap = &mtr->base->cap_cntl, *base = &mtr->base->dma1e, *stat = &mtr->base->irq_stat; u_long status = *stat, cap_err = *cap & 0x00000f00, #ifdef METEOR_CHECK_PCI_BUS pci_err = pci_conf_read(mtr->tag, PCI_COMMAND_STATUS_REG), #endif next_base = (u_long)(vtophys(mtr->bigbuf)); /* * Disable future interrupts if a capture mode is not selected. * This can happen when we are in the process of closing or * changing capture modes, otherwise it shouldn't happen. */ if(!(mtr->flags & METEOR_CAP_MASK)) { *cap &= 0x8ff0; /* disable future interrupts */ } #ifdef METEOR_CHECK_PCI_BUS /* * Check for pci bus errors. */ #define METEOR_MASTER_ABORT 0x20000000 #define METEOR_TARGET_ABORT 0x10000000 if(pci_err & METEOR_MASTER_ABORT) { printf("meteor%d: intr: pci bus master dma abort: 0x%x 0x%x.\n", METEOR_NUM(mtr), *base, *(base+3)); pci_conf_write(mtr->tag, PCI_COMMAND_STATUS_REG, pci_err); } if(pci_err & METEOR_TARGET_ABORT) { printf("meteor%d: intr: pci bus target dma abort: 0x%x 0x%x.\n", METEOR_NUM(mtr), *base, *(base+3)); pci_conf_write(mtr->tag, PCI_COMMAND_STATUS_REG, pci_err); } #endif /* * Check for errors. */ if (cap_err) { if (cap_err & 0x300) { if(mtr->fifo_errors % 50 == 0) { printf("meteor%d: capture error", METEOR_NUM(mtr)); printf(": %s FIFO overflow.\n", cap_err&0x0100? "even" : "odd"); } mtr->fifo_errors++ ; /* increment fifo capture errors cnt */ } if (cap_err & 0xc00) { if(mtr->dma_errors % 50 == 0) { printf("meteor%d: capture error", METEOR_NUM(mtr)); printf(": %s DMA address.\n", cap_err&0x0400? 
"even" : "odd"); } mtr->dma_errors++ ; /* increment DMA capture errors cnt */ } } *cap |= 0x0f30; /* clear error and field done */ /* * In synchronous capture mode we need to know what the address * offset for the next field/frame will be. next_base holds the * value for the even dma buffers (for odd, one must add stride). */ if((mtr->flags & METEOR_SYNCAP) && !mtr->synch_wait && (mtr->current < mtr->frames)) { /* could be !=, but < is safer */ /* next_base is initialized to mtr->bigbuf */ next_base += mtr->frame_size * mtr->current; if(mtr->flags & METEOR_WANT_TS) next_base += sizeof(struct timeval) * mtr->current; } /* * Count the field and clear the field flag. * * In single mode capture, clear the continuous capture mode. * * In synchronous capture mode, if we have room for another field, * adjust DMA buffer pointers. * When we are above the hi water mark (hiwat), mtr->synch_wait will * be set and we will not bump the DMA buffer pointers. Thus, once * we reach the hi water mark, the driver acts like a continuous mode * capture on the mtr->current frame until we hit the low water * mark (lowat). The user had the option of stopping or halting * the capture if this is not the desired effect. */ if (status & 0x1) { /* even field */ mtr->even_fields_captured++; mtr->flags &= ~METEOR_WANT_EVEN; if((mtr->flags & METEOR_SYNCAP) && !mtr->synch_wait) { *base = next_base; /* XXX should add adjustments for YUV_422 & PLANAR */ } /* * If the user requested to be notified via signal, * let them know the field is complete. */ if(mtr->proc && (mtr->signal & METEOR_SIG_MODE_MASK)) psignal(mtr->proc, mtr->signal&(~METEOR_SIG_MODE_MASK)); } if (status & 0x2) { /* odd field */ mtr->odd_fields_captured++; mtr->flags &= ~METEOR_WANT_ODD; if((mtr->flags & METEOR_SYNCAP) && !mtr->synch_wait) { *(base+3) = next_base + *(base+6); /* XXX should add adjustments for YUV_422 & PLANAR */ } /* * If the user requested to be notified via signal, * let them know the field is complete. 
*/ if(mtr->proc && (mtr->signal & METEOR_SIG_MODE_MASK)) psignal(mtr->proc, mtr->signal&(~METEOR_SIG_MODE_MASK)); } /* * If we have a complete frame. */ if(!(mtr->flags & METEOR_WANT_MASK)) { mtr->frames_captured++; /* * post the completion time. */ if(mtr->flags & METEOR_WANT_TS) { struct timeval *ts; if(mtr->alloc_pages * PAGE_SIZE <= (mtr->frame_size + sizeof(struct timeval))) { ts =(struct timeval *)mtr->bigbuf + mtr->frame_size; /* doesn't work in synch mode except for first frame */ /* XXX */ microtime(ts); } } /* * Wake up the user in single capture mode. */ if(mtr->flags & METEOR_SINGLE) wakeup((caddr_t)mtr); /* * If the user requested to be notified via signal, * let them know the frame is complete. */ if(mtr->proc && !(mtr->signal & METEOR_SIG_MODE_MASK)) psignal(mtr->proc, mtr->signal&(~METEOR_SIG_MODE_MASK)); /* * Reset the want flags if in continuous or * synchronous capture mode. */ if(mtr->flags & (METEOR_CONTIN|METEOR_SYNCAP)) { switch(mtr->flags & METEOR_ONLY_FIELDS_MASK) { case METEOR_ONLY_ODD_FIELDS: mtr->flags |= METEOR_WANT_ODD; break; case METEOR_ONLY_EVEN_FIELDS: mtr->flags |= METEOR_WANT_EVEN; break; default: mtr->flags |= METEOR_WANT_MASK; break; } } /* * Special handling for synchronous capture mode. */ if(mtr->flags & METEOR_SYNCAP) { struct meteor_mem *mm = mtr->mem; /* * Mark the current frame as active. It is up to * the user to clear this, but we will clear it * for the user for the current frame being captured * if we are within the water marks (see below). */ mm->active |= 1 << (mtr->current - 1); /* * Since the user can muck with these values, we need * to check and see if they are sane. If they don't * pass the sanity check, disable the capture mode. * This is rather rude, but then so was the user. * * Do we really need all of this or should we just * eliminate the possiblity of allowing the * user to change hi and lo water marks while it * is running? 
XXX */ if(mm->num_active_bufs < 0 || mm->num_active_bufs > mtr->frames || mm->lowat < 1 || mm->lowat >= mtr->frames || mm->hiwat < 1 || mm->hiwat >= mtr->frames || mm->lowat > mm->hiwat ) { *cap &= 0x8ff0; mtr->flags &= ~(METEOR_SYNCAP|METEOR_WANT_MASK); } else { /* * Ok, they are sane, now we want to * check the water marks. */ if(mm->num_active_bufs <= mm->lowat) mtr->synch_wait = 0; if(mm->num_active_bufs >= mm->hiwat) mtr->synch_wait = 1; /* * Clear the active frame bit for this frame * and advance the counters if we are within * the banks of the water marks. */ if(!mtr->synch_wait) { mm->active &= ~(1 << mtr->current); mtr->current++; if(mtr->current > mtr->frames) mtr->current = 1; mm->num_active_bufs++; } } } } *stat |= 0x7; /* clear interrupt status */ return; } static void set_fps(meteor_reg_t *mtr, u_short fps) { struct saa7116_regs *s7116 = mtr->base; unsigned status; unsigned maxfps, mask = 0x1, length = 0; SAA7196_WRITE(mtr, SAA7196_STDC, SAA7196_REG(mtr, SAA7196_STDC) | 0x02); SAA7196_READ(mtr); status = (s7116->i2c_read & 0xff000000L) >> 24; /* * Determine if there is an input signal. Depending on the * frequency we either have a max of 25 fps (50 hz) or 30 fps (60 hz). * If there is no input signal, then we need some defaults. If the * user neglected to specify any defaults, just set to the fps to max. */ if((status & 0x40) == 0) { /* Is there a signal ? */ if(status & 0x20) { maxfps = 30; /* 60 hz system */ } else { maxfps = 25; /* 50 hz system */ } } else { /* We have no signal, check defaults */ #if METEOR_SYSTEM_DEFAULT == METEOR_PAL || METEOR_SYSTEM_DEFAULT == METEOR_SECAM maxfps = 25; #elif METEOR_SYSTEM_DEFAULT == METEOR_NTSC maxfps = 30; #else /* Don't really know what to do, just set max */ maxfps = 30; fps = 30; #endif } /* * A little sanity checking... */ if(fps < 1) fps = 1; if(fps > maxfps) fps = maxfps; /* * Compute the mask/length using the fps. 
*/ if(fps == maxfps) { mask = 0x1; length = 0x0; } else if ((float)fps == maxfps/2.0) { mask = 0x1; length = 0x1; } else if (fps > maxfps/2) { float step, b; mask = (1<fme = s7116->fmo = mask; s7116->fml = (length << 16) | length;; mtr->fps = fps; return; } /* * There is also a problem with range checking on the 7116. * It seems to only work for 22 bits, so the max size we can allocate * is 22 bits long or 4194304 bytes assuming that we put the beginning * of the buffer on a 2^24 bit boundary. The range registers will use * the top 8 bits of the dma start registers along with the bottom 22 * bits of the range register to determine if we go out of range. * This makes getting memory a real kludge. * */ #define RANGE_BOUNDARY (1<<22) static vm_offset_t get_meteor_mem(int unit, unsigned size) { vm_offset_t addr = 0; addr = vm_page_alloc_contig(size, 0x100000, 0xffffffff, 1<<24); if(addr == 0) addr = vm_page_alloc_contig(size, 0x100000, 0xffffffff, PAGE_SIZE); if(addr == 0) { printf("meteor%d: Unable to allocate %d bytes of memory.\n", unit, size); } return addr; } static void bt254_write(meteor_reg_t *mtr, u_char addr, u_char data) { addr &= 0x7; /* sanity? 
*/ mtr->bt254_reg[addr] = data; PCF8574_DATA_WRITE(mtr, data); /* set data */ PCF8574_CTRL_WRITE(mtr, (PCF8574_CTRL_REG(mtr) & ~0x7) | addr); PCF8574_CTRL_WRITE(mtr, PCF8574_CTRL_REG(mtr) & ~0x10); /* WR/ to 0 */ PCF8574_CTRL_WRITE(mtr, PCF8574_CTRL_REG(mtr) | 0x10); /* WR to 1 */ PCF8574_DATA_WRITE(mtr, 0xff); /* clr data */ } static void bt254_init(meteor_reg_t *mtr) { int i; PCF8574_CTRL_WRITE(mtr, 0x7f); PCF8574_DATA_WRITE(mtr, 0xff); /* data port must be 0xff */ PCF8574_CTRL_WRITE(mtr, 0x7f); /* init RGB module for 24bpp, composite input */ for(i=0; ibase->cap_cntl = 0x00000040L; vbase_addr = &mtr->base->dma1e; for (i = 0 ; i < (sizeof(struct saa7116_regs)/sizeof(mreg_t)); i++) *vbase_addr++ = saa7116_pci_default[i]; /* * Check for the Philips SAA7196 */ i2c_print_err = 0; if(i2c_write(mtr, SAA7196_I2C_ADDR, SAA7116_I2C_WRITE, 0, 0xff) == 0) { i2c_print_err = 1; /* * Initialize 7196 */ for (i = 0; i < NUM_SAA7196_I2C_REGS; i++) SAA7196_WRITE(mtr, i, saa7196_i2c_default[i]); /* * Get version number. */ SAA7196_WRITE(mtr, SAA7196_STDC, SAA7196_REG(mtr, SAA7196_STDC) & ~0x02); SAA7196_READ(mtr); printf("meteor%d: rev 0x%x\n", METEOR_NUM(mtr), (mtr->base->i2c_read&0xff000000L)>>28); } else { i2c_print_err = 1; printf("meteor%d: \n", METEOR_NUM(mtr)); } /* * Check for RGB module, initialized if found. */ i2c_print_err = 0; if(i2c_write(mtr,PCF8574_DATA_I2C_ADDR,SAA7116_I2C_WRITE,0,0xff) == 0) { i2c_print_err = 1; printf("meteor%d: \n", METEOR_NUM(mtr)); /* does this have a rev #? 
*/ bt254_init(mtr); /* Set up RGB module */ mtr->flags = METEOR_RGB; } else { i2c_print_err = 1; mtr->flags = 0; } set_fps(mtr, 30); } static void met_attach(pcici_t tag, int unit) { #ifdef METEOR_IRQ u_long old_irq, new_irq; #endif METEOR_IRQ meteor_reg_t *mtr; vm_offset_t buf; u_long latency; if (unit >= NMETEOR) { printf("meteor%d: attach: only %d units configured.\n", unit, NMETEOR); printf("meteor%d: attach: invalid unit number.\n", unit); return ; } mtr = &meteor[unit]; mtr->tag = tag; pci_map_mem(tag, PCI_MAP_REG_START, (vm_offset_t *)&mtr->base, &mtr->phys_base); #ifdef METEOR_IRQ /* from the configuration file */ old_irq = pci_conf_read(tag, PCI_INTERRUPT_REG); pci_conf_write(tag, PCI_INTERRUPT_REG, METEOR_IRQ); new_irq = pci_conf_read(tag, PCI_INTERRUPT_REG); printf("meteor%d: attach: irq changed from %d to %d\n", unit, (old_irq & 0xff), (new_irq & 0xff)); #endif METEOR_IRQ /* setup the interrupt handling routine */ pci_map_int(tag, meteor_intr, (void*) mtr, &net_imask); /* * PCI latency timer. 32 is a good value for 4 bus mastering slots, if * you have more than for, then 16 would probably be a better value. 
* */ #ifndef METEOR_DEF_LATENCY_VALUE #define METEOR_DEF_LATENCY_VALUE 32 #endif latency = pci_conf_read(tag, PCI_LATENCY_TIMER); latency = (latency >> 8) & 0xff; if(bootverbose) { if(latency) printf("meteor%d: PCI bus latency is", unit); else printf("meteor%d: PCI bus latency was 0 changing to", unit); } if(!latency) { latency = METEOR_DEF_LATENCY_VALUE; pci_conf_write(tag, PCI_LATENCY_TIMER, latency<<8); } if(bootverbose) { printf(" %d.\n", latency); } meteor_init(mtr); /* set up saa7116, saa7196, and rgb module */ if(METEOR_ALLOC) buf = get_meteor_mem(unit, METEOR_ALLOC); else buf = 0; if(bootverbose) { printf("meteor%d: buffer size %d, addr 0x%x\n", unit, METEOR_ALLOC, vtophys(buf)); } mtr->bigbuf = buf; mtr->alloc_pages = METEOR_ALLOC_PAGES; if(buf != 0) { bzero((caddr_t) buf, METEOR_ALLOC); buf = vtophys(buf); /* 640x480 RGB 16 */ mtr->base->dma1e = buf; mtr->base->dma1o = buf + 0x500; mtr->base->dma_end_e = mtr->base->dma_end_o = buf + METEOR_ALLOC; } /* 1 frame of 640x480 RGB 16 */ mtr->cols = 640; mtr->rows = 480; mtr->depth = 2; /* two bytes per pixel */ mtr->frames = 1; /* one frame */ mtr->flags |= METEOR_INITALIZED | METEOR_AUTOMODE | METEOR_DEV0 | METEOR_RGB16; #ifdef DEVFS mtr->devfs_token = devfs_add_devswf(&meteor_cdevsw, unit, DV_CHR, 0, 0, 0644, "meteor"); #endif } #define UNIT(x) ((x) & 0x07) #ifdef unused static int meteor_reset(dev_t dev) { int unit = UNIT(minor(dev)); struct saa7116_regs *m; if(unit >= NMETEOR) return ENXIO; m = meteor[unit].base; m->cap_cntl = 0x0; tsleep((caddr_t)m, METPRI, "Mreset", hz/50); m->cap_cntl = 0x8ff0; m->cap_cntl = 0x80c0; m->cap_cntl = 0x8040; tsleep((caddr_t)m, METPRI, "Mreset", hz/10); m->cap_cntl = 0x80c0; return 0; } #endif /*--------------------------------------------------------- ** ** Meteor character device driver routines ** **--------------------------------------------------------- */ int meteor_open(dev_t dev, int flags, int fmt, struct proc *p) { meteor_reg_t *mtr; int unit; int i; unit = 
UNIT(minor(dev)); if (unit >= NMETEOR) /* unit out of range */ return(ENXIO); mtr = &(meteor[unit]); if (!(mtr->flags & METEOR_INITALIZED)) /* device not found */ return(ENXIO); if (mtr->flags & METEOR_OPEN) /* device is busy */ return(EBUSY); mtr->flags |= METEOR_OPEN; /* * Make sure that the i2c regs are set the same for each open. */ for(i=0; i< NUM_SAA7196_I2C_REGS; i++) { SAA7196_WRITE(mtr, i, saa7196_i2c_default[i]); } mtr->fifo_errors = 0; mtr->dma_errors = 0; mtr->frames_captured = 0; mtr->even_fields_captured = 0; mtr->odd_fields_captured = 0; mtr->proc = (struct proc *)0; set_fps(mtr, 30); #ifdef METEOR_TEST_VIDEO mtr->video.addr = 0; mtr->video.width = 0; mtr->video.banksize = 0; mtr->video.ramsize = 0; #endif return(0); } int meteor_close(dev_t dev, int flags, int fmt, struct proc *p) { meteor_reg_t *mtr; int unit; #ifdef METEOR_DEALLOC_ABOVE int temp; #endif unit = UNIT(minor(dev)); if (unit >= NMETEOR) /* unit out of range */ return(ENXIO); mtr = &(meteor[unit]); mtr->flags &= ~METEOR_OPEN; if(mtr->flags & METEOR_SINGLE) /* this should not happen, the read capture should have completed or in the very least recieved a signal before close is called. */ wakeup((caddr_t)mtr); /* continue read */ /* * Turn off capture mode. 
*/ mtr->base->cap_cntl = 0x8ff0; mtr->flags &= ~(METEOR_CAP_MASK|METEOR_WANT_MASK); #ifdef METEOR_DEALLOC_PAGES if (mtr->bigbuf != NULL) { kmem_free(kernel_map,mtr->bigbuf,(mtr->alloc_pages*PAGE_SIZE)); mtr->bigbuf = NULL; mtr->alloc_pages = 0; } #else #ifdef METEOR_DEALLOC_ABOVE if (mtr->bigbuf != NULL && mtr->alloc_pages > METEOR_DEALLOC_ABOVE) { temp = METEOR_DEALLOC_ABOVE - mtr->alloc_pages; kmem_free(kernel_map, mtr->bigbuf+((mtr->alloc_pages - temp) * PAGE_SIZE), (temp * PAGE_SIZE)); mtr->alloc_pages = METEOR_DEALLOC_ABOVE; } #endif #endif return(0); } static void start_capture(meteor_reg_t *mtr, unsigned type) { mreg_t *cap = &mtr->base->cap_cntl; mtr->flags |= type; switch(mtr->flags & METEOR_ONLY_FIELDS_MASK) { case METEOR_ONLY_EVEN_FIELDS: mtr->flags |= METEOR_WANT_EVEN; if(type == METEOR_SINGLE) *cap = 0x0ff4 | mtr->range_enable; else *cap = 0x0ff1 | mtr->range_enable; break; case METEOR_ONLY_ODD_FIELDS: mtr->flags |= METEOR_WANT_ODD; if(type == METEOR_SINGLE) *cap = 0x0ff8 | mtr->range_enable; else *cap = 0x0ff2 | mtr->range_enable; break; default: mtr->flags |= METEOR_WANT_MASK; if(type == METEOR_SINGLE) *cap = 0x0ffc | mtr->range_enable; else *cap = 0x0ff3 | mtr->range_enable; break; } } int meteor_read(dev_t dev, struct uio *uio, int ioflag) { meteor_reg_t *mtr; int unit; int status; int count; unit = UNIT(minor(dev)); if (unit >= NMETEOR) /* unit out of range */ return(ENXIO); mtr = &(meteor[unit]); if (mtr->bigbuf == 0)/* no frame buffer allocated (ioctl failed) */ return(ENOMEM); if (mtr->flags & METEOR_CAP_MASK) return(EIO); /* already capturing */ count = mtr->rows * mtr->cols * mtr->depth; if (uio->uio_iov->iov_len < count) return(EINVAL); /* Start capture */ start_capture(mtr, METEOR_SINGLE); status=tsleep((caddr_t)mtr, METPRI, "capturing", 0); if (!status) /* successful capture */ status = uiomove((caddr_t)mtr->bigbuf, count, uio); else printf ("meteor%d: read: tsleep error %d\n", unit, status); mtr->flags &= ~(METEOR_SINGLE | 
METEOR_WANT_MASK); return(status); } int meteor_write(dev_t dev, struct uio *uio, int ioflag) { return(0); } int meteor_ioctl(dev_t dev, int cmd, caddr_t arg, int flag, struct proc *pr) { int error; int unit; unsigned int temp; meteor_reg_t *mtr; struct meteor_counts *cnt; struct meteor_geomet *geo; struct meteor_mem *mem; struct meteor_capframe *frame; #ifdef METEOR_TEST_VIDEO struct meteor_video *video; #endif vm_offset_t buf; struct saa7116_regs *base; error = 0; if (!arg) return(EINVAL); unit = UNIT(minor(dev)); if (unit >= NMETEOR) /* unit out of range */ return(ENXIO); mtr = &(meteor[unit]); base = mtr->base; switch (cmd) { case METEORSTS: if(*arg) mtr->flags |= METEOR_WANT_TS; else mtr->flags &= ~METEOR_WANT_TS; break; case METEORGTS: if(mtr->flags & METEOR_WANT_TS) *arg = 1; else *arg = 0; break; #ifdef METEOR_TEST_VIDEO case METEORGVIDEO: video = (struct meteor_video *)arg; video->addr = mtr->video.addr; video->width = mtr->video.width; video->banksize = mtr->video.banksize; video->ramsize = mtr->video.ramsize; break; case METEORSVIDEO: video = (struct meteor_video *)arg; mtr->video.addr = video->addr; mtr->video.width = video->width; mtr->video.banksize = video->banksize; mtr->video.ramsize = video->ramsize; break; #endif case METEORSFPS: set_fps(mtr, *(u_short *)arg); break; case METEORGFPS: *(u_short *)arg = mtr->fps; break; case METEORSSIGNAL: mtr->signal = *(int *) arg; mtr->proc = pr; break; case METEORGSIGNAL: *(int *)arg = mtr->signal; break; case METEORSTATUS: /* get 7196 status */ temp = 0; SAA7196_WRITE(mtr, SAA7196_STDC, SAA7196_REG(mtr, SAA7196_STDC) | 0x02); SAA7196_READ(mtr); temp |= (base->i2c_read & 0xff000000L) >> 24; SAA7196_WRITE(mtr, SAA7196_STDC, SAA7196_REG(mtr, SAA7196_STDC) & ~0x02); SAA7196_READ(mtr); temp |= (base->i2c_read & 0xff000000L) >> 16; *(u_short *)arg = temp; break; case METEORSHUE: /* set hue */ SAA7196_WRITE(mtr, SAA7196_HUEC, *(char *)arg); break; case METEORGHUE: /* get hue */ *(char *)arg = SAA7196_REG(mtr, 
SAA7196_HUEC); break; case METEORSCHCV: /* set chrominance gain */ SAA7196_WRITE(mtr, SAA7196_CGAINR, *(char *)arg); break; case METEORGCHCV: /* get chrominance gain */ *(char *)arg = SAA7196_REG(mtr, SAA7196_CGAINR); break; case METEORSBRIG: /* set brightness */ SAA7196_WRITE(mtr, SAA7196_BRIG, *(char *)arg); break; case METEORGBRIG: /* get brightness */ *(char *)arg = SAA7196_REG(mtr, SAA7196_BRIG); break; case METEORSCSAT: /* set chroma saturation */ SAA7196_WRITE(mtr, SAA7196_CSAT, *(char *)arg); break; case METEORGCSAT: /* get chroma saturation */ *(char *)arg = SAA7196_REG(mtr, SAA7196_CSAT); break; case METEORSCONT: /* set contrast */ SAA7196_WRITE(mtr, SAA7196_CONT, *(char *)arg); break; case METEORGCONT: /* get contrast */ *(char *)arg = SAA7196_REG(mtr, SAA7196_CONT); break; case METEORSBT254: if((mtr->flags & METEOR_RGB) == 0) return EINVAL; temp = *(unsigned short *)arg; bt254_write(mtr, temp & 0xf, (temp & 0x0ff0) >> 4); break; case METEORGBT254: if((mtr->flags & METEOR_RGB) == 0) return EINVAL; temp = *(unsigned short *)arg & 0x7; *(unsigned short *)arg = mtr->bt254_reg[temp] << 4 | temp; break; case METEORSHWS: /* set horizontal window start */ SAA7196_WRITE(mtr, SAA7196_HWS, *(char *)arg); break; case METEORGHWS: /* get horizontal window start */ *(char *)arg = SAA7196_REG(mtr, SAA7196_HWS); break; case METEORSVWS: /* set vertical window start */ SAA7196_WRITE(mtr, SAA7196_VWS, *(char *)arg); break; case METEORGVWS: /* get vertical window start */ *(char *)arg = SAA7196_REG(mtr, SAA7196_VWS); break; case METEORSINPUT: /* set input device */ switch(*(unsigned long *)arg & METEOR_DEV_MASK) { case 0: /* default */ case METEOR_INPUT_DEV0: if(mtr->flags & METEOR_RGB) select_saa7196(mtr); mtr->flags = (mtr->flags & ~METEOR_DEV_MASK) | METEOR_DEV0; SAA7196_WRITE(mtr, 0x0e, (SAA7196_REG(mtr, 0x0e) & ~0x3) | 0x0); SAA7196_WRITE(mtr, 0x06, (SAA7196_REG(mtr, 0x06) & ~0x80)); break; case METEOR_INPUT_DEV1: if(mtr->flags & METEOR_RGB) select_saa7196(mtr); 
mtr->flags = (mtr->flags & ~METEOR_DEV_MASK) | METEOR_DEV1; SAA7196_WRITE(mtr, 0x0e, (SAA7196_REG(mtr, 0x0e) & ~0x3) | 0x1); SAA7196_WRITE(mtr, 0x06, (SAA7196_REG(mtr, 0x06) & ~0x80)); break; case METEOR_INPUT_DEV2: if(mtr->flags & METEOR_RGB) select_saa7196(mtr); mtr->flags = (mtr->flags & ~METEOR_DEV_MASK) | METEOR_DEV2; SAA7196_WRITE(mtr, 0x0e, (SAA7196_REG(mtr, 0x0e) & ~0x3) | 0x2); SAA7196_WRITE(mtr, 0x06, (SAA7196_REG(mtr, 0x06) & ~0x80)); break; case METEOR_INPUT_DEV3: if(mtr->flags & METEOR_RGB) select_saa7196(mtr); mtr->flags = (mtr->flags & ~METEOR_DEV_MASK) | METEOR_DEV3; SAA7196_WRITE(mtr, 0x0e, (SAA7196_REG(mtr, 0x0e) | 0x3)); SAA7196_WRITE(mtr, 0x06, (SAA7196_REG(mtr, 0x06) & ~0x80) ); break; case METEOR_INPUT_DEV_SVIDEO: if(mtr->flags & METEOR_RGB) select_saa7196(mtr); mtr->flags = (mtr->flags & ~METEOR_DEV_MASK) | METEOR_DEV_SVIDEO; SAA7196_WRITE(mtr, 0x0e, (SAA7196_REG(mtr, 0x0e) & ~0x3) | 0x2); SAA7196_WRITE(mtr, 0x06, (SAA7196_REG(mtr, 0x06) & ~0x80) | 0x80); break; case METEOR_INPUT_DEV_RGB: if((mtr->flags & METEOR_RGB) == 0) return EINVAL; mtr->flags = (mtr->flags & ~METEOR_DEV_MASK) | METEOR_DEV_RGB; SAA7196_WRITE(mtr, 0x0e, (SAA7196_REG(mtr, 0x0e) & ~0x3) | 0x3); SAA7196_WRITE(mtr, 0x06, (SAA7196_REG(mtr, 0x06) & ~0x80)); select_bt254(mtr); SAA7196_WRITE(mtr, 0x0e, /* chn 3 for synch */ (SAA7196_REG(mtr, 0x0e) & ~0x3) | 0x3); break; default: return EINVAL; } break; case METEORGINPUT: /* get input device */ *(u_long *)arg = mtr->flags & METEOR_DEV_MASK; break; case METEORSFMT: /* set input format */ switch(*(unsigned long *)arg & METEOR_FORM_MASK ) { case 0: /* default */ case METEOR_FMT_NTSC: mtr->flags = (mtr->flags & ~METEOR_FORM_MASK) | METEOR_NTSC; SAA7196_WRITE(mtr, SAA7196_STDC, (SAA7196_REG(mtr, SAA7196_STDC) & ~0x01)); SAA7196_WRITE(mtr, 0x0f, (SAA7196_REG(mtr, 0x0f) & ~0xe0) | 0x40); SAA7196_WRITE(mtr, 0x22, 0x80); SAA7196_WRITE(mtr, 0x24, (SAA7196_REG(mtr, 0x24) & ~0x0c) | 0x08); SAA7196_WRITE(mtr, 0x26, 0xf0); SAA7196_WRITE(mtr, 
0x28, (SAA7196_REG(mtr, 0x28) & ~0x0c)) ; if(mtr->flags & METEOR_RGB){ bt254_ntsc(mtr, 1); } break; case METEOR_FMT_PAL: mtr->flags = (mtr->flags & ~METEOR_FORM_MASK) | METEOR_PAL; SAA7196_WRITE(mtr, SAA7196_STDC, (SAA7196_REG(mtr, SAA7196_STDC) & ~0x01)); SAA7196_WRITE(mtr, 0x0f, (SAA7196_REG(mtr, 0x0f) & ~0xe0)); SAA7196_WRITE(mtr, 0x22, 0x00); SAA7196_WRITE(mtr, 0x24, (SAA7196_REG(mtr, 0x24) | 0x0c)); SAA7196_WRITE(mtr, 0x26, 0x20); SAA7196_WRITE(mtr, 0x28, (SAA7196_REG(mtr, 0x28) & ~0x0c) | 0x04) ; if(mtr->flags & METEOR_RGB){ bt254_ntsc(mtr, 0); } break; case METEOR_FMT_SECAM: mtr->flags = (mtr->flags & ~METEOR_FORM_MASK) | METEOR_SECAM; SAA7196_WRITE(mtr, SAA7196_STDC, (SAA7196_REG(mtr, SAA7196_STDC) & ~0x01) | 0x1); SAA7196_WRITE(mtr, 0x0f, (SAA7196_REG(mtr, 0x0f) & ~0xe0) | 0x20); SAA7196_WRITE(mtr, 0x22, 0x00); SAA7196_WRITE(mtr, 0x24, (SAA7196_REG(mtr, 0x24) | 0x0c)); SAA7196_WRITE(mtr, 0x26, 0x20); SAA7196_WRITE(mtr, 0x28, (SAA7196_REG(mtr, 0x28) & ~0x0c) | 0x04) ; if(mtr->flags & METEOR_RGB){ bt254_ntsc(mtr, 0); } break; case METEOR_FMT_AUTOMODE: mtr->flags = (mtr->flags & ~METEOR_FORM_MASK) | METEOR_AUTOMODE; SAA7196_WRITE(mtr, SAA7196_STDC, (SAA7196_REG(mtr, SAA7196_STDC) & ~0x01)); SAA7196_WRITE(mtr, 0x0f, (SAA7196_REG(mtr, 0x0f) & ~0xe0) | 0x80); break; default: return EINVAL; } break; case METEORGFMT: /* get input format */ *(u_long *)arg = mtr->flags & METEOR_FORM_MASK; break; case METEORCAPTUR: temp = mtr->flags; switch (*(int *) arg) { case METEOR_CAP_SINGLE: if (mtr->bigbuf==0) /* no frame buffer allocated */ return(ENOMEM); if (temp & METEOR_CAP_MASK) return(EIO); /* already capturing */ start_capture(mtr, METEOR_SINGLE); /* wait for capture to complete */ error=tsleep((caddr_t)mtr, METPRI, "capturing", 0); if(error) printf("meteor%d: ioctl: tsleep error %d\n", unit, error); mtr->flags &= ~(METEOR_SINGLE|METEOR_WANT_MASK); break; case METEOR_CAP_CONTINOUS: if (mtr->bigbuf==0) /* no frame buffer allocated */ return(ENOMEM); if (temp & 
METEOR_CAP_MASK) return(EIO); /* already capturing */ start_capture(mtr, METEOR_CONTIN); break; case METEOR_CAP_STOP_CONT: if (mtr->flags & METEOR_CONTIN) { /* turn off capture */ base->cap_cntl = 0x8ff0; mtr->flags &= ~(METEOR_CONTIN|METEOR_WANT_MASK); } break; default: error = EINVAL; break; } break; case METEORCAPFRM: frame = (struct meteor_capframe *) arg; if (!frame) return(EINVAL); switch (frame->command) { case METEOR_CAP_N_FRAMES: if (mtr->flags & METEOR_CAP_MASK) return(EIO); if (mtr->flags & (METEOR_YUV_PLANAR | METEOR_YUV_422)) /* XXX */ return(EINVAL); /* should fix intr so we allow these */ if (mtr->bigbuf == 0) return(ENOMEM); if ((mtr->frames < 2) || (frame->lowat < 1 || frame->lowat >= mtr->frames) || (frame->hiwat < 1 || frame->hiwat >= mtr->frames) || (frame->lowat > frame->hiwat)) return(EINVAL); /* meteor_mem structure is on the page after the data */ mem = mtr->mem = (struct meteor_mem *) (mtr->bigbuf + (round_page(mtr->frame_size * mtr->frames))); mtr->current = 1; mtr->synch_wait = 0; mem->num_bufs = mtr->frames; mem->frame_size= mtr->frame_size; /* user and kernel change these */ mem->lowat = frame->lowat; mem->hiwat = frame->hiwat; mem->active = 0; mem->num_active_bufs = 0; /* Start capture */ start_capture(mtr, METEOR_SYNCAP); break; case METEOR_CAP_STOP_FRAMES: if (mtr->flags & METEOR_SYNCAP) { /* turn off capture */ base->cap_cntl = 0x8ff0; mtr->flags &= ~(METEOR_SYNCAP|METEOR_WANT_MASK); } break; case METEOR_HALT_N_FRAMES: if(mtr->flags & METEOR_SYNCAP) { base->cap_cntl = 0x8ff0; mtr->flags &= ~(METEOR_WANT_MASK); } break; case METEOR_CONT_N_FRAMES: if(!(mtr->flags & METEOR_SYNCAP)) { error = EINVAL; break; } start_capture(mtr, METEOR_SYNCAP); break; default: error = EINVAL; break; } break; case METEORSETGEO: geo = (struct meteor_geomet *) arg; /* Either even or odd, if even & odd, then these a zero */ if((geo->oformat & METEOR_GEO_ODD_ONLY) && (geo->oformat & METEOR_GEO_EVEN_ONLY)) { printf("meteor%d: ioctl: Geometry odd or even 
only.\n", unit); return EINVAL; } /* set/clear even/odd flags */ if(geo->oformat & METEOR_GEO_ODD_ONLY) mtr->flags |= METEOR_ONLY_ODD_FIELDS; else mtr->flags &= ~METEOR_ONLY_ODD_FIELDS; if(geo->oformat & METEOR_GEO_EVEN_ONLY) mtr->flags |= METEOR_ONLY_EVEN_FIELDS; else mtr->flags &= ~METEOR_ONLY_EVEN_FIELDS; /* can't change parameters while capturing */ if (mtr->flags & METEOR_CAP_MASK) return(EBUSY); if ((geo->columns & 0x3fe) != geo->columns) { printf( "meteor%d: ioctl: %d: columns too large or not even.\n", unit, geo->columns); error = EINVAL; } if (((geo->rows & 0x7fe) != geo->rows) || ((geo->oformat & METEOR_GEO_FIELD_MASK) && ((geo->rows & 0x3fe) != geo->rows)) ) { printf( "meteor%d: ioctl: %d: rows too large or not even.\n", unit, geo->rows); error = EINVAL; } if (geo->frames > 32) { printf("meteor%d: ioctl: too many frames.\n", unit); error = EINVAL; } if(error) return error; if (temp=geo->rows * geo->columns * geo->frames * 2) { if (geo->oformat & METEOR_GEO_RGB24) temp = temp * 2; /* meteor_mem structure for SYNC Capture */ if (geo->frames > 1) temp += PAGE_SIZE; temp = btoc(temp); if (temp > mtr->alloc_pages #ifdef METEOR_TEST_VIDEO && mtr->video.addr == 0 #endif ) { buf = get_meteor_mem(unit, temp*PAGE_SIZE); if(buf != 0) { kmem_free(kernel_map, mtr->bigbuf, (mtr->alloc_pages * PAGE_SIZE)); mtr->bigbuf = buf; mtr->alloc_pages = temp; if(bootverbose) printf( "meteor%d: ioctl: Allocating %d bytes\n", unit, temp*PAGE_SIZE); } else { error = ENOMEM; } } } if(error) return error; mtr->rows = geo->rows; mtr->cols = geo->columns; mtr->frames = geo->frames; #ifdef METEOR_TEST_VIDEO if(mtr->video.addr) buf = vtophys(mtr->video.addr); else #endif buf = vtophys(mtr->bigbuf); /* set defaults and end of buffer locations */ base->dma1e = buf; base->dma2e = buf; base->dma3e = buf; base->dma1o = buf; base->dma2o = buf; base->dma3o = buf; base->stride1e = 0; base->stride2e = 0; base->stride3e = 0; base->stride1o = 0; base->stride2o = 0; base->stride3o = 0; /* set end of 
DMA location, even/odd */ base->dma_end_e = base->dma_end_o = buf + mtr->alloc_pages * PAGE_SIZE; /* * Determine if we can use the hardware range detect. */ if(mtr->alloc_pages * PAGE_SIZE < RANGE_BOUNDARY && ((buf & 0xff000000) | base->dma_end_e) == (buf + mtr->alloc_pages * PAGE_SIZE) ) mtr->range_enable = 0x8000; else { mtr->range_enable = 0x0; base->dma_end_e = base->dma_end_o = 0xffffffff; } switch (geo->oformat & METEOR_GEO_OUTPUT_MASK) { case 0: /* default */ case METEOR_GEO_RGB16: mtr->depth = 2; mtr->frame_size = mtr->rows * mtr->cols * mtr->depth; mtr->flags &= ~METEOR_OUTPUT_FMT_MASK; mtr->flags |= METEOR_RGB16; temp = mtr->cols * mtr->depth; /* recal stride and starting point */ switch(mtr->flags & METEOR_ONLY_FIELDS_MASK) { case METEOR_ONLY_ODD_FIELDS: base->dma1o = buf; #ifdef METEOR_TEST_VIDEO if(mtr->video.addr && mtr->video.width) base->stride1o = mtr->video.width-temp; #endif SAA7196_WRITE(mtr, 0x20, 0xd0); break; case METEOR_ONLY_EVEN_FIELDS: base->dma1e = buf; #ifdef METEOR_TEST_VIDEO if(mtr->video.addr && mtr->video.width) base->stride1e = mtr->video.width-temp; #endif SAA7196_WRITE(mtr, 0x20, 0xf0); break; default: /* interlaced even/odd */ base->dma1e = buf; base->dma1o = buf + temp; base->stride1e = base->stride1o = temp; #ifdef METEOR_TEST_VIDEO if(mtr->video.addr && mtr->video.width) { base->dma1o = buf + mtr->video.width; base->stride1e = base->stride1o = mtr->video.width - temp + mtr->video.width; } #endif SAA7196_WRITE(mtr, 0x20, 0x90); break; } base->routee = base->routeo = 0xeeeeee01; break; case METEOR_GEO_RGB24: mtr->depth = 4; mtr->frame_size = mtr->rows * mtr->cols * mtr->depth; mtr->flags &= ~METEOR_OUTPUT_FMT_MASK; mtr->flags |= METEOR_RGB24; temp = mtr->cols * mtr->depth; /* recal stride and starting point */ switch(mtr->flags & METEOR_ONLY_FIELDS_MASK) { case METEOR_ONLY_ODD_FIELDS: base->dma1o = buf; #ifdef METEOR_TEST_VIDEO if(mtr->video.addr && mtr->video.width) base->stride1o = mtr->video.width-temp; #endif 
SAA7196_WRITE(mtr, 0x20, 0xd2); break; case METEOR_ONLY_EVEN_FIELDS: base->dma1e = buf; #ifdef METEOR_TEST_VIDEO if(mtr->video.addr && mtr->video.width) base->stride1e = mtr->video.width-temp; #endif SAA7196_WRITE(mtr, 0x20, 0xf2); break; default: /* interlaced even/odd */ base->dma1e = buf; base->dma1o = buf + mtr->cols * mtr->depth; base->stride1e = base->stride1o = mtr->cols * mtr->depth; #ifdef METEOR_TEST_VIDEO if(mtr->video.addr && mtr->video.width) { base->dma1o = buf + mtr->video.width; base->stride1e = base->stride1o = mtr->video.width - temp + mtr->video.width; } #endif SAA7196_WRITE(mtr, 0x20, 0x92); break; } base->routee= base->routeo= 0x39393900; break; case METEOR_GEO_YUV_PLANAR: mtr->depth = 2; temp = mtr->rows * mtr->cols; /* compute frame size */ mtr->frame_size = temp * mtr->depth; mtr->flags &= ~METEOR_OUTPUT_FMT_MASK; mtr->flags |= METEOR_YUV_PLANAR; /* recal stride and starting point */ switch(mtr->flags & METEOR_ONLY_FIELDS_MASK) { case METEOR_ONLY_ODD_FIELDS: base->dma1o = buf; /* Y Odd */ base->dma2o = buf + temp; /* U Odd */ temp >>= 1; base->dma3o = base->dma2o + temp; /* V Odd */ SAA7196_WRITE(mtr, 0x20, 0xd1); break; case METEOR_ONLY_EVEN_FIELDS: base->dma1e = buf; /* Y Even */ base->dma2e = buf + temp; /* U Even */ temp >>= 1; base->dma2e= base->dma2e + temp; /* V Even */ SAA7196_WRITE(mtr, 0x20, 0xf1); break; default: /* interlaced even/odd */ base->dma1e = buf; /* Y Even */ base->dma2e = buf + temp; /* U Even */ temp >>= 2; base->dma3e = base->dma2e + temp; /* V Even */ base->dma1o = base->dma1e+mtr->cols;/* Y Odd */ base->dma2o = base->dma3e + temp; /* U Odd */ base->dma3o = base->dma2o + temp; /* V Odd */ base->stride1e = base->stride1o = mtr->cols; SAA7196_WRITE(mtr, 0x20, 0x91); break; } switch (geo->oformat & (METEOR_GEO_YUV_12 | METEOR_GEO_YUV_9)) { case METEOR_GEO_YUV_9: base->routee=base->routeo = 0xaaaaffc3; break; case METEOR_GEO_YUV_12: base->routee=base->routeo = 0xaaaaffc2; break; default: base->routee=base->routeo = 
0xaaaaffc1; break; } break; case METEOR_GEO_YUV_422:/* same as planer, different uv order */ mtr->depth = 2; temp = mtr->rows * mtr->cols; /* compute frame size */ mtr->frame_size = temp * mtr->depth; mtr->flags &= ~METEOR_OUTPUT_FMT_MASK; mtr->flags |= METEOR_YUV_422; switch(mtr->flags & METEOR_ONLY_FIELDS_MASK) { case METEOR_ONLY_ODD_FIELDS: base->dma1o = buf; base->dma2o = buf + temp; base->dma3o = base->dma2o + (temp >> 1); SAA7196_WRITE(mtr, 0x20, 0xd1); break; case METEOR_ONLY_EVEN_FIELDS: base->dma1e = buf; base->dma2e = buf + temp; base->dma3e = base->dma2e + (temp >> 1); SAA7196_WRITE(mtr, 0x20, 0xf1); break; default: /* interlaced even/odd */ base->dma1e = buf; /* Y even */ base->dma2e = buf + temp; /* U even */ base->dma3e = base->dma2e + (temp >> 1);/* V even */ base->dma1o = base->dma1e+mtr->cols;/* Y odd */ temp = mtr->cols >> 1; base->dma2o = base->dma2e+temp; /* U odd */ base->dma3o = base->dma3e+temp; /* V odd */ base->stride1e = base->stride1o = mtr->cols; /* Y stride */ base->stride2e = base->stride2o = temp; /* U stride */ base->stride3e = base->stride3o = temp; /* V stride */ SAA7196_WRITE(mtr, 0x20, 0x91); break; } switch (geo->oformat & (METEOR_GEO_YUV_12 | METEOR_GEO_YUV_9)) { case METEOR_GEO_YUV_9: base->routee=base->routeo = 0xaaaaffc3; break; case METEOR_GEO_YUV_12: base->routee=base->routeo = 0xaaaaffc2; break; default: base->routee=base->routeo = 0xaaaaffc1; break; } break; case METEOR_GEO_YUV_PACKED: mtr->depth = 2; mtr->frame_size = mtr->rows * mtr->cols * mtr->depth; mtr->flags &= ~METEOR_OUTPUT_FMT_MASK; mtr->flags |= METEOR_YUV_PACKED; /* recal stride and odd starting point */ switch(mtr->flags & METEOR_ONLY_FIELDS_MASK) { case METEOR_ONLY_ODD_FIELDS: base->dma1o = buf; SAA7196_WRITE(mtr, 0x20, 0xd1); break; case METEOR_ONLY_EVEN_FIELDS: base->dma1e = buf; SAA7196_WRITE(mtr, 0x20, 0xf1); break; default: /* interlaced even/odd */ base->dma1e = buf; base->dma1o = buf + mtr->cols * mtr->depth; base->stride1e = base->stride1o = 
mtr->cols * mtr->depth; SAA7196_WRITE(mtr, 0x20, 0x91); break; } base->routee = base->routeo = 0xeeeeee41; break; default: error = EINVAL; /* invalid argument */ printf("meteor%d: ioctl: invalid output format\n",unit); break; } /* set cols */ SAA7196_WRITE(mtr, 0x21, mtr->cols & 0xff); SAA7196_WRITE(mtr, 0x24, ((SAA7196_REG(mtr, 0x24) & ~0x03) | ((mtr->cols >> 8) & 0x03))); /* set rows */ if(mtr->flags & METEOR_ONLY_FIELDS_MASK) { SAA7196_WRITE(mtr, 0x25, ((mtr->rows) & 0xff)); SAA7196_WRITE(mtr, 0x28, ((SAA7196_REG(mtr, 0x28) & ~0x03) | ((mtr->rows >> 8) & 0x03))); } else { /* Interlaced */ SAA7196_WRITE(mtr, 0x25, ((mtr->rows >> 1) & 0xff)); SAA7196_WRITE(mtr, 0x28, ((SAA7196_REG(mtr, 0x28) & ~0x03) | ((mtr->rows >> 9) & 0x03))); } /* set signed/unsigned chrominance */ SAA7196_WRITE(mtr, 0x30, (SAA7196_REG(mtr, 0x30) & ~0x10) | ((geo->oformat&METEOR_GEO_UNSIGNED)?0:0x10)); break; case METEORGETGEO: geo = (struct meteor_geomet *) arg; geo->rows = mtr->rows; geo->columns = mtr->cols; geo->frames = mtr->frames; geo->oformat = (mtr->flags & METEOR_OUTPUT_FMT_MASK) | (mtr->flags & METEOR_ONLY_FIELDS_MASK) | (SAA7196_REG(mtr, 0x30) & 0x10 ? 
0:METEOR_GEO_UNSIGNED);
		/* Recover the YUV sub-format from the DMA route register. */
		switch(base->routee & 0xff) {
		case 0xc3:
			geo->oformat |= METEOR_GEO_YUV_9;
			break;
		case 0xc2:
			geo->oformat |= METEOR_GEO_YUV_12;
			break;
		default:
			break;
		}
		break;
	case METEORSCOUNT:	/* (re)set error counts */
		cnt = (struct meteor_counts *) arg;
		mtr->fifo_errors = cnt->fifo_errors;
		mtr->dma_errors = cnt->dma_errors;
		mtr->frames_captured = cnt->frames_captured;
		mtr->even_fields_captured = cnt->even_fields_captured;
		mtr->odd_fields_captured = cnt->odd_fields_captured;
		break;
	case METEORGCOUNT:	/* get error counts */
		cnt = (struct meteor_counts *) arg;
		cnt->fifo_errors = mtr->fifo_errors;
		cnt->dma_errors = mtr->dma_errors;
		cnt->frames_captured = mtr->frames_captured;
		cnt->even_fields_captured = mtr->even_fields_captured;
		cnt->odd_fields_captured = mtr->odd_fields_captured;
		break;
	default:
		printf("meteor%d: ioctl: invalid ioctl request\n", unit);
		error = ENOTTY;
		break;
	}
	return(error);
}

/*
 * mmap entry point: translate a byte offset into the capture buffer
 * into a physical page number (via i386_btop/vtophys), or -1 on error.
 * Executable mappings are refused.
 * NOTE(review): offset is a signed int and only the upper bound is
 * checked here -- a negative offset is not rejected; confirm the
 * caller guarantees offset >= 0.
 */
int
meteor_mmap(dev_t dev, int offset, int nprot)
{
	int	unit;
	meteor_reg_t *mtr;

	unit = UNIT(minor(dev));
	if (unit >= NMETEOR)	/* at this point could this happen? */
		return(-1);
	mtr = &(meteor[unit]);
	if(nprot & PROT_EXEC)	/* refuse executable mappings */
		return -1;
	if(offset >= mtr->alloc_pages * PAGE_SIZE)
		return -1;
	return i386_btop(vtophys(mtr->bigbuf) + offset);
}

#if !defined(METEOR_FreeBSD_210)	/* XXX */
/* One-shot cdevsw registration flag, set by the SYSINIT hook below. */
static meteor_devsw_installed = 0;	/* NOTE(review): implicit int */

/*
 * Driver initialization: register the meteor cdevsw entry exactly once.
 */
static void
meteor_drvinit(void *unused)
{
	dev_t dev;

	if( ! meteor_devsw_installed ) {
		dev = makedev(CDEV_MAJOR, 0);
		cdevsw_add(&dev,&meteor_cdevsw, NULL);
		meteor_devsw_installed = 1;
	}
}

SYSINIT(meteordev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR,meteor_drvinit,NULL)
#endif

#endif /* NMETEOR > 0 */
diff --git a/sys/scsi/scsi_ioctl.c b/sys/scsi/scsi_ioctl.c
index 7998282cd377..086f42634a4b 100644
--- a/sys/scsi/scsi_ioctl.c
+++ b/sys/scsi/scsi_ioctl.c
@@ -1,405 +1,406 @@
/*
 * Copyright (C) 1992, 1993, 1994, HD Associates, Inc.
* PO Box 276 * Pepperell, MA 01463 * 508 433 5266 * dufault@hda.com * * This code is contributed to the University of California at Berkeley: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*End copyright * - * $Id: scsi_ioctl.c,v 1.28 1998/02/01 04:13:00 wollman Exp $ + * $Id: scsi_ioctl.c,v 1.29 1998/02/01 18:09:46 wollman Exp $ * * */ #include "opt_bounce.h" #include "opt_scsi.h" #include #include #include #include #define b_screq b_driver1 /* a patch in buf.h */ #define b_sc_link b_driver2 /* a patch in buf.h */ #include #include +#include #include #include #include static void scsierr(struct buf *, int); /* XXX ??? */ /* * We let the user interpret his own sense in the generic scsi world. * This routine is called at interrupt time if the SCSI_USER bit was set * in the flags passed to scsi_scsi_cmd(). No other completion processing * takes place, even if we are running over another device driver. * The lower level routines that call us here, will free the xs and restart * the device's queue if such exists. */ #ifndef min #define min(A,B) ((Abp; if(!bp) { /* ALL user requests must have a buf */ sc_print_addr(xs->sc_link); printf("User command with no buf\n"); return ; } screq = bp->b_screq; if (!screq) { /* Is it one of ours? 
(the SCSI_USER bit says it is) */ sc_print_addr(xs->sc_link); printf("User command with no request\n"); return ; } SC_DEBUG(xs->sc_link,SDEV_DB2,("user-done\n")); screq->retsts = 0; screq->status = xs->status; switch((int)xs->error) { case XS_NOERROR: SC_DEBUG(xs->sc_link,SDEV_DB3,("no error\n")); if (xs->flags & SCSI_RESID_VALID) screq->datalen_used = xs->datalen - xs->resid; else screq->datalen_used = xs->datalen; screq->retsts = SCCMD_OK; break; case XS_SENSE: SC_DEBUG(xs->sc_link,SDEV_DB3,("have sense\n")); screq->senselen_used = min(sizeof(xs->sense),SENSEBUFLEN); bcopy(&xs->sense,screq->sense,screq->senselen); screq->retsts = SCCMD_SENSE; break; case XS_DRIVER_STUFFUP: sc_print_addr(xs->sc_link); printf("host adapter code inconsistency\n"); screq->retsts = SCCMD_UNKNOWN; break; case XS_TIMEOUT: SC_DEBUG(xs->sc_link,SDEV_DB3,("timeout\n")); screq->retsts = SCCMD_TIMEOUT; break; case XS_BUSY: SC_DEBUG(xs->sc_link,SDEV_DB3,("busy\n")); screq->retsts = SCCMD_BUSY; break; default: sc_print_addr(xs->sc_link); printf("unknown error category from host adapter code\n"); screq->retsts = SCCMD_UNKNOWN; break; } biodone(bp); /* we're waiting on it in scsistrategy() */ return; /* it'll free the xs and restart any queue */ } /* Pseudo strategy function * Called by scsi_do_ioctl() via physio/physstrat if there is to * be data transfered, and directly if there is no data transfer. * * Can't be used with block devices or raw_read/raw_write directly * from the cdevsw/bdevsw tables because they couldn't have added * the screq structure. 
[JRE]
 */
/*
 * Validates the user request hung off bp->b_screq, issues it with
 * scsi_scsi_cmd() and then sleeps until scsi_user_done() biodone()s
 * the buf.  All failures are reported through scsierr(), which marks
 * the buf B_ERROR and completes it.
 */
static void
scsistrategy(struct buf *bp)
{
	errval err;
	struct scsi_link *sc_link = bp->b_sc_link;
	scsireq_t *screq;
	u_int32_t flags = 0;
	int s;

	if(!sc_link) {
		printf("user_strat: No link pointer\n");
		scsierr(bp,EINVAL);
		return;
	}
	SC_DEBUG(sc_link,SDEV_DB2,("user_strategy\n"));
	screq = bp->b_screq;
	if(!screq) {
		sc_print_addr(sc_link);
		printf("No request block\n");
		scsierr(bp,EINVAL);
		return;
	}
	/*
	 * We're in trouble if physio tried to break up the
	 * transfer:
	 */
	if (bp->b_bcount != screq->datalen) {
		sc_print_addr(sc_link);
		printf("physio split the request.. cannot proceed\n");
		scsierr(bp, EIO);
		return;
	}
	if (screq->timeout == 0) {
		scsierr(bp, EINVAL);
		return;
	}
	if (screq->cmdlen > sizeof(struct scsi_generic)) {
		sc_print_addr(sc_link);
		printf("cmdlen too big ");
		scsierr(bp, EFAULT);
		return;
	}
	/* Translate the user-level SCCMD_* flags into SCSI_* cmd flags. */
	if (screq->flags & SCCMD_READ)
		flags |= SCSI_DATA_IN;
	if (screq->flags & SCCMD_WRITE)
		flags |= SCSI_DATA_OUT;
	if (screq->flags & SCCMD_TARGET)
		flags |= SCSI_TARGET;
	if (screq->flags & SCCMD_ESCAPE)
		flags |= SCSI_ESCAPE;
#ifdef BOUNCE_BUFFERS
	if (sc_link->flags & SDEV_BOUNCE)
		vm_bounce_alloc(bp);
#endif
	err = scsi_scsi_cmd(sc_link, (struct scsi_generic *)screq->cmd,
	    screq->cmdlen, (u_char *)bp->b_data, screq->datalen,
	    0,	/* user must do the retries *//* ignored */
	    screq->timeout, bp, flags | SCSI_USER);
	/*because there is a bp, scsi_scsi_cmd will return immediatly*/
	if (err) {
		scsierr(bp, err);
		return;
	}
	SC_DEBUG(sc_link,SDEV_DB3,("about to sleep\n"));
	/* Block at splbio until scsi_user_done() marks the buf B_DONE. */
	s = splbio();
	while(!(bp->b_flags & B_DONE)) {
		tsleep((caddr_t)bp, PRIBIO, "scsistrat", 0);
	}
	splx(s);
	SC_DEBUG(sc_link,SDEV_DB3,("back from sleep\n"));
	return;
}

/*
 * Something (e.g. another driver) has called us
 * with an sc_link for a target/lun/adapter, and a scsi
 * specific ioctl to perform, better try.
* If user-level type command, we must still be running * in the context of the calling process */ errval scsi_do_ioctl(dev_t dev, int cmd, caddr_t addr, int flags, struct proc *p, struct scsi_link *sc_link) { errval ret = 0; /* If we can't write the device we can't permit much: */ if (cmd != SCIOCIDENTIFY && cmd != SCIOCGETDEVINFO&& !(flags & FWRITE)) return EACCES; SC_DEBUG(sc_link,SDEV_DB2,("scsi_do_ioctl(0x%x)\n",cmd)); switch(cmd) { case SCIOCCOMMAND: { /* * You won't believe this, but the arg copied in * from the user space, is on the kernel stack * for this process, so we can't write * to it at interrupt time.. * we need to copy it in and out! * Make a static copy using malloc! */ scsireq_t *screq2 = (scsireq_t *)addr; scsireq_t *screq = (scsireq_t *)addr; int rwflag = (screq->flags & SCCMD_READ) ? B_READ : B_WRITE; struct buf *bp; caddr_t d_addr; int len; #if 0 /* XXX dufault@hda.com: This looks too rev dependent. Do it always? */ if((unsigned int)screq < (unsigned int)KERNBASE) #endif { screq = malloc(sizeof(scsireq_t),M_TEMP,M_WAITOK); bcopy(screq2,screq,sizeof(scsireq_t)); } bp = malloc(sizeof (struct buf),M_TEMP,M_WAITOK); bzero(bp,sizeof(struct buf)); d_addr = screq->databuf; bp->b_bcount = len = screq->datalen; bp->b_screq = screq; bp->b_sc_link = sc_link; if (len) { struct uio auio; struct iovec aiov; long cnt; aiov.iov_base = d_addr; aiov.iov_len = len; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_resid = len; if (auio.uio_resid < 0) return (EINVAL); auio.uio_rw = (rwflag == B_READ) ? UIO_READ : UIO_WRITE; auio.uio_segflg = UIO_USERSPACE; auio.uio_procp = curproc; cnt = len; ret = physio(scsistrategy, bp, dev, rwflag, minphys, &auio); } else { /* if no data, no need to translate it.. */ bp->b_data = 0; bp->b_dev = dev; bp->b_flags |= B_BUSY; scsistrategy(bp); ret = bp->b_error; } free(bp,M_TEMP); #if 0 /* XXX dufault@hda.com: This looks too rev dependent. Do it always? 
*/
		if((unsigned int)screq2 < (unsigned int)KERNBASE)
#endif
		{
			/* Copy the results back to the caller's copy. */
			bcopy(screq,screq2,sizeof(scsireq_t));
			free(screq,M_TEMP);
		}
		break;
	}
	case SCIOCDEBUG:	/* set per-device debug bits from a bitmask */
	{
		int level = *((int *)addr);
		SC_DEBUG(sc_link,SDEV_DB3,("debug set to %d\n",level));
		sc_link->flags &= ~SDEV_DBX; /*clear debug bits */
		if(level & 1)
			sc_link->flags |= SDEV_DB1;
		if(level & 2)
			sc_link->flags |= SDEV_DB2;
		if(level & 4)
			sc_link->flags |= SDEV_DB3;
		if(level & 8)
			sc_link->flags |= SDEV_DB4;
		ret = 0;
		break;
	}
	case SCIOCREPROBE:	/* probe the given bus/target/lun */
	{
		struct scsi_addr *sca = (struct scsi_addr *) addr;
		ret = scsi_probe_busses(sca->scbus,sca->target,sca->lun);
		break;
	}
	case SCIOCRECONFIG:
	case SCIOCDECONFIG:
		/* not implemented */
		ret = EINVAL;
		break;
	case SCIOCIDENTIFY:	/* report this link's bus/target/lun */
	{
		struct scsi_addr *sca = (struct scsi_addr *) addr;
		sca->scbus = sc_link->scsibus;
		sca->target = sc_link->target;
		sca->lun = sc_link->lun;
		break;
	}
	case SCIOCGETDEVINFO:	/* look up device/adapter names by address */
	{
		struct scsi_devinfo *scd = (struct scsi_devinfo *)addr;
		struct scsi_link *scl;
		scl = scsi_link_get(scd->addr.scbus,
		    scd->addr.target, scd->addr.lun);
		if (scl != 0) {
			scd->dev = scl->dev;
			/* XXX buffers better be big enough... */
			sprintf(scd->devname, "%s%d",
			    scl->device->name, scl->dev_unit);
			sprintf(scd->adname, "%s%d:%d",
			    scl->adapter->name, scl->adapter_unit,
			    scl->adapter_bus);
			ret = 0;
		}
		else {
			ret = ENXIO;
		}
		break;
	}
	default:
		ret = ENOTTY;
		break;
	}
	return ret;
}

/*
 * Fail a buf: mark it B_ERROR with the given errno and biodone() it,
 * which wakes anyone sleeping on the buf in scsistrategy().
 */
void scsierr(bp,err)
	struct buf *bp;
	int err;
{
	bp->b_flags |= B_ERROR;
	bp->b_error = err;
	biodone(bp);
	return;
}
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index 6d6b3563b7ee..329a4b0b8cdd 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -1,331 +1,333 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc.
and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.48 1998/03/16 01:55:33 dyson Exp $ + * $Id: buf.h,v 1.49 1998/03/19 22:49:01 dyson Exp $ */ #ifndef _SYS_BUF_H_ #define _SYS_BUF_H_ #include #define NOLIST ((struct buf *)0x87654321) struct buf; struct mount; /* * To avoid including */ LIST_HEAD(workhead, worklist); /* * These are currently used only by the soft dependency code, hence * are stored once in a global variable. If other subsystems wanted * to use these hooks, a pointer to a set of bio_ops could be added * to each buffer. */ extern struct bio_ops { void (*io_start) __P((struct buf *)); void (*io_complete) __P((struct buf *)); void (*io_deallocate) __P((struct buf *)); int (*io_sync) __P((struct mount *)); } bioops; struct iodone_chain { long ic_prev_flags; void (*ic_prev_iodone) __P((struct buf *)); void *ic_prev_iodone_chain; struct { long ia_long; void *ia_ptr; } ic_args[5]; }; /* * The buffer header describes an I/O operation in the kernel. */ struct buf { LIST_ENTRY(buf) b_hash; /* Hash chain. */ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ TAILQ_ENTRY(buf) b_act; /* Device driver queue when active. *new* */ struct proc *b_proc; /* Associated proc; NULL if kernel. */ long b_flags; /* B_* flags. */ unsigned short b_qindex; /* buffer queue index */ unsigned char b_usecount; /* buffer use count */ int b_error; /* Errno value. */ long b_bufsize; /* Allocated buffer size. */ long b_bcount; /* Valid bytes in buffer. */ long b_resid; /* Remaining I/O. */ dev_t b_dev; /* Device associated with buffer. */ caddr_t b_data; /* Memory, superblocks, indirect etc. */ caddr_t b_kvabase; /* base kva for buffer */ int b_kvasize; /* size of kva for buffer */ daddr_t b_lblkno; /* Logical block number. */ daddr_t b_blkno; /* Underlying physical block number. */ off_t b_offset; /* Offset into file */ /* Function to call upon completion. 
*/ void (*b_iodone) __P((struct buf *)); /* For nested b_iodone's. */ struct iodone_chain *b_iodone_chain; struct vnode *b_vp; /* Device vnode. */ int b_dirtyoff; /* Offset in buffer of dirty region. */ int b_dirtyend; /* Offset of end of dirty region. */ struct ucred *b_rcred; /* Read credentials reference. */ struct ucred *b_wcred; /* Write credentials reference. */ int b_validoff; /* Offset in buffer of valid region. */ int b_validend; /* Offset of end of valid region. */ daddr_t b_pblkno; /* physical block number */ void *b_saveaddr; /* Original b_addr for physio. */ caddr_t b_savekva; /* saved kva for transfer while bouncing */ void *b_driver1; /* for private use by the driver */ void *b_driver2; /* for private use by the driver */ void *b_spc; union cluster_info { TAILQ_HEAD(cluster_list_head, buf) cluster_head; TAILQ_ENTRY(buf) cluster_entry; } b_cluster; struct vm_page *b_pages[btoc(MAXPHYS)]; int b_npages; struct workhead b_dep; /* List of filesystem dependencies. */ }; /* * These flags are kept in b_flags. */ #define B_AGE 0x00000001 /* Move to age queue when I/O done. */ #define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ #define B_BAD 0x00000008 /* Bad block revectoring in progress. */ #define B_BUSY 0x00000010 /* I/O in progress. */ #define B_CACHE 0x00000020 /* Bread found us in the cache. */ #define B_CALL 0x00000040 /* Call b_iodone from biodone. */ #define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ #define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */ #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ #define B_ERROR 0x00000800 /* I/O error occurred. */ #define B_GATHERED 0x00001000 /* LFS: already in a segment. */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. 
*/ #define B_MALLOC 0x00010000 /* malloced b_data */ #define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */ #define B_PHYS 0x00040000 /* I/O to user memory. */ #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ #define B_TAPE 0x00200000 /* Magnetic tape I/O. */ #define B_RELBUF 0x00400000 /* Release VMIO buffer. */ #define B_WANTED 0x00800000 /* Process wants this buffer. */ #define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define B_WRITEINPROG 0x01000000 /* Write in progress. */ #define B_XXX 0x02000000 /* Debugging flag. */ #define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */ #define B_ORDERED 0x08000000 /* Must guarantee I/O ordering */ #define B_RAM 0x10000000 /* Read ahead mark (flag) */ #define B_VMIO 0x20000000 /* VMIO flag */ #define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */ #define B_BOUNCE 0x80000000 /* bounce buffer flag */ #define NOOFFSET (-1LL) /* No buffer offset calculated yet */ typedef struct buf_queue_head { TAILQ_HEAD(, buf) queue; struct buf *insert_point; struct buf *switch_point; } buf_queue_head, *buf_queue_head_t; static __inline void bufq_init __P((buf_queue_head *head)); static __inline void bufq_insert_tail __P((buf_queue_head *head, struct buf *bp)); static __inline void bufq_remove __P((buf_queue_head *head, struct buf *bp)); static __inline struct buf *bufq_first __P((buf_queue_head *head)); static __inline void bufq_init(buf_queue_head *head) { TAILQ_INIT(&head->queue); head->insert_point = NULL; head->switch_point = NULL; } static __inline void bufq_insert_tail(buf_queue_head *head, struct buf *bp) { if ((bp->b_flags & B_ORDERED) != 0) { head->insert_point = bp; head->switch_point = NULL; } TAILQ_INSERT_TAIL(&head->queue, bp, b_act); } static __inline void bufq_remove(buf_queue_head *head, struct buf *bp) { if (bp == TAILQ_FIRST(&head->queue)) { if (bp == head->insert_point) head->insert_point = NULL; if 
(TAILQ_NEXT(bp, b_act) == head->switch_point) head->switch_point = NULL; } else { if (bp == head->insert_point) { /* * Not 100% correct (we really want the * previous bp), but it will ensure queue * ordering and is less expensive than * using a CIRCLEQ. */ head->insert_point = TAILQ_NEXT(bp, b_act); } if (bp == head->switch_point) { head->switch_point = TAILQ_NEXT(bp, b_act); } } TAILQ_REMOVE(&head->queue, bp, b_act); } static __inline struct buf * bufq_first(buf_queue_head *head) { return (TAILQ_FIRST(&head->queue)); } /* * number of buffer hash entries */ #define BUFHSZ 512 /* * buffer hash table calculation, originally by David Greenman */ #define BUFHASH(vnp, bn) \ (&bufhashtbl[(((unsigned long)(vnp) >> 7)+(int)(bn)) % BUFHSZ]) /* * Definitions for the buffer free lists. */ #define BUFFER_QUEUES 6 /* number of free buffer queues */ #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_LOCKED 1 /* locked buffers */ #define QUEUE_LRU 2 /* useful buffers */ #define QUEUE_VMIO 3 /* VMIO buffers */ #define QUEUE_AGE 4 /* not-useful buffers */ #define QUEUE_EMPTY 5 /* empty buffer headers*/ /* * Zero out the buffer's data area. */ #define clrbuf(bp) { \ bzero((bp)->b_data, (u_int)(bp)->b_bcount); \ (bp)->b_resid = 0; \ } /* Flags to low-level allocation routines. */ #define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ #define B_SYNC 0x02 /* Do all allocations synchronously. */ #ifdef KERNEL extern int nbuf; /* The number of buffer headers */ extern struct buf *buf; /* The buffer headers. */ extern char *buffers; /* The buffer contents. */ extern int bufpages; /* Number of memory pages in the buffer pool. */ extern struct buf *swbuf; /* Swap I/O buffer headers. */ extern int nswbuf; /* Number of swap I/O buffer headers. 
*/ extern int needsbuffer, numdirtybuffers; extern TAILQ_HEAD(swqueue, buf) bswlist; extern TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; +struct uio; + void bufinit __P((void)); void bremfree __P((struct buf *)); int bread __P((struct vnode *, daddr_t, int, struct ucred *, struct buf **)); int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, struct ucred *, struct buf **)); int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); void bdirty __P((struct buf *)); int bowrite __P((struct buf *)); void brelse __P((struct buf *)); void bqrelse __P((struct buf *)); int vfs_bio_awrite __P((struct buf *)); struct buf * getpbuf __P((void)); struct buf *incore __P((struct vnode *, daddr_t)); struct buf *gbincore __P((struct vnode *, daddr_t)); int inmem __P((struct vnode *, daddr_t)); struct buf *getblk __P((struct vnode *, daddr_t, int, int, int)); struct buf *geteblk __P((int)); int allocbuf __P((struct buf *, int)); int biowait __P((struct buf *)); void biodone __P((struct buf *)); void cluster_callback __P((struct buf *)); int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long, struct ucred *, long, int, struct buf **)); int cluster_wbuild __P((struct vnode *, long, daddr_t, int)); void cluster_write __P((struct buf *, u_quad_t)); int physio __P((void (*)(struct buf *), struct buf *, dev_t, int, u_int (*)(struct buf *), struct uio *)); u_int minphys __P((struct buf *)); void vfs_bio_clrbuf __P((struct buf *)); void vfs_busy_pages __P((struct buf *, int clear_modify)); void vfs_unbusy_pages __P((struct buf *)); void vwakeup __P((struct buf *)); void vmapbuf __P((struct buf *)); void vunmapbuf __P((struct buf *)); void relpbuf __P((struct buf *)); void brelvp __P((struct buf *)); void bgetvp __P((struct vnode *, struct buf *)); void pbgetvp __P((struct vnode *, struct buf *)); void pbrelvp __P((struct buf *)); void reassignbuf __P((struct buf *, struct vnode *)); struct buf *trypbuf __P((void)); void 
vm_bounce_alloc __P((struct buf *)); void vm_bounce_free __P((struct buf *)); vm_offset_t vm_bounce_kva_alloc __P((int)); void vm_bounce_kva_alloc_free __P((vm_offset_t, int)); void vfs_bio_need_satisfy __P((void)); #endif /* KERNEL */ #endif /* !_SYS_BUF_H_ */ diff --git a/sys/sys/buf.h b/sys/sys/buf.h index 6d6b3563b7ee..329a4b0b8cdd 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -1,331 +1,333 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.48 1998/03/16 01:55:33 dyson Exp $ + * $Id: buf.h,v 1.49 1998/03/19 22:49:01 dyson Exp $ */ #ifndef _SYS_BUF_H_ #define _SYS_BUF_H_ #include #define NOLIST ((struct buf *)0x87654321) struct buf; struct mount; /* * To avoid including */ LIST_HEAD(workhead, worklist); /* * These are currently used only by the soft dependency code, hence * are stored once in a global variable. If other subsystems wanted * to use these hooks, a pointer to a set of bio_ops could be added * to each buffer. */ extern struct bio_ops { void (*io_start) __P((struct buf *)); void (*io_complete) __P((struct buf *)); void (*io_deallocate) __P((struct buf *)); int (*io_sync) __P((struct mount *)); } bioops; struct iodone_chain { long ic_prev_flags; void (*ic_prev_iodone) __P((struct buf *)); void *ic_prev_iodone_chain; struct { long ia_long; void *ia_ptr; } ic_args[5]; }; /* * The buffer header describes an I/O operation in the kernel. */ struct buf { LIST_ENTRY(buf) b_hash; /* Hash chain. */ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */ TAILQ_ENTRY(buf) b_act; /* Device driver queue when active. *new* */ struct proc *b_proc; /* Associated proc; NULL if kernel. */ long b_flags; /* B_* flags. */ unsigned short b_qindex; /* buffer queue index */ unsigned char b_usecount; /* buffer use count */ int b_error; /* Errno value. 
*/ long b_bufsize; /* Allocated buffer size. */ long b_bcount; /* Valid bytes in buffer. */ long b_resid; /* Remaining I/O. */ dev_t b_dev; /* Device associated with buffer. */ caddr_t b_data; /* Memory, superblocks, indirect etc. */ caddr_t b_kvabase; /* base kva for buffer */ int b_kvasize; /* size of kva for buffer */ daddr_t b_lblkno; /* Logical block number. */ daddr_t b_blkno; /* Underlying physical block number. */ off_t b_offset; /* Offset into file */ /* Function to call upon completion. */ void (*b_iodone) __P((struct buf *)); /* For nested b_iodone's. */ struct iodone_chain *b_iodone_chain; struct vnode *b_vp; /* Device vnode. */ int b_dirtyoff; /* Offset in buffer of dirty region. */ int b_dirtyend; /* Offset of end of dirty region. */ struct ucred *b_rcred; /* Read credentials reference. */ struct ucred *b_wcred; /* Write credentials reference. */ int b_validoff; /* Offset in buffer of valid region. */ int b_validend; /* Offset of end of valid region. */ daddr_t b_pblkno; /* physical block number */ void *b_saveaddr; /* Original b_addr for physio. */ caddr_t b_savekva; /* saved kva for transfer while bouncing */ void *b_driver1; /* for private use by the driver */ void *b_driver2; /* for private use by the driver */ void *b_spc; union cluster_info { TAILQ_HEAD(cluster_list_head, buf) cluster_head; TAILQ_ENTRY(buf) cluster_entry; } b_cluster; struct vm_page *b_pages[btoc(MAXPHYS)]; int b_npages; struct workhead b_dep; /* List of filesystem dependencies. */ }; /* * These flags are kept in b_flags. */ #define B_AGE 0x00000001 /* Move to age queue when I/O done. */ #define B_NEEDCOMMIT 0x00000002 /* Append-write in progress. */ #define B_ASYNC 0x00000004 /* Start I/O, do not wait. */ #define B_BAD 0x00000008 /* Bad block revectoring in progress. */ #define B_BUSY 0x00000010 /* I/O in progress. */ #define B_CACHE 0x00000020 /* Bread found us in the cache. */ #define B_CALL 0x00000040 /* Call b_iodone from biodone. 
*/ #define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */ #define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */ #define B_DONE 0x00000200 /* I/O completed. */ #define B_EINTR 0x00000400 /* I/O was interrupted */ #define B_ERROR 0x00000800 /* I/O error occurred. */ #define B_GATHERED 0x00001000 /* LFS: already in a segment. */ #define B_INVAL 0x00002000 /* Does not contain valid info. */ #define B_LOCKED 0x00004000 /* Locked in core (not reusable). */ #define B_NOCACHE 0x00008000 /* Do not cache block after use. */ #define B_MALLOC 0x00010000 /* malloced b_data */ #define B_CLUSTEROK 0x00020000 /* Pagein op, so swap() can count it. */ #define B_PHYS 0x00040000 /* I/O to user memory. */ #define B_RAW 0x00080000 /* Set by physio for raw transfers. */ #define B_READ 0x00100000 /* Read buffer. */ #define B_TAPE 0x00200000 /* Magnetic tape I/O. */ #define B_RELBUF 0x00400000 /* Release VMIO buffer. */ #define B_WANTED 0x00800000 /* Process wants this buffer. */ #define B_WRITE 0x00000000 /* Write buffer (pseudo flag). */ #define B_WRITEINPROG 0x01000000 /* Write in progress. */ #define B_XXX 0x02000000 /* Debugging flag. 
*/ #define B_PAGING 0x04000000 /* volatile paging I/O -- bypass VMIO */ #define B_ORDERED 0x08000000 /* Must guarantee I/O ordering */ #define B_RAM 0x10000000 /* Read ahead mark (flag) */ #define B_VMIO 0x20000000 /* VMIO flag */ #define B_CLUSTER 0x40000000 /* pagein op, so swap() can count it */ #define B_BOUNCE 0x80000000 /* bounce buffer flag */ #define NOOFFSET (-1LL) /* No buffer offset calculated yet */ typedef struct buf_queue_head { TAILQ_HEAD(, buf) queue; struct buf *insert_point; struct buf *switch_point; } buf_queue_head, *buf_queue_head_t; static __inline void bufq_init __P((buf_queue_head *head)); static __inline void bufq_insert_tail __P((buf_queue_head *head, struct buf *bp)); static __inline void bufq_remove __P((buf_queue_head *head, struct buf *bp)); static __inline struct buf *bufq_first __P((buf_queue_head *head)); static __inline void bufq_init(buf_queue_head *head) { TAILQ_INIT(&head->queue); head->insert_point = NULL; head->switch_point = NULL; } static __inline void bufq_insert_tail(buf_queue_head *head, struct buf *bp) { if ((bp->b_flags & B_ORDERED) != 0) { head->insert_point = bp; head->switch_point = NULL; } TAILQ_INSERT_TAIL(&head->queue, bp, b_act); } static __inline void bufq_remove(buf_queue_head *head, struct buf *bp) { if (bp == TAILQ_FIRST(&head->queue)) { if (bp == head->insert_point) head->insert_point = NULL; if (TAILQ_NEXT(bp, b_act) == head->switch_point) head->switch_point = NULL; } else { if (bp == head->insert_point) { /* * Not 100% correct (we really want the * previous bp), but it will ensure queue * ordering and is less expensive than * using a CIRCLEQ. 
*/ head->insert_point = TAILQ_NEXT(bp, b_act); } if (bp == head->switch_point) { head->switch_point = TAILQ_NEXT(bp, b_act); } } TAILQ_REMOVE(&head->queue, bp, b_act); } static __inline struct buf * bufq_first(buf_queue_head *head) { return (TAILQ_FIRST(&head->queue)); } /* * number of buffer hash entries */ #define BUFHSZ 512 /* * buffer hash table calculation, originally by David Greenman */ #define BUFHASH(vnp, bn) \ (&bufhashtbl[(((unsigned long)(vnp) >> 7)+(int)(bn)) % BUFHSZ]) /* * Definitions for the buffer free lists. */ #define BUFFER_QUEUES 6 /* number of free buffer queues */ #define QUEUE_NONE 0 /* on no queue */ #define QUEUE_LOCKED 1 /* locked buffers */ #define QUEUE_LRU 2 /* useful buffers */ #define QUEUE_VMIO 3 /* VMIO buffers */ #define QUEUE_AGE 4 /* not-useful buffers */ #define QUEUE_EMPTY 5 /* empty buffer headers*/ /* * Zero out the buffer's data area. */ #define clrbuf(bp) { \ bzero((bp)->b_data, (u_int)(bp)->b_bcount); \ (bp)->b_resid = 0; \ } /* Flags to low-level allocation routines. */ #define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ #define B_SYNC 0x02 /* Do all allocations synchronously. */ #ifdef KERNEL extern int nbuf; /* The number of buffer headers */ extern struct buf *buf; /* The buffer headers. */ extern char *buffers; /* The buffer contents. */ extern int bufpages; /* Number of memory pages in the buffer pool. */ extern struct buf *swbuf; /* Swap I/O buffer headers. */ extern int nswbuf; /* Number of swap I/O buffer headers. 
*/ extern int needsbuffer, numdirtybuffers; extern TAILQ_HEAD(swqueue, buf) bswlist; extern TAILQ_HEAD(bqueues, buf) bufqueues[BUFFER_QUEUES]; +struct uio; + void bufinit __P((void)); void bremfree __P((struct buf *)); int bread __P((struct vnode *, daddr_t, int, struct ucred *, struct buf **)); int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int, struct ucred *, struct buf **)); int bwrite __P((struct buf *)); void bdwrite __P((struct buf *)); void bawrite __P((struct buf *)); void bdirty __P((struct buf *)); int bowrite __P((struct buf *)); void brelse __P((struct buf *)); void bqrelse __P((struct buf *)); int vfs_bio_awrite __P((struct buf *)); struct buf * getpbuf __P((void)); struct buf *incore __P((struct vnode *, daddr_t)); struct buf *gbincore __P((struct vnode *, daddr_t)); int inmem __P((struct vnode *, daddr_t)); struct buf *getblk __P((struct vnode *, daddr_t, int, int, int)); struct buf *geteblk __P((int)); int allocbuf __P((struct buf *, int)); int biowait __P((struct buf *)); void biodone __P((struct buf *)); void cluster_callback __P((struct buf *)); int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long, struct ucred *, long, int, struct buf **)); int cluster_wbuild __P((struct vnode *, long, daddr_t, int)); void cluster_write __P((struct buf *, u_quad_t)); int physio __P((void (*)(struct buf *), struct buf *, dev_t, int, u_int (*)(struct buf *), struct uio *)); u_int minphys __P((struct buf *)); void vfs_bio_clrbuf __P((struct buf *)); void vfs_busy_pages __P((struct buf *, int clear_modify)); void vfs_unbusy_pages __P((struct buf *)); void vwakeup __P((struct buf *)); void vmapbuf __P((struct buf *)); void vunmapbuf __P((struct buf *)); void relpbuf __P((struct buf *)); void brelvp __P((struct buf *)); void bgetvp __P((struct vnode *, struct buf *)); void pbgetvp __P((struct vnode *, struct buf *)); void pbrelvp __P((struct buf *)); void reassignbuf __P((struct buf *, struct vnode *)); struct buf *trypbuf __P((void)); void 
vm_bounce_alloc __P((struct buf *)); void vm_bounce_free __P((struct buf *)); vm_offset_t vm_bounce_kva_alloc __P((int)); void vm_bounce_kva_alloc_free __P((vm_offset_t, int)); void vfs_bio_need_satisfy __P((void)); #endif /* KERNEL */ #endif /* !_SYS_BUF_H_ */ diff --git a/sys/sys/mount.h b/sys/sys/mount.h index 05eae825620d..09b919ccef93 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -1,493 +1,491 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)mount.h 8.21 (Berkeley) 5/20/95 - * $Id: mount.h,v 1.57 1998/03/01 22:46:36 msmith Exp $ + * $Id: mount.h,v 1.58 1998/03/08 09:58:29 julian Exp $ */ #ifndef _SYS_MOUNT_H_ #define _SYS_MOUNT_H_ -#ifndef KERNEL #include -#endif #include #include #include #include /* XXX for AF_MAX */ typedef struct fsid { int32_t val[2]; } fsid_t; /* file system id type */ /* * File identifier. * These are unique per filesystem on a single machine. */ #define MAXFIDSZ 16 struct fid { u_short fid_len; /* length of data in bytes */ u_short fid_reserved; /* force longword alignment */ char fid_data[MAXFIDSZ]; /* data (variable length) */ }; /* * file system statistics */ #define MFSNAMELEN 16 /* length of fs type name, including null */ #define MNAMELEN 90 /* length of buffer for returned name */ struct statfs { long f_spare2; /* placeholder */ long f_bsize; /* fundamental file system block size */ long f_iosize; /* optimal transfer block size */ long f_blocks; /* total data blocks in file system */ long f_bfree; /* free blocks in fs */ long f_bavail; /* free blocks avail to non-superuser */ long f_files; /* total file nodes in file system */ long f_ffree; /* free file nodes in fs */ fsid_t f_fsid; /* file system id */ uid_t f_owner; /* user that mounted the filesystem */ int f_type; /* type of filesystem (see below) */ int f_flags; /* copy of mount exported flags */ long f_syncwrites; /* count of sync writes since mount */ long f_asyncwrites; /* count of async writes 
since mount */ char f_fstypename[MFSNAMELEN]; /* fs type name */ char f_mntonname[MNAMELEN]; /* directory on which mounted */ char f_mntfromname[MNAMELEN];/* mounted filesystem */ }; /* * File system types (for backwards compat with 4.4Lite.) */ #define MOUNT_NONE 0 #define MOUNT_UFS 1 /* Fast Filesystem */ #define MOUNT_NFS 2 /* Sun-compatible Network Filesystem */ #define MOUNT_MFS 3 /* Memory-based Filesystem */ #define MOUNT_MSDOS 4 /* MS/DOS Filesystem */ #define MOUNT_LFS 5 /* Log-based Filesystem */ #define MOUNT_LOFS 6 /* Loopback Filesystem */ #define MOUNT_FDESC 7 /* File Descriptor Filesystem */ #define MOUNT_PORTAL 8 /* Portal Filesystem */ #define MOUNT_NULL 9 /* Minimal Filesystem Layer */ #define MOUNT_UMAP 10 /* User/Group Identifier Remapping Filesystem */ #define MOUNT_KERNFS 11 /* Kernel Information Filesystem */ #define MOUNT_PROCFS 12 /* /proc Filesystem */ #define MOUNT_AFS 13 /* Andrew Filesystem */ #define MOUNT_CD9660 14 /* ISO9660 (aka CDROM) Filesystem */ #define MOUNT_UNION 15 /* Union (translucent) Filesystem */ #define MOUNT_DEVFS 16 /* existing device Filesystem */ #define MOUNT_EXT2FS 17 /* Linux EXT2FS */ #define MOUNT_TFS 18 /* Netcon Novell filesystem */ #define MOUNT_CFS 19 /* Coda filesystem */ #define MOUNT_MAXTYPE 19 #define INITMOUNTNAMES { \ "none", /* 0 MOUNT_NONE */ \ "ufs", /* 1 MOUNT_UFS */ \ "nfs", /* 2 MOUNT_NFS */ \ "mfs", /* 3 MOUNT_MFS */ \ "msdos", /* 4 MOUNT_MSDOS */ \ "lfs", /* 5 MOUNT_LFS */ \ "lofs", /* 6 MOUNT_LOFS */ \ "fdesc", /* 7 MOUNT_FDESC */ \ "portal", /* 8 MOUNT_PORTAL */ \ "null", /* 9 MOUNT_NULL */ \ "umap", /* 10 MOUNT_UMAP */ \ "kernfs", /* 11 MOUNT_KERNFS */ \ "procfs", /* 12 MOUNT_PROCFS */ \ "afs", /* 13 MOUNT_AFS */ \ "cd9660", /* 14 MOUNT_CD9660 */ \ "union", /* 15 MOUNT_UNION */ \ "devfs", /* 16 MOUNT_DEVFS */ \ "ext2fs", /* 17 MOUNT_EXT2FS */ \ "tfs", /* 18 MOUNT_TFS */ \ "cfs", /* 19 MOUNT_CFS */ \ 0, /* 20 MOUNT_SPARE */ \ } /* * Structure per mounted file system. 
Each mounted file system has an * array of operations and an instance record. The file systems are * put on a doubly linked list. */ LIST_HEAD(vnodelst, vnode); struct mount { CIRCLEQ_ENTRY(mount) mnt_list; /* mount list */ struct vfsops *mnt_op; /* operations on fs */ struct vfsconf *mnt_vfc; /* configuration info */ struct vnode *mnt_vnodecovered; /* vnode we mounted on */ struct vnode *mnt_syncer; /* syncer vnode */ struct vnodelst mnt_vnodelist; /* list of vnodes this mount */ struct lock mnt_lock; /* mount structure lock */ int mnt_flag; /* flags shared with user */ int mnt_kern_flag; /* kernel only flags */ int mnt_maxsymlinklen; /* max size of short symlink */ struct statfs mnt_stat; /* cache of filesystem stats */ qaddr_t mnt_data; /* private data */ time_t mnt_time; /* last time written*/ }; /* * User specifiable flags. */ #define MNT_RDONLY 0x00000001 /* read only filesystem */ #define MNT_SYNCHRONOUS 0x00000002 /* file system written synchronously */ #define MNT_NOEXEC 0x00000004 /* can't exec from filesystem */ #define MNT_NOSUID 0x00000008 /* don't honor setuid bits on fs */ #define MNT_NODEV 0x00000010 /* don't interpret special files */ #define MNT_UNION 0x00000020 /* union with underlying filesystem */ #define MNT_ASYNC 0x00000040 /* file system written asynchronously */ #define MNT_SUIDDIR 0x00100000 /* special handling of SUID on dirs */ #define MNT_SOFTDEP 0x00200000 /* soft updates being done */ #define MNT_NOATIME 0x10000000 /* disable update of file access time */ #define MNT_NOCLUSTERR 0x40000000 /* disable cluster read */ #define MNT_NOCLUSTERW 0x80000000 /* disable cluster write */ /* * NFS export related mount flags. 
*/ #define MNT_EXRDONLY 0x00000080 /* exported read only */ #define MNT_EXPORTED 0x00000100 /* file system is exported */ #define MNT_DEFEXPORTED 0x00000200 /* exported to the world */ #define MNT_EXPORTANON 0x00000400 /* use anon uid mapping for everyone */ #define MNT_EXKERB 0x00000800 /* exported with Kerberos uid mapping */ #define MNT_EXPUBLIC 0x20000000 /* public export (WebNFS) */ /* * Flags set by internal operations, * but visible to the user. * XXX some of these are not quite right.. (I've never seen the root flag set) */ #define MNT_LOCAL 0x00001000 /* filesystem is stored locally */ #define MNT_QUOTA 0x00002000 /* quotas are enabled on filesystem */ #define MNT_ROOTFS 0x00004000 /* identifies the root filesystem */ #define MNT_USER 0x00008000 /* mounted by a user */ /* * Mask of flags that are visible to statfs() * XXX I think that this could now become (~(MNT_CMDFLAGS)) * but the 'mount' program may need changing to handle this. * XXX MNT_EXPUBLIC is presently left out. I don't know why. */ #define MNT_VISFLAGMASK (MNT_RDONLY | MNT_SYNCHRONOUS | MNT_NOEXEC | \ MNT_NOSUID | MNT_NODEV | MNT_UNION | \ MNT_ASYNC | MNT_EXRDONLY | MNT_EXPORTED | \ MNT_DEFEXPORTED | MNT_EXPORTANON| MNT_EXKERB | \ MNT_LOCAL | MNT_USER | MNT_QUOTA | \ MNT_ROOTFS | MNT_NOATIME | MNT_NOCLUSTERR| \ MNT_NOCLUSTERW | MNT_SUIDDIR | MNT_SOFTDEP \ /* | MNT_EXPUBLIC */) /* * External filesystem command modifier flags. * Unmount can use the MNT_FORCE flag. * XXX These are not STATES and really should be somewhere else. */ #define MNT_UPDATE 0x00010000 /* not a real mount, just an update */ #define MNT_DELEXPORT 0x00020000 /* delete export host lists */ #define MNT_RELOAD 0x00040000 /* reload filesystem data */ #define MNT_FORCE 0x00080000 /* force unmount or readonly change */ #define MNT_CMDFLAGS (MNT_UPDATE|MNT_DELEXPORT|MNT_RELOAD|MNT_FORCE) /* * Internal filesystem control flags stored in mnt_kern_flag. 
* * MNTK_UNMOUNT locks the mount entry so that name lookup cannot proceed * past the mount point. This keeps the subtree stable during mounts * and unmounts. */ #define MNTK_UNMOUNT 0x01000000 /* unmount in progress */ #define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */ #define MNTK_WANTRDWR 0x04000000 /* upgrade to read/write requested */ /* * Sysctl CTL_VFS definitions. * * Second level identifier specifies which filesystem. Second level * identifier VFS_VFSCONF returns information about all filesystems. * Second level identifier VFS_GENERIC is non-terminal. */ #define VFS_VFSCONF 0 /* get configured filesystems */ #define VFS_GENERIC 0 /* generic filesystem information */ /* * Third level identifiers for VFS_GENERIC are given below; third * level identifiers for specific filesystems are given in their * mount specific header files. */ #define VFS_MAXTYPENUM 1 /* int: highest defined filesystem type */ #define VFS_CONF 2 /* struct: vfsconf for filesystem given as next argument */ /* * Flags for various system call interfaces. * * waitfor flags to vfs_sync() and getfsstat() */ #define MNT_WAIT 1 /* synchronously wait for I/O to complete */ #define MNT_NOWAIT 2 /* start all I/O, but do not wait for it */ #define MNT_LAZY 3 /* push data not written by filesystem syncer */ /* * Generic file handle */ struct fhandle { fsid_t fh_fsid; /* File system id of mount point */ struct fid fh_fid; /* File sys specific id */ }; typedef struct fhandle fhandle_t; /* * Export arguments for local filesystem mount calls. 
*/ struct export_args { int ex_flags; /* export related flags */ uid_t ex_root; /* mapping for root uid */ struct ucred ex_anon; /* mapping for anonymous user */ struct sockaddr *ex_addr; /* net address to which exported */ int ex_addrlen; /* and the net address length */ struct sockaddr *ex_mask; /* mask of valid bits in saddr */ int ex_masklen; /* and the smask length */ char *ex_indexfile; /* index file for WebNFS URLs */ }; /* * Structure holding information for a publicly exported filesystem * (WebNFS). Currently the specs allow just for one such filesystem. */ struct nfs_public { int np_valid; /* Do we hold valid information */ fhandle_t np_handle; /* Filehandle for pub fs (internal) */ struct mount *np_mount; /* Mountpoint of exported fs */ char *np_index; /* Index file */ }; /* * Filesystem configuration information. One of these exists for each * type of filesystem supported by the kernel. These are searched at * mount time to identify the requested filesystem. */ struct vfsconf { struct vfsops *vfc_vfsops; /* filesystem operations vector */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ struct vfsconf *vfc_next; /* next in list */ }; struct ovfsconf { void *vfc_vfsops; char vfc_name[32]; int vfc_index; int vfc_refcount; int vfc_flags; }; /* * NB: these flags refer to IMPLEMENTATION properties, not properties of * any actual mounts; i.e., it does not make sense to change the flags. 
*/ #define VFCF_STATIC 0x00010000 /* statically compiled into kernel */ #define VFCF_NETWORK 0x00020000 /* may get data over the network */ #define VFCF_READONLY 0x00040000 /* writes are not implemented */ #define VFCF_SYNTHETIC 0x00080000 /* data does not represent real files */ #define VFCF_LOOPBACK 0x00100000 /* aliases some other mounted FS */ #define VFCF_UNICODE 0x00200000 /* stores file names as Unicode*/ #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_MOUNT); #endif extern int maxvfsconf; /* highest defined filesystem type */ extern struct vfsconf *vfsconf; /* head of list of filesystem types */ /* * Operations supported on mounted file system. */ #ifdef __STDC__ struct nameidata; struct mbuf; #endif struct vfsops { int (*vfs_mount) __P((struct mount *mp, char *path, caddr_t data, struct nameidata *ndp, struct proc *p)); int (*vfs_start) __P((struct mount *mp, int flags, struct proc *p)); int (*vfs_unmount) __P((struct mount *mp, int mntflags, struct proc *p)); int (*vfs_root) __P((struct mount *mp, struct vnode **vpp)); int (*vfs_quotactl) __P((struct mount *mp, int cmds, uid_t uid, caddr_t arg, struct proc *p)); int (*vfs_statfs) __P((struct mount *mp, struct statfs *sbp, struct proc *p)); int (*vfs_sync) __P((struct mount *mp, int waitfor, struct ucred *cred, struct proc *p)); int (*vfs_vget) __P((struct mount *mp, ino_t ino, struct vnode **vpp)); int (*vfs_vrele) __P((struct mount *mp, struct vnode *vp)); int (*vfs_fhtovp) __P((struct mount *mp, struct fid *fhp, struct sockaddr *nam, struct vnode **vpp, int *exflagsp, struct ucred **credanonp)); int (*vfs_vptofh) __P((struct vnode *vp, struct fid *fhp)); int (*vfs_init) __P((struct vfsconf *)); }; #define VFS_MOUNT(MP, PATH, DATA, NDP, P) \ (*(MP)->mnt_op->vfs_mount)(MP, PATH, DATA, NDP, P) #define VFS_START(MP, FLAGS, P) (*(MP)->mnt_op->vfs_start)(MP, FLAGS, P) #define VFS_UNMOUNT(MP, FORCE, P) (*(MP)->mnt_op->vfs_unmount)(MP, FORCE, P) #define VFS_ROOT(MP, VPP) (*(MP)->mnt_op->vfs_root)(MP, VPP) 
#define VFS_QUOTACTL(MP,C,U,A,P) (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A, P) #define VFS_STATFS(MP, SBP, P) (*(MP)->mnt_op->vfs_statfs)(MP, SBP, P) #define VFS_SYNC(MP, WAIT, C, P) (*(MP)->mnt_op->vfs_sync)(MP, WAIT, C, P) #define VFS_VGET(MP, INO, VPP) (*(MP)->mnt_op->vfs_vget)(MP, INO, VPP) #define VFS_VRELE(MP, VP) (*(MP)->mnt_op->vfs_vrele)(MP, VP) #define VFS_FHTOVP(MP, FIDP, NAM, VPP, EXFLG, CRED) \ (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, NAM, VPP, EXFLG, CRED) #define VFS_VPTOFH(VP, FIDP) (*(VP)->v_mount->mnt_op->vfs_vptofh)(VP, FIDP) #ifdef VFS_LKM #include #include #include #include #define VFS_SET(vfsops, fsname, index, flags) \ static struct vfsconf _fs_vfsconf = { \ &vfsops, \ #fsname, \ index, \ 0, \ flags, \ }; \ extern struct linker_set MODVNOPS; \ MOD_VFS(fsname,&MODVNOPS,&_fs_vfsconf); \ extern int \ fsname ## _mod __P((struct lkm_table *, int, int)); \ int \ fsname ## _mod(struct lkm_table *lkmtp, int cmd, int ver) { \ MOD_DISPATCH(fsname, \ lkmtp, cmd, ver, lkm_nullcmd, lkm_nullcmd, lkm_nullcmd); } #else #define VFS_SET(vfsops, fsname, index, flags) \ static struct vfsconf _fs_vfsconf = { \ &vfsops, \ #fsname, \ index, \ 0, \ flags | VFCF_STATIC, \ }; \ DATA_SET(vfs_set,_fs_vfsconf) #endif /* VFS_LKM */ #endif /* KERNEL */ #ifdef KERNEL #include #include /* XXX for AF_MAX */ /* * Network address lookup element */ struct netcred { struct radix_node netc_rnodes[2]; int netc_exflags; struct ucred netc_anon; }; /* * Network export information */ struct netexport { struct netcred ne_defexported; /* Default export */ struct radix_node_head *ne_rtable[AF_MAX+1]; /* Individual exports */ }; extern char *mountrootfsname; /* * exported vnode operations */ int dounmount __P((struct mount *, int, struct proc *)); int vfs_setpublicfs /* set publicly exported fs */ __P((struct mount *, struct netexport *, struct export_args *)); int vfs_lock __P((struct mount *)); /* lock a vfs */ void vfs_msync __P((struct mount *, int)); void vfs_unlock __P((struct mount 
*)); /* unlock a vfs */ int vfs_busy __P((struct mount *, int, struct simplelock *, struct proc *)); int vfs_export /* process mount export info */ __P((struct mount *, struct netexport *, struct export_args *)); int vfs_vrele __P((struct mount *, struct vnode *)); struct netcred *vfs_export_lookup /* lookup host in fs export list */ __P((struct mount *, struct netexport *, struct sockaddr *)); int vfs_allocate_syncvnode __P((struct mount *)); void vfs_getnewfsid __P((struct mount *)); struct mount *vfs_getvfs __P((fsid_t *)); /* return vfs given fsid */ int vfs_mountedon __P((struct vnode *)); /* is a vfs mounted on vp */ int vfs_rootmountalloc __P((char *, char *, struct mount **)); void vfs_unbusy __P((struct mount *, struct proc *)); void vfs_unmountall __P((void)); extern CIRCLEQ_HEAD(mntlist, mount) mountlist; /* mounted filesystem list */ extern struct simplelock mountlist_slock; extern struct nfs_public nfs_pub; #else /* !KERNEL */ #include __BEGIN_DECLS int fstatfs __P((int, struct statfs *)); int getfh __P((const char *, fhandle_t *)); int getfsstat __P((struct statfs *, long, int)); int getmntinfo __P((struct statfs **, int)); int mount __P((const char *, const char *, int, void *)); int statfs __P((const char *, struct statfs *)); int unmount __P((const char *, int)); /* C library stuff */ void endvfsent __P((void)); struct ovfsconf *getvfsbyname __P((const char *)); struct ovfsconf *getvfsbytype __P((int)); struct ovfsconf *getvfsent __P((void)); #define getvfsbyname new_getvfsbyname int new_getvfsbyname __P((const char *, struct vfsconf *)); void setvfsent __P((int)); int vfsisloadable __P((const char *)); int vfsload __P((const char *)); __END_DECLS #endif /* KERNEL */ #endif /* !_SYS_MOUNT_H_ */ diff --git a/sys/sys/namei.h b/sys/sys/namei.h index ebeb8531c9b2..27f8343d00f5 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -1,195 +1,196 @@ /* * Copyright (c) 1985, 1989, 1991, 1993 * The Regents of the University of California. 
All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)namei.h 8.5 (Berkeley) 1/9/95 - * $Id: namei.h,v 1.18 1998/01/06 05:22:51 dyson Exp $ + * $Id: namei.h,v 1.19 1998/01/12 19:10:43 phk Exp $ */ #ifndef _SYS_NAMEI_H_ #define _SYS_NAMEI_H_ #include +#include /* * Encapsulation of namei parameters. 
*/ struct nameidata { /* * Arguments to namei/lookup. */ const char *ni_dirp; /* pathname pointer */ enum uio_seg ni_segflg; /* location of pathname */ /* u_long ni_nameiop; namei operation */ /* u_long ni_flags; flags to namei */ /* struct proc *ni_proc; process requesting lookup */ /* * Arguments to lookup. */ /* struct ucred *ni_cred; credentials */ struct vnode *ni_startdir; /* starting directory */ struct vnode *ni_rootdir; /* logical root directory */ /* * Results: returned from/manipulated by lookup */ struct vnode *ni_vp; /* vnode of result */ struct vnode *ni_dvp; /* vnode of intermediate directory */ /* * Shared between namei and lookup/commit routines. */ long ni_pathlen; /* remaining chars in path */ char *ni_next; /* next location in pathname */ u_long ni_loopcnt; /* count of symlinks encountered */ /* * Lookup parameters: this structure describes the subset of * information from the nameidata structure that is passed * through the VOP interface. */ struct componentname { /* * Arguments to lookup. */ u_long cn_nameiop; /* namei operation */ u_long cn_flags; /* flags to namei */ struct proc *cn_proc; /* process requesting lookup */ struct ucred *cn_cred; /* credentials */ /* * Shared between lookup and commit routines. 
*/ char *cn_pnbuf; /* pathname buffer */ char *cn_nameptr; /* pointer to looked up name */ long cn_namelen; /* length of looked up component */ u_long cn_hash; /* hash value of looked up name */ long cn_consume; /* chars to consume in lookup() */ } ni_cnd; }; #ifdef KERNEL /* * namei operations */ #define LOOKUP 0 /* perform name lookup only */ #define CREATE 1 /* setup for file creation */ #define DELETE 2 /* setup for file deletion */ #define RENAME 3 /* setup for file renaming */ #define OPMASK 3 /* mask for operation */ /* * namei operational modifier flags, stored in ni_cnd.flags */ #define LOCKLEAF 0x0004 /* lock inode on return */ #define LOCKPARENT 0x0008 /* want parent vnode returned locked */ #define WANTPARENT 0x0010 /* want parent vnode returned unlocked */ #define NOCACHE 0x0020 /* name must not be left in cache */ #define FOLLOW 0x0040 /* follow symbolic links */ #define NOOBJ 0x0080 /* don't create object */ #define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */ #define MODMASK 0x00fc /* mask of operational modifiers */ /* * Namei parameter descriptors. * * SAVENAME may be set by either the callers of namei or by VOP_LOOKUP. * If the caller of namei sets the flag (for example execve wants to * know the name of the program that is being executed), then it must * free the buffer. If VOP_LOOKUP sets the flag, then the buffer must * be freed by either the commit routine or the VOP_ABORT routine. * SAVESTART is set only by the callers of namei. It implies SAVENAME * plus the addition of saving the parent directory that contains the * name in ni_startdir. It allows repeated calls to lookup for the * name being sought. The caller is responsible for releasing the * buffer and for vrele'ing ni_startdir. 
*/ #define NOCROSSMOUNT 0x000100 /* do not cross mount points */ #define RDONLY 0x000200 /* lookup with read-only semantics */ #define HASBUF 0x000400 /* has allocated pathname buffer */ #define SAVENAME 0x000800 /* save pathname buffer */ #define SAVESTART 0x001000 /* save starting directory */ #define ISDOTDOT 0x002000 /* current component name is .. */ #define MAKEENTRY 0x004000 /* entry is to be added to name cache */ #define ISLASTCN 0x008000 /* this is last component of pathname */ #define ISSYMLINK 0x010000 /* symlink needs interpretation */ #define ISWHITEOUT 0x020000 /* found whiteout */ #define DOWHITEOUT 0x040000 /* do whiteouts */ #define WILLBEDIR 0x080000 /* new files will be dirs; allow trailing / */ #define ISUNICODE 0x100000 /* current component name is unicode*/ #define PARAMASK 0x1fff00 /* mask of parameter descriptors */ /* * Initialization of an nameidata structure. */ #define NDINIT(ndp, op, flags, segflg, namep, p) { \ (ndp)->ni_cnd.cn_nameiop = op; \ (ndp)->ni_cnd.cn_flags = flags; \ (ndp)->ni_segflg = segflg; \ (ndp)->ni_dirp = namep; \ (ndp)->ni_cnd.cn_proc = p; \ } #endif /* * This structure describes the elements in the cache of recent * names looked up by namei. */ struct namecache { LIST_ENTRY(namecache) nc_hash; /* hash chain */ LIST_ENTRY(namecache) nc_src; /* source vnode list */ TAILQ_ENTRY(namecache) nc_dst; /* destination vnode list */ struct vnode *nc_dvp; /* vnode of parent of name */ struct vnode *nc_vp; /* vnode the name refers to */ u_char nc_flag; /* flag bits */ u_char nc_nlen; /* length of name */ char nc_name[0]; /* segment name */ }; #ifdef KERNEL int namei __P((struct nameidata *ndp)); int lookup __P((struct nameidata *ndp)); int relookup __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)); #endif /* * Stats on usefulness of namei caches. 
*/ struct nchstats { long ncs_goodhits; /* hits that we can really use */ long ncs_neghits; /* negative hits that we can use */ long ncs_badhits; /* hits we must drop */ long ncs_falsehits; /* hits with id mismatch */ long ncs_miss; /* misses */ long ncs_long; /* long names that ignore cache */ long ncs_pass2; /* names found with passes == 2 */ long ncs_2passes; /* number of times we attempt it */ }; extern struct nchstats nchstats; #endif /* !_SYS_NAMEI_H_ */ diff --git a/sys/sys/param.h b/sys/sys/param.h index a8c0f0a879ba..68ac09c545e1 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -1,224 +1,224 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)param.h 8.3 (Berkeley) 4/4/95 - * $Id: param.h,v 1.26 1997/03/03 09:51:10 ache Exp $ + * $Id: param.h,v 1.27 1997/09/21 22:09:16 gibbs Exp $ */ #ifndef _SYS_PARAM_H_ #define _SYS_PARAM_H_ #define BSD 199506 /* System version (year & month). */ #define BSD4_3 1 #define BSD4_4 1 #ifndef NULL #define NULL 0 #endif #ifndef LOCORE #include #endif /* * Machine-independent constants (some used in following include files). * Redefined constants are from POSIX 1003.1 limits file. * * MAXCOMLEN should be >= sizeof(ac_comm) (see ) * MAXLOGNAME should be == UT_NAMESIZE+1 (see ) */ #include #define MAXCOMLEN 16 /* max command name remembered */ #define MAXINTERP 32 /* max interpreter file name length */ #define MAXLOGNAME 17 /* max login name length (incl. NUL) */ #define MAXUPRC CHILD_MAX /* max simultaneous processes */ #define NCARGS ARG_MAX /* max bytes for an exec function */ #define NGROUPS NGROUPS_MAX /* max number groups */ #define NOFILE OPEN_MAX /* max open files per process */ #define NOGROUP 65535 /* marker for empty group set member */ #define MAXHOSTNAMELEN 256 /* max hostname size */ /* More types and definitions used throughout the kernel. 
*/ #ifdef KERNEL #include #include #include -#include -#include -#include -#include #define FALSE 0 #define TRUE 1 #endif +#ifndef KERNEL /* Signals. */ #include +#endif /* Machine type dependent parameters. */ #include +#ifndef KERNEL #include +#endif /* * Priorities. Note that with 32 run queues, differences less than 4 are * insignificant. */ #define PSWP 0 #define PVM 4 #define PINOD 8 #define PRIBIO 16 #define PVFS 20 #define PZERO 22 /* No longer magic, shouldn't be here. XXX */ #define PSOCK 24 #define PWAIT 32 #define PCONFIG 32 #define PLOCK 36 #define PPAUSE 40 #define PUSER 50 #define MAXPRI 127 /* Priorities range from 0 through MAXPRI. */ #define PRIMASK 0x0ff #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define NZERO 0 /* default "nice" */ #define NBPW sizeof(int) /* number of bytes per word (integer) */ #define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */ #define NODEV (dev_t)(-1) /* non-existent device */ /* * Clustering of hardware pages on machines with ridiculously small * page sizes is done here. The paging subsystem deals with units of * CLSIZE pte's describing PAGE_SIZE (from machine/machparam.h) pages each. */ #if 0 #define CLBYTES (CLSIZE*PAGE_SIZE) #endif #define CBLOCK 128 /* Clist block size, must be a power of 2. */ #define CBQSIZE (CBLOCK/NBBY) /* Quote bytes/cblock - can do better. */ /* Data chars/clist. */ #define CBSIZE (CBLOCK - sizeof(struct cblock *) - CBQSIZE) #define CROUND (CBLOCK - 1) /* Clist rounding. */ /* * File system parameters and macros. * * The file system is made out of blocks of at most MAXBSIZE units, with * smaller units (fragments) only in the last direct block. MAXBSIZE * primarily determines the size of buffers in the buffer pool. It may be * made larger without any effect on existing file systems; however making * it smaller make make some file systems unmountable. Also, MAXBSIZE * must be less than MAXPHYS!!! 
DFLTBSIZE is the average amount of * memory allocated by vfs_bio per nbuf. BKVASIZE is the average amount * of kernel virtual space allocated per nbuf. BKVASIZE should be >= * DFLTBSIZE. If it is significantly bigger than DFLTBSIZE, then * kva fragmentation causes fewer performance problems. */ #define MAXBSIZE 65536 #define BKVASIZE 8192 #define DFLTBSIZE 4096 #define MAXFRAG 8 /* * MAXPATHLEN defines the longest permissible path length after expanding * symbolic links. It is used to allocate a temporary buffer from the buffer * pool in which to do the name expansion, hence should be a power of two, * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the * maximum number of symbolic links that may be expanded in a path name. * It should be set high enough to allow all legitimate uses, but halt * infinite loops reasonably quickly. */ #define MAXPATHLEN PATH_MAX #define MAXSYMLINKS 32 /* Bit map related macros. */ #define setbit(a,i) ((a)[(i)/NBBY] |= 1<<((i)%NBBY)) #define clrbit(a,i) ((a)[(i)/NBBY] &= ~(1<<((i)%NBBY))) #define isset(a,i) ((a)[(i)/NBBY] & (1<<((i)%NBBY))) #define isclr(a,i) (((a)[(i)/NBBY] & (1<<((i)%NBBY))) == 0) /* Macros for counting and rounding. */ #ifndef howmany #define howmany(x, y) (((x)+((y)-1))/(y)) #endif #define rounddown(x, y) (((x)/(y))*(y)) #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */ #define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is powers of two */ #define powerof2(x) ((((x)-1)&(x))==0) /* Macros for min/max. */ #ifndef KERNEL #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) #endif /* * Constants for setting the parameters of the kernel memory allocator. * * 2 ** MINBUCKET is the smallest unit of memory that will be * allocated. It must be at least large enough to hold a pointer. * * Units of memory less or equal to MAXALLOCSAVE will permanently * allocate physical memory; requests for these size pieces of * memory are quite fast. 
Allocations greater than MAXALLOCSAVE must * always allocate and free physical memory; requests for these * size allocations should be done infrequently as they will be slow. * * Constraints: PAGE_SIZE <= MAXALLOCSAVE <= 2 ** (MINBUCKET + 14), and * MAXALLOCSIZE must be a power of two. */ #define MINBUCKET 4 /* 4 => min allocation of 16 bytes */ #define MAXALLOCSAVE (2 * PAGE_SIZE) /* * Scale factor for scaled integers used to count %cpu time and load avgs. * * The number of CPU `tick's that map to a unique `%age' can be expressed * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that * can be calculated (assuming 32 bits) can be closely approximated using * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15). * * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age', * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024. */ #define FSHIFT 11 /* bits to right of fixed binary point */ #define FSCALE (1< /* Machine-dependent proc substruct. */ #include /* For struct callout_handle. */ #include /* For struct rtprio. */ #include /* For struct selinfo. */ +#include +#ifndef KERNEL #include /* For structs itimerval, timeval. */ +#endif +#include #include -#include /* * One structure allocated per session. */ struct session { int s_count; /* Ref cnt; pgrps in session. */ struct proc *s_leader; /* Session leader. */ struct vnode *s_ttyvp; /* Vnode of controlling terminal. */ struct tty *s_ttyp; /* Controlling terminal. */ char s_login[roundup(MAXLOGNAME, sizeof(long))]; /* Setlogin() name. */ }; /* * One structure allocated per process group. */ struct pgrp { LIST_ENTRY(pgrp) pg_hash; /* Hash chain. */ LIST_HEAD(, proc) pg_members; /* Pointer to pgrp members. */ struct session *pg_session; /* Pointer to session. */ pid_t pg_id; /* Pgrp id. */ int pg_jobc; /* # procs qualifying pgrp for job control */ }; /* * Description of a process. 
* * This structure contains the information needed to manage a thread of * control, known in UN*X as a process; it has references to substructures * containing descriptions of things that the process uses, but may share * with related processes. The process structure and the substructures * are always addressable except for those marked "(PROC ONLY)" below, * which might be addressable only on a processor on which the process * is running. */ struct proc { TAILQ_ENTRY(proc) p_procq; /* run/sleep queue. */ LIST_ENTRY(proc) p_list; /* List of all processes. */ /* substructures: */ struct pcred *p_cred; /* Process owner's identity. */ struct filedesc *p_fd; /* Ptr to open files structure. */ struct pstats *p_stats; /* Accounting/statistics (PROC ONLY). */ struct plimit *p_limit; /* Process limits. */ struct vm_object *p_upages_obj;/* Upages object */ struct sigacts *p_sigacts; /* Signal actions, state (PROC ONLY). */ #define p_ucred p_cred->pc_ucred #define p_rlimit p_limit->pl_rlimit int p_flag; /* P_* flags. */ char p_stat; /* S* process status. */ char p_pad1[3]; pid_t p_pid; /* Process identifier. */ LIST_ENTRY(proc) p_hash; /* Hash chain. */ LIST_ENTRY(proc) p_pglist; /* List of processes in pgrp. */ struct proc *p_pptr; /* Pointer to parent process. */ LIST_ENTRY(proc) p_sibling; /* List of sibling processes. */ LIST_HEAD(, proc) p_children; /* Pointer to list of children. */ struct callout_handle p_ithandle; /* * Callout handle for scheduling * p_realtimer. */ /* The following fields are all zeroed upon creation in fork. */ #define p_startzero p_oppid pid_t p_oppid; /* Save parent pid during ptrace. XXX */ int p_dupfd; /* Sideways return value from fdopen. XXX */ struct vmspace *p_vmspace; /* Address space. */ /* scheduling */ u_int p_estcpu; /* Time averaged value of p_cpticks. */ int p_cpticks; /* Ticks of cpu time. */ fixpt_t p_pctcpu; /* %cpu for this process during p_swtime */ void *p_wchan; /* Sleep address. */ const char *p_wmesg; /* Reason for sleep. 
*/ u_int p_swtime; /* Time swapped in or out. */ u_int p_slptime; /* Time since last blocked. */ struct itimerval p_realtimer; /* Alarm timer. */ struct timeval p_rtime; /* Real time. */ u_quad_t p_uticks; /* Statclock hits in user mode. */ u_quad_t p_sticks; /* Statclock hits in system mode. */ u_quad_t p_iticks; /* Statclock hits processing intr. */ struct timeval *p_sleepend; /* Wake time for nanosleep & friends */ int p_traceflag; /* Kernel trace points. */ struct vnode *p_tracep; /* Trace to vnode. */ int p_siglist; /* Signals arrived but not delivered. */ struct vnode *p_textvp; /* Vnode of executable. */ char p_lock; /* Process lock (prevent swap) count. */ char p_oncpu; /* Which cpu we are on */ char p_lastcpu; /* Last cpu we were on */ char p_pad2; /* alignment */ short p_locks; /* DEBUG: lockmgr count of held locks */ short p_simple_locks; /* DEBUG: count of held simple locks */ unsigned int p_stops; /* procfs event bitmask */ unsigned int p_stype; /* procfs stop event type */ char p_step; /* procfs stop *once* flag */ unsigned char p_pfsflags; /* procfs flags */ char p_pad3[2]; /* padding for alignment */ register_t p_retval[2]; /* syscall aux returns */ /* End area that is zeroed on creation. */ #define p_endzero p_startcopy /* The following fields are all copied upon creation in fork. */ #define p_startcopy p_sigmask sigset_t p_sigmask; /* Current signal mask. */ sigset_t p_sigignore; /* Signals being ignored. */ sigset_t p_sigcatch; /* Signals being caught by user. */ u_char p_priority; /* Process priority. */ u_char p_usrpri; /* User-priority based on p_cpu and p_nice. */ char p_nice; /* Process "nice" value. */ char p_comm[MAXCOMLEN+1]; struct pgrp *p_pgrp; /* Pointer to process group. */ struct sysentvec *p_sysent; /* System call dispatch information. */ struct rtprio p_rtprio; /* Realtime priority. */ /* End area that is copied on creation. */ #define p_endcopy p_addr struct user *p_addr; /* Kernel virtual addr of u-area (PROC ONLY). 
*/ struct mdproc p_md; /* Any machine-dependent fields. */ u_short p_xstat; /* Exit status for wait; also stop signal. */ u_short p_acflag; /* Accounting flags. */ struct rusage *p_ru; /* Exit information. XXX */ int p_nthreads; /* number of threads (only in leader) */ void *p_aioinfo; /* ASYNC I/O info */ int p_wakeup; /* thread id */ struct proc *p_peers; struct proc *p_leader; }; #define p_session p_pgrp->pg_session #define p_pgid p_pgrp->pg_id /* Status values. */ #define SIDL 1 /* Process being created by fork. */ #define SRUN 2 /* Currently runnable. */ #define SSLEEP 3 /* Sleeping on an address. */ #define SSTOP 4 /* Process debugging or suspension. */ #define SZOMB 5 /* Awaiting collection by parent. */ /* These flags are kept in p_flags. */ #define P_ADVLOCK 0x00001 /* Process may hold a POSIX advisory lock. */ #define P_CONTROLT 0x00002 /* Has a controlling terminal. */ #define P_INMEM 0x00004 /* Loaded into memory. */ #define P_NOCLDSTOP 0x00008 /* No SIGCHLD when children stop. */ #define P_PPWAIT 0x00010 /* Parent is waiting for child to exec/exit. */ #define P_PROFIL 0x00020 /* Has started profiling. */ #define P_SELECT 0x00040 /* Selecting; wakeup/waiting danger. */ #define P_SINTR 0x00080 /* Sleep is interruptible. */ #define P_SUGID 0x00100 /* Had set id privileges since last exec. */ #define P_SYSTEM 0x00200 /* System proc: no sigs, stats or swapping. */ #define P_TIMEOUT 0x00400 /* Timing out during sleep. */ #define P_TRACED 0x00800 /* Debugged process being traced. */ #define P_WAITED 0x01000 /* Debugging process has waited for child. */ #define P_WEXIT 0x02000 /* Working on exiting. */ #define P_EXEC 0x04000 /* Process called exec. */ /* Should probably be changed into a hold count. */ #define P_NOSWAP 0x08000 /* Another flag to prevent swap out. */ #define P_PHYSIO 0x10000 /* Doing physical I/O. */ /* Should be moved to machine-dependent areas. */ #define P_OWEUPC 0x20000 /* Owe process an addupc() call at next ast. 
*/ #define P_SWAPPING 0x40000 /* Process is being swapped. */ #define P_SWAPINREQ 0x80000 /* Swapin request due to wakeup */ /* Marked a kernel thread */ #define P_KTHREADP 0x200000 /* Process is really a kernel thread */ #define P_NOCLDWAIT 0x400000 /* No zombies if child dies */ /* * MOVE TO ucred.h? * * Shareable process credentials (always resident). This includes a reference * to the current user credentials as well as real and saved ids that may be * used to change ids. */ struct pcred { struct ucred *pc_ucred; /* Current credentials. */ uid_t p_ruid; /* Real user id. */ uid_t p_svuid; /* Saved effective user id. */ gid_t p_rgid; /* Real group id. */ gid_t p_svgid; /* Saved effective group id. */ int p_refcnt; /* Number of references. */ }; #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_SESSION); MALLOC_DECLARE(M_SUBPROC); #endif /* * We use process IDs <= PID_MAX; PID_MAX + 1 must also fit in a pid_t, * as it is used to represent "no process group". */ #define PID_MAX 30000 #define NO_PID 30001 #define SESS_LEADER(p) ((p)->p_session->s_leader == (p)) #define SESSHOLD(s) ((s)->s_count++) #define SESSRELE(s) { \ if (--(s)->s_count == 0) \ FREE(s, M_SESSION); \ } extern void stopevent(struct proc*, unsigned int, unsigned int); #define STOPEVENT(p,e,v) do { \ if ((p)->p_stops & (e)) stopevent(p,e,v); } while (0) /* hold process U-area in memory, normally for ptrace/procfs work */ #define PHOLD(p) { \ if ((p)->p_lock++ == 0 && ((p)->p_flag & P_INMEM) == 0) \ faultin(p); \ } #define PRELE(p) (--(p)->p_lock) #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; extern u_long pidhash; #define PGRPHASH(pgid) (&pgrphashtbl[(pgid) & pgrphash]) extern LIST_HEAD(pgrphashhead, pgrp) *pgrphashtbl; extern u_long pgrphash; extern struct proc *curproc; /* Current running proc. */ extern struct proc proc0; /* Process slot for swapper. */ extern int nprocs, maxproc; /* Current and max number of procs. 
*/ extern int maxprocperuid; /* Max procs per uid. */ LIST_HEAD(proclist, proc); extern struct proclist allproc; /* List of all processes. */ extern struct proclist zombproc; /* List of zombie processes. */ extern struct proc *initproc, *pageproc; /* Process slots for init, pager. */ #define NQS 32 /* 32 run queues. */ extern struct prochd qs[]; extern struct prochd rtqs[]; extern struct prochd idqs[]; extern int whichqs; /* Bit mask summary of non-empty Q's. */ extern int whichrtqs; /* Bit mask summary of non-empty Q's. */ extern int whichidqs; /* Bit mask summary of non-empty Q's. */ struct prochd { struct proc *ph_link; /* Linked list of running processes. */ struct proc *ph_rlink; }; struct proc *pfind __P((pid_t)); /* Find process by id. */ struct pgrp *pgfind __P((pid_t)); /* Find process group by id. */ struct vm_zone; extern struct vm_zone *proc_zone; int chgproccnt __P((uid_t uid, int diff)); int enterpgrp __P((struct proc *p, pid_t pgid, int mksess)); void fixjobc __P((struct proc *p, struct pgrp *pgrp, int entering)); int inferior __P((struct proc *p)); int leavepgrp __P((struct proc *p)); void mi_switch __P((void)); void procinit __P((void)); void resetpriority __P((struct proc *)); int roundrobin_interval __P((void)); void setrunnable __P((struct proc *)); void setrunqueue __P((struct proc *)); void sleepinit __P((void)); void remrq __P((struct proc *)); void cpu_switch __P((struct proc *)); void unsleep __P((struct proc *)); void wakeup_one __P((void *chan)); void cpu_exit __P((struct proc *)) __dead2; void exit1 __P((struct proc *, int)) __dead2; void cpu_fork __P((struct proc *, struct proc *)); int fork1 __P((struct proc *, int)); int trace_req __P((struct proc *)); void cpu_wait __P((struct proc *)); int cpu_coredump __P((struct proc *, struct vnode *, struct ucred *)); void setsugid __P((struct proc *p)); #endif /* KERNEL */ #endif /* !_SYS_PROC_H_ */ diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h index 7a6c5c1c89a8..086b62f05918 
100644 --- a/sys/sys/resourcevar.h +++ b/sys/sys/resourcevar.h @@ -1,92 +1,96 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)resourcevar.h 8.4 (Berkeley) 1/9/95 - * $Id$ + * $Id: resourcevar.h,v 1.10 1997/02/22 09:45:46 peter Exp $ */ #ifndef _SYS_RESOURCEVAR_H_ #define _SYS_RESOURCEVAR_H_ +#include + /* * Kernel per-process accounting / statistics * (not necessarily resident except when running). */ struct pstats { #define pstat_startzero p_ru struct rusage p_ru; /* stats for this proc */ struct rusage p_cru; /* sum of stats for reaped children */ #define pstat_endzero pstat_startcopy #define pstat_startcopy p_timer struct itimerval p_timer[3]; /* virtual-time timers */ struct uprof { /* profile arguments */ caddr_t pr_base; /* buffer base */ u_long pr_size; /* buffer size */ u_long pr_off; /* pc offset */ u_long pr_scale; /* pc scaling */ u_long pr_addr; /* temp storage for addr until AST */ u_long pr_ticks; /* temp storage for ticks until AST */ } p_prof; #define pstat_endcopy p_start struct timeval p_start; /* starting time */ }; /* * Kernel shareable process resource limits. Because this structure * is moderately large but changes infrequently, it is normally * shared copy-on-write after forks. If a group of processes * ("threads") share modifications, the PL_SHAREMOD flag is set, * and a copy must be made for the child of a new fork that isn't * sharing modifications to the limits. 
*/ struct plimit { struct rlimit pl_rlimit[RLIM_NLIMITS]; #define PL_SHAREMOD 0x01 /* modifications are shared */ int p_lflags; int p_refcnt; /* number of references */ }; #ifdef KERNEL +struct proc; + void addupc_intr __P((struct proc *p, u_long pc, u_int ticks)); void addupc_task __P((struct proc *p, u_long pc, u_int ticks)); void calcru __P((struct proc *p, struct timeval *up, struct timeval *sp, struct timeval *ip)); int fuswintr __P((void *base)); struct plimit *limcopy __P((struct plimit *lim)); void ruadd __P((struct rusage *ru, struct rusage *ru2)); int suswintr __P((void *base, int word)); #endif #endif /* !_SYS_RESOURCEVAR_H_ */ diff --git a/sys/sys/signalvar.h b/sys/sys/signalvar.h index 54e68a98356d..320120ee5e55 100644 --- a/sys/sys/signalvar.h +++ b/sys/sys/signalvar.h @@ -1,172 +1,175 @@ /* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)signalvar.h 8.6 (Berkeley) 2/19/95 - * $Id: signalvar.h,v 1.16 1997/08/30 11:24:05 peter Exp $ + * $Id: signalvar.h,v 1.17 1998/02/24 02:01:11 bde Exp $ */ #ifndef _SYS_SIGNALVAR_H_ /* tmp for user.h */ #define _SYS_SIGNALVAR_H_ +#include + /* * Kernel signal definitions and data structures, * not exported to user programs. */ /* * Process signal actions and state, needed only within the process * (not necessarily resident). 
*/ struct sigacts { sig_t ps_sigact[NSIG]; /* disposition of signals */ sigset_t ps_catchmask[NSIG]; /* signals to be blocked */ sigset_t ps_sigonstack; /* signals to take on sigstack */ sigset_t ps_sigintr; /* signals that interrupt syscalls */ sigset_t ps_sigreset; /* signals that reset when caught */ sigset_t ps_signodefer; /* signals not masked while handled */ sigset_t ps_oldmask; /* saved mask from before sigpause */ int ps_flags; /* signal flags, below */ struct sigaltstack ps_sigstk; /* sp & on stack state variable */ int ps_sig; /* for core dump/debugger XXX */ u_long ps_code; /* for core dump/debugger XXX */ sigset_t ps_usertramp; /* SunOS compat; libc sigtramp XXX */ }; /* signal flags */ #define SAS_OLDMASK 0x01 /* need to restore mask before pause */ #define SAS_ALTSTACK 0x02 /* have alternate signal stack */ /* additional signal action values, used only temporarily/internally */ #define SIG_CATCH ((__sighandler_t *)2) #define SIG_HOLD ((__sighandler_t *)3) /* * get signal action for process and signal; currently only for current process */ #define SIGACTION(p, sig) (p->p_sigacts->ps_sigact[(sig)]) /* * Determine signal that should be delivered to process p, the current * process, 0 if none. If there is a pending stop signal with default * action, the process stops in issignal(). */ #define CURSIG(p) \ (((p)->p_siglist == 0 || \ (((p)->p_flag & P_TRACED) == 0 && \ ((p)->p_siglist & ~(p)->p_sigmask) == 0)) ? \ 0 : issignal(p)) /* * Clear a pending signal from a process. */ #define CLRSIG(p, sig) { (p)->p_siglist &= ~sigmask(sig); } /* * Signal properties and actions. 
* The array below categorizes the signals and their default actions * according to the following properties: */ #define SA_KILL 0x01 /* terminates process by default */ #define SA_CORE 0x02 /* ditto and coredumps */ #define SA_STOP 0x04 /* suspend process */ #define SA_TTYSTOP 0x08 /* ditto, from tty */ #define SA_IGNORE 0x10 /* ignore by default */ #define SA_CONT 0x20 /* continue if suspended */ #define SA_CANTMASK 0x40 /* non-maskable, catchable */ #ifdef SIGPROP static int sigprop[NSIG + 1] = { 0, /* unused */ SA_KILL, /* SIGHUP */ SA_KILL, /* SIGINT */ SA_KILL|SA_CORE, /* SIGQUIT */ SA_KILL|SA_CORE, /* SIGILL */ SA_KILL|SA_CORE, /* SIGTRAP */ SA_KILL|SA_CORE, /* SIGABRT */ SA_KILL|SA_CORE, /* SIGEMT */ SA_KILL|SA_CORE, /* SIGFPE */ SA_KILL, /* SIGKILL */ SA_KILL|SA_CORE, /* SIGBUS */ SA_KILL|SA_CORE, /* SIGSEGV */ SA_KILL|SA_CORE, /* SIGSYS */ SA_KILL, /* SIGPIPE */ SA_KILL, /* SIGALRM */ SA_KILL, /* SIGTERM */ SA_IGNORE, /* SIGURG */ SA_STOP, /* SIGSTOP */ SA_STOP|SA_TTYSTOP, /* SIGTSTP */ SA_IGNORE|SA_CONT, /* SIGCONT */ SA_IGNORE, /* SIGCHLD */ SA_STOP|SA_TTYSTOP, /* SIGTTIN */ SA_STOP|SA_TTYSTOP, /* SIGTTOU */ SA_IGNORE, /* SIGIO */ SA_KILL, /* SIGXCPU */ SA_KILL, /* SIGXFSZ */ SA_KILL, /* SIGVTALRM */ SA_KILL, /* SIGPROF */ SA_IGNORE, /* SIGWINCH */ SA_IGNORE, /* SIGINFO */ SA_KILL, /* SIGUSR1 */ SA_KILL, /* SIGUSR2 */ }; #define contsigmask (sigmask(SIGCONT)) #define stopsigmask (sigmask(SIGSTOP) | sigmask(SIGTSTP) | \ sigmask(SIGTTIN) | sigmask(SIGTTOU)) #endif /* SIGPROP */ #define sigcantmask (sigmask(SIGKILL) | sigmask(SIGSTOP)) #ifdef KERNEL struct pgrp; +struct proc; /* * Machine-independent functions: */ void execsigs __P((struct proc *p)); void gsignal __P((int pgid, int sig)); int issignal __P((struct proc *p)); void killproc __P((struct proc *p, char *why)); void pgsignal __P((struct pgrp *pgrp, int sig, int checkctty)); void postsig __P((int sig)); void psignal __P((struct proc *p, int sig)); void sigexit __P((struct proc *p, int signum)); 
void siginit __P((struct proc *p)); void trapsignal __P((struct proc *p, int sig, u_long code)); /* * Machine-dependent functions: */ void sendsig __P((sig_t action, int sig, int returnmask, u_long code)); #endif /* KERNEL */ #endif /* !_SYS_SIGNALVAR_H_ */ diff --git a/sys/sys/user.h b/sys/sys/user.h index e05e0eab2b48..ab85860994b1 100644 --- a/sys/sys/user.h +++ b/sys/sys/user.h @@ -1,135 +1,137 @@ /* * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)user.h 8.2 (Berkeley) 9/23/93 - * $Id: user.h,v 1.13 1997/03/03 08:34:08 ache Exp $ + * $Id: user.h,v 1.14 1997/03/03 09:51:15 ache Exp $ */ #ifndef _SYS_USER_H_ #define _SYS_USER_H_ #include #ifndef KERNEL /* stuff that *used* to be included by user.h, or is now needed */ #include #include #include #include #include #include #include /* XXX */ #include /* XXX */ #include /* XXX */ #include /* XXX */ #include /* XXX */ -#else -#include /* XXX */ #endif /* !KERNEL */ +#ifndef _SYS_RESOURCEVAR_H_ #include +#endif +#ifndef _SYS_SIGNALVAR_H_ #include +#endif /* * KERN_PROC subtype ops return arrays of augmented proc structures: */ struct kinfo_proc { struct proc kp_proc; /* proc structure */ struct eproc { struct proc *e_paddr; /* address of proc */ struct session *e_sess; /* session pointer */ struct pcred e_pcred; /* process credentials */ struct ucred e_ucred; /* current credentials */ struct vmspace e_vm; /* address space */ pid_t e_ppid; /* parent process id */ pid_t e_pgid; /* process group id */ short e_jobc; /* job control counter */ dev_t e_tdev; /* controlling tty dev */ pid_t e_tpgid; /* tty process group id */ struct session *e_tsess; /* tty session pointer */ #define WMESGLEN 7 char e_wmesg[WMESGLEN+1]; /* wchan message */ segsz_t e_xsize; /* text size */ short e_xrssize; /* text rss */ short e_xccount; /* text references */ short e_xswrss; long e_flag; #define EPROC_CTTY 0x01 /* controlling tty vnode active */ #define EPROC_SLEADER 0x02 
/* session leader */ char e_login[roundup(MAXLOGNAME, sizeof(long))]; /* setlogin() name */ long e_spare[2]; } kp_eproc; }; void fill_eproc __P((struct proc *, struct eproc *)); /* * Per process structure containing data that isn't needed in core * when the process isn't running (esp. when swapped out). * This structure may or may not be at the same kernel address * in all processes. */ struct user { struct pcb u_pcb; struct sigacts u_sigacts; /* p_sigacts points here (use it!) */ struct pstats u_stats; /* p_stats points here (use it!) */ /* * Remaining fields only for core dump and/or ptrace-- * not valid at other times! */ struct kinfo_proc u_kproc; /* proc + eproc */ struct md_coredump u_md; /* machine dependent glop */ }; /* * Redefinitions to make the debuggers happy for now... This subterfuge * brought to you by coredump() and trace_req(). These fields are *only* * valid at those times! */ #define U_ar0 u_kproc.kp_proc.p_md.md_regs /* copy of curproc->p_md.md_regs */ #define U_tsize u_kproc.kp_eproc.e_vm.vm_tsize #define U_dsize u_kproc.kp_eproc.e_vm.vm_dsize #define U_ssize u_kproc.kp_eproc.e_vm.vm_ssize #define U_sig u_sigacts.ps_sig #define U_code u_sigacts.ps_code #ifndef KERNEL #define u_ar0 U_ar0 #define u_tsize U_tsize #define u_dsize U_dsize #define u_ssize U_ssize #define u_sig U_sig #define u_code U_code #endif /* KERNEL */ #endif diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 6cd92bd2685f..c109a0a434e0 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -1,548 +1,549 @@ /* * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 - * $Id: vnode.h,v 1.68 1998/03/08 09:58:35 julian Exp $ + * $Id: vnode.h,v 1.69 1998/03/16 01:55:35 dyson Exp $ */ #ifndef _SYS_VNODE_H_ #define _SYS_VNODE_H_ #include -#include /* needed for struct selinfo in vnodes */ +#include +#include #include /* * The vnode is the focus of all file activity in UNIX. There is a * unique vnode allocated for each active file, each current directory, * each mounted-on file, text file, and the root. */ /* * Vnode types. VNON means no type. 
*/ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD }; /* * Vnode tag types. * These are for the benefit of external programs only (e.g., pstat) * and should NEVER be inspected by the kernel. */ enum vtagtype { VT_NON, VT_UFS, VT_NFS, VT_MFS, VT_PC, VT_LFS, VT_LOFS, VT_FDESC, VT_PORTAL, VT_NULL, VT_UMAP, VT_KERNFS, VT_PROCFS, VT_AFS, VT_ISOFS, VT_UNION, VT_MSDOSFS, VT_DEVFS, VT_TFS, VT_VFS }; /* * Each underlying filesystem allocates its own private area and hangs * it from v_data. If non-null, this area is freed in getnewvnode(). */ LIST_HEAD(buflists, buf); typedef int vop_t __P((void *)); struct namecache; /* * Reading or writing any of these items requires holding the appropriate lock. * v_freelist is locked by the global vnode_free_list simple lock. * v_mntvnodes is locked by the global mntvnodes simple lock. * v_flag, v_usecount, v_holdcount and v_writecount are * locked by the v_interlock simple lock. * v_pollinfo is locked by the lock contained inside it. */ struct vnode { u_long v_flag; /* vnode flags (see below) */ int v_usecount; /* reference count of users */ int v_writecount; /* reference count of writers */ int v_holdcnt; /* page & buffer references */ daddr_t v_lastr; /* last read (read-ahead) */ u_long v_id; /* capability identifier */ struct mount *v_mount; /* ptr to vfs we are in */ vop_t **v_op; /* vnode operations vector */ TAILQ_ENTRY(vnode) v_freelist; /* vnode freelist */ LIST_ENTRY(vnode) v_mntvnodes; /* vnodes for mount point */ struct buflists v_cleanblkhd; /* clean blocklist head */ struct buflists v_dirtyblkhd; /* dirty blocklist head */ LIST_ENTRY(vnode) v_synclist; /* vnodes with dirty buffers */ long v_numoutput; /* num of writes in progress */ enum vtype v_type; /* vnode type */ union { struct mount *vu_mountedhere;/* ptr to mounted vfs (VDIR) */ struct socket *vu_socket; /* unix ipc (VSOCK) */ struct specinfo *vu_specinfo; /* device (VCHR, VBLK) */ struct fifoinfo *vu_fifoinfo; /* fifo (VFIFO) */ } v_un; struct 
nqlease *v_lease; /* Soft reference to lease */ daddr_t v_lastw; /* last write (write cluster) */ daddr_t v_cstart; /* start block of cluster */ daddr_t v_lasta; /* last allocation */ int v_clen; /* length of current cluster */ int v_maxio; /* maximum I/O cluster size */ struct vm_object *v_object; /* Place to store VM object */ struct simplelock v_interlock; /* lock on usecount and flag */ struct lock *v_vnlock; /* used for non-locking fs's */ enum vtagtype v_tag; /* type of underlying data */ void *v_data; /* private data for fs */ LIST_HEAD(, namecache) v_cache_src; /* Cache entries from us */ TAILQ_HEAD(, namecache) v_cache_dst; /* Cache entries to us */ struct vnode *v_dd; /* .. vnode */ u_long v_ddid; /* .. capability identifier */ struct { struct simplelock vpi_lock; /* lock to protect below */ struct selinfo vpi_selinfo; /* identity of poller(s) */ short vpi_events; /* what they are looking for */ short vpi_revents; /* what has happened */ } v_pollinfo; }; #define v_mountedhere v_un.vu_mountedhere #define v_socket v_un.vu_socket #define v_specinfo v_un.vu_specinfo #define v_fifoinfo v_un.vu_fifoinfo #define VN_POLLEVENT(vp, events) \ do { \ if ((vp)->v_pollinfo.vpi_events & (events)) \ vn_pollevent((vp), (events)); \ } while (0) /* * Vnode flags. 
*/ #define VROOT 0x00001 /* root of its file system */ #define VTEXT 0x00002 /* vnode is a pure text prototype */ #define VSYSTEM 0x00004 /* vnode being used by kernel */ #define VISTTY 0x00008 /* vnode represents a tty */ #define VXLOCK 0x00100 /* vnode is locked to change underlying type */ #define VXWANT 0x00200 /* process is waiting for vnode */ #define VBWAIT 0x00400 /* waiting for output to complete */ #define VALIASED 0x00800 /* vnode has an alias */ #define VDIROP 0x01000 /* LFS: vnode is involved in a directory op */ #define VOBJBUF 0x02000 /* Allocate buffers in VM object */ #define VNINACT 0x04000 /* LFS: skip ufs_inactive() in lfs_vunref */ #define VAGE 0x08000 /* Insert vnode at head of free list */ #define VOLOCK 0x10000 /* vnode is locked waiting for an object */ #define VOWANT 0x20000 /* a process is waiting for VOLOCK */ #define VDOOMED 0x40000 /* This vnode is being recycled */ #define VFREE 0x80000 /* This vnode is on the freelist */ #define VTBFREE 0x100000 /* This vnode is on the to-be-freelist */ #define VONWORKLST 0x200000 /* On syncer work-list */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value * is unavailable (getattr) or which is not to be changed (setattr). 
*/ struct vattr { enum vtype va_type; /* vnode type (for create) */ u_short va_mode; /* files access mode and type */ short va_nlink; /* number of references to file */ uid_t va_uid; /* owner user id */ gid_t va_gid; /* owner group id */ long va_fsid; /* file system id (dev for now) */ long va_fileid; /* file id */ u_quad_t va_size; /* file size in bytes */ long va_blocksize; /* blocksize preferred for i/o */ struct timespec va_atime; /* time of last access */ struct timespec va_mtime; /* time of last modification */ struct timespec va_ctime; /* time file changed */ u_long va_gen; /* generation number of file */ u_long va_flags; /* flags defined for file */ dev_t va_rdev; /* device the special file represents */ u_quad_t va_bytes; /* bytes of disk space held by file */ u_quad_t va_filerev; /* file modification number */ u_int va_vaflags; /* operations flags, see below */ long va_spare; /* remain quad aligned */ }; /* * Flags for va_vaflags. */ #define VA_UTIMES_NULL 0x01 /* utimes argument was NULL */ #define VA_EXCLUSIVE 0x02 /* exclusive create request */ /* * Flags for ioflag. */ #define IO_UNIT 0x01 /* do I/O as atomic unit */ #define IO_APPEND 0x02 /* append write to end */ #define IO_SYNC 0x04 /* do I/O synchronously */ #define IO_NODELOCKED 0x08 /* underlying node already locked */ #define IO_NDELAY 0x10 /* FNDELAY flag set in file table */ #define IO_VMIO 0x20 /* data already in VMIO space */ #define IO_INVAL 0x40 /* invalidate after I/O */ /* * Modes. Some values same as Ixxx entries from inode.h for now. */ #define VSUID 04000 /* set user id on execution */ #define VSGID 02000 /* set group id on execution */ #define VSVTX 01000 /* save swapped text even after use */ #define VREAD 00400 /* read, write, execute permissions */ #define VWRITE 00200 #define VEXEC 00100 /* * Token indicating no attribute value yet assigned. 
*/ #define VNOVAL (-1) #ifdef KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_VNODE); #endif /* * Convert between vnode types and inode formats (since POSIX.1 * defines mode word of stat structure in terms of inode formats). */ extern enum vtype iftovt_tab[]; extern int vttoif_tab[]; #define IFTOVT(mode) (iftovt_tab[((mode) & S_IFMT) >> 12]) #define VTTOIF(indx) (vttoif_tab[(int)(indx)]) #define MAKEIMODE(indx, mode) (int)(VTTOIF(indx) | (mode)) /* * Flags to various vnode functions. */ #define SKIPSYSTEM 0x0001 /* vflush: skip vnodes marked VSYSTEM */ #define FORCECLOSE 0x0002 /* vflush: force file closure */ #define WRITECLOSE 0x0004 /* vflush: only close writable files */ #define DOCLOSE 0x0008 /* vclean: close active files */ #define V_SAVE 0x0001 /* vinvalbuf: sync file first */ #define V_SAVEMETA 0x0002 /* vinvalbuf: leave indirect blocks */ #define REVOKEALL 0x0001 /* vop_revoke: revoke all aliases */ #define VREF(vp) vref(vp) #ifdef DIAGNOSTIC #define VATTR_NULL(vap) vattr_null(vap) #else #define VATTR_NULL(vap) (*(vap) = va_null) /* initialize a vattr */ #endif /* DIAGNOSTIC */ #define NULLVP ((struct vnode *)NULL) #ifdef VFS_LKM #define VNODEOP_SET(f) DATA_SET(MODVNOPS,f) #else #define VNODEOP_SET(f) DATA_SET(vfs_opv_descs_,f) #endif /* * Global vnode data. */ extern struct vnode *rootvnode; /* root (i.e. "/") vnode */ extern int desiredvnodes; /* number of vnodes desired */ extern time_t syncdelay; /* time to delay syncing vnodes */ extern int rushjob; /* # of slots filesys_syncer should run ASAP */ extern struct vm_zone *namei_zone; extern int prtactive; /* nonzero to call vprint() */ extern struct vattr va_null; /* predefined null vattr structure */ extern int vfs_ioopt; /* * Macro/function to check for client cache inconsistency w.r.t. leasing. 
*/ #define LEASE_READ 0x1 /* Check lease for readers */ #define LEASE_WRITE 0x2 /* Check lease for modifiers */ extern void (*lease_updatetime) __P((int deltat)); #define VSHOULDFREE(vp) \ (!((vp)->v_flag & (VFREE|VDOOMED)) && \ !(vp)->v_holdcnt && !(vp)->v_usecount && \ (!(vp)->v_object || \ !((vp)->v_object->ref_count || (vp)->v_object->resident_page_count))) #define VSHOULDBUSY(vp) \ (((vp)->v_flag & (VFREE|VTBFREE)) && \ ((vp)->v_holdcnt || (vp)->v_usecount)) #endif /* KERNEL */ /* * Mods for extensibility. */ /* * Flags for vdesc_flags: */ #define VDESC_MAX_VPS 16 /* Low order 16 flag bits are reserved for willrele flags for vp arguments. */ #define VDESC_VP0_WILLRELE 0x0001 #define VDESC_VP1_WILLRELE 0x0002 #define VDESC_VP2_WILLRELE 0x0004 #define VDESC_VP3_WILLRELE 0x0008 #define VDESC_NOMAP_VPP 0x0100 #define VDESC_VPP_WILLRELE 0x0200 /* * VDESC_NO_OFFSET is used to identify the end of the offset list * and in places where no such field exists. */ #define VDESC_NO_OFFSET -1 /* * This structure describes the vnode operation taking place. */ struct vnodeop_desc { int vdesc_offset; /* offset in vector--first for speed */ char *vdesc_name; /* a readable name for debugging */ int vdesc_flags; /* VDESC_* flags */ /* * These ops are used by bypass routines to map and locate arguments. * Creds and procs are not needed in bypass routines, but sometimes * they are useful to (for example) transport layers. * Nameidata is useful because it has a cred in it. */ int *vdesc_vp_offsets; /* list ended by VDESC_NO_OFFSET */ int vdesc_vpp_offset; /* return vpp location */ int vdesc_cred_offset; /* cred location, if any */ int vdesc_proc_offset; /* proc location, if any */ int vdesc_componentname_offset; /* if any */ /* * Finally, we've got a list of private data (about each operation) * for each transport layer. (Support to manage this list is not * yet part of BSD.) */ caddr_t *vdesc_transports; }; #ifdef KERNEL /* * A list of all the operation descs. 
*/ extern struct vnodeop_desc *vnodeop_descs[]; /* * Interlock for scanning list of vnodes attached to a mountpoint */ extern struct simplelock mntvnode_slock; /* * This macro is very helpful in defining those offsets in the vdesc struct. * * This is stolen from X11R4. I ignored all the fancy stuff for * Crays, so if you decide to port this to such a serious machine, * you might want to consult Intrinsic.h's XtOffset{,Of,To}. */ #define VOPARG_OFFSET(p_type,field) \ ((int) (((char *) (&(((p_type)NULL)->field))) - ((char *) NULL))) #define VOPARG_OFFSETOF(s_type,field) \ VOPARG_OFFSET(s_type*,field) #define VOPARG_OFFSETTO(S_TYPE,S_OFFSET,STRUCT_P) \ ((S_TYPE)(((char*)(STRUCT_P))+(S_OFFSET))) /* * This structure is used to configure the new vnodeops vector. */ struct vnodeopv_entry_desc { struct vnodeop_desc *opve_op; /* which operation this is */ vop_t *opve_impl; /* code implementing this operation */ }; struct vnodeopv_desc { /* ptr to the ptr to the vector where op should go */ vop_t ***opv_desc_vector_p; struct vnodeopv_entry_desc *opv_desc_ops; /* null terminated list */ }; /* * A generic structure. * This can be used by bypass routines to identify generic arguments. */ struct vop_generic_args { struct vnodeop_desc *a_desc; /* other random data follows, presumably */ }; #ifdef DEBUG_VFS_LOCKS /* * Macros to aid in tracing VFS locking problems. Not totally * reliable since if the process sleeps between changing the lock * state and checking it with the assert, some other process could * change the state. They are good enough for debugging a single * filesystem using a single-threaded test. I find that 'cvs co src' * is a pretty good test. */ /* * [dfr] Kludge until I get around to fixing all the vfs locking. 
*/ #define IS_LOCKING_VFS(vp) ((vp)->v_tag == VT_UFS \ || (vp)->v_tag == VT_MFS \ || (vp)->v_tag == VT_NFS \ || (vp)->v_tag == VT_LFS \ || (vp)->v_tag == VT_ISOFS \ || (vp)->v_tag == VT_MSDOSFS \ || (vp)->v_tag == VT_DEVFS) #define ASSERT_VOP_LOCKED(vp, str) \ if ((vp) && IS_LOCKING_VFS(vp) && !VOP_ISLOCKED(vp)) { \ panic("%s: %x is not locked but should be", str, vp); \ } #define ASSERT_VOP_UNLOCKED(vp, str) \ if ((vp) && IS_LOCKING_VFS(vp) && VOP_ISLOCKED(vp)) { \ panic("%s: %x is locked but shouldn't be", str, vp); \ } #else #define ASSERT_VOP_LOCKED(vp, str) #define ASSERT_VOP_UNLOCKED(vp, str) #endif /* * VOCALL calls an op given an ops vector. We break it out because BSD's * vclean changes the ops vector and then wants to call ops with the old * vector. */ #define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP)) /* * This call works for vnodes in the kernel. */ #define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP)) #define VDESC(OP) (& __CONCAT(OP,_desc)) #define VOFFSET(OP) (VDESC(OP)->vdesc_offset) /* * Finally, include the default set of vnode operations. */ #include "vnode_if.h" /* * Public vnode manipulation functions. */ struct componentname; struct file; struct mount; struct nameidata; struct ostat; struct proc; struct stat; struct ucred; struct uio; struct vattr; struct vnode; struct vop_bwrite_args; extern int (*lease_check_hook) __P((struct vop_lease_args *)); int bdevvp __P((dev_t dev, struct vnode **vpp)); /* cache_* may belong in namei.h. 
*/ void cache_enter __P((struct vnode *dvp, struct vnode *vp, struct componentname *cnp)); int cache_lookup __P((struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)); void cache_purge __P((struct vnode *vp)); void cache_purgevfs __P((struct mount *mp)); void cvtstat __P((struct stat *st, struct ostat *ost)); int getnewvnode __P((enum vtagtype tag, struct mount *mp, vop_t **vops, struct vnode **vpp)); int lease_check __P((struct vop_lease_args *ap)); void vattr_null __P((struct vattr *vap)); int vcount __P((struct vnode *vp)); void vdrop __P((struct vnode *)); int vfinddev __P((dev_t dev, enum vtype type, struct vnode **vpp)); void vfs_opv_init __P((struct vnodeopv_desc **them)); int vflush __P((struct mount *mp, struct vnode *skipvp, int flags)); int vget __P((struct vnode *vp, int lockflag, struct proc *p)); void vgone __P((struct vnode *vp)); void vhold __P((struct vnode *)); int vinvalbuf __P((struct vnode *vp, int save, struct ucred *cred, struct proc *p, int slpflag, int slptimeo)); int vtruncbuf __P((struct vnode *vp, struct ucred *cred, struct proc *p, off_t length, int blksize)); void vprint __P((char *label, struct vnode *vp)); int vrecycle __P((struct vnode *vp, struct simplelock *inter_lkp, struct proc *p)); int vn_close __P((struct vnode *vp, int flags, struct ucred *cred, struct proc *p)); int vn_lock __P((struct vnode *vp, int flags, struct proc *p)); int vn_open __P((struct nameidata *ndp, int fmode, int cmode)); void vn_pollevent __P((struct vnode *vp, int events)); void vn_pollgone __P((struct vnode *vp)); int vn_pollrecord __P((struct vnode *vp, struct proc *p, int events)); int vn_rdwr __P((enum uio_rw rw, struct vnode *vp, caddr_t base, int len, off_t offset, enum uio_seg segflg, int ioflg, struct ucred *cred, int *aresid, struct proc *p)); int vn_stat __P((struct vnode *vp, struct stat *sb, struct proc *p)); void vn_syncer_add_to_worklist __P((struct vnode *vp, int delay)); int vfs_cache_lookup __P((struct vop_lookup_args *ap)); 
int vfs_object_create __P((struct vnode *vp, struct proc *p, struct ucred *cred, int waslocked)); int vn_writechk __P((struct vnode *vp)); int vop_stdbwrite __P((struct vop_bwrite_args *ap)); int vop_stdislocked __P((struct vop_islocked_args *)); int vop_stdlock __P((struct vop_lock_args *)); int vop_stdunlock __P((struct vop_unlock_args *)); int vop_noislocked __P((struct vop_islocked_args *)); int vop_nolock __P((struct vop_lock_args *)); int vop_nopoll __P((struct vop_poll_args *)); int vop_nounlock __P((struct vop_unlock_args *)); int vop_stdpathconf __P((struct vop_pathconf_args *)); int vop_stdpoll __P((struct vop_poll_args *)); int vop_revoke __P((struct vop_revoke_args *)); int vop_sharedlock __P((struct vop_lock_args *)); int vop_eopnotsupp __P((struct vop_generic_args *ap)); int vop_ebadf __P((struct vop_generic_args *ap)); int vop_einval __P((struct vop_generic_args *ap)); int vop_enotty __P((struct vop_generic_args *ap)); int vop_defaultop __P((struct vop_generic_args *ap)); int vop_null __P((struct vop_generic_args *ap)); struct vnode * checkalias __P((struct vnode *vp, dev_t nvp_rdev, struct mount *mp)); void vput __P((struct vnode *vp)); void vrele __P((struct vnode *vp)); void vref __P((struct vnode *vp)); void vbusy __P((struct vnode *vp)); extern vop_t **default_vnodeop_p; extern TAILQ_HEAD(tobefreelist, vnode) vnode_tobefree_list; /* vnode free list */ #endif /* KERNEL */ #endif /* !_SYS_VNODE_H_ */ diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c index 7d7de141dfbe..36537180788b 100644 --- a/sys/ufs/ffs/ffs_subr.c +++ b/sys/ufs/ffs/ffs_subr.c @@ -1,272 +1,274 @@ /* * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
 *
 *	@(#)ffs_subr.c	8.5 (Berkeley) 3/21/95
- * $Id: ffs_subr.c,v 1.19 1998/02/13 00:20:36 bde Exp $
+ * $Id: ffs_subr.c,v 1.20 1998/03/08 09:58:59 julian Exp $
 */

/* NOTE(review): the #include directives below lost their <header> arguments
 * during text extraction -- restore them from the original revision. */
#include
#include
#ifndef KERNEL
#include
#else
#include "opt_ddb.h"
#include
#include
#include
#include
+#include
+
#include
#include
#include

#ifdef DDB
static void	ffs_checkoverlap __P((struct buf *, struct inode *));
#endif

/*
 * Return buffer with the contents of block "offset" from the beginning of
 * directory "ip".  If "res" is non-zero, fill it in with a pointer to the
 * remaining space in the directory.
 */
int
ffs_blkatoff(vp, offset, res, bpp)
	struct vnode *vp;
	off_t offset;
	char **res;
	struct buf **bpp;
{
	struct inode *ip;
	register struct fs *fs;
	struct buf *bp;
	ufs_daddr_t lbn;
	int bsize, error;

	ip = VTOI(vp);
	fs = ip->i_fs;
	/* Map the byte offset to a logical block and its (possibly short,
	 * for a trailing fragment) block size. */
	lbn = lblkno(fs, offset);
	bsize = blksize(fs, ip, lbn);

	*bpp = NULL;
	error = bread(vp, lbn, bsize, NOCRED, &bp);
	if (error) {
		/* bread() hands back a held buffer even on failure; release it. */
		brelse(bp);
		return (error);
	}
	if (res)
		/* Point the caller at the requested offset within the block. */
		*res = (char *)bp->b_data + blkoff(fs, offset);
	*bpp = bp;
	return (0);
}
#endif

/*
 * Update the frsum fields to reflect addition or deletion
 * of some frags.
 */
void
ffs_fragacct(fs, fragmap, fraglist, cnt)
	struct fs *fs;
	int fragmap;
	int32_t fraglist[];
	int cnt;
{
	int inblk;
	register int field, subfield;
	register int siz, pos;

	/* fragtbl[] classifies which free-fragment run sizes occur in this
	 * map byte; shift by one to align with the 1-origin size loop. */
	inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1;
	fragmap <<= 1;
	for (siz = 1; siz < fs->fs_frag; siz++) {
		if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0)
			continue;
		/* around[]/inside[] give the bit pattern that delimits a free
		 * run of exactly siz fragments; slide it across the map. */
		field = around[siz];
		subfield = inside[siz];
		for (pos = siz; pos <= fs->fs_frag; pos++) {
			if ((fragmap & field) == subfield) {
				fraglist[siz] += cnt;
				pos += siz;
				field <<= siz;
				subfield <<= siz;
			}
			field <<= 1;
			subfield <<= 1;
		}
	}
}

#ifdef DDB
/*
 * Debugging aid: panic if the given buffer overlaps on disk with any other
 * valid buffer mapped onto the same device vnode.
 */
static void
ffs_checkoverlap(bp, ip)
	struct buf *bp;
	struct inode *ip;
{
	register struct buf *ebp, *ep;
	register ufs_daddr_t start, last;
	struct vnode *vp;

	ebp = &buf[nbuf];
	start = bp->b_blkno;
	last = start + btodb(bp->b_bcount) - 1;
	for (ep = buf; ep < ebp; ep++) {
		if (ep == bp || (ep->b_flags & B_INVAL) ||
		    ep->b_vp == NULLVP)
			continue;
		if (VOP_BMAP(ep->b_vp, (ufs_daddr_t)0, &vp, (ufs_daddr_t)0,
		    NULL, NULL))
			continue;
		if (vp != ip->i_devvp)
			continue;
		/* look for overlap */
		if (ep->b_bcount == 0 || ep->b_blkno > last ||
		    ep->b_blkno + btodb(ep->b_bcount) <= start)
			continue;
		vprint("Disk overlap", vp);
		(void)printf("\tstart %lu, end %lu overlap start %lu, end %lu\n",
		    (u_long)start, (u_long)last, (u_long)ep->b_blkno,
		    (u_long)(ep->b_blkno + btodb(ep->b_bcount) - 1));
		panic("ffs_checkoverlap: Disk buffer overlap");
	}
}
#endif /* DDB */

/*
 * block operations
 *
 * check if a block is available
 * (all fs_frag fragment bits for block h are set in the free map cp)
 */
int
ffs_isblock(fs, cp, h)
	struct fs *fs;
	unsigned char *cp;
	ufs_daddr_t h;
{
	unsigned char mask;

	/* One full block occupies fs_frag contiguous bits of the map. */
	switch ((int)fs->fs_frag) {
	case 8:
		return (cp[h] == 0xff);
	case 4:
		mask = 0x0f << ((h & 0x1) << 2);
		return ((cp[h >> 1] & mask) == mask);
	case 2:
		mask = 0x03 << ((h & 0x3) << 1);
		return ((cp[h >> 2] & mask) == mask);
	case 1:
		mask = 0x01 << (h & 0x7);
		return ((cp[h >> 3] & mask) == mask);
	default:
		panic("ffs_isblock");
	}
}

/*
 * check if a block is free
 * (none of block h's fragment bits are set in the map)
 */
int
ffs_isfreeblock(fs, cp, h)
	struct fs *fs;
	unsigned char *cp;
	ufs_daddr_t h;
{

	switch ((int)fs->fs_frag) {
	case 8:
		return (cp[h] == 0);
	case 4:
		return ((cp[h >> 1] & (0x0f << ((h & 0x1) << 2))) == 0);
	case 2:
		return ((cp[h >> 2] & (0x03 << ((h & 0x3) << 1))) == 0);
	case 1:
		return ((cp[h >> 3] & (0x01 << (h & 0x7))) == 0);
	default:
		panic("ffs_isfreeblock");
	}
}

/*
 * take a block out of the map
 * (clear all of block h's fragment bits)
 */
void
ffs_clrblock(fs, cp, h)
	struct fs *fs;
	u_char *cp;
	ufs_daddr_t h;
{

	switch ((int)fs->fs_frag) {
	case 8:
		cp[h] = 0;
		return;
	case 4:
		cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2));
		return;
	case 2:
		cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1));
		return;
	case 1:
		cp[h >> 3] &= ~(0x01 << (h & 0x7));
		return;
	default:
		panic("ffs_clrblock");
	}
}

/*
 * put a block into the map
 * (set all of block h's fragment bits)
 */
void
ffs_setblock(fs, cp, h)
	struct fs *fs;
	unsigned char *cp;
	ufs_daddr_t h;
{

	switch ((int)fs->fs_frag) {
	case 8:
		cp[h] = 0xff;
		return;
	case 4:
		cp[h >> 1] |= (0x0f << ((h & 0x1) << 2));
		return;
	case 2:
		cp[h >> 2] |= (0x03 << ((h & 0x3) << 1));
		return;
	case 1:
		cp[h >> 3] |= (0x01 << (h & 0x7));
		return;
	default:
		panic("ffs_setblock");
	}
}
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index 0f7f2105b77e..9e42e63d5647 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -1,261 +1,263 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3.
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 - * $Id: ffs_vnops.c,v 1.45 1998/03/19 22:49:44 dyson Exp $ + * $Id: ffs_vnops.c,v 1.46 1998/03/21 05:16:09 dyson Exp $ */ #include #include #include #include #include #include #include #include #include #include #include +#include + #include #include #include #include #include #include #include #include #include #include #include static int ffs_fsync __P((struct vop_fsync_args *)); static int ffs_getpages __P((struct vop_getpages_args *)); static int ffs_putpages __P((struct vop_putpages_args *)); static int ffs_read __P((struct vop_read_args *)); static int ffs_write __P((struct vop_write_args *)); /* Global vfs data structures for ufs. 
 */
/* Vnode-operation dispatch table for regular FFS vnodes; unlisted ops fall
 * through to the generic UFS handler via vop_default_desc. */
vop_t **ffs_vnodeop_p;
static struct vnodeopv_entry_desc ffs_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperate },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ &vop_getpages_desc,		(vop_t *) ffs_getpages },
	{ &vop_putpages_desc,		(vop_t *) ffs_putpages },
	{ &vop_read_desc,		(vop_t *) ffs_read },
	{ &vop_balloc_desc,		(vop_t *) ffs_balloc },
	{ &vop_reallocblks_desc,	(vop_t *) ffs_reallocblks },
	{ &vop_write_desc,		(vop_t *) ffs_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_vnodeop_opv_desc =
	{ &ffs_vnodeop_p, ffs_vnodeop_entries };

/* Dispatch table for special (device) vnodes on an FFS filesystem. */
vop_t **ffs_specop_p;
static struct vnodeopv_entry_desc ffs_specop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperatespec },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_specop_opv_desc =
	{ &ffs_specop_p, ffs_specop_entries };

/* Dispatch table for FIFO vnodes on an FFS filesystem. */
vop_t **ffs_fifoop_p;
static struct vnodeopv_entry_desc ffs_fifoop_entries[] = {
	{ &vop_default_desc,		(vop_t *) ufs_vnoperatefifo },
	{ &vop_fsync_desc,		(vop_t *) ffs_fsync },
	{ NULL, NULL }
};
static struct vnodeopv_desc ffs_fifoop_opv_desc =
	{ &ffs_fifoop_p, ffs_fifoop_entries };

VNODEOP_SET(ffs_vnodeop_opv_desc);
VNODEOP_SET(ffs_specop_opv_desc);
VNODEOP_SET(ffs_fifoop_opv_desc);

SYSCTL_NODE(_vfs, MOUNT_UFS, ffs, CTLFLAG_RW, 0, "FFS filesystem");

/* NOTE(review): this #include lost its <header> argument during text
 * extraction -- restore from the original revision. */
#include

/*
 * Synch an open file.
 *
 * Flushes the vnode's dirty buffers, optionally waiting for them
 * (MNT_WAIT), then pushes the inode itself via UFS_UPDATE.  Returns 0 or
 * an errno from the metadata/inode writes.
 */
/* ARGSUSED */
static int
ffs_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp;
	struct timeval tv;
	struct buf *nbp;
	int s, error, passes, skipmeta;
	daddr_t lbn;

	/* lbn is the first logical block past EOF; data buffers at or
	 * beyond it belong to truncated space and can be discarded.  A
	 * block device has no meaningful EOF, so never discard. */
	if (vp->v_type == VBLK) {
		lbn = INT_MAX;
	} else {
		struct inode *ip;
		ip = VTOI(vp);
		lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1));
	}

	/*
	 * Flush all dirty buffers associated with a vnode.
	 */
	passes = NIADDR;
	skipmeta = 0;
	/* For a synchronous fsync, do data first and metadata (negative
	 * lblkno, i.e. indirect blocks) in a second pass. */
	if (ap->a_waitfor == MNT_WAIT)
		skipmeta = 1;
loop:
	s = splbio();		/* keep the dirty-buffer list stable */
loop2:
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		/*
		 * First time through on a synchronous call,
		 * or if it's already scheduled, skip to the next
		 * buffer
		 */
		if ((bp->b_flags & B_BUSY) ||
		    ((skipmeta == 1) && (bp->b_lblkno < 0)))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("ffs_fsync: not dirty");
		/*
		 * If data is outstanding to another vnode, or we were
		 * asked to wait for everything, or it's not a file or BDEV,
		 * start the IO on this buffer immediatly.
		 */
		if (((bp->b_vp != vp) || (ap->a_waitfor == MNT_WAIT)) ||
		    ((vp->v_type != VREG) && (vp->v_type != VBLK))) {

			/*
			 * Wait for I/O associated with indirect blocks to
			 * complete, since there is no way to quickly wait
			 * for them below.
			 */
			if ((bp->b_vp == vp) || (ap->a_waitfor != MNT_WAIT)) {
				if (bp->b_flags & B_CLUSTEROK) {
					(void) vfs_bio_awrite(bp);
					splx(s);
				} else {
					bremfree(bp);
					bp->b_flags |= B_BUSY;
					splx(s);
					(void) bawrite(bp);
				}
			} else {
				/* Foreign (indirect-block) buffer: write it
				 * synchronously so it is done before we sleep
				 * on v_numoutput below. */
				bremfree(bp);
				bp->b_flags |= B_BUSY;
				splx(s);
				(void) bwrite(bp);
			}
		} else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) {
			/*
			 * If the buffer is for data that has been truncated
			 * off the file, then throw it away.
			 */
			bremfree(bp);
			bp->b_flags |= B_BUSY | B_INVAL | B_NOCACHE;
			brelse(bp);
			splx(s);
		} else {
			vfs_bio_awrite(bp);
			splx(s);
		}
		/* Each branch above dropped splbio; restart the scan since
		 * the list may have changed while interrupts were open. */
		goto loop;
	}

	/*
	 * If we were asked to do this synchronously, then go back for
	 * another pass, this time doing the metadata.
	 */
	if (skipmeta) {
		skipmeta = 0;
		goto loop2;	/* stay within the splbio() */
	}
	splx(s);

	if (ap->a_waitfor == MNT_WAIT) {
		s = splbio();
		if (!DOINGSOFTDEP(vp)) {
			/* Sleep until all writes started above have drained. */
			while (vp->v_numoutput) {
				vp->v_flag |= VBWAIT;
				(void) tsleep((caddr_t)&vp->v_numoutput,
				    PRIBIO + 4, "ffsfsn", 0);
			}
		} else {
			/*
			 * Ensure that any filesystem metadata associated
			 * with the vnode has been written.
			 */
			if ((error = softdep_sync_metadata(ap)) != 0) {
				splx(s);
				return (error);
			}
		}

		if (vp->v_dirtyblkhd.lh_first) {
			/*
			 * Block devices associated with filesystems may
			 * have new I/O requests posted for them even if
			 * the vnode is locked, so no amount of trying will
			 * get them clean. Thus we give block devices a
			 * good effort, then just give up. For all other file
			 * types, go around and try again until it is clean.
			 */
			if (passes > 0) {
				passes -= 1;
				goto loop2;
			}
#ifdef DIAGNOSTIC
			if (vp->v_type != VBLK)
				vprint("ffs_fsync: dirty", vp);
#endif
		}
		/* NOTE(review): no splx(s) is visible on this fall-through
		 * path for the splbio() taken above -- likely lost in text
		 * extraction; verify against the original revision. */
	}

	/* Finally, write the inode itself (synchronously for MNT_WAIT). */
	gettime(&tv);
	error = UFS_UPDATE(ap->a_vp, &tv, &tv, (ap->a_waitfor == MNT_WAIT));
	if (error)
		return (error);
	if (DOINGSOFTDEP(vp) && ap->a_waitfor == MNT_WAIT)
		error = softdep_fsync(vp);
	return (error);
}
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index edfaf9b50951..08ab10499df5 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -1,216 +1,217 @@
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vm_meter.c 8.4 (Berkeley) 1/4/94 - * $Id: vm_meter.c,v 1.22 1997/09/01 03:17:19 bde Exp $ + * $Id: vm_meter.c,v 1.23 1997/11/24 15:15:33 bde Exp $ */ #include #include #include #include +#include #include #include #include #include #include #include #include #include #include struct loadavg averunnable = { {0, 0, 0}, FSCALE }; /* load average, of runnable procs */ struct vmmeter cnt; static int maxslp = MAXSLP; /* * Constants for averages over 1, 5, and 15 minutes * when sampling at 5 second intervals. */ static fixpt_t cexp[3] = { 0.9200444146293232 * FSCALE, /* exp(-1/12) */ 0.9834714538216174 * FSCALE, /* exp(-1/60) */ 0.9944598480048967 * FSCALE, /* exp(-1/180) */ }; /* * Compute a tenex style load average of a quantity on * 1, 5 and 15 minute intervals. 
 */
static void
loadav(struct loadavg *avg)
{
	register int i, nrun;
	register struct proc *p;

	/* Count runnable processes: running, idle-new, or in a short
	 * interruptible sleep at user priority. */
	for (nrun = 0, p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		switch (p->p_stat) {
		case SSLEEP:
			if (p->p_priority > PZERO || p->p_slptime != 0)
				continue;
			/* fall through */
		case SRUN:
		case SIDL:
			nrun++;
		}
	}
	/* Exponentially decay the 1/5/15-minute fixed-point averages. */
	for (i = 0; i < 3; i++)
		avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
		    nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
}

/*
 * Periodic tick hook: refresh the load average every 5 seconds and poke
 * the swapper (proc0) if it has been idle too long.
 */
void
vmmeter()
{

	if (time.tv_sec % 5 == 0)
		loadav(&averunnable);
	if (proc0.p_slptime > maxslp / 2)
		wakeup(&proc0);
}

/* Export the paging thresholds and load average via sysctl. */
SYSCTL_INT(_vm, VM_V_FREE_MIN, v_free_min,
	CTLFLAG_RW, &cnt.v_free_min, 0, "");
SYSCTL_INT(_vm, VM_V_FREE_TARGET, v_free_target,
	CTLFLAG_RW, &cnt.v_free_target, 0, "");
SYSCTL_INT(_vm, VM_V_FREE_RESERVED, v_free_reserved,
	CTLFLAG_RW, &cnt.v_free_reserved, 0, "");
SYSCTL_INT(_vm, VM_V_INACTIVE_TARGET, v_inactive_target,
	CTLFLAG_RW, &cnt.v_inactive_target, 0, "");
SYSCTL_INT(_vm, VM_V_CACHE_MIN, v_cache_min,
	CTLFLAG_RW, &cnt.v_cache_min, 0, "");
SYSCTL_INT(_vm, VM_V_CACHE_MAX, v_cache_max,
	CTLFLAG_RW, &cnt.v_cache_max, 0, "");
SYSCTL_INT(_vm, VM_V_PAGEOUT_FREE_MIN, v_pageout_free_min,
	CTLFLAG_RW, &cnt.v_pageout_free_min, 0, "");
SYSCTL_STRUCT(_vm, VM_LOADAVG, loadavg, CTLFLAG_RD,
    &averunnable, loadavg, "");

/*
 * sysctl handler: build a struct vmtotal snapshot of system-wide process
 * and VM-object usage and copy it out to the caller.
 */
static int
vmtotal SYSCTL_HANDLER_ARGS
{
	struct proc *p;
	struct vmtotal total, *totalp;
	vm_map_entry_t entry;
	vm_object_t object;
	vm_map_t map;
	int paging;

	totalp = &total;
	bzero(totalp, sizeof *totalp);
	/*
	 * Mark all objects as inactive.
	 */
	for (object = TAILQ_FIRST(&vm_object_list);
	    object != NULL;
	    object = TAILQ_NEXT(object,object_list))
		object->flags &= ~OBJ_ACTIVE;
	/*
	 * Calculate process statistics.
	 */
	for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
		if (p->p_flag & P_SYSTEM)
			continue;
		/* Classify the process: disk wait, sleeping, swapped,
		 * or on the run queue. */
		switch (p->p_stat) {
		case 0:
			continue;

		case SSLEEP:
		case SSTOP:
			if (p->p_flag & P_INMEM) {
				if (p->p_priority <= PZERO)
					totalp->t_dw++;
				else if (p->p_slptime < maxslp)
					totalp->t_sl++;
			} else if (p->p_slptime < maxslp)
				totalp->t_sw++;
			if (p->p_slptime >= maxslp)
				continue;
			break;

		case SRUN:
		case SIDL:
			if (p->p_flag & P_INMEM)
				totalp->t_rq++;
			else
				totalp->t_sw++;
			if (p->p_stat == SIDL)
				continue;
			break;
		}
		/*
		 * Note active objects.
		 */
		paging = 0;
		for (map = &p->p_vmspace->vm_map, entry = map->header.next;
		    entry != &map->header; entry = entry->next) {
			if ((entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) ||
			    entry->object.vm_object == NULL)
				continue;
			entry->object.vm_object->flags |= OBJ_ACTIVE;
			paging |= entry->object.vm_object->paging_in_progress;
		}
		if (paging)
			totalp->t_pw++;
	}
	/*
	 * Calculate object memory usage statistics.
	 */
	for (object = TAILQ_FIRST(&vm_object_list);
	    object != NULL;
	    object = TAILQ_NEXT(object, object_list)) {
		totalp->t_vm += object->size;
		totalp->t_rm += object->resident_page_count;
		if (object->flags & OBJ_ACTIVE) {
			totalp->t_avm += object->size;
			totalp->t_arm += object->resident_page_count;
		}
		if (object->shadow_count > 1) {
			/* shared object */
			totalp->t_vmshr += object->size;
			totalp->t_rmshr += object->resident_page_count;
			if (object->flags & OBJ_ACTIVE) {
				totalp->t_avmshr += object->size;
				totalp->t_armshr += object->resident_page_count;
			}
		}
	}
	totalp->t_free = cnt.v_free_count + cnt.v_cache_count;
	return (sysctl_handle_opaque(oidp, totalp, sizeof total, req));
}

SYSCTL_PROC(_vm, VM_METER, vmmeter, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct vmtotal), vmtotal, "S,vmtotal", "");