Index: head/sys/compat/linprocfs/linprocfs.c
===================================================================
--- head/sys/compat/linprocfs/linprocfs.c	(revision 367361)
+++ head/sys/compat/linprocfs/linprocfs.c	(revision 367362)
@@ -1,1990 +1,1986 @@
 /*-
  * SPDX-License-Identifier: BSD-4-Clause
  *
  * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
  * Copyright (c) 1999 Pierre Beyssac
  * Copyright (c) 1993 Jan-Simon Pendry
  * Copyright (c) 1993
  *	The Regents of the University of California.  All rights reserved.
  *
  * This code is derived from software contributed to Berkeley by
  * Jan-Simon Pendry.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. All advertising materials mentioning features or use of this software
  *    must display the following acknowledgement:
  *	This product includes software developed by the University of
  *	California, Berkeley and its contributors.
  * 4. Neither the name of the University nor the names of its contributors
  *    may be used to endorse or promote products derived from this software
  *    without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
  *
  *	@(#)procfs_status.c	8.4 (Berkeley) 6/15/94
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/queue.h>
 #include <sys/blist.h>
 #include <sys/conf.h>
 #include <sys/exec.h>
 #include <sys/fcntl.h>
 #include <sys/filedesc.h>
 #include <sys/jail.h>
 #include <sys/kernel.h>
 #include <sys/limits.h>
 #include <sys/linker.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/msg.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/ptrace.h>
 #include <sys/resourcevar.h>
 #include <sys/resource.h>
 #include <sys/sbuf.h>
 #include <sys/sem.h>
 #include <sys/shm.h>
 #include <sys/smp.h>
 #include <sys/socket.h>
 #include <sys/syscallsubr.h>
 #include <sys/sysctl.h>
 #include <sys/sysent.h>
 #include <sys/systm.h>
 #include <sys/time.h>
 #include <sys/tty.h>
 #include <sys/user.h>
 #include <sys/uuid.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #include <sys/bus.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
 #include <net/if_types.h>
 
 #include <vm/vm.h>
 #include <vm/vm_extern.h>
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <vm/vm_param.h>
 #include <vm/vm_object.h>
 #include <vm/swap_pager.h>
 
 #include <machine/clock.h>
 
 #include <geom/geom.h>
 #include <geom/geom_int.h>
 
 #if defined(__i386__) || defined(__amd64__)
 #include <machine/cputypes.h>
 #include <machine/md_var.h>
 #endif /* __i386__ || __amd64__ */
 
 #include <compat/linux/linux.h>
 #include <compat/linux/linux_mib.h>
 #include <compat/linux/linux_misc.h>
 #include <compat/linux/linux_util.h>
 #include <fs/pseudofs/pseudofs.h>
 #include <fs/procfs/procfs.h>
 
 /*
  * Various conversion macros
  *
  * The tick-based macros divide by stathz when it is non-zero and by hz
  * otherwise.  Jiffies and centiseconds are both expressed in units of
  * 1/100 second here.
  */
 #define T2J(x) ((long)(((x) * 100ULL) / (stathz ? stathz : hz)))	/* ticks to jiffies */
 #define T2CS(x) ((unsigned long)(((x) * 100ULL) / (stathz ? stathz : hz)))	/* ticks to centiseconds */
 #define T2S(x) ((x) / (stathz ? stathz : hz))		/* ticks to seconds */
 #define B2K(x) ((x) >> 10)				/* bytes to kbytes */
 #define B2P(x) ((x) >> PAGE_SHIFT)			/* bytes to pages */
 #define P2B(x) ((x) << PAGE_SHIFT)			/* pages to bytes */
 #define P2K(x) ((x) << (PAGE_SHIFT - 10))		/* pages to kbytes */
 #define TV2J(x)	((x)->tv_sec * 100UL + (x)->tv_usec / 10000)	/* timeval to jiffies */
 
 /**
  * @brief Mapping of ki_stat in struct kinfo_proc to the linux state
  *
  * The linux procfs state field displays one of the characters RSDZTW to
  * denote running, sleeping in an interruptible wait, waiting in an
  * uninterruptible disk sleep, a zombie process, process is being traced
  * or stopped, or process is paging respectively.
  *
  * Our struct kinfo_proc contains the variable ki_stat which contains a
  * value out of SIDL, SRUN, SSLEEP, SSTOP, SZOMB, SWAIT and SLOCK.
  *
  * This character array is used with ki_stat - 1 as an index and tries to
  * map our states to suitable linux states.
  *
  * Note that the array also holds the string's terminating NUL, so
  * sizeof(linux_state) is one more than the number of mapped states.
  */
 static char linux_state[] = "RRSTZDD";
 
 /*
  * Filler function for proc/meminfo
  *
  * Writes the MemTotal, MemFree, Buffers, Cached, SwapTotal and SwapFree
  * lines, each expressed in kB.  The byte counts are derived from page
  * counts; Buffers comes from the vfs.bufspace sysctl and is reported as
  * zero when that sysctl cannot be read.  Always returns 0.
  */
 static int
 linprocfs_domeminfo(PFS_FILL_ARGS)
 {
 	unsigned long memtotal;		/* total memory in bytes */
 	unsigned long memfree;		/* free memory in bytes */
 	unsigned long cached;		/* page cache */
 	unsigned long buffers;		/* buffer cache */
 	unsigned long long swaptotal;	/* total swap space in bytes */
 	unsigned long long swapused;	/* used swap space in bytes */
 	unsigned long long swapfree;	/* free swap space in bytes */
 	size_t sz;
 	int error, i, j;
 
 	memtotal = physmem * PAGE_SIZE;
 	memfree = (unsigned long)vm_free_count() * PAGE_SIZE;
 	swap_pager_status(&i, &j);
 	swaptotal = (unsigned long long)i * PAGE_SIZE;
 	swapused = (unsigned long long)j * PAGE_SIZE;
 	swapfree = swaptotal - swapused;
 
 	/*
 	 * This value may exclude wired pages, but we have no good way of
 	 * accounting for that.
 	 *
 	 * Widen to unsigned long before scaling, exactly as is done for
 	 * memfree above, so that the pages-to-bytes multiplication is not
 	 * carried out in the narrower type of the page counters.
 	 */
 	cached = ((unsigned long)vm_active_count() + vm_inactive_count() +
 	    vm_laundry_count()) * PAGE_SIZE;
 
 	sz = sizeof(buffers);
 	error = kernel_sysctlbyname(curthread, "vfs.bufspace", &buffers, &sz,
 	    NULL, 0, 0, 0);
 	if (error != 0)
 		buffers = 0;
 
 	sbuf_printf(sb,
 	    "MemTotal: %9lu kB\n"
 	    "MemFree:  %9lu kB\n"
 	    "Buffers:  %9lu kB\n"
 	    "Cached:   %9lu kB\n"
 	    "SwapTotal:%9llu kB\n"
 	    "SwapFree: %9llu kB\n",
 	    B2K(memtotal), B2K(memfree), B2K(buffers),
 	    B2K(cached), B2K(swaptotal), B2K(swapfree));
 
 	return (0);
 }
 
 #if defined(__i386__) || defined(__amd64__)
 /*
  * Filler function for proc/cpuinfo (i386 & amd64 version)
  *
  * Emits one Linux-style record for each CPU index 0 .. mp_ncpus - 1.
  * Every record is built from the same global CPU identification
  * variables, so records differ only in the fields derived from the CPU
  * index.  Always returns 0.
  */
 static int
 linprocfs_docpuinfo(PFS_FILL_ARGS)
 {
 	int hw_model[2];
 	char model[128];
 	uint64_t freq;
 	size_t size;
 	u_int cache_size[4];
 	int fqmhz, fqkhz;
 	int i, j;
 
 	/*
 	 * We default the flags to include all non-conflicting flags,
 	 * and the Intel versions of conflicting flags.
 	 */
 	static char *cpu_feature_names[] = {
 		/*  0 */ "fpu", "vme", "de", "pse",
 		/*  4 */ "tsc", "msr", "pae", "mce",
 		/*  8 */ "cx8", "apic", "", "sep",
 		/* 12 */ "mtrr", "pge", "mca", "cmov",
 		/* 16 */ "pat", "pse36", "pn", "clflush",
 		/* 20 */ "", "dts", "acpi", "mmx",
 		/* 24 */ "fxsr", "sse", "sse2", "ss",
 		/* 28 */ "ht", "tm", "ia64", "pbe"
 	};
 
 	static char *amd_feature_names[] = {
 		/*  0 */ "", "", "", "",
 		/*  4 */ "", "", "", "",
 		/*  8 */ "", "", "", "syscall",
 		/* 12 */ "", "", "", "",
 		/* 16 */ "", "", "", "mp",
 		/* 20 */ "nx", "", "mmxext", "",
 		/* 24 */ "", "fxsr_opt", "pdpe1gb", "rdtscp",
 		/* 28 */ "", "lm", "3dnowext", "3dnow"
 	};
 
 	static char *cpu_feature2_names[] = {
 		/*  0 */ "pni", "pclmulqdq", "dtes64", "monitor",
 		/*  4 */ "ds_cpl", "vmx", "smx", "est",
 		/*  8 */ "tm2", "ssse3", "cid", "sdbg",
 		/* 12 */ "fma", "cx16", "xtpr", "pdcm",
 		/* 16 */ "", "pcid", "dca", "sse4_1",
 		/* 20 */ "sse4_2", "x2apic", "movbe", "popcnt",
 		/* 24 */ "tsc_deadline_timer", "aes", "xsave", "",
 		/* 28 */ "avx", "f16c", "rdrand", "hypervisor"
 	};
 
 	static char *amd_feature2_names[] = {
 		/*  0 */ "lahf_lm", "cmp_legacy", "svm", "extapic",
 		/*  4 */ "cr8_legacy", "abm", "sse4a", "misalignsse",
 		/*  8 */ "3dnowprefetch", "osvw", "ibs", "xop",
 		/* 12 */ "skinit", "wdt", "", "lwp",
 		/* 16 */ "fma4", "tce", "", "nodeid_msr",
 		/* 20 */ "", "tbm", "topoext", "perfctr_core",
 		/* 24 */ "perfctr_nb", "", "bpext", "ptsc",
 		/* 28 */ "perfctr_llc", "mwaitx", "", ""
 	};
 
 	static char *cpu_stdext_feature_names[] = {
 		/*  0 */ "fsgsbase", "tsc_adjust", "", "bmi1",
 		/*  4 */ "hle", "avx2", "", "smep",
 		/*  8 */ "bmi2", "erms", "invpcid", "rtm",
 		/* 12 */ "cqm", "", "mpx", "rdt_a",
 		/* 16 */ "avx512f", "avx512dq", "rdseed", "adx",
 		/* 20 */ "smap", "avx512ifma", "", "clflushopt",
 		/* 24 */ "clwb", "intel_pt", "avx512pf", "avx512er",
 		/* 28 */ "avx512cd", "sha_ni", "avx512bw", "avx512vl"
 	};
 
 	static char *power_flags[] = {
 		"ts",           "fid",          "vid",
 		"ttp",          "tm",           "stc",
 		"100mhzsteps",  "hwpstate",     "",
 		"cpb",          "eff_freq_ro",  "proc_feedback",
 		"acc_power",
 	};
 
 	/* Fetch the model string via sysctl; fall back to "unknown". */
 	hw_model[0] = CTL_HW;
 	hw_model[1] = HW_MODEL;
 	model[0] = '\0';
 	size = sizeof(model);
 	if (kernel_sysctl(td, hw_model, 2, &model, &size, 0, 0, 0, 0) != 0)
 		strcpy(model, "unknown");
 #ifdef __i386__
 	/* Swap in vendor-specific names for the conflicting feature bits. */
 	switch (cpu_vendor_id) {
 	case CPU_VENDOR_AMD:
 		if (cpu_class < CPUCLASS_686)
 			cpu_feature_names[16] = "fcmov";
 		break;
 	case CPU_VENDOR_CYRIX:
 		cpu_feature_names[24] = "cxmmx";
 		break;
 	}
 #endif
 	if (cpu_exthigh >= 0x80000006)
 		do_cpuid(0x80000006, cache_size);
 	else
 		memset(cache_size, 0, sizeof(cache_size));
 	for (i = 0; i < mp_ncpus; ++i) {
 		fqmhz = 0;
 		fqkhz = 0;
 		freq = atomic_load_acq_64(&tsc_freq);
 		if (freq != 0) {
 			fqmhz = (freq + 4999) / 1000000;
 			fqkhz = ((freq + 4999) / 10000) % 100;
 		}
 		sbuf_printf(sb,
 		    "processor\t: %d\n"
 		    "vendor_id\t: %.20s\n"
 		    "cpu family\t: %u\n"
 		    "model\t\t: %u\n"
 		    "model name\t: %s\n"
 		    "stepping\t: %u\n"
 		    "cpu MHz\t\t: %d.%02d\n"
 		    "cache size\t: %d KB\n"
 		    "physical id\t: %d\n"
 		    "siblings\t: %d\n"
 		    "core id\t\t: %d\n"
 		    "cpu cores\t: %d\n"
 		    "apicid\t\t: %d\n"
 		    "initial apicid\t: %d\n"
 		    "fpu\t\t: %s\n"
 		    "fpu_exception\t: %s\n"
 		    "cpuid level\t: %d\n"
 		    "wp\t\t: %s\n",
 		    i, cpu_vendor, CPUID_TO_FAMILY(cpu_id),
 		    CPUID_TO_MODEL(cpu_id), model, cpu_id & CPUID_STEPPING,
 		    fqmhz, fqkhz,
 		    (cache_size[2] >> 16), 0, mp_ncpus, i, mp_ncpus,
 		    i, i, /*cpu_id & CPUID_LOCAL_APIC_ID ??*/
 		    (cpu_feature & CPUID_FPU) ? "yes" : "no", "yes",
 		    CPUID_TO_FAMILY(cpu_id), "yes");
 		/*
 		 * The feature tables have 32 entries each, so j reaches 31.
 		 * The mask is built from an unsigned 1 because shifting a
 		 * signed 1 into the sign bit is undefined behaviour.
 		 */
 		sbuf_cat(sb, "flags\t\t:");
 		for (j = 0; j < nitems(cpu_feature_names); j++)
 			if (cpu_feature & (1U << j) &&
 			    cpu_feature_names[j][0] != '\0')
 				sbuf_printf(sb, " %s", cpu_feature_names[j]);
 		for (j = 0; j < nitems(amd_feature_names); j++)
 			if (amd_feature & (1U << j) &&
 			    amd_feature_names[j][0] != '\0')
 				sbuf_printf(sb, " %s", amd_feature_names[j]);
 		for (j = 0; j < nitems(cpu_feature2_names); j++)
 			if (cpu_feature2 & (1U << j) &&
 			    cpu_feature2_names[j][0] != '\0')
 				sbuf_printf(sb, " %s", cpu_feature2_names[j]);
 		for (j = 0; j < nitems(amd_feature2_names); j++)
 			if (amd_feature2 & (1U << j) &&
 			    amd_feature2_names[j][0] != '\0')
 				sbuf_printf(sb, " %s", amd_feature2_names[j]);
 		for (j = 0; j < nitems(cpu_stdext_feature_names); j++)
 			if (cpu_stdext_feature & (1U << j) &&
 			    cpu_stdext_feature_names[j][0] != '\0')
 				sbuf_printf(sb, " %s",
 				    cpu_stdext_feature_names[j]);
 		sbuf_cat(sb, "\n");
 		sbuf_printf(sb,
 		    "bugs\t\t: %s\n"
 		    "bogomips\t: %d.%02d\n"
 		    "clflush size\t: %d\n"
 		    "cache_alignment\t: %d\n"
 		    "address sizes\t: %d bits physical, %d bits virtual\n",
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 		    (has_f00f_bug) ? "Intel F00F" : "",
 #else
 		    "",
 #endif
 		    fqmhz * 2, fqkhz,
 		    cpu_clflush_line_size, cpu_clflush_line_size,
 		    cpu_maxphyaddr,
 		    (cpu_maxphyaddr > 32) ? 48 : 0);
 		sbuf_cat(sb, "power management: ");
 		for (j = 0; j < nitems(power_flags); j++)
 			if (amd_pminfo & (1U << j))
 				sbuf_printf(sb, " %s", power_flags[j]);
 		sbuf_cat(sb, "\n\n");
 
 		/* XXX per-cpu vendor / class / model / id? */
 	}
 	sbuf_cat(sb, "\n");
 
 	return (0);
 }
 #else
 /*
  * ARM64TODO: implement non-stubbed linprocfs_docpuinfo
  *
  * Placeholder filler for proc/cpuinfo on non-x86 platforms: one record
  * per CPU index with every value reported as zero or left empty.
  */
 static int
 linprocfs_docpuinfo(PFS_FILL_ARGS)
 {
 	int cpu;
 
 	cpu = 0;
 	while (cpu < mp_ncpus) {
 		sbuf_printf(sb,
 		    "processor\t: %d\n"
 		    "BogoMIPS\t: %d.%02d\n",
 		    cpu, 0, 0);
 		/* No feature names are known, so the list stays empty. */
 		sbuf_cat(sb, "Features\t: ");
 		sbuf_cat(sb, "\n");
 		sbuf_printf(sb,
 		    "CPU implementer\t: \n"
 		    "CPU architecture: \n"
 		    "CPU variant\t: 0x%x\n"
 		    "CPU part\t: 0x%x\n"
 		    "CPU revision\t: %d\n",
 		    0, 0, 0);
 		/* Blank line separating this record from the next. */
 		sbuf_cat(sb, "\n");
 		cpu++;
 	}
 
 	return (0);
 }
 #endif /* __i386__ || __amd64__ */
 
 /*
  * Filler function for proc/mtab
  *
  * This file doesn't exist in Linux' procfs, but is included here so
  * users can symlink /compat/linux/etc/mtab to /proc/mtab
  */
 static int
 linprocfs_domtab(PFS_FILL_ARGS)
 {
 	struct nameidata nd;
 	const char *lep;
 	char *dlep, *flep, *mntto, *mntfrom, *fstype;
 	size_t lep_len;
 	int error;
 	struct statfs *buf, *sp;
 	size_t count;
 
 	/* resolve symlinks etc. in the emulation tree prefix */
 	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, linux_emul_path, td);
 	flep = NULL;
 	error = namei(&nd);
 	lep = linux_emul_path;
 	if (error == 0) {
 		if (vn_fullpath(nd.ni_vp, &dlep, &flep) == 0)
 			lep = dlep;
 		vrele(nd.ni_vp);
 	}
 	lep_len = strlen(lep);
 
 	buf = NULL;
 	error = kern_getfsstat(td, &buf, SIZE_T_MAX, &count,
 	    UIO_SYSSPACE, MNT_WAIT);
 	if (error != 0) {
 		free(buf, M_TEMP);
 		free(flep, M_TEMP);
 		return (error);
 	}
 
 	for (sp = buf; count > 0; sp++, count--) {
 		/* determine device name */
 		mntfrom = sp->f_mntfromname;
 
 		/* determine mount point */
 		mntto = sp->f_mntonname;
 		if (strncmp(mntto, lep, lep_len) == 0 && mntto[lep_len] == '/')
 			mntto += lep_len;
 
 		/* determine fs type */
 		fstype = sp->f_fstypename;
 		if (strcmp(fstype, pn->pn_info->pi_name) == 0)
 			mntfrom = fstype = "proc";
 		else if (strcmp(fstype, "procfs") == 0)
 			continue;
 
 		if (strcmp(fstype, "autofs") == 0) {
 			/*
 			 * FreeBSD uses eg "map -hosts", whereas Linux
 			 * expects just "-hosts".
 			 */
 			if (strncmp(mntfrom, "map ", 4) == 0)
 				mntfrom += 4;
 		}
 
 		if (strcmp(fstype, "linsysfs") == 0) {
 			sbuf_printf(sb, "/sys %s sysfs %s", mntto,
 			    sp->f_flags & MNT_RDONLY ? "ro" : "rw");
 		} else {
 			/* For Linux msdosfs is called vfat */
 			if (strcmp(fstype, "msdosfs") == 0)
 				fstype = "vfat";
 			sbuf_printf(sb, "%s %s %s %s", mntfrom, mntto, fstype,
 			    sp->f_flags & MNT_RDONLY ? "ro" : "rw");
 		}
 #define ADD_OPTION(opt, name) \
 	if (sp->f_flags & (opt)) sbuf_printf(sb, "," name);
 		ADD_OPTION(MNT_SYNCHRONOUS,	"sync");
 		ADD_OPTION(MNT_NOEXEC,		"noexec");
 		ADD_OPTION(MNT_NOSUID,		"nosuid");
 		ADD_OPTION(MNT_UNION,		"union");
 		ADD_OPTION(MNT_ASYNC,		"async");
 		ADD_OPTION(MNT_SUIDDIR,		"suiddir");
 		ADD_OPTION(MNT_NOSYMFOLLOW,	"nosymfollow");
 		ADD_OPTION(MNT_NOATIME,		"noatime");
 #undef ADD_OPTION
 		/* a real Linux mtab will also show NFS options */
 		sbuf_printf(sb, " 0 0\n");
 	}
 
 	free(buf, M_TEMP);
 	free(flep, M_TEMP);
 	return (error);
 }
 
 /*
  * Filler function for proc/partitions
  *
  * Walks the GEOM classes named "DISK" and "PART" while holding the
  * topology lock and prints one line per provider.  All I/O statistics
  * columns are reported as zero.
  *
  * NOTE(review): the "#blocks" column receives pp->mediasize without any
  * scaling -- confirm this matches the unit Linux consumers expect.
  */
 static int
 linprocfs_dopartitions(PFS_FILL_ARGS)
 {
 	struct g_class *class;
 	struct g_geom *geom;
 	struct g_provider *prov;
 	int major, minor;
 
 	g_topology_lock();
 	sbuf_printf(sb, "major minor  #blocks  name rio rmerge rsect "
 	    "ruse wio wmerge wsect wuse running use aveq\n");
 
 	LIST_FOREACH(class, &g_classes, class) {
 		/* Only disks and partitions are listed. */
 		if (strcmp(class->name, "DISK") != 0 &&
 		    strcmp(class->name, "PART") != 0)
 			continue;
 		LIST_FOREACH(geom, &class->geom, geom) {
 			LIST_FOREACH(prov, &geom->provider, provider) {
 				/* Unknown devices are reported as 0:0. */
 				if (linux_driver_get_major_minor(prov->name,
 				    &major, &minor) != 0) {
 					major = 0;
 					minor = 0;
 				}
 				sbuf_printf(sb, "%d %d %lld %s "
 				    "%d %d %d %d %d "
 				    "%d %d %d %d %d %d\n",
 				    major, minor,
 				    (long long)prov->mediasize, prov->name,
 				    0, 0, 0, 0, 0,
 				    0, 0, 0, 0, 0, 0);
 			}
 		}
 	}
 	g_topology_unlock();
 
 	return (0);
 }
 
 /*
  * Filler function for proc/stat
  *
  * The number of columns on each "cpu" line depends on the emulated
  * kernel version:
  *
  * v2.5.40 <=
  *   user nice system idle
  * v2.5.41
  *   user nice system idle iowait
  * v2.6.11
  *   user nice system idle iowait irq softirq steal
  * v2.6.24
  *   user nice system idle iowait irq softirq steal guest
  * v2.6.33 >=
  *   user nice system idle iowait irq softirq steal guest guest_nice
  *
  * Only user, nice, system, idle and (from v2.6.11 on) irq carry real
  * values; the remaining columns are written as zero.
  */
 static int
 linprocfs_dostat(PFS_FILL_ARGS)
 {
 	struct timeval boottime;
 	struct pcpu *pc;
 	long total[CPUSTATES];
 	long *percpu;
 	char *tail;
 	bool with_irq;
 	int cpu;
 
 	/* Pick the trailing zero columns and decide whether irq is shown. */
 	if (linux_kernver(td) >= LINUX_KERNVER(2,6,33)) {
 		with_irq = true;
 		tail = " 0 0 0 0\n";
 	} else if (linux_kernver(td) >= LINUX_KERNVER(2,6,24)) {
 		with_irq = true;
 		tail = " 0 0 0\n";
 	} else if (linux_kernver(td) >= LINUX_KERNVER(2,6,11)) {
 		with_irq = true;
 		tail = " 0 0\n";
 	} else if (linux_kernver(td) >= LINUX_KERNVER(2,5,41)) {
 		with_irq = false;
 		tail = " 0\n";
 	} else {
 		with_irq = false;
 		tail = "\n";
 	}
 
 	read_cpu_time(total);
 	getboottime(&boottime);
 
 	/* Aggregate line: the columns every version has ... */
 	sbuf_printf(sb, "cpu %lu %lu %lu %lu",
 	    T2J(total[CP_USER]),
 	    T2J(total[CP_NICE]),
 	    T2J(total[CP_SYS]),
 	    T2J(total[CP_IDLE]));
 	/* ... then a zero iowait plus irq where the version has them ... */
 	if (with_irq)
 		sbuf_printf(sb, " 0 %lu", T2J(total[CP_INTR]));
 	/* ... and finally the version-dependent zero padding. */
 	sbuf_printf(sb, "%s", tail);
 
 	/* The same layout again, once per CPU. */
 	CPU_FOREACH(cpu) {
 		pc = pcpu_find(cpu);
 		percpu = pc->pc_cp_time;
 		sbuf_printf(sb, "cpu%d %lu %lu %lu %lu", cpu,
 		    T2J(percpu[CP_USER]),
 		    T2J(percpu[CP_NICE]),
 		    T2J(percpu[CP_SYS]),
 		    T2J(percpu[CP_IDLE]));
 		if (with_irq)
 			sbuf_printf(sb, " 0 %lu", T2J(percpu[CP_INTR]));
 		sbuf_printf(sb, "%s", tail);
 	}
 
 	sbuf_printf(sb,
 	    "disk 0 0 0 0\n"
 	    "page %ju %ju\n"
 	    "swap %ju %ju\n"
 	    "intr %ju\n"
 	    "ctxt %ju\n"
 	    "btime %lld\n",
 	    (uintmax_t)VM_CNT_FETCH(v_vnodepgsin),
 	    (uintmax_t)VM_CNT_FETCH(v_vnodepgsout),
 	    (uintmax_t)VM_CNT_FETCH(v_swappgsin),
 	    (uintmax_t)VM_CNT_FETCH(v_swappgsout),
 	    (uintmax_t)VM_CNT_FETCH(v_intr),
 	    (uintmax_t)VM_CNT_FETCH(v_swtch),
 	    (long long)boottime.tv_sec);
 	return (0);
 }
 
 /*
  * Filler function for proc/swaps
  *
  * Prints a header line followed by one line per swap device, querying
  * devices by increasing index until swap_dev_info() reports an error.
  * Sizes are printed in units of 1024 bytes.  Always returns 0.
  */
 static int
 linprocfs_doswaps(PFS_FILL_ARGS)
 {
 	struct xswdev xsw;
 	uintmax_t total, used;
 	int n;
 	char devname[SPECNAMELEN + 1];
 
 	sbuf_printf(sb, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
 	for (n = 0; ; n++) {
 		if (swap_dev_info(n, &xsw, devname, sizeof(devname)) != 0)
 			break;
 		total = (uintmax_t)xsw.xsw_nblks * PAGE_SIZE / 1024;
 		used  = (uintmax_t)xsw.xsw_used * PAGE_SIZE / 1024;
 
 		/*
 		 * The space and not tab after the device name is on
 		 * purpose.  Linux does so.
 		 *
 		 * total and used are uintmax_t, so they are printed with
 		 * the unsigned %ju conversion.
 		 */
 		sbuf_printf(sb, "/dev/%-34s unknown\t\t%ju\t%ju\t-1\n",
 		    devname, total, used);
 	}
 	return (0);
 }
 
 /*
  * Filler function for proc/uptime
  */
 static int
 linprocfs_douptime(PFS_FILL_ARGS)
 {
 	long cp_time[CPUSTATES];
 	struct timeval tv;
 
 	getmicrouptime(&tv);
 	read_cpu_time(cp_time);
 	sbuf_printf(sb, "%lld.%02ld %ld.%02lu\n",
 	    (long long)tv.tv_sec, tv.tv_usec / 10000,
 	    T2S(cp_time[CP_IDLE] / mp_ncpus),
 	    T2CS(cp_time[CP_IDLE] / mp_ncpus) % 100);
 	return (0);
 }
 
 /*
  * Get OS build date
  *
  * Appends a fixed build number and date string to sb.  td is not used.
  */
 static void
 linprocfs_osbuild(struct thread *td, struct sbuf *sb)
 {
 
 	sbuf_cat(sb, "#4 Sun Dec 18 04:30:00 CET 1977");
 }
 
 /*
  * Get OS builder
  *
  * Appends a fixed builder address to sb.  td is not used.
  */
 static void
 linprocfs_osbuilder(struct thread *td, struct sbuf *sb)
 {
 
 	sbuf_cat(sb, "des@freebsd.org");
 }
 
 /*
  * Filler function for proc/version
  *
  * Produces a single line of the form
  *   <osname> version <osrelease> (<builder>) (gcc version <ver>) <build>
  * where the name and release are obtained through linux_get_osname()
  * and linux_get_osrelease().
  */
 static int
 linprocfs_doversion(PFS_FILL_ARGS)
 {
 	char sysname[LINUX_MAX_UTSNAME];
 	char release[LINUX_MAX_UTSNAME];
 
 	linux_get_osname(td, sysname);
 	linux_get_osrelease(td, release);
 
 	/* "<name> version <release> (" ... */
 	sbuf_printf(sb, "%s version %s (", sysname, release);
 	/* ... builder, compiler version ... */
 	linprocfs_osbuilder(td, sb);
 	sbuf_cat(sb, ") (gcc version " __VERSION__ ") ");
 	/* ... build number and date. */
 	linprocfs_osbuild(td, sb);
 	sbuf_cat(sb, "\n");
 
 	return (0);
 }
 
 /*
  * Filler function for proc/loadavg
  *
  * Prints the three load averages with two decimals each, followed by
  * "running/total" task counts and the last pid.  The running-task count
  * is always reported as 1.
  */
 static int
 linprocfs_doloadavg(PFS_FILL_ARGS)
 {
 
 /* Integer part and two-digit fraction of the fixed-point load average. */
 #define	LOAD_WHOLE(n)							\
 	((int)(averunnable.ldavg[n] / averunnable.fscale))
 #define	LOAD_FRAC(n)							\
 	((int)(averunnable.ldavg[n] * 100 / averunnable.fscale % 100))
 	sbuf_printf(sb,
 	    "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
 	    LOAD_WHOLE(0), LOAD_FRAC(0),
 	    LOAD_WHOLE(1), LOAD_FRAC(1),
 	    LOAD_WHOLE(2), LOAD_FRAC(2),
 	    1,				/* number of running tasks */
 	    nprocs,			/* number of tasks */
 	    lastpid			/* the last pid */
 	);
 #undef LOAD_FRAC
 #undef LOAD_WHOLE
 	return (0);
 }
 
 /*
  * Compute the Linux device number of the controlling terminal of p.
  *
  * The caller must hold the process lock and the proctree lock (both are
  * asserted).  Returns -1 when the process has no controlling terminal,
  * has no session, or when the terminal's name cannot be translated to a
  * Linux major/minor pair.
  */
 static int
 linprocfs_get_tty_nr(struct proc *p)
 {
 	struct session *sess;
 	int major, minor;
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
 	sx_assert(&proctree_lock, SX_LOCKED);
 
 	if ((p->p_flag & P_CONTROLT) == 0)
 		return (-1);
 
 	sess = p->p_pgrp->pg_session;
 	if (sess == NULL)
 		return (-1);
 
 	if (linux_driver_get_major_minor(devtoname(sess->s_ttyp->t_dev),
 	    &major, &minor) != 0)
 		return (-1);
 
 	return (makedev(major, minor));
 }
 
 /*
  * Filler function for proc/pid/stat
  *
  * Writes the single-line, space-separated stat record for process p.
  * The first argument of PS_ADD() only names the field for the reader; it
  * is not part of the output.  Fields marked XXX are not implemented and
  * are written as zero.  Always returns 0.
  */
 static int
 linprocfs_doprocstat(PFS_FILL_ARGS)
 {
 	struct kinfo_proc kp;
 	struct timeval boottime;
 	char state;
 	static int ratelimit = 0;
 	int tty_nr;
 	vm_offset_t startcode, startdata;
 
 	getboottime(&boottime);
 	sx_slock(&proctree_lock);
 	PROC_LOCK(p);
 	fill_kinfo_proc(p, &kp);
 	tty_nr = linprocfs_get_tty_nr(p);
 	sx_sunlock(&proctree_lock);
 	if (p->p_vmspace) {
 		startcode = (vm_offset_t)p->p_vmspace->vm_taddr;
 		startdata = (vm_offset_t)p->p_vmspace->vm_daddr;
 	} else {
 		startcode = 0;
 		startdata = 0;
 	}
 	sbuf_printf(sb, "%d", p->p_pid);
 #define PS_ADD(name, fmt, arg) sbuf_printf(sb, " " fmt, arg)
 	PS_ADD("comm",		"(%s)",	p->p_comm);
 	/*
 	 * linux_state is indexed with ki_stat - 1 and its last element is
 	 * the string terminator, so the usable ki_stat values are
 	 * 1 .. sizeof(linux_state) - 1.  Anything else is mapped to 'R'
 	 * (and reported once) instead of reading the terminator or an
 	 * element outside the array.
 	 */
 	if (kp.ki_stat < 1 || kp.ki_stat >= (int)sizeof(linux_state)) {
 		state = 'R';
 
 		if (ratelimit == 0) {
 			printf("linprocfs: don't know how to handle unknown FreeBSD state %d/%zd, mapping to R\n",
 			    kp.ki_stat, sizeof(linux_state));
 			++ratelimit;
 		}
 	} else
 		state = linux_state[kp.ki_stat - 1];
 	PS_ADD("state",		"%c",	state);
 	PS_ADD("ppid",		"%d",	p->p_pptr ? p->p_pptr->p_pid : 0);
 	PS_ADD("pgrp",		"%d",	p->p_pgid);
 	PS_ADD("session",	"%d",	p->p_session->s_sid);
 	PROC_UNLOCK(p);
 	/* From here on only the kp snapshot and locals are used. */
 	PS_ADD("tty",		"%d",	tty_nr);
 	PS_ADD("tpgid",		"%d",	kp.ki_tpgid);
 	PS_ADD("flags",		"%u",	0); /* XXX */
 	PS_ADD("minflt",	"%lu",	kp.ki_rusage.ru_minflt);
 	PS_ADD("cminflt",	"%lu",	kp.ki_rusage_ch.ru_minflt);
 	PS_ADD("majflt",	"%lu",	kp.ki_rusage.ru_majflt);
 	PS_ADD("cmajflt",	"%lu",	kp.ki_rusage_ch.ru_majflt);
 	PS_ADD("utime",		"%ld",	TV2J(&kp.ki_rusage.ru_utime));
 	PS_ADD("stime",		"%ld",	TV2J(&kp.ki_rusage.ru_stime));
 	PS_ADD("cutime",	"%ld",	TV2J(&kp.ki_rusage_ch.ru_utime));
 	PS_ADD("cstime",	"%ld",	TV2J(&kp.ki_rusage_ch.ru_stime));
 	PS_ADD("priority",	"%d",	kp.ki_pri.pri_user);
 	PS_ADD("nice",		"%d",	kp.ki_nice); /* 19 (nicest) to -19 */
 	PS_ADD("0",		"%d",	0); /* removed field */
 	PS_ADD("itrealvalue",	"%d",	0); /* XXX */
 	PS_ADD("starttime",	"%lu",	TV2J(&kp.ki_start) - TV2J(&boottime));
 	/*
 	 * ki_size is a byte count (the other fillers in this file convert
 	 * it with B2P()/B2K()), and vsize is reported in bytes, so it is
 	 * printed as is rather than being run through P2K().
 	 */
 	PS_ADD("vsize",		"%ju",	(uintmax_t)kp.ki_size);
 	PS_ADD("rss",		"%ju",	(uintmax_t)kp.ki_rssize);
 	PS_ADD("rlim",		"%lu",	kp.ki_rusage.ru_maxrss);
 	PS_ADD("startcode",	"%ju",	(uintmax_t)startcode);
 	PS_ADD("endcode",	"%ju",	(uintmax_t)startdata);
 	PS_ADD("startstack",	"%u",	0); /* XXX */
 	PS_ADD("kstkesp",	"%u",	0); /* XXX */
 	PS_ADD("kstkeip",	"%u",	0); /* XXX */
 	PS_ADD("signal",	"%u",	0); /* XXX */
 	PS_ADD("blocked",	"%u",	0); /* XXX */
 	PS_ADD("sigignore",	"%u",	0); /* XXX */
 	PS_ADD("sigcatch",	"%u",	0); /* XXX */
 	PS_ADD("wchan",		"%u",	0); /* XXX */
 	PS_ADD("nswap",		"%lu",	kp.ki_rusage.ru_nswap);
 	PS_ADD("cnswap",	"%lu",	kp.ki_rusage_ch.ru_nswap);
 	PS_ADD("exitsignal",	"%d",	0); /* XXX */
 	PS_ADD("processor",	"%u",	kp.ki_lastcpu);
 	PS_ADD("rt_priority",	"%u",	0); /* XXX */ /* >= 2.5.19 */
 	PS_ADD("policy",	"%u",	kp.ki_pri.pri_class); /* >= 2.5.19 */
 #undef PS_ADD
 	sbuf_putc(sb, '\n');
 
 	return (0);
 }
 
 /*
  * Filler function for proc/pid/statm
  */
 static int
 linprocfs_doprocstatm(PFS_FILL_ARGS)
 {
 	struct kinfo_proc kp;
 	segsz_t lsize;
 
 	sx_slock(&proctree_lock);
 	PROC_LOCK(p);
 	fill_kinfo_proc(p, &kp);
 	PROC_UNLOCK(p);
 	sx_sunlock(&proctree_lock);
 
 	/*
 	 * See comments in linprocfs_doprocstatus() regarding the
 	 * computation of lsize.
 	 */
 	/* size resident share trs drs lrs dt */
 	sbuf_printf(sb, "%ju ", B2P((uintmax_t)kp.ki_size));
 	sbuf_printf(sb, "%ju ", (uintmax_t)kp.ki_rssize);
 	sbuf_printf(sb, "%ju ", (uintmax_t)0); /* XXX */
 	sbuf_printf(sb, "%ju ",	(uintmax_t)kp.ki_tsize);
 	sbuf_printf(sb, "%ju ", (uintmax_t)(kp.ki_dsize + kp.ki_ssize));
 	lsize = B2P(kp.ki_size) - kp.ki_dsize -
 	    kp.ki_ssize - kp.ki_tsize - 1;
 	sbuf_printf(sb, "%ju ", (uintmax_t)lsize);
 	sbuf_printf(sb, "%ju\n", (uintmax_t)0); /* XXX */
 
 	return (0);
 }
 
 /*
  * Filler function for proc/pid/status
  *
  * Writes a multi-line "Key:\tvalue" report for process p: name and
  * state, ids and credentials, memory sizes in kB, signal masks and
  * (all-zero) capability masks.  Always returns 0.
  */
 static int
 linprocfs_doprocstatus(PFS_FILL_ARGS)
 {
 	struct kinfo_proc kp;
 	char *state;
 	segsz_t lsize;
 	struct thread *td2;
 	struct sigacts *ps;
 	l_sigset_t siglist, sigignore, sigcatch;
 	int i;
 
 	sx_slock(&proctree_lock);
 	PROC_LOCK(p);
 	td2 = FIRST_THREAD_IN_PROC(p); /* XXXKSE pretend only one thread */
 
 	/*
 	 * Derive the state string: a process that should stop is always
 	 * "stopped"; otherwise the process state decides, refined by the
 	 * state of the first thread for a normal, non-exiting process.
 	 */
 	if (P_SHOULDSTOP(p)) {
 		state = "T (stopped)";
 	} else {
 		switch(p->p_state) {
 		case PRS_NEW:
 			state = "I (idle)";
 			break;
 		case PRS_NORMAL:
 			if (p->p_flag & P_WEXIT) {
 				state = "X (exiting)";
 				break;
 			}
 			switch(td2->td_state) {
 			case TDS_INHIBITED:
 				state = "S (sleeping)";
 				break;
 			case TDS_RUNQ:
 			case TDS_RUNNING:
 				state = "R (running)";
 				break;
 			default:
 				state = "? (unknown)";
 				break;
 			}
 			break;
 		case PRS_ZOMBIE:
 			state = "Z (zombie)";
 			break;
 		default:
 			state = "? (unknown)";
 			break;
 		}
 	}
 
 	/*
 	 * Snapshot the process while both locks are held.  The proctree
 	 * lock is released right away; the process lock stays held until
 	 * the credential lines below have been written.
 	 */
 	fill_kinfo_proc(p, &kp);
 	sx_sunlock(&proctree_lock);
 
 	sbuf_printf(sb, "Name:\t%s\n",		p->p_comm); /* XXX escape */
 	sbuf_printf(sb, "State:\t%s\n",		state);
 
 	/*
 	 * Credentials
 	 *
 	 * Tgid and Pid are both reported as the process id.
 	 */
 	sbuf_printf(sb, "Tgid:\t%d\n",		p->p_pid);
 	sbuf_printf(sb, "Pid:\t%d\n",		p->p_pid);
 	sbuf_printf(sb, "PPid:\t%d\n",		kp.ki_ppid );
 	sbuf_printf(sb, "TracerPid:\t%d\n",	kp.ki_tracer );
 	sbuf_printf(sb, "Uid:\t%d %d %d %d\n",	p->p_ucred->cr_ruid,
 						p->p_ucred->cr_uid,
 						p->p_ucred->cr_svuid,
 						/* FreeBSD doesn't have fsuid */
 						p->p_ucred->cr_uid);
 	sbuf_printf(sb, "Gid:\t%d %d %d %d\n",	p->p_ucred->cr_rgid,
 						p->p_ucred->cr_gid,
 						p->p_ucred->cr_svgid,
 						/* FreeBSD doesn't have fsgid */
 						p->p_ucred->cr_gid);
 	sbuf_cat(sb, "Groups:\t");
 	for (i = 0; i < p->p_ucred->cr_ngroups; i++)
 		sbuf_printf(sb, "%d ",		p->p_ucred->cr_groups[i]);
 	PROC_UNLOCK(p);
 	sbuf_putc(sb, '\n');
 
 	/*
 	 * Memory
 	 *
 	 * While our approximation of VmLib may not be accurate (I
 	 * don't know of a simple way to verify it, and I'm not sure
 	 * it has much meaning anyway), I believe it's good enough.
 	 *
 	 * The same code that could (I think) accurately compute VmLib
 	 * could also compute VmLck, but I don't really care enough to
 	 * implement it. Submissions are welcome.
 	 *
 	 * ki_size is converted with the byte macros (B2K/B2P) while
 	 * ki_rssize, ki_dsize, ki_ssize and ki_tsize go through P2K,
 	 * i.e. they are treated as page counts.
 	 */
 	sbuf_printf(sb, "VmSize:\t%8ju kB\n",	B2K((uintmax_t)kp.ki_size));
 	sbuf_printf(sb, "VmLck:\t%8u kB\n",	P2K(0)); /* XXX */
 	sbuf_printf(sb, "VmRSS:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_rssize));
 	sbuf_printf(sb, "VmData:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_dsize));
 	sbuf_printf(sb, "VmStk:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_ssize));
 	sbuf_printf(sb, "VmExe:\t%8ju kB\n",	P2K((uintmax_t)kp.ki_tsize));
 	/* VmLib: total pages minus data, stack and text pages, minus one. */
 	lsize = B2P(kp.ki_size) - kp.ki_dsize -
 	    kp.ki_ssize - kp.ki_tsize - 1;
 	sbuf_printf(sb, "VmLib:\t%8ju kB\n",	P2K((uintmax_t)lsize));
 
 	/*
 	 * Signal masks
 	 *
 	 * The sets are converted to the Linux layout under the process
 	 * lock (and the sigacts mutex for the ignore/catch sets) and are
 	 * printed only after the locks have been dropped.
 	 */
 	PROC_LOCK(p);
 	bsd_to_linux_sigset(&p->p_siglist, &siglist);
 	ps = p->p_sigacts;
 	mtx_lock(&ps->ps_mtx);
 	bsd_to_linux_sigset(&ps->ps_sigignore, &sigignore);
 	bsd_to_linux_sigset(&ps->ps_sigcatch, &sigcatch);
 	mtx_unlock(&ps->ps_mtx);
 	PROC_UNLOCK(p);
 
 	sbuf_printf(sb, "SigPnd:\t%016jx\n",	siglist.__mask);
 	/*
 	 * XXX. SigBlk - target thread's signal mask, td_sigmask.
 	 * To implement SigBlk pseudofs should support proc/tid dir entries.
 	 */
 	sbuf_printf(sb, "SigBlk:\t%016x\n",	0);
 	sbuf_printf(sb, "SigIgn:\t%016jx\n",	sigignore.__mask);
 	sbuf_printf(sb, "SigCgt:\t%016jx\n",	sigcatch.__mask);
 
 	/*
 	 * Linux also prints the capability masks, but we don't have
 	 * capabilities yet, and when we do get them they're likely to
 	 * be meaningless to Linux programs, so we lie. XXX
 	 */
 	sbuf_printf(sb, "CapInh:\t%016x\n",	0);
 	sbuf_printf(sb, "CapPrm:\t%016x\n",	0);
 	sbuf_printf(sb, "CapEff:\t%016x\n",	0);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/pid/cwd
  *
  * Writes the full path of the current directory, or "unknown" when
  * vn_fullpath() leaves the default in place.
  *
  * NOTE(review): the pwd reference is taken from td (the calling
  * thread), not from p -- confirm that this is intended.
  */
 static int
 linprocfs_doproccwd(PFS_FILL_ARGS)
 {
 	struct pwd *pwd;
 	char *path, *tofree;
 
 	path = "unknown";
 	tofree = NULL;
 
 	pwd = pwd_hold(td);
 	vn_fullpath(pwd->pwd_cdir, &path, &tofree);
 	sbuf_printf(sb, "%s", path);
 	/* tofree stays NULL unless vn_fullpath() handed back a buffer. */
 	free(tofree, M_TEMP);
 	pwd_drop(pwd);
 	return (0);
 }
 
 /*
  * Filler function for proc/pid/root
  */
 static int
 linprocfs_doprocroot(PFS_FILL_ARGS)
 {
 	struct pwd *pwd;
 	struct vnode *vp;
 	char *fullpath = "unknown";
 	char *freepath = NULL;
 
 	pwd = pwd_hold(td);
 	vp = jailed(p->p_ucred) ? pwd->pwd_jdir : pwd->pwd_rdir;
 	vn_fullpath(vp, &fullpath, &freepath);
 	sbuf_printf(sb, "%s", fullpath);
 	if (freepath)
 		free(freepath, M_TEMP);
 	pwd_drop(pwd);
 	return (0);
 }
 
 /*
  * Filler function for proc/pid/cmdline
  */
 static int
 linprocfs_doproccmdline(PFS_FILL_ARGS)
 {
 	int ret;
 
 	PROC_LOCK(p);
 	if ((ret = p_cansee(td, p)) != 0) {
 		PROC_UNLOCK(p);
 		return (ret);
 	}
 
 	/*
 	 * Mimic linux behavior and pass only processes with usermode
 	 * address space as valid.  Return zero silently otherwize.
 	 */
 	if (p->p_vmspace == &vmspace0) {
 		PROC_UNLOCK(p);
 		return (0);
 	}
 	if (p->p_args != NULL) {
 		sbuf_bcpy(sb, p->p_args->ar_args, p->p_args->ar_length);
 		PROC_UNLOCK(p);
 		return (0);
 	}
 
 	if ((p->p_flag & P_SYSTEM) != 0) {
 		PROC_UNLOCK(p);
 		return (0);
 	}
 
 	PROC_UNLOCK(p);
 
 	ret = proc_getargv(td, p, sb);
 	return (ret);
 }
 
 /*
  * Filler function for proc/pid/environ
  *
  * Hands the work to proc_getenvv() and returns its result, except for
  * processes running on vmspace0, which yield empty output.
  */
 static int
 linprocfs_doprocenviron(PFS_FILL_ARGS)
 {
 
 	/*
 	 * Mimic linux behavior and pass only processes with usermode
 	 * address space as valid.  Return zero silently otherwise.
 	 */
 	if (p->p_vmspace != &vmspace0)
 		return (proc_getenvv(td, p, sb));
 
 	return (0);
 }
 
 /*
  * Line formats for proc/pid/maps.  The two differ only in the width of
  * the start/end addresses: 8 hex digits (l32) versus 16 (l64).
  */
 static char l32_map_str[] = "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n";
 static char l64_map_str[] = "%016lx-%016lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n";
 /* Names used by linprocfs_doprocmaps() for mappings without a vnode. */
 static char vdso_str[] = "      [vdso]";
 static char stack_str[] = "      [stack]";
 
 /*
  * Filler function for proc/pid/maps
  *
  * Emits one line per entry of the address map of p (sub-map entries are
  * skipped).  Fails with the p_candebug() error if the caller may not
  * debug p, with EOPNOTSUPP for anything but a read, and with ESRCH if no
  * vmspace reference can be acquired.
  */
 static int
 linprocfs_doprocmaps(PFS_FILL_ARGS)
 {
 	struct vmspace *vm;
 	vm_map_t map;
 	vm_map_entry_t entry, tmp_entry;
 	vm_object_t obj, tobj, lobj;
 	vm_offset_t e_start, e_end;
 	vm_ooffset_t off;
 	vm_prot_t e_prot;
 	unsigned int last_timestamp;
 	char *name = "", *freename = NULL;
 	const char *l_map_str;
 	ino_t ino;
 	int ref_count, shadow_count, flags;
 	int error;
 	struct vnode *vp;
 	struct vattr vat;
 	bool private;
 
 	PROC_LOCK(p);
 	error = p_candebug(td, p);
 	PROC_UNLOCK(p);
 	if (error)
 		return (error);
 
 	if (uio->uio_rw != UIO_READ)
 		return (EOPNOTSUPP);
 
 	error = 0;
 	vm = vmspace_acquire_ref(p);
 	if (vm == NULL)
 		return (ESRCH);
 
 	/* The address width follows the ABI of the reading process. */
 	if (SV_CURPROC_FLAG(SV_LP64))
 		l_map_str = l64_map_str;
 	else
 		l_map_str = l32_map_str;
 	map = &vm->vm_map;
 	vm_map_lock_read(map);
 	VM_MAP_ENTRY_FOREACH(entry, map) {
 		name = "";
 		freename = NULL;
 		if (entry->eflags & MAP_ENTRY_IS_SUB_MAP)
 			continue;
 		/* Snapshot the entry; the map lock is dropped further down. */
 		e_prot = entry->protection;
 		e_start = entry->start;
 		e_end = entry->end;
 		obj = entry->object.vm_object;
 		off = entry->offset;
 		/*
 		 * Walk the chain of backing objects.  Each object is
 		 * read-locked as it is reached and the previously visited
 		 * one is unlocked, except for obj itself, which stays locked.
 		 * On exit lobj is the last object in the chain (still locked).
 		 */
 		for (lobj = tobj = obj; tobj != NULL;
 		    lobj = tobj, tobj = tobj->backing_object) {
 			VM_OBJECT_RLOCK(tobj);
 			off += lobj->backing_object_offset;
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 		}
 		/* Shown as "p" (private) or "s" (shared) in the output. */
 		private = (entry->eflags & MAP_ENTRY_COW) != 0 || obj == NULL ||
 		    (obj->flags & OBJ_ANON) != 0;
 		/* Remember the map generation so changes can be detected. */
 		last_timestamp = map->timestamp;
 		vm_map_unlock_read(map);
 		ino = 0;
 		if (lobj) {
 			/* Take a reference on the vnode before unlocking. */
 			vp = vm_object_vnode(lobj);
 			if (vp != NULL)
 				vref(vp);
 			if (lobj != obj)
 				VM_OBJECT_RUNLOCK(lobj);
 			/* Sampled here but not used by the output below. */
 			flags = obj->flags;
 			ref_count = obj->ref_count;
 			shadow_count = obj->shadow_count;
 			VM_OBJECT_RUNLOCK(obj);
 			if (vp != NULL) {
 				vn_fullpath(vp, &name, &freename);
 				vn_lock(vp, LK_SHARED | LK_RETRY);
 				VOP_GETATTR(vp, &vat, td->td_ucred);
 				ino = vat.va_fileid;
 				/* vput() unlocks and releases the vref() above. */
 				vput(vp);
 			} else if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
 				/* Label the shared page and the stack top. */
 				if (e_start == p->p_sysent->sv_shared_page_base)
 					name = vdso_str;
 				if (e_end == p->p_sysent->sv_usrstack)
 					name = stack_str;
 			}
 		} else {
 			flags = 0;
 			ref_count = 0;
 			shadow_count = 0;
 		}
 
 		/*
 		 * format:
 		 *  start, end, access, offset, major, minor, inode, name.
 		 * major and minor are always printed as 0.
 		 */
 		error = sbuf_printf(sb, l_map_str,
 		    (u_long)e_start, (u_long)e_end,
 		    (e_prot & VM_PROT_READ)?"r":"-",
 		    (e_prot & VM_PROT_WRITE)?"w":"-",
 		    (e_prot & VM_PROT_EXECUTE)?"x":"-",
 		    private ? "p" : "s",
 		    (u_long)off,
 		    0,
 		    0,
 		    (u_long)ino,
 		    *name ? "     " : " ",
 		    name
 		    );
-		if (error == -1) {
-			linux_msg(td, "cannot fill /proc/self/maps; "
-			    "consider bumping PFS_MAXBUFSIZ");
-		}
 		if (freename)
 			free(freename, M_TEMP);
 		vm_map_lock_read(map);
 		/*
 		 * A failed sbuf_printf() ends the listing early; it is not
 		 * reported to the caller as an error.
 		 */
 		if (error == -1) {
 			error = 0;
 			break;
 		}
 		if (last_timestamp != map->timestamp) {
 			/*
 			 * Look again for the entry because the map was
 			 * modified while it was unlocked.  Specifically,
 			 * the entry may have been clipped, merged, or deleted.
 			 */
 			vm_map_lookup_entry(map, e_end - 1, &tmp_entry);
 			entry = tmp_entry;
 		}
 	}
 	vm_map_unlock_read(map);
 	vmspace_free(vm);
 
 	return (error);
 }
 
 /*
  * Filler function for proc/pid/mem
  *
  * Thin wrapper around procfs_doprocmem() that adjusts its result:
  * a read that transferred any data succeeds, and EFAULT is reported
  * as EIO.
  */
 static int
 linprocfs_doprocmem(PFS_FILL_ARGS)
 {
 	ssize_t before;
 	int rv;
 
 	before = uio->uio_resid;
 	rv = procfs_doprocmem(PFS_FILL_ARGNAMES);
 
 	/* uio_resid changed, so at least part of the read went through. */
 	if (uio->uio_rw == UIO_READ && uio->uio_resid != before)
 		return (0);
 
 	return (rv == EFAULT ? EIO : rv);
 }
 
 /*
  * Criteria for interface name translation
  */
 #define IFP_IS_ETH(ifp) (ifp->if_type == IFT_ETHER)
 
 /*
  * Write the Linux-style name of ifp into buffer (at most buflen bytes).
  *
  * Non-ethernet interfaces keep their if_xname.  Ethernet interfaces are
  * named "eth<N>", where N is the number of ethernet interfaces that
  * precede ifp on the V_ifnet list.  Returns the strlcpy()/snprintf()
  * result, or 0 (buffer untouched) if an ethernet ifp is not on the list.
  * The caller must hold the ifnet read lock.
  */
 static int
 linux_ifname(struct ifnet *ifp, char *buffer, size_t buflen)
 {
 	struct ifnet *cur;
 	int unit;
 
 	IFNET_RLOCK_ASSERT();
 
 	if (IFP_IS_ETH(ifp)) {
 		/* Determine the (relative) unit number of this interface */
 		unit = 0;
 		CK_STAILQ_FOREACH(cur, &V_ifnet, if_link) {
 			if (cur == ifp)
 				return (snprintf(buffer, buflen, "eth%d",
 				    unit));
 			if (IFP_IS_ETH(cur))
 				unit++;
 		}
 		return (0);
 	}
 
 	/* Anything that is not ethernet keeps its native name */
 	return (strlcpy(buffer, ifp->if_xname, buflen));
 }
 
 /*
  * Filler function for proc/net/dev
  *
  * Prints the two header lines followed by one line of receive and
  * transmit counters per interface on the V_ifnet list of the current
  * thread's vnet.  Counters with no source here are printed as 0.
  */
 static int
 linprocfs_donetdev(PFS_FILL_ARGS)
 {
 	char ifname[16]; /* XXX LINUX_IFNAMSIZ */
 	struct ifnet *ifp;
 	uintmax_t bytes, packets, errors, drops, extra;
 
 	sbuf_printf(sb, "%6s|%58s|%s\n"
 	    "%6s|%58s|%58s\n",
 	    "Inter-", "   Receive", "  Transmit",
 	    " face",
 	    "bytes    packets errs drop fifo frame compressed multicast",
 	    "bytes    packets errs drop fifo colls carrier compressed");
 
 	CURVNET_SET(TD_TO_VNET(curthread));
 	IFNET_RLOCK();
 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
 		linux_ifname(ifp, ifname, sizeof(ifname));
 		sbuf_printf(sb, "%6.6s: ", ifname);
 
 		/* Receive side; "extra" is the multicast count. */
 		bytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES);
 		packets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS);
 		errors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS);
 		drops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS);
 							/* rx_missed_errors */
 		extra = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS);
 							/* XXX-BZ rx only? */
 		sbuf_printf(sb, "%7ju %7ju %4ju %4ju %4lu %5lu %10lu %9ju ",
 		    bytes, packets, errors, drops,
 		    0UL,				/* rx_fifo_errors */
 		    0UL,				/* rx_length_errors +
 							 * rx_over_errors +
 							 * rx_crc_errors +
 							 * rx_frame_errors */
 		    0UL,				/* rx_compressed */
 		    extra);
 
 		/* Transmit side; "extra" is the collision count. */
 		bytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES);
 		packets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS);
 		errors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS);
 		drops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS);
 		extra = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS);
 		sbuf_printf(sb, "%8ju %7ju %4ju %4ju %4lu %5ju %7lu %10lu\n",
 		    bytes, packets, errors, drops,
 		    0UL,				/* tx_fifo_errors */
 		    extra,
 		    0UL,				/* tx_carrier_errors +
 							 * tx_aborted_errors +
 							 * tx_window_errors +
 							 * tx_heartbeat_errors*/
 		    0UL);				/* tx_compressed */
 	}
 	IFNET_RUNLOCK();
 	CURVNET_RESTORE();
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/osrelease
  *
  * Prints the string produced by linux_get_osrelease() for td, followed
  * by a newline.
  */
 static int
 linprocfs_doosrelease(PFS_FILL_ARGS)
 {
 	char release[LINUX_MAX_UTSNAME];
 
 	linux_get_osrelease(td, release);
 	sbuf_printf(sb, "%s\n", release);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/ostype
  *
  * Prints the string produced by linux_get_osname() for td, followed by
  * a newline.
  */
 static int
 linprocfs_doostype(PFS_FILL_ARGS)
 {
 	char ostype[LINUX_MAX_UTSNAME];
 
 	linux_get_osname(td, ostype);
 	sbuf_printf(sb, "%s\n", ostype);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/version
  *
  * Prints whatever linprocfs_osbuild() emits, terminated by a newline.
  */
 static int
 linprocfs_doosbuild(PFS_FILL_ARGS)
 {
 
 	linprocfs_osbuild(td, sb);
 	sbuf_printf(sb, "\n");
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/msgmax
  *
  * Prints msginfo.msgmax as a decimal number on a line of its own.
  */
 static int
 linprocfs_domsgmax(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "%d\n",
 	    msginfo.msgmax);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/msgmni
  *
  * Prints msginfo.msgmni as a decimal number on a line of its own.
  */
 static int
 linprocfs_domsgmni(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "%d\n",
 	    msginfo.msgmni);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/msgmnb
  *
  * Prints msginfo.msgmnb as a decimal number on a line of its own.
  */
 static int
 linprocfs_domsgmnb(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "%d\n",
 	    msginfo.msgmnb);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/ngroups_max
  *
  * Prints the value of ngroups_max on a line of its own.
  *
  * Note that in Linux it defaults to 65536, not 1023.
  */
 static int
 linprocfs_dongroups_max(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "%d\n",
 	    ngroups_max);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/pid_max
  *
  * Prints the compile-time constant PID_MAX on a line of its own.
  */
 static int
 linprocfs_dopid_max(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "%i\n",
 	    PID_MAX);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/sem
  *
  * Prints four space-separated values from seminfo on one line, in the
  * order semmsl, semmns, semopm, semmni.
  */
 static int
 linprocfs_dosem(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "%d %d %d %d\n",
 	    seminfo.semmsl,
 	    seminfo.semmns,
 	    seminfo.semopm,
 	    seminfo.semmni);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/shmall
  *
  * Prints shminfo.shmall as an unsigned decimal on a line of its own.
  */
 static int
 linprocfs_doshmall(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "%lu\n",
 	    shminfo.shmall);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/shmmax
  *
  * Prints shminfo.shmmax as an unsigned decimal on a line of its own.
  */
 static int
 linprocfs_doshmmax(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "%lu\n",
 	    shminfo.shmmax);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/shmmni
  *
  * Prints shminfo.shmmni as an unsigned decimal on a line of its own.
  */
 static int
 linprocfs_doshmmni(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "%lu\n",
 	    shminfo.shmmni);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/tainted
  *
  * Always reports the constant line "0".
  */
 static int
 linprocfs_dotainted(PFS_FILL_ARGS)
 {
 
 	sbuf_cat(sb, "0\n");
 
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/vm/min_free_kbytes
  *
  * Reads always yield 0, mirroring the approach taken in illumos.
  * Effectively this says that no memory is kept in reserve for "atomic
  * allocations", a class of allocation that can be used at times when a
  * thread cannot be suspended.
  */
 static int
 linprocfs_dominfree(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "%d\n",
 	    0);
 
 	return (0);
 }
 
 /*
  * Filler function for proc/scsi/device_info
  *
  * Produces no output; the file always reads as empty.
  */
 static int
 linprocfs_doscsidevinfo(PFS_FILL_ARGS)
 {
 
 	/* Nothing to report. */
 	return (0);
 }
 
 /*
  * Filler function for proc/scsi/scsi
  *
  * Produces no output; the file always reads as empty.
  */
 static int
 linprocfs_doscsiscsi(PFS_FILL_ARGS)
 {
 
 	/* Nothing to report. */
 	return (0);
 }
 
 /*
  * Filler function for proc/devices
  *
  * Prints the "Character devices:" heading, the text returned by
  * linux_get_char_devices(), and an empty "Block devices:" section.
  */
 static int
 linprocfs_dodevices(PFS_FILL_ARGS)
 {
 	char *chardevs;
 
 	sbuf_cat(sb, "Character devices:\n");
 
 	/* The list is handed back to its allocator once printed. */
 	chardevs = linux_get_char_devices();
 	sbuf_printf(sb, "%s", chardevs);
 	linux_free_get_char_devices(chardevs);
 
 	sbuf_cat(sb, "\nBlock devices:\n");
 
 	return (0);
 }
 
 /*
  * Filler function for proc/cmdline
  *
  * Prints a single line: "BOOT_IMAGE=" followed by kernelname and the
  * fixed suffix " ro root=302".
  */
 static int
 linprocfs_docmdline(PFS_FILL_ARGS)
 {
 
 	sbuf_printf(sb, "BOOT_IMAGE=%s", kernelname);
 	sbuf_cat(sb, " ro root=302\n");
 
 	return (0);
 }
 
 /*
  * Filler function for proc/filesystems
  *
  * Prints one line per entry of the vfsconf list: a tab followed by the
  * filesystem name, prefixed with "nodev" when VFCF_SYNTHETIC is set.
  */
 static int
 linprocfs_dofilesystems(PFS_FILL_ARGS)
 {
 	struct vfsconf *vfc;
 
 	vfsconf_slock();
 	TAILQ_FOREACH(vfc, &vfsconf, vfc_list) {
 		if ((vfc->vfc_flags & VFCF_SYNTHETIC) != 0)
 			sbuf_cat(sb, "nodev");
 		sbuf_printf(sb, "\t%s\n", vfc->vfc_name);
 	}
 	vfsconf_sunlock();
 
 	return (0);
 }
 
 /*
  * Filler function for proc/modules
  *
  * Currently produces no output: the code that would list the loaded
  * linker files is compiled out.
  */
 static int
 linprocfs_domodules(PFS_FILL_ARGS)
 {
 #if 0
 	struct linker_file *lf;
 
 	TAILQ_FOREACH(lf, &linker_files, link) {
 		sbuf_printf(sb, "%-20s%8lu%4d\n", lf->filename,
 		    (unsigned long)lf->size, lf->refs);
 	}
 #endif
 
 	return (0);
 }
 
 /*
  * Filler function for proc/pid/fd
  *
  * The link target is "/dev/fd" when p is the calling process and
  * "unknown" for any other process.
  */
 static int
 linprocfs_dofdescfs(PFS_FILL_ARGS)
 {
 
 	sbuf_cat(sb, p == curproc ? "/dev/fd" : "unknown");
 
 	return (0);
 }
 
 /*
  * Filler function for proc/pid/limits
  *
  * The table below lists the rows of the file in output order.  It is
  * terminated by an entry whose desc is NULL.
  */
 static const struct linux_rlimit_ident {
 	const char	*desc;		/* text of the "Limit" column */
 	const char	*unit;		/* text of the "Units" column */
 	unsigned int	rlim_id;	/* RLIMIT_* or LINUX_RLIMIT_* id */
 } linux_rlimits_ident[] = {
 	{ "Max cpu time",		"seconds",	RLIMIT_CPU },
 	{ "Max file size",		"bytes",	RLIMIT_FSIZE },
 	{ "Max data size",		"bytes",	RLIMIT_DATA },
 	{ "Max stack size",		"bytes",	RLIMIT_STACK },
 	{ "Max core file size",		"bytes",	RLIMIT_CORE },
 	{ "Max resident set",		"bytes",	RLIMIT_RSS },
 	{ "Max processes",		"processes",	RLIMIT_NPROC },
 	{ "Max open files",		"files",	RLIMIT_NOFILE },
 	{ "Max locked memory",		"bytes",	RLIMIT_MEMLOCK },
 	{ "Max address space",		"bytes",	RLIMIT_AS },
 	{ "Max file locks",		"locks",	LINUX_RLIMIT_LOCKS },
 	{ "Max pending signals",	"signals",	LINUX_RLIMIT_SIGPENDING },
 	{ "Max msgqueue size",		"bytes",	LINUX_RLIMIT_MSGQUEUE },
 	{ "Max nice priority",		"",		LINUX_RLIMIT_NICE },
 	{ "Max realtime priority",	"",		LINUX_RLIMIT_RTPRIO },
 	{ "Max realtime timeout",	"us",		LINUX_RLIMIT_RTTIME },
 	{ NULL, NULL, 0 }
 };
 
 /*
  * Print the proc/pid/limits table for p: a heading line followed by one
  * row per entry of linux_rlimits_ident.
  *
  * Limits with a native counterpart are read from a held copy of the
  * process limits.  The Linux-only ones are synthesized: file locks and
  * realtime timeout are unlimited, nice and realtime priority are 0, and
  * pending signals / msgqueue size come from sysctl values.  The soft and
  * the hard limit are formatted independently, each one being shown as
  * "unlimited" when it equals RLIM_INFINITY.
  *
  * Returns 0, or the error of a failed sysctl lookup (in which case the
  * output stops at the offending row).
  */
 static int
 linprocfs_doproclimits(PFS_FILL_ARGS)
 {
 	const struct linux_rlimit_ident *li;
 	struct plimit *limp;
 	struct rlimit rl;
 	ssize_t size;
 	int res, error;
 
 	error = 0;
 
 	/* Hold the limits so they can be read without the proc lock. */
 	PROC_LOCK(p);
 	limp = lim_hold(p->p_limit);
 	PROC_UNLOCK(p);
 	size = sizeof(res);
 	sbuf_printf(sb, "%-26s%-21s%-21s%-21s\n", "Limit", "Soft Limit",
 			"Hard Limit", "Units");
 	for (li = linux_rlimits_ident; li->desc != NULL; ++li) {
 		switch (li->rlim_id)
 		{
 		case LINUX_RLIMIT_LOCKS:
 			/* FALLTHROUGH */
 		case LINUX_RLIMIT_RTTIME:
 			rl.rlim_cur = RLIM_INFINITY;
 			rl.rlim_max = RLIM_INFINITY;
 			break;
 		case LINUX_RLIMIT_SIGPENDING:
 			error = kernel_sysctlbyname(td,
 			    "kern.sigqueue.max_pending_per_proc",
 			    &res, &size, 0, 0, 0, 0);
 			if (error != 0)
 				goto out;
 			rl.rlim_cur = res;
 			rl.rlim_max = res;
 			break;
 		case LINUX_RLIMIT_MSGQUEUE:
 			error = kernel_sysctlbyname(td,
 			    "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0);
 			if (error != 0)
 				goto out;
 			rl.rlim_cur = res;
 			rl.rlim_max = res;
 			break;
 		case LINUX_RLIMIT_NICE:
 			/* FALLTHROUGH */
 		case LINUX_RLIMIT_RTPRIO:
 			rl.rlim_cur = 0;
 			rl.rlim_max = 0;
 			break;
 		default:
 			rl = limp->pl_rlimit[li->rlim_id];
 			break;
 		}
 
 		/*
 		 * Emit the row column by column so that an infinite hard
 		 * limit is spelled "unlimited" even when the soft limit
 		 * is finite.
 		 */
 		sbuf_printf(sb, "%-26s", li->desc);
 		if (rl.rlim_cur == RLIM_INFINITY)
 			sbuf_printf(sb, "%-21s", "unlimited");
 		else
 			sbuf_printf(sb, "%-21llu",
 			    (unsigned long long)rl.rlim_cur);
 		if (rl.rlim_max == RLIM_INFINITY)
 			sbuf_printf(sb, "%-21s", "unlimited");
 		else
 			sbuf_printf(sb, "%-21llu",
 			    (unsigned long long)rl.rlim_max);
 		sbuf_printf(sb, "%-10s\n", li->unit);
 	}
 out:
 	lim_free(limp);
 	return (error);
 }
 
 /*
  * The point of the following two functions is to work around
  * an assertion in Chromium; see kern/240991 for details.
  *
  * Attribute callback: reports a link count of 3 and succeeds.
  */
 static int
 linprocfs_dotaskattr(PFS_ATTR_ARGS)
 {
 
 	vap->va_nlink = 3;
 
 	return (0);
 }
 
 /*
  * Filler function for proc/<pid>/task/.dummy
  *
  * Produces no output; the file always reads as empty.
  */
 static int
 linprocfs_dotaskdummy(PFS_FILL_ARGS)
 {
 
 	/* Nothing to report. */
 	return (0);
 }
 
 /*
  * Filler function for proc/sys/kernel/random/uuid
  */
 static int
 linprocfs_douuid(PFS_FILL_ARGS)
 {
 	struct uuid uuid;
 
 	kern_uuidgen(&uuid, 1);
 	sbuf_printf_uuid(sb, &uuid);
 	sbuf_printf(sb, "\n");
 	return(0);
 }
 
 /*
  * Filler function for proc/pid/auxv
  *
  * Raw reader: the auxiliary vector of p is collected into a temporary
  * sbuf with proc_getauxv() and the window selected by uio_offset and
  * uio_resid is copied out with uiomove().
  *
  * Returns 0 without output for processes that use vmspace0, for
  * zero-length reads, and for reads at or past the end of the data;
  * EINVAL for a negative offset/length or an oversized transfer; ENOMEM
  * if the temporary sbuf cannot be allocated; otherwise the error of
  * proc_getauxv(), sbuf_finish() or uiomove().
  *
  * The temporary sbuf is released on every path once it has been
  * allocated.
  */
 static int
 linprocfs_doauxv(PFS_FILL_ARGS)
 {
 	struct sbuf *asb;
 	off_t buflen, resid;
 	int error;
 
 	/*
 	 * Mimic linux behavior and pass only processes with usermode
 	 * address space as valid. Return zero silently otherwise.
 	 */
 	if (p->p_vmspace == &vmspace0)
 		return (0);
 
 	if (uio->uio_resid == 0)
 		return (0);
 	if (uio->uio_offset < 0 || uio->uio_resid < 0)
 		return (EINVAL);
 
 	asb = sbuf_new_auto();
 	if (asb == NULL)
 		return (ENOMEM);
 	error = proc_getauxv(td, p, asb);
 	if (error == 0)
 		error = sbuf_finish(asb);
 
 	/* Clamp the transfer to what is left past the requested offset. */
 	resid = sbuf_len(asb) - uio->uio_offset;
 	if (resid > uio->uio_resid)
 		buflen = uio->uio_resid;
 	else
 		buflen = resid;
 	if (buflen > IOSIZE_MAX) {
 		error = EINVAL;
 		goto out;
 	}
 	if (buflen > MAXPHYS)
 		buflen = MAXPHYS;
 	if (resid <= 0) {
 		/* Nothing to copy; reported as success, as before. */
 		error = 0;
 		goto out;
 	}
 
 	if (error == 0)
 		error = uiomove(sbuf_data(asb) + uio->uio_offset, buflen, uio);
 out:
 	sbuf_delete(asb);
 	return (error);
 }
 
 /*
  * Constructor
  *
  * Populates the node tree below pi->pi_root with the files, links and
  * directories of the emulated /proc.  "dir" is reused as the current
  * parent directory while each subtree is built.  Always returns 0; the
  * results of the pfs_create_*() calls are not checked.
  */
 static int
 linprocfs_init(PFS_INIT_ARGS)
 {
 	struct pfs_node *root;
 	struct pfs_node *dir;
 	struct pfs_node *sys;
 
 	root = pi->pi_root;
 
 	/* /proc/... */
 	pfs_create_file(root, "cmdline", &linprocfs_docmdline,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "cpuinfo", &linprocfs_docpuinfo,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "devices", &linprocfs_dodevices,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "filesystems", &linprocfs_dofilesystems,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "loadavg", &linprocfs_doloadavg,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "meminfo", &linprocfs_domeminfo,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "modules", &linprocfs_domodules,
 	    NULL, NULL, NULL, PFS_RD);
 	/* "mounts" and "mtab" share the same filler. */
 	pfs_create_file(root, "mounts", &linprocfs_domtab,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "mtab", &linprocfs_domtab,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "partitions", &linprocfs_dopartitions,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_link(root, "self", &procfs_docurproc,
 	    NULL, NULL, NULL, 0);
 	pfs_create_file(root, "stat", &linprocfs_dostat,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "swaps", &linprocfs_doswaps,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "uptime", &linprocfs_douptime,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(root, "version", &linprocfs_doversion,
 	    NULL, NULL, NULL, PFS_RD);
 
 	/* /proc/bus/... */
 	/* Only the bus/pci/devices directory chain; no files are added. */
 	dir = pfs_create_dir(root, "bus", NULL, NULL, NULL, 0);
 	dir = pfs_create_dir(dir, "pci", NULL, NULL, NULL, 0);
 	dir = pfs_create_dir(dir, "devices", NULL, NULL, NULL, 0);
 
 	/* /proc/net/... */
 	dir = pfs_create_dir(root, "net", NULL, NULL, NULL, 0);
 	pfs_create_file(dir, "dev", &linprocfs_donetdev,
 	    NULL, NULL, NULL, PFS_RD);
 
 	/* /proc/<pid>/... */
 	/* The directory node is named "pid" and flagged process-dependent. */
 	dir = pfs_create_dir(root, "pid", NULL, NULL, NULL, PFS_PROCDEP);
 	pfs_create_file(dir, "cmdline", &linprocfs_doproccmdline,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_link(dir, "cwd", &linprocfs_doproccwd,
 	    NULL, NULL, NULL, 0);
 	pfs_create_file(dir, "environ", &linprocfs_doprocenviron,
 	    NULL, &procfs_candebug, NULL, PFS_RD);
 	pfs_create_link(dir, "exe", &procfs_doprocfile,
 	    NULL, &procfs_notsystem, NULL, 0);
 	pfs_create_file(dir, "maps", &linprocfs_doprocmaps,
-	    NULL, NULL, NULL, PFS_RD);
+	    NULL, NULL, NULL, PFS_RD | PFS_AUTODRAIN);
 	pfs_create_file(dir, "mem", &linprocfs_doprocmem,
 	    procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
 	pfs_create_file(dir, "mounts", &linprocfs_domtab,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_link(dir, "root", &linprocfs_doprocroot,
 	    NULL, NULL, NULL, 0);
 	pfs_create_file(dir, "stat", &linprocfs_doprocstat,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "statm", &linprocfs_doprocstatm,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "status", &linprocfs_doprocstatus,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_link(dir, "fd", &linprocfs_dofdescfs,
 	    NULL, NULL, NULL, 0);
 	pfs_create_file(dir, "auxv", &linprocfs_doauxv,
 	    NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD);
 	pfs_create_file(dir, "limits", &linprocfs_doproclimits,
 	    NULL, NULL, NULL, PFS_RD);
 
 	/* /proc/<pid>/task/... */
 	/* Nested below the per-process directory created above. */
 	dir = pfs_create_dir(dir, "task", linprocfs_dotaskattr, NULL, NULL, 0);
 	pfs_create_file(dir, ".dummy", &linprocfs_dotaskdummy,
 	    NULL, NULL, NULL, PFS_RD);
 
 	/* /proc/scsi/... */
 	dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0);
 	pfs_create_file(dir, "device_info", &linprocfs_doscsidevinfo,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "scsi", &linprocfs_doscsiscsi,
 	    NULL, NULL, NULL, PFS_RD);
 
 	/* /proc/sys/... */
 	/* Kept in "sys" because two subdirectories hang off it. */
 	sys = pfs_create_dir(root, "sys", NULL, NULL, NULL, 0);
 
 	/* /proc/sys/kernel/... */
 	dir = pfs_create_dir(sys, "kernel", NULL, NULL, NULL, 0);
 	pfs_create_file(dir, "osrelease", &linprocfs_doosrelease,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "ostype", &linprocfs_doostype,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "version", &linprocfs_doosbuild,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "msgmax", &linprocfs_domsgmax,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "msgmni", &linprocfs_domsgmni,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "msgmnb", &linprocfs_domsgmnb,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "ngroups_max", &linprocfs_dongroups_max,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "pid_max", &linprocfs_dopid_max,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "sem", &linprocfs_dosem,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "shmall", &linprocfs_doshmall,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "shmmax", &linprocfs_doshmmax,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "shmmni", &linprocfs_doshmmni,
 	    NULL, NULL, NULL, PFS_RD);
 	pfs_create_file(dir, "tainted", &linprocfs_dotainted,
 	    NULL, NULL, NULL, PFS_RD);
 
 	/* /proc/sys/kernel/random/... */
 	dir = pfs_create_dir(dir, "random", NULL, NULL, NULL, 0);
 	pfs_create_file(dir, "uuid", &linprocfs_douuid,
 	    NULL, NULL, NULL, PFS_RD);
 
 	/* /proc/sys/vm/.... */
 	dir = pfs_create_dir(sys, "vm", NULL, NULL, NULL, 0);
 	pfs_create_file(dir, "min_free_kbytes", &linprocfs_dominfree,
 	    NULL, NULL, NULL, PFS_RD);
 
 	return (0);
 }
 
 /*
  * Destructor
  *
  * Has no work of its own: the node tree built by the constructor is
  * left for pseudofs to reclaim.  Always returns 0.
  */
 static int
 linprocfs_uninit(PFS_INIT_ARGS)
 {
 
 	/* nothing to do, pseudofs will GC */
 	return (0);
 }
 
 /*
  * Register linprocfs as a pseudofs-based filesystem (module version 1,
  * extra flag VFCF_JAIL) and declare the modules it depends on.  The
  * Linux ABI dependency is linux_common on aarch64 and amd64, and linux
  * everywhere else.
  */
 PSEUDOFS(linprocfs, 1, VFCF_JAIL);
 #if defined(__aarch64__) || defined(__amd64__)
 MODULE_DEPEND(linprocfs, linux_common, 1, 1, 1);
 #else
 MODULE_DEPEND(linprocfs, linux, 1, 1, 1);
 #endif
 MODULE_DEPEND(linprocfs, procfs, 1, 1, 1);
 MODULE_DEPEND(linprocfs, sysvmsg, 1, 1, 1);
 MODULE_DEPEND(linprocfs, sysvsem, 1, 1, 1);
 MODULE_DEPEND(linprocfs, sysvshm, 1, 1, 1);
Index: head/sys/fs/pseudofs/pseudofs.h
===================================================================
--- head/sys/fs/pseudofs/pseudofs.h	(revision 367361)
+++ head/sys/fs/pseudofs/pseudofs.h	(revision 367362)
@@ -1,313 +1,314 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  *
  *      $FreeBSD$
  */
 
 #ifndef _PSEUDOFS_H_INCLUDED
 #define _PSEUDOFS_H_INCLUDED
 
 #include <sys/jail.h>
 
 /*
  * Opaque structures
  */
 struct mntarg;
 struct mount;
 struct nameidata;
 struct proc;
 struct sbuf;
 struct statfs;
 struct thread;
 struct uio;
 struct vfsconf;
 struct vnode;
 
 /*
  * Limits and constants
  */
 #define PFS_NAMELEN		128
 #define PFS_FSNAMELEN		16	/* equal to MFSNAMELEN */
 /* Size of a struct dirent header plus a name of PFS_NAMELEN bytes. */
 #define PFS_DELEN		(offsetof(struct dirent, d_name) + PFS_NAMELEN)
 
 /*
  * Node types, stored in the pn_type field of struct pfs_node.
  * pfstype_none is explicitly 0.
  */
 typedef enum {
 	pfstype_none = 0,
 	pfstype_root,
 	pfstype_dir,
 	pfstype_this,
 	pfstype_parent,
 	pfstype_file,
 	pfstype_symlink,
 	pfstype_procdir
 } pfs_type_t;
 
 /*
  * Flags
  *
  * Each flag is a distinct bit; PFS_RDWR and PFS_RAW are convenience
  * combinations of the two flags they name.
  */
 #define PFS_RD		0x0001	/* readable */
 #define PFS_WR		0x0002	/* writeable */
 #define PFS_RDWR	(PFS_RD|PFS_WR)
 #define PFS_RAWRD	0x0004	/* raw reader */
 #define	PFS_RAWWR	0x0008	/* raw writer */
 #define PFS_RAW		(PFS_RAWRD|PFS_RAWWR)
 #define PFS_PROCDEP	0x0010	/* process-dependent */
 #define PFS_NOWAIT	0x0020 /* allow malloc to fail */
+#define PFS_AUTODRAIN	0x0040	/* sbuf_print can sleep to drain */
 
 /*
  * Data structures
  */
 struct pfs_info;
 struct pfs_node;
 
 /*
  * Callback types
  *
  * Every callback kind below comes as a group of four definitions:
  *   PFS_x_ARGS         the parameter list of the callback,
  *   PFS_x_ARGNAMES     the matching argument names, for passing the
  *                      parameters on to another function unchanged,
  *   PFS_x_PROTO(name)  a prototype for a function of that kind,
  *   pfs_x_t            the corresponding function pointer type.
  * All callbacks return int.
  */
 
 /*
  * Init / uninit callback
  */
 #define PFS_INIT_ARGS \
 	struct pfs_info *pi, struct vfsconf *vfc
 #define PFS_INIT_ARGNAMES \
 	pi, vfc
 #define PFS_INIT_PROTO(name) \
 	int name(PFS_INIT_ARGS);
 typedef int (*pfs_init_t)(PFS_INIT_ARGS);
 
 /*
  * Filler callback
  * Called with proc held but unlocked
  */
 #define PFS_FILL_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn, \
 	struct sbuf *sb, struct uio *uio
 #define PFS_FILL_ARGNAMES \
 	td, p, pn, sb, uio
 #define PFS_FILL_PROTO(name) \
 	int name(PFS_FILL_ARGS);
 typedef int (*pfs_fill_t)(PFS_FILL_ARGS);
 
 /*
  * Attribute callback
  * Called with proc locked
  */
 struct vattr;
 #define PFS_ATTR_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn, \
 	struct vattr *vap
 #define PFS_ATTR_ARGNAMES \
 	td, p, pn, vap
 #define PFS_ATTR_PROTO(name) \
 	int name(PFS_ATTR_ARGS);
 typedef int (*pfs_attr_t)(PFS_ATTR_ARGS);
 
 /*
  * Visibility callback
  * Called with proc locked
  */
 #define PFS_VIS_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn
 #define PFS_VIS_ARGNAMES \
 	td, p, pn
 #define PFS_VIS_PROTO(name) \
 	int name(PFS_VIS_ARGS);
 typedef int (*pfs_vis_t)(PFS_VIS_ARGS);
 
 /*
  * Ioctl callback
  * Called with proc locked
  */
 #define PFS_IOCTL_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn, \
 	unsigned long cmd, void *data
 #define PFS_IOCTL_ARGNAMES \
 	td, p, pn, cmd, data
 #define PFS_IOCTL_PROTO(name) \
 	int name(PFS_IOCTL_ARGS);
 typedef int (*pfs_ioctl_t)(PFS_IOCTL_ARGS);
 
 /*
  * Getextattr callback
  * Called with proc locked
  */
 #define PFS_GETEXTATTR_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn, \
 	int attrnamespace, const char *name, struct uio *uio,	\
 	size_t *size, struct ucred *cred
 #define PFS_GETEXTATTR_ARGNAMES \
 	td, p, pn, attrnamespace, name, uio, size, cred
 #define PFS_GETEXTATTR_PROTO(name) \
 	int name(PFS_GETEXTATTR_ARGS);
 struct ucred;
 typedef int (*pfs_getextattr_t)(PFS_GETEXTATTR_ARGS);
 
 /*
  * Last-close callback
  * Called with proc locked
  */
 #define PFS_CLOSE_ARGS \
 	struct thread *td, struct proc *p, struct pfs_node *pn
 #define PFS_CLOSE_ARGNAMES \
 	td, p, pn
 #define PFS_CLOSE_PROTO(name) \
 	int name(PFS_CLOSE_ARGS);
 typedef int (*pfs_close_t)(PFS_CLOSE_ARGS);
 
 /*
  * Destroy callback
  */
 #define PFS_DESTROY_ARGS \
 	struct pfs_node *pn
 #define PFS_DESTROY_ARGNAMES \
 	pn
 #define PFS_DESTROY_PROTO(name) \
 	int name(PFS_DESTROY_ARGS);
 typedef int (*pfs_destroy_t)(PFS_DESTROY_ARGS);
 
 /*
  * pfs_info: describes a pseudofs instance
  *
  * The pi_mutex is only used to avoid using the global subr_unit lock
  * for unrhdr.  The rest of struct pfs_info is only modified during
  * vfs_init() and vfs_uninit() of the consumer filesystem.
  *
  * The PSEUDOFS() macro initializes the first three members positionally,
  * so their order must not change.
  */
 struct pfs_info {
 	char			 pi_name[PFS_FSNAMELEN];
 	pfs_init_t		 pi_init;
 	pfs_init_t		 pi_uninit;
 
 	/* members below this line are initialized at run time */
 	struct pfs_node		*pi_root;
 	struct mtx		 pi_mutex;
 	struct unrhdr		*pi_unrhdr;
 };
 
 /*
  * pfs_node: describes a node (file or directory) within a pseudofs
  *
  * - Fields marked (o) are protected by the node's own mutex.
  * - Fields marked (p) are protected by the node's parent's mutex.
  * - Remaining fields are not protected by any lock and are assumed to be
  *   immutable once the node has been created.
  *
  * To prevent deadlocks, if a node's mutex is to be held at the same time
  * as its parent's (e.g. when adding or removing nodes to a directory),
  * the parent's mutex must always be acquired first.  Unfortunately, this
  * is not enforceable by WITNESS.
  */
 struct pfs_node {
 	char			 pn_name[PFS_NAMELEN];
 	pfs_type_t		 pn_type;
 	int			 pn_flags;
 	struct mtx		 pn_mutex;
 	void			*pn_data;		/* (o) */
 
 	/* Per-node callbacks; see the pfs_*_t typedefs. */
 	pfs_fill_t		 pn_fill;
 	pfs_ioctl_t		 pn_ioctl;
 	pfs_close_t		 pn_close;
 	pfs_attr_t		 pn_attr;
 	pfs_vis_t		 pn_vis;
 	pfs_getextattr_t	 pn_getextattr;
 	pfs_destroy_t		 pn_destroy;
 
 	struct pfs_info		*pn_info;
 	u_int32_t		 pn_fileno;		/* (o) */
 
 	/* Links to the surrounding nodes of the tree. */
 	struct pfs_node		*pn_parent;		/* (o) */
 	struct pfs_node		*pn_nodes;		/* (o) */
 	struct pfs_node		*pn_last_node;		/* (o) */
 	struct pfs_node		*pn_next;		/* (p) */
 };
 
 /*
  * VFS interface
  *
  * pfs_mount(), pfs_init() and pfs_uninit() take the pfs_info of the
  * consumer filesystem; the PSEUDOFS() macro generates the wrappers that
  * supply it.
  */
 int		 pfs_mount	(struct pfs_info *pi, struct mount *mp);
 int		 pfs_cmount	(struct mntarg *ma, void *data, uint64_t flags);
 int		 pfs_unmount	(struct mount *mp, int mntflags);
 int		 pfs_root	(struct mount *mp, int flags,
 				 struct vnode **vpp);
 int		 pfs_statfs	(struct mount *mp, struct statfs *sbp);
 int		 pfs_init	(struct pfs_info *pi, struct vfsconf *vfc);
 int		 pfs_uninit	(struct pfs_info *pi, struct vfsconf *vfc);
 
 /*
  * Directory structure construction and manipulation
  *
  * The pfs_create_*() functions take the parent node, the name of the new
  * node, its callbacks and a flags word, and return the new node.
  * NOTE(review): flags is presumably an OR of the PFS_* flags, and the
  * return value on failure is not visible from this header -- confirm
  * against the implementation.
  */
 struct pfs_node	*pfs_create_dir	(struct pfs_node *parent, const char *name,
 				 pfs_attr_t attr, pfs_vis_t vis,
 				 pfs_destroy_t destroy, int flags);
 struct pfs_node	*pfs_create_file(struct pfs_node *parent, const char *name,
 				 pfs_fill_t fill, pfs_attr_t attr,
 				 pfs_vis_t vis, pfs_destroy_t destroy,
 				 int flags);
 struct pfs_node	*pfs_create_link(struct pfs_node *parent, const char *name,
 				 pfs_fill_t fill, pfs_attr_t attr,
 				 pfs_vis_t vis, pfs_destroy_t destroy,
 				 int flags);
 struct pfs_node	*pfs_find_node	(struct pfs_node *parent, const char *name);
 void		 pfs_purge	(struct pfs_node *pn);
 int		 pfs_destroy	(struct pfs_node *pn);
 
 /*
  * Now for some initialization magic...
  *
  * PSEUDOFS(name, version, flags) expands to everything a consumer needs
  * to register itself as a filesystem:
  *  - a static struct pfs_info named name##_info, initialized with the
  *    stringified name and the consumer's name##_init / name##_uninit
  *    functions (which must therefore be declared before the macro is
  *    used);
  *  - static wrappers that forward mount, init and uninit to pfs_mount(),
  *    pfs_init() and pfs_uninit() together with that pfs_info;
  *  - a struct vfsops wiring up those wrappers and the generic pfs_*
  *    operations;
  *  - VFS_SET() with VFCF_SYNTHETIC or'ed into flags, MODULE_VERSION(),
  *    and a MODULE_DEPEND() on pseudofs.
  */
 #define PSEUDOFS(name, version, flags)					\
 									\
 static struct pfs_info name##_info = {					\
 	#name,								\
 	name##_init,							\
 	name##_uninit,							\
 };									\
 									\
 static int								\
 _##name##_mount(struct mount *mp) {					\
 	return (pfs_mount(&name##_info, mp));				\
 }									\
 									\
 static int								\
 _##name##_init(struct vfsconf *vfc) {					\
 	return (pfs_init(&name##_info, vfc));				\
 }									\
 									\
 static int								\
 _##name##_uninit(struct vfsconf *vfc) {					\
 	return (pfs_uninit(&name##_info, vfc));				\
 }									\
 									\
 static struct vfsops name##_vfsops = {					\
 	.vfs_cmount =		pfs_cmount,				\
 	.vfs_init =		_##name##_init,				\
 	.vfs_mount =		_##name##_mount,			\
 	.vfs_root =		pfs_root,				\
 	.vfs_statfs =		pfs_statfs,				\
 	.vfs_uninit =		_##name##_uninit,			\
 	.vfs_unmount =		pfs_unmount,				\
 };									\
 VFS_SET(name##_vfsops, name, VFCF_SYNTHETIC | flags);			\
 MODULE_VERSION(name, version);						\
 MODULE_DEPEND(name, pseudofs, 1, 1, 1);
 
 #endif
Index: head/sys/fs/pseudofs/pseudofs_vnops.c
===================================================================
--- head/sys/fs/pseudofs/pseudofs_vnops.c	(revision 367361)
+++ head/sys/fs/pseudofs/pseudofs_vnops.c	(revision 367362)
@@ -1,1096 +1,1169 @@
 /*-
  * SPDX-License-Identifier: BSD-3-Clause
  *
  * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer
  *    in this position and unchanged.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. The name of the author may not be used to endorse or promote products
  *    derived from this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
 #include "opt_pseudofs.h"
 
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
 #include <sys/ctype.h>
 #include <sys/dirent.h>
 #include <sys/fcntl.h>
 #include <sys/limits.h>
 #include <sys/lock.h>
 #include <sys/malloc.h>
 #include <sys/mount.h>
 #include <sys/mutex.h>
 #include <sys/namei.h>
 #include <sys/proc.h>
 #include <sys/sbuf.h>
 #include <sys/sx.h>
 #include <sys/sysctl.h>
 #include <sys/vnode.h>
 
 #include <fs/pseudofs/pseudofs.h>
 #include <fs/pseudofs/pseudofs_internal.h>
 
 /*
  * Sanity checks tying the type of a vnode to the type of the pfs_node
  * behind it.  Each one expands to a KASSERT() on pn->pn_type whose message
  * names the calling function.
  */

 /* The node must be one of the directory types: root, dir or procdir. */
 #define KASSERT_PN_IS_DIR(pn)						\
 	KASSERT((pn)->pn_type == pfstype_root ||			\
 	    (pn)->pn_type == pfstype_dir ||				\
 	    (pn)->pn_type == pfstype_procdir,				\
 	    ("%s(): VDIR vnode refers to non-directory pfs_node", __func__))
 
 /* The node must be a regular file. */
 #define KASSERT_PN_IS_FILE(pn)						\
 	KASSERT((pn)->pn_type == pfstype_file,				\
 	    ("%s(): VREG vnode refers to non-file pfs_node", __func__))
 
 /* The node must be a symbolic link. */
 #define KASSERT_PN_IS_LINK(pn)						\
 	KASSERT((pn)->pn_type == pfstype_symlink,			\
 	    ("%s(): VLNK vnode refers to non-link pfs_node", __func__))
 
 /*
  * Upper bound, in bytes, on the sbuf allocated for a buffered read.
  * Parenthesized so the macro is safe inside any larger expression.
  */
 #define	PFS_MAXBUFSIZ		(1024 * 1024)
 
 /*
  * Compute the file number to report for a node.  A node with no
  * associated process (NO_PID) reports its own fileno unchanged; otherwise
  * the pid is folded into the number.
  */
 static uint32_t
 pn_fileno(struct pfs_node *pn, pid_t pid)
 {
 
 	KASSERT(pn->pn_fileno > 0,
 	    ("%s(): no fileno allocated", __func__));
 	if (pid == NO_PID)
 		return (pn->pn_fileno);
 	return (pn->pn_fileno * NO_PID + pid);
 }
 
 /*
  * Decide whether node pn, seen in the context of process proc, is visible
  * to thread td.  Returns 1 if it is and 0 if it is not.
  *
  * A NULL proc is never visible.  Otherwise the caller must hold the
  * process lock.  The checks, in order: the process must not be exiting,
  * td must be allowed to see it, and the node's own visibility callback,
  * if it has one, must agree.
  */
 static int
 pfs_visible_proc(struct thread *td, struct pfs_node *pn, struct proc *proc)
 {
 	int visible;
 
 	if (proc == NULL)
 		return (0);
 
 	PROC_LOCK_ASSERT(proc, MA_OWNED);
 
 	if ((proc->p_flag & P_WEXIT) != 0)
 		return (0);
 	if (p_cansee(td, proc) != 0)
 		return (0);
 	if (pn->pn_vis != NULL) {
 		visible = pn_vis(td, proc, pn);
 		if (!visible)
 			return (0);
 	}
 	return (1);
 }
 
 /*
  * Check whether node pn, for process id pid, is visible to thread td.
  * Returns 1 if visible and 0 if not.
  *
  * Nodes without a process (NO_PID) are always visible.  If p is non-NULL
  * it is set to NULL up front; when a process was looked up and found
  * visible, *p receives it with the process lock still held, and the
  * caller is responsible for unlocking it.  In every other case the
  * process is unlocked before returning.
  */
 static int
 pfs_visible(struct thread *td, struct pfs_node *pn, pid_t pid,
     struct proc **p)
 {
 	struct proc *target;
 
 	PFS_TRACE(("%s (pid: %d, req: %d)",
 	    pn->pn_name, pid, td->td_proc->p_pid));
 
 	if (p != NULL)
 		*p = NULL;
 	if (pid == NO_PID)
 		PFS_RETURN (1);
 
 	target = pfind(pid);
 	if (target == NULL)
 		PFS_RETURN (0);
 
 	if (!pfs_visible_proc(td, pn, target)) {
 		PROC_UNLOCK(target);
 		PFS_RETURN (0);
 	}
 
 	/* Visible: hand the locked process over, or let go of it. */
 	if (p != NULL)
 		*p = target;
 	else
 		PROC_UNLOCK(target);
 	PFS_RETURN (1);
 }
 
 /*
  * Look up process pid and take a hold on it.  Returns 1 and stores the
  * held, unlocked process in *p on success.  Returns 0, leaving *p
  * untouched, if there is no such process or it is exiting.
  */
 static int
 pfs_lookup_proc(pid_t pid, struct proc **p)
 {
 	struct proc *found;
 	int usable;
 
 	found = pfind(pid);
 	if (found == NULL)
 		return (0);
 
 	usable = ((found->p_flag & P_WEXIT) == 0);
 	if (usable) {
 		_PHOLD(found);
 		*p = found;
 	}
 	PROC_UNLOCK(found);
 	return (usable);
 }
 
 /*
  * Verify permissions
  */
 static int
 pfs_access(struct vop_access_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct vattr vattr;
 	int error;
 
 	PFS_TRACE(("%s", pvd->pvd_pn->pn_name));
 	(void)pvd;
 
 	error = VOP_GETATTR(vn, &vattr, va->a_cred);
 	if (error)
 		PFS_RETURN (error);
 	error = vaccess(vn->v_type, vattr.va_mode, vattr.va_uid, vattr.va_gid,
 	    va->a_accmode, va->a_cred);
 	PFS_RETURN (error);
 }
 
 /*
  * Close a file or directory
  *
  * Only the last close of a node that has a close handler does any work:
  * the handler is called with the node's process, looked up by pid and
  * passed locked, or with NULL if the node has no pid or the process can
  * no longer be found.  Returns the handler's result, or 0 if nothing was
  * done.
  */
 static int
 pfs_close(struct vop_close_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct proc *proc = NULL;
 	int error;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	/* Not the last reference, or nobody to notify. */
 	if (vrefcnt(vn) > 1 || pn->pn_close == NULL)
 		PFS_RETURN (0);
 
 	if (pvd->pvd_pid != NO_PID)
 		proc = pfind(pvd->pvd_pid);
 
 	error = pn_close(va->a_td, proc, pn);
 
 	if (proc != NULL)
 		PROC_UNLOCK(proc);
 
 	PFS_RETURN (error);
 }
 
 /*
  * Get file attributes
  */
 static int
 pfs_getattr(struct vop_getattr_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct vattr *vap = va->a_vap;
 	struct proc *proc;
 	int error = 0;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (ENOENT);
 
 	vap->va_type = vn->v_type;
 	vap->va_fileid = pn_fileno(pn, pvd->pvd_pid);
 	vap->va_flags = 0;
 	vap->va_blocksize = PAGE_SIZE;
 	vap->va_bytes = vap->va_size = 0;
 	vap->va_filerev = 0;
 	vap->va_fsid = vn->v_mount->mnt_stat.f_fsid.val[0];
 	vap->va_nlink = 1;
 	nanotime(&vap->va_ctime);
 	vap->va_atime = vap->va_mtime = vap->va_ctime;
 
 	switch (pn->pn_type) {
 	case pfstype_procdir:
 	case pfstype_root:
 	case pfstype_dir:
 #if 0
 		pfs_lock(pn);
 		/* compute link count */
 		pfs_unlock(pn);
 #endif
 		vap->va_mode = 0555;
 		break;
 	case pfstype_file:
 	case pfstype_symlink:
 		vap->va_mode = 0444;
 		break;
 	default:
 		printf("shouldn't be here!\n");
 		vap->va_mode = 0;
 		break;
 	}
 
 	if (proc != NULL) {
 		vap->va_uid = proc->p_ucred->cr_ruid;
 		vap->va_gid = proc->p_ucred->cr_rgid;
 	} else {
 		vap->va_uid = 0;
 		vap->va_gid = 0;
 	}
 
 	if (pn->pn_attr != NULL)
 		error = pn_attr(curthread, proc, pn, vap);
 
 	if(proc != NULL)
 		PROC_UNLOCK(proc);
 
 	PFS_RETURN (error);
 }
 
 /*
  * Perform an ioctl
  *
  * Holds a shared lock on the vnode for the duration of the call and
  * forwards the request to the node's ioctl handler.  Fails with EBADF if
  * the vnode is doomed, EINVAL if it is not a regular file, ENOTTY if the
  * node has no ioctl handler and EIO if the node is not visible to the
  * calling thread; otherwise returns the handler's result.  The vnode is
  * unlocked again on every exit path.
  */
 static int
 pfs_ioctl(struct vop_ioctl_args *va)
 {
 	struct vnode *vn;
 	struct pfs_vdata *pvd;
 	struct pfs_node *pn;
 	struct proc *proc;
 	int error;
 
 	vn = va->a_vp;
 	vn_lock(vn, LK_SHARED | LK_RETRY);
 	/* v_data is only looked at once the vnode is known not to be doomed */
 	if (VN_IS_DOOMED(vn)) {
 		VOP_UNLOCK(vn);
 		return (EBADF);
 	}
 	pvd = vn->v_data;
 	pn = pvd->pvd_pn;
 
 	PFS_TRACE(("%s: %lx", pn->pn_name, va->a_command));
 	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VREG) {
 		VOP_UNLOCK(vn);
 		PFS_RETURN (EINVAL);
 	}
 	KASSERT_PN_IS_FILE(pn);
 
 	if (pn->pn_ioctl == NULL) {
 		VOP_UNLOCK(vn);
 		PFS_RETURN (ENOTTY);
 	}
 
 	/*
 	 * This is necessary because process' privileges may
 	 * have changed since the open() call.
 	 */
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc)) {
 		VOP_UNLOCK(vn);
 		PFS_RETURN (EIO);
 	}
 
 	/* proc, if any, is passed to the handler locked */
 	error = pn_ioctl(curthread, proc, pn, va->a_command, va->a_data);
 
 	if (proc != NULL)
 		PROC_UNLOCK(proc);
 
 	VOP_UNLOCK(vn);
 	PFS_RETURN (error);
 }
 
 /*
  * Perform getextattr
  *
  * Forwards the request to the node's getextattr handler.  Returns EIO if
  * the node is not visible to the calling thread, EOPNOTSUPP if the node
  * has no handler, and the handler's result otherwise.
  */
 static int
 pfs_getextattr(struct vop_getextattr_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct proc *proc;
 	int error;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	/*
 	 * Visibility is re-checked on every call, since either process'
 	 * privileges may have changed since the open() call.
 	 */
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (EIO);
 
 	if (pn->pn_getextattr != NULL) {
 		error = pn_getextattr(curthread, proc, pn,
 		    va->a_attrnamespace, va->a_name, va->a_uio,
 		    va->a_size, va->a_cred);
 	} else {
 		error = EOPNOTSUPP;
 	}
 
 	/* pfs_visible() returned the process locked. */
 	if (proc != NULL)
 		PROC_UNLOCK(proc);
 
 	PFS_RETURN (error);
 }
 
 /*
  * Convert a vnode to its component name
  *
  * The name of vp is copied, without a terminating NUL, into the tail of
  * ap->a_buf so that it ends at the old *a_buflen; *a_buflen is then
  * lowered to the index where the name starts.  The name is the decimal
  * pid for a procdir and pn_name for everything else.  A vnode for the
  * parent node is returned in *a_vpp.
  *
  * The root directory is a special case: it is returned as its own parent,
  * with a hold added, and the buffer is left untouched.
  *
  * Returns 0 on success, ENOMEM if the name does not fit in the buffer, or
  * an error from vfs_busy() or pfs_vncache_alloc().
  */
 static int
 pfs_vptocnp(struct vop_vptocnp_args *ap)
 {
 	struct vnode *vp = ap->a_vp;
 	struct vnode **dvp = ap->a_vpp;
 	struct pfs_vdata *pvd = vp->v_data;
 	struct pfs_node *pd = pvd->pvd_pn;
 	struct pfs_node *pn;
 	struct mount *mp;
 	char *buf = ap->a_buf;
 	size_t *buflen = ap->a_buflen;
 	char pidbuf[PFS_NAMELEN];
 	pid_t pid = pvd->pvd_pid;
 	int len, i, error, locked;
 
 	/* i tracks where the name will start; committed to *buflen at the end */
 	i = *buflen;
 	error = 0;
 
 	pfs_lock(pd);
 
 	if (vp->v_type == VDIR && pd->pn_type == pfstype_root) {
 		*dvp = vp;
 		vhold(*dvp);
 		pfs_unlock(pd);
 		PFS_RETURN (0);
 	} else if (vp->v_type == VDIR && pd->pn_type == pfstype_procdir) {
 		len = snprintf(pidbuf, sizeof(pidbuf), "%d", pid);
 		i -= len;
 		if (i < 0) {
 			error = ENOMEM;
 			goto failed;
 		}
 		bcopy(pidbuf, buf + i, len);
 	} else {
 		len = strlen(pd->pn_name);
 		i -= len;
 		if (i < 0) {
 			error = ENOMEM;
 			goto failed;
 		}
 		bcopy(pd->pn_name, buf + i, len);
 	}
 
 	pn = pd->pn_parent;
 	pfs_unlock(pd);
 
 	mp = vp->v_mount;
 	error = vfs_busy(mp, 0);
 	if (error)
 		return (error);
 
 	/*
 	 * vp is held by caller.
 	 *
 	 * Its lock is dropped while the parent vnode is obtained and is
 	 * re-taken in the same mode before returning.
 	 */
 	locked = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp);
 
 	/*
 	 * NOTE(review): pfs_lookup() resets pid to NO_PID when stepping up
 	 * from a procdir, whereas pid is passed through unchanged here --
 	 * confirm that this is intended.
 	 */
 	error = pfs_vncache_alloc(mp, dvp, pn, pid);
 	if (error) {
 		vn_lock(vp, locked | LK_RETRY);
 		vfs_unbusy(mp);
 		PFS_RETURN(error);
 	}
 
 	*buflen = i;
 	VOP_UNLOCK(*dvp);
 	vn_lock(vp, locked | LK_RETRY);
 	vfs_unbusy(mp);
 
 	PFS_RETURN (0);
 failed:
 	/* reached only from the name-copy branches, with pd still locked */
 	pfs_unlock(pd);
 	PFS_RETURN(error);
 }
 
 /*
  * Look up a file or directory
  *
  * Resolves the component in va->a_cnp relative to directory va->a_dvp and
  * returns the resulting vnode in *va->a_vpp.  Three kinds of names are
  * handled: "." (the directory itself), ".." (the parent node) and
  * ordinary names, which are matched first against the directory's named
  * children and then, if the directory has a procdir child and the name is
  * all digits, interpreted as a pid.
  *
  * Returns 0 on success, ENOTDIR if a_dvp is not a directory, EOPNOTSUPP
  * for DELETE/RENAME on the last component, EIO for ".." at the root,
  * ENOENT if nothing matches or the result is not visible, or an error
  * from pfs_vncache_alloc().
  */
 static int
 pfs_lookup(struct vop_cachedlookup_args *va)
 {
 	struct vnode *vn = va->a_dvp;
 	struct vnode **vpp = va->a_vpp;
 	struct componentname *cnp = va->a_cnp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pd = pvd->pvd_pn;
 	struct pfs_node *pn, *pdn = NULL;
 	struct mount *mp;
 	pid_t pid = pvd->pvd_pid;
 	char *pname;
 	int error, i, namelen, visible;
 
 	PFS_TRACE(("%.*s", (int)cnp->cn_namelen, cnp->cn_nameptr));
 	pfs_assert_not_owned(pd);
 
 	if (vn->v_type != VDIR)
 		PFS_RETURN (ENOTDIR);
 	KASSERT_PN_IS_DIR(pd);
 
 	/*
 	 * Don't support DELETE or RENAME.  CREATE is supported so
 	 * that O_CREAT will work, but the lookup will still fail if
 	 * the file does not exist.
 	 */
 	if ((cnp->cn_flags & ISLASTCN) &&
 	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
 		PFS_RETURN (EOPNOTSUPP);
 
 	/* shortcut: check if the name is too long */
 	if (cnp->cn_namelen >= PFS_NAMELEN)
 		PFS_RETURN (ENOENT);
 
 	/* check that parent directory is visible... */
 	if (!pfs_visible(curthread, pd, pvd->pvd_pid, NULL))
 		PFS_RETURN (ENOENT);
 
 	/* self */
 	namelen = cnp->cn_namelen;
 	pname = cnp->cn_nameptr;
 	if (namelen == 1 && pname[0] == '.') {
 		pn = pd;
 		*vpp = vn;
 		VREF(vn);
 		PFS_RETURN (0);
 	}
 
 	mp = vn->v_mount;
 
 	/* parent */
 	if (cnp->cn_flags & ISDOTDOT) {
 		if (pd->pn_type == pfstype_root)
 			PFS_RETURN (EIO);
 		/*
 		 * Busy the mount.  If that cannot be done without waiting,
 		 * wait with the directory unlocked (a mount reference is
 		 * kept across the wait), then relock the directory and
 		 * make sure it has not been doomed in the meantime.
 		 */
 		error = vfs_busy(mp, MBF_NOWAIT);
 		if (error != 0) {
 			vfs_ref(mp);
 			VOP_UNLOCK(vn);
 			error = vfs_busy(mp, 0);
 			vn_lock(vn, LK_EXCLUSIVE | LK_RETRY);
 			vfs_rel(mp);
 			if (error != 0)
 				PFS_RETURN(ENOENT);
 			if (VN_IS_DOOMED(vn)) {
 				vfs_unbusy(mp);
 				PFS_RETURN(ENOENT);
 			}
 		}
 		/*
 		 * From here on the directory is unlocked and the mount is
 		 * busied; both exits at the bottom of the function undo
 		 * this for the ISDOTDOT case.
 		 */
 		VOP_UNLOCK(vn);
 		KASSERT(pd->pn_parent != NULL,
 		    ("%s(): non-root directory has no parent", __func__));
 		/*
 		 * This one is tricky.  Descendents of procdir nodes
 		 * inherit their parent's process affinity, but
 		 * there's no easy reverse mapping.  For simplicity,
 		 * we assume that if this node is a procdir, its
 		 * parent isn't (which is correct as long as
 		 * descendents of procdir nodes are never procdir
 		 * nodes themselves)
 		 */
 		if (pd->pn_type == pfstype_procdir)
 			pid = NO_PID;
 		pfs_lock(pd);
 		pn = pd->pn_parent;
 		pfs_unlock(pd);
 		goto got_pnode;
 	}
 
 	pfs_lock(pd);
 
 	/*
 	 * named node
 	 *
 	 * A procdir child is only remembered here (in pdn); it is
 	 * considered below, after every named child has failed to match.
 	 */
 	for (pn = pd->pn_nodes; pn != NULL; pn = pn->pn_next)
 		if (pn->pn_type == pfstype_procdir)
 			pdn = pn;
 		else if (pn->pn_name[namelen] == '\0' &&
 		    bcmp(pname, pn->pn_name, namelen) == 0) {
 			pfs_unlock(pd);
 			goto got_pnode;
 		}
 
 	/*
 	 * process dependent node
 	 *
 	 * Parse the name as a decimal pid.  The parse stops early at the
 	 * first non-digit or once the value exceeds PID_MAX; only a name
 	 * that was consumed completely is accepted.
 	 */
 	if ((pn = pdn) != NULL) {
 		pid = 0;
 		for (pid = 0, i = 0; i < namelen && isdigit(pname[i]); ++i)
 			if ((pid = pid * 10 + pname[i] - '0') > PID_MAX)
 				break;
 		if (i == cnp->cn_namelen) {
 			pfs_unlock(pd);
 			goto got_pnode;
 		}
 	}
 
 	pfs_unlock(pd);
 
 	PFS_RETURN (ENOENT);
 
  got_pnode:
 	/* pn is the target node and pid the process it is to be bound to */
 	pfs_assert_not_owned(pd);
 	pfs_assert_not_owned(pn);
 	visible = pfs_visible(curthread, pn, pid, NULL);
 	if (!visible) {
 		error = ENOENT;
 		goto failed;
 	}
 
 	error = pfs_vncache_alloc(mp, vpp, pn, pid);
 	if (error)
 		goto failed;
 
 	if (cnp->cn_flags & ISDOTDOT) {
 		/* undo the busy/unlock done on the ".." path above */
 		vfs_unbusy(mp);
 		vn_lock(vn, LK_EXCLUSIVE | LK_RETRY);
 		if (VN_IS_DOOMED(vn)) {
 			vput(*vpp);
 			*vpp = NULL;
 			PFS_RETURN(ENOENT);
 		}
 	}
 	if (cnp->cn_flags & MAKEENTRY && !VN_IS_DOOMED(vn))
 		cache_enter(vn, *vpp, cnp);
 	PFS_RETURN (0);
  failed:
 	if (cnp->cn_flags & ISDOTDOT) {
 		vfs_unbusy(mp);
 		vn_lock(vn, LK_EXCLUSIVE | LK_RETRY);
 		*vpp = NULL;
 	}
 	PFS_RETURN(error);
 }
 
 /*
  * Open a file or directory.
  *
  * No per-open state is set up; the function only vets the requested
  * mode.  Returns EPERM if the mode check below fails, EOPNOTSUPP if
  * O_SHLOCK or O_EXLOCK was requested, and 0 otherwise.
  */
 static int
 pfs_open(struct vop_open_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	int mode = va->a_mode;
 
 	PFS_TRACE(("%s (mode 0x%x)", pn->pn_name, mode));
 	pfs_assert_not_owned(pn);
 
 	/*
 	 * check if the requested mode is permitted
 	 *
 	 * NOTE(review): PFS_RD and PFS_WR are tested against `mode' here,
 	 * whereas pfs_read() and pfs_write() test them against
 	 * pn->pn_flags.  It looks as if the node's flags may have been
 	 * meant -- confirm before relying on this check.
 	 */
 	if (((mode & FREAD) && !(mode & PFS_RD)) ||
 	    ((mode & FWRITE) && !(mode & PFS_WR)))
 		PFS_RETURN (EPERM);
 
 	/* we don't support locking */
 	if ((mode & O_SHLOCK) || (mode & O_EXLOCK))
 		PFS_RETURN (EOPNOTSUPP);
 
 	PFS_RETURN (0);
 }
 
+struct sbuf_seek_helper {		/* state for pfs_sbuf_uio_drain() */
+	off_t		skip_bytes;	/* leading bytes still to discard */
+	struct uio	*uio;		/* where drained data is copied to */
+};
+
+static int
+pfs_sbuf_uio_drain(void *arg, const char *data, int len)
+{
+	struct sbuf_seek_helper *ssh;
+	struct uio *uio;
+	int error, skipped;
+
+	ssh = arg;	/* the sbuf_seek_helper registered by pfs_read() */
+	uio = ssh->uio;
+	skipped = 0;
+
+	/*
+	 * Need to discard first uio_offset bytes.  skip_bytes counts how
+	 * many of them are still outstanding: a chunk lying entirely
+	 * inside that range is reported as consumed without copying
+	 * anything, and a chunk straddling its end has only its leading
+	 * part dropped.
+	 */
+	if (ssh->skip_bytes > 0) {
+		if (ssh->skip_bytes >= len) {
+			ssh->skip_bytes -= len;
+			return (len);
+		}
+
+		data += ssh->skip_bytes;
+		len -= ssh->skip_bytes;
+		skipped = ssh->skip_bytes;
+		ssh->skip_bytes = 0;
+	}
+
+	error = uiomove(__DECONST(void *, data), len, uio);
+	if (error != 0)
+		return (-error);	/* failures are returned negated */
+
+	/*
+	 * The fill function has more to emit, but the reader is finished.
+	 * This is similar to the truncated read case for non-draining PFS
+	 * sbufs, and should be handled appropriately in fill-routines.
+	 *
+	 * pfs_read() turns the resulting ENOBUFS back into success when
+	 * the uio has been filled completely.
+	 */
+	if (uio->uio_resid == 0)
+		return (-ENOBUFS);
+
+	return (skipped + len);		/* bytes discarded plus bytes copied */
+}
+
 /*
  * Read from a file
  *
  * Nodes flagged PFS_RAWRD have their fill routine called directly on the
  * caller's uio.  For all other nodes the fill routine writes into an
  * sbuf.  With PFS_AUTODRAIN a small sbuf is drained into the uio as it
  * fills (see pfs_sbuf_uio_drain()).  Otherwise the output is buffered in
  * full, up to PFS_MAXBUFSIZ, and the requested window is copied out
  * afterwards.
  *
  * The vnode is unlocked (but held) and the process, if any, is held
  * while the fill routine runs; both are restored before returning.
  *
  * Returns 0 on success, EINVAL for a non-file vnode or a bad
  * offset/length, EBADF if the node is not readable, EIO if there is no
  * fill routine, the node is not visible or the sbuf cannot be allocated,
  * or an error from the fill routine or the copy-out.
  */
 static int
 pfs_read(struct vop_read_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct uio *uio = va->a_uio;
 	struct proc *proc;
 	struct sbuf *sb = NULL;
 	int error, locked;
-	off_t buflen;
+	off_t buflen, buflim;
+	struct sbuf_seek_helper ssh;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VREG)
 		PFS_RETURN (EINVAL);
 	KASSERT_PN_IS_FILE(pn);
 
 	if (!(pn->pn_flags & PFS_RD))
 		PFS_RETURN (EBADF);
 
 	if (pn->pn_fill == NULL)
 		PFS_RETURN (EIO);
 
 	/*
 	 * This is necessary because either process' privileges may
 	 * have changed since the open() call.
 	 */
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (EIO);
 	if (proc != NULL) {
 		_PHOLD(proc);
 		PROC_UNLOCK(proc);
 	}
 
 	vhold(vn);
 	locked = VOP_ISLOCKED(vn);
 	VOP_UNLOCK(vn);
 
 	if (pn->pn_flags & PFS_RAWRD) {
 		PFS_TRACE(("%zd resid", uio->uio_resid));
 		error = pn_fill(curthread, proc, pn, NULL, uio);
 		PFS_TRACE(("%zd resid", uio->uio_resid));
 		goto ret;
 	}
 
 	if (uio->uio_resid < 0 || uio->uio_offset < 0 ||
 	    uio->uio_resid > OFF_MAX - uio->uio_offset) {
 		error = EINVAL;
 		goto ret;
 	}
-	buflen = uio->uio_offset + uio->uio_resid;
-	if (buflen > PFS_MAXBUFSIZ)
-		buflen = PFS_MAXBUFSIZ;
+	/*
+	 * The buffer needs room for the requested window plus one trailing
+	 * byte, capped at buflim.  The check above still admits
+	 * offset + resid == OFF_MAX, so the extra byte is added only after
+	 * comparing against the limit; adding it up front could overflow.
+	 */
+	buflen = uio->uio_offset + uio->uio_resid;
+	if (pn->pn_flags & PFS_AUTODRAIN)
+		/*
+		 * We can use a smaller buffer if we can stream output to the
+		 * consumer.
+		 */
+		buflim = PAGE_SIZE;
+	else
+		buflim = PFS_MAXBUFSIZ;
+	if (buflen >= buflim)
+		buflen = buflim;
+	else
+		buflen++;
 
-	sb = sbuf_new(sb, NULL, buflen + 1, 0);
+	sb = sbuf_new(sb, NULL, buflen, 0);
 	if (sb == NULL) {
 		error = EIO;
 		goto ret;
 	}
 
+	if (pn->pn_flags & PFS_AUTODRAIN) {
+		ssh.skip_bytes = uio->uio_offset;
+		ssh.uio = uio;
+		sbuf_set_drain(sb, pfs_sbuf_uio_drain, &ssh);
+	}
+
 	error = pn_fill(curthread, proc, pn, sb, uio);
 
 	if (error) {
 		sbuf_delete(sb);
 		goto ret;
 	}
 
 	/*
 	 * XXX: If the buffer overflowed, sbuf_len() will not return
 	 * the data length. Then just use the full length because an
 	 * overflowed sbuf must be full.
 	 */
-	if (sbuf_finish(sb) == 0)
-		buflen = sbuf_len(sb);
-	error = uiomove_frombuf(sbuf_data(sb), buflen, uio);
+	error = sbuf_finish(sb);
+	if ((pn->pn_flags & PFS_AUTODRAIN)) {
+		/*
+		 * ENOBUFS just indicates early termination of the fill
+		 * function as the caller's buffer was already filled.  Squash
+		 * to zero.
+		 */
+		if (uio->uio_resid == 0 && error == ENOBUFS)
+			error = 0;
+	} else {
+		if (error == 0)
+			buflen = sbuf_len(sb);
+		else
+			/* The trailing byte is not valid. */
+			buflen--;
+		error = uiomove_frombuf(sbuf_data(sb), buflen, uio);
+	}
 	sbuf_delete(sb);
 ret:
 	vn_lock(vn, locked | LK_RETRY);
 	vdrop(vn);
 	if (proc != NULL)
 		PRELE(proc);
 	PFS_RETURN (error);
 }
 
 /*
  * Iterate through directory entries
  *
  * Advances the cursor (*pn, *p) to the next entry of directory pd that is
  * visible to td.  Start with both *pn and *p set to NULL.  *pn is the
  * current child node; while that node is a procdir, *p additionally walks
  * the allproc list, so a procdir yields one entry per visible process.
  *
  * proc is the process the directory itself belongs to, or NULL.  When an
  * entry is not being expanded per process, its visibility is checked
  * against proc, whose lock the caller must then hold.
  *
  * The caller must hold allproc_lock shared and own pd's lock.
  *
  * Returns 0 with the cursor on a visible entry, or -1 when the directory
  * is exhausted.
  */
 static int
 pfs_iterate(struct thread *td, struct proc *proc, struct pfs_node *pd,
 	    struct pfs_node **pn, struct proc **p)
 {
 	int visible;
 
 	sx_assert(&allproc_lock, SX_SLOCKED);
 	pfs_assert_owned(pd);
  again:
 	if (*pn == NULL) {
 		/* first node */
 		*pn = pd->pn_nodes;
 	} else if ((*pn)->pn_type != pfstype_procdir) {
 		/* next node */
 		*pn = (*pn)->pn_next;
 	}
 	/* a procdir stays current until the process list has been walked */
 	if (*pn != NULL && (*pn)->pn_type == pfstype_procdir) {
 		/* next process */
 		if (*p == NULL)
 			*p = LIST_FIRST(&allproc);
 		else
 			*p = LIST_NEXT(*p, p_list);
 		/* out of processes: next node */
 		if (*p == NULL)
 			*pn = (*pn)->pn_next;
 		else
 			PROC_LOCK(*p);
 	}
 
 	if ((*pn) == NULL)
 		return (-1);
 
 	if (*p != NULL) {
 		/* *p was locked above; drop the lock once it has been checked */
 		visible = pfs_visible_proc(td, *pn, *p);
 		PROC_UNLOCK(*p);
 	} else if (proc != NULL) {
 		visible = pfs_visible_proc(td, *pn, proc);
 	} else {
 		visible = 1;
 	}
 	if (!visible)
 		goto again;
 
 	return (0);
 }
 
 /*
  * Directory entry list
  *
  * pfs_readdir() collects the entries it is going to return on a list of
  * these and copies them out to the caller afterwards.
  */
 struct pfsentry {
 	STAILQ_ENTRY(pfsentry)	link;	/* list linkage */
 	struct dirent		entry;	/* the entry to copy out */
 };
 STAILQ_HEAD(pfsdirentlist, pfsentry);
 
 /*
  * Return directory entries.
  *
  * Entries are fixed-size records of PFS_DELEN bytes, so the uio offset
  * must be a multiple of PFS_DELEN and a non-empty buffer must hold at
  * least one record.  The directory is walked with pfs_iterate() under
  * allproc_lock and the directory's node lock; the entries are collected
  * on a temporary list and copied out to the caller only after the locks
  * have been dropped.
  *
  * Each entry's d_off is the absolute directory offset of the entry that
  * follows it, i.e. it accounts for the offset the read started at.
  *
  * Returns 0 on success (including a read at or past the end of the
  * directory), ENOTDIR if the vnode is not a directory, EINVAL for a bad
  * offset or buffer size, ENOENT if the directory's process is gone or the
  * directory is not visible, ENOMEM if an entry cannot be allocated, or an
  * error from uiomove().
  */
 static int
 pfs_readdir(struct vop_readdir_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pd = pvd->pvd_pn;
 	pid_t pid = pvd->pvd_pid;
 	struct proc *p, *proc;
 	struct pfs_node *pn;
 	struct uio *uio;
 	struct pfsentry *pfsent, *pfsent2;
 	struct pfsdirentlist lst;
 	off_t offset, skip;
 	int error, i, resid;
 
 	STAILQ_INIT(&lst);
 	error = 0;
 	KASSERT(pd->pn_info == vn->v_mount->mnt_data,
 	    ("%s(): pn_info does not match mountpoint", __func__));
 	PFS_TRACE(("%s pid %lu", pd->pn_name, (unsigned long)pid));
 	pfs_assert_not_owned(pd);
 
 	if (vn->v_type != VDIR)
 		PFS_RETURN (ENOTDIR);
 	KASSERT_PN_IS_DIR(pd);
 	uio = va->a_uio;
 
 	/* only allow reading entire entries */
 	offset = uio->uio_offset;
 	resid = uio->uio_resid;
 	if (offset < 0 || offset % PFS_DELEN != 0 ||
 	    (resid && resid < PFS_DELEN))
 		PFS_RETURN (EINVAL);
 	if (resid == 0)
 		PFS_RETURN (0);
 
 	/* a process directory needs its process held for the duration */
 	proc = NULL;
 	if (pid != NO_PID && !pfs_lookup_proc(pid, &proc))
 		PFS_RETURN (ENOENT);
 
 	sx_slock(&allproc_lock);
 	pfs_lock(pd);
 
 	KASSERT(pid == NO_PID || proc != NULL,
 	    ("%s(): no process for pid %lu", __func__, (unsigned long)pid));
 
 	if (pid != NO_PID) {
 		PROC_LOCK(proc);
 
 		/* check if the directory is visible to the caller */
 		if (!pfs_visible_proc(curthread, pd, proc)) {
 			_PRELE(proc);
 			PROC_UNLOCK(proc);
 			sx_sunlock(&allproc_lock);
 			pfs_unlock(pd);
 			PFS_RETURN (ENOENT);
 		}
 	}
 
 	/*
 	 * skip unwanted entries
 	 *
 	 * A separate counter is used so that `offset' keeps the absolute
 	 * position of the first entry returned; d_off below is derived
 	 * from it.
 	 */
 	pn = NULL;
 	p = NULL;
 	for (skip = offset; skip > 0; skip -= PFS_DELEN) {
 		if (pfs_iterate(curthread, proc, pd, &pn, &p) == -1) {
 			/* nothing left... */
 			if (proc != NULL) {
 				_PRELE(proc);
 				PROC_UNLOCK(proc);
 			}
 			pfs_unlock(pd);
 			sx_sunlock(&allproc_lock);
 			PFS_RETURN (0);
 		}
 	}
 
 	/* fill in entries */
 	while (pfs_iterate(curthread, proc, pd, &pn, &p) != -1 &&
 	    resid >= PFS_DELEN) {
 		/* locks are held here, so the allocation must not sleep */
 		if ((pfsent = malloc(sizeof(struct pfsentry), M_IOV,
 		    M_NOWAIT | M_ZERO)) == NULL) {
 			error = ENOMEM;
 			break;
 		}
 		pfsent->entry.d_reclen = PFS_DELEN;
 		pfsent->entry.d_fileno = pn_fileno(pn, pid);
 		/* PFS_DELEN was picked to fit PFS_NAMLEN */
 		for (i = 0; i < PFS_NAMELEN - 1 && pn->pn_name[i] != '\0'; ++i)
 			pfsent->entry.d_name[i] = pn->pn_name[i];
 		pfsent->entry.d_namlen = i;
 		/* NOTE: d_off is the offset of the *next* entry. */
 		pfsent->entry.d_off = offset + PFS_DELEN;
 		switch (pn->pn_type) {
 		case pfstype_procdir:
 			KASSERT(p != NULL,
 			    ("reached procdir node with p == NULL"));
 			/* a procdir entry is named after the process's pid */
 			pfsent->entry.d_namlen = snprintf(pfsent->entry.d_name,
 			    PFS_NAMELEN, "%d", p->p_pid);
 			/* fall through */
 		case pfstype_root:
 		case pfstype_dir:
 		case pfstype_this:
 		case pfstype_parent:
 			pfsent->entry.d_type = DT_DIR;
 			break;
 		case pfstype_file:
 			pfsent->entry.d_type = DT_REG;
 			break;
 		case pfstype_symlink:
 			pfsent->entry.d_type = DT_LNK;
 			break;
 		default:
 			panic("%s has unexpected node type: %d", pn->pn_name, pn->pn_type);
 		}
 		PFS_TRACE(("%s", pfsent->entry.d_name));
 		dirent_terminate(&pfsent->entry);
 		STAILQ_INSERT_TAIL(&lst, pfsent, link);
 		offset += PFS_DELEN;
 		resid -= PFS_DELEN;
 	}
 	if (proc != NULL) {
 		_PRELE(proc);
 		PROC_UNLOCK(proc);
 	}
 	pfs_unlock(pd);
 	sx_sunlock(&allproc_lock);
 
 	/*
 	 * Copy the collected entries out now that no locks are held.  After
 	 * the first failure nothing more is copied, but every entry is
 	 * still freed.
 	 */
 	i = 0;
 	STAILQ_FOREACH_SAFE(pfsent, &lst, link, pfsent2) {
 		if (error == 0)
 			error = uiomove(&pfsent->entry, PFS_DELEN, uio);
 		free(pfsent, M_IOV);
 		i++;
 	}
 	PFS_TRACE(("%ju bytes", (uintmax_t)(i * PFS_DELEN)));
 	PFS_RETURN (error);
 }
 
 /*
  * Read a symbolic link
  *
  * The link target is produced by the node's fill routine into an
  * on-stack buffer of PATH_MAX bytes and then copied out to the caller.
  * While the fill routine runs the vnode is unlocked (but held) and the
  * node's process, if it has one, is held.
  *
  * Returns 0 on success, EINVAL if the vnode is not a link, EIO if the
  * node has no fill routine or its process is gone or exiting,
  * ENAMETOOLONG if sbuf_finish() reports a failure, or an error from the
  * fill routine or the copy-out.
  */
 static int
 pfs_readlink(struct vop_readlink_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct uio *uio = va->a_uio;
 	struct proc *proc = NULL;
 	char buf[PATH_MAX];
 	struct sbuf sb;
 	int error, locked;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VLNK)
 		PFS_RETURN (EINVAL);
 	KASSERT_PN_IS_LINK(pn);
 
 	if (pn->pn_fill == NULL)
 		PFS_RETURN (EIO);
 
 	/* hold the process, but do not keep it locked, across the fill */
 	if (pvd->pvd_pid != NO_PID) {
 		if ((proc = pfind(pvd->pvd_pid)) == NULL)
 			PFS_RETURN (EIO);
 		if (proc->p_flag & P_WEXIT) {
 			PROC_UNLOCK(proc);
 			PFS_RETURN (EIO);
 		}
 		_PHOLD(proc);
 		PROC_UNLOCK(proc);
 	}
 	/* the vnode lock is dropped for the fill and re-taken afterwards */
 	vhold(vn);
 	locked = VOP_ISLOCKED(vn);
 	VOP_UNLOCK(vn);
 
 	/* sbuf_new() can't fail with a static buffer */
 	sbuf_new(&sb, buf, sizeof buf, 0);
 
 	error = pn_fill(curthread, proc, pn, &sb, NULL);
 
 	if (proc != NULL)
 		PRELE(proc);
 	vn_lock(vn, locked | LK_RETRY);
 	vdrop(vn);
 
 	if (error) {
 		sbuf_delete(&sb);
 		PFS_RETURN (error);
 	}
 
 	if (sbuf_finish(&sb) != 0) {
 		sbuf_delete(&sb);
 		PFS_RETURN (ENAMETOOLONG);
 	}
 
 	error = uiomove_frombuf(sbuf_data(&sb), sbuf_len(&sb), uio);
 	sbuf_delete(&sb);
 	PFS_RETURN (error);
 }
 
 /*
  * Reclaim a vnode
  *
  * Hands the vnode to pfs_vncache_free() and returns its result.
  */
 static int
 pfs_reclaim(struct vop_reclaim_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	return (pfs_vncache_free(vn));
 }
 
 /*
  * Set attributes
  *
  * Nothing is changed: the request is traced and reported as successful
  * (always returns 0).
  */
 static int
 pfs_setattr(struct vop_setattr_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	/* Silently ignore unchangeable attributes. */
 	PFS_RETURN (0);
 }
 
 /*
  * Write to a file
  *
  * Nodes flagged PFS_RAWWR have their fill routine called directly on the
  * caller's uio.  For other nodes the written data is first loaded into an
  * sbuf, which is then passed to the fill routine.  The node's process, if
  * any, is held while the fill routine runs.
  *
  * Returns EINVAL if the vnode is not a regular file, EBADF if the node is
  * not writable, EIO if there is no fill routine or the node is not
  * visible, an error from sbuf_uionew(), or the fill routine's result.
  */
 static int
 pfs_write(struct vop_write_args *va)
 {
 	struct vnode *vn = va->a_vp;
 	struct pfs_vdata *pvd = vn->v_data;
 	struct pfs_node *pn = pvd->pvd_pn;
 	struct uio *uio = va->a_uio;
 	struct proc *proc;
 	struct sbuf sb;
 	int error;
 
 	PFS_TRACE(("%s", pn->pn_name));
 	pfs_assert_not_owned(pn);
 
 	if (vn->v_type != VREG)
 		PFS_RETURN (EINVAL);
 	KASSERT_PN_IS_FILE(pn);
 
 	if (!(pn->pn_flags & PFS_WR))
 		PFS_RETURN (EBADF);
 
 	if (pn->pn_fill == NULL)
 		PFS_RETURN (EIO);
 
 	/*
 	 * Visibility is re-checked on every call, since either process'
 	 * privileges may have changed since the open() call.
 	 */
 	if (!pfs_visible(curthread, pn, pvd->pvd_pid, &proc))
 		PFS_RETURN (EIO);
 
 	/* Trade the process lock for a hold. */
 	if (proc != NULL) {
 		_PHOLD(proc);
 		PROC_UNLOCK(proc);
 	}
 
 	if (pn->pn_flags & PFS_RAWWR) {
 		error = pn_fill(curthread, proc, pn, NULL, uio);
 		goto done;
 	}
 
 	sbuf_uionew(&sb, uio, &error);
 	if (error)
 		goto done;
 
 	error = pn_fill(curthread, proc, pn, &sb, uio);
 	sbuf_delete(&sb);
 done:
 	if (proc != NULL)
 		PRELE(proc);
 	PFS_RETURN (error);
 }
 
 /*
  * Vnode operations
  *
  * Operations not listed here fall back to default_vnodeops.  Lookups go
  * through vfs_cache_lookup(), with pfs_lookup() installed as the
  * cached-lookup handler.  Operations that would create, remove, rename or
  * link entries are wired to VOP_EOPNOTSUPP.
  */
 struct vop_vector pfs_vnodeops = {
 	.vop_default =		&default_vnodeops,
 
 	.vop_access =		pfs_access,
 	.vop_cachedlookup =	pfs_lookup,
 	.vop_close =		pfs_close,
 	.vop_create =		VOP_EOPNOTSUPP,
 	.vop_getattr =		pfs_getattr,
 	.vop_getextattr =	pfs_getextattr,
 	.vop_ioctl =		pfs_ioctl,
 	.vop_link =		VOP_EOPNOTSUPP,
 	.vop_lookup =		vfs_cache_lookup,
 	.vop_mkdir =		VOP_EOPNOTSUPP,
 	.vop_mknod =		VOP_EOPNOTSUPP,
 	.vop_open =		pfs_open,
 	.vop_read =		pfs_read,
 	.vop_readdir =		pfs_readdir,
 	.vop_readlink =		pfs_readlink,
 	.vop_reclaim =		pfs_reclaim,
 	.vop_remove =		VOP_EOPNOTSUPP,
 	.vop_rename =		VOP_EOPNOTSUPP,
 	.vop_rmdir =		VOP_EOPNOTSUPP,
 	.vop_setattr =		pfs_setattr,
 	.vop_symlink =		VOP_EOPNOTSUPP,
 	.vop_vptocnp =		pfs_vptocnp,
 	.vop_write =		pfs_write,
 	/* XXX I've probably forgotten a few that need VOP_EOPNOTSUPP */
 };
 VFS_VOP_VECTOR_REGISTER(pfs_vnodeops);