Index: head/share/man/man9/Makefile =================================================================== --- head/share/man/man9/Makefile (revision 42015) +++ head/share/man/man9/Makefile (revision 42016) @@ -1,89 +1,90 @@ -# $Id: Makefile,v 1.40 1998/10/28 00:55:42 nsouch Exp $ +# $Id: Makefile,v 1.41 1998/12/21 10:29:28 dillon Exp $ MAN9= MD5.9 \ VFS.9 VFS_FHTOVP.9 VFS_INIT.9 VFS_MOUNT.9 VFS_QUOTACTL.9 \ VFS_ROOT.9 VFS_START.9 VFS_STATFS.9 VFS_SYNC.9 VFS_UNMOUNT.9 \ VFS_VGET.9 VFS_VPTOFH.9 \ VOP_ABORTOP.9 VOP_ACCESS.9 VOP_ADVLOCK.9 VOP_ATTRIB.9 \ VOP_BWRITE.9 VOP_CREATE.9 VOP_FSYNC.9 \ VOP_GETPAGES.9 VOP_INACTIVE.9 VOP_IOCTL.9 VOP_LINK.9 \ VOP_LOCK.9 VOP_LOOKUP.9 VOP_MMAP.9 VOP_OPENCLOSE.9 \ VOP_PATHCONF.9 VOP_PRINT.9 VOP_RDWR.9 VOP_READDIR.9 \ VOP_READLINK.9 VOP_REALLOCBLKS.9 VOP_REMOVE.9 VOP_RENAME.9 \ VOP_STRATEGY.9 \ - at_exit.9 at_fork.9 at_shutdown.9 bios.9 boot.9 cd.9 copy.9 \ + at_exit.9 at_fork.9 at_shutdown.9 bios.9 boot.9 buf.9 cd.9 copy.9 \ devfs_add_devswf.9 devfs_link.9 devfs_remove_dev.9 devstat.9 \ fetch.9 ifnet.9 inittodr.9 intro.9 kernacc.9 malloc.9 microseq.9 \ mi_switch.9 namei.9 panic.9 physio.9 posix4.9 ppbconf.9 psignal.9 \ resettodr.9 rtalloc.9 rtentry.9 scsiconf.9 sd.9 sleep.9 spl.9 st.9 \ store.9 style.9 suser.9 time.9 timeout.9 uio.9 \ vget.9 vnode.9 vput.9 vref.9 vrele.9 vslock.9 MAN9+= device.9 device_add_child.9 device_delete_child.9 device_enable.9 \ device_find_child.9 device_get_devclass.9 device_get_driver.9 \ device_get_ivars.9 device_get_softc.9 device_get_state.9 \ device_get_unit.9 device_probe_and_attach.9 device_set_desc.9 \ driver.9 \ devclass.9 devclass_add_driver.9 devclass_find.9 \ devclass_get_device.9 devclass_get_devices.9 devclass_get_maxunit.9 \ devclass_get_name.9 devclass_get_softc.9 \ DEVICE_ATTACH.9 DEVICE_DETACH.9 DEVICE_PROBE.9 DEVICE_SHUTDOWN.9 \ BUS_CREATE_INTR.9 BUS_CONNECT_INTR.9 BUS_PRINT_CHILD.9 \ BUS_READ_IVAR.9 \ bus_generic_attach.9 bus_generic_detach.9 bus_generic_map_intr.9 \ 
bus_generic_print_child.9 bus_generic_read_ivar.9 \ bus_generic_shutdown.9 MLINKS+=MD5.9 MD5Init.9 MD5.9 MD5Transform.9 MLINKS+=VOP_ATTRIB.9 VOP_GETATTR.9 MLINKS+=VOP_ATTRIB.9 VOP_SETATTR.9 MLINKS+=VOP_CREATE.9 VOP_MKDIR.9 MLINKS+=VOP_CREATE.9 VOP_MKNOD.9 MLINKS+=VOP_CREATE.9 VOP_SYMLINK.9 MLINKS+=VOP_GETPAGES.9 VOP_PUTPAGES.9 MLINKS+=VOP_INACTIVE.9 VOP_RECLAIM.9 MLINKS+=VOP_LOCK.9 VOP_ISLOCKED.9 MLINKS+=VOP_LOCK.9 VOP_UNLOCK.9 MLINKS+=VOP_OPENCLOSE.9 VOP_CLOSE.9 MLINKS+=VOP_OPENCLOSE.9 VOP_OPEN.9 MLINKS+=VOP_RDWR.9 VOP_READ.9 MLINKS+=VOP_RDWR.9 VOP_WRITE.9 MLINKS+=VOP_REMOVE.9 VOP_RMDIR.9 MLINKS+=at_exit.9 rm_at_exit.9 MLINKS+=at_fork.9 rm_at_fork.9 MLINKS+=at_shutdown.9 rm_at_shutdown.9 MLINKS+=copy.9 copyin.9 copy.9 copyinstr.9 copy.9 copyout.9 copy.9 copystr.9 MLINKS+=devstat.9 devicestat.9 MLINKS+=fetch.9 fubyte.9 fetch.9 fuswintr.9 fetch.9 fusword.9 fetch.9 fuword.9 MLINKS+=ifnet.9 if_data.9 ifnet.9 ifaddr.9 ifnet.9 ifqueue.9 MLINKS+=kernacc.9 useracc.9 MLINKS+=malloc.9 FREE.9 malloc.9 MALLOC.9 malloc.9 free.9 MLINKS+=mi_switch.9 cpu_switch.9 MLINKS+=posix4.9 p1003_1b.9 MLINKS+=psignal.9 gsignal.9 psignal.9 pgsignal.9 MLINKS+=rtalloc.9 rtalloc1.9 rtalloc.9 rtalloc_ign.9 MLINKS+=sleep.9 tsleep.9 sleep.9 wakeup.9 sleep.9 wakeup_one.9 MLINKS+=sleep.9 asleep.9 sleep.9 await.9 +MLINKS+=buf.9 bp.9 MLINKS+=spl.9 spl0.9 MLINKS+=spl.9 splbio.9 spl.9 splclock.9 spl.9 splhigh.9 spl.9 splimp.9 MLINKS+=spl.9 splnet.9 spl.9 splsoftclock.9 spl.9 splsofttty.9 MLINKS+=spl.9 splstatclock.9 spl.9 spltty.9 spl.9 splvm.9 spl.9 splx.9 MLINKS+=store.9 subyte.9 store.9 suswintr.9 store.9 susword.9 store.9 suword.9 MLINKS+=time.9 boottime.9 time.9 mono_time.9 time.9 runtime.9 MLINKS+=timeout.9 untimeout.9 MLINKS+=vref.9 VREF.9 MLINKS+=vslock.9 vsunlock.9 MLINKS+=device_add_child.9 device_add_child_after.9 MLINKS+=device_enable.9 device_disable.9 MLINKS+=device_enable.9 device_is_enabled.9 MLINKS+=device_get_state.9 device_busy.9 MLINKS+=device_get_state.9 device_unbusy.9 
MLINKS+=device_get_state.9 device_is_alive.9 MLINKS+=device_set_desc.9 device_get_desc.9 MLINKS+=devclass_add_driver.9 devclass_delete_driver.9 MLINKS+=devclass_add_driver.9 devclass_find_driver.9 MLINKS+=BUS_READ_IVAR.9 BUS_WRITE_IVAR.9 MLINKS+=bus_generic_read_ivar.9 bus_generic_write_ivar.9 .include Index: head/share/man/man9/buf.9 =================================================================== --- head/share/man/man9/buf.9 (nonexistent) +++ head/share/man/man9/buf.9 (revision 42016) @@ -0,0 +1,123 @@ +.\" Copyright (c) 1998 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $Id: security.7,v 1.1 1998/12/20 20:12:17 dillon Exp $ +.\" +.Dd December 22, 1998 +.Dt BUF 9 +.Os +.Sh NAME +.Nm BUF/BP +.Nd Kernel Buffer I/O scheme used in FreeBSD VM system +.Sh DESCRIPTION +.Pp +The kernel implements a KVM abstraction of the buffer cache which allows it +to map potentially disparate vm_page's into contiguous KVM for use by +(mainly filesystem) devices and device I/O. This abstraction supports +block sizes from DEV_BSIZE (usually 512) to upwards of several pages or more. +It also supports a relatively primitive byte-granular valid range and dirty +range currently hardcoded for use by NFS. The code implementing the +VM Buffer abstraction is mostly concentrated in /usr/src/sys/kern/vfs_bio.c. +.Pp +One of the most important things to remember when dealing with buffer pointers +(struct buf) is that the underlying pages are mapped directly from the buffer +cache. No data copying occurs in the scheme proper, though some filesystems +such as UFS do have to copy a little when dealing with file fragments. The +second most important thing to remember is that due to the underlying page +mapping, the b_data base pointer in a buf is always *page* aligned, not +*block* aligned. When you have a VM buffer representing some b_offset and +b_size, the actual start of the buffer is (b_data + (b_offset & PAGE_MASK)) +and not just b_data. 
Finally, the VM system's core buffer cache supports +valid and dirty bits (m->valid, m->dirty) for pages in DEV_BSIZE chunks. Thus +a platform with a hardware page size of 4096 bytes has 8 valid and 8 dirty +bits. These bits are generally set and cleared in groups based on the device +block size of the device backing the page. A complete page's worth is often +referred to using the VM_PAGE_BITS_ALL bitmask (i.e. 0xFF if the hardware page +size is 4096). +.Pp +VM buffers also keep track of a byte-granular dirty range and valid range. +This feature is normally only used by the NFS subsystem. I'm not sure why it +is used at all, actually, since we have DEV_BSIZE valid/dirty granularity +within the VM buffer. If a buffer dirty operation creates a 'hole', +the dirty range will extend to cover the hole. If a buffer validation +operation creates a 'hole' the byte-granular valid range is left alone and +will not take into account the new extension. Thus the whole byte-granular +abstraction is considered a bad hack and it would be nice if we could get rid +of it completely. +.Pp +A VM buffer is capable of mapping the underlying VM cache pages into KVM in +order to allow the kernel to directly manipulate the data associated with +the (vnode,b_offset,b_size). The kernel typically unmaps VM buffers the moment +they are no longer needed but often keeps the 'struct buf' structure +instantiated and even bp->b_pages array instantiated despite having unmapped +them from KVM. If a page making up a VM buffer is about to undergo I/O, the +system typically unmaps it from KVM and replaces the page in the b_pages[] +array with a placemarker called bogus_page. The placemarker forces any kernel +subsystems referencing the associated struct buf to re-lookup the associated +page. I believe the placemarker hack is used to allow sophisticated devices +such as filesystem devices to remap underlying pages in order to deal with, +for example, remapping a file fragment into a file block. 
+.Pp +VM buffers are used to track I/O operations within the kernel. Unfortunately, +the I/O implementation is also somewhat of a hack because the kernel wants +to clear the dirty bit on the underlying pages the moment it queues the I/O +to the VFS device, not when the physical I/O is actually initiated. This +can create confusion within filesystem devices that use delayed-writes because +you wind up with pages marked clean that are actually still dirty. If not +treated carefully, these pages could be thrown away! Indeed, a number of +serious bugs related to this hack were not fixed until the 2.2.8/3.0.0 release. +The kernel uses an instantiated VM buffer (i.e. struct buf) to placemark pages +in this special state. The buffer is typically flagged B_DELWRI. When a +device no longer needs a buffer it typically flags it as B_RELBUF. Due to +the underlying pages being marked clean, the B_DELWRI|B_RELBUF combination must +be interpreted to mean that the buffer is still actually dirty and must be +written to its backing store before it can actually be released. In the case +where B_DELWRI is not set, the underlying dirty pages are still properly +marked as dirty and the buffer can be completely freed without losing that +clean/dirty state information. ( XXX do we have to check other flags in +regards to this situation ??? ). +.Pp +The kernel reserves a portion of its KVM space to hold VM Buffer's data +maps. Even though this is virtual space (since the buffers are mapped +from the buffer cache), we cannot make it arbitrarily large because +instantiated VM Buffers (struct buf's) prevent their underlying pages in the +buffer cache from being freed. This can complicate the life of the paging +system. +.Pp +.Sh SEE ALSO +.Pp +.Xr 9 +.Sh HISTORY +The +.Nm +manual page was originally written by Matthew Dillon and first appeared +in FreeBSD-3.0.1, December 1998. 
+ Property changes on: head/share/man/man9/buf.9 ___________________________________________________________________ Added: svn:keywords ## -0,0 +1 ## +FreeBSD=%H \ No newline at end of property